Commit 46754745 authored by Florian Fischer
Browse files

add larson benchmark

parent 5ef2bb37
#include <assert.h>
#include <stdio.h>
#if defined(_WIN32)
#define __WIN32__
#endif
#ifdef __WIN32__
#include <windows.h>
#include <conio.h>
#include <process.h>
#else
#include <unistd.h>
#include <sys/resource.h>
#include <sys/time.h>
#ifndef __SVR4
//extern "C" int pthread_setconcurrency (int) throw();
#include <pthread.h>
#endif
/* Stand-ins for the Win32 type names the rest of this file uses, so the
   same source builds when <windows.h> is not available. */
typedef void * LPVOID;
typedef long long LONGLONG;
typedef long DWORD;
typedef long LONG;
typedef unsigned long ULONG;
/* Union overlaying a LowPart/HighPart pair with a single QuadPart value. */
typedef union _LARGE_INTEGER {
struct {
DWORD LowPart;
LONG HighPart;
} foo;
LONGLONG QuadPart; // In Visual C++, a typedef to __int64
} LARGE_INTEGER;
/* Name used below for tick differences. */
typedef long long _int64;
#ifndef TRUE
enum { TRUE = 1, FALSE = 0 };
#endif
#include <assert.h>
#define _ASSERTE(x) assert(x)
#define _inline inline
/* Win32-style Sleep() for POSIX builds: x is a duration in milliseconds.
   Only the whole seconds contained in x are slept; the integer division
   discards any remainder below one second. */
void Sleep (long x)
{
    long whole_seconds = x / 1000;
    sleep(whole_seconds);
}
/* POSIX replacement for the Win32 call of the same name: stores the current
   wall-clock time, expressed in microseconds, into *x. */
void QueryPerformanceCounter (long * x)
{
    struct timeval tv;
    /* The timezone argument is obsolete and POSIX leaves the behaviour
       unspecified when it is non-null, so pass NULL. */
    gettimeofday (&tv, NULL);
    *x = tv.tv_sec * 1000000L + tv.tv_usec;
}
/* Reports the tick rate that goes with QueryPerformanceCounter(): one
   million ticks per second, i.e. microsecond units. */
void QueryPerformanceFrequency(long * x)
{
    const long ticks_per_second = 1000000L;
    *x = ticks_per_second;
}
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include <ctype.h>
#include <time.h>
#include <assert.h>
#define _REENTRANT 1
#include <pthread.h>
#ifdef __sun
#include <thread.h>
#endif
/* Signature of a pthread start routine. */
typedef void * VoidFunction (void *);
/*
 * pthread-based stand-in for the Win32 _beginthread() call.
 *
 *   x            start routine for the new thread
 *   (unnamed)    second argument of the Win32 call; unused here
 *   z            argument handed to the start routine
 *
 * The thread is created detached: the pthread_t is local to this function
 * and never handed out, so no caller could ever join it, and a joinable
 * thread that is never joined keeps its resources after it exits.
 * A creation failure is reported on stderr; there is no return value to
 * carry it.
 */
void _beginthread (VoidFunction x, int, void * z)
{
    pthread_t pt;
    pthread_attr_t pa;
    int v;

    pthread_attr_init (&pa);
#if 1//defined(__SVR4)
    pthread_attr_setscope (&pa, PTHREAD_SCOPE_SYSTEM); /* bound behavior */
#endif
    pthread_attr_setdetachstate (&pa, PTHREAD_CREATE_DETACHED);
    v = pthread_create(&pt, &pa, x, z);
    /* The attribute object is only needed during creation. */
    pthread_attr_destroy (&pa);
    if (v != 0) {
        fprintf (stderr, "pthread_create failed: %s\n", strerror(v));
    }
}
#endif
#if 0
static char buf[65536];
#define malloc(v) &buf
#define free(p)
#endif
#undef CPP
//#define CPP
//#include "arch-specific.h"
#if USE_ROCKALL
//#include "FastHeap.hpp"
//FAST_HEAP theFastHeap (1024 * 1024, true, true, true);
typedef int SBIT32;
#include "SmpHeap.hpp"
SMP_HEAP theFastHeap (1024 * 1024, true, true, true);
/* Routes global operator new to theFastHeap.
   The parameter has to be size_t: a replacement allocation function must
   take exactly that type, and unsigned int is a different type on targets
   where size_t is 64 bits wide. */
void * operator new( size_t cb )
{
    void *pRet = theFastHeap.New (cb) ;
    return pRet;
}
/* Counterpart of the operator new replacement: hands the block back to
   theFastHeap. */
void operator delete(void *pUserData )
{
    theFastHeap.Delete (pUserData);
}
#endif
#if 0
/* Disabled (#if 0) variant: forwards operator new/delete to an external
   hdmalloc/hdfree pair declared with C linkage. */
extern "C" void * hdmalloc (size_t sz) ;
extern "C" void hdfree (void * ptr) ;
extern "C" void hdmalloc_stats (void) ;
/* Allocation: forwards the requested byte count to hdmalloc. */
void * operator new( unsigned int cb )
{
void *pRet = hdmalloc((size_t)cb) ;
return pRet;
}
/* Deallocation: forwards the pointer to hdfree. */
void operator delete(void *pUserData )
{
hdfree(pUserData) ;
}
#endif
/* Test driver for memory allocators */
/* Author: Paul Larson, palarson@microsoft.com */
#define MAX_THREADS 100
#define MAX_BLOCKS 20000000
int volatile stopflag=FALSE ;
/* State of one lran2 pseudo-random generator instance. */
struct lran2_st {
    long x;
    long y;
    long v[97];
};
int TotalAllocs=0 ;
/* Per-thread work description and counters.  runthreads() fills one record
   per worker; exercise_heap() receives it as its thread argument. */
typedef struct thr_data {
    int threadno;                 /* 1-based worker number */
    int NumBlocks;                /* free/allocate iterations per worker pass */
    int seed;                     /* seed used to initialise rgen */
    int min_size;                 /* lower bound for block sizes */
    int max_size;                 /* upper bound for block sizes */
    char **array;                 /* this worker's slice of block pointers */
    int *blksize;                 /* recorded size of each block in array */
    int asize;                    /* number of slots in array / blksize */
    unsigned long cAllocs;        /* allocations performed */
    unsigned long cFrees;         /* frees performed */
    int cThreads;                 /* bumped each time a worker pass starts */
    unsigned long cBytesAlloced;  /* not updated by the code visible here */
    volatile int finished;        /* set by the worker, polled by runthreads() */
    struct lran2_st rgen;         /* this worker's random generator state */
} thread_data;
/* Forward declarations of the benchmark routines and the random generator. */
void runthreads(long sleep_cnt, int min_threads, int max_threads,
int chperthread, int num_rounds) ;
void runloops(long sleep_cnt, int num_chunks ) ;
static void warmup(char **blkp, int num_chunks );
static void * exercise_heap( void *pinput) ;
static void lran2_init(struct lran2_st* d, long seed) ;
static long lran2(struct lran2_st* d) ;
ULONG CountReservedSpace() ;
/* Table of live block pointers and, in parallel, the size requested for
   each one.  Both hold MAX_BLOCKS entries; worker threads are given
   disjoint slices of them. */
char ** blkp = new char *[MAX_BLOCKS] ;
int * blksize = new int[MAX_BLOCKS] ;
long seqlock=0 ;
/* Generator used outside the worker threads; seeded in main(). */
struct lran2_st rgen ;
/* Bounds for the random block sizes; main() overwrites them from the
   command line or the interactive prompts. */
int min_size=10, max_size=500 ;
/* Thread count of the round runthreads() is currently running. */
int num_threads ;
ULONG init_space ;
/* Declared extern; the part of the file visible here neither defines nor
   uses these counters. */
extern int cLockSleeps ;
extern int cAllocedChunks ;
extern int cAllocedSpace ;
extern int cUsedSpace ;
extern int cFreeChunks ;
extern int cFreeSpace ;
int cChecked=0 ;
#if defined(_WIN32)
extern "C" {
extern HANDLE crtheap;
};
#endif
/*
 * Benchmark entry point.
 *
 * When at least seven arguments are given the run is configured from the
 * command line:
 *   argv[1] runtime in seconds      argv[2] minimum chunk size
 *   argv[3] maximum chunk size      argv[4] chunks per thread
 *   argv[5] number of rounds        argv[6] random seed
 *   argv[7] thread count (used as both minimum and maximum)
 * Otherwise the same settings are prompted for and read from stdin.
 *
 * Returns 0 after the benchmark has run, 1 when the settings are unusable
 * (unreadable input, no threads or chunks, or more than MAX_BLOCKS chunks).
 */
int main (int argc, char *argv[])
{
#if defined(USE_LFH) && defined(_WIN32)
    // Activate 'Low Fragmentation Heap'.
    ULONG info = 2;
    HeapSetInformation (GetProcessHeap(),
                        HeapCompatibilityInformation,
                        &info,
                        sizeof(info));
#endif
#if 0 // defined(__SVR4)
    {
        psinfo_t ps;
        int pid = getpid();
        char fname[255];
        sprintf (fname, "/proc/%d/psinfo", pid);
        // sprintf (fname, "/proc/self/ps");
        FILE * f = fopen (fname, "rb");
        printf ("opening %s\n", fname);
        if (f) {
            fread (&ps, sizeof(ps), 1, f);
            printf ("resident set size = %dK\n", ps.pr_rssize);
            fclose (f);
        }
    }
#endif
#if defined(_MT) || defined(_REENTRANT)
    int min_threads, max_threads ;
    int num_rounds ;
    int chperthread ;
#endif
    unsigned seed=12345 ;
    int num_chunks=10000;
    long sleep_cnt;
    int inputs_ok=1 ;   /* cleared as soon as one interactive read fails */
    if (argc > 7) {
        sleep_cnt = atoi(argv[1]);
        min_size = atoi(argv[2]);
        max_size = atoi(argv[3]);
        chperthread = atoi(argv[4]);
        num_rounds = atoi(argv[5]);
        seed = atoi(argv[6]);
        max_threads = atoi(argv[7]);
        min_threads = max_threads;
        printf ("sleep = %ld, min = %d, max = %d, per thread = %d, num rounds = %d, seed = %u, max_threads = %d, min_threads = %d\n",
                sleep_cnt, min_size, max_size, chperthread, num_rounds, seed, max_threads, min_threads);
        goto DoneWithInput;
    }
#if defined(_MT) || defined(_REENTRANT)
    //#ifdef _MT
    printf( "\nMulti-threaded test driver \n") ;
#else
    printf( "\nSingle-threaded test driver \n") ;
#endif
#ifdef CPP
    printf("C++ version (new and delete)\n") ;
#else
    printf("C version (malloc and free)\n") ;
#endif
    printf("runtime (sec): ") ;
    inputs_ok &= (scanf ("%ld", &sleep_cnt) == 1) ;
    printf("chunk size (min,max): ") ;
    inputs_ok &= (scanf("%d %d", &min_size, &max_size ) == 2) ;
#if defined(_MT) || defined(_REENTRANT)
    //#ifdef _MT
    printf("threads (min, max): ") ;
    inputs_ok &= (scanf("%d %d", &min_threads, &max_threads) == 2) ;
    printf("chunks/thread: ") ; inputs_ok &= (scanf("%d", &chperthread ) == 1) ;
    printf("no of rounds: ") ; inputs_ok &= (scanf("%d", &num_rounds ) == 1) ;
#else
    printf("no of chunks: ") ; inputs_ok &= (scanf("%d", &num_chunks ) == 1) ;
#endif
    printf("random seed: ") ; inputs_ok &= (scanf("%u", &seed) == 1) ;
    if( !inputs_ok ){
        /* Carrying on would use settings that were never assigned. */
        printf("Invalid input - exiting\n") ;
        return(1) ;
    }
DoneWithInput:
#if defined(_MT) || defined(_REENTRANT)
    /* A chunk count of zero makes the workers take a remainder by zero. */
    if( max_threads < 1 || chperthread < 1 ){
        printf("Need at least 1 thread and 1 chunk per thread - exiting\n") ;
        return(1) ;
    }
    /* Same test as max_threads*chperthread > MAX_BLOCKS, written so the
       product cannot overflow.  It runs for both input paths: the block
       table holds MAX_BLOCKS entries no matter how the run was set up. */
    if( chperthread > MAX_BLOCKS / max_threads ){
        printf("Max %d chunks - exiting\n", MAX_BLOCKS ) ;
        return(1) ;
    }
    num_chunks = max_threads*chperthread ;
#else
    if( num_chunks < 1 ){
        printf("Need at least 1 chunk - exiting\n") ;
        return(1) ;
    }
    if( num_chunks > MAX_BLOCKS ){
        printf("Max %d chunks - exiting\n", MAX_BLOCKS ) ;
        return(1) ;
    }
#endif
#ifndef __WIN32__
#ifdef __SVR4
    pthread_setconcurrency (max_threads);
#endif
#endif
    lran2_init(&rgen, seed) ;
    // init_space = CountReservedSpace() ;
#if defined(_MT) || defined(_REENTRANT)
    //#ifdef _MT
    runthreads(sleep_cnt, min_threads, max_threads, chperthread, num_rounds) ;
#else
    runloops(sleep_cnt, num_chunks ) ;
#endif
#ifdef _DEBUG
    _cputs("Hit any key to exit...") ; (void)_getch() ;
#endif
    return 0;
} /* main */
/*
 * Single-threaded benchmark.
 *
 *   sleep_cnt   minimum run time in seconds
 *   num_chunks  number of blocks kept alive in blkp[]
 *
 * Fills blkp[0..num_chunks) with randomly sized blocks, then repeatedly
 * replaces randomly chosen blocks in passes of num_chunks replacements
 * until at least sleep_cnt seconds have elapsed.  Prints the duration, the
 * replacement rate and the space figures on one line.
 */
void runloops(long sleep_cnt, int num_chunks )
{
    int cblks ;
    int victim ;
    int blk_size ;
#ifdef __WIN32__
    _LARGE_INTEGER ticks_per_sec, start_cnt, end_cnt;
#else
    long ticks_per_sec ;
    long start_cnt, end_cnt ;
#endif
    _int64 ticks ;
    double duration ;
    double reqd_space ;
    /* The measurement that would fill this in is commented out below, so
       start from 0 instead of printing an indeterminate value. */
    ULONG used_space = 0 ;
    /* unsigned long, as in runthreads(): an int overflows on long runs. */
    unsigned long sum_allocs=0 ;
    QueryPerformanceFrequency( &ticks_per_sec ) ;
    QueryPerformanceCounter( &start_cnt) ;
    /* Initial fill. */
    for( cblks=0; cblks<num_chunks; cblks++){
        if (max_size == min_size) {
            blk_size = min_size;
        } else {
            blk_size = min_size+lran2(&rgen)%(max_size - min_size) ;
        }
#ifdef CPP
        blkp[cblks] = new char[blk_size] ;
#else
        blkp[cblks] = (char *) malloc(blk_size) ;
#endif
        blksize[cblks] = blk_size ;
        assert(blkp[cblks] != NULL) ;
    }
    while(TRUE){
        /* One pass: num_chunks random replace operations. */
        for( cblks=0; cblks<num_chunks; cblks++){
            victim = lran2(&rgen)%num_chunks ;
#ifdef CPP
            delete [] blkp[victim] ;   /* blocks come from new char[] */
#else
            free(blkp[victim]) ;
#endif
            if (max_size == min_size) {
                blk_size = min_size;
            } else {
                blk_size = min_size+lran2(&rgen)%(max_size - min_size) ;
            }
#ifdef CPP
            blkp[victim] = new char[blk_size] ;
#else
            blkp[victim] = (char *) malloc(blk_size) ;
#endif
            blksize[victim] = blk_size ;
            assert(blkp[victim] != NULL) ;
        }
        sum_allocs += num_chunks ;
        /* The clock is only consulted between passes. */
        QueryPerformanceCounter( &end_cnt) ;
#ifdef __WIN32__
        ticks = end_cnt.QuadPart - start_cnt.QuadPart ;
        duration = (double)ticks/ticks_per_sec.QuadPart ;
#else
        ticks = end_cnt - start_cnt ;
        duration = (double)ticks/ticks_per_sec ;
#endif
        if( duration >= sleep_cnt) break ;
    }
    reqd_space = (0.5*(min_size+max_size)*num_chunks) ;
    // used_space = CountReservedSpace() - init_space;
    printf("%6.3f", duration ) ;
    printf("%8.0f", sum_allocs/duration ) ;
    printf(" %6.3f %.3f", (double)used_space/(1024*1024), used_space/reqd_space) ;
    printf("\n") ;
}
#if defined(_MT) || defined(_REENTRANT)
//#ifdef _MT
/*
 * Multi-threaded benchmark.
 *
 *   sleep_cnt     measured run time of each round, in seconds
 *   min_threads   thread count of the first round
 *   max_threads   thread count of the last round
 *   chperthread   number of block slots owned by each worker
 *   num_rounds    multiplier for the iterations of one worker pass
 *
 * For every thread count from min_threads to max_threads: allocates the
 * blocks for the workers added since the previous round, starts the
 * workers, lets them run for sleep_cnt seconds, raises stopflag, waits for
 * every worker to report finished and prints the allocation throughput.
 */
void runthreads(long sleep_cnt, int min_threads, int max_threads, int chperthread, int num_rounds)
{
    thread_data *de_area = new thread_data[max_threads] ;
    int nperthread ;
    int sum_threads ;
    unsigned long sum_allocs ;
    unsigned long sum_frees ;
    double duration ;
#ifdef __WIN32__
    _LARGE_INTEGER ticks_per_sec, start_cnt, end_cnt;
#else
    long ticks_per_sec ;
    long start_cnt, end_cnt ;
#endif
    _int64 ticks ;
    double rate_1=0, rate_n ;
    double reqd_space ;
    ULONG used_space ;
    int prevthreads ;
    int i ;
    QueryPerformanceFrequency( &ticks_per_sec ) ;
    /* Clear every record, not just the first, so that no field of any
       worker's record starts out indeterminate. */
    memset(de_area, 0, sizeof(thread_data) * max_threads) ;
    prevthreads = 0 ;
    for(num_threads=min_threads; num_threads <= max_threads; num_threads++ )
    {
        /* Only the slots of workers new in this round need blocks; earlier
           slots still hold the blocks left by the previous round. */
        warmup(&blkp[prevthreads*chperthread], (num_threads-prevthreads)*chperthread );
        nperthread = chperthread ;
        stopflag = FALSE ;
        for(i=0; i< num_threads; i++){
            de_area[i].threadno = i+1 ;
            de_area[i].NumBlocks = num_rounds*nperthread;
            de_area[i].array = &blkp[i*nperthread] ;
            de_area[i].blksize = &blksize[i*nperthread] ;
            de_area[i].asize = nperthread ;
            de_area[i].min_size = min_size ;
            de_area[i].max_size = max_size ;
            de_area[i].seed = lran2(&rgen) ;
            de_area[i].cAllocs = 0 ;
            de_area[i].cFrees = 0 ;
            de_area[i].cThreads = 0 ;
            de_area[i].finished = FALSE ;
            lran2_init(&de_area[i].rgen, de_area[i].seed) ;
#ifdef __WIN32__
            _beginthread((void (__cdecl*)(void *)) exercise_heap, 0, &de_area[i]) ;
#else
            _beginthread(exercise_heap, 0, &de_area[i]) ;
#endif
        }
        QueryPerformanceCounter( &start_cnt) ;
        // printf ("Sleeping for %ld seconds.\n", sleep_cnt);
        Sleep(sleep_cnt * 1000L) ;
        stopflag = TRUE ;
        /* Poll until every worker has flagged completion. */
        for(i=0; i<num_threads; i++){
            while( !de_area[i].finished ){
#ifdef __WIN32__
                Sleep(1);
#elif defined(__SVR4)
                thr_yield();
#else
                sched_yield();
#endif
            }
        }
        QueryPerformanceCounter( &end_cnt) ;
        sum_frees = sum_allocs =0 ;
        sum_threads = 0 ;
        for(i=0;i< num_threads; i++){
            sum_allocs += de_area[i].cAllocs ;
            sum_frees += de_area[i].cFrees ;
            sum_threads += de_area[i].cThreads ;
            de_area[i].cAllocs = de_area[i].cFrees = 0;
        }
#ifdef __WIN32__
        ticks = end_cnt.QuadPart - start_cnt.QuadPart ;
        duration = (double)ticks/ticks_per_sec.QuadPart ;
#else
        ticks = end_cnt - start_cnt ;
        duration = (double)ticks/ticks_per_sec ;
#endif
        for( i=0; i<num_threads; i++){
            if( !de_area[i].finished )
                printf("Thread at %d not finished\n", i) ;
        }
        rate_n = sum_allocs/duration ;
        if( rate_1 == 0){
            rate_1 = rate_n ;
        }
        reqd_space = (0.5*(min_size+max_size)*num_threads*chperthread) ;
        // used_space = CountReservedSpace() - init_space;
        used_space = 0;
        printf ("Throughput = %8.0f operations per second.\n", sum_allocs / duration);
#if 0
        printf("%2d ", num_threads ) ;
        printf("%6.3f", duration ) ;
        printf("%6.3f", rate_n/rate_1 ) ;
        printf("%8.0f", sum_allocs/duration ) ;
        printf(" %6.3f %.3f", (double)used_space/(1024*1024), used_space/reqd_space) ;
        printf("\n") ;
#endif
        Sleep(5000L) ; // wait 5 sec for old threads to die
        prevthreads = num_threads ;
        printf ("Done sleeping...\n");
    }
    delete [] de_area;
}
/*
 * Worker thread body.
 *
 *   pinput   the thread_data record describing this worker's slots
 *
 * Performs up to NumBlocks iterations; each one frees a randomly chosen
 * block of the worker's array and allocates a replacement of random size,
 * counting both operations.  The pass ends early once stopflag is set.
 * When the pass ends while stopflag is still clear, a fresh thread is
 * started on the same record to continue the work.
 * Always returns 0 (POSIX builds leave through pthread_exit instead).
 */
static void * exercise_heap( void *pinput)
{
    thread_data *pdea = (thread_data *)pinput ;
    int cblks=0 ;
    int victim ;
    long blk_size ;
    int range ;
    if( stopflag ){
        /* Report completion even when no work was done; otherwise a stop
           that arrives before this thread's first run leaves runthreads()
           polling 'finished' forever. */
        pdea->finished = TRUE ;
        return 0;
    }
    pdea->finished = FALSE ;
    pdea->cThreads++ ;
    range = pdea->max_size - pdea->min_size ;
    /* replace NumBlocks randomly chosen chunks by chunks of random size */
    for( cblks=0; cblks<pdea->NumBlocks; cblks++){
        victim = lran2(&pdea->rgen)%pdea->asize ;
#ifdef CPP
        delete [] pdea->array[victim] ;   /* blocks come from new char[] */
#else
        free(pdea->array[victim]) ;
#endif
        pdea->cFrees++ ;
        if (range == 0) {
            blk_size = pdea->min_size;
        } else {
            blk_size = pdea->min_size+lran2(&pdea->rgen)%range ;
        }
#ifdef CPP
        pdea->array[victim] = new char[blk_size] ;
#else
        pdea->array[victim] = (char *) malloc(blk_size) ;
#endif
        pdea->blksize[victim] = blk_size ;
        assert(pdea->array[victim] != NULL) ;
        pdea->cAllocs++ ;
        /* Write something!  Touch at most the first two bytes and never
           go past the end of a block shorter than that. */
        if( blk_size > 0 ){
            volatile char * chptr = ((char *) pdea->array[victim]);
            chptr[0] = 'a';
            volatile char ch = *((char *) pdea->array[victim]);
            (void) ch ;
            if( blk_size > 1 ){
                chptr[1] = 'b';
            }
        }
        if( stopflag ) break ;
    }
    // printf("Thread %u terminating: %d allocs, %d frees\n",
    // pdea->threadno, pdea->cAllocs, pdea->cFrees) ;
    pdea->finished = TRUE ;
    if( !stopflag ){
        /* Still inside the measured interval: keep the slots busy. */
#ifdef __WIN32__
        _beginthread((void (__cdecl*)(void *)) exercise_heap, 0, pdea) ;
#else
        _beginthread(exercise_heap, 0, pdea) ;
#endif
    } else {
        printf ("thread stopping.\n");
    }
#ifndef _WIN32
    pthread_exit (NULL);
#endif
    return 0;
}
static void warmup(char **blkp, int num_chunks )
{
int cblks ;
int victim ;
int blk_size ;
LPVOID tmp ;
for( cblks=0; cblks<num_chunks; cblks++){
if (min_size == max_size) {
blk_size = min_size;
} else {
blk_size = min_size+lran2(&rgen)%(max_size-min_size) ;
}
#ifdef CPP
blkp[cblks] = new char[blk_size] ;
#else
blkp[cblks] = (char *) malloc(blk_size) ;