// benchmarks/larson.cc
//
// Platform layer.  On Windows the native headers are used as-is.  On every
// other platform the Win32 types and calls referenced further down in this
// file (Sleep, QueryPerformanceCounter, QueryPerformanceFrequency,
// _beginthread, LARGE_INTEGER, ...) are emulated on top of POSIX.
#include <assert.h>
#include <stdio.h>

#if defined(_WIN32)
#define __WIN32__
#endif

#ifdef __WIN32__
#include <windows.h>
#include <conio.h>
#include <process.h>

#else
#ifndef _REENTRANT
#define _REENTRANT 1
#endif

#include <ctype.h>
#include <errno.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <sys/resource.h>
#include <sys/time.h>

//extern "C" int pthread_setconcurrency (int) throw();
#include <pthread.h>
#ifdef __sun
#include <thread.h>
#endif

/* Minimal stand-ins for the Win32 type names used by the rest of the file. */
typedef void * LPVOID;
typedef long long LONGLONG;
typedef long DWORD;
typedef long LONG;
typedef unsigned long ULONG;
typedef union _LARGE_INTEGER {
  struct {
    DWORD LowPart;
    LONG  HighPart;
  } foo;
  LONGLONG QuadPart;    // In Visual C++, a typedef to __int64
} LARGE_INTEGER;
typedef long long _int64;
#ifndef TRUE
enum { TRUE = 1, FALSE = 0 };
#endif
#define _ASSERTE(x) assert(x)
#define _inline inline

/*
 * Suspend the calling thread for x milliseconds.
 *
 * Zero and negative values return immediately.  Sub-second requests are
 * honoured (the interval is not truncated to whole seconds), and the sleep
 * is resumed if a signal interrupts it.
 */
void Sleep (long x)
{
  struct timespec ts;

  if (x <= 0) {
    return;
  }
  ts.tv_sec  = (time_t) (x / 1000);
  ts.tv_nsec = (x % 1000) * 1000000L;
  /* nanosleep() stores the unslept remainder in its second argument. */
  while (nanosleep (&ts, &ts) == -1 && errno == EINTR) {
    /* interrupted: keep sleeping for the remainder */
  }
}

/*
 * Store the current wall-clock time, in microseconds, in *x.
 * The unit matches the value reported by QueryPerformanceFrequency().
 */
void QueryPerformanceCounter (long * x)
{
  struct timeval tv;

  /* The timezone argument of gettimeofday() is obsolete; pass NULL. */
  gettimeofday (&tv, NULL);
  *x = (long) tv.tv_sec * 1000000L + (long) tv.tv_usec;
}

/*
 * Store the number of QueryPerformanceCounter() ticks per second in *x.
 */
void QueryPerformanceFrequency(long * x)
{
  *x = 1000000L;
}

typedef void * VoidFunction (void *);

/*
 * Start a new thread running x(z).
 *
 * x          - thread entry point
 * stack_size - accepted for signature compatibility; ignored
 * z          - argument handed to x
 *
 * The thread handle is not returned to the caller, so the thread is created
 * detached and its resources are reclaimed when it exits.  A creation
 * failure is reported on stderr.
 */
void _beginthread (VoidFunction x, int stack_size, void * z)
{
  pthread_t pt;
  pthread_attr_t pa;
  int v;

  (void) stack_size;

  pthread_attr_init (&pa);
  pthread_attr_setscope (&pa, PTHREAD_SCOPE_SYSTEM); /* bound behavior */
  pthread_attr_setdetachstate (&pa, PTHREAD_CREATE_DETACHED);

  v = pthread_create (&pt, &pa, x, z);
  pthread_attr_destroy (&pa);

  if (v != 0) {
    fprintf (stderr, "pthread_create failed: %s\n", strerror (v));
  }
}
#endif
+ +#if 0 +static char buf[65536]; + +#define malloc(v) &buf +#define free(p) +#endif + +#undef CPP +//#define CPP +//#include "arch-specific.h" + +#if USE_ROCKALL +//#include "FastHeap.hpp" +//FAST_HEAP theFastHeap (1024 * 1024, true, true, true); + +typedef int SBIT32; + +#include "SmpHeap.hpp" +SMP_HEAP theFastHeap (1024 * 1024, true, true, true); + +void * operator new( unsigned int cb ) +{ + void *pRet = theFastHeap.New ((size_t)cb) ; + return pRet; +} + +void operator delete(void *pUserData ) +{ + theFastHeap.Delete (pUserData) ; +} +#endif + +#if 0 +extern "C" void * hdmalloc (size_t sz) ; +extern "C" void hdfree (void * ptr) ; +extern "C" void hdmalloc_stats (void) ; +void * operator new( unsigned int cb ) +{ + void *pRet = hdmalloc((size_t)cb) ; + return pRet; +} + +void operator delete(void *pUserData ) +{ + hdfree(pUserData) ; +} +#endif + + + +/* Test driver for memory allocators */ +/* Author: Paul Larson, palarson@microsoft.com */ +#define MAX_THREADS 100 +#define MAX_BLOCKS 20000000 + +int volatile stopflag=FALSE ; + +struct lran2_st { + long x, y, v[97]; +}; + +int TotalAllocs=0 ; + +typedef struct thr_data { + + int threadno ; + int NumBlocks ; + int seed ; + + int min_size ; + int max_size ; + + char * *array ; + int *blksize ; + int asize ; + + unsigned long cAllocs ; + unsigned long cFrees ; + int cThreads ; + unsigned long cBytesAlloced ; + + volatile int finished ; + struct lran2_st rgen ; + +} thread_data; + +void runthreads(long sleep_cnt, int min_threads, int max_threads, + int chperthread, int num_rounds) ; +void runloops(long sleep_cnt, int num_chunks ) ; +static void warmup(char **blkp, int num_chunks ); +static void * exercise_heap( void *pinput) ; +static void lran2_init(struct lran2_st* d, long seed) ; +static long lran2(struct lran2_st* d) ; +ULONG CountReservedSpace() ; + +char ** blkp = new char *[MAX_BLOCKS] ; +int * blksize = new int[MAX_BLOCKS] ; +long seqlock=0 ; +struct lran2_st rgen ; +int min_size=10, max_size=500 ; +int 
num_threads ; +ULONG init_space ; + +extern int cLockSleeps ; +extern int cAllocedChunks ; +extern int cAllocedSpace ; +extern int cUsedSpace ; +extern int cFreeChunks ; +extern int cFreeSpace ; + +int cChecked=0 ; + +#if defined(_WIN32) +extern "C" { + extern HANDLE crtheap; +}; +#endif + +int main (int argc, char *argv[]) +{ +#if defined(USE_LFH) && defined(_WIN32) + // Activate 'Low Fragmentation Heap'. + ULONG info = 2; + HeapSetInformation (GetProcessHeap(), + HeapCompatibilityInformation, + &info, + sizeof(info)); +#endif +#if 0 // defined(__SVR4) + { + psinfo_t ps; + int pid = getpid(); + char fname[255]; + sprintf (fname, "/proc/%d/psinfo", pid); + // sprintf (fname, "/proc/self/ps"); + FILE * f = fopen (fname, "rb"); + printf ("opening %s\n", fname); + if (f) { + fread (&ps, sizeof(ps), 1, f); + printf ("resident set size = %dK\n", ps.pr_rssize); + fclose (f); + } + } +#endif + +#if defined(_MT) || defined(_REENTRANT) + int min_threads, max_threads ; + int num_rounds ; + int chperthread ; +#endif + unsigned seed=12345 ; + int num_chunks=10000; + long sleep_cnt; + + if (argc > 7) { + sleep_cnt = atoi(argv[1]); + min_size = atoi(argv[2]); + max_size = atoi(argv[3]); + chperthread = atoi(argv[4]); + num_rounds = atoi(argv[5]); + seed = atoi(argv[6]); + max_threads = atoi(argv[7]); + min_threads = max_threads; + printf ("sleep = %ld, min = %d, max = %d, per thread = %d, num rounds = %d, seed = %d, max_threads = %d, min_threads = %d\n", + sleep_cnt, min_size, max_size, chperthread, num_rounds, seed, max_threads, min_threads); + goto DoneWithInput; + } + +#if defined(_MT) || defined(_REENTRANT) + //#ifdef _MT + printf( "\nMulti-threaded test driver \n") ; +#else + printf( "\nSingle-threaded test driver \n") ; +#endif +#ifdef CPP + printf("C++ version (new and delete)\n") ; +#else + printf("C version (malloc and free)\n") ; +#endif + printf("runtime (sec): ") ; + scanf ("%ld", &sleep_cnt); + + printf("chunk size (min,max): ") ; + scanf("%d %d", &min_size, 
&max_size ) ; +#if defined(_MT) || defined(_REENTRANT) + //#ifdef _MT + printf("threads (min, max): ") ; + scanf("%d %d", &min_threads, &max_threads) ; + printf("chunks/thread: ") ; scanf("%d", &chperthread ) ; + printf("no of rounds: ") ; scanf("%d", &num_rounds ) ; + num_chunks = max_threads*chperthread ; +#else + printf("no of chunks: ") ; scanf("%d", &num_chunks ) ; +#endif + printf("random seed: ") ; scanf("%d", &seed) ; + + DoneWithInput: + + if( num_chunks > MAX_BLOCKS ){ + printf("Max %d chunks - exiting\n", MAX_BLOCKS ) ; + return(1) ; + } + +#ifndef __WIN32__ +#ifdef __SVR4 + pthread_setconcurrency (max_threads); +#endif +#endif + + lran2_init(&rgen, seed) ; + // init_space = CountReservedSpace() ; + +#if defined(_MT) || defined(_REENTRANT) + //#ifdef _MT + runthreads(sleep_cnt, min_threads, max_threads, chperthread, num_rounds) ; +#else + runloops(sleep_cnt, num_chunks ) ; +#endif + +#ifdef _DEBUG + _cputs("Hit any key to exit...") ; (void)_getch() ; +#endif + + return 0; + +} /* main */ + +void runloops(long sleep_cnt, int num_chunks ) +{ + int cblks ; + int victim ; + int blk_size ; +#ifdef __WIN32__ + _LARGE_INTEGER ticks_per_sec, start_cnt, end_cnt; +#else + long ticks_per_sec ; + long start_cnt, end_cnt ; +#endif + _int64 ticks ; + double duration ; + double reqd_space ; + ULONG used_space ; + int sum_allocs=0 ; + + QueryPerformanceFrequency( &ticks_per_sec ) ; + QueryPerformanceCounter( &start_cnt) ; + + for( cblks=0; cblks<num_chunks; cblks++){ + if (max_size == min_size) { + blk_size = min_size; + } else { + blk_size = min_size+lran2(&rgen)%(max_size - min_size) ; + } +#ifdef CPP + blkp[cblks] = new char[blk_size] ; +#else + blkp[cblks] = (char *) malloc(blk_size) ; +#endif + blksize[cblks] = blk_size ; + assert(blkp[cblks] != NULL) ; + } + + while(TRUE){ + for( cblks=0; cblks<num_chunks; cblks++){ + victim = lran2(&rgen)%num_chunks ; +#ifdef CPP + delete blkp[victim] ; +#else + free(blkp[victim]) ; +#endif + + if (max_size == min_size) { + 
blk_size = min_size; + } else { + blk_size = min_size+lran2(&rgen)%(max_size - min_size) ; + } +#ifdef CPP + blkp[victim] = new char[blk_size] ; +#else + blkp[victim] = (char *) malloc(blk_size) ; +#endif + blksize[victim] = blk_size ; + assert(blkp[victim] != NULL) ; + } + sum_allocs += num_chunks ; + + QueryPerformanceCounter( &end_cnt) ; +#ifdef __WIN32__ + ticks = end_cnt.QuadPart - start_cnt.QuadPart ; + duration = (double)ticks/ticks_per_sec.QuadPart ; +#else + ticks = end_cnt - start_cnt ; + duration = (double)ticks/ticks_per_sec ; +#endif + + if( duration >= sleep_cnt) break ; + } + reqd_space = (0.5*(min_size+max_size)*num_chunks) ; + // used_space = CountReservedSpace() - init_space; + + printf("%6.3f", duration ) ; + printf("%8.0f", sum_allocs/duration ) ; + printf(" %6.3f %.3f", (double)used_space/(1024*1024), used_space/reqd_space) ; + printf("\n") ; + +} + + +#if defined(_MT) || defined(_REENTRANT) +//#ifdef _MT +void runthreads(long sleep_cnt, int min_threads, int max_threads, int chperthread, int num_rounds) +{ + thread_data *de_area = new thread_data[max_threads] ; + thread_data *pdea; + int nperthread ; + int sum_threads ; + unsigned long sum_allocs ; + unsigned long sum_frees ; + double duration ; +#ifdef __WIN32__ + _LARGE_INTEGER ticks_per_sec, start_cnt, end_cnt; +#else + long ticks_per_sec ; + long start_cnt, end_cnt ; +#endif + _int64 ticks ; + double rate_1=0, rate_n ; + double reqd_space ; + ULONG used_space ; + int prevthreads ; + int i ; + + QueryPerformanceFrequency( &ticks_per_sec ) ; + + pdea = &de_area[0] ; + memset(&de_area[0], 0, sizeof(thread_data)) ; + + prevthreads = 0 ; + for(num_threads=min_threads; num_threads <= max_threads; num_threads++ ) + { + + warmup(&blkp[prevthreads*chperthread], (num_threads-prevthreads)*chperthread ); + + nperthread = chperthread ; + stopflag = FALSE ; + + for(i=0; i< num_threads; i++){ + de_area[i].threadno = i+1 ; + de_area[i].NumBlocks = num_rounds*nperthread; + de_area[i].array = 
&blkp[i*nperthread] ; + de_area[i].blksize = &blksize[i*nperthread] ; + de_area[i].asize = nperthread ; + de_area[i].min_size = min_size ; + de_area[i].max_size = max_size ; + de_area[i].seed = lran2(&rgen) ; ; + de_area[i].finished = 0 ; + de_area[i].cAllocs = 0 ; + de_area[i].cFrees = 0 ; + de_area[i].cThreads = 0 ; + de_area[i].finished = FALSE ; + lran2_init(&de_area[i].rgen, de_area[i].seed) ; + +#ifdef __WIN32__ + _beginthread((void (__cdecl*)(void *)) exercise_heap, 0, &de_area[i]) ; +#else + _beginthread(exercise_heap, 0, &de_area[i]) ; +#endif + + } + + QueryPerformanceCounter( &start_cnt) ; + + // printf ("Sleeping for %ld seconds.\n", sleep_cnt); + Sleep(sleep_cnt * 1000L) ; + + stopflag = TRUE ; + + for(i=0; i<num_threads; i++){ + while( !de_area[i].finished ){ +#ifdef __WIN32__ + Sleep(1); +#elif defined(__SVR4) + thr_yield(); +#else + sched_yield(); +#endif + } + } + + + QueryPerformanceCounter( &end_cnt) ; + + sum_frees = sum_allocs =0 ; + sum_threads = 0 ; + for(i=0;i< num_threads; i++){ + sum_allocs += de_area[i].cAllocs ; + sum_frees += de_area[i].cFrees ; + sum_threads += de_area[i].cThreads ; + de_area[i].cAllocs = de_area[i].cFrees = 0; + } + + +#ifdef __WIN32__ + ticks = end_cnt.QuadPart - start_cnt.QuadPart ; + duration = (double)ticks/ticks_per_sec.QuadPart ; +#else + ticks = end_cnt - start_cnt ; + duration = (double)ticks/ticks_per_sec ; +#endif + + for( i=0; i<num_threads; i++){ + if( !de_area[i].finished ) + printf("Thread at %d not finished\n", i) ; + } + + + rate_n = sum_allocs/duration ; + if( rate_1 == 0){ + rate_1 = rate_n ; + } + + reqd_space = (0.5*(min_size+max_size)*num_threads*chperthread) ; + // used_space = CountReservedSpace() - init_space; + used_space = 0; + + printf ("Throughput = %8.0f operations per second.\n", sum_allocs / duration); + +#if 0 + printf("%2d ", num_threads ) ; + printf("%6.3f", duration ) ; + printf("%6.3f", rate_n/rate_1 ) ; + printf("%8.0f", sum_allocs/duration ) ; + printf(" %6.3f %.3f", 
(double)used_space/(1024*1024), used_space/reqd_space) ; + printf("\n") ; +#endif + + Sleep(5000L) ; // wait 5 sec for old threads to die + + prevthreads = num_threads ; + + printf ("Done sleeping...\n"); + + } + delete [] de_area; +} + + +static void * exercise_heap( void *pinput) +{ + thread_data *pdea; + int cblks=0 ; + int victim ; + long blk_size ; + int range ; + + if( stopflag ) return 0; + + pdea = (thread_data *)pinput ; + pdea->finished = FALSE ; + pdea->cThreads++ ; + range = pdea->max_size - pdea->min_size ; + + /* allocate NumBlocks chunks of random size */ + for( cblks=0; cblks<pdea->NumBlocks; cblks++){ + victim = lran2(&pdea->rgen)%pdea->asize ; +#ifdef CPP + delete pdea->array[victim] ; +#else + free(pdea->array[victim]) ; +#endif + pdea->cFrees++ ; + + if (range == 0) { + blk_size = pdea->min_size; + } else { + blk_size = pdea->min_size+lran2(&pdea->rgen)%range ; + } +#ifdef CPP + pdea->array[victim] = new char[blk_size] ; +#else + pdea->array[victim] = (char *) malloc(blk_size) ; +#endif + + pdea->blksize[victim] = blk_size ; + assert(pdea->array[victim] != NULL) ; + + pdea->cAllocs++ ; + + /* Write something! 
*/ + + volatile char * chptr = ((char *) pdea->array[victim]); + *chptr++ = 'a'; + volatile char ch = *((char *) pdea->array[victim]); + *chptr = 'b'; + + + if( stopflag ) break ; + } + + // printf("Thread %u terminating: %d allocs, %d frees\n", + // pdea->threadno, pdea->cAllocs, pdea->cFrees) ; + pdea->finished = TRUE ; + + if( !stopflag ){ +#ifdef __WIN32__ + _beginthread((void (__cdecl*)(void *)) exercise_heap, 0, pdea) ; +#else + _beginthread(exercise_heap, 0, pdea) ; +#endif + } else { + printf ("thread stopping.\n"); + } +#ifndef _WIN32 + pthread_exit (NULL); +#endif + return 0; +} + +static void warmup(char **blkp, int num_chunks ) +{ + int cblks ; + int victim ; + int blk_size ; + LPVOID tmp ; + + + for( cblks=0; cblks<num_chunks; cblks++){ + if (min_size == max_size) { + blk_size = min_size; + } else { + blk_size = min_size+lran2(&rgen)%(max_size-min_size) ; + } +#ifdef CPP + blkp[cblks] = new char[blk_size] ; +#else + blkp[cblks] = (char *) malloc(blk_size) ; +#endif + blksize[cblks] = blk_size ; + assert(blkp[cblks] != NULL) ; + } + + /* generate a random permutation of the chunks */ + for( cblks=num_chunks; cblks > 0 ; cblks--){ + victim = lran2(&rgen)%cblks ; + tmp = blkp[victim] ; + blkp[victim] = blkp[cblks-1] ; + blkp[cblks-1] = (char *) tmp ; + } + + for( cblks=0; cblks<4*num_chunks; cblks++){ + victim = lran2(&rgen)%num_chunks ; +#ifdef CPP + delete blkp[victim] ; +#else + free(blkp[victim]) ; +#endif + + if (max_size == min_size) { + blk_size = min_size; + } else { + blk_size = min_size+lran2(&rgen)%(max_size - min_size) ; + } +#ifdef CPP + blkp[victim] = new char[blk_size] ; +#else + blkp[victim] = (char *) malloc(blk_size) ; +#endif + blksize[victim] = blk_size ; + assert(blkp[victim] != NULL) ; + } +} +#endif // _MT + +#ifdef __WIN32__ +ULONG CountReservedSpace() +{ + MEMORY_BASIC_INFORMATION info; + char *addr=NULL ; + ULONG size=0 ; + + while( true){ + VirtualQuery(addr, &info, sizeof(info)); + switch( info.State){ + case MEM_FREE: + case 
MEM_RESERVE: + break ; + case MEM_COMMIT: + size += info.RegionSize ; + break ; + } + addr += info.RegionSize ; + if( addr >= (char *)0x80000000UL ) break ; + } + + return size ; + +} +#endif + +// ======================================================= + +/* lran2.h + * by Wolfram Gloger 1996. + * + * A small, portable pseudo-random number generator. + */ + +#ifndef _LRAN2_H +#define _LRAN2_H + +#define LRAN2_MAX 714025l /* constants for portable */ +#define IA 1366l /* random number generator */ +#define IC 150889l /* (see e.g. `Numerical Recipes') */ + +//struct lran2_st { +// long x, y, v[97]; +//}; + +static void +lran2_init(struct lran2_st* d, long seed) +{ + long x; + int j; + + x = (IC - seed) % LRAN2_MAX; + if(x < 0) x = -x; + for(j=0; j<97; j++) { + x = (IA*x + IC) % LRAN2_MAX; + d->v[j] = x; + } + d->x = (IA*x + IC) % LRAN2_MAX; + d->y = d->x; +} + +static +long lran2(struct lran2_st* d) +{ + int j = (d->y % 97); + + d->y = d->v[j]; + d->x = (IA*d->x + IC) % LRAN2_MAX; + d->v[j] = d->x; + return d->y; +} + +#undef IA +#undef IC + +#endif + +