diff --git a/speedymalloc.c b/speedymalloc.c index eee7250bc5b5afacf0d8a99cbd4e390dc447023c..1656f25aad1a19862ddf07244635cdacbde57359 100644 --- a/speedymalloc.c +++ b/speedymalloc.c @@ -164,6 +164,11 @@ static inline size_t alignup(size_t size, size_t alignment) { return (size + mask) & ~mask; } +static inline size_t aligndown(size_t size, size_t alignment) { + size_t mask = alignment -1; + return size & ~mask; +} + static size_t MEMSIZE = 0; static int grow_bump_region() { @@ -175,13 +180,18 @@ static int grow_bump_region() { exit(errno); } + tls_t* ltls = tls; + // init tls - if (!tls) - tls = (tls_t*)mem; + if (!tls) { + tls = (tls_t*) mem; + ltls = tls; + ltls->end = ((uintptr_t) ltls) + sizeof(tls_t); + } else { + ltls->end = (uintptr_t) mem; + } - tls_t* ltls = tls; - ltls->ptr = ((uintptr_t)ltls) + sizeof(tls_t); - ltls->end = ((uintptr_t)ltls) + MEMSIZE; + ltls->ptr = ltls->end + MEMSIZE; #ifdef MADVISE_WILLNEED ltls->next_willneed = ltls->ptr; #endif @@ -212,33 +222,31 @@ static void* bump_alloc(size_t size, size_t alignment) { // expensive thread-local storage operations tls_t* ltls = tls; - // allocate size header - ltls->ptr += sizeof(size_t); + uintptr_t new_ptr = aligndown(ltls->ptr - size, alignment) - sizeof(size_t); // regrow bump region - if (unlikely((ltls->ptr + size + alignment) > ltls->end)) { + if (unlikely(new_ptr < ltls->end)) { if (grow_bump_region() < 0) return NULL; - ltls = tls; - ltls->ptr = tls->ptr + sizeof(size_t); + // recalculate new_ptr + new_ptr = aligndown(ltls->ptr - size, alignment) - sizeof(size_t); + assert((uintptr_t) new_ptr > ltls->ptr); } - // align ptr - ltls->ptr = alignup(ltls->ptr, alignment); - #ifdef MADVISE_WILLNEED - if(unlikely(ltls->ptr >= ltls->next_willneed)) { - madvise((void*)ltls->next_willneed, WILLNEED_SIZE, MADV_WILLNEED); - ltls->next_willneed += WILLNEED_SIZE; + if (unlikely(ltls->ptr <= ltls->next_willneed)) { + ltls->next_willneed -= WILLNEED_SIZE; + madvise((void*) ltls->next_willneed, WILLNEED_SIZE, MADV_WILLNEED); } #endif - void* ptr = (void*)ltls->ptr; - ptr2chunk(ptr)->size = size; - ltls->ptr += size; + chunk_t* chunk = (chunk_t*) new_ptr; + assert((uintptr_t) chunk2ptr(chunk) % alignment == 0); + chunk->size = size; + ltls->ptr = new_ptr; - return ptr; + return chunk2ptr(chunk); } static void* prepare_chunk(chunk_t* chunk, size_t bin_id) {