diff --git a/src/speedymalloc_2.c b/src/speedymalloc_2.c
index a0797145f05e10c84af9c77b3c90af6124b4de68..02bf1e616b8aa0672b504d53a9687f54521dd744 100644
--- a/src/speedymalloc_2.c
+++ b/src/speedymalloc_2.c
@@ -48,7 +48,10 @@ typedef struct TLStates {
 __thread tls_t* tls;
 
 static inline int size2bin(size_t size) {
-	assert(size > 0 && size < CACHE_BINS * CACHE_BIN_SEPERATION);
+	assert(size < CACHE_BINS * CACHE_BIN_SEPERATION); // zero is accepted now; only the upper bound is still asserted
+	if (size == 0) // guard: size is unsigned, so (size - 1) below would wrap around for zero
+		return 0; // a zero-byte request is mapped to bin 0
+
 	return (size - 1) / CACHE_BIN_SEPERATION;
 }
 
@@ -57,7 +60,7 @@ static inline size_t bin2size(int bin) {
 	return (bin + 1) * CACHE_BIN_SEPERATION;
 }
 
-static void init_tls(void) {
+static tls_t* init_tls(void) {
 	void *mem = mmap(NULL, MEMSIZE, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
 	if (mem == MAP_FAILED) {
 		perror("mmap");
@@ -67,13 +70,12 @@ static void init_tls(void) {
 
 	tls->ptr = ((uintptr_t)tls) + sizeof(tls_t);
 	tls->end = (uintptr_t)tls + MEMSIZE;
+	return (tls_t*)mem;
 }
 
-inline static void* bump_alloc(size_t size, size_t align) {
+inline static void* bump_alloc(tls_t* ltls, size_t size, size_t align) {
 	assert(align % 2 == 0);
 
-	tls_t* ltls = tls;
-
 	// allocate size header
 	uintptr_t ptr_w_header = ltls->ptr + sizeof(size_t);
 
@@ -95,9 +97,8 @@ inline static void* bump_alloc(size_t size, size_t align) {
 
 void* malloc(size_t size) {
 	tls_t* ltls = tls;
-
 	if (unlikely(ltls == NULL)) {
-		init_tls();
+		ltls = init_tls();
 	}
 
 	// cached sizes
@@ -109,22 +110,22 @@ void* malloc(size_t size) {
 			ltls->bins[bin] = chunk->next;
 			return chunk2ptr(chunk);
 		}
-		return bump_alloc(bin2size(bin), MIN_ALIGNMENT);
+		return bump_alloc(ltls, bin2size(bin), MIN_ALIGNMENT);
 	}
 
-	return bump_alloc(size, MIN_ALIGNMENT);
+	return bump_alloc(ltls, size, MIN_ALIGNMENT);
 }
 
 void free(void* ptr) {
-	tls_t* ltls = tls;
-	if (unlikely(ltls == NULL)) {
-		init_tls();
-	}
-
 	if (ptr == NULL) {
 		return;
 	}
 
+	tls_t* ltls = tls;
+	if (unlikely(ltls == NULL)) {
+		ltls = init_tls();
+	}
+
 	chunk_t* chunk = ptr2chunk(ptr);
 
 	if (chunk->size < CACHE_BINS * CACHE_BIN_SEPERATION) {
@@ -155,11 +156,12 @@ void* memalign(size_t alignment, size_t size) {
 		return NULL;
 	}
 
-	if (unlikely(tls == NULL)) {
-		init_tls();
+	tls_t* ltls = tls;
+	if (unlikely(ltls == NULL)) {
+		ltls = init_tls();
 	}
 
-	return bump_alloc(size, alignment);
+	return bump_alloc(ltls, size, alignment);
 }
 
 int posix_memalign(void **memptr, size_t alignment, size_t size)