Closing out oldmalloc

Submitted by Rich Felker on June 2, 2020, 9:16 p.m.

Details

Message ID 20200602211639.GE1079@brightrain.aerifal.cx
State New
Series "Closing out oldmalloc"
Headers show

Commit Message

Rich Felker June 2, 2020, 9:16 p.m.
On Sat, May 23, 2020 at 11:13:49PM -0400, Rich Felker wrote:
> Before dropping in mallocng and saying goodbye to oldmalloc, I'd kinda
> like to leave its final state as something "non-broken" -- in
> particular, without the unbounded heap expansion bug. There's a patch
> from around a year ago that some affected users have tried, that
> should fix it fully, and that removes a lot of the sketchy/fragile
> code including the stuff broken by the lock-skipping bug:
> 
> https://www.openwall.com/lists/musl/2019/04/12/4
> 
> I think I'd like to apply this. It probably wouldn't get much/any use
> and wouldn't appear as the malloc used in a release, but would be nice
> to have it somewhere where it's not forgotten. It could also be a
> candidate for backporting to the 1.1.x branch if that ends up
> happening.

I have a reduced version of it with minor fixes that I very well may
commit. The only intended functional change is that, unlike the above,
it keeps the ability to split chunks that are in the desired bin but
larger than needed to satisfy the allocation request. This of course
makes more cases where the full split/merge lock must be taken.

For what it's worth, in the limited tests I've done, before or after
the changes it's barely faster and possibly slower than just using a
single global lock. However I think theoretically what remains of the
fine-grained locking still can help real multithreaded loads with
sufficient fragmentation that the bins usually have free chunks in
them.

Rich

Patch hide | download patch | download mbox

diff --git a/src/malloc/malloc.c b/src/malloc/malloc.c
index a803d4c9..20598ec3 100644
--- a/src/malloc/malloc.c
+++ b/src/malloc/malloc.c
@@ -17,7 +17,7 @@ 
 static struct {
 	volatile uint64_t binmap;
 	struct bin bins[64];
-	volatile int free_lock[2];
+	volatile int split_merge_lock[2];
 } mal;
 
 int __malloc_replaced;
@@ -128,7 +128,6 @@  void __dump_heap(int x)
 
 static struct chunk *expand_heap(size_t n)
 {
-	static int heap_lock[2];
 	static void *end;
 	void *p;
 	struct chunk *w;
@@ -138,13 +137,8 @@  static struct chunk *expand_heap(size_t n)
 	 * we need room for an extra zero-sized sentinel chunk. */
 	n += SIZE_ALIGN;
 
-	lock(heap_lock);
-
 	p = __expand_heap(&n);
-	if (!p) {
-		unlock(heap_lock);
-		return 0;
-	}
+	if (!p) return 0;
 
 	/* If not just expanding existing space, we need to make a
 	 * new sentinel chunk below the allocated space. */
@@ -167,8 +161,6 @@  static struct chunk *expand_heap(size_t n)
 	w = MEM_TO_CHUNK(p);
 	w->csize = n | C_INUSE;
 
-	unlock(heap_lock);
-
 	return w;
 }
 
@@ -198,96 +190,44 @@  static void unbin(struct chunk *c, int i)
 	NEXT_CHUNK(c)->psize |= C_INUSE;
 }
 
-static int alloc_fwd(struct chunk *c)
-{
-	int i;
-	size_t k;
-	while (!((k=c->csize) & C_INUSE)) {
-		i = bin_index(k);
-		lock_bin(i);
-		if (c->csize == k) {
-			unbin(c, i);
-			unlock_bin(i);
-			return 1;
-		}
-		unlock_bin(i);
-	}
-	return 0;
-}
-
-static int alloc_rev(struct chunk *c)
+static void bin_chunk(struct chunk *self, int i)
 {
-	int i;
-	size_t k;
-	while (!((k=c->psize) & C_INUSE)) {
-		i = bin_index(k);
-		lock_bin(i);
-		if (c->psize == k) {
-			unbin(PREV_CHUNK(c), i);
-			unlock_bin(i);
-			return 1;
-		}
-		unlock_bin(i);
-	}
-	return 0;
+	self->next = BIN_TO_CHUNK(i);
+	self->prev = mal.bins[i].tail;
+	self->next->prev = self;
+	self->prev->next = self;
+	if (self->prev == BIN_TO_CHUNK(i))
+		a_or_64(&mal.binmap, 1ULL<<i);
 }
 
-
-/* pretrim - trims a chunk _prior_ to removing it from its bin.
- * Must be called with i as the ideal bin for size n, j the bin
- * for the _free_ chunk self, and bin j locked. */
-static int pretrim(struct chunk *self, size_t n, int i, int j)
+static void trim(struct chunk *self, size_t n)
 {
-	size_t n1;
+	size_t n1 = CHUNK_SIZE(self);
 	struct chunk *next, *split;
 
-	/* We cannot pretrim if it would require re-binning. */
-	if (j < 40) return 0;
-	if (j < i+3) {
-		if (j != 63) return 0;
-		n1 = CHUNK_SIZE(self);
-		if (n1-n <= MMAP_THRESHOLD) return 0;
-	} else {
-		n1 = CHUNK_SIZE(self);
-	}
-	if (bin_index(n1-n) != j) return 0;
+	if (n >= n1 - DONTCARE) return;
 
 	next = NEXT_CHUNK(self);
 	split = (void *)((char *)self + n);
 
-	split->prev = self->prev;
-	split->next = self->next;
-	split->prev->next = split;
-	split->next->prev = split;
 	split->psize = n | C_INUSE;
 	split->csize = n1-n;
 	next->psize = n1-n;
 	self->csize = n | C_INUSE;
-	return 1;
-}
 
-static void trim(struct chunk *self, size_t n)
-{
-	size_t n1 = CHUNK_SIZE(self);
-	struct chunk *next, *split;
-
-	if (n >= n1 - DONTCARE) return;
+	int i = bin_index(n1-n);
+	lock_bin(i);
 
-	next = NEXT_CHUNK(self);
-	split = (void *)((char *)self + n);
-
-	split->psize = n | C_INUSE;
-	split->csize = n1-n | C_INUSE;
-	next->psize = n1-n | C_INUSE;
-	self->csize = n | C_INUSE;
+	bin_chunk(split, i);
 
-	__bin_chunk(split);
+	unlock_bin(i);
 }
 
 void *malloc(size_t n)
 {
 	struct chunk *c;
 	int i, j;
+	uint64_t mask;
 
 	if (adjust_size(&n) < 0) return 0;
 
@@ -303,33 +243,37 @@  void *malloc(size_t n)
 	}
 
 	i = bin_index_up(n);
-	for (;;) {
-		uint64_t mask = mal.binmap & -(1ULL<<i);
-		if (!mask) {
-			c = expand_heap(n);
-			if (!c) return 0;
-			if (alloc_rev(c)) {
-				struct chunk *x = c;
-				c = PREV_CHUNK(c);
-				NEXT_CHUNK(x)->psize = c->csize =
-					x->csize + CHUNK_SIZE(c);
-			}
-			break;
+	if (i<63 && (mal.binmap & (1ULL<<i))) {
+		lock_bin(i);
+		c = mal.bins[i].head;
+		if (c != BIN_TO_CHUNK(i) && CHUNK_SIZE(c)-n <= DONTCARE) {
+			unbin(c, i);
+			unlock_bin(i);
+			return CHUNK_TO_MEM(c);
 		}
+		unlock_bin(i);
+	}
+	lock(mal.split_merge_lock);
+	for (mask = mal.binmap & -(1ULL<<i); mask; mask -= (mask&-mask)) {
 		j = first_set(mask);
 		lock_bin(j);
 		c = mal.bins[j].head;
 		if (c != BIN_TO_CHUNK(j)) {
-			if (!pretrim(c, n, i, j)) unbin(c, j);
+			unbin(c, j);
 			unlock_bin(j);
 			break;
 		}
 		unlock_bin(j);
 	}
-
-	/* Now patch up in case we over-allocated */
+	if (!mask) {
+		c = expand_heap(n);
+		if (!c) {
+			unlock(mal.split_merge_lock);
+			return 0;
+		}
+	}
 	trim(c, n);
-
+	unlock(mal.split_merge_lock);
 	return CHUNK_TO_MEM(c);
 }
 
@@ -382,6 +326,8 @@  void *realloc(void *p, size_t n)
 	self = MEM_TO_CHUNK(p);
 	n1 = n0 = CHUNK_SIZE(self);
 
+	if (n<=n0 && n0-n<=DONTCARE) return p;
+
 	if (IS_MMAPPED(self)) {
 		size_t extra = self->psize;
 		char *base = (char *)self - extra;
@@ -408,27 +354,24 @@  void *realloc(void *p, size_t n)
 	/* Crash on corrupted footer (likely from buffer overflow) */
 	if (next->psize != self->csize) a_crash();
 
-	/* Merge adjacent chunks if we need more space. This is not
-	 * a waste of time even if we fail to get enough space, because our
-	 * subsequent call to free would otherwise have to do the merge. */
-	if (n > n1 && alloc_fwd(next)) {
-		n1 += CHUNK_SIZE(next);
-		next = NEXT_CHUNK(next);
-	}
-	/* FIXME: find what's wrong here and reenable it..? */
-	if (0 && n > n1 && alloc_rev(self)) {
-		self = PREV_CHUNK(self);
-		n1 += CHUNK_SIZE(self);
-	}
-	self->csize = n1 | C_INUSE;
-	next->psize = n1 | C_INUSE;
+	lock(mal.split_merge_lock);
 
-	/* If we got enough space, split off the excess and return */
-	if (n <= n1) {
-		//memmove(CHUNK_TO_MEM(self), p, n0-OVERHEAD);
-		trim(self, n);
-		return CHUNK_TO_MEM(self);
+	size_t nsize = next->csize & C_INUSE ? 0 : CHUNK_SIZE(next);
+	if (n0+nsize >= n) {
+		int i = bin_index(nsize);
+		lock_bin(i);
+		if (!(next->csize & C_INUSE)) {
+			unbin(next, i);
+			unlock_bin(i);
+			next = NEXT_CHUNK(next);
+			self->csize = next->psize = n0+nsize | C_INUSE;
+			trim(self, n);
+			unlock(mal.split_merge_lock);
+			return CHUNK_TO_MEM(self);
+		}
+		unlock_bin(i);
 	}
+	unlock(mal.split_merge_lock);
 
 copy_realloc:
 	/* As a last resort, allocate a new chunk and copy to it. */
@@ -443,59 +386,51 @@  copy_free_ret:
 void __bin_chunk(struct chunk *self)
 {
 	struct chunk *next = NEXT_CHUNK(self);
-	size_t final_size, new_size, size;
-	int reclaim=0;
-	int i;
-
-	final_size = new_size = CHUNK_SIZE(self);
 
 	/* Crash on corrupted footer (likely from buffer overflow) */
 	if (next->psize != self->csize) a_crash();
 
-	for (;;) {
-		if (self->psize & next->csize & C_INUSE) {
-			self->csize = final_size | C_INUSE;
-			next->psize = final_size | C_INUSE;
-			i = bin_index(final_size);
-			lock_bin(i);
-			lock(mal.free_lock);
-			if (self->psize & next->csize & C_INUSE)
-				break;
-			unlock(mal.free_lock);
-			unlock_bin(i);
-		}
+	lock(mal.split_merge_lock);
 
-		if (alloc_rev(self)) {
-			self = PREV_CHUNK(self);
-			size = CHUNK_SIZE(self);
-			final_size += size;
-			if (new_size+size > RECLAIM && (new_size+size^size) > size)
-				reclaim = 1;
-		}
+	size_t osize = CHUNK_SIZE(self), size = osize;
+
+	/* Since we hold split_merge_lock, only transition from free to
+	 * in-use can race; in-use to free is impossible */
+	size_t psize = self->psize & C_INUSE ? 0 : CHUNK_PSIZE(self);
+	size_t nsize = next->csize & C_INUSE ? 0 : CHUNK_SIZE(next);
 
-		if (alloc_fwd(next)) {
-			size = CHUNK_SIZE(next);
-			final_size += size;
-			if (new_size+size > RECLAIM && (new_size+size^size) > size)
-				reclaim = 1;
+	if (psize) {
+		int i = bin_index(psize);
+		lock_bin(i);
+		if (!(self->psize & C_INUSE)) {
+			struct chunk *prev = PREV_CHUNK(self);
+			unbin(prev, i);
+			self = prev;
+			size += psize;
+		}
+		unlock_bin(i);
+	}
+	if (nsize) {
+		int i = bin_index(nsize);
+		lock_bin(i);
+		if (!(next->csize & C_INUSE)) {
+			unbin(next, i);
 			next = NEXT_CHUNK(next);
+			size += nsize;
 		}
+		unlock_bin(i);
 	}
 
-	if (!(mal.binmap & 1ULL<<i))
-		a_or_64(&mal.binmap, 1ULL<<i);
-
-	self->csize = final_size;
-	next->psize = final_size;
-	unlock(mal.free_lock);
+	int i = bin_index(size);
+	lock_bin(i);
 
-	self->next = BIN_TO_CHUNK(i);
-	self->prev = mal.bins[i].tail;
-	self->next->prev = self;
-	self->prev->next = self;
+	self->csize = size;
+	next->psize = size;
+	bin_chunk(self, i);
+	unlock(mal.split_merge_lock);
 
 	/* Replace middle of large chunks with fresh zero pages */
-	if (reclaim) {
+	if (size > RECLAIM && (size^(size-osize)) > size-osize) {
 		uintptr_t a = (uintptr_t)self + SIZE_ALIGN+PAGE_SIZE-1 & -PAGE_SIZE;
 		uintptr_t b = (uintptr_t)next - SIZE_ALIGN & -PAGE_SIZE;
 #if 1