pwn学习笔记（7）--堆相关源码

相关源码：

1. chunk 相关源码：

对于用户来说，只需要确保malloc()函数返回的内存不会发生溢出，并且在不用的时候使用free() 函数将其释放，以后也不再做任何操作即可。而对于glibc来说'它要在用户第一次调用malloc()函数之前对堆进行初始化；在用户频繁申请和释放时维护堆的结构'保证时间和空间上的效率；同时还要检测过程中可能产生的错误，并及时终止程序。

首先，先稍微说下几个相关的宏定义。

request2size()：

c 复制代码

#define request2size(req)								\
(((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MTNSIZE)  ?		\
MINSIZE :												\
((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK )

#define MALLOC_ALIGN_MASK		(MALLOC_ALIGNMENT - 1)
#define MALLOC_ALIGNMENT		(2 * SIZE_SZ)

这个宏将请求的 req 转换成包含 chunk 头部（presize和size）的 chunk 大小，示例如下（MINSIZE默认为0x20）。

当 req 属于[0,MINSIZE-MALLOC_ALIGN_MASK-SIZE_SZ)，也就是[0,9)时，返回 0x20 ;
当 req 为 0x9 时，返回(0x9+0x8+0xF)&~0xF ，也就是 0x20 ；
当 req 为 0x18 时，返回(0x18+0x8+0xF)&~0xF，也就是0x20:

这里可能会有个问题，那就是 0x18 的 user data 加上头部 0x10 就已经是 0x28 了，为什么返回的 chunk 却是 0x20 。这就是因为如果当前 chunk 在使用中，下一个 chunk 的 prev_inuse 成员就会属于当前 chunk ，所以就多出了 0x8 的使用空间。考虑到这一点，当req 在0x9~0x18 之间时，对应的 chunk 大小为 0x10 ；当 req 在 0x19~0x28 之间时，对应的 chunk 大小为 0x20 ，并以此类推。

chunk2mem()和 mem2chunk()：

c 复制代码

#define chunk2mem(p)		((void*)((char*)(p) + 2*SIZE_SZ))
#define mem2chunk(mem)		((mchunkptr)((char*)(mem) - 2*SIZE_SZ))

chunk2mem()把指向 chunk 的指针转化成指向 user data 的指针，常出现在 malloc()函数返回时；mem2chunk()把指向 user data 的指针转化成指向 chunk 的指针，常出现在 free() 函数开始时。

chunk状态相关：

定义 PREV_INUSE、IS_MMAPPED、NONMAIN_ARENA（chunk 是否属于非主线程）以及对三者进行或运算（用于掩码）。

c 复制代码

/* size field is or'ed with PREV_INUSE when provious adjacent chunk in use */
#define PREV_INUSE 0x1
/* size field is or'ed with IS_MMAPPED if the chunk was obtained with mmap() */
#define IS_MMAPPED 0x2
/* size field is or'ed with NON_MAIN_ARENA if the chunk was obtained from a non-main arena .*/
#define NON_MAIN_ARENA 0x4

/* Bits to mask off when extracting size. */
#define SIZE_BITS (PREV_INUSE | IS_MMAPPED | NON_MAIN_ARENA)

通过 chunk 指针 p ，对某标志位进行提取、检查、置位和清楚操作。

c 复制代码

#define prev_inuse(p)			((p)->size & PREV_INUSE)

#define chunk_is_mmapped(p)		((p)->size & IS_MMAPPED)

#define chunk_non_main_arena(p)	((p)->size & NON_MAIN_ARENA)

#define inuse(p)												\
	((((mchunkptr) (((char *)(p)) + ((p)->size & ~SIZE_BITS)))->size) & PREV_INUSE)

#define set_inuse(p)											\
	((mchunkptr) (((char *)(p)) + ((p)->size & ~SIZE_BITS)))->size |= PREV_INUSE

#define clear_inuse(p)											\
((mchunkptr) (((char *) (p)) + ((p)->size & ~SIZE_BITS)))->>size &= ~(PREV_INUSE)

由于当前 chunk 的使用状态是由下一个 chunk 的 size 成员的 PREV_INUSE 比特位决定的，所以可以通过下面的宏获得或者修改当前 chunk 的 inuse 状态。

c 复制代码

#define inuse_bit_at_offset(p,s)								\
	(((mchunkptr) (((char *) (p)) + (s)))->size & PREV_INUSE)

#define set_inuse_bit_at_offset(p,s)							\
	(((mchunkptr) (((char *) (p)) + (s)))->size |= PREV_INUSE)

#define clear_inuse_bit_at_offset(p,s)							\
	(((mchunkptr) (((char *) (p)) + (s)))->size &= ~(PREV_INUSE))

set_head_size()修改 size 时不会修改当前 chunk 的标志位，而，ser_head() 会。 seet_foot()修改下一个 chunk 的 prev_size 时，当前 chunk 一定要处于释放状态，不然下一个 chunk 的 pre_size 是没有意义的。
c 复制代码
```
#define set_head_size(p,s)			((p)->size = (((p)->size & SIZE_BOTS) | (s)))

#define set_head(p,s)				((p)->size = (s))

#define set_foot(p,s)				(((mchunkptr) ((char *) (p) + (s)))->prev_size = (s))

#define chunksize(p)				((p)->size & ~(SIZE_BITS))
```
next_chunk() 将当前 chunk 地址加上当前 chunk 大小获得下一个 chunk 的指针。prev_chunk() 将当前 chunk 的地址减去 prev_size 值获得上一个chunk 的指针，前提是上一个 chunk 处于释放状态。chunk_at_offset() 将当前 chunk 地址加上 s 偏移处的位置视为一个 chunk。
c 复制代码
```
#define next_chunk(p)			((mchunkptr) (((char *) (p)) + ((p)->size & ~SIZE_BITS)))

#define prev_chunk(p)			((mchunkptr) (((char *) (p)) - ((p)->prev_size)))

#define chunk_at_offset(p,s)	((mchunkptr) (((char *) (p)) + (s)))
```

chunk 合并过程

当一个非 fast bin 的 chunk 被释放，会与相邻的 chunk 进行合并，顺序通常是先向后（上）合并再向前（下）合并。如果向前合并的 chunk 是 top chunk ，则合并之后会形成新的 top chunk ，则合成之后会形成新的 top chunk ；如果不是的话，则合并之后会被加入 unsorted bin 中。

c 复制代码

else if(!chunk_is_mmapped(p)) {
    nextchunk = chunk_at_offset(p,size);
	......
    /*向后合并，上一个 chunk 是释放状态就进行合并。新 chunk 地址与上一个相同，大小为 p->size+p->prev_size，即p减去prev_size */
    if (!prev_inuse(p)) {
        prevsize = p->prev_size;
        size += prevsize;
        p = chunk_at_offset(p,-((long) prevsize));
        //到这里已经形成了新的 free chunk 。用 unlink() 将其从双链表中删除。
        unlink(av,p,bck,fwd);
    }
    if (nextchunk != av->top) {	//检查下一个 chunk 是不是 top chunk
        //如果不是，通过检查下一个 chunk 的 PREV_INUSE 检查一个 chunk 的状态，并清除 inuse 位
        nextinuse = inuse_bit_at_offset(nextchunk,nextsize);
        
        if (!nextinuse) {	//如果下一个 chunk 处于使用状态则执行向前合并操作
            unlink(av,nextchunk,bck,fwd);
            size += nextsize;
        } else				//否则清楚下一个 chunk 的 PREV_INUSE
            clear_inuse_bit_at_offset(nextchunk,0);
        
        // 先将合并后的 chunk 加入 unsorted bin 中
        bck = unsorted_chunks(av);
        fwd = bck->fd;
        if (__glibc_unlikey (fwd->bk != bck)) {
            errstr = "free(): corrupted unsorted chunks";
            goto errout;
        }
        p->fd = fwd;
        p->bk = bck;
        if (!in_smallbin_range(size)) {
            p->fd_nextsize = NULL;
            p->bk_nextsize = NULL;
        }
        bck->fd = p;
        fwd->bk = p;
        
        set_head(p,size | PREV_INUSE);
        set_foot(p,size);
        check_free_chunk(av,p);
    }
    else {	//如果下一个 chunk 是 top chunk，则合并成新的 top chunk
        size += nextsize;
        set_head(p,size | PREV_INUSE);
        av->top = p;
        check_chunk(av,p);
    }

chunk 拆分过程：

当用户申请的 chunk 较小时，会将一个大的 chunk 进行拆分，合适的部分返回给用户，剩下的部分（称为 remainder）则加入 unsorted bin 中。同时 malloc_state 中的 last_remainder 会记录最近拆分出的 remainder 当然，这个 remainder 的大小至少要为 MINSIZE ，否则不能拆分。

拆分 chunk 的一种情况是： fast bin 和 small bin 中没有合适的 chunk 、同时 unsorted bin 中有且只有一个可拆分的 chunk 、并且该 chunk 是 last remainder 。

c 复制代码

if (in_smallbin_range (nb) &&			//申请的 chunk 在 small bin 范围内
    bck == unsorted_chunk (av) &&		//victim 必须是 unsorted bin 中唯一的 chunk 
    victim == av->last_remainder &&		//victim 必须是 last_remainder
    (unsigned long) (size) > (unsigned long) (nb + MINSIZE))
    									//victim 至少要大于 nb+MINSIZE 才可以拆分
{
    /* split and reattach remainder */
    remainder_size = size - nb;
    remainder = chunk_at_offset (victim,nb);
    //将 remainder 加入 unsorted bin 中，同时记录为 last_remainder
    unsorted_chunks (av)->bk = unsorted_chunks (av)->fd = remainder;
    av->last_remainder = remainder;
    remainder->bk = remainder->fd = unsorted_chunks (av);
    //如果 remainder 在 large bin 范围内，清空 fd_nextsize 和 bk_nextsize 指针
    if (!in_smallbin_range (remainder_size)) {
        remainder->fd_nextsize = NULL;
        remainder->bk_nextsize = NULL;
    }
    //设置 remainder 的状态栏
    set_head (victim,nb | PREV_INUSE |
             (av != &main_arena ? NON_MAIN_ARENA : 0));
    set_head (remainder, remainder_size | PREV_INUSE);
    set_foot (remainder, remainder_size);
    
    check_malloc_chunk (av,mvictim,nb);		//debug 时用来检查 chunk 状态
    void *p = chunk2mem (victim);			//获得指向 user data 的指针，返回给用户
    // 如果设置了 perturb_type，将 chunk 的 user data 初始化为 peerturb_type ^ 0xff
    alloc_perturb (p,byte);
    return p;
}

2.bin 相关源码：

fastbin 相关：

c 复制代码

// 根据序号获取 fastbinsY 数组中对应的bin
#define fastbin(ar_ptr,idx) ((ar_ptr)->fastbinsY[idx])
//根据 chunk 大小获得位置。因为 MINSIZE 为 0x20，前两个 index 是不可索引的，所以需要减去2
#define fastbin_index(sz)	\
	((((unsigned int) (sz)) >> (SIZE_SZ == 8 ? 4 : 3)) - 2)

// 定义了属于 fast_bin 的 chunk 最大值
#define MAX_FAST_SIZE (80 * SIZE_SZ / 4)
//定义了 fastbinsY 数组的大小
#define NFASTBINS (fastbin_index (request2size (MAX_FAST_SIZE)) + 1)

fastbinsY 数组起始并没有保存头结点，而是只保存了 malloc_chunk 的 fd 成员，因为其他成员对于单链表结点并没有用，所以就省略了。如下图所示，这些 fd 指针的初始值为 NULL ，表示对应的 bin 为空，直到有 chunk 加进来时，fd 才保存 chunk 的地址。

FASTBIN_CONSOLIDATION_THRESHOLD，fastbin 中的 chunk 一般不会与其他 chunk 合并。但如果合并之后的 chunk 大于 FASTBIN_CONSOLIDATION_THRESHOLD，就会触发 malloc_consolidation()函数，将 fastbin 中的 chunk 与其他 free chunk 合并，然后移动到 unsorted bin 中。
c 复制代码
```
#define FASTBIN_CONSOLIDATION_THRESHOLD (65536UL)
```
fast bin 中最大的 chunk 是由 global_max_fast 决定的，这个值一般在堆初始化的时候设置。当然在运行时也是可以设置的。
c 复制代码
```
#define ser_max_fast(s)	\
	global_max_fast = (((s) == 0)
					? SMALLBIN_WIDTH : ((s + SIZE_SZ) & ~MALLOC_ALIGN_MASK))
#define get_max_fast()	global_max_fast	
```

bins 相关：

c 复制代码

// 从 bins 中获取指定序号的 bin 。需要注意 bin 0 是不存在的
#define bin_at(m,i)	\
	(mbinptr) (((char *) &((m)->bins[((i) - 1) * 2]))				\
			- offsetof (struct malloc_chunk,fd))

// 获得当前 bin 的上一个 bin
#define next_bin(b) ((mbinptr) ((char *) (b) + (sizeof (mchunkptr) << 1)))

// 一般用来获得 bin 中头结点 fd 指向的 chunk 或者bk 指向的 chunk
#define first(b)	((b)->fd)
#define last(b)		((b)->bk)

可以看到 bin_at(m,i) 宏定义中减去了 offsetof(struct malloc_chunk,fd)，也就是 prev_size 和 size 成员的大小。这是因为 bins 数组实际上只保存了双链表的头结点的 fd 和 bk 指针，如下图：

bins[0] 和 bins[1] 是unsorted bin 的 fd 和 bk 指针，binat(1)返回的应该是 unsorted bin 的头指针，但实际上其指向的是 bins[0] 地址减去 offsetof(struct malloc_chunk,fd)的位置，这样使用头结点指针 b 时，b->fd 或者 b->bk 能够正确访问，同时 prev_size 和 size 对于头结点没有意义，所以就被省略了。对于 binat(64) 及之后的 large bin 来说，因为头结点的 size 成员没有意义，所以其 fd_nextsize 和 bk_nextsize 也是没有意义的，也可以省略。

small bin 和 large bin 索引如下：

c 复制代码

// 根据 chunk 大小获得其在 small bin 中的索引
#define smallbin_index(sz)		\
	((SMALLBIN_WIDTH == 16 ? (((unsigned) (sz)) >> 4) : (((unsigned) (sz)) >> 3))	\
	+ SMALLBIN_CORRECTION)

// 根据 chunk 大小获得其在 large bin 中的索引
#define largebin_index(sz)		\
	(SIZE_SZ == 8 ? largebin_index_64 (sz)											\
	: MALLOC_ALIGNMENT == 16 ? largebin_index_32_big (sz)							\
    : largebin_index_32 (sz))

// 根据 chunk 大小获得其在 bins 中的索引
#define bin_index(sz)			\
	((in_smallbin_range (sz)) ? smallbin_index (sz) : largebin_index (sz))

获得 unsorted bin 。

c 复制代码

#define unsorted_chunk(M)		(bin_at (M,1))

binmap 结构在索引 bin 的时候使用，binmap 为 malloc_state 的成员，其中一个比特位表示 bins 中相应的 bin 状态，1表示 bin 不为空，0 表示为空，这样能加快搜索速度。

3.malloc_consolidate()函数：

由于 fast bin 中的 chunk 永远不会释放，导致相邻的 free chunk 无法与之合并，从而造成了大量的内存碎片，malloc_consolidate() 函数主要功能就是来解决这个问题。在达到某些条件时，glibc 就会调用该函数将 fast bin 中的 chunk 取出来，与相邻的 free chunk 合并之后放入 unsorted bin ，或者与 top chunk 合并后形成新的 top chunk 。

c 复制代码

static void malloc_consolidate(mstate av) {
    mfastbinptr* fb;					/* current fastbin being consolidated */
    mfastbinptr* maxfb;					/* last fastbin (for loop control) */
    mchunkptr 	 p;						/* current chunk being consolidated */
    mchunkptr    nextp;					/* next chunk to consolidate */
    mchunkptr 	 unsorted_bin;			/* bin header */
    mchunkptr 	 first_unsorted;		/* chunk to link to */
    
    mchunkptr			nextchunk;
    INTERNAL_SIZE_T		size;
    INTERNAL_SIZE_T		nextsize;
    INTERNAL_SIZE_T		prevsize;
    int					nextinuse;
    mchunkptr			bck;
    mchunkptr			fwd;
    
    /* 如果 max_fast 为0，就调用 malloc_init_state() 函数对 av 进行初始化 */
    if (get_max_fast() != 0) {
        // 清楚 av->flags 有关 fast bin 的标志位，表示所有 fast bin 都为空
        clear_fastchunks(av);
        
        unsorted_bin = unsorted_chunk(av);
        
        /* J将 fast bin 中的 chunk 移除并合并，然后放入 unsorted bin */
        maxfb = &fastbin(av,NFASTBINS - 1);
        fb = &fastbin(av,0);
        do {					// 内层循环遍历 fast bin 中的每一个 chunk
            check_inuse_chunk(av,p);
            nextp = p->fd;
            
            size = p->size & ~(PREV_INUSE|NON_MAIN_ARENA);
            nextchunk = chunk_at_offset(p,size);
            nextsize = chunksize(nextchunk);
            if (!prev_inuse(p)) {
                prevsize = p->prev_size;
                size += prevsize;
                p = chunk_at_offset(p,-((long) prevsize));
                unlink(av,p,bck,fwd);
            }
            // 如果下一个 chunk 不是 top chunk ，向前合并，并加入 unsorted bin
            if (nextchunk != av->top) {
                nextinuse = inuse_bit_at_offset(nextchunk,nextsize);
                if (!nextinuse) {
                    size += nextsize;
                    unlink(av,nextchunk,bck,fwd);
                } else
                    clear_inuse_bit_at_offset(nextchunk,0);
                
                first_unsorted = unsorted_bin->fd;
                unsorted_bin->fd = p;
                first_unsorted->bk = p;
                
                if (!in_smallbin_range (size)) {
                    p->fd_nextsize = NULL;
                    p->bk_nextsize = NULL;
                }
                
                set_head(p,size | PREV_INUSE);
                p->bk = unsorted_bin;
                p->fd = first_unsorted;
                set_foot(p,size);
            }
            else {		// 如果下一个 chunk 是 top chunk，和 top chunk 合并，形成新的 top chunk
                size += nextsize;
                set_head(p,size | PREV_INUSE);
                av->top = p;
            }
        } while ( (p = nesxtp) != 0);
    }
    else {
        malloc_init_state(av);		// malloc 初始化
        check_malloc_state(av);
    }
}

4. malloc() 相关源码：

__libc_malloc()：

因为使用了宏 strong_alias(__libc_malloc,malloc)，在 libc 源码中 malloc() 函数实际上是 __libc_malloc()，定义如下：

c 复制代码

void * __libc_malloc (size_t bytes) {
    mstate ar_ptr;
    void *victim;
    // 读取 __malloc_hook 钩子，如果有钩子，则运行钩子函数并返回
    void *(*hook) (size_t,const void *)
        = atomic_forced_read (__malloc_hook);
    if (__builtin_expect (hook != NULL,0))
        return (*hook)(bytes,RETURN_ADDRESS (0));
    
    arena_get (ar_ptr,bytes);				// 寻找一个合适的 arena 来分配内存。
    victim = _int_malloc (ar_ptr,bytes);	// 尝试调用 _int_malloc() 分配内存
    //  如果没有找到合适的内存，就尝试找一个可用的 arena (前提是 ar_pter != NULL)
    if (!victim && ar_ptr != NULL) {
        LIBC_PROBE (memory_malloc_retry,1,bytes);
        ar_ptr = arena_get_retry (ar_ptr,bttes);
        victim = _int_malloc (ar_ptr,bytes);
    }
    
    if (ar_ptr != NULL)				// 如果申请了 arena ，还需要解锁该 arena
        (void) mutex_unlock (&ar_ptr->mutex);
    
    assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
           ar_ptr == arena_for_chunk (mem2chunk (victim)));
    return victim;
}

_int_malloc()：

_int_malloc() 是内存分配的核心函数，为了以最快的速度找到最合适的堆块，glibc 根据申请堆块的大、各个 bin 的状态仔细地安排了分配顺序和内存整理地时机。其大概搜索顺序是：

fast bin （寻找大小完全一样的）

small bin （寻找大小完全一样的）

unsorted bin （寻找大小完全一样的）

large bin （如果申请较大的是 chunk ，寻找最小能满足地）

bins （寻找最小能满足的）

top chunk （切分出合适的 chunk ）

系统函数分配。

_int_malloc() 函数具体过程如下。

（1）首先定义一些列所需的变量，并将用户请求的 bytes 转化为表示 chunk 大小的 nb 。如果没有合适的 arena ，就调用 sysmalloc() ，用 mmap() 分配 chunk 并返回。

c 复制代码

if (__glibc_unlinkely (av == NULL)){
    void *p = sysmalloc (nb,av);
    if (p != NULL)
        alloc_perturb (p,bytes);
    return p;
}

（2）其次，检查 fast bin 中是否有合适的 chunk 。

c 复制代码

//如果 nb 在 fast bin 范围内，就通过 fast bin 分配，这段代码可以在初始化之前运行
if ((unsigned long) (nb) <= (unsigned long) (get_max_fast())) {
    //根据 nb 找合适的 fast bin ，fb 是指向对应 fast bin 的指针
    idx = fastbin_index (nb);
    mfastbinptr *fb = &fastbin (av,idx);
    mchunkptr pp = *fb;
    
    do {	//如果 fast bin 中有 chunk ，就将其按 LIFO 的规则取出，如果没有则跳过
        victim = pp;
        if (victim == NULL)
            break
    }
    while ((pp = catomic_compare_and_exchange_val_acq (fb,victim->fd,victim))
          != victim);
    //如果 victim != NULL ，则说明找到了合适的 chunk 没检查后将其返给用户
    if (victim != 0) {
        //检查此 chunk 大小是否应在 fast bin 中，防止伪造
        if (__builtin_expect (fastbin_index (chunksize (victim)) != idx,0)) {
            errstr = "malloc():memory corruption (fast)";
            errout:
            malloc_printerr (check_action,errstr,chunk2mem (victim),av);
            return NULL;
        }
        check_remalloced_chunk (av,victim,nb);
        void *p = chunk2mem (victim);
        alloc_perturb (p,bytes);
        return p;
    }
}

（3）然后，检查 small bin 中是否有合适的 chunk。

c 复制代码

  if (in_smallbin_range (nb))
    {
      idx = smallbin_index (nb);
      bin = bin_at (av, idx);

      if ((victim = last (bin)) != bin)
        {
          bck = victim->bk;
          if (__glibc_unlikely (bck->fd != victim)) //类似unlink检查
            malloc_printerr ("malloc(): smallbin double linked list corrupted");
          set_inuse_bit_at_offset (victim, nb);
          bin->bk = bck;
          bck->fd = bin;

          if (av != &main_arena)
            set_non_main_arena (victim);
          check_malloced_chunk (av, victim, nb);
#if USE_TCACHE
          /* While we're here, if we see other chunks of the same size,
             stash them in the tcache.  */
          size_t tc_idx = csize2tidx (nb);
          if (tcache && tc_idx < mp_.tcache_bins)
            {
              mchunkptr tc_victim;

              /* While bin not empty and tcache not full, copy chunks over.  */
              while (tcache->counts[tc_idx] < mp_.tcache_count
                     && (tc_victim = last (bin)) != bin)
                {
                  if (tc_victim != 0)
                    {
                      bck = tc_victim->bk;
                      set_inuse_bit_at_offset (tc_victim, nb);
                      if (av != &main_arena)
                        set_non_main_arena (tc_victim);
                      bin->bk = bck;
                      bck->fd = bin;

                      tcach                   }
                }
            }
#endif
          void *p = chunk2mem (victim);
          alloc_perturb (p, bytes);
          return p;
        }
    }

（4）此后，整理 fast bin ，计算 large bin 的 index。

c 复制代码

//到这里还不能满足，就调用 malloc_consolidate() 整理 fastbins ，或许就会有合适的 chunk 
else {
    idx = largebin_index (nb);
    if (have_fastchunks (av))
        malloc_consolidate (av);
}

（5）接下来，函数进入一个大的外层循环 for 循环，包含了 _int_malloc() 函数之后的所有进程。紧接着是内层第一个 while 循环，它会遍历 unsorted bin 中的每一个 chunk ，如果大小正好合适，就将其取出，否则就将其放入 small bin 或者 large bin。这是唯一的将 chunk 放进 small bin 或者 large bin 的过程。

c 复制代码

for (;; ) {
    int iters = 0;
    while ((victim = unsorted_chunks (av)->bk) != unsorted_chunk (av)) {
        bck = victim->bk;
        if (__builtin_expect (victim->size <= 2 * SIZE_SZ ,0)
           || __builtin_expect (victim->size >> av->system_mem,0))
            malloc_printerr (check_action,"malloc():memory corruption",
                             chunk2mem (victim),av);
        size = chunksize (victim);

（6）在内层第一个循环内部，当请求的 chunk 属于 small bin 、 unsorted bin 只有一个 chunk 为 last remainder 并且满足拆分条件时，就将其拆分。

c 复制代码

if (in_smallbin_range (nb) &&
              bck == unsorted_chunks (av) &&
              victim == av->last_remainder &&
              (unsigned long) (size) > (unsigned long) (nb + MINSIZE))
            {
              /* split and reattach remainder */
              remainder_size = size - nb;
              remainder = chunk_at_offset (victim, nb);
              unsorted_chunks (av)->bk = unsorted_chunks (av)->fd = remainder;
              av->last_remainder = remainder;
              remainder->bk = remainder->fd = unsorted_chunks (av);
              if (!in_smallbin_range (remainder_size))
                {
                  remainder->fd_nextsize = NULL;
                  remainder->bk_nextsize = NULL;
                }

              set_head (victim, nb | PREV_INUSE |
                        (av != &main_arena ? NON_MAIN_ARENA : 0));
              set_head (remainder, remainder_size | PREV_INUSE);
                            set_foot (remainder, remainder_size);

              check_malloced_chunk (av, victim, nb);
              void *p = chunk2mem (victim);
              alloc_perturb (p, bytes);
              return p;
            }

（7）接着，将 chunk 从 unsorted bin 中移除，如果大小正合适，就将其返回给用户。

c 复制代码

if (__glibc_unlikely (bck->fd != victim))
            malloc_printerr ("malloc(): corrupted unsorted chunks 3");
          unsorted_chunks (av)->bk = bck;
          bck->fd = unsorted_chunks (av);

          /* Take now instead of binning if exact fit */

          if (size == nb)
            {
              set_inuse_bit_at_offset (victim, size);
              if (av != &main_arena)
                set_non_main_arena (victim);
#if USE_TCACHE
              /* Fill cache first, return to user only if cache fills.
                 We may return one of these chunks later.  */
              if (tcache_nb
                  && tcache->counts[tc_idx] < mp_.tcache_count)
                {
                  tcache_put (victim, tc_idx);
                  return_cached = 1;
                  continue;
                }
              else
                {
#endif
              check_malloced_chunk (av, victim, nb);
              void *p = chunk2mem (victim);
              alloc_perturb (p, bytes);
              return p;
#if USE_TCACHE
                }
#endif
            }

（8）如果 chunk 大小不合适，就将其插入对应的 bin 中。插入过程也就是双链表插入结点的过程。

c 复制代码

          /* place chunk in bin */

          if (in_smallbin_range (size)) //在small bin范围内
            {
              victim_index = smallbin_index (size);
              //bck指向头节点，fwd是头节点的fd节点，chunk会被插入到头节点和fwd节点之间
              bck = bin_at (av, victim_index);
              fwd = bck->fd;
            }
          else //在large bin范围内
            {
              victim_index = largebin_index (size);
              bck = bin_at (av, victim_index);
              fwd = bck->fd;
				//需要对双向链表进行的额外操作，chunk大小按fd_nextsize的方向递减
              /* maintain large bins in sorted order */
              if (fwd != bck) //large bin链表不为空
                {
                  /* Or with inuse bit to speed comparisons */
                  size |= PREV_INUSE; 
                  /* if smaller than smallest, bypass loop below */
                  assert (chunk_main_arena (bck->bk));
                  //bck->bk 是最小的chunk，如果小于它，就将chunk加入bck--->bk
                  if ((unsigned long) (size)
                      < (unsigned long) chunksize_nomask (bck->bk))
                    {
                      fwd = bck;
                      bck = bck->bk;
                      victim->fd_nextsize = fwd->fd;
                      victim->bk_nextsize = fwd->fd->bk_nextsize;
                      fwd->fd->bk_nextsize = victim->bk_nextsize->fd_nextsize = victim;
                    }
                  else // 不小于最小的chunk 就通过fd_nextsize找到不必他大的chunk
                    {
                      assert (chunk_main_arena (fwd));
                      while ((unsigned long) size < chunksize_nomask (fwd))
                        {
                          fwd = fwd->fd_nextsize;
                          assert (chunk_main_arena (fwd));
                        }
						//如果大小相等，插入到chunk的fd处，无需改动nextsize构成的双向链表
                      if ((unsigned long) size
                          == (unsigned long) chunksize_nomask (fwd))
                        /* Always insert in the second position.  */
                        fwd = fwd->fd;
                      else // 否则需要插入nextsize的双向链表
                        {
                          victim->fd_nextsize = fwd;
                          victim->bk_nextsize = fwd->bk_nextsize;
                          if (__glibc_unlikely (fwd->bk_nextsize->fd_nextsize != fwd))
                            malloc_printerr ("malloc(): largebin double linked list corrupted (nextsize)");
                          fwd->bk_nextsize = victim;
                          victim->bk_nextsize->fd_nextsize = victim;
                        }
                      bck = fwd->bk;
                      if (bck->fd != fwd)
                        malloc_printerr ("malloc(): largebin double linked list corrupted (bk)");
                    }
                }
              else //如果large bin 链表为空，将nextsize 指向自己即可
                victim->fd_nextsize = victim->bk_nextsize = victim;
            }

          mark_bin (av, victim_index);
          victim->bk = bck;
          victim->fd = fwd;
          fwd->bk = victim;
          bck->fd = victim;

#if USE_TCACHE
      /* If we've processed as many chunks as we're allowed while
         filling the cache, return one of the cached ones.  */
      ++tcache_unsorted_count;
      if (return_cached
          && mp_.tcache_unsorted_limit > 0
          && tcache_unsorted_count > mp_.tcache_unsorted_limit)
        {
          return tcache_get (tc_idx);
        }
#endif

（9）如果用户申请的 chunk 是 large chunk ，就在第一个循环结束后搜索 large bin 。

c 复制代码

     /*
         If a large request, scan through the chunks of current bin in
         sorted order to find smallest that fits.  Use the skip list for this.
       */

      if (!in_smallbin_range (nb)) //申请的chunk 在large bin范围内
        {
          bin = bin_at (av, idx);

          /* skip scan if empty or largest chunk is too small */
          //如果victim等于头节点，说明bin为空，如果小于nb说明不会有合适的
          if ((victim = first (bin)) != bin
              && (unsigned long) chunksize_nomask (victim)
                >= (unsigned long) (nb))
            {
              //反向遍历chunk，找到第一个不小于nb的chunk
              victim = victim->bk_nextsize;
              while (((unsigned long) (size = chunksize (victim)) <
                      (unsigned long) (nb)))
                victim = victim->bk_nextsize;
		
              /* Avoid removing the first entry for a size so that the skip
                 list does not have to be rerouted.  */
              //如果该chunk和他的fd chunk 一样大，就选择fd chunk，避免改动nextsize
              if (victim != last (bin)
                  && chunksize_nomask (victim)
                    == chunksize_nomask (victim->fd))
                victim = victim->fd;

              remainder_size = size - nb;
              unlink_chunk (av, victim);

              /* Exhaust */
              //如果该chunk 减去 nb 小于 MINSIZE ，就直接返回给用户
              if (remainder_size < MINSIZE)
                {
                  set_inuse_bit_at_offset (victim, size);
                  if (av != &main_arena)
                    set_non_main_arena (victim);
                }
              /* Split */
              //否则，将remainer 加入 unsorted bin中
              else
                {
                  remainder = chunk_at_offset (victim, nb);
                  /* We cannot assume the unsorted list is empty and therefore
                     have to perform a complete insert here.  */
                  bck = unsorted_chunks (av);
                  fwd = bck->fd;
                  if (__glibc_unlikely (fwd->bk != bck))
                    malloc_printerr ("malloc(): corrupted unsorted chunks");
                  remainder->bk = bck;
                  remainder->fd = fwd;
                  bck->fd = remainder;
                  fwd->bk = remainder;
                  if (!in_smallbin_range (remainder_size))
                    {
                      remainder->fd_nextsize = NULL;
                      remainder->bk_nextsize = NULL;
                    }
                  set_head (victim, nb | PREV_INUSE |
                            (av != &main_arena ? NON_MAIN_ARENA : 0));
                  set_head (remainder, remainder_size | PREV_INUSE);
                  set_foot (remainder, remainder_size);
                }
              check_malloced_chunk (av, victim, nb);
              void *p = chunk2mem (victim);
              alloc_perturb (p, bytes);
              return p;
            }
        }

（10）接下来，进入内层第二个 for 循环，根据 binmap 来搜索 bin ，因为申请的 chunk 大小所对应 bin 没有找到合适的 chunk ，所以，就要从下一个 bin 开始搜索。

复制代码

	  ++idx;
      bin = bin_at (av, idx);
      block = idx2block (idx);
      map = av->binmap[block];
      bit = idx2bit (idx);

（11）在内层第二个循环内部，寻找第一个不为空的 block ，再根据比特位找到合适的 bin。

c 复制代码

for (;; )
       {
    /*binmap数组的元素类型是unsigned int，32和64位一般都是4字节，也就是32比特，所以binmap的一个block能够检查32个bin*/
         /* Skip rest of block if there are no more set bits in this block.  */
         if (bit > map || bit == 0)
           {
             do
               {
                 if (++block >= BINMAPSIZE) /* out of bins */ //如果成立，说明遍历了所有的bin且都为空，只有区top chunk中寻找
                   goto use_top;
               }
       
             while ((map = av->binmap[block]) == 0);

             bin = bin_at (av, (block << BINMAPSHIFT));
             bit = 1;
           }

         /* Advance to bin with set bit. There must be one. */
    //找到block不为空的最小的bin对应的bit
         while ((bit & map) == 0)
           {
             bin = next_bin (bin);
             bit <<= 1;
             assert (bit != 0);
           }

（12）然后检查 bit 对应的 bin 是否为空，如果是，就清空对应的 bit 位，从下一个 bin 开始循环，否则将 Victim 从 bin 中取出来。

c 复制代码

 /* Inspect the bin. It is likely to be non-empty */
          victim = last (bin);

          /*  If a false alarm (empty bin), clear the bit. */
          if (victim == bin)
            {
              av->binmap[block] = map &= ~bit; /* Write through */
              bin = next_bin (bin);
              bit <<= 1;
            }

          else
            {
              size = chunksize (victim);

              /*  We know the first chunk in this bin is big enough to use. */
              assert ((unsigned long) (size) >= (unsigned long) (nb));

              remainder_size = size - nb;

              /* unlink */
              unlink_chunk (av, victim);

（13）将取出的victim进行切分，并把 remainder 加入 unsorted bin ，如果 victim 不够切分，就直接返回给用户，内层第二个循环到此结束。

c 复制代码

              /* Exhaust */
              if (remainder_size < MINSIZE)
                {
                  set_inuse_bit_at_offset (victim, size);
                  if (av != &main_arena)
                    set_non_main_arena (victim);
                }

              /* Split */
              else
                {
                  remainder = chunk_at_offset (victim, nb);

                  /* We cannot assume the unsorted list is empty and therefore
                     have to perform a complete insert here.  */
                  bck = unsorted_chunks (av);
                  fwd = bck->fd;
                  if (__glibc_unlikely (fwd->bk != bck))
                    malloc_printerr ("malloc(): corrupted unsorted chunks 2");
                  remainder->bk = bck;
                  remainder->fd = fwd;
                  bck->fd = remainder;
                  fwd->bk = remainder;

                  /* advertise as last remainder */
                  if (in_smallbin_range (nb))
                    av->last_remainder = remainder;
                  if (!in_smallbin_range (remainder_size))
                    {
                      remainder->fd_nextsize = NULL;
                      remainder->bk_nextsize = NULL;
                    }
                  set_head (victim, nb | PREV_INUSE |
                            (av != &main_arena ? NON_MAIN_ARENA : 0));
                  set_head (remainder, remainder_size | PREV_INUSE);
                  set_foot (remainder, remainder_size);
                }
              check_malloced_chunk (av, victim, nb);
              void *p = chunk2mem (victim);
              alloc_perturb (p, bytes);
              return p;
            }
        }

（14）如果上面的操作还不能满足请求，就只能从 top chunk 上进行切分。

c 复制代码

   use_top:
      /*
         If large enough, split off the chunk bordering the end of memory
         (held in av->top). Note that this is in accord with the best-fit
         search rule.  In effect, av->top is treated as larger (and thus
         less well fitting) than any other available chunk since it can
         be extended to be as large as necessary (up to system
         limitations).

         We require that av->top always exists (i.e., has size >=
         MINSIZE) after initialization, so if it would otherwise be
         exhausted by current request, it is replenished. (The main
         reason for ensuring it exists is that we may need MINSIZE space
         to put in fenceposts in sysmalloc.)
       */

      victim = av->top;
      size = chunksize (victim);

      if (__glibc_unlikely (size > av->system_mem))
        malloc_printerr ("malloc(): corrupted top size");

      if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
        {
          remainder_size = size - nb;
          remainder = chunk_at_offset (victim, nb);
          av->top = remainder;
          set_head (victim, nb | PREV_INUSE |
                    (av != &main_arena ? NON_MAIN_ARENA : 0));
          set_head (remainder, remainder_size | PREV_INUSE);

          check_malloced_chunk (av, victim, nb);
          void *p = chunk2mem (victim);
          alloc_perturb (p, bytes);
          return p;
        }

      /* When we are using atomic ops to free fast chunks we can get
         here for all block sizes.  */
      else if (atomic_load_relaxed (&av->have_fastchunks))
        {
          malloc_consolidate (av);
          /* restore original bin index */
          if (in_smallbin_range (nb))
            idx = smallbin_index (nb);
          else
            idx = largebin_index (nb);
        }

      /*
         Otherwise, relay to handle system-dependent cases
       */
      else
        {
          void *p = sysmalloc (nb, av);
                                                                                                                       
          if (p != NULL)
            alloc_perturb (p, bytes);
          return p;
        }
    }
}

在主线程下，sysmalloc() 函数的大概流程如下：

（1）当申请的大小 nb 大于 mp_.mmap_threshold 时，通过 mmap() 函数进行分配。其中 mp_.mmap_threshold 的默认大小为 128*1024 字节。

（2）尝试用 brk() 扩展堆内存，形成新的 top chunk ，而久的 top chunk 会被释放。然后从新的 top chunk 中切分出 nb 大小的 chunk ，返回给用户。

5.free()相关源码：

__libc_free()：

同 malloc() 函数一样，free() 函数实际上是 __libc_free()，其定义如下：

c 复制代码

void __libc_free (void *mem)
{
  mstate ar_ptr;
  mchunkptr p;                          /* chunk corresponding to mem */

  void (*hook) (void *, const void *)
    = atomic_forced_read (__free_hook);
  if (__builtin_expect (hook != NULL, 0)) //hook函数不是null，就执行__free_hook对应的函数并返回
    {
      (*hook)(mem, RETURN_ADDRESS (0));
      return;
    }

  if (mem == 0)                              /* free(0) has no effect */
    return;

  p = mem2chunk (mem); //将指向user data 的指针转化为指向 chunk的指针

  if (chunk_is_mmapped (p)) //如果chunk是mmap分配的，就用munmap_chunk（）函数释放                /* release mmapped memory. */
    {
      /* See if the dynamic brk/mmap threshold needs adjusting.
	 Dumped fake mmapped chunks do not affect the threshold.  */
      if (!mp_.no_dyn_threshold
          && chunksize_nomask (p) > mp_.mmap_threshold
          && chunksize_nomask (p) <= DEFAULT_MMAP_THRESHOLD_MAX
	  && !DUMPED_MAIN_ARENA_CHUNK (p))
        {
          mp_.mmap_threshold = chunksize (p);
          mp_.trim_threshold = 2 * mp_.mmap_threshold;
          LIBC_PROBE (memory_mallopt_free_dyn_thresholds, 2,
                      mp_.mmap_threshold, mp_.trim_threshold);
        }
      munmap_chunk (p);
      return;
    }

  MAYBE_INIT_TCACHE ();

  ar_ptr = arena_for_chunk (p); //获得指向arena的指针
  _int_free (ar_ptr, p, 0); //调用init函数进行释放
    
}

_int_free()：

函数开头先定义了一些列所需的变量，并获得要释放的 chunk 的大小，并对 chunk 做一些检查。

c 复制代码

static void _int_free (mstate av, mchunkptr p, int have_lock)
{
  INTERNAL_SIZE_T size;        /* its size */
  mfastbinptr *fb;             /* associated fastbin */
  mchunkptr nextchunk;         /* next contiguous chunk */
  INTERNAL_SIZE_T nextsize;    /* its size */
  int nextinuse;               /* true if nextchunk is used */
  INTERNAL_SIZE_T prevsize;    /* size of previous contiguous chunk */
  mchunkptr bck;               /* misc temp for linking */
  mchunkptr fwd;               /* misc temp for linking */

  size = chunksize (p);

  /* Little security check which won't hurt performance: the
     allocator never wrapps around at the end of the address space.
     Therefore we can exclude some size values which might appear
     here by accident or by "design" from some intruder.  */
    
    //第一个条件筛选掉一些特别大的size，第二个条件检查chunk是否对齐
  if (__builtin_expect ((uintptr_t) p > (uintptr_t) -size, 0)
      || __builtin_expect (misaligned_chunk (p), 0))
    malloc_printerr ("free(): invalid pointer");
  /* We know that each chunk is at least MINSIZE bytes in size or a
     multiple of MALLOC_ALIGNMENT.  */
    //分别检查size是否过小以及size是否对其
  if (__glibc_unlikely (size < MINSIZE || !aligned_OK (size)))
    malloc_printerr ("free(): invalid size");

  check_inuse_chunk(av, p); //仅当定义了Malloc_Debug时使用

然后，判断该 chunk 是否在 fast bin 范围内，如果是，则插入 fast bin 中。

c 复制代码

//如果size 小于global_max_fast,则将其加入fast bin中
if ((unsigned long)(size) <= (unsigned long)(get_max_fast ())

#if TRIM_FASTBINS
      /*
	If TRIM_FASTBINS set, don't place chunks
	bordering top into fastbins
      */
    //TRIM_FASTBINS默认为0，不会将与Top chunk靠近的fast bin删掉
      && (chunk_at_offset(p, size) != av->top)
#endif
      ) {

    //检查下一个chunk的大小，不能小于2*SIZE，也不能大于av->system_mem
    if (__builtin_expect (chunksize_nomask (chunk_at_offset (p, size))
			  <= 2 * SIZE_SZ, 0)
	|| __builtin_expect (chunksize (chunk_at_offset (p, size))
			     >= av->system_mem, 0))
      {
	bool fail = true;
	/* We might not have a lock at this point and concurrent modifications
	   of system_mem might result in a false positive.  Redo the test after
	   getting the lock.  */
	if (!have_lock)
	  {
	    __libc_lock_lock (av->mutex);
	    fail = (chunksize_nomask (chunk_at_offset (p, size)) <= 2 * SIZE_SZ
		    || chunksize (chunk_at_offset (p, size)) >= av->system_mem);
	    __libc_lock_unlock (av->mutex);
	  }

	if (fail)
	  malloc_printerr ("free(): invalid next size (fast)");
      }
	//释放之前将user data部分填充为 perturb byte
    free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
	//在av中设置位，表示fast bin中有 free chunk
    atomic_store_relaxed (&av->have_fastchunks, true);
    unsigned int idx = fastbin_index(size);
    fb = &fastbin (av, idx);
	//通过原子操作将p插入fast bin
    /* Atomically link P to its fastbin: P->FD = *FB; *FB = P;  */
    mchunkptr old = *fb, old2;

    if (SINGLE_THREAD_P)
      {
	/* Check that the top of the bin is not the record we are going to
	   add (i.e., double free).  */
	if (__builtin_expect (old == p, 0)) //简单检查，fast bin中的第一个chunk 是不是刚刚的chunk，防止doubble free
	  malloc_printerr ("double free or corruption (fasttop)");
	p->fd = PROTECT_PTR (&p->fd, old);
	*fb = p;
      }
    else
      do
	{
	  /* Check that the top of the bin is not the record we are going to
	     add (i.e., double free).  */
	  if (__builtin_expect (old == p, 0))
	    malloc_printerr ("double free or corruption (fasttop)");
	  old2 = old;
	  p->fd = PROTECT_PTR (&p->fd, old);
	}
      while ((old = catomic_compare_and_exchange_val_rel (fb, p, old2))
	     != old2);

    /* Check that size of fastbin chunk at the top is the same as
       size of the chunk that we are adding.  We can dereference OLD
       only if we have the lock, otherwise it might have already been
       allocated again.  */
   //再次检查，fast bin
    if (have_lock && old != NULL
	&& __builtin_expect (fastbin_index (chunksize (old)) != idx, 0))
      malloc_printerr ("invalid fastbin entry (free)");
  }

如果该 chunk 并非 mmap() 生成的，就需要进行合并，过程如前面所讲，先向后合并，再向前合并。如果合并之后的 chunk 超过了 FASTBIN_CONSOLIDATION_THRESHOLD ，就会整理 fast bin 并向系统返还内存。 _int_free() 函数到此结束。

c 复制代码

/*
      If freeing a large space, consolidate possibly-surrounding
      chunks. Then, if the total unused topmost memory exceeds trim
      threshold, ask malloc_trim to reduce top.

      Unless max_fast is 0, we don't know if there are fastbins
      bordering top, so we cannot tell for sure whether threshold
      has been reached unless fastbins are consolidated.  But we
      don't want to consolidate on each free.  As a compromise,
      consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD
      is reached.
    */

    if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) {
      if (atomic_load_relaxed (&av->have_fastchunks))
	malloc_consolidate(av);

      if (av == &main_arena) {
#ifndef MORECORE_CANNOT_TRIM
	if ((unsigned long)(chunksize(av->top)) >=
	    (unsigned long)(mp_.trim_threshold))
	  systrim(mp_.top_pad, av);
#endif
      } else {
	/* Always try heap_trim(), even if the top chunk is not
	   large, because the corresponding heap might go away.  */
	heap_info *heap = heap_for_ptr(top(av));

	assert(heap->ar_ptr == av);
	heap_trim(heap, mp_.top_pad);
      }
    }

    if (!have_lock)
      __libc_lock_unlock (av->mutex);
  }
  /*
    If the chunk was allocated via mmap, release via munmap().
  */

  else {
    munmap_chunk (p);
  }
}