BetaMao

ptmalloc小记

字数统计: 1.5k阅读时长: 7 min
2018/02/26 Share

本篇记录下学习ptmalloc过程中遇到的疑惑以及编译大佬的文章~

疑惑

  1. lastreminder
    它总是指向切割后的部分,可能在largebin里切割,也可能在topchunk,或者其他,哪里有切割它就被更新指向该处。
  2. unsortedbins
    它会倒着遍历,除非精确匹配,否则没遍历一个,就将其放入对应bin。

ptmalloc fanzine

这几篇文章介绍了一些典型的ptmalloc meta-data攻击技术(来源[2]),编译记录如下,注意平台版本不同可能实现也不同:

munmap madness

分配mmap chunks

使用mmap分配的情形:

  1. 使用非主分配区
  2. 所需chunksize达到mmap分配阈值

释放mmap chunks

thread local caching in glibc malloc

tcache介绍

它被集成到了libc2.26中,它对每个线程增加一个bin缓存,这样能显著地提高性能,默认情况下,每个线程有64个bins,以16(8)递增,mensize从24(12)到1032(516):

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
#if USE_TCACHE
/* We want 64 entries. This is an arbitrary limit, which tunables can reduce. */
# define TCACHE_MAX_BINS 64
# define MAX_TCACHE_SIZE tidx2usize (TCACHE_MAX_BINS-1)

/* Only used to pre-fill the tunables. */
# define tidx2usize(idx) (((size_t) idx) * MALLOC_ALIGNMENT + MINSIZE - SIZE_SZ)

/* When "x" is from chunksize(). */
# define csize2tidx(x) (((x) - MINSIZE + MALLOC_ALIGNMENT - 1) / MALLOC_ALIGNMENT)
/* When "x" is a user-provided size. */
# define usize2tidx(x) csize2tidx (request2size (x))

/* With rounding and alignment, the bins are...
idx 0 bytes 0..24 (64-bit) or 0..12 (32-bit)
idx 1 bytes 25..40 or 13..20
idx 2 bytes 41..56 or 21..28
etc. */

/* This is another arbitrary limit, which tunables can change. Each
tcache bin will hold at most this number of chunks. */
# define TCACHE_FILL_COUNT 7
#endif

每个bin是单链表结构,整个tcache为tcache_perthread_struct

1
2
3
4
5
6
7
8
9
10
11
12
13
14
/* We overlay this structure on the user-data portion of a chunk when the chunk is stored in the per-thread cache.  */
typedef struct tcache_entry
{
struct tcache_entry *next;
} tcache_entry;

/* There is one of these for each thread, which contains the per-thread cache (hence "tcache_perthread_struct"). Keeping overall size low is mildly important. Note that COUNTS and ENTRIES are redundant (we could have just counted the linked list each time), this is for performance reasons. */
typedef struct tcache_perthread_struct
{
char counts[TCACHE_MAX_BINS];
tcache_entry *entries[TCACHE_MAX_BINS];
} tcache_perthread_struct;

static __thread tcache_perthread_struct *tcache = NULL;
tcache使用

chunk进入tcache:

  1. 释放时,_int_free中在检查了size合法后,放入fastbin之前,它先尝试将其放入tcache:
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    #if USE_TCACHE
    {
    size_t tc_idx = csize2tidx (size);

    if (tcache
    && tc_idx < mp_.tcache_bins //大小合适
    && tcache->counts[tc_idx] < mp_.tcache_count) //未满
    {
    tcache_put (p, tc_idx);
    return;
    }
    }
    #endif
    static void tcache_put (mchunkptr chunk, size_t tc_idx)
    {
    tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
    assert (tc_idx < TCACHE_MAX_BINS);
    e->next = tcache->entries[tc_idx];
    tcache->entries[tc_idx] = e; //和fastbin一样,单链表FILO
    ++(tcache->counts[tc_idx]);
    }
  2. _int_malloc中,若fastbins中取出块则将对应bin中其余chunk填入tcache对应项直到填满(smallbins中也是如此):
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
      if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
    {
    idx = fastbin_index (nb);
    mfastbinptr *fb = &fastbin (av, idx);
    mchunkptr pp = *fb;
    REMOVE_FB (fb, victim, pp);
    if (victim != 0)
    {
    if (__builtin_expect (fastbin_index (chunksize (victim)) != idx, 0))
    {
    errstr = "malloc(): memory corruption (fast)";
    errout:
    malloc_printerr (check_action, errstr, chunk2mem (victim), av);
    return NULL;
    }
    check_remalloced_chunk (av, victim, nb);
    #if USE_TCACHE
    /* While we're here, if we see other chunks of the same size,
    stash them in the tcache. */
    size_t tc_idx = csize2tidx (nb);
    if (tcache && tc_idx < mp_.tcache_bins)
    {
    mchunkptr tc_victim;

    /* While bin not empty and tcache not full, copy chunks over. */
    while (tcache->counts[tc_idx] < mp_.tcache_count
    && (pp = *fb) != NULL)
    {
    REMOVE_FB (fb, tc_victim, pp);
    if (tc_victim != 0)
    {
    tcache_put (tc_victim, tc_idx);
    }
    }
    }
    #endif
    void *p = chunk2mem (victim);
    alloc_perturb (p, bytes);
    return p;
    }
    }
  3. 当进入unsorted bin中找到精确的大小时,并不是直接返回而是先加入tcache中,直到填满:
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
        while(遍历unsorted bin){
    ....
    //当精确匹配
    if (size == nb)
    {
    set_inuse_bit_at_offset (victim, size);
    if (av != &main_arena)
    set_non_main_arena (victim);
    #if USE_TCACHE
    /* Fill cache first, return to user only if cache fills.
    We may return one of these chunks later. */
    if (tcache_nb
    && tcache->counts[tc_idx] < mp_.tcache_count)
    {
    tcache_put (victim, tc_idx);
    return_cached = 1;
    continue;
    }
    else
    {
    #endif //满了才直接返回
    check_malloced_chunk (av, victim, nb);
    void *p = chunk2mem (victim);
    alloc_perturb (p, bytes);
    return p;
    #if USE_TCACHE
    }
    #endif
    }
    }

从tcache获取chunk:

  1. __libc_malloc_int_malloc之前
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    void * __libc_malloc (size_t bytes)
    {
    mstate ar_ptr;
    void *victim;

    //hook();
    #if USE_TCACHE
    /* int_free also calls request2size, be careful to not pad twice. */
    size_t tbytes = request2size (bytes);
    size_t tc_idx = csize2tidx (tbytes);

    MAYBE_INIT_TCACHE ();

    DIAG_PUSH_NEEDS_COMMENT;
    if (tc_idx < mp_.tcache_bins
    /*&& tc_idx < TCACHE_MAX_BINS*/ /* to appease gcc */
    && tcache
    && tcache->entries[tc_idx] != NULL)
    {
    return tcache_get (tc_idx);
    }
    DIAG_POP_NEEDS_COMMENT;
    #endif

    arena_get (ar_ptr, bytes);

    victim = _int_malloc (ar_ptr, bytes);
  2. 在遍历完unsorted bin之后,若是有缓存则取出并返回:
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
          while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av))
    {
    //遍历~~~~~
    }

    #if USE_TCACHE
    /* If all the small chunks we found ended up cached, return one now. */
    if (return_cached)
    {
    return tcache_get (tc_idx);
    }
    #endif
  3. 在遍历unsorted bin时,大小不匹配的chunk将会被放入对应的bins,若达到tcache_unsorted_limit限制且之前已经存入过chunk则在此时取出(默认无限制):
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
          while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av))
    {
    //遍历~~~~~
    //大小不等时,放入对应bin~
    #if USE_TCACHE
    /* If we've processed as many chunks as we're allowed while
    filling the cache, return one of the cached ones. */
    ++tcache_unsorted_count;
    if (return_cached
    && mp_.tcache_unsorted_limit > 0
    && tcache_unsorted_count > mp_.tcache_unsorted_limit)
    {
    return tcache_get (tc_idx);
    }
    #endif

    #define MAX_ITERS 10000
    if (++iters >= MAX_ITERS)
    break;
    }

对旧的利用技术的影响

由上可知tcache发生在比较前面,在它之前只有size等很少的完整性校验(只有存入前有size >= MINSIZE && aligned_OK (size) && !misaligned_chunk (p) && (uintptr_t) p <= (uintptr_t) -size),而它本身并没有什么完整性校验,于是利用它进行攻击会简单很多。

house of spirit

在之前,这种利用手段主要用在fastbin,因为它的检查要少很多,包括要释放的chunk大小正确且属于fastbin,下一个chunk大小要适中,不能小于2*SIZE_SZ且不能大于已分配内存。small的检查更多,但是若使用tcache,就只需要关心当前chunk的地址及大小,大小也覆盖到了更多smallbin的范围了。

double free

和fastbin一样,但是fastbin不能连续释放同一个chunk,而它无这种限制而且它适用更大的chunk。

Overlapping chunks

若能更改指定chunk的size,增加其值,那么在释放后再分配对应大小就能控制其后的chunk了。

tcache poisoning

对于已经在tcache里面的chunk,更改它的fd值即可在malloc时分配任意地址!

Smallbin cache filling bck write

一直存在,在unsorted bin的unlink中,它是最原始的unlink未进行其他检查(如bck->fd != victim),这样又可以进行DWORD SHOOT啦:

1
2
unsorted_chunks (av)->bk = bck;
bck->fd = unsorted_chunks (av);

参考

[1]华庭- 《Ptmalloc2 源代码分析》
[2]andigena-《ptmalloc fanzine
[3]glibc wiki malloc internals

CATALOG
  1. 1. 疑惑
  2. 2. ptmalloc fanzine
    1. 2.1. munmap madness
      1. 2.1.1. 分配mmap chunks
      2. 2.1.2. 释放mmap chunks
    2. 2.2. thread local caching in glibc malloc
      1. 2.2.1. tcache介绍
        1. 2.2.1.1. tcache使用
      2. 2.2.2. 对旧的利用技术的影响
        1. 2.2.2.1. house of spirit
        2. 2.2.2.2. double free
        3. 2.2.2.3. Overlapping chunks
        4. 2.2.2.4. tcache poisoning
        5. 2.2.2.5. Smallbin cache filling bck write
  3. 3. 参考