glibc2.31,但是适用于glibc2.34及以前的版本

概述

进程退出的时候,需要释放掉这个进程所占有的资源,并且停止内核和CPU对其的调度,才算是结束了这个进程。换句话说,进程退出需要用户层面和内核层面的共同工作。内核层面,glibc通过_exit()(它是对exit_group系统调用的封装)来终止进程,用户层面的清理工作则由exit()函数完成。在pwn中,系统调用很难被利用,我们通常重点关注exit函数。

源码分析

exit.c中定义了exit函数和__run_exit_handlers函数,其中exit函数其实就是对__run_exit_handlers的封装:

1
2
3
4
5
void
exit (int status)
{
__run_exit_handlers (status, &__exit_funcs, true, true);
}

可以看到exit函数调用了__run_exit_handlers函数并以__exit_funcs等变量为参数。在查看核心函数之前,先来关注与其相关的数据结构。

相关数据结构

我们追溯__exit_funcs,可以发现其在cxa_atexit.c中有定义:

1
2
static struct exit_function_list initial;
struct exit_function_list *__exit_funcs = &initial;

在exit.h中对exit_function_list结构体有定义:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
enum
{
ef_free, /* `ef_free' MUST be zero! */
ef_us,
ef_on,
ef_at,
ef_cxa
};

struct exit_function
{
/* `flavour' should be of type of the `enum' above but since we need
this element in an atomic operation we have to use `long int'. */
long int flavor;
union
{
void (*at)(void);
struct
{
void (*fn)(int status, void *arg);
void *arg;
} on;
struct
{
void (*fn)(void *arg, int status);
void *arg;
void *dso_handle;
} cxa;
} func;
};

struct exit_function_list
{
struct exit_function_list *next;
size_t idx;
struct exit_function fns[32];
};

exit执行时会进行一些析构工作,exit_function结构体是对单个析构函数的描述,其中flavor成员(glibc的注释里写作flavour)描述的是函数类型:

  • ef_free: 此位置空闲(没有析构函数)
  • ef_us: 此位置函数使用中,但尚未写入函数指针,类型未知等待写入
  • ef_at: 没有参数的函数
  • ef_on: 先传入状态码再传入第二参数的函数
  • ef_cxa: 先传入第一参数后状态码的函数

因为单个函数只会有一种类型,所以使用了联合体。而exit_function_list是维护多个析构函数的单链表,idx储存的是函数个数。而initial变量则是exit的第一个析构函数链表,储存在libc的.data段,而往后的其他链表节点则是通过malloc分配的。

__run_exit_handlers()

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
void
attribute_hidden
__run_exit_handlers(int status, struct exit_function_list **listp,
bool run_list_atexit, bool run_dtors)
{
/* First, call the TLS destructors. */
#ifndef SHARED
if (&__call_tls_dtors != NULL)
#endif
if (run_dtors)
__call_tls_dtors(); //首先释放tls

/* We do it this way to handle recursive calls to exit () made by
the functions registered with `atexit' and `on_exit'. We call
everyone on the list and use the status value in the last
exit (). */
while (true) //析构函数处理大循环
{
struct exit_function_list *cur;

__libc_lock_lock(__exit_funcs_lock); //给互斥锁上锁

restart:
cur = *listp; //最开始的就是initial变量

if (cur == NULL)
{
/* Exit processing complete. We will not allow any more
atexit/on_exit registrations. */
__exit_funcs_done = true;
__libc_lock_unlock(__exit_funcs_lock); //给互斥锁解锁
break;
}

while (cur->idx > 0)
{
struct exit_function *const f = &cur->fns[--cur->idx];
const uint64_t new_exitfn_called = __new_exitfn_called;

/* Unlock the list while we call a foreign function. */
__libc_lock_unlock(__exit_funcs_lock);
switch (f->flavor)
{
void (*atfct)(void);
void (*onfct)(int status, void *arg);
void (*cxafct)(void *arg, int status);

case ef_free:
case ef_us:
break;
case ef_on:
onfct = f->func.on.fn; //绑定on类型的函数指针
#ifdef PTR_DEMANGLE
PTR_DEMANGLE(onfct); //用宏来解密函数指针
#endif
onfct(status, f->func.on.arg); //执行相应析构函数
break;
case ef_at:
atfct = f->func.at;
#ifdef PTR_DEMANGLE
PTR_DEMANGLE(atfct);
#endif
atfct();
break;
case ef_cxa:
/* To avoid dlclose/exit race calling cxafct twice (BZ 22180),
we must mark this function as ef_free. */
f->flavor = ef_free; //为了防止条件竞争,执行过的cxa类型析构函数之后会被置free
cxafct = f->func.cxa.fn;
#ifdef PTR_DEMANGLE
PTR_DEMANGLE(cxafct);
#endif
cxafct(f->func.cxa.arg, status);
break;
}
/* Re-lock again before looking at global state. */
__libc_lock_lock(__exit_funcs_lock);

if (__glibc_unlikely(new_exitfn_called != __new_exitfn_called))
/* The last exit function, or another thread, has registered
more exit functions. Start the loop over. */
goto restart;
}

*listp = cur->next; //切换到下一个节点
if (*listp != NULL)
/* Don't free the last element in the chain, this is the statically
allocate element. */
free(cur); //释放除了第一个以外的其他节点

__libc_lock_unlock(__exit_funcs_lock);
}

if (run_list_atexit)
RUN_HOOK(__libc_atexit, ());

_exit(status); //系统调用,内核exit
}

TLS:Thread Local Storage,是一种数据的储存方式,作用是保持数据在线程内全局可访问,而不能被其他线程访问到。

__run_exit_handlers函数的大致流程就是,先进行两个条件判断,成立则调用__call_tls_dtors函数释放tls。我们来追踪一下这个函数(在cxa_thread_atexit_impl.c中)。

__call_tls_dtors()

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
/* Call the destructors.  This is called either when a thread returns from the
initial function or when the process exits via the exit function. */
void
__call_tls_dtors (void)
{
while (tls_dtor_list)
{
struct dtor_list *cur = tls_dtor_list;
dtor_func func = cur->func;
#ifdef PTR_DEMANGLE
PTR_DEMANGLE (func);
#endif

tls_dtor_list = tls_dtor_list->next;
func (cur->obj);

/* Ensure that the MAP dereference happens before
l_tls_dtor_count decrement. That way, we protect this access from a
potential DSO unload in _dl_close_worker, which happens when
l_tls_dtor_count is 0. See CONCURRENCY NOTES for more detail. */
atomic_fetch_add_release (&cur->map->l_tls_dtor_count, -1);
free (cur);
}
}

如果tls_dtor_list不为空,则解密每个节点中的func函数指针,并以obj成员为参数执行。所以其实可以劫持tls_dtor_list写入system和binsh,前提是要泄露解密的key,这个后面再看。我们先来看tls_dtor_list的原型:

1
2
3
4
5
6
7
8
9
10
typedef void (*dtor_func) (void *);
struct dtor_list
{
dtor_func func;
void *obj;
struct link_map *map;
struct dtor_list *next;
};

static __thread struct dtor_list *tls_dtor_list;

那么也就是说,如果有任意写的机会,可以让tls_dtor_list指向一个我们伪造的dtor_list节点,在该节点的func处写入加密后的system地址,在obj处写入binsh的地址。顺带一提,这个攻击方法在2.35也适用。

但是一般做题的时候pwndbg并不能读取到tls_dtor_list的符号,所以只能通过其他途径来找到其所在处。我们将libc扔到ida查找一下这个函数,看看汇编层面这个函数是如何运行的。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
.text:0000000000047280                 public __call_tls_dtors
.text:0000000000047280 __call_tls_dtors proc near ; CODE XREF: sub_467B0:loc_46A08↑p
.text:0000000000047280 ; DATA XREF: LOAD:000000000000E420↑o
.text:0000000000047280 ; __unwind {
.text:0000000000047280 endbr64
.text:0000000000047284 push rbp
.text:0000000000047285 push rbx
.text:0000000000047286 sub rsp, 8
.text:000000000004728A mov rbx, cs:off_1EBD60
.text:0000000000047291 mov rbp, fs:[rbx]
.text:0000000000047295 test rbp, rbp
.text:0000000000047298 jz short loc_472DD
.text:000000000004729A nop word ptr [rax+rax+00h]
.text:00000000000472A0
.text:00000000000472A0 loc_472A0: ; CODE XREF: __call_tls_dtors+5B↓j
.text:00000000000472A0 mov rdx, [rbp+18h]
.text:00000000000472A4 mov rax, [rbp+0]
.text:00000000000472A8 ror rax, 11h
.text:00000000000472AC xor rax, fs:30h
.text:00000000000472B5 mov fs:[rbx], rdx
.text:00000000000472B9 mov rdi, [rbp+8]
.text:00000000000472BD call rax
.text:00000000000472BF mov rax, [rbp+10h]
.text:00000000000472C3 lock sub qword ptr [rax+460h], 1
.text:00000000000472CC mov rdi, rbp
.text:00000000000472CF call j_free
.text:00000000000472D4 mov rbp, fs:[rbx]
.text:00000000000472D8 test rbp, rbp
.text:00000000000472DB jnz short loc_472A0
.text:00000000000472DD
.text:00000000000472DD loc_472DD: ; CODE XREF: __call_tls_dtors+18↑j
.text:00000000000472DD add rsp, 8
.text:00000000000472E1 pop rbx
.text:00000000000472E2 pop rbp
.text:00000000000472E3 retn
.text:00000000000472E3 ; } // starts at 47280
.text:00000000000472E3 __call_tls_dtors endp

第9行把tls_dtor_list相对于fs基址的TLS偏移赋给了rbx,第10行再把fs:[rbx]处的内容读进rbp,此时rbp保存的就是tls_dtor_list的值,也就是链表第一个节点的地址。第11行的test就是在检查其是否为空。第17行,将func赋给rax,然后循环右移0x11(17)位,再与fs:0x30处的内容进行异或。这就完成了函数指针的解密操作。后面就是传入参数并执行了。

fs:0x30是个啥呢?fs是段寄存器,在x86_64的linux上,它的基址由内核通过arch_prctl(ARCH_SET_FS)设置(保存在FS_BASE这个MSR中),而不是通过GDT表的索引查出来的。linux会让fs的基址指向当前线程的控制块,也就是tcbhead_t结构体,很多和线程相关的全局变量会储存在这个结构体当中,其定义在tls.h中:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
typedef struct
{
void *tcb; /* Pointer to the TCB. Not necessarily the
thread descriptor used by libpthread. */
dtv_t *dtv;
void *self; /* Pointer to the thread descriptor. */
int multiple_threads;
int gscope_flag;
uintptr_t sysinfo;
uintptr_t stack_guard;
uintptr_t pointer_guard;
unsigned long int vgetcpu_cache[2];
/* Bit 0: X86_FEATURE_1_IBT.
Bit 1: X86_FEATURE_1_SHSTK.
*/
unsigned int feature_1;
int __glibc_unused1;
/* Reservation of some values for the TM ABI. */
void *__private_tm[4];
/* GCC split stack support. */
void *__private_ss;
/* The lowest address of shadow stack, */
unsigned long long int ssp_base;
/* Must be kept even if it is no longer used by glibc since programs,
like AddressSanitizer, depend on the size of tcbhead_t. */
__128bits __glibc_unused2[8][4] __attribute__ ((aligned (32)));

void *__padding[8];
} tcbhead_t;

pointer_guard就是我们要找的fs:0x30,而stack_guard就是常见的canary,也就是fs:0x28。在pwndbg中查找这个结构体地址的方法也很简单,可以通过符号搜索,也可以直接输入tls作为指令,就可以输出其地址了,计算一下偏移即可。

PTR_DEMANGLE

顺便也来看一下PTR_DEMANGLE这个用来解密的宏定义:

1
2
3
4
5
6
7
8
9
10
11
12
13
#  define PTR_MANGLE(var)	asm ("xor %%fs:%c2, %0\n"		      \
"rol $2*" LP_SIZE "+1, %0" \
: "=r" (var) \
: "0" (var), \
"i" (offsetof (tcbhead_t, \
pointer_guard)))

# define PTR_DEMANGLE(var) asm ("ror $2*" LP_SIZE "+1, %0\n" \
"xor %%fs:%c2, %0" \
: "=r" (var) \
: "0" (var), \
"i" (offsetof (tcbhead_t, \
pointer_guard)))

libc在x86_64/sysdep.h中很简单粗暴地直接用汇编来写解密过程了,其中LP_SIZE定义为8。

已存在的析构函数?

libc提供了atexit()等一系列函数,用来添加exit的析构函数,或者说,用来注册exit的析构函数。atexit()的定义如下,但这里不展开叙述了:

1
2
3
4
int atexit(void (*func)(void))
{
return __cxa_atexit((void (*)(void *))func, NULL, &__dso_handle == NULL ? NULL : __dso_handle);
}

问题在于,就算我们写程序的时候没有使用这个函数来添加析构函数,程序exit的时候也会执行一些析构函数。显然肯定是有谁注册了这些析构函数,并且是早在程序启动还没开始执行main时就注册完成了。

程序逆向得多了的话,应该不难发现每个elf文件都有个_start()函数,并且这个函数还占据.text段的第一个位置。显而易见,这个函数才是一个elf文件真正的入口。那么这个函数干了什么事呢?

_start()

这个函数的源码在glibc中是以汇编的形式出现的,在sysdeps/x86_64/start.S中:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
ENTRY (_start) //告诉编译器,这是程序入口
/* Clearing frame pointer is insufficient, use CFI. */
cfi_undefined (rip)
/* Clear the frame pointer. The ABI suggests this be done, to mark
the outermost frame obviously. */
xorl %ebp, %ebp //初始化栈底

/* Extract the arguments as encoded on the stack and set up
the arguments for __libc_start_main (int (*main) (int, char **, char **),
int argc, char *argv,
void (*init) (void), void (*fini) (void),
void (*rtld_fini) (void), void *stack_end).
The arguments are passed via registers and on the stack:
main: %rdi
argc: %rsi
argv: %rdx
init: %rcx
fini: %r8
rtld_fini: %r9
stack_end: stack. */ /*这里写的是__libc_start_main函数的传参寄存器*/

mov %RDX_LP, %R9_LP /* Address of the shared library termination function. *//*设置参数rtld_fini*/
#ifdef __ILP32__
mov (%rsp), %esi /* Simulate popping 4-byte argument count. */
add $4, %esp
#else
popq %rsi /* Pop the argument count. */ //设置参数argc
#endif
/* argv starts just at the current stack top. */
mov %RSP_LP, %RDX_LP //设置参数argv
/* Align the stack to a 16 byte boundary to follow the ABI. */
and $~15, %RSP_LP //rsp对齐

/* Push garbage because we push 8 more bytes. */
pushq %rax

/* Provide the highest stack address to the user code (for stacks
which grow downwards). */
pushq %rsp

#ifdef PIC
/* Pass address of our own entry points to .fini and .init. */
//设置参数init和fini
mov __libc_csu_fini@GOTPCREL(%rip), %R8_LP
mov __libc_csu_init@GOTPCREL(%rip), %RCX_LP

mov main@GOTPCREL(%rip), %RDI_LP //设置参数main函数地址
#else
/* Pass address of our own entry points to .fini and .init. */
mov $__libc_csu_fini, %R8_LP
mov $__libc_csu_init, %RCX_LP

mov $main, %RDI_LP
#endif

/* Call the user's main function, and exit with its value.
But let the libc call main. Since __libc_start_main in
libc.so is called very early, lazy binding isn't relevant
here. Use indirect branch via GOT to avoid extra branch
to PLT slot. In case of static executable, ld in binutils
2.26 or above can convert indirect branch into direct
branch. */
//调用
call *__libc_start_main@GOTPCREL(%rip)

hlt /* Crash if somehow `exit' does return. */
END (_start)

本人不是很习惯看att风格的汇编,我们对照ida里的汇编来看。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
.text:0000000000401140                 public _start
.text:0000000000401140 _start proc near ; DATA XREF: LOAD:00000000003FE018↑o
.text:0000000000401140 ; __unwind {
.text:0000000000401140 endbr64
.text:0000000000401144 xor ebp, ebp
.text:0000000000401146 mov r9, rdx ; rtld_fini
.text:0000000000401149 pop rsi ; argc
.text:000000000040114A mov rdx, rsp ; ubp_av
.text:000000000040114D and rsp, 0FFFFFFFFFFFFFFF0h
.text:0000000000401151 push rax
.text:0000000000401152 push rsp ; stack_end
.text:0000000000401153 mov r8, offset __libc_csu_fini ; fini
.text:000000000040115A mov rcx, offset __libc_csu_init ; init
.text:0000000000401161 mov rdi, offset main ; main
.text:0000000000401168 call cs:__libc_start_main_ptr
.text:000000000040116E hlt
.text:000000000040116E ; } // starts at 401140
.text:000000000040116E _start endp

会发现,start函数就调用了一个__libc_start_main函数,这个函数一旦返回,就直接hlt停机了。那么就能先大致推测,main函数是由__libc_start_main函数引导启动的,其他程序的准备包括析构函数的注册也是在此进行。

__libc_start_main()

我们追踪这个函数,在libc-start.c中有定义:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
STATIC int
LIBC_START_MAIN (int (*main) (int, char **, char ** MAIN_AUXVEC_DECL),
int argc, char **argv,
#ifdef LIBC_START_MAIN_AUXVEC_ARG
ElfW(auxv_t) *auxvec,
#endif
__typeof (main) init,
void (*fini) (void),
void (*rtld_fini) (void), void *stack_end)
{
/* Result of the 'main' function. */
int result; //保存main函数的返回值

__libc_multiple_libcs = &_dl_starting_up && !_dl_starting_up;

#ifndef SHARED
_dl_relocate_static_pie (); //设置程序基址,重新定位

char **ev = &argv[argc + 1]; //保存环境变量

__environ = ev; //这个就是ssp攻击中常见的获取栈地址的环境变量

/* Store the lowest stack address. This is done in ld.so if this is
the code for the DSO. */
__libc_stack_end = stack_end; //保存栈底地址

...//省略了一些看不懂且应该没啥用的代码

/* Initialize very early so that tunables can use it. */
__libc_init_secure ();

__tunables_init (__environ);

ARCH_INIT_CPU_FEATURES ();

/* Perform IREL{,A} relocations. */
ARCH_SETUP_IREL ();

/* The stack guard goes into the TCB, so initialize it early. */
ARCH_SETUP_TLS ();

/* In some architectures, IREL{,A} relocations happen after TLS setup in
order to let IFUNC resolvers benefit from TCB information, e.g. powerpc's
hwcap and platform fields available in the TCB. */
ARCH_APPLY_IREL ();

/* Set up the stack checker's canary. */
uintptr_t stack_chk_guard = _dl_setup_stack_chk_guard (_dl_random); //生成canary,然后保存
# ifdef THREAD_SET_STACK_GUARD
THREAD_SET_STACK_GUARD (stack_chk_guard);
# else
__stack_chk_guard = stack_chk_guard;
# endif

# ifdef DL_SYSDEP_OSCHECK
if (!__libc_multiple_libcs)
{
/* This needs to run to initiliaze _dl_osversion before TLS
setup might check it. */
DL_SYSDEP_OSCHECK (__libc_fatal);
}
# endif

/* Initialize libpthread if linked in. */
if (__pthread_initialize_minimal != NULL)
__pthread_initialize_minimal (); //初始化TLS

/* Set up the pointer guard value. */
uintptr_t pointer_chk_guard = _dl_setup_pointer_guard (_dl_random,
stack_chk_guard); //生成pointer_guard,并保存
# ifdef THREAD_SET_POINTER_GUARD
THREAD_SET_POINTER_GUARD (pointer_chk_guard);
# else
__pointer_chk_guard_local = pointer_chk_guard;
# endif

#endif /* !SHARED */

/* Register the destructor of the dynamic linker if there is any. */
if (__glibc_likely (rtld_fini != NULL))
__cxa_atexit ((void (*) (void *)) rtld_fini, NULL, NULL); //注册ld的析构函数

#ifndef SHARED
/* Call the initializer of the libc. This is only needed here if we
are compiling for the static library in which case we haven't
run the constructors in `_dl_start_user'. */
__libc_init_first (argc, argv, __environ); //初始化libc

/* Register the destructor of the program, if any. */
if (fini)
__cxa_atexit ((void (*) (void *)) fini, NULL, NULL); //注册fini函数

/* Some security at this point. Prevent starting a SUID binary where
the standard file descriptors are not opened. We have to do this
only for statically linked applications since otherwise the dynamic
loader did the work already. */
if (__builtin_expect (__libc_enable_secure, 0))
__libc_check_standard_fds ();
#endif

/* Call the initializer of the program, if any. */
#ifdef SHARED
if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_IMPCALLS, 0))
GLRO(dl_debug_printf) ("\ninitialize program: %s\n\n", argv[0]);
#endif
if (init)
(*init) (argc, argv, __environ MAIN_AUXVEC_PARAM); //调用构造函数init

... //省略了一些代码,大概是对线程的初始化和一些维护工作

/* Nothing fancy, just call the function. */
result = main (argc, argv, __environ MAIN_AUXVEC_PARAM); //调用main函数
#endif

exit (result); //如果main函数返回了,会先返回到这里,这个函数帮main调用exit函数来终止进程。
}

现在我们清楚了一件事:__libc_start_main函数确实注册了几个析构函数,还调用了个构造函数。根据注释我们可以知道,rtld_fini函数是ld的析构函数,fini是elf的析构函数。

奇怪的是,elf当中的fini函数是一个空函数,直接retn了,什么都没有。而调用的那个init则负责遍历.init_array段,实现段上多个构造函数的调用。而和.init_array挨得很近的.fini_array段,则由ld的rtld_fini负责,显然elf中的fini函数本身没什么用处。

_dl_fini()

rtld_fini实际指向_dl_fini函数,我们跟踪一下这个函数,在elf/dl-fini.c中有定义:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
void
_dl_fini (void)
{
/* Lots of fun ahead. We have to call the destructors for all still
loaded objects, in all namespaces. The problem is that the ELF
specification now demands that dependencies between the modules
are taken into account. I.e., the destructor for a module is
called before the ones for any of its dependencies.

To make things more complicated, we cannot simply use the reverse
order of the constructors. Since the user might have loaded objects
using `dlopen' there are possibly several other modules with its
dependencies to be taken into account. Therefore we have to start
determining the order of the modules once again from the beginning. */

/* We run the destructors of the main namespaces last. As for the
other namespaces, we pick run the destructors in them in reverse
order of the namespace ID. */
#ifdef SHARED
int do_audit = 0;
again:
#endif
for (Lmid_t ns = GL(dl_nns) - 1; ns >= 0; --ns) //从后往前遍历_rtld_global中的所有命名空间: _dl_ns[DL_NNS]
{
/* Protect against concurrent loads and unloads. */
__rtld_lock_lock_recursive (GL(dl_load_lock)); //对rtld_global上锁

unsigned int nloaded = GL(dl_ns)[ns]._ns_nloaded;
/* No need to do anything for empty namespaces or those used for
auditing DSOs. */
if (nloaded == 0
#ifdef SHARED
|| GL(dl_ns)[ns]._ns_loaded->l_auditing != do_audit
#endif
)
__rtld_lock_unlock_recursive (GL(dl_load_lock)); //如果这个命名空间中没有模块,则直接解锁
else //否则遍历模块
{
/* Now we can allocate an array to hold all the pointers and
copy the pointers in. */
struct link_map *maps[nloaded]; //把这个命名空间中的所有模块指针, 都复制到maps数组中

unsigned int i;
struct link_map *l;
assert (nloaded != 0 || GL(dl_ns)[ns]._ns_loaded == NULL);
for (l = GL(dl_ns)[ns]._ns_loaded, i = 0; l != NULL; l = l->l_next)
/* Do not handle ld.so in secondary namespaces. */
if (l == l->l_real)
{
assert (i < nloaded);

maps[i] = l;
l->l_idx = i;
++i;

/* Bump l_direct_opencount of all objects so that they
are not dlclose()ed from underneath us. */
++l->l_direct_opencount;
}
assert (ns != LM_ID_BASE || i == nloaded);
assert (ns == LM_ID_BASE || i == nloaded || i == nloaded - 1);
unsigned int nmaps = i; //模块数量

/* Now we have to do the sorting. We can skip looking for the
binary itself which is at the front of the search list for
the main namespace. */
_dl_sort_maps (maps + (ns == LM_ID_BASE), nmaps - (ns == LM_ID_BASE),
NULL, true); //排序确定析构顺序

/* We do not rely on the linked list of loaded object anymore
from this point on. We have our own list here (maps). The
various members of this list cannot vanish since the open
count is too high and will be decremented in this loop. So
we release the lock so that some code which might be called
from a destructor can directly or indirectly access the
lock. */
__rtld_lock_unlock_recursive (GL(dl_load_lock)); //解锁

/* 'maps' now contains the objects in the right order. Now
call the destructors. We have to process this array from
the front. */
for (i = 0; i < nmaps; ++i) //按顺序析构每一个模块
{
struct link_map *l = maps[i];

if (l->l_init_called)
{
/* Make sure nothing happens if we are called twice. */
l->l_init_called = 0;

/* Is there a destructor function? */
/* 是否包含fini_array节, 或者fini节 */
if (l->l_info[DT_FINI_ARRAY] != NULL
|| l->l_info[DT_FINI] != NULL)
{
/* When debugging print a message first. */
if (__builtin_expect (GLRO(dl_debug_mask)
& DL_DEBUG_IMPCALLS, 0))
_dl_debug_printf ("\ncalling fini: %s [%lu]\n\n",
DSO_FILENAME (l->l_name),
ns);

/* First see whether an array is given. */
if (l->l_info[DT_FINI_ARRAY] != NULL)
{
/*
l->l_addr: 模块l的加载基地址
l->l_info[DT_FINI_ARRAY]: 模块l中fini_array节的描述符
l->l_info[DT_FINI_ARRAY]->d_un.d_ptr: 模块l中fini_arrary节的偏移
array: 为模块l的fini_array节的内存地址
*/
ElfW(Addr) *array =
(ElfW(Addr) *) (l->l_addr
+ l->l_info[DT_FINI_ARRAY]->d_un.d_ptr);
/*
ELF中 fini_arraysz节用来记录fini_array节的大小
l->l_info[DT_FINI_ARRAYSZ]: 模块l中fini_arraysz节描述符
l->l_info[DT_FINI_ARRAYSZ]->d_un.d_val: 就是fini_array节的大小, 以B为单位
i: fini_array节的大小/一个指针大小, 即fini_array中有多少个析构函数
*/
unsigned int i = (l->l_info[DT_FINI_ARRAYSZ]->d_un.d_val
/ sizeof (ElfW(Addr)));
while (i-- > 0) //从后往前调用析构函数
((fini_t) array[i]) ();
}

/* Next try the old-style destructor. */
if (l->l_info[DT_FINI] != NULL) //调用fini段中的函数
DL_CALL_DT_FINI
(l, l->l_addr + l->l_info[DT_FINI]->d_un.d_ptr);
}

...

/* Correct the previous increment. */
--l->l_direct_opencount;
}
}
}

...
}

是不是看的一脸懵逼?根据注释我们可以发现ld还调用了一个叫_dl_open的函数。这个函数干了什么事呢?我们知道动态链接的elf文件会共享libc里的符号,把libc里的符号加载到不同进程的过程叫映射,映射工作由动态链接器也就是ld来完成。我们把进程空间中的每个单独文件称之为模块。ld则调用了_dl_open函数来映射这些模块到进程空间中,并且把所有映射的文件都记录在_rtld_global中。同样的,当进程结束时,ld还要负责用_dl_close()卸载这些模块。

_rtld_global结构体

我们追踪一下这个结构体,在ldsodefs.h中有定义:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
struct rtld_global
{
#endif
/* Don't change the order of the following elements. 'dl_loaded'
must remain the first element. Forever. */

/* Non-shared code has no support for multiple namespaces. */
#ifdef SHARED
# define DL_NNS 16
#else
# define DL_NNS 1
#endif
EXTERN struct link_namespaces
{
/* A pointer to the map for the main map. */
struct link_map *_ns_loaded; //每个模块用_ns_loaded描述, 这个命名空间中所映射的模块组成一个双向链表, _ns_loaded就是这个链表的指针
/* Number of object in the _dl_loaded list. */
unsigned int _ns_nloaded; //模块数量
/* Direct pointer to the searchlist of the main object. */
struct r_scope_elem *_ns_main_searchlist; //映射模块的搜索表
/* This is zero at program start to signal that the global scope map is
allocated by rtld. Later it keeps the size of the map. It might be
reset if in _dl_close if the last global object is removed. */
unsigned int _ns_global_scope_alloc;

/* During dlopen, this is the number of objects that still need to
be added to the global scope map. It has to be taken into
account when resizing the map, for future map additions after
recursive dlopen calls from ELF constructors. */
unsigned int _ns_global_scope_pending_adds;

/* Search table for unique objects. */
struct unique_sym_table //这个命名空间中的符号表, 单个命名空间中的符号不允许重复
{
__rtld_lock_define_recursive (, lock)
struct unique_sym
{
uint32_t hashval; //符号的哈希值
const char *name; //符号名称
const ElfW(Sym) *sym; //符号
const struct link_map *map; //所属模块
} *entries; //索引指针
size_t size; //元素数量
size_t n_elements;
void (*free) (void *); //析构函数
} _ns_unique_sym_table;
/* Keep track of changes to each namespace' list. */
struct r_debug _ns_debug;
} _dl_ns[DL_NNS]; //一个命名空间一个link_namespace结构体
/* One higher than index of last used namespace. */
EXTERN size_t _dl_nns; //使用了多少个命名空间

/* During the program run we must not modify the global data of
loaded shared object simultanously in two threads. Therefore we
protect `_dl_open' and `_dl_close' in dl-close.c.

This must be a recursive lock since the initializer function of
the loaded object might as well require a call to this function.
At this time it is not anymore a problem to modify the tables. */
__rtld_lock_define_recursive (EXTERN, _dl_load_lock)
/* This lock is used to keep __dl_iterate_phdr from inspecting the
list of loaded objects while an object is added to or removed
from that list. */
__rtld_lock_define_recursive (EXTERN, _dl_load_write_lock)

...
};

extern struct rtld_global _rtld_global __rtld_global_attribute__;

定义很复杂,我们只关注想要看的部分。

  • ns代表着NameSpace
  • nns代表着Num of NameSpace
  • struct rtld_global先以命名空间为单位建立了一个数组 _dl_ns[DL_NNS]
  • 在每个命名空间内部加载的模块以双向链表组织, 通过_ns_loaded索引
  • 同时每个命名空间内部又有一个符号表_ns_unique_sym_table, 记录着该命名空间内的唯一符号(STB_GNU_UNIQUE类型的符号), 而不是所有模块导出的全部符号

再来关注link_map结构体:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
struct link_map
{
/* These first few members are part of the protocol with the debugger.
This is the same format used in SVR4. */
//模块的基地址
ElfW(Addr) l_addr; /* Difference between the address in the ELF file and the addresses in memory. */ //模块的基地址
char *l_name; /* Absolute file name object was found in. */ //模块的文件名
ElfW(Dyn) *l_ld; /* Dynamic section of the shared object. */ //指向elf的dyn节
struct link_map *l_next, *l_prev; /* Chain of loaded objects. */

/* All following members are internal to the dynamic linker.
They may change without notice. */

/* This is an element which is only ever different from a pointer to
the very same copy of this type for ld.so when it is used in more
than one namespace. */
struct link_map *l_real;

/* Number of the namespace this link map belongs to. */
Lmid_t l_ns; //模块所属命名空间的idx

struct libname_list *l_libname;
/* Indexed pointers to dynamic section.
[0,DT_NUM) are indexed by the processor-independent tags.
[DT_NUM,DT_NUM+DT_THISPROCNUM) are indexed by the tag minus DT_LOPROC.
[DT_NUM+DT_THISPROCNUM,DT_NUM+DT_THISPROCNUM+DT_VERSIONTAGNUM) are
indexed by DT_VERSIONTAGIDX(tagvalue).
[DT_NUM+DT_THISPROCNUM+DT_VERSIONTAGNUM,
DT_NUM+DT_THISPROCNUM+DT_VERSIONTAGNUM+DT_EXTRANUM) are indexed by
DT_EXTRATAGIDX(tagvalue).
[DT_NUM+DT_THISPROCNUM+DT_VERSIONTAGNUM+DT_EXTRANUM,
DT_NUM+DT_THISPROCNUM+DT_VERSIONTAGNUM+DT_EXTRANUM+DT_VALNUM) are
indexed by DT_VALTAGIDX(tagvalue) and
[DT_NUM+DT_THISPROCNUM+DT_VERSIONTAGNUM+DT_EXTRANUM+DT_VALNUM,
DT_NUM+DT_THISPROCNUM+DT_VERSIONTAGNUM+DT_EXTRANUM+DT_VALNUM+DT_ADDRNUM)
are indexed by DT_ADDRTAGIDX(tagvalue), see <elf.h>. */
/*
l_info是ELF节描述符组成的的数组
ELF中一个节, 使用一个ElfW(Dyn)描述
各个类型的节在l_info中的下标固定, 因此可以通过下标来区分节的类型
*/
ElfW(Dyn) *l_info[DT_NUM + DT_THISPROCNUM + DT_VERSIONTAGNUM
+ DT_EXTRANUM + DT_VALNUM + DT_ADDRNUM];
const ElfW(Phdr) *l_phdr; /* Pointer to program header table in core. */ //elf的头表
ElfW(Addr) l_entry; /* Entry point location. */ //elf的入口
ElfW(Half) l_phnum; /* Number of program header entries. */ //头表的节数
ElfW(Half) l_ldnum; /* Number of dynamic segment entries. */ //dyn中的描述符数量

...
};

总而言之,我们可以知道,glibc按照: 命名空间->模块->节 的形式描述所有的模块, 通过_ns_unique_sym_table描述命名空间中的唯一符号。这时候我们再返回去看_dl_fini()的流程,不难发现他的工作就是:

  1. 遍历rtld_global中的所有命名空间
  2. 遍历命名空间中的所有模块
  3. 找到这个模块所在的fini_array段,并调用其中所有的函数指针
  4. 找到这个模块的fini节(DT_FINI),并调用其中的析构函数(DT_FINI只对应一个函数)

可劫持的lock与unlock函数指针

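所以_rtld_global结构体非常重要。这个结构体里还有什么其他重要的东西没有呢?有,那就是用来上锁和解锁的两个函数指针_dl_rtld_lock_recursive和_dl_rtld_unlock_recursive,它们位于上面摘抄时被省略掉的部分;摘抄代码第60行的_dl_load_lock和第64行的_dl_load_write_lock则是锁对象本身,并不是函数指针。如果去追溯_dl_fini函数中的__rtld_lock_lock_recursive (GL(dl_load_lock));,会发现它宏定义展开后,其实就是以第60行的_dl_load_lock(其mutex成员)的地址为参数,调用结构体中的_dl_rtld_lock_recursive函数指针。而_rtld_global结构体又是位于ld.so(例如ld-linux-x86-64.so.2)的.data段中的,所以如果我们泄露了libc地址或者ld地址,有任意写的机会,我们就可以劫持这两个函数指针为ogg从而getshell。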

别忘了,这两个函数指针是在_dl_fini内部被调用的,而_dl_fini本身是以ef_cxa类型注册的析构函数,在执行前就会被置为ef_free,所以劫持后只有机会执行一次。详情看这篇文章

其他利用点

不想写了,看看这个师傅的文章吧。

参考链接

exit()分析与利用

Glibc2.32源码分析之exit部分

Linux x86 Program Start Up

⬆︎TOP