这里不再解释vDSO的概念,而直接谈其意义:
vDSO类似一个信息公告板,用户可以直取所需,而无需为此办理任何手续。
vDSO相当于内核直接暴露出来的一个C库,作为GLIBC的补充。
…
类似gettimeofday之类的调用,每次都陷入内核去拿一个时间戳,显得有点昂贵了,不如内核把时间戳放在一个公共的可以暴露给任何用户的地方,用户自己去看就行了,这是vDSO的典型用例。
为了简单化描述,我们关闭ASLR:
[root@localhost ~]# sysctl -w kernel.randomize_va_space=0
随便打开一个ping程序,获取其/proc/pid/smaps中vdso的map区间:
7ffff7ffa000-7ffff7ffc000 r-xp 00000000 00:00 0 [vdso]
Size: 8 kB
...
我们将其dd出来:
[root@localhost ~]# dd if=/proc/3688/mem of=./vdso.dd obs=1 bs=1 skip=140737354113024 count=8192
随后我们看看它是什么:
[root@localhost ~]# file ./vdso.dd
./vdso.dd: ELF 64-bit LSB shared object, x86-64, version 1 (SYSV), dynamically linked, BuildID[sha1]=09be88363f7ca8b05e2cb54a82d16bec2e840186, stripped
那么,接下来可以objdump了,就像对待普通的动态链接库一样:
[root@localhost ~]# objdump -T vdso.dd
vdso.dd: 文件格式 elf64-x86-64
DYNAMIC SYMBOL TABLE:
ffffffffff700354 l d .eh_frame_hdr 0000000000000000 .eh_frame_hdr
ffffffffff700700 w DF .text 000000000000059d LINUX_2.6 clock_gettime
0000000000000000 g DO *ABS* 0000000000000000 LINUX_2.6 LINUX_2.6
ffffffffff700ca0 g DF .text 00000000000002d5 LINUX_2.6 __vdso_gettimeofday
ffffffffff700fa0 g DF .text 000000000000003d LINUX_2.6 __vdso_getcpu
ffffffffff700ca0 w DF .text 00000000000002d5 LINUX_2.6 gettimeofday
ffffffffff700f80 w DF .text 0000000000000016 LINUX_2.6 time
ffffffffff700fa0 w DF .text 000000000000003d LINUX_2.6 getcpu
ffffffffff700700 g DF .text 000000000000059d LINUX_2.6 __vdso_clock_gettime
ffffffffff700f80 g DF .text 0000000000000016 LINUX_2.6 __vdso_time
看看,看看,里面竟都是些什么东西,竟是一些时间公告函数啊,这意味着如果你想获取时间,调这里的函数就好了,我们看看最简单的time系统调用是如何来获取时间的,下面是对待vdso.dd文件的objdump -D的结果:
ffffffffff700f80 <__vdso_time>:
ffffffffff700f80: 55 push %rbp
ffffffffff700f81: 48 85 ff test %rdi,%rdi
ffffffffff700f84: 48 8b 04 25 a8 f0 5f mov 0xffffffffff5ff0a8,%rax
ffffffffff700f8b: ff
ffffffffff700f8c: 48 89 e5 mov %rsp,%rbp
ffffffffff700f8f: 74 03 je ffffffffff700f94 <__vdso_time+0x14>
ffffffffff700f91: 48 89 07 mov %rax,(%rdi)
ffffffffff700f94: 5d pop %rbp
ffffffffff700f95: c3 retq
很显然,并没有调用任何系统调用,而是直接从地址0xffffffffff5ff0a8处拿到了时间,那么地址0xffffffffff5ff0a8一定就是内核映射到用户态的时间公告板的位置了。
记住地址0xffffffffff5ff0a8,用户态的分析到此告一段落,我们进入内核去看一看。
首先从/proc/kallsyms中查到vdso的位置:
ffffffff81941000 D vdso_start
ffffffff819424b0 D vdso_end
其次我们找到内核时间公告板vsyscall_gtod_data的位置:
ffffffff81a75080 D vsyscall_gtod_data
我们看一下该公告板的值:
crash> struct vsyscall_gtod_data.wall_time_sec ffffffff81a75080
wall_time_sec = 1600912854
crash> struct vsyscall_gtod_data.wall_time_sec ffffffff81a75080
wall_time_sec = 1600912856
crash> struct vsyscall_gtod_data.wall_time_sec ffffffff81a75080
wall_time_sec = 1600912857
显然,公告板的wall_time_sec字段就是返回给time的值了。下面我们找到它的地址:
crash> struct vsyscall_gtod_data ffffffff81a75080 -o
struct vsyscall_gtod_data {
[ffffffff81a75080] seqcount_t seq;
struct {
int vclock_mode;
cycle_t cycle_last;
cycle_t mask;
u32 mult;
u32 shift;
[ffffffff81a75088] } clock;
[ffffffff81a750a8] time_t wall_time_sec;
[ffffffff81a750b0] u64 wall_time_snsec;
[ffffffff81a750b8] u64 monotonic_time_snsec;
[ffffffff81a750c0] time_t monotonic_time_sec;
[ffffffff81a750c8] struct timezone sys_tz;
[ffffffff81a750d0] struct timespec wall_time_coarse;
[ffffffff81a750e0] struct timespec monotonic_time_coarse;
}
嗯,就是0xffffffff81a750a8了。它就是映射到0xffffffffff5ff0a8暴露给用户态的那个地址了。
我们接下来证实这一点:
修改掉映射地址,返回给time调用以0.
我们再看公告板:
crash> struct vsyscall_gtod_data ffffffff81a75080
...
sys_tz = {
tz_minuteswest = 0,
tz_dsttime = 0
},
我们把sys_tz映射出去怎样,这个值是一直为0的,我们期望的就是time返回0.
为此,我们首先拿到sys_tz和wall_time_sec之间的偏移:
crash> eval ffffffff81a750c8-ffffffff81a750a8
hexadecimal: 20
decimal: 32
octal: 40
因此,我们只要把vdso的time函数代码改掉即可:
ffffffffff700f84: 48 8b 04 25 a8 f0 5f mov 0xffffffffff5ff0a8,%rax
改为:
ffffffffff700f84: 48 8b 04 25 c8 f0 5f mov 0xffffffffff5ff0c8,%rax
即将time函数起始处偏移为8的字节(从0开始计数,也就是mov指令中绝对地址的最低字节)由0xa8改成0xc8即可:
通过模式匹配,可以拿到time函数在vdso页面的偏移:
f80: 55 push rbp
f81: 48 85 ff test rdi,rdi
f84: 48 8b 04 25 a8 f0 5f mov rax,QWORD PTR ds:0xffffffffff5ff0a8
f8b: ff
f8c: 48 89 e5 mov rbp,rsp
f8f: 74 03 je 0xf94
f91: 48 89 07 mov QWORD PTR [rdi],rax
f94: 5d pop rbp
f95: c3 ret
即0xf80.
那么0xffffffff81941f80便是time函数其地址了:
unsigned char *addr = (unsigned char *)0xffffffff81941f80;
addr[8] = 0xc8;
在修改之前,我们先编程验证:
#include <stdio.h>
#include <time.h>

/* Signature shared by time() and the vDSO's __vdso_time entry point. */
typedef time_t (*time_func)(time_t *);

/*
 * Fetch the current time in two ways and print both, one per line:
 *   1. by calling the vDSO time function through a raw function pointer;
 *   2. by reading the absolute address that function loads from.
 * Both addresses are hard-coded from the analysis in this article and are
 * only valid on that machine with ASLR disabled.
 */
int main(int argc, char *argv[])
{
	time_t tloc;
	/* Read the value directly: the absolute address loaded by __vdso_time. */
	unsigned long *p = (unsigned long *)0xffffffffff5ff0a8;
	/* Read the value through the function: [vdso] base 0x7ffff7ffa000 + 0xf80. */
	time_func func = (time_func)0x7ffff7ffaf80;

	func(&tloc);
	printf("%ld\n", tloc);
	printf("%lu\n", *p);
	return 0;
}
预期的结果应该是两种方式获取的是同一个值:
[root@localhost ~]# ./a.out
1600923922
1600923922
[root@localhost ~]# ./a.out
1600923923
1600923923
[root@localhost ~]#
下面将内核页面对应的指令修改之:
[root@localhost ~]# cat modtime.stp
#!/usr/local/bin/stap -g
# Guru-mode script: patch a single byte of the time function inside the
# kernel's vDSO image. Usage: ./modtime.stp <byte-value>

# Store the low 8 bits of val at byte offset 8 of the time function.
function modtime(val:long)
%{
	/* vdso_start (0xffffffff81941000) + 0xf80, the offset of the time function. */
	unsigned char *insn = (unsigned char *)0xffffffff81941f80;

	/* Offset 8 is the lowest byte of the absolute address the mov reads from. */
	*(insn + 8) = (unsigned char)STAP_ARG_val;
%}

# Apply the patch once with the first command-line argument, then quit.
probe begin {
	modtime($1)
	exit()
}
执行之:
[root@localhost ~]# ./modtime.stp 0xc8
[root@localhost ~]# ./a.out
0
1600924228
[root@localhost ~]# ./a.out
0
1600924229
[root@localhost ~]# ./modtime.stp 0xa8
[root@localhost ~]# ./a.out
1600924238
1600924238
[root@localhost ~]#
当修改了vdso页面的指令后,所有调用time的进程都将异常,这是很显然的:
top - 08:00:00 up 42 min, 3 users, load average: 0.00, 0.00, 0.00
Tasks: 114 total, 1 running, 113 sleeping, 0 stopped, 0 zombie
%Cpu(s): 0.0 us, 0.0 sy, 0.0 ni,100.0 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
KiB Mem : 0 total, 0 free, 0 used, 0 buff/cache
KiB Swap: 0 total, 0 free, 0 used. 0 avail Mem
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
1 root 20 0 51696 3808 2492 S 0.0 inf 0:01.29 systemd
2 root 20 0 0 0 0 S 0.0 -nan 0:00.00 kthreadd
3 root 20 0 0 0 0 S 0.0 -nan 0:00.00 ksoftirqd/0
7 root rt 0 0 0 0 S 0.0 -nan 0:00.01 migration/0
8 root 20 0 0 0 0 S 0.0 -nan 0:00.00 rcu_bh
9 root 20 0 0 0 0 S 0.0 -nan 0:00.00 rcuob/0
10 root 20 0 0 0 0 S 0.0 -nan 0:00.00 rcuob/1
值得一提的是,在vdso之前,vsyscall机制也是类似,只是说它仅仅提供了一种map,而没有抽象出动态链接的含义,因此也就无法享受ASLR带来的安全保护了。
浙江温州皮鞋湿,下雨进水不会胖。