KASAN解决内核内存越界访问,释放后使用问题

目录

1、整体思路

 2、内核支持KASAN功能,打开调试信息

3、 使用 addr2line 工具定位到异常代码行

4、读数据越界访问案例分析

 4.1、越界访问源码分析

 4.2、读越界kasan报告分析

5、写数据越界访问案例分析

5.1、写越界访问代码分析

5.2、写越界kasan报告分析

6、kasan与kdump结合


KASAN是一个动态检测内存错误的工具。KASAN可以检测全局变量、栈、堆分配的内存发生越界访问 "out-of-bounds" 和 释放后访问 "use-after-free"等问题,但是不能检测内存泄漏问题。

1、整体思路

a、内核支持 kasan 配置项

b、内核打开调试信息,增加-g编译参数,为了可以使用 addr2line 工具定位越界访问的代码行

c、分析kasan的打印日志定位信息,kasan的日志信息主要分为4部分

  • 1) 读或写异常访问的栈回溯信息。
  • 2) 数据结构空间分配的堆栈信息。
  • 3) 数据结构内存释放的堆栈信息(仅在释放后访问 use-after-free 时出现)。
  • 4) 异常访问地址附近内存的可用状态。

d、根据kasan的堆栈打印,定位到具体的代码行,分析问题。 

 2、内核支持KASAN功能,打开调试信息

 a、内核支持 kasan 配置项,当出现越界访问时会打印kasan的报告日志。

CONFIG_SLUB_DEBUG=y
CONFIG_KASAN=y

b、内核打开调试信息,增加-g编译参数,编译器优化等级改为 -O0 ,为了可以使用 addr2line 工具定位越界访问的代码行。

linux/src/Makefile 文件,搜索编译参数 KBUILD_CFLAGS,在编译参数上增加 -g 选项。

KBUILD_CFLAGS   := -DCONFIG_DH_MODIFY -Wall -g -Wundef -Werror=strict-prototypes -Wno-trigraphs \
           -fno-strict-aliasing -fno-common -fshort-wchar -fno-PIE \
           -Werror=implicit-function-declaration -Werror=implicit-int \
           -Werror=return-type -Wno-format-security \
           -std=gnu89 -fstack-protector-all -z noexecstack

c、注意:开启上面选项之后所有的驱动模块需要重新编译,不然驱动模块可能无法正常执行。

3、 使用 addr2line 工具定位到异常代码行

 以防火墙模块为例:[  131.412368]  memory_init+0x124/0x2ec [netctrl]

memory_init 函数大小为 0x2ec 字节,代码执行到偏移 0x124 出异常,我们需要定位 0x124 对应哪一行代码。

a、从ko模块中获取 memory_init 函数的起始地址,有三种方式,nm、readelf、objdump工具

这里只介绍 nm 和 readelf 两种工具。

[user]$ nm netctrl.ko | grep memory_init
000000000000107c T memory_init
[user]$ readelf -s netctrl.ko | grep memory_init
   162: 000000000000107c   748 FUNC    GLOBAL DEFAULT    1 memory_init

 b、根据 0x124/0x2ec 偏移地址定位到代码行地址

memory_init 函数起始地址为 000000000000107c
memory_init+0x124/0x2ec [netctrl]异常代码对应的地址为:

0x11A0 = 0x000000000000107c + 0x124

c、使用addr2line工具找到异常代码所在文件名和行号

 memory_init 函数源码为:

int memory_init(void)
{
	V_TEMP_LISTS *pTempV  = NULL;/*临时白名单*/
	
	pIpv4 = (V4_LISTS*)kmalloc(g_max_list_num * sizeof(V4_LISTS), GFP_KERNEL);
	if (!pIpv4)
		goto __error;
	memset(pIpv4, 0, g_max_list_num * sizeof(V4_LISTS));

	pIpv6 = (V6_LISTS*)kmalloc(g_max_list_num * sizeof(V6_LISTS), GFP_KERNEL);
	if (!pIpv6)
		goto __error;
/netfireware/comm.c:68 对应行:	memset(pIpv6, 0, g_max_list_num * sizeof(V6_LISTS));
/* addr2line工具定位到上面的 comm.c:68 行,紧随其后的下面这行 kmalloc 即为异常栈中的分配点,确定是下面这行分配了 pTempV 的空间, g_max_list_num 为64 */
	pTempV = (V_TEMP_LISTS*)kmalloc(g_max_list_num * sizeof(V_TEMP_LISTS), GFP_KERNEL);
	if (!pTempV)
		goto __error;
	memset(pTempV, 0, g_max_list_num * sizeof(V_TEMP_LISTS));

	pIcmpV = (V_ICMP_LISTS*)kmalloc(g_max_list_num * sizeof(V_ICMP_LISTS), GFP_KERNEL);
	if (!pIcmpV)
		goto __error;
	memset(pIcmpV, 0, g_max_list_num * sizeof(V_ICMP_LISTS));

	pMac = (MAC_LISTS*)kmalloc(g_max_list_num * sizeof(MAC_LISTS), GFP_KERNEL);
	if (!pMac)
		goto __error;
	memset(pMac, 0, g_max_list_num * sizeof(MAC_LISTS));
    
	return 0;
}

aarch64-linux-gnu-rk3588-v1-addr2line -C -f -e netctrl.ko 0x11a0 memory_init

[user]$ aarch64-linux-gnu-rk3588-v1-addr2line -C -f -e netctrl.ko 0x11a0 memory_init
kmalloc
/home/233410/d3u3588/rklinux/src/./include/linux/slab.h:557

上面可以定位到是 memory_init 函数中的 kmalloc 行,但是 memory_init 函数有多个 kmalloc;我们希望定位具体哪个 kmalloc 函数;找 0x11a0 地址前24字节地址即可。

[user]$ aarch64-linux-gnu-rk3588-v1-addr2line -C -f -e netctrl.ko 0x1188 memory_init
memory_init
/home/233410/d3u3588/netfireware/comm.c:68

4、读数据越界访问案例分析

 4.1、越界访问源码分析

int temp_inside(unsigned char* dip, unsigned short port, int version, int isOut)
{
	int i = 0;
	......................................................................
	/* g_max_list_num 为64,i最大只能到63,进入循环执行后,再执行i++此时i等于64;退出循环 */
	for (i = 0; i < g_max_list_num; i++)
	{
		if (pTempV[i].seconds && (jiffies/HZ - pTempV[i].seconds > 3599))
		{
			memset(&pTempV[i], 0, sizeof(V_TEMP_LISTS));
		}
	}
	......................................................................
	if (isOut)	/* isOut 入参为 0 */
		pTempV[i].out_drop++;
	else
		/* 执行完循环后,i为64,pTempV数组只有64个元素,所以i=64会出现越界访问 */
		pTempV[i].in_drop++;

	return 0;
}

 4.2、读越界kasan报告分析

[  131.411886] ==================================================================
/* 越界访问出现在 [netctrl] 模块 temp_inside 函数的 +0x278/0x2a0 偏移位置 */
[  131.411924] BUG: KASAN: slab-out-of-bounds in temp_inside+0x278/0x2a0 [netctrl]
/* 越界访问是读8字节数据时出现的,具体地址在 ffffff810cffee18 ,异常访问是 UPDATASOCKET 任务,pid = 1288 触发的 */
[  131.411934] Read of size 8 at addr ffffff810cffee18 by task UPDATASOCKET/1288
[  131.411941] 
[  131.411951] CPU: 2 PID: 1288 Comm: UPDATASOCKET Tainted: P           O      5.10.66 #1
[  131.411959] Hardware name: Rockchip RK3588 NVR DEMO LP4 V10 Board (DT)
/* 异常访问的堆栈信息,定位异常访问的代码行:[  131.412037]  temp_inside+0x278/0x2a0 [netctrl]
nm netctrl.ko | grep temp_inside
[233410@yanfa219_ubuntu18-jk128:weops ckms]$ nm netctrl.ko | grep temp_inside
0000000000001a4c T temp_inside

具体异常访问代码地址: 0000000000001CC4 = 0000000000001a4c + 0x278

aarch64-linux-gnu-rk3588-v1-addr2line -C -f -e netctrl.ko 0x1cc4 temp_inside
[233410@yanfa219_ubuntu18-jk128:weops ckms]$ aarch64-linux-gnu-rk3588-v1-addr2line -C -f -e netctrl.ko 0x1cc4 temp_inside
temp_inside
/home/233410/d3u3588/netfireware/comm.c:363

找到异常访问代码行,进行分析即可。
 */
[  131.411966] Call trace:
[  131.411978]  dump_backtrace+0x0/0x2b8
[  131.411986]  show_stack+0x24/0x30
[  131.411997]  dump_stack_lvl+0x108/0x14c
[  131.412008]  print_address_description.constprop.0+0x38/0x280
[  131.412017]  kasan_report+0x14c/0x1f0
[  131.412026]  __asan_load8+0x3c/0xa8
[  131.412037]  temp_inside+0x278/0x2a0 [netctrl]
[  131.412048]  hook_ipv6_in+0x168/0x1d4 [netctrl]
[  131.412058]  nf_hook_slow+0x7c/0xec
[  131.412068]  NF_HOOK.constprop.0+0xf0/0x150
[  131.412076]  ipv6_rcv+0x70/0x8c
[  131.412085]  __netif_receive_skb_one_core+0xe8/0x130
[  131.412094]  __netif_receive_skb+0xac/0xb4
[  131.412102]  process_backlog+0x10c/0x218
[  131.412110]  net_rx_action+0x23c/0x4c0
[  131.412118]  __do_softirq+0x308/0x404
[  131.412127]  do_softirq+0x64/0x80
[  131.412137]  netif_rx_ni+0xc4/0x14c
[  131.412146]  dev_loopback_xmit+0xb0/0xc8
[  131.412154]  NF_HOOK.constprop.0+0x100/0x160
[  131.412163]  ip6_finish_output2+0x2ac/0x8d8
[  131.412171]  __ip6_finish_output+0x218/0x21c
[  131.412179]  ip6_output+0x190/0x218
[  131.412188]  dst_output+0x4c/0x60
[  131.412197]  ip6_local_out+0x48/0x5c
[  131.412205]  ip6_send_skb+0x58/0xe8
[  131.412214]  udp_v6_send_skb+0x3c0/0x5c8
[  131.412223]  udpv6_sendmsg+0x910/0xb88
[  131.412231]  inet6_sendmsg+0x6c/0x88
[  131.412241]  sock_sendmsg_nosec+0x4c/0x6c
[  131.412250]  __sys_sendto+0x14c/0x1bc
[  131.412259]  __arm64_sys_sendto+0x84/0xa0
[  131.412268]  el0_svc_common.constprop.0+0x1a8/0x244
[  131.412276]  do_el0_svc+0xc8/0x100
[  131.412286]  el0_svc+0x20/0x30
[  131.412295]  el0_sync_handler+0xd8/0x184
[  131.412302]  el0_sync+0x1a0/0x1c0
[  131.412309] 
/* 异常访问地址对应的数据结构空间分配的堆栈信息,找到实际分配的数据结构信息;
根据 [  131.412368]  memory_init+0x124/0x2ec [netctrl] ,通过 3、 的分析,我们可以知道 
pTempV = (V_TEMP_LISTS*)kmalloc(g_max_list_num * sizeof(V_TEMP_LISTS), GFP_KERNEL),确定是此行分配了 pTempV 的空间;
g_max_list_num 为64,V_TEMP_LISTS 结构体大小为 4*6+8*4 = 56 字节,因此确认 pTempV 指针指向了一片 56 * 64 = 3584 字节的空间。
 */
[  131.412315] Allocated by task 756:
[  131.412325]  kasan_save_stack+0x28/0x58
[  131.412333]  kasan_set_track+0x28/0x3c
[  131.412341]  ____kasan_kmalloc+0x84/0x9c
[  131.412349]  __kasan_kmalloc+0x10/0x1c
[  131.412357]  __kmalloc+0x1b4/0x234
[  131.412368]  memory_init+0x124/0x2ec [netctrl]
[  131.412379]  ly_ioctl+0x290/0x79c [netctrl]
[  131.412387]  vfs_ioctl+0x74/0x84
[  131.412396]  do_vfs_ioctl+0x6a8/0x898
[  131.412404]  __arm64_sys_ioctl+0x6c/0xbc
[  131.412412]  el0_svc_common.constprop.0+0x1a8/0x244
[  131.412421]  do_el0_svc+0xc8/0x100
[  131.412429]  el0_svc+0x20/0x30
[  131.412438]  el0_sync_handler+0xd8/0x184
[  131.412445]  el0_sync+0x1a0/0x1c0
[  131.412451] 
/* 异常访问地址附近的内存状态分析。
ffffff810cffe000: V_TEMP_LISTS 结构首地址
	V_TEMP_LISTS 结构共 0xE00 = 3584 字节大小,此部分空间是可访问状态,对应 00
ffffff810cffee00: V_TEMP_LISTS 结构结束地址
	中间 kmalloc 为了对齐,多分配的内存空间状态为 fc ,不可访问状态。
ffffff810cfff000: V_TEMP_LISTS 有 3584 字节大小,kmalloc为了对齐分配了 4096 字节空间

分析出2个信息:
第一:实际可访问空间内存大小:可用的起始地址为 ffffff810cffe000 ;
[  131.412560]  ffffff810cffed80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[  131.412568] >ffffff810cffee00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
可访问空间结束地址(00状态对应的空间地址)为:ffffff810cffee00;中间有 3584 个字节,与 V_TEMP_LISTS 结构体地址分析对应上。

第二:获取异常地址对应的成员名
注意 ^ 符号指向的地址,越界读取8字节数据的开始地址为 0xffffff810cffee18 = ffffff810cffe000 + e18(3608),即越界元素 pTempV[64] 内偏移 0x18 处,对应的成员为 in_drop(与源码中 pTempV[i].in_drop++ 对应);上面堆栈打印 Read of size 8 at addr ffffff810cffee18 by task UPDATASOCKET/1288 也直接指出了异常地址。
typedef struct V_TEMP_LISTS  
{
(ffffff810cffee00)	unsigned int    ip[4];
(ffffff810cffee10)	unsigned int    port;
(ffffff810cffee14)	unsigned int    seconds;
(ffffff810cffee18)	unsigned long   in_drop;
(ffffff810cffee20)	unsigned long   in_accept;
(ffffff810cffee28)	unsigned long   out_drop;
(ffffff810cffee30)	unsigned long   out_accept;
}V_TEMP_LISTS, *PV_TEMP_LISTS;
 */
[  131.412459] The buggy address belongs to the object at ffffff810cffe000
[  131.412459]  which belongs to the cache kmalloc-4k of size 4096
[  131.412469] The buggy address is located 3608 bytes inside of
[  131.412469]  4096-byte region [ffffff810cffe000, ffffff810cfff000)
[  131.412476] The buggy address belongs to the page:
[  131.412486] page:0000000097d7c9b3 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x10cff8
[  131.412494] head:0000000097d7c9b3 order:3 compound_mapcount:0 compound_pincount:0
[  131.412504] flags: 0x8000000000010200(slab|head)
[  131.412514] raw: 8000000000010200 ffffffff0413fc00 0000000200000002 ffffff8100003500
[  131.412524] raw: 0000000000000000 0000000000040004 00000001ffffffff 0000000000000000
[  131.412531] page dumped because: kasan: bad access detected
[  131.412537] 
[  131.412543] Memory state around the buggy address:
[  131.412552]  ffffff810cffed00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[  131.412560]  ffffff810cffed80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[  131.412568] >ffffff810cffee00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[  131.412574]                             ^
[  131.412582]  ffffff810cffee80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[  131.412591]  ffffff810cffef00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[  131.412597] ==================================================================

5、写数据越界访问案例分析

5.1、写越界访问代码分析

/*
 * Demonstration of an out-of-bounds write to the right of a kmalloc() buffer.
 * The store past the end of the buffer is deliberate: it is what produces the
 * KASAN slab-out-of-bounds report analysed in the next section, so it must
 * not be "fixed".
 */
static noinline void __init kmalloc_oob_right(void)
{
	char *ptr;
	size_t size = 123;

	pr_info("out-of-bounds to right\n");
	/* Request 123 bytes; the valid indices are 0..122. */
	ptr = kmalloc(size, GFP_KERNEL);
	if (!ptr) {
			pr_err("Allocation failed\n");
			return;
	}
	/* Index 123 is the 124th byte, one past the requested size: out-of-bounds write. */
	ptr[size] = 'x';
	kfree(ptr);
}

5.2、写越界kasan报告分析

/proc/dbug # echo oob > kasan 
[  108.292207] 
[  132.455158] open embedsky board device!
/proc/dbug # [  132.457626] printk kbuf oob
[  132.457626]  
[  132.457653] Executing function for cmd1
[  132.457672] out-of-bounds to right
[  132.457702] ==================================================================
/* 检测出错类型 (slab-out-of-bounds) ,出错的模块名 [dbg] */
[  132.457743] BUG: KASAN: slab-out-of-bounds in proc_wrbuff_write+0xe8/0x24c [dbg]
/* 写数据出错,写一个字节数据时发现越界了,打出访问出错的进程和pid号 */
[  132.457768] Write of size 1 at addr ffffff810402e47b by task echo/1151
[  132.457787] 
/* 打出访问的堆栈信息 */
[  132.457814] CPU: 4 PID: 1151 Comm: echo Tainted: P           O      5.10.66 #1
[  132.457837] Hardware name: Rockchip RK3588 NVR DEMO LP4 V10 Board (DT)
[  132.457857] Call trace:
[  132.457888]  dump_backtrace+0x0/0x2b8
[  132.457914]  show_stack+0x24/0x30
[  132.457945]  dump_stack_lvl+0x108/0x14c
[  132.457978]  print_address_description.constprop.0+0x38/0x280
[  132.458006]  kasan_report+0x14c/0x1f0
[  132.458033]  __asan_store1+0x3c/0x9c
[  132.458065]  proc_wrbuff_write+0xe8/0x24c [dbg]
[  132.458092]  proc_reg_write+0xf4/0x10c
[  132.458121]  vfs_write+0xc4/0x150
[  132.458150]  ksys_write+0xd8/0x158
[  132.458178]  __arm64_sys_write+0x50/0x64
[  132.458208]  el0_svc_common.constprop.0+0x1a8/0x244
[  132.458234]  do_el0_svc+0xc8/0x100
[  132.458263]  el0_svc+0x20/0x30
[  132.458293]  el0_sync_handler+0xd8/0x184
[  132.458318]  el0_sync+0x1a0/0x1c0
[  132.458336] 
/* 打印此内存创建函数的堆栈信息 */
[  132.458355] Allocated by task 1151:
[  132.458383]  kasan_save_stack+0x28/0x58
[  132.458409]  kasan_set_track+0x28/0x3c
[  132.458435]  ____kasan_kmalloc+0x84/0x9c
[  132.458461]  __kasan_kmalloc+0x10/0x1c
[  132.458487]  kmem_cache_alloc_trace+0x168/0x1fc
[  132.458518]  proc_wrbuff_write+0x1cc/0x24c [dbg]
[  132.458543]  proc_reg_write+0xf4/0x10c
[  132.458571]  vfs_write+0xc4/0x150
[  132.458598]  ksys_write+0xd8/0x158
[  132.458627]  __arm64_sys_write+0x50/0x64
[  132.458653]  el0_svc_common.constprop.0+0x1a8/0x244
[  132.458679]  do_el0_svc+0xc8/0x100
[  132.458706]  el0_svc+0x20/0x30
[  132.458734]  el0_sync_handler+0xd8/0x184
[  132.458758]  el0_sync+0x1a0/0x1c0
[  132.458774] 
/* 
出错地址位于一个数据结构(一片内存)的某个地址,数据结构的起始地址是 ffffff810402e400
这片空间是使用 kmalloc 函数分配的一片内存,所属的 slab 缓存为 kmalloc-128,对象大小是 128 字节;我们实际只申请了 123 字节,但是 kmalloc 因为对齐和分配粒度等原因会多分配空间。
出问题的地址位于起始地址 ffffff810402e400 之后偏移 123 字节处(即第 124 个字节),问题地址属于 128-byte region [ffffff810402e400, ffffff810402e480)
 */
[  132.458798] The buggy address belongs to the object at ffffff810402e400
[  132.458798]  which belongs to the cache kmalloc-128 of size 128
[  132.458826] The buggy address is located 123 bytes inside of
[  132.458826]  128-byte region [ffffff810402e400, ffffff810402e480)
[  132.458847] The buggy address belongs to the page:
[  132.458874] page:00000000067f7e59 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x10402e
[  132.458898] head:00000000067f7e59 order:1 compound_mapcount:0
[  132.458924] flags: 0x8000000000010200(slab|head)
[  132.458957] raw: 8000000000010200 ffffffff03e94880 0000000800000008 ffffff8100003c80
[  132.458987] raw: 0000000000000000 0000000080200020 00000001ffffffff 0000000000000000
[  132.459008] page dumped because: kasan: bad access detected
[  132.459026] 
/* 显示内存可使用情况;00 一个字节数据可以表示 8 个字节内存的状态;看源码我们分配了 123 字节的空间;
	第一:特别注意 ^ 号指向的地址:>ffffff810402e400: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 03,前 15 个 00 表示 15*8 = 120 字节全部可访问,最后的 03 表示该 8 字节中只有前 3 个字节可访问,因此我们实际分配的空间大小 = 15*8+3 = 123;上面提到对象大小为128字节,多出来的5字节是不可访问的。
	第二:通过上面打印 Write of size 1 at addr ffffff810402e47b by task echo/1151 可知异常访问地址为: ffffff810402e47b ,也可通过下面方式计算得到:ffffff810402e400 + 7B(123),刚好是数组末尾之后的第一个字节,即越界了 1 个字节。
 */
[  132.459044] Memory state around the buggy address:
[  132.459070]  ffffff810402e300: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[  132.459097]  ffffff810402e380: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[  132.459124] >ffffff810402e400: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 03
[  132.459144]                                                                 ^
[  132.459171]  ffffff810402e480: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[  132.459197]  ffffff810402e500: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[  132.459217] ==================================================================

6、kasan与kdump结合

kasan 在uboot的cmdline中配置 kasan.fault=panic 则打出kasan堆栈打印之后,调用 panic 触发kdump机制。

X:\work\linux\linux-6.0\mm\kasan\report.c
kasan_report
    start_report(&irq_flags, true);        /* 打印kasan的开始: ================================================================== */
    print_report(&info);                /* 打印kasan中间内容: BUG: KASAN: slab-out-of-bounds in temp_inside+0x278/0x2a0 [netctrl] */
    end_report(&irq_flags, ptr);        /* 打印kasan的结束: ================================================================== */
        if (panic_on_warn && !test_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags))
            panic("panic_on_warn set ...\n");
        if (kasan_arg_fault == KASAN_ARG_FAULT_PANIC)

            /* 设置 kasan.fault=panic ,调用panic,进入kdump流程启动捕获内核 */
            panic("kasan.fault=panic set ...\n");

默认情况下,kasan 检测到内存错误时不会触发系统死机(panic),只是将堆栈信息打印出来;但 kasan 已经支持通过 kasan.fault=panic 参数,在检测到内存越界时立即触发 panic。

/*
 * Parse the "kasan.fault" early boot parameter (kasan.fault=report/panic).
 *
 * arg: the value given on the kernel command line; may be NULL.
 *
 * Returns 0 after storing the matching mode in kasan_arg_fault, or -EINVAL
 * when arg is NULL or is neither "report" nor "panic".
 */
static int __init early_kasan_fault(char *arg)
{
    /* The parameter was given without a value. */
    if (!arg)
        return -EINVAL;

    /* Exact string match only; any other value is rejected below. */
    if (!strcmp(arg, "report"))
        kasan_arg_fault = KASAN_ARG_FAULT_REPORT;
    else if (!strcmp(arg, "panic"))
        kasan_arg_fault = KASAN_ARG_FAULT_PANIC;
    else
        return -EINVAL;

    return 0;
}
/* Register early_kasan_fault as the handler for the "kasan.fault" parameter. */
early_param("kasan.fault", early_kasan_fault);

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值