移植microarrayfp指纹模块,在probe进行spi写、读操作时内核出现oops的崩溃问题。
[ 18.404728] Unable to handle kernel paging request at virtual address ffffffe0fd4533c0
[ 18.412693] Mem abort info:
[ 18.415498] ESR = 0x96000145
[ 18.418585] Exception class = DABT (current EL), IL = 32 bits
[ 18.424571] SET = 0, FnV = 0
[ 18.427633] EA = 0, S1PTW = 0
[ 18.430822] Data abort info:
[ 18.433726] ISV = 0, ISS = 0x00000145
[ 18.437591] CM = 1, WnR = 1
[ 18.440581] swapper pgtable: 4k pages, 39-bit VAs, pgdp = ffffffa0fd568000
[ 18.447475] [ffffffe0fd4533c0] pgd=0000000000000000, pud=0000000000000000
[ 18.454317] Internal error: Oops: 96000145 [#1] PREEMPT SMP
[ 18.459894] Modules linked in:
[ 18.462951] Process swapper/0 (pid: 1, stack limit = 0xffffff8008058000)
[ 18.469655] CPU: 6 PID: 1 Comm: swapper/0 Tainted: G W 4.19.157 #2
[ 18.477141] Hardware name: Qualcomm Technologies, Inc. BENGAL IDP (DT)
[ 18.483671] pstate: 00400005 (nzcv daif +PAN -UAO)
[ 18.488471] pc : __dma_inv_area+0x28/0x58
[ 18.492485] lr : fast_smmu_map_page+0x10c/0x270
[ 18.497020] sp : ffffff800805b6e0
[ 18.500333] x29: ffffff800805b6f0 x28: 000000157d4533d0
[ 18.505641] x27: 00000000000003d0 x26: ffffffbf00000000
[ 18.510950] x25: 0000000000000000 x24: 0000000000000002
[ 18.516259] x23: 0000000000000000 x22: 000000000000000a
[ 18.521566] x21: ffffffccd68c9b80 x20: ffffffa0fc26d000
[ 18.526874] x19: 0000000000001000 x18: 0000000000000078
[ 18.532182] x17: ffffffa0fcbfd000 x16: 0000000000000000
[ 18.537489] x15: ffffffa0fb81a5f0 x14: ffffffa0fc4bc018
[ 18.542797] x13: 0000000000000002 x12: 0000000000000000
[ 18.548105] x11: 0000000000000000 x10: 0000000000000000
[ 18.553413] x9 : 000000000157d453 x8 : ffffffe0fd453000
[ 18.558720] x7 : 0000000000000000 x6 : 000000000000003f
[ 18.564028] x5 : 0000000000000000 x4 : 0000000000000002
[ 18.569336] x3 : 000000000000003f x2 : 0000000000000040
[ 18.574644] x1 : ffffffe0fd4533c0 x0 : ffffffe0fd4533d0
[ 18.579953] Call trace:
[ 18.582398] __dma_inv_area+0x28/0x58
[ 18.586067] geni_se_iommu_map_buf+0xa4/0x100
[ 18.590421] spi_geni_prepare_message+0x15c/0x4a8
[ 18.595129] __spi_pump_messages+0x408/0x718
[ 18.599397] __spi_sync+0x2bc/0x310
[ 18.602882] spi_sync+0x30/0x50
[ 18.606024] mas_sync+0xd4/0x118
[ 18.609252] init_connect+0x40/0x250
[ 18.612833] mas_probe+0x43c/0x650
[ 18.616232] spi_drv_probe+0x88/0xb0
[ 18.619804] really_probe+0x540/0x720
[ 18.623470] driver_probe_device+0x74/0x148
[ 18.627651] __driver_attach+0x114/0x1c8
[ 18.631577] bus_for_each_dev+0x84/0xd0
[ 18.635416] driver_attach+0x2c/0x38
[ 18.638986] bus_add_driver+0x144/0x268
[ 18.642825] driver_register+0x78/0x110
Unable to handle kernel paging request at virtual address ffffffe0fd4533c0
其中地址ffffffe0fd4533c0
为非常大的值。首先排除空指针和hang可能。
从栈信息可知调用顺序为:
mas_probe -> init_connect -> mas_sync -> spi_sync 直到调__dma_inv_area
崩溃
使用objdump对vmlinux文件进行反汇编:
运行./aarch64-linux-androidkernel-objdump -D vmlinux > obj_log,将结果导出到obj_log文件。
在obj_log文件中搜索__dma_inv_area,看到如下汇编代码:
ffffff80080ad650 <__dma_inv_area>:
ffffff80080ad650: 8b000021 add x1, x1, x0
ffffff80080ad654: d53b0023 mrs x3, ctr_el0
ffffff80080ad658: d503201f nop
ffffff80080ad65c: d3504c63 ubfx x3, x3, #16, #4
ffffff80080ad660: d2800082 mov x2, #0x4 // #4
ffffff80080ad664: 9ac32042 lsl x2, x2, x3
ffffff80080ad668: d1000443 sub x3, x2, #0x1
ffffff80080ad66c: ea03003f tst x1, x3
ffffff80080ad670: 8a230021 bic x1, x1, x3
ffffff80080ad674: 54000040 b.eq ffffff80080ad67c <__dma_inv_area+0x2c>
ffffff80080ad678: d50b7e21 dc civac, x1
ffffff80080ad67c: ea03001f tst x0, x3
ffffff80080ad680: 8a230000 bic x0, x0, x3
ffffff80080ad684: 54000060 b.eq ffffff80080ad690 <__dma_inv_area+0x40>
ffffff80080ad688: d50b7e20 dc civac, x0
ffffff80080ad68c: 14000002 b ffffff80080ad694 <__dma_inv_area+0x44>
ffffff80080ad690: d5087620 dc ivac, x0
ffffff80080ad694: 8b020000 add x0, x0, x2
ffffff80080ad698: eb01001f cmp x0, x1
ffffff80080ad69c: 54ffffa3 b.cc ffffff80080ad690 <__dma_inv_area+0x40>
ffffff80080ad6a0: d5033f9f dsb sy
ffffff80080ad6a4: d65f03c0 ret
根据Call trace信息__dma_inv_area+0x28/0x58:0x28代表崩溃时pc相对于__dma_inv_area起始地址的偏移量,0x58为该函数的总长度。
从__dma_inv_area基地址ffffff80080ad650偏移0x28,得到地址ffffff80080ad678,即在
ffffff80080ad678: d50b7e21 dc civac, x1
处崩溃。而这条指令只有寄存器x1的值可变。
从以下的寄存器状态可知,x1的值刚好对应崩溃的地址ffffffe0fd4533c0:
[ 18.488471] pc : __dma_inv_area+0x28/0x58
[ 18.492485] lr : fast_smmu_map_page+0x10c/0x270
[ 18.497020] sp : ffffff800805b6e0
[ 18.500333] x29: ffffff800805b6f0 x28: 000000157d4533d0
[ 18.505641] x27: 00000000000003d0 x26: ffffffbf00000000
[ 18.510950] x25: 0000000000000000 x24: 0000000000000002
[ 18.516259] x23: 0000000000000000 x22: 000000000000000a
[ 18.521566] x21: ffffffccd68c9b80 x20: ffffffa0fc26d000
[ 18.526874] x19: 0000000000001000 x18: 0000000000000078
[ 18.532182] x17: ffffffa0fcbfd000 x16: 0000000000000000
[ 18.537489] x15: ffffffa0fb81a5f0 x14: ffffffa0fc4bc018
[ 18.542797] x13: 0000000000000002 x12: 0000000000000000
[ 18.548105] x11: 0000000000000000 x10: 0000000000000000
[ 18.553413] x9 : 000000000157d453 x8 : ffffffe0fd453000
[ 18.558720] x7 : 0000000000000000 x6 : 000000000000003f
[ 18.564028] x5 : 0000000000000000 x4 : 0000000000000002
[ 18.569336] x3 : 000000000000003f x2 : 0000000000000040
[ 18.574644] x1 : ffffffe0fd4533c0 x0 : ffffffe0fd4533d0
因此找出地址ffffffe0fd4533c0
从哪里来?
驱动中所调用的函数,最后看到
/*
 * mas_sync() - run one synchronous SPI transfer with a TX and an RX buffer.
 * @txb: buffer assigned to spi_transfer.tx_buf
 * @rxb: buffer assigned to spi_transfer.rx_buf
 * @len: transfer length, assigned to spi_transfer.len
 *
 * Builds a single-transfer spi_message on the stack and submits it with
 * spi_sync() on smas->spi while holding dev_lock.
 *
 * Return: the value returned by spi_sync().
 *
 * NOTE(review): the oops analysed in this document went away once the
 * buffers passed in here were kmalloc()ed instead of being static arrays,
 * so callers presumably must supply DMA-safe memory -- confirm.
 */
int mas_sync(u8 *txb, u8 *rxb, int len)
{
int ret = 0;
/* Not initialised here; spi_message_init() below sets it up before use. */
struct spi_message m;
/* The only transfer of the message: 8-bit words at the device's max clock. */
struct spi_transfer t = {
.tx_buf = txb,
.rx_buf = rxb,
.len = len,
.delay_usecs = 1,
.bits_per_word = 8,
.speed_hz = smas->spi->max_speed_hz,
};
/* dev_lock is held for the whole message setup and transfer. */
mutex_lock(&dev_lock);
spi_message_init(&m);
/* Link t into m's transfer list so spi_sync() sees it. */
spi_message_add_tail(&t, &m);
ret = spi_sync(smas->spi, &m);
mutex_unlock(&dev_lock);
return ret;
}
在spi_sync(smas->spi, &m)中,smas->spi是驱动注册产生,只有struct spi_message m为额外定义。
m在定义时未显式赋值,但在使用前已调用spi_message_init(&m)对其进行初始化。
其中要传输的数据包含在
/* Size of each static buffer below: 32 KiB. */
#define FBUF (32*1024)
/* NOTE(review): presumably the TX/RX buffers handed to mas_sync() as txb/rxb -- confirm against the callers. */
static u8 stxb[FBUF];
static u8 srxb[FBUF];
/* Transfer descriptor repeated from mas_sync(): tx_buf/rx_buf carry the buffer pointers. */
struct spi_transfer t = {
.tx_buf = txb,
.rx_buf = rxb,
.len = len,
.delay_usecs = 1,
.bits_per_word = 8,
.speed_hz = smas->spi->max_speed_hz,
};
由下面可知spi_message_add_tail(&t, &m);
将struct spi_transfer
类型变量t
插入到struct spi_message
类型变量m的链表成员transfers
,由此建立变量t
与变量m
的联系。
/*
 * Kernel helper quoted for reference: links transfer @t into message @m by
 * adding t->transfer_list to the m->transfers list with list_add_tail().
 */
static inline void
spi_message_add_tail(struct spi_transfer *t, struct spi_message *m)
{
list_add_tail(&t->transfer_list, &m->transfers);
}
/* struct spi_message as quoted in this analysis. */
struct spi_message {
/* List that spi_message_add_tail() links each spi_transfer into. */
struct list_head transfers;
/* Target device; __spi_sync() assigns it before pumping the message. */
struct spi_device *spi;
unsigned is_dma_mapped:1;
void (*complete)(void *context);
void *context;
unsigned frame_length;
unsigned actual_length;
int status;
struct list_head queue;
void *state;
struct list_head resources;
};
以下调用均是linux内核函数,列举其结构关系。最终看到struct spi_transfer *xfer
成员tx_buf
和rx_buf
。到geni_se_iommu_map_buf()
可知接下来内核将崩溃。
因此锁定成员tx_buf
和rx_buf
分析。
/*
 * Abridged excerpt ("//..." marks omitted code). Records the target device
 * in the message, then calls __spi_pump_messages() on the device's
 * controller with in_kthread = false.
 */
static int __spi_sync(struct spi_device *spi, struct spi_message *message)
{
struct spi_controller *ctlr = spi->controller;
//...
message->spi = spi;
//...
__spi_pump_messages(ctlr, false);
//...
}
/*
 * Abridged excerpt ("//..." marks omitted code). Invokes the controller's
 * prepare_message hook on ctlr->cur_msg; in the call trace above that hook
 * is spi_geni_prepare_message().
 */
static void __spi_pump_messages(struct spi_controller *ctlr, bool in_kthread)
{
//...
ctlr->prepare_message(ctlr, ctlr->cur_msg);
//...
}
/*
 * Abridged excerpt ("//..." marks omitted code). Looks up the driver data
 * of the SPI master and passes the message to spi_geni_map_buf().
 */
static int spi_geni_prepare_message(struct spi_master *spi,
struct spi_message *spi_msg)
{
struct spi_geni_master *mas = spi_master_get_devdata(spi);
//...
ret = spi_geni_map_buf(mas, spi_msg);
//...
}
/*
 * spi_geni_map_buf() - map the buffers of every transfer in a message.
 * @mas: driver data of the SPI master; mas->wrapper_dev is the device
 *       handed to geni_se_iommu_map_buf()
 * @msg: message whose transfers list is walked
 *
 * Abridged excerpt ("//..." marks omitted code). For each transfer on
 * msg->transfers, a non-NULL rx_buf is passed to geni_se_iommu_map_buf()
 * with DMA_FROM_DEVICE and a non-NULL tx_buf with DMA_TO_DEVICE, together
 * with &xfer->rx_dma / &xfer->tx_dma and xfer->len. This is the call that
 * appears directly above __dma_inv_area in the crash call trace.
 */
static int spi_geni_map_buf(struct spi_geni_master *mas,
			    struct spi_message *msg)
{
	/*
	 * Loop cursor. It has to be declared before list_for_each_entry()
	 * uses it; declaring it inside the loop body would leave the macro
	 * referring to an undeclared name and shadow it with an
	 * uninitialised pointer.
	 */
	struct spi_transfer *xfer;
	//...
	list_for_each_entry(xfer, &msg->transfers, transfer_list) {
		if (xfer->rx_buf) {
			ret = geni_se_iommu_map_buf(mas->wrapper_dev,
						    &xfer->rx_dma, xfer->rx_buf,
						    xfer->len, DMA_FROM_DEVICE);
		}
		if (xfer->tx_buf) {
			ret = geni_se_iommu_map_buf(mas->wrapper_dev,
						    &xfer->tx_dma,
						    (void *)xfer->tx_buf,
						    xfer->len, DMA_TO_DEVICE);
		}
	}
	//...
}
打印static u8 stxb[FBUF];
和static u8 srxb[FBUF];
地址得到
stxb
是ffffffe0fd4533c0
srxb
是ffffffe0fd4533d0
与崩溃时的指针一致。
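从vmlinux看kernel代码空间地址在ffffff80xxxxxxxx。
驱动中静态变量stxb和srxb共64KB空间,可能超出了可用空间;更可能的原因是:静态数组位于内核镜像的数据段(data/bss)中,不属于内核线性映射区,而DMA映射接口要求缓冲区来自线性映射区(例如kmalloc分配的内存),对静态数组的地址做虚拟地址/物理地址转换会得到无效地址,随后在对该地址做cache维护操作(dc civac)时触发缺页崩溃。
最后将变量由static定义改为用kmalloc申请内核堆中空间,内核未再发生崩溃。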