嵌入式软件之应用调试

目录

一、strace追踪

二、gdb与gdbserver调试

2.1 gdb调试

2.2 core dump

三、修改内核打印用户段错误信息

四、自制系统调用

4.1 搭建

4.2 使用 


一、strace追踪

使用strace命令来跟踪系统调用,应用程序过程中open、read、write等就涉及到系统调用,strace工具比较简单,可以解决一些常见问题,这里用的是strace-4.5.15版本的

编译过程,其中host是运行的宿主机,CC是编译器

./configure --host=arm-linux CC=arm-linux-gcc
make

编译完成后得到strace应用程序,然后将strace拷贝到单板的bin下

输入strace,可以得到命令的用法,加入-o选项,以下会将追踪firstdrvtest这个应用程序并且将其系统调用输出在log.txt中

# insmod first_drv.ko
# strace -o log.txt ./firstdrvtest on

加载一个字符设备驱动,跟踪测试程序,查看其系统调用,在测试程序中涉及到open和write两个系统调用

# insmod First_drv.ko
# strace -o log.txt ./firstdrvtest on
first_drv_open
first_drv_write
# vi log.txt
execve("./firstdrvtest", ["./firstdrvtest", "on"], [/* 6 vars */]) = 0  
uname({sys="Linux", node="192.168.0.19", ...}) = 0
brk(0)                                  = 0x11000
access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or directory)    //加载一大堆库
open("/etc/ld.so.cache", O_RDONLY)      = -1 ENOENT (No such file or directory)
open("/lib/v4l/half/libc.so.6", O_RDONLY) = -1 ENOENT (No such file or directory
stat64("/lib/v4l/half", 0xbee8c520)     = -1 ENOENT (No such file or directory)
open("/lib/v4l/libc.so.6", O_RDONLY)    = -1 ENOENT (No such file or directory)
stat64("/lib/v4l", 0xbee8c520)          = -1 ENOENT (No such file or directory)
open("/lib/half/libc.so.6", O_RDONLY)   = -1 ENOENT (No such file or directory)
stat64("/lib/half", 0xbee8c520)         = -1 ENOENT (No such file or directory)
open("/lib/libc.so.6", O_RDONLY)        = 3
read(3, "\177ELF\1\1\1a\0\0\0\0\0\0\0\0\3\0(\0\1\0\0\0\330o\1\000"..., 512) = 51
fstat64(3, {st_mode=S_IFREG|0755, st_size=1435660, ...}) = 0
mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x40
mmap2(NULL, 1150612, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x4
mprotect(0x40129000, 56980, PROT_NONE)  = 0
mmap2(0x40131000, 16384, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRI
mmap2(0x40135000, 7828, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOU
close(3)                                = 0
open("/dev/xyz", O_RDWR)                = 3  //返回值为3
write(3, "\1\0\0\0", 4)                 = 0  //把\1\0\0\0数值写到描述符为3的文件,写4个字节
exit_group(0)                           = ?
- log.txt 1/23 4%

对于1.7的busybox加载卸载驱动没有错误提示,在busybox官网中下载1.20版本的busybox,busybox是ls、cp、cd等等命令的组合,使用ls/cp/cd等命令都是到busybox的链接,编译新的busybox,并把busybox拷贝到单板的bin目录下

tar xjf busybox-1.20.0.tar.bz2
cd busybox-1.20.0/
make menuconfig
在编译选项中加上交叉编译的前缀:arm-linux-
Busybox Settings->Build Options->Cross compiler prefix   
使用默认的配置make
编译的时候出现的错误基本的C语言的错误,最快解决方法是在配置选项中把功能都去掉

在单板上加载驱动,卸载驱动的时候出错,利用strace跟踪卸载过程的系统调用,调用chdir系统调用进入/lib/modules不成功,因此创建一个/lib/modules目录,再次卸载,发现需要再创建一个/lib/modules/2.6.22.6目录,创建完之后再次卸载驱动就可以卸载

/ # insmod First_drv.ko
/ # rmmod First_drv
rmmod: chdir(/lib/modules): No such file or directory
/ # strace -o log.txt rmmod First_drv
rmmod: chdir(/lib/modules): No such file or directory
/ # vi log.txt
...
getuid32()                              = 0
chdir("/lib/modules")                   = -1 ENOENT (No such file or directory)
write(2, "rmmod: chdir(/lib/modules): No s"..., 54) = 54
exit_group(1) 
/ # strace -o log.txt rmmod First_drv
rmmod: chdir(2.6.22.6): No such file or directory
/ # vi log.txt
...
chdir("/lib/modules")                   = 0
uname({sys="Linux", node="192.168.0.19", ...}) = 0
chdir("2.6.22.6")                       = -1 ENOENT (No such file or directory)
write(2, "rmmod: chdir(2.6.22.6): No such "..., 50) = 50
exit_group(1)                           = ?
/ # mkdir /lib/modules/2.6.22.6
/ # rmmod First_drv

对于strace -o log.txt rmmod First_drv这条命令,strace是父进程,会创建一个子进程来执行rmmod First_drv命令,所有涉及的系统调用open、read、write(C库),最终会触发swi指令,这条指令会导致系统发生swi异常,进入内核态,在异常处理函数中就会调用sys_open、sys_read、sys_write
在内核源码中arch\arm\kernel\entry-common.S中会在vector_swi先判断子进程有没有被父进程跟踪,被跟踪了后会调用__sys_trace,给父进程发信号(记录)然后让子进程继续运行

    stmdb    sp!, {r4, r5}            @ push fifth and sixth args
    tst    ip, #_TIF_SYSCALL_TRACE        @ are we tracing syscalls?  //我们是否被跟踪
    bne    __sys_trace

二、gdb与gdbserver调试

2.1 gdb调试

gdb下载地址:点击这里。在PC机上运行gdb来调试ARM板上的应用程序app时需要gdbserver:gdbserver是app的父进程,跟strace相似,gdb发出命令给gdbserver,再由gdbserver控制子进程app;gdb想读变量,需要带有调试信息的app文件。编译gdb时,执行./configure --help可以看到配置选项,其中host表示gdb在哪里运行,不写默认为PC机;target表示被调试程序运行的目标平台,这里是ARM板,因此设置为arm-linux。默认安装到系统目录(prefix默认为/usr/local),也可以安装到当前目录的tmp目录中

tar xjf gdb-7.4.tar.bz2
cd gdb-7.4/
./configure --target=arm-linux  //出现出错执行这条命令./configure --target=arm-linux --disable-werror
make
sudo make install //也可以安装在自己的目录 例make install prefix=$PWD/tmp

 安装gdbserver,在gdb-7.4目录下进入gdb/gdbserver目录

cd gdb/gdbserver/
./configure --host=arm-linux

配置完成后执行make,编译时报错:linux-arm-low.c中PTRACE_GETSIGINFO未定义。在交叉工具链的头文件中查找,它定义在linux/ptrace.h中,而linux-arm-low.c包含的是sys/ptrace.h,因此在linux-arm-low.c中添加一行#include <linux/ptrace.h>,然后重新执行make即可编译成功

linux-arm-low.c:642: error: `PTRACE_GETSIGINFO' undeclared (first use in this function)
linux-arm-low.c:642: error: (Each undeclared identifier is reported only once
linux-arm-low.c:642: error: for each function it appears in.)
Makefile:191: recipe for target 'linux-arm-low.o' failed
book$ echo $PATH
/home/book/bin:/home/book/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/work/tools/gcc-3.4.5-glibc-2.3.6/bin:/snap/bin
cd /work/tools/gcc-3.4.5-glibc-2.3.6/
book$ grep "PTRACE_GETSIGINFO" * -nR
arm-linux/include/linux/ptrace.h:27:#define PTRACE_GETSIGINFO   0x4202
arm-linux/sys-include/linux/ptrace.h:27:#define PTRACE_GETSIGINFO       0x4202
distributed/arm-linux/include/linux/ptrace.h:27:#define PTRACE_GETSIGINFO       0x4202
distributed/arm-linux/sys-include/linux/ptrace.h:27:#define PTRACE_GETSIGINFO   0x4202

在单板上将编译好的gdbserver拷贝到单板的bin中去

调试程序test_debug.c如下

#include <stdio.h>
/* test_debug.c: demo that writes through a NULL pointer on purpose (main > A > B > C). */
void C(int *p)
{
	*p = 0x12;	/* stores 0x12 through p; this is the store that faults when p is NULL */
}

/* B and A only forward the pointer to C. */
void B(int *p)
{
	C(p);
}

void A(int *p)
{
	B(p);
}
/* A2 calls C directly; main uses it with a valid pointer. */
void A2(int *p)
{
	C(p);
}


int main(int argc, char **argv)
{
	int a;
	int *p = NULL;
	/* valid pointer first: a becomes 0x12 and is printed */
	A2(&a);  // A2 > C
	printf("a = 0x%x\n", a);
	/* p is still NULL here, so this call chain ends in the faulting store in C */
	A(p);    // A > B > C

	return 0;
}

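编译要调试的应用,编译时加上-g选项,让应用程序带有调试信息。在单板上执行gdbserver <PC的IP>:1234 ./test_debug(例如gdbserver 192.168.0.106:1234 ./test_debug,gdbserver实际只使用冒号后面的端口号),在PC上执行arm-linux-gdb ./test_debug,然后在gdb界面远程连接单板:target remote 192.168.0.19:1234(单板的IP加上gdbserver监听的端口号,两边的端口号必须一致),调试过程如下,退出gdb执行quit命令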

(gdb) target remote 192.168.0.19:1234
Remote debugging using 192.168.0.19:1234
warning: Can not parse XML target description; XML support was disabled at compile time
warning: `/lib/ld-linux.so.2': Shared library architecture unknown is not compatible with target architecture arm.
warning: `/lib/ld-linux.so.2': Shared library architecture unknown is not compatible with target architecture arm.
Reading symbols from /lib/ld-linux.so.2...(no debugging symbols found)...done.
Loaded symbols for /lib/ld-linux.so.2
0x40000bd0 in ?? () from /lib/ld-linux.so.2
(gdb) l                                                                  //列出程序
20      void A2(int *p)
21      {
22              C(p);
23      }
24
25
26      int main(int argc, char **argv)
27      {
28              int a;
29              int *p = NULL;
(gdb) l
30
31              A2(&a);  // A2 > C
32              printf("a = 0x%x\n", a);
33
34              A(p);    // A > B > C
35
36              return 0;
37      }
38
(gdb) break main                                                  //在main函数这打断点
Breakpoint 1 at 0x852c: file test_debug.c, line 29.
(gdb) c                                                                  //持续运行
Continuing.

Breakpoint 1, main (argc=1, argv=0xbefcaee4) at test_debug.c:29
29              int *p = NULL;
(gdb) break test_debug.c:31                                //在31行打断点 
Breakpoint 2 at 0x8534: file test_debug.c, line 31.
(gdb) c
Continuing.

Breakpoint 2, main (argc=1, argv=0xbefcaee4) at test_debug.c:31
31              A2(&a);  // A2 > C
(gdb) step                                                             //执行一步
A2 (p=0xbefcaea0) at test_debug.c:22
22              C(p);
(gdb) step
C (p=0xbefcaea0) at test_debug.c:6
6               *p = 0x12;
(gdb) print *p                                                         //打印出*p的值
$1 = 33620
(gdb) step
7       }
(gdb) print *p
$2 = 18
(gdb) break A                                                        //在A函数打断点
Breakpoint 3 at 0x84e8: file test_debug.c, line 17.
(gdb) c
Continuing.

Breakpoint 3, A (p=0x0) at test_debug.c:17
17              B(p);
(gdb) step
B (p=0x0) at test_debug.c:12
12              C(p);
(gdb) step
C (p=0x0) at test_debug.c:6
6               *p = 0x12;
(gdb) step                                                             //出现段错误的地方

Program received signal SIGSEGV, Segmentation fault.
0x000084ac in C (p=0x0) at test_debug.c:6
6               *p = 0x12;

 其他常见的命令

2.2 core dump

让程序在开发板上直接运行,当它发生错误时,令它产生core dump文件,然后使用gdb根据core dump文件找到发生错误的地方。在ARM板上先执行ulimit -c unlimited命令:ulimit -c用来查看/设置core文件的大小上限,如果上限为0,程序出错时就不会产生core dump文件,因此要设置为unlimited。之后执行应用程序,程序出错时会在当前目录下生成名为core的文件

/ # ulimit -c unlimited
/ # ./test_debug
a = 0x12
Segmentation fault (core dumped)

将core文件拷贝到PC上,执行/bin/arm-linux-gdb ./test_debug ./core 

book$ sudo arm-linux-gdb ./test_debug ./core
GNU gdb (GDB) 7.4
Copyright (C) 2012 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
and "show warranty" for details.
This GDB was configured as "--host=x86_64-unknown-linux-gnu --target=arm-linux".
For bug reporting instructions, please see:
<http://www.gnu.org/software/gdb/bugs/>...
Reading symbols from /home/book/xiaoma/dev_demo/28th_app_debug/test_debug...done.
[New LWP 776]

warning: `/lib/ld-linux.so.2': Shared library architecture unknown is not compatible with target architecture arm.

warning: Could not load shared library symbols for /lib/libc.so.6.
Do you need "set solib-search-path" or "set sysroot"?
Core was generated by `./test_debug'.
Program terminated with signal 11, Segmentation fault.   //程序终结,收到段错误的信号11
#0  0x000084ac in C (p=0x0) at test_debug.c:6               //在C函数中*p = 0x12这条指令出错
6               *p = 0x12;
(gdb) bt                                                                              //查看函数调用关系  
#0  0x000084ac in C (p=0x0) at test_debug.c:6
#1  0x000084d0 in B (p=0x0) at test_debug.c:12
#2  0x000084f0 in A (p=0x0) at test_debug.c:17
#3  0x00008554 in main (argc=1, argv=0xbec10ee4) at test_debug.c:34

三、修改内核打印用户段错误信息

驱动调试过程中引入过直接使用物理地址,执行测试程序firstdrvtest进而出现段错误,那些出错是由内核打印出来的信息,根据那些信息可以引申出各种调试方法,在gdb调试中调试应用程序test_debug只打印出出现段错误,并没有更多的信息,因此我们可以修改内核,让内核打印出更多的信息

/ # ./test_debug
a = 0x12
Segmentation fault

执行测试程序firstdrvtest出现段错误时,内核打印出来的第一句为"Unable to handle kernel %s at virtual address"。在内核源码中搜索这句话,它出现在arch\arm\mm\fault.c的__do_kernel_fault函数中,__do_kernel_fault被do_bad_area调用。do_bad_area中区分用户态和内核态:用户态出错时进入__do_user_fault函数。因此想让内核打印出用户程序的出错信息,需要配置内核选项CONFIG_DEBUG_USER,还需要设置user_debug参数(在uboot的启动参数bootargs中设置),设置为0xff,使所有的打印条件都满足

/*
 * Handle a fault that happened in kernel mode (called from do_bad_area()
 * when user_mode(regs) is false).
 *
 * If fixup_exception() reports that the fault was handled, return quietly.
 * Otherwise print the "Unable to handle kernel ..." message, dump the page
 * table entry for addr, call die() and finish with do_exit(SIGKILL).
 */
static void
__do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
		  struct pt_regs *regs)
{
	/*
	 * Are we prepared to handle this kernel fault?
	 */
	if (fixup_exception(regs))
		return;

	/*
	 * No handler, we'll have to terminate things with extreme prejudice.
	 */
	bust_spinlocks(1);
	/* Addresses below PAGE_SIZE are reported as a NULL pointer dereference. */
	printk(KERN_ALERT
		"Unable to handle kernel %s at virtual address %08lx\n",
		(addr < PAGE_SIZE) ? "NULL pointer dereference" :
		"paging request", addr);

	show_pte(mm, addr);
	die("Oops", regs, fsr);
	bust_spinlocks(0);
	do_exit(SIGKILL);
}

---------------------------------------

/*
 * Dispatch a bad-address fault according to the mode the CPU was in:
 * user-mode faults go to __do_user_fault() with SIGSEGV/SEGV_MAPERR,
 * kernel-mode faults go to __do_kernel_fault().
 */
void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
	struct task_struct *tsk = current;
	struct mm_struct *mm = tsk->active_mm;

	/*
	 * If we are in kernel mode at this point, we
	 * have no context to handle this fault with.
	 */
	if (user_mode(regs))
		__do_user_fault(tsk, addr, fsr, SIGSEGV, SEGV_MAPERR, regs);
	else
		__do_kernel_fault(mm, addr, fsr, regs);
}

---------------------------------------

/*
 * Handle a fault that happened in user mode: record the fault details in
 * tsk->thread and deliver signal `sig` (with si_code `code` and si_addr
 * `addr`) to the task via force_sig_info().
 *
 * Diagnostics are printed only when the kernel is built with
 * CONFIG_DEBUG_USER and the UDBG_SEGV bit is set in user_debug.
 */
static void
__do_user_fault(struct task_struct *tsk, unsigned long addr,
		unsigned int fsr, unsigned int sig, int code,
		struct pt_regs *regs)
{
	struct siginfo si;

#ifdef CONFIG_DEBUG_USER
	/* Optional debug output: fault summary, page table entry, registers. */
	if (user_debug & UDBG_SEGV) {
		printk(KERN_DEBUG "%s: unhandled page fault (%d) at 0x%08lx, code 0x%03x\n",
		       tsk->comm, sig, addr, fsr);
		show_pte(tsk->mm, addr);
		show_regs(regs);
	}
#endif

	tsk->thread.address = addr;
	tsk->thread.error_code = fsr;
	tsk->thread.trap_no = 14;
	si.si_signo = sig;
	si.si_errno = 0;
	si.si_code = code;
	si.si_addr = (void __user *)addr;
	force_sig_info(sig, &si, tsk);
}

----------------------------------------

unsigned int user_debug;

static int __init user_debug_setup(char *str)
{
	get_option(&str, &user_debug);
	return 1;
}
__setup("user_debug=", user_debug_setup);
#endif

配置内核,搜索DEBUG_USER,发现已经配置好了就不需要再配置,下面是其配置的路径

修改uboot启动参数加上user_debug=0xff

set bootargs noinitrd root=/dev/nfs nfsroot=192.168.0.106:/work/nfs_root/first_fs ip=192.168.0.19:192.168.0.106:192.168.0.1:255.255.255.0::eth0:off init=/linuxrc console=ttySAC0 user_debug=0xff

启动内核,重新执行test_debug程序,出现了打印信息,PC指向了84ac的地方

/ # ./test_debug
a = 0x12
pgd = c3cac000
[00000000] *pgd=30026031, *pte=00000000, *ppte=00000000

Pid: 771, comm:           test_debug
CPU: 0    Not tainted  (2.6.22.6 #1)
PC is at 0x84ac                                                           
LR is at 0x84d0
pc : [<000084ac>]    lr : [<000084d0>]    psr: 60000010
sp : bef4fe60  ip : bef4fe74  fp : bef4fe70
r10: 4013365c  r9 : 00000000  r8 : 00008514
r7 : 00000001  r6 : 000085cc  r5 : 00008568  r4 : bef4fee4
r3 : 00000012  r2 : 00000000  r1 : 00001000  r0 : 00000000
Flags: nZCv  IRQs on  FIQs on  Mode USER_32  Segment user
Control: c000717f  Table: 33cac000  DAC: 00000015
[<c002cd1c>] (show_regs+0x0/0x4c) from [<c0031a98>] (__do_user_fault+0x5c/0xa4)
 r4:c04a6ac0
[<c0031a3c>] (__do_user_fault+0x0/0xa4) from [<c0031d38>] (do_page_fault+0x1dc/0x20c)
 r7:c00271e0 r6:c001eaac r5:c04a6ac0 r4:ffffffec
[<c0031b5c>] (do_page_fault+0x0/0x20c) from [<c002b224>] (do_DataAbort+0x3c/0xa0)
[<c002b1e8>] (do_DataAbort+0x0/0xa0) from [<c002be48>] (ret_from_exception+0x0/0x10)
Exception stack(0xc000dfb0 to 0xc000dff8)
dfa0:                                     00000000 00001000 00000000 00000012
dfc0: bef4fee4 00008568 000085cc 00000001 00008514 00000000 4013365c bef4fe70
dfe0: bef4fe74 bef4fe60 000084d0 000084ac 60000010 ffffffff
 r8:00008514 r7:00000001 r6:000085cc r5:00008568 r4:c039bfc8
Segmentation fault

反汇编应用程序:arm-linux-objdump -D test_debug > test_debug.dis,搜索84ac,在打印信息中r3是00000012,r2是0地址,把r3存到0地址去,非法地址所以出错

00008490 <C>:
    8490:	e1a0c00d 	mov	ip, sp
    8494:	e92dd800 	stmdb	sp!, {fp, ip, lr, pc}
    8498:	e24cb004 	sub	fp, ip, #4	; 0x4
    849c:	e24dd004 	sub	sp, sp, #4	; 0x4
    84a0:	e50b0010 	str	r0, [fp, #-16]
    84a4:	e51b2010 	ldr	r2, [fp, #-16]
    84a8:	e3a03012 	mov	r3, #18	; 0x12
    84ac:	e5823000 	str	r3, [r2]   // r3=0x12, r2=0
    84b0:	e89da808 	ldmia	sp, {r3, fp, sp, pc}

但在这些调试信息中没有把栈打印出来。同样的道理,在内核源码中搜索内核出错时打印的"Stack: ",它在arch\arm\kernel\traps.c的__die函数中,其中的判断表示:如果是用户模式(且不在中断中)就不打印栈信息

/*
 * Print the "Internal error" report: message with error code and a running
 * die counter, the module list, the registers and the process name/pid.
 *
 * The stack dump, backtrace and instruction dump are printed only when the
 * fault did not come from user mode, or when we are in interrupt context.
 */
static void __die(const char *str, int err, struct thread_info *thread, struct pt_regs *regs)
{
	struct task_struct *tsk = thread->task;
	static int die_counter;

	printk("Internal error: %s: %x [#%d]" S_PREEMPT S_SMP "\n",
	       str, err, ++die_counter);
	print_modules();
	__show_regs(regs);
	printk("Process %s (pid: %d, stack limit = 0x%p)\n",
		tsk->comm, tsk->pid, thread + 1);

	/* User-mode faults outside interrupt context skip the stack dump. */
	if (!user_mode(regs) || in_interrupt()) {
		dump_mem("Stack: ", regs->ARM_sp,
			 THREAD_SIZE + (unsigned long)task_stack_page(tsk));
		dump_backtrace(regs, tsk);
		dump_instr(regs);
	}
}

模仿并在__do_user_fault函数中把栈信息打印出来

/*
 * Handle a fault that happened in user mode: record the fault details in
 * tsk->thread and deliver signal `sig` to the task via force_sig_info().
 *
 * When the kernel is built with CONFIG_DEBUG_USER and the UDBG_SEGV bit is
 * set in user_debug, this variant also dumps up to 1024 words of the user
 * stack, starting at the saved user sp, eight words per line. The dump stops
 * early at the first word that cannot be read.
 */
static void
__do_user_fault(struct task_struct *tsk, unsigned long addr,
		unsigned int fsr, unsigned int sig, int code,
		struct pt_regs *regs)
{
	struct siginfo si;

#ifdef CONFIG_DEBUG_USER
	unsigned long val;
	int i = 0;

	if (user_debug & UDBG_SEGV) {
		printk(KERN_DEBUG "%s: unhandled page fault (%d) at 0x%08lx, code 0x%03x\n",
		       tsk->comm, sig, addr, fsr);
		show_pte(tsk->mm, addr);
		show_regs(regs);

		printk("Stack: \n");
		while (i < 1024) {
			/* copy_from_user() returns non-zero when the word is not readable */
			if (copy_from_user(&val, (const void __user *)(regs->ARM_sp + i * 4), 4))
				break;
			printk("%08lx ", val);
			i++;
			if (i % 8 == 0)
				printk("\n");
		}
		printk("\n End of Statck\n");
	}
#endif

	tsk->thread.address = addr;
	tsk->thread.error_code = fsr;
	tsk->thread.trap_no = 14;
	si.si_signo = sig;
	si.si_errno = 0;
	si.si_code = code;
	si.si_addr = (void __user *)addr;
	force_sig_info(sig, &si, tsk);
}

 重新编译内核并启动新内核,执行test_debug,就看到了栈信息

/ # ./test_debug
a = 0x12
pgd = c3cc8000
[00000000] *pgd=3047f031, *pte=00000000, *ppte=00000000

Pid: 737, comm:           test_debug
CPU: 0    Not tainted  (2.6.22.6 #17)
PC is at 0x84ac
LR is at 0x84d0
pc : [<000084ac>]    lr : [<000084d0>]    psr: 60000010
sp : be901e60  ip : be901e74  fp : be901e70
r10: 4013365c  r9 : 00000000  r8 : 00008514
r7 : 00000001  r6 : 000085cc  r5 : 00008568  r4 : be901ee4
r3 : 00000012  r2 : 00000000  r1 : 00001000  r0 : 00000000
Flags: nZCv  IRQs on  FIQs on  Mode USER_32  Segment user
Control: c000717f  Table: 33cc8000  DAC: 00000015
[<c002bd9c>] (show_regs+0x0/0x4c) from [<c0030aa0>] (__do_user_fault+0x64/0x144)
 r4:c04b1d40
[<c0030a3c>] (__do_user_fault+0x0/0x144) from [<c0030dd8>] (do_page_fault+0x1dc/0x20c)
[<c0030bfc>] (do_page_fault+0x0/0x20c) from [<c002a2b4>] (do_DataAbort+0x3c/0xa0)
[<c002a278>] (do_DataAbort+0x0/0xa0) from [<c002aec8>] (ret_from_exception+0x0/0x10)
Exception stack(0xc0487fb0 to 0xc0487ff8)
7fa0:                                     00000000 00001000 00000000 00000012
7fc0: be901ee4 00008568 000085cc 00000001 00008514 00000000 4013365c be901e70
7fe0: be901e74 be901e60 000084d0 000084ac 60000010 ffffffff
 r8:00008514 r7:00000001 r6:000085cc r5:00008568 r4:c0385208
Stack:
00000000 be901e84 be901e74 000084d0 000084a0 00000000 be901e98 be901e88
000084f0 000084c4 00000000 be901eb8 be901e9c 00008554 000084e4 00000000
00000012 be901ee4 00000001 00000000 be901ebc 40034f14 00008524 00000000
00000000 0000839c 00000000 00000000 4001d594 000083c4 000085cc 4000c02c
be901ee4 be901f8f 00000000 be901f9c be901fa6 be901fad be901fb8 be901fdb
be901fe9 00000000 00000010 00000003 00000006 00001000 00000011 00000064
00000003 00008034 00000004 00000020 00000005 00000006 00000007 40000000
00000008 00000000 00000009 0000839c 0000000b 00000000 0000000c 00000000
0000000d 00000000 0000000e 00000000 00000017 00000000 0000000f be901f8b
00000000 00000000 76000000 2e006c34 7365742f 65645f74 00677562 52455355
6f6f723d 4f480074 2f3d454d 52455400 74763d4d 00323031 48544150 62732f3d
2f3a6e69 2f727375 6e696273 69622f3a 752f3a6e 622f7273 53006e69 4c4c4548
69622f3d 68732f6e 44575000 2e002f3d 7365742f 65645f74 00677562 00000000

 End of Statck
Segmentation fault

根据反汇编可以知道回溯函数调用关系(参考驱动调试),最终回溯到main函数中lr的值为40034f14,此地址为动态库的地址,通过gdb来确定地址,输入info file命令查看,然后输入回车把剩下的地址都打印出来,但是动态链接,已经退出的程序不好确定动态库的地址

(gdb) info file
Symbols from "/home/book/xiaoma/dev_demo/28th_app_debug/test_debug".
Remote serial target in gdb-specific protocol:
Debugging a target over a serial line.
        While running this, GDB does not access memory from...
Local exec file:
        `/home/book/xiaoma/dev_demo/28th_app_debug/test_debug', file type elf32-littlearm.
        Entry point: 0x839c
        0x000080f4 - 0x00008107 is .interp
        0x00008108 - 0x00008128 is .note.ABI-tag
        0x00008128 - 0x00008168 is .hash
        0x00008168 - 0x00008218 is .dynsym
        0x00008218 - 0x000082c6 is .dynstr
        0x000082c6 - 0x000082dc is .gnu.version
        0x000082dc - 0x000082fc is .gnu.version_r
        0x000082fc - 0x00008324 is .rel.dyn
        0x00008324 - 0x00008344 is .rel.plt
        0x00008344 - 0x00008358 is .init
        0x00008358 - 0x0000839c is .plt
        0x0000839c - 0x0000866c is .text
        0x0000866c - 0x00008678 is .fini
        0x00008678 - 0x00008688 is .rodata
        0x00010688 - 0x00010694 is .data
        0x00010694 - 0x00010698 is .eh_frame
        0x00010698 - 0x00010760 is .dynamic
        0x00010760 - 0x00010768 is .ctors
        0x00010768 - 0x00010770 is .dtors
        0x00010770 - 0x00010774 is .jcr
        0x00010774 - 0x000107a4 is .got
        0x000107a4 - 0x000107a8 is .bss
        0x40000114 - 0x40000138 is .note.gnu.build-id in /lib/ld-linux.so.2
        0x40000138 - 0x400001fc is .hash in /lib/ld-linux.so.2
        0x400001fc - 0x400002e4 is .gnu.hash in /lib/ld-linux.so.2
        0x400002e4 - 0x400004c4 is .dynsym in /lib/ld-linux.so.2
        0x400004c4 - 0x4000066d is .dynstr in /lib/ld-linux.so.2
        0x4000066e - 0x400006aa is .gnu.version in /lib/ld-linux.so.2
        0x400006ac - 0x40000774 is .gnu.version_d in /lib/ld-linux.so.2
        0x40000774 - 0x400007dc is .rel.dyn in /lib/ld-linux.so.2
        0x400007dc - 0x400007fc is .rel.plt in /lib/ld-linux.so.2
        0x40000800 - 0x40000850 is .plt in /lib/ld-linux.so.2
        0x40000850 - 0x40000858 is .plt.got in /lib/ld-linux.so.2
        0x40000860 - 0x400198dd is .text in /lib/ld-linux.so.2
        0x400198e0 - 0x4001db60 is .rodata in /lib/ld-linux.so.2
        0x4001db60 - 0x4001e224 is .eh_frame_hdr in /lib/ld-linux.so.2
        0x4001e224 - 0x400221c8 is .eh_frame in /lib/ld-linux.so.2
        0x40023c80 - 0x40023f40 is .data.rel.ro in /lib/ld-linux.so.2
        0x40023f40 - 0x40023ff8 is .dynamic in /lib/ld-linux.so.2
---Type <return> to continue, or q <return> to quit---
        0x40023ff8 - 0x40024000 is .got in /lib/ld-linux.so.2
        0x40024000 - 0x4002401c is .got.plt in /lib/ld-linux.so.2
        0x40024020 - 0x40024858 is .data in /lib/ld-linux.so.2
        0x40024858 - 0x40024918 is .bss in /lib/ld-linux.so.2

但是根据静态链接来编译test_debug.c,再根据其反汇编就可以得到回溯到其调用关系,回溯之后main是被__libc_start_main这个函数调用的

arm-linux-gcc -o test_debug test_debug.c -static
arm-linux-objdump -D test_debug > test_debug.dis

四、自制系统调用

4.1 搭建

 

应用程序调用C库(glibc)里的open、read、write等函数进入内核态,这些函数的核心就是执行swi #val1、swi #val2、swi #val3这样的指令。swi指令会触发异常,CPU跳到异常向量的入口vector_swi,在里面取出"导致异常的那条指令",得到其中的val,再根据val调用对应的函数(sys_open、sys_read、sys_write等一大堆),这些函数的指针放在一个数组中。代码在arch\arm\kernel\entry-common.S中,个人的内核源码中CONFIG_OABI_COMPAT、CONFIG_AEABI、CONFIG_ARM_THUMB都没有配置,所以走最后一个#else分支:ldr    scno, [lr, #-4]这条指令把swi指令的机器码读到scno寄存器中,然后检查它是否是swi指令(机器码的bit27~bit24都为1就表示是swi指令);后面的sys_call_table就是那些函数的数组;bic    scno, scno, #0xff000000清除高八位,剩下的值就是val,再与__NR_SYSCALL_BASE异或去掉基值得到数组下标,最终取出函数指针赋给pc,调用对应的sys函数。val值可能从0开始,也可能从0x900000开始(取决于__NR_SYSCALL_BASE)

ENTRY(vector_swi)
	sub	sp, sp, #S_FRAME_SIZE
	stmia	sp, {r0 - r12}			@ Calling r0 - r12
	add	r8, sp, #S_PC
	stmdb	r8, {sp, lr}^			@ Calling sp, lr
	mrs	r8, spsr			@ called from non-FIQ mode, so ok.
	str	lr, [sp, #S_PC]			@ Save calling PC
	str	r8, [sp, #S_PSR]		@ Save CPSR
	str	r0, [sp, #S_OLD_R0]		@ Save OLD_R0
	zero_fp

	/*
	 * Get the system call number.
	 */

#if defined(CONFIG_OABI_COMPAT)

	/*
	 * If we have CONFIG_OABI_COMPAT then we need to look at the swi
	 * value to determine if it is an EABI or an old ABI call.
	 */
#ifdef CONFIG_ARM_THUMB
	tst	r8, #PSR_T_BIT
	movne	r10, #0				@ no thumb OABI emulation
	ldreq	r10, [lr, #-4]			@ get SWI instruction
#else
	ldr	r10, [lr, #-4]			@ get SWI instruction
  A710(	and	ip, r10, #0x0f000000		@ check for SWI		)
  A710(	teq	ip, #0x0f000000						)
  A710(	bne	.Larm710bug						)
#endif

#elif defined(CONFIG_AEABI)

	/*
	 * Pure EABI user space always put syscall number into scno (r7).
	 */
  A710(	ldr	ip, [lr, #-4]			@ get SWI instruction	)
  A710(	and	ip, ip, #0x0f000000		@ check for SWI		)
  A710(	teq	ip, #0x0f000000						)
  A710(	bne	.Larm710bug						)

#elif defined(CONFIG_ARM_THUMB)

	/* Legacy ABI only, possibly thumb mode. */
	tst	r8, #PSR_T_BIT			@ this is SPSR from save_user_regs
	addne	scno, r7, #__NR_SYSCALL_BASE	@ put OS number in
	ldreq	scno, [lr, #-4]

#else

	/* Legacy ABI only. */
	ldr	scno, [lr, #-4]			@ get SWI instruction
  A710(	and	ip, scno, #0x0f000000		@ check for SWI		)
  A710(	teq	ip, #0x0f000000						)
  A710(	bne	.Larm710bug						)

#endif

#ifdef CONFIG_ALIGNMENT_TRAP
	ldr	ip, __cr_alignment
	ldr	ip, [ip]
	mcr	p15, 0, ip, c1, c0		@ update control register
#endif
	enable_irq

	get_thread_info tsk
	adr	tbl, sys_call_table		@ load syscall table pointer
	ldr	ip, [tsk, #TI_FLAGS]		@ check for syscall tracing

#if defined(CONFIG_OABI_COMPAT)

	/*
	 * If the swi argument is zero, this is an EABI call and we do nothing.
	 *
	 * If this is an old ABI call, get the syscall number into scno and
	 * get the old ABI syscall table address.
	 */
	bics	r10, r10, #0xff000000
	eorne	scno, r10, #__NR_OABI_SYSCALL_BASE
	ldrne	tbl, =sys_oabi_call_table
#elif !defined(CONFIG_AEABI)
	bic	scno, scno, #0xff000000		@ mask off SWI op-code
	eor	scno, scno, #__NR_SYSCALL_BASE	@ check OS number
#endif

	stmdb	sp!, {r4, r5}			@ push fifth and sixth args
	tst	ip, #_TIF_SYSCALL_TRACE		@ are we tracing syscalls?
	bne	__sys_trace

	cmp	scno, #NR_syscalls		@ check upper syscall limit
	adr	lr, ret_fast_syscall		@ return address
	ldrcc	pc, [tbl, scno, lsl #2]		@ call sys_* routine  @//tbl数组的基地址,scno相对号码,lsl左移2位,一个函数指针占据4个字节

-----------------------------------------

#define __NR_OABI_SYSCALL_BASE	0x900000

#if defined(__thumb__) || defined(__ARM_EABI__)
#define __NR_SYSCALL_BASE	0
#else
#define __NR_SYSCALL_BASE	__NR_OABI_SYSCALL_BASE
#endif

在内核中搜索这些系统调用函数,它们在arch\arm\kernel\calls.S中以数组(表)的形式列出。在表的末尾仿照着添加一项sys_hello,它在表中的下标(即系统调用号)是352

/* 0 */		CALL(sys_restart_syscall)
		CALL(sys_exit)
		CALL(sys_fork_wrapper)
		CALL(sys_read)
		CALL(sys_write)
/* 5 */		CALL(sys_open)
		CALL(sys_close)
		CALL(sys_ni_syscall)		/* was sys_waitpid */
		CALL(sys_creat)

...
/* 340 */	CALL(sys_splice)
		CALL(sys_sync_file_range2)
		CALL(sys_tee)
		CALL(sys_vmsplice)
		CALL(sys_move_pages)
/* 345 */	CALL(sys_getcpu)
		CALL(sys_ni_syscall)		/* eventually epoll_pwait */
		CALL(sys_kexec_load)
		CALL(sys_utimensat)
		CALL(sys_signalfd)
/* 350 */	CALL(sys_timerfd)
		CALL(sys_eventfd)
        CALL(sys_hello)
....

系统调用函数的声明在include\linux\syscalls.h中,函数的实现(如sys_write)在fs\read_write.c中;仿照它们声明并实现一个sys_hello函数

----------------------------------------
syscalls.h
asmlinkage ssize_t sys_readahead(int fd, loff_t offset, size_t count);
asmlinkage ssize_t sys_readv(unsigned long fd,
				const struct iovec __user *vec,
				unsigned long vlen);
asmlinkage ssize_t sys_write(unsigned int fd, const char __user *buf,
				size_t count);

asmlinkage void sys_hello(const char __user * buf, size_t count);

----------------------------------------
read_write.c
/*
 * write(2) entry point: write `count` bytes from the user buffer `buf` to
 * the file referred to by descriptor `fd`, at the file's current position.
 *
 * Returns whatever vfs_write() returns, or -EBADF when fget_light() finds
 * no file for `fd`.
 */
asmlinkage ssize_t sys_write(unsigned int fd, const char __user * buf, size_t count)
{
	struct file *file;
	ssize_t ret = -EBADF;
	int fput_needed;

	file = fget_light(fd, &fput_needed);
	if (file) {
		/* Work on a local copy of the position, then store it back. */
		loff_t pos = file_pos_read(file);
		ret = vfs_write(file, buf, count, &pos);
		file_pos_write(file, pos);
		fput_light(file, fput_needed);
	}

	return ret;
}

/*
 * Demo system call: copy a string from user space and print it with printk.
 *
 * @buf:   user-space pointer to the text; nothing is done when it is NULL.
 * @count: number of bytes to copy from @buf. At most sizeof(ker_buf) - 1
 *         bytes are copied so there is always room for the terminator.
 *
 * The user buffer does not have to be NUL-terminated: the kernel copy is
 * terminated right after the bytes actually copied, so no uninitialised
 * stack bytes are printed.
 */
asmlinkage void sys_hello(const char __user * buf, size_t count)
{
	char ker_buf[100];
	size_t len;

	if (!buf)
		return;

	len = (count < sizeof(ker_buf) - 1) ? count : sizeof(ker_buf) - 1;

	/* copy_from_user() returns the number of bytes it could NOT copy */
	if (copy_from_user(ker_buf, buf, len)) {
		printk("sys_hello : copy_from_user failed\n");
		return;
	}

	ker_buf[len] = '\0';
	printk("sys_hello : %s\n", ker_buf);
}

在C库中参考sysdeps\unix\sysv\linux\arm\brk.c,里面用嵌入汇编的方法实现系统调用:a1是r0的别名;%1代表第一个输入操作数addr("r"表示放在寄存器中),%2代表第二个输入操作数SYS_ify (brk)("i"表示立即数);最后把a1(系统调用的返回值)赋给%0,也就是newbrk变量,"=r"中的=表示该操作数用于接收输出结果

/*
 * Issue the brk system call with `addr` via an inline-assembly swi
 * instruction and store the value the kernel hands back in __curbrk.
 *
 * Returns 0 on success. If the returned break is below `addr`, errno is
 * set to ENOMEM and -1 is returned.
 */
int
__brk (void *addr)
{
  void *newbrk;

  asm ("mov a1, %1\n"	/* save the argument in r0 */
       "swi %2\n"	/* do the system call */
       "mov %0, a1;"	/* keep the return value */
       : "=r"(newbrk)  /* output operand (%0) */
       : "r"(addr), "i" (SYS_ify (brk))  /* input operands (%1, %2) */
       : "a1"); /* clobber list: a1 is overwritten by the asm */

  __curbrk = newbrk;

  if (newbrk < addr)
    {
      __set_errno (ENOMEM);
      return -1;
    }

  return 0;
}

仿照这个方法,写一个应用程序来调用我们的系统调用,传入buf和count两个参数。SYS_ify是把系统调用名转换成系统调用号的宏,其值为基值加上编号,对于hello就是基值加上352;返回值这里不需要管。嵌入汇编中有三个冒号:第一个冒号后面是输出(即使没有输出,冒号也不能省略),第二个后面是输入,第三个后面是过程中会被改变的寄存器。编译时找不到sysdep.h这个头文件,在C库中也没有查到,这里直接不包含它;唯一不确定的是__NR_SYSCALL_BASE这个宏,在内核中它是0x900000,这里直接照此定义

#include <errno.h>
#include <unistd.h>
//#include <sysdep.h>

//#if defined(__thumb__) || defined(__ARM_EABI__)
//#define __NR_SYSCALL_BASE	0
//#else
#define __NR_SYSCALL_BASE	0x900000
//#endif

void hello(char *buf, int count)
{
	/* swi */

	asm ("mov r0, %0\n"   /* save the argment in r0 */
	     "mov r1, %1\n"   /* save the argment in r0 */
		 "swi %2\n"   /* do the system call */
		 :	
		 : "r"(buf), "r"(count), "i" (__NR_SYSCALL_BASE + 352)
		 : "r0", "r1");
}

int main(int argc, char **argv)
{
	printf("in app, call hello\n");
	hello("xiaoma", 6);
	return 0;
}

重新编译内核,使用新内核启动,执行应用程序,看到了sys_hello

/ # ./test_system_call
in app, call hello
sys_hello : xiaoma

4.2 使用 

对于gdb和gdbserver需要依赖PC和单板都在有网络的情况,使用自制的系统调用打断点,这方法不常用,用这方法可以用来处理特难的问题

  1. 修改应用程序的可执行文件,替换"某个位置"的代码为swi val指令
  2. 执行程序
  3. 进入sys_hello,在sys_hello打印信息,执行原来的指令,返回

写一个简单的应用程序test_sc_sleep.c,如下

#include <stdio.h>
#include <unistd.h>

/* Global counter printed and incremented once per loop iteration in C(). */
int cnt = 0;

/*
 * Endless loop: print cnt and the local i, then cnt++ and i += 2,
 * then sleep for 5 seconds. Never returns.
 */
void C(void)
{
	int i = 0;

	while (1)
	{
		printf("Hello, cnt = %d, i = %d\n", cnt, i);
		cnt++;
		i = i + 2;
        sleep(5);
	}
}

/* B and A only forward the call, so the chain is main > A > B > C. */
void B(void)
{
	C();
}


void A(void)
{
	B();
}

int main(int argc, char **argv)
{
	A();
	return 0;
}

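替换:先看反汇编确定机器码。在test_sc_sleep.dis中,C函数里i = i + 2对应的指令机器码为e2833002,把它替换为swi指令,相当于在i = i + 2的地方打个断点。swi指令的机器码可以通过反汇编之前的test_system_call得到,为ef900160(即swi 0x900160,0x900000是基值,0x160=352)。在可执行文件test_sc_sleep中搜索机器码e2833002(文件中按小端字节序存放,为02 30 83 e2),替换为ef900160(60 01 90 ef),另存为test_sc_sleep_swi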

000084c0 <C>:
    84c0:	e1a0c00d 	mov	ip, sp
    84c4:	e92dd800 	stmdb	sp!, {fp, ip, lr, pc}
    84c8:	e24cb004 	sub	fp, ip, #4	; 0x4
    84cc:	e24dd004 	sub	sp, sp, #4	; 0x4
    84d0:	e3a03000 	mov	r3, #0	; 0x0
    84d4:	e50b3010 	str	r3, [fp, #-16]
    84d8:	e59f3038 	ldr	r3, [pc, #56]	; 8518 <.text+0x14c>
    84dc:	e59f0038 	ldr	r0, [pc, #56]	; 851c <.text+0x150>
    84e0:	e5931000 	ldr	r1, [r3]
    84e4:	e51b2010 	ldr	r2, [fp, #-16]
    84e8:	ebffffb1 	bl	83b4 <.text-0x18>
    84ec:	e59f2024 	ldr	r2, [pc, #36]	; 8518 <.text+0x14c>
    84f0:	e59f3020 	ldr	r3, [pc, #32]	; 8518 <.text+0x14c>
    84f4:	e5933000 	ldr	r3, [r3]
    84f8:	e2833001 	add	r3, r3, #1	; 0x1
    84fc:	e5823000 	str	r3, [r2]
    8500:	e51b3010 	ldr	r3, [fp, #-16]
    8504:	e2833002 	add	r3, r3, #2	; 0x2
                        // 因为这条指令比较简单
                        // 把它替换为swi指令"ef900160        swi     0x00900160"
    
    8508:	e50b3010 	str	r3, [fp, #-16]
    850c:	e3a00005 	mov	r0, #5	; 0x5
    8510:	ebffffaa 	bl	83c0 <.text-0xc>
    8514:	eaffffef 	b	84d8 <C+0x18>
    8518:	000107c8 	andeq	r0, r1, r8, asr #15
    851c:	00008688 	andeq	r8, r0, r8, lsl #13


----------------

000107c8 <cnt>:
   107c8:	00000000 	andeq	r0, r0, r0
Disassembly of section .comment:

修改系统调用sys_hello,让它打印出变量的值。在test_sc_sleep的反汇编里,全局变量cnt的地址为000107c8(C函数末尾0x8518处的字面量)。被替换掉的指令add    r3, r3, #2需要在sys_hello里代为执行,为此要找到当前进程进入内核时保存的寄存器,这些寄存器的值存放在pt_regs结构中;在内核源码中搜索pt_regs,再在结果中搜索current(这里使用source insight),可以找到task_pt_regs这个宏。从反汇编可以看出:cnt加1后的值已经先用str r3, [r2]写回内存,之后才执行到swi指令,因此运行test_sc_sleep_swi(替换了swi指令的可执行程序)时,应用程序打印cnt为0,而sys_hello读到并打印出来的是1;对于局部变量i,是先执行swi指令(代替原来的加2),之后才由str r3, [fp, #-16]把新值写回栈中,所以sys_hello读到的还是加2之前的值,与应用程序刚打印的i一致

/*
 * Demo system call used as a hand-made breakpoint for test_sc_sleep.
 *
 * The "add r3, r3, #2" instruction at 0x8504 in the application was replaced
 * by a swi that lands here. This function prints two of the application's
 * variables and then performs the replaced add on the saved user registers.
 *
 * @buf, @count: unused by this version.
 *
 * The user-space addresses are hard-coded from the disassembly of
 * test_sc_sleep, so this only works with that exact binary.
 */
asmlinkage void sys_hello(const char __user * buf, size_t count)
{
	static int cnt = 0;	/* only used by the disabled restore step below */
	int val;
	int ret;		/* only used by the disabled restore step below */
	struct pt_regs *regs;

	/* 1. Print debug information: the application's global cnt. */
	/* From the disassembly of test_sc_sleep: 000107c8 <cnt>: */
	if (copy_from_user(&val, (const void __user *)0x000107c8, 4))
		printk("sys_hello: cannot read cnt\n");
	else
		printk("sys_hello: cnt = %d\n", val);

	/* 2. Execute the replaced instruction: add	r3, r3, #2	; 0x2 */
	/* task_pt_regs() gives the saved user registers of the current task. */
	regs = task_pt_regs(current);
	regs->ARM_r3 += 2;

	/* Print the local variable i; the disassembly shows it at [fp, #-16]. */
	if (copy_from_user(&val, (const void __user *)(regs->ARM_fp - 16), 4))
		printk("sys_hello: cannot read i\n");
	else
		printk("sys_hello: i = %d\n", val);

#if 0
	/* 3. After five hits, restore the original instruction and return. */
	if (++cnt == 5)
	{
		/* 0x8504 is the address of the patched instruction in the disassembly */
		if (copy_from_user(&val, (const void __user *)0x8504, 4))
			printk("sys_hello: cannot read code at 0x8504\n");
		else
			printk("[0x8504] code = 0x%x\n", val);
		printk("regs->ARM_lr  = 0x%lx\n", regs->ARM_lr);
		val = 0xe2833002;
		//if (copy_to_user((const void __user *)0x8504, &val, 4))
		//	printk("restore code error!\n");
		ret = access_process_vm(current, 0x8504, &val, 4, 1);
		printk("access_process_vm ret = %d\n", ret);
		
		cnt = 0;
	}
#endif
	
	return;
}

执行修改了swi执行的test_sc_sleep_swi,结果如下,能将我们需要的值打印出来

/ # ./test_sc_sleep_swi

Hello, cnt = 0, i = 0
sys_hello: cnt = 1
sys_hello: i = 0
Hello, cnt = 1, i = 2
sys_hello: cnt = 2
sys_hello: i = 2
Hello, cnt = 2, i = 4
sys_hello: cnt = 3
sys_hello: i = 4

  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值