上一篇博文《电池温度检测原理和示例代码》中, 由于驱动要使用对数函数, 从网上参考了一个实现:
/*
 * ln - natural logarithm of a, computed without libm.
 *
 * Evaluates ln(a) = 2 * sum_{k=0..N} x^(2k+1) / (2k+1), x = (a-1)/(a+1),
 * with Horner's rule.  The truncated series is only accurate when a is
 * close to 1, so a is first scaled by powers of two into
 * [1/sqrt(2), sqrt(2)] and the factored-out exponent is added back as
 * e * ln(2).
 *
 * Returns NaN for a < 0 or a NaN argument, -infinity for a == 0 and
 * +infinity for a == +infinity.  The special values are produced with
 * plain arithmetic and therefore rely on IEEE-754 semantics.
 */
double ln(double a)
{
    static const double LN2     = 0.69314718055994530942;
    static const double SQRT2   = 1.41421356237309504880;
    static const double SQRT1_2 = 0.70710678118654752440;
    int N = 15;
    int k, nk;
    int e = 0;              /* power of two factored out of a */
    double x, xx, y;
    double zero = 0.0;

    if (a != a)             /* NaN in, NaN out */
        return a;
    if (a < 0.0)
        return zero / zero; /* logarithm of a negative number: NaN */
    if (a == 0.0)
        return -1.0 / zero; /* ln(0) = -infinity */
    if (a + a == a)         /* only +infinity satisfies this for a > 0 */
        return a;

    /* Range reduction; multiplying by 0.5 or 2.0 is exact in binary FP. */
    while (a > SQRT2) {
        a *= 0.5;
        e++;
    }
    while (a < SQRT1_2) {
        a *= 2.0;
        e--;
    }

    x = (a-1)/(a+1);
    xx = x*x;
    nk = 2*N+1;
    y = 1.0/nk;
    for (k = N; k > 0; k--) {
        nk = nk - 2;
        y = 1.0/nk + xx*y;
    }
    return 2.0*x*y + e*LN2;
}
尴尬的是当时内核不支持浮点运算, 所以停滞了; 时间紧迫, 只能折中先列出各个温度和电压的对应表, 通过查表方式处理。今天着重了解一下内核使用浮点运算的“前世今生”。
一、早期处理器
由于早期处理器硬件压根没有浮点运算功能, 所以编译器自然不会编译出对应指令, 但是确实有需要的场景, 无论应用程序还是内核驱动, 所以出现了浮点模拟器, 具体配置在:
原理很简单, 就是编译器对浮点运算代码还是编译对应“指令”, 当运行时ARM指令集没有该“指令”从而导致指令异常, 然后在异常处理函数中调用浮点模拟器函数计算该“指令”并返回结果
优点不言而喻是能够支持浮点需求场景, 对应用程序还是驱动都透明;缺点也显而易见, 这种“指令异常” 方式会增加系统无效负载, 影响中断响应时间等。
Linus Torvalds认为内核不应该进行浮点运算, 所以在后来的内核版本的menuconfig中我没看到这个浮点模拟器。但相比于内核, 应用程序对浮点运算的需求更大, 总不能“躺枪”也不能用吧?
所以就出现了个软浮点库, 编译器在编译应用程序发现这是一条浮点运算时会用函数进行替代! 比如上面ln()函数的x = (a-1)/(a+1); 减法 加法 最后相除都调用函数解决!
x = (a-1)/(a+1); 反汇编如下: 84c8: e3a0300f mov r3, #15 84cc: e50b3020 str r3, [fp, #-32] ; 0xffffffe0 84d0: e14b03dc ldrd r0, [fp, #-60] ; 0xffffffc4 84d4: e3a02000 mov r2, #0 84d8: e3a035ff mov r3, #1069547520 ; 0x3fc00000 84dc: e2833603 add r3, r3, #3145728 ; 0x300000 84e0: eb000188 bl 8b08 <__aeabi_dsub> 84e4: e1a02000 mov r2, r0 84e8: e1a03001 mov r3, r1 84ec: e1a04002 mov r4, r2 84f0: e1a05003 mov r5, r3 84f4: e14b03dc ldrd r0, [fp, #-60] ; 0xffffffc4 84f8: e3a02000 mov r2, #0 84fc: e3a035ff mov r3, #1069547520 ; 0x3fc00000 8500: e2833603 add r3, r3, #3145728 ; 0x300000 8504: eb000180 bl 8b0c <__adddf3> 8508: e1a02000 mov r2, r0 850c: e1a03001 mov r3, r1 8510: e1a00004 mov r0, r4 8514: e1a01005 mov r1, r5 8518: eb000301 bl 9124 <__aeabi_ddiv>
这很好理解, 毕竟应用使用浮点频率较高, 靠浮点模拟器总产生异常应用态切成内核态处理完再返回用户态, 太耗性能了。
二、现代处理器
现代处理器硬件已经支持浮点运算, 而且作为一个组件对待, 可选配, 根据需求和方案选型处理器时评估是否需要硬件支持。在STM系列中浮点硬件单元叫做FPU(Floating Point Unit),
ARM中归属协处理器那边的。所以我们知道如果要使用硬件浮点的话, 需要满足三个条件:
1. 硬件要有浮点运算单元 2. 软件要配置使能硬件浮点,比如Linux内核要选中下面配置(主要设置协处理器)
3. 编译器指定-mfloat-abi=softfp 或-mfloat-abi=hard 编译浮点指令而不是用函数替换!
同样是上面ln()函数的x = (a-1)/(a+1); 采用了硬浮点指令后反汇编代码如下:
x = (a-1)/(a+1); 反汇编如下:
84b0: ed1b6b0d vldr d6, [fp, #-52] ; 0xffffffcc 84b4: ed9f7b33 vldr d7, [pc, #204] ; 8588 <ln+0xf0> 84b8: ee365b47 vsub.f64 d5, d6, d7 84bc: ed1b6b0d vldr d6, [fp, #-52] ; 0xffffffcc 84c0: ed9f7b30 vldr d7, [pc, #192] ; 8588 <ln+0xf0> 84c4: ee366b07 vadd.f64 d6, d6, d7 84c8: ee857b06 vdiv.f64 d7, d5, d6
三、示例代码
我们分别写个应用程序和驱动程序, 且分别用软浮点和硬浮点测试其性能
1.应用程序
代码如下:
/* arm-none-linux-gnueabi-gcc -mfloat-abi=soft application_test.c -lrt -o soft.bin * arm-none-linux-gnueabi-gcc -mfloat-abi=softfp application_test.c -lrt -o softfp.bin * arm-none-linux-gnueabi-gcc -mfloat-abi=hard application_test.c -lrt fail!! */ #include<stdio.h> #include<time.h> double ln(double a) { int N = 15; int k,nk; double x,xx,y; x = (a-1)/(a+1); xx = x*x; nk = 2*N+1; y = 1.0/nk; for(k=N;k>0;k--) { nk = nk - 2; y = 1.0/nk+xx*y; } return 2.0*x*y; } int main() { int t1; double Rt, Vadc; struct timespec time_start={ 0, 0},time_end={ 0, 0}; clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &time_start); /* 根据采样电压反推热敏电阻的温度 */ for(Vadc=1; Vadc<1800; Vadc++) { Rt = Vadc * 47000 / (1800-Vadc); t1=1/(ln(Rt/10000)/3950+1/298.15)-273.15; } clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &time_end); printf("start time %ld s,%ld ns\n", time_start.tv_sec, time_start.tv_nsec); printf("end time %ld s,%ld ns\n", time_end.tv_sec, time_end.tv_nsec); printf("duration:%ld s %ld ns\n", time_end.tv_sec-time_start.tv_sec, time_end.tv_nsec-time_start.tv_nsec); printf("%.10fmv = %d\n", Vadc, t1); return 0; }
反汇编可以看出硬浮点直接使用浮点指令, 软浮点则是用函数替换, 而且这些函数是静态链接的, 导致bin文件比较大
/* arm-none-linux-gnueabi-objdump -d soft.bin > objdump_soft.txt */ soft.bin: file format elf32-littlearm Disassembly of section .init: 00008388 <_init>: 8388: e92d4008 push {r3, lr} 838c: eb000024 bl 8424 <call_gmon_start> 8390: e8bd8008 pop {r3, pc} Disassembly of section .plt: 00008394 <.plt>: 8394: e52de004 push {lr} ; (str lr, [sp, #-4]!) 8398: e59fe004 ldr lr, [pc, #4] ; 83a4 <_init+0x1c> 839c: e08fe00e add lr, pc, lr 83a0: e5bef008 ldr pc, [lr, #8]! 83a4: 00009168 .word 0x00009168 83a8: e28fc600 add ip, pc, #0, 12 83ac: e28cca09 add ip, ip, #36864 ; 0x9000 83b0: e5bcf168 ldr pc, [ip, #360]! ; 0x168 83b4: e28fc600 add ip, pc, #0, 12 83b8: e28cca09 add ip, ip, #36864 ; 0x9000 83bc: e5bcf160 ldr pc, [ip, #352]! ; 0x160 83c0: e28fc600 add ip, pc, #0, 12 83c4: e28cca09 add ip, ip, #36864 ; 0x9000 83c8: e5bcf158 ldr pc, [ip, #344]! ; 0x158 83cc: e28fc600 add ip, pc, #0, 12 83d0: e28cca09 add ip, ip, #36864 ; 0x9000 83d4: e5bcf150 ldr pc, [ip, #336]! ; 0x150 83d8: e28fc600 add ip, pc, #0, 12 83dc: e28cca09 add ip, ip, #36864 ; 0x9000 83e0: e5bcf148 ldr pc, [ip, #328]! ; 0x148 Disassembly of section .text: 000083e8 <_start>: 83e8: e3a0b000 mov fp, #0 83ec: e3a0e000 mov lr, #0 83f0: e49d1004 pop {r1} ; (ldr r1, [sp], #4) 83f4: e1a0200d mov r2, sp 83f8: e52d2004 push {r2} ; (str r2, [sp, #-4]!) 83fc: e52d0004 push {r0} ; (str r0, [sp, #-4]!) 8400: e59fc010 ldr ip, [pc, #16] ; 8418 <_start+0x30> 8404: e52dc004 push {ip} ; (str ip, [sp, #-4]!) 
8408: e59f000c ldr r0, [pc, #12] ; 841c <_start+0x34> 840c: e59f300c ldr r3, [pc, #12] ; 8420 <_start+0x38> 8410: ebffffe7 bl 83b4 <_init+0x2c> 8414: ebffffef bl 83d8 <_init+0x50> 8418: 00009328 .word 0x00009328 841c: 00008628 .word 0x00008628 8420: 00009260 .word 0x00009260 00008424 <call_gmon_start>: 8424: e59f0014 ldr r0, [pc, #20] ; 8440 <call_gmon_start+0x1c> 8428: e59f2014 ldr r2, [pc, #20] ; 8444 <call_gmon_start+0x20> 842c: e08f3000 add r3, pc, r0 8430: e7933002 ldr r3, [r3, r2] 8434: e3530000 cmp r3, #0 8438: 012fff1e bxeq lr 843c: eaffffdf b 83c0 <_init+0x38> 8440: 000090d8 .word 0x000090d8 8444: 00000020 .word 0x00000020 00008448 <__do_global_dtors_aux>: 8448: e59f3010 ldr r3, [pc, #16] ; 8460 <__do_global_dtors_aux+0x18> 844c: e5d32000 ldrb r2, [r3] 8450: e3520000 cmp r2, #0 8454: 03a02001 moveq r2, #1 8458: 05c32000 strbeq r2, [r3] 845c: e12fff1e bx lr 8460: 00011538 .word 0x00011538 00008464 <frame_dummy>: 8464: e59f0020 ldr r0, [pc, #32] ; 848c <frame_dummy+0x28> 8468: e92d4008 push {r3, lr} 846c: e5903000 ldr r3, [r0] 8470: e3530000 cmp r3, #0 8474: 08bd8008 popeq {r3, pc} 8478: e59f3010 ldr r3, [pc, #16] ; 8490 <frame_dummy+0x2c> 847c: e3530000 cmp r3, #0 8480: 08bd8008 popeq {r3, pc} 8484: e12fff33 blx r3 8488: e8bd8008 pop {r3, pc} 848c: 00011410 .word 0x00011410 ... 
00008498 <ln>: 8498: e92d4830 push {r4, r5, fp, lr} 849c: e28db00c add fp, sp, #12 84a0: e24dd030 sub sp, sp, #48 ; 0x30 84a4: e14b03fc strd r0, [fp, #-60] ; 0xffffffc4 84a8: e3a0300f mov r3, #15 84ac: e50b3020 str r3, [fp, #-32] ; 0xffffffe0 84b0: e14b03dc ldrd r0, [fp, #-60] ; 0xffffffc4 84b4: e3a02000 mov r2, #0 84b8: e3a035ff mov r3, #1069547520 ; 0x3fc00000 84bc: e2833603 add r3, r3, #3145728 ; 0x300000 84c0: eb0000fc bl 88b8 <__aeabi_dsub> 84c4: e1a02000 mov r2, r0 84c8: e1a03001 mov r3, r1 84cc: e1a04002 mov r4, r2 84d0: e1a05003 mov r5, r3 84d4: e14b03dc ldrd r0, [fp, #-60] ; 0xffffffc4 84d8: e3a02000 mov r2, #0 84dc: e3a035ff mov r3, #1069547520 ; 0x3fc00000 84e0: e2833603 add r3, r3, #3145728 ; 0x300000 84e4: eb0000f4 bl 88bc <__adddf3> 84e8: e1a02000 mov r2, r0 84ec: e1a03001 mov r3, r1 84f0: e1a00004 mov r0, r4 84f4: e1a01005 mov r1, r5 84f8: eb000275 bl 8ed4 <__aeabi_ddiv> 84fc: e1a02000 mov r2, r0 8500: e1a03001 mov r3, r1 8504: e14b22fc strd r2, [fp, #-44] ; 0xffffffd4 8508: e14b02dc ldrd r0, [fp, #-44] ; 0xffffffd4 850c: e14b22dc ldrd r2, [fp, #-44] ; 0xffffffd4 8510: eb0001d4 bl 8c68 <__aeabi_dmul> 8514: e1a02000 mov r2, r0 8518: e1a03001 mov r3, r1 851c: e14b23f4 strd r2, [fp, #-52] ; 0xffffffcc 8520: e51b3020 ldr r3, [fp, #-32] ; 0xffffffe0 8524: e1a03083 lsl r3, r3, #1 8528: e2833001 add r3, r3, #1 852c: e50b3014 str r3, [fp, #-20] ; 0xffffffec 8530: e51b0014 ldr r0, [fp, #-20] ; 0xffffffec 8534: eb000194 bl 8b8c <__aeabi_i2d> 8538: e1a02000 mov r2, r0 853c: e1a03001 mov r3, r1 8540: e3a00000 mov r0, #0 8544: e3a015ff mov r1, #1069547520 ; 0x3fc00000 8548: e2811603 add r1, r1, #3145728 ; 0x300000 854c: eb000260 bl 8ed4 <__aeabi_ddiv> 8550: e1a02000 mov r2, r0 8554: e1a03001 mov r3, r1 8558: e14b21fc strd r2, [fp, #-28] ; 0xffffffe4 855c: e51b3020 ldr r3, [fp, #-32] ; 0xffffffe0 8560: e50b3010 str r3, [fp, #-16] 8564: ea00001c b 85dc <ln+0x144> 8568: e51b3014 ldr r3, [fp, #-20] ; 0xffffffec 856c: e2433002 sub r3, r3, #2 8570: e50b3014 str r3, 
[fp, #-20] ; 0xffffffec 8574: e51b0014 ldr r0, [fp, #-20] ; 0xffffffec 8578: eb000183 bl 8b8c <__aeabi_i2d> 857c: e1a02000 mov r2, r0 8580: e1a03001 mov r3, r1 8584: e3a00000 mov r0, #0 8588: e3a015ff mov r1, #1069547520 ; 0x3fc00000 858c: e2811603 add r1, r1, #3145728 ; 0x300000 8590: eb00024f bl 8ed4 <__aeabi_ddiv> 8594: e1a02000 mov r2, r0 8598: e1a03001 mov r3, r1 859c: e1a04002 mov r4, r2 85a0: e1a05003 mov r5, r3 85a4: e14b03d4 ldrd r0, [fp, #-52] ; 0xffffffcc 85a8: e14b21dc ldrd r2, [fp, #-28] ; 0xffffffe4 85ac: eb0001ad bl 8c68 <__aeabi_dmul> 85b0: e1a02000 mov r2, r0 85b4: e1a03001 mov r3, r1 85b8: e1a00004 mov r0, r4 85bc: e1a01005 mov r1, r5 85c0: eb0000bd bl 88bc <__adddf3> 85c4: e1a02000 mov r2, r0 85c8: e1a03001 mov r3, r1 85cc: e14b21fc strd r2, [fp, #-28] ; 0xffffffe4 85d0: e51b3010 ldr r3, [fp, #-16] 85d4: e2433001 sub r3, r3, #1 85d8: e50b3010 str r3, [fp, #-16] 85dc: e51b3010 ldr r3, [fp, #-16] 85e0: e3530000 cmp r3, #0 85e4: caffffdf bgt 8568 <ln+0xd0> 85e8: e14b22dc ldrd r2, [fp, #-44] ; 0xffffffd4 85ec: e1a00002 mov r0, r2 85f0: e1a01003 mov r1, r3 85f4: eb0000b0 bl 88bc <__adddf3> 85f8: e1a02000 mov r2, r0 85fc: e1a03001 mov r3, r1 8600: e1a00002 mov r0, r2 8604: e1a01003 mov r1, r3 8608: e14b21dc ldrd r2, [fp, #-28] ; 0xffffffe4 860c: eb000195 bl 8c68 <__aeabi_dmul> 8610: e1a02000 mov r2, r0 8614: e1a03001 mov r3, r1 8618: e1a00002 mov r0, r2 861c: e1a01003 mov r1, r3 8620: e24bd00c sub sp, fp, #12 8624: e8bd8830 pop {r4, r5, fp, pc} 00008628 <main>: 8628: e92d4830 push {r4, r5, fp, lr} 862c: e28db00c add fp, sp, #12 8630: e24dd030 sub sp, sp, #48 ; 0x30 8634: e3a03000 mov r3, #0 8638: e50b302c str r3, [fp, #-44] ; 0xffffffd4 863c: e3a03000 mov r3, #0 8640: e50b3028 str r3, [fp, #-40] ; 0xffffffd8 8644: e3a03000 mov r3, #0 8648: e50b3034 str r3, [fp, #-52] ; 0xffffffcc 864c: e3a03000 mov r3, #0 8650: e50b3030 str r3, [fp, #-48] ; 0xffffffd0 8654: e3a00002 mov r0, #2 8658: e24b302c sub r3, fp, #44 ; 0x2c 865c: e1a01003 mov r1, r3 8660: 
ebffff59 bl 83cc <_init+0x44> 8664: e3a02000 mov r2, #0 8668: e3a035ff mov r3, #1069547520 ; 0x3fc00000 866c: e2833603 add r3, r3, #3145728 ; 0x300000 8670: e14b21fc strd r2, [fp, #-28] ; 0xffffffe4 8674: ea000046 b 8794 <main+0x16c> 8678: e14b01dc ldrd r0, [fp, #-28] ; 0xffffffe4 867c: e28f3f7b add r3, pc, #492 ; 0x1ec 8680: e1c320d0 ldrd r2, [r3] 8684: eb000177 bl 8c68 <__aeabi_dmul> 8688: e1a02000 mov r2, r0 868c: e1a03001 mov r3, r1 8690: e1a04002 mov r4, r2 8694: e1a05003 mov r5, r3 8698: e28f1f76 add r1, pc, #472 ; 0x1d8 869c: e1c100d0 ldrd r0, [r1] 86a0: e14b21dc ldrd r2, [fp, #-28] ; 0xffffffe4 86a4: eb000083 bl 88b8 <__aeabi_dsub> 86a8: e1a02000 mov r2, r0 86ac: e1a03001 mov r3, r1 86b0: e1a00004 mov r0, r4 86b4: e1a01005 mov r1, r5 86b8: eb000205 bl 8ed4 <__aeabi_ddiv> 86bc: e1a02000 mov r2, r0 86c0: e1a03001 mov r3, r1 86c4: e14b22f4 strd r2, [fp, #-36] ; 0xffffffdc 86c8: e14b02d4 ldrd r0, [fp, #-36] ; 0xffffffdc 86cc: e28f3f6b add r3, pc, #428 ; 0x1ac 86d0: e1c320d0 ldrd r2, [r3] 86d4: eb0001fe bl 8ed4 <__aeabi_ddiv> 86d8: e1a02000