用汇编实现浮点数的乘法运算---FPU

最新推荐文章于 2024-02-07 17:05:03 发布

p.c.wang

最新推荐文章于 2024-02-07 17:05:03 发布

阅读量6.7k

点赞数

分类专栏： ARM

ARM 专栏收录该内容

24 篇文章 1 订阅

订阅专栏

#include <stdio.h>
#include <sys/time.h> 


/*
 * Minimal wall-clock timing helpers built on gettimeofday().
 *
 * INIT_TIMER_VALIABLE   declares the locals (tpstart, tpend, timeuse) that
 *                       the other two macros use; place it among the
 *                       declarations of the function being timed.
 * START_TIMER           records the start timestamp in tpstart.
 * END_PRINTF_TIMER(n)   records the end timestamp, stores the elapsed
 *                       microseconds in timeuse and prints it with label n.
 *
 * Every macro expands to complete statements, including the trailing
 * semicolon, so call sites do not add one.
 *
 * The elapsed time is computed from the difference of the seconds fields
 * first: multiplying an absolute tv_sec by 1000000 overflows a 32-bit long.
 */
#define INIT_TIMER_VALIABLE \
    struct timeval tpstart, tpend; \
    float timeuse;

#define START_TIMER gettimeofday(&tpstart, NULL);

#define END_PRINTF_TIMER(name) \
    gettimeofday(&tpend, NULL); \
    timeuse = (float)((double)(tpend.tv_sec - tpstart.tv_sec) * 1000000.0 \
                      + (double)(tpend.tv_usec - tpstart.tv_usec)); \
    printf("func :%s:time use(us) %f\n", name, timeuse);
       
       
/*
 * Multiply two single-precision floats with the VFP unit via inline assembly.
 *
 * f1, f2: the factors.
 * Returns f1 * f2.
 *
 * On ARM targets built with hardware floating point, the operands are moved
 * from core registers into s1/s2, multiplied into s0 and moved back to a core
 * register. s0-s2 are listed as clobbers so the compiler does not keep live
 * values in them across the asm statement.
 *
 * On any other target (non-ARM, or ARM built with -mfloat-abi=soft, where
 * GCC defines __SOFTFP__) the function falls back to a plain C multiply so
 * the program still builds and produces the same result.
 */
float vfp_operate(float f1, float f2)
{
#if defined(__arm__) && !defined(__SOFTFP__)
	float sum=0;
	__asm__ __volatile__(
	   "vmov  s1, %1\n"
	   "vmov  s2, %2\n"
	   "fmuls  s0, s1, s2\n"
	   "vmov  %0, s0\n"
	   :"=r"(sum)
	   :"r"(f1),"r"(f2)
	   :"s0","s1","s2"   /* scratch VFP registers written above */
	);
	return sum;
#else
	return f1 * f2;
#endif
}
/*
 * Reads two floats from stdin, multiplies them once with the C operator and
 * once with vfp_operate(), and prints the elapsed time and result of each.
 *
 * Returns 0 on success, 1 if either input cannot be parsed as a float
 * (previously the operands were used uninitialized in that case).
 *
 * The timer macros expand to complete statements, so they are written
 * without a trailing semicolon.
 */
int main(void)
{
      float f1,f2;
      float result;
      INIT_TIMER_VALIABLE

    printf("input float data1:");
    if (scanf("%f",&f1) != 1) {
        fprintf(stderr, "invalid input for data1\n");
        return 1;
    }
    printf("input float data2:");
    if (scanf("%f",&f2) != 1) {
        fprintf(stderr, "invalid input for data2\n");
        return 1;
    }

    /* Multiply using the compiler-generated code. */
    START_TIMER
    result=f1*f2;
    END_PRINTF_TIMER("use system function")
    printf("result is %f\n",result);

    /* Multiply using the hand-written VFP routine. */
    START_TIMER
    result=vfp_operate(f1,f2);
    END_PRINTF_TIMER("use vfp_operate")
    printf("result is %f\n",result);

    return 0;
}
/*
测试结果：
./vfp_helloworld 
input float data1:0.125
input float data2:1.684
func :use system function:time use(us) 12.000000
result is 0.210500
func :use vfp_operate:time use(us) 5.000000
result is 0.210500


*/

测试使用编译选项：

源代码：
#include <stdio.h>
/*
 * Minimal test program used to compare compiler output under different
 * -mfloat-abi settings: reads two floats and prints their product.
 * Kept deliberately tiny so the generated assembly is easy to read.
 */
int main()
{
    float f1,f2;
    printf("input float data1:");
    scanf("%f",&f1);
    printf("input float data2:");
    scanf("%f",&f2);
    
    /*
     * f1*f2 is the only float arithmetic in the program. All three float
     * arguments are promoted to double when passed to the variadic printf.
     */
    printf("float %f x %f =%f\n", f1,f2,f1*f2);
}

编译选项：（只是多增加了 -mfloat-abi=softfp；测试发现 -mfloat-abi=softfp 和 -mfloat-abi=hard 编译出的汇编一样）
arm-none-linux-gnueabi-gcc vfp_helloworld.c -S          arm-none-linux-gnueabi-gcc vfp_helloworld.c -S -o vfp_helloworld.asm         
-mfloat-abi=softfp -o vfp_helloworld.asm
 	.cpu arm10tdmi                                            	.cpu arm10tdmi                                     
	.eabi_attribute 27, 3                                     	.fpu softvfp                                       
	.fpu vfp                                                  	.eabi_attribute 20, 1                              
	.eabi_attribute 20, 1                                     	.eabi_attribute 21, 1                              
	.eabi_attribute 21, 1                                     	.eabi_attribute 23, 3                              
	.eabi_attribute 23, 3                                     	.eabi_attribute 24, 1                              
	.eabi_attribute 24, 1                                     	.eabi_attribute 25, 1                              
	.eabi_attribute 25, 1                                     	.eabi_attribute 26, 2                              
	.eabi_attribute 26, 2                                     	.eabi_attribute 30, 6                              
	.eabi_attribute 30, 6                                     	.eabi_attribute 18, 4                              
	.eabi_attribute 18, 4                                     	.file	"vfp_helloworld.c"                           
	.file	"vfp_helloworld.c"                                  	.section	.rodata                                  
	.section	.rodata                                         	.align	2                                          
	.align	2                                                 .LC0:                                                
.LC0:                                                       	.ascii	"input float data1:\000"                   
	.ascii	"input float data1:\000"                          	.align	2                                          
	.align	2                                                 .LC1:                                                
.LC1:                                                       	.ascii	"%f\000"                                   
	.ascii	"%f\000"                                          	.align	2                                          
	.align	2                                                 .LC2:                                                
.LC2:                                                       	.ascii	"input float data2:\000"                   
	.ascii	"input float data2:\000"                          	.global	__aeabi_f2d                                
	.align	2                                                 	.global	__aeabi_fmul    <<<------这里说明软件浮点运算。                              
.LC3:                                                       	.align	2                                          
	.ascii	"float %f x %f =%f\012\000"                       .LC3:                                                
	.text                                                     	.ascii	"float %f x %f =%f\012\000"                
	.align	2                                                 	.text                                              
	.global	main                                              	.align	2                                          
	.type	main, %function                                     	.global	main                                       
main:                                                       	.type	main, %function                              
	.fnstart                                                  main:                                                
.LFB2:                                                      	.fnstart                                           
	@ args = 0, pretend = 0, frame = 16                       .LFB2:                                               
	@ frame_needed = 1, uses_anonymous_args = 0               	@ args = 0, pretend = 0, frame = 16                
	stmfd	sp!, {fp, lr}                                       	@ frame_needed = 1, uses_anonymous_args = 0        
	.save {fp, lr}                                            	stmfd	sp!, {r4, r5, r6, r7, r8, fp, lr}            
.LCFI0:                                                     	.save {r4, r5, r6, r7, r8, fp, lr}                 
	.setfp fp, sp, #4                                         .LCFI0:                                              
	add	fp, sp, #4                                            	.setfp fp, sp, #24                                 
.LCFI1:                                                     	add	fp, sp, #24                                    
	.pad #32                                                  .LCFI1:                                              
	sub	sp, sp, #32                                           	.pad #36                                           
.LCFI2:                                                     	sub	sp, sp, #36                                    
	ldr	r0, .L3                                               .LCFI2:                                              
	bl	printf                                                	ldr	r0, .L3                                        
	sub	r3, fp, #8                                            	bl	printf                                         
	ldr	r0, .L3+4                                             	sub	r3, fp, #32                                    
	mov	r1, r3                                                	ldr	r0, .L3+4                                      
	bl	scanf                                                 	mov	r1, r3                                         
	ldr	r0, .L3+8                                             	bl	scanf                                          
	bl	printf                                                	ldr	r0, .L3+8                                      
	sub	r3, fp, #12                                           	bl	printf                                         
	ldr	r0, .L3+4                                             	sub	r3, fp, #36                                    
	mov	r1, r3                                                	ldr	r0, .L3+4                                      
	bl	scanf                                                 	mov	r1, r3                                         
	flds	s15, [fp, #-8]                                      	bl	scanf                                          
	fcvtds	d5, s15                                           	ldr	r3, [fp, #-32]	@ float                        
	flds	s15, [fp, #-12]                                     	mov	r0, r3                                         
	fcvtds	d6, s15                                           	bl	__aeabi_f2d                                    
	flds	s14, [fp, #-8]                                      	mov	r5, r0                                         
	flds	s15, [fp, #-12]                                     	mov	r6, r1                                         
	fmuls	s15, s14, s15  <<<-----直接使用硬件浮点指令              	ldr	r3, [fp, #-36]	@ float                        
	fcvtds	d7, s15                                           	mov	r0, r3                                         
	fstd	d6, [sp, #0]                                        	bl	__aeabi_f2d                                    
	fstd	d7, [sp, #8]                                        	mov	r7, r0                                         
	ldr	r0, .L3+12                                            	mov	r8, r1                                         
	fmrrd	r2, r3, d5                                          	ldr	r3, [fp, #-32]	@ float                        
	bl	printf                                                	ldr	r2, [fp, #-36]	@ float                        
	sub	sp, fp, #4                                            	mov	r0, r3                                         
	ldmfd	sp!, {fp, pc}                                       	mov	r1, r2                                         
.L4:                                                        	bl	__aeabi_fmul   <<<------这里调用软件浮点运算。                                         
	.align	2                                                 	mov	r3, r0                                         
.L3:                                                        	mov	r0, r3                                         
	.word	.LC0                                                	bl	__aeabi_f2d                                    
	.word	.LC1                                                	mov	r3, r0                                         
	.word	.LC2                                                	mov	r4, r1                                         
	.word	.LC3                                                	stmia	sp, {r7-r8}                                  
.LFE2:                                                      	str	r3, [sp, #8]                                   
	.fnend                                                    	str	r4, [sp, #12]                                  
	.size	main, .-main                                        	ldr	r0, .L3+12                                     
	.ident	"GCC: (Sourcery G++ Lite 2009q1-203) 4.3.3"       	mov	r2, r5                                         
	.section	.note.GNU-stack,"",%progbits                    	mov	r3, r6                                         
                                                            	bl	printf                                         
                                                            	sub	sp, fp, #24                                    
                                                            	ldmfd	sp!, {r4, r5, r6, r7, r8, fp, pc}            
                                                            .L4:                                                 
                                                            	.align	2                                          
                                                            .L3:                                                 
                                                            	.word	.LC0                                         
                                                            	.word	.LC1                                         
                                                            	.word	.LC2                                         
                                                            	.word	.LC3                                         
                                                            .LFE2:                                               
                                                            	.fnend                                             
                                                            	.size	main, .-main                                 
                                                            	.ident	"GCC: (Sourcery G++ Lite 2009q1-203) 4.3.3"
                                                            	.section	.note.GNU-stack,"",%progbits

摘自：RM的pdf文档的说明

浮点运算的支持

ARM 处理器内核不包含浮点硬件。必须使用以下两种方法之一，另行提供对浮点算法的支持：
在软件中，使用浮点库 fplib。此库提供了执行浮点运算可以调用的函数，无需额外的硬件。请参阅《库指南》中第 4-2 页的软件浮点库 fplib。
在硬件中，使用含 VFP 硬件协处理器的 ARM 处理器内核来进行所需的浮点运算。 VFP 是执行 IEEE 浮点的协处理器体系结构，支持单精度和双精度，但不支持扩展精度。
Note
在实际编程中，VFP 中的浮点运算实际是组合使用硬件（执行常见的情况）和软件（处理不常见的情况和导致异常的情况）执行的。请参阅VFP 支持。
Example 5.2 是一个用 C 执行浮点算法的函数，用以说明浮点算法的软件和硬件支持的不同。
Example 5.2. 浮点运算
/*
 * Example routine for comparing software and hardware floating point:
 * returns num2 squared minus the sum of num1 and num2.
 */
float foo(float num1, float num2)
{
    float squared;
    float total;

    squared = num2 * num2;
    total = num1 + num2;
    return squared - total;
}
如果使用命令行选项 --cpu 5TE --fpu softvfp 编译Example 5.2 的 C 代码，则编译器生成的机器代码的反汇编如Example 5.3 所示。在本示例中，在软件中通过调用库例程（如 __aeabi_fmul）来执行浮点算法。
Example 5.3. 软件中对浮点运算的支持
||foo|| PROC
PUSH {r4-r6, lr}
MOV r4, r1
BL __aeabi_fadd <<<<----直接相加
MOV r5, r0
MOV r1, r4
MOV r0, r4
BL __aeabi_fmul <<<<<----然后相乘
MOV r1, r5
POP {r4-r6, lr}
B __aeabi_fsub
ENDP

如果使用命令行选项 --fpu vfp 编译Example 5.2 的 C 代码，则编译器生成的机器代码的反汇编如Example 5.4 所示。在本示例中，在硬件中通过浮点算法指令（如 VMUL.F32）来执行浮点算法。
Example 5.4. 硬件中对浮点运算的支持
||foo|| PROC
VADD.F32 s2, s0, s1
VMUL.F32 s0, s1, s1
VSUB.F32 s0, s0, s2
BX lr
ENDP

在实际编程中，使用硬件支持浮点算法的代码更为紧凑，并提供比在软件中执行浮点算法的代码更佳的性能。但是，浮点算法的硬件支持需要 VFP 协处理器。

缺省情况下，如果有 VFP 协处理器，则会生成 VFP 指令。如果没有 VFP 协处理器，则编译器会生成调用软件浮点库 fplib 的代码，用于执行浮点运算。fplib 是 RealView Development Suite C 库标准分发的组成部分。

转自：http://blog.csdn.net/sno_guo/article/details/8472809

p.c.wang

关注

0
点赞
踩
7

收藏

觉得还不错? 一键收藏
0
评论
用汇编实现浮点数的乘法运算---FPU

#include <stdio.h> #include <sys/time.h> #define INIT_TIMER_VALIABLE \ struct timeval tpstart,tpend; \ float timeuse; #define START_TIMER gettimeofday(&tpstart,NULL);#define END_PRINTF_TIMER(name) \
复制链接

扫一扫