Linux下VFP NEON浮点编译

最新推荐文章于 2024-06-29 02:22:35 发布

liujia2100

最新推荐文章于 2024-06-29 02:22:35 发布

阅读量1.1w

点赞数 2

分类专栏：浮点&VFP&NEON

本文链接：https://blog.csdn.net/liujia2100/article/details/27236477

版权

浮点&VFP&NEON 专栏收录该内容

3 篇文章 0 订阅

订阅专栏

NEON:SIMD(Single Instruction Multiple Data 单指令多重数据) 指令集，其针对多媒体和讯号处理程式具备标准化的加速能力。

VFP: (Vector Floating Point), 向量浮点运算单元，ARM11（s3c6410）支持VFPv2，Cortex-A8（s5pv210）支持VFPv3。

NEON和VFPv3 浮点协处理器共享寄存器组，所以对于下面这种标量浮点运算，编译出来的汇编指令是一样的。

编译选项：

-mfpu=name（neon 或 vfpvx）：指定FPU 单元

-mfloat-abi=name（soft、hard、softfp）：指定软件浮点、硬件浮点或兼容软浮点调用接口

如果只指定 -mfpu，那么默认编译不会选择硬件浮点指令集

如果只指定 -mfloat-abi=hard 或者 softfp，那么编译会使用硬件浮点指令集

测试C文件

/* Test program: a single float division, so the generated assembly shows
 * whether the compiler emits a library call or an FPU instruction for it. */
int main(void)
{
	float f1, f2, f3;
	f1 = 1.2;	/* double literal, converted to float on assignment */
	f2 = 1.3;
	f3 = f1 / f2;	/* the operation under test; f3 is never read afterwards */
	return 0;
}

1、 arm-eabi-gcc -S hello.c -mfpu=neon

	@ gcc -S output for the C test file, built with -mfpu=neon and no -mfloat-abi.
	@ No VFP/NEON instruction is emitted: the float division is a call to __aeabi_fdiv.
	.arch armv5te
	.fpu softvfp	@ softvfp is selected although -mfpu=neon was passed
	@ EABI build attributes (object-file metadata emitted by the compiler)
	.eabi_attribute 20, 1
	.eabi_attribute 21, 1
	.eabi_attribute 23, 3
	.eabi_attribute 24, 1
	.eabi_attribute 25, 1
	.eabi_attribute 26, 2
	.eabi_attribute 30, 6
	.eabi_attribute 18, 4
	.file	"hello.c"
	.global	__aeabi_fdiv	@ division helper called below
	.text
	.align	2
	.global	main
	.type	main, %function
main:
	.fnstart
.LFB0:
	@ args = 0, pretend = 0, frame = 16
	@ frame_needed = 1, uses_anonymous_args = 0
	stmfd	sp!, {fp, lr}	@ push fp and lr (bl below overwrites lr)
	.save {fp, lr}
.LCFI0:
	.setfp fp, sp, #4
	add	fp, sp, #4	@ fp = sp + 4
.LCFI1:
	.pad #16
	sub	sp, sp, #16	@ reserve 16 bytes for locals
.LCFI2:
	@ f1 = 1.2f: copy the first literal-pool word to [fp, #-16]
	ldr	r3, .L3	@ float
	str	r3, [fp, #-16]	@ float
	@ f2 = 1.3f: copy the second literal-pool word to [fp, #-12]
	ldr	r3, .L3+4	@ float
	str	r3, [fp, #-12]	@ float
	@ f3 = f1 / f2: operands are passed in r0/r1, the quotient is taken from r0
	ldr	r0, [fp, #-16]	@ float
	ldr	r1, [fp, #-12]	@ float
	bl	__aeabi_fdiv
	mov	r3, r0
	str	r3, [fp, #-8]	@ float
	@ return 0
	mov	r3, #0
	mov	r0, r3
	sub	sp, fp, #4	@ sp = fp - 4, undoing the frame setup above
	ldmfd	sp!, {fp, pc}	@ pop fp and return by loading the saved lr into pc
.L4:
	.align	2
.L3:
	@ literal pool: IEEE-754 single-precision bit patterns
	.word	1067030938	@ 0x3F99999A = 1.2f
	.word	1067869798	@ 0x3FA66666 = 1.3f
.LFE0:
	.fnend
	.size	main, .-main
	.ident	"GCC: (Sourcery G++ Lite 2009q3-67) 4.4.1"
	.section	.note.GNU-stack,"",%progbits

2、 arm-eabi-gcc -S hello.c -mfpu=vfp

	@ gcc -S output for the C test file, built with -mfpu=vfp and no -mfloat-abi.
	@ No VFP instruction is emitted: the float division is a call to __aeabi_fdiv.
	.arch armv5te
	.fpu softvfp	@ softvfp is selected although -mfpu=vfp was passed
	@ EABI build attributes (object-file metadata emitted by the compiler)
	.eabi_attribute 20, 1
	.eabi_attribute 21, 1
	.eabi_attribute 23, 3
	.eabi_attribute 24, 1
	.eabi_attribute 25, 1
	.eabi_attribute 26, 2
	.eabi_attribute 30, 6
	.eabi_attribute 18, 4
	.file	"hello.c"
	.global	__aeabi_fdiv	@ division helper called below
	.text
	.align	2
	.global	main
	.type	main, %function
main:
	.fnstart
.LFB0:
	@ args = 0, pretend = 0, frame = 16
	@ frame_needed = 1, uses_anonymous_args = 0
	stmfd	sp!, {fp, lr}	@ push fp and lr (bl below overwrites lr)
	.save {fp, lr}
.LCFI0:
	.setfp fp, sp, #4
	add	fp, sp, #4	@ fp = sp + 4
.LCFI1:
	.pad #16
	sub	sp, sp, #16	@ reserve 16 bytes for locals
.LCFI2:
	@ f1 = 1.2f: copy the first literal-pool word to [fp, #-16]
	ldr	r3, .L3	@ float
	str	r3, [fp, #-16]	@ float
	@ f2 = 1.3f: copy the second literal-pool word to [fp, #-12]
	ldr	r3, .L3+4	@ float
	str	r3, [fp, #-12]	@ float
	@ f3 = f1 / f2: operands are passed in r0/r1, the quotient is taken from r0
	ldr	r0, [fp, #-16]	@ float
	ldr	r1, [fp, #-12]	@ float
	bl	__aeabi_fdiv
	mov	r3, r0
	str	r3, [fp, #-8]	@ float
	@ return 0
	mov	r3, #0
	mov	r0, r3
	sub	sp, fp, #4	@ sp = fp - 4, undoing the frame setup above
	ldmfd	sp!, {fp, pc}	@ pop fp and return by loading the saved lr into pc
.L4:
	.align	2
.L3:
	@ literal pool: IEEE-754 single-precision bit patterns
	.word	1067030938	@ 0x3F99999A = 1.2f
	.word	1067869798	@ 0x3FA66666 = 1.3f
.LFE0:
	.fnend
	.size	main, .-main
	.ident	"GCC: (Sourcery G++ Lite 2009q3-67) 4.4.1"
	.section	.note.GNU-stack,"",%progbits

可以看到上面两个例子，使用的都是 .fpu softvfp，除法仍然通过调用软件浮点库函数 __aeabi_fdiv 完成。

3、 arm-eabi-gcc -S hello.c -mfpu=neon -mfloat-abi=hard

	@ gcc -S output for the C test file, built with -mfpu=neon -mfloat-abi=hard.
	@ The division is done in hardware by fdivs; no __aeabi_fdiv call is emitted.
	.arch armv5te
	@ EABI build attributes (object-file metadata emitted by the compiler)
	.eabi_attribute 27, 3
	.eabi_attribute 28, 1	@ appears only in this article's -mfloat-abi=hard listings
	.fpu neon	@ matches the -mfpu=neon option
	.eabi_attribute 20, 1
	.eabi_attribute 21, 1
	.eabi_attribute 23, 3
	.eabi_attribute 24, 1
	.eabi_attribute 25, 1
	.eabi_attribute 26, 2
	.eabi_attribute 30, 6
	.eabi_attribute 18, 4
	.file	"hello.c"
	.text
	.align	2
	.global	main
	.type	main, %function
main:
	.fnstart
.LFB0:
	@ args = 0, pretend = 0, frame = 16
	@ frame_needed = 1, uses_anonymous_args = 0
	@ link register save eliminated.
	str	fp, [sp, #-4]!	@ push fp only; this version of main makes no calls
	.save {fp}
.LCFI0:
	.setfp fp, sp, #0
	add	fp, sp, #0	@ fp = sp
.LCFI1:
	.pad #20
	sub	sp, sp, #20	@ reserve 20 bytes below fp
.LCFI2:
	@ f1 = 1.2f: load the first literal-pool word and store it at [fp, #-16]
	flds	s15, .L3
	fsts	s15, [fp, #-16]
	@ f2 = 1.3f: load the second literal-pool word and store it at [fp, #-12]
	flds	s15, .L3+4
	fsts	s15, [fp, #-12]
	@ f3 = f1 / f2, computed by the FPU and stored at [fp, #-8]
	flds	s14, [fp, #-16]
	flds	s15, [fp, #-12]
	fdivs	s15, s14, s15	@ s15 = s14 / s15
	fsts	s15, [fp, #-8]
	@ return 0
	mov	r3, #0
	mov	r0, r3
	add	sp, fp, #0	@ sp = fp, releasing the locals
	ldmfd	sp!, {fp}	@ restore fp
	bx	lr	@ return to the caller
.L4:
	.align	2
.L3:
	@ literal pool: IEEE-754 single-precision bit patterns
	.word	1067030938	@ 0x3F99999A = 1.2f
	.word	1067869798	@ 0x3FA66666 = 1.3f
.LFE0:
	.fnend
	.size	main, .-main
	.ident	"GCC: (Sourcery G++ Lite 2009q3-67) 4.4.1"
	.section	.note.GNU-stack,"",%progbits

4、 arm-eabi-gcc -S hello.c -mfpu=neon -mfloat-abi=softfp

	@ gcc -S output for the C test file, built with -mfpu=neon -mfloat-abi=softfp.
	@ The division is done in hardware by fdivs; no __aeabi_fdiv call is emitted.
	.arch armv5te
	@ EABI build attributes (object-file metadata emitted by the compiler)
	.eabi_attribute 27, 3
	.fpu neon	@ matches the -mfpu=neon option
	.eabi_attribute 20, 1
	.eabi_attribute 21, 1
	.eabi_attribute 23, 3
	.eabi_attribute 24, 1
	.eabi_attribute 25, 1
	.eabi_attribute 26, 2
	.eabi_attribute 30, 6
	.eabi_attribute 18, 4
	.file	"hello.c"
	.text
	.align	2
	.global	main
	.type	main, %function
main:
	.fnstart
.LFB0:
	@ args = 0, pretend = 0, frame = 16
	@ frame_needed = 1, uses_anonymous_args = 0
	@ link register save eliminated.
	str	fp, [sp, #-4]!	@ push fp only; this version of main makes no calls
	.save {fp}
.LCFI0:
	.setfp fp, sp, #0
	add	fp, sp, #0	@ fp = sp
.LCFI1:
	.pad #20
	sub	sp, sp, #20	@ reserve 20 bytes below fp
.LCFI2:
	@ f1 = 1.2f: load the first literal-pool word and store it at [fp, #-16]
	flds	s15, .L3
	fsts	s15, [fp, #-16]
	@ f2 = 1.3f: load the second literal-pool word and store it at [fp, #-12]
	flds	s15, .L3+4
	fsts	s15, [fp, #-12]
	@ f3 = f1 / f2, computed by the FPU and stored at [fp, #-8]
	flds	s14, [fp, #-16]
	flds	s15, [fp, #-12]
	fdivs	s15, s14, s15	@ s15 = s14 / s15
	fsts	s15, [fp, #-8]
	@ return 0
	mov	r3, #0
	mov	r0, r3
	add	sp, fp, #0	@ sp = fp, releasing the locals
	ldmfd	sp!, {fp}	@ restore fp
	bx	lr	@ return to the caller
.L4:
	.align	2
.L3:
	@ literal pool: IEEE-754 single-precision bit patterns
	.word	1067030938	@ 0x3F99999A = 1.2f
	.word	1067869798	@ 0x3FA66666 = 1.3f
.LFE0:
	.fnend
	.size	main, .-main
	.ident	"GCC: (Sourcery G++ Lite 2009q3-67) 4.4.1"
	.section	.note.GNU-stack,"",%progbits

5、 arm-eabi-gcc -S hello.c -mfpu=vfpv3 -mfloat-abi=softfp

	@ gcc -S output for the C test file, built with -mfpu=vfpv3 -mfloat-abi=softfp.
	@ The division is done in hardware by fdivs; no __aeabi_fdiv call is emitted.
	.arch armv5te
	@ EABI build attributes (object-file metadata emitted by the compiler)
	.eabi_attribute 27, 3
	.fpu vfpv3	@ matches the -mfpu=vfpv3 option
	.eabi_attribute 20, 1
	.eabi_attribute 21, 1
	.eabi_attribute 23, 3
	.eabi_attribute 24, 1
	.eabi_attribute 25, 1
	.eabi_attribute 26, 2
	.eabi_attribute 30, 6
	.eabi_attribute 18, 4
	.file	"hello.c"
	.text
	.align	2
	.global	main
	.type	main, %function
main:
	.fnstart
.LFB0:
	@ args = 0, pretend = 0, frame = 16
	@ frame_needed = 1, uses_anonymous_args = 0
	@ link register save eliminated.
	str	fp, [sp, #-4]!	@ push fp only; this version of main makes no calls
	.save {fp}
.LCFI0:
	.setfp fp, sp, #0
	add	fp, sp, #0	@ fp = sp
.LCFI1:
	.pad #20
	sub	sp, sp, #20	@ reserve 20 bytes below fp
.LCFI2:
	@ f1 = 1.2f: load the first literal-pool word and store it at [fp, #-16]
	flds	s15, .L3
	fsts	s15, [fp, #-16]
	@ f2 = 1.3f: load the second literal-pool word and store it at [fp, #-12]
	flds	s15, .L3+4
	fsts	s15, [fp, #-12]
	@ f3 = f1 / f2, computed by the FPU and stored at [fp, #-8]
	flds	s14, [fp, #-16]
	flds	s15, [fp, #-12]
	fdivs	s15, s14, s15	@ s15 = s14 / s15
	fsts	s15, [fp, #-8]
	@ return 0
	mov	r3, #0
	mov	r0, r3
	add	sp, fp, #0	@ sp = fp, releasing the locals
	ldmfd	sp!, {fp}	@ restore fp
	bx	lr	@ return to the caller
.L4:
	.align	2
.L3:
	@ literal pool: IEEE-754 single-precision bit patterns
	.word	1067030938	@ 0x3F99999A = 1.2f
	.word	1067869798	@ 0x3FA66666 = 1.3f
.LFE0:
	.fnend
	.size	main, .-main
	.ident	"GCC: (Sourcery G++ Lite 2009q3-67) 4.4.1"
	.section	.note.GNU-stack,"",%progbits

6、 arm-eabi-gcc -S hello.c -mfpu=vfpv3 -mfloat-abi=hard

	@ gcc -S output for the C test file, built with -mfpu=vfpv3 -mfloat-abi=hard.
	@ The division is done in hardware by fdivs; no __aeabi_fdiv call is emitted.
	.arch armv5te
	@ EABI build attributes (object-file metadata emitted by the compiler)
	.eabi_attribute 27, 3
	.eabi_attribute 28, 1	@ appears only in this article's -mfloat-abi=hard listings
	.fpu vfpv3	@ matches the -mfpu=vfpv3 option
	.eabi_attribute 20, 1
	.eabi_attribute 21, 1
	.eabi_attribute 23, 3
	.eabi_attribute 24, 1
	.eabi_attribute 25, 1
	.eabi_attribute 26, 2
	.eabi_attribute 30, 6
	.eabi_attribute 18, 4
	.file	"hello.c"
	.text
	.align	2
	.global	main
	.type	main, %function
main:
	.fnstart
.LFB0:
	@ args = 0, pretend = 0, frame = 16
	@ frame_needed = 1, uses_anonymous_args = 0
	@ link register save eliminated.
	str	fp, [sp, #-4]!	@ push fp only; this version of main makes no calls
	.save {fp}
.LCFI0:
	.setfp fp, sp, #0
	add	fp, sp, #0	@ fp = sp
.LCFI1:
	.pad #20
	sub	sp, sp, #20	@ reserve 20 bytes below fp
.LCFI2:
	@ f1 = 1.2f: load the first literal-pool word and store it at [fp, #-16]
	flds	s15, .L3
	fsts	s15, [fp, #-16]
	@ f2 = 1.3f: load the second literal-pool word and store it at [fp, #-12]
	flds	s15, .L3+4
	fsts	s15, [fp, #-12]
	@ f3 = f1 / f2, computed by the FPU and stored at [fp, #-8]
	flds	s14, [fp, #-16]
	flds	s15, [fp, #-12]
	fdivs	s15, s14, s15	@ s15 = s14 / s15
	fsts	s15, [fp, #-8]
	@ return 0
	mov	r3, #0
	mov	r0, r3
	add	sp, fp, #0	@ sp = fp, releasing the locals
	ldmfd	sp!, {fp}	@ restore fp
	bx	lr	@ return to the caller
.L4:
	.align	2
.L3:
	@ literal pool: IEEE-754 single-precision bit patterns
	.word	1067030938	@ 0x3F99999A = 1.2f
	.word	1067869798	@ 0x3FA66666 = 1.3f
.LFE0:
	.fnend
	.size	main, .-main
	.ident	"GCC: (Sourcery G++ Lite 2009q3-67) 4.4.1"
	.section	.note.GNU-stack,"",%progbits

从上面可以看到，softfp和hard使用的指令集是一样的，都是硬件浮点（本例输出中 hard 只多了一行 .eabi_attribute 28, 1）；neon和vfp的区别，仅仅体现在 .fpu vfpv3 和 .fpu neon。

7、 arm-eabi-gcc -S hello.c -mfloat-abi=hard

	@ gcc -S output for the C test file, built with -mfloat-abi=hard and no -mfpu.
	@ The division is done in hardware by fdivs; no __aeabi_fdiv call is emitted.
	.arch armv5te
	@ EABI build attributes (object-file metadata emitted by the compiler)
	.eabi_attribute 27, 3
	.eabi_attribute 28, 1	@ appears only in this article's -mfloat-abi=hard listings
	.fpu vfp	@ chosen by the compiler; no -mfpu option was given
	.eabi_attribute 20, 1
	.eabi_attribute 21, 1
	.eabi_attribute 23, 3
	.eabi_attribute 24, 1
	.eabi_attribute 25, 1
	.eabi_attribute 26, 2
	.eabi_attribute 30, 6
	.eabi_attribute 18, 4
	.file	"hello.c"
	.text
	.align	2
	.global	main
	.type	main, %function
main:
	.fnstart
.LFB0:
	@ args = 0, pretend = 0, frame = 16
	@ frame_needed = 1, uses_anonymous_args = 0
	@ link register save eliminated.
	str	fp, [sp, #-4]!	@ push fp only; this version of main makes no calls
	.save {fp}
.LCFI0:
	.setfp fp, sp, #0
	add	fp, sp, #0	@ fp = sp
.LCFI1:
	.pad #20
	sub	sp, sp, #20	@ reserve 20 bytes below fp
.LCFI2:
	@ f1 = 1.2f: load the first literal-pool word and store it at [fp, #-16]
	flds	s15, .L3
	fsts	s15, [fp, #-16]
	@ f2 = 1.3f: load the second literal-pool word and store it at [fp, #-12]
	flds	s15, .L3+4
	fsts	s15, [fp, #-12]
	@ f3 = f1 / f2, computed by the FPU and stored at [fp, #-8]
	flds	s14, [fp, #-16]
	flds	s15, [fp, #-12]
	fdivs	s15, s14, s15	@ s15 = s14 / s15
	fsts	s15, [fp, #-8]
	@ return 0
	mov	r3, #0
	mov	r0, r3
	add	sp, fp, #0	@ sp = fp, releasing the locals
	ldmfd	sp!, {fp}	@ restore fp
	bx	lr	@ return to the caller
.L4:
	.align	2
.L3:
	@ literal pool: IEEE-754 single-precision bit patterns
	.word	1067030938	@ 0x3F99999A = 1.2f
	.word	1067869798	@ 0x3FA66666 = 1.3f
.LFE0:
	.fnend
	.size	main, .-main
	.ident	"GCC: (Sourcery G++ Lite 2009q3-67) 4.4.1"
	.section	.note.GNU-stack,"",%progbits

当直接使用-mfloat-abi=hard时，会默认使用.fpu vfp硬件浮点。