rvv向量化 epi-builtins-ref C语言intrinsics接口学习

本文详细介绍了RISC-V向量指令集中的向量类型、掩码类型以及多个基本运算指令如vfadd、vfdiv等的原型和操作过程,为初学者提供了一个学习RISC-V向量化编程的起点。
摘要由CSDN通过智能技术生成

纠结了很久的riscv vector指令集的向量化编程,不知从何入手,那就一点一点学习吧

Vector Types
向量类型命名规则:

__epi_<factor>x<ty>
  • factor是向量相对于VLEN/ELEN的元素个数。
    __epi_2xf32和__epi_2xf64具有相同数量的元素,但不同的元素类型。
  • ty是元素类型。
    __epi_2xf32和__epi_4xf32具有不同数量的元素,但元素类型相同。

向量类型接口定义
Mask Types
掩码类型与LMUL无关,因为它们总是使用单个向量寄存器,但是<factor>值仍然是有用的,掩码的元素类型为i1。
掩码类型
例如,两个__epi_2x<ty>(如__epi_2xf32)之间的关系操作将计算__epi_2xi1类型的掩码。

还有一种混合类型,有需要的读者可自行查看官方文档

指令详解
Instruction:
vsetvli
Prototypes:

unsigned long int __builtin_epi_vsetvl(unsigned long int rvl,
							/* constant */ unsigned long int sew,
							/* constant */ unsigned long int lmul);

Operation:

gvl = compute_vector_length(rvl, sew, lmul)
result = gvl

Instruction:
vsetvli
Prototypes:

	unsigned long int
	__builtin_epi_vsetvlmax(/* constant */ unsigned long int sew,	
							/* constant */ unsigned long int lmul);

Operation:

gvl = compute_vlmax(sew, lmul)
result = gvl

Instruction:
vfadd.vv
Prototypes:

	__epi_2xf32 __builtin_epi_vfadd_2xf32(__epi_2xf32 a, __epi_2xf32 b,
											unsigned long int gvl);
											
	__epi_1xf64 __builtin_epi_vfadd_1xf64(__epi_1xf64 a, __epi_1xf64 b,
											unsigned long int gvl);
											
	__epi_4xf32 __builtin_epi_vfadd_4xf32(__epi_4xf32 a, __epi_4xf32 b,		
											unsigned long int gvl);
											
	__epi_2xf64 __builtin_epi_vfadd_2xf64(__epi_2xf64 a, __epi_2xf64 b,		
											unsigned long int gvl);
											
	__epi_8xf32 __builtin_epi_vfadd_8xf32(__epi_8xf32 a, __epi_8xf32 b,	
											unsigned long int gvl);
											
	__epi_4xf64 __builtin_epi_vfadd_4xf64(__epi_4xf64 a, __epi_4xf64 b,	
											unsigned long int gvl);
											
	__epi_16xf32 __builtin_epi_vfadd_16xf32(__epi_16xf32 a, __epi_16xf32 b,
											unsigned long int gvl);
											
	__epi_8xf64 __builtin_epi_vfadd_8xf64(__epi_8xf64 a, __epi_8xf64 b,
											unsigned long int gvl);

Operation:

for element = 0 to gvl - 1
result[element] = a[element] + b[element]
result[gvl : VLMAX] = 0

部分指令还有掩码操作类型,主要是为了处理分支,在此不作过多介绍,可自行查看文档,后面对于Prototypes也只选取其中一种类型,只要搞懂了向量类型的定义,就很容易能够理解

Instruction:
vfdiv.vv
Prototypes:

__epi_2xf32 __builtin_epi_vfdiv_2xf32(__epi_2xf32 a, __epi_2xf32 b,
										unsigned long int gvl);

Operation:

for element = 0 to gvl - 1
result[element] = a[element] / b[element]
result[gvl : VLMAX] = 0

Instruction:
vfmacc.vv
Prototypes:

__epi_2xf32 __builtin_epi_vfmacc_2xf32(__epi_2xf32 c, __epi_2xf32 a,
											__epi_2xf32 b, unsigned long int gvl);

Operation:

for element = 0 to gvl - 1
result[element] = a[element] * b[element] + c[element]
result[gvl : VLMAX] = 0

Instructions:
vfmadd.vv
Prototypes:

__epi_2xf32 __builtin_epi_vfmadd_2xf32(__epi_2xf32 a, __epi_2xf32 b,
									__epi_2xf32 c, unsigned long int gvl);

Operation:

for element = 0 to gvl - 1
result[element] = a[element] * b[element] + c[element]
result[gvl : VLMAX] = 0

这里的vfmadd.vv和vfmacc.vv指令操作貌似是一样的,可能这个文档有问题,查看riscv vector spec手册,发现这两条指令并不相同,如下:
在这里插入图片描述
Instruction:
vfmax.vv
Prototypes:

__epi_2xf32 __builtin_epi_vfmax_2xf32(__epi_2xf32 a, __epi_2xf32 b,
										unsigned long int gvl);

Operation:

for element = 0 to gvl - 1
result[element] = max(a[element], b[element])
result[gvl : VLMAX] = 0

Instruction:
vfmin.vv
Prototypes:

__epi_2xf32 __builtin_epi_vfmin_2xf32(__epi_2xf32 a, __epi_2xf32 b,	
										unsigned long int gvl);

Operation:

for element = 0 to gvl - 1
result[element] = min(a[element], b[element])
result[gvl : VLMAX] = 0

Instruction:
vfmsac.vv
Prototypes:

__epi_2xf32 __builtin_epi_vfmsac_2xf32(__epi_2xf32 c, __epi_2xf32 a,
								__epi_2xf32 b, unsigned long int gvl);

Operation:

for element = 0 to gvl - 1
result[element] = a[element] * b[element] - c[element]
result[gvl : VLMAX] = 0

Instruction:
vfmsub.vv
Prototypes:

__epi_2xf32 __builtin_epi_vfmsub_2xf32(__epi_2xf32 a, __epi_2xf32 b,
								__epi_2xf32 c, unsigned long int gvl);

Operation:

for element = 0 to gvl - 1
result[element] = a[element] * b[element] - c[element]
result[gvl : VLMAX] = 0

这两条指令的实现也有问题,与上面的vfmacc.vv与vfmadd.vv类似

Instructions:
vfmul.vv
Prototypes:

__epi_2xf32 __builtin_epi_vfmul_2xf32(__epi_2xf32 a, __epi_2xf32 b,
										unsigned long int gvl);

Operation:

for element = 0 to gvl - 1
result[element] = a[element] * b[element]
result[gvl : VLMAX] = 0

Instruction:
vfnmacc.vv
Prototypes:

__epi_2xf32 __builtin_epi_vfnmacc_2xf32(__epi_2xf32 c, __epi_2xf32 a,
							__epi_2xf32 b, unsigned long int gvl);

Operation:

for element = 0 to gvl - 1
result[element] = -( a[element] * b[element] ) - c[element]
result[gvl : VLMAX] = 0

Instruction:
vfnmadd.vv
Prototypes:

__epi_2xf32 __builtin_epi_vfnmadd_2xf32(__epi_2xf32 a, __epi_2xf32 b,
							__epi_2xf32 c, unsigned long int gvl);

Operation:

for element = 0 to gvl - 1
result[element] = -( a[element] * b[element] ) - c[element]
result[gvl : VLMAX] = 0

Instruction:
vfnmsac.vv
Prototypes:

__epi_2xf32 __builtin_epi_vfnmsac_2xf32(__epi_2xf32 c, __epi_2xf32 a,
							__epi_2xf32 b, unsigned long int gvl);

Operation:

for element = 0 to gvl - 1
result[element] = -( a[element] * b[element] ) + c[element]
result[gvl : VLMAX] = 0

Instruction:
vfnmsub.vv
Prototypes:

__epi_2xf32 __builtin_epi_vfnmsub_2xf32(__epi_2xf32 a, __epi_2xf32 b,
							__epi_2xf32 c, unsigned long int gvl);

Operation:

for element = 0 to gvl - 1
result[element] = -( a[element] * b[element] ) + c[element]
result[gvl : VLMAX] = 0

Instruction:
vfredmax.vs
Prototypes:

__epi_2xf32 __builtin_epi_vfredmax_2xf32(__epi_2xf32 a, __epi_2xf32 b,
										unsigned long int gvl);

Operation:

if gvl > 0:
	current_max = b[0]
	for element = 0 to gvl - 1
		current_max = max(current_max, a[element])
	result[0] = current_max
	result[1 : VLMAX] = 0

Instruction:
vfredmin.vs
Prototypes:

__epi_2xf32 __builtin_epi_vfredmin_2xf32(__epi_2xf32 a, __epi_2xf32 b,
									unsigned long int gvl);

Operation:

if gvl > 0:
	current_min = b[0]
	for element = 0 to gvl - 1
		current_min = min(current_min, a[element])
	result[0] = current_min
	result[1 : VLMAX] = 0

Instruction:
vfredosum.vs
Prototypes:

__epi_2xf32 __builtin_epi_vfredosum_2xf32(__epi_2xf32 a, __epi_2xf32 b,
									unsigned long int gvl);

Operation:

if gvl > 0:
	current_sum = b[0]
	for element = 0 to gvl - 1
		current_sum = current_sum + a[element]
	result[0] = current_sum
	result[1 : VLMAX] = 0

剩余指令在此不再赘述,想继续学习的同学,可自行查看阅读pdf(已上传),根据我总结的模板,阅读起来会更加轻松容易

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

清枫♚

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值