一个简单的向量(vector)相加示例
GCC version 4.7.0
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <x86intrin.h>
/*
 * Vector of four ints, 16 bytes in total, declared with the GCC vector
 * extension.  This replaces __v4si, which is an internal helper typedef of
 * the x86 intrinsic headers rather than a documented public type; the element
 * type and vector size are unchanged, so expressions such as a.v + b.v keep
 * their meaning.
 */
typedef int i4vec_t __attribute__((vector_size(16)));

/*
 * Two views of the same 16 bytes:
 *   v  - the whole vector, for element-wise arithmetic;
 *   u4 - the four int lanes, for loading and reading individual values.
 * The union is explicitly aligned to 16 bytes.
 */
typedef union i4factor {
    i4vec_t v;
    int u4[4];
} __attribute__((aligned(16))) i4factor_t;
//typedef int v4sf __attribute__ ((mode(V4SF))); // vector of four single floats
#if 0
/* Operands of the scalar variant; the sums are written back into a. */
int a[4] = {1, 2, 3, 4};
int b[4] = {4, 3, 2, 1};

/*
 * Scalar reference version: adds each element of b to the element of a at
 * the same index, one element per iteration.  Takes no arguments and returns
 * nothing; it works directly on the file-scope arrays and leaves b untouched.
 */
void vectorAdd(void)
{
    int *dst = a;
    const int *src = b;
    const int *const end = a + 4;

    while (dst != end) {
        *dst += *src;
        dst++;
        src++;
    }
}
#else
/* Inputs of the vector addition. */
i4factor_t a;
i4factor_t b;
/* Receives the element-wise sum of a and b. */
i4factor_t c;

/*
 * Adds a and b element by element and stores the result in c.
 * The + is applied to the vector members, so all four int lanes are summed
 * by one expression; a and b themselves are not modified.
 */
void vectorAdd(void)
{
    c.v = b.v + a.v;
}
#endif
/*
 * Demo entry point.
 *
 * Loads a = {1, 2, 3, 4} and b = {4, 3, 2, 1} through the scalar view of the
 * unions, calls vectorAdd(), and prints the four lanes of c on one line.
 * Always returns 0.
 */
int main(int argc, char **argv)
{
    /* The command line is not used by this demo. */
    (void)argc;
    (void)argv;

    a.u4[0] = 1;
    a.u4[1] = 2;
    a.u4[2] = 3;
    a.u4[3] = 4;

    b.u4[0] = 4;
    b.u4[1] = 3;
    b.u4[2] = 2;
    b.u4[3] = 1;

    vectorAdd();

    printf("%d, %d, %d, %d\n", c.u4[0], c.u4[1], c.u4[2], c.u4[3]);
    return 0;
}
编译
# gcc -march=core2 -O2 -pipe -ggdb -c v4si.c -o v4si.o
# gcc v4si.o -o v4si
执行
# ./v4si
5, 5, 5, 5
用 objdump 查看,确认使用了 SIMD 指令(paddd)
# objdump -dS v4si.o |grep -22 c.v |tail -25
void vectorAdd(void)
{
//c = __builtin_addv4si(a, b);
c.v = a.v + b.v;
63: 66 0f fe 05 00 00 00 paddd 0x0(%rip),%xmm0 # 6b <main+0x6b>
6a: 00
6b: 66 0f 7f 05 00 00 00 movdqa %xmm0,0x0(%rip) # 73 <main+0x73>
72: 00