AVX指令集

最新推荐文章于 2025-02-19 15:28:39 发布

千千万万成长录

最新推荐文章于 2025-02-19 15:28:39 发布

阅读量1.3k

点赞数

文章标签： linux 运维服务器

原文链接：https://blog.csdn.net/mutourend/article/details/100074798 https://blog.csdn.net/zachariah2000/article/details/120731767

版权

使用Linux:

一、查看CPU所支持的指令集

对于Linux系统，可以运行 `cat /proc/cpuinfo | grep flags` 来查看当前CPU所支持的指令集：

由此可知，我的CPU支持AVX2指令集，但不支持AVX-512指令集。

二、在Ubuntu中运行——问题一：编译错误

#include<immintrin.h>
#include<iostream>
#include<cmath>
#include <cstdio>
using namespace std;

void sinx(int, int, float*, float*);
void print_MM(__m256);

// Demo driver: runs sinx() over eight sample inputs and prints, for each
// input, the series approximation next to the library sine of the same value.
int main()
{
        const int sampleCount = 8;  // matches the size of the two arrays below
        const int terms = 3;        // number of correction terms sinx() adds to x
        float x[8] = { 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8 };
        float result[8];            // filled in by sinx()

        sinx(sampleCount, terms, x, result);

        int idx = 0;
        while (idx < sampleCount)
        {
                const float approx = result[idx];
                const float reference = std::sin(x[idx]);
                std::printf("sin(%.3f) = %.10f;%.10f\n", x[idx], approx, reference);
                ++idx;
        }
        return 0;
}

// Evaluates the truncated sine series for eight floats at once:
//   x - x^3/3! + x^5/5! - ...   (the leading x plus `terms` correction terms)
// Returns the eight partial sums. With terms <= 0 the input is returned as is.
static __m256 sinx_series8(__m256 origx, int terms)
{
        // x^2 is the same on every iteration, so compute it once.
        const __m256 xSquared = _mm256_mul_ps(origx, origx);

        __m256 value = origx;                            // running partial sum
        __m256 numer = _mm256_mul_ps(origx, xSquared);   // x^3, then x^5, x^7, ...
        __m256 denom = _mm256_set1_ps(6.0f);             // 3!, then 5!, 7!, ...
        float sign = -1.0f;                              // series alternates -, +, -, ...

        for (int j = 1; j <= terms; j++)
        {
                // value += sign * numer / denom
                const __m256 term = _mm256_div_ps(_mm256_mul_ps(_mm256_set1_ps(sign), numer), denom);
                value = _mm256_add_ps(value, term);

                // Advance to the next odd power and the next odd factorial:
                // (2j+1)! * (2j+2) * (2j+3) == (2j+3)!
                numer = _mm256_mul_ps(numer, xSquared);
                const float factorialStep = static_cast<float>((2 * j + 2) * (2 * j + 3));
                denom = _mm256_mul_ps(denom, _mm256_set1_ps(factorialStep));
                sign = -sign;
        }
        return value;
}

// Approximates sin(x[i]) for i in [0, N) with a truncated Taylor series and
// writes the results to result[i].
//
// N      - number of elements in `x` and `result`; any value is accepted,
//          N <= 0 does nothing.
// terms  - number of correction terms added to the leading `x` term.
// x      - input values; no alignment requirement (unaligned loads are used).
// result - output buffer with room for at least N floats.
//
// Each group of inputs is also echoed through print_MM() before it is
// evaluated, as a diagnostic.
void sinx(int N, int terms, float* x, float* result)
{
        const int kLanes = 8;  // floats per 256-bit AVX register
        int i = 0;

        // Full vectors: safe to load/store eight floats directly.
        // `N - i >= kLanes` avoids the overflow that `i + kLanes <= N` could hit.
        for (; N - i >= kLanes; i += kLanes)
        {
                const __m256 origx = _mm256_loadu_ps(&x[i]);
                print_MM(origx);
                _mm256_storeu_ps(&result[i], sinx_series8(origx, terms));
        }

        // Tail: fewer than eight elements remain. Loading or storing a whole
        // vector here would run past the end of `x` / `result`, so go through
        // zero-padded stack buffers and copy back only the valid lanes.
        const int remaining = N - i;
        if (remaining > 0)
        {
                float padded[8] = { 0 };
                float computed[8];
                for (int k = 0; k < remaining; ++k)
                {
                        padded[k] = x[i + k];
                }

                const __m256 origx = _mm256_loadu_ps(padded);
                print_MM(origx);
                _mm256_storeu_ps(computed, sinx_series8(origx, terms));

                for (int k = 0; k < remaining; ++k)
                {
                        result[i + k] = computed[k];
                }
        }
}
// Diagnostic helper: writes the eight float lanes of `test` to std::cout on a
// single line, each value followed by two spaces, then ends the line and
// flushes the stream.
void print_MM(__m256 test)
{
        float lanes[8];
        // Unaligned store: `lanes` is a plain stack array with no 32-byte
        // alignment guarantee.
        _mm256_storeu_ps(lanes, test);

        for (const float lane : lanes)
        {
                std::cout << lane << "  ";
        }
        std::cout << std::endl;
}

将这份代码（上面给出的已经是改正后的版本）复制到Linux系统中，并使用如下命令编译：