摘要: openblas主要用于矩阵运算的加速
1. openblas交叉编译
-
下载openblas,这里用0.3.7版本
https://github.com/xianyi/OpenBLAS
编译
前提
- 需要安装好交叉编译工具链aarch64-linux-gnu
下载地址如下:
https://releases.linaro.org/components/toolchain/binaries/6.3-2017.05/aarch64-linux-gnu/
将工具链压缩包拷贝到编译服务器,把海思的交叉编译环境切换为该开源工具链,执行如下命令:
# Unpack the Linaro aarch64 cross toolchain archive into the current directory.
tar xf gcc-linaro-6.3.1-2017.05-x86_64_aarch64-linux-gnu.tar.xz
# Open .profile and append the PATH line shown below.
# NOTE(review): the PATH entry assumes the unpacked toolchain lives under $HOME/bin;
# adjust it if the archive was extracted somewhere else.
vi .profile
PATH="$HOME/bin:$PATH:$HOME/bin/gcc-linaro-6.3.1-2017.05-x86_64_aarch64-linux-gnu/bin"
# Reload .profile so the updated PATH applies to the current shell.
source .profile
- 交叉编译器验证
aarch64-linux-gnu-gcc --version
- 交叉编译openblas
# Unpack the OpenBLAS 0.3.7 sources and enter the source tree.
tar -zxvf OpenBLAS-0.3.7.tar.gz
cd OpenBLAS-0.3.7
# Cross-build a 64-bit library for TARGET=ARMV8: CC is the aarch64 cross compiler,
# HOSTCC=gcc names the compiler of the build machine.
# NOTE(review): NOFORTRAN=1 presumably skips the Fortran-dependent parts, since no
# cross Fortran compiler is set up in this guide — confirm against the OpenBLAS docs.
make BINARY=64 CC=aarch64-linux-gnu-gcc NOFORTRAN=1 HOSTCC=gcc TARGET=ARMV8
# Install into ../openblas_install, i.e. next to the source directory.
make PREFIX=../openblas_install install
- 结果
到安装目录openblas_install,有lib, include, bin文件夹
.
├── bin
├── include
│ ├── cblas.h
│ ├── f77blas.h
│ ├── lapacke_config.h
│ ├── lapacke.h
│ ├── lapacke_mangling.h
│ ├── lapacke_utils.h
│ └── openblas_config.h
└── lib
├── cmake
│ └── openblas
│ ├── OpenBLASConfig.cmake
│ └── OpenBLASConfigVersion.cmake
├── libopenblas.a -> libopenblas_armv8p-r0.3.7.a
├── libopenblas_armv8p-r0.3.7.a
├── libopenblas_armv8p-r0.3.7.so
├── libopenblas.so -> libopenblas_armv8p-r0.3.7.so
├── libopenblas.so.0 -> libopenblas_armv8p-r0.3.7.so
└── pkgconfig
└── openblas.pc
2. 使用测试
code
#include <cblas.h>
#include <stdio.h>
/*
 * OpenBLAS smoke test: multiplies the 3x2 matrix A by the 2x3 matrix B with
 * cblas_dgemm and prints the nine entries of the 3x3 result C on one line,
 * in row-major order.
 *
 * Returns 0 on completion so that shells and scripts see the run as a success.
 */
int main() {
    int i = 0;
    double A[6] = {1.0, 3.0, 1.0, -3.0, 4.0, -1.0};
    double B[6] = {1.0, 4.0, 1.0, -3.0, 4.0, -1.0};
    double C[9] = {.5, .5, .5, 1.5, .5, 2.5, .5, .5, .5};
    int M = 3; // rows of A and C
    int N = 3; // columns of B and C
    int K = 2; // columns of A and rows of B
    double alpha = 1.0;
    double beta = 0.0;

    // C = alpha * A * B + beta * C.
    // CblasRowMajor: the arrays are stored row by row.
    // CblasNoTrans:  neither A nor B is transposed.
    // The leading dimensions passed are K for A and N for both B and C.
    cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
                M, N, K, alpha, A, K, B, N, beta, C, N);

    for (i = 0; i < 9; i++) {
        printf("%lf ", C[i]);
    }
    printf("\n");

    // Exit status 0 = success (the previous `return 1` signalled failure).
    return 0;
}
编译用的CMakeLists.txt
# Build script for the OpenBLAS smoke test: compiles every source file found in
# the project root into the executable `main` and links it against OpenBLAS.
#
# 3.5 is the oldest version current CMake releases still accept, and it covers
# everything used below (target_* commands, Threads::Threads, genexes).
cmake_minimum_required(VERSION 3.5)
project(TEST VERSION 0.1)  # also defines TEST_VERSION = 0.1

# Default to a Debug build, but let `-DCMAKE_BUILD_TYPE=<type>` take precedence.
if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE "Debug")
endif()

# Install prefix of the cross-compiled OpenBLAS (the directory that contains
# include/ and lib/). Override with -DOPENBLAS_ROOT=<dir> instead of editing
# this file.
set(OPENBLAS_ROOT "/home/yangna/chenjun/HISI3559a/openblas_install"
    CACHE PATH "OpenBLAS install prefix (contains include/ and lib/)")

# Library file: locate libopenblas and fail early with a readable message.
find_library(Openblas_LIBS
  NAMES openblas
  PATHS "${OPENBLAS_ROOT}/lib")
if(NOT Openblas_LIBS)
  message(FATAL_ERROR
    "libopenblas not found under ${OPENBLAS_ROOT}/lib; "
    "pass -DOPENBLAS_ROOT=<openblas install dir>")
endif()

# Portable replacement for a raw -lpthread.
find_package(Threads REQUIRED)

# Collect all sources in the project root (same behaviour as before).
aux_source_directory(${PROJECT_SOURCE_DIR} DIR_SRC)
add_executable(main ${DIR_SRC})

# Per-configuration flags, scoped to this target. CXXFLAGS from the environment
# is already picked up by CMake in CMAKE_CXX_FLAGS, so it is not repeated here.
target_compile_options(main PRIVATE
  -Wall
  "$<$<CONFIG:Debug>:-O0;-g;-ggdb>"
  "$<$<CONFIG:Release>:-O3>")
target_compile_definitions(main PRIVATE "$<$<CONFIG:Debug>:DEBUG>")

# Header files.
target_include_directories(main PRIVATE "${OPENBLAS_ROOT}/include")

# OpenBLAS additionally needs the math and thread libraries at link time;
# both of them are mandatory.
target_link_libraries(main PRIVATE
  "${Openblas_LIBS}"
  Threads::Threads
  m)
- cmake的写法和其他动态库的使用是一样的。添加头文件,和库文件
- 链接的时候加上
-lm -lpthread
两项
2.1 cmake+cxx_compiler指定
- 我们平时编译c++的项目就是:
# Out-of-source build in ./build using the system default compiler.
mkdir build && cd build
cmake ..
make
- 上面是采用系统默认的g++进行编译的。我们编译hisi3559a的项目时,应该要用aarch64-linux-gnu-g++交叉编译器。可以通过cmake的CMAKE_CXX_COMPILER进行指定。于是交叉编译的命令为:
# Out-of-source build in ./build, with the C++ compiler switched to the
# aarch64 cross g++ through CMAKE_CXX_COMPILER.
mkdir build && cd build
cmake -DCMAKE_CXX_COMPILER=/home/yangna/Atlas500_DDK/toolchains/Euler_compile_env_cross/arm/cross_compile/install/bin/aarch64-linux-gnu-g++ ..
make
输出
Scanning dependencies of target main
[ 50%] Building CXX object CMakeFiles/main.dir/main.cpp.o
[100%] Linking CXX executable main
[100%] Built target main
2.2 cmake+CMAKE_TOOLCHAIN_FILE指定
-
上面的方法需要在命令行中输入很长的编译器路径,而且不便复用。可以改用cmake的CMAKE_TOOLCHAIN_FILE变量来指定一个工具链文件
-
新建一个hisi3559.cmake文件(一般应用开发文档都会给出的),写入:
# CMake toolchain file for cross-compiling to an aarch64 Linux target with the
# aarch64-linux-gnu-* tools from the Euler cross-compile environment.
# Use it with: cmake -DCMAKE_TOOLCHAIN_FILE=<path to this file> <source dir>

# Target platform. The compilers below are aarch64-linux-gnu-*, so the
# processor is aarch64 (not 32-bit "arm").
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR aarch64)

# Root of the cross-compile environment and the install prefix of its tools.
set(EULER_CROSS_PATH /home/yangna/Atlas500_DDK/toolchains/Euler_compile_env_cross)
set(tools "${EULER_CROSS_PATH}/arm/cross_compile/install")

# NOTE(review): no sysroot is configured. If the toolchain ships one, point
# CMAKE_SYSROOT at it (the original draft suggested ${tools}/sysroot — unverified).

# Cross tools.
set(CMAKE_C_COMPILER   "${tools}/bin/aarch64-linux-gnu-gcc")
set(CMAKE_CXX_COMPILER "${tools}/bin/aarch64-linux-gnu-g++")
set(CMAKE_AR           "${tools}/bin/aarch64-linux-gnu-ar")
set(CMAKE_RANLIB       "${tools}/bin/aarch64-linux-gnu-ranlib")

# find_program() never looks inside the target root paths (programs run on the
# build host); libraries, headers and packages are taken only from the target
# root paths when such roots are defined.
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
- 编译。这样命令更简洁,工具链配置也可以复用。
# Configure with the hisi3559.cmake toolchain file (kept in the project root,
# one level above ./build), then build.
mkdir build && cd build
cmake -DCMAKE_TOOLCHAIN_FILE=../hisi3559.cmake ..
make
3. hisi3559上执行
- 将编译得到的可执行文件main传到hisi3559上
- 将openblas_install/lib目录下的库文件上传到hisi3559(下文以/root/1_profile/openblas_lib为例),并设置环境变量LD_LIBRARY_PATH
# Open /etc/profile and append the comment and export line shown below, so the
# directory holding the uploaded OpenBLAS libraries is added to LD_LIBRARY_PATH.
vi /etc/profile
# for openblas lib , 2020-08-04
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/root/1_profile/openblas_lib
# Reload /etc/profile so the change applies to the current shell.
source /etc/profile
-
执行main
./main
-
结果
-8.000000 16.000000 -2.000000 10.000000 -8.000000 4.000000 7.000000 12.000000 5.000000