1,源码下载
git clone --recursive https://github.com/ROCm/HIPIFY.git
sudo apt install clang-dev
2,编译并安装
2.1 通常方式
hipify-clang 文档:
https://github.com/ROCm/HIPIFY/blob/amd-staging/docs/hipify-clang.md
编译命令:
cmake -DCMAKE_INSTALL_PREFIX=../dist -DCMAKE_BUILD_TYPE=Release ..
make -j install
此时 hipify-clang 会被安装到 HIPIFY/dist/bin 中,
测试:
cd ../dist/bin
hipify --help
如果系统中存在多个llvm版本,在执行翻译命令时,比如hipify-clang ./vectorAdd.cu --cuda-path=/usr/local/cuda-12.1可能会发生错误,如下提示:
CommandLine Error: Option 'static-func-full-module-prefix' registered more than once!
LLVM ERROR: inconsistency in registered CommandLine option
这时需要使用自制的LLVM,如下2.2节所示。
2.2 自制LLVM的方式
2.2.1 下载llvm源码
wget https://github.com/llvm/llvm-project/archive/refs/tags/llvmorg-17.0.6.tar.gz
解压,tar zxf llvmorg.....
2.2.2 配置编译LLVM
cd llvmorg.....
mkdir -p build ../dist/local
cd build
cmake -G "Unix Makefiles" ../llvm \
-DLLVM_ENABLE_PROJECTS="clang;clang-tools-extra;compiler-rt" \
-DLLVM_BUILD_EXAMPLES=ON -DLLVM_TARGETS_TO_BUILD="host" \
-DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_ASSERTIONS=ON \
-DLLVM_ENABLE_RUNTIMES=all -DLLVM_BUILD_LLVM_DYLIB=ON \
-DCMAKE_INSTALL_PREFIX=../../dist/local
构建:
make -j12
12要小于本机支持的超线程个数,否则会比较卡;
安装:
make -j install
测试时,llvm 被install在如下文件夹:
/home/hipper/ex_dock_hipify/dist/local
加入PATH和LD_LIBRARY_PATH env variable中去,分别将 ....local/bin ....local/lib
ls /home/hipper/ex_dock_hipify/dist/local 如图:
2.2.3 配置编译HIPIFY
cd HIPIFY/
mkdir build_d
cd build_d
cmake \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_INSTALL_PREFIX=../../dist \
..
构建且安装:
make -j install
不需要的配置:
# -DCMAKE_PREFIX_PATH=/home/hipper/ex_dock_hipify/dist/local \
# -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-12.1
# 指定 LLVM 安装目录的配置方法:
#-DCMAKE_PREFIX_PATH=/home/hipper/ex_dock_hipify/dist/local
3. 示例
3.1翻译 .cu 文件到 .hip 文件
命令:
/home/hipper/ex_dock_hipify/HIPIFY/dist/bin/hipify-clang ./vectorAdd.cu --cuda-path=/usr/local/cuda-12.1
会在 ./ 目录中生成 vectoreAdd.cu.hip 的文件。
其中,hipify-clang 并不检查输入文件的扩展名,比如这里的.cu,它只检查文件内部的内容,将cuda生态的关键字有机地翻译成 hip生态的关键字,输出文件会在原文件名的基础上加上 .hip 后缀;
源代码分别如下。
使用 cuda samples中的vectoradd.cu为例,源码如下:
vectorAdd.cu
#include <stdio.h>
#include <cuda_runtime.h>
__global__ void vectorAdd(const float *A, const float *B, float *C,
int numElements) {
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < numElements) {
C[i] = A[i] + B[i] + 0.0f;
}
}
int main(void) {
cudaError_t err = cudaSuccess;
int numElements = 50000;
size_t size = numElements * sizeof(float);
printf("[Vector addition of %d elements]\n", numElements);
float *h_A = (float *)malloc(size);
float *h_B = (float *)malloc(size);
float *h_C = (float *)malloc(size);
if (h_A == NULL || h_B == NULL || h_C == NULL) {
fprintf(stderr, "Failed to allocate host vectors!\n");
exit(EXIT_FAILURE);
}
for (int i = 0; i < numElements; ++i) {
h_A[i] = rand() / (float)RAND_MAX;
h_B[i] = rand() / (float)RAND_MAX;
}
float *d_A = NULL;
err = cudaMalloc((void **)&d_A, size);
if (err != cudaSuccess) {
fprintf(stderr, "Failed to allocate device vector A (error code %s)!\n",
cudaGetErrorString(err));
exit(EXIT_FAILURE);
}
float *d_B = NULL;
err = cudaMalloc((void **)&d_B, size);
if (err != cudaSuccess) {
fprintf(stderr, "Failed to allocate device vector B (error code %s)!\n",
cudaGetErrorString(err));
exit(EXIT_FAILURE);
}
float *d_C = NULL;
err = cudaMalloc((void **)&d_C, size);
if (err != cudaSuccess) {
fprintf(stderr, "Failed to allocate device vector C (error code %s)!\n",
cudaGetErrorString(err));
exit(EXIT_FAILURE);
}
printf("Copy input data from the host memory to the CUDA device\n");
err = cudaMemcpy(d_A, h_A, size, cudaMemcpyHostToDevice);
if (err != cudaSuccess) {
fprintf(stderr,
"Failed to copy vector A from host to device (error code %s)!\n",
cudaGetErrorString(err));
exit(EXIT_FAILURE);
}
err = cudaMemcpy(d_B, h_B, size, cudaMemcpyHostToDevice);
if (err != cudaSuccess) {
fprintf(stderr,
"Failed to copy vector B from host to device (error code %s)!\n",
cudaGetErrorString(err));
exit(EXIT_FAILURE);
}
int threadsPerBlock = 256;
int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
printf("CUDA kernel launch with %d blocks of %d threads\n", blocksPerGrid,
threadsPerBlock);
vectorAdd<<<blocksPerGrid, threadsPerBlock>>>(d_A, d_B, d_C, numElements);
err = cudaGetLastError();
if (err != cudaSuccess) {
fprintf(stderr, "Failed to launch vectorAdd kernel (error code %s)!\n",
cudaGetErrorString(err));
exit(EXIT_FAILURE);
}
printf("Copy output data from the CUDA device to the host memory\n");
err = cudaMemcpy(h_C, d_C, size, cudaMemcpyDeviceToHost);
if (err != cudaSuccess) {
fprintf(stderr,
"Failed to copy vector C from device to host (error code %s)!\n",
cudaGetErrorString(err));
exit(EXIT_FAILURE);
}
for (int i = 0; i < numElements; ++i) {
if (fabs(h_A[i] + h_B[i] - h_C[i]) > 1e-5) {
fprintf(stderr, "Result verification failed at element %d!\n", i);
exit(EXIT_FAILURE);
}
}
printf("Test PASSED\n");
err = cudaFree(d_A);
if (err != cudaSuccess) {
fprintf(stderr, "Failed to free device vector A (error code %s)!\n",
cudaGetErrorString(err));
exit(EXIT_FAILURE);
}
err = cudaFree(d_B);
if (err != cudaSuccess) {
fprintf(stderr, "Failed to free device vector B (error code %s)!\n",
cudaGetErrorString(err));
exit(EXIT_FAILURE);
}
err = cudaFree(d_C);
if (err != cudaSuccess) {
fprintf(stderr, "Failed to free device vector C (error code %s)!\n",
cudaGetErrorString(err));
exit(EXIT_FAILURE);
}
free(h_A);
free(h_B);
free(h_C);
printf("Done\n");
return 0;
}
生成的 vectorAdd.cu.hip :
#include <stdio.h>
#include <hip/hip_runtime.h>
__global__ void vectorAdd(const float *A, const float *B, float *C,
int numElements) {
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < numElements) {
C[i] = A[i] + B[i] + 0.0f;
}
}
int main(void) {
hipError_t err = hipSuccess;
int numElements = 50000;
size_t size = numElements * sizeof(float);
printf("[Vector addition of %d elements]\n", numElements);
float *h_A = (float *)malloc(size);
float *h_B = (float *)malloc(size);
float *h_C = (float *)malloc(size);
if (h_A == NULL || h_B == NULL || h_C == NULL) {
fprintf(stderr, "Failed to allocate host vectors!\n");
exit(EXIT_FAILURE);
}
for (int i = 0; i < numElements; ++i) {
h_A[i] = rand() / (float)RAND_MAX;
h_B[i] = rand() / (float)RAND_MAX;
}
float *d_A = NULL;
err = hipMalloc((void **)&d_A, size);
if (err != hipSuccess) {
fprintf(stderr, "Failed to allocate device vector A (error code %s)!\n",
hipGetErrorString(err));
exit(EXIT_FAILURE);
}
float *d_B = NULL;
err = hipMalloc((void **)&d_B, size);
if (err != hipSuccess) {
fprintf(stderr, "Failed to allocate device vector B (error code %s)!\n",
hipGetErrorString(err));
exit(EXIT_FAILURE);
}
float *d_C = NULL;
err = hipMalloc((void **)&d_C, size);
if (err != hipSuccess) {
fprintf(stderr, "Failed to allocate device vector C (error code %s)!\n",
hipGetErrorString(err));
exit(EXIT_FAILURE);
}
printf("Copy input data from the host memory to the CUDA device\n");
err = hipMemcpy(d_A, h_A, size, hipMemcpyHostToDevice);
if (err != hipSuccess) {
fprintf(stderr,
"Failed to copy vector A from host to device (error code %s)!\n",
hipGetErrorString(err));
exit(EXIT_FAILURE);
}
err = hipMemcpy(d_B, h_B, size, hipMemcpyHostToDevice);
if (err != hipSuccess) {
fprintf(stderr,
"Failed to copy vector B from host to device (error code %s)!\n",
hipGetErrorString(err));
exit(EXIT_FAILURE);
}
int threadsPerBlock = 256;
int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
printf("CUDA kernel launch with %d blocks of %d threads\n", blocksPerGrid,
threadsPerBlock);
vectorAdd<<<blocksPerGrid, threadsPerBlock>>>(d_A, d_B, d_C, numElements);
err = hipGetLastError();
if (err != hipSuccess) {
fprintf(stderr, "Failed to launch vectorAdd kernel (error code %s)!\n",
hipGetErrorString(err));
exit(EXIT_FAILURE);
}
printf("Copy output data from the CUDA device to the host memory\n");
err = hipMemcpy(h_C, d_C, size, hipMemcpyDeviceToHost);
if (err != hipSuccess) {
fprintf(stderr,
"Failed to copy vector C from device to host (error code %s)!\n",
hipGetErrorString(err));
exit(EXIT_FAILURE);
}
for (int i = 0; i < numElements; ++i) {
if (fabs(h_A[i] + h_B[i] - h_C[i]) > 1e-5) {
fprintf(stderr, "Result verification failed at element %d!\n", i);
exit(EXIT_FAILURE);
}
}
printf("Test PASSED\n");
err = hipFree(d_A);
if (err != hipSuccess) {
fprintf(stderr, "Failed to free device vector A (error code %s)!\n",
hipGetErrorString(err));
exit(EXIT_FAILURE);
}
err = hipFree(d_B);
if (err != hipSuccess) {
fprintf(stderr, "Failed to free device vector B (error code %s)!\n",
hipGetErrorString(err));
exit(EXIT_FAILURE);
}
err = hipFree(d_C);
if (err != hipSuccess) {
fprintf(stderr, "Failed to free device vector C (error code %s)!\n",
hipGetErrorString(err));
exit(EXIT_FAILURE);
}
free(h_A);
free(h_B);
free(h_C);
printf("Done\n");
return 0;
}
3.2 编译运行 vectorAdd.cu.hip
编译:
$ /opt/rocm/bin/hipcc ./vectorAdd.cu.hip -o vectorAdd
运行效果如下图:
3.3 一批hipify-clang 使用参考
./hipify-clang square.cu --cuda-path=/usr/local/cuda-12.3 -I /usr/local/cuda-12.3/samples/common/inc
./hipify-clang cpp17.cu --cuda-path=/usr/local/cuda-12.3 -- -std=c++17
./hipify-clang cpp17.cu ../../square.cu /home/user/cuda/intro.cu --cuda-path=/usr/local/cuda-12.3 -- -std=c++17
./hipify-clang square.cu --cuda-path=/usr/local/cuda-12.3 --clang-resource-directory=/usr/llvm/18.1.1/dist/lib/clang/18
hipify-clang intro.cu -cuda-path="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.3.2" --print-stats
hipify-clang intro.cu -cuda-path="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.3.2" --print-stats-csv
hipify-clang vectorAdd.cu --cuda-path=/usr/local/cuda-12.3 -o vectorAdd.hip --
当没有指定build-path,从而没有指定 compile-comand.json时,命令行选项中需要出现 -- ;
如果命令行选项中同时并没有出现 --, 那么hipify-clang 会判断后帮助自动添加 --
4, 带hipify-clang test 的构建
cmake
-DHIPIFY_CLANG_TESTS=ON \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_INSTALL_PREFIX=../dist \
-DCMAKE_PREFIX_PATH=/usr/llvm/18.1.1/dist \
-DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-12.3.2 \
-DCUDA_DNN_ROOT_DIR=/usr/local/cudnn-8.9.7 \
-DCUDA_CUB_ROOT_DIR=/usr/local/cub-2.1.0 \
-DLLVM_EXTERNAL_LIT=/usr/llvm/18.1.1/build/bin/llvm-lit \
../hipify
5, hipify-per 示例
$ ../local_d/bin/hipify-perl vectorAdd.cu > vectorAdd.hip
6,对main函数的一点注释
HIPIFY/src/main.cpp at amd-staging · ROCm/HIPIFY · GitHub
/*
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <cstdio>
#include <fstream>
#include <set>
#include <cmath>
#include <chrono>
#include <iomanip>
#include <sstream>
#include "CUDA2HIP.h"
#include "CUDA2HIP_Scripting.h"
#include "LLVMCompat.h"
#include "HipifyAction.h"
#include "ArgParse.h"
#include "StringUtils.h"
#include "llvm/Support/Debug.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/DiagnosticIDs.h"
#include "clang/Basic/DiagnosticOptions.h"
#include "clang/Driver/Driver.h"
#include "clang/Driver/Compilation.h"
#include "clang/Driver/Tool.h"
#include "clang/Frontend/TextDiagnosticPrinter.h"
#if LLVM_VERSION_MAJOR < 8
#include "llvm/Support/Path.h"
#endif
constexpr auto DEBUG_TYPE = "cuda2hip";
namespace ct = clang::tooling;
void cleanupHipifyOptions(std::vector<const char*> &args) {
for (const auto &a : hipifyOptions) {
args.erase(std::remove(args.begin(), args.end(), "--" + a), args.end());
args.erase(std::remove(args.begin(), args.end(), "-" + a), args.end());
}
for (const auto &a : hipifyOptionsWithTwoArgs) {
// remove all "-option=value" and "--option=value"
args.erase(
std::remove_if(args.begin(), args.end(),
[a](const std::string &s) { return s.find("--" + a + "=") == 0 || s.find("-" + a + "=") == 0; }
),
args.end()
);
// remove all pairs of arguments "--option value" and "-option value"
auto it = args.erase(
std::remove_if(args.begin(), args.end(),
[a](const std::string &s) { return s.find("--" + a) == 0 || s.find("-" + a) == 0; }
),
args.end()
);
if (it != args.end()) {
args.erase(it);
}
}
}
void sortInputFiles(int argc, const char **argv, std::vector<std::string> &files) {
if (files.size() < 2) return;
IntrusiveRefCntPtr<clang::DiagnosticOptions> diagOpts(new clang::DiagnosticOptions());
clang::TextDiagnosticPrinter diagClient(llvm::errs(), &*diagOpts);
clang::DiagnosticsEngine Diagnostics(IntrusiveRefCntPtr<clang::DiagnosticIDs>(new clang::DiagnosticIDs()), &*diagOpts, &diagClient, false);
std::unique_ptr<clang::driver::Driver> driver(new clang::driver::Driver("", "nvptx64-nvidia-cuda", Diagnostics));
std::vector<const char*> Args(argv, argv + argc);
cleanupHipifyOptions(Args);
std::unique_ptr<clang::driver::Compilation> C(driver->BuildCompilation(Args));
std::vector<std::string> sortedFiles;
for (const auto &J : C->getJobs()) {
if (std::string(J.getCreator().getName()) != "clang") continue;
const auto &JA = J.getArguments();
for (size_t i = 0; i < JA.size(); ++i) {
const auto &A = std::string(JA[i]);
if (std::find(files.begin(), files.end(), A) != files.end() &&
i > 0 && std::string(JA[i - 1]) == "-main-file-name") {
sortedFiles.push_back(A);
}
}
}
if (sortedFiles.empty()) return;
std::reverse(sortedFiles.begin(), sortedFiles.end());
files.assign(sortedFiles.begin(), sortedFiles.end());
}
void appendArgumentsAdjusters(ct::RefactoringTool &Tool, const std::string &sSourceAbsPath, const char *hipify_exe) {
if (!IncludeDirs.empty()) {// 这个可能是命令行工具
for (std::string s : IncludeDirs) {
Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster(s.c_str(), ct::ArgumentInsertPosition::BEGIN));
Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-I", ct::ArgumentInsertPosition::BEGIN));
}
}
if (!MacroNames.empty()) {
for (std::string s : MacroNames) {
Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster(s.c_str(), ct::ArgumentInsertPosition::BEGIN));
Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-D", ct::ArgumentInsertPosition::BEGIN));
}
}
static int Dummy;
std::string hipify = llvm::sys::fs::getMainExecutable(hipify_exe, (void *)&Dummy);// "/home/hipper/llvm_3_4_0_ex/browse_llvm_17/ex/hipify_ex/local_d/bin/hipify-clang"
std::string hipify_parent_path = std::string(llvm::sys::path::parent_path(hipify));// "/home/hipper/llvm_3_4_0_ex/browse_llvm_17/ex/hipify_ex/local_d/bin"
// Includes for clang's CUDA wrappers for using by old packaged hipify-clang
std::string clang_inc_path_old = hipify_parent_path + "/include";
Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster(clang_inc_path_old.c_str(), ct::ArgumentInsertPosition::BEGIN));
Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-Xclang", ct::ArgumentInsertPosition::BEGIN));
Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-internal-isystem", ct::ArgumentInsertPosition::BEGIN));
Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-Xclang", ct::ArgumentInsertPosition::BEGIN));
clang_inc_path_old.append("/cuda_wrappers");
Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster(clang_inc_path_old.c_str(), ct::ArgumentInsertPosition::BEGIN));
Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-Xclang", ct::ArgumentInsertPosition::BEGIN));
Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-internal-isystem", ct::ArgumentInsertPosition::BEGIN));
Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-Xclang", ct::ArgumentInsertPosition::BEGIN));
// Includes for clang's CUDA wrappers for using by new packaged hipify-clang
std::string clang_inc_path_new = hipify_parent_path + "/../include/hipify";
Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster(clang_inc_path_new.c_str(), ct::ArgumentInsertPosition::BEGIN));
Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-Xclang", ct::ArgumentInsertPosition::BEGIN));
Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-internal-isystem", ct::ArgumentInsertPosition::BEGIN));
Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-Xclang", ct::ArgumentInsertPosition::BEGIN));
clang_inc_path_new.append("/cuda_wrappers");
Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster(clang_inc_path_new.c_str(), ct::ArgumentInsertPosition::BEGIN));
Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-Xclang", ct::ArgumentInsertPosition::BEGIN));
Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-internal-isystem", ct::ArgumentInsertPosition::BEGIN));
Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-Xclang", ct::ArgumentInsertPosition::BEGIN));
// Standard c++14 by default
Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-std=c++14", ct::ArgumentInsertPosition::BEGIN));
std::string sInclude = "-I" + sys::path::parent_path(sSourceAbsPath).str();
#if defined(HIPIFY_CLANG_RES)
Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-resource-dir=" HIPIFY_CLANG_RES, ct::ArgumentInsertPosition::BEGIN));
#endif
Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster(sInclude.c_str(), ct::ArgumentInsertPosition::BEGIN));
Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-fno-delayed-template-parsing", ct::ArgumentInsertPosition::BEGIN));
if (llcompat::pragma_once_outside_header()) {
Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-Wno-pragma-once-outside-header", ct::ArgumentInsertPosition::BEGIN));
}
Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("--cuda-host-only", ct::ArgumentInsertPosition::BEGIN));
if (!CudaGpuArch.empty()) {
std::string sCudaGpuArch = "--cuda-gpu-arch=" + CudaGpuArch;
Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster(sCudaGpuArch.c_str(), ct::ArgumentInsertPosition::BEGIN));
}
if (!CudaPath.empty()) {
std::string sCudaPath = "--cuda-path=" + CudaPath;
Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster(sCudaPath.c_str(), ct::ArgumentInsertPosition::BEGIN));
}
llcompat::addTargetIfNeeded(Tool);
Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("cuda", ct::ArgumentInsertPosition::BEGIN));
Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-x", ct::ArgumentInsertPosition::BEGIN));
if (Verbose) {
Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-v", ct::ArgumentInsertPosition::END));
}
Tool.appendArgumentsAdjuster(ct::getClangSyntaxOnlyAdjuster());
}
bool generatePython() {
bool bToRoc = TranslateToRoc;
TranslateToRoc = true;
bool bToPython = python::generate(GeneratePython);
TranslateToRoc = bToRoc;
return bToPython;
}
void printVersions() {
llvm::errs() << "\n" << sHipify << "Supports ROCm HIP from " << Statistics::getHipVersion(hipVersions::HIP_5000) << " up to " << Statistics::getHipVersion(hipVersions::HIP_LATEST);
llvm::errs() << "\n" << sHipify << "Supports CUDA Toolkit from " << Statistics::getCudaVersion(cudaVersions::CUDA_70) << " up to " << Statistics::getCudaVersion(cudaVersions::CUDA_LATEST);
llvm::errs() << "\n" << sHipify << "Supports cuDNN from " << Statistics::getCudaVersion(cudaVersions::CUDNN_705) << " up to " << Statistics::getCudaVersion(cudaVersions::CUDNN_LATEST) << " \n";
}
int main(int argc, const char **argv) {
std::vector<const char*> new_argv(argv, argv + argc);
std::string sCompilationDatabaseDir;
auto it = std::find(new_argv.begin(), new_argv.end(), std::string("-p"));
bool bCompilationDatabase = it == new_argv.end() ? false : true;
bool bNoCompilationDatabaseDir = false;
if (bCompilationDatabase) {
if (it+1 != new_argv.end()) sCompilationDatabaseDir = *(it+1);
else bNoCompilationDatabaseDir = true;
} else {
for (auto &s : new_argv) {//检查命令行参数是否指定了 build_path
std::string str = std::string(s);
if (str.find("-p=") != std::string::npos) {
bCompilationDatabase = true;
sCompilationDatabaseDir = str.substr(3, str.size()-3);
if (sCompilationDatabaseDir.empty()) {
bNoCompilationDatabaseDir = true;
}
break;
}
}
}
if (bCompilationDatabase && bNoCompilationDatabaseDir) {//指定了-p,却没指定路径,参数使用错误,报错。
llvm::errs() << "\n" << sHipify << sError << "Must specify compilation database directory" << "\n";
return 1;
}
if (!bCompilationDatabase && std::find(new_argv.begin(), new_argv.end(), std::string("--")) == new_argv.end()) {//如果没有指定build_path,且命令行中找不到 "--",则给命令行参数数组补上 "--"
new_argv.push_back("--");
new_argv.push_back(nullptr);
argv = new_argv.data();
argc++;
}
llcompat::PrintStackTraceOnErrorSignal();
#if LLVM_VERSION_MAJOR > 12
auto cop = ct::CommonOptionsParser::create(argc, argv, ToolTemplateCategory, llvm::cl::ZeroOrMore);
if (!cop) {
llvm::errs() << "\n" << sHipify << sError << cop.takeError() << "\n";
return 1;
}
ct::CommonOptionsParser &OptionsParser = cop.get();
#else
ct::CommonOptionsParser OptionsParser(argc, argv, ToolTemplateCategory, llvm::cl::ZeroOrMore);
#endif
if (!llcompat::CheckCompatibility()) {//llvm-10 新功能警告信息
return 1;
}
std::unique_ptr<ct::CompilationDatabase> compilationDatabase;
std::vector<std::string> fileSources;//一次可以翻译多个.cu 文件称为.hip文件,文件名存储于 fileSources
if (bCompilationDatabase) {
std::string serr;
compilationDatabase = ct::CompilationDatabase::loadFromDirectory(sCompilationDatabaseDir, serr);
if (nullptr == compilationDatabase.get()) {
llvm::errs() << "\n" << sHipify << sError << "loading Compilation Database from \"" << sCompilationDatabaseDir << "compile_commands.json\" failed\n";
return 1;
}
fileSources = compilationDatabase->getAllFiles();
} else {
fileSources = OptionsParser.getSourcePathList();//OptionParser将 .cu 文件名择了出来
}
if (fileSources.empty() && !GeneratePerl && !GeneratePython && !GenerateMarkdown && !GenerateCSV && !Versions) {// .cu 文件列表不能为空
llvm::errs() << "\n" << sHipify << sError << "Must specify at least 1 positional argument for source file" << "\n";
return 1;
}
if (Versions) printVersions();//如果命令行选项指示需要打印版本信息
if (!GenerateMarkdown && !GenerateCSV && !DocFormat.empty()) {
llvm::errs() << "\n" << sHipify << sError << "Must specify a document type to generate: \"md\" and | or \"csv\"" << "\n";
return 1;
}
if (!perl::generate(GeneratePerl)) {//若生成 per 翻译脚本出错 --perl
llvm::errs() << "\n" << sHipify << sError << "hipify-perl generating failed" << "\n";
return 1;
}
if (!generatePython()) {//若生成 python 翻译脚本出错 --python
llvm::errs() << "\n" << sHipify << sError << "hipify-python generating failed" << "\n";
return 1;
}
if (!doc::generate(GenerateMarkdown, GenerateCSV)) {//生成 Markdown 和 csv 出错
llvm::errs() << "\n" << sHipify << sError << "Documentation generating failed" << "\n";
return 1;
}
if (fileSources.empty()) {//需要转换的 .cu 源文件名列表为空
return 0;
}
std::string dst = OutputFilename, dstDir = OutputDir;//定义空string 变量
std::error_code EC;
std::string sOutputDirAbsPath = getAbsoluteDirectoryPath(OutputDir, EC, "output");
if (EC) {
return 1;
}
if (!dst.empty()) {
if (fileSources.size() > 1) {
llvm::errs() << sHipify << sConflict << "-o and multiple source files are specified\n";
return 1;
}
if (Inplace) {
llvm::errs() << sHipify << sConflict << "both -o and -inplace options are specified\n";
return 1;
}
if (NoOutput) {
llvm::errs() << sHipify << sConflict << "both -no-output and -o options are specified\n";
return 1;
}
if (!dstDir.empty()) {
dst = sOutputDirAbsPath + "/" + dst;
}
}
if (NoOutput && Inplace) {//既不要输出,又要替换,矛盾;
llvm::errs() << sHipify << sConflict << "both -no-output and -inplace options are specified\n";
return 1;
}
if (!dstDir.empty() && Inplace) {//既要指定输出路径,又要本地替换,矛盾;
llvm::errs() << sHipify << sConflict << "both -o-dir and -inplace options are specified\n";
return 1;
}
if (Examine) {
NoOutput = PrintStats = true;
}
int Result = 0;
SmallString<128> tmpFile;
StringRef sourceFileName, ext = "hip", csv_ext = "csv";
std::string sTmpFileName, sSourceAbsPath;
std::string sTmpDirAbsParh = getAbsoluteDirectoryPath(TemporaryDir, EC);
if (EC) {
return 1;
}
// Arguments for the Statistics print routines.
std::unique_ptr<std::ostream> csv = nullptr;
llvm::raw_ostream *statPrint = nullptr;
bool create_csv = false;
if (!OutputStatsFilename.empty()) {
PrintStatsCSV = true;
create_csv = true;
} else {
if (PrintStatsCSV && fileSources.size() > 1) {
OutputStatsFilename = "sum_stat.csv";
create_csv = true;
}
}
if (create_csv) {
if (!OutputDir.empty()) {
OutputStatsFilename = sOutputDirAbsPath + "/" + OutputStatsFilename;
}
csv = std::unique_ptr<std::ostream>(new std::ofstream(OutputStatsFilename, std::ios_base::trunc));
}
if (PrintStats) {
statPrint = &llvm::errs();
}
sortInputFiles(argc, argv, fileSources);
for (const auto &src : fileSources) {
// Create a copy of the file to work on. When we're done, we'll move this onto the
// output (which may mean overwriting the input, if we're in-place).
// Should we fail for some reason, we'll just leak this file and not corrupt the input.
sSourceAbsPath = getAbsoluteFilePath(src, EC);//获取源文件的绝对路径
if (EC) {
continue;
}
sourceFileName = sys::path::filename(sSourceAbsPath);//
if (dst.empty()) {
if (Inplace) {
dst = src;
} else {
dst = src + "." + ext.str();//"vectorAdd.cu" + "." + "hip"
if (!dstDir.empty()) {
dst = sOutputDirAbsPath + "/" + sourceFileName.str() + "." + ext.str();
}
}
}
if (TemporaryDir.empty()) {
EC = sys::fs::createTemporaryFile(sourceFileName, ext, tmpFile);//创建临时文件 /tmp/vectorAdd.cu-1a01d7.hip
if (EC) {
llvm::errs() << "\n" << sHipify << sError << EC.message() << ": " << tmpFile << "\n";
Result = 1;
continue;
}
} else {
sTmpFileName = sTmpDirAbsParh + "/" + sourceFileName.str() + "." + ext.str();
tmpFile = sTmpFileName;
}
EC = sys::fs::copy_file(src, tmpFile);//将 vectorAdd.cu中的内容,拷贝到 /tmp/vectorAdd.cu-1a01d7.hip 中去。
if (EC) {
llvm::errs() << "\n" << sHipify << sError << EC.message() << ": while copying " << src << " to " << tmpFile << "\n";
Result = 1;
continue;
}
if (PrintStatsCSV) {//是否要输出统计信息到csv文件,给csv文件取名字
if (OutputStatsFilename.empty()) {
OutputStatsFilename = sourceFileName.str() + "." + csv_ext.str();
if (!OutputDir.empty()) {
OutputStatsFilename = sOutputDirAbsPath + "/" + OutputStatsFilename;
}
}
if (!csv) {
csv = std::unique_ptr<std::ostream>(new std::ofstream(OutputStatsFilename, std::ios_base::trunc));
}
}
// Initialise the statistics counters for this file.
Statistics::setActive(src);
// RefactoringTool operates on the file in-place. Giving it the output path is no good,
// because that'll break relative includes, and we don't want to overwrite the input file.
// So what we do is operate on a copy, which we then move to the output.
// Tool是重构代码动作的管理和记录者,最后实施重构,通过 runAndSave 成员方法。
// Tool 携带了编译选项信息、被重构的源文件名 /tmp/vectorAdd.cu-1a01d7.hip
ct::RefactoringTool Tool((bCompilationDatabase ? *compilationDatabase.get() : OptionsParser.getCompilations()), std::string(tmpFile.c_str()));
ct::Replacements &replacementsToUse = llcompat::getReplacements(Tool, tmpFile.c_str());//定义 Replacements 变量,指定被重构的源文件
ReplacementsFrontendActionFactory<HipifyAction> actionFactory(&replacementsToUse);// 用 actionFactory 持有了 replacementsToUse 这个 Replacements 类的对象
appendArgumentsAdjusters(Tool, sSourceAbsPath, argv[0]);
Statistics ¤tStat = Statistics::current();
// Hipify _all_ the things!
if (Tool.runAndSave(&actionFactory)) {//实施重构replacements
currentStat.hasErrors = true;
Result = 1;
LLVM_DEBUG(llvm::dbgs() << "Skipped some replacements.\n");
}
// Copy the tmpfile to the output
if (!NoOutput && !currentStat.hasErrors) {
EC = sys::fs::copy_file(tmpFile, dst);//将临时文件复制成为输出的目标文件 cp /tmp/vectorAdd.cu-1a01d7.hip ./vectorAdd.cu.hip
if (EC) {
llvm::errs() << "\n" << sHipify << sError << EC.message() << ": while copying " << tmpFile << " to " << dst << "\n";
Result = 1;
continue;
}
}
// Remove the tmp file without error check
if (!SaveTemps) {
sys::fs::remove(tmpFile);//删除临时文件 rm /tmp/vectorAdd.cu-1a01d7.hip
}
Statistics::current().markCompletion();
Statistics::current().print(csv.get(), statPrint);
dst.clear();
}
if (fileSources.size() > 1) {
Statistics::printAggregate(csv.get(), statPrint);
}
return Result;
}