源码安装 HIPIFY 和应用示例,将cuda生态源码转化成HIP生态源码

Eloudy

已于 2024-03-27 14:49:17 修改

阅读量1.2k

点赞数 9

文章标签： cuda ROCm 算法

于 2024-02-29 10:49:25 首次发布

本文链接：https://blog.csdn.net/eloudy/article/details/136364439

版权

1，源码下载

GitHub - ROCm/HIPIFY: HIPIFY: Convert CUDA to Portable C++ CodeHIPIFY: Convert CUDA to Portable C++ Code. Contribute to ROCm/HIPIFY development by creating an account on GitHub.https://github.com/ROCm/HIPIFY.git

git clone --recursive https://github.com/ROCm/HIPIFY.git

sudo apt install clang-dev

2，编译并安装

2.1 通常方式

hipify-clang 文档：

https://github.com/ROCm/HIPIFY/blob/amd-staging/docs/hipify-clang.md

编译命令：

cmake -DCMAKE_INSTALL_PREFIX=../dist -DCMAKE_BUILD_TYPE=Release  ..

make -j install

此时 hipify-clang 会被安装到 HIPIFY/dist/bin 中，

测试：

cd  ../dist/bin
hipify --help

如果系统中存在多个llvm版本，在执行翻译命令时，比如hipify-clang ./vectorAdd.cu --cuda-path=/usr/local/cuda-12.1可能会发生错误，如下提示：

CommandLine Error: Option 'static-func-full-module-prefix' registered more than once!
LLVM ERROR: inconsistency in registered CommandLine option

这时需要使用自制的LLVM，如下2.2节所示。

2.2 自制LLVM的方式

2.2.1 下载llvm源码

wget https://github.com/llvm/llvm-project/archive/refs/tags/llvmorg-17.0.6.tar.gz

解压，tar zxf llvmorg.....

2.2.2 配置编译LLVM

cd llvmorg.....

mkdir -p build ../dist/local

cd build

cmake -G "Unix Makefiles" ../llvm      \
-DLLVM_ENABLE_PROJECTS="clang;clang-tools-extra;compiler-rt"           \
-DLLVM_BUILD_EXAMPLES=ON           -DLLVM_TARGETS_TO_BUILD="host"      \
-DCMAKE_BUILD_TYPE=Release           -DLLVM_ENABLE_ASSERTIONS=ON       \
-DLLVM_ENABLE_RUNTIMES=all             -DLLVM_BUILD_LLVM_DYLIB=ON      \
-DCMAKE_INSTALL_PREFIX=../../dist/local

构建：

make -j12

12要小于本机支持的超线程个数，否则会比较卡；

安装：

make -j install

测试时，llvm 被install在如下文件夹：

/home/hipper/ex_dock_hipify/dist/local

加入PATH和LD_LIBRARY_PATH env variable中去,分别将 ....local/bin ....local/lib

ls /home/hipper/ex_dock_hipify/dist/local 如图：

2.2.3 配置编译HIPIFY

cd HIPIFY/

mkdir build_d

cd build_d

cmake \
 -DCMAKE_BUILD_TYPE=Release \
 -DCMAKE_INSTALL_PREFIX=../../dist \
 ..

构建且安装：

make -j install

不需要的配置：

# -DCMAKE_PREFIX_PATH=/home/hipper/ex_dock_hipify/dist/local \
# -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-12.1

# 指定 LLVM 安装目录的配置方法：

#-DCMAKE_PREFIX_PATH=/home/hipper/ex_dock_hipify/dist/local

3. 示例

3.1翻译 .cu 文件到 .hip 文件

命令：

/home/hipper/ex_dock_hipify/HIPIFY/dist/bin/hipify-clang ./vectorAdd.cu  --cuda-path=/usr/local/cuda-12.1

会在 ./ 目录中生成 vectoreAdd.cu.hip 的文件。

其中，hipify-clang 并不检查输入文件的扩展名，比如这里的.cu，它只检查文件内部的内容，将cuda生态的关键字有机地翻译成 hip生态的关键字，输出文件会在原文件名的基础上加上 .hip 后缀；

源代码分别如下。

使用 cuda samples中的vectoradd.cu为例，源码如下：

vectorAdd.cu

#include <stdio.h>
#include <cuda_runtime.h>

__global__ void vectorAdd(const float *A, const float *B, float *C,
                          int numElements) {
  int i = blockDim.x * blockIdx.x + threadIdx.x;

  if (i < numElements) {
    C[i] = A[i] + B[i] + 0.0f;
  }
}

int main(void) {
  cudaError_t err = cudaSuccess;
  int numElements = 50000;
  size_t size = numElements * sizeof(float);
  printf("[Vector addition of %d elements]\n", numElements);

  float *h_A = (float *)malloc(size);
  float *h_B = (float *)malloc(size);
  float *h_C = (float *)malloc(size);
  if (h_A == NULL || h_B == NULL || h_C == NULL) {
    fprintf(stderr, "Failed to allocate host vectors!\n");
    exit(EXIT_FAILURE);
  }

  for (int i = 0; i < numElements; ++i) {
    h_A[i] = rand() / (float)RAND_MAX;
    h_B[i] = rand() / (float)RAND_MAX;
  }

  float *d_A = NULL;
  err = cudaMalloc((void **)&d_A, size);

  if (err != cudaSuccess) {
    fprintf(stderr, "Failed to allocate device vector A (error code %s)!\n",
            cudaGetErrorString(err));
    exit(EXIT_FAILURE);
  }

  float *d_B = NULL;
  err = cudaMalloc((void **)&d_B, size);

  if (err != cudaSuccess) {
    fprintf(stderr, "Failed to allocate device vector B (error code %s)!\n",
            cudaGetErrorString(err));
    exit(EXIT_FAILURE);
  }

  float *d_C = NULL;
  err = cudaMalloc((void **)&d_C, size);

  if (err != cudaSuccess) {
    fprintf(stderr, "Failed to allocate device vector C (error code %s)!\n",
            cudaGetErrorString(err));
    exit(EXIT_FAILURE);
  }

  printf("Copy input data from the host memory to the CUDA device\n");
  err = cudaMemcpy(d_A, h_A, size, cudaMemcpyHostToDevice);

  if (err != cudaSuccess) {
    fprintf(stderr,
            "Failed to copy vector A from host to device (error code %s)!\n",
            cudaGetErrorString(err));
    exit(EXIT_FAILURE);
  }

  err = cudaMemcpy(d_B, h_B, size, cudaMemcpyHostToDevice);

  if (err != cudaSuccess) {
    fprintf(stderr,
            "Failed to copy vector B from host to device (error code %s)!\n",
            cudaGetErrorString(err));
    exit(EXIT_FAILURE);
  }

  int threadsPerBlock = 256;
  int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
  printf("CUDA kernel launch with %d blocks of %d threads\n", blocksPerGrid,
         threadsPerBlock);
  vectorAdd<<<blocksPerGrid, threadsPerBlock>>>(d_A, d_B, d_C, numElements);
  err = cudaGetLastError();

  if (err != cudaSuccess) {
    fprintf(stderr, "Failed to launch vectorAdd kernel (error code %s)!\n",
            cudaGetErrorString(err));
    exit(EXIT_FAILURE);
  }

  printf("Copy output data from the CUDA device to the host memory\n");
  err = cudaMemcpy(h_C, d_C, size, cudaMemcpyDeviceToHost);

  if (err != cudaSuccess) {
    fprintf(stderr,
            "Failed to copy vector C from device to host (error code %s)!\n",
            cudaGetErrorString(err));
    exit(EXIT_FAILURE);
  }

  for (int i = 0; i < numElements; ++i) {
    if (fabs(h_A[i] + h_B[i] - h_C[i]) > 1e-5) {
      fprintf(stderr, "Result verification failed at element %d!\n", i);
      exit(EXIT_FAILURE);
    }
  }

  printf("Test PASSED\n");

  err = cudaFree(d_A);

  if (err != cudaSuccess) {
    fprintf(stderr, "Failed to free device vector A (error code %s)!\n",
            cudaGetErrorString(err));
    exit(EXIT_FAILURE);
  }

  err = cudaFree(d_B);

  if (err != cudaSuccess) {
    fprintf(stderr, "Failed to free device vector B (error code %s)!\n",
            cudaGetErrorString(err));
    exit(EXIT_FAILURE);
  }

  err = cudaFree(d_C);

  if (err != cudaSuccess) {
    fprintf(stderr, "Failed to free device vector C (error code %s)!\n",
            cudaGetErrorString(err));
    exit(EXIT_FAILURE);
  }

  free(h_A);
  free(h_B);
  free(h_C);

  printf("Done\n");
  return 0;
}

生成的 vectorAdd.cu.hip :

#include <stdio.h>
#include <hip/hip_runtime.h>

__global__ void vectorAdd(const float *A, const float *B, float *C,
                          int numElements) {
  int i = blockDim.x * blockIdx.x + threadIdx.x;

  if (i < numElements) {
    C[i] = A[i] + B[i] + 0.0f;
  }
}

int main(void) {
  hipError_t err = hipSuccess;
  int numElements = 50000;
  size_t size = numElements * sizeof(float);
  printf("[Vector addition of %d elements]\n", numElements);

  float *h_A = (float *)malloc(size);
  float *h_B = (float *)malloc(size);
  float *h_C = (float *)malloc(size);
  if (h_A == NULL || h_B == NULL || h_C == NULL) {
    fprintf(stderr, "Failed to allocate host vectors!\n");
    exit(EXIT_FAILURE);
  }

  for (int i = 0; i < numElements; ++i) {
    h_A[i] = rand() / (float)RAND_MAX;
    h_B[i] = rand() / (float)RAND_MAX;
  }

  float *d_A = NULL;
  err = hipMalloc((void **)&d_A, size);

  if (err != hipSuccess) {
    fprintf(stderr, "Failed to allocate device vector A (error code %s)!\n",
            hipGetErrorString(err));
    exit(EXIT_FAILURE);
  }

  float *d_B = NULL;
  err = hipMalloc((void **)&d_B, size);

  if (err != hipSuccess) {
    fprintf(stderr, "Failed to allocate device vector B (error code %s)!\n",
            hipGetErrorString(err));
    exit(EXIT_FAILURE);
  }

  float *d_C = NULL;
  err = hipMalloc((void **)&d_C, size);

  if (err != hipSuccess) {
    fprintf(stderr, "Failed to allocate device vector C (error code %s)!\n",
            hipGetErrorString(err));
    exit(EXIT_FAILURE);
  }

  printf("Copy input data from the host memory to the CUDA device\n");
  err = hipMemcpy(d_A, h_A, size, hipMemcpyHostToDevice);

  if (err != hipSuccess) {
    fprintf(stderr,
            "Failed to copy vector A from host to device (error code %s)!\n",
            hipGetErrorString(err));
    exit(EXIT_FAILURE);
  }

  err = hipMemcpy(d_B, h_B, size, hipMemcpyHostToDevice);

  if (err != hipSuccess) {
    fprintf(stderr,
            "Failed to copy vector B from host to device (error code %s)!\n",
            hipGetErrorString(err));
    exit(EXIT_FAILURE);
  }

  int threadsPerBlock = 256;
  int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
  printf("CUDA kernel launch with %d blocks of %d threads\n", blocksPerGrid,
         threadsPerBlock);
  vectorAdd<<<blocksPerGrid, threadsPerBlock>>>(d_A, d_B, d_C, numElements);
  err = hipGetLastError();

  if (err != hipSuccess) {
    fprintf(stderr, "Failed to launch vectorAdd kernel (error code %s)!\n",
            hipGetErrorString(err));
    exit(EXIT_FAILURE);
  }

  printf("Copy output data from the CUDA device to the host memory\n");
  err = hipMemcpy(h_C, d_C, size, hipMemcpyDeviceToHost);

  if (err != hipSuccess) {
    fprintf(stderr,
            "Failed to copy vector C from device to host (error code %s)!\n",
            hipGetErrorString(err));
    exit(EXIT_FAILURE);
  }

  for (int i = 0; i < numElements; ++i) {
    if (fabs(h_A[i] + h_B[i] - h_C[i]) > 1e-5) {
      fprintf(stderr, "Result verification failed at element %d!\n", i);
      exit(EXIT_FAILURE);
    }
  }

  printf("Test PASSED\n");

  err = hipFree(d_A);

  if (err != hipSuccess) {
    fprintf(stderr, "Failed to free device vector A (error code %s)!\n",
            hipGetErrorString(err));
    exit(EXIT_FAILURE);
  }

  err = hipFree(d_B);

  if (err != hipSuccess) {
    fprintf(stderr, "Failed to free device vector B (error code %s)!\n",
            hipGetErrorString(err));
    exit(EXIT_FAILURE);
  }

  err = hipFree(d_C);

  if (err != hipSuccess) {
    fprintf(stderr, "Failed to free device vector C (error code %s)!\n",
            hipGetErrorString(err));
    exit(EXIT_FAILURE);
  }

  free(h_A);
  free(h_B);
  free(h_C);

  printf("Done\n");
  return 0;
}

3.2 编译运行 vectorAdd.cu.hip

编译：

$ /opt/rocm/bin/hipcc ./vectorAdd.cu.hip -o vectorAdd

运行效果如下图：

3.3 一批hipify-clang 使用参考

 ./hipify-clang square.cu --cuda-path=/usr/local/cuda-12.3 -I /usr/local/cuda-12.3/samples/common/inc
 ./hipify-clang cpp17.cu --cuda-path=/usr/local/cuda-12.3 -- -std=c++17
 ./hipify-clang cpp17.cu ../../square.cu /home/user/cuda/intro.cu --cuda-path=/usr/local/cuda-12.3 -- -std=c++17
 ./hipify-clang square.cu --cuda-path=/usr/local/cuda-12.3 --clang-resource-directory=/usr/llvm/18.1.1/dist/lib/clang/18
 
 
 hipify-clang intro.cu -cuda-path="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.3.2" --print-stats
 hipify-clang intro.cu -cuda-path="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v12.3.2" --print-stats-csv
 
 hipify-clang vectorAdd.cu --cuda-path=/usr/local/cuda-12.3 -o vectorAdd.hip --

当没有指定build-path，从而没有指定 compile-comand.json时，命令行选项中需要出现 -- ；

如果命令行选项中同时并没有出现 --, 那么hipify-clang 会判断后帮助自动添加 --

4, 带hipify-clang test 的构建

cmake
 -DHIPIFY_CLANG_TESTS=ON \
 -DCMAKE_BUILD_TYPE=Release \
 -DCMAKE_INSTALL_PREFIX=../dist \
 -DCMAKE_PREFIX_PATH=/usr/llvm/18.1.1/dist \
 -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-12.3.2 \
 -DCUDA_DNN_ROOT_DIR=/usr/local/cudnn-8.9.7 \
 -DCUDA_CUB_ROOT_DIR=/usr/local/cub-2.1.0 \
 -DLLVM_EXTERNAL_LIT=/usr/llvm/18.1.1/build/bin/llvm-lit \
 ../hipify

5, hipify-per 示例

$ ../local_d/bin/hipify-perl vectorAdd.cu > vectorAdd.hip

6，对main函数的一点注释

HIPIFY/src/main.cpp at amd-staging · ROCm/HIPIFY · GitHub

/*
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/

#include <cstdio>
#include <fstream>
#include <set>
#include <cmath>
#include <chrono>
#include <iomanip>
#include <sstream>
#include "CUDA2HIP.h"
#include "CUDA2HIP_Scripting.h"
#include "LLVMCompat.h"
#include "HipifyAction.h"
#include "ArgParse.h"
#include "StringUtils.h"
#include "llvm/Support/Debug.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/DiagnosticIDs.h"
#include "clang/Basic/DiagnosticOptions.h"
#include "clang/Driver/Driver.h"
#include "clang/Driver/Compilation.h"
#include "clang/Driver/Tool.h"
#include "clang/Frontend/TextDiagnosticPrinter.h"

#if LLVM_VERSION_MAJOR < 8
#include "llvm/Support/Path.h"
#endif

constexpr auto DEBUG_TYPE = "cuda2hip";

namespace ct = clang::tooling;

void cleanupHipifyOptions(std::vector<const char*> &args) {
  for (const auto &a : hipifyOptions) {
    args.erase(std::remove(args.begin(), args.end(), "--" + a), args.end());
    args.erase(std::remove(args.begin(), args.end(), "-" + a), args.end());
  }
  for (const auto &a : hipifyOptionsWithTwoArgs) {
    // remove all "-option=value" and "--option=value"
    args.erase(
      std::remove_if(args.begin(), args.end(),
        [a](const std::string &s) { return s.find("--" + a + "=") == 0 || s.find("-" + a + "=") == 0; }
      ),
      args.end()
    );
    // remove all pairs of arguments "--option value" and "-option value"
    auto it = args.erase(
      std::remove_if(args.begin(), args.end(),
        [a](const std::string &s) { return s.find("--" + a) == 0 || s.find("-" + a) == 0; }
      ),
      args.end()
    );
    if (it != args.end()) {
        args.erase(it);
    }
  }
}

void sortInputFiles(int argc, const char **argv, std::vector<std::string> &files) {
  if (files.size() < 2) return;
  IntrusiveRefCntPtr<clang::DiagnosticOptions> diagOpts(new clang::DiagnosticOptions());
  clang::TextDiagnosticPrinter diagClient(llvm::errs(), &*diagOpts);
  clang::DiagnosticsEngine Diagnostics(IntrusiveRefCntPtr<clang::DiagnosticIDs>(new clang::DiagnosticIDs()), &*diagOpts, &diagClient, false);
  std::unique_ptr<clang::driver::Driver> driver(new clang::driver::Driver("", "nvptx64-nvidia-cuda", Diagnostics));
  std::vector<const char*> Args(argv, argv + argc);
  cleanupHipifyOptions(Args);
  std::unique_ptr<clang::driver::Compilation> C(driver->BuildCompilation(Args));
  std::vector<std::string> sortedFiles;
  for (const auto &J : C->getJobs()) {
    if (std::string(J.getCreator().getName()) != "clang") continue;
    const auto &JA = J.getArguments();
    for (size_t i = 0; i < JA.size(); ++i) {
      const auto &A = std::string(JA[i]);
      if (std::find(files.begin(), files.end(), A) != files.end() &&
        i > 0 && std::string(JA[i - 1]) == "-main-file-name") {
        sortedFiles.push_back(A);
      }
    }
  }
  if (sortedFiles.empty()) return;
  std::reverse(sortedFiles.begin(), sortedFiles.end());
  files.assign(sortedFiles.begin(), sortedFiles.end());
}

void appendArgumentsAdjusters(ct::RefactoringTool &Tool, const std::string &sSourceAbsPath, const char *hipify_exe) {
  if (!IncludeDirs.empty()) {// 这个可能是命令行工具
    for (std::string s : IncludeDirs) {
      Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster(s.c_str(), ct::ArgumentInsertPosition::BEGIN));
      Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-I", ct::ArgumentInsertPosition::BEGIN));
    }
  }
  if (!MacroNames.empty()) {
    for (std::string s : MacroNames) {
      Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster(s.c_str(), ct::ArgumentInsertPosition::BEGIN));
      Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-D", ct::ArgumentInsertPosition::BEGIN));
    }
  }
  static int Dummy;
  std::string hipify = llvm::sys::fs::getMainExecutable(hipify_exe, (void *)&Dummy);// "/home/hipper/llvm_3_4_0_ex/browse_llvm_17/ex/hipify_ex/local_d/bin/hipify-clang"
  std::string hipify_parent_path = std::string(llvm::sys::path::parent_path(hipify));// "/home/hipper/llvm_3_4_0_ex/browse_llvm_17/ex/hipify_ex/local_d/bin"
  // Includes for clang's CUDA wrappers for using by old packaged hipify-clang
  std::string clang_inc_path_old = hipify_parent_path + "/include";
  Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster(clang_inc_path_old.c_str(), ct::ArgumentInsertPosition::BEGIN));
  Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-Xclang", ct::ArgumentInsertPosition::BEGIN));
  Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-internal-isystem", ct::ArgumentInsertPosition::BEGIN));
  Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-Xclang", ct::ArgumentInsertPosition::BEGIN));
  clang_inc_path_old.append("/cuda_wrappers");
  Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster(clang_inc_path_old.c_str(), ct::ArgumentInsertPosition::BEGIN));
  Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-Xclang", ct::ArgumentInsertPosition::BEGIN));
  Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-internal-isystem", ct::ArgumentInsertPosition::BEGIN));
  Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-Xclang", ct::ArgumentInsertPosition::BEGIN));
  // Includes for clang's CUDA wrappers for using by new packaged hipify-clang
  std::string clang_inc_path_new = hipify_parent_path + "/../include/hipify";
  Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster(clang_inc_path_new.c_str(), ct::ArgumentInsertPosition::BEGIN));
  Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-Xclang", ct::ArgumentInsertPosition::BEGIN));
  Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-internal-isystem", ct::ArgumentInsertPosition::BEGIN));
  Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-Xclang", ct::ArgumentInsertPosition::BEGIN));
  clang_inc_path_new.append("/cuda_wrappers");
  Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster(clang_inc_path_new.c_str(), ct::ArgumentInsertPosition::BEGIN));
  Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-Xclang", ct::ArgumentInsertPosition::BEGIN));
  Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-internal-isystem", ct::ArgumentInsertPosition::BEGIN));
  Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-Xclang", ct::ArgumentInsertPosition::BEGIN));
  // Standard c++14 by default
  Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-std=c++14", ct::ArgumentInsertPosition::BEGIN));
  std::string sInclude = "-I" + sys::path::parent_path(sSourceAbsPath).str();
#if defined(HIPIFY_CLANG_RES)
  Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-resource-dir=" HIPIFY_CLANG_RES, ct::ArgumentInsertPosition::BEGIN));
#endif
  Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster(sInclude.c_str(), ct::ArgumentInsertPosition::BEGIN));
  Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-fno-delayed-template-parsing", ct::ArgumentInsertPosition::BEGIN));
  if (llcompat::pragma_once_outside_header()) {
    Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-Wno-pragma-once-outside-header", ct::ArgumentInsertPosition::BEGIN));
  }
  Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("--cuda-host-only", ct::ArgumentInsertPosition::BEGIN));
  if (!CudaGpuArch.empty()) {
    std::string sCudaGpuArch = "--cuda-gpu-arch=" + CudaGpuArch;
    Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster(sCudaGpuArch.c_str(), ct::ArgumentInsertPosition::BEGIN));
  }
  if (!CudaPath.empty()) {
    std::string sCudaPath = "--cuda-path=" + CudaPath;
    Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster(sCudaPath.c_str(), ct::ArgumentInsertPosition::BEGIN));
  }
  llcompat::addTargetIfNeeded(Tool);
  Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("cuda", ct::ArgumentInsertPosition::BEGIN));
  Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-x", ct::ArgumentInsertPosition::BEGIN));
  if (Verbose) {
    Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-v", ct::ArgumentInsertPosition::END));
  }
  Tool.appendArgumentsAdjuster(ct::getClangSyntaxOnlyAdjuster());
}

bool generatePython() {
  bool bToRoc = TranslateToRoc;
  TranslateToRoc = true;
  bool bToPython = python::generate(GeneratePython);
  TranslateToRoc = bToRoc;
  return bToPython;
}

void printVersions() {
  llvm::errs() << "\n" << sHipify << "Supports ROCm HIP from " << Statistics::getHipVersion(hipVersions::HIP_5000) << " up to " << Statistics::getHipVersion(hipVersions::HIP_LATEST);
  llvm::errs() << "\n" << sHipify << "Supports CUDA Toolkit from " << Statistics::getCudaVersion(cudaVersions::CUDA_70) << " up to " << Statistics::getCudaVersion(cudaVersions::CUDA_LATEST);
  llvm::errs() << "\n" << sHipify << "Supports cuDNN from " << Statistics::getCudaVersion(cudaVersions::CUDNN_705) << " up to " << Statistics::getCudaVersion(cudaVersions::CUDNN_LATEST) << " \n";
}

int main(int argc, const char **argv) {
  std::vector<const char*> new_argv(argv, argv + argc);
  std::string sCompilationDatabaseDir;
  auto it = std::find(new_argv.begin(), new_argv.end(), std::string("-p"));
  bool bCompilationDatabase = it == new_argv.end() ? false : true;
  bool bNoCompilationDatabaseDir = false;
  if (bCompilationDatabase) {
    if (it+1 != new_argv.end()) sCompilationDatabaseDir = *(it+1);
    else bNoCompilationDatabaseDir = true;
  } else {
    for (auto &s : new_argv) {//检查命令行参数是否指定了 build_path
      std::string str = std::string(s);
      if (str.find("-p=") != std::string::npos) {
        bCompilationDatabase = true;
        sCompilationDatabaseDir = str.substr(3, str.size()-3);
        if (sCompilationDatabaseDir.empty()) {
          bNoCompilationDatabaseDir = true;
        }
        break;
      }
    }
  }
  if (bCompilationDatabase && bNoCompilationDatabaseDir) {//指定了-p，却没指定路径，参数使用错误，报错。
    llvm::errs() << "\n" << sHipify << sError << "Must specify compilation database directory" << "\n";
    return 1;
  }
  if (!bCompilationDatabase && std::find(new_argv.begin(), new_argv.end(), std::string("--")) == new_argv.end()) {//如果没有指定build_path,且命令行中找不到 "--",则给命令行参数数组补上 "--"
    new_argv.push_back("--");
    new_argv.push_back(nullptr);
    argv = new_argv.data();
    argc++;
  }
  llcompat::PrintStackTraceOnErrorSignal();
#if LLVM_VERSION_MAJOR > 12
  auto cop = ct::CommonOptionsParser::create(argc, argv, ToolTemplateCategory, llvm::cl::ZeroOrMore);
  if (!cop) {
    llvm::errs() << "\n" << sHipify << sError << cop.takeError() << "\n";
    return 1;
  }
  ct::CommonOptionsParser &OptionsParser = cop.get();
#else
  ct::CommonOptionsParser OptionsParser(argc, argv, ToolTemplateCategory, llvm::cl::ZeroOrMore);
#endif
  if (!llcompat::CheckCompatibility()) {//llvm-10 新功能警告信息
    return 1;
  }
  std::unique_ptr<ct::CompilationDatabase> compilationDatabase;
  std::vector<std::string> fileSources;//一次可以翻译多个.cu 文件称为.hip文件，文件名存储于 fileSources
  if (bCompilationDatabase) {
    std::string serr;
    compilationDatabase = ct::CompilationDatabase::loadFromDirectory(sCompilationDatabaseDir, serr);
    if (nullptr == compilationDatabase.get()) {
      llvm::errs() << "\n" << sHipify << sError << "loading Compilation Database from \"" << sCompilationDatabaseDir << "compile_commands.json\" failed\n";
      return 1;
    }
    fileSources = compilationDatabase->getAllFiles();
  } else {
    fileSources = OptionsParser.getSourcePathList();//OptionParser将 .cu 文件名择了出来
  }
  if (fileSources.empty() && !GeneratePerl && !GeneratePython && !GenerateMarkdown && !GenerateCSV && !Versions) {// .cu 文件列表不能为空
    llvm::errs() << "\n" << sHipify << sError << "Must specify at least 1 positional argument for source file" << "\n";
    return 1;
  }
  if (Versions) printVersions();//如果命令行选项指示需要打印版本信息
  if (!GenerateMarkdown && !GenerateCSV && !DocFormat.empty()) {
    llvm::errs() << "\n" << sHipify << sError << "Must specify a document type to generate: \"md\" and | or \"csv\"" << "\n";
    return 1;
  }
  if (!perl::generate(GeneratePerl)) {//若生成 per 翻译脚本出错 --perl
    llvm::errs() << "\n" << sHipify << sError << "hipify-perl generating failed" << "\n";
    return 1;
  }
  if (!generatePython()) {//若生成 python 翻译脚本出错 --python
    llvm::errs() << "\n" << sHipify << sError << "hipify-python generating failed" << "\n";
    return 1;
  }
  if (!doc::generate(GenerateMarkdown, GenerateCSV)) {//生成 Markdown 和 csv 出错
    llvm::errs() << "\n" << sHipify << sError << "Documentation generating failed" << "\n";
    return 1;
  }
  if (fileSources.empty()) {//需要转换的 .cu 源文件名列表为空
    return 0;
  }
  std::string dst = OutputFilename, dstDir = OutputDir;//定义空string 变量
  std::error_code EC;
  std::string sOutputDirAbsPath = getAbsoluteDirectoryPath(OutputDir, EC, "output");
  if (EC) {
    return 1;
  }
  if (!dst.empty()) {
    if (fileSources.size() > 1) {
      llvm::errs() << sHipify << sConflict << "-o and multiple source files are specified\n";
      return 1;
    }
    if (Inplace) {
      llvm::errs() << sHipify << sConflict << "both -o and -inplace options are specified\n";
      return 1;
    }
    if (NoOutput) {
      llvm::errs() << sHipify << sConflict << "both -no-output and -o options are specified\n";
      return 1;
    }
    if (!dstDir.empty()) {
      dst = sOutputDirAbsPath + "/" + dst;
    }
  }
  if (NoOutput && Inplace) {//既不要输出，又要替换，矛盾；
    llvm::errs() << sHipify << sConflict << "both -no-output and -inplace options are specified\n";
    return 1;
  }
  if (!dstDir.empty() && Inplace) {//既要指定输出路径，又要本地替换，矛盾；
    llvm::errs() << sHipify << sConflict << "both -o-dir and -inplace options are specified\n";
    return 1;
  }
  if (Examine) {
    NoOutput = PrintStats = true;
  }
  int Result = 0;
  SmallString<128> tmpFile;
  StringRef sourceFileName, ext = "hip", csv_ext = "csv";
  std::string sTmpFileName, sSourceAbsPath;
  std::string sTmpDirAbsParh = getAbsoluteDirectoryPath(TemporaryDir, EC);
  if (EC) {
    return 1;
  }
  // Arguments for the Statistics print routines.
  std::unique_ptr<std::ostream> csv = nullptr;
  llvm::raw_ostream *statPrint = nullptr;
  bool create_csv = false;
  if (!OutputStatsFilename.empty()) {
    PrintStatsCSV = true;
    create_csv = true;
  } else {
    if (PrintStatsCSV && fileSources.size() > 1) {
      OutputStatsFilename = "sum_stat.csv";
      create_csv = true;
    }
  }
  if (create_csv) {
    if (!OutputDir.empty()) {
      OutputStatsFilename = sOutputDirAbsPath + "/" + OutputStatsFilename;
    }
    csv = std::unique_ptr<std::ostream>(new std::ofstream(OutputStatsFilename, std::ios_base::trunc));
  }
  if (PrintStats) {
    statPrint = &llvm::errs();
  }
  sortInputFiles(argc, argv, fileSources);
  for (const auto &src : fileSources) {
    // Create a copy of the file to work on. When we're done, we'll move this onto the
    // output (which may mean overwriting the input, if we're in-place).
    // Should we fail for some reason, we'll just leak this file and not corrupt the input.
    sSourceAbsPath = getAbsoluteFilePath(src, EC);//获取源文件的绝对路径
    if (EC) {
      continue;
    }
    sourceFileName = sys::path::filename(sSourceAbsPath);//
    if (dst.empty()) {
      if (Inplace) {
        dst = src;
      } else {
        dst = src + "." + ext.str();//"vectorAdd.cu" + "." + "hip"
        if (!dstDir.empty()) {
          dst = sOutputDirAbsPath + "/" + sourceFileName.str() + "." + ext.str();
        }
      }
    }
    if (TemporaryDir.empty()) {
      EC = sys::fs::createTemporaryFile(sourceFileName, ext, tmpFile);//创建临时文件 /tmp/vectorAdd.cu-1a01d7.hip
      if (EC) {
        llvm::errs() << "\n" << sHipify << sError << EC.message() << ": " << tmpFile << "\n";
        Result = 1;
        continue;
      }
    } else {
      sTmpFileName = sTmpDirAbsParh + "/" + sourceFileName.str() + "." + ext.str();
      tmpFile = sTmpFileName;
    }
    EC = sys::fs::copy_file(src, tmpFile);//将 vectorAdd.cu中的内容，拷贝到 /tmp/vectorAdd.cu-1a01d7.hip 中去。
    if (EC) {
      llvm::errs() << "\n" << sHipify << sError << EC.message() << ": while copying " << src << " to " << tmpFile << "\n";
      Result = 1;
      continue;
    }
    if (PrintStatsCSV) {//是否要输出统计信息到csv文件，给csv文件取名字
      if (OutputStatsFilename.empty()) {
        OutputStatsFilename = sourceFileName.str() + "." + csv_ext.str();
        if (!OutputDir.empty()) {
          OutputStatsFilename = sOutputDirAbsPath + "/" + OutputStatsFilename;
        }
      }
      if (!csv) {
        csv = std::unique_ptr<std::ostream>(new std::ofstream(OutputStatsFilename, std::ios_base::trunc));
      }
    }
    // Initialise the statistics counters for this file.
    Statistics::setActive(src);
    // RefactoringTool operates on the file in-place. Giving it the output path is no good,
    // because that'll break relative includes, and we don't want to overwrite the input file.
    // So what we do is operate on a copy, which we then move to the output.
    // Tool是重构代码动作的管理和记录者，最后实施重构，通过 runAndSave 成员方法。
    // Tool 携带了编译选项信息、被重构的源文件名 /tmp/vectorAdd.cu-1a01d7.hip
    ct::RefactoringTool Tool((bCompilationDatabase ? *compilationDatabase.get() : OptionsParser.getCompilations()), std::string(tmpFile.c_str()));
    ct::Replacements &replacementsToUse = llcompat::getReplacements(Tool, tmpFile.c_str());//定义 Replacements 变量，指定被重构的源文件
    
    ReplacementsFrontendActionFactory<HipifyAction> actionFactory(&replacementsToUse);// 用 actionFactory 持有了 replacementsToUse 这个 Replacements 类的对象
    appendArgumentsAdjusters(Tool, sSourceAbsPath, argv[0]);
    
    Statistics &currentStat = Statistics::current();
    // Hipify _all_ the things!
    if (Tool.runAndSave(&actionFactory)) {//实施重构replacements
      currentStat.hasErrors = true;
      Result = 1;
      LLVM_DEBUG(llvm::dbgs() << "Skipped some replacements.\n");
    }
    // Copy the tmpfile to the output
    if (!NoOutput && !currentStat.hasErrors) {
      EC = sys::fs::copy_file(tmpFile, dst);//将临时文件复制成为输出的目标文件 cp /tmp/vectorAdd.cu-1a01d7.hip ./vectorAdd.cu.hip
      if (EC) {
        llvm::errs() << "\n" << sHipify << sError << EC.message() << ": while copying " << tmpFile << " to " << dst << "\n";
        Result = 1;
        continue;
      }
    }
    // Remove the tmp file without error check
    if (!SaveTemps) {
      sys::fs::remove(tmpFile);//删除临时文件 rm /tmp/vectorAdd.cu-1a01d7.hip
    }
    Statistics::current().markCompletion();
    Statistics::current().print(csv.get(), statPrint);
    dst.clear();
  }
  if (fileSources.size() > 1) {
    Statistics::printAggregate(csv.get(), statPrint);
  }
  return Result;
}