1、编译需要打开的选项:

  set(USE_SORT ON)

  参考资料:

  discuss.tvm.ai/t/solved-cant-run-tutorials-ssd-model-on-my-own-cpu/2005

  2、编译gpu模型:

  编译时,打开编译cuda选项:tvm_option(USE_CUDA "Build with CUDA" ON)

  在jetson nano上编译GPU版本时,需要将cuda加入到环境变量里面去:

  export CUDA_HOME=/usr/local/cuda-10.0

  export LD_LIBRARY_PATH=/usr/local/cuda-10.0/lib64:$LD_LIBRARY_PATH

  export PATH=/usr/local/cuda-10.0/bin:$PATH

  将target = tvm.target.create("llvm -mcpu=haswell")替换为:target = "cuda"

  参考资料:

  github.com/

  3、Android编译

  cp make/config.mk .

  APP_ABI = armeabi-v7a

  ./make_standalone_toolchain.py --arch arm --api 23 --install-dir /opt/android-toolchain-armv7 -mfloat-abi=soft

  参考资料:

  discuss.tvm.ai

  4、LLVM 在windows上编译

  1)下载LLVM源码

  首先下载LLVM源码,下载地址为:

  github.com/MirrorYuChen/llvm-project/tree/release/6.x

  这里对应LLVM版本为6.x,后面需要用LLD工具,这个源码里面就自带有,然后在LLVM文件夹下面新建一个build文件夹,并在此文件夹路径下打开cmd窗口,输入如下命令:

  cmake -G "Visual Studio 15 2017 Win64" .. -Thost=x64 -DLLVM_ENABLE_PROJECTS=lld

  打开生成的llvm.sln项目,切换到release x64模式编译,大约需要1小时时间编译完成,并运行install。

  Jetson nano:

  >> git clone https://github.com/llvm/llvm-project llvm-project

  >> cd llvm-project

  >> mkdir build

  >> cd build

  >> cmake -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_PROJECTS=lld -DCMAKE_INSTALL_PREFIX=/usr/local ../../llvm-project/llvm

  >> make -j3 && make install

  2)下载tvm源码

  git clone --recursive https://github.com/dmlc/tvm/

  在tvm项目路径下新建build子文件夹,并在当前路径下新建一个bash.sh文件,文件内容为:

  cmake -G "Visual Studio 15 2017 Win64" -DCMAKE_BUILD_TYPE=Release \

  -DCMAKE_CONFIGURATION_TYPES="Release" .. \

  -DLLVM_DIR="D:/softW/LLVM/lib/cmake/llvm"

  后面LLVM路径对应到刚install生成的LLVM路径,打开生成的tvm.sln项目,编译运行。

  3)安装

  先新建一个conda环境:

  conda create -n tf python==3.5

  激活环境:

  activate tf

  分别安装tensorflow和mxnet

  pip install tensorflow

  pip install mxnet

  分别进入tvm、topi、nnvm文件夹下,运行下面命令进行安装

  python setup.py install

  安装完成之后,可以进入tvm的tutorials子文件夹下,运行相关例程。

  这里是一个ssd运行例程:

  测试代码为:

  #!/usr/bin/python3

  import os

  import tvm

  import numpy as np

  import time

  from tvm.contrib.download import download

  from tvm.contrib import graph_runtime

  def current_milli_time():
      """Return the current wall-clock time as a whole number of milliseconds."""
      return int(round(time.time() * 1000))


  test_image = "dog.jpg"
  # Input tensor shape in (batch, channels, height, width) order.
  dshape = (1, 3, 512, 512)
  # dshape = (1, 3, 608, 608)
  dtype = "float32"

  image_url = ("https://cloud.githubusercontent.com/assets/3307514/20012567/"
               "cbb60336-a27d-11e6-93ff-cbc3f09f5c9e.jpg")
  download(image_url, test_image)

  # Preprocess image
  import cv2

  test_image_path = test_image
  image = cv2.imread(test_image_path)
  if image is None:
      # cv2.imread signals failure by returning None instead of raising.
      raise IOError("cannot read image: {}".format(test_image_path))
  # cv2.resize expects dsize as (width, height); dshape is (N, C, H, W).
  img_data = cv2.resize(image, (dshape[3], dshape[2]))
  # Reverse the channel axis (BGR -> RGB) and switch to float32 so the
  # in-place subtraction below does not wrap around on uint8 data.
  img_data = img_data[:, :, (2, 1, 0)].astype(np.float32)
  # Subtract per-channel constants (presumably the training-set RGB means).
  img_data -= np.array([123, 117, 104])
  # HWC -> CHW to match the channel-first layout of dshape.
  img_data = np.transpose(np.array(img_data), (2, 0, 1))

  ctx = tvm.cpu()
  target = "llvm"

  # Alternative model directories:
  # base = "deploy_ssd_resnet50_512/{}/".format(target)
  # base = "deploy_ssd_inceptionv3_512/{}/".format(target)
  # base = "deploy_ssd_mobilenet_512/{}/".format(target)
  # base = "deploy_ssd_mobilenet_608/{}/".format(target)
  # base = "cpu-model/"
  base = "./"

  path_lib = base + "model.so"
  path_graph = base + "model.json"
  path_param = base + "model.params"

  # Read the graph definition and the serialized parameters, closing the
  # files as soon as their contents are in memory.
  with open(path_graph) as graph_file:
      graph = graph_file.read()
  with open(path_param, "rb") as param_file:
      params = bytearray(param_file.read())
  lib = tvm.module.load(path_lib)

  class_names = [
      "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat",
      "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person",
      "pottedplant", "sheep", "sofa", "train", "tvmonitor",
  ]

  ######################################################################
  # Create TVM runtime and do inference
  # Build TVM runtime
  m = graph_runtime.create(graph, lib, ctx)
  m.load_params(params)
  input_data = tvm.nd.array(img_data.astype(dtype))

  # Dry run before timing (presumably to keep first-run overhead out of
  # the measurement below).
  m.run(data=input_data)

  # Timed run: the measured interval covers run() plus fetching output 0.
  t1 = current_milli_time()
  m.run(data=input_data)
  tvm_output = m.get_output(0)
  t2 = current_milli_time()

  print(base)
  print("time: {} ms".format(t2 - t1))

  # Each row of `out` is read as: [0] class id, [1] score, remaining values
  # are printed as-is.
  out = tvm_output.asnumpy()[0]
  i = 0
  for det in out:
      cid = int(det[0])
      if cid < 0:
          # Negative class ids are skipped (presumably invalid/padding rows).
          continue
      score = det[1]
      if score < 0.5:
          continue
      i += 1
      print(i, class_names[cid], det)

  ######################################################################

  # Display result

  def display(img, out, thresh=0.5):
      """Show `img` with a labelled box for every detection scoring >= `thresh`.

      Args:
          img: Image array handed to ``plt.imshow``; ``img.shape[0]`` and
              ``img.shape[1]`` are used as height and width for box scaling.
          out: Iterable of detection rows. For each row, element 0 is read
              as the class id, element 1 as the score, and elements 2..5 as
              (xmin, ymin, xmax, ymax) multiplied by image width/height.
              Rows must support ``row[2:6].tolist()``.
          thresh: Minimum score a detection needs in order to be drawn.

      Class labels are looked up in the module-level ``class_names`` list.
      """
      import random
      import matplotlib as mpl
      import matplotlib.pyplot as plt

      mpl.rcParams['figure.figsize'] = (10, 10)
      # One random colour per class id, reused for every box of that class.
      pens = dict()
      plt.clf()
      plt.imshow(img)
      # (width, height, width, height): one factor per box coordinate.
      # Loop-invariant, so computed once up front.
      scales = [img.shape[1], img.shape[0]] * 2
      for det in out:
          cid = int(det[0])
          if cid < 0:
              continue
          score = det[1]
          if score < thresh:
              continue
          if cid not in pens:
              pens[cid] = (random.random(), random.random(), random.random())
          xmin, ymin, xmax, ymax = [
              int(p * s) for p, s in zip(det[2:6].tolist(), scales)
          ]
          rect = plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                               fill=False, edgecolor=pens[cid], linewidth=3)
          plt.gca().add_patch(rect)
          text = class_names[cid]
          # Place the label just above the box's top-left corner.
          plt.gca().text(xmin, ymin - 2, '{:s} {:.3f}'.format(text, score),
                         bbox=dict(facecolor=pens[cid], alpha=0.5),
                         fontsize=12, color='white')
      plt.show()

  # Convert the OpenCV image from BGR to RGB channel order before plotting,
  # then draw every detection with a score of at least 0.45.
  image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
  display(image, out=tvm_output.asnumpy()[0], thresh=0.45)

  代码来自于github