def build(
    ir_mod,
    target=None,
    target_host=None,
    executor=Executor("graph"),
    runtime=Runtime("cpp"),
    workspace_memory_pools=None,
    constant_memory_pools=None,
    params=None,
    mod_name="default",
):
    # fmt: off
    # pylint: disable=line-too-long
    """Helper function that builds a Relay function to run on TVM graph executor.

    Parameters
    ----------
    ir_mod : :py:class:`~tvm.IRModule`
        The IR module to build. Using relay.Function is deprecated.

    target : None, or any multi-target like object, see Target.canon_multi_target
        For homogeneous compilation, the unique build target.
        For heterogeneous compilation, a dictionary or list of possible build targets.
        Defaults to the current target in the environment if None.

    target_host : None, or any target like object, see Target.canon_target
        Host compilation target, if target is device.

    executor : Optional[Executor]
        The executor configuration with which to build the model.
        Defaults to "graph" if no executor specified.

    runtime : Optional[Runtime]
        Runtime configuration to use when building the model.
        Defaults to "cpp" if no runtime specified.

    workspace_memory_pools : Optional[WorkspaceMemoryPools]
        The object that contains an Array of WorkspacePoolInfo objects
        that hold properties of read-write workspace pools that could be
        used by the inference.

    constant_memory_pools : Optional[ConstantMemoryPools]
        The object that contains an Array of ConstantPoolInfo objects
        that hold properties of read-only pools that could be
        used by the inference.

    params : dict of str to NDArray
        Input parameters to the graph that do not change
        during inference time. Used for constant folding.

    mod_name : Optional[str]
        The module name we will build

    Returns
    -------
    factory_module : tvm.relay.backend.executor_factory.ExecutorFactoryModule
        The runtime factory for the TVM graph executor.
    """
tvm.device()
函数使用给定的设备类型和id构造TVM设备:
def device(dev_type, dev_id=0):
    """Construct a TVM device with given device type and id.

    Parameters
    ----------
    dev_type : int or str
        The device type mask or name of the device.

    dev_id : int, optional
        The integer device id

    Returns
    -------
    dev : tvm.runtime.Device
        The corresponding device.

    Examples
    --------
    Device can be used to create reflection of device by
    string representation of the device type.

    .. code-block:: python

      assert tvm.device("cpu", 1) == tvm.cpu(1)
      assert tvm.device("cuda", 0) == tvm.cuda(0)
    """
tvm.contrib.graph_executor.GraphModule
类是tvm.runtime.Module
的浅包装,使用它可以直接调用tvm.runtime.Module
的set_input()
、run()
、get_output()
函数:
class GraphModule(object):
    """Wrapper runtime module.

    This is a thin wrapper of the underlying TVM module.
    You can also directly call set_input, run, and get_output
    of underlying module functions.

    Parameters
    ----------
    module : tvm.runtime.Module
        The internal tvm module that holds the actual graph functions.

    Attributes
    ----------
    module : tvm.runtime.Module
        The internal tvm module that holds the actual graph functions.

    Examples
    --------

    .. code-block:: python

        import tvm
        from tvm import relay
        from tvm.contrib import graph_executor

        # build the library using graph executor
        lib = relay.build(...)
        lib.export_library("compiled_lib.so")
        # load it back as a runtime
        lib: tvm.runtime.Module = tvm.runtime.load_module("compiled_lib.so")
        # Call the library factory function for default and create
        # a new runtime.Module, wrap with graph module.
        gmod = graph_executor.GraphModule(lib["default"](dev))
        # use the graph module.
        gmod.set_input("x", data)
        gmod.run()
    """
第3步:运行
预处理
preprocess.py
from tvm.contrib.download import download_testdata
from PIL import Image
import numpy as np
def read_img():
    """Download the sample kitten image and turn it into a normalized model input.

    The image is resized to 224x224, converted to float32, transposed so the
    channel axis comes first, normalized per channel with the ImageNet mean
    and standard deviation, and finally given a leading batch axis.

    Returns
    -------
    img_data : numpy.ndarray
        float32 array with a leading batch axis of size 1. Assuming the
        downloaded image is RGB, the shape is (1, 3, 224, 224).
    """
    img_url = "https://s3.amazonaws.com/model-server/inputs/kitten.jpg"
    img_path = download_testdata(img_url, "imagenet_cat.png", module="data")

    # Resize to 224x224.
    resized_image = Image.open(img_path).resize((224, 224))
    img_data = np.asarray(resized_image).astype("float32")

    # ONNX expects NCHW input, so move the channel axis to the front.
    img_data = np.transpose(img_data, (2, 0, 1))

    # Normalize according to the ImageNet statistics.
    imagenet_mean = np.array([0.485, 0.456, 0.406])
    imagenet_stddev = np.array([0.229, 0.224, 0.225])
    norm_img_data = np.zeros(img_data.shape, dtype="float32")
    for i in range(img_data.shape[0]):
        norm_img_data[i, :, :] = (img_data[i, :, :] / 255 - imagenet_mean[i]) / imagenet_stddev[i]

    # Add the batch dimension.
    img_data = np.expand_dims(norm_img_data, axis=0)
    return img_data
运行
直接调用tvm.contrib.graph_executor.GraphModule
的set_input()、run()、get_output()函数:
class GraphModule(object):
    def set_input(self, key=None, value=None, **params):
        """Set inputs to the module via kwargs

        Parameters
        ----------
        key : int or str
            The input key

        value : the input value.
            The input value

        params : dict of str to NDArray
            Additional arguments
        """

    def run(self, **input_dict):
        """Run forward execution of the graph

        Parameters
        ----------
        input_dict : dict of str to NDArray
            Input values to be fed to the graph.
        """

    def get_output(self, index, out=None):
        """Get index-th output to out

        Parameters
        ----------
        index : int
            The output index

        out : NDArray
            The output array container
        """
后处理
postprocess.py
import numpy as np
from scipy.special import softmax
from tvm.contrib.download import download_testdata
def print_labels(data):
    """Print the five highest-scoring classes for a model output.

    Parameters
    ----------
    data : array-like
        Raw model output. It is passed through softmax and squeezed, then
        indexed against the downloaded label list.
    """
    # Download the list of labels.
    labels_url = "https://s3.amazonaws.com/onnx-model-zoo/synset.txt"
    labels_path = download_testdata(labels_url, "synset.txt", module="data")

    with open(labels_path, "r", encoding="utf-8") as f:
        labels = [line.rstrip() for line in f]

    scores = softmax(data)
    scores = np.squeeze(scores)
    # argsort is ascending, so reverse it to get the best score first.
    ranks = np.argsort(scores)[::-1]
    for rank in ranks[0:5]:
        print("class='%s' with probability=%f" % (labels[rank], scores[rank]))
性能测量
performance.py
import timeit
import numpy as np
def test_perf(module):
    """Time ``module.run()`` and summarize the per-run latency.

    ``module.run`` is called ``timing_number`` times per measurement and the
    measurement is repeated ``timing_repeat`` times. Each measurement (which
    ``timeit`` reports in seconds) is converted to milliseconds per run.

    Parameters
    ----------
    module : object
        Any object exposing a zero-argument ``run()`` method.

    Returns
    -------
    timing_statistics : dict
        The "mean", "median" and "std" of the per-run times in milliseconds.
    """
    timing_number = 10
    timing_repeat = 10
    timer = timeit.Timer(module.run)
    timing_results_list = (
        np.array(timer.repeat(repeat=timing_repeat, number=timing_number))
        * 1000 / timing_number
    )
    timing_statistics = {
        "mean": np.mean(timing_results_list),
        "median": np.median(timing_results_list),
        "std": np.std(timing_results_list),
    }
    return timing_statistics


# Despite the ``test_`` prefix this is a benchmark helper, not a pytest test;
# keep pytest from collecting it and failing on the missing ``module`` fixture.
test_perf.__test__ = False
第4步:调优
tvm.autotvm.measure_option()
设置测量选项,为了测量配置,需要构建并运行它,也就是设置Builder
和Runner
参数:
def measure_option(builder, runner):
    """
    Set options for measure. To measure a config, we will build it and run it.
    So we have to set options for these two steps.
    They have their own options on timeout, parallel, etc.

    Parameters
    ----------
    builder : Builder
        Specify how to build programs

    runner : Runner
        Specify how to run programs

    Examples
    --------
    # example setting for using local devices
    >>> measure_option = autotvm.measure_option(
    >>>     builder=autotvm.LocalBuilder(),      # use all local cpu cores for compilation
    >>>     runner=autotvm.LocalRunner(          # measure them sequentially
    >>>         number=10,
    >>>         timeout=5)
    >>> )

    Note
    ----
    To make measurement results accurate, you should pick the correct value for the argument
    `number` and `repeat` in Runner(). Some devices need a certain minimum running time to
    "warm up," such as GPUs that need time to reach a performance power state.
    Using `min_repeat_ms` can dynamically adjust `number`, so it is recommended.
    The typical value for NVIDIA GPU is 150 ms.
    """
tvm.autotvm.LocalBuilder
类在本地编译代码:
class LocalBuilder(Builder):
    """Run compilation on local machine

    Parameters
    ----------
    timeout : float
        The timeout of a compilation

    n_parallel : int
        The number of tasks run in parallel. "None" will use all cpu cores

    build_kwargs : dict
        If supplied, additional kwargs passed to build_func. Overrides any build_kwargs supplied
        by the Runner.

    build_func : callable or str
        If is 'default', use default build function
        If is 'ndk', use function for android ndk
        If is 'stackvm', use function for stackvm
        If is callable, use it as custom build function, expect lib_format field.

    do_fork : bool
        If False, do not fork when building. Requires n_parallel=1.

    runtime : Optional[Runtime]
        Specify the runtime to generate artifacts for
    """
tvm.autotvm.LocalRunner
类在本地运行代码:
class LocalRunner(RPCRunner):
    """Run generated code on local devices.

    Parameters
    ----------
    timeout : float
        The timeout of a compilation

    number : int
        The number of times to run the generated code for taking average.
        We call these runs as one `repeat` of measurement.

    repeat : int, optional
        The number of times to repeat the measurement.
        In total, the generated code will be run (1 + number x repeat) times,
        where the first one is warm up and will be discarded.
        The returned result contains `repeat` costs,
        each of which is an average of `number` costs.

    min_repeat_ms : int, optional
        The minimum duration of one `repeat` in milliseconds.
        By default, one `repeat` contains `number` runs. If this parameter is set,
        the parameters `number` will be dynamically adjusted to meet the
        minimum duration requirement of one `repeat`.
        i.e., When the run time of one `repeat` falls below this time, the `number` parameter
        will be automatically increased.

    cooldown_interval : float, optional
        The cool down interval between two measurements.

    enable_cpu_cache_flush : bool
        Whether to flush cache on CPU between repeated measurements.
        Flushing cache can make the measured latency of one operator closer to
        its actual latency during end-to-end inference.
        To make this option effective, the argument `number` should also be set to 1.
        This only has effect on CPU tasks.

    Note
    ----
    This is a "fake" local mode. We start a silent rpc tracker and rpc server
    for the user. In this way we reuse timeout/isolation mechanism in RPC infrastructure.
    """
tvm.autotvm.task.extract_from_program()
函数从要调优的tvm.IRModule
或relay.function.Function
中提取任务:
def extract_from_program(mod, params, target, target_host=None, ops=None):
    """Extract tuning tasks from a relay program.

    This function is the single program version of extract_from_multiple_program.

    Parameters
    ----------
    mod : tvm.IRModule or relay.function.Function
        The module or function to tune

    params : dict of str to numpy array
        The associated parameters of the program

    target : tvm.target.Target
        The compilation target

    target_host : tvm.target.Target
        The host compilation target

    ops : List[tvm.ir.Op] or None
        List of relay ops to be tuned. If not specified, all tunable ops will be extracted.

    Returns
    -------
    task : Array of autotvm.task.Task
        collected tasks
    """
tvm.autotvm.tuner.XGBTuner
类是使用xgboost作为cost模型的调优器,tuner_obj.tune()
调用了XGBTuner.tune()
进行调优:
class XGBTuner(ModelBasedTuner):
    """Tuner that uses xgboost as cost model

    Parameters
    ----------
    task : Task
        The tuning task

    plan_size : int
        The size of a plan. After `plan_size` trials, the tuner will refit a new cost model
        and do planning for the next `plan_size` trials.

    feature_type : str, optional
        If is 'itervar', use features extracted from IterVar (loop variable).
        If is 'knob', use flatten ConfigEntity directly.
        If is 'curve', use sampled curve feature (relation feature).

        Note on choosing feature type:
        For single task tuning, 'itervar' and 'knob' are good.
        'itervar' is more accurate but 'knob' is much faster.
        There are some constraints on 'itervar', if you meet
        problems with feature extraction when using 'itervar',
        you can switch to 'knob'.

        For cross-shape tuning (e.g. many convolutions with different shapes),
        'itervar' and 'curve' have better transferability,
        'knob' is faster.

        For cross-device or cross-operator tuning, you can use 'curve' only.

    loss_type : str
        If is 'reg', use regression loss to train cost model.
        The cost model predicts the normalized flops.
        If is 'rank', use pairwise rank loss to train cost model.
        The cost model predicts relative rank score.
        If is 'rank-binary', use pairwise rank loss with binarized labels to train cost model.
        The cost model predicts relative rank score.

    num_threads : int, optional
        The number of threads.

    optimizer : str or ModelOptimizer, optional
        If is 'sa', use a default simulated annealing optimizer.
        Otherwise it should be a ModelOptimizer object.

    diversity_filter_ratio : int or float, optional
        If is not None, the tuner will first select
        top-(plan_size * diversity_filter_ratio) candidates according to the cost model
        and then pick batch_size of them according to the diversity metric.

    log_interval : int = 50
        The verbose level.
        If is 0, output nothing.
        Otherwise, output debug information every `log_interval` iterations.
    """

    def tune(self, *args, **kwargs):  # pylint: disable=arguments-differ
        """Run the inherited ``tune`` with the given arguments, then close the cost model's pool."""
        super().tune(*args, **kwargs)

        # manually close pool to avoid multiprocessing issues
        self.cost_model._close_pool()
XGBTuner.tune()
函数内部调用了XGBTuner
的祖先类tvm.autotvm.tuner.Tuner
的Tuner.tune()
函数:
class Tuner(object):
"""Base class for tuners
Parameters
----------
task: autotvm.task.Task
Tuning Task
"""
def tune(self, n_trial, measure_option, early_stopping=None, callbacks=(), si_prefix="G"):
"""Begin tuning
Parameters
----------
n\_trial: int
Maximum number of configs to try (measure on real hardware)
measure\_option: dict
The options for how to measure generated code.
You should use the return value of autotvm.measure\_option for this argument.
early\_stopping: int, optional
Early stop the tuning when not finding better configs in this number of trials
callbacks: List of callable
A list of callback functions. The signature of callback function is
(Tuner, List of MeasureInput, List of MeasureResult)
### 最后
Python崛起并且风靡,因为优点多、应用领域广、被大牛们认可。学习 Python 门槛很低,但它的晋级路线很多,通过它你能进入机器学习、数据挖掘、大数据,CS等更加高级的领域。Python可以做网络应用,可以做科学计算,数据分析,可以做网络爬虫,可以做机器学习、自然语言处理、可以写游戏、可以做桌面应用…Python可以做的很多,你需要学好基础,再选择明确的方向。这里给大家分享一份全套的 Python 学习资料,给那些想学习 Python 的小伙伴们一点帮助!
#### 👉Python所有方向的学习路线👈
Python所有方向的技术点做的整理,形成各个领域的知识点汇总,它的用处就在于,你可以按照上面的知识点去找对应的学习资源,保证自己学得较为全面。
![](https://img-blog.csdnimg.cn/img_convert/604bae65027d4d67fb62410deb210454.png)
#### 👉Python必备开发工具👈
工欲善其事必先利其器。学习Python常用的开发软件都在这里了,给大家节省了很多时间。
![](https://img-blog.csdnimg.cn/img_convert/fa276175617e0048f79437bd30465479.png)
#### 👉Python全套学习视频👈
我们在看视频学习的时候,不能光动眼动脑不动手,比较科学的学习方法是在理解之后运用它们,这时候练手项目就很适合了。
![](https://img-blog.csdnimg.cn/img_convert/16ac689cb023166b2ffa9c677ac40fc0.png)
#### 👉实战案例👈
学python就与学数学一样,是不能只看书不做题的,直接看步骤和答案会让人误以为自己全都掌握了,但是碰到生题的时候还是会一筹莫展。
因此在学习python的过程中一定要记得多动手写代码,教程只需要看一两遍即可。
![](https://img-blog.csdnimg.cn/img_convert/0d8c31c50236a205928a1d8ae8a0b883.png)
#### 👉大厂面试真题👈
我们学习Python必然是为了找到高薪的工作,下面这些面试题是来自阿里、腾讯、字节等一线互联网大厂最新的面试资料,并且有阿里大佬给出了权威的解答,刷完这一套面试资料相信大家都能找到满意的工作。
![](https://img-blog.csdnimg.cn/img_convert/99461e47e58e503d2bc1dc6f4668534a.png)
**[需要这份系统化学习资料的朋友,可以戳这里无偿获取](https://bbs.csdn.net/topics/618317507)**
**一个人可以走得很快,但一群人才能走得更远!不论你是正从事IT行业的老鸟或是对IT行业感兴趣的新人,都欢迎加入我们的圈子(技术交流、学习资源、职场吐槽、大厂内推、面试辅导),让我们一起学习成长!**