# Get Started with Tensor Expression
#
# TVM uses a domain-specific tensor expression language to construct kernels efficiently.
# In[1]:
import tvm
import tvm.testing
from tvm import te
import numpy as np
# In[2]:
tgt_host = "llvm"  # target for the host-side driver code
tgt = "cuda"       # target for the device kernel
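# Sketch (not in the original): fall back to a CPU build when no CUDA device
# is present, so the rest of the walkthrough still runs; the `if tgt == "cuda"`
# guards below then skip the GPU-specific steps.
if not tvm.context("cuda", 0).exist:
    tgt = "llvm"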
# In[3]:
n = te.var("n")  # symbolic variable for the vector length
A = te.placeholder((n,), name="A")  # placeholder tensor
B = te.placeholder((n,), name="B")  # placeholder tensor
C = te.compute(A.shape, lambda i: A[i] + B[i], name="C")  # compute operation: no computation happens here, it only declares how C is computed
print(type(C))
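# A quick check (a sketch, not in the original): C is a te.Tensor backed by a
# ComputeOp, and its shape is still the symbolic (n,), confirming that nothing
# has actually been computed yet.
print(C.shape)  # [n]
print(C.op)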
# In[4]:
s = te.create_schedule(C.op) #construct the schedule
# In[5]:
bx, tx = s[C].split(C.op.axis[0], factor=64)  # split the first axis into outer (bx) and inner (tx) loops; the inner loop has extent 64
# In[6]:
"""
我们将迭代轴bx和tx绑定到GPU计算网格中的线程。这些是GPU特定的构造,允许我们生成在GPU上运行的代码。
"""
if tgt == "cuda":
    s[C].bind(bx, te.thread_axis("blockIdx.x"))
    s[C].bind(tx, te.thread_axis("threadIdx.x"))
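# To see what the schedule does, it can help to print the lowered IR at this
# point (a sketch; tvm.lower with simple_mode=True returns a readable form
# showing the split loops and, on CUDA, the thread bindings):
print(tvm.lower(s, [A, B, C], simple_mode=True))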
# In[7]:
fadd = tvm.build(s, [A, B, C], tgt, target_host=tgt_host, name="myadd")  # compile the schedule into a TVM function
# In[12]:
ctx = tvm.context(tgt, 0)  # create a device context for the target
n = 1024
a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx)  # copy the input data to the device
b = tvm.nd.array(np.random.uniform(size=n).astype(B.dtype), ctx)
c = tvm.nd.array(np.zeros(n, dtype=C.dtype), ctx)  # allocate the output array on the device
fadd(a, b, c)  # run the actual computation
tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())  # verify correctness (asnumpy() copies the device array back to the CPU)
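# Optional (a sketch, not in the original): time the compiled function with
# the runtime's time_evaluator; number=10 averages over 10 runs.
evaluator = fadd.time_evaluator(fadd.entry_name, ctx, number=10)
print("myadd: %f ms" % (evaluator(a, b, c).mean * 1e3))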
# In[13]:
if tgt == "cuda":
    dev_module = fadd.imported_modules[0]
    print("-----GPU code-----")
    print(dev_module.get_source())  # inspect the generated device code
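# The host-side module can be inspected the same way (a sketch): for an
# "llvm" host target, get_source() returns the generated LLVM IR.
print("-----Host code-----")
print(fadd.get_source())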
# In[14]:
from tvm.contrib import cc
from tvm.contrib import utils
temp = utils.tempdir()
fadd.save(temp.relpath("myadd.o"))  # save the compiled host module into an object file
if tgt == "cuda":
    fadd.imported_modules[0].save(temp.relpath("myadd.ptx"))  # save the device module into a PTX file
cc.create_shared(temp.relpath("myadd.so"), [temp.relpath("myadd.o")])  # cc.create_shared invokes a compiler (gcc) to create a shared library
print(temp.listdir())
# In[15]:
fadd1 = tvm.runtime.load_module(temp.relpath("myadd.so"))  # load the host module
if tgt == "cuda":
    fadd1_dev = tvm.runtime.load_module(temp.relpath("myadd.ptx"))  # load the device module
    fadd1.import_module(fadd1_dev)  # link the device module into the host module
fadd1(a, b, c)
tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())
# In[16]:
fadd.export_library(temp.relpath("myadd_pack.so"))  # pack host and device modules into one library
fadd2 = tvm.runtime.load_module(temp.relpath("myadd_pack.so"))  # load the packed module
fadd2(a, b, c)
tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())