cython强大之处还有就是与numpy的良好融合.下面是一个例子:
命名为convolve_cy.pyx
from __future__ import division
import numpy as np
def naive_convolve(f, g):
# f is an image and is indexed by (v, w)
# g is a filter kernel and is indexed by (s, t),
# it needs odd dimensions
# h is the output image and is indexed by (x, y),
# it is not cropped
if g.shape[0] % 2 != 1 or g.shape[1] % 2 != 1:
raise ValueError("Only odd dimensions on filter supported")
# smid and tmid are number of pixels between the center pixel
# and the edge, ie for a 5x5 filter they will be 2.
#
# The output size is calculated by adding smid, tmid to each
# side of the dimensions of the input image.
vmax = f.shape[0]
wmax = f.shape[1]
smax = g.shape[0]
tmax = g.shape[1]
smid = smax // 2
tmid = tmax // 2
xmax = vmax + 2*smid
ymax = wmax + 2*tmid
# Allocate result image.
h = np.zeros([xmax, ymax], dtype=f.dtype)
# Do convolution
for x in range(xmax):
for y in range(ymax):
# Calculate pixel value for h at (x,y). Sum one component
# for each pixel (s, t) of the filter g.
s_from = max(smid - x, -smid)
s_to = min((xmax - x) - smid, smid + 1)
t_from = max(tmid - y, -tmid)
t_to = min((ymax - y) - tmid, tmid + 1)
value = 0
for s in range(s_from, s_to):
for t in range(t_from, t_to):
v = x - smid + s
w = y - tmid + t
value += g[smid - s, tmid - t] * f[v, w]
h[x, y] = value
return h
编译成为convolve_cy.so
文件
In [1]: import numpy as np In [2]: import convolve_py In [3]: convolve_py.naive_convolve(np.array([[1, 1, 1]], dtype=np.int), ... np.array([[1],[2],[1]], dtype=np.int)) Out [3]: array([[1, 1, 1], [2, 2, 2], [1, 1, 1]]) In [4]: import convolve_cy In [4]: convolve_cy.naive_convolve(np.array([[1, 1, 1]], dtype=np.int), ... np.array([[1],[2],[1]], dtype=np.int)) Out [4]: array([[1, 1, 1], [2, 2, 2], [1, 1, 1]]) In [11]: N = 600 In [12]: f = np.arange(N*N, dtype=np.int).reshape((N,N)) In [13]: g = np.arange(81, dtype=np.int).reshape((9, 9)) In [19]: %timeit convolve_py.naive_convolve(f, g) 16 s ± 70.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) In [20]: %timeit convolve_cy.naive_convolve(f, g) 13.5 s ± 99.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
结果很差, 尝试用cython加数据类型:
import numpy as np # We now need to fix a datatype for our arrays. I've used the variable # DTYPE for this, which is assigned to the usual NumPy runtime # type info object. DTYPE = np.intc def naive_convolve(f, g): if g.shape[0] % 2 != 1 or g.shape[1] % 2 != 1: raise ValueError("Only odd dimensions on filter supported") assert f.dtype == DTYPE and g.dtype == DTYPE # The "cdef" keyword is also used within functions to type variables. It # can only be used at the top indentation level (there are non-trivial # problems with allowing them in other places, though we'd love to see # good and thought out proposals for it). # Py_ssize_t is the proper C type for Python array indices. cdef Py_ssize_t x, y, s, t, v, w, s_from, s_to, t_from, t_to cdef Py_ssize_t vmax = f.shape[0] cdef Py_ssize_t wmax = f.shape[1] cdef Py_ssize_t smax = g.shape[0] cdef Py_ssize_t tmax = g.shape[1] cdef Py_ssize_t smid = smax // 2 cdef Py_ssize_t tmid = tmax // 2 cdef Py_ssize_t xmax = vmax + 2*smid cdef Py_ssize_t ymax = wmax + 2*tmid h = np.zeros([xmax, ymax], dtype=DTYPE) # It is very important to type ALL your variables. You do not get any # warnings if not, only much slower code (they are implicitly typed as # Python objects). # For the value variable, we want to use the same data type as is # stored in the array, so we use int because it correspond to np.intc. # NB! An important side-effect of this is that if "value" overflows its # datatype size, it will simply wrap around like in C, rather than raise # an error like in Python. cdef int value for x in range(xmax): for y in range(ymax): # Cython has built-in C functions for min and max. # This makes the following lines very fast. s_from = max(smid - x, -smid) s_to = min((xmax - x) - smid, smid + 1) t_from = max(tmid - y, -tmid) t_to = min((ymax - y) - tmid, tmid + 1) value = 0 for s in range(s_from, s_to): for t in range(t_from, t_to): v = x - smid + s w = y - tmid + t value += g[smid - s, tmid - t] * f[v, w] h[x, y] = value return h
结果是:
In [22]: %timeit convolve_typed.naive_convolve(f, g) 55.8 s ± 199 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)用cython的注释功能,
cython convolve_cy.pyx -a
黄色的地方代表与python交互比较多的地方,也是效率比较差的,应该改成c风格的比较好一些, 而且用内存视图来存取数组(memoryview),像这样的:
cdef int [:] foo # 1D memoryview cdef int [:, :] foo # 2D memoryview cdef int [:, :, :] foo # 3D memoryview ... # You get the idea.修改 convolve_cy.pyx文件.
import numpy as np
DTYPE = np.intc
# It is possible to declare types in the function declaration.
def naive_convolve(int [:,:] f, int [:,:] g):
if g.shape[0] % 2 != 1 or g.shape[1] % 2 != 1:
raise ValueError("Only odd dimensions on filter supported")
# We don't need to check for the type of NumPy array here because
# a check is already performed when calling the function.
cdef Py_ssize_t x, y, s, t, v, w, s_from, s_to, t_from, t_to
cdef Py_ssize_t vmax = f.shape[0]
cdef Py_ssize_t wmax = f.shape[1]
cdef Py_ssize_t smax = g.shape[0]
cdef Py_ssize_t tmax = g.shape[1]
cdef Py_ssize_t smid = smax // 2
cdef Py_ssize_t tmid = tmax // 2
cdef Py_ssize_t xmax = vmax + 2*smid
cdef Py_ssize_t ymax = wmax + 2*tmid
h_np = np.zeros([xmax, ymax], dtype=DTYPE)
cdef int [:,:] h = h_np
cdef int value
for x in range(xmax):
for y in range(ymax):
s_from = max(smid - x, -smid)
s_to = min((xmax - x) - smid, smid + 1)
t_from = max(tmid - y, -tmid)
t_to = min((ymax - y) - tmid, tmid + 1)
value = 0
for s in range(s_from, s_to):
for t in range(t_from, t_to):
v = x - smid + s
w = y - tmid + t
value += g[smid - s, tmid - t] * f[v, w]
h[x, y] = value
return h_np
看看结果:
In [22]: %timeit convolve_memview.naive_convolve(f, g) 57.1 ms ± 268 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
比之前的快了大约280倍.再加上关于检测边界条件的装饰器:
@cython.boundscheck(False) #省去了所有的数组越界检查, 当你知道下标访问不会越界的时候可以使用它
@cython.wraparound(False) #消除了相对数组尾部的负数下标的处理(类似Python列表)
... cimport cython @cython.boundscheck(False) # Deactivate bounds checking @cython.wraparound(False) # Deactivate negative indexing. def naive_convolve(int [:, :] f, int [:, :] g): ...
In [23]: %timeit convolve_index.naive_convolve(f, g) 19.8 ms ± 299 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)又快了将近3倍.这样通过cpython对python程序优化,总共快了800多倍.