cython强大之处还有就是与numpy的良好融合.下面是一个例子:
命名为convolve_cy.pyx
from __future__ import division
import numpy as np
def naive_convolve(f, g):
    # f is an image and is indexed by (v, w)
    # g is a filter kernel and is indexed by (s, t),
    #   it needs odd dimensions
    # h is the output image and is indexed by (x, y),
    #   it is not cropped
    if g.shape[0] % 2 != 1 or g.shape[1] % 2 != 1:
        raise ValueError("Only odd dimensions on filter supported")
    # smid and tmid are number of pixels between the center pixel
    # and the edge, ie for a 5x5 filter they will be 2.
    #
    # The output size is calculated by adding smid, tmid to each
    # side of the dimensions of the input image.
    vmax = f.shape[0]
    wmax = f.shape[1]
    smax = g.shape[0]
    tmax = g.shape[1]
    smid = smax // 2
    tmid = tmax // 2
    xmax = vmax + 2*smid
    ymax = wmax + 2*tmid
    # Allocate result image.
    h = np.zeros([xmax, ymax], dtype=f.dtype)
    # Do convolution
    for x in range(xmax):
        for y in range(ymax):
            # Calculate pixel value for h at (x,y). Sum one component
            # for each pixel (s, t) of the filter g.
            s_from = max(smid - x, -smid)
            s_to = min((xmax - x) - smid, smid + 1)
            t_from = max(tmid - y, -tmid)
            t_to = min((ymax - y) - tmid, tmid + 1)
            value = 0
            for s in range(s_from, s_to):
                for t in range(t_from, t_to):
                    v = x - smid + s
                    w = y - tmid + t
                    value += g[smid - s, tmid - t] * f[v, w]
            h[x, y] = value
    return h编译成为convolve_cy.so文件
In [1]: import numpy as np In [2]: import convolve_py In [3]: convolve_py.naive_convolve(np.array([[1, 1, 1]], dtype=np.int), ... np.array([[1],[2],[1]], dtype=np.int)) Out [3]: array([[1, 1, 1], [2, 2, 2], [1, 1, 1]]) In [4]: import convolve_cy In [4]: convolve_cy.naive_convolve(np.array([[1, 1, 1]], dtype=np.int), ... np.array([[1],[2],[1]], dtype=np.int)) Out [4]: array([[1, 1, 1], [2, 2, 2], [1, 1, 1]]) In [11]: N = 600 In [12]: f = np.arange(N*N, dtype=np.int).reshape((N,N)) In [13]: g = np.arange(81, dtype=np.int).reshape((9, 9)) In [19]: %timeit convolve_py.naive_convolve(f, g) 16 s ± 70.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) In [20]: %timeit convolve_cy.naive_convolve(f, g) 13.5 s ± 99.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
结果很差, 尝试用cython加数据类型:
import numpy as np # We now need to fix a datatype for our arrays. I've used the variable # DTYPE for this, which is assigned to the usual NumPy runtime # type info object. DTYPE = np.intc def naive_convolve(f, g): if g.shape[0] % 2 != 1 or g.shape[1] % 2 != 1: raise ValueError("Only odd dimensions on filter supported") assert f.dtype == DTYPE and g.dtype == DTYPE # The "cdef" keyword is also used within functions to type variables. It # can only be used at the top indentation level (there are non-trivial # problems with allowing them in other places, though we'd love to see # good and thought out proposals for it). # Py_ssize_t is the proper C type for Python array indices. cdef Py_ssize_t x, y, s, t, v, w, s_from, s_to, t_from, t_to cdef Py_ssize_t vmax = f.shape[0] cdef Py_ssize_t wmax = f.shape[1] cdef Py_ssize_t smax = g.shape[0] cdef Py_ssize_t tmax = g.shape[1] cdef Py_ssize_t smid = smax // 2 cdef Py_ssize_t tmid = tmax // 2 cdef Py_ssize_t xmax = vmax + 2*smid cdef Py_ssize_t ymax = wmax + 2*tmid h = np.zeros([xmax, ymax], dtype=DTYPE) # It is very important to type ALL your variables. You do not get any # warnings if not, only much slower code (they are implicitly typed as # Python objects). # For the value variable, we want to use the same data type as is # stored in the array, so we use int because it correspond to np.intc. # NB! An important side-effect of this is that if "value" overflows its # datatype size, it will simply wrap around like in C, rather than raise # an error like in Python. cdef int value for x in range(xmax): for y in range(ymax): # Cython has built-in C functions for min and max. # This makes the following lines very fast. s_from = max(smid - x, -smid) s_to = min((xmax - x) - smid, smid + 1) t_from = max(tmid - y, -tmid) t_to = min((ymax - y) - tmid, tmid + 1) value = 0 for s in range(s_from, s_to): for t in range(t_from, t_to): v = x - smid + s w = y - tmid + t value += g[smid - s, tmid - t] * f[v, w] h[x, y] = value return h
结果是:
In [22]: %timeit convolve_typed.naive_convolve(f, g) 55.8 s ± 199 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)用cython的注释功能,
cython convolve_cy.pyx -a
黄色的地方代表与python交互比较多的地方,也是效率比较差的,应该改成c风格的比较好一些, 而且用内存视图来存取数组(memoryview),像这样的:
cdef int [:] foo # 1D memoryview cdef int [:, :] foo # 2D memoryview cdef int [:, :, :] foo # 3D memoryview ... # You get the idea.修改 convolve_cy.pyx文件.
import numpy as np
DTYPE = np.intc
# It is possible to declare types in the function declaration.
def naive_convolve(int [:,:] f, int [:,:] g):
    if g.shape[0] % 2 != 1 or g.shape[1] % 2 != 1:
        raise ValueError("Only odd dimensions on filter supported")
    # We don't need to check for the type of NumPy array here because
    # a check is already performed when calling the function.
    cdef Py_ssize_t x, y, s, t, v, w, s_from, s_to, t_from, t_to
    cdef Py_ssize_t vmax = f.shape[0]
    cdef Py_ssize_t wmax = f.shape[1]
    cdef Py_ssize_t smax = g.shape[0]
    cdef Py_ssize_t tmax = g.shape[1]
    cdef Py_ssize_t smid = smax // 2
    cdef Py_ssize_t tmid = tmax // 2
    cdef Py_ssize_t xmax = vmax + 2*smid
    cdef Py_ssize_t ymax = wmax + 2*tmid
    h_np =  np.zeros([xmax, ymax], dtype=DTYPE)
    cdef int [:,:] h = h_np
    cdef int value
    for x in range(xmax):
        for y in range(ymax):
            s_from = max(smid - x, -smid)
            s_to = min((xmax - x) - smid, smid + 1)
            t_from = max(tmid - y, -tmid)
            t_to = min((ymax - y) - tmid, tmid + 1)
            value = 0
            for s in range(s_from, s_to):
                for t in range(t_from, t_to):
                    v = x - smid + s
                    w = y - tmid + t
                    value += g[smid - s, tmid - t] * f[v, w]
            h[x, y] = value
    return h_np看看结果:
In [22]: %timeit convolve_memview.naive_convolve(f, g) 57.1 ms ± 268 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
比之前的快了大约280倍.再加上关于检测边界条件的装饰器:
@cython.boundscheck(False) #省去了所有的数组越界检查, 当你知道下标访问不会越界的时候可以使用它
@cython.wraparound(False) #消除了相对数组尾部的负数下标的处理(类似Python列表)
... cimport cython @cython.boundscheck(False) # Deactivate bounds checking @cython.wraparound(False) # Deactivate negative indexing. def naive_convolve(int [:, :] f, int [:, :] g): ...
In [23]: %timeit convolve_index.naive_convolve(f, g) 19.8 ms ± 299 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)又快了将近3倍.这样通过cpython对python程序优化,总共快了800多倍.
                  
                  
                  
                  
本文介绍使用Cython对Python中的卷积运算进行优化的方法。通过类型声明、使用内存视图及禁用边界检查等技巧,实现了从原始Python代码到高效Cython版本的转变,性能提升了800多倍。
          
      
          
                
                
                
                
              
                
                
                
                
                
              
                
                
              
            
                  
					1143
					
被折叠的  条评论
		 为什么被折叠?
		 
		 
		
    
  
    
  
            


            