python与c混合编程cython(2)

cython强大之处还有就是与numpy的良好融合.下面是一个例子:

命名为convolve_cy.pyx

from __future__ import division
import numpy as np
def naive_convolve(f, g):
    # f is an image and is indexed by (v, w)
    # g is a filter kernel and is indexed by (s, t),
    #   it needs odd dimensions
    # h is the output image and is indexed by (x, y),
    #   it is not cropped
    if g.shape[0] % 2 != 1 or g.shape[1] % 2 != 1:
        raise ValueError("Only odd dimensions on filter supported")
    # smid and tmid are number of pixels between the center pixel
    # and the edge, ie for a 5x5 filter they will be 2.
    #
    # The output size is calculated by adding smid, tmid to each
    # side of the dimensions of the input image.
    vmax = f.shape[0]
    wmax = f.shape[1]
    smax = g.shape[0]
    tmax = g.shape[1]
    smid = smax // 2
    tmid = tmax // 2
    xmax = vmax + 2*smid
    ymax = wmax + 2*tmid
    # Allocate result image.
    h = np.zeros([xmax, ymax], dtype=f.dtype)
    # Do convolution
    for x in range(xmax):
        for y in range(ymax):
            # Calculate pixel value for h at (x,y). Sum one component
            # for each pixel (s, t) of the filter g.
            s_from = max(smid - x, -smid)
            s_to = min((xmax - x) - smid, smid + 1)
            t_from = max(tmid - y, -tmid)
            t_to = min((ymax - y) - tmid, tmid + 1)
            value = 0
            for s in range(s_from, s_to):
                for t in range(t_from, t_to):
                    v = x - smid + s
                    w = y - tmid + t
                    value += g[smid - s, tmid - t] * f[v, w]
            h[x, y] = value
    return h

编译成为convolve_cy.so文件

In [1]: import numpy as np
In [2]: import convolve_py
In [3]: convolve_py.naive_convolve(np.array([[1, 1, 1]], dtype=np.int),
...     np.array([[1],[2],[1]], dtype=np.int))
Out [3]:
array([[1, 1, 1],
    [2, 2, 2],
    [1, 1, 1]])
In [4]: import convolve_cy
In [4]: convolve_cy.naive_convolve(np.array([[1, 1, 1]], dtype=np.int),
...     np.array([[1],[2],[1]], dtype=np.int))
Out [4]:
array([[1, 1, 1],
    [2, 2, 2],
    [1, 1, 1]])
In [11]: N = 600
In [12]: f = np.arange(N*N, dtype=np.int).reshape((N,N))
In [13]: g = np.arange(81, dtype=np.int).reshape((9, 9))
In [19]: %timeit convolve_py.naive_convolve(f, g)
16 s ± 70.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
In [20]: %timeit convolve_cy.naive_convolve(f, g)
13.5 s ± 99.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

结果很差, 尝试用cython加数据类型:

import numpy as np

# We now need to fix a datatype for our arrays. I've used the variable
# DTYPE for this, which is assigned to the usual NumPy runtime
# type info object.
DTYPE = np.intc

def naive_convolve(f, g):
    if g.shape[0] % 2 != 1 or g.shape[1] % 2 != 1:
        raise ValueError("Only odd dimensions on filter supported")
    assert f.dtype == DTYPE and g.dtype == DTYPE
    # The "cdef" keyword is also used within functions to type variables. It
    # can only be used at the top indentation level (there are non-trivial
    # problems with allowing them in other places, though we'd love to see
    # good and thought out proposals for it).

    # Py_ssize_t is the proper C type for Python array indices.
    cdef Py_ssize_t x, y, s, t, v, w, s_from, s_to, t_from, t_to

    cdef Py_ssize_t vmax = f.shape[0]
    cdef Py_ssize_t wmax = f.shape[1]
    cdef Py_ssize_t smax = g.shape[0]
    cdef Py_ssize_t tmax = g.shape[1]
    cdef Py_ssize_t smid = smax // 2
    cdef Py_ssize_t tmid = tmax // 2
    cdef Py_ssize_t xmax = vmax + 2*smid
    cdef Py_ssize_t ymax = wmax + 2*tmid
    h = np.zeros([xmax, ymax], dtype=DTYPE)
    # It is very important to type ALL your variables. You do not get any
    # warnings if not, only much slower code (they are implicitly typed as
    # Python objects).
    # For the value variable, we want to use the same data type as is
    # stored in the array, so we use int because it correspond to np.intc.
    # NB! An important side-effect of this is that if "value" overflows its
    # datatype size, it will simply wrap around like in C, rather than raise
    # an error like in Python.
    cdef int value
    for x in range(xmax):
        for y in range(ymax):
            # Cython has built-in C functions for min and max.
            # This makes the following lines very fast.
            s_from = max(smid - x, -smid)
            s_to = min((xmax - x) - smid, smid + 1)
            t_from = max(tmid - y, -tmid)
            t_to = min((ymax - y) - tmid, tmid + 1)
            value = 0
            for s in range(s_from, s_to):
                for t in range(t_from, t_to):
                    v = x - smid + s
                    w = y - tmid + t
                    value += g[smid - s, tmid - t] * f[v, w]
            h[x, y] = value
    return h

结果是:

In [22]: %timeit convolve_typed.naive_convolve(f, g)
55.8 s ± 199 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
用cython的注释功能,
cython convolve_cy.pyx -a

黄色的地方代表与python交互比较多的地方,也是效率比较差的,应该改成c风格的比较好一些, 而且用内存视图来存取数组(memoryview),像这样的:

cdef int [:] foo         # 1D memoryview
cdef int [:, :] foo      # 2D memoryview
cdef int [:, :, :] foo   # 3D memoryview
...                      # You get the idea.
修改 convolve_cy.pyx文件.
import numpy as np

DTYPE = np.intc

# It is possible to declare types in the function declaration.
def naive_convolve(int [:,:] f, int [:,:] g):
    if g.shape[0] % 2 != 1 or g.shape[1] % 2 != 1:
        raise ValueError("Only odd dimensions on filter supported")

    # We don't need to check for the type of NumPy array here because
    # a check is already performed when calling the function.
    cdef Py_ssize_t x, y, s, t, v, w, s_from, s_to, t_from, t_to
    cdef Py_ssize_t vmax = f.shape[0]
    cdef Py_ssize_t wmax = f.shape[1]
    cdef Py_ssize_t smax = g.shape[0]
    cdef Py_ssize_t tmax = g.shape[1]
    cdef Py_ssize_t smid = smax // 2
    cdef Py_ssize_t tmid = tmax // 2
    cdef Py_ssize_t xmax = vmax + 2*smid
    cdef Py_ssize_t ymax = wmax + 2*tmid

    h_np =  np.zeros([xmax, ymax], dtype=DTYPE)
    cdef int [:,:] h = h_np

    cdef int value
    for x in range(xmax):
        for y in range(ymax):
            s_from = max(smid - x, -smid)
            s_to = min((xmax - x) - smid, smid + 1)
            t_from = max(tmid - y, -tmid)
            t_to = min((ymax - y) - tmid, tmid + 1)
            value = 0
            for s in range(s_from, s_to):
                for t in range(t_from, t_to):
                    v = x - smid + s
                    w = y - tmid + t
                    value += g[smid - s, tmid - t] * f[v, w]
            h[x, y] = value
    return h_np

看看结果:

In [22]: %timeit convolve_memview.naive_convolve(f, g)
57.1 ms ± 268 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)

比之前的快了大约280倍.再加上关于检测边界条件的装饰器:

@cython.boundscheck(False) #省去了所有的数组越界检查, 当你知道下标访问不会越界的时候可以使用它
@cython.wraparound(False) #消除了相对数组尾部的负数下标的处理(类似Python列表)

...
cimport cython
@cython.boundscheck(False)  # Deactivate bounds checking
@cython.wraparound(False)   # Deactivate negative indexing.
def naive_convolve(int [:, :] f, int [:, :] g):
...

In [23]: %timeit convolve_index.naive_convolve(f, g)
19.8 ms ± 299 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
又快了将近3倍.这样通过cpython对python程序优化,总共快了800多倍.
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值