python用什么来写模块-用cython来写python的c模块

0x01

在用python写excel时,发现写10000行数据,每行50列时, 最快的excel库也要10秒,而在我的项目中用的xlsxwriter耗时10多秒,测试代码如下。这对于一个web服务来说,耗时实在是太长了。

测试

import sys

from time import clock

import openpyxl

import pyexcelerate

import xlsxwriter

import xlwt

from openpyxl.utils import get_column_letter

# Benchmark dimensions. With no command-line arguments the run uses
# 10000 rows x 50 cols; argv[1] overrides the row count and argv[2]
# overrides the column count.
col_max = 50
row_max = int(sys.argv[1]) if len(sys.argv) > 1 else 10000

if len(sys.argv) > 2:
    col_max = int(sys.argv[2])

def print_elapsed_time(module_name, elapsed):
    """Print one benchmark result line.

    The module name is left-aligned in a 22-character column and the
    elapsed value is shown with two decimals in a 6-character column.
    """
    line = " %-22s: %6.2f" % (module_name, elapsed)
    print(line)

def time_xlsxwriter():
    """Time XlsxWriter in default mode.

    Writes ``row_max // 2`` pairs of rows (one row of strings followed by
    one row of numbers, ``col_max`` cells each) to ``xlsxwriter.xlsx`` and
    reports the elapsed time through ``print_elapsed_time``.
    """
    # time.clock() was removed in Python 3.8. Prefer perf_counter() and
    # fall back to clock() only on interpreters that predate it.
    try:
        from time import perf_counter as timer
    except ImportError:
        from time import clock as timer

    start_time = timer()

    workbook = xlsxwriter.Workbook('xlsxwriter.xlsx')
    worksheet = workbook.add_worksheet()

    for row in range(row_max // 2):
        for col in range(col_max):
            worksheet.write_string(row * 2, col, "Row: %d Col: %d" % (row, col))
        for col in range(col_max):
            worksheet.write_number(row * 2 + 1, col, row + col)

    # Closing the workbook is part of the timed work.
    workbook.close()

    elapsed = timer() - start_time
    print_elapsed_time('xlsxwriter', elapsed)

def time_xlsxwriter_optimised():
    """Time XlsxWriter with the ``constant_memory`` option enabled.

    Writes ``row_max // 2`` pairs of rows (one row of strings followed by
    one row of numbers, ``col_max`` cells each) to ``xlsxwriter_opt.xlsx``
    and reports the elapsed time through ``print_elapsed_time``.
    """
    # time.clock() was removed in Python 3.8. Prefer perf_counter() and
    # fall back to clock() only on interpreters that predate it.
    try:
        from time import perf_counter as timer
    except ImportError:
        from time import clock as timer

    start_time = timer()

    workbook = xlsxwriter.Workbook('xlsxwriter_opt.xlsx',
                                   {'constant_memory': True})
    worksheet = workbook.add_worksheet()

    # Cells are written row by row, left to right.
    for row in range(row_max // 2):
        for col in range(col_max):
            worksheet.write_string(row * 2, col, "Row: %d Col: %d" % (row, col))
        for col in range(col_max):
            worksheet.write_number(row * 2 + 1, col, row + col)

    workbook.close()

    elapsed = timer() - start_time
    print_elapsed_time('xlsxwriter (optimised)', elapsed)

def time_openpyxl():
    """Time OpenPyXL in default mode.

    Fills the active worksheet with ``row_max // 2`` pairs of rows (one
    row of strings followed by one row of numbers, ``col_max`` cells
    each), saves it as ``openpyxl.xlsx`` and reports the elapsed time
    through ``print_elapsed_time``.
    """
    # time.clock() was removed in Python 3.8. Prefer perf_counter() and
    # fall back to clock() only on interpreters that predate it.
    try:
        from time import perf_counter as timer
    except ImportError:
        from time import clock as timer

    start_time = timer()

    workbook = openpyxl.workbook.Workbook()
    worksheet = workbook.active

    for row in range(row_max // 2):
        # Address cells by 1-based row/column numbers: passing an "A1"
        # style coordinate string to cell() is not accepted by current
        # openpyxl releases.
        for col in range(col_max):
            worksheet.cell(row=row * 2 + 1, column=col + 1).value = \
                "Row: %d Col: %d" % (row, col)
        for col in range(col_max):
            worksheet.cell(row=row * 2 + 2, column=col + 1).value = row + col

    workbook.save('openpyxl.xlsx')

    elapsed = timer() - start_time
    print_elapsed_time('openpyxl', elapsed)

def time_openpyxl_optimised():
    """Time OpenPyXL in optimised (write-only) mode.

    Appends ``row_max // 2`` pairs of rows (one row of strings followed
    by one row of numbers, ``col_max`` cells each) to a new sheet, saves
    the workbook as ``openpyxl_opt.xlsx`` and reports the elapsed time
    through ``print_elapsed_time``.
    """
    # time.clock() was removed in Python 3.8. Prefer perf_counter() and
    # fall back to clock() only on interpreters that predate it.
    try:
        from time import perf_counter as timer
    except ImportError:
        from time import clock as timer

    start_time = timer()

    # write_only=True is what makes this the "optimised" run; a plain
    # Workbook() would measure the same default mode as time_openpyxl().
    workbook = openpyxl.workbook.Workbook(write_only=True)
    worksheet = workbook.create_sheet()

    for row in range(row_max // 2):
        string_data = ["Row: %d Col: %d" % (row, col) for col in range(col_max)]
        worksheet.append(string_data)

        num_data = [row + col for col in range(col_max)]
        worksheet.append(num_data)

    workbook.save('openpyxl_opt.xlsx')

    elapsed = timer() - start_time
    print_elapsed_time('openpyxl (optimised)', elapsed)

def time_pyexcelerate():
    """Time pyexcelerate using per-cell ``set_cell_value`` calls.

    Writes ``row_max // 2`` pairs of rows (one row of strings followed by
    one row of numbers, ``col_max`` cells each) to sheet ``Sheet1``,
    saves ``pyexcelerate.xlsx`` and reports the elapsed time through
    ``print_elapsed_time``.
    """
    # time.clock() was removed in Python 3.8. Prefer perf_counter() and
    # fall back to clock() only on interpreters that predate it.
    try:
        from time import perf_counter as timer
    except ImportError:
        from time import clock as timer

    start_time = timer()

    workbook = pyexcelerate.Workbook()
    worksheet = workbook.new_sheet('Sheet1')

    # Row and column arguments are offset by one relative to the loop
    # counters.
    for row in range(row_max // 2):
        for col in range(col_max):
            worksheet.set_cell_value(row * 2 + 1, col + 1,
                                     "Row: %d Col: %d" % (row, col))
        for col in range(col_max):
            worksheet.set_cell_value(row * 2 + 2, col + 1, row + col)

    workbook.save('pyexcelerate.xlsx')

    elapsed = timer() - start_time
    print_elapsed_time('pyexcelerate', elapsed)

def time_xlwt():
    """Time xlwt in default mode.

    Writes ``row_max // 2`` pairs of rows (one row of strings followed by
    one row of numbers, ``col_max`` cells each) to sheet ``Sheet1``,
    saves ``xlwt.xls`` and reports the elapsed time through
    ``print_elapsed_time``.
    """
    # time.clock() was removed in Python 3.8. Prefer perf_counter() and
    # fall back to clock() only on interpreters that predate it.
    try:
        from time import perf_counter as timer
    except ImportError:
        from time import clock as timer

    start_time = timer()

    workbook = xlwt.Workbook()
    worksheet = workbook.add_sheet('Sheet1')

    for row in range(row_max // 2):
        for col in range(col_max):
            worksheet.write(row * 2, col, "Row: %d Col: %d" % (row, col))
        for col in range(col_max):
            worksheet.write(row * 2 + 1, col, row + col)

    workbook.save('xlwt.xls')

    elapsed = timer() - start_time
    print_elapsed_time('xlwt', elapsed)

# Report the interpreter/library versions and the benchmark dimensions,
# then run every benchmark in turn.
print("")
print("Versions:")
# Take the first whitespace-separated token of sys.version instead of a
# fixed five-character slice, which cuts "3.10.12" down to "3.10.".
print(" %-12s: %s" % ('python', sys.version.split()[0]))
print(" %-12s: %s" % ('openpyxl', openpyxl.__version__))
print(" %-12s: %s" % ('pyexcelerate', pyexcelerate.__version__))
print(" %-12s: %s" % ('xlsxwriter', xlsxwriter.__version__))
print(" %-12s: %s" % ('xlwt', xlwt.__VERSION__))

print("")
print("Dimensions:")
print(" Rows = %d" % row_max)
print(" Cols = %d" % col_max)

print("")
print("Times:")
time_pyexcelerate()
time_xlwt()
time_xlsxwriter_optimised()
time_xlsxwriter()
time_openpyxl_optimised()
time_openpyxl()
print("")

0x02

为了提高python的效率,很自然的就想到了c, 查询相关资料后,发现写c代码的方式有几种。

第一种, 直接利用ctypes调用动态链接库

# Import the windll loader object from the ctypes module.
from ctypes import windll

# Load the dynamic link library through windll.LoadLibrary. The library
# name has to be passed as a string; a bare some.dll is a NameError.
somelibc = windll.LoadLibrary("some.dll")

第二种 利用ctypes来写搭起c与python的桥梁

第三种直接用c来封装c代码,并生成动态链接库。

#include <Python.h>

#include <string.h>

/* module functions */

/*
 * message(text): build the greeting "Hello, <text>".
 *
 * self is unused for module-level functions; args is the argument tuple
 * of the Python call and must hold exactly one string.
 *
 * Returns a new Python string, or NULL (with the exception already set
 * by the argument parser) when args does not match.
 */
static PyObject *                                   /* returns object */
message(PyObject *self, PyObject *args)             /* self unused in modules */
{                                                   /* args from Python call */
    const char *fromPython;

    (void) self;

    if (! PyArg_Parse(args, "(s)", &fromPython))    /* convert Python -> C */
        return NULL;                                /* null=raise exception */

    /*
     * Let Python size and build the result. Copying the caller's text
     * into a fixed 64-byte stack buffer with strcpy/strcat overflows as
     * soon as the argument is longer than 56 characters.
     */
    return PyString_FromFormat("Hello, %s", fromPython);
}

/* registration table */

static struct PyMethodDef hello_methods[] = {

{"message", message, 1}, /* method name, C func ptr, always-tuple */

{NULL, NULL} /* end of table marker */

};

/* module initializer */

void inithello( ) /* called on first import */

{ /* name matters if loaded dynamically */

(void) Py_InitModule3("hello", hello_methods); /* mod name, table ptr */

}

这种方式代码效率最高,缺点是与py版本不兼容

第四种,利用cython生成c代码,这种方式是最先进的,也是最推荐的。

0x03

用cython可以参考cython的官网。

首先写.pxd文件,类似于c语言的.h头文件,定义函数签名等

这里我直接调用了c的excel库libxlsxwriter,并且安装libxlsxwriter到系统路径中去了。c中的函数签名直接copy到.pxd文件中就可以了,需要注意的是,如果c定义的是一个struct,如lxw_error,那么在.pxd中直接写上pass就好了,cython在生成代码的时候会自动帮我们找到这个struct。

# cexcel.pxd
#
# C-level declarations for the parts of libxlsxwriter used by excel.pyx.

cdef extern from "xlsxwriter/format.h":
    # Return type of the write/close calls declared further down.
    # NOTE(review): declared as an opaque struct here; confirm against
    # the definition in the libxlsxwriter headers.
    ctypedef struct lxw_error:
        pass

cdef extern from "xlsxwriter/common.h":
    # Row and column index types, both declared to Cython as plain int.
    ctypedef int lxw_row_t
    ctypedef int lxw_col_t

    # Cell format handle; only ever used through a pointer.
    ctypedef struct lxw_format:
        pass

cdef extern from "xlsxwriter/worksheet.h":
    # Worksheet handle; only ever used through a pointer.
    ctypedef struct lxw_worksheet:
        pass

    # Write a C string into cell (row, col) using the given format.
    lxw_error worksheet_write_string(lxw_worksheet *worksheet, lxw_row_t row,
                                     lxw_col_t col, const char *string,
                                     lxw_format *cformat)

    # Write a double into cell (row, col) using the given format.
    lxw_error worksheet_write_number(lxw_worksheet *worksheet, lxw_row_t row,
                                     lxw_col_t col, double number,
                                     lxw_format *cformat)

    # Set width and format for the columns first_col..last_col.
    lxw_error worksheet_set_column(lxw_worksheet *worksheet,
                                   lxw_col_t first_col, lxw_col_t last_col,
                                   double width, lxw_format *format)

cdef extern from "xlsxwriter/workbook.h":
    # Workbook handle; only ever used through a pointer.
    ctypedef struct lxw_workbook:
        pass

    # Declared for completeness; nothing in this file takes it yet.
    ctypedef struct lxw_workbookoptions:
        pass

    # Create a workbook that will be written to `filename`.
    lxw_workbook *new_workbook(const char *filename)

    # Add a worksheet called `sheetname` to `workbook`.
    lxw_worksheet *workbook_add_worksheet(lxw_workbook *workbook,
                                          const char *sheetname)

    # Close `workbook`.
    lxw_error workbook_close(lxw_workbook *workbook)

cdef extern from "xlsxwriter/custom.h":
    # Look up a format for `workbook` by integer id. excel.pyx requests
    # ids 0, 1 and 2 for its header, string and number formats.
    lxw_format *get_my_style(lxw_workbook *workbook, int name)

定义好.pxd文件后,下面就开始写我们的代码逻辑了,定义在.pyx文件中代码如下,其中cexcel就是之前我们定义的cexcel.pxd文件,在WorkBook类中,如果需要使用一个c变量, 那么我们需要用cdef语句先声明这个变量的类型。

#excel.pyx

cimport cexcel

cdef class WorkBook:
    """Wrapper around one libxlsxwriter workbook and its current worksheet.

    The workbook and three formats (header, string, number) are created
    in ``__cinit__``. ``add_worksheet`` selects the sheet that every
    later ``write_*`` / ``set_column`` call targets. ``close`` hands the
    workbook to ``workbook_close``.

    Calls that need a handle which is missing raise ``ValueError``
    instead of passing a NULL pointer into the C library.
    """

    cdef cexcel.lxw_workbook *_c_workbook
    cdef cexcel.lxw_worksheet *_c_worksheet
    cdef cexcel.lxw_format *_c_header
    cdef cexcel.lxw_format *_c_str
    cdef cexcel.lxw_format *_c_num
    # Not read or assigned anywhere in this class; kept so the declared
    # attributes stay the same.
    cdef cexcel.lxw_format *cformat

    def __cinit__(self, const char *filename):
        """Create the workbook for ``filename`` and fetch its formats."""
        self._c_workbook = cexcel.new_workbook(filename)
        if self._c_workbook is NULL:
            raise MemoryError("new_workbook() returned NULL")
        self._c_header = cexcel.get_my_style(self._c_workbook, 0)
        self._c_str = cexcel.get_my_style(self._c_workbook, 1)
        self._c_num = cexcel.get_my_style(self._c_workbook, 2)

    cdef int _require_worksheet(self) except -1:
        # Guard shared by every call that dereferences the worksheet.
        # Pointer attributes start out as NULL, so this also catches a
        # write issued before add_worksheet().
        if self._c_workbook is NULL:
            raise ValueError("workbook is closed")
        if self._c_worksheet is NULL:
            raise ValueError("no worksheet selected; call add_worksheet() first")
        return 0

    def add_worksheet(self, const char *sheetname):
        """Add a worksheet, make it the current one and return ``self``."""
        cdef cexcel.lxw_worksheet *sheet
        if self._c_workbook is NULL:
            raise ValueError("workbook is closed")
        sheet = cexcel.workbook_add_worksheet(self._c_workbook, sheetname)
        if sheet is NULL:
            raise ValueError("workbook_add_worksheet() returned NULL")
        self._c_worksheet = sheet
        return self

    def write_header(self, cexcel.lxw_row_t row, cexcel.lxw_col_t col, const char *string):
        """Write ``string`` at (row, col) with the header format."""
        self._require_worksheet()
        cexcel.worksheet_write_string(self._c_worksheet, row, col, string, self._c_header)

    def write_string(self, cexcel.lxw_row_t row, cexcel.lxw_col_t col, const char *string):
        """Write ``string`` at (row, col) with the string format."""
        self._require_worksheet()
        cexcel.worksheet_write_string(self._c_worksheet, row, col, string, self._c_str)

    def write_number(self, cexcel.lxw_row_t row, cexcel.lxw_col_t col, number):
        """Write ``number`` at (row, col) with the number format.

        The placeholder values ``'--'`` and ``''`` are written as strings;
        anything else is passed on as a C double.
        """
        self._require_worksheet()
        if number in {'--', ''}:
            cexcel.worksheet_write_string(self._c_worksheet, row, col, number, self._c_num)
        else:
            cexcel.worksheet_write_number(self._c_worksheet, row, col, number, self._c_num)

    def write_percent(self, cexcel.lxw_row_t row, cexcel.lxw_col_t col, const char *string):
        """Write ``string`` at (row, col) with the number format."""
        self._require_worksheet()
        cexcel.worksheet_write_string(self._c_worksheet, row, col, string, self._c_num)

    def close(self):
        """Close the workbook; calling it again is a no-op."""
        if self._c_workbook is NULL:
            return
        cexcel.workbook_close(self._c_workbook)
        # Drop every handle so that nothing from the closed workbook can
        # be handed back to the C library afterwards.
        self._c_workbook = NULL
        self._c_worksheet = NULL
        self._c_header = NULL
        self._c_str = NULL
        self._c_num = NULL

    def set_column(self, cexcel.lxw_col_t first_col, cexcel.lxw_col_t last_col, double width):
        """Set the width of columns ``first_col``..``last_col``, with no format."""
        self._require_worksheet()
        cexcel.worksheet_set_column(self._c_worksheet, first_col, last_col, width, NULL)

0x04

写完模块的业务逻辑之后,我们只需要编写setup.py文件,利用distutils把我们的cython模块安装到系统路径或者虚拟环境中

代码如下

from Cython.Build import cythonize
from setuptools import setup, find_packages, Extension

# NOTE(review): machine-specific location of the libxlsxwriter headers;
# adjust it for the build host.
LIBXLSXWRITER_INCLUDE_DIR = '/home/linl/Desktop/py_c_xlsxwriter/libxlsxwriter/lib'

# Run cythonize() exactly once, here. Its result is passed to setup()
# as-is instead of being cythonized a second time.
ext_modules = cythonize([
    Extension(
        "py_c_xlsxwriter",
        ["excel.pyx"],
        libraries=["xlsxwriter"],
        include_dirs=[LIBXLSXWRITER_INCLUDE_DIR],
    ),
])

setup(
    name='cpexcel',
    version='0.0.4',
    keywords='c xlsxwriter cython',
    license='MIT License',
    url='https://github.com/drinksober',
    install_requires=['Cython'],
    author='drinksober',
    author_email='drinksober@foxmail.com',
    packages=find_packages(),
    platforms='any',
    ext_modules=ext_modules,
)

然后执行python setup.py install,一个完整的cython模块就完成了。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值