import time,ctypes
import pandas as pd
import pyexcelerate,xlsxwriter
def test_pyexcelerate(fn_prefix,data):
wb = pyexcelerate.Workbook()
wb.new_sheet("sheet name", data=data)
wb.save(fn_prefix+"_pyexcelerate.xlsx")
def test_libxl(fn_prefix,data):
dll = ctypes.windll.LoadLibrary('bin64_libxl_362.dll')
book = dll.xlCreateXMLBookCA()
sheet= dll.xlBookAddSheetA(book, "Sheet10", 0)
for nrow,rec in enumerate(data, start = 1):
for ncol,val in enumerate(rec, start = 0):
if isinstance(val,float):
dll.xlSheetWriteNumA(sheet, nrow, ncol, ctypes.c_double(val), 0)
else:
dll.xlSheetWriteStrA(sheet, nrow, ncol, ctypes.c_char_p(val), 0)
dll.xlBookSaveA(book, fn_prefix+"_libxl.xlsx")
dll.xlBookReleaseA(book)
def test_xlsxwriter(fn_prefix,data):
workbook = xlsxwriter.Workbook(fn_prefix+"_xlsxwriter.xlsx")
worksheet = workbook.add_worksheet()
for nrow,rec in enumerate(data):
for ncol,val in enumerate(rec):
worksheet.write(nrow, ncol, val)
workbook.close()
def bench1():
for nline in (1000,10000,len(csv_data)):
fn_prefix=fn[:-4]+'_'+str(nline)
data = csv_data[:nline]
print '*** %s lines xlsx benchmark ***' %nline
for func in (test_pyexcelerate, test_libxl, test_xlsxwriter):
t1=time.time()
apply(func,(fn_prefix,data))
tm=time.time()-t1
print '%s: %0.3f' % (func.__name__, tm)
print ' '
def bench2(func,fn,nline,csv_data):
fn_prefix=fn[:-4]+'_'+str(nline)
data = csv_data[:nline]
print '*** %s lines xlsx benchmark ***' %nline
t1=time.time()
apply(func,(fn_prefix,data))
tm=time.time()-t1
print '%s: %0.3f' % (func.__name__, tm)
print ' '
def test_xlsxwriter_to_excel(df,fn):
print '*** to_excel xlsx benchmark ***'
t1=time.time()
df.to_excel(fn+'.xlsx', sheet_name='Sheet1',index=False)
tm=time.time()-t1
print 'pandas to_excel(%s): %0.3f' % (pd.get_option('io.excel.xlsx.writer'),tm)
if ( __name__ == "__main__"):
fn ='EutranCellTdd.csv'
df = pd.read_csv(fn,encoding='gbk',na_values=["no value"])
df = df.fillna(0)
csv_data=df.values.tolist()
func=(test_pyexcelerate, test_libxl, test_xlsxwriter)
bench2(func[2],fn,len(csv_data),csv_data)
*** 1000 lines xlsx benchmark ***
test_pyexcelerate: 0.633
test_libxl: 0.531
test_xlsxwriter: 1.456
*** 10000 lines xlsx benchmark ***
test_pyexcelerate: 5.974
test_libxl: 5.475
test_xlsxwriter: 13.685
*** 97613 lines xlsx benchmark ***
test_pyexcelerate: 54.461 内存峰值 644M
test_libxl: 51.090 内存峰值 1070M
test_xlsxwriter: 117.300 内存峰值 930M
*** to_excel xlsx benchmark ***
pandas to_excel(xlsxwriter): 239.821 内存峰值922M
参考
https://www.programminghunter.com/article/7493384430/