加速python excel写入

最新推荐文章于 2024-08-26 08:22:28 发布

nongcunqq

最新推荐文章于 2024-08-26 08:22:28 发布

阅读量1.3k

点赞数

分类专栏： python excel 文章标签： python

本文链接：https://blog.csdn.net/nongcunqq/article/details/124129684

版权

python 同时被 2 个专栏收录

339 篇文章 0 订阅

订阅专栏

excel

38 篇文章 0 订阅

订阅专栏

import time,ctypes
import pandas as pd

import pyexcelerate,xlsxwriter

def test_pyexcelerate(fn_prefix,data):
    wb = pyexcelerate.Workbook()
    wb.new_sheet("sheet name", data=data)
    wb.save(fn_prefix+"_pyexcelerate.xlsx")

def test_libxl(fn_prefix,data):
    dll = ctypes.windll.LoadLibrary('bin64_libxl_362.dll')
    book = dll.xlCreateXMLBookCA()
    sheet= dll.xlBookAddSheetA(book, "Sheet10", 0)
    for nrow,rec in enumerate(data, start = 1):     # reserverd the first line for 'Created by LibXL trial version... '
        for ncol,val in enumerate(rec, start = 0):
            #print 'row: %s  , col: %s  %s' % (nrow,ncol,type(val))
            if isinstance(val,float):
                dll.xlSheetWriteNumA(sheet, nrow, ncol, ctypes.c_double(val), 0)
            else:
                dll.xlSheetWriteStrA(sheet, nrow, ncol, ctypes.c_char_p(val), 0)
    dll.xlBookSaveA(book, fn_prefix+"_libxl.xlsx")
    dll.xlBookReleaseA(book)

def test_xlsxwriter(fn_prefix,data):
    workbook = xlsxwriter.Workbook(fn_prefix+"_xlsxwriter.xlsx")
    worksheet = workbook.add_worksheet()
    for nrow,rec in enumerate(data):
        for ncol,val in enumerate(rec):
            worksheet.write(nrow, ncol, val)
    workbook.close()

def bench1():
    for nline in (1000,10000,len(csv_data)):
        fn_prefix=fn[:-4]+'_'+str(nline)
        data = csv_data[:nline]
        print '*** %s lines xlsx benchmark ***' %nline
        for func in (test_pyexcelerate, test_libxl, test_xlsxwriter):
            t1=time.time()
            apply(func,(fn_prefix,data))
            tm=time.time()-t1
            print '%s:  %0.3f' % (func.__name__, tm)
        print ' '

def bench2(func,fn,nline,csv_data):
    fn_prefix=fn[:-4]+'_'+str(nline)
    data = csv_data[:nline]
    print '*** %s lines xlsx benchmark ***' %nline
    t1=time.time()
    apply(func,(fn_prefix,data))
    tm=time.time()-t1
    print '%s:  %0.3f' % (func.__name__, tm)
    print ' '

def test_xlsxwriter_to_excel(df,fn):
    print '*** to_excel xlsx benchmark ***'
    t1=time.time()
    df.to_excel(fn+'.xlsx', sheet_name='Sheet1',index=False)
    tm=time.time()-t1
    print 'pandas to_excel(%s):  %0.3f' %  (pd.get_option('io.excel.xlsx.writer'),tm)

if ( __name__ == "__main__"):
    fn ='EutranCellTdd.csv'
    df = pd.read_csv(fn,encoding='gbk',na_values=["no value"]) #,nrows=1000
    df = df.fillna(0)
    #test_xlsxwriter_to_excel(df,fn)
    csv_data=df.values.tolist()
    #bench1()
    #for monitoring process memory usage.
    func=(test_pyexcelerate, test_libxl, test_xlsxwriter)
    bench2(func[2],fn,len(csv_data),csv_data)

*** 1000 lines xlsx benchmark ***
test_pyexcelerate:   0.633
test_libxl:          0.531
test_xlsxwriter:     1.456
 
*** 10000 lines xlsx benchmark ***
test_pyexcelerate:   5.974
test_libxl:          5.475
test_xlsxwriter:    13.685

*** 97613 lines xlsx benchmark ***
test_pyexcelerate:  54.461   内存峰值  644M
test_libxl:         51.090   内存峰值 1070M
test_xlsxwriter:   117.300   内存峰值  930M

*** to_excel xlsx benchmark ***
pandas to_excel(xlsxwriter):  239.821  内存峰值922M