"""Convert the CSV files in a directory to Parquet, then preview one result.

Setup (written for Python 3.6.7) -- install the two required libraries::

    sudo pip3 install pandas
    sudo pip3 install pyarrow

Save this script as ``csvToparquet.py`` and make it executable::

    vi csvToparquet.py
    chmod 755 csvToparquet.py

Reposted from: https://my.oschina.net/phoebus789/blog/3019710
"""
import os

import pandas as pd

# Directory that is scanned for CSV input; the Parquet output is written
# into this same directory.
rootdir = '/home/yztmanager/wlwl/data/www/b93e09/static/csrzic/excel/zcsd'

# Parquet file (relative to the current working directory) that is read back
# and previewed after the conversion.
PREVIEW_FILE = "LHWCJ95DXJ1180202.parquet"


def extract_vin(path):
    """Return the part of the file name in *path* that precedes the first "_".

    The result is used as the stem of the Parquet output file.  When the file
    name contains no underscore, the whole file name (extension included) is
    returned.
    """
    # os.path.basename handles the platform's separators, unlike a manual
    # search for "/".
    return os.path.basename(path).split("_")[0]


def convert_directory(directory):
    """Convert every regular file in *directory* from CSV to Parquet.

    Each input is read as a GB2312-encoded CSV and written to
    ``<directory>/<prefix>.parquet``, where ``<prefix>`` comes from
    :func:`extract_vin`.  The minimal single-file equivalent is::

        df = pd.read_csv('pollution.csv')
        df.to_parquet('output.parquet')

    Sub-directories are ignored.  Files ending in ``.parquet`` are ignored as
    well: the output lands in the scanned directory, so without this filter a
    second run would try to parse its own output as CSV.

    Returns:
        The list of Parquet paths that were written, in processing order.
    """
    written = []
    for name in os.listdir(directory):
        path = os.path.join(directory, name)
        if not os.path.isfile(path):
            continue
        if name.lower().endswith(".parquet"):
            # Output of a previous run, not CSV input.
            continue
        vin = extract_vin(path)
        df = pd.read_csv(path, engine='python', encoding="gb2312")
        target = os.path.join(directory, vin + '.parquet')
        df.to_parquet(target)
        written.append(target)
        print(path, vin)
    return written


def preview_parquet(path, rows=10):
    """Read the Parquet file at *path* and print its first *rows* rows."""
    # Imported here so that the conversion helpers above can be imported
    # without pyarrow being importable under this exact name.
    import pyarrow.parquet as pq

    table = pq.read_table(path)
    # Convert to a pandas DataFrame so that head() can be used.
    df = table.to_pandas()
    print(df.head(rows))


def main():
    """Convert everything under ``rootdir``, then preview ``PREVIEW_FILE``."""
    convert_directory(rootdir)
    preview_parquet(PREVIEW_FILE)


if __name__ == "__main__":
    main()