帮中科院的一位小学妹写的一个小脚本,主要是利用 Python 的 pandas、xlrd、dbfpy 包,将 Excel 文件(xls/xlsx)或 csv 文件转换成 dbf 文件。
一、安装相关包
pip install pandas
pip install xlrd
pip install dbfpy
二、实现
代码如下
# -*- coding: utf-8 -*-
__author__ = 'lihailin'
__mail__ = '415787837@qq.com'
__date__ = '2018-05-04'
__version__ = 1.0
import pandas as pd
import os,sys
from dbfpy import dbf
import csv
def xls2csv(xlsFile, csvFileName, sheetName='pcp_lookup'):
    '''Convert one worksheet of an Excel workbook to a UTF-8 CSV file.

    The first worksheet column is used as the DataFrame index, so it is
    written out as the first column of the CSV file.

    Args:
        xlsFile: path of the Excel workbook to read
        csvFileName: path of the CSV file to write
        sheetName: worksheet to export; defaults to 'pcp_lookup', the
            sheet name that used to be hard-coded here
    '''
    # The sheet is passed positionally because the keyword was renamed
    # from `sheetname` to `sheet_name` between pandas releases.
    data = pd.read_excel(xlsFile, sheetName, index_col=0)
    data.to_csv(csvFileName, encoding='utf-8')
def genDbfHeader(dbfDb, csvHeader, fieldLength=25):
    '''Define one character field in the DBF table per CSV header column.

    Args:
        dbfDb: Dbf instance (anything exposing addField) to add fields to
        csvHeader: list of str, the column names to use as DBF field names
        fieldLength: width of every character field; defaults to 25

    Returns:
        The same dbfDb object, with the fields added.
    '''
    for fieldName in csvHeader:
        # Every column is stored as text: (name, type 'C', width).
        dbfDb.addField((fieldName, 'C', fieldLength))
    return dbfDb
def writeDbfData(dbfDb, dbfData):
    '''Append every row of dbfData to the DBF table as a new record.

    Args:
        dbfDb: Dbf instance (anything exposing newRecord) to write to
        dbfData: iterable of dicts, e.g. a csv.DictReader; each dict maps
            a field name from the DBF header to that row's value
    '''
    for row in dbfData:
        rec = dbfDb.newRecord()
        for key, value in row.items():
            if key is None:
                # csv.DictReader collects surplus cells of an over-long
                # row under the key None; there is no field to hold them.
                continue
            # csv.DictReader fills missing cells of a short row with
            # None; store them as empty text instead.
            rec[key] = '' if value is None else value
        rec.store()
def csv2dbf(csvFile, dbfFile):
    '''Convert a CSV file with a header row into a new DBF file.

    Args:
        csvFile: path of the CSV file to read
        dbfFile: path of the DBF file to create

    Raises:
        ValueError: if the CSV file has no header row (e.g. it is empty)
    '''
    # Python 2's csv module expects the file to be opened in binary mode.
    with open(csvFile, 'rb') as csvfile:
        # One reader supplies both the header and the rows, so the DBF
        # field names and the record keys are parsed with the same
        # dialect and always match.
        reader = csv.DictReader(csvfile)
        header_row = reader.fieldnames
        if not header_row:
            raise ValueError('CSV file has no header row: %s' % csvFile)
        db = dbf.Dbf(dbfFile, new=True)
        try:
            db = genDbfHeader(db, header_row)
            writeDbfData(db, reader)
        finally:
            # Release the DBF handle even if writing a record fails.
            db.close()
def main():
    '''Convert every Excel/CSV file in the working folder to DBF.

    Excel workbooks (.xls/.xlsx) are first exported to a CSV file of the
    same base name; every CSV file is then converted to a DBF file.
    Finally all .dbf files in the folder are moved into the 'dbfFile'
    sub-folder. Other files and sub-folders are ignored.
    '''
    import shutil  # only main() needs it

    xlsDictory = '.'  # folder to scan
    outDir = os.path.join(xlsDictory, 'dbfFile')
    excelExts = ('.xls', '.xlsx')

    for fileName in sorted(os.listdir(xlsDictory)):
        srcPath = os.path.join(xlsDictory, fileName)
        if not os.path.isfile(srcPath):
            continue
        if fileName.startswith('~$'):
            # Lock file Excel creates next to an open workbook.
            continue
        # splitext keeps dots inside the base name ('a.b.xlsx' -> 'a.b').
        baseName, ext = os.path.splitext(fileName)
        ext = ext.lower()
        if ext not in excelExts and ext != '.csv':
            continue
        csvFileName = os.path.join(xlsDictory, '%s.csv' % baseName)
        if ext in excelExts:  # Excel workbooks are exported to CSV first
            xls2csv(srcPath, csvFileName)
        dbfFileName = os.path.join(xlsDictory, '%s.dbf' % baseName)
        csv2dbf(csvFileName, dbfFileName)

    if not os.path.isdir(outDir):
        os.makedirs(outDir)
    for fileName in os.listdir(xlsDictory):
        srcPath = os.path.join(xlsDictory, fileName)
        if fileName.endswith('.dbf') and os.path.isfile(srcPath):
            # The full target path is given so that a file left over
            # from an earlier run is replaced.
            shutil.move(srcPath, os.path.join(outDir, fileName))
# Run the conversion only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
三、环境
- Python 2.7