有个需求,需要把10000份json文件的数据导入到excel,但考虑到内存问题,一次读取1000份json文件并写入excel,就需要追加数据。
json文件格式:product1~10000.json
import json
import xlrd
import xlwt
import os
from xlutils.copy import copy
def readJsonFile(findex):
fileName = "json/product"+str(findex)+".json"
items = {}
if os.path.exists(fileName):
with open(fileName,'r',encoding='utf-8') as f:
try:
jsonData = json.load(f)
if 'tags' in jsonData:
tags = jsonData['tags']
palettes = jsonData['palettes']
tagsStr = ",".join(str(i) for i in tags)
colors = ",".join(str(i['color']) for i in palettes)
ratios = ",".join(str(i['ratio']) for i in palettes)
items['fileName'] = fileName
items['annotation'] = jsonData['annotation']
items['name'] = jsonData['name']
items['ext'] = jsonData['ext']
items['width'] = jsonData['width']
items['height'] = jsonData['height']
items['id'] = jsonData['id']
items['tags'] = tagsStr
items['colors'] = colors
items['ratios'] = ratios
return items
except ValueError:
return None
def writeExcel(listItem):
data = xlrd.open_workbook("tags_a.xlsx")
table = data.sheet_by_index(0)
rows = table.nrows
excel = copy(data)
tableCopy = excel.get_sheet(0)
index = 0
for litem in listItem:
if not litem is None:
tableCopy.write(index+rows, 0, litem['fileName'])
tableCopy.write(index+rows, 1, litem['annotation'])
tableCopy.write(index+rows, 2, litem['name'])
tableCopy.write(index+rows, 3, litem['ext'])
tableCopy.write(index+rows, 4, litem['width'])
tableCopy.write(index+rows, 5, litem['height'])
tableCopy.write(index+rows, 6, litem['id'])
tableCopy.write(index+rows, 7, litem['height'])
tableCopy.write(index+rows, 8, litem['tags'])
tableCopy.write(index+rows, 9, litem['colors'])
tableCopy.write(index+rows, 10, litem['ratios'])
index += 1
excel.save("tags_a.xlsx")
for i in range(0, 10):
listItem = []
for j in range(0, 1000):
x = i * 1000 + j + 1
item = readJsonFile(x)
listItem.append(item)
print(x)
writeExcel(listItem)