1 Json格式转CSV格式
(1)单层json数据格式如下:(123.json)
123.json格式数据没有进行嵌套,
{"actionResult": "success", "id": "7315021997", "type": "userlog", "time": "2020-11-09 16:55:37"}
{"actionResult": "success", "id": "7315021997", "type": "userlog", "time": "2020-11-09 16:55:37"}
{"actionResult": "success", "id": "7315021997", "type": "userlog", "time": "2020-11-09 16:55:37"}
{"actionResult": "success", "id": "7315021997", "type": "userlog", "time": "2020-11-09 16:55:37"}
python代码转换如下:
import csv
import json
import codecs
'''
1 将json文件格式转为csv文件格式并保存。
'''
class Json_Csv():
#初始化方法,创建要转成的1234.csv文件。
def __init__(self):
self.save_csv = open('1234.csv', 'w', encoding='utf-8', newline='')
self.write_csv = csv.writer(self.save_csv, delimiter=',') #以,为分隔符
def trans(self,filename):
with codecs.open(filename,'r',encoding='utf-8') as f:
read=f.readlines()
flag=True
for index,info in enumerate(read):
data=json.loads(info)
if index <10000000: #读取json文件前多多少行写入csv文件 。要是想写入全部,则去掉判断。
if flag: #截断第一行当做head
keys=list(data.keys()) #将得到的keys用列表的形式封装好,才能写入csv
self.write_csv.writerow(keys)
flag=False #释放
value=list(data.values()) #写入values,也要是列表形式
self.write_csv.writerow(value)
self.save_csv.close() #写完就关闭
if __name__=='__main__':
json_csv=Json_Csv()
path = '123.json'
json_csv.trans(path)
处理之后得到的数据结果如下:
actionResult,id,type,time
success,7315021997,userlog,2020-11-09 16:55:37
success,7315021997,userlog,2020-11-09 16:55:37
success,7315021997,userlog,2020-11-09 16:55:37
success,7315021997,userlog,2020-11-09 16:55:37
(2)可以指定字段进行输出,不用输出全部字段
同样的123.json数据,指定id,type,time三个字段输出,只需要指定写入的首行三个字段,然后将他的value拿到遍历出来,python代码如下
import json
import csv
import codecs
class nestJson_Csv():
# 初始化方法,创建csv文件。
def __init__(self):
self.save_csv = open('../Json/12345.csv', 'w', encoding='utf-8', newline='')
self.write_csv = csv.writer(self.save_csv, delimiter=',') # 以,为分隔符
def trans(self, filename):
with codecs.open(filename, 'r', encoding='utf-8') as f:
self.write_csv.writerow(['id','type','time'])
read = f.readlines()
flag = True
for index, info in enumerate(read):
data = json.loads(info)
if index < 900000: # 读取json文件的前90万行写入csv文件 。要是想写入全部,则去掉判断。
if flag: # 截断第一行当做head
id_value = data['id'] # 获取time
type_value = data['type'] # 获取rcip
time_value = data['time'] # 获取time
value = [id_value,type_value,time_value] # 写入values,也要是列表形式
self.write_csv.writerow(value)
self.save_csv.close() # 写完就关闭
if __name__=='__main__':
json_csv=nestJson_Csv()
path='../Json/123.json'
json_csv.trans(path)
输出结果如下:
id,type,time
7315021997,userlog,2020-11-09 16:55:37
7315021997,userlog,2020-11-09 16:55:37
7315021997,userlog,2020-11-09 16:55:37
7315021997,userlog,2020-11-09 16:55:37
(3 )嵌套json处理,道理和上述的指定字段输出一样,假如处理的json如下:
{"id": "7315021997", "type": "userlog", "time": "2020-11-09 16:55:37", "clientInfo": {"deviceID": "DA478083", "ip": "119.49.149.49", "vip": "10.0.74.238"}, "rcip": "10.1.7.17"}
{"id": "7315021998", "type": "userlog", "time": "2020-11-09 16:55:37", "clientInfo": {"deviceID": "DA478083", "ip": "119.49.149.49", "vip": "10.0.74.238"}, "rcip": "10.1.7.17"}
{"id": "7315021999", "type": "userlog", "time": "2020-11-09 16:55:37", "clientInfo": {"deviceID": "DA478083", "ip": "119.49.149.49", "vip": "10.0.74.238"}, "rcip": "10.1.7.17"}
{"id": "7315021990", "type": "userlog", "time": "2020-11-09 16:55:37", "clientInfo": {"deviceID": "DA478083", "ip": "119.49.149.49", "vip": "10.0.74.238"}, "rcip": "10.1.7.17"}
{"id": "7315021991", "type": "userlog", "time": "2020-11-09 16:55:37", "clientInfo": {"deviceID": "DA478083", "ip": "119.49.149.49", "vip": "10.0.74.238"}, "rcip": "10.1.7.17"}
{
"id": "7315021997",
"type": "userlog",
"time": "2020-11-09 16:55:37",
"clientInfo": {
"deviceID": "DA478083",
"ip": "119.49.149.49",
"vip": "10.0.74.238"
},
"rcip": "10.1.7.17"
}
输出指定的字段,如id,time,ip,vip,rcip 代码修改如下:
import json
import csv
import codecs
class nestJson_Csv():
# 初始化方法,创建csv文件。
def __init__(self):
self.save_csv = open('../Json/vpnlog.csv', 'w', encoding='utf-8', newline='')
self.write_csv = csv.writer(self.save_csv, delimiter=',') # 以,为分隔符
def trans(self, filename):
with codecs.open(filename, 'r', encoding='utf-8') as f:
self.write_csv.writerow(['id','time','ip','vip','rcip'])
read = f.readlines()
flag = True
for index, info in enumerate(read):
data = json.loads(info)
if index < 900000: # 读取json文件的前90万行写入csv文件 。要是想写入全部,则去掉判断。
if flag: # 截断第一行当做head
id_value = data['id'] # 获取id
time_value = data['time'] # 获取time
ip_value = data['clientInfo']['ip'] # 获取clientInfo中的ip
vip_value = data['clientInfo']['vip'] # 获取clientInfo中的vip
rcip_value = data['rcip'] # 获取rcip
value = [id_value,time_value,ip_value,vip_value,rcip_value ] # 写入values,也要是列表形式
self.write_csv.writerow(value)
self.save_csv.close() # 写完就关闭
if __name__=='__main__':
json_csv=nestJson_Csv()
path='../Json/vpnlog.json'
json_csv.trans(path)
输出的csv格式如下:
id,time,ip,vip,rcip
7315021997,2020-11-09 16:55:37,119.49.149.49,10.0.74.238,10.1.7.17
7315021998,2020-11-09 16:55:37,119.49.149.49,10.0.74.238,10.1.7.17
7315021999,2020-11-09 16:55:37,119.49.149.49,10.0.74.238,10.1.7.17
7315021990,2020-11-09 16:55:37,119.49.149.49,10.0.74.238,10.1.7.17
7315021991,2020-11-09 16:55:37,119.49.149.49,10.0.74.238,10.1.7.17