python json数据清洗工具_python3 清洗json数据

本文介绍了一个使用Python3处理JSON数据的工具,通过读取JSON文件,清洗并转换为CSV格式。工具包括遍历目录中所有txt文件、去除BOM特殊字符、解析指定JSON字段,并调用配置文件中的处理函数进行数据转换,最后将结果写入CSV文件。
摘要由CSDN通过智能技术生成

-----------------

getCommunityData.py

-----------------

import json

import os

import os.path

import csv

import parameterConfig

from getFocusedPerson import get_focused_person

def waklThroughDir(rootdir, outputfile, jsonfields, dirFunc):
    """Recursively apply ``dirFunc`` to every ``.txt`` file under ``rootdir``.

    Args:
        rootdir: Directory to walk; sub-directories are visited as well.
        outputfile: Name of the output file, passed through to ``dirFunc``.
        jsonfields: Field specification, passed through to ``dirFunc``.
        dirFunc: Callable invoked as
            ``dirFunc(full_path, jsonfields, outputfile)`` for each match.
    """
    for parent, _dirnames, filenames in os.walk(rootdir):
        for filename in filenames:
            # Test the real ".txt" suffix. Splitting on "." and comparing the
            # last piece would also accept an extension-less file named "txt".
            if filename.endswith('.txt'):
                # os.walk yields bare names; join with the parent directory
                # to obtain a usable path.
                dirFunc(os.path.join(parent, filename), jsonfields, outputfile)

def createCsv(file, first_line):
    """Open ``file`` in append mode and return a ``csv.writer`` bound to it.

    Args:
        file: Path of the CSV file to append to.
        first_line: Header row; written only when ``file`` does not exist yet.

    Returns:
        A ``csv.writer`` positioned at the end of ``file``.

    Note:
        The underlying file object is deliberately left open because the
        returned writer still needs it. Nothing here closes it, so it is only
        released once the writer itself is discarded.
    """
    # Decide before opening: opening in append mode creates the file.
    is_new_file = not os.path.exists(file)
    # newline='' is what the csv module requires of files it writes to;
    # without it, platforms that translate "\n" emit "\r\r\n" row endings.
    csvfile = open(file, 'a+', newline='')
    writer = csv.writer(csvfile)
    if is_new_file:
        writer.writerow(first_line)
    return writer

def jsonArrayToCsv(json_file_name, json_fields, csv_file_name):
    """Append every record under the ``rows`` key of a JSON file to a CSV file.

    Args:
        json_file_name: Path of the JSON text file to read.
        json_fields: Field specification forwarded to ``jsonToCsv``.
        csv_file_name: Path of the CSV file the records are appended to.

    Raises:
        KeyError: If the parsed document has no ``rows`` key.
        ValueError: If the file content is not valid JSON.
    """
    # "utf-8-sig" decodes UTF-8 and drops a leading BOM (as written by some
    # Windows editors) when present, so no manual byte slicing is needed.
    # The with-block also guarantees the input file is closed.
    with open(json_file_name, encoding='utf-8-sig') as json_file:
        json_data = json.load(json_file)

    for row in json_data['rows']:
        jsonToCsv(csv_file_name, row, json_fields)

def _lookup_field(data, field):
    """Return the value at the ``/``-separated path ``field`` in ``data``.

    An empty string is returned when any key along the path is missing or
    when an intermediate value is not a dict (for example a JSON ``null``).
    """
    value = data
    for key in field.split('/'):
        if not isinstance(value, dict) or key not in value:
            return ""
        value = value[key]
    return value


def jsonToCsv(csv_file_name, data, json_fields):
    """Append one record to ``csv_file_name``, writing the header if needed.

    Args:
        csv_file_name: Path of the CSV file to append to. The header row is
            written only when the file does not exist yet.
        data: One parsed JSON record (a dict).
        json_fields: Iterable of ``(title, path, converter)`` entries.
            ``title`` is the CSV column title, ``path`` is a ``/``-separated
            key path into ``data``, and ``converter`` is either ``""`` or the
            name of a function in ``parameterConfig`` applied to the value.
    """
    first_line = []  # CSV header row
    raw_data = []    # values for this record, in column order

    for spec in json_fields:
        name, field, proc = spec[0], spec[1], spec[2]
        first_line.append(name)

        field_data = _lookup_field(data, field)
        # Missing values stay "" and are never passed to a converter.
        if field_data != "" and proc != "":
            field_data = getattr(parameterConfig, proc)(field_data)
        raw_data.append(field_data)

    # The file is opened here rather than through a helper that returns a
    # bare writer, so that the handle is closed after every row.
    is_new_file = not os.path.exists(csv_file_name)
    # newline='' is required by the csv module for files it writes to.
    with open(csv_file_name, 'a', newline='') as csvfile:
        writer = csv.writer(csvfile)
        if is_new_file:
            writer.writerow(first_line)
        writer.writerow(raw_data)

def main_proc(proc_name, rootdir, outputfile, jsonfields):
    """Print a label, then convert the txt files found under ``rootdir``.

    Args:
        proc_name: Label printed before processing starts.
        rootdir: Directory handed to ``waklThroughDir``.
        outputfile: Name of the CSV file the records are written to.
        jsonfields: Field specification describing the CSV columns.
    """
    print(proc_name)
    # Each file found by the directory walk is converted by jsonArrayToCsv.
    waklThroughDir(
        rootdir=rootdir,
        outputfile=outputfile,
        jsonfields=jsonfields,
        dirFunc=jsonArrayToCsv,
    )

# Script entry point: convert each configured data set in turn, then pass the
# ZDRY directory to get_focused_person.
if __name__ == '__main__':
    # (printed label, prefix of the parameterConfig constants for the data set)
    for label, prefix in (
        ("******", "PERSON"),
        ("******", "SYFW"),
        ("*******", "CZF"),
        ("*******", "XFAJ"),
        ("********", "LDRK"),
    ):
        main_proc(
            label,
            getattr(parameterConfig, prefix + "_ROOTDIR"),
            getattr(parameterConfig, prefix + "_OUTPUTFILE"),
            getattr(parameterConfig, prefix + "_JSONFIELDS"),
        )

    get_focused_person("********", parameterConfig.ZDRY_ROOTDIR)

----------------------

parameterConfig.py

----------------------

# Rental-housing ("出租房") data set.
# Each JSONFIELDS entry is (CSV column title, JSON field path, converter name).
# A path may use "/" to reach into a nested object; an empty converter name
# means the value is written unchanged.
CZF_ROOTDIR = "出租房"
CZF_OUTPUTFILE = "出租房322.txt"
CZF_JSONFIELDS = [
    ("地址", "address", ""),
    ("建造时间", "builtYear", ""),
    ("创建时间", "createDate", ""),
    ("危险系数编号", "hiddenDangerLevel/id", ""),
    ("房屋面积", "houseArea", ""),
    ("房屋结构编号", "houseStructure/id", ""),
    ("房屋类型", "houseType", ""),
    ("居住人数", "memberNum", ""),
    ("所在网格", "organization/orgName", ""),
    ("更新时间", "updateDate", ""),
    ("出租人姓名", "rentalPerson", ""),
    ("出租人联系方式", "rentalMobileNumber", ""),
    ("出租方式编号", "rentalType/id", ""),
]

# Fire-safety supervision ("消防安监") data set.
# Each JSONFIELDS entry is (CSV column title, JSON field path, converter name).
XFAJ_ROOTDIR = "消防安监"
XFAJ_OUTPUTFILE = "消防安监821.txt"
XFAJ_JSONFIELDS = [
    ("地址", "address", ""),
    ("名称", "companyName", ""),
    # NOTE(review): "manger" may be a typo for "manager" (compare
    # "managerTelephone" below) - confirm against the source JSON keys.
    ("负责人姓名", "manger", ""),
    ("负责人联系方式", "managerTelephone", ""),
    ("所在网格", "orgPathName", ""),
]

# Actual-housing ("实有房屋") data set.
# Each JSONFIELDS entry is (CSV column title, JSON field path, converter name).
# A non-empty converter name refers to a function defined in this module.
SYFW_ROOTDIR = "实有房屋"
SYFW_OUTPUTFILE = "实有房屋3969.txt"
SYFW_JSONFIELDS = [
    ("地址", "address", ""),
    ("是否是出租房", "isRentalHouse", "getRentalHouse"),
    # memberNum is used twice: once converted to an "empty house" flag and
    # once as the raw head count.
    ("是否是空置房", "memberNum", "getEmptyHouse"),
    ("居住人数", "memberNum", ""),
    ("数据录入时间", "createDate", ""),
    ("数据更新时间", "updateDate", ""),
    ("所在网格", "organization/id", ""),
    ("房屋编号", "id", ""),
    # NOTE(review): the longitude ("经度") and latitude ("纬度") columns are
    # mapped to "updateDate" and "rentalPerson"; this looks like a copy-paste
    # placeholder - confirm the intended JSON keys.
    ("经度", "updateDate", ""),
    ("纬度", "rentalPerson", ""),
]

def getRentalHouse(rentalhouse):
    """Convert a rental-house flag into the label '是' (yes) or '否' (no).

    Args:
        rentalhouse: Flag value. Both the string ``'false'`` and the boolean
            ``False`` (what the ``json`` module yields for a JSON ``false``)
            are treated as "not a rental house".

    Returns:
        '否' for a false flag, '是' for any other value.
    """
    if rentalhouse is False or rentalhouse == 'false':
        return '否'
    return '是'

def getEmptyHouse(memberNum):
    """Convert a resident count into the label '是' (empty) or '否' (occupied).

    Args:
        memberNum: Number of residents, either as the string ``'0'``/``'3'``
            or as a number (what the ``json`` module yields for a JSON number).

    Returns:
        '是' when the count is zero, '否' for any other value.
    """
    if memberNum in ('0', 0):
        return '是'
    return '否'

# Registered-population ("户籍人口") data set.
# Each JSONFIELDS entry is (CSV column title, JSON field path, converter name).
# A path may use "/" to reach into a nested object; a non-empty converter name
# refers to a function defined in this module.
PERSON_ROOTDIR = "户籍人口12415"
PERSON_OUTPUTFILE = "户籍人口.txt"
PERSON_JSONFIELDS = [
    ("姓名", "name", ""),
    ("性别", "gender/id", "getGender"),
    ("身份证号", "idCardNo", ""),
    ("住所地址", "currentAddress", ""),
    ("户籍地址", "nativePlaceAddress", ""),
    ("手机号", "mobileNumber", ""),
    ("座机号", "telephone", ""),
    ("户籍派出所", "nativePoliceStation", ""),
    ("是否死亡", "death", "isDeath"),
    ("所在网格", "organization/orgName", ""),
    ("创建时间", "createDate", ""),
    ("更新时间", "updateDate", ""),
    ("生日", "birthday", ""),
    ("省", "province", ""),
    ("市", "city", ""),
    ("区", "district", ""),
    ("个人编号", "id", ""),
    ("房屋编号", "houseId", ""),
]

def getGender(id):
    """Convert a gender id into the label '男' (male) or '女' (female).

    Args:
        id: Gender id, either the string ``'1'`` or the integer ``1`` for
            male. The parameter name shadows the ``id`` builtin; it is kept
            so the signature stays unchanged.

    Returns:
        '男' when the id is 1, '女' for every other value.
        NOTE(review): unknown ids also map to '女' - confirm this is intended.
    """
    if id in ('1', 1):
        return '男'
    return '女'

def isDeath(death):
    """Convert a death flag into the label '是' (yes) or '否' (no).

    Args:
        death: Flag value. Both the string ``'false'`` and the boolean
            ``False`` (what the ``json`` module yields for a JSON ``false``)
            are treated as "not deceased".

    Returns:
        '否' for a false flag, '是' for any other value.
    """
    if death is False or death == 'false':
        return '否'
    return '是'

# Floating-population ("流动人口") data set.
# Each JSONFIELDS entry is (CSV column title, JSON field path, converter name).
# A path may use "/" to reach into a nested object; a non-empty converter name
# refers to a function defined in this module.
LDRK_ROOTDIR = "流动人口"
LDRK_OUTPUTFILE = "流动人口830.txt"
LDRK_JSONFIELDS = [
    ("姓名", "name", ""),
    ("性别", "gender/id", "getGender"),
    ("身份证号", "idCardNo", ""),
    ("住所地址", "currentAddress", ""),
    ("户籍地址", "nativePlaceAddress", ""),
    ("手机号", "mobileNumber", ""),
    ("座机号", "telephone", ""),
    ("是否死亡", "death", "isDeath"),
    ("所在网格", "organization/orgName", ""),
    ("创建时间", "createDate", ""),
    ("更新时间", "updateDate", ""),
    ("生日", "birthday", ""),
    ("省", "province", ""),
    ("市", "city", ""),
    ("区", "district", ""),
    ("个人编号", "id", ""),
]

# Root directory of the key-persons ("重点人员") data set.
ZDRY_ROOTDIR = "重点人员"

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值