Python文本处理:解析json格式的数据

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/OiteBody/article/details/81167375

json模块

版本:python2.7

Python提供json模块处理json格式的数据,主要功能:

json.dumps(): 将Python对象(如dict、list)序列化为JSON格式的str
json.loads(): 将JSON格式的str解析为Python对象(如dict、list)

小案例

目标

解析一个每行包含一条json记录的文件,并按elpType和metricName对记录数进行汇总统计。

编码

import json

'''
json.dumps(): 将dict转为str
json.loads(): 将str转为dict
'''

def readFile(path):
    """Read a text file and return its lines as a list of strings.

    Every line is returned as-is: trailing newlines are kept and blank
    lines are not filtered out. The path is printed before reading, and
    the number of lines read is printed afterwards even when opening or
    reading the file fails.

    Args:
        path: Path of the file to read.

    Returns:
        A list with one string per line of the file.

    Raises:
        IOError: If the file cannot be opened or read (this is OSError
            on Python 3). The error is not swallowed here.
    """
    datas = []
    try:
        print("path:%s" % (path))
        # The with-block guarantees the handle is closed, including when
        # reading raises; previously the file was never closed.
        with open(path, 'r') as f:
            datas.extend(f.readlines())
    finally:
        # Runs on both success and failure so the count is always reported.
        print("read end.data num:%d" % (len(datas)))
    return datas

def sumElp(datas, key, metric1, metric2):
    """Aggregate per-type counts and in/out record totals from JSON lines.

    Each element of ``datas`` is parsed with ``json.loads``. Two kinds of
    totals are accumulated into one dictionary:

    * For every entry in the list stored under ``key``, the entry's
      "count" is added to the total kept under the entry's "elpType".
    * For every record whose "metricName" equals ``metric1`` or
      ``metric2``, "inTotalRecords" and "outTotalRecords" are added to
      the totals kept under "<metricName>-in" and "<metricName>-out".

    Records with any other metric name still get their "-in"/"-out" keys
    created with value 0, and "something wrong" is printed for them.
    Records without a "metricName" only print "something wrong".
    The resulting dictionary is printed before it is returned.

    Args:
        datas: Iterable of strings, each holding one JSON object. Blank
            or whitespace-only strings are skipped.
        key: Name of the field holding the list of per-type statistics.
        metric1: First metric name whose in/out records are summed.
        metric2: Second metric name whose in/out records are summed.

    Returns:
        A dict mapping each elpType and each "<metricName>-in" /
        "<metricName>-out" key to its accumulated total.

    Raises:
        ValueError: If a non-blank line is not valid JSON.
    """
    sumDic = {}
    for data in datas:
        # A blank line (e.g. a trailing newline at the end of the file)
        # is not valid JSON, so skip it instead of letting json.loads fail.
        if not data.strip():
            continue
        load_line = json.loads(data)

        # The statistics list is optional; a missing or empty one adds nothing.
        for elpType in load_line.get(key) or []:
            sKey = elpType.get("elpType")
            # A missing count contributes 0 rather than raising on None.
            sumDic[sKey] = sumDic.get(sKey, 0) + (elpType.get("count") or 0)

        metricName = load_line.get("metricName")
        if metricName is None:
            # Without a metric name no "-in"/"-out" key can be built.
            print("something wrong")
            continue

        inMetricKey = metricName + "-in"
        outMetricKey = metricName + "-out"
        #print("inMetricKey:%s, outMetricKey:%s" %(inMetricKey, outMetricKey))
        sumDic.setdefault(inMetricKey, 0)
        sumDic.setdefault(outMetricKey, 0)

        # Both metrics are accumulated the same way, so one branch covers both.
        if metricName in (metric1, metric2):
            sumDic[inMetricKey] += load_line.get("inTotalRecords") or 0
            sumDic[outMetricKey] += load_line.get("outTotalRecords") or 0
        else:
            print("something wrong")

    print(sumDic)
    return sumDic


# Load the sample records, then aggregate and print the per-type counts
# and the in/out totals of the two task metrics.
datas = readFile("data_json.txt")
sumElp(
    datas,
    key="elpTypeStatistic",
    metric1="trans.task.resid",
    metric2="load.task.resid",
)

附:需要解析的data_json.txt数据

{"inTotalRecords":17512,"metricName":"trans.task.resid","outTotalRecords":35024,"recordTime":1532072536085,"resId":"AJ_192","startTime":1532072530006,"taskId":"t_AJ_192"}
{"elpTypeStatistic":[{"count":5786,"elpType":"entity_monitor"},{"count":5786,"elpType":"relation_vehicleinfo"}],"inTotalRecords":11572,"metricName":"load.task.resid","outTotalRecords":11572,"recordTime":1532072537621,"resId":"AJ_192","startTime":1532072532006,"taskId":"t_AJ_192"}
{"elpTypeStatistic":[{"count":10966,"elpType":"entity_monitor"},{"count":10966,"elpType":"relation_vehicleinfo"}],"inTotalRecords":21932,"metricName":"load.task.resid","outTotalRecords":21932,"recordTime":1532072546199,"resId":"AJ_192","startTime":1532072537628,"taskId":"t_AJ_192"}
{"elpTypeStatistic":[{"count":760,"elpType":"entity_monitor"},{"count":760,"elpType":"relation_vehicleinfo"}],"inTotalRecords":1520,"metricName":"load.task.resid","outTotalRecords":1520,"recordTime":1532072548838,"resId":"AJ_192","startTime":1532072546202,"taskId":"t_AJ_192"}
{"elpTypeStatistic":[{"count":11976,"elpType":"entity_monitor"},{"count":11976,"elpType":"relation_vehicleinfo"}],"inTotalRecords":23952,"metricName":"load.task.resid","outTotalRecords":23952,"recordTime":1532072555231,"resId":"AJ_192","startTime":1532072548936,"taskId":"t_AJ_192"}
{"inTotalRecords":118733,"metricName":"trans.task.resid","outTotalRecords":237466,"recordTime":1532072564461,"resId":"AJ_192","startTime":1532072536093,"taskId":"t_AJ_192"}
{"elpTypeStatistic":[{"count":14368,"elpType":"entity_monitor"},{"count":14368,"elpType":"relation_vehicleinfo"}],"inTotalRecords":28736,"metricName":"load.task.resid","outTotalRecords":28736,"recordTime":1532072566799,"resId":"AJ_192","startTime":1532072555233,"taskId":"t_AJ_192"}

 

展开阅读全文

没有更多推荐了,返回首页