python递归解析JSON转换为excel输出

参考链接地址:https://blog.csdn.net/qq_17550379/article/details/80276477

参考了此博客的内容,在此基础上在对数据进行处理:

代码如下:

 

import json
import pandas as pd

def dict_generator(indict, pre=None):
    """
        把json递归的解析为key,value结构
    """
    pre = pre[:] if pre else []
    if isinstance(indict, dict):
        for key, value in indict.items():
            if isinstance(value, dict):
                if len(value) == 0:
                    yield pre+[key, '{}']
                else:
                    for d in dict_generator(value, pre + [key]):
                        yield d
            elif isinstance(value, list):
                if len(value) == 0:                   
                    yield pre+[key, '[]']
                else:
                    for v in value:
                        for d in dict_generator(v, pre + [key]):
                            yield d
            elif isinstance(value, tuple):
                if len(value) == 0:
                    yield pre+[key, '()']
                else:
                    for v in value:
                        for d in dict_generator(v, pre + [key]):
                            yield d
            else:
                yield pre + [key, value]
    else:
        yield indict

def  get_all_record_list(read_file_name):
    """
        每一个对象的json的dict  转换为list
    """
        all_record_list = []
        record_dict = {}
        columns_set = set() 
        num = 0
        KEY_INDEX_NAME =  'hits.hits._index'
        
        
        fh = open(read_file_name,'r')
        sJOSN = fh.read()
        sValue = json.loads(sJOSN)
        
        for line in dict_generator(sValue):
            key = '.'.join(line[0:-1])
            value = line[-1]
            columns_set.add(key)
            record_dict[key] = value 
            if key == KEY_INDEX_NAME and num > 0:   
                all_record_list.append(record_dict.copy())
                record_dict.clear()
                record_dict[key] = value
            num = num + 1  
        all_record_list.append(record_dict)
        return all_record_list,columns_set


def list_convert_df(all_record_list,columns_set):
    """
        每一个对象的json的dict  转换为list,并且把缺失的字段补上。然后转换为df
    """
    
        record_list = []
        combin_list = []
        
        for  record in  all_record_list:
            for column in columns_set:
                record_list.append(record.get(column,''))
            combin_list.append(record_list.copy())    
            record_list.clear()
        
        df = pd.DataFrame(combin_list,columns=columns_set)
        print ("write over")  
        return df

def change_id_to_first(df): 
    """
      把每一个小的json的id转换为df之后,调到最前头。
    """
        KEY_ID_NAME = 'hits.hits._id'
        df_id = df[KEY_ID_NAME]
        df = df.drop(KEY_ID_NAME,axis=1)
        df.insert(0,KEY_ID_NAME,df_id)
        return df


    
    
if __name__ == "__main__":
            
    read_file_name = 'file/esdata20181030.txt'
    write_file_name = 'file/wirte20181030.csv'
    
    
    all_record_list,columns_set  = get_all_record_list(read_file_name)
    df = list_convert_df(all_record_list,columns_set)
    df = change_id_to_first(df)
    
    df.to_excel('file/excel_to_python.xlsx', sheet_name='mysheet')

    #df.to_csv('file/excel_to_python.csv',index=False) 
     




 

 

 

  • 4
    点赞
  • 12
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值