json数据-高考录取数据分析

最新推荐文章于 2024-06-07 23:18:39 发布

徐尼莫

最新推荐文章于 2024-06-07 23:18:39 发布

阅读量563

点赞数

分类专栏： Python Json数据

本文链接：https://blog.csdn.net/qq_34140940/article/details/89388584

版权

Python 同时被 2 个专栏收录

2 篇文章 0 订阅

订阅专栏

Json数据

1 篇文章 0 订阅

订阅专栏

json数据-高考招考计划及录取数据处理

#导入模块
import pandas as pd
import os
import re

#设置工作路径
work_directory = r'D:\Nemo\高考分析\data'
os.chdir(work_directory)
file_name=r'理科历史数据.csv'
science_data=pd.read_csv(file_name,index_col = None, header = 0, engine = 'python', encoding = 'gbk')

file_name=r'文科历史数据.csv'
art_data=pd.read_csv(file_name,index_col = None, header = 0, engine = 'python', encoding = 'gbk')

#数据清洗
def replace_single_quote(object_data):
    temp1=object_data["college_AdmissionScore"]
    temp2=object_data["Admission_detail"]
    pattern01 = re.compile(r"'")
    pattern02 = re.compile(r", ]")
    pattern03 = re.compile("\["+", ")
    pattern04 = re.compile(", ,")
    pattern05 = re.compile("\n")
    for i in range(len(temp1)):
        temp1[i]=str(temp1[i])
        temp2[i]=str(temp2[i])
        if not temp1[i]=='nan':
            temp1[i]=re.sub(pattern01,"",temp1[i])
        if not temp2[i]=='nan':
            temp2[i]=re.sub(pattern01,"",temp2[i])
        temp1[i]=re.sub(pattern02,"]",temp1[i])
        temp2[i]=re.sub(pattern02,"]",temp2[i])
        temp1[i]=re.sub(pattern03,"[",temp1[i])
        temp2[i]=re.sub(pattern03,"[",temp2[i])
        temp1[i]=re.sub(pattern04,",",temp1[i])
        temp2[i]=re.sub(pattern04,",",temp2[i])
        temp1[i]=re.sub(pattern05,"",temp1[i])
        temp2[i]=re.sub(pattern05,"",temp2[i])
    return object_data

#部分字段转为dataframe
def josn2df(object_data):
    result = pd.DataFrame()
    for i in range(object_data.shape[0]):
        print("now is running row %s"%i+" !")
        if (object_data.iloc[i,3] == 'nan' or object_data.iloc[i,2]== 'nan' or object_data.iloc[i,1] == 'nan'):
            continue
        col3=pd.read_json(object_data.iloc[i,3], orient='records')
        col2=pd.read_json(object_data.iloc[i,2], orient='records')
        col1=pd.read_json("["+object_data.iloc[i,1]+"]", orient='records')
        if col3.size ==0:
            col23 = col2
        if col2.size ==0:
            col23 = col3
        if col2.size ==0 and col3.size ==0:
            continue
        if not(col2.size ==0 or col3.size ==0):
            col23=pd.merge(col3,col2,how="outer",left_on=["historyProYear","collegeHistoryID"],right_on=["matricDiffYear","collegeHistoryID"])
        col123=pd.merge(col1,col23,how="outer",right_on=["collegeHistoryID"],left_on=["collegeHistoryId"])
        result = pd.concat([result,col123])
    result.reset_index=None
    return result

#数据清洗
art_data=replace_single_quote(art_data)
science_data=replace_single_quote(science_data)

#内容为json的dataframe解析为dataframe
art_data_df = josn2df(art_data)
science_data_df = josn2df(science_data)

#写为excel
writer = pd.ExcelWriter('高考分析历史数据.xlsx')
art_data_df.to_excel(writer,'文科')
science_data_df.to_excel(writer,'理科')
writer.save()


#读取json文件数据，并转为dataframe
def jsonfile2dataframe(filename):
    data_str = open(filename,encoding='utf-8').read()
    data_str = "["+data_str+"]"
    result =pd.read_json(data_str, orient='records')
    return result

#读取json文件
science_CollegePlan_df = jsonfile2dataframe("CollegePlan理科json.json")
art_CollegePlan_df = jsonfile2dataframe("CollegePlan文科json.json")

#写为excel
writer = pd.ExcelWriter('高考分析CollegePlan.xlsx')
art_CollegePlan_df.to_excel(writer,'CollegePlan文科')
science_CollegePlan_df.to_excel(writer,'CollegePlan理科')
writer.save()

徐尼莫

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
json数据-高考录取数据分析

json数据-高考招考计划及录取数据处理#导入模块import pandas as pdimport osimport re#设置工作路径work_directory = r'D:\Nemo\高考分析\data'os.chdir(work_directory)file_name=r'理科历史数据.csv'science_data=pd.read_csv(file_name,in...
复制链接

扫一扫