面板数据转截面数据、截面数据转面板数据、合并多个面板数据

         最近找数据整理用excel一直重复操作给恶心吐了,主要重复三种操作,多个面板的合并汇总,面板转横截面数据、截面数据转面板数据。所以自己写了代码解放双手,秒出结果,直接复制文件地址即可实现!

合并面板数据

def merge_panel(file_path,file_finish):
    file_name_list = os.listdir(file_path)
    file_list = []

    for i in file_name_list:
        a = file_path + '\\' + i
        file_list.append(a)
        # print(a)

    data1 = pd.DataFrame(columns=['临时'])

    for file in file_list:
        data = pd.read_excel(file, sheet_name=0)
        names_list = data.columns.tolist()
        # print(names_list)
        # data.reindex(columns=names_list)
        data['临时'] = data[names_list[0]].astype(str) + data[names_list[1]].str[:2].astype(str)
        names_list.insert(names_list.index(names_list[2]), '临时')
        # print(names_list)
        data = data.reindex(columns=names_list)
        data = data.set_index([names_list[0], names_list[1]])
        data1 = data1.merge(data, how='outer', on='临时')
        # print(data1)
        # print(data1)
        # print(data[names_list[1]].str[:2])
        # print(data)

    data1["time"] = data1["临时"].str.extract('(\d+)')
    # print(df)
    data1["place"] = data1["临时"].str.extract('(\D+)', expand=False).str.strip()
    names_list1 = data1.columns.tolist()
    # print(names_list1)
    names_list1.insert(names_list1.index(names_list1[1]), 'time')
    # print(names_list1)
    names_list1.insert(names_list1.index('time') + 1, "place")
    data1 = data1.reindex(columns=names_list1[:-2])
    data1 = data1.drop(['临时'], axis=1)
    # data1 = data1.set_index(0)
    # print(names_list1[:-2])
    # print(data1.columns.tolist())
    names_list2 = data1.columns.values
    data1 = data1.set_index(names_list2[1])
    data1.to_excel(file_finish)

截面数据转面板数据

def to_panel(file,file_finish):
    data = pd.read_excel(file, sheet_name=0)

    name = data.columns.values
    data = data.melt(id_vars=name[0], value_vars=name[1:]).set_index(name[0])
    # data.to_excel(file_finish)
    data["time"] = data["variable"].str.extract('(\d+)')
    # print(df)
    data["variable"] = data["variable"].str.extract('(\D+)', expand=False).str.strip()
    data = data.loc[:, ['time', 'variable', 'value']]
    data = data.pivot_table(index=[name[0], 'time'], columns=["variable"]).reset_index().set_index(name[0])

    # print(data.columns.values)

    name12 = data.columns.values[:1]
    name_list1 = []
    for i in name12:
        name_list1.append(i[0])

    names = data.columns.values[1:]
    name_list = []
    for i in names:
        name_list.append(i[1])

    name_sum = name_list1 + name_list
    # print(name_sum)
    data.set_axis(name_sum, axis='columns', inplace=True)
    # data.drop(data.index[0:3], inplace=True)
    data = data.reset_index()
    data = data.set_index('time')
    data.to_excel(file_finish)

面板数据转截面数据

def reshape_wide(file,file_finish):
    data = pd.read_excel(file, sheet_name=0)
    # 获取列名
    list_name = data.columns.values
    # 读取省份(城市)
    city_list = list(set(data[list_name[0]]))
    data_Section = pd.DataFrame()
    city_list.sort()
    data_Section[list_name[0]] = city_list
    # 读取年份
    year_list = list(set(data[list_name[1]]))
    year_list.sort()
    # 截面变量
    for para in list_name[2:]:
        for year in year_list:
            para_name = str(year) + para
            data_Section[para_name] = ""

    # 读取行值
    for index, row in data.iterrows():
        # 城市
        row_city = row[0]
        # 年份
        row_year = row[1]
        # 变量
        row_para = row[2:]
        paras_name = list_name[2:]
        # 将两列表合成元组
        hecheng = list(zip(row_para, str(row_year) + paras_name))
        # print(hecheng)

        city_weizhi_hang = data_Section[(data_Section[list_name[0]] == row_city)].index.tolist()

        for num, paraname in hecheng:
            data_Section.loc[city_weizhi_hang, paraname] = num
    data_Section = data_Section.set_index(list_name[0])
    data_Section.to_excel(file_finish)

 全部代码

import pandas as pd
import numpy as np
import os


# 消去空值
def read_excel(file,file_finish):
    data = pd.read_excel(file, sheet_name=0)
    print(data)
    data = data.dropna()
    data.to_excel(r"C:\Users\ZhangX\Desktop\stata.xlsx")


# 面板数据转截面数据
def reshape_wide(file,file_finish):
    data = pd.read_excel(file, sheet_name=0)
    # 获取列名
    list_name = data.columns.values
    # 读取省份(城市)
    city_list = list(set(data[list_name[0]]))
    data_Section = pd.DataFrame()
    city_list.sort()
    data_Section[list_name[0]] = city_list
    # 读取年份
    year_list = list(set(data[list_name[1]]))
    year_list.sort()
    # 截面变量
    for para in list_name[2:]:
        for year in year_list:
            para_name = str(year) + para
            data_Section[para_name] = ""

    # 读取行值
    for index, row in data.iterrows():
        # 城市
        row_city = row[0]
        # 年份
        row_year = row[1]
        # 变量
        row_para = row[2:]
        paras_name = list_name[2:]
        # 将两列表合成元组
        hecheng = list(zip(row_para, str(row_year) + paras_name))
        # print(hecheng)

        city_weizhi_hang = data_Section[(data_Section[list_name[0]] == row_city)].index.tolist()

        for num, paraname in hecheng:
            data_Section.loc[city_weizhi_hang, paraname] = num
    data_Section = data_Section.set_index(list_name[0])
    data_Section.to_excel(file_finish)


# 截面数据转面板数据
def to_panel(file,file_finish):
    data = pd.read_excel(file, sheet_name=0)

    name = data.columns.values
    data = data.melt(id_vars=name[0], value_vars=name[1:]).set_index(name[0])
    # data.to_excel(file_finish)
    data["time"] = data["variable"].str.extract('(\d+)')
    # print(df)
    data["variable"] = data["variable"].str.extract('(\D+)', expand=False).str.strip()
    data = data.loc[:, ['time', 'variable', 'value']]
    data = data.pivot_table(index=[name[0], 'time'], columns=["variable"]).reset_index().set_index(name[0])

    # print(data.columns.values)

    name12 = data.columns.values[:1]
    name_list1 = []
    for i in name12:
        name_list1.append(i[0])

    names = data.columns.values[1:]
    name_list = []
    for i in names:
        name_list.append(i[1])

    name_sum = name_list1 + name_list
    # print(name_sum)
    data.set_axis(name_sum, axis='columns', inplace=True)
    # data.drop(data.index[0:3], inplace=True)
    data = data.reset_index()
    data = data.set_index('time')
    data.to_excel(file_finish)


# 合并面板数据
def merge_panel(file_path,file_finish):
    file_name_list = os.listdir(file_path)
    file_list = []

    for i in file_name_list:
        a = file_path + '\\' + i
        file_list.append(a)
        # print(a)

    data1 = pd.DataFrame(columns=['临时'])

    for file in file_list:
        data = pd.read_excel(file, sheet_name=0)
        names_list = data.columns.tolist()
        # print(names_list)
        # data.reindex(columns=names_list)
        data['临时'] = data[names_list[0]].astype(str) + data[names_list[1]].str[:2].astype(str)
        names_list.insert(names_list.index(names_list[2]), '临时')
        # print(names_list)
        data = data.reindex(columns=names_list)
        data = data.set_index([names_list[0], names_list[1]])
        data1 = data1.merge(data, how='outer', on='临时')
        # print(data1)
        # print(data1)
        # print(data[names_list[1]].str[:2])
        # print(data)

    data1["time"] = data1["临时"].str.extract('(\d+)')
    # print(df)
    data1["place"] = data1["临时"].str.extract('(\D+)', expand=False).str.strip()
    names_list1 = data1.columns.tolist()
    # print(names_list1)
    names_list1.insert(names_list1.index(names_list1[1]), 'time')
    # print(names_list1)
    names_list1.insert(names_list1.index('time') + 1, "place")
    data1 = data1.reindex(columns=names_list1[:-2])
    data1 = data1.drop(['临时'], axis=1)
    # data1 = data1.set_index(0)
    # print(names_list1[:-2])
    # print(data1.columns.tolist())
    names_list2 = data1.columns.values
    data1 = data1.set_index(names_list2[1])
    data1.to_excel(file_finish)


# ****面板数据合并格式********** #
#  年份   省份   变量1  变量2 ...
#  2003  重庆    123  1234
#  ...   ...    ...
# **********************#

# ****面板数据转截面数格式****** #
#  省份   年份   变量1  变量2 ...
#  重庆   2003   123  1234
#  ...   ...   ...   ...
# ************************** #

# 读取需要转换的文件路径


# 合并面板数据数据指定文件夹路径即可
file_read = r"C:\Users\ZhangX\Desktop\999测试"
# 文件保存路径
file_finish = r"C:\Users\ZhangX\Desktop\999测试\汇总.xlsx"

if __name__ == '__main__':
    merge_panel(file_read,file_finish)
    # to_panel(file_read)
    # reshape_wide(file_read)

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值