动态偏离份额程序

import pandas as pd
import numpy as np
import openpyxl

# Never truncate printed DataFrames: show every column and every row.
pd.options.display.max_columns = None
pd.options.display.max_rows = None
# Small constant added to the numerator and denominator of the per-industry
# growth rate so that a base value of exactly zero does not divide by zero.
_GROWTH_EPSILON = 0.000001


def _period_growth(levels):
    """Return period-over-period growth rates for *levels*.

    The first period has no base period, so its rate is 0.0.  A zero base
    level yields ``inf``/``nan`` (NumPy semantics) rather than an exception.
    """
    levels = np.asarray(levels, dtype=float)
    rates = np.zeros(len(levels))
    rates[1:] = (levels[1:] - levels[:-1]) / levels[:-1]
    return rates


def SSA(area, area_obj, industry, time, input_var, data):
    """Decompose each observation into dynamic shift-share components.

    For every row of ``data`` that does not belong to the reference area
    ``area_obj[0]``, three effects are computed from the row's current value
    ``x`` of ``input_var``:

    * ``N = x * G``            where ``G`` is the growth rate of the
      reference area's total ``input_var`` in that period;
    * ``P = x * (g - G)``      where ``g`` is the growth rate of the row's
      own area total in that period;
    * ``D = x * (g_j - g)``    where ``g_j`` is the growth rate of the row's
      own (area, industry) value, computed as
      ``(x - base + eps) / (base + eps)`` with ``eps = 1e-6``.

    Growth rates are measured against the previous period present in
    ``data``; all growth rates of the first period are 0, so its three
    effects are 0 as well.

    Args:
        area: Name of the column identifying the area.
        area_obj: Sequence of area identifiers.  ``area_obj[0]`` is the
            reference area; rows of any other area listed here are kept in
            the output with ``N``, ``P`` and ``D`` left at 0.
        industry: Name of the column identifying the industry.
        time: Name of the column identifying the period.
        input_var: Name of the numeric column to decompose.
        data: Long-format ``DataFrame`` holding one row per
            (area, industry, period).

    Returns:
        A ``DataFrame`` indexed like the non-reference rows of ``data`` with
        columns ``['地区', '年份', '作业方式', 'N', 'P', 'D']``.  ``D`` is NaN
        for a row whose previous-period observation is missing.

    Raises:
        ValueError: If a compared area has more than one row for the same
            (area, industry, period), which makes the base value ambiguous.
    """
    reference_area = area_obj[0]
    excluded_areas = set(area_obj)

    time_list = sorted(set(data[time]))
    first_period = time_list[0] if time_list else None
    # Map each period to the period observed immediately before it.
    previous_period = dict(zip(time_list[1:], time_list))

    data_units = data[data[area] != reference_area]
    data_obj = data[data[area] == reference_area]

    # Growth rate of the reference area's total, per period.
    reference_totals = {}
    for period, value in zip(data_obj[time], data_obj[input_var]):
        reference_totals[period] = reference_totals.get(period, 0.0) + float(value)
    reference_growth = dict(zip(
        time_list,
        _period_growth([reference_totals.get(t, 0.0) for t in time_list]),
    ))

    # Materialise the rows once; they are walked twice below.
    rows = list(zip(
        data_units[area],
        data_units[industry],
        data_units[time],
        data_units[input_var],
    ))

    # Per-cell values and per-area totals for the compared areas.
    cell_values = {}
    area_totals = {}
    for unit, sector, period, value in rows:
        if unit in excluded_areas:
            continue
        key = (unit, sector, period)
        if key in cell_values:
            raise ValueError(
                'duplicate observation for (area, industry, period) = %r' % (key,)
            )
        cell_values[key] = float(value)
        area_totals[(unit, period)] = (
            area_totals.get((unit, period), 0.0) + float(value)
        )

    # Growth rate of each compared area's total, per period.
    area_growth = {}
    for unit in {unit for unit, _ in area_totals}:
        rates = _period_growth(
            [area_totals.get((unit, t), 0.0) for t in time_list]
        )
        for period, rate in zip(time_list, rates):
            area_growth[(unit, period)] = rate

    n_effect, p_effect, d_effect = [], [], []
    for unit, sector, period, value in rows:
        if unit in excluded_areas:
            # Areas listed in area_obj beyond the first are not decomposed.
            n_effect.append(0.0)
            p_effect.append(0.0)
            d_effect.append(0.0)
            continue

        current = float(value)
        total_rate = reference_growth[period]
        area_rate = area_growth[(unit, period)]
        if period == first_period:
            sector_rate = 0.0
        else:
            base = cell_values.get(
                (unit, sector, previous_period[period]), float('nan')
            )
            sector_rate = (current - base + _GROWTH_EPSILON) / (base + _GROWTH_EPSILON)

        n_effect.append(current * total_rate)
        p_effect.append(current * (area_rate - total_rate))
        d_effect.append(current * (sector_rate - area_rate))

    result = data_units[[area, time, industry]].copy()
    result.columns = ['地区', '年份', '作业方式']
    result['N'] = np.asarray(n_effect, dtype=float)
    result['P'] = np.asarray(p_effect, dtype=float)
    result['D'] = np.asarray(d_effect, dtype=float)
    return result
# Run the decomposition with dmu 0 as the reference area.
# NOTE(review): `data_b` is not defined in the visible source - it must be
# loaded before this call.
SSA(
    area='dmu',
    area_obj=[0],
    industry='作业编码',
    time='年份',
    input_var='TE',
    data=data_b,
)
  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

小蜗笔记

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值