import pandas as pd
import numpy as np
import openpyxl
pd. set_option( 'display.max_columns' , None )
pd. set_option( 'display.max_rows' , None )
def SSA ( area, area_obj, industry, time, input_var, data) :
time_list = sorted ( list ( set ( list ( data[ time] ) ) ) )
time_span = len ( time_list)
industry_list = sorted ( list ( set ( list ( data[ industry] ) ) ) )
industry_num = len ( industry_list)
area_list = sorted ( list ( set ( list ( data[ area] ) ) - set ( area_obj) ) )
area_num = len ( area_list)
data_units = data[ data[ area] != area_obj[ 0 ] ]
data_obj = data[ data[ area] == area_obj[ 0 ] ]
R = pd. DataFrame( np. zeros( ( time_span, industry_num) ) , columns= industry_list)
R[ '年份' ] = pd. DataFrame( time_list)
R[ '地区' ] = area_obj[ 0 ]
for i in industry_list:
for j in time_list[ 1 : ] :
base_period = j- 1
current_period = j
data_R_zong = [ ]
R_zong = [ 0 ]
for i in time_list:
data_R_zong_1 = sum ( data_obj. loc[ data_obj[ time] == i, input_var] )
data_R_zong. append( data_R_zong_1)
for i in range ( 1 , len ( data_R_zong) ) :
R_zong_1 = ( data_R_zong[ i] - data_R_zong[ i- 1 ] ) / data_R_zong[ i- 1 ]
R_zong. append( R_zong_1)
RT = pd. DataFrame( R_zong, columns= [ '总增长率' ] )
RT[ '地区' ] = area_obj[ 0 ]
RT[ '时间' ] = pd. DataFrame( time_list)
industry_list_name = industry_list + [ 'time' , 'area' ]
r = pd. DataFrame( [ ] , columns= industry_list_name)
for i in area_list:
ri = pd. DataFrame( np. zeros( ( time_span, industry_num) ) , index = time_list, columns= industry_list)
ri[ 'time' ] = 2010
ri[ 'area' ] = 0
for j_index, j in enumerate ( industry_list) :
for k_index, k in enumerate ( time_list[ 1 : ] ) :
base_period = int ( k- 1 )
current_period = int ( k)
ri[ 'area' ] = i
ri. iloc[ k_index+ 1 , - 2 ] = k
ri. iloc[ k_index+ 1 , j_index] = ( float ( data_units. loc[ ( data_units[ area] == i) & ( data_units[ industry] == j) & ( data_units[ time] == current_period) , input_var] ) - float ( data_units. loc[ ( data_units[ area] == i) & ( data_units[ industry] == j) & ( data_units[ time] == base_period) , input_var] ) + 0.000001 ) / ( float ( ( data_units. loc[ ( data_units[ area] == i) & ( data_units[ industry] == j) & ( data_units[ time] == base_period) , input_var] ) ) + 0.000001 )
r = r. append( ri)
rh = pd. DataFrame( [ ] , columns= [ '地区' , '时间' , '产业类别' , '增长率' ] )
r_p = pd. DataFrame( np. zeros( ( len ( r) , 4 ) ) , index = r. index, columns= [ '地区' , '时间' , '产业类别' , '增长率' ] )
for i_index, i in enumerate ( industry_list) :
r_p[ '地区' ] = r[ 'area' ]
r_p[ '时间' ] = r[ 'time' ]
r_p[ '增长率' ] = r. iloc[ : , i_index]
r_p[ '产业类别' ] = i
rh = rh. append( r_p)
ri_zong_pd = pd. DataFrame( [ ] , columns= [ '时间' , '地区' , '增长率' ] )
for i in area_list:
data_ri_zong= [ ]
ri_zong = pd. DataFrame( np. zeros( ( time_span, 3 ) ) , index = time_list, columns= [ '时间' , '地区' , '增长率' ] )
ri_zong[ '地区' ] = i
for j in time_list:
data_ri_zong_1 = sum ( data_units. loc[ ( data_units[ area] == i) & ( data_units[ time] == j) , input_var] )
data_ri_zong. append( data_ri_zong_1)
ri_zong[ '时间' ] = pd. DataFrame( time_list, index = ri_zong. index)
for k in range ( 1 , time_span) :
ri_zong_1 = ( data_ri_zong[ k] - data_ri_zong[ k- 1 ] ) / data_ri_zong[ k- 1 ]
ri_zong. loc[ time_list[ k] , '增长率' ] = ri_zong_1
ri_zong_pd = ri_zong_pd. append( ri_zong)
result = pd. DataFrame( np. zeros( ( len ( data_units) , 6 ) ) , index = data_units. index, columns= [ '地区' , '年份' , '作业方式' , 'N' , 'P' , 'D' ] )
result[ '地区' ] = data_units[ area]
result[ '年份' ] = data_units[ time]
result[ '作业方式' ] = data_units[ industry]
for i in area_list:
for j in time_list:
for k in industry_list:
result. loc[ ( result[ '地区' ] == i) & ( result[ '年份' ] == j) & ( result[ '作业方式' ] == k) , 'N' ] = float ( data_units. loc[ ( data_units[ area] == i) & ( data_units[ time] == j) & ( data_units[ industry] == k) , input_var] ) * float ( RT. loc[ RT[ '时间' ] == j, '总增长率' ] )
result. loc[ ( result[ '地区' ] == i) & ( result[ '年份' ] == j) & ( result[ '作业方式' ] == k) , 'P' ] = float ( data_units. loc[ ( data_units[ area] == i) & ( data_units[ time] == j) & ( data_units[ industry] == k) , input_var] ) * ( float ( ri_zong_pd. loc[ ( ri_zong_pd[ '地区' ] == i) & ( ri_zong_pd[ '时间' ] == j) , '增长率' ] ) - float ( RT. loc[ RT[ '时间' ] == j, '总增长率' ] ) )
result. loc[ ( result[ '地区' ] == i) & ( result[ '年份' ] == j) & ( result[ '作业方式' ] == k) , 'D' ] = float ( data_units. loc[ ( data_units[ area] == i) & ( data_units[ time] == j) & ( data_units[ industry] == k) , input_var] ) * ( float ( rh. loc[ ( rh[ '地区' ] == i) & ( rh[ '时间' ] == j) & ( rh[ '产业类别' ] == k) , '增长率' ] ) - float ( ri_zong_pd. loc[ ( ri_zong_pd[ '地区' ] == i) & ( ri_zong_pd[ '时间' ] == j) , '增长率' ] ) )
return ( result)
SSA( area = 'dmu' , area_obj = [ 0 ] , industry = '作业编码' , time = '年份' , input_var = 'TE' , data = data_b)