平均收益率
import pandas as pd
import numpy as np
import math
df1_1 = pd.read_excel('D:/Desktop/建模国赛8月培训/2/code1/x1.xlsx',sheet_name=0)
df1_2 = pd.read_excel('D:/Desktop/建模国赛8月培训/2/code1/x1.xlsx',sheet_name=1)
df1_3 = pd.read_excel('D:/Desktop/建模国赛8月培训/2/code1/x1.xlsx',sheet_name=2)
# 支出表生存年限
live_time2 = df1_2[['企业代号','开票日期']]
live_time2_min = live_time2.groupby('企业代号').min().reset_index()
live_time2_max = live_time2.groupby('企业代号').max().reset_index()
live_time2 = pd.merge(live_time2_max,live_time2_min,on='企业代号')
live_time2['生存年限1'] = live_time2['开票日期_x'] - live_time2['开票日期_y']
# 收入表生存年限
live_time3 = df1_3[['企业代号','开票日期']]
live_time3_min = live_time3.groupby('企业代号').min().reset_index()
live_time3_max = live_time3.groupby('企业代号').max().reset_index()
live_time3 = pd.merge(live_time3_max,live_time3_min,on='企业代号')
live_time3['生存年限2'] = live_time3['开票日期_x'] - live_time3['开票日期_y']
# 找出生存年限最大的值作为生存年限
live_time = pd.merge(live_time2,live_time3,on='企业代号')[['企业代号','生存年限1','生存年限2']]
x1 = list(live_time['生存年限1'])
x2 = list(live_time['生存年限2'])
x = []
for i in range(len(x1)):
if x1[i] >= x2[i]:
x.append(x1[i])
else:
x.append(x2[i])
company_id = pd.DataFrame(live_time['企业代号'])
x = pd.DataFrame(x,columns=['生存年限'])
live_time = company_id.join(x)
# 生存年限转为月份
live_time['生存年限'] = live_time['生存年限'].astype(str)
live_time['生存年限'] = live_time['生存年限'].str.split().str.get(0)
live_time['生存年限'] = live_time['生存年限'].map(lambda x: str(int(x) // 30) + '月')
outcome_sum = df1_2[['企业代号','价税合计']]
outcome_sum = outcome_sum.groupby('企业代号')['价税合计'].sum().reset_index()
# 销项
income_sum = df1_3[['企业代号','价税合计']]
income_sum = income_sum.groupby('企业代号')['价税合计'].sum().reset_index()
## 每年销项
income_sum_year = df1_3[['企业代号开票年份','价税合计']]
income_sum_year = income_sum_year.groupby('企业代号开票年份')['价税合计'].sum().reset_index()
outcome_sum_year = df1_2[['企业代号开票年份','价税合计']]
outcome_sum_year = outcome_sum_year.groupby('企业代号开票年份')['价税合计'].sum().reset_index()
# 进项,销项汇总
outcome_income_sum = pd.merge(outcome_sum,income_sum,on='企业代号')
outcome_income_sum.rename(columns={'价税合计_x' : '进项价税合计',
'价税合计_y' : '销项价税合计'},inplace=True)
outcome_income_sum_year = pd.merge(outcome_sum_year,income_sum_year,on='企业代号开票年份')
outcome_income_sum_year.rename(columns={'价税合计_x' : '开票年份'+'进项价税合计',
'价税合计_y' : '开票年份'+'销项价税合计'},inplace=True)
data = pd.merge(df1_1,live_time,on='企业代号')
data = pd.merge(data,outcome_income_sum,on='企业代号')
#data = pd.merge(df1_2,outcome_income_sum_year,on='企业代号开票年份')
data['利润'] = data['销项价税合计'] - data['进项价税合计']
data['生存年限'] = data['生存年限'].map(lambda x: int(x.replace('月','')))
data['生存年限'] = data['生存年限'].map(lambda x: math.ceil(x / 12))
data['平均利润'] = data['利润'] / data['生存年限']
data['收益率'] = (data['销项价税合计'] - data['进项价税合计']) / data['进项价税合计']
data.to_excel('D:/Desktop/建模国赛8月培训/2/code1/x123平均收益率.xlsx',encoding='utf8',index=False)
#每年收益率
每年收益率
import pandas as pd
import numpy as np
import math
df1_1 = pd.read_excel('D:/Desktop/建模国赛8月培训/2/code1/x1.xlsx',sheet_name=0)
df1_2 = pd.read_excel('D:/Desktop/建模国赛8月培训/2/code1/x1.xlsx',sheet_name=1)
df1_3 = pd.read_excel('D:/Desktop/建模国赛8月培训/2/code1/x1.xlsx',sheet_name=2)
# 支出表生存年限
live_time2 = df1_2[['企业代号','开票日期']]
live_time2_min = live_time2.groupby('企业代号').min().reset_index()
live_time2_max = live_time2.groupby('企业代号').max().reset_index()
live_time2 = pd.merge(live_time2_max,live_time2_min,on='企业代号')
live_time2['生存年限1'] = live_time2['开票日期_x'] - live_time2['开票日期_y']
# 收入表生存年限
live_time3 = df1_3[['企业代号','开票日期']]
live_time3_min = live_time3.groupby('企业代号').min().reset_index()
live_time3_max = live_time3.groupby('企业代号').max().reset_index()
live_time3 = pd.merge(live_time3_max,live_time3_min,on='企业代号')
live_time3['生存年限2'] = live_time3['开票日期_x'] - live_time3['开票日期_y']
# 找出生存年限最大的值作为生存年限
live_time = pd.merge(live_time2,live_time3,on='企业代号')[['企业代号','生存年限1','生存年限2']]
x1 = list(live_time['生存年限1'])
x2 = list(live_time['生存年限2'])
x = []
for i in range(len(x1)):
if x1[i] >= x2[i]:
x.append(x1[i])
else:
x.append(x2[i])
company_id = pd.DataFrame(live_time['企业代号'])
x = pd.DataFrame(x,columns=['生存年限'])
live_time = company_id.join(x)
# 生存年限转为月份
live_time['生存年限'] = live_time['生存年限'].astype(str)
live_time['生存年限'] = live_time['生存年限'].str.split().str.get(0)
live_time['生存年限'] = live_time['生存年限'].map(lambda x: str(int(x) // 30) + '月')
outcome_sum = df1_2[['企业代号','价税合计']]
outcome_sum = outcome_sum.groupby('企业代号')['价税合计'].sum().reset_index()
# 销项
income_sum = df1_3[['企业代号','价税合计']]
income_sum = income_sum.groupby('企业代号')['价税合计'].sum().reset_index()
## 每年销项
income_sum_year = df1_3[['企业代号开票年份','价税合计']]
income_sum_year = income_sum_year.groupby('企业代号开票年份')['价税合计'].sum().reset_index()
outcome_sum_year = df1_2[['企业代号开票年份','价税合计']]
outcome_sum_year = outcome_sum_year.groupby('企业代号开票年份')['价税合计'].sum().reset_index()
# 进项,销项汇总
outcome_income_sum = pd.merge(outcome_sum,income_sum,on='企业代号')
outcome_income_sum.rename(columns={'价税合计_x' : '进项价税合计',
'价税合计_y' : '销项价税合计'},inplace=True)
outcome_income_sum_year = pd.merge(outcome_sum_year,income_sum_year,on='企业代号开票年份')
outcome_income_sum_year.rename(columns={'价税合计_x' : '每年进项价税合计',
'价税合计_y' : '每年销项价税合计'},inplace=True)
data = pd.merge(df1_1,live_time,on='企业代号')
#data = pd.merge(data,outcome_income_sum,on='企业代号')
data = pd.merge(df1_2,outcome_income_sum_year,on='企业代号开票年份')
#data['利润'] = data['销项价税合计'] - data['进项价税合计']
#data['生存年限'] = data['生存年限'].map(lambda x: int(x.replace('月','')))
#data['生存年限'] = data['生存年限'].map(lambda x: math.ceil(x / 12))
#data['平均利润'] = data['利润'] / data['生存年限']
#data['收益率'] = (data['销项价税合计'] - data['进项价税合计']) / data['进项价税合计']
#data['收益率'] = data['收益率'] / data['生存年限']
data['每年收益率'] = (data['每年销项价税合计'] - data['每年进项价税合计']) / data['每年进项价税合计']
data.to_excel('D:/Desktop/建模国赛8月培训/2/code1/x123每年收益率.xlsx',encoding='utf8',index=False)
方差
import pandas as pd
import numpy as np
import math
df1_1 = pd.read_excel('D:/Desktop/建模国赛8月培训/2/code1/x123每年收益率.xlsx',sheet_name=0)
#f方差
r = df1_1[['企业代号','方差']]
r = r.groupby('企业代号')['方差'].sum().reset_index()
data = pd.merge(df1_1,r,on='企业代号')