2020C题数据处理代码

平均收益率

import pandas as pd
import numpy as np
import math
df1_1 = pd.read_excel('D:/Desktop/建模国赛8月培训/2/code1/x1.xlsx',sheet_name=0)
df1_2 = pd.read_excel('D:/Desktop/建模国赛8月培训/2/code1/x1.xlsx',sheet_name=1)
df1_3 = pd.read_excel('D:/Desktop/建模国赛8月培训/2/code1/x1.xlsx',sheet_name=2)



# 支出表生存年限
live_time2 = df1_2[['企业代号','开票日期']]
live_time2_min = live_time2.groupby('企业代号').min().reset_index()
live_time2_max = live_time2.groupby('企业代号').max().reset_index()
live_time2 = pd.merge(live_time2_max,live_time2_min,on='企业代号')
live_time2['生存年限1'] = live_time2['开票日期_x'] - live_time2['开票日期_y']

# 收入表生存年限
live_time3 = df1_3[['企业代号','开票日期']]
live_time3_min = live_time3.groupby('企业代号').min().reset_index()
live_time3_max = live_time3.groupby('企业代号').max().reset_index()
live_time3 = pd.merge(live_time3_max,live_time3_min,on='企业代号')
live_time3['生存年限2'] = live_time3['开票日期_x'] - live_time3['开票日期_y']

# 找出生存年限最大的值作为生存年限
live_time = pd.merge(live_time2,live_time3,on='企业代号')[['企业代号','生存年限1','生存年限2']]
x1 = list(live_time['生存年限1'])
x2 = list(live_time['生存年限2'])

x = []
for i in range(len(x1)):
    if x1[i] >= x2[i]:
        x.append(x1[i])
    else:
        x.append(x2[i])
company_id = pd.DataFrame(live_time['企业代号'])
x = pd.DataFrame(x,columns=['生存年限'])
live_time = company_id.join(x)

# 生存年限转为月份
live_time['生存年限'] = live_time['生存年限'].astype(str)
live_time['生存年限'] = live_time['生存年限'].str.split().str.get(0)
live_time['生存年限'] = live_time['生存年限'].map(lambda x: str(int(x) // 30) + '月')

outcome_sum = df1_2[['企业代号','价税合计']]
outcome_sum = outcome_sum.groupby('企业代号')['价税合计'].sum().reset_index()
# 销项
income_sum = df1_3[['企业代号','价税合计']]
income_sum = income_sum.groupby('企业代号')['价税合计'].sum().reset_index()
## 每年销项
income_sum_year = df1_3[['企业代号开票年份','价税合计']]
income_sum_year = income_sum_year.groupby('企业代号开票年份')['价税合计'].sum().reset_index()
outcome_sum_year = df1_2[['企业代号开票年份','价税合计']]
outcome_sum_year = outcome_sum_year.groupby('企业代号开票年份')['价税合计'].sum().reset_index()
# 进项,销项汇总
outcome_income_sum = pd.merge(outcome_sum,income_sum,on='企业代号')
outcome_income_sum.rename(columns={'价税合计_x' : '进项价税合计',
                                  '价税合计_y' : '销项价税合计'},inplace=True)
outcome_income_sum_year = pd.merge(outcome_sum_year,income_sum_year,on='企业代号开票年份')
outcome_income_sum_year.rename(columns={'价税合计_x' : '开票年份'+'进项价税合计',
                                  '价税合计_y' : '开票年份'+'销项价税合计'},inplace=True)


data = pd.merge(df1_1,live_time,on='企业代号')

data = pd.merge(data,outcome_income_sum,on='企业代号')
#data = pd.merge(df1_2,outcome_income_sum_year,on='企业代号开票年份')
data['利润'] = data['销项价税合计'] - data['进项价税合计']
data['生存年限'] = data['生存年限'].map(lambda x: int(x.replace('月','')))
data['生存年限'] = data['生存年限'].map(lambda x: math.ceil(x / 12))

data['平均利润'] = data['利润'] / data['生存年限']
data['收益率'] = (data['销项价税合计'] - data['进项价税合计']) / data['进项价税合计']


data.to_excel('D:/Desktop/建模国赛8月培训/2/code1/x123平均收益率.xlsx',encoding='utf8',index=False)

#每年收益率


每年收益率

import pandas as pd
import numpy as np
import math
df1_1 = pd.read_excel('D:/Desktop/建模国赛8月培训/2/code1/x1.xlsx',sheet_name=0)
df1_2 = pd.read_excel('D:/Desktop/建模国赛8月培训/2/code1/x1.xlsx',sheet_name=1)
df1_3 = pd.read_excel('D:/Desktop/建模国赛8月培训/2/code1/x1.xlsx',sheet_name=2)



# 支出表生存年限
live_time2 = df1_2[['企业代号','开票日期']]
live_time2_min = live_time2.groupby('企业代号').min().reset_index()
live_time2_max = live_time2.groupby('企业代号').max().reset_index()
live_time2 = pd.merge(live_time2_max,live_time2_min,on='企业代号')
live_time2['生存年限1'] = live_time2['开票日期_x'] - live_time2['开票日期_y']

# 收入表生存年限
live_time3 = df1_3[['企业代号','开票日期']]
live_time3_min = live_time3.groupby('企业代号').min().reset_index()
live_time3_max = live_time3.groupby('企业代号').max().reset_index()
live_time3 = pd.merge(live_time3_max,live_time3_min,on='企业代号')
live_time3['生存年限2'] = live_time3['开票日期_x'] - live_time3['开票日期_y']

# 找出生存年限最大的值作为生存年限
live_time = pd.merge(live_time2,live_time3,on='企业代号')[['企业代号','生存年限1','生存年限2']]
x1 = list(live_time['生存年限1'])
x2 = list(live_time['生存年限2'])

x = []
for i in range(len(x1)):
    if x1[i] >= x2[i]:
        x.append(x1[i])
    else:
        x.append(x2[i])
company_id = pd.DataFrame(live_time['企业代号'])
x = pd.DataFrame(x,columns=['生存年限'])
live_time = company_id.join(x)

# 生存年限转为月份
live_time['生存年限'] = live_time['生存年限'].astype(str)
live_time['生存年限'] = live_time['生存年限'].str.split().str.get(0)
live_time['生存年限'] = live_time['生存年限'].map(lambda x: str(int(x) // 30) + '月')

outcome_sum = df1_2[['企业代号','价税合计']]
outcome_sum = outcome_sum.groupby('企业代号')['价税合计'].sum().reset_index()
# 销项
income_sum = df1_3[['企业代号','价税合计']]
income_sum = income_sum.groupby('企业代号')['价税合计'].sum().reset_index()
## 每年销项
income_sum_year = df1_3[['企业代号开票年份','价税合计']]
income_sum_year = income_sum_year.groupby('企业代号开票年份')['价税合计'].sum().reset_index()
outcome_sum_year = df1_2[['企业代号开票年份','价税合计']]
outcome_sum_year = outcome_sum_year.groupby('企业代号开票年份')['价税合计'].sum().reset_index()
# 进项,销项汇总
outcome_income_sum = pd.merge(outcome_sum,income_sum,on='企业代号')
outcome_income_sum.rename(columns={'价税合计_x' : '进项价税合计',
                                  '价税合计_y' : '销项价税合计'},inplace=True)
outcome_income_sum_year = pd.merge(outcome_sum_year,income_sum_year,on='企业代号开票年份')
outcome_income_sum_year.rename(columns={'价税合计_x' : '每年进项价税合计',
                                  '价税合计_y' : '每年销项价税合计'},inplace=True)


data = pd.merge(df1_1,live_time,on='企业代号')

#data = pd.merge(data,outcome_income_sum,on='企业代号')
data = pd.merge(df1_2,outcome_income_sum_year,on='企业代号开票年份')
#data['利润'] = data['销项价税合计'] - data['进项价税合计']
#data['生存年限'] = data['生存年限'].map(lambda x: int(x.replace('月','')))
#data['生存年限'] = data['生存年限'].map(lambda x: math.ceil(x / 12))

#data['平均利润'] = data['利润'] / data['生存年限']
#data['收益率'] = (data['销项价税合计'] - data['进项价税合计']) / data['进项价税合计']
#data['收益率'] = data['收益率'] / data['生存年限']

data['每年收益率'] = (data['每年销项价税合计'] - data['每年进项价税合计']) / data['每年进项价税合计']
data.to_excel('D:/Desktop/建模国赛8月培训/2/code1/x123每年收益率.xlsx',encoding='utf8',index=False)




方差

import pandas as pd
import numpy as np
import math
df1_1 = pd.read_excel('D:/Desktop/建模国赛8月培训/2/code1/x123每年收益率.xlsx',sheet_name=0)





#f方差
r = df1_1[['企业代号','方差']]
r = r.groupby('企业代号')['方差'].sum().reset_index()


data = pd.merge(df1_1,r,on='企业代号')
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值