2021泰迪杯A题-通讯产品销售和盈利能力分析_任务一解题代码

import pandas as pd 

统计
data = pd.read_excel('非洲通讯产品销售数据.xlsx',engine = 'openpyxl',sheet_name = 0)
data1 = data.copy()
data1.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1056 entries, 0 to 1055
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   日期      1056 non-null   datetime64[ns]
 1   国家      1056 non-null   object        
 2   城市      1056 non-null   object        
 3   地区      1056 non-null   object        
 4   服务分类    1056 non-null   object        
 5   销售额     1056 non-null   float64       
 6   利润      1056 non-null   float64       
dtypes: datetime64[ns](1), float64(2), object(4)
memory usage: 57.9+ KB
data1['年'] = data1.iloc[:,0].dt.year
data1.head(2)
日期国家城市地区服务分类销售额利润
02017-01-01Cote d'IvoireAbidjanWesternCommercial656.966.572017
12017-01-01MadagascarAntananarivoEasternPublic875.94-70.082017
data2 = data1.groupby(['年','地区'])['销售额'].sum()
data3 = data1.groupby(['年','地区'])['利润'].mean()
data4 = data1.groupby(['年','国家'])['销售额'].sum()
data5 = data1.groupby(['年','国家'])['利润'].mean()
data6 = data1.groupby(['年','服务分类'])['销售额'].sum()
data7 = data1.groupby(['年','服务分类'])['利润'].mean()
data1['季度'] = data1.iloc[:,0].dt.quarter
data1.head(2)
日期国家城市地区服务分类销售额利润季度
02017-01-01Cote d'IvoireAbidjanWesternCommercial656.966.5720171
12017-01-01MadagascarAntananarivoEasternPublic875.94-70.0820171
data8 = data1.groupby(['季度','地区'])['销售额'].sum()
data9 = data1.groupby(['季度','地区'])['利润'].mean()
data10 = data1.groupby(['季度','国家'])['销售额'].sum()
data11 = data1.groupby(['季度','国家'])['利润'].mean()
data12 = data1.groupby(['季度','服务分类'])['销售额'].sum()
data13 = data1.groupby(['季度','服务分类'])['利润'].mean()
计算同比增长率

同比增长是指和上一时期、上一年度或历史相比的增长(幅度)。 计算公式:同比增长率=(本期数-同期数)÷同期数×100%。 某个指标的同比增长率=( 现年的某个指标的值-上年同期这个指标的值)/上年同期这个指标的值
中文名: 同比增长
公式: (本期数-同期数)÷同期数×100%

data4.head(2)
年     国家     
2017  Algeria    2867.75
      Angola     1451.46
Name: 销售额, dtype: float64
data5.head(2)
年     国家     
2017  Algeria    -4.930
      Angola     30.955
Name: 利润, dtype: float64
data14 = data4.reset_index()

data15 = pd.pivot_table(data14,values = ['销售额'],index = ['国家','年'])
data15 = data15.reset_index()

data15['销售额同比增长'] = data15.groupby('国家')['销售额'].diff()

data15['销售额同比增长率'] = data15['销售额同比增长']/(data15['销售额']-data15['销售额同比增长'])

data16 = data15.pivot(index='国家',columns='年',values='销售额')
data16 = data16.reset_index()

s = data15[data15['年']==2018]['销售额同比增长率']
s=s.reset_index()
data16['2017-2018年销售额同比增长率'] = s['销售额同比增长率']

s1 = data15[data15['年']==2019]['销售额同比增长率']
s1=s1.reset_index()
data16['2018-2019年销售额同比增长率'] = s1['销售额同比增长率']

s2 = data15[data15['年']==2020]['销售额同比增长率']
s2=s2.reset_index()
data16['2019-2020年销售额同比增长率'] = s1['销售额同比增长率']

data16 = data16.set_index('国家')
data16.head(2)
20172018201920202017-2018年销售额同比增长率2018-2019年销售额同比增长率2019-2020年销售额同比增长率
国家
Algeria2867.752068.731181.662223.79-0.278623-0.428799-0.428799
Angola1451.461362.601164.262102.77-0.061221-0.145560-0.145560
data17 = data5.reset_index()

data17 = pd.pivot_table(data17,values=['利润'],index=['国家','年'])
data17 = data17.reset_index()

data17['利润同比增长'] = data17.groupby('国家')['利润'].diff()

data17['利润同比增长率'] = data17['利润同比增长']/(data17['利润']-data17['利润同比增长'])
#print(data17)
data18 = data17.pivot(index='国家',columns='年',values='利润')
data18 = data18.reset_index()

s3 = data17[data17['年']==2018]['利润同比增长率']
s3 = s3.reset_index()
data18['2017-2018年利润同比增长率'] = s3['利润同比增长率']

s4 = data17[data17['年']==2019]['利润同比增长率']
s4 = s4.reset_index()
data18['2018-2019年利润同比增长率'] = s4['利润同比增长率']

s5 = data17[data17['年']==2020]['利润同比增长率']
s5 = s5.reset_index()
data18['2019-2020年利润同比增长率'] = s5['利润同比增长率']

data18.head(2)
国家20172018201920202017-2018年利润同比增长率2018-2019年利润同比增长率2019-2020年利润同比增长率
0Algeria-4.93015.137.4800-12.5650-4.068966-0.505618-2.679813
1Angola30.955-15.901.537520.7425-1.513649-1.09669812.491057

data19 = data6.reset_index()
data20 = pd.pivot_table(data19,index=['服务分类','年'],values='销售额')
data20 = data20.reset_index()

data20['各服务分类销售额同比增长'] = data20.groupby('服务分类')['销售额'].diff()
data20['各服务分类销售额同比增长率'] = data20['各服务分类销售额同比增长']/(data20['销售额']-data20['各服务分类销售额同比增长'])

data21 = data20.pivot(index='服务分类',columns='年',values='销售额')
data21 = data21.reset_index()

ss = data20[data20['年']==2018]['各服务分类销售额同比增长率']
ss = ss.reset_index()
data21['2017-2018年各服务分类销售额同比增长率'] = ss['各服务分类销售额同比增长率']

ss1 = data20[data20['年']==2019]['各服务分类销售额同比增长率']
ss1 = ss1.reset_index()
data21['2018-2019年各服务分类销售额同比增长率'] = ss1['各服务分类销售额同比增长率']

ss2 = data20[data20['年']==2020]['各服务分类销售额同比增长率']
ss2 = ss2.reset_index()
data21['2019-2020年各服务分类销售额同比增长率'] = ss2['各服务分类销售额同比增长率']

data21
服务分类20172018201920202017-2018年各服务分类销售额同比增长率2018-2019年各服务分类销售额同比增长率2019-2020年各服务分类销售额同比增长率
0Commercial42016.7643427.2051169.6742060.430.0335690.178286-0.178020
1Public44802.8140280.0438341.0740102.73-0.100948-0.0481370.045947
2Residential51826.0746863.3540885.0743736.38-0.095757-0.1275680.069740
#有关服务分类的逐年利润同比增长率方法同上,此处便不一一写出
sum
<function sum(iterable, /, start=0)>

统计
data1.head(2)
日期国家城市地区服务分类销售额利润季度
02017-01-01Cote d'IvoireAbidjanWesternCommercial656.966.5720171
12017-01-01MadagascarAntananarivoEasternPublic875.94-70.0820171
data22 = pd.pivot_table(data1,index=['地区','国家','服务分类'],values=['销售额','利润'],aggfunc=['mean',sum])
data22.head(2)
meansum
利润销售额利润销售额
地区国家服务分类
EasternBurundiCommercial19.726667409.45666759.181228.37
Public3.851429531.43000026.963720.01
s21 = data22['mean']['利润']
s21 = s21.reset_index()
s22 = data22['sum']['销售额']
s22 = s22.reset_index()
data22.drop(columns=['mean','sum'],inplace=True)
data22 = data22.reset_index()
data22['利润'] = s21['利润']
data22['销售额'] = s22['销售额']
data22.head(2)
地区国家服务分类利润销售额
0EasternBurundiCommercial19.7266671228.37
1EasternBurundiPublic3.8514293720.01
data23 = pd.pivot_table(data22,index=['地区','国家','服务分类'],values=['销售额','利润'])
data23.head(2)
C:\Users\31214\AppData\Local\Temp\ipykernel_13176\515147003.py:1: PerformanceWarning: dropping on a non-lexsorted multi-index without a level parameter may impact performance.
  data23 = pd.pivot_table(data22,index=['地区','国家','服务分类'],values=['销售额','利润'])
利润销售额
地区国家服务分类
EasternBurundiCommercial19.7266671228.37
Public3.8514293720.01

data30 = pd.read_excel('非洲通讯产品销售数据.xlsx',engine = 'openpyxl',sheet_name = 1)
data30 = data30.drop(columns=['Unnamed: 5','备注:本表格中“销售合同”为“已成交合同”。'])
data31 = data30.copy()
data31.head(2)
日期销售经理地区销售合同成交率
02017-01-01Aiden MorrisWestern130.55
12017-01-01Audrey BakerEastern110.30
data32 = data31.groupby('销售经理')['销售合同'].sum()
s32 = data31.groupby('销售经理')['成交率'].mean()
s32= s32.reset_index()
data32 = data32.reset_index()
data32['成交率'] = s32['成交率']
data32.head(2)
销售经理销售合同成交率
0Aiden Morris1900.479167
1Audrey Baker2030.378125

data41 = data1.copy()
data41.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1056 entries, 0 to 1055
Data columns (total 9 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   日期      1056 non-null   datetime64[ns]
 1   国家      1056 non-null   object        
 2   城市      1056 non-null   object        
 3   地区      1056 non-null   object        
 4   服务分类    1056 non-null   object        
 5   销售额     1056 non-null   float64       
 6   利润      1056 non-null   float64       
 7   年       1056 non-null   int64         
 8   季度      1056 non-null   int64         
dtypes: datetime64[ns](1), float64(2), int64(2), object(4)
memory usage: 74.4+ KB
data42 = pd.pivot_table(data41,index=['地区','国家','服务分类','年','季度'],values=['销售额'],aggfunc=sum).reset_index()
s = pd.pivot_table(data41,index=['地区','国家','服务分类','年','季度'],values=['利润'],aggfunc='mean').reset_index()
#s['利润']
data42['利润'] = s['利润']
data42.head(2)
地区国家服务分类季度销售额利润
0EasternBurundiCommercial20171928.0774.25
1EasternBurundiCommercial20191118.96-2.38
data42['key'] = data42['地区']+'_'+data42['国家']+'_'+data42['服务分类']
data43=data42.copy()
data43.head(2)
地区国家服务分类季度销售额利润key
0EasternBurundiCommercial20171928.0774.25Eastern_Burundi_Commercial
1EasternBurundiCommercial20191118.96-2.38Eastern_Burundi_Commercial
key = data42['key'].unique()
len(key)
156
from sklearn.linear_model import Ridge
#这是我实验的代码!不用管它
# wjx=[]
# alphas=[0.01,0.1,0.5,1,3,5,7,10,20,100]
# for a in alphas:
#     m=[]
#     coef_sum=0
#     coef_num=0
#     for i in key:
#         df1=pd.DataFrame(columns=data42.columns)
#         cs=i.split('_')
#         d1 = data42[data42['key']==i]
#         try:
#             estimator = Ridge(alpha=1)
#             estimator1 = Ridge(alpha=1)
#             estimator.fit(d1[['年','季度']],d1['销售额'])
#             estimator1.fit(d1[['年','季度']],d1['利润'])
#             result = estimator.predict([[2021,1]])[0]
#             result1 = estimator1.predict([[2021,1]])[0]
#             #print(d1)
#             #data43.loc[len(data43.index)]=[cs[0],cs[1],cs[2],2021,1,round(result,2),round(result1,2),i]
#             #df1=pd.DataFrame([[cs[0],cs[1],cs[2],2021,1,round(result,2),round(result1,2),i]],columns=data42.columns)
#             df1.loc[0]=[cs[0],cs[1],cs[2],2021,1,round(result,2),round(result1,2),i]
#             #print(df1)
#             data43=data43.append(df1)
#             coef_sum=coef_sum+1
#             coef_num=coef_num+estimator.coef_+estimator1.coef_
#             m.append({i:{result,result1}})

#         except:
#             pass
#     trust=((coef_num/(coef_sum*2))[0]+(coef_num/(coef_sum*2))[1])/2
#     #print(i+'--->'+trust)
#     #print(f'{a}----{trust}')
#     wjx.append({a:trust})

#wjx
m=[]
coef_sum=0
coef_num=0
for i in key:
    df1=pd.DataFrame(columns=data42.columns)
    cs=i.split('_')
    d1 = data42[data42['key']==i]
    try:
        estimator = Ridge(alpha=1)
        estimator1 = Ridge(alpha=1)
        estimator.fit(d1[['年','季度']],d1['销售额'])
        estimator1.fit(d1[['年','季度']],d1['利润'])
        result = estimator.predict([[2021,1]])[0]
        result1 = estimator1.predict([[2021,1]])[0]
        #print(d1)
        #data43.loc[len(data43.index)]=[cs[0],cs[1],cs[2],2021,1,round(result,2),round(result1,2),i]
        #df1=pd.DataFrame([[cs[0],cs[1],cs[2],2021,1,round(result,2),round(result1,2),i]],columns=data42.columns)
        df1.loc[0]=[cs[0],cs[1],cs[2],2021,1,round(result,2),round(result1,2),i]
        #print(df1)
        data43=data43.append(df1)
        coef_sum=coef_sum+1
        coef_num=coef_num+estimator.coef_+estimator1.coef_
        m.append({i:{result,result1}})
        
    except:
        pass

D:\Develop\Anaconda3\lib\site-packages\sklearn\base.py:450: UserWarning: X does not have valid feature names, but Ridge was fitted with feature names
  warnings.warn(
trust=((coef_num/(coef_sum*2))[0]+(coef_num/(coef_sum*2))[1])/2
data43=data43.reset_index()
data43.head(2)
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
level_0index地区国家服务分类季度销售额利润key
000EasternBurundiCommercial20171928.0774.25Eastern_Burundi_Commercial
111EasternBurundiCommercial20191118.96-2.38Eastern_Burundi_Commercial
data44=pd.pivot_table(data43,index=['地区','国家','服务分类','年','季度'],values=['利润','销售额'],aggfunc='mean')#.reset_index()
data45=data44.reset_index()
data44.head(2)
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
利润销售额
地区国家服务分类季度
EasternBurundiCommercial2017174.25928.07
20191-2.38118.96
#data44.loc[(slice(None),'Burundi'),:]
data45.head(2)
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
地区国家服务分类季度利润销售额
0EasternBurundiCommercial2017174.25928.07
1EasternBurundiCommercial20191-2.38118.96
data45[data45['年']==2021].head(2)
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}
地区国家服务分类季度利润销售额
3EasternBurundiCommercial20211-40.17-243.93
10EasternBurundiPublic20211-2.101368.01
data45.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1012 entries, 0 to 1011
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   地区      1012 non-null   object 
 1   国家      1012 non-null   object 
 2   服务分类    1012 non-null   object 
 3   年       1012 non-null   int64  
 4   季度      1012 non-null   int64  
 5   利润      1012 non-null   float64
 6   销售额     1012 non-null   float64
dtypes: float64(2), int64(2), object(3)
memory usage: 55.5+ KB


  • 3
    点赞
  • 11
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值