商品销售预测

商业销售预测绘图``

import numpy as np
import matplotlib.pylab as plt
import pandas as pd
import os


导入数据
```python
file=os.walk(r'D:\机器学习\kaggle预测\商店销售')
filename_=[]
filename_data=[]
for root,sub,filename in file:
    for i in filename:
        path=os.path.join(root,i)
        filename_.append(i)
        filename_data.append(pd.read_csv(path))
    
[i for i in filename_]
['holidays_events.csv',
 'oil.csv',
 'sample_submission.csv',
 'stores.csv',
 'test.csv',
 'train.csv',
 'transactions.csv']
#合并数据
df_train1=filename_data[5].merge(filename_data[0],on='date',how='left')
df_train1=df_train1.merge(filename_data[1],on='date',how='left')
df_train1=df_train1.merge(filename_data[3],on='store_nbr',how='left')
df_train1=df_train1.merge(filename_data[6],on=['date','store_nbr'],how='left')
df_train1=df_train1.rename(columns={'type_x':'holiday_type','type_y':'store_type'})
df_train1.head()
iddatestore_nbrfamilysalesonpromotionholiday_typelocalelocale_namedescriptiontransferreddcoilwticocitystatestore_typeclustertransactions
002013-01-011AUTOMOTIVE0.0000HolidayNationalEcuadorPrimer dia del anoFalseNaNQuitoPichinchaD13NaN
112013-01-011BABY CARE0.0000HolidayNationalEcuadorPrimer dia del anoFalseNaNQuitoPichinchaD13NaN
222013-01-011BEAUTY0.0000HolidayNationalEcuadorPrimer dia del anoFalseNaNQuitoPichinchaD13NaN
332013-01-011BEVERAGES0.0000HolidayNationalEcuadorPrimer dia del anoFalseNaNQuitoPichinchaD13NaN
442013-01-011BOOKS0.0000HolidayNationalEcuadorPrimer dia del anoFalseNaNQuitoPichinchaD13NaN
#处理日期:
df_train1['date']=pd.to_datetime(df_train1['date'])
df_train1['year']=df_train1['date'].dt.year
df_train1['month']=df_train1['date'].dt.month
df_train1['week']=df_train1['date'].dt.isocalendar().week
df_train1['quarter']=df_train1['date'].dt.quarter
df_train1['day_of_week']=df_train1['date'].dt.day_name()
df_train1.head()
iddatestore_nbrfamilysalesonpromotionholiday_typelocalelocale_namedescription...citystatestore_typeclustertransactionsyearmonthweekquarterday_of_week
002013-01-011AUTOMOTIVE0.00HolidayNationalEcuadorPrimer dia del ano...QuitoPichinchaD13NaN2013111Tuesday
112013-01-011BABY CARE0.00HolidayNationalEcuadorPrimer dia del ano...QuitoPichinchaD13NaN2013111Tuesday
222013-01-011BEAUTY0.00HolidayNationalEcuadorPrimer dia del ano...QuitoPichinchaD13NaN2013111Tuesday
332013-01-011BEVERAGES0.00HolidayNationalEcuadorPrimer dia del ano...QuitoPichinchaD13NaN2013111Tuesday
442013-01-011BOOKS0.00HolidayNationalEcuadorPrimer dia del ano...QuitoPichinchaD13NaN2013111Tuesday

5 rows × 22 columns

store_nbr、family、cluster绘总

#将每个store_type的销量求平均值
df_st_sa=df_train1.groupby('store_type').agg({'sales':'mean'}).reset_index().sort_values(by='sales',ascending=False)
df_st_sa
store_typesales
0A708.378165
3D352.084510
1B328.275233
4E270.285490
2C197.790647
#对每一个family求均值并排序
df_fa_sa=df_train1.groupby('family').agg({'sales':'mean'}).reset_index().sort_values(by='sales',ascending=False)[:10]
df_fa_sa
familysales
12GROCERY I3790.432797
3BEVERAGES2394.912701
30PRODUCE1355.373698
7CLEANING1074.171518
8DAIRY711.175991
5BREAD/BAKERY464.150612
28POULTRY351.078816
24MEATS341.965905
25PERSONAL CARE271.192381
9DELI265.629746
#对每一个cluster求均值并排序
df_cl_sa=df_train1.groupby('cluster').agg({'sales':'mean'}).reset_index()
df_cl_sa.head()
clustersales
01327.022808
12261.025731
23194.926534
34297.537877
451120.118405
from matplotlib.gridspec import GridSpec
plt.figure(figsize=(12,8))
gs=GridSpec(2,2)
ax=plt.subplot(gs[1:,:2])
# plt.barh()
ax.bar(range(df_cl_sa.shape[0]),df_cl_sa.iloc[:,1],width=0.5)
ax.set_title('Clusters VS s Sales')

ax=plt.subplot(gs[:1,:1])
ax.barh(df_fa_sa.iloc[:,0],df_fa_sa.iloc[:,1])
ax.set_title('Average Sales Familys')

ax=plt.subplot(gs[:1,1])
ax.pie(df_st_sa.iloc[:,1],wedgeprops={'width':0.3},labels=df_st_sa.iloc[:,0])

ax.set_title('Highest Sales Stores')

plt.show()

请添加图片描述

月销售量绘总

#将每年的数据按月求均值得到新的数组
df_2013=df_train1[df_train1['year']==2013][['month','sales']]
df_2013=df_2013.groupby('month').agg({'sales':'mean'}).reset_index().rename(columns={'sales':'s13'})
df_2014=df_train1[df_train1['year']==2014][['month','sales']]
df_2014=df_2014.groupby('month').agg({'sales':'mean'}).reset_index().rename(columns={'sales':'s14'})
df_2015=df_train1[df_train1['year']==2015][['month','sales']]
df_2015=df_2015.groupby('month').agg({'sales':'mean'}).reset_index().rename(columns={'sales':'s15'})
df_2016=df_train1[df_train1['year']==2016][['month','sales']]
df_2016=df_2016.groupby('month').agg({'sales':'mean'}).reset_index().rename(columns={'sales':'s16'})
df_2017=df_train1[df_train1['year']==2017][['month','sales']]
df_2017=df_2017.groupby('month').agg({'sales':'mean'}).reset_index()
#补充2017年后面几个月的数据
df_2017_no=pd.DataFrame({'month':[9,10,11,12],'sales':[0,0,0,0]})
df_2017=df_2017.append(df_2017_no).rename(columns={'sales':'s17'})
# print(df_2017)
#将不同年份的数据合并起来
df_year=df_2013.merge(df_2014,on='month').merge(df_2015,on='month').merge(df_2016,on='month').merge(df_2017,on='month')
# tob_labels=['2013','2014','2015','2016','2017']
df_year
months13s14s15s16s17
01186.952405342.341709269.666595434.050268476.596791
12193.581846241.268892275.420792424.695398465.971468
23206.880581368.661236282.368624418.735398483.400632
34205.639071240.577087279.743138488.108774482.172948
45210.184563242.203129320.958116457.671398487.162797
56215.691343244.634652397.249619419.644575488.707278
67203.983455350.830102403.030170432.562218489.909880
78212.479434251.351805415.692304406.437390465.144891
89220.593588374.530792434.734053419.3312400.000000
910213.164266369.213666432.248428435.0021690.000000
1011231.136537384.056027426.579749462.9166750.000000
1112298.675144459.818606513.845328557.1148220.000000
#将除月份这一列的值赋给新的datafram
df_year=df_year[['s13','s14','s15','s16','s17']].replace(np.nan,0)
df_year
s13s14s15s16s17
Jan186.952405342.341709269.666595434.050268476.596791
Feb193.581846241.268892275.420792424.695398465.971468
Mar206.880581368.661236282.368624418.735398483.400632
Apr205.639071240.577087279.743138488.108774482.172948
May210.184563242.203129320.958116457.671398487.162797
Jun215.691343244.634652397.249619419.644575488.707278
Ju1203.983455350.830102403.030170432.562218489.909880
Aug212.479434251.351805415.692304406.437390465.144891
Sep220.593588374.530792434.734053419.3312400.000000
Oct213.164266369.213666432.248428435.0021690.000000
Nov231.136537384.056027426.579749462.9166750.000000
Dec298.675144459.818606513.845328557.1148220.000000
df_year.index=['Jan','Feb','Mar','Apr','May','Jun','Ju1','Aug','Sep','Oct','Nov','Dec']
y_data=df_2013['month'].tolist()#转化为一个列表
df_year


s13s14s15s16s17
Jan186.952405342.341709269.666595434.050268476.596791
Feb193.581846241.268892275.420792424.695398465.971468
Mar206.880581368.661236282.368624418.735398483.400632
Apr205.639071240.577087279.743138488.108774482.172948
May210.184563242.203129320.958116457.671398487.162797
Jun215.691343244.634652397.249619419.644575488.707278
Ju1203.983455350.830102403.030170432.562218489.909880
Aug212.479434251.351805415.692304406.437390465.144891
Sep220.593588374.530792434.734053419.3312400.000000
Oct213.164266369.213666432.248428435.0021690.000000
Nov231.136537384.056027426.579749462.9166750.000000
Dec298.675144459.818606513.845328557.1148220.000000
#画每年不同月份,平均销售
plt.figure(figsize=(12,8))
plt.barh(df_year.index,df_year.iloc[:,0],label='2013')
plt.text(100,12.5,'2013')
plt.barh(df_year.index,df_year.iloc[:,1],left=df_year.iloc[:,0],label='2014')
plt.text(500,12.5,'2014')
plt.barh(df_year.index,df_year.iloc[:,2],left=df_year.iloc[:,0]+df_year.iloc[:,1],label='2015')
plt.text(1000,12.5,'2015')
plt.barh(df_year.index,df_year.iloc[:,3],left=df_year.iloc[:,0]+df_year.iloc[:,1]+df_year.iloc[:,2],label='2016')
plt.text(1300,12.5,'2016')
plt.barh(df_year.index,df_year.iloc[:,4],left=df_year.iloc[:,0]+df_year.iloc[:,1]+df_year.iloc[:,2]+df_year.iloc[:,3],label='2017')
plt.text(1700,12.5,'2017')
plt.title('Avg Sales for Each Year',loc='left',y=1.08,fontsize=15)
plt.legend()
plt.show()

在这里插入图片描述

月、季、周,星期几绘总

#绘制每个月,每个季度,每个周的平均销售量
import calendar
df_m_sa=df_train1.groupby('month').agg({'sales':'mean'}).reset_index()
df_m_sa['sales']=round(df_m_sa['sales'],2)           #处理sales函数小数位数
df_m_sa['month_text']=df_m_sa['month'].apply(lambda x: calendar.month_abbr[x])#将数值月份转化为文字形
df_m_sa['text']=df_m_sa['month_text']+'-'+df_m_sa['sales'].astype(str)
df_w_sa=df_train1.groupby('week').agg({'sales':'mean'}).reset_index()
df_q_sa=df_train1.groupby('quarter').agg({'sales':'mean'}).reset_index()

df_m_sa.head(),df_w_sa.head(),df_q_sa.head
(    month   sales month_text        text
 0       1  341.92        Jan  Jan-341.92
 1       2  320.93        Feb  Feb-320.93
 2       3  352.01        Mar  Mar-352.01
 3       4  341.17        Apr  Apr-341.17
 4       5  345.65        May  May-345.65,

     week       sales
 0      1  409.099519
 1      2  347.534643
 2      3  338.142199
 3      4  329.186258
 4      5  344.195233,

    quarter       sales
 0        1  338.825392
 1        2  346.546038
 2        3  359.334098
 3        4  399.229622)
from matplotlib.gridspec import GridSpec
plt.figure(figsize=(12,8))
gs=GridSpec(2,2)
print(gs[:2,:2])
ax=plt.subplot(gs[:1,:1])
# plt.barh()

ax.barh(df_m_sa.iloc[:,2],df_m_sa.iloc[:,1])#为每个条形图添加标签要用循环
for a,b in enumerate(df_m_sa.iloc[:,1]):
    ax.text(b-100,a,df_m_sa.iloc[a,3])
ax.set_title('month wise avg sales analysis')

ax=plt.subplot(gs[:1,1])
ax.pie(df_q_sa.iloc[:,1],wedgeprops={'width':0.3},labels=df_q_sa.iloc[:,0],autopct='%1.2f%%',pctdistance=1.25)
ax.set_title('Quarter wise Avg Sales Analy')

ax=plt.subplot(gs[1:,:2])
ax.fill_between(df_w_sa.iloc[:,0],df_w_sa.iloc[:,1],alpha=0.6)
ax.plot(df_w_sa.iloc[:,0],df_w_sa.iloc[:,1],marker='o')
ax.set_title('Week wise Avg Sales Analysis')
# ax[0,1].set_title('Highest Sales Stores')
# ax[1,0].set_title('Clusters VS s Sales')
plt.show()
GridSpec(2, 2)[0:2, 0:2]

在这里插入图片描述

df_dw_sa=df_train1.groupby('day_of_week').agg({'sales':'mean'}).reset_index()
df_dw_sa['sales']=round(df_dw_sa['sales'],2)

df_dw_sa
day_of_weeksales
0Friday326.73
1Monday348.16
2Saturday434.79
3Sunday464.74
4Thursday286.57
5Tuesday319.92
6Wednesday330.77
plt.barh(df_dw_sa.iloc[:,0],df_dw_sa.iloc[:,1])#为每个条形图添加标签要用循环
for a,b in enumerate(df_dw_sa.iloc[:,1]):
    plt.text(b-50,a,df_dw_sa.iloc[a,1])
# plt.yticks(['Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday'])
plt.title('Avg Sales VS Day of Week')
plt.show()

在这里插入图片描述

store_nbr与holiday关联

df_st_ht=df_train1.groupby(['store_type','holiday_type']).agg({'sales':'mean'}).reset_index()
df_st_ht['sales']=round(df_st_ht['sales'],2)
df_st_ht.head()
store_typeholiday_typesales
0AAdditional957.70
1ABridge969.82
2AEvent813.56
3AHoliday723.28
4ATransfer984.63
plt.scatter(df_st_ht.iloc[:,0],df_st_ht.iloc[:,1],s=df_st_ht.iloc[:,2],c=df_st_ht.iloc[:,2],cmap='plasma')
plt.colorbar()
plt.text(4.7,5.5,'sales')
plt.xlim(-0.5,4.5)
plt.ylim(-0.5,5.5)
plt.title('Average Sales:Store Type vs holiday type ')
plt.show()

在这里插入图片描述

df_y_m_st=df_train1.groupby(['year','month','store_type']).agg({'sales':'mean'}).reset_index()
df_y_m_st['sales']=round(df_y_m_st['sales'],2)
df_y_m_st['month']=df_y_m_st['month'].apply(lambda x:calendar.month_abbr[x])
df_y_m_st.head()
yearmonthstore_typesales
02013JanA392.85
12013JanB155.11
22013JanC109.06
32013JanD191.16
42013JanE60.52

280 rows × 4 columns

# a=df_y_m_st['year']==2013
# df_y_m_st.loc[a,'month']

store_nbr与holiday绘总不同年份

a=df_y_m_st['year']==2013
b=df_y_m_st['year']==2014
c=df_y_m_st['year']==2015
d=df_y_m_st['year']==2016
e=df_y_m_st['year']==2017
fig,ax=plt.subplots(5,1,figsize=(24,20))




ax[0].scatter(df_y_m_st.loc[a,'month'],df_y_m_st.loc[a,'store_type'],df_y_m_st.loc[a,'sales'],c=df_y_m_st.loc[a,'sales'],cmap='plasma')
ax[0].text(12,0.01,'year=2013',size=20,rotation='270')
ax[0].set_xticks([])

ax[1].scatter(df_y_m_st.loc[a,'month'],df_y_m_st.loc[a,'store_type'],df_y_m_st.loc[b,'sales'],c=df_y_m_st.loc[b,'sales'],cmap='plasma')
ax[1].text(12,0.01,'year=2014',size=20,rotation='270')
ax[1].set_xticks([])
ax[2].scatter(df_y_m_st.loc[a,'month'],df_y_m_st.loc[a,'store_type'],df_y_m_st.loc[c,'sales'],c=df_y_m_st.loc[c,'sales'],cmap='plasma')
ax[2].text(12,0.01,'year=2015',size=20,rotation='270')
ax[2].set_xticks([])
ax[3].scatter(df_y_m_st.loc[a,'month'],df_y_m_st.loc[a,'store_type'],df_y_m_st.loc[d,'sales'],c=df_y_m_st.loc[d,'sales'],cmap='plasma')
ax[3].text(12,0.01,'year=2016',size=20,rotation='270')
ax[3].set_xticks([])
ax[4].scatter(df_y_m_st.loc[e,'month'],df_y_m_st.loc[e,'store_type'],df_y_m_st.loc[e,'sales'],c=df_y_m_st.loc[e,'sales'],cmap='plasma')
ax[4].text(11.5,0.01,'year=2017',size=20,rotation='270')
ax[4].set_xticks(['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Oct','Sep','Nve','Dec'])
for i in range(5):
    ax[i].spines['top'].set_visible(False)
    ax[i].spines['right'].set_visible(False)
    ax[i].spines['bottom'].set_visible(False)
    ax[i].spines['left'].set_visible(False)
    ax[i].set_ylim([-0.5,5])
    ax[i].set_xlim([-0.5,15])
    ax[i].tick_params(axis='both',which='major',labelsize=25)#改变坐标轴的大小
#     ax[i].set_colorbar()   

fig.colorbar(ax[0].scatter(df_y_m_st.loc[a,'month'],df_y_m_st.loc[a,'store_type'],df_y_m_st.loc[a,'sales'],c=df_y_m_st.loc[a,'sales'],cmap='plasma'), ax=[ax[0], ax[1],ax[2],ax[3],ax[4]], shrink=0.9)


plt.text(16,32,'sales',size=20)
plt.show()

在这里插入图片描述

month与holiday绘总

df_m_ht=df_train1.groupby(['month','holiday_type']).agg({'sales':'mean'}).reset_index()
df_m_ht['sales']=round(df_m_ht['sales'],2)
df_m_ht['month']=df_m_ht['month'].apply(lambda x :calendar.month_abbr[x])
plt.scatter(df_m_ht.iloc[:,0],df_m_ht.iloc[:,1],s=df_m_ht.iloc[:,2],c=df_m_ht.iloc[:,2],cmap='plasma')
plt.colorbar()
plt.text(12.5,6,'sales')
plt.xlim(-1,12)
plt.ylim(-0.5,5.5)
plt.title('Average Sales:Month vs holiday type ')
plt.show()

在这里插入图片描述

df_y_m_ht=df_train1.groupby(['year','month','holiday_type']).agg({'sales':'mean'}).reset_index()
df_y_m_ht['sales']=round(df_y_m_ht['sales'],2)
df_y_m_ht['month']=df_y_m_ht['month'].apply(lambda x:calendar.month_abbr[x])
df_y_m_ht.head()
yearmonthholiday_typesales
02013JanHoliday1.41
12013JanWork Day247.08
22013FebHoliday164.82
32013MarHoliday307.44
42013AprHoliday228.52

96 rows × 4 columns

month与holiday绘总不同年份

a=df_y_m_ht['year']==2013
b=df_y_m_ht['year']==2014
c=df_y_m_ht['year']==2015
d=df_y_m_ht['year']==2016
e=df_y_m_ht['year']==2017
fig,ax=plt.subplots(5,1,figsize=(24,20))

ax[0].scatter(df_y_m_ht.loc[a,'month'],df_y_m_ht.loc[a,'holiday_type'],df_y_m_ht.loc[a,'sales'],c=df_y_m_ht.loc[a,'sales'],cmap='plasma')
ax[0].text(12,0.01,'year=2013',size=20,rotation='270')
ax[0].set_xticks([])

ax[1].scatter(df_y_m_ht.loc[b,'month'],df_y_m_ht.loc[b,'holiday_type'],df_y_m_ht.loc[b,'sales'],c=df_y_m_ht.loc[b,'sales'],cmap='plasma')
ax[1].text(12,0.01,'year=2014',size=20,rotation='270')
ax[1].set_xticks([])
ax[2].scatter(df_y_m_ht.loc[c,'month'],df_y_m_ht.loc[c,'holiday_type'],df_y_m_ht.loc[c,'sales'],c=df_y_m_ht.loc[c,'sales'],cmap='plasma')
ax[2].text(12,0.01,'year=2015',size=20,rotation='270')
ax[2].set_xticks([])
ax[3].scatter(df_y_m_ht.loc[d,'month'],df_y_m_ht.loc[d,'holiday_type'],df_y_m_ht.loc[d,'sales'],c=df_y_m_ht.loc[d,'sales'],cmap='plasma')
ax[3].text(12,0.01,'year=2016',size=20,rotation='270')
ax[3].set_xticks([])
ax[4].scatter(df_y_m_ht.loc[e,'month'],df_y_m_ht.loc[e,'holiday_type'],df_y_m_ht.loc[e,'sales'],c=df_y_m_ht.loc[e,'sales'],cmap='plasma')
ax[4].text(11.5,0.01,'year=2017',size=20,rotation='270')
ax[4].set_xticks(['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Oct','Sep','Nve','Dec'])
for i in range(5):
    ax[i].spines['top'].set_visible(False)
    ax[i].spines['right'].set_visible(False)
    ax[i].spines['bottom'].set_visible(False)
    ax[i].spines['left'].set_visible(False)
    ax[i].set_ylim([-0.5,6])
    ax[i].set_xlim([-0.5,12])
    ax[i].tick_params(axis='both',which='major',labelsize=25)#改变坐标轴的大小
#     ax[i].set_colorbar()   

fig.colorbar(ax[0].scatter(df_y_m_ht.loc[a,'month'],df_y_m_ht.loc[a,'holiday_type'],df_y_m_ht.loc[a,'sales'],c=df_y_m_ht.loc[a,'sales'],cmap='plasma'), ax=[ax[0], ax[1],ax[2],ax[3],ax[4]], shrink=0.9)


plt.text(16,32,'sales',size=20)
plt.show()

在这里插入图片描述

  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值