商业销售预测绘图
import numpy as np import matplotlib.pylab as plt import pandas as pd import os
导入数据
```python
# Read every file below the competition data directory into two parallel lists:
# filename_[k] is a file name and filename_data[k] is the DataFrame parsed from it.
file = os.walk(r'D:\机器学习\kaggle预测\商店销售')
filename_ = []
filename_data = []
for root, sub, filename in file:
    # os.walk returns directory entries in an arbitrary, OS-dependent order.
    # Sorting both the sub-directories (in place, which steers the traversal)
    # and the file names makes positional lookups into filename_data reproducible.
    sub.sort()
    for i in sorted(filename):
        path = os.path.join(root, i)
        filename_.append(i)
        filename_data.append(pd.read_csv(path))
# Notebook display of the names that were loaded, in list order.
list(filename_)
['holidays_events.csv',
'oil.csv',
'sample_submission.csv',
'stores.csv',
'test.csv',
'train.csv',
'transactions.csv']
# Build the analysis table: train rows enriched with holidays, oil price,
# store metadata and daily transaction counts.
# Tables are looked up by file name instead of by list position, so the joins
# do not depend on the order in which the directory listing was returned.
tables = dict(zip(filename_, filename_data))

# NOTE(review): if holidays_events.csv has several rows for one date, this left
# join repeats the matching train rows -- confirm that is intended.
df_train1 = tables['train.csv'].merge(tables['holidays_events.csv'], on='date', how='left')
df_train1 = df_train1.merge(tables['oil.csv'], on='date', how='left')
df_train1 = df_train1.merge(tables['stores.csv'], on='store_nbr', how='left')
df_train1 = df_train1.merge(tables['transactions.csv'], on=['date', 'store_nbr'], how='left')

# Two joined tables bring a 'type' column, which merge suffixes as type_x / type_y;
# give both descriptive names.
df_train1 = df_train1.rename(columns={'type_x': 'holiday_type', 'type_y': 'store_type'})
df_train1.head()
id date store_nbr family sales onpromotion holiday_type locale locale_name description transferred dcoilwtico city state store_type cluster transactions 0 0 2013-01-01 1 AUTOMOTIVE 0.000 0 Holiday National Ecuador Primer dia del ano False NaN Quito Pichincha D 13 NaN 1 1 2013-01-01 1 BABY CARE 0.000 0 Holiday National Ecuador Primer dia del ano False NaN Quito Pichincha D 13 NaN 2 2 2013-01-01 1 BEAUTY 0.000 0 Holiday National Ecuador Primer dia del ano False NaN Quito Pichincha D 13 NaN 3 3 2013-01-01 1 BEVERAGES 0.000 0 Holiday National Ecuador Primer dia del ano False NaN Quito Pichincha D 13 NaN 4 4 2013-01-01 1 BOOKS 0.000 0 Holiday National Ecuador Primer dia del ano False NaN Quito Pichincha D 13 NaN
# Parse the date column once, then derive the calendar features used by the
# grouped summaries below.
df_train1['date'] = pd.to_datetime(df_train1['date'])
when = df_train1['date'].dt
calendar_features = {
    'year': when.year,
    'month': when.month,
    'week': when.isocalendar().week,
    'quarter': when.quarter,
    'day_of_week': when.day_name(),
}
# Insertion order of the dict fixes the order in which the columns are appended.
for column, values in calendar_features.items():
    df_train1[column] = values
df_train1.head()
id date store_nbr family sales onpromotion holiday_type locale locale_name description ... city state store_type cluster transactions year month week quarter day_of_week 0 0 2013-01-01 1 AUTOMOTIVE 0.0 0 Holiday National Ecuador Primer dia del ano ... Quito Pichincha D 13 NaN 2013 1 1 1 Tuesday 1 1 2013-01-01 1 BABY CARE 0.0 0 Holiday National Ecuador Primer dia del ano ... Quito Pichincha D 13 NaN 2013 1 1 1 Tuesday 2 2 2013-01-01 1 BEAUTY 0.0 0 Holiday National Ecuador Primer dia del ano ... Quito Pichincha D 13 NaN 2013 1 1 1 Tuesday 3 3 2013-01-01 1 BEVERAGES 0.0 0 Holiday National Ecuador Primer dia del ano ... Quito Pichincha D 13 NaN 2013 1 1 1 Tuesday 4 4 2013-01-01 1 BOOKS 0.0 0 Holiday National Ecuador Primer dia del ano ... Quito Pichincha D 13 NaN 2013 1 1 1 Tuesday
5 rows × 22 columns
store_type、family、cluster汇总
# Mean sales per store type, highest first.
df_st_sa = (
    df_train1.groupby('store_type', as_index=False)['sales']
    .mean()
    .sort_values(by='sales', ascending=False)
)
df_st_sa
store_type sales 0 A 708.378165 3 D 352.084510 1 B 328.275233 4 E 270.285490 2 C 197.790647
# Mean sales per product family; keep the ten best-selling families.
df_fa_sa = (
    df_train1.groupby('family', as_index=False)['sales']
    .mean()
    .sort_values(by='sales', ascending=False)
    .head(10)
)
df_fa_sa
family sales 12 GROCERY I 3790.432797 3 BEVERAGES 2394.912701 30 PRODUCE 1355.373698 7 CLEANING 1074.171518 8 DAIRY 711.175991 5 BREAD/BAKERY 464.150612 28 POULTRY 351.078816 24 MEATS 341.965905 25 PERSONAL CARE 271.192381 9 DELI 265.629746
# Mean sales per store cluster, in ascending cluster order.
df_cl_sa = df_train1.groupby('cluster', as_index=False)['sales'].mean()
df_cl_sa.head()
cluster sales 0 1 327.022808 1 2 261.025731 2 3 194.926534 3 4 297.537877 4 5 1120.118405
from matplotlib.gridspec import GridSpec

# Overview figure on a 2x2 grid:
#   top-left  - ten best-selling families (horizontal bars)
#   top-right - share of mean sales per store type (donut)
#   bottom    - mean sales per store cluster (bars, full width)
plt.figure(figsize=(12, 8))
gs = GridSpec(2, 2)

ax = plt.subplot(gs[1:, :2])
# Place each bar at its actual cluster id; plotting against row positions
# would label the clusters 0..N-1 instead of the ids stored in the table.
cluster_ids = df_cl_sa['cluster'].tolist()
ax.bar(cluster_ids, df_cl_sa['sales'], width=0.5)
ax.set_xticks(cluster_ids)
ax.set_title('Clusters VS Sales')

ax = plt.subplot(gs[:1, :1])
ax.barh(df_fa_sa['family'], df_fa_sa['sales'])
ax.set_title('Average Sales Families')

ax = plt.subplot(gs[:1, 1])
# wedgeprops width < 1 turns the pie into a ring.
ax.pie(df_st_sa['sales'], wedgeprops={'width': 0.3}, labels=df_st_sa['store_type'])
ax.set_title('Highest Sales Stores')

plt.show()
月销售量汇总
def _monthly_mean_sales(frame, year, label):
    """Return the mean of ``sales`` per calendar month for a single year.

    Parameters
    ----------
    frame : DataFrame with at least the columns 'year', 'month' and 'sales'.
    year : value of the 'year' column to keep.
    label : name given to the column holding the monthly means.

    Returns
    -------
    DataFrame with the columns ['month', label], one row per month that
    occurs in ``frame`` for that year.
    """
    one_year = frame.loc[frame['year'] == year, ['month', 'sales']]
    monthly = one_year.groupby('month').agg({'sales': 'mean'}).reset_index()
    return monthly.rename(columns={'sales': label})


df_2013 = _monthly_mean_sales(df_train1, 2013, 's13')
df_2014 = _monthly_mean_sales(df_train1, 2014, 's14')
df_2015 = _monthly_mean_sales(df_train1, 2015, 's15')
df_2016 = _monthly_mean_sales(df_train1, 2016, 's16')

# Months 9-12 of 2017 are added with zero sales so that the inner joins
# below still return one row for each of the twelve months.
df_2017 = _monthly_mean_sales(df_train1, 2017, 'sales')
df_2017_no = pd.DataFrame({'month': [9, 10, 11, 12], 'sales': [0, 0, 0, 0]})
# DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
df_2017 = pd.concat([df_2017, df_2017_no]).rename(columns={'sales': 's17'})

# One row per month, one column of mean sales per year.
df_year = (
    df_2013.merge(df_2014, on='month')
    .merge(df_2015, on='month')
    .merge(df_2016, on='month')
    .merge(df_2017, on='month')
)
df_year
month s13 s14 s15 s16 s17 0 1 186.952405 342.341709 269.666595 434.050268 476.596791 1 2 193.581846 241.268892 275.420792 424.695398 465.971468 2 3 206.880581 368.661236 282.368624 418.735398 483.400632 3 4 205.639071 240.577087 279.743138 488.108774 482.172948 4 5 210.184563 242.203129 320.958116 457.671398 487.162797 5 6 215.691343 244.634652 397.249619 419.644575 488.707278 6 7 203.983455 350.830102 403.030170 432.562218 489.909880 7 8 212.479434 251.351805 415.692304 406.437390 465.144891 8 9 220.593588 374.530792 434.734053 419.331240 0.000000 9 10 213.164266 369.213666 432.248428 435.002169 0.000000 10 11 231.136537 384.056027 426.579749 462.916675 0.000000 11 12 298.675144 459.818606 513.845328 557.114822 0.000000
# Keep only the per-year sales columns (the month column is dropped) and
# turn any missing value into 0.
year_columns = ['s13', 's14', 's15', 's16', 's17']
df_year = df_year.loc[:, year_columns].fillna(0)
df_year
s13 s14 s15 s16 s17 Jan 186.952405 342.341709 269.666595 434.050268 476.596791 Feb 193.581846 241.268892 275.420792 424.695398 465.971468 Mar 206.880581 368.661236 282.368624 418.735398 483.400632 Apr 205.639071 240.577087 279.743138 488.108774 482.172948 May 210.184563 242.203129 320.958116 457.671398 487.162797 Jun 215.691343 244.634652 397.249619 419.644575 488.707278 Ju1 203.983455 350.830102 403.030170 432.562218 489.909880 Aug 212.479434 251.351805 415.692304 406.437390 465.144891 Sep 220.593588 374.530792 434.734053 419.331240 0.000000 Oct 213.164266 369.213666 432.248428 435.002169 0.000000 Nov 231.136537 384.056027 426.579749 462.916675 0.000000 Dec 298.675144 459.818606 513.845328 557.114822 0.000000
# Label the twelve rows with month abbreviations.
# The seventh label previously read 'Ju1' (digit one) instead of 'Jul'.
df_year.index = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
# Month numbers of the 2013 table as a plain Python list.
y_data = df_2013['month'].tolist()
df_year
s13 s14 s15 s16 s17 Jan 186.952405 342.341709 269.666595 434.050268 476.596791 Feb 193.581846 241.268892 275.420792 424.695398 465.971468 Mar 206.880581 368.661236 282.368624 418.735398 483.400632 Apr 205.639071 240.577087 279.743138 488.108774 482.172948 May 210.184563 242.203129 320.958116 457.671398 487.162797 Jun 215.691343 244.634652 397.249619 419.644575 488.707278 Ju1 203.983455 350.830102 403.030170 432.562218 489.909880 Aug 212.479434 251.351805 415.692304 406.437390 465.144891 Sep 220.593588 374.530792 434.734053 419.331240 0.000000 Oct 213.164266 369.213666 432.248428 435.002169 0.000000 Nov 231.136537 384.056027 426.579749 462.916675 0.000000 Dec 298.675144 459.818606 513.845328 557.114822 0.000000
# Stacked horizontal bars: one row per month, one segment per year.
plt.figure(figsize=(12, 8))

# Year caption -> x coordinate at which the caption is written above the bars.
caption_x_by_year = {'2013': 100, '2014': 500, '2015': 1000, '2016': 1300, '2017': 1700}

stack_edge = None  # running right-hand edge of the segments drawn so far
for column_pos, (year_text, caption_x) in enumerate(caption_x_by_year.items()):
    segment = df_year.iloc[:, column_pos]
    if stack_edge is None:
        # First segment starts at the axis origin.
        plt.barh(df_year.index, segment, label=year_text)
        stack_edge = segment
    else:
        plt.barh(df_year.index, segment, left=stack_edge, label=year_text)
        stack_edge = stack_edge + segment
    plt.text(caption_x, 12.5, year_text)

plt.title('Avg Sales for Each Year', loc='left', y=1.08, fontsize=15)
plt.legend()
plt.show()
月、季、周、星期几汇总
import calendar

# Mean sales per month, with a short month name and a "Mon-value" caption
# that is later written next to each bar.
df_m_sa = df_train1.groupby('month').agg({'sales': 'mean'}).reset_index()
df_m_sa['sales'] = df_m_sa['sales'].round(2)
df_m_sa['month_text'] = df_m_sa['month'].apply(lambda x: calendar.month_abbr[x])
df_m_sa['text'] = df_m_sa['month_text'] + '-' + df_m_sa['sales'].astype(str)

# Mean sales per week number and per quarter.
df_w_sa = df_train1.groupby('week').agg({'sales': 'mean'}).reset_index()
df_q_sa = df_train1.groupby('quarter').agg({'sales': 'mean'}).reset_index()

# The last element used to be the bound method `df_q_sa.head` (missing call
# parentheses); call it so all three previews are DataFrames.
df_m_sa.head(), df_w_sa.head(), df_q_sa.head()
( month sales month_text text
0 1 341.92 Jan Jan-341.92
1 2 320.93 Feb Feb-320.93
2 3 352.01 Mar Mar-352.01
3 4 341.17 Apr Apr-341.17
4 5 345.65 May May-345.65,
week sales
0 1 409.099519
1 2 347.534643
2 3 338.142199
3 4 329.186258
4 5 344.195233,
quarter sales
0 1 338.825392
1 2 346.546038
2 3 359.334098
3 4 399.229622)
from matplotlib.gridspec import GridSpec

# Figure on a 2x2 grid:
#   top-left  - mean sales per month (horizontal bars with captions)
#   top-right - share of mean sales per quarter (donut)
#   bottom    - mean sales per week number (filled line, full width)
plt.figure(figsize=(12, 8))
gs = GridSpec(2, 2)
print(gs[:2, :2])  # prints the subplot spec that spans the whole grid

ax = plt.subplot(gs[:1, :1])
ax.barh(df_m_sa['month_text'], df_m_sa['sales'])
# Write each "Mon-value" caption inside its bar, 100 units left of the bar end.
for row_pos, (bar_end, caption) in enumerate(zip(df_m_sa['sales'], df_m_sa['text'])):
    ax.text(bar_end - 100, row_pos, caption)
ax.set_title('month wise avg sales analysis')

ax = plt.subplot(gs[:1, 1])
ax.pie(
    df_q_sa['sales'],
    wedgeprops={'width': 0.3},
    labels=df_q_sa['quarter'],
    autopct='%1.2f%%',
    pctdistance=1.25,
)
# Title was truncated to 'Analy'; spelled out to match the week-wise title.
ax.set_title('Quarter wise Avg Sales Analysis')

ax = plt.subplot(gs[1:, :2])
ax.fill_between(df_w_sa['week'], df_w_sa['sales'], alpha=0.6)
ax.plot(df_w_sa['week'], df_w_sa['sales'], marker='o')
ax.set_title('Week wise Avg Sales Analysis')

plt.show()
GridSpec(2, 2)[0:2, 0:2]
# Mean sales per weekday name, rounded to two decimals.
df_dw_sa = df_train1.groupby('day_of_week', as_index=False)['sales'].mean()
df_dw_sa['sales'] = df_dw_sa['sales'].round(2)
df_dw_sa
day_of_week sales 0 Friday 326.73 1 Monday 348.16 2 Saturday 434.79 3 Sunday 464.74 4 Thursday 286.57 5 Tuesday 319.92 6 Wednesday 330.77
# Horizontal bars of mean sales per weekday; each value is written inside
# its bar, 50 units left of the bar end.
plt.barh(df_dw_sa['day_of_week'], df_dw_sa['sales'])
for bar_pos, mean_sales in enumerate(df_dw_sa['sales']):
    plt.text(mean_sales - 50, bar_pos, mean_sales)
plt.title('Avg Sales VS Day of Week')
plt.show()
store_type与holiday_type关联
# Mean sales for every (store type, holiday type) pair, rounded to two decimals.
df_st_ht = df_train1.groupby(['store_type', 'holiday_type'], as_index=False)['sales'].mean()
df_st_ht['sales'] = df_st_ht['sales'].round(2)
df_st_ht.head()
store_type holiday_type sales 0 A Additional 957.70 1 A Bridge 969.82 2 A Event 813.56 3 A Holiday 723.28 4 A Transfer 984.63
# Bubble chart: store type on x, holiday type on y; bubble area and colour
# both encode the mean sales of the pair.
plt.scatter(
    df_st_ht['store_type'],
    df_st_ht['holiday_type'],
    s=df_st_ht['sales'],
    c=df_st_ht['sales'],
    cmap='plasma',
)
plt.colorbar()
plt.text(4.7, 5.5, 'sales')  # caption placed beside the colorbar
plt.xlim(-0.5, 4.5)
plt.ylim(-0.5, 5.5)
plt.title('Average Sales:Store Type vs holiday type ')
plt.show()
# Mean sales per (year, month, store type); month numbers are then replaced
# by their abbreviated names.
df_y_m_st = df_train1.groupby(['year', 'month', 'store_type'], as_index=False)['sales'].mean()
df_y_m_st['sales'] = df_y_m_st['sales'].round(2)
df_y_m_st['month'] = df_y_m_st['month'].map(lambda month_no: calendar.month_abbr[month_no])
df_y_m_st.head()
year month store_type sales 0 2013 Jan A 392.85 1 2013 Jan B 155.11 2 2013 Jan C 109.06 3 2013 Jan D 191.16 4 2013 Jan E 60.52
280 rows × 4 columns
month与store_type汇总(不同年份)
# Five stacked bubble charts, one per year 2013-2017: month on x, store type
# on y, bubble area and colour both encode the mean sales in df_y_m_st.

# Row masks, one per year.
a = df_y_m_st['year'] == 2013
b = df_y_m_st['year'] == 2014
c = df_y_m_st['year'] == 2015
d = df_y_m_st['year'] == 2016
e = df_y_m_st['year'] == 2017

# df_y_m_st['month'] holds calendar.month_abbr strings. Map them to fixed
# positions 0..11 so a month sits at the same x in every panel, even for a
# year that does not cover all twelve months.
month_names = list(calendar.month_abbr)[1:]
month_position = {name: pos for pos, name in enumerate(month_names)}

# A single colorbar is attached to all five panels, so all panels must share
# one colour scale for it to be readable.
sales_lo = df_y_m_st['sales'].min()
sales_hi = df_y_m_st['sales'].max()

fig, ax = plt.subplots(5, 1, figsize=(24, 20))
panels = [
    (a, 'year=2013', 12),
    (b, 'year=2014', 12),
    (c, 'year=2015', 12),
    (d, 'year=2016', 12),
    (e, 'year=2017', 11.5),
]
for axis, (year_rows, caption, caption_x) in zip(ax, panels):
    # x, y, size and colour all come from the SAME year's rows (panels
    # 2014-2016 previously mixed 2013 coordinates with another year's sizes).
    rows = df_y_m_st.loc[year_rows]
    bubbles = axis.scatter(
        rows['month'].map(month_position),
        rows['store_type'],
        s=rows['sales'],
        c=rows['sales'],
        cmap='plasma',
        vmin=sales_lo,
        vmax=sales_hi,
    )
    # rotation is documented as a number or 'vertical'/'horizontal', so pass
    # 270 rather than the string '270'.
    axis.text(caption_x, 0.01, caption, size=20, rotation=270)
    axis.set_xticks([])

# Only the bottom panel shows month labels, in calendar order.
ax[4].set_xticks(list(range(len(month_names))))
ax[4].set_xticklabels(month_names)

for i in range(5):
    for side in ('top', 'right', 'bottom', 'left'):
        ax[i].spines[side].set_visible(False)
    ax[i].set_ylim([-0.5, 5])
    ax[i].set_xlim([-0.5, 15])
    ax[i].tick_params(axis='both', which='major', labelsize=25)

# Reuse a scatter that is already drawn instead of plotting 2013 a second time.
fig.colorbar(bubbles, ax=list(ax), shrink=0.9)
plt.text(16, 32, 'sales', size=20)
plt.show()
month与holiday汇总
# Mean sales per (month, holiday type), rounded, with abbreviated month names.
df_m_ht = df_train1.groupby(['month', 'holiday_type'], as_index=False)['sales'].mean()
df_m_ht['sales'] = df_m_ht['sales'].round(2)
df_m_ht['month'] = df_m_ht['month'].map(lambda month_no: calendar.month_abbr[month_no])

# Bubble chart: month on x, holiday type on y; bubble area and colour both
# encode the mean sales.
plt.scatter(
    df_m_ht['month'],
    df_m_ht['holiday_type'],
    s=df_m_ht['sales'],
    c=df_m_ht['sales'],
    cmap='plasma',
)
plt.colorbar()
plt.text(12.5, 6, 'sales')  # caption placed beside the colorbar
plt.xlim(-1, 12)
plt.ylim(-0.5, 5.5)
plt.title('Average Sales:Month vs holiday type ')
plt.show()
# Mean sales per (year, month, holiday type); month numbers are then replaced
# by their abbreviated names.
df_y_m_ht = df_train1.groupby(['year', 'month', 'holiday_type'], as_index=False)['sales'].mean()
df_y_m_ht['sales'] = df_y_m_ht['sales'].round(2)
df_y_m_ht['month'] = df_y_m_ht['month'].map(lambda month_no: calendar.month_abbr[month_no])
df_y_m_ht.head()
year month holiday_type sales 0 2013 Jan Holiday 1.41 1 2013 Jan Work Day 247.08 2 2013 Feb Holiday 164.82 3 2013 Mar Holiday 307.44 4 2013 Apr Holiday 228.52
96 rows × 4 columns
month与holiday汇总(不同年份)
# Five stacked bubble charts, one per year 2013-2017: month on x, holiday type
# on y, bubble area and colour both encode the mean sales in df_y_m_ht.

# Row masks, one per year.
a = df_y_m_ht['year'] == 2013
b = df_y_m_ht['year'] == 2014
c = df_y_m_ht['year'] == 2015
d = df_y_m_ht['year'] == 2016
e = df_y_m_ht['year'] == 2017

# df_y_m_ht['month'] holds calendar.month_abbr strings. Plotting the strings
# directly lets each panel number its months by first appearance, so a year
# that lacks a month would be shifted against the labels of the bottom panel.
# Fixed positions 0..11 keep every panel aligned.
month_names = list(calendar.month_abbr)[1:]
month_position = {name: pos for pos, name in enumerate(month_names)}

# A single colorbar is attached to all five panels, so all panels must share
# one colour scale for it to be readable.
sales_lo = df_y_m_ht['sales'].min()
sales_hi = df_y_m_ht['sales'].max()

fig, ax = plt.subplots(5, 1, figsize=(24, 20))
panels = [
    (a, 'year=2013', 12),
    (b, 'year=2014', 12),
    (c, 'year=2015', 12),
    (d, 'year=2016', 12),
    (e, 'year=2017', 11.5),
]
for axis, (year_rows, caption, caption_x) in zip(ax, panels):
    rows = df_y_m_ht.loc[year_rows]
    bubbles = axis.scatter(
        rows['month'].map(month_position),
        rows['holiday_type'],
        s=rows['sales'],
        c=rows['sales'],
        cmap='plasma',
        vmin=sales_lo,
        vmax=sales_hi,
    )
    # rotation is documented as a number or 'vertical'/'horizontal', so pass
    # 270 rather than the string '270'.
    axis.text(caption_x, 0.01, caption, size=20, rotation=270)
    axis.set_xticks([])

# Only the bottom panel shows month labels, in calendar order (the old list
# had 'Oct' and 'Sep' swapped and 'Nve' in place of 'Nov').
ax[4].set_xticks(list(range(len(month_names))))
ax[4].set_xticklabels(month_names)

for i in range(5):
    for side in ('top', 'right', 'bottom', 'left'):
        ax[i].spines[side].set_visible(False)
    ax[i].set_ylim([-0.5, 6])
    ax[i].set_xlim([-0.5, 12])
    ax[i].tick_params(axis='both', which='major', labelsize=25)

# Reuse a scatter that is already drawn instead of plotting 2013 a second time.
fig.colorbar(bubbles, ax=list(ax), shrink=0.9)
plt.text(16, 32, 'sales', size=20)
plt.show()