# Load the BigMart dataset and discard every row that contains a missing value.
# (data_BM.head() can be called afterwards to preview the first rows.)
data_BM = pd.read_csv("bigmart_data.csv").dropna(how='any')
# 4. Charts
# 4.1 Line chart
# Mean MRP per item type, limited to the first 10 groups.
price_by_item = data_BM.groupby("Item_Type").Item_MRP.mean()[:10]

# Convert the data to lists: group labels for x, mean prices for y.
x = price_by_item.index.tolist()
# Fix: y must hold the mean values; the original took the index a second
# time, which plotted the item-type labels against themselves.
y = price_by_item.values.tolist()

# set figure size
plt.figure(figsize=(14, 8))

# set title
plt.title('Mean price for each item type')

# set axis labels
plt.xlabel('Item Type')
# Fix: the y axis carries the mean price, so 'Mean Type' was a mislabel.
plt.ylabel('Mean Price')

# set xticks: one tick per item type, labelled with its name
plt.xticks(np.arange(len(x)), x)

# plot
plt.plot(x, y)
# 4.2 Bar chart
# Mean item sales per outlet size (first 10 groups), sorted in ascending
# order of sales.
sales_by_outlet_size = (
    data_BM.groupby('Outlet_Size')['Item_Outlet_Sales'].mean()[:10].sort_values()
)

# Outlet sizes go on the x axis, their mean sales on the y axis.
x = list(sales_by_outlet_size.index)
y = sales_by_outlet_size.values.tolist()

# Axis labels.
plt.xlabel('Outlet_Size')
plt.ylabel('Sales')

# One tick per outlet size, labelled with its name.
plt.xticks(ticks=np.arange(len(x)), labels=x)

# Draw the bars using the given colour list.
bar_colors = ['red', 'orange', 'magenta']
plt.bar(x, y, color=bar_colors)
# Box plot of the item sales distribution.
data = data_BM['Item_Outlet_Sales']

# create outlier point shape: red diamond markers
# Fix: the dict was bound to `read_diamond` (NameError when `red_diamond` is
# used below) and its key was spelled `maker` instead of `marker`.
red_diamond = dict(markerfacecolor='r', marker='D')

# set title
plt.title('Item Sales distribution')

# make the boxplot
# NOTE(review): trailing ';' kept from the original -- presumably there to
# silence the echoed return value in a notebook; confirm before removing.
plt.boxplot(data.values, labels=['Item Sales'], flierprops=red_diamond);
# Side-by-side box plots of item weight and item MRP.
data = data_BM[['Item_Weight', 'Item_MRP']]

# create outlier point shape: red diamond markers
red_diamond = dict(markerfacecolor='r', marker='D')

# generate subplots
fig, ax = plt.subplots()

# make the boxplots
# Fixes: `plt.bocplot` does not exist (AttributeError) -> `plt.boxplot`; the
# first plotted column is Item_Weight, so its tick label is 'Item Weight'
# rather than 'Item Sales'.
# NOTE(review): trailing ';' kept from the original -- presumably there to
# silence the echoed return value in a notebook; confirm before removing.
plt.boxplot(data.values, labels=['Item Weight', 'Item MRP (price)'], flierprops=red_diamond);
# 4.5 Violin plots
# Violin plots of the Item_Weight and Item_MRP columns, drawn on a new figure.
violin_columns = ['Item_Weight', 'Item_MRP']
data = data_BM.loc[:, violin_columns]

# Start a fresh figure/axes pair.
fig, ax = plt.subplots()

# Label x positions 1 and 2 with the plotted column names.
plt.xticks(ticks=[1, 2], labels=['Item_Weight', 'Item MRP'])

# Draw one violin per column.
plt.violinplot(data.values);
# 4.6 Scatter plot
# Scatter plot of item visibility against item weight for the first 200 rows.

# set label of axes
plt.xlabel('Item_weight')
plt.ylabel('Item_visibility')

# plot
# Fix: read both columns from data_BM. The original took Item_Weight from
# `data`, a variable left over from an earlier section, so this plot only
# worked when that section had been run immediately before.
plt.scatter(data_BM["Item_Weight"][:200], data_BM["Item_Visibility"][:200])
# 4.7 Bubble plot
# Bubble plot of outlet sales against MRP for the first 100 rows.

# Axis labels.
plt.xlabel('Item_MRP')
plt.ylabel('Item_Outlet_Sales')

# Title.
plt.title('Item Outlet Sales vs Item MRP (price)')

# Plot. Note the difference from the scatter plot above: each marker's area
# is scaled by the row's Item_Visibility (times 1000).
bubble_rows = data_BM.iloc[:100]
plt.scatter(
    bubble_rows["Item_MRP"],
    bubble_rows["Item_Outlet_Sales"],
    s=bubble_rows["Item_Visibility"] * 1000,
    c='red',
)