Yohobuy 鞋靴类别商品数据的爬取过程，详见《Yohobuy 数据爬取》。
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Chart 1: bar chart of the ten highest-priced items (column 'deal'),
# labelled by item title, saved as a PNG in the working directory.

# KaiTi is selected as the sans-serif font, presumably so the Chinese
# title and axis labels below render instead of showing empty boxes.
plt.rcParams['font.sans-serif'] = ['KaiTi']

# Raw string: '\s' is not a valid escape sequence, so the non-raw form
# triggers an invalid-escape warning. read_csv already returns a
# DataFrame, so no extra pd.DataFrame(...) wrapper is needed.
df = pd.read_csv(r'D:\shoes.csv', header=0)

# sort_values() returns a NEW frame (it is not in-place by default), so the
# sorted result has to be kept; otherwise the slice below would just be the
# first ten rows of the CSV rather than the ten most expensive items.
dft10 = df.sort_values('deal', ascending=False).head(10)

# Price values indexed by item title, so the titles become the bar labels.
ts = pd.Series(dft10['deal'].values, index=dft10['title'])

plt.figure(figsize=(12, 8))
ts.plot.bar(edgecolor='black', color='pink', fontsize=12)
plt.title('Yohobuy 价格最贵的鞋靴top10', size=15)
plt.xlabel('鞋靴型号', size=15)
plt.ylabel('商品标价', size=15)
plt.savefig('Yohobuy 价格最贵的鞋靴top10.png')
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Chart 2: kernel-density curves of the 'deal' price column for the whole
# data set and for a fixed list of brands, all drawn on one set of axes.

# KaiTi is selected as the sans-serif font, presumably so the Chinese
# title, labels and legend entry below render correctly.
plt.rcParams['font.sans-serif'] = ['KaiTi']

# Raw string: '\s' is not a valid escape sequence in a normal literal.
df = pd.read_csv(r'D:\shoes.csv', header=0)

# Brands to compare, in the order they should appear in the legend.
# Each entry is matched exactly against the 'brand' column.
brands = [
    'converse',
    'VANS',
    'PUMA',
    'adidas Originals',
    'NBA',
    'Nike',
    'UGG',
    'Reebok',
]

plt.figure(figsize=(12, 8))

# Baseline curve over every item; this first call also fixes the visible
# axis ranges and the tick font size for the shared axes.
df['deal'].plot.kde(legend=True, label='全品类', xlim=(0, 2500),
                    ylim=(0, 0.01), fontsize=15)

# One curve per brand, drawn onto the same (current) axes.
for brand in brands:
    brand_prices = df.loc[df['brand'] == brand, 'deal']
    # A density estimate needs at least two distinct values; a brand that is
    # missing from the data (or has a single price point) would otherwise
    # raise inside the KDE and abort the whole figure.
    if brand_prices.nunique() < 2:
        print('skipping brand with too few distinct prices for KDE:', brand)
        continue
    brand_prices.plot.kde(legend=True, label=brand)

plt.legend(prop={'size': 15})
plt.title('主流品牌价格区间kde比对图', size=15)
plt.xlabel('鞋靴价格', size=15)
plt.ylabel('density', size=15)
plt.savefig('主流品牌价格区间kde比对图.png')
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
# Chart 3: horizontal bar chart of the 20 most frequently used price values
# in the 'deal' column (how many items carry each price).

# KaiTi is selected as the sans-serif font, presumably so the Chinese
# title and axis labels below render correctly.
plt.rcParams['font.sans-serif'] = ['KaiTi']

# Raw string: '\s' is not a valid escape sequence in a normal literal.
df = pd.read_csv(r'D:\shoes.csv', header=0)

# value_counts() returns counts indexed by price, most frequent first.
counts = df['deal'].value_counts()

# head(20) is explicitly positional. The index here holds the price values
# themselves, so a plain [:20] slice risks being read as a label slice on a
# numeric index. Re-sort ascending so barh (which draws the first entry at
# the bottom) puts the most common price at the top.
counts1 = counts.head(20).sort_values(ascending=True)
print(counts1)

plt.figure(figsize=(12, 8))
counts1.plot.barh(edgecolor='#E6E6E6', color='#EE6666', fontsize=15)
plt.xlabel('商品数量', size=15)
plt.ylabel('商品标价', size=15)
plt.title('Yohobuy 商家最爱给出的商品标价top20', size=15)
plt.savefig('Yohobuy 商家最爱给出的商品标价top20.png')
1280 53
539 55
1299 56
680 57
469 65
1190 65
569 65
790 75
339 81
1199 97
299 98
369 105
899 115
999 116
1099 123
499 136
699 166
399 179
799 182
599 183
Name: deal, dtype: int64
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Chart 4: pie chart of how many items each brand contributes to the data.

# KaiTi is selected as the sans-serif font, presumably so the Chinese
# title and axis label below render correctly.
plt.rcParams['font.sans-serif'] = ['KaiTi']

# Raw string: '\s' is not a valid escape sequence in a normal literal.
df = pd.read_csv(r'D:\shoes.csv', header=0)

# Number of rows per brand, most frequent first.
counts = df['brand'].value_counts()

plt.figure(figsize=(12, 8))
# Transposing a Series is a no-op, so the counts are plotted directly.
counts.plot.pie(fontsize=12)
plt.xlabel('品牌名称', size=15)
plt.title('Yohobuy 鞋靴品牌商品数量比例图', size=15)
plt.savefig('Yohobuy 鞋靴品牌商品数量比例图.png')