【spider】Yohobuy数据处理

Yohobuy 鞋靴类别商品数据的爬取过程，见《Yohobuy数据爬取》一文；本文只对爬取得到的 shoes.csv 做数据处理与可视化。

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Plot the ten highest-priced shoes as a bar chart and save it as a PNG.

# Select a CJK-capable font so the Chinese title and axis labels render.
plt.rcParams['font.sans-serif'] = ['KaiTi']

# Raw string: '\s' is not a valid escape sequence and triggers a warning on
# recent Python versions. read_csv already returns a DataFrame.
df = pd.read_csv(r'D:\shoes.csv', header=0)

# sort_values returns a new frame rather than sorting in place; the result
# must be kept, otherwise the slice below is just the first ten CSV rows.
df = df.sort_values(["deal"], ascending=False)
dft10 = df[:10]

# Bar heights are the 'deal' values, bar labels are the product titles.
ts = pd.Series(dft10['deal'].values, index=dft10['title'])
plt.figure(figsize=(12, 8))
ts.plot.bar(edgecolor='black', color='pink', fontsize=12)
plt.title('Yohobuy 价格最贵的鞋靴top10', size=15)
plt.xlabel('鞋靴型号', size=15)
plt.ylabel('商品标价', size=15)
plt.savefig('Yohobuy 价格最贵的鞋靴top10.png')

在这里插入图片描述

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Overlay kernel-density estimates of the 'deal' column for the whole data
# set and for a fixed list of brands, then save the figure as a PNG.

# Select a CJK-capable font so the Chinese title and axis labels render.
plt.rcParams['font.sans-serif'] = ['KaiTi']

# Raw string: '\s' is not a valid escape sequence and triggers a warning on
# recent Python versions. read_csv already returns a DataFrame.
df = pd.read_csv(r'D:\shoes.csv', header=0)

# Brands to compare; each string must match the 'brand' column exactly and
# doubles as the legend label.
brands = [
    'converse',
    'VANS',
    'PUMA',
    'adidas Originals',
    'NBA',
    'Nike',
    'UGG',
    'Reebok',
]

plt.figure(figsize=(12, 8))

# The first curve covers every row and fixes the axis limits for the figure.
all_prices = pd.Series(df['deal'].values)
all_prices.plot.kde(legend=True, label='全品类', xlim=(0, 2500),
                    ylim=(0, 0.01), fontsize=15)

for brand in brands:
    brand_prices = pd.Series(df.loc[df['brand'] == brand, 'deal'].values)
    # A density estimate needs at least two distinct values; skip brands
    # that are missing from the file (or have a single price) instead of
    # letting the KDE fit raise and abort the whole figure.
    if brand_prices.nunique() < 2:
        continue
    brand_prices.plot.kde(legend=True, label=brand)

plt.legend(prop={'size': 15})
plt.title('主流品牌价格区间kde比对图', size=15)
plt.xlabel('鞋靴价格', size=15)
plt.ylabel('density', size=15)
plt.savefig('主流品牌价格区间kde比对图.png')

在这里插入图片描述

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math

# Count how often each 'deal' value occurs, keep the 20 most frequent ones
# and draw them as a horizontal bar chart saved as a PNG.

# Select a CJK-capable font so the Chinese title and axis labels render.
plt.rcParams['font.sans-serif'] = ['KaiTi']

# Raw string: '\s' is not a valid escape sequence and triggers a warning on
# recent Python versions. read_csv already returns a DataFrame.
df = pd.read_csv(r'D:\shoes.csv', header=0)

# value_counts orders by frequency, most common first.
counts = df['deal'].value_counts()

# head(20) is explicitly positional; the index holds the price values
# themselves, so a bare [:20] slice reads ambiguously as label vs. position.
# Re-sorting ascending puts the most frequent price at the top of the barh.
counts1 = counts.head(20).sort_values(ascending=True)
print(counts1)

plt.figure(figsize=(12, 8))
counts1.plot.barh(edgecolor='#E6E6E6', color='#EE6666', fontsize=15)
plt.xlabel('商品数量', size=15)
plt.ylabel('商品标价', size=15)
plt.title('Yohobuy 商家最爱给出的商品标价top20', size=15)
plt.savefig('Yohobuy 商家最爱给出的商品标价top20.png')
1280     53
539      55
1299     56
680      57
469      65
1190     65
569      65
790      75
339      81
1199     97
299      98
369     105
899     115
999     116
1099    123
499     136
699     166
399     179
799     182
599     183
Name: deal, dtype: int64

在这里插入图片描述

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Draw a pie chart of how many rows each brand contributes and save it as
# a PNG.

# Select a CJK-capable font so the Chinese title and axis labels render.
plt.rcParams['font.sans-serif'] = ['KaiTi']

# Raw string: '\s' is not a valid escape sequence and triggers a warning on
# recent Python versions. read_csv already returns a DataFrame.
df = pd.read_csv(r'D:\shoes.csv', header=0)

# One slice per distinct brand, sized by its row count.
counts = df['brand'].value_counts()

plt.figure(figsize=(12, 8))
# The original called .T first; transposing a Series is a no-op, so it is
# dropped here.
counts.plot.pie(fontsize=12)
plt.xlabel('品牌名称', size=15)
plt.title('Yohobuy 鞋靴品牌商品数量比例图', size=15)
plt.savefig('Yohobuy 鞋靴品牌商品数量比例图.png')

在这里插入图片描述

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值