Python按销量爬取淘宝医用口罩前三页导入Excel并生成柱状图

用 session 登录的方式没搞定,只能用“假登录”的办法:先拿到登录后的 cookie,再带着它去访问商品页面。现在好像都是这种反爬虫机制了……无语。

具体关于得到head头的cookie参考了一下(写的挺棒的):https://blog.csdn.net/Guanhai1617/article/details/104120581?utm_medium=distribute.pc_relevant.none-task-blog-BlogCommendFromMachineLearnPai2-2.nonecase&depth_1-utm_source=distribute.pc_relevant.none-task-blog-BlogCommendFromMachineLearnPai2-2.nonecase

 代码:

import re
import requests
import openpyxl
import os
from openpyxl.chart import BarChart, Series, Reference
# Create a new workbook and name its only sheet.
ex_file = openpyxl.Workbook()
sheet_1 = ex_file.active
sheet_1.title = '口罩供应商'

# Header row: product name in column A, price in column B.
sheet_1.cell(row=1, column=1, value='商品名')
sheet_1.cell(row=1, column=2, value='价格')

# The workbook is saved into the current working directory.
file_path = os.path.join(os.getcwd(), '医用口罩表.xlsx')

# HTTP request headers passed to requests.get() for every search-page request.
# They mimic a desktop browser navigating from the Taobao home page.
head = {
    'authority': 's.taobao.com',
    'cache-control': 'max-age=0',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36 Edg/83.0.478.61',
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'sec-fetch-site': 'same-origin',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-user': '?1',
    'sec-fetch-dest': 'document',
    'referer': 'https://www.taobao.com/',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    # Hard-coded cookie used in place of a real login.
    # NOTE(review): presumably tied to one account/session and will stop
    # working once it expires -- replace it with your own; confirm.
    'cookie': 'cna=ZgeSF8aFQVACAbYjez4Ty5y9; t=8d85d6b62500f814ed50db2334f0add9; lgc=tb273784045; tracknick=tb273784045; mt=ci=3_1; thw=cn; sgcookie=EbIswQASovHSpBjnzvBGi; uc3=lg2=VT5L2FSpMGV7TQ%3D%3D&vt3=F8dBxGPqBv6pkowrnic%3D&nk2=F5RHpr9uzAWJicw%3D&id2=Vy0SO4B6bdNywg%3D%3D; uc4=id4=0%40VXqco%2FSSqiyYs5UXCwvMGyfEQi9m&nk4=0%40FY4MtL6QaaEQPaMnL1B9ECPNtmc0WA%3D%3D; _cc_=Vq8l%2BKCLiw%3D%3D; enc=kfakK3WSKs%2B1oSznMN9NCKKDpwt7vBhces3UcPxcrFhj6ZDUq2x1u%2FIT%2F6%2BcRKh3jFeYoL0mdgMzHZ6jXpCXEg%3D%3D; tfstk=ceGNBm4bbCdwBCFXe5NV1fIrqH9OaoE0V6zzIYU9uD4R4Cyg4sfcDyYCDyzVvDeG.; hng=CN%7Czh-CN%7CCNY%7C156; v=0; uc1=cookie14=UoTV6OdKEo%2BuDg%3D%3D; cookie2=178023102a4627e1de09dd51e785519f; _tb_token_=70e0e3fe7b613; alitrackid=www.taobao.com; lastalitrackid=www.taobao.com; l=eBLoDB0eOg5ISLiSBOfaourza779LIRbouPzaNbMiOCPOvfH5SJhWZlGuqLMCnGVnsieR3l2C1l6B0YLRyzHh2nk8b8CgsDLVdTh.; isg=BOHh3pSHGrtWD7bPqgMryXls8K37jlWAANmQckO2j-hHqgB8i969UZToCN4sZO24; JSESSIONID=0C30007EE8F7A0FA543EA9BA8B3A0D88',
}
# Collected results: one [title, price] pair per scraped item.
infos = []

# Search URL for the query (sorted by sales, sort=sale-desc). The trailing
# "s=" parameter is the result offset and is filled in per page below.
url = 'https://s.taobao.com/search?initiative_id=tbindexz_20170306&ie=utf8&spm=a21bo.2017.201856-taobao-item.2&sourceId=tb.index&search_type=item&ssid=s5-e&commend=all&imgfile=&q=%E5%8C%BB%E7%94%A8%E5%8F%A3%E7%BD%A9&suggest=history_1&_input_charset=utf-8&wq=&suggest_query=&source=suggest&sort=sale-desc&bcoffset=0&p4ppushleft=%2C44&s='

# Compiled once, reused for every page.
title_re = re.compile(r'\"raw_title\":\"(.*?)\"')
price_re = re.compile(r'\"view_price\":\"(.*?)\"')

for i in range(0, 3):
    url_new = url + str(i * 44)  # three pages, offsets 0, 44, 88
    # A timeout keeps the script from hanging forever on a stalled connection.
    r = requests.get(url_new, headers=head, timeout=10)
    if r.status_code != 200:
        continue
    r.encoding = r.apparent_encoding
    title = title_re.findall(r.text)
    price = price_re.findall(r.text)
    # zip() stops at the shorter list, so a page with more prices than
    # titles no longer raises IndexError.
    for raw_title, raw_price in zip(title, price):
        # The price text comes from a remote page: parse it as a number
        # instead of executing it with eval().
        try:
            cost = float(raw_price)
        except ValueError:
            continue  # not a plain number; skip this item
        infos.append([raw_title, cost])  # keep product name and price

# Flatten the [title, price] pairs into one list: title, price, title, price...
goods = []
for name, cost in infos:
    goods += [name, cost]

# Data occupies rows 2..last_row of columns A and B (row 1 is the header).
last_row = len(goods) // 2 + 1
end_num = 'B{}'.format(last_row)

# Walk the A2:B<last_row> rectangle row by row, left to right, and fill each
# cell with the next flattened value.
i = 0
for row_cells in sheet_1.iter_rows(min_row=2, max_row=last_row,
                                   min_col=1, max_col=2):
    for target in row_cells:
        target.value = goods[i]
        i += 1

ex_file.save(file_path)  # create the file on disk with the data rows

# Reopen the saved file to set the column width and attach the chart.
# The same file_path is used for every save/load so they cannot diverge.
ex_file = openpyxl.load_workbook(file_path, data_only=True)
sheet_1 = ex_file.active
sheet_1.column_dimensions['A'].width = 70  # wide enough for long product titles

last_row = len(goods) // 2 + 1  # header row plus one row per item
if last_row > 1:
    # Only build the chart when at least one data row exists; otherwise the
    # category reference would run from row 2 to row 1.
    # Values: column B including the header, which becomes the series title.
    data = openpyxl.chart.Reference(sheet_1, min_row=1, min_col=2, max_row=last_row)
    # Categories: product names in column A, without the header.
    titles = openpyxl.chart.Reference(sheet_1, min_row=2, min_col=1, max_row=last_row)
    chart = openpyxl.chart.BarChart3D()
    chart.title = '医用口罩比较'
    chart.add_data(data=data, titles_from_data=True)
    chart.set_categories(titles)
    chart.height = 15
    chart.width = 100

    sheet_1.add_chart(chart, 'D2')  # anchor the chart's top-left corner at D2

ex_file.save(file_path)

生成

打开它

先看数据区(爬了三页,效果不错):

柱状图就在数据旁边,不过太大了,勉强看看吧:

爬虫到这里就结束了,简单入了个门,不搞了,专心搞点其它喜欢的。

 

 

  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值