用 session 模拟登录没有成功,只能先在浏览器里手动登录,再把请求头中的 cookie 复制出来,带着它去访问商品搜索页面。现在的网站好像大多都采用这种反爬虫机制了……无语。
关于如何获取请求头(headers)中的 cookie,参考了这篇文章(写得挺棒的):https://blog.csdn.net/Guanhai1617/article/details/104120581?utm_medium=distribute.pc_relevant.none-task-blog-BlogCommendFromMachineLearnPai2-2.nonecase&depth_1-utm_source=distribute.pc_relevant.none-task-blog-BlogCommendFromMachineLearnPai2-2.nonecase
代码:
import re
import requests
import openpyxl
import os
from openpyxl.chart import BarChart, Series, Reference

# Script overview:
#   1. Request the first three result pages of a Taobao search, sorted by sales.
#   2. Pull every "raw_title" / "view_price" pair out of the returned page text.
#   3. Write the pairs to an Excel sheet and add a 3D bar chart of the prices.

# Output workbook is created in the current working directory.
file_path = os.path.join(os.getcwd(), '医用口罩表.xlsx')
ex_file = openpyxl.Workbook()
sheet_1 = ex_file.active
sheet_1.title = '口罩供应商'
sheet_1['A1'] = '商品名'
sheet_1['B1'] = '价格'

# Request headers copied from a logged-in browser session.
# NOTE(review): the 'cookie' value is a personal session credential. It will
# expire and should not be published or committed; replace it with a fresh
# cookie from your own browser before running.
head = {
    'authority': 's.taobao.com',
    'cache-control': 'max-age=0',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36 Edg/83.0.478.61',
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'sec-fetch-site': 'same-origin',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-user': '?1',
    'sec-fetch-dest': 'document',
    'referer': 'https://www.taobao.com/',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    'cookie': 'cna=ZgeSF8aFQVACAbYjez4Ty5y9; t=8d85d6b62500f814ed50db2334f0add9; lgc=tb273784045; tracknick=tb273784045; mt=ci=3_1; thw=cn; sgcookie=EbIswQASovHSpBjnzvBGi; uc3=lg2=VT5L2FSpMGV7TQ%3D%3D&vt3=F8dBxGPqBv6pkowrnic%3D&nk2=F5RHpr9uzAWJicw%3D&id2=Vy0SO4B6bdNywg%3D%3D; uc4=id4=0%40VXqco%2FSSqiyYs5UXCwvMGyfEQi9m&nk4=0%40FY4MtL6QaaEQPaMnL1B9ECPNtmc0WA%3D%3D; _cc_=Vq8l%2BKCLiw%3D%3D; enc=kfakK3WSKs%2B1oSznMN9NCKKDpwt7vBhces3UcPxcrFhj6ZDUq2x1u%2FIT%2F6%2BcRKh3jFeYoL0mdgMzHZ6jXpCXEg%3D%3D; tfstk=ceGNBm4bbCdwBCFXe5NV1fIrqH9OaoE0V6zzIYU9uD4R4Cyg4sfcDyYCDyzVvDeG.; hng=CN%7Czh-CN%7CCNY%7C156; v=0; uc1=cookie14=UoTV6OdKEo%2BuDg%3D%3D; cookie2=178023102a4627e1de09dd51e785519f; _tb_token_=70e0e3fe7b613; alitrackid=www.taobao.com; lastalitrackid=www.taobao.com; l=eBLoDB0eOg5ISLiSBOfaourza779LIRbouPzaNbMiOCPOvfH5SJhWZlGuqLMCnGVnsieR3l2C1l6B0YLRyzHh2nk8b8CgsDLVdTh.; isg=BOHh3pSHGrtWD7bPqgMryXls8K37jlWAANmQckO2j-hHqgB8i969UZToCN4sZO24; JSESSIONID=0C30007EE8F7A0FA543EA9BA8B3A0D88',
}

# Collected [title, price] rows, in the order they appear in the responses.
infos = []

# Search URL; the result offset is appended to the trailing "s=" parameter.
url = 'https://s.taobao.com/search?initiative_id=tbindexz_20170306&ie=utf8&spm=a21bo.2017.201856-taobao-item.2&sourceId=tb.index&search_type=item&ssid=s5-e&commend=all&imgfile=&q=%E5%8C%BB%E7%94%A8%E5%8F%A3%E7%BD%A9&suggest=history_1&_input_charset=utf-8&wq=&suggest_query=&source=suggest&sort=sale-desc&bcoffset=0&p4ppushleft=%2C44&s='

# Compiled once, outside the page loop.
title_pattern = re.compile(r'\"raw_title\":\"(.*?)\"')
price_pattern = re.compile(r'\"view_price\":\"(.*?)\"')

# Read three pages; the offset advances by 44 per page.
for i in range(0, 3):
    url_new = url + str(i * 44)
    # A timeout keeps the script from hanging forever on a stalled connection.
    r = requests.get(url_new, headers=head, timeout=10)
    if r.status_code != 200:
        print('skipping page %d: HTTP status %d' % (i + 1, r.status_code))
        continue
    r.encoding = r.apparent_encoding
    title = title_pattern.findall(r.text)
    price = price_pattern.findall(r.text)
    # zip() stops at the shorter list, so a page with unequal numbers of
    # titles and prices cannot raise IndexError.
    for name, raw_price in zip(title, price):
        # Parse with float() rather than eval(): the text comes from a remote
        # page and must never be executed as Python code.
        try:
            value = float(raw_price)
        except ValueError:
            continue  # not a plain number; skip this item
        infos.append([name, value])

# Append one row per product below the header row (title in A, price in B).
for row in infos:
    sheet_1.append(row)

sheet_1.column_dimensions['A'].width = 70  # widen the title column

last_row = len(infos) + 1  # header row + one row per product
if infos:
    # Price series starts at row 1 so that the header cell becomes the series
    # name (titles_from_data=True); category labels start at row 2.
    data = Reference(sheet_1, min_row=1, min_col=2, max_row=last_row)
    titles = Reference(sheet_1, min_row=2, min_col=1, max_row=last_row)
    chart = openpyxl.chart.BarChart3D()
    chart.title = '医用口罩比较'
    chart.add_data(data=data, titles_from_data=True)
    chart.set_categories(titles)
    chart.height = 15
    chart.width = 100
    sheet_1.add_chart(chart, 'D2')  # anchor the chart next to the data
else:
    # Nothing was scraped (e.g. the cookie expired); a chart over an empty
    # range would be meaningless, so only the header row is written.
    print('no products found; the workbook will contain only the header row')

# Single save: the workbook does not need to be written and reloaded before
# the column width and chart are applied.
ex_file.save(file_path)
生成
打开它
先看数据区(爬了三页,效果不错):
柱状图就在数据旁边,尺寸太大了,勉强看看吧:
爬虫到这里就结束了,简单入了个门,不搞了,专心搞点其它喜欢的。