# Scrape Taobao search results for keyword 手机 ("mobile phone") using a
# logged-in cookie, parse the embedded `g_page_config = {...};` JSON from each
# result page, and collect shop names into a pandas DataFrame.
#
# NOTE(review): the cookie values below are redacted placeholders (*****); a
# real logged-in cookie is required or Taobao serves a login/captcha page,
# in which case parse_shop_names() returns [] for that page.
import json
import re

import pandas as pd

HEADERS = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
    'referer': 'https://www.taobao.com/',
    'cookie': 't=************; cna=*******; thw=cn; ali_ab=*********; tg=0; hng=*****; _cc_=*****; uc3=id2=&nk2=&lg2=; tracknick=; enc=*****; cookie2=*****; _tb_token_=*****; x=*****; uc1=cookie14=*****;lng=zh_CN; skt=***; csg=***; v=0; alitrackid=www.taobao.com; lastalitrackid=www.taobao.com; _m_h5_tk=*****; _m_h5_tk_enc=*****; JSESSIONID=*****; l=*****; isg=*****',
}

# Search URL for 手机 (URL-encoded as %E6%89%8B%E6%9C%BA). The trailing `s={}`
# is the pagination offset: Taobao pages by item offset, 44 items per page
# (s=0, 44, 88, ...).
# BUG FIX: the original URL had no format placeholder, so `url_1.format(i)`
# produced the same URL on every iteration and page 1 was fetched ten times.
URL_TEMPLATE = (
    'https://s.taobao.com/search?q=%E6%89%8B%E6%9C%BA&imgfile=&js=1'
    '&stats_click=search_radio_all%3A1&initiative_id=staobaoz_20200831'
    '&ie=utf8&s={}'
)

# `g_page_config` is assigned inline in a <script> tag; the JSON literal is
# followed by a ';' and eventually by the `g_srp_loadCss` call. Compiled once
# instead of inside the page loop.
_PAGE_CONFIG_RE = re.compile(r'g_page_config =(.*?)g_srp_loadCss', re.S)


def parse_shop_names(html):
    """Extract shop names from one Taobao search-result page.

    Parameters
    ----------
    html : str
        Raw page HTML containing the embedded ``g_page_config = {...};``
        script block.

    Returns
    -------
    list[str]
        The ``'nick'`` (shop name) of each auction item, in page order.
        Empty when the page has no recognizable config block (e.g. a
        login/captcha page) or no auction list.
    """
    match = _PAGE_CONFIG_RE.search(html)
    if match is None:
        # Original code did data[0] and crashed with IndexError here.
        return []
    # strip() then [:-1] drops the trailing ';' after the JSON object.
    cont = json.loads(match.group(1).strip()[:-1])
    auctions = (
        cont.get('mods', {})
        .get('itemlist', {})
        .get('data', {})
        .get('auctions', [])
    )
    return [item['nick'] for item in auctions]


def main():
    """Crawl the first 10 result pages and return shop names as a DataFrame."""
    # Imported here (function scope) so the parse logic above is importable
    # and testable without the third-party `requests` package installed.
    import requests

    name = []
    for i in range(10):
        # Offset advances by 44 items per page.
        url = URL_TEMPLATE.format(i * 44)
        res = requests.get(url, headers=HEADERS, timeout=10)
        for nick in parse_shop_names(res.text):
            name.append(nick)
            print(nick)
        print('第' + str(i) + '页爬取完成')
    result = {'店铺名称': name}
    results = pd.DataFrame(result)
    return results


if __name__ == '__main__':
    main()
python利用cookie爬淘宝数据
最新推荐文章于 2023-12-30 12:07:45 发布