import requests
import re
import pandas as pd
import csv
import time
# Scrape Taobao search-result pages for the URL-encoded query in the address
# below, pull the item fields out of the embedded JSON with regular
# expressions, and save every collected row to '淘宝.csv'.

# HTTP headers sent with every search request.
# NOTE(review): the Cookie value is a hard-coded browser session. It is a
# credential and will expire; consider loading it from the environment or a
# local, untracked config file instead of keeping it in source.
headers = {
    'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Mobile Safari/537.36',
    'Cookie': '_m_h5_tk=ef0513f22718f8072d0b40bff34a2e65_1636339552583; _m_h5_tk_enc=323099f2cc8c3d3691e5a49dc981462e; cna=xfplGOXnUTQCAXWsv88mNiok; _samesite_flag_=true; cookie2=13b96c3b20784d179cff7f4a20377a6b; t=cb34fdefa9ece00d42377cc813e934e5; _tb_token_=e863be0e353bb; xlly_s=1; sgcookie=E100uo0BCqdP0v5ES32UKT%2FPFA1QzkLzx0FcoWDp5objoqojrJxpDM2q06Rq32suEe1oaVPCZpBSqjkBmTi9gSpgCAZhII6fjWaTLRzr%2FCrarCM%3D; unb=2211782433192; uc3=nk2=F5RARoLmdcWFbE0%3D&id2=UUpgR1XATCjfdpiQJw%3D%3D&vt3=F8dCujuuTwIonu6TYDg%3D&lg2=V32FPkk%2Fw0dUvg%3D%3D; csg=46d85c9d; lgc=tb543843493; cancelledSubSites=empty; cookie17=UUpgR1XATCjfdpiQJw%3D%3D; dnk=tb543843493; skt=35b3d8c4327d964c; existShop=MTYzNjMzMTI5Ng%3D%3D; uc4=id4=0%40U2gqyOiDNzpzjaWmUtpOW9ILo6Ejj2Ii&nk4=0%40FY4L6o%2FDoNPourh5SH9Egrwk16rrzA%3D%3D; tracknick=tb543843493; _cc_=WqG3DMC9EA%3D%3D; _l_g_=Ug%3D%3D; sg=32f; _nk_=tb543843493; cookie1=UUGrcDCxCr61znivd%2FfrsVifpdtSuQsD8bu6ew66QPQ%3D; enc=XiJ2L3Vb6QzxjpJPrRRNNIuS2WR637S2ClZCdzQzDOxn7oX%2FbwBl1epZUknvxa%2BFFCObsbtSas4gHnjQS2F%2BnQqlJ70mmbT1BnGQyYiUzCw%3D; JSESSIONID=AC28A352F512EBAE6BDE9B872523F7C5; tfstk=cE6fBPY7PNBrHgPZ3iZr728AgWpNZYHX4oTlc1t-U14zMUsfipMeRVDfj4pwX31..; l=eBTf0AJrgXIACz12KOfwourza77OSIRAguPzaNbMiOCP_I1p59olW6C9qM89C3GVh6VeR3o2xuSvBeYBcS0H3CPM2j-la_kmn; isg=BBERToTPFKjwgXi8DFjQLa1SIBurfoXwcD98i_OmDVj3mjHsO86VwL94OG58kh0o; mt=ci=35_1; uc1=cookie21=W5iHLLyFe3xm&existShop=false&cookie15=VFC%2FuZ9ayeYq2g%3D%3D&cookie16=WqG3DMC9UpAPBHGz5QBErFxlCA%3D%3D&cookie14=Uoe3cc1WVxc3tw%3D%3D&pas=0; thw=cn; hng=CN%7Czh-CN%7CCNY%7C156',
    'Referer': 'https://s.taobao.com/',
}

# One compiled pattern per output column, in the same order as the CSV
# columns (title, sale, comment, price, id). Compiled once, outside the loop.
_FIELD_PATTERNS = (
    re.compile(r'''"raw_title":"(.*?)"'''),
    re.compile(r'''"view_sales":"(.*?)人付款"'''),
    re.compile(r'''"comment_count":"(.*?)"'''),
    re.compile(r'''"view_price":"(.*?)"'''),
    re.compile(r'''"nid":"(.*?)"'''),
)

data = []
# NOTE(review): the offsets run 44, 88, ..., 1760, so the page at offset 0 is
# never requested — confirm whether skipping the first page is intended.
for i in range(1, 41):
    # The `s` parameter is a plain integer offset; the original template had a
    # stray trailing "d" after the placeholder, producing values like "44d".
    page_url = 'https://s.taobao.com/search?q=%E7%99%BD%E9%85%92&ie=utf8&s={}'.format(i * 44)
    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        resp = requests.get(page_url, headers=headers, timeout=10)
        resp.raise_for_status()
        text = resp.content.decode('utf-8')
    except (requests.RequestException, UnicodeDecodeError) as err:
        # Best effort: report the failed page and carry on with the next one,
        # without hiding unrelated errors such as KeyboardInterrupt.
        print('Skipping {}: {}'.format(page_url, err))
    else:
        columns = [pattern.findall(text) for pattern in _FIELD_PATTERNS]
        if len({len(values) for values in columns}) > 1:
            # Values are paired purely by position, so unequal counts mean
            # some rows on this page may mix fields from different items.
            print('Warning: field counts differ on {}: {}'.format(
                page_url, [len(values) for values in columns]))
        # zip() stops at the shortest list, which keeps exactly the rows the
        # index-based loop could build before running out of values.
        data.extend(list(row) for row in zip(*columns))
    # Pause between requests (also after a failure) to avoid hammering the site.
    time.sleep(3)

data = pd.DataFrame(data, columns=['title', 'sale', 'comment', 'price', 'id'])
data.to_csv('淘宝.csv', encoding='utf-8', index=False)
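# 淘宝(正则) — "Taobao (regex)": title of the blog post this script was copied from.
# 最新推荐文章于 2022-06-28 14:45:43 发布 — blog page footer ("latest recommended article published 2022-06-28 14:45:43"); not part of the script.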