import csv
import random
import re
import time

import requests
def acquireData():
    """Scrape 100 search-result pages and append the parsed items to disk.

    For every page the function issues one GET request, pulls five fields
    out of the response text with regular expressions (shop nick, raw title,
    price, shipping location, number of buyers), and then:

    * appends one row per item to ``pingban_1.csv`` in the column order
      shop, title, price, location, buyers;
    * appends the item titles, concatenated without any separator, to
      ``shangpinmingcheng_1.txt``.

    At most 44 items are taken from each page, and only as many as all five
    field lists can supply. The function sleeps a random 2-4 seconds between
    pages and prints a progress line after each one.

    NOTE: the five fields are extracted independently and paired up by
    position, so a page on which one item lacks a field (for example a
    ``view_sales`` value not ending in "人付款") will shift the following
    values of that column relative to the others.

    Raises:
        requests.RequestException: if a request fails or times out.
        OSError: if one of the output files cannot be written.
    """
    # NOTE(review): the cookie below is a hard-coded login session; it
    # presumably expires and should be supplied from configuration instead
    # of living in source control -- confirm with the owner.
    headers = {
        'authority': 's.taobao.com',
        'cache-control': 'max-age=0',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-user': '?1',
        'sec-fetch-dest': 'document',
        'referer': 'https://s.taobao.com/search?q=%E5%B9%B3%E6%9D%BF&imgfile=&commend=all&ssid=s5-e&search_type=item&sourceId=tb.index&spm=a21bo.2017.201856-taobao-item.1&ie=utf8&initiative_id=tbindexz_20170306',
        'accept-language': 'zh-CN,zh;q=0.9',
        'cookie': 'miid=886571451609503279; thw=cn; tracknick=%5Cu6F20%5Cu4E36%5Cu6D41%5Cu5E74; _cc_=UIHiLt3xSw%3D%3D; cna=XPY/Gcr64gICAd9oPwK6cZxh; __guid=154677242.610519076024746100.1624545374981.7412; hng=CN%7Czh-CN%7CCNY%7C156; _m_h5_tk=2c5f0fe716a02efbd2f0d1edc6bc542d_1627305813161; _m_h5_tk_enc=dd9c3694c5024570f06e00e5f4d17631; xlly_s=1; _samesite_flag_=true; cookie2=18f9e16f49510185a1b9646a8b714fe3; t=438d163c95833f6c8d65bb2403c13f3f; _tb_token_=ee55ebef1a838; sgcookie=E1004c16ExrCAD5x5j7kF8p4hlGs4f%2B1NcdPUrpCS4ycfh2OEZL0cUdvMGO8AtaHwNsybpZF4YFvqxGzT0M%2B337f%2Fw%3D%3D; unb=2747656955; uc3=nk2=p2bODe0%2FwE4%3D&vt3=F8dCuwJMTt25WuxtoX4%3D&lg2=V32FPkk%2Fw0dUvg%3D%3D&id2=UU8M9asjFAHX6A%3D%3D; csg=52a61766; lgc=%5Cu6F20%5Cu4E36%5Cu6D41%5Cu5E74; cancelledSubSites=empty; cookie17=UU8M9asjFAHX6A%3D%3D; dnk=%5Cu6F20%5Cu4E36%5Cu6D41%5Cu5E74; skt=d16ddf255b675ee3; existShop=MTYyNzI5ODMxOA%3D%3D; uc4=id4=0%40U22LO6DdRA6fglmTmrOjRlQC%2BSH7&nk4=0%40pVB%2Bf9qqQIYv2F%2BQgBgw%2FAOEuA%3D%3D; _l_g_=Ug%3D%3D; sg=%E5%B9%B454; _nk_=%5Cu6F20%5Cu4E36%5Cu6D41%5Cu5E74; cookie1=AiHKKKelDUvWRpdjtVjie2WqiG9GbZMAsBqLCOkT2H8%3D; enc=%2BhbdZ6vvp5GZYz6EoiYGDpJGNDOmzrBisa4yuSPRAlZ7ldU6FBRni09MHfDOHw%2BsIbxlchkO1d3fvAYynfTNzw%3D%3D; JSESSIONID=B53B6E4271AACD4A0697EA9F3178F9F9; monitor_count=1; mt=ci=88_1; uc1=cookie14=Uoe2yte7RKedug%3D%3D&cookie16=URm48syIJ1yk0MX2J7mAAEhTuw%3D%3D&pas=0&existShop=false&cookie21=UtASsssmeW6lpyd%2BB%2B3t&cookie15=WqG3DMC9VAQiUQ%3D%3D; tfstk=cFIPBsASVuEzkb-QW3tEdRQVBRxRZSXlKmJ6rZa7snwQEI8liRcpnuUJ3pisPUf..; l=eBPZ4LmIvZr8PwmDBOfwourza77OSIRAIuPzaNbMiOCP_85p5e_dW6TU29Y9C3GVh6jJR37vCcawBeYBq61Inxv92j-la_kmn; isg=BL-_Q5PT4wiHOtks72scEePaTpNJpBNG4JERY1GMW261YN_iWXSjlj04pjCeGeu-',
    }

    # Query parameters that stay the same for every page; only the 's'
    # offset, appended per request below, changes.
    base_params = (
        ('q', '平板'),  # search keyword (product name); the escaped form \u5E73\u677F also works
        ('imgfile', ''),
        ('commend', 'all'),
        ('ssid', 's5-e'),
        ('search_type', 'item'),
        ('sourceId', 'tb.index'),
        ('spm', 'a21bo.2017.201856-taobao-item.1'),
        ('ie', 'utf8'),
        ('initiative_id', 'tbindexz_20170306'),
    )

    total_pages = 100
    items_per_page = 44  # also the step of the 's' offset between pages

    # Compile the field patterns once instead of re-resolving them per page.
    title_re = re.compile(r'"raw_title":"(.*?)"')
    price_re = re.compile(r'"view_price":"(.*?)"')
    location_re = re.compile(r'"item_loc":"(.*?)"')
    sales_re = re.compile(r'"view_sales":"(.*?)人付款"')
    shop_re = re.compile(r'"nick":"(.*?)"')

    for page in range(total_pages):
        params = base_params + (('s', str(page * items_per_page)),)
        # NOTE(review): the host is a literal "XXX" placeholder while the
        # 'authority' and 'referer' headers name s.taobao.com; substitute the
        # real host before running.
        response = requests.get(
            'https://s.XXX.com/search',
            headers=headers,
            params=params,
            timeout=30,  # without a timeout a stalled connection blocks forever
        )
        html = response.text

        titles = title_re.findall(html)
        prices = price_re.findall(html)
        locations = location_re.findall(html)
        sales = sales_re.findall(html)
        shops = shop_re.findall(html)

        # zip() stops at the shortest list, which replaces the former
        # index loop that relied on a bare ``except`` to skip missing items;
        # the slice keeps the cap of 44 rows per page.
        rows = list(zip(shops, titles, prices, locations, sales))[:items_per_page]

        with open('pingban_1.csv', 'a', newline='', encoding='utf-8') as csv_file:
            csv.writer(csv_file).writerows(rows)

        if rows:
            # Titles are appended back to back, with no separator between them.
            with open('shangpinmingcheng_1.txt', 'a', encoding='utf-8') as f:
                f.write(''.join(row[1] for row in rows))

        print(f"已爬取完第{page+1}页数据.......")
        # Random pause between requests to avoid hammering the server.
        time.sleep(random.randint(2, 4))

    print(f"总共爬取{total_pages}页数据.......")
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    acquireData()
# Tags: python, web scraper
# First published 2021-07-26 18:54:05