Python 爬虫

import csv
import random
import re
import time

import requests

# Result-page size assumed by the crawler: it is both the step of the "s"
# offset query parameter and the cap on rows stored per page.
_ITEMS_PER_PAGE = 44

# Seconds to wait for the server before giving up on a request; without a
# timeout a stalled connection would block the crawl indefinitely.
_REQUEST_TIMEOUT = 10

# One pattern per CSV column, in output order (judging by the original
# variable names: shop name, product title, price, shipping location,
# number of buyers). Compiled once instead of on every page.
_FIELD_PATTERNS = tuple(re.compile(pattern) for pattern in (
    r'"nick":"(.*?)"',
    r'"raw_title":"(.*?)"',
    r'"view_price":"(.*?)"',
    r'"item_loc":"(.*?)"',
    r'"view_sales":"(.*?)人付款"',
))


def _extract_rows(html):
    """Return the item rows scraped from one search-result page.

    Every field is matched independently with its own regex and the matches
    are paired up by position. Pairing stops at the shortest match list, so a
    field that is missing for some items yields fewer rows instead of an
    IndexError. At most ``_ITEMS_PER_PAGE`` rows are returned.

    NOTE(review): because fields are paired purely by position, an item that
    lacks one field (e.g. no "人付款" sales text) shifts that column for all
    following rows. Parsing the embedded JSON would avoid this; confirm
    whether it matters for the data being collected.
    """
    columns = [pattern.findall(html) for pattern in _FIELD_PATTERNS]
    return [list(row) for row in zip(*columns)][:_ITEMS_PER_PAGE]


def acquireData(keyword='平板', pages=100):
    """Crawl search-result pages and append the scraped items to disk.

    For each page the response body is scanned with regexes; every complete
    row is appended to ``pingban_1.csv`` and the product titles are appended
    (with no separator between them, as before) to
    ``shangpinmingcheng_1.txt``. A random 2-4 second pause follows each page.

    Args:
        keyword: Search term sent as the ``q`` query parameter.
        pages: Number of result pages to request, starting at offset 0.

    Raises:
        requests.RequestException: If a request fails or times out.
        OSError: If an output file cannot be opened or written.
    """
    # SECURITY NOTE(review): the cookie below is a hard-coded logged-in
    # session copied from a browser. It should not live in source control
    # and will stop working once the session expires.
    headers = {
        'authority': 's.taobao.com',
        'cache-control': 'max-age=0',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-user': '?1',
        'sec-fetch-dest': 'document',
        'referer': 'https://s.taobao.com/search?q=%E5%B9%B3%E6%9D%BF&imgfile=&commend=all&ssid=s5-e&search_type=item&sourceId=tb.index&spm=a21bo.2017.201856-taobao-item.1&ie=utf8&initiative_id=tbindexz_20170306',
        'accept-language': 'zh-CN,zh;q=0.9',
        'cookie': 'miid=886571451609503279; thw=cn; tracknick=%5Cu6F20%5Cu4E36%5Cu6D41%5Cu5E74; _cc_=UIHiLt3xSw%3D%3D; cna=XPY/Gcr64gICAd9oPwK6cZxh; __guid=154677242.610519076024746100.1624545374981.7412; hng=CN%7Czh-CN%7CCNY%7C156; _m_h5_tk=2c5f0fe716a02efbd2f0d1edc6bc542d_1627305813161; _m_h5_tk_enc=dd9c3694c5024570f06e00e5f4d17631; xlly_s=1; _samesite_flag_=true; cookie2=18f9e16f49510185a1b9646a8b714fe3; t=438d163c95833f6c8d65bb2403c13f3f; _tb_token_=ee55ebef1a838; sgcookie=E1004c16ExrCAD5x5j7kF8p4hlGs4f%2B1NcdPUrpCS4ycfh2OEZL0cUdvMGO8AtaHwNsybpZF4YFvqxGzT0M%2B337f%2Fw%3D%3D; unb=2747656955; uc3=nk2=p2bODe0%2FwE4%3D&vt3=F8dCuwJMTt25WuxtoX4%3D&lg2=V32FPkk%2Fw0dUvg%3D%3D&id2=UU8M9asjFAHX6A%3D%3D; csg=52a61766; lgc=%5Cu6F20%5Cu4E36%5Cu6D41%5Cu5E74; cancelledSubSites=empty; cookie17=UU8M9asjFAHX6A%3D%3D; dnk=%5Cu6F20%5Cu4E36%5Cu6D41%5Cu5E74; skt=d16ddf255b675ee3; existShop=MTYyNzI5ODMxOA%3D%3D; uc4=id4=0%40U22LO6DdRA6fglmTmrOjRlQC%2BSH7&nk4=0%40pVB%2Bf9qqQIYv2F%2BQgBgw%2FAOEuA%3D%3D; _l_g_=Ug%3D%3D; sg=%E5%B9%B454; _nk_=%5Cu6F20%5Cu4E36%5Cu6D41%5Cu5E74; cookie1=AiHKKKelDUvWRpdjtVjie2WqiG9GbZMAsBqLCOkT2H8%3D; enc=%2BhbdZ6vvp5GZYz6EoiYGDpJGNDOmzrBisa4yuSPRAlZ7ldU6FBRni09MHfDOHw%2BsIbxlchkO1d3fvAYynfTNzw%3D%3D; JSESSIONID=B53B6E4271AACD4A0697EA9F3178F9F9; monitor_count=1; mt=ci=88_1; uc1=cookie14=Uoe2yte7RKedug%3D%3D&cookie16=URm48syIJ1yk0MX2J7mAAEhTuw%3D%3D&pas=0&existShop=false&cookie21=UtASsssmeW6lpyd%2BB%2B3t&cookie15=WqG3DMC9VAQiUQ%3D%3D; tfstk=cFIPBsASVuEzkb-QW3tEdRQVBRxRZSXlKmJ6rZa7snwQEI8liRcpnuUJ3pisPUf..; l=eBPZ4LmIvZr8PwmDBOfwourza77OSIRAIuPzaNbMiOCP_85p5e_dW6TU29Y9C3GVh6jJR37vCcawBeYBq61Inxv92j-la_kmn; isg=BL-_Q5PT4wiHOtks72scEePaTpNJpBNG4JERY1GMW261YN_iWXSjlj04pjCeGeu-',
    }

    crawled = 0
    for page in range(pages):
        params = (
            # Search keyword (the product name); the escaped form
            # \u5E73\u677F works as well for the default keyword.
            ('q', keyword),
            ('imgfile', ''),
            ('commend', 'all'),
            ('ssid', 's5-e'),
            ('search_type', 'item'),
            ('sourceId', 'tb.index'),
            ('spm', 'a21bo.2017.201856-taobao-item.1'),
            ('ie', 'utf8'),
            ('initiative_id', 'tbindexz_20170306'),
            # Result offset of the requested page.
            ('s', str(page * _ITEMS_PER_PAGE)),
        )
        # NOTE(review): the host "s.XXX.com" looks like a redacted
        # placeholder (the headers above point at s.taobao.com); it is kept
        # unchanged here and must be replaced with the real host to run.
        response = requests.get(
            'https://s.XXX.com/search',
            headers=headers,
            params=params,
            timeout=_REQUEST_TIMEOUT,
        )

        rows = _extract_rows(response.text)

        # newline='' lets the csv module control line endings itself.
        with open('pingban_1.csv', 'a', newline='', encoding='utf-8') as csv_file:
            csv.writer(csv_file).writerows(rows)

        # Titles are appended back to back without a separator, exactly as
        # the per-row writes did before; the file is only touched when there
        # is something to write.
        if rows:
            with open('shangpinmingcheng_1.txt', 'a', encoding='utf-8') as f:
                f.writelines(row[1] for row in rows)

        crawled = page + 1
        print(f"已爬取完第{crawled}页数据.......")
        # Random pause between requests to avoid hammering the server.
        time.sleep(random.randint(2, 4))
    print(f"总共爬取{crawled}页数据.......")

# Run the crawl only when this file is executed as a script, not on import.
if __name__ == '__main__':
    acquireData()

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值