淘宝搜索结果爬取(正则表达式提取)

import requests
import re
import pandas as pd
import csv
import time


# HTTP request headers sent with every search-page request below.
# - User-Agent: identifies the client as a mobile Chrome browser.
# - Cookie: a hard-coded cookie string copied verbatim into the source.
#   NOTE(review): it contains account/session-looking fields (e.g. `unb`,
#   `lgc`, `_tb_token_`), so it should be treated as a credential and not
#   committed; presumably it was captured from a logged-in browser session
#   and has since expired -- replace it with a fresh value before running.
# - Referer: set to the search site's root URL.
headers = {
 'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Mobile Safari/537.36',
'Cookie': '_m_h5_tk=ef0513f22718f8072d0b40bff34a2e65_1636339552583; _m_h5_tk_enc=323099f2cc8c3d3691e5a49dc981462e; cna=xfplGOXnUTQCAXWsv88mNiok; _samesite_flag_=true; cookie2=13b96c3b20784d179cff7f4a20377a6b; t=cb34fdefa9ece00d42377cc813e934e5; _tb_token_=e863be0e353bb; xlly_s=1; sgcookie=E100uo0BCqdP0v5ES32UKT%2FPFA1QzkLzx0FcoWDp5objoqojrJxpDM2q06Rq32suEe1oaVPCZpBSqjkBmTi9gSpgCAZhII6fjWaTLRzr%2FCrarCM%3D; unb=2211782433192; uc3=nk2=F5RARoLmdcWFbE0%3D&id2=UUpgR1XATCjfdpiQJw%3D%3D&vt3=F8dCujuuTwIonu6TYDg%3D&lg2=V32FPkk%2Fw0dUvg%3D%3D; csg=46d85c9d; lgc=tb543843493; cancelledSubSites=empty; cookie17=UUpgR1XATCjfdpiQJw%3D%3D; dnk=tb543843493; skt=35b3d8c4327d964c; existShop=MTYzNjMzMTI5Ng%3D%3D; uc4=id4=0%40U2gqyOiDNzpzjaWmUtpOW9ILo6Ejj2Ii&nk4=0%40FY4L6o%2FDoNPourh5SH9Egrwk16rrzA%3D%3D; tracknick=tb543843493; _cc_=WqG3DMC9EA%3D%3D; _l_g_=Ug%3D%3D; sg=32f; _nk_=tb543843493; cookie1=UUGrcDCxCr61znivd%2FfrsVifpdtSuQsD8bu6ew66QPQ%3D; enc=XiJ2L3Vb6QzxjpJPrRRNNIuS2WR637S2ClZCdzQzDOxn7oX%2FbwBl1epZUknvxa%2BFFCObsbtSas4gHnjQS2F%2BnQqlJ70mmbT1BnGQyYiUzCw%3D; JSESSIONID=AC28A352F512EBAE6BDE9B872523F7C5; tfstk=cE6fBPY7PNBrHgPZ3iZr728AgWpNZYHX4oTlc1t-U14zMUsfipMeRVDfj4pwX31..; l=eBTf0AJrgXIACz12KOfwourza77OSIRAguPzaNbMiOCP_I1p59olW6C9qM89C3GVh6VeR3o2xuSvBeYBcS0H3CPM2j-la_kmn; isg=BBERToTPFKjwgXi8DFjQLa1SIBurfoXwcD98i_OmDVj3mjHsO86VwL94OG58kh0o; mt=ci=35_1; uc1=cookie21=W5iHLLyFe3xm&existShop=false&cookie15=VFC%2FuZ9ayeYq2g%3D%3D&cookie16=WqG3DMC9UpAPBHGz5QBErFxlCA%3D%3D&cookie14=Uoe3cc1WVxc3tw%3D%3D&pas=0; thw=cn; hng=CN%7Czh-CN%7CCNY%7C156',
  'Referer': 'https://s.taobao.com/',
}


data = []
for i in range(1,41):
    try:
        page_url = 'https://s.taobao.com/search?q=%E7%99%BD%E9%85%92&ie=utf8&s={}d'.format(i * 44)
        resp = requests.get(page_url, headers=headers)
        text = resp.content.decode('utf-8')
        # print(text)
        title = re.findall(r'''"raw_title":"(.*?)"''',text)
        sale = re.findall(r'''"view_sales":"(.*?)人付款"''',text)
        price = re.findall(r'''"view_price":"(.*?)"''',text)
        id = re.findall(r'''"nid":"(.*?)"''',text)
        comment = re.findall(r'''"comment_count":"(.*?)"''',text)
    # print(title)
        for j in range(len(title)):
            data.append([title[j], sale[j], comment[j], price[j], id[j]])
        time.sleep(3)
    except:
        pass

data = pd.DataFrame(data, columns=['title', 'sale', 'comment', 'price', 'id'])
data.to_csv('淘宝.csv', encoding='utf-8', index=False)
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

juicy-hua

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值