淘宝爬虫:手动添加 cookies 爬取商品信息,后续将添加自动登录模块并进行测试。

import re
import time
from urllib.parse import urlencode
import requests
# Paste the Cookie header copied from a logged-in browser session here.
cookie = "登录后的cookies"


def parse_cookie_string(raw):
    """Turn a browser ``Cookie`` header string into a ``{name: value}`` dict.

    Args:
        raw: Text of the form ``"name1=value1; name2=value2"``.

    Returns:
        A dict mapping cookie names to values. Whitespace around each pair
        is stripped, so the space that follows ``;`` does not end up inside
        the cookie name. Fragments without ``=`` or with an empty name are
        skipped instead of raising. Only the first ``=`` separates name from
        value, so values may themselves contain ``=``.
    """
    parsed = {}
    for fragment in raw.split(";"):
        name, separator, value = fragment.strip().partition("=")
        if not separator or not name:
            continue
        parsed[name] = value
    return parsed


# The raw header text is replaced by its parsed mapping under the same name.
cookie = parse_cookie_string(cookie)
if not cookie:
    # Without login cookies every request is sent anonymously.
    print("warning: no valid cookies configured; requests will be sent without login cookies")
class TaoBao():
    """Fetch Taobao search-result pages for a keyword and print each product.

    Args:
        query: Search keyword sent as the ``q`` URL parameter.
        cookies: Mapping of cookie names to values sent with every request.
            When ``None``, the module-level ``cookie`` mapping is used.
        pages: Number of result pages to request (default 2).
        delay: Seconds to sleep before each page URL is produced (default 3).
        timeout: Seconds ``requests`` waits for the server before giving up
            (default 10), so a stalled connection cannot hang the run.
    """

    # Compiled once for the class. The seven capture groups line up, in
    # order, with the names in _ITEM_FIELDS.
    _ITEM_PATTERN = re.compile(
        r'"raw_title":"(.*?)","pic_url":".*?","detail_url":"(.*?)","view_price":"(.*?)","view_fee":".*?","item_loc":"(.*?)","view_sales":"(.*?)","comment_count":"(.*?)","user_id":".*?","nick":"(.*?)","shopcard"',
        re.S,
    )
    _ITEM_FIELDS = (
        "raw_title",
        "detail_url",
        "view_price",
        "item_loc",
        "view_sales",
        "comment_count",
        "nick",
    )
    # Step applied to the ``s`` offset parameter between consecutive pages.
    _PAGE_SIZE = 48

    def __init__(self, query, cookies=None, pages=2, delay=3, timeout=10):
        self.headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36"}
        self.cookies = cookie if cookies is None else cookies
        self.base_url = "https://s.taobao.com/search?"
        self.query = query
        self.pages = pages
        self.delay = delay
        self.timeout = timeout

    def run(self):
        """Request every result page in turn and print the products found."""
        for url in self.get_url():
            print(url)
            res = self.get_message(url)
            self.get_parse(res)

    def get_parse(self, res):
        """Extract product records from a response, print them, return them.

        Args:
            res: Response-like object whose ``content`` attribute holds the
                page body as bytes; it is decoded with the default codec.

        Returns:
            A list with one dict per matched product, keyed by the names in
            ``_ITEM_FIELDS``. The list is empty when nothing matches. Values
            are the raw captured text, so any escape sequences in the page
            source (for example in ``detail_url``) are left as they are.
        """
        text = res.content.decode()
        items = [
            dict(zip(self._ITEM_FIELDS, groups))
            for groups in self._ITEM_PATTERN.findall(text)
        ]
        for item in items:
            print(item)
        return items

    def get_message(self, url):
        """GET ``url`` with the configured headers, cookies and timeout."""
        return requests.get(
            url,
            headers=self.headers,
            cookies=self.cookies,
            timeout=self.timeout,
        )

    def get_url(self):
        """Yield one search URL per page, sleeping ``delay`` seconds first.

        The progress line for a page is printed when the generator is
        resumed, i.e. after the consumer has finished handling that page.
        """
        for page in range(self.pages):
            time.sleep(self.delay)
            yield self._build_url(page)
            print("第", page + 1, "下载结束")

    def _build_url(self, page):
        """Return the search URL for the zero-based ``page`` index."""
        params = {
            "q": self.query,
            "bcoffset": "4",
            # Given unencoded: urlencode() turns "," into "%2C" itself. The
            # pre-encoded "%2C48" used to be sent double-encoded as "%252C48".
            "p4ppushleft": ",48",
            "ntoffset": "4",
            "s": str(self._PAGE_SIZE * page),
        }
        return self.base_url + urlencode(params)
# Search keyword: the name of the product to look up.
taobao = TaoBao(query="电脑")
taobao.run()

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值