天猫商品评论获取

代码很简单,朋友需要就简单写了一下
只针对天猫,其他没有测试

# -*- coding: utf-8 -*-
"""
-------------------------------------------------
    File Name   :   Tianmao.py
    date        :   19-1-23
    Author      :    Hebel
-------------------------------------------------
    Description:
    note: 
-------------------------------------------------
"""
import requests
import re
import json
from urllib import parse


# Matches the JSONP wrapper ``jsonp<digits>( ... )`` and captures the payload.
# Compiled once so the per-page loop does not re-parse the pattern.
_JSONP_PATTERN = re.compile(r'jsonp\d+\((.*)\)')

# Page size assumed when turning the total comment count into a page count.
_COMMENTS_PER_PAGE = 20


def _extract_rate_detail(text):
    """Return the ``rateDetail`` dict of a JSONP response body, or None if absent."""
    match = _JSONP_PATTERN.search(text)
    if match is None:
        return None
    payload = json.loads(match.group(1))
    if not isinstance(payload, dict):
        return None
    rate_detail = payload.get("rateDetail")
    return rate_detail if isinstance(rate_detail, dict) else None


def collect_tianmao_goods_comments(goods_url, cookies, fied_page_number=0):
    """
    Collect the comments of a Tmall product.

    The first page is requested to read the total comment count; the
    remaining pages are then requested one by one and their ``rateList``
    entries are concatenated.

    :param goods_url: product page URL; its query string must contain ``id``
        (the item id). ``user_id`` (the seller id) is optional and falls back
        to "725677994", which the original author notes is the Tmall
        supermarket seller id.
    :param cookies: dict of cookies sent with every request; must contain an
        ``x5sec`` entry.
    :param fied_page_number: maximum number of pages to fetch. A value <= 0
        (the default) fetches every page; a value larger than the number of
        available pages is clamped to that number.
    :return: a list with the ``rateList`` entries of all fetched pages; the
        string "获取评论失败" when no comment could be collected; ``None`` when
        the first response carries no parsable ``rateDetail`` payload.
    :raises AssertionError: if ``cookies`` is not a dict or ``goods_url`` has
        no ``id`` query parameter.
    :raises ValueError: if ``cookies`` has no ``x5sec`` value.
    :raises IOError: if the first request does not answer with HTTP 200.
    """
    # Raised explicitly (instead of using ``assert``) so the checks survive
    # ``python -O`` while callers still see the same exception type.
    if not isinstance(cookies, dict):
        raise AssertionError("cookies must be a dict")
    if cookies.get("x5sec", None) is None:
        raise ValueError("无效的cookies,缺失 x5sec 值")

    query = parse.parse_qs(parse.urlparse(goods_url).query)
    item_id = query["id"][0] if query.get("id") else None  # item id
    # Seller id; 725677994 is the fallback used when the URL carries none.
    seller_id = query["user_id"][0] if query.get("user_id") else "725677994"
    if not (item_id and seller_id):
        raise AssertionError("goods_url must contain an 'id' query parameter")

    url = "https://rate.tmall.com/list_detail_rate.htm?itemId={id}&spuId=385196899&sellerId={user_id}&order=3&currentPage={page_number}"
    start_url = url.format(id=item_id, user_id=seller_id, page_number=1)
    headers = {
        'accept': "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        'accept-encoding': "gzip, deflate, br",
        'accept-language': "zh-CN,zh;q=0.9",
        'user-agent': "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) snap Chromium/71.0.3578.98 Chrome/71.0.3578.98 Safari/537.36",
    }
    print(start_url)
    resp = requests.get(url=start_url, headers=headers, cookies=cookies, timeout=6)
    # Compare by value: ``is`` on an int only works by interpreter accident.
    if resp.status_code != 200:
        raise IOError("请求失败")

    rate_detail = _extract_rate_detail(resp.content.decode("utf8"))
    if rate_detail is None:
        # No JSONP payload / no rateDetail in the first answer.
        return None

    paginator = rate_detail.get("paginator") or {}
    items = paginator.get("items") or 0
    print("当前商品评论总数:{count}".format(count=items))
    # Ceiling division: a partially filled last page still counts as a page.
    page_number_all = (items + _COMMENTS_PER_PAGE - 1) // _COMMENTS_PER_PAGE
    print("当前总页数", page_number_all)
    if fied_page_number <= 0:
        page_number = page_number_all
    else:
        page_number = min(fied_page_number, page_number_all)

    comments_all_list = []
    if page_number >= 1:
        # Page 1 is already in hand from the request above; reuse it rather
        # than downloading it a second time.
        comments_all_list.extend(rate_detail.get("rateList") or [])
    for pn in range(2, page_number + 1):
        new_url = url.format(id=item_id, user_id=seller_id, page_number=pn)
        resp = requests.get(url=new_url, headers=headers, cookies=cookies, timeout=6)
        if resp.status_code != 200:
            # Best effort: a failed page is skipped, the others are kept.
            continue
        page_detail = _extract_rate_detail(resp.content.decode("utf8"))
        if page_detail is None:
            continue
        comments_all_list.extend(page_detail.get("rateList") or [])

    if comments_all_list:
        return comments_all_list
    return "获取评论失败"




if __name__ == '__main__':
    # Example run: fetch up to `fied_page_number` pages of comments for one
    # product and report how many comments were collected.

    # Product page URL.
    goods_url = "https://detail.tmall.com/item.htm?spm=a220m.1000858.1000725.13.17635faanAojEG&id=19903071933&skuId=3629482623721&areaId=440100&user_id=1601196901&cat_id=50076895&is_b=1&rn=59a6f41da133ea26d10f936220f732db"

    # Comments can only be fetched with the cookie of a logged-in account;
    # replace this sample value with your own.
    cookies = {'x5sec': '7b22726174656d616e616765723b32223a226361383530303865386661396433336465363930353433363061313130643265434a4c2f6f4f4946454d65337063542f394e37725a426f4d4d6a55324d446b794d544d774d447378227d'}

    fied_page_number = 7

    comments_data = collect_tianmao_goods_comments(goods_url=goods_url, cookies=cookies, fied_page_number=fied_page_number)
    if isinstance(comments_data, list):
        print(len(comments_data))
    elif comments_data is not None:
        # The collector reports "nothing collected" as a message string;
        # print the message itself instead of its character count.
        print(comments_data)
  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 4
    评论
评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

_JackSparrow

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值