爬取QQ音乐评论信息

import time

import requests
from bs4 import BeautifulSoup
# Scrape the comments of one QQ Music song, page by page, and print each
# commenter's nickname together with the comment text.
#
# The endpoint is paged with a cursor: every request sends the id of the last
# comment seen on the previous page (`lasthotcommentid`), which is an empty
# string for the first page.

# Browser-like User-Agent sent with every request.
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36 Edg/85.0.564.63'}

comment_url = "https://c.y.qq.com/base/fcgi-bin/fcg_global_comment_h5.fcg"
page_count = 5          # number of pages to request
request_timeout = 10    # seconds; without a timeout a stalled connection blocks forever

# Query parameters that are identical for every page. "pagenum" and
# "lasthotcommentid" are placeholders that are overwritten per request; they
# are listed here so the query string keeps its original parameter order.
# NOTE(review): "outCharset" asks for GB2312 while the body is parsed with
# res.json() - confirm the response decodes correctly for non-ASCII comments.
base_params = {
    "g_tk_new_20200303": "5381",
    "g_tk": "5381",
    "loginUin": "0",
    "hostUin": "0",
    "format": "json",
    "inCharset": "utf8",
    "outCharset": "GB2312",
    "notice": "0",
    "platform": "yqq.json",
    "needNewCode": "0",
    "cid": "205360772",
    "reqtype": "2",
    "biztype": "1",
    "topid": "212877900",
    "cmd": "8",
    "needmusiccrit": "0",
    "pagenum": 0,
    "pagesize": "25",
    "lasthotcommentid": "",
    "domain": "qq.com",
    "ct": "24",
    "cv": "10101010",
}

lasthotcommentid = ''
for pagenum in range(page_count):
    print("第{}页".format(pagenum))
    params = dict(base_params, pagenum=pagenum, lasthotcommentid=lasthotcommentid)
    res = requests.get(comment_url, headers=headers, params=params, timeout=request_timeout)
    # Fail loudly on HTTP errors instead of trying to parse an error page as JSON.
    res.raise_for_status()
    data = res.json()

    # The comment list can be missing, null or empty (for example once the
    # pages run out); stop instead of crashing on the [-1] lookup below.
    commentlist = (data.get('comment') or {}).get('commentlist') or []
    if not commentlist:
        print("没有更多评论")
        break

    for item in commentlist:
        # Some entries carry no 'rootcommentcontent' key; skip those.
        if 'rootcommentcontent' in item:
            print('{}:{}'.format(item['nick'], item['rootcommentcontent']))

    # The id of the last comment on this page is the cursor for the next one.
    lasthotcommentid = commentlist[-1]['commentid']
    print(lasthotcommentid)
    # Pause between requests to avoid hammering the server.
    time.sleep(1)
  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值