import time
import requests
from bs4 import BeautifulSoup
# Browser-like User-Agent header sent with every request.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/85.0.4183.121 Safari/537.36 Edg/85.0.564.63'
    )
}

COMMENT_API_URL = "https://c.y.qq.com/base/fcgi-bin/fcg_global_comment_h5.fcg"
PAGE_COUNT = 5          # number of pages requested per run
REQUEST_TIMEOUT = 10    # seconds; without a timeout a stalled request hangs forever
PAGE_DELAY = 1          # seconds to pause between consecutive page requests


def build_params(pagenum, lasthotcommentid):
    """Return the query parameters for one page of the comment endpoint.

    Args:
        pagenum: Page index, sent as the ``pagenum`` parameter.
        lasthotcommentid: ``commentid`` of the last entry on the previous
            page ('' for the first request), sent as ``lasthotcommentid``.

    Returns:
        A new dict of query parameters. All values other than the two
        arguments are fixed; ``topid`` presumably identifies the song whose
        comments are fetched (TODO confirm against the API).
    """
    return {
        "g_tk_new_20200303": "5381",
        "g_tk": "5381",
        "loginUin": "0",
        "hostUin": "0",
        "format": "json",
        "inCharset": "utf8",
        "outCharset": "GB2312",
        "notice": "0",
        "platform": "yqq.json",
        "needNewCode": "0",
        "cid": "205360772",
        "reqtype": "2",
        "biztype": "1",
        "topid": "212877900",
        "cmd": "8",
        "needmusiccrit": "0",
        "pagenum": pagenum,
        "pagesize": "25",
        "lasthotcommentid": lasthotcommentid,
        "domain": "qq.com",
        "ct": "24",
        "cv": "10101010",
    }


def fetch_comment_list(pagenum, lasthotcommentid):
    """Request one page and return the entries under ``comment.commentlist``.

    Returns:
        The list found at ``data['comment']['commentlist']`` in the decoded
        JSON body, or an empty list when that path is missing or null.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If no response arrives within REQUEST_TIMEOUT.
    """
    res = requests.get(
        COMMENT_API_URL,
        headers=headers,
        params=build_params(pagenum, lasthotcommentid),
        timeout=REQUEST_TIMEOUT,
    )
    # Fail loudly on HTTP errors instead of trying to JSON-decode an error page.
    res.raise_for_status()
    data = res.json()
    return (data.get('comment') or {}).get('commentlist') or []


def format_comments(comment_list):
    """Return ``"nick:content"`` lines for the printable entries.

    Entries without a ``rootcommentcontent`` key are skipped.
    """
    return [
        '{}:{}'.format(item['nick'], item['rootcommentcontent'])
        for item in comment_list
        if 'rootcommentcontent' in item
    ]


def main():
    """Print the comments of the first PAGE_COUNT pages, one page at a time."""
    lasthotcommentid = ''
    for pagenum in range(PAGE_COUNT):
        print("第{}页".format(pagenum))
        comment_list = fetch_comment_list(pagenum, lasthotcommentid)
        if not comment_list:
            # An empty page has no last entry to continue from; indexing
            # [-1] would raise IndexError, so stop paging here.
            break
        for line in format_comments(comment_list):
            print(line)
        # The next request continues after the last comment of this page.
        lasthotcommentid = comment_list[-1]['commentid']
        print(lasthotcommentid)
        time.sleep(PAGE_DELAY)


if __name__ == "__main__":
    main()
爬取 QQ 音乐评论信息
最新推荐文章于 2021-08-15 10:00:16 发布