参考:爬取网易云音乐评论。评论是典型的 Ajax 加载,大多数人的做法是去破解 JS 加密,有点繁琐。
这里先爬取周杰伦《晴天》这一首歌的评论,因为数据量稍大;以后再爬取整个歌手的歌曲评论。
下面是使用加密 API 的情况:
import requests
def get_comment():
    """Fetch comments from the NetEase Cloud Music weapi endpoint and print them.

    Posts the pre-encrypted ``params`` / ``encSecKey`` form payload to the
    comments URL, prints the HTTP status code and the decoded JSON body,
    then prints one line per comment: nickname, user id, avatar URL,
    comment id, comment time and comment text.

    Returns:
        None. Everything is written to stdout.

    Raises:
        requests.exceptions.RequestException: if the request fails
            (connection error, proxy error, or the 10 second timeout).
        ValueError: (or a subclass) if the response body is not JSON.
    """
    url = r'https://music.163.com/weapi/v1/resource/comments/R_SO_4_2069470?csrf_token='
    headers = {
        'Host': 'music.163.com',
        # NOTE(review): the song id in the Referer differs from the resource
        # id in the URL above - confirm which one is intended.
        'Referer': 'https://music.163.com/song?id=482999668',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/71.0.3578.98 Safari/537.36',
        # Add cookies here, otherwise the server answers
        # {'code': -460, 'msg': 'Cheating'}.
        # Proxies are required as well; the author made do with their own
        # VPN instead.
    }
    # The original literal listed the 'https' key twice. A dict keeps only the
    # last value for a repeated key, so 'https://110.52.234.72' was never
    # used; only the effective entry is kept to make that explicit.
    proxies = {
        'https': 'https://119.101.112.66',
    }
    formdata = {
        "params": 'Wk3drXP2/Nj8YbOQoL3ORmBM784lqxwm0VELQyBipJWx/rd8fUmklRZ6vL+G1f2dbZ/8WE7f25gWe+2BdXp3+d2AwkiTy5DxeVd4SiHX5qat+jU642hSysQVtHDfJHmCi6rjndr/YEBSccqnzIbueeA9H08OlzAZoYa5T6xlbQpxgtTdX5E1MF6R71ykxkS8',
        "encSecKey": '1d5e93ee97662d6f9dfaf07dbe4b9d4f9ffe6b90b484d8acc14696214a556000198d51ce3d87d9123db07f96307c919c02d84fa4a204e9d0a387404141fd43400fb2ec9aaa07ae99d99df133cc6d4c31ee8ab7859d83351b154c1ab2bed81a84159a25956ed1485551639e37fc3502ab049a03051ca40f85ef4dd648aabe9286',
    }
    # The encrypted form payload must actually be sent as the POST body; the
    # original built it but never passed it to requests.post. The timeout
    # keeps a dead proxy from blocking the script forever.
    response = requests.post(
        url=url,
        headers=headers,
        data=formdata,
        proxies=proxies,
        timeout=10,
    )
    print(response.status_code)
    result = response.json()
    print(result)
    # An error reply (e.g. the 'Cheating' response mentioned above) carries no
    # "comments" key; fall back to an empty list instead of iterating None.
    comments = result.get("comments") or []
    for comment in comments:
        # Guard against a comment without a 'user' object.
        user = comment.get('user') or {}
        img_url = user.get('avatarUrl')
        name = user.get('nickname')
        uid = user.get('userId')
        commentid = comment.get("commentId")
        commenttime = comment.get("time")
        content = comment.get('content')
        print(name, uid, img_url, commentid, commenttime, content)
# Module-level call: fetch and print the comments as soon as this file runs.
get_comment()
偶然看到没有加密的url尝试了下,出现requests.exceptions.Pr