python实战5
爬取网易云音乐任意一首歌的热门评论(不能翻页),主要用到 requests模块和json模块。
这里简单介绍一下json模块:它用于在python的数据结构和JSON格式的字符串之间相互转换。json.loads(把JSON字符串还原成python的数据结构)
json.dumps(将python的数据结构序列化成JSON字符串)
本次爬取网页的思路是:找到目标文件-----下载目标文件-----提取关键数据
接下来上代码:
import requests
import json
def get_hot_comments(res):
    """Write the hot comments found in *res* to ``hot_comments.txt``.

    The response body is parsed as JSON and every entry of its
    ``hotComments`` list is written as the commenter's nickname, a blank
    line, the comment text and a separator line.  The output file is created
    in the current working directory and overwritten on every call.

    Args:
        res: Any object with a ``text`` attribute holding the JSON body
            (``main`` passes the response returned by ``get_url``).

    Raises:
        json.JSONDecodeError: If ``res.text`` is not valid JSON.
        KeyError: If a comment entry lacks ``user``/``nickname``/``content``.
    """
    comments_json = json.loads(res.text)
    # The 'hotComments' field carries the hot comments.  A payload without it
    # (or with it set to null) is treated as "no hot comments" instead of
    # crashing with a KeyError, so an empty file is written in that case.
    hot_comments = comments_json.get('hotComments') or []
    with open('hot_comments.txt', 'w', encoding='utf-8') as file:
        for each in hot_comments:
            file.write(each['user']['nickname'] + ':\n\n')
            file.write(each['content'] + '\n')
            file.write("------------------我是好看的分割线-------------\n")
def get_url(url):
    """POST to the comments endpoint for the song that *url* points to.

    Args:
        url: Song page address carrying the song id as an ``id=<digits>``
            query parameter, e.g. ``https://music.163.com/#/song?id=1293886117``.

    Returns:
        The ``requests.Response`` object returned by ``requests.post``.

    Raises:
        IndexError: If *url* has no ``id=<digits>`` parameter and contains no
            ``=`` at all (behaviour of the legacy fallback below).
        requests.RequestException: Propagated from ``requests.post``, e.g.
            when the server does not answer within the timeout.
    """
    # Function-scope import so this block brings its own dependency with it.
    import re

    # Pick out the value of the ``id`` parameter specifically.  Splitting on
    # the first '=' returned garbage such as "123&userid" whenever the URL
    # carried more than one parameter.
    match = re.search(r'[?&]id=(\d+)', url)
    if match:
        name_id = match.group(1)
    else:
        # Legacy fallback: keep accepting whatever follows the first '='.
        name_id = url.split('=')[1]

    # Send detailed headers so the server is less likely to recognise the
    # request as coming from a script.
    # NOTE(review): the referer is pinned to one fixed song page regardless of
    # name_id; presumably the server does not compare the two - verify.
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 SE 2.X MetaSr 1.0",
        "referer": "https://music.163.com/song?id=1293886117",
    }
    # Hard-coded form fields posted with every request.
    # NOTE(review): presumably captured from a browser session - confirm they
    # are still accepted by the endpoint.
    params = "MqFyU6niwPLZScQF8Vgb7wVnpPkYlaDiTgkcD0qazlMf7H2o5tMC1e3YEh6NbQx/Fa/8KbKgMeNYL/fslYQqPkDCLGKLOTPceamDmMYsQJ2QBBFjIUH7wRhjU7P0C05Lg1h6MPTJD4mZFmAheFKZwqde/czHxC/qXqVQe2X3fdKw7ai0lH0iFLJJIy7nujJr"
    encSecKey = "464437d3d765231b4ada5056493311fcca22c79f7cfffb8a9720923e0300fb841d21d7652bd312b411ba57fab7d43d031f4f13addb29acf44874c1b0822b12c5478ba64e551fefc795632fc785b2c2366d7da6295dfaac5423353ba45840b0ab01527961f9e4ee3fe0602bb534d5f2d78ee8b76a9bf1b39d56e2c6df68244aff"
    data = {"params": params, "encSecKey": encSecKey}
    target_url = "https://music.163.com/weapi/v1/resource/comments/R_SO_4_{}?csrf_token=".format(name_id)
    # Without a timeout requests waits indefinitely on an unresponsive server.
    res = requests.post(target_url, headers=headers, data=data, timeout=10)
    return res
def main():
    """Ask for a song page URL, fetch its comments and save the hot ones.

    Reads the URL from standard input, passes it to ``get_url`` and hands the
    resulting response to ``get_hot_comments``.
    """
    # Strip surrounding whitespace: a pasted URL often carries a trailing
    # space or newline that would otherwise end up inside the song id.
    url = input("请输入网址:").strip()
    res = get_url(url)
    get_hot_comments(res)
# Run the scraper only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()