这里以爬取网易云音乐某一首歌的评论为例。爬取歌词等其他数据的方法完全相同,只是请求参数不同;比较难的地方在于弄清楚参数的加密和解密过程。
评论、歌词等内容都是通过 Ajax 请求加载的,但请求所带的参数(即下面的 params 和 encSecKey)是经过加密的。
import requests
import json
from fake_useragent import UserAgent
from Crypto.Cipher import AES
from base64 import b64encode
if __name__ == "__main__":
    # Endpoint that receives the encrypted form fields built further down.
    # The literal previously had a pair of double quotes embedded inside the
    # single-quoted string, so the value did not start with "https://".
    url = "https://music.163.com/weapi/comment/resource/comments/get?csrf_token="
    # 1. The real (plain-text) request parameters
    data = {
        'rid': "R_SO_4_1294378245",
        'threadId': "R_SO_4_1294378245",
        'pageNo': "1",
        'pageSize': "20",
        'cursor': "-1",
        'offset': "0",
        'orderType': "1",
        'csrf_token': ""
    }
    # 2. Inputs of the encryption step that yields `params` and `encSecKey`
    # NOTE(review): e and f are not referenced anywhere else in the visible
    # code; presumably they are the public exponent / modulus the site uses
    # to derive encSecKey -- confirm before relying on that.
    e = '010001'
    f = "00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7"
    # Key of the first encryption pass (passed to get_encText as `g`)
    g = "0CoJUm6Qyw8W8jud"
    # Note: i has to be looked up, because generating both params and the
    # key requires it. It is the key of the second encryption pass.
    i = "6mGGm0cVaHLsaC79"
# 20. Pad the data so that its encoded length is a multiple of 16
def to_16(data):
    """Return *data* padded so its UTF-8 encoding is a multiple of 16 bytes.

    PKCS#7-style padding: between 1 and 16 copies of ``chr(pad)`` are
    appended, where ``pad`` is the number of characters added. Input that is
    already aligned therefore receives a full extra block of 16.

    Args:
        data: Text that will later be UTF-8 encoded and encrypted.

    Returns:
        The padded text.
    """
    # Measure the encoded length rather than the character count: the caller
    # encrypts data.encode("utf-8"), and a non-ASCII character occupies more
    # than one byte. For pure-ASCII input both lengths are identical.
    pad = 16 - len(data.encode("utf-8")) % 16
    # chr(pad) is always in the ASCII range here (1..16), so each padding
    # character contributes exactly one byte.
    return data + chr(pad) * pad
# 21. Encryption routine
# Fixed initialisation vector shared by every EncryptAES call.
iv = "0102030405060708"


def EncryptAES(data, key):
    """Encrypt *data* with AES in CBC mode and return Base64 text.

    Args:
        data: Plain text; it is padded with ``to_16`` and UTF-8 encoded
            before encryption.
        key: AES key as text; it is UTF-8 encoded before use.

    Returns:
        The ciphertext, Base64-encoded, as a ``str``.
    """
    # The cipher constructor takes bytes for the key and the IV.
    cipher = AES.new(
        key=key.encode("utf-8"),
        mode=AES.MODE_CBC,
        iv=iv.encode("utf-8"),
    )
    # The encrypted content must have a length that is a multiple of 16.
    plaintext = to_16(data).encode("utf-8")
    ciphertext = cipher.encrypt(plaintext)
    # The raw ciphertext cannot be decoded as UTF-8 directly, so it is
    # Base64-encoded first and only then turned into text.
    return b64encode(ciphertext).decode("utf-8")
# 22. Build the `params` value
def get_encText(data, g, i):
    """Encrypt *data* twice with EncryptAES and return the final text.

    Args:
        data: Text to encrypt (the caller passes the JSON-serialised
            request parameters).
        g: Key of the first encryption pass.
        i: Key of the second pass, applied to the output of the first.

    Returns:
        The result of the second EncryptAES call.
    """
    return EncryptAES(EncryptAES(data, g), i)
# 23. Get the key
def get_encSecKey():
    """Return the hard-coded ``encSecKey`` form value.

    Nothing is computed here; the same constant is returned on every call.
    NOTE(review): presumably this value was captured for the fixed ``i``
    used by this script and must be replaced if ``i`` changes -- confirm.
    """
    enc_sec_key = "1c8b259c6995466fc7a408070cabd721764f997eade0871dbda28cb552fed9fe00ef96e0e7d1f68db1b5768f882bd4639aa6fd50ae92f0916acdb3f2e9cc1588ec738858b4ca61720cda8e01ddeb158aeac244063cdbc500d5880b59dfbfb13f9e4d38166db22d3c87cf03b286968415e5db7a366a490eb65c8da9de0e98fba9"
    return enc_sec_key
# 3. Send the request
# Runs only when the file is executed as a script, matching the
# `__main__` guard that the parameter setup sits under.
if __name__ == "__main__":
    # Form fields the endpoint receives: the doubly encrypted JSON
    # parameters plus the matching key.
    encrypt_data = {
        'params': get_encText(data=json.dumps(data), g=g, i=i),
        'encSecKey': get_encSecKey(),
    }
    # A random User-Agent string is generated for each run.
    headers = {
        'User-Agent': UserAgent().random,
    }
    # Without a timeout, requests waits indefinitely when the server never
    # answers; bound the wait so the script fails instead of hanging.
    res = requests.post(
        url=url,
        data=encrypt_data,
        headers=headers,
        timeout=10,
    ).content
    print(res)
    # The raw response bytes are written unchanged, hence binary mode.
    with open("./cloudmusic.json", 'wb') as fp:
        fp.write(res)