分析网址:酷狗音乐 - 就是歌多
F12网络分析查询,难点为下面图中的红色框,特别是 signature 的获取,调试了一下,原先MD5的方式已改
播放
前面查询页面查询关键字:
调试js代码
断点运行,获取参数
参数 o的详细内容
再次跟踪断点之前的参数获取代码,发现只有时间戳是变化的,其它参数都是固定的
源代码
import hashlib #md5, sha1, sha224, sha256, sha384, and sha512
import requests
import re
import time
import json
import pandas as pd
class Public_kugo():
    """Small client for two Kugou web endpoints: song search and play info.

    The search endpoint requires a ``signature`` query parameter. As built
    here, it is the upper-case MD5 hex digest of the string formed by a
    fixed salt, every other query parameter rendered as ``key=value`` in
    sorted order, and the salt again.

    Attributes:
        kg_mid, kg_dfid: Cookie values, also sent as ``mid`` / ``dfid`` in
            the play-info request.
        url, url2: Search and play-info endpoint URLs.
        head: HTTP headers sent with every request.
        params, params2: Query parameters for the search and play-info
            requests; both are updated in place by each call.
        data: The list of strings that was joined and hashed to produce the
            most recent ``params['signature']``.
        timeout: Seconds passed as ``timeout`` to ``requests.get``.
    """

    # Salt placed before and after the sorted parameters when signing.
    _SIGN_SALT = "NVPh5oo715z5DIWAeQlhMDsWXXQV4hwt"

    # A JSONP envelope: ``callbackName( ...payload... )`` with an optional
    # trailing semicolon.  DOTALL so a multi-line payload is still captured.
    _JSONP_RE = re.compile(r'^\s*[\w$.]+\s*\((.*)\)\s*;?\s*$', re.DOTALL)

    def __init__(self, timeout=10):
        """Set up headers and default query parameters.

        Args:
            timeout: Seconds to wait for each HTTP request before
                ``requests`` raises a timeout error.
        """
        # Replace these two with the values from your own cookie.  They are
        # kept as separate attributes because they are reused below in the
        # cookie header and in the play-info parameters.
        self.kg_mid = '92f43c1ca98aeef7f805654bc16b687e'
        self.kg_dfid = '10GOa11RqXIo2jvJbD3jt3BN'
        self.url = "https://complexsearch.kugou.com/v2/search/song"  # search endpoint
        self.url2 = "https://wwwapi.kugou.com/yy/index.php"  # play-info endpoint
        self.timeout = timeout
        self.head = {
            'accept': '*/*',
            # NOTE(review): 'br' is advertised here; requests can only decode
            # brotli responses when a brotli package is installed - confirm
            # the server never answers with br, or drop it from this list.
            'accept-encoding': 'gzip, deflate, br',
            'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
            'cookie': 'kg_mid='+self.kg_mid+'; kg_dfid='+self.kg_dfid+'; kg_dfid_collect=d41d8cd98f00b204e9800998ecf8427e; Hm_lvt_aedee6983d4cfc62f509129360d6bb3d=1629682120; Hm_lpvt_aedee6983d4cfc62f509129360d6bb3d=1629683486',
            'referer': 'https://www.kugou.com/',
            'sec-ch-ua': '" Not;A Brand";v="99", "Google Chrome";v="91", "Chromium";v="91"',
            'sec-ch-ua-mobile': '?0',
            'sec-fetch-dest': 'script',
            'sec-fetch-mode': 'no-cors',
            'sec-fetch-site': 'same-site',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.106 Safari/537.36',
        }
        self.params = {
            'callback': 'callback123',
            'keyword': '学猫叫',
            'page': '1',
            'pagesize': '30',
            'bitrate': '0',
            'isfuzzy': '0',
            'tag': 'em',
            'inputtype': '0',
            'platform': 'WebFilter',
            'userid': '0',
            'clientver': '2000',
            'iscorrection': '1',
            'privilege_filter': '0',
            'srcappid': '2919',
            'clienttime': '1629688889293',
            'mid': '1629688889293',
            'uuid': '1629688889293',
            'dfid': '-',
            'signature': 'A819E0B497883BF0435D2D26667B5B5F',
        }
        # Derived from ``params`` so the two can never drift apart.
        self.data = self._signature_parts(self.params)
        self.params2 = {
            'r': 'play/getdata',
            'callback': 'jQuery191017104544130952526_1629693532392',
            'hash': '2BE99967C762BC56E2DF23682C755FE8',
            'dfid': self.kg_dfid,
            'mid': self.kg_mid,
            'platid': '4',
            'album_id': '',
            '_': '1629693532394',
        }

    def _signature_parts(self, params):
        """Return the ordered strings whose concatenation gets hashed.

        Every entry of ``params`` except ``signature`` becomes a
        ``key=value`` string; these are sorted and wrapped in the salt.
        """
        pairs = sorted(
            '%s=%s' % (key, value)
            for key, value in params.items()
            if key != 'signature'
        )
        return [self._SIGN_SALT] + pairs + [self._SIGN_SALT]

    @classmethod
    def _strip_jsonp(cls, text):
        """Return the payload inside a JSONP envelope, or ``text`` unchanged
        when it is not wrapped in one."""
        match = cls._JSONP_RE.match(text)
        return match.group(1) if match else text

    @staticmethod
    def _pick_song(payload):
        """Extract ``(audio_name, play_url)`` from a decoded play-info reply.

        Returns ``None`` when ``payload['data']`` is not a dict, lacks either
        key, or carries an empty ``play_url``.
        """
        song = payload.get('data') if isinstance(payload, dict) else None
        if not isinstance(song, dict):
            return None
        if 'audio_name' not in song or not song.get('play_url'):
            return None
        return (song['audio_name'], song['play_url'])

    def gen_md5(self, row):
        """Return the upper-case hexadecimal MD5 digest of ``row``.

        ``row`` is encoded as UTF-8; characters that cannot be encoded are
        dropped (``errors="ignore"``).
        """
        return hashlib.md5(row.encode("utf8", "ignore")).hexdigest().upper()

    def gettime(self):
        """Return the current Unix time in milliseconds as a decimal string."""
        return '%d' % (time.time() * 1000)

    def Get_html(self, val, page=1, pagesize=30):
        """Query the search endpoint and return the list of hits.

        Updates ``self.params`` (keyword, paging, timestamps, signature) and
        ``self.data`` in place before sending the request.

        Args:
            val: Search keyword.
            page: Result page number sent as ``page``.
            pagesize: Number of hits per page sent as ``pagesize``.

        Returns:
            The value found at ``['data']['lists']`` in the decoded reply.

        Raises:
            KeyError: If the reply has no ``data`` / ``lists`` entry.
            ValueError: If the reply body is not valid JSON after the JSONP
                envelope is removed.
            requests.RequestException: On network failure or timeout.
        """
        stamp = self.gettime()
        self.params.update({
            'keyword': val,
            'page': str(page),
            'pagesize': str(pagesize),
            # The same millisecond timestamp is sent in all three fields.
            'clienttime': stamp,
            'mid': stamp,
            'uuid': stamp,
        })
        self.data = self._signature_parts(self.params)
        self.params['signature'] = self.gen_md5(''.join(self.data))
        reply = requests.get(
            self.url, params=self.params, headers=self.head,
            timeout=self.timeout,
        )
        body = self._strip_jsonp(reply.content.decode('utf-8'))
        return json.loads(body)['data']['lists']

    def Get_html2(self, id, hs):
        """Query the play-info endpoint for one track.

        Updates ``self.params2`` in place before sending the request.

        Args:
            id: Value sent as ``album_id``.  (The name shadows the builtin
                but is kept so existing keyword callers keep working.)
            hs: Value sent as ``hash``.

        Returns:
            ``(audio_name, play_url)`` when the reply carries a non-empty
            ``play_url``; otherwise ``None``.

        Raises:
            ValueError: If the reply body is not valid JSON after the JSONP
                envelope is removed.
            requests.RequestException: On network failure or timeout.
        """
        stamp = self.gettime()
        self.params2.update({
            'callback': 'jQuery191017104544130952526_' + stamp,
            'hash': hs,
            'album_id': id,
            '_': stamp,
        })
        reply = requests.get(
            self.url2, params=self.params2, headers=self.head,
            timeout=self.timeout,
        )
        body = self._strip_jsonp(reply.content.decode('utf-8'))
        return self._pick_song(json.loads(body))

    def Search(self, val):
        """Search for ``val`` and print every hit that has a play URL.

        Each hit's ``AlbumID`` and ``HQFileHash`` are passed to
        ``Get_html2``; hits for which it returns ``None`` are skipped.
        Printed lines are ``<running index> <audio_name> <play_url>``.

        Args:
            val: Search keyword.

        Returns:
            The list of ``(audio_name, play_url)`` tuples that were printed.
        """
        found = []
        for item in self.Get_html(val):
            song = self.Get_html2(item['AlbumID'], item['HQFileHash'])
            if song is None:
                continue
            found.append(song)
            print(len(found), song[0], song[1])
        return found
if __name__ == '__main__':
    # Demo run when executed as a script: search for one keyword and print
    # each hit that has a playable URL.
    client = Public_kugo()
    client.Search('好人一生平安')
结果