# 爬取周杰伦（杰伦哥）在 QQ 音乐网页版歌手页第一页全部歌曲的歌词
import html
import json
import re

import requests
# Accumulators for the singer's songs. They are filled in place when the file
# is run as a script, so later top-level code can zip them together.
id_list = []     # song ids, stored as strings
music_name = []  # song titles, index-aligned with id_list

# Endpoint that serves the singer detail (including the song list).
SINGER_URL = 'https://u.y.qq.com/cgi-bin/musicu.fcg'
url = SINGER_URL

headers = {
    # Request origin; not strictly needed for this demo, shown for illustration.
    'origin': 'https://y.qq.com',
    # Referring page; carries more detail than "origin". Also optional here.
    'referer': 'https://y.qq.com/n/yqq/song/004Z8Ihr0JIu5s.html',
    # Identifies the device and browser the request claims to come from.
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
}


def build_singer_params(singer_mid='0025NhlN2yWrP4', begin=0, num=10):
    """Build the query parameters for the singer-detail request.

    The ``data`` value is returned as *plain* JSON text. ``requests``
    percent-encodes every value passed through ``params=``, so handing it an
    already-encoded string (``%7B%22comm...``) would be encoded a second time
    (``%257B%2522...``) and the server would not receive valid JSON.

    Args:
        singer_mid: The singer's ``singermid`` identifier.
        begin: Index of the first song to return (``sin``).
        num: Number of songs to return.

    Returns:
        A dict suitable for ``requests.get(..., params=...)``.
    """
    payload = {
        'comm': {'ct': 24, 'cv': 0},
        'singer': {
            'method': 'get_singer_detail_info',
            'param': {
                'sort': 5,
                'singermid': singer_mid,
                'sin': begin,
                'num': num,
            },
            'module': 'music.web_singer_info_svr',
        },
    }
    return {
        '-': 'getUCGI05246952304022079',
        'g_tk': '5381',
        'loginUin': '************',  # replace with your own QQ number
        'hostUin': '0',
        'format': 'json',
        'inCharset': 'utf8',
        'outCharset': 'utf-8',
        'notice': '0',
        'platform': 'yqq.json',
        'needNewCode': '0',
        # Compact separators reproduce the browser's original payload exactly.
        'data': json.dumps(payload, separators=(',', ':')),
    }


def parse_song_list(payload):
    """Extract song ids and titles from a decoded singer-detail response.

    Args:
        payload: The decoded JSON body; songs are read from
            ``payload['singer']['data']['songlist']``.

    Returns:
        A ``(ids, names)`` tuple of two index-aligned lists; ids are strings.

    Raises:
        KeyError: If the response does not have the expected structure.
    """
    songs = payload['singer']['data']['songlist']
    ids = [str(song['id']) for song in songs]
    names = [song['name'] for song in songs]
    return ids, names


def fetch_song_list(singer_mid='0025NhlN2yWrP4', begin=0, num=10, timeout=10):
    """Download one page of the singer's songs and return ``(ids, names)``.

    Raises:
        requests.RequestException: On network failure, timeout or HTTP error.
    """
    response = requests.get(
        SINGER_URL,
        params=build_singer_params(singer_mid, begin, num),
        headers=headers,
        timeout=timeout,  # never hang forever on a stalled connection
    )
    response.raise_for_status()
    return parse_song_list(response.json())


params = build_singer_params()

if __name__ == '__main__':
    fetched_ids, fetched_names = fetch_song_list()
    # Extend in place so the module-level lists keep their identity.
    id_list.extend(fetched_ids)
    music_name.extend(fetched_names)
# Endpoint that returns the lyric of a single song.
LYRIC_URL = 'https://c.y.qq.com/lyric/fcgi-bin/fcg_query_lyric_yqq.fcg'

LYRIC_HEADERS = {
    # Request origin; not strictly needed for this demo, shown for illustration.
    'origin': 'https://y.qq.com',
    # Referring page; carries more detail than "origin". Also optional here.
    'referer': 'https://y.qq.com/n/yqq/song/004Z8Ihr0JIu5s.html',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
}

# Matches one bracketed LRC tag such as "[00:27.77]" or "[ti:Title]".
_LRC_TAG = re.compile(r'\[([^\[\]:]*):([^\[\]]*)\]')
# Metadata tags whose value is worth showing (title, artist, album).
_KEPT_TAGS = ('ti', 'ar', 'al')


def _replace_tag(match):
    """Return the value of a title/artist/album tag, or '' for any other tag."""
    key, value = match.group(1), match.group(2)
    return value if key.strip().lower() in _KEPT_TAGS else ''


def clean_lyric(raw):
    """Turn the raw lyric text from the API into plain readable lines.

    HTML character references (``&#58;``, ``&#10;``, ``&#39;`` ...) are decoded
    with ``html.unescape`` instead of being deleted character by character, so
    lyrics containing Latin letters or digits are no longer destroyed.
    Time tags and other LRC tags are removed; the values of the title, artist
    and album tags are kept. Blank lines are dropped.

    Args:
        raw: Lyric text as returned in the ``lyric`` field (may be empty).

    Returns:
        The cleaned lyric, one line per lyric line, joined with ``"\\n"``.
    """
    if not raw:
        return ''
    text = _LRC_TAG.sub(_replace_tag, html.unescape(raw))
    lines = (line.strip() for line in text.splitlines())
    return '\n'.join(line for line in lines if line)


def build_lyric_params(song_id):
    """Build the query parameters for the lyric request of one song id."""
    return {
        'nobase64': '1',
        'musicid': song_id,
        '-': 'jsonp1',
        'g_tk': '5381',
        'loginUin': '************',  # replace with your own QQ number
        'hostUin': '0',
        'format': 'json',
        'inCharset': 'utf8',
        'outCharset': 'utf-8',
        'notice': '0',
        'platform': 'yqq.json',
        'needNewCode': '0',
    }


def fetch_lyric(song_id, timeout=10):
    """Download and clean the lyric of one song.

    Returns an empty string when the response carries no ``lyric`` field.

    Raises:
        requests.RequestException: On network failure, timeout or HTTP error.
    """
    response = requests.get(
        LYRIC_URL,
        params=build_lyric_params(song_id),
        headers=LYRIC_HEADERS,
        timeout=timeout,  # never hang forever on a stalled connection
    )
    response.raise_for_status()
    return clean_lyric(response.json().get('lyric', ''))


if __name__ == '__main__':
    # "song_id" rather than "id": avoid shadowing the builtin.
    for song_id, name in zip(id_list, music_name):
        print('----------------------' + name + '--------------------------')
        print(fetch_lyric(song_id))
# 最后处理歌词的部分用到了 re 库（正则表达式）