戴上耳机, 这个世界与我无关...
让我们用音乐洗涤心灵吧...
我们从哪个网站爬取资源呢?
专治各种不服...
打开酷狗官网, 可以看到搜索框,我们要爬取的数据就是搜索歌曲后,
酷狗后台返回的歌曲列表以及每首歌的歌曲信息(歌词、作者、url等)
敲F12键进入开发者模式,选择Network - All (这里就是酷狗前后台交互的所有请求列表)
这么多请求, 我应该选哪个? 一个一个试?
然后查看返回的json数据
{
"status": 1,
"error_code": 0,
"data": {
"page": 1,
"tab": "全部",
"lists": [
{
"SongName": "雨一直下",
"OwnerCount": 61910,
"MvType": 2,
"TopicRemark": "",
"SQFailProcess": 4,
"Source": "",
"Bitrate": 128,
"HQExtName": "mp3",
"SQFileSize": 37489904,
"ResFileSize": 0,
"AudioCdn": 100,
"MvTrac": 3,
"SQDuration": 293,
"ExtName": "mp3",
"Auxiliary": "",
"SongLabel": "",
"Scid": 339102,
"OriSongName": "雨一直下",
"FailProcess": 4,
"SQBitrate": 1022,
"HQBitrate": 320,
"Audioid": 339102,
"HiFiQuality": 2,
"Grp": [
{
"SongName": "雨一直下",
"OwnerCount": 31,
"MvType": 2,
"TopicRemark": "",
"SQFailProcess": 4,
"Source": "",
"Bitrate": 128,
"HQExtName": "mp3",
"SQFileSize": 37489904,
"ResFileSize": 0,
"AudioCdn": 100,
"MvTrac": 3,
"SQDuration": 293,
"ExtName": "mp3",
"Auxiliary": "",
"SongLabel": "",
"Scid": 339102,
"OriSongName": "雨一直下",
"FailProcess": 4,
"SQBitrate": 1022,
"HQBitrate": 320,
"Audioid": 339102,
"HiFiQuality": 2,
"OriOtherName": "",
"AlbumPrivilege": 8,
"TopicUrl": "",
"SuperFileHash": "",
"ASQPrivilege": 10,
"M4aSize": 1225472,
"IsOriginal": 0,
"Privilege": 8,
"ResBitrate": 0,
"FileHash": "8AA6B442D0541FE6645611A108E6FD32",
"SQPayType": 3,
"HQPrice": 200,
"Type": "audio",
"trans_param": {
"cid": 5111971,
"pay_block_tpl": 1,
"musicpack_advance": 0,
"display_rate": 0,
"display": 0
},
"SourceID": 0,
"A320Privilege": 10,
"FileName": "张宇 - 雨一直下",
"AlbumID": "973971",
"ID": "32163792",
"SuperFileSize": 0,
"QualityLevel": 3,
"SQFileHash": "7F0933165786A9BC9858A8B56EED0D43",
"AlbumName": "男人的好 新歌+精选",
"HQPrivilege": 10,
"SuperBitrate": 0,
"SuperDuration": 0,
"MixSongID": "32163792",
"ResFileHash": "",
"PublishAge": 255,
"SuperExtName": "",
"HQFileHash": "32CB70DDFF57B235550C75B5FC46D030",
"HQPkgPrice": 1,
"Duration": 293,
"FileSize": 4694905,
"OtherName": "",
"SQPkgPrice": 1,
"PkgPrice": 1,
"HQFileSize": 11735431,
"HQFailProcess": 4,
"OldCpy": 1,
"SQPrivilege": 10,
"SQPrice": 200,
"ResDuration": 0,
"SingerId": [
3537
],
"Price": 200,
"HQPayType": 3,
"SingerName": "张宇",
"Publish": 1,
"MvHash": "E8BD1926CF6298D452E16F8904CB795A",
"SQExtName": "flac",
"HQDuration": 293,
"PayType": 3,
"HasAlbum": 1,
"mvTotal": 0,
"Accompany": 1
},
{
"SongName": "雨一直下",
"OwnerCount": 12,
"MvType": 2,
"TopicRemark": "",
"SQFailProcess": 4,
"Source": "",
"Bitrate": 128,
"HQExtName": "mp3",
"SQFileSize": 37489904,
"ResFileSize": 0,
"AudioCdn": 100,
"MvTrac": 3,
"SQDuration": 293,
"ExtName": "mp3",
"Auxiliary": "",
"SongLabel": "",
"Scid": 339102,
"OriSongName": "雨一直下",
"FailProcess": 4,
"SQBitrate": 1022,
"HQBitrate": 320,
"Audioid": 339102,
"HiFiQuality": 2,
"OriOtherName": "",
"AlbumPrivilege": 8,
"TopicUrl": "",
"SuperFileHash": "",
"ASQPrivilege": 10,
"M4aSize": 1225472,
"IsOriginal": 0,
"Privilege": 8,
"ResBitrate": 0,
"FileHash": "8AA6B442D0541FE6645611A108E6FD32",
"SQPayType": 3,
"HQPrice": 200,
"Type": "audio",
"trans_param": {
"cid": 31864779,
"pay_block_tpl": 1,
"musicpack_advance": 0,
"display_rate": 0,
"display": 0
},
"SourceID": 0,
"A320Privilege": 10,
"FileName": "张宇 - 雨一直下",
"AlbumID": "2400135",
"ID": "62076604",
"SuperFileSize": 0,
"QualityLevel": 3,
"SQFileHash": "7F0933165786A9BC9858A8B56EED0D43",
"AlbumName": "重拾男人心",
"HQPrivilege": 10,
"SuperBitrate": 0,
"SuperDuration": 0,
"MixSongID": "62076604",
"ResFileHash": "",
"PublishAge": 255,
"SuperExtName": "",
"HQFileHash": "32CB70DDFF57B235550C75B5FC46D030",
"HQPkgPrice": 1,
"Duration": 293,
"FileSize": 4694905,
"OtherName": "",
"SQPkgPrice": 1,
"PkgPrice": 1,
"HQFileSize": 11735431,
"HQFailProcess": 4,
"OldCpy": 1,
"SQPrivilege": 10,
"SQPrice": 200,
"ResDuration": 0,
"SingerId": [
3537
],
"Price": 200,
"HQPayType": 3,
"SingerName": "张宇",
"Publish": 1,
"MvHash": "E8BD1926CF6298D452E16F8904CB795A",
"SQExtName": "flac",
"HQDuration": 293,
"PayType": 3,
"HasAlbum": 1,
"mvTotal": 0,
"Accompany": 1
}
],
"OriOtherName": "",
"AlbumPrivilege": 8,
"TopicUrl": "",
"SuperFileHash": "",
"ASQPrivilege": 10,
"M4aSize": 1225472,
"IsOriginal": 1,
"Privilege": 8,
"ResBitrate": 0,
"FileHash": "8AA6B442D0541FE6645611A108E6FD32",
"SQPayType": 3,
"HQPrice": 200,
"trans_param": {
"cid": 2456823,
"pay_block_tpl": 1,
"musicpack_advance": 0,
"display_rate": 0,
"display": 0
},
"Type": "audio",
"FoldType": 0,
"SourceID": 0,
"A320Privilege": 10,
"FileName": "张宇 - 雨一直下",
"AlbumID": "982663",
"ID": "32243475",
"SuperFileSize": 0,
"QualityLevel": 3,
"SQFileHash": "7F0933165786A9BC9858A8B56EED0D43",
"AlbumName": "雨一直下",
"HQPrivilege": 10,
"SuperBitrate": 0,
"SuperDuration": 0,
"MixSongID": "32243475",
"ResFileHash": "",
"PublishAge": 255,
"SuperExtName": "",
"HQFileHash": "32CB70DDFF57B235550C75B5FC46D030",
"HQPkgPrice": 1,
"Duration": 293,
"FileSize": 4694905,
"OtherName": "",
"SQPkgPrice": 1,
"PkgPrice": 1,
"HQFileSize": 11735431,
"HQFailProcess": 4,
"OldCpy": 1,
"SQPrivilege": 10,
"SQPrice": 200,
"ResDuration": 0,
"SingerId": [
3537
],
"Price": 200,
"HQPayType": 3,
"SingerName": "张宇",
"Publish": 1,
"MvHash": "E8BD1926CF6298D452E16F8904CB795A",
"SQExtName": "flac",
"HQDuration": 293,
"PayType": 3,
"HasAlbum": 1,
"mvTotal": 0,
"Accompany": 1
}
],
"chinesecount": 4,
"searchfull": 1,
"correctiontype": 0,
"subjecttype": 0,
"aggregation": [
{
"key": "DJ",
"count": 0
},
{
"key": "现场",
"count": 0
},
{
"key": "广场舞",
"count": 0
},
{
"key": "伴奏",
"count": 0
},
{
"key": "铃声",
"count": 0
}
],
"allowerr": 0,
"correctionsubject": "",
"correctionforce": 0,
"total": 36,
"istagresult": 0,
"istag": 0,
"correctiontip": "",
"pagesize": 20
}
}
复制代码
requests 响应对象的 json() 方法, 可以把返回的 json 字符串, 转成 python 可以识别的 dict 或者 list
这些数据是我们通过访问以下链接获得的
https://songsearch.kugou.com/song_search_v2?callback=jQuery112409264783558861354_1559273651647&keyword=%E9%9B%A8%E4%B8%80%E7%9B%B4%E4%B8%8B&page=1&pagesize=30&userid=-1&clientver=&platform=WebFilter&tag=em&filter=2&iscorrection=1&privilege_filter=0&_=1559273651658
复制代码
这也太长了, 有些参数是不是没有必要啊?
我们通过postman筛一下...
postman, 接口测试工具, 最好的接口测试工具, 不怕违反广告法...
安装之后打开, 粘贴我们之前的url
然后点击音乐详情页
我们使用hash值
8AA6B442D0541FE6645611A108E6FD32
来搜索请求...
我们找到了请求数据的url
https://wwwapi.kugou.com/yy/index.php?r=play/getdata&callback=jQuery19101729051683512821_1559276678103&hash=8AA6B442D0541FE6645611A108E6FD32&album_id=982663&dfid=3d7kV00OqOe70A00N74FV8Ue&mid=15b47ea8a82b0a8111b91cccb1c52055&platid=4&_=1559276678104
复制代码
参数依旧很多, 我们使用postman过滤一下参数...
我们在返回的数据中找到了想要的字段: play_url
mid是从cookie中获得的, 变化的频率不高
整理一下思路:
完整源码
import requests
# Module-level placeholder. get_music_list() assigns its own local `res`
# (there is no `global` statement), so this value is never updated by the
# functions in this script.
res = ''
def get_music_list(music_name):
    """Search Kugou for ``music_name``, print the numbered hits and return them.

    Args:
        music_name: Search keyword typed by the user.

    Returns:
        A list of dicts, in the same order as printed, each with the keys
        ``name`` (the hit's ``FileName``), ``hash`` (``FileHash``) and
        ``aid`` (``AlbumID``).
    """
    print('歌曲[{}]的列表如下, 请选择序号:'.format(music_name))
    # Hand the keyword to requests via ``params`` so it is percent-encoded.
    # Concatenating it into the URL breaks the query for keywords that
    # contain '&', '#', '+' or spaces.
    response = requests.get(
        'https://songsearch.kugou.com/song_search_v2',
        params={'keyword': music_name},
    )
    songs = response.json()['data']['lists']

    music_list = []
    for number, music in enumerate(songs, start=1):
        print('{} -- {}'.format(number, music['FileName']))
        music_list.append({
            'name': music['FileName'],
            'hash': music['FileHash'],
            'aid': music['AlbumID'],
        })
    return music_list
def get_play_url(music_hash, music_aid, mid='4d9f1c937f33674bb55a4fa9096e97e8'):
    """Return the playable audio URL of a song.

    Args:
        music_hash: The song's ``FileHash`` from the search result.
        music_aid: The song's ``AlbumID``; when falsy (empty string, None)
            the ``album_id`` query parameter is omitted.
        mid: Value sent as the ``mid`` query parameter. Defaults to the
            value this script has always used; pass a different one if the
            default stops working.

    Returns:
        The ``play_url`` field found under ``data`` in the JSON response.
    """
    # Build the query with ``params`` instead of string concatenation so the
    # values are escaped and a non-string album id does not raise TypeError.
    # ``r=play/getdata`` stays in the base URL to keep its exact form.
    query = {'hash': music_hash, 'mid': mid}
    if music_aid:
        query['album_id'] = music_aid
    response = requests.get(
        'https://wwwapi.kugou.com/yy/index.php?r=play/getdata',
        params=query,
    )
    return response.json()['data']['play_url']
def download_music(name, url):
    """Download the audio at ``url`` and save it as ``<name>.mp3``.

    The file is written to the current working directory.

    Args:
        name: Display name of the song. Used in the progress messages and,
            with characters that are not allowed in file names replaced by
            ``_``, as the file name.
        url: Direct URL of the audio file.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    print('[{}] 正在下载...'.format(name))
    # Fetch first and check the status, so a failed request neither leaves
    # an empty file behind nor stores an error page as an .mp3.
    response = requests.get(url)
    response.raise_for_status()

    # Song names have the form "singer - title"; a '/' or another reserved
    # character in either part would make open() fail.
    safe_name = name.translate(str.maketrans({ch: '_' for ch in '\\/:*?"<>|'}))
    with open('{}.mp3'.format(safe_name), 'wb') as file:
        file.write(response.content)
    print('[{}] 已经下载完毕!'.format(name))
if __name__ == "__main__":
    # Interactive flow: search -> let the user pick a hit -> resolve its
    # play URL -> download it.
    music_name = input('请输入你想听的歌曲: ')
    music_list = get_music_list(music_name)
    music_index = input('请输入序号: ')

    # Convert the 1-based choice once and validate it. Without the range
    # check, "0" or a negative number would silently wrap around to the end
    # of the list, and a too-large number would end in a raw IndexError.
    try:
        position = int(music_index) - 1
    except ValueError:
        raise SystemExit('序号无效: {}'.format(music_index))
    if not 0 <= position < len(music_list):
        raise SystemExit('序号无效: {}'.format(music_index))

    selected = music_list[position]
    music_play_url = get_play_url(selected['hash'], selected['aid'])
    music_name = selected['name']
    download_music(music_name, music_play_url)
复制代码
have fun !!! o(* ̄︶ ̄*)o
快速跳转:
猫哥教你写爬虫 000--开篇.md
猫哥教你写爬虫 001--print()函数和变量.md
猫哥教你写爬虫 002--作业-打印皮卡丘.md
猫哥教你写爬虫 003--数据类型转换.md
猫哥教你写爬虫 004--数据类型转换-小练习.md
猫哥教你写爬虫 005--数据类型转换-小作业.md
猫哥教你写爬虫 006--条件判断和条件嵌套.md
猫哥教你写爬虫 007--条件判断和条件嵌套-小作业.md
猫哥教你写爬虫 008--input()函数.md
猫哥教你写爬虫 009--input()函数-人工智能小爱同学.md
猫哥教你写爬虫 010--列表,字典,循环.md
猫哥教你写爬虫 011--列表,字典,循环-小作业.md
猫哥教你写爬虫 012--布尔值和四种语句.md
猫哥教你写爬虫 013--布尔值和四种语句-小作业.md
猫哥教你写爬虫 014--pk小游戏.md
猫哥教你写爬虫 015--pk小游戏(全新改版).md
猫哥教你写爬虫 016--函数.md
猫哥教你写爬虫 017--函数-小作业.md
猫哥教你写爬虫 018--debug.md
猫哥教你写爬虫 019--debug-作业.md
猫哥教你写爬虫 020--类与对象(上).md
猫哥教你写爬虫 021--类与对象(上)-作业.md
猫哥教你写爬虫 022--类与对象(下).md
猫哥教你写爬虫 023--类与对象(下)-作业.md
猫哥教你写爬虫 024--编码&&解码.md
猫哥教你写爬虫 025--编码&&解码-小作业.md
猫哥教你写爬虫 026--模块.md
猫哥教你写爬虫 027--模块介绍.md
猫哥教你写爬虫 028--模块介绍-小作业-广告牌.md
猫哥教你写爬虫 029--爬虫初探-requests.md
猫哥教你写爬虫 030--爬虫初探-requests-作业.md
猫哥教你写爬虫 031--爬虫基础-html.md
猫哥教你写爬虫 032--爬虫初体验-BeautifulSoup.md
猫哥教你写爬虫 033--爬虫初体验-BeautifulSoup-作业.md
猫哥教你写爬虫 034--爬虫-BeautifulSoup实践.md
猫哥教你写爬虫 035--爬虫-BeautifulSoup实践-作业-电影top250.md
猫哥教你写爬虫 036--爬虫-BeautifulSoup实践-作业-电影top250-作业解析.md
猫哥教你写爬虫 037--爬虫-宝宝要听歌.md
猫哥教你写爬虫 038--带参数请求.md
猫哥教你写爬虫 039--存储数据.md
猫哥教你写爬虫 040--存储数据-作业.md
猫哥教你写爬虫 041--模拟登录-cookie.md
猫哥教你写爬虫 042--session的用法.md
猫哥教你写爬虫 043--模拟浏览器.md
猫哥教你写爬虫 044--模拟浏览器-作业.md
猫哥教你写爬虫 045--协程.md
猫哥教你写爬虫 046--协程-实践-吃什么不会胖.md
猫哥教你写爬虫 047--scrapy框架.md
猫哥教你写爬虫 048--爬虫和反爬虫.md
猫哥教你写爬虫 049--完结撒花.md