酷狗音乐的爬虫练习

龙崎大佬

已于 2022-05-26 11:16:06 修改

阅读量858

点赞数 4

分类专栏： python练习 | 文章标签： python sql

于 2021-03-20 10:06:25 首次发布

本文链接：https://blog.csdn.net/lsf_longqi/article/details/115023072

版权

python练习专栏收录该内容

8 篇文章 1 订阅

订阅专栏

酷狗音乐的爬虫练习（只是作为新手练习，大神勿喷啊）

import requests
import json
import os
import time
import random


def _strip_em(text):
    """Return *text* without the ``<em>``/``</em>`` tags found in search results."""
    return text.replace('<em>', '').replace('</em>', '')


# --- Search Kugou for the keyword typed by the user --------------------------
music_name = input('输入要下载的歌曲名称：')
url = 'https://songsearch.kugou.com/song_search_v2'
# The query is handed to requests as a dict so that the keyword is
# percent-encoded; interpolating it into the URL by hand breaks as soon as the
# song name contains '&', '#', '+' or '='.
search_params = {
    'callback': 'jQuery1124042761514747027074_1580194546707',
    'keyword': music_name,
    'page': 1,
    'pagesize': 10,
    'userid': -1,
    'clientver': '',
    'platform': 'WebFilter',
    'tag': 'em',
    'filter': 2,
    'iscorrection': 1,
    'privilege_filter': 0,
    '_': 1580194546709,
}
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
}
# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url, params=search_params, headers=headers, timeout=10)
response.raise_for_status()
html_data = response.text

# The body is JSONP: ``callback({...})``.  Take everything between the first
# '(' and the last ')' instead of assuming exactly two trailing characters.
jsonp_start = html_data.find('(')
jsonp_end = html_data.rfind(')')
if 0 <= jsonp_start < jsonp_end:
    html_info = html_data[jsonp_start + 1:jsonp_end]
else:
    # No wrapper found: try to parse the body as plain JSON.
    html_info = html_data
html_data_info = json.loads(html_info)

# A payload without data/lists yields an empty result list rather than a
# KeyError, so the rest of the script can report that nothing was found.
search_data = html_data_info.get('data') if isinstance(html_data_info, dict) else None
lists = (search_data.get('lists') or []) if isinstance(search_data, dict) else []

# Parallel lists, one entry per search hit, consumed by the download step:
list_hash = []       # FileHash of every hit
album_id_list = []   # AlbumID of every hit
list_id_info = []    # {'歌手': ..., '歌名': ...} rows shown to the user
for song in lists:
    # Read every field before appending so the three lists stay aligned even
    # if one entry is malformed and raises.
    song_title = _strip_em(song['SongName'])
    singer = _strip_em(song['SingerName'])
    file_hash = song['FileHash']
    album_id = song['AlbumID']
    album_id_list.append(album_id)
    list_hash.append(file_hash)
    list_id_info.append({
        '歌手': singer,
        '歌名': song_title,
    })

# Folder (relative to the current working directory) that receives the files.
DOWNLOAD_DIR = '酷狗音乐'
# Characters that are path separators or are rejected in Windows file names;
# each one is replaced by '_' when building the target file name.
_UNSAFE_FILENAME_CHARS = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})
# Failures that mean "this one song could not be downloaded".
_DOWNLOAD_ERRORS = (requests.RequestException, OSError, ValueError, KeyError, RuntimeError)


def _download_song(file_hash, album_id):
    """Look up one song on Kugou and save its audio under ``DOWNLOAD_DIR``.

    Uses the module-level ``headers`` dict for both HTTP requests.

    Args:
        file_hash: The ``FileHash`` value of the search hit.
        album_id: The ``AlbumID`` value of the same search hit.

    Raises:
        requests.RequestException: A request failed, timed out or returned an
            HTTP error status.
        ValueError: The metadata response was not valid JSON.
        KeyError: The metadata lacks ``author_name`` or ``song_name``.
        RuntimeError: The metadata contains no usable ``play_url``.
        OSError: The folder or file could not be written.
    """
    movie_url = f'https://wwwapi.kugou.com/yy/index.php?r=play/getdata&hash={file_hash}&dfid=33ZWG24MjQ2x0kYBM53k7N6C&mid=2070c26ef7eeac6d59cb3372ce068810&platid=4&album_id={album_id}&_=1606976867986'
    print(movie_url)
    meta_response = requests.get(url=movie_url, headers=headers, timeout=10)
    meta_response.raise_for_status()
    payload = json.loads(meta_response.text)
    data = payload.get('data') if isinstance(payload, dict) else None
    # Fail with a clear message instead of passing an empty URL to requests.
    if not isinstance(data, dict) or not data.get('play_url'):
        raise RuntimeError('接口没有返回可用的播放地址')

    purl = data['play_url']
    author_name = data['author_name']
    song_name = data['song_name']
    print(purl)
    audio_response = requests.get(url=purl, headers=headers, timeout=30)
    audio_response.raise_for_status()

    # exist_ok avoids the old try/except that reported "folder already exists"
    # for every kind of error and then tried to write the file anyway.
    os.makedirs(DOWNLOAD_DIR, exist_ok=True)
    title = song_name + '-' + author_name
    # os.path.join instead of a literal backslash so the file lands inside the
    # folder on every OS; unsafe characters in the title are replaced.
    target = os.path.join(DOWNLOAD_DIR, title.translate(_UNSAFE_FILENAME_CHARS) + '.mp3')
    with open(target, 'wb') as f:
        f.write(audio_response.content)
    print(title, '下载完成')
    time.sleep(0.2)


print(list_hash)
if list_id_info:
    # Show the numbered result list (1-based, matching the prompt below).
    for index, info in enumerate(list_id_info, start=1):
        print(index, info)
    download_all = input('是否需要下载全部歌曲：').strip()
    if download_all == '是':
        for file_hash, album_id in zip(list_hash, album_id_list):
            # Random pause between songs to avoid hammering the server.
            time.sleep(random.randint(2, 5))
            try:
                _download_song(file_hash, album_id)
            except _DOWNLOAD_ERRORS as e:
                # Best effort: report the failure and move on to the next song.
                print(file_hash, '下载失败：', e)
    else:
        try:
            num = int(input('需要下载的序号：'))
        except ValueError:
            num = 0  # falls through to the "invalid number" message
        # Reject 0 and negatives explicitly: they would otherwise index from
        # the end of the list and download the wrong song.
        if 1 <= num <= len(list_hash):
            hash_id_info = list_hash[num - 1]
            album_id_info = album_id_list[num - 1]
            print(hash_id_info, album_id_info)
            try:
                _download_song(hash_id_info, album_id_info)
            except _DOWNLOAD_ERRORS as e:
                print(hash_id_info, '下载失败：', e)
        else:
            print(f'序号无效，请输入 1 到 {len(list_hash)} 之间的数字')
else:
    print('找不到歌曲信息哦~~~')