【源码】某网站自动下载音乐mp3和歌词

最新推荐文章于 2024-05-30 11:43:13 发布

码农之家★资源共享

最新推荐文章于 2024-05-30 11:43:13 发布

阅读量482

点赞数 8

文章标签： python

本文链接：https://blog.csdn.net/qq_41201689/article/details/136612599

版权

导入必要的库：使用了requests进行HTTP请求，BeautifulSoup来解析HTML内容，以及json, os, time, tqdm, urllib.parse等库来处理数据、文件系统操作、显示进度条和处理URL。
设置请求头：定义了headers变量，包括模拟浏览器的User-Agent和cookie，以确保能够成功地从网站请求数据。
定义基本参数：用户通过输入指定感兴趣的歌手名，脚本会自动编码这个名称并构建用于搜索的URL。同时，用户还可以选择是否下载歌词。
获取音乐下载路径：get_music_download_url函数根据传入的歌曲ID构造歌词下载URL和歌曲播放接口URL，然后请求该接口，并从返回的JSON数据中取得歌曲的实际下载链接。
下载文件：download_file函数用于下载歌曲或歌词文件。它会检查文件是否已存在，如果不存在，则开始下载过程，并显示下载进度。
主函数(main)：首先发送请求到指定的搜索URL，并使用BeautifulSoup解析得到的HTML内容。然后，它会遍历每首歌曲，获取歌曲的详细信息（包括歌曲ID、标题、歌手等），并调用download_file函数下载歌曲和（可选的）歌词文件。下载过程中，考虑到网站可能有流量限制，脚本设定了每次下载后的延时（默认30秒），以避免被服务器封禁。
错误处理：在尝试下载每首歌曲时，脚本通过try-except结构来捕获并处理可能发生的异常，确保程序的稳健性。
启动脚本：最后，通过检查__name__变量，确定当脚本被直接执行而不是作为模块导入时，调用main函数启动下载过程。

直接附上代码

import requests
from bs4 import BeautifulSoup
import json
import os
import time
# tqdm renders the download progress bar
from tqdm import tqdm
import urllib.parse  # percent-encodes non-ASCII (e.g. Chinese) characters for use in URLs

# Silence DeprecationWarning messages about invalid escape sequences
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning, message="invalid escape sequence")
 
 
# HTTP headers sent with the requests to the site: a desktop-browser
# User-Agent and a fixed cookie string.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.89 Safari/537.36',
    'cookie':'__51uvsct__JZKGLNhr7gNTmF1z=1; __51vcke__JZKGLNhr7gNTmF1z=dbcc8135-b908-58b9-ab0f-f09772cc8ef9; __51vuft__JZKGLNhr7gNTmF1z=1673170099915; __vtins__JZKGLNhr7gNTmF1z=%7B%22sid%22%3A%20%2250340dc9-526b-5b41-8642-2fa520c011a5%22%2C%20%22vd%22%3A%2030%2C%20%22stt%22%3A%204104371%2C%20%22dr%22%3A%20616811%2C%20%22expires%22%3A%201673176004282%2C%20%22ct%22%3A%201673174204282%7D'
}
# Base URL of the music site.
server = 'https://www.gequbao.com'
# Singer to search for (for example '林俊杰'). Surrounding whitespace is
# dropped so that it is not percent-encoded into the search URL.
input_singer = input("请输入歌手名：").strip()
# Percent-encode the name so non-ASCII characters are valid in the URL path.
input_singer = urllib.parse.quote(input_singer)
# Search-results page for that singer, built from the base URL above.
singer = '{}/s/{}'.format(server, input_singer)
# Lyrics are downloaded only when this equals 'yes'; the answer is normalised
# so that inputs such as "Yes" or "yes " are not silently treated as "no".
is_download_lrc = input("是否下载歌词? 默认不下载(yes,no)").strip().lower()
 
 
# Resolve the download addresses for one song
def get_music_download_url(song_id):
    """Return the lyrics URL and the audio download URL for a song.

    Args:
        song_id: Song identifier, inserted verbatim into the site's URLs.

    Returns:
        A ``(lrc_download_url, download_url)`` tuple. The lyrics URL is only
        constructed, never requested here; the audio URL is read from
        ``data.url`` in the JSON returned by the ``play_url`` API.

    Raises:
        requests.exceptions.RequestException: On network failure, timeout or
            a non-success HTTP status.
        ValueError: If the response body is not valid JSON.
        KeyError: If the JSON has no ``data`` / ``url`` entry.
    """
    # Lyrics download address
    lrc_download_url = 'https://www.gequbao.com/download/lrc/{}'.format(song_id)
    # API endpoint that answers with the real audio URL
    song_download_url = 'https://www.gequbao.com/api/play_url?id={}&json=1'.format(song_id)
    # A timeout keeps a stalled connection from hanging the whole run.
    res = requests.get(url=song_download_url, headers=headers, timeout=30)
    res.raise_for_status()
    res.encoding = 'utf-8'
    # json.loads already decodes \uXXXX and \/ escapes. Running the text
    # through 'unicode_escape' first corrupts raw non-ASCII characters and
    # breaks the JSON whenever a string contains \" , \\ or \n.
    decoded_data = json.loads(res.text)
    download_url = decoded_data['data']['url']
    return lrc_download_url, download_url
 
# Download a file (name, download URL, target directory, file type)
def download_file(file_name, download_url, download_directory, file_extension="mp3"):
    """Download ``download_url`` to ``<download_directory>/<file_name>.<file_extension>``.

    The target directory is created when missing. If the target file already
    exists nothing is downloaded. Data is streamed into a ``.part`` file next
    to the target and renamed only after the transfer completed, so an
    interrupted download never leaves a truncated file that a later run would
    mistake for a finished one.

    Args:
        file_name: File name without extension.
        download_url: URL to fetch.
        download_directory: Directory the file is stored in.
        file_extension: Extension appended to ``file_name`` (default "mp3").

    Returns:
        None. Request failures and non-2xx responses are reported on stdout
        rather than raised.
    """
    # Make sure the directory exists (exist_ok avoids a check-then-create race)
    os.makedirs(download_directory, exist_ok=True)
    # Note: the extension is appended to the name automatically
    file_path = os.path.join(download_directory, f"{file_name}.{file_extension}")
    # Skip files that were downloaded earlier
    if os.path.exists(file_path):
        print(f"[-] 文件已存在： {file_path}")
        return
    # Temporary path that receives the data while the transfer is running
    part_path = file_path + ".part"
    try:
        with requests.get(download_url, stream=True, timeout=30) as response:
            if not 200 <= response.status_code < 300:
                print(f"[x] HTTP错误：{response.status_code}")
                return
            total_size = int(response.headers.get('content-length', 0))
            # total=None lets tqdm show a plain counter when the size is unknown
            with open(part_path, 'wb') as file, tqdm(
                    total=total_size or None, unit='B', unit_scale=True,
                    desc=f"[+] 正在下载：{file_name}.{file_extension}", leave=True) as pbar:
                for chunk in response.iter_content(chunk_size=1024):
                    file.write(chunk)
                    pbar.update(len(chunk))
        # Transfer finished: publish the file under its final name
        os.replace(part_path, file_path)
    except requests.exceptions.RequestException as e:
        print(f"[x] 下载失败! {file_name}.{file_extension}","\t{}".format(e))
    finally:
        # Still present only when the download did not complete
        if os.path.exists(part_path):
            os.remove(part_path)
 
 
def _safe_file_name(name):
    """Replace characters that are not allowed in file names with '_'."""
    table = {ord(ch): '_' for ch in '\\/:*?"<>|'}
    return name.translate(table).strip()


def main(delay_seconds=30):
    """Search the site for the requested singer and download every song found.

    Fetches the search page at ``singer``, reads the song links and singer
    names from its ``card-text`` block and stores each song (and, when
    ``is_download_lrc`` is "yes", its lyrics) under ``./songs/<searched name>``.
    Songs whose mp3 already exists are skipped without any request.

    Args:
        delay_seconds: Pause after each attempted song, in seconds.
    """
    res = requests.get(singer, headers=headers, timeout=30)
    res.encoding = 'utf-8'
    # Parse with the built-in html.parser
    soup = BeautifulSoup(res.text, 'html.parser')
    result_block = soup.find('div', class_='card-text')
    search_input = soup.find('input', id='s-input-line')
    if result_block is None or search_input is None:
        # Page without a result list: report zero songs instead of crashing
        print('查询到歌曲数: %d ' % 0)
        return
    # Song links and the matching singer names
    songs = result_block.find_all(class_='text-primary')
    singers = result_block.find_all(class_='text-success')
    print('查询到歌曲数: %d ' % len(songs))
    if not songs:
        return
    # Name that was searched for, as echoed by the page; fall back to the
    # user's own input when the search box carries no value
    search_singer_name = _safe_file_name(
        search_input.get('value') or urllib.parse.unquote(input_singer))
    # Target directory, built with os.path.join so it also works off Windows
    save_url = os.path.join(os.getcwd(), 'songs', search_singer_name)
    os.makedirs(save_url, exist_ok=True)
    want_lyrics = is_download_lrc.strip().lower() == 'yes'
    for each_song, each_singer in zip(songs, singers):
        try:
            # Song ID
            song_id = each_song.get('href').replace('/music/', '')
            # Song title
            song_title = each_song.get_text().strip()
            # Singer
            song_singer = each_singer.get_text().strip()
            # File name format; titles may contain characters such as '/' or '?'
            file_name = _safe_file_name('{} - {}'.format(song_singer, song_title))
            # Check for an existing file before resolving the download URL,
            # to avoid needless requests against the rate-limited site
            is_file_name = os.path.join(save_url, f"{file_name}.mp3")
            if os.path.exists(is_file_name):
                print("[-] 文件已存在： {}".format(os.path.basename(is_file_name)))
                continue

            lrc_url, song_url = get_music_download_url(song_id)
            download_file(file_name, song_url, save_url, 'mp3')
            if want_lyrics:   # download lyrics as well?
                download_file(file_name, lrc_url, save_url, 'lrc')
        except Exception as e:
            # Keep going with the next song; include the exception type,
            # because e.g. a bare KeyError message is not self-explanatory
            print("[x] {}: {}".format(type(e).__name__, e))

        # TODO: the site rate-limits this script; a 20 s pause was not
        # reliable in testing, 30 s caused no problems. Increase if needed.
        time.sleep(delay_seconds)
 
# Start the download run only when this file is executed directly.
if __name__ == '__main__':
    main()