网易云音乐分类爬取下载
通过 requests 请求网易云音乐的歌单页面,解析每个歌单所包含的歌曲信息,再逐首爬取并下载。
效果演示
源码演示
import datetime
import os
import random
import time
import django
from asyncio.windows_events import NULL
# Point Django at the project's settings module and initialise it. This has to
# run before the `info_manage.models` import further down, because this file is
# used as a standalone script rather than through Django's own entry points.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'music_system.settings')
django.setup()
import requests
import re
from multiprocessing import Pool
import urllib
import sqlite3
from info_manage.models import CategoryModel,MusicModel
from bs4 import BeautifulSoup
# Category names used as the `cat` query parameter of the playlist listing URL.
# Only `data2` is iterated by the script entry point at the bottom of this file;
# the other lists are not referenced anywhere in the visible code.
# NOTE(review): the groupings look like "all" / genre / scene / mood / theme --
# confirm against the site's category page.
data= ['全部']
data1 = ['流行', '摇滚', '民谣', '电子', '舞曲', '说唱', '轻音乐', '爵士', '乡村', 'R&B/Soul', '古典', '民族', '英伦', '金属', '朋克', '蓝调', '雷鬼',
'世界音乐', '拉丁', 'New Age', '古风', '后摇', 'Bossa Nova']
data2 = ['清晨', '夜晚', '学习', '工作', '午休', '下午茶', '地铁', '驾车', '运动', '旅行', '散步', '酒吧']
data3 = ['怀旧', '清新', '浪漫', '伤感', '治愈', '放松', '孤独', '感动', '兴奋', '快乐', '安静', '思念']
data4 = ['综艺', '影视原声', 'ACG', '儿童', '校园', '游戏', '70后', '80后', '90后', '网络歌曲', 'KTV', '经典', '翻唱', '吉他', '钢琴', '器乐', '榜单', '00后']
# HTTP headers sent with the requests to music.163.com pages and APIs
# (a Referer plus a desktop-browser User-Agent string).
headers = {
'Referer': 'https://music.163.com/',
"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.89 "
"Safari/537.36"
}
# Never assigned or read by the visible code; the only other mention is a
# commented-out assignment in the script entry point.
gobal_type = None
def get_page(url, type_data):
    """Fetch one playlist-listing page and crawl every playlist found on it.

    The page HTML is scanned for playlist links; each ``(title, playlist_id)``
    match is handed to ``get_songs`` through a pool of four worker processes.

    Args:
        url: Address of the playlist listing page to fetch.
        type_data: Category name passed through to ``get_songs`` together with
            each playlist.

    Returns:
        None. If the listing page cannot be fetched, a message is printed and
        the page is skipped.
    """
    try:
        # A timeout keeps one stalled connection from hanging the whole crawl.
        res = requests.get(url, headers=headers, timeout=15)
    except requests.RequestException as exc:
        print("无法获取歌单列表:", url, exc)
        return

    # Raw string: `\?` and `\d` are regex escapes, not Python string escapes.
    playlists = re.findall(
        r'<a title="(.*?)" href="/playlist\?id=(\d+)" class="msk"></a>',
        res.text,
    )
    print(playlists)

    # The last match is deliberately left out, as in the original code; the
    # reason is not evident from this file.
    tasks = [(playlist, type_data) for playlist in playlists[:-1]]
    if tasks:
        # The context manager shuts the worker processes down once map() has
        # returned, so workers no longer accumulate across pages.
        with Pool(processes=4) as pool:
            pool.map(get_songs, tasks)
    print("下载完毕!")
# Seconds to wait on any single HTTP request before giving up on it.
_REQUEST_TIMEOUT = 15


def _download_to_file(url, file_path, saved_msg, failed_msg):
    """Download ``url`` into ``file_path`` and print the outcome."""
    response = requests.get(url, timeout=_REQUEST_TIMEOUT)
    if response.status_code == 200:
        # Create the target directory on demand; without it every write fails
        # with FileNotFoundError.
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        with open(file_path, "wb") as file:
            file.write(response.content)
        print(saved_msg, file_path)
    else:
        print(failed_msg, response.status_code)


def _extract_artist(soup):
    """Return the artist text from a parsed song page, or None if absent."""
    artist_p = soup.find("p", class_="des s-fc4")
    artist_span = artist_p.find("span") if artist_p is not None else None
    if artist_span is None:
        print("未找到歌手信息")
        return None
    # .get() instead of indexing: a span without a title yields None rather
    # than raising KeyError.
    artists = artist_span.get("title")
    print("歌手信息:", artists)
    return artists


def _fetch_lyric(song_id):
    """Return the lyric text for ``song_id``, or '' when none is provided."""
    resp = requests.get(
        url='http://music.163.com/api/song/lyric?id={}&lv=-1&kv=-1&tv=-1'.format(song_id),
        headers=headers,
        timeout=_REQUEST_TIMEOUT,
    )
    payload = resp.json()
    lrc = payload.get('lrc') if isinstance(payload, dict) else None
    if isinstance(lrc, dict) and lrc.get('lyric'):
        return lrc['lyric']
    return ''


def _process_song(song_id, song_name, type_data):
    """Fetch cover, artist, lyric and audio for one song and record it."""
    # Redirects are not followed: the Location header of this response is
    # used as the audio file's address.
    redirect = requests.get(
        'https://music.163.com/song/media/outer/url?id=' + song_id,
        headers=headers,
        allow_redirects=False,
        timeout=_REQUEST_TIMEOUT,
    )
    song_page = requests.get(
        url='https://music.163.com/song?id={}'.format(song_id),
        headers=headers,
        timeout=_REQUEST_TIMEOUT,
    )
    soup = BeautifulSoup(song_page.text, 'html.parser')

    # The cover image address is read from the page's og:image meta tag.
    image_meta_tag = soup.find("meta", property="og:image")
    if image_meta_tag:
        _download_to_file(
            image_meta_tag.get("content"),
            "media/image/{}.jpg".format(song_id),
            "图片已保存到:",
            "无法下载图片:",
        )
    else:
        print("未找到图片链接")

    artists = _extract_artist(soup)
    song_content = _fetch_lyric(song_id)

    music_link = redirect.headers.get('Location')
    # Links of 30 characters or fewer are skipped, as in the original code.
    # NOTE(review): presumably such a short target is a placeholder/404
    # redirect rather than a real audio file -- confirm.
    if not music_link or len(music_link) <= 30:
        return

    category_id = savesql_category(name=type_data)
    savesql(
        name=song_name,
        author=artists,
        image='{}.jpg'.format(song_id),
        file='{}.mp3'.format(song_id),
        content=song_content,
        category_id=category_id,
    )
    # NOTE(review): the MP3 is written next to the cover images under
    # media/image/ -- confirm this matches the storage path the model expects.
    _download_to_file(
        music_link,
        "media/image/{}.mp3".format(song_id),
        "MP3文件已保存到:",
        "无法下载MP3文件:",
    )


def get_songs(args):
    """Crawl one playlist: download and record every song listed on its page.

    Args:
        args: A ``(data, type_data)`` pair. ``data`` is a playlist match whose
            element 1 is the playlist id; ``type_data`` is the category name
            stored with each song. A single tuple argument is used so the
            function can be driven by ``Pool.map``.

    Returns:
        None. A failure on one song is printed and the loop continues with the
        next song; a failure to fetch the playlist page skips the playlist.
    """
    data, type_data = args
    playlist_url = "https://music.163.com/playlist?id=%s" % data[1]
    # Random 0-3 second pause before each playlist request.
    time.sleep(random.randint(0, 3))
    try:
        res = requests.get(playlist_url, headers=headers, timeout=_REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        print("无法获取歌单:", playlist_url, exc)
        return

    for song_id, song_name in re.findall(r'<a href="/song\?id=(\d+)">(.*?)</a>', res.text):
        try:
            _process_song(song_id, song_name, type_data)
        except (OSError, KeyError, ValueError) as exc:
            # OSError covers file-system errors and requests' network errors;
            # KeyError/ValueError cover malformed or non-JSON API responses.
            # The song is skipped with a message so that one bad song cannot
            # abort the whole worker pool.
            print("跳过歌曲:", song_name, exc)
def savesql(name, author, image, file, content, category_id):
    """Insert one song row via ``MusicModel``; report failures without raising.

    Args:
        name: Song title.
        author: Artist text, or None when it could not be extracted.
        image: Cover image file name.
        file: Audio file name.
        content: Lyric text.
        category_id: Primary key of the category the song belongs to.

    Returns:
        None. Saving is best-effort: on failure the song name and the error
        are printed and the caller continues.
    """
    try:
        MusicModel(
            name=name,
            file=file,
            author=author,
            image=image,
            create_time=datetime.datetime.now(),
            content=content,
            category_id=category_id,
        ).save()
    except Exception as exc:
        # `Exception` rather than a bare `except`, so KeyboardInterrupt and
        # SystemExit still propagate; the cause is printed with the message.
        print(name, '插入报错', exc)
def savesql_category(name):
    """Return the id of the category called ``name``, creating it if needed.

    Args:
        name: Category name to look up.

    Returns:
        The primary key of the existing or newly created ``CategoryModel`` row.
    """
    # get_or_create replaces the former "get, and on *any* exception insert"
    # sequence, whose bare `except` also hid unrelated errors such as a
    # database connection failure.
    category, _created = CategoryModel.objects.get_or_create(
        name=name,
        defaults={'create_time': datetime.datetime.now()},
    )
    return category.id
if __name__ == '__main__':
    # Script entry point: for every category in `data2`, walk six listing
    # pages (offsets 0, 35, ..., 175 with 35 playlists per page) and crawl each.
    from urllib.parse import quote

    for a in data2:
        print(a)
        # Percent-encode the category so that names containing reserved
        # characters (e.g. 'R&B/Soul') cannot corrupt the query string.
        encoded_cat = quote(a, safe='')
        for i in range(0, 210, 35):
            hot_url = (
                'https://music.163.com/discover/playlist/'
                '?order=hot&cat={}&limit=35&offset={}'.format(encoded_cat, i)
            )
            get_page(hot_url, a)