# 网易云爬取首页歌单里的所有歌曲
# 前言:本文章仅供个人参考使用,非商用用途,其中参考了其他的文献资料,如有不妥之处,请联系本人邮箱:wurenjie8@163.com
# 思路:通过首页URL获取所有首页的歌单的ID,再通过分析歌单获取里面的每一首歌的id,再进行下载保存音频文件。
from bs4 import BeautifulSoup
import requests
import os
# Step 1: fetch the NetEase Cloud Music "discover" page and collect every
# playlist link on it, building {playlist title -> '?id=NNN' query suffix}.
url = 'https://music.163.com/discover'
headers = {
    # Session cookie + desktop UA so the site serves the full HTML page.
    'Cookie': '_iuqxldmzr_=32; _ntes_nnid=7eb51552d5d4478669c6c5ec6f12dfff,1621428628585; _ntes_nuid=7eb51552d5d4478669c6c5ec6f12dfff; NMTID=00On2_Ho1af1EoCUkYhkbLrH1zSPMMAAAF5hK1rbw; WEVNSM=1.0.0; WM_TID=AFs65iudA4JBVFVQAUc%2Fw8UliPI5vssq; JSESSIONID-WYYY=t93PiQv9bfQOwYSSSGeeKO45tPi0lVlsBvPgd6ol0QR8VISe7uGRvB6bRKb33rapggo1Tfv9wjq36jlYui9i02E%2Bsz9dSXyKgvYTAFljJJTJ%5CsaXvQNcm5TToVBMAdHmOgq2%2Fn8ogBOjnaZ3pjFeFFrCsme89otbw%2Bv4iIDUPGEnxdxH%3A1622982313933; WNMCID=zzcmaq.1622980514474.01.0; WM_NI=eNTwP1i3Cpx1XXPuRw20m%2BvZpgPt453OmGlHTjLHuWP1OvzER0VsiQz38aOXVSjdTzU209BEdoJ5HO1sc4XICH8xrGyF7TaTVOpbEM5uSqP9fRi4Nh25pNu1jdr%2FkZjbaVg%3D; WM_NIKE=9ca17ae2e6ffcda170e2e6eea3f269aa8d8287d443b78e8eb3d45e839b8fbaaa3beda999b4ca46ab8db6a3f62af0fea7c3b92aa7929c98d33babed8d91fb3b9c88a096d17df2f0a3ccf552b4b9a095b73ff191a3a8ef4498b684a9f85d94ef8197f94f92f09a94ec5bbaedf882ce54968da685d77297ee00dac14fa8b186d6cb45f3adadd5db438198f899f14dbb9af9b5e4609691c083c847f7ab9d92d333a68ea7d5cd64f2ebc0aff83baabcbed0ea52aabeafb7e637e2a3',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36'
}
response = requests.get(url=url, headers=headers, timeout=30)
print(response.status_code)
home_html = response.text
soup = BeautifulSoup(home_html, 'lxml')
# Playlist anchors on the discover page carry class "tit s-fc0".
span1 = soup.findAll('a', attrs={"class": "tit s-fc0"})
data = {}
for href in span1:
    a_href = href.attrs['href']
    a_title = href.attrs['title']
    # Keep only playlist links of the form '/playlist?id=NNN';
    # startswith() is clearer and safer than slicing a fixed width.
    if a_href.startswith('/playlist?id'):
        # Store just the '?id=NNN' query suffix ('/playlist' is 9 chars).
        data[a_title] = a_href[9:]
print(data)
# Step 2: ask the user for a playlist title collected above, then fetch the
# playlist page, extract each song link, and save every track as an .mp3.
my_key = input("请输入歌单名如(我字字皆你,你却句句非我):")
if my_key in data:
    # Rebuild the playlist URL from the stored '?id=NNN' suffix.
    url = 'https://music.163.com/playlist' + data[my_key]
    print(url)
    response = requests.get(url=url, headers=headers, timeout=30)
    html = response.text
    dir_path = os.path.join(os.path.abspath('.'), 'music_list')
    # makedirs(exist_ok=True) avoids the exists()/mkdir check-then-act race.
    os.makedirs(dir_path, exist_ok=True)
    soup = BeautifulSoup(html, 'lxml')
    # The hidden <ul class="f-hide"> holds one <a> per song in the playlist.
    ul = soup.find('ul', attrs={"class": "f-hide"})
    a = ul.findAll('a')
    url_dict = {}
    for i in a:
        href = i.attrs['href']
        music_name = i.string
        if not music_name:
            # Anchor without readable text — skip instead of writing "None.mp3".
            continue
        # Map '/song?id=NNN' to the public "outer" media redirect endpoint
        # ('/song' is 5 chars, so href[5:] keeps the '?id=NNN' query).
        url = "http://music.163.com/song/media/outer/url" + href[5:]
        url_dict[music_name] = url
    # Characters that are illegal in Windows filenames (and '/' on POSIX);
    # song titles frequently contain them, which would break open().
    bad_chars = '\\/:*?"<>|'
    for music_name in url_dict:
        url = url_dict[music_name]
        print(url)
        response = requests.get(url, headers=headers, timeout=20)
        safe_name = ''.join(ch for ch in music_name if ch not in bad_chars)
        file_path = os.path.join(dir_path, "{}.mp3".format(safe_name))
        with open(file_path, 'wb') as f:
            f.write(response.content)
        print(music_name, '下载成功')
    # Fixed typo in the completion message: 以 -> 已 ("already downloaded").
    print("所有歌曲已下载完成!")
else:
    print("您输入的歌单名并不存在哦!")