import os
import re

import requests
from lxml import etree
# Address-bar form of the playlist link (note the '#/' fragment):
#   https://music.163.com/#/playlist?id=5017583325
# The page is disguised behind that form; the URL below is the one found via
# the browser's developer tools (F12).
url = 'https://music.163.com/playlist?id=5017583325'

# Browser-like User-Agent header passed along with the HTTP requests.
headers = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/83.0.4103.116 Safari/537.36'
    ),
}
# Characters that cannot appear in a Windows file name (plus line breaks/tabs);
# song titles scraped from the page may contain any of them.
_INVALID_FILE_NAME_CHARS = re.compile(r'[\\/:*?"<>|\r\n\t]')


def _safe_file_name(title, fallback):
    """Return *title* made safe for use as a file name, or *fallback* if nothing usable remains."""
    cleaned = _INVALID_FILE_NAME_CHARS.sub('_', title or '').strip(' .')
    return cleaned or fallback


def get_content(url, save_dir=r'D:\Desktop\pystudy\网易云音乐', timeout=10):
    """Download every track listed on a NetEase Cloud Music playlist page.

    The playlist HTML is fetched, the hidden ``<ul class="f-hide">`` list is
    read for ``(song id, title)`` pairs, and each song is fetched through the
    public "outer" media link and written to ``<save_dir>/<title>.mp3``.

    Args:
        url: Playlist page URL (the form without ``#/``).
        save_dir: Directory the ``.mp3`` files are written to; created if it
            does not exist. Defaults to the path the script always used.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        None. Progress is reported with ``print``.

    Raises:
        requests.RequestException: If the playlist page itself cannot be
            fetched. Failures of individual tracks are reported and skipped.
    """
    # 1. Request the playlist page and decode the returned bytes.
    page = requests.get(url, headers=headers, timeout=timeout)
    page.raise_for_status()
    html = page.content.decode('utf-8')

    # 2. Parse the HTML so the data can be extracted with XPath.
    tree = etree.HTML(html)
    if tree is None:
        return

    # XPath used below:
    #   //  : match nodes anywhere in the document
    #   []  : predicate (filter condition)
    #   /   : step to a child node
    # Each matched anchor looks like <a href="/song?id=1450025246">title</a>.
    # Reading href and text from the same element keeps ids and titles paired
    # even when an anchor has no text.
    links = tree.xpath('//ul[@class="f-hide"]/li/a')
    if not links:
        return

    os.makedirs(save_dir, exist_ok=True)

    for link in links:
        href = link.get('href')
        if not href:
            continue
        url_id = href.split('=')[-1]
        file_name = link.text or ''
        print(url_id, file_name)

        # Public "outer" link that serves the audio for a given song id.
        music_base = 'http://music.163.com/song/media/outer/url?id=%s' % url_id
        print(music_base)

        # Download first, open the file afterwards: a failed request must not
        # leave an empty or truncated .mp3 behind.
        try:
            req = requests.get(music_base, headers=headers, timeout=timeout)
            req.raise_for_status()
        except requests.RequestException as err:
            print('skipped %s (%s): %s' % (file_name, url_id, err))
            continue

        # NOTE(review): unavailable tracks appear to come back as an HTML page
        # instead of audio -- confirm against the live service.
        content_type = req.headers.get('Content-Type', '')
        if not req.content or content_type.startswith('text/html'):
            print('skipped %s (%s): no audio returned' % (file_name, url_id))
            continue

        file_path = os.path.join(
            save_dir, '%s.mp3' % _safe_file_name(file_name, url_id))
        with open(file_path, 'wb') as mu:
            mu.write(req.content)
# Runs at module level: fetch the playlist at `url` and save each of its tracks.
get_content(url)
# Python: scraping a NetEase Cloud Music playlist (non-VIP tracks)
# Latest recommended article published 2024-09-28 16:16:51