Python 爬虫入门:网易云音乐搜索与下载
库:
selenium
requests
pyquery
re
pymongo
代码:
import re
import requests
from selenium import webdriver
from pyquery import PyQuery as pq
import pymongo
def research():
    """Prompt on standard input for a song title and return what was typed."""
    return input('请输入歌曲名字:')
def page(name):
    """Return the search-result markup for *name* from music.163.com.

    A headless Chrome session opens the search URL and switches into the
    frame named ``g_iframe`` before the page source is read.

    :param name: search keyword; it is inserted into the URL unchanged.
    :return: the driver's ``page_source`` after switching into ``g_iframe``.
    """
    music_search = 'https://music.163.com/#/search/m/?s={}'
    url = music_search.format(name)

    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument('--headless')
    browser = webdriver.Chrome(options=chrome_options)
    try:
        browser.get(url)
        browser.switch_to.frame("g_iframe")
        return browser.page_source
    finally:
        # quit() ends the whole driver session; close() would only close the
        # current window. Doing it in ``finally`` also cleans up when
        # navigation or the frame switch raises.
        browser.quit()
def parse_page(html):
    """Yield one dict per search-result row found in *html*.

    Each yielded dict has three keys, in this order:

    * ``song``   - ``title`` attribute of the ``<b>`` inside the row's link
    * ``singer`` - text of the link(s) in the ``.td.w1`` cell
    * ``href``   - the row link's ``href`` joined onto the site root

    :param html: markup of the search-result frame.
    :return: generator of dicts as described above.
    """
    base_url = 'https://music.163.com'
    doc = pq(html)
    for row in doc('.item.f-cb.h-flag').items():
        title_node = row.find('.td.w0 div div a b')
        link_node = row.find('.td.w0 div div a')
        singer_node = row.find('.td.w1 div a')

        # attr() gives None when no link matched; fall back to '' so the
        # join below cannot fail with "str + None".
        href = link_node.attr('href') or ''

        yield {
            'song': title_node.attr('title'),
            'singer': singer_node.text(),
            # Join with exactly one slash so a site-relative href such as
            # "/song?id=1" does not become "https://music.163.com//song?id=1".
            'href': base_url + '/' + href.lstrip('/'),
        }
def download(song_id, singer, name):
    """Download one song into the current directory as ``<singer>-<name>.mp3``.

    :param song_id: song id, substituted into the outer-url download link.
    :param singer: artist text, used in the file name and the status message.
    :param name: song title, used in the file name and the status message.
    :return: None. A failed request is reported on stdout and no file is
        written.
    """
    music_api = 'http://music.163.com/song/media/outer/url?id={}.mp3'
    music_url = music_api.format(str(song_id))
    print(music_url)
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'
    }

    # Artist/title text may contain characters that are path separators or
    # reserved in file names (e.g. "/"); replace them so open() cannot fail
    # or write outside the current directory.
    file_name = re.sub(r'[\\/:*?"<>|]', '_', '%s-%s.mp3' % (singer, name))

    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(music_url, headers=headers, timeout=30)
        # Do not save an HTTP error body under an .mp3 name.
        response.raise_for_status()
    except requests.RequestException as exc:
        print('下载失败 歌曲:%s-%s.mp3 (%s)' % (singer, name, exc))
        return

    with open(file_name, 'wb') as f:
        f.write(response.content)
    print('已下载 歌曲:%s-%s.mp3' % (singer, name))
def save_mongo(result):
    """Insert *result* into the ``song`` collection of the ``wangyiyun`` database.

    Connects to MongoDB on ``localhost:27017``. Saving is best-effort: any
    failure is reported on stdout and never propagated to the caller.

    :param result: the document (dict) to store.
    :return: None
    """
    try:
        # The context manager closes the client on exit, so repeated calls
        # do not leave one open connection pool behind per saved song.
        with pymongo.MongoClient(host='localhost', port=27017) as client:
            client['wangyiyun']['song'].insert_one(result)
    except Exception as exc:
        # Deliberately broad to keep the best-effort contract, but show the
        # reason instead of hiding it.
        print('保存失败:%s' % exc)
    else:
        # insert_one() raises on failure, so reaching here means success.
        print('保存成功')
def main():
    """Run the interactive flow: ask for a title, search, then save and download.

    Every parsed result is saved to MongoDB and printed. It is then
    downloaded using the song id found at the same position in the page
    markup.
    """
    keyword = research()
    html = page(keyword)
    results = parse_page(html)
    # Raw string: "\?" in a normal string literal is an invalid escape
    # sequence and triggers a warning on current Python versions.
    song_ids = re.findall(r'a href="/song\?id=(.*?)"', html)

    for index, result in enumerate(results):
        save_mongo(result)
        print(result)
        if index >= len(song_ids):
            # Fewer ids than parsed rows: skip the download instead of
            # crashing with IndexError.
            print('未找到歌曲id,跳过下载')
            continue
        download(song_ids[index], result['singer'], result['song'])
# Start the interactive search-and-download flow only when this file is run
# as a script, not when it is imported.
if __name__ == "__main__":
    main()
音乐下载 API 我自己实在没找到,是从百度上搜到的。
歌名、歌手等信息位于名为 g_iframe 的 iframe 中,因此需要先用 browser.switch_to.frame("g_iframe") 切换到该 iframe,再获取页面源码。