2020-08-05

python爬虫入门 网易云音乐搜索下载

库:

            selenium
            requests
            pyquery
            pymongo
            re

代码:

import re
import  requests
from selenium import webdriver
from pyquery import  PyQuery as pq
import pymongo
def research():
    """Prompt on the console for a song title and return the text typed.

    :return: the line entered by the user, exactly as ``input`` returns it.
    """
    return input('请输入歌曲名字:')

def page(name):
    """Load the search page for *name* in headless Chrome and return its markup.

    The browser is pointed at the music.163.com search URL, switched into the
    frame named ``g_iframe``, and the page source seen from inside that frame
    is returned.

    :param name: search keyword (song title) typed by the user.
    :return: page source as a string, taken after switching to ``g_iframe``.
    :raises Exception: any Selenium error propagates to the caller; the
        browser session is shut down first.
    """
    from urllib.parse import quote

    # Percent-encode the keyword so characters such as '&', '#' or spaces
    # cannot change the structure of the URL.
    url = 'https://music.163.com/#/search/m/?s={}'.format(quote(name, safe=''))

    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument('--headless')
    browser = webdriver.Chrome(options=chrome_options)
    try:
        browser.get(url)
        browser.switch_to.frame("g_iframe")
        return browser.page_source
    finally:
        # quit() ends the whole WebDriver session (close() only closes the
        # current window); doing it in ``finally`` avoids leaking the browser
        # when get()/switch_to raise.
        browser.quit()



def parse_page(html):
    """Yield one dict per search-result row found in *html*.

    Rows are the elements matching ``.item.f-cb.h-flag``. Each yielded dict
    has the keys:

    * ``'song'``   - ``title`` attribute of the ``<b>`` inside the title link
    * ``'singer'`` - text of the link(s) in the ``.td.w1`` cell
    * ``'href'``   - absolute URL built from the title link's ``href``, or
      ``None`` when the row has no such attribute

    :param html: markup of the search-result frame.
    :return: generator of ``{'song', 'singer', 'href'}`` dicts.
    """
    from urllib.parse import urljoin

    doc = pq(html)
    for row in doc('.item.f-cb.h-flag').items():
        title_node = row.find('.td.w0 div div a b')
        link_node = row.find('.td.w0 div div a')
        singer_node = row.find('.td.w1  div a')

        href = link_node.attr('href')
        yield {
            'song': title_node.attr('title'),
            'singer': singer_node.text(),
            # urljoin avoids the "//" that plain concatenation produces when
            # the href already starts with a slash, and a missing href no
            # longer raises TypeError.
            'href': urljoin('https://music.163.com/', href) if href else None,
        }

def download(song_id,singer,name):
    """Download one track and write it to ``<singer>-<name>.mp3`` in the CWD.

    The audio is requested from the ``song/media/outer/url`` endpoint. Network
    errors, non-success status codes and HTML responses are reported on stdout
    and skipped instead of being written to disk as a bogus ``.mp3``.

    :param song_id: track id inserted into the download URL.
    :param singer: artist text, used as the first part of the file name.
    :param name: song title, used as the second part of the file name.
    :return: None
    """
    music_api = 'http://music.163.com/song/media/outer/url?id={}.mp3'
    music_url = music_api.format(str(song_id))
    print(music_url)
    headers = {
        'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'
    }

    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(music_url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        print('下载失败 歌曲:%s-%s (%s)' % (singer, name, exc))
        return

    # NOTE(review): an HTML body is presumably an error/redirect page for an
    # unavailable track - it is never valid MP3 data, so do not save it.
    content_type = response.headers.get('Content-Type', '')
    if not response.ok or 'text/html' in content_type:
        print('下载失败 歌曲:%s-%s (HTTP %s)' % (singer, name, response.status_code))
        return

    # Characters such as '/' (e.g. several artists joined by a slash) are not
    # legal in file names and would make open() fail.
    filename = re.sub(r'[\\/:*?"<>|]', '_', '%s-%s.mp3' % (singer, name))
    with open(filename, 'wb') as f:
        f.write(response.content)
    print('已下载 歌曲:%s' % filename)




def save_mongo(result):
    """
    Insert *result* into the ``song`` collection of the ``wangyiyun`` database.

    Connects to MongoDB on ``localhost:27017``. This is best-effort: any
    failure is reported on stdout (with its reason) and not re-raised.

    :param result: document (dict) to store.
    :return: None
    """
    try:
        # The context manager closes the client so that each call does not
        # leave a connection pool behind.
        with pymongo.MongoClient(host='localhost', port=27017) as client:
            client['wangyiyun']['song'].insert_one(result)
    except Exception as exc:
        # Deliberately broad: saving must never abort the download loop.
        print('保存失败', exc)
    else:
        print('保存成功')








def main():
    """Run the interactive flow: prompt, search, store each result, download it.

    For every parsed result the record is saved to MongoDB, printed, and the
    track is downloaded. The song id is taken from the result's own ``href``
    when it contains one; otherwise the id found at the same position in the
    page-wide list of ``/song?id=`` links is used. Results for which no id can
    be determined are reported and skipped.
    """
    keyword = research()
    html = page(keyword)

    # Raw string: '\?' inside a normal string literal is an invalid escape
    # sequence and triggers a warning on current Python versions.
    song_ids = re.findall(r'a href="/song\?id=(.*?)"', html)

    for index, result in enumerate(parse_page(html)):
        singer = result['singer']
        title = result['song']
        save_mongo(result)
        print(result)

        # Prefer the id embedded in this row's link so a song can never be
        # paired with another row's id; fall back to positional matching.
        own_id = re.search(r'[?&]id=(\d+)', result.get('href') or '')
        if own_id:
            song_id = own_id.group(1)
        elif index < len(song_ids):
            song_id = song_ids[index]
        else:
            print('未找到歌曲ID,跳过下载:%s-%s' % (singer, title))
            continue

        download(song_id, singer, title)


# Start the interactive search-and-download flow only when this file is run
# as a script, not when it is imported.
if __name__ == '__main__':
    main()

音乐下载 API 自己实在找不到,是从百度上搜来的。
歌名、歌手等信息位于名为 g_iframe 的 iframe 中,因此需要先切换到该 iframe 再获取页面源码。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值