Python 爬取网易云音乐歌手 id 和歌手姓名

成功爬取网易云音乐的关键在于获取网站的框架(iframe)源码。打开网易云音乐,右键查看网页源码,会发现其中找不到歌手的 id;仔细观察源码可以看到 iframe 标签,即网页中嵌套了另一个网页。打开开发者工具,搜索歌手 id 就能定位到它所在的位置;再右键查看框架源码,会发现很多内容都藏在框架源码里。找出框架源码链接的规律,然后依此遍历即可。
源码如下:

import requests
import csv
from bs4 import BeautifulSoup
# Output CSV shared by the crawler functions below.
# - 'utf-8-sig' prepends a UTF-8 BOM to the file.
# - newline='' is required by the csv module: without it, text-mode newline
#   translation on Windows turns the writer's '\r\n' into '\r\r\n', which
#   shows up as an empty line between every record.
csvfile = open('F:/music/final_id.csv', 'w', encoding='utf-8-sig', newline='')
writer = csv.writer(csvfile)
def get_id(url):
    """Fetch one artist listing page and append its artists to the CSV.

    Downloads ``url``, finds every ``<a>`` tag whose class attribute is
    exactly ``'nm nm-icn f-thide s-fc0'`` and writes one ``[id, name]`` row
    per tag through the module-level ``writer``. The id is the tag's
    ``href`` with the ``/artist?id=`` prefix removed; the name is the tag's
    ``.string``.

    The ``('id', 'name')`` header row is written only on the first call, so
    crawling many pages yields a single header instead of one per page.

    Args:
        url: Address of the listing page to download.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within the timeout.
    """
    # NOTE(review): the Cookie below is a hard-coded personal session
    # (account e-mail, MUSIC_U token, CSRF token). It is kept unchanged so
    # the request stays identical, but it should be loaded from private
    # configuration rather than committed with the script.
    # NOTE(review): 'br' is advertised in Accept-Encoding; presumably the
    # response only decodes correctly if Brotli support is installed -- confirm.
    head = {
        'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Cookie': '_ntes_nnid=0227c96050f301d8d66f88f08572555d,1541424213366; _ga=GA1.2.1920571757.1541424213; _ntes_nuid=0227c96050f301d8d66f88f08572555d; P_INFO=m15836197228@163.com|1541470304|0|mail163|00&99|CN&1540604878&mail163#CN&null#10#0#0|158228&1||15836197228@163.com; nts_mail_user=15836197228@163.com:-1:1; mail_psc_fingerprint=b1d7cb50dc5d1c8d3abb51a7cc887480; _iuqxldmzr_=32; WM_TID=Egb4gWr696hARQEFRQMsPOh84Yx57b5t; WM_NI=zvz40%2B4jV4yOboEkLCTem0%2BecEEY%2B3H46UYa5zEgmxPGSHlQwEeQlm%2FJ7qW%2FrcLqWvqx9aKa3cB8RyQvqQSy%2Fs%2FVnZydIr2eIMTLZPi2qjJEcgRRZkS%2FanqAH7WlaZq1dkU%3D; WM_NIKE=9ca17ae2e6ffcda170e2e6ee93f153f3b6bc96f73ffc9e8fa2d44e838b8eabb76df3befcd1e442b0b4bdd9d32af0fea7c3b92abaaab6d5b321f8b69ad5fc45a2b18b8ccf4998bba3d1aa6fa19b8bbaaa398cecfab6dc6bf7a68b88f07494999da9ef5bb59babb2ef67ed96f992d625b2bd8492d67eb3a8f782ca6bf1bd8ed9d17393aabfb2d273edade1b7ed52bbbc8793b16f8b939cadd95cabeebf8bc94fa1b99eb3d96191f1af8ff44b8ce9998de452b3efac8cd837e2a3; __remember_me=true; MUSIC_U=a8e5240d1fcea675f80a9d81831663124dc2f6d012674251c111821b5d630155c716b40e34442360d06dd05a8f8bf833797e33104524ed092e53073575b2ca7d4b81de1d1021826bbf122d59fa1ed6a2; __csrf=0b43b89433df1eac4230654c80c768e5; JSESSIONID-WYYY=oTJj9fbCXpo92tZWu4kKYW0UT7DJDMU3%2FSpjnFzoKpsgCtk5V%5CtczcPZi%2BhKWx5985ZvRBxCYnp3lDy8myF%2BDVNwzl94jrcKFQ4mCfNsmlgTyGwljp%5CDBPJQz4o2DfTwuw404NjeGUZE4jA8PsimMNCI9NKP7JVcEoinlns0k%5CW0irif%3A1551073125018',
        'Host': 'music.163.com',
        'Referer': 'https://www.baidu.com/link?url=eE8XnnLMwsWDtppP7ST7Czah8b9hid3z-fdcgIuHNj7&ck=3821.6.62.144.149.137.155.367&shh=www.baidu.com&sht=baiduhome_pg&wd=%E7%BD%91%E6%98%93%E4%BA%91%E9%9F%B3%E4%B9%90&issp=1&f=8&ie=utf-8&rqlang=cn&tn=baiduhome_pg&inputT=4435',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'
        }

    # Write the header once per process; the flag lives on the function so
    # the fix does not depend on any other module-level state.
    if not getattr(get_id, '_header_written', False):
        writer.writerow(('id', 'name'))
        get_id._header_written = True

    # Without a timeout a stalled connection would block the crawl forever.
    res = requests.get(url, headers=head, timeout=10)
    soup = BeautifulSoup(res.text, 'lxml')
    for tag in soup.find_all('a', attrs={'class': 'nm nm-icn f-thide s-fc0'}):
        # strip() guards against stray whitespace around the href value so
        # it cannot leak into the id column.
        artist_id = tag['href'].strip().replace('/artist?id=', '')
        writer.writerow([artist_id, tag.string])
def main():
    """Call ``get_id`` for every combination of category id and initial.

    Categories are iterated in the listed order and, for each category, all
    ``initial`` values are requested in order: -1, then 65 through 90, then 0.
    """
    category_ids = (
        1001, 1002, 1003,
        2001, 2002, 2003,
        6001, 6002, 6003,
        7001, 7002, 7003,
        4001, 4002, 4003,
    )
    # 65..90 are the ASCII codes of 'A'..'Z' (presumably the artist-name
    # initial); -1 and 0 are extra values the site accepts -- their exact
    # meaning is not visible here, TODO confirm.
    initials = [-1] + list(range(65, 91)) + [0]
    url_template = 'https://music.163.com/discover/artist/cat?id={}&initial={}'

    for category in category_ids:
        for initial in initials:
            get_id(url_template.format(category, initial))
# Run the crawl only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值