【Python——爬取网易云相关歌曲信息】

import re
import requests
import Proxy

# Request configuration shared by every fetch in this script.
# NOTE(review): `{省略}` ("omitted") looks like a placeholder left in the
# published snippet; real header/cookie mappings must be supplied before the
# script can run.
headers = {省略}
cookies1 = {省略}
cookies2 = {省略}
cookies3 = {省略}
cookies4 = {省略}

# Proxy settings come from the project-local Proxy module (a wrapper helper);
# presumably a requests-style proxies mapping -- confirm against Proxy.get_ip.
proxies = Proxy.get_ip()

# Download the artist discovery page and decode the raw body (UTF-8 default).
url1 = 'https://music.163.com/discover/artist'
response1 = requests.get(url1, headers=headers, cookies=cookies1, proxies=proxies)
content1 = response1.content.decode()

# Gather every (href, link text) pair found in the page, then keep only the
# entries at positions 6..20, which the crawl loop uses as its starting links.
datas = re.findall('href="([^"]+)"[^>]*>([^<]+)<', content1)
datas = datas[6:21]

# Crawl in three hops for every (href, text) pair in `datas`:
#   1. the linked listing page   -> (profile href, title) pairs for each singer
#   2. each singer's profile page -> href of the "view artist page" button
#   3. the artist page            -> song titles
# One line is printed per singer: "<category text>---<singer title>---<songs>".
for category_path, category_name in datas:
    url2 = 'https://music.163.com' + category_path
    response2 = requests.get(url=url2, proxies=proxies, headers=headers, cookies=cookies2)
    content2 = response2.content.decode()
    # Each match is a (href, title) tuple taken from the "f-tdn" anchors.
    singers = re.findall('<a class="f-tdn" href="(.*?)" title="(.*?)">', content2)

    for singer_path, singer_name in singers:
        url3 = 'https://music.163.com' + singer_path
        response3 = requests.get(url=url3, proxies=proxies, headers=headers, cookies=cookies3)
        content3 = response3.content.decode()
        # Locate the button whose label reads "view artist page" (查看歌手页).
        results = re.findall('<a href="(.*?)" hidefocus="true" class="u-btn2 u-btn2-1"><i>查看歌手页</i></a>', content3)

        if not results:
            # The page did not contain the expected link (different layout or
            # an unexpected response). Skip this singer instead of letting
            # results[0] raise IndexError and abort the whole crawl.
            continue

        url4 = 'https://music.163.com' + results[0]
        response4 = requests.get(url=url4, proxies=proxies, headers=headers, cookies=cookies4)
        content4 = response4.content.decode()
        # Song titles are the link texts of the "/song?id=<digits>" list items.
        songs = re.findall(r'<li><a href="/song\?id=[0-9]+">(.*?)</a></li>', content4)
        print(f'{category_name}---{singer_name}---{songs}')
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值