# coding: utf-8
import requests
import os
from lxml import etree
import json
from spider_project.proxies import proxies
import random
class WangYiYunSpider:
'''爬取所有歌单的信息'''
def __init__(self):
    """Initialise the spider's URLs, request headers and result containers.

    Every list attribute starts out empty and ``classname`` starts as an
    empty string; this initializer performs no network requests.
    """
    user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'

    # Request configuration: browser-like headers plus the imported proxy pool.
    self.headers = {'User-Agent': user_agent}
    self.proxies = proxies

    # Site root, the playlist landing page, and the per-category URL template
    # (the ``{}`` placeholder is left for str.format).
    self.root_url = 'http://music.163.com'
    self.start_url = 'http://music.163.com/discover/playlist'
    self.class_url = 'http://music.163.com/discover/playlist/?cat={}'

    # Sub-category bookkeeping.
    self.classname = ''
    self.classname_list = []  # names of all sub-categories
    self.class_url_list = []  # URLs of all sub-categories

    # Playlist bookkeeping.
    self.playlist_urls = []  # URLs of every playlist within each sub-category
    self.playlist_info = []
def parse_url(self, url=None):
print(url)
if url is None:
resp = requests.get(self.start_url,
headers=self.headers,
proxies=random.choice(self.proxies)
)
else:
resp = requests.get(url, headers=self.headers)
resp.encodin
python 爬取某音乐平台所有歌单信息
最新推荐文章于 2024-07-30 11:14:47 发布