# Scrapy spider that crawls Kuwo (kw) playlists.
import scrapy
import json
class KuwoSpider(scrapy.Spider):
    """Spider that reads one Kuwo playlist and resolves each track's audio URL.

    The playlist endpoint in ``start_urls`` is requested with a csrf header
    and a ``kw_token`` cookie; every entry of the returned ``musicList`` is
    then followed to the URL-conversion endpoint, and the entry (augmented
    with ``irc_href`` and ``music_url``) is yielded as the scraped item.
    """

    name = 'kuwo'
    allowed_domains = ['kuwo.cn']
    start_urls = ['http://www.kuwo.cn/api/www/playlist/playListInfo?pid=1082685104&pn=1&rn=60']

    def start_requests(self):
        """Yield the initial playlist requests with the required headers/cookie.

        The ``csrf`` header carries the same value as the ``kw_token`` cookie.
        NOTE(review): presumably the server checks that the two match --
        confirm before changing either value independently.
        """
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36',
            'csrf': '3UIQ2GWU3QH',
            'Referer': 'http://www.kuwo.cn',
        }
        cookie = {'kw_token': '3UIQ2GWU3QH'}
        for url in self.start_urls:
            # dont_filter=True bypasses Scrapy's duplicate-request filter for
            # the seed URLs.
            yield scrapy.Request(url, cookies=cookie, headers=headers, dont_filter=True)

    def parse(self, response):
        """Schedule one URL-resolution request per track in the playlist.

        Each track dict from ``data.musicList`` gets an ``irc_href`` entry
        built from its ``rid`` and is passed along to ``parse_music_url`` via
        ``meta['music']``. If the response is not the expected JSON shape, a
        warning is logged and nothing is yielded.
        """
        try:
            music_list = json.loads(response.text)['data']['musicList']
        except (ValueError, KeyError, TypeError):
            # Non-JSON body, or an error payload without data/musicList.
            self.logger.warning('Unexpected playlist response from %s', response.url)
            return

        for music in music_list:
            rid = music['rid']
            href = 'http://www.kuwo.cn/url?format=mp3&rid={0}&response=url&type=convert_url3'.format(rid)
            irc_href = 'http://m.kuwo.cn/newh5/singles/songinfoandlrc?musicId={0}'.format(rid)
            music['irc_href'] = irc_href
            yield scrapy.Request(
                href,
                callback=self.parse_music_url,
                # Pass a copy so the in-flight request owns its own dict.
                meta={'music': music.copy()},
            )

    def parse_music_url(self, response):
        """Attach the resolved audio URL to the track and yield it as the item.

        Reads the ``url`` field of the JSON response and stores it under
        ``music_url``. If the field is missing or the body is not JSON, a
        warning is logged and no item is yielded.
        """
        item = response.meta['music']
        try:
            url = json.loads(response.text)['url']
        except (ValueError, KeyError, TypeError):
            self.logger.warning('No music url in response from %s', response.url)
            return
        item['music_url'] = url
        yield item