# We wrap the code written earlier into a class.
import requests
from lxml import etree
class Wangyiyun(object):
    """Crawl music.163.com listing pages and print what is found on them.

    Constructing an instance fetches ``base_url`` and immediately runs
    ``parse_url``, so instantiation performs network requests and writes
    to stdout.
    """

    # Prefix prepended to the site-relative hrefs scraped from each page.
    HOST = 'https://music.163.com'

    # Browser-like User-Agent sent with every request.
    HEADERS = {
        'user-agent': "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36",
    }

    # Seconds to wait for the server before giving up; without a timeout a
    # single stalled connection would block the whole crawl indefinitely.
    TIMEOUT = 10

    def __init__(self, base_url):
        """Fetch ``base_url``, keep its parsed tree, and start the crawl."""
        self.content = self.request_url(base_url)
        self.parse_url()

    def request_url(self, base_url):
        """Download ``base_url`` and return it parsed as an lxml HTML tree.

        The response body is decoded as UTF-8 before parsing.
        """
        # The headers must be passed by keyword: the second positional
        # parameter of requests.get() is ``params`` (the query string), so a
        # positional dict would never be sent as HTTP headers.
        response = requests.get(
            base_url, headers=self.HEADERS, timeout=self.TIMEOUT
        )
        content = response.content.decode('utf-8')
        return etree.HTML(content)

    def parse_url(self):
        """Walk the pages linked from ``self.content`` and print the results.

        For every ``div.blk`` group on the start page, each link in the group
        is fetched; every sub-page link found there (all but the first item
        of the ``n-ltlst`` list) is printed and fetched in turn, and the
        anchor texts of its ``m-sgerlist`` block (presumably singer names)
        are printed one per line.
        """
        for group in self.content.xpath('//div[@class="blk"]'):
            group_names = group.xpath('.//a/text()')
            group_links = group.xpath('.//a/@href')
            # Only the href is used, but the zip is kept so that the number
            # of links visited stays bounded by the shorter of the two lists.
            for _name, href in zip(group_names, group_links):
                category_page = self.request_url(self.HOST + href)
                sub_links = category_page.xpath(
                    '//ul[@class="n-ltlst f-cb"]/li[position()>1]/a/@href'
                )
                for sub_link in sub_links:
                    full_url = self.HOST + sub_link
                    print(full_url)
                    listing_page = self.request_url(full_url)
                    names = listing_page.xpath(
                        '//div[@class="m-sgerlist"]/ul/li/a/text()'
                    )
                    for name in names:
                        print(name)
if __name__ == '__main__':
    # Script entry point: constructing the crawler fetches the start page
    # and runs the whole crawl from inside __init__.
    base_url = 'https://music.163.com/discover/artist'
    Wangyiyun(base_url)