# Most of the code below is commented.
import requests
from lxml import etree
from selenium import webdriver
class Music:
    """Scrape playlist titles and authors from NetEase Cloud Music via Selenium.

    The scraper opens the playlist discovery page, expands the category
    panel, collects every category link, then visits each category page by
    page and prints the playlists found on every page.
    """

    # Locator strategy strings accepted by Selenium's ``find_element(s)``.
    # They are the values behind ``By.XPATH`` / ``By.ID``; plain literals are
    # used so the class needs no import beyond ``webdriver``.
    _XPATH = "xpath"
    _ID = "id"
    # Frame that is entered before any element lookup on a loaded page.
    _CONTENT_FRAME = "contentFrame"
    # Link used to advance to the next page of a category.
    _NEXT_PAGE_XPATH = "//a[@class='zbtn znxt']"

    def __init__(self):
        """Start a Chrome session and store the playlist landing-page URL."""
        self.driver = webdriver.Chrome()
        # Landing page that lists the playlists of all categories.
        self.url = "https://music.163.com/#/discover/playlist/"

    def go_homeoage(self):
        """Open the landing page and click the category toggle.

        Clicking the toggle makes the list of concrete categories appear.

        Raises:
            IndexError: if no element with id ``cateToggleLink`` is found.
        """
        self.driver.get(self.url)
        self.driver.switch_to.frame(self._CONTENT_FRAME)
        self.driver.find_elements(self._ID, "cateToggleLink")[0].click()

    def get_url_list(self):
        """Collect the name and URL of every category in the category panel.

        Prints the number of ``<dd>`` groups found in the panel.

        Returns:
            A list of dicts with the keys ``"category"`` (link text) and
            ``"href"`` (link target), in document order.
        """
        url_list = []
        groups = self.driver.find_elements(
            self._XPATH, "//div[@id='cateListBox']//dd"
        )
        print(len(groups))
        for group in groups:
            for link in group.find_elements(self._XPATH, ".//a"):
                url_list.append(
                    {"category": link.text, "href": link.get_attribute("href")}
                )
        return url_list

    def _find_next_page(self):
        """Return the first "next page" link, or None when none is found."""
        candidates = self.driver.find_elements(
            self._XPATH, self._NEXT_PAGE_XPATH
        )
        return candidates[0] if candidates else None

    def go_every_url_list(self, url_list):
        """Visit every category and scrape all of its pages.

        The browser is always closed when this method finishes, including
        when scraping fails part-way, so the driver cannot be reused after
        the call.

        Args:
            url_list: dicts as produced by ``get_url_list``; only the
                ``"href"`` key is read.
        """
        try:
            for entry in url_list:
                self.driver.get(entry["href"])
                self.driver.switch_to.frame(self._CONTENT_FRAME)
                self.get_content_list()
                next_page = self._find_next_page()
                # Keep going for as long as a next-page link is present.
                while next_page is not None:
                    # The next-page button is covered by another element, so
                    # it is clicked through JavaScript rather than
                    # ``WebElement.click()``.
                    # NOTE(review): no explicit wait follows the click, so the
                    # next scrape may run before the new page has rendered.
                    self.driver.execute_script(
                        'arguments[0].click()', next_page
                    )
                    self.get_content_list()
                    next_page = self._find_next_page()
        finally:
            # Close the browser even when a page fails to load or parse.
            self.driver.quit()

    def get_content_list(self):
        """Scrape title and author of each playlist on the current page.

        The collected list is printed and also returned.

        Returns:
            A list of dicts with the keys ``"title"`` and ``"author"``.
        """
        cards = self.driver.find_elements(
            self._XPATH, "//ul[@class='m-cvrlst f-cb']//li"
        )
        content_list = []
        for card in cards:
            title = card.find_element(self._XPATH, "./p[@class='dec']/a").text
            author = card.find_element(
                self._XPATH, ".//a[@class='nm nm-icn f-thide s-fc3']"
            ).text
            content_list.append({"title": title, "author": author})
        print(content_list)
        return content_list

    def run(self):
        """Run the whole scrape: open the site, list categories, crawl them."""
        try:
            self.go_homeoage()
            url_list = self.get_url_list()
        except Exception:
            # go_every_url_list never ran, so close the browser here before
            # letting the error propagate.
            self.driver.quit()
            raise
        self.go_every_url_list(url_list)
# Script entry point: build the scraper and run the full crawl.
if __name__ == '__main__':
    scraper = Music()
    scraper.run()