"""http://music.163.com/#/artist/album?id=101988&limit=120&offset=0
爬取上述网址中的专辑信息"""
import os
from urllib.request import urlretrieve

from bs4 import BeautifulSoup
from selenium import webdriver


class DownloadInfo:
    """Download album covers and print album titles/dates for one artist page.

    Attributes are set in ``__init__``:
        url: the artist album page that is opened in the browser.
        basePath: absolute directory containing this script; download
            directories are created beneath it.
    """

    def __init__(self):
        self.url = 'http://music.163.com/#/artist/album?id=101988&limit=120&offset=0'
        # Resolve to an absolute path: makedir() changes the working
        # directory, so a relative base path would point somewhere else
        # (or nowhere) on every later use.
        self.basePath = os.path.dirname(os.path.abspath(__file__))

    def makedir(self, name):
        """Create ``basePath/name`` if needed, chdir into it and return its path."""
        path = os.path.join(self.basePath, name)
        if not os.path.exists(path):
            os.makedirs(path)
            print('The file is created now.')
        else:
            print('The file existed.')
        # Switch into that directory.
        os.chdir(path)
        return path

    def connect(self, url):
        """Open ``url`` in a new PhantomJS driver and return the driver.

        The caller owns the returned driver and is responsible for calling
        ``quit()`` on it. If loading the page fails, the driver is shut down
        here before the exception propagates.
        """
        driver = webdriver.PhantomJS()
        try:
            driver.get(url)
        except Exception:
            # Do not leave an orphaned browser process behind.
            driver.quit()
            raise
        print('success')
        return driver

    def getFileNames(self, path):
        """Return the names of the entries in directory ``path``."""
        return os.listdir(path)

    def getInfo(self):
        """Download missing album covers, then print all titles and dates.

        For each cover image paired with its album title, the file name
        ``<title without '/'>.jpg`` is printed; the image is downloaded into
        the ``Infos`` directory unless a file of that name already exists
        there. Afterwards every title text and every date text found on the
        page is printed.

        Images and titles are paired positionally; if the page yields a
        different number of each, the surplus elements are ignored.
        """
        driver = self.connect(self.url)
        try:
            # The album list is queried inside this frame.
            driver.switch_to.frame('g_iframe')
            path = self.makedir('Infos')
            # Set for O(1) "already downloaded" checks.
            pic_names = set(self.getFileNames(path))

            imgs = driver.find_elements_by_xpath(
                "//div[@class='u-cover u-cover-alb3']/img")
            titles = driver.find_elements_by_xpath(
                "//li/p[@class='dec dec-1 f-thide2 f-pre']/a")
            # NOTE(review): presumably these spans hold the release dates;
            # confirm against the live page markup.
            dates = driver.find_elements_by_xpath("//span[@class='s-fc3']")

            for img, title in zip(imgs, titles):
                # '/' is a path separator and cannot appear in a file name.
                photo_name = title.text.replace('/', '') + '.jpg'
                print(photo_name)
                if photo_name in pic_names:
                    print('图片已下载。')
                else:
                    # Write to an explicit location instead of relying on
                    # the working directory set by makedir().
                    urlretrieve(img.get_attribute('src'),
                                os.path.join(path, photo_name))

            for title in titles:
                print(title.text)
            for date in dates:
                print(date.text)
        finally:
            # Always release the browser process, even on failure.
            driver.quit()

    # Alternative implementation of getInfo() kept for reference: it parses
    # driver.page_source with BeautifulSoup instead of querying Selenium
    # elements.
    #
    # def getInfo(self):
    #     driver = self.connect(self.url)
    #     driver.switch_to.frame('g_iframe')
    #     html = driver.page_source
    #     path = self.makedir('Infos')
    #     pic_names = self.getFileNames(path)
    #     all_li = BeautifulSoup(html, 'lxml').find(id='m-song-module').find_all('li')
    #     for li in all_li:
    #         album_img = li.find('img')['src']
    #         album_name = li.find('p', class_='dec')['title']
    #         album_date = li.find('span', class_='s-fc3').get_text()
    #         print(album_img)
    #         print(album_name)
    #         print(album_date)
    #         photo_name = album_name.replace('/', '') + '.jpg'
    #         if photo_name in pic_names:
    #             print('图片已下载。')
    #         else:
    #             urlretrieve(album_img, photo_name)


# Script entry point: build the downloader and run it once.
if __name__ == '__main__':
    DownloadInfo().getInfo()