python frame_Python-爬虫-针对有frame框架的页面

最新推荐文章于 2023-08-09 11:36:04 发布

weixin_39639600

最新推荐文章于 2023-08-09 11:36:04 发布

阅读量160

点赞数

文章标签： python frame

"""http://music.163.com/#/artist/album?id=101988&limit=120&offset=0

爬取上述网址中的专辑信息"""

import os
from urllib.request import urlretrieve

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By


class DownloadInfo():
    """Scrape album covers, titles and dates from a NetEase Music artist page.

    The album list is read from inside the ``g_iframe`` frame of the page, so
    the driver has to switch into that frame before any element can be located.
    Covers are saved as ``<album name>.jpg`` in an ``Infos`` directory that is
    created next to this source file.
    """

    def __init__(self):
        self.url = 'http://music.163.com/#/artist/album?id=101988&limit=120&offset=0'
        # Resolve to an absolute directory: makedir() changes the working
        # directory, which would silently invalidate a relative base path.
        self.basePath = os.path.dirname(os.path.abspath(__file__))

    def makedir(self, name):
        """Create ``name`` under the base path if needed and chdir into it.

        Args:
            name: Directory name, relative to ``self.basePath``.

        Returns:
            The absolute path of the directory.
        """
        path = os.path.join(self.basePath, name)
        if not os.path.exists(path):
            # exist_ok guards against the directory appearing between the
            # existence check and the creation.
            os.makedirs(path, exist_ok=True)
            print('The file is created now.')
        else:
            print('The file existed.')
        # Switch into the directory (kept for callers that rely on the cwd).
        os.chdir(path)
        return path

    def connect(self, url):
        """Start a headless browser, load ``url`` and return the driver.

        The caller owns the returned driver and must call ``quit()`` on it.

        Args:
            url: Page to load.

        Returns:
            A Selenium WebDriver instance with ``url`` loaded.
        """
        if hasattr(webdriver, 'PhantomJS'):
            driver = webdriver.PhantomJS()
        else:
            # NOTE(review): PhantomJS support was removed in Selenium 4; fall
            # back to headless Chrome there. Requires a local Chrome install.
            options = webdriver.ChromeOptions()
            options.add_argument('--headless')
            driver = webdriver.Chrome(options=options)
        try:
            driver.get(url)
        except Exception:
            # Do not leave an orphaned browser process behind on failure.
            driver.quit()
            raise
        print('success')
        return driver

    def getFileNames(self, path):
        """Return the names of the entries found directly inside ``path``."""
        return os.listdir(path)

    def _saveCover(self, src, photo_name, path, pic_names):
        """Download ``src`` to ``path/photo_name`` unless it is already there.

        Args:
            src: URL of the cover image.
            photo_name: Target file name.
            path: Directory the file is written to.
            pic_names: Names that already exist in ``path``.
        """
        if photo_name in pic_names:
            print('图片已下载。')
        else:
            # Explicit target path instead of relying on the current directory.
            urlretrieve(src, os.path.join(path, photo_name))

    def getInfo(self):
        """Download every album cover, then print all titles and dates.

        Elements are located with Selenium XPath queries inside ``g_iframe``.
        The browser is always shut down, even when scraping fails.
        """
        driver = self.connect(self.url)
        try:
            driver.switch_to.frame('g_iframe')
            path = self.makedir('Infos')
            pic_names = set(self.getFileNames(path))

            imgs = driver.find_elements(
                By.XPATH, "//div[@class='u-cover u-cover-alb3']/img")
            titles = driver.find_elements(
                By.XPATH, "//li/p[@class='dec dec-1 f-thide2 f-pre']/a")
            dates = driver.find_elements(By.XPATH, "//span[@class='s-fc3']")

            # zip() pairs covers with titles and stops at the shorter list,
            # so a mismatch in counts can no longer raise IndexError.
            for img, title in zip(imgs, titles):
                # '/' is not allowed in a file name, so it is dropped.
                photo_name = title.text.replace('/', '') + '.jpg'
                print(photo_name)
                self._saveCover(
                    img.get_attribute('src'), photo_name, path, pic_names)

            for title in titles:
                print(title.text)
            for date in dates:
                print(date.text)
        finally:
            driver.quit()

    def getInfoBySoup(self):
        """Variant of ``getInfo`` that parses the frame HTML with BeautifulSoup.

        Prints the cover URL, name and date of each ``li`` under the element
        with id ``m-song-module`` and downloads covers that are missing.
        Uses the ``lxml`` parser, which must be installed.
        """
        driver = self.connect(self.url)
        try:
            driver.switch_to.frame('g_iframe')
            html = driver.page_source
        finally:
            # The page source is all that is needed from the browser.
            driver.quit()

        path = self.makedir('Infos')
        pic_names = set(self.getFileNames(path))
        all_li = BeautifulSoup(html, 'lxml').find(id='m-song-module').find_all('li')
        for li in all_li:
            album_img = li.find('img')['src']
            album_name = li.find('p', class_='dec')['title']
            album_date = li.find('span', class_='s-fc3').get_text()
            print(album_img)
            print(album_name)
            print(album_date)
            photo_name = album_name.replace('/', '') + '.jpg'
            self._saveCover(album_img, photo_name, path, pic_names)

if __name__ == '__main__':
    # Script entry point: scrape the album page configured in DownloadInfo.
    obj = DownloadInfo()
    obj.getInfo()

weixin_39639600

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
python frame_Python-爬虫-针对有frame框架的页面

"""http://music.163.com/#/artist/album?id=101988&limit=120&offset=0爬取上述网址中的专辑信息"""from selenium importwebdriverfrom urllib.request importurlretrieveimportosfrom bs4 importBeautifulSoupclassDownloadInf...
复制链接

扫一扫

评论

被折叠的条评论为什么被折叠?

到【灌水乐园】发言

查看更多评论

添加红包

成就一亿技术人!

hope_wisdom

发出的红包

实付元

使用余额支付

点击重新获取

扫码支付

钱包余额 0

抵扣说明：

1.余额是钱包充值的虚拟货币，按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载，可以购买VIP、付费专栏及课程。