Python 爬虫:针对有 frame 框架的页面

"""http://music.163.com/#/artist/album?id=101988&limit=120&offset=0

爬取上述网址中的专辑信息"""

import os
from urllib.request import urlretrieve

from bs4 import BeautifulSoup
from selenium import webdriver


class DownloadInfo():
    """Scrape album covers, titles and dates from a NetEase Cloud Music artist page.

    The elements of interest are looked up after switching the driver into
    the ``g_iframe`` frame of the page; cover images are saved as ``.jpg``
    files in an ``Infos`` directory next to this script.
    """

    def __init__(self):
        self.url = 'http://music.163.com/#/artist/album?id=101988&limit=120&offset=0'
        # Absolute path: makedir() changes the working directory, so a
        # relative base path would resolve differently on a later call.
        self.basePath = os.path.dirname(os.path.abspath(__file__))

    def makedir(self, name):
        """Ensure ``basePath/name`` exists, chdir into it and return its path."""
        path = os.path.join(self.basePath, name)
        if not os.path.exists(path):
            os.makedirs(path)
            print('The file is created now.')
        else:
            print('The file existed.')
        # Switch into the directory.
        os.chdir(path)
        return path

    def connect(self, url):
        """Start a PhantomJS driver, load *url* and return the driver.

        The caller owns the returned driver and must call ``quit()`` on it.
        If loading the page raises, the driver is shut down before the
        exception propagates.
        """
        # NOTE(review): PhantomJS support was dropped from recent Selenium
        # releases; this requires a Selenium version that still ships it.
        driver = webdriver.PhantomJS()
        try:
            driver.get(url)
        except Exception:
            driver.quit()
            raise
        print('success')
        return driver

    def getFileNames(self, path):
        """Return the names of the entries in directory *path*."""
        return os.listdir(path)

    @staticmethod
    def _photoName(album_name):
        """Build the cover file name for *album_name* (``/`` is stripped)."""
        return album_name.replace('/', '') + '.jpg'

    def _saveCover(self, src, photo_name, path, pic_names):
        """Download *src* to ``path/photo_name`` unless it is already in *pic_names*."""
        if photo_name in pic_names:
            print('图片已下载。')
            return
        # Explicit target path, so the download does not depend on the
        # current working directory.
        urlretrieve(src, os.path.join(path, photo_name))
        pic_names.add(photo_name)

    def getInfo(self):
        """Download every album cover, then print all album titles and dates.

        Elements are located with Selenium XPath queries inside the
        ``g_iframe`` frame. The driver is always shut down on exit.
        """
        driver = self.connect(self.url)
        try:
            driver.switch_to.frame('g_iframe')
            path = self.makedir('Infos')
            pic_names = set(self.getFileNames(path))

            imgs = driver.find_elements_by_xpath(
                "//div[@class='u-cover u-cover-alb3']/img")
            titles = driver.find_elements_by_xpath(
                "//li/p[@class='dec dec-1 f-thide2 f-pre']/a")
            dates = driver.find_elements_by_xpath("//span[@class='s-fc3']")

            if len(imgs) != len(titles):
                print('Warning: %d covers but %d titles; pairing the first %d.'
                      % (len(imgs), len(titles), min(len(imgs), len(titles))))

            # Covers and titles are paired by position.
            for img, title in zip(imgs, titles):
                photo_name = self._photoName(title.text)
                print(photo_name)
                self._saveCover(img.get_attribute('src'), photo_name,
                                path, pic_names)

            for title in titles:
                print(title.text)
            for date in dates:
                print(date.text)
        finally:
            driver.quit()

    def getInfoFromSource(self):
        """Alternative to :meth:`getInfo` that parses the frame's HTML with BeautifulSoup.

        Reads ``page_source`` after switching into ``g_iframe``, then walks
        the ``li`` items under the element with id ``m-song-module``,
        printing each album's image URL, name and date and downloading
        covers that are not yet present. Requires the ``lxml`` parser.
        """
        driver = self.connect(self.url)
        try:
            driver.switch_to.frame('g_iframe')
            html = driver.page_source
        finally:
            # The HTML snapshot is all that is needed from the browser.
            driver.quit()

        path = self.makedir('Infos')
        pic_names = set(self.getFileNames(path))

        all_li = BeautifulSoup(html, 'lxml').find(id='m-song-module').find_all('li')
        for li in all_li:
            album_img = li.find('img')['src']
            album_name = li.find('p', class_='dec')['title']
            album_date = li.find('span', class_='s-fc3').get_text()
            print(album_img)
            print(album_name)
            print(album_date)
            self._saveCover(album_img, self._photoName(album_name),
                            path, pic_names)

if __name__ == '__main__':
    # Script entry point: scrape the configured artist page.
    obj = DownloadInfo()
    obj.getInfo()

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值