这里采用 for 循环爬取前 5 页,但不知道为什么只能爬取前两页,之后就会抛出异常。先放在这里,查查资料后再解决。
from selenium import webdriver
import time
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import StaleElementReferenceException

# Scrape the first PAGE_COUNT result pages of Airbnb listings for Shenzhen and
# print one line per listing: review text, nightly price, title, house type
# and bed count.
#
# Listing cards located right after driver.get() can raise
# StaleElementReferenceException while they are being read -- presumably the
# page is still re-rendering its cards at that point (TODO confirm against the
# live site). Each page is therefore given time to settle, and the whole page
# is re-read from scratch if any card goes stale mid-read.
PAGE_COUNT = 5
PAGE_SIZE = 18  # step of the items_offset query parameter between pages
RENDER_WAIT_SECONDS = 5  # pause before each read attempt
MAX_ATTEMPTS = 3  # read attempts per page before giving up

driver = webdriver.Firefox(executable_path=r'C:\Users\lenovo\Desktop\geckodriver.exe')
for i in range(0, PAGE_COUNT):
    link = (
        "https://www.airbnb.cn/s/shenzhen-China/homes?items_offset="
        + str(i * PAGE_SIZE)
        + "&map_toggle=false"
    )
    print(link)
    driver.get(link)

    for attempt in range(1, MAX_ATTEMPTS + 1):
        time.sleep(RENDER_WAIT_SECONDS)
        # Rows are collected first and printed only after the whole page was
        # read successfully, so a retry never prints a listing twice.
        rows = []
        try:
            for house in driver.find_elements_by_css_selector('div._gig1e7'):
                try:
                    comment = house.find_element_by_css_selector('span._69pvqtq').text
                except NoSuchElementException:
                    # The card has no review element: fall back to 0.
                    comment = 0
                price = house.find_element_by_css_selector('span._1d8yint7')
                price = price.text.replace('每晚', "")
                name = house.find_element_by_css_selector('div._qrfr9x5').text
                details = house.find_element_by_css_selector('div._1etkxf1').find_element_by_tag_name('span')
                # The summary text is split on " · "; the first part is used
                # as the house type and the second as the bed count.
                parts = details.text.split(" · ")
                house_type = parts[0]
                # Tolerate a summary without a second part instead of
                # crashing with IndexError.
                bed_num = parts[1] if len(parts) > 1 else ""
                rows.append((comment, price, name, house_type, bed_num))
        except StaleElementReferenceException:
            if attempt == MAX_ATTEMPTS:
                raise
            # Element references went stale during the read: wait and re-read
            # the whole page with freshly located elements.
            continue
        break

    for row in rows:
        print(*row)
这是报的异常
selenium.common.exceptions.StaleElementReferenceException: Message: The element reference of <div class="_gig1e7"> is stale; either the element is no longer attached to the DOM, it is not in the current frame context, or the document has been refreshed