# -*- coding: UTF-8 -*-
# @Project :dome
# @Email : 274695262@qq.com
# @File :test_ajk.py
# @IDE :PyCharm
# @Author :wangruifeng
# @Time :2021/06/17 17:34
import re
import time

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

from wt_data import wd_excel
# Chrome launch options used for the browser sessions started further down.
chrome_options = Options()
for _switch in (
    # Works around the "DevToolsActivePort file doesn't exist" startup error.
    '--no-sandbox',
    # Requested browser resolution.
    # NOTE(review): Chrome's documented form is "width,height"; confirm that
    # the "x" separator used here is actually honoured.
    'window-size=1920x3000',
    # Google's documentation mentions adding this switch to avoid a bug.
    '--disable-gpu',
    # Hide scrollbars, for the benefit of some unusual pages.
    '--hide-scrollbars',
    # Do not load images, to speed up page loads.
    'blink-settings=imagesEnabled=false',
    # Run without a visible window; on Linux systems without a display the
    # browser fails to start unless this is set.
    '--headless',
    # Presumably limits Chrome's console logging to the most severe
    # messages -- TODO confirm.
    'log-level=3',
):
    chrome_options.add_argument(_switch)
# Crawl result pages 1-99 of https://tianjin.anjuke.com/sale/dongli/ and hand
# the fields of every listing found on each page to wd_excel.

# XPath of the detail container of the i-th listing card on a result page;
# every field below is addressed relative to it.
_CARD_XPATH = '//*[@id="__layout"]/div/section/section[3]/section[1]/section[2]/div[%d]/a/div[2]'

# First integer or decimal number in the size text (the dot is escaped so that
# it only matches a real decimal point).
_SIZE_RE = re.compile(r"[0-9]+(?:\.[0-9]+)?")
# Leading digits of the unit-price text.
_UNIT_PRICE_RE = re.compile(r"^[0-9]+")


def _element_text(driver, xpath):
    """Return the text of the element located by ``xpath``.

    Raises:
        NoSuchElementException: if the page contains no such element.
    """
    return driver.find_element(By.XPATH, xpath).text


def _optional_text(driver, xpath, default=""):
    """Return the element's text, or ``default`` when the element is absent."""
    try:
        return _element_text(driver, xpath)
    except NoSuchElementException:
        return default


def _first_match(regex, text, default):
    """Return the first match of ``regex`` in ``text``, or ``default`` if none."""
    found = regex.search(text)
    return found.group() if found else default


for f in range(1, 100):
    # One browser session per page, as before.  quit() in the finally clause
    # shuts down the browser and its driver process even when the page fails,
    # so sessions no longer pile up over the 99 pages.
    d = webdriver.Chrome(options=chrome_options)
    try:
        d.get("https://tianjin.anjuke.com/sale/dongli/p" + str(f) + "/?from=sugg")
        d.implicitly_wait(2)  # implicit wait applied to every element lookup
        d.maximize_window()  # maximise the page
        for i in range(1, 61):
            card = _CARD_XPATH % i
            info = card + '/div[1]/section'
            price = card + '/div[2]'

            # Required fields are looked up first: when the card does not
            # exist (short page, different card layout) the listing is
            # skipped instead of aborting the whole crawl.
            try:
                Total_price = _element_text(d, price + '/p[1]/span[1]')
                pattern = _element_text(d, info + '/div[1]/p[1]')
                orientation = _element_text(d, info + '/div[1]/p[3]')
                name = _element_text(d, info + '/div[2]/p[1]')
                region = _element_text(d, info + '/div[2]/p[2]/span[2]')
            except NoSuchElementException:
                print("page %d: listing %d skipped (required field missing)" % (f, i))
                continue

            # Optional fields fall back to the same placeholder texts as before
            # when the element is missing or contains no number.
            house_size = _first_match(
                _SIZE_RE, _optional_text(d, info + '/div[1]/p[2]'), "暂无")
            house_Price = _first_match(
                _UNIT_PRICE_RE, _optional_text(d, price + '/p[2]'), "暂无")
            floor = _optional_text(d, info + '/div[1]/p[4]', "暂无楼层")

            wd_excel(house_size, house_Price, Total_price, pattern, floor,
                     orientation, name, region)
    finally:
        d.quit()
    print(f)  # progress: number of the page just processed
# Anjuke crawler (安居客爬虫)
# Web-page residue: "latest recommended article, published 2024-04-08 13:45:22"