Anjuke scraper

The script below drives headless Chrome through Selenium to page through the second-hand listings for Tianjin's Dongli district on Anjuke (pages 1–99), pull each listing's floor area, unit price, total price, layout, floor, orientation, community name, and district from the listing cards, and write them to Excel via `wd_excel`.

```python
# -*- coding: UTF-8 -*-
# @Project :dome
# @Email   : 274695262@qq.com
# @File    :test_ajk.py
# @IDE     :PyCharm
# @Author  :wangruifeng
# @Time    :2021/06/17 17:34

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import re
from wt_data import wd_excel

chrome_options = Options()
chrome_options.add_argument('--no-sandbox')  # works around the "DevToolsActivePort file doesn't exist" error
chrome_options.add_argument('window-size=1920x3000')  # fix the browser resolution
chrome_options.add_argument('--disable-gpu')  # recommended by the Chrome docs to avoid a rendering bug
chrome_options.add_argument('--hide-scrollbars')  # hide scrollbars on some special pages
chrome_options.add_argument('blink-settings=imagesEnabled=false')  # skip loading images to speed things up
chrome_options.add_argument('--headless')  # no visible window; on Linux without a display, startup fails without this
chrome_options.add_argument('log-level=3')

f = 1
while f < 100:  # pages 1-99 of the listing results
    d = webdriver.Chrome(options=chrome_options)  # `chrome_options=` is deprecated in newer Selenium releases
    d.get("https://tianjin.anjuke.com/sale/dongli/p" + str(f) + "/?from=sugg")
    d.implicitly_wait(2)  # implicit wait (seconds)
    d.maximize_window()  # maximize the window
    for i in range(1, 61):  # up to 60 listing cards per page
        # XPaths of the fields inside the i-th listing card
        shi = '//*[@id="__layout"]/div/section/section[3]/section[1]/section[2]/div[' + str(i) + ']/a/div[2]/div[1]/section/div[1]/p[1]'  # layout (rooms)
        size = '//*[@id="__layout"]/div/section/section[3]/section[1]/section[2]/div[' + str(i) + ']/a/div[2]/div[1]/section/div[1]/p[2]'  # floor area
        fang = '//*[@id="__layout"]/div/section/section[3]/section[1]/section[2]/div[' + str(i) + ']/a/div[2]/div[1]/section/div[1]/p[3]'  # orientation
        ceng = '//*[@id="__layout"]/div/section/section[3]/section[1]/section[2]/div[' + str(i) + ']/a/div[2]/div[1]/section/div[1]/p[4]'  # floor
        jia = '//*[@id="__layout"]/div/section/section[3]/section[1]/section[2]/div[' + str(i) + ']/a/div[2]/div[2]/p[1]/span[1]'  # total price
        danjia = '//*[@id="__layout"]/div/section/section[3]/section[1]/section[2]/div[' + str(i) + ']/a/div[2]/div[2]/p[2]'  # unit price
        xiaoqu = '//*[@id="__layout"]/div/section/section[3]/section[1]/section[2]/div[' + str(i) + ']/a/div[2]/div[1]/section/div[2]/p[1]'  # community name
        quyu = '//*[@id="__layout"]/div/section/section[3]/section[1]/section[2]/div[' + str(i) + ']/a/div[2]/div[1]/section/div[2]/p[2]/span[2]'  # district

        try:
            res1 = d.find_element(By.XPATH, size).text
            house_size = re.search(r"[0-9]+\.[0-9]+|[0-9]+", res1).group()  # area, with or without a decimal part
        except:
            house_size = "暂无"

        try:
            res2 = d.find_element(By.XPATH, danjia).text
            house_Price = re.search(r"^[0-9]+", res2).group()  # unit price: leading digits only
        except:
            house_Price = "暂无"

        Total_price = d.find_element(By.XPATH, jia).text
        pattern = d.find_element(By.XPATH, shi).text

        try:
            floor = d.find_element(By.XPATH, ceng).text
        except:
            floor = "暂无楼层"
        orientation = d.find_element(By.XPATH, fang).text
        name = d.find_element(By.XPATH, xiaoqu).text
        region = d.find_element(By.XPATH, quyu).text
        wd_excel(house_size, house_Price, Total_price, pattern, floor, orientation, name, region)
    print(f)  # log which page just finished
    f = f + 1
    d.quit()  # quit() (not just close()) so the browser process doesn't linger between pages
```
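The `wd_excel` helper is imported from a local `wt_data` module that is not shown in the post. Purely as a sketch of what such a helper might look like (an assumption, not the author's actual module), it could append one row per listing to an .xlsx file with openpyxl:

```python
# Hypothetical stand-in for wt_data.wd_excel -- the real module is not shown in the post.
# It appends one listing per row to anjuke.xlsx, creating the file and a header row on first use.
import os
from openpyxl import Workbook, load_workbook

EXCEL_PATH = "anjuke.xlsx"  # assumed output file name
HEADERS = ["area", "unit_price", "total_price", "layout", "floor", "orientation", "community", "district"]

def wd_excel(house_size, house_price, total_price, pattern, floor, orientation, name, region):
    if os.path.exists(EXCEL_PATH):
        wb = load_workbook(EXCEL_PATH)
        ws = wb.active
    else:
        wb = Workbook()
        ws = wb.active
        ws.append(HEADERS)  # write the header row the first time the file is created
    ws.append([house_size, house_price, total_price, pattern, floor, orientation, name, region])
    wb.save(EXCEL_PATH)
```

Re-opening and saving the workbook on every call is slow, but it keeps already-scraped rows safe if the scraper crashes mid-run; batching one save per page would be faster.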

Here is Python crawler code that scrapes Anjuke housing-price data and looks up coordinates through the Baidu Maps API:

```python
import requests
from bs4 import BeautifulSoup
import json

def get_location(city, address):
    url = 'http://api.map.baidu.com/geocoder/v2/'
    params = {
        'address': address,
        'city': city,
        'output': 'json',
        'ak': '你的百度地图API密钥'
    }
    res = requests.get(url, params=params)
    json_data = json.loads(res.text)
    if json_data['status'] == 0:
        location = json_data['result']['location']
        return location['lng'], location['lat']

def get_house_price(city, area):
    url = 'https://{0}.anjuke.com/market/{1}/'.format(city, area)
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    res = requests.get(url=url, headers=headers)
    soup = BeautifulSoup(res.text, 'lxml')
    house_price_list = []
    for tr in soup.select('.sale-estate-table tr')[1:]:
        tds = tr.select('td')
        house_name = tds[0].text.strip()
        house_address = tds[1].text.strip()
        house_price = tds[2].text.strip()
        house_location = get_location(city, house_address)
        if house_location:
            house_price_list.append({
                'name': house_name,
                'address': house_address,
                'price': house_price,
                'location': house_location
            })
    return house_price_list

if __name__ == '__main__':
    city = 'sh'
    area = 'pudongxinqu'
    house_price_list = get_house_price(city, area)
    print(house_price_list)
```

Here, `get_location` queries the Baidu Maps geocoding API for the coordinates of an address, and `get_house_price` scrapes the price listings from the Anjuke market page. In the `if __name__ == '__main__'` block you can set the city and district, call `get_house_price` to fetch that area's listings, and print the result. Note: replace `'你的百度地图API密钥'` with your own Baidu Maps API key before running this code.
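If you want to keep the geocoded results rather than just printing them, a small follow-up (the file name below is arbitrary) could dump the list to JSON for later reuse, e.g. plotting on a map:

```python
import json

# Assuming house_price_list is the list returned by get_house_price() above,
# write it out as UTF-8 JSON so names, prices, and coordinates can be reused later.
with open('house_prices.json', 'w', encoding='utf-8') as fp:
    json.dump(house_price_list, fp, ensure_ascii=False, indent=2)
```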
