# Scrape second-hand (resale) housing listings from Anjuke (安居客).
import math
import time
import requests
from lxml import etree
# Cookies sent along with every listing-page request made by ershoufang().
# NOTE(review): these look like values captured from a real browser session;
# they presumably expire and need refreshing from time to time -- confirm.
cookies = {
    "aQQ_ajkguid": "21677498-CE30-45C7-ADD2-8F8E2ABBB414",
    "ctid": "12",
    "ajk-appVersion": "",
    "seo_source_type": "1",
    "id58": "CrIej2TkgEu5jmn3DMJVAg==",
    "sessid": "36361219-6354-2A42-755A-A002E8A7FD41",
    "twe": "2",
    "fzq_h": "23fafebd91a95c7d87908a5566cf2b98_1701499005283_187ad6ffd93f4107a658339d9932a17f_2018862970",
    "fzq_js_anjuke_ershoufang_pc": "3a3132ef8b005a7f08bbc3b23a901dec_1701499333069_23",
    "obtain_by": "2",
    "xxzl_cid": "63627e5da0da4c5bbd73b1b11675efe0",
    "xxzl_deviceid": "CEcgyp79MmoGL3Ur1OqE8aVW9ce9Xe6TxKD1yCyInA8ggeiP+XR/AfcBRGjoUDej",
}
# HTTP request headers sent with every listing-page request; they describe a
# desktop Chrome 118 browser on Windows navigating within guangzhou.anjuke.com.
headers = {
    "authority": "guangzhou.anjuke.com",
    "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "accept-language": "zh-CN,zh;q=0.9",
    "cache-control": "no-cache",
    # No raw 'cookie' header is set here on purpose: the same values live in
    # the module-level `cookies` dict, which is passed to requests.get()
    # through its `cookies=` argument.
    "pragma": "no-cache",
    "referer": "https://guangzhou.anjuke.com/sale/tianhe/p6/",
    # These two values contain double quotes, so they stay single-quoted.
    "sec-ch-ua": '"Chromium";v="118", "Google Chrome";v="118", "Not=A?Brand";v="99"',
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"Windows"',
    "sec-fetch-dest": "document",
    "sec-fetch-mode": "navigate",
    "sec-fetch-site": "same-origin",
    "sec-fetch-user": "?1",
    "upgrade-insecure-requests": "1",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
}
def _field_text(node, path, strip_whitespace=False):
    """Evaluate XPath *path* on *node* and join the string results.

    When *strip_whitespace* is true, every space and newline is removed from
    the joined text (the listing markup pads some fields with layout
    whitespace).
    """
    text = ''.join(node.xpath(path))
    if strip_whitespace:
        text = text.replace(' ', '').replace('\n', '')
    return text


def _parse_listing(div, area):
    """Build the record dict for one ``property-content`` element.

    Fields whose XPath matches nothing come back as empty strings.
    """
    detail = './div[@class="property-content-detail"]/section'
    price = './div[@class="property-price"]'
    return {
        "城市": "广州",
        "区": area,
        '标题': _field_text(div, './div/div[@class="property-content-title"]/h3/@title'),
        '楼盘': _field_text(div, f'{detail}/div[2]/p[1]/text()'),
        '地址': _field_text(div, f'{detail}/div[2]/p[2]/span/text()'),
        '总价': _field_text(div, f'{price}/p[1]/span/text()'),
        '单价': _field_text(div, f'{price}/p[2]/text()'),
        '户型': _field_text(div, f'{detail}/div[1]/p[1]/span/text()'),
        '面积': _field_text(div, f'{detail}/div[1]/p[2]/text()', strip_whitespace=True),
        '朝向': _field_text(div, f'{detail}/div[1]/p[3]/text()', strip_whitespace=True),
        '楼层': _field_text(div, f'{detail}/div[1]/p[4]/text()', strip_whitespace=True),
        '年份': _field_text(div, f'{detail}/div[1]/p[5]/text()', strip_whitespace=True),
        # Time of the crawl (Unix epoch seconds), not the listing's publish time.
        '爬取时间': time.time(),
    }


def _fetch_listing_nodes(url, timeout):
    """Download *url* and return its ``property-content`` elements.

    Network errors, HTTP error statuses and unparseable/empty bodies are
    reported on stdout and yield an empty list, so a single bad page does not
    abort the whole crawl.
    """
    try:
        response = requests.get(url, cookies=cookies, headers=headers, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f">>>请求失败-{url}: {exc}")
        return []

    try:
        tree = etree.HTML(response.text)
    except etree.LxmlError as exc:
        print(f">>>页面解析失败-{url}: {exc}")
        return []
    # Guard against a None result for empty documents before calling .xpath().
    if tree is None:
        print(f">>>页面内容为空-{url}")
        return []
    return tree.xpath('//*[@class="property-content"]')


def ershoufang(areas=None, max_pages=50, delay=300, timeout=30):
    """Crawl second-hand (resale) housing listings for Guangzhou and print them.

    For every district, pages ``1..max_pages`` of the listing index are
    fetched, each listing is converted to a dict and printed.

    :param areas: mapping of URL district slug -> display name; defaults to
        Baiyun only (other districts are listed, commented out, below).
    :param max_pages: number of result pages to fetch per district.
    :param delay: seconds to pause after each page.
    :param timeout: per-request timeout in seconds, so a stalled connection
        cannot hang the crawl indefinitely.
    :return: None
    """
    if areas is None:
        areas = {
            "baiyun": "白云",
            # The remaining districts are disabled; uncomment to crawl them.
            # "tianhe": "天河",
            # "fanyu": "番禺",
            # "haizhu": "海珠",
            # "huadu": "花都",
            # "yuexiu": "越秀",
            # "zengcheng": "增城",
            # "huangpua": "黄埔",
            # "liwan": "荔湾",
            # "nansha": "南沙",
            # "conghua": "从化",
        }

    for key, area in areas.items():
        for page in range(1, max_pages + 1):
            print(f">>>正在获取-{area}-{page}页")
            url = f'https://guangzhou.anjuke.com/sale/{key}/p{page}/'
            divs = _fetch_listing_nodes(url, timeout)
            if not divs:
                # Could be the end of the results or a failed/blocked page;
                # keep going so later pages still get a chance.
                print(f">>>未找到房源-{area}-{page}页")
            for div in divs:
                print(_parse_listing(div, area))
            # NOTE(review): the pause is assumed to apply once per page
            # (the original indentation was lost) -- confirm.
            time.sleep(delay)
# Start the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    ershoufang()