Python爬虫:用XPath爬取Q房网房源信息并保存为CSV

#爬虫项目采取xpath解析

#爬取Q房源网的详情信息并保存为csv文件

#爬取具体内容有:"小区名称", "户型", "面积", "装修", "楼层", "朝向",

#  "售价", "总价/万", "详情"

1、导入模块

import requests

import time

from lxml import etree

import csv

2、#定义spider_page()函数爬取并返回页面信息

def spider_page(url):
    """Download one listing page and return its HTML text.

    Args:
        url: Absolute URL of a shenzhen.qfang.com listing page.

    Returns:
        The response body as a str (decoded by requests).

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    # Browser-like headers plus a session cookie; without them the site
    # tends to block the request as a bot.
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/70.0.3538.110 Safari/537.36',
        'upgrade-insecure-requests': '1',
        'cookie': 'acw_tc=df6fef1a15477176336286817eeb02a7224b5ac26463f80afbe8cf7952; qchatid=d59ef744-850a-427b-9340-264de69f268b; WINDOW_DEVICE_PIXEL_RATIO=1; _ga=GA1.3.1616010054.1547717677; sid=373c20fd-15cf-452e-aaab-d574fa5756c0; _jzqckmp=1; _gid=GA1.3.1123773018.1550143142; cookieId=5068cade-858f-47cd-935a-cb3a511995ac; CITY_NAME=SHENZHEN; sec_tc=AQAAAA9z2QkSZAAAt9QQ9By0mazxhvEk; acw_sc__v2=5c65796b40cb4b7fcb1e52469c34bf5ad61e042a; JSESSIONID=aaaWmDq43YXm_d82KySJw; _qzja=1.1620652135.1547717679057.1550143142187.1550154096058.1550143487410.1550154096058.0.0.0.8.3; _qzjc=1; _qzjto=5.2.0; Hm_lvt_de678bd934b065f76f05705d4e7b662c=1547717676,1550143142,1550154096; Hm_lpvt_de678bd934b065f76f05705d4e7b662c=1550154096; _dc_gtm_UA-47416713-1=1; _jzqa=1.827135515274435000.1547717679.1550143142.1550154097.3; _jzqc=1; _jzqx=1.1547717679.1550154097.3.jzqsr=shenzhen%2Eqfang%2Ecom|jzqct=/sale/f2.jzqsr=shenzhen%2Eqfang%2Ecom|jzqct=/sale; _jzqb=1.1.10.1550154097.1; _qzjb=1.1550154096058.1.0.0.0',
    }
    response = requests.get(url, headers=headers)
    response.raise_for_status()  # fail loudly on a blocked/failed request
    time.sleep(2)  # throttle: wait two seconds between page fetches
    return response.text

3、#创建csv保存函数

def csv_data(item, filename='fangwo_info.csv'):
    """Append one row to a CSV file.

    Args:
        item: Sequence of cell values forming a single CSV row.
        filename: Output CSV path; defaults to the original hard-coded
            'fangwo_info.csv' so existing callers are unaffected.
    """
    # newline='' prevents the csv module from writing blank lines on Windows.
    with open(filename, 'a+', encoding='utf-8', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(item)

4、# 解析页面所需内容

def _first_text(node, xpath_expr, default=''):
    # Return the first text result of an XPath query, or *default* when the
    # node is missing — the original `xpath(...)[0]` raised IndexError on
    # any listing with an unexpected layout.
    results = node.xpath(xpath_expr)
    return results[0] if results else default


def paser_info(url):
    """Fetch one listing page and append each house record to the CSV.

    Columns written (matching the header row created in main()):
    name, layout, area, decoration, floor, orientation,
    unit price, total price (10k CNY), description.

    Args:
        url: Listing-page URL, passed straight to spider_page().
    """
    html = spider_page(url)
    selector = etree.HTML(html)  # build an element tree for XPath queries
    for house_info in selector.xpath('//*[@id="cycleListings"]/ul/li'):
        # The anchor text is "<estate name> <description>", separated by the
        # first space; partition() tolerates a missing description where the
        # original split(' ', 1)[1] would raise IndexError.
        title = _first_text(house_info, './div[1]/p[1]/a/text()')
        name, _, xiangq = title.partition(' ')
        style = _first_text(house_info, './div[1]/p[2]/span[2]/text()')
        area = _first_text(house_info, './div[1]/p[2]/span[4]/text()')
        decotored = _first_text(house_info, './div[1]/p[2]/span[6]/text()')
        louceng = _first_text(house_info, './div[1]/p[2]/span[8]/text()').strip()
        chaoxiang = _first_text(house_info, './div[1]/p[2]/span[10]/text()')
        total = _first_text(house_info, './div[2]/span[1]/text()')
        price = _first_text(house_info, './div[2]/p/text()')
        csv_data([name, style, area, decotored, louceng,
                  chaoxiang, price, total, xiangq])
        print("正在爬取", name)  # progress indicator in the console

5、#创建主函数

def main(last_page=9):
    """Crawl listing pages 1..last_page and write all records to the CSV.

    Args:
        last_page: Highest page number to crawl (inclusive); defaults to 9,
            matching the original hard-coded range(1, 10).
    """
    # Write the CSV header row first.
    info_title = ["名称", "户型", "面积", "装修", "楼层", "朝向", "售价", "总价/万", "详情"]
    csv_data(info_title)
    urls = ['https://shenzhen.qfang.com/sale/f%s' % x
            for x in range(1, last_page + 1)]
    for url in urls:
        paser_info(url)

6、# 调用函数运行

if __name__ == '__main__':

main()

最后爬取结果如下:

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值