Python爬取链家郑州房价并导入Excel

郑州二手房价爬虫【链家】


废话不多说,先贴效果再贴代码

效果:
运行过程

效果图

代码:

# -*- coding: utf-8 -*-
import bs4
import requests
import time  # 引入time,计算下载时间
import xlwings as xw
import os

# Switch to the directory where the output file is stored.
os.chdir("C:/Users/Haoz/Desktop")

# Start a visible Excel application without an initial workbook, then add a
# workbook and take its first sheet as the output sheet.
a = xw.App(visible=True, add_book=False)
wb = a.books.add()
sht = wb.sheets[0]

# Header row: one column title per value written for each listing.
sht.range('a1').expand('table').value = [
    '位置',
    '总价',
    '单价',
    '房屋户型',
    '所在楼层',
    '建筑面积',
    '户型结构',
    '套内面积',
    '建筑类型',
    '房屋朝向',
    '建筑结构',
    '装修情况',
    '梯户比例',
    '供暖方式',
    '配备电梯',
    '产权年限',
    '链接',
]

def open_url(url):
    """Issue a GET request for *url* and return the ``requests`` response.

    The request carries a Chrome-on-Windows User-Agent header and a
    10-second timeout. The response is returned as-is; no status check is
    performed here.
    """
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36',
    }
    response = requests.get(url, headers=browser_headers, timeout=10)
    return response

# A listing page URL is built as: host + page number + afx.
host = 'https://zz.lianjia.com/ershoufang/jinshui/pg'
afx = 'co21sf1a4a5/'

# Unique detail-page links collected from the listing pages.
detailurl = set()

count = 1  # first page to fetch
start = time.time()  # timestamp taken before crawling begins
size = 0  # counter of processed detail pages, advanced later in the script
q = 100  # number of listing pages to crawl

# First pass: walk the listing pages and collect every detail-page link.
while count <= q:
    listing = open_url(host + str(count) + afx)
    soup = bs4.BeautifulSoup(listing.text, 'html.parser')
    detailurl.update(anchor['href'] for anchor in soup.find_all('a', class_="img"))

    # Redraw the single-line progress bar (one block character per percent).
    done = count / q
    bar = int(done * 100) * "█"
    percent = str(round(done * 100, 2))
    print("".join(['\r', "正在下载:第", str(count), '页,', "已经下载:", bar, "【", percent, "%", "】"]),
          end="")
    count += 1

# Second pass: fetch every collected detail page, build one spreadsheet row
# per listing and write it below the header row.
content_size = len(detailurl)

line = 1  # row 1 holds the header; the first data row is written to row 2
try:
    for link in detailurl:
        size = size + 1  # counts every page attempted so the bar reaches 100%
        try:
            soup1 = bs4.BeautifulSoup(open_url(link).text, 'html.parser')
            s = soup1.find("title").text
            title = [s[s.find('郑州') + 4:-6]]
            price = [soup1.find("span", class_="total").text + '万']
            ym2 = [soup1.find("span", class_="unitPriceValue").text]
            # Third "content" div: keep each non-empty line with its first
            # four characters dropped (presumably the field label - verify
            # against the live page markup).
            intro_text = soup1.find_all("div", class_="content")[2].text
            IntroContent = [field[4:] for field in intro_text.split('\n') if field]
        except (requests.RequestException, AttributeError, IndexError) as err:
            # A page that cannot be fetched, or that lacks the expected
            # elements (find() returned None / fewer than three content
            # divs), is skipped instead of aborting the whole run.
            print("\n跳过无法解析的页面:" + str(link) + " (" + repr(err) + ")")
            continue

        # Advance the row only after a successful parse so skipped pages do
        # not leave blank rows in the sheet.
        line += 1
        sht.range('a' + str(line)).expand('table').value = title + price + ym2 + IntroContent + [link]

        done = size / content_size
        print('\r' + "已经下载:" + int(done * 100) * "█" + "【" + str(round(done * 100, 2)) + "%" + "】",
              end="")
finally:
    # Save whatever has been written so far, even if the loop is interrupted
    # by an unexpected error, so already-scraped rows are not lost.
    wb.save('金水区.xlsx')

end = time.time()
print("总耗时:" + str(end - start) + "秒")
  • 3
    点赞
  • 8
    收藏
    觉得还不错? 一键收藏
  • 3
    评论
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值