# 2019 Python 最新爬取链家租房信息

import time

import pymysql
import requests
from lxml import etree

# Open the MySQL connection and create a cursor on it.
# NOTE(review): host, user and password are empty placeholders and must be
# filled in before running. Neither `db` nor `cursor` is used by the code
# visible in this file.
db = pymysql.connect(host="", user="", password="", db="xxhouse", port=3306)
cursor = db.cursor()

在这里插入代码

# HTTP headers sent with every request; supplies a desktop Chrome User-Agent.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/73.0.3683.75 Safari/537.36"
    )
}
# Listing-page URLs; filled in by get_url() and passed to parse_url() by main().
url = []
def get_url():
    """Append the URLs of listing pages 1 through 100 to the global ``url`` list.

    Each URL has the form
    ``https://sh.lianjia.com/zufang/pg<N>/#contentList``.

    The module-level list is mutated in place and nothing is returned, so
    calling this function a second time appends the same 100 URLs again.
    """
    url.extend(
        "https://sh.lianjia.com/zufang/pg{}/#contentList".format(page)
        for page in range(1, 101)
    )

在这里插入代码片

# Site root that the relative listing hrefs are appended to.
_BASE_URL = "https://sh.lianjia.com"

# Order in which parse_url() prints the fields of a scraped record.
_FIELD_ORDER = (
    "title",
    "upgrade_time",
    "price",
    "des",
    "houser_name",
    "houser_phone",
    "picture",
    "types",
    "area",
    "orient",
)


def _parse_apartment(tree):
    """Extract the fields of an apartment detail page as a dict.

    Raises:
        IndexError: if any expected element is missing from the page.
    """
    titles = tree.xpath('//div[@id="aside"]/p/span[1]/text()')
    prices = tree.xpath('//div[@id="aside"]/p/span[2]/text()')
    # NOTE(review): the pasted source read `// DIV [@ ID ="span"]` here, which
    # cannot match lowercase HTML elements. It is reconstructed as
    # div[@id="aside"] by analogy with the phone query below -- confirm
    # against the live page.
    houser_names = tree.xpath(
        '//div[@id="aside"]/div[2]/ul/li/p[1]/span[1]/text()'
    )
    houser_phones = tree.xpath('//div[@id="aside"]/div[2]/ul/li/p[3]/text()')
    pictures = tree.xpath('//div[@id="mySwipe"]//ul[1]/div[4]/img/@data-src')
    desc = tree.xpath('//div[@id="info"]/p/text()')
    return {
        "title": titles[0],
        "upgrade_time": "",
        "price": prices[0],
        "des": desc[0],
        "houser_name": houser_names[0],
        "houser_phone": houser_phones[0],
        "picture": pictures[0],
        # These fields are not scraped for apartment pages; left empty.
        "types": "",
        "area": "",
        "orient": "",
    }


def _parse_rental(tree):
    """Extract the fields of a regular rental detail page as a dict.

    Raises:
        IndexError: if any expected element is missing from the page.
    """
    titles = tree.xpath('//div[@class="content clear w1150"]/p/text()')
    upgrade_times = tree.xpath('//div[@class="content__subtitle"]/text()')
    prices = tree.xpath('//div[@id="aside"]/p[1]/span/text()')
    des = "".join(tree.xpath('//div[@id="aside"]/p[2]/i/text()'))
    houser_names = tree.xpath('//div[@id="aside"]/ul/li/div/span/@title')
    # NOTE(review): the pasted source showed "content__aside__list - bottom";
    # reconstructed as the "--" modifier form -- confirm against the live page.
    houser_phones = tree.xpath(
        '//p[@class="content__aside__list--bottom oneline"]/text()'
    )
    pictures = tree.xpath('//div[@id="mySwipe"]//ul/div[1]/img/@data-src')
    details = tree.xpath('//ul[@class="content__aside__list"]/p/span/text()')
    return {
        "title": titles[0],
        # The second text node is used, as in the original code.
        "upgrade_time": upgrade_times[1],
        "price": prices[0],
        "des": des,
        "houser_name": houser_names[0],
        "houser_phone": houser_phones[0],
        "picture": pictures[0],
        "types": details[1],
        "area": details[2],
        "orient": details[3],
    }


def parse_url(url):
    """Fetch each listing page in ``url`` and print every listing's details.

    For each page, the links of its listing items are collected, each detail
    page is downloaded, and its fields are printed one per line in the order
    given by ``_FIELD_ORDER``. A detail page that lacks an expected element
    is reported and skipped.

    Args:
        url: iterable of listing-page URLs.

    Returns:
        None.
    """
    for html in url:
        print(html)
        req = requests.get(url=html, headers=headers)
        res = etree.HTML(req.text)
        # NOTE(review): the pasted source showed "content__list - item";
        # reconstructed as the "--" modifier form -- confirm against the page.
        hrefs = res.xpath('//div[@class="content__list--item"]/a/@href')
        for href in hrefs:
            h_url = _BASE_URL + href
            print(h_url)
            ret = requests.get(url=h_url, headers=headers)
            rt = etree.HTML(ret.text)
            # The character right after the href's leading "/" (index 23 of
            # the full URL) is "a" for apartment pages -- presumably
            # "/apartment/..."; TODO confirm.
            if href[1:2] == "a":
                extract = _parse_apartment
            else:
                extract = _parse_rental
            try:
                record = extract(rt)
            except IndexError:
                # Best effort: a page with a missing element is skipped, but
                # the skip is reported so it does not vanish silently.
                print("skipped (missing field): " + h_url)
                continue
            for key in _FIELD_ORDER:
                print(record[key])

def main():
    """Fill the global ``url`` list via get_url(), then scrape it with parse_url()."""
    get_url()
    parse_url(url=url)


# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    main()

  • 1
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值