搜房网房价小爬虫

本文仅用于技术实践与验证,禁止用于其他用途

数据库(MySQL)建表语句:

-- One row per scraped housing listing; filled by the scraper's
-- `insert into fzfang(title,cell,price,addition,href)` statement.
CREATE TABLE `fzfang` (
  `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
  -- Text of the listing's `span.tit_shop` element.
  `title` varchar(128) DEFAULT NULL,
  -- Text of the link inside the listing's `p.add_shop` element.
  `cell` varchar(32) DEFAULT NULL,
  -- Number shown in the listing's `span.red > b` element.
  -- NOTE(review): the unit is not stated anywhere in this file - confirm.
  `price` decimal(8,2) DEFAULT NULL,
  -- Whitespace-normalised text of the listing's `p.tel_shop` element.
  `addition` varchar(128) DEFAULT NULL,
  -- Set on insert and refreshed automatically on every update of the row.
  `addtime` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  -- `href` attribute of the first link in the listing.
  `href` varchar(64) DEFAULT NULL,
  -- The surrogate key is the table's identity, so declare it as the primary
  -- key instead of a plain unique index.
  PRIMARY KEY (`id`)
-- utf8mb4 matches the charset the scraper connects with; MySQL's 3-byte
-- `utf8` cannot store 4-byte characters such as emoji.
-- The AUTO_INCREMENT start value is kept from the original statement.
) ENGINE=InnoDB AUTO_INCREMENT=39590 DEFAULT CHARSET=utf8mb4;
#!/usr/bin/python36
# -*- coding: utf-8 -*-

from requests import *
import time
import pymysql
from redis import Redis
from bs4 import BeautifulSoup


# Scrape listing pages from fz.esf.fang.com and store every listing that has
# not been seen recently in the MySQL table `fzfang`. Redis holds a short-lived
# marker per listing so repeated runs do not insert the same row again.

DEDUP_TTL_SECONDS = 5 * 86400  # a listing's dedup marker lives for five days
REQUEST_TIMEOUT_SECONDS = 10   # without a timeout get() can block forever

# Values are bound by the driver, never interpolated into the SQL text:
# scraped strings may contain quotes or hostile SQL.
INSERT_SQL = ('insert into fzfang(title,cell,price,addition,href)'
              'values(%s,%s,%s,%s,%s)')

connection = pymysql.connect(host='localhost',
                             user='fzfang',
                             password='fzfang',
                             db='fang',
                             charset='utf8mb4',
                             cursorclass=pymysql.cursors.DictCursor)

cur = connection.cursor()

redis_conn = Redis(host='localhost', port=6379, db=0)
houses = []


def _extract_listing(item):
    """Pull the stored fields out of one listing element.

    Args:
        item: a BeautifulSoup tag for one `<dl class="clearfix">` entry.

    Returns:
        A tuple `(title, cell, price, addition, href)` in the column order of
        INSERT_SQL, or None when the entry lacks any of the expected elements
        (for example a `<dl>` that is not a listing).
    """
    title = item.find('span', class_='tit_shop')
    price_box = item.find('span', class_='red')
    cell_box = item.find('p', class_='add_shop')
    addition = item.find('p', class_='tel_shop')
    link = item.find('a')  # first link of the entry

    price = price_box.find('b') if price_box is not None else None
    cell = cell_box.find('a') if cell_box is not None else None

    if None in (title, price, cell, addition, link):
        return None

    return (title.text.strip(),
            cell.text.strip(),
            price.text.strip(),
            # Collapse every run of whitespace into a single space.
            ' '.join(addition.text.split()),
            link.get('href'))


try:
    # Pages are addressed as .../house/i31 through .../house/i34.
    for i in range(1, 5):
        r = get('https://fz.esf.fang.com/house/i3' + str(i),
                timeout=REQUEST_TIMEOUT_SECONDS)
        houses.extend(
            BeautifulSoup(r.text, 'html5lib').find_all('dl', class_='clearfix'))
        time.sleep(1)  # pause between page requests

    for h in houses:
        listing = _extract_listing(h)
        if listing is None:
            continue
        title, cell, price, addition, href = listing

        # The key format is unchanged so markers written by earlier runs
        # still match.
        dedup_key = title + price + addition
        if redis_conn.get(dedup_key):
            continue

        cur.execute(INSERT_SQL, listing)
        connection.commit()
        # Mark the listing as seen only after the row is committed; marking
        # first would hide the listing for five days if the insert failed.
        redis_conn.setex(dedup_key, DEDUP_TTL_SECONDS, 1)
finally:
    cur.close()
    connection.close()

 

  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值