本文仅用于技术实践验证,禁止用于其他用途。
数据库:MySQL,建表语句如下:
-- Listings scraped from fz.esf.fang.com; one row per listing.
-- The columns map one-to-one onto the fields the scraper inserts
-- (title, cell, price, addition, href); `id` and `addtime` are filled in
-- by the server.
CREATE TABLE `fzfang` (
  `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
  `title` varchar(128) DEFAULT NULL,
  `cell` varchar(32) DEFAULT NULL,
  `price` decimal(8,2) DEFAULT NULL,
  `addition` varchar(128) DEFAULT NULL,
  -- Set on insert and refreshed by the server whenever the row is updated.
  `addtime` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  `href` varchar(64) DEFAULT NULL,
  -- `id` identifies the row, so declare it as the primary key rather than
  -- as a plain unique index.
  PRIMARY KEY (`id`)
  -- AUTO_INCREMENT=39590 is kept as found in the original statement.
  -- utf8mb4 matches the charset the scraper's connection uses; MySQL's `utf8`
  -- is limited to 3-byte characters and cannot store 4-byte code points.
) ENGINE=InnoDB AUTO_INCREMENT=39590 DEFAULT CHARSET=utf8mb4;
#!/usr/bin/python36
# -*- coding: utf-8 -*-
from requests import *
import time
import pymysql
from redis import Redis
from bs4 import BeautifulSoup
# Scrape listings from fz.esf.fang.com and store every listing that has not
# been stored before in the MySQL table `fzfang`. Redis holds a "seen" marker
# per listing (expiring after five days) so that re-runs skip known listings.
connection = pymysql.connect(host='localhost',
                             user='fzfang',
                             password='fzfang',
                             db='fang',
                             charset='utf8mb4',
                             cursorclass=pymysql.cursors.DictCursor)
cur = connection.cursor()
redis_conn = Redis(host='localhost', port=6379, db=0)

# Lifetime of a "seen" marker in Redis, in seconds (five days).
SEEN_TTL_SECONDS = 5 * 86400

# Placeholders are bound by the driver. Scraped text must never be formatted
# into the SQL string itself: a quote in a title would break the statement
# and opens the door to SQL injection.
INSERT_SQL = ('insert into fzfang(title,cell,price,addition,href) '
              'values(%s,%s,%s,%s,%s)')

try:
    # Step 1: collect the listing <dl> elements from pages i31 .. i34.
    houses = []
    for i in range(1, 5):
        # A timeout keeps a stalled connection from hanging the script forever.
        r = get('https://fz.esf.fang.com/house/i3' + str(i), timeout=10)
        houses.extend(
            BeautifulSoup(r.text, 'html5lib').find_all('dl', class_='clearfix'))
        # Pause between page requests.
        time.sleep(1)

    # Step 2: extract the fields of each listing and store the new ones.
    for h in houses:
        # Search the listing tag directly; serializing it and parsing it a
        # second time is not needed to look up its children.
        title = h.find('span', class_='tit_shop')
        if title is None:
            # <dl class="clearfix"> elements without a title are not listings.
            continue

        price_box = h.find('span', class_='red')
        price = price_box.find('b') if price_box is not None else None
        cell_box = h.find('p', class_='add_shop')
        cell = cell_box.find('a') if cell_box is not None else None
        addition_tag = h.find('p', class_='tel_shop')
        if price is None or cell is None or addition_tag is None:
            # Incomplete markup: skip this listing instead of crashing the run.
            continue

        title_text = title.text.strip()
        cell_text = cell.text.strip()
        price_text = price.text.strip()
        # Collapse all runs of whitespace into single spaces.
        addition = ' '.join(addition_tag.text.split())
        first_link = h.find('a')
        # A missing link or href attribute is stored as NULL.
        href = first_link.get('href') if first_link is not None else None

        # Same key format as before, so existing markers remain valid.
        seen_key = title_text + price_text + addition
        if redis_conn.get(seen_key):
            continue

        cur.execute(INSERT_SQL,
                    (title_text, cell_text, price_text, addition, href))
        connection.commit()
        # Mark the listing as seen only after the row is committed; otherwise
        # a failed insert would hide the listing until the marker expires.
        redis_conn.setex(seen_key, SEEN_TTL_SECONDS, 1)
finally:
    # Release the database resources even when scraping or inserting fails.
    cur.close()
    connection.close()