import time
import requests
import random
import pymysql
from lxml import etree
class AnJuKe():
# 初始化
def __init__(self, url):
    """Open the MySQL connection, fetch the listing page, and start parsing.

    Args:
        url: entry-point URL of the AnJuKe listing page to crawl.

    Side effects: connects to the local `pachong` database, creates a
    cursor, downloads the page via ``get_tree`` and kicks off
    ``result_city`` parsing.
    """
    # charset is required here: the crawled listing data is Chinese text;
    # without it PyMySQL defaults to latin1 and stored strings are mojibake.
    self.connect = pymysql.connect(
        host='localhost',
        db='pachong',
        user='root',
        password='12345',
        charset='utf8mb4',
    )
    self.cursor = self.connect.cursor()  # cursor shared by the insert logic
    self.tree = self.get_tree(url)
    self.result_city()
# 判断是否为空
def is_empty(self, data):
    """Collapse an xpath result list to a single value.

    Returns the first element when the list is non-empty, otherwise the
    placeholder string '无信息'.
    """
    # xpath() always returns a list; an empty list means the field is absent.
    return data[0] if data else '无信息'
# 得到tree
def get_tree(self, url):
    """Download *url* through a randomly chosen HTTP proxy and return the
    parsed lxml element tree of the response body.

    Args:
        url: page URL to fetch.

    Returns:
        lxml.etree element tree parsed from the response HTML.
    """
    # Rotating proxy pool — NOTE(review): these are hard-coded public
    # proxies and may well be dead by now; verify before relying on them.
    proxies_list = [
        {'http': 'http://117.191.11.111:8080'},
        {'http': 'http://118.25.104.254:1080'},
        {'http': 'http://203.195.168.154:3128'},
        {'http': 'http://117.191.11.75:80'},
        {'http': 'http://117.191.11.72:80'},
    ]
    proxies = random.choice(proxies_list)
    # Desktop Chrome UA to avoid the site's basic bot filtering.
    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.87 Safari/537.36'}
    # Fix: the original call had no timeout, so a dead proxy would hang the
    # crawler forever; bound both connect and read waits.
    response = requests.get(url, headers=headers, proxies=proxies, timeout=10).text
    tree = etree.HTML(response)
    return tree
# 获取城市详情
def result_city(self):
# 城市列表
city_list_class = self.tree.xpath('//div[@
安居客爬虫项目:爬取房源信息并保存到 MySQL 数据库,详细代码如下。
最新推荐文章于 2022-07-19 23:12:02 发布
本文介绍了如何使用Python爬虫技术抓取安居客网站的房源信息,并详细阐述了数据清洗与存储到MySQL数据库的全过程,包括关键代码示例。
摘要由CSDN通过智能技术生成