house_链家

#!/usr/bin/env python
# -*- coding:utf-8 -*-
from selenium import webdriver
from time import sleep
from selenium.webdriver.common.by import By

# Open the MySQL connection and create the cursor used by every statement
# that follows in this script.
import pymysql
conn = pymysql.connect(
    host='localhost',
    user='root',
    password='123456',
    # `database` is the documented keyword; `db` is only a deprecated alias.
    database='huawei',
    # The scraped values are Chinese text, so ask for a Unicode connection
    # charset explicitly instead of depending on the driver/server default.
    charset='utf8mb4',
)
cursor = conn.cursor()
# Create the destination table on first run; every column is stored as text.
cursor.execute(
    'create table if not exists lj_house('
    'house_name varchar(255),'
    'house_big_class varchar(255),'
    'house_small_class varchar(255),'
    'house_structure varchar(255),'
    'house_size varchar(255),'
    'house_direction varchar(255),'
    'house_make varchar(255),'
    'house_floor varchar(255),'
    'house_build varchar(255),'
    'house_type varchar(255),'
    'house_price varchar(255))'
)

# Anti-detection: exclude Chrome's 'enable-automation' switch before the
# browser is started.
from selenium.webdriver import ChromeOptions
option = ChromeOptions()
option.add_experimental_option('excludeSwitches', ['enable-automation'])

# Start the Chrome instance used for all page requests. The options object is
# passed as `options=`: the older `chrome_options=` keyword was deprecated and
# has since been removed from Selenium 4, which is the API level the rest of
# this script (find_elements(By.XPATH, ...)) already targets.
bro = webdriver.Chrome(options=option)

# 对链家网址发起请求
def _parse_listing(lines):
    """Turn the text lines of one listing card into a row for ``lj_house``.

    ``lines`` is the card's visible text split on newlines. The layout this
    parser relies on is: line 0 is the title, line 1 is
    ``"<community> - <district>"`` and line 2 is the ``" | "``-separated
    detail string (layout, size, orientation, decoration, floor, optional
    build year, building type).

    Returns an 11-tuple in table column order, or ``None`` when the card does
    not have that shape, so the caller can skip it instead of crashing on an
    index error.
    """
    # Drop the promotional badge so the remaining lines keep fixed positions.
    fields = [text for text in lines if text != '必看好房']
    if len(fields) < 3:
        return None

    details = fields[2].split(' | ')
    if len(details) < 5:
        return None

    # Commas in the title are replaced with semicolons, as before.
    house_name = fields[0].replace(',', ';')

    location = fields[1].split(' - ')
    house_big_class = location[0]      # community name
    house_small_class = location[-1]   # district

    house_structure = details[0]                   # room layout
    house_size = details[1]                        # floor area
    house_direction = details[2].replace(' ', '')  # orientation, spaces removed
    house_make = details[3]                        # decoration type
    house_floor = details[4]                       # floor description

    # The build year is optional; it is recognised by containing '年'.
    if len(details) > 5 and '年' in details[5]:
        house_build = details[5]
    else:
        house_build = '暂无数据'

    house_type = details[-1]                       # building type

    # The price is the last line consisting only of digits. It is resolved
    # per card so a card without one can never inherit the previous card's
    # price; the same placeholder as for a missing build year is stored.
    digit_lines = [text for text in fields if text.isdigit()]
    house_price = digit_lines[-1] if digit_lines else '暂无数据'

    return (house_name, house_big_class, house_small_class, house_structure,
            house_size, house_direction, house_make, house_floor,
            house_build, house_type, house_price)


sql = "insert into lj_house values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"

# Request result pages 1 and 2 of the Lianjia Beijing second-hand listings.
for page in range(1, 3):

    url = 'https://bj.lianjia.com/ershoufang/pg%s/' % page
    bro.get(url)
    # Each <li> under the content list is one listing card.
    li_list = bro.find_elements(By.XPATH, '//*[@id="content"]/div[1]/ul/li')
    for li in li_list:
        row = _parse_listing(li.text.split('\n'))
        if row is None:
            # Not a regular listing card (unexpected layout); skip it.
            continue
        # Insert the listing exactly once. The previous executemany() call
        # was handed the same tuple twice, which stored every row twice.
        cursor.execute(sql, row)


# Commit the inserted rows, then release the database handles and the browser.
# The cleanup sits in `finally` so it also runs when the commit itself fails.
try:
    conn.commit()
finally:
    cursor.close()
    conn.close()
    # End the WebDriver session so Chrome and its driver process do not
    # outlive the script.
    bro.quit()
  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值