from selenium import webdriver
from time import sleep
from selenium.webdriver.common.by import By
import pymysql
# Open the MySQL connection used for every insert in this script and make
# sure the destination table exists before scraping starts.
conn = pymysql.connect(
    host='localhost',
    user='root',
    password='123456',
    db='huawei',
)
cursor = conn.cursor()

# One varchar(255) column per scraped field; adjacent string literals are
# concatenated by the parser, so this is a single DDL statement.
cursor.execute(
    'create table if not exists lj_house('
    'house_name varchar(255),'
    'house_big_class varchar(255),'
    'house_small_class varchar(255),'
    'house_structure varchar(255),'
    'house_size varchar(255),'
    'house_direction varchar(255),'
    'house_make varchar(255),'
    'house_floor varchar(255),'
    'house_build varchar(255),'
    'house_type varchar(255),'
    'house_price varchar(255))'
)
from selenium.webdriver import ChromeOptions
# Build the Chrome driver used for all page loads below.
option = ChromeOptions()
# Exclude Chrome's 'enable-automation' switch from the launch flags.
option.add_experimental_option('excludeSwitches', ['enable-automation'])
# Pass the options through the `options` keyword: the older `chrome_options`
# keyword was deprecated and later removed from Selenium, where it raises
# TypeError.
bro = webdriver.Chrome(options=option)
# Placeholder stored when a field cannot be found in a listing.
_NO_DATA = '暂无数据'
# Eleven placeholders, one per column of lj_house, in table column order.
_INSERT_SQL = "insert into lj_house values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"


def _parse_listing(text):
    """Convert the visible text of one listing ``<li>`` into an lj_house row.

    Args:
        text: The element's text, one visual line per ``'\\n'``-separated entry.

    Returns:
        An 11-tuple ordered as (name, big class, small class, structure,
        size, direction, make, floor, build, type, price), or ``None`` when
        the text has fewer lines or fewer ' | '-separated fields than the
        parser needs (so the caller can skip it instead of crashing).
    """
    lines = text.split('\n')
    # Drop the promotional badge line so the remaining lines keep fixed positions.
    if '必看好房' in lines:
        lines.remove('必看好房')
    if len(lines) < 3:
        return None

    house_name = lines[0].replace(',', ';')
    house_class = lines[1].split(' - ')
    house_message = lines[2].split(' | ')
    # Indices 0-5 are read below; anything shorter is not a regular listing.
    if len(house_message) < 6:
        return None

    # The sixth field is only treated as the build year when it contains '年'.
    house_build = house_message[5] if '年' in house_message[5] else _NO_DATA
    # The price is the last line consisting solely of digits. Fall back to the
    # placeholder rather than reusing a previous listing's value or raising.
    house_price = next(
        (line for line in reversed(lines) if line.isdigit()),
        _NO_DATA,
    )

    return (
        house_name,
        house_class[0],                     # house_big_class
        house_class[-1],                    # house_small_class
        house_message[0],                   # house_structure
        house_message[1],                   # house_size
        house_message[2].replace(' ', ''),  # house_direction
        house_message[3],                   # house_make
        house_message[4],                   # house_floor
        house_build,
        house_message[-1],                  # house_type
        house_price,
    )


try:
    for page in range(1, 3):
        url = 'https://bj.lianjia.com/ershoufang/pg%s/' % page
        bro.get(url)
        li_list = bro.find_elements(By.XPATH, '//*[@id="content"]/div[1]/ul/li')

        parsed = (_parse_listing(li.text) for li in li_list)
        rows = [row for row in parsed if row is not None]
        if rows:
            # Insert each listing exactly once, batched per page.
            cursor.executemany(_INSERT_SQL, rows)
            conn.commit()
finally:
    # Always release the browser and the database handles, even if a page
    # load or an insert fails part-way through.
    try:
        bro.quit()
    finally:
        cursor.close()
        conn.close()