Python 爬取自如的租房数据
#!/usr/bin/python # -*- coding: UTF-8 -*- import re import requests import pytesseract from PIL import Image from selenium import webdriver from fake_useragent import UserAgent from lxml import etree from urllib import parse import pandas as pd from datetime import datetime ua = UserAgent() headers = {"User-Agent": ua.random, "Referer": "http://gz.ziroom.com/"} class ZiRoom(object): def __init__(self): self.driver = webdriver.Chrome() self.all_data = [] def get_content(self, name): div_list = self.driver.find_elements_by_xpath('//*[@id="houseList"]/li') number = self.get_image_number() print(number) for div in div_list[1:]: try: price_list = [] # 如果网页中的值不存在 则可能会存在部分数据丢失 也就是空数据丢失 基本不会有什么影响 for i in range(2, 6): start_price = \