下面是我自己写的一个类,实现了在输入框中输入文本、点击、等待、页面跳转、页面滚动、获取标签内容、关闭页面等功能。
最后附上一个百度贴吧的例子,可以自动查看贴吧热议榜中的精选帖子。
# -*- coding: utf-8 -*-
import random
import time

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
class Sel:
    """Small convenience wrapper around a Selenium Chrome ``WebDriver``.

    Provides helpers for typing into inputs, waiting for elements, clicking
    (following newly opened windows), scrolling, reading text/attributes and
    closing windows.

    Attributes:
        window_num: Number of browser windows this wrapper has counted as
            open; used by ``click`` to detect that a click opened a new one.
        driver: The underlying ``webdriver.Chrome`` instance.
    """

    def __init__(self, url):
        """Start Chrome, open *url* and begin tracking the window count.

        :param url: address loaded in the first browser window
        """
        self.window_num = 0  # number of windows opened so far
        self.driver = self.__begin(url)

    def __begin(self, url):
        """Create the Chrome driver, maximize it and load *url*.

        :param url: address to load
        :return: the started ``webdriver.Chrome`` instance
        """
        options = webdriver.ChromeOptions()
        # Optional switches left disabled by the original author:
        #   options.add_argument("--headless")     # run without a visible UI
        #   options.add_argument('--disable-gpu')  # author's note: without it,
        #                                          # locating sometimes misbehaves
        driver = webdriver.Chrome(options=options)
        driver.maximize_window()
        # Alternative to maximizing: driver.set_window_size(480, 800)
        driver.get(url)
        self.window_num += 1
        return driver

    def _page_height(self):
        """Return the rendered height of the ``<html>`` element in pixels."""
        return self.driver.find_element(By.TAG_NAME, 'html').size['height']

    def scroll(self):
        """Scroll the current page to the bottom in small, randomized steps.

        Each pass picks a burst of 300-500 px, scrolls it in 20 px steps, then
        pauses for a random 0.5-1.5 s. The target distance is re-measured after
        every burst, so the loop keeps going if the page height changes while
        scrolling (presumably for lazily loaded content -- not verifiable
        from this file).
        """
        window_height = self.driver.get_window_size()['height']
        remaining = self._page_height() - window_height
        scrolled = 0
        while scrolled < remaining:
            burst = random.randint(300, 500)
            scrolled += burst
            while burst > 0:
                burst -= 20
                self.driver.execute_script("window.scrollBy(0, 20)")
            remaining = self._page_height() - window_height
            time.sleep(1.5 - random.random())

    def get_cookie(self):
        """Return the cookies of the current session (``driver.get_cookies()``)."""
        return self.driver.get_cookies()

    def input_text(self, value, text, by=By.CSS_SELECTOR):
        """Type *text* into the element located by (*by*, *value*).

        :param value: locator value of the input element
        :param text: text sent to the element via ``send_keys``
        :param by: locator strategy, CSS selector by default
        """
        self.driver.find_element(by, value).send_keys(text)

    def wait(self, value, by=By.CSS_SELECTOR, timeout=5):
        """Block until the element located by (*by*, *value*) is present.

        :param value: locator value of the awaited element
        :param by: locator strategy, CSS selector by default
        :param timeout: maximum number of seconds to wait
        :return: the located element (whatever ``WebDriverWait.until`` yields)
        """
        return WebDriverWait(self.driver, timeout).until(
            EC.presence_of_element_located((by, value))
        )

    def click(self, value=None, by=By.CSS_SELECTOR, custom=False, target=None):
        """Click an element and follow any window the click opened.

        :param value: locator value of the element to click (used when
            ``custom`` is false)
        :param by: locator strategy for *value*, CSS selector by default
        :param custom: when true, click the already-located *target* instead
            of looking an element up
        :param target: the element to click when ``custom`` is true
        :return: the string ``"click succeed"``
        :raises ValueError: if the argument required by the chosen mode
            (*target* or *value*) is missing
        """
        if custom:
            if target is None:
                raise ValueError("target is required when custom=True")
            target.click()
        else:
            if value is None:
                raise ValueError("value is required when custom=False")
            element = self.wait(value, by)
            try:
                element.click()
            except WebDriverException:
                # Native click failed; re-locate the element and click it
                # through JavaScript instead.
                self.driver.execute_script(
                    'arguments[0].click()', self.driver.find_element(by, value)
                )

        # If the click opened a new window, move the driver's focus to the
        # last handle and resynchronise the counter with the real count.
        handles = self.driver.window_handles
        if len(handles) > self.window_num:
            self.window_num = len(handles)
            self.driver.switch_to.window(handles[-1])
        return "click succeed"

    def get_text(self, value, by=By.CSS_SELECTOR, attribute_name=None):
        """Return the text, or one attribute, of a located element.

        :param value: locator value of the element
        :param by: locator strategy, CSS selector by default
        :param attribute_name: if given (and truthy), return this attribute's
            value instead of the element text
        :return: the element's text or the requested attribute value
        """
        element = self.wait(value, by)
        if attribute_name:
            return element.get_attribute(attribute_name)
        return element.text

    def close(self):
        """Close the current window.

        When other windows remain open, focus moves to the last remaining
        handle and ``window_num`` is decremented; closing the only window
        leaves the counter untouched.
        """
        # Must be checked before closing: the handle list is not queried
        # again once the only window has been closed.
        has_other_windows = len(self.driver.window_handles) > 1
        self.driver.close()
        if has_other_windows:
            self.driver.switch_to.window(self.driver.window_handles[-1])
            self.window_num -= 1

    def refresh(self):
        """Reload the current page."""
        self.driver.refresh()

    def quit(self):
        """Shut the driver down via ``driver.quit()``."""
        self.driver.quit()
if __name__ == '__main__':
    # Demo: open Baidu Tieba, fill in the login form, open the hot-topic
    # ranking and visit every featured post of every topic.
    url = 'https://tieba.baidu.com/'
    account = '******'
    password = '******'
    tieba = Sel(url)
    try:
        tieba.click('.btn_login')  # open the login dialog
        tieba.click('p[title="用户名登录"]')  # switch to username/password login
        tieba.input_text('#TANGRAM__PSP_10__userName', account)
        tieba.input_text('#TANGRAM__PSP_10__password', password)
        # NOTE(review): the form is filled in but no submit button is clicked
        # here -- confirm whether that step is missing or intentional.
        tieba.click('查看榜单', By.LINK_TEXT)  # open the ranking list

        hot_topics = tieba.driver.find_elements(By.CSS_SELECTOR, 'ul.topic-top-list>li')
        for item in hot_topics:
            topic_link = item.find_element(By.CSS_SELECTOR, 'a.topic-text')
            print("Topic: {}".format(topic_link.text))
            tieba.click(custom=True, target=topic_link)

            selects = tieba.driver.find_elements(By.CSS_SELECTOR, '#selected-feed>ul>li')
            for select in selects:
                title_link = select.find_element(By.CSS_SELECTOR, "a.title")
                print(" Title: {}".format(title_link.text))
                tieba.click(custom=True, target=title_link)
                tieba.scroll()
                tieba.close()  # close the post window
            tieba.close()  # close the topic window
        tieba.close()  # close the ranking window
    finally:
        # Always end the browser session, even if a step above failed.
        tieba.quit()