目录
欢迎关注 『python爬虫』 专栏,持续更新中
欢迎关注 『python爬虫』 专栏,持续更新中
实现效果
签到成功
已经签到过
实现思路
库
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
import sys
等待指定元素出现
def wait_for_element(driver, locator, timeout=30):
    """Block until the element addressed by ``locator`` is present in the DOM.

    :param driver: browser (WebDriver) object to poll
    :param locator: XPath expression; it is always resolved with ``By.XPATH``
    :param timeout: maximum time to wait, handed to ``WebDriverWait``
    :return: the located element, or ``None`` when the wait times out
    """
    presence = EC.presence_of_element_located((By.XPATH, locator))
    waiter = WebDriverWait(driver, timeout)
    try:
        found = waiter.until(presence)
        # Log the hit together with the element's visible text.
        print("找到元素:", locator, found.text)
    except TimeoutException:
        print("未找到元素或超时", locator)
        return None
    return found
等待指定元素出现并点击(请确保元素可以点击)
def wait_click_button(driver, locator, timeout=30):
    """Wait for the element addressed by ``locator`` to appear, then click it.

    The wait only checks presence in the DOM (see ``wait_for_element``), so the
    caller must make sure the element is actually clickable; any exception
    raised by ``click()`` propagates to the caller.

    :param driver: browser (WebDriver) object
    :param locator: XPath expression (resolved with ``By.XPATH`` by ``wait_for_element``)
    :param timeout: maximum time to wait, forwarded to ``wait_for_element``
    :return: ``True`` if the element appeared and was clicked, ``False`` if the wait timed out
    """
    element = wait_for_element(driver, locator, timeout=timeout)
    if element is None:
        return False
    # Click the element the wait already returned instead of querying the DOM
    # a second time for the same node.
    element.click()
    return True
无头浏览器节省资源
第一次运行建议你注释掉下面两行代码,观看整个流程,方便你理解,后面为了节约操作消耗的资源,多线程的时候可以用无头浏览器.(简单说就是不生成界面,但是执行浏览器操作)
# The two lines to comment out on a first run so the browser window stays visible.
# They only work after `chrome_options = Options()` has been executed.
chrome_options.add_argument('--headless') # Enable headless mode
chrome_options.add_argument('--disable-gpu') # Disable GPU acceleration
chrome_options = Options()# Create the ChromeOptions object
chrome_options.add_argument('--headless') # Enable headless mode
chrome_options.add_argument('--disable-gpu') # Disable GPU acceleration
chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3") # Override the User-Agent header with a Chrome browser User-Agent
# Start the Chrome browser with the options configured above
driver = webdriver.Chrome(options=chrome_options)
# Open the login page
driver.get('https://comicai.ai/signin')
设置窗口大小
这个步骤一般来说不是必要的,主要是因为个别奇葩网页可能会因为大小自适应导致xpath变化.
# # Get the current window size (debugging aid, disabled)
# width = driver.execute_script("return document.body.clientWidth")
# height = driver.execute_script("return window.innerHeight")
# print("当前窗口大小为:{}x{}".format(width, height))
driver.set_window_size(800,600)# Without a fixed size the XPaths may change
用户名和密码登录填表
# Placeholder credentials: replace them with a real account before running.
your_username="xxxxxxxxxxxx@qq.com"
your_password="xxxxxxxxxxxxx"
username_input_locator='//*[@id="app"]/div[1]/div[3]/div/div/input[1]'
password_input_locator='//*[@id="app"]/div[1]/div[3]/div/div/input[2]'
login_button_locator= '//*[@id="app"]/div[1]/div[3]/div/div/div[5]/button'


def require_element(driver, locator, timeout=30):
    """Return the element at ``locator``; quit the browser and exit if it never appears."""
    element = wait_for_element(driver, locator, timeout=timeout)
    if element is None:
        # Nothing after this point can work without the element, and no later
        # cleanup would run once we exit, so close the browser here.
        driver.quit()
        raise SystemExit("未找到必需的元素: " + locator)
    return element


# Wait for each input to load before typing into it; the elements returned by
# the wait are used directly, so every XPath is written exactly once.
username_input = require_element(driver, username_input_locator)
password_input = require_element(driver, password_input_locator)
username_input.send_keys(your_username)  # Type the user name into its input box
password_input.send_keys(your_password)  # Type the password into its input box
# Wait for the login button to load, then click it to submit the form.
login_button = require_element(driver, login_button_locator)
login_button.click()
登录进去后出现广告弹窗的处理
有些时候我们用xpath能找到元素但是点击不了,就是因为这种弹窗阻挡了,所以要处理好每一步的判断.
# Wait for the popup shown after login and click it away. This is best-effort:
# if the popup never appears, wait_click_button returns False and we continue.
tanchuang_locator='//*[@id="app"]/div[2]/div/div/footer/button[2]/span/span'
wait_click_button(driver,tanchuang_locator)
# Wait for the gem count to appear and read it from the element the wait
# returned (no second lookup of the same XPath).
numbers_element_locator='//*[@id="app"]/div[1]/nav/div/div[1]/button/span'
numbers_element = wait_for_element(driver,numbers_element_locator)
if numbers_element is None:
    # Without the gem counter the following sign-in steps cannot work; close
    # the browser before exiting so it is not left running.
    driver.quit()
    raise SystemExit("未找到必需的元素: " + numbers_element_locator)
numbers_text_content = numbers_element.text
print("签到前宝石数量:", numbers_text_content)
进行签到(如何处理可变的xpath)
- 关键在于怎么拿到这里的领取奖励按钮的xpath,这个是可变化的xpath.
在XPath中,使用通配符 * 或 contains() 函数可以帮助我们灵活匹配元素,特别是当元素的属性值不是固定的情况下。下面我会分别介绍如何使用通配符 * 和 contains() 函数。
- 使用通配符 *: 通配符 * 可以匹配任意类型的元素,无论元素的标签名称是什么。例如,如果我们想要匹配某个节点下的所有子节点,可以使用通配符 *。以下是一个示例:
假设我们要匹配 <div> 元素下的所有直接子元素:
//div/*
这个 XPath 表达式将匹配所有直接位于 <div> 元素下的子元素,无论子元素的标签名称是什么。
- 使用 contains() 函数: contains() 函数可以在属性值中搜索指定的字符串。它的用法是 contains(attribute, value),其中 attribute 是要搜索的属性,value 是要匹配的值。下面是一个示例:
假设我们要匹配一个 class 属性包含特定关键词的元素:
//*[contains(@class, 'keyword')]
领取奖励按钮xpath如下,其中[@id="el-id-3448-9"]里的数字会一直变化,每次都不一样,比如一个变式是:
//*[@id="el-id-3448-9"]/div/div[1]/div[2]/button
这里我们用了通配符 * 以及 starts-with() 和 contains() 函数进行匹配!
# XPath of the "claim reward" button. The numeric part of the parent id
# (e.g. el-id-3448-9) changes on every load, so only the id prefix is matched.
get_score_locator = '//*[starts-with(@id, "el-id-") and contains(@id, "-")]/div/div[1]/div[2]/button'
# Wait for the gem button and click it to open the sign-in panel
qd_numbers_locator = '//*[@id="app"]/div[1]/nav/div/div[1]/button'
wait_click_button(driver,qd_numbers_locator)
# Check whether today's sign-in was already done.
# //*[@id="el-id-3448-9"]/div/div[1]/div[2]/button/span/img  <- XPath when not signed in yet
ok_get_score_locator = '//*[starts-with(@id, "el-id-") and contains(@id, "-")]/div/div[1]/div[2]/div[2]/img'
# NOTE(review): the helper also clicks the marker image; a presence check alone
# would probably be enough here - confirm before changing.
result=wait_click_button(driver, ok_get_score_locator, timeout=5)
numbers_element_locator = '//*[@id="app"]/div[1]/nav/div/div[1]/button/span'
if not result:
    print("今日还没有签到过~")
    # Wait for the "claim reward" button and click it
    wait_click_button(driver, get_score_locator)
    # Wait for the reward confirmation button and click it
    accept_locator = '/html/body/div[8]/div/div/footer/button/span/span'
    wait_click_button(driver, accept_locator)
    time.sleep(5)  # Give the page time to load the updated data
    gem_label = "签到后宝石数量:"
else:
    print("今日已经签到过了~")
    gem_label = "目前宝石数量:"
# Read the gem count once for both branches, from the element the wait returned.
# If it never appears, wait_for_element has already logged the timeout and the
# script continues, so the cleanup that follows still runs.
numbers_element = wait_for_element(driver, numbers_element_locator)
if numbers_element is not None:
    numbers_text_content = numbers_element.text
    print(gem_label, numbers_text_content)
收尾工作关闭服务
# # Close the current window without ending the session (the browser is kept)
# driver.close()
# # Close the browser and end the session completely
driver.quit()
print("签到完成!")
实现代码
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
import sys
def wait_for_element(driver, locator, timeout=30):
    """Block until the element addressed by ``locator`` is present in the DOM.

    :param driver: browser (WebDriver) object to poll
    :param locator: XPath expression; it is always resolved with ``By.XPATH``
    :param timeout: maximum time to wait, handed to ``WebDriverWait``
    :return: the located element, or ``None`` when the wait times out
    """
    presence = EC.presence_of_element_located((By.XPATH, locator))
    waiter = WebDriverWait(driver, timeout)
    try:
        found = waiter.until(presence)
        # Log the hit together with the element's visible text.
        print("找到元素:", locator, found.text)
    except TimeoutException:
        print("未找到元素或超时", locator)
        return None
    return found
def wait_click_button(driver, locator, timeout=30):
    """Wait for the element addressed by ``locator`` to appear, then click it.

    The wait only checks presence in the DOM (see ``wait_for_element``), so the
    caller must make sure the element is actually clickable; any exception
    raised by ``click()`` propagates to the caller.

    :param driver: browser (WebDriver) object
    :param locator: XPath expression (resolved with ``By.XPATH`` by ``wait_for_element``)
    :param timeout: maximum time to wait, forwarded to ``wait_for_element``
    :return: ``True`` if the element appeared and was clicked, ``False`` if the wait timed out
    """
    element = wait_for_element(driver, locator, timeout=timeout)
    if element is None:
        return False
    # Click the element the wait already returned instead of querying the DOM
    # a second time for the same node.
    element.click()
    return True
def require_element(driver, locator, timeout=30):
    """Return the element at ``locator``; abort the script if it never appears."""
    element = wait_for_element(driver, locator, timeout=timeout)
    if element is None:
        raise SystemExit("未找到必需的元素: " + locator)
    return element


chrome_options = Options()  # Create the ChromeOptions object
chrome_options.add_argument('--headless')  # Enable headless mode
chrome_options.add_argument('--disable-gpu')  # Disable GPU acceleration
# Override the User-Agent header with a Chrome browser User-Agent
chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3")

# Placeholder credentials: replace them with a real account before running.
your_username="xxxxxxxxx@qq.com"
your_password="xxxxxxxxx"

# Login form
username_input_locator='//*[@id="app"]/div[1]/div[3]/div/div/input[1]'
password_input_locator='//*[@id="app"]/div[1]/div[3]/div/div/input[2]'
login_button_locator= '//*[@id="app"]/div[1]/div[3]/div/div/div[5]/button'
# Popup shown after login
tanchuang_locator='//*[@id="app"]/div[2]/div/div/footer/button[2]/span/span'
# Gem counter text and the gem button that opens the sign-in panel
numbers_element_locator='//*[@id="app"]/div[1]/nav/div/div[1]/button/span'
qd_numbers_locator = '//*[@id="app"]/div[1]/nav/div/div[1]/button'
# Sign-in panel. The numeric part of the parent id (e.g. el-id-3448-9) changes
# on every load, so only the id prefix is matched.
# //*[@id="el-id-3448-9"]/div/div[1]/div[2]/button/span/img  <- XPath when not signed in yet
ok_get_score_locator = '//*[starts-with(@id, "el-id-") and contains(@id, "-")]/div/div[1]/div[2]/div[2]/img'
get_score_locator = '//*[starts-with(@id, "el-id-") and contains(@id, "-")]/div/div[1]/div[2]/button'
accept_locator = '/html/body/div[8]/div/div/footer/button/span/span'

# Start the Chrome browser
driver = webdriver.Chrome(options=chrome_options)
try:
    # Open the login page
    driver.get('https://comicai.ai/signin')
    # # Get the current window size (debugging aid, disabled)
    # width = driver.execute_script("return document.body.clientWidth")
    # height = driver.execute_script("return window.innerHeight")
    # print("当前窗口大小为:{}x{}".format(width, height))
    driver.set_window_size(800,600)  # Without a fixed size the XPaths may change

    # Fill in user name and password once the inputs have loaded, then submit.
    username_input = require_element(driver, username_input_locator)
    password_input = require_element(driver, password_input_locator)
    username_input.send_keys(your_username)
    password_input.send_keys(your_password)
    login_button = require_element(driver, login_button_locator)
    login_button.click()

    # Wait for the popup and click it away. Best-effort: if it never appears
    # the helper returns False and the script simply continues.
    wait_click_button(driver, tanchuang_locator)

    # The gem counter must be present before the sign-in steps can work.
    numbers_element = require_element(driver, numbers_element_locator)
    print("签到前宝石数量:", numbers_element.text)

    # Click the gem button to open the sign-in panel
    wait_click_button(driver, qd_numbers_locator)

    # Check whether today's sign-in was already done.
    # NOTE(review): the helper also clicks the marker image; a presence check
    # alone would probably be enough here - confirm before changing.
    result = wait_click_button(driver, ok_get_score_locator, timeout=5)
    if not result:
        print("今日还没有签到过~")
        # Claim the reward, then confirm it
        wait_click_button(driver, get_score_locator)
        wait_click_button(driver, accept_locator)
        time.sleep(5)  # Give the page time to load the updated data
        gem_label = "签到后宝石数量:"
    else:
        print("今日已经签到过了~")
        gem_label = "目前宝石数量:"

    # Read the gem count once for both branches. If it never appears,
    # wait_for_element has already logged the timeout.
    numbers_element = wait_for_element(driver, numbers_element_locator)
    if numbers_element is not None:
        print(gem_label, numbers_element.text)
finally:
    # Always shut the browser down, even when a step above raised; otherwise a
    # headless Chrome process is left running.
    # (driver.close() would only close the current window and keep the session.)
    driver.quit()
print("签到完成!")
总结
大家喜欢的话,给个👍,点个关注!给大家分享更多计算机专业学生的求学之路!
版权声明:
发现你走远了@mzh原创作品,转载必须标注原文链接
Copyright 2023 mzh
Created:2023-3-1
欢迎关注 『python爬虫』 专栏,持续更新中
欢迎关注 『python爬虫』 专栏,持续更新中
『未完待续』