selenium模拟登陆(智慧南工)

阿宇阿星

已于 2023-06-05 12:19:57 修改

阅读量326

点赞数

分类专栏：笔记文章标签： selenium python 测试工具

于 2023-06-05 11:40:15 首次发布

yu879499

本文链接：https://blog.csdn.net/m0_53192838/article/details/131044307

版权

笔记专栏收录该内容

4 篇文章 0 订阅

订阅专栏

selenium模拟登陆

思路

使用selenium模拟登录比直接发送请求的方式更简单，所以本文采用selenium实现自动化模拟登录。智慧南工（南京工业大学校园内网登录验证界面）在更新之后加入了验证码验证，故本文使用基于深度学习的PaddleOCR进行验证码识别，识别准确率在百分之八十左右。作者后期也会对验证码图像进行处理，以提高准确率。
关于验证码识别的思路，我提供两个：1. 使用Tesseract（pytesseract）识别；2. 使用深度学习网络识别。作者前期采用了第一个思路，但发现识别效果非常差，故改用了飞桨的OCR模型。
对于验证码的保存也有两种思路：1. 找到验证码链接，直接使用requests请求下载图片；2. 使用selenium的屏幕截图，并通过定位验证码的坐标和大小，从截图中截取验证码（需注意电脑默认的缩放比）。
仅供学习！！！

代码

由于实现过程较简单，故直接上代码，selenium等运行环境需要自己去配置。

import operator
import os
import time

import cv2 as cv
import numpy as np
import pytesseract
from paddleocr import PaddleOCR
from PIL import Image
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
class login:
    """Automate the CAS login form at u.njtech.edu.cn with Selenium.

    The captcha is taken from a full-page screenshot, cropped to the
    captcha element and recognised with PaddleOCR.
    """

    def __init__(self, account, passward,
                 save_dir='D:\\code\\python\\Reptile\\code\\',
                 scale=1.25, timeout=5):
        """Store the credentials and the capture settings.

        Args:
            account: Text typed into the ``username`` field.
            passward: Text typed into the ``password`` field.
            save_dir: Directory that receives ``page.png`` and
                ``code_img.png``.
            scale: Factor applied to the element coordinates before the
                screenshot is cropped (the display scaling of the machine;
                1.25 matches the original author's setup).
            timeout: Seconds to wait for the login result element.
        """
        self.account = account
        self.passward = passward
        self.save_dir = save_dir
        self.scale = scale
        self.timeout = timeout
        self.url = "https://u.njtech.edu.cn/cas/login?service=https%3A%2F%2Fu.njtech.edu.cn%2Foauth2%2Fauthorize%3Fclient_id%3DOe7wtp9CAMW0FVygUasZ%26response_type%3Dcode%26state%3Dnjtech%26s%3Df682b396da8eb53db80bb072f5745232"
        print("自动化登陆开始！")

    def connect(self):
        """Open the login page, submit the form and report the outcome.

        Returns:
            True when the result page shows the success message (the
            window is then closed); False when the result element does
            not appear within ``self.timeout`` seconds or does not
            contain the success message (the window is left open).
        """
        options = webdriver.ChromeOptions()
        # "detach" is meant to keep the browser alive after the script ends.
        options.add_experimental_option("detach", True)

        driver = webdriver.Chrome(options=options)
        driver.get(self.url)
        driver.maximize_window()

        driver.find_element(By.ID, "username").send_keys(self.account)
        driver.find_element(By.ID, 'password').send_keys(self.passward)

        # Recognise the captcha once; each call costs a screenshot plus OCR.
        code = self.save(driver)
        print(code)
        driver.find_element(By.ID, "imgcaptcha").send_keys(code)

        # Clicking the field opens the hidden drop-down of channels.
        driver.find_element(By.ID, "channelshow").click()
        # NOTE(review): presumably the third entry is China Telecom -
        # verify against the live page.
        driver.find_element(
            By.XPATH, '//*[@id="fm1"]/div/div[1]/div[1]/div[5]/div/span[3]'
        ).click()

        driver.find_element(By.ID, "login").click()
        # Continue in the most recently opened window.
        driver.switch_to.window(driver.window_handles[-1])

        try:
            # WebDriverWait retries while the element is missing, so a
            # slow result page is not mistaken for a failed login.
            banner = WebDriverWait(driver, self.timeout).until(
                lambda d: d.find_element(
                    By.XPATH, '/html/body/div/div[1]/div[2]/div'
                )
            )
        except TimeoutException:
            return False

        if "网络登陆:登录成功" not in banner.text:
            return False
        driver.close()
        return True

    def save(self, driver):
        """Screenshot the page, crop the captcha and return its OCR text.

        Args:
            driver: WebDriver currently showing the login page.

        Returns:
            The recognised captcha text (at most four characters), or an
            empty string when OCR recognised nothing.
        """
        print("开始保存图片")
        page_path = os.path.join(self.save_dir, 'page.png')
        code_path = os.path.join(self.save_dir, 'code_img.png')

        captcha = driver.find_element(By.ID, 'pc-captcha')
        driver.save_screenshot(page_path)
        loc = captcha.location
        size = captcha.size
        print("验证码坐标：", loc)
        print("验证码大小:", size)

        # Element coordinates are multiplied by the display scale to get
        # the matching pixel positions in the screenshot.
        scale = self.scale
        box = (
            int(loc['x'] * scale),
            int(loc['y'] * scale),
            int(scale * (loc['x'] + size['width'])),
            int(scale * (loc['y'] + size['height'])),
        )
        with Image.open(page_path) as page:
            page.crop(box).save(code_path)

        # Recognition only (det=False): the crop already is the text area.
        ocr = PaddleOCR(use_angle_cls=True, lang="ch")
        result = ocr.ocr(code_path, det=False)
        text = self._extract_text(result)

        print("验证码：", text)
        return text

    @staticmethod
    def _extract_text(result, length=4):
        """Return the first string found in *result*, cut to *length* chars.

        *result* is walked depth-first, left to right, through nested
        lists and tuples. An empty string is returned when no string is
        found (for example ``None`` or ``[None]``).

        NOTE(review): assumes PaddleOCR returns nested lists of
        ``(text, score)`` tuples for ``det=False`` - confirm against the
        installed version.
        """
        pending = [result]
        while pending:
            item = pending.pop()
            if isinstance(item, str):
                return item[:length]
            if isinstance(item, (list, tuple)):
                # Reversed so the left-most child is popped first.
                pending.extend(reversed(item))
        return ''
        
        
if __name__ == '__main__':
    # Script entry point: fill in real credentials before running.
    acc = 'xxxxxxxxx'  # your own account
    passward = 'xxxxxx'  # your own password
    # Bind the instance to its own name so the `login` class is not
    # overwritten by the object created from it.
    session = login(acc, passward)

    session.connect()