概述
利用 Selenium WebDriver 对登录页面截图,再用 PIL 库从截图中裁剪出验证码图片(可应对验证码图片链接动态变化、无法直接下载的情况),然后调用百度 OCR 识别验证码,实现网页自动登录
例:
网址:登录古诗文网
OCR
网址:百度智能云-登录
选择产品->文字识别->通用文字识别
创建应用
得到接口信息
代码实现
安装必要库
pip install selenium
pip3 install pillow
pip install baidu-aip
有时运行会提示缺少 chardet 库,安装即可:
pip install chardet
login.py
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from aip import AipOcr
from PIL import Image
import time
def trim_image(l, u, r, d, source, to):
    """Crop a rectangular region out of an image file and save the result.

    Args:
        l: Left pixel coordinate of the crop box.
        u: Upper pixel coordinate of the crop box.
        r: Right pixel coordinate of the crop box.
        d: Lower pixel coordinate of the crop box.
        source: Path of the image file to read.
        to: Path the cropped image is written to.
    """
    # The crop box is ordered (left, upper, right, lower).
    bbox = (l, u, r, d)
    # Use a context manager so the source file handle is released even if
    # cropping or saving raises.
    with Image.open(source) as img:
        img.crop(bbox).save(to)
def getCode(path):
    """Recognize the text in an image file through the Baidu OCR API.

    Args:
        path: Path of the image file to send to the OCR service.

    Returns:
        The recognized words concatenated into one string, with all space
        characters removed.

    Raises:
        RuntimeError: If the OCR response contains no ``words_result``
            entry; the raw response is included in the message.
    """
    # Baidu OCR credentials: fill these in from the application created in
    # the Baidu cloud console.
    APP_ID = ''
    API_KEY = ''
    SECRET_KEY = ''
    client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

    with open(path, "rb") as f:
        img = f.read()

    data = client.basicGeneral(img)
    # Without this check a failed request would surface as a bare
    # KeyError('words_result') that hides what the service actually replied.
    if "words_result" not in data:
        raise RuntimeError(
            "Baidu OCR response has no 'words_result': {!r}".format(data)
        )

    code = "".join(da["words"] for da in data["words_result"])
    return code.replace(" ", "")
# Whether to run Chrome without a visible window.
headless = True

options = webdriver.ChromeOptions()
if headless:
    options.add_argument('--headless')
# Ignore SSL certificate errors.
options.add_argument('--ignore-ssl-errors=yes')
options.add_argument('--ignore-certificate-errors')

driver = webdriver.Chrome(options=options)
try:
    driver.set_window_size(1920, 1080)  # fixed size keeps the crop box stable
    driver.get('http://')  # login page URL
    wait = WebDriverWait(driver, 10)
    # Fixed pause kept from the original flow so the page (including the
    # captcha image) has time to render before the screenshot is taken.
    time.sleep(2)

    # Locate the DOM elements; replace the XPaths to match the target page.
    # Explicit waits tolerate a slow page instead of failing immediately.
    username_field = wait.until(EC.presence_of_element_located(
        (By.XPATH, '//input[@placeholder="请输入账号"]')))
    password_field = wait.until(EC.presence_of_element_located(
        (By.XPATH, '//input[@placeholder="请输入密码"]')))
    captcha_field = wait.until(EC.presence_of_element_located(
        (By.XPATH, '//input[@placeholder="请输验证码"]')))
    login_button = wait.until(EC.presence_of_element_located(
        (By.XPATH, '//button[@class="el-button login_confirm_btn el-button--primary el-button--small"]')))

    screenshot_path = 'C:\\'  # where the full-page screenshot is saved
    # save_screenshot reports failure through its return value; stop here
    # rather than cropping a missing or stale file.
    if not driver.save_screenshot(screenshot_path):
        raise RuntimeError(
            "Could not save screenshot to {!r}".format(screenshot_path)
        )

    cropped_path = ''  # where the cropped captcha image is saved
    # Headless and windowed modes need different crop coordinates;
    # replace the placeholder values with the captcha's real bounding box.
    if headless:
        trim_image(1, 1, 1, 1, screenshot_path, cropped_path)
    else:
        trim_image(2, 2, 2, 2, screenshot_path, cropped_path)

    # Recognize the captcha text.
    captcha_text = getCode(cropped_path)

    # Fill in the form.
    username_field.send_keys('用户名')
    password_field.send_keys('密码')
    captcha_field.send_keys(captcha_text)

    # Submit the login form.
    time.sleep(1)
    login_button.click()
    time.sleep(4)  # wait for the page to load

    if driver.current_url == '目标网址':
        print("登录成功!")
    else:
        print("登录失败!! 验证码错误识别!")

    time.sleep(5)
finally:
    # Always shut the browser down, even when a step above raises, so no
    # Chrome process is left running.
    driver.quit()
ps:能力有限,多多包涵~