获取登录验证码之前,需要先了解前端页面的基本构造,例如 CSS 中各模块(元素)的位置与尺寸;如果对这部分不熟悉,需要自己先补足一些前端基础知识。
代码如下:
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import time
from io import BytesIO

import pytesseract
from PIL import Image
from selenium import webdriver
# Screenshot a styled-font element on a Maoyan film page, crop it out of the
# full-window screenshot, save the crop, and OCR it with tesseract.
#
# NOTE(review): the positional driver path and find_element_by_class_name are
# older Selenium call styles -- confirm they exist in the installed version.
browser = webdriver.Chrome('./chromedriver')
try:
    browser.get("http://maoyan.com/films/1203575?_v_=yes")
    element = browser.find_element_by_class_name("stonefont")

    # White background and black text give the OCR engine maximum contrast.
    browser.execute_script('document.querySelector(".banner").style.background = "white"')
    browser.execute_script('document.querySelector(".stonefont").style.color = "black"')

    # 1. Take a screenshot of the whole window.
    full_img_data = browser.get_screenshot_as_png()
    # 2. Wrap the PNG bytes in a PIL image so it can be cropped.
    full_img = Image.open(BytesIO(full_img_data))

    # Element coordinates are multiplied by a points-to-pixels factor before
    # cropping (2 on a macOS Retina screen). Ask the browser for the factor
    # instead of hard-coding it so the crop also lines up on other displays.
    scale = browser.execute_script("return window.devicePixelRatio") or 1
    left = round(element.location["x"] * scale)
    top = round(element.location["y"] * scale)
    right = round(left + element.size["width"] * scale)
    bottom = round(top + element.size["height"] * scale)
    cut_info = (left, top, right, bottom)
    print(cut_info)

    cut_img = full_img.crop(cut_info)
    cut_img.save('12-cut_img.png')

    # Hand the crop to tesseract; --psm 7 treats the image as a single line.
    print(pytesseract.image_to_string(cut_img, config='--psm 7'))
    time.sleep(5)
finally:
    # Always shut the browser down, even when a step above raised.
    browser.quit()