参考:https://www.cnblogs.com/yanghui3306/p/14385826.html
图片只做了简单处理(细节见下文),识别使用的是百度文字识别(OCR)API
1.使用selenium处理图片
# Capture the captcha with Selenium: take a full-window screenshot, then crop
# it down to the bounding box of the captcha <img> element.

# Locate the captcha image. The (by, value) form of find_element is accepted
# by both Selenium 3 and Selenium 4; the find_element_by_id helper was
# removed in Selenium 4.3.
element = driver.find_element('id', 'imgCode')

# Bounding box of the captcha, taken from the element's reported location
# and size.
# NOTE(review): presumably the screenshot is at 1:1 scale with these
# coordinates; on HiDPI displays the box may need scaling - confirm.
left = int(element.location['x'])  # top-left x
top = int(element.location['y'])  # top-left y
right = int(element.location['x'] + element.size['width'])  # bottom-right x
bottom = int(element.location['y'] + element.size['height'])  # bottom-right y

# Random file name so successive runs do not overwrite each other.
# NOTE(review): plain concatenation assumes current_dir already ends with a
# path separator - confirm where current_dir is defined.
path = current_dir + str(random.random()) + '.png'
driver.save_screenshot(path)  # screenshot of the current window

# Crop the screenshot to the captcha and overwrite the file with the crop.
im = Image.open(path)
im = im.crop((left, top, right, bottom))
im.save(path)
# 使用百度API识别验证码
def get_code():
    """Recognise the captcha stored at the module-level ``path`` via Baidu OCR.

    The image file is converted to greyscale in place (overwriting ``path``),
    sent to the ``basicGeneral`` endpoint as raw bytes, and the text of the
    first recognised line is printed and returned.

    Returns:
        The ``'words'`` value of the first entry in ``'words_result'``.
    """
    ocr_client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

    # The captchas handled here have few interference lines, so a plain
    # greyscale conversion is enough to make the characters clearer; other
    # captchas may need different preprocessing.
    Image.open(path).convert('L').save(path)

    # The OCR client method takes the image as a byte string.
    with open(path, 'rb') as image_file:
        payload = image_file.read()

    response = ocr_client.basicGeneral(payload)
    # Example response:
    # {'words_result': [{'words': '4TBiD ', 'location': {'top': 1, 'left': 6,
    #   'width': 43, 'height': 13}}], 'log_id': 1358288307112378368,
    #  'words_result_num': 1}
    recognised = response['words_result'][0]['words']
    print(recognised)
    return recognised
2.使用requests
注意要使用会话(Session)机制:让获取登录页和获取验证码图片的请求共享同一份 cookie(保持在同一个会话中),否则再次请求时验证码会刷新
# Fetch the captcha with requests. A single Session is used for every
# request so that the cookies set by the login page are sent again when the
# captcha image is downloaded.
conn = requests.Session()
resp = conn.get('https://so.gushiwen.cn/user/login.aspx?from=http://so.gushiwen.cn/user/collect.aspx')
selector = Selector(text=resp.text)
# Build the captcha URL from the src attribute of the <img id="imgCode"> tag.
img_url = 'https://so.gushiwen.cn/' + selector.xpath('.//img[@id="imgCode"]/@src').get()
img = conn.get(img_url)  # same session as the page request

filename = str(random.random()) + '.png'
with open(filename, 'wb') as f:
    f.write(img.content)

# Re-save the file through PIL so the later OCR call does not reject it with
# an image-format error.
im = Image.open(filename)
im.save(filename)

# Read the image back as bytes for the OCR client.
with open(filename, 'rb') as f:
    image = f.read()

# The OCR client must exist at this level; the one created inside get_code()
# is local to that function.
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)
data = client.handwriting(image)  # handwriting endpoint; result is not used below
# Recognise the captcha with the general endpoint and keep the first line.
code = client.basicGeneral(image)
code = code['words_result'][0]['words']
3.预处理图片( 转灰度,二值化,点线降噪)
import cv2
import numpy as np
import pytesseract
from io import BytesIO
from PIL import Image,ImageEnhance
#参考
# https://www.cnblogs.com/qqandfqr/p/7866650.html
# 自适应阈值二值化
def get_dynamic_binary_image(img):
    """Binarise an image with an adaptive Gaussian threshold.

    The input is converted with ``cv2.COLOR_BGR2GRAY`` and thresholded with
    ``cv2.adaptiveThreshold`` (max value 255, block size 21, constant 1).
    The result is also written to ``dest.png``.

    :param img: image array to convert.
    :return: the thresholded image.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    binary = cv2.adaptiveThreshold(
        gray,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        21,
        1,
    )
    cv2.imwrite('dest.png', binary)
    return binary
# 干扰线降噪
def interference_line(img):
    """Whiten pixels that have more than two near-white 4-neighbours.

    Every pixel that is not on the outer one-pixel border is visited; when
    more than two of its left/right/upper/lower neighbours are brighter than
    245 the pixel is set to 255. The image is modified in place, so pixels
    changed earlier in the scan affect later decisions. The result is also
    written to ``dest-interference.png``.

    :param img: image array indexed as ``img[row, column]``.
    :return: the same array, after modification.
    """
    n_rows, n_cols = img.shape[:2]
    # Arrays are indexed [row, column], i.e. [height index, width index].
    # The scan goes column by column; the order matters because of the
    # in-place updates.
    for col in range(1, n_cols - 1):
        for row in range(1, n_rows - 1):
            neighbours = (
                img[row, col - 1],
                img[row, col + 1],
                img[row - 1, col],
                img[row + 1, col],
            )
            white_neighbours = sum(1 for value in neighbours if value > 245)
            if white_neighbours > 2:
                img[row, col] = 255
    cv2.imwrite('dest-interference.png', img)
    return img
# 去除边框
def clear_border(img, border=2):
    """Paint a frame of ``border`` pixels around the image white (255).

    The frame has the same thickness on all four sides. The previous
    comparisons (``> w - 2`` / ``> h - 2``) cleared two pixels on the top and
    left but only one on the bottom and right. The image is modified in place
    and also written to ``dest-clear_border.png``.

    :param img: image array indexed as ``img[row, column]``.
    :param border: frame thickness in pixels; values <= 0 leave the image
        unchanged.
    :return: the same array, after modification.
    """
    # Guard against border <= 0: a slice such as img[-0:] would otherwise
    # select the whole image.
    if border > 0:
        img[:border, :] = 255   # top rows
        img[-border:, :] = 255  # bottom rows
        img[:, :border] = 255   # left columns
        img[:, -border:] = 255  # right columns
    cv2.imwrite('dest-clear_border.png', img)
    return img
# 点降噪
def interference_point(img, x=0, y=0):
    """Point denoising over the 3x3 neighbourhood of every pixel.

    For each pixel, the values in the 3x3 window centred on it (clipped at
    the image edges, and including the pixel itself) are summed. The pixel is
    set to 0 when the sum does not exceed ``(cells // 2) * 245``, i.e.
    2 * 245 for the 4-cell corner windows, 3 * 245 for the 6-cell edge
    windows and 4 * 245 for the full 9-cell window.

    Fixes compared with the earlier version:

    * the centre value is read for each pixel instead of once from the
      fixed position ``img[x, y]``;
    * the last row and last column are now visited, so the corner/edge
      cases for them actually run;
    * images with fewer than two rows or columns are left unfiltered
      instead of being indexed out of range.

    The image is modified in place in column-by-column scan order, so pixels
    changed earlier affect later windows. The result is also written to
    ``dest-interference_point.png``.

    :param img: image array indexed as ``img[row, column]``; assumes a
        single-channel image - TODO confirm against callers.
    :param x: unused; kept so existing calls remain valid.
    :param y: unused; kept so existing calls remain valid.
    :return: the same array, after modification.
    """
    height, width = img.shape[:2]
    # Every window must contain at least one neighbour in each direction.
    if height >= 2 and width >= 2:
        for col in range(width):
            col_start = max(col - 1, 0)
            col_stop = min(col + 2, width)
            for row in range(height):
                row_start = max(row - 1, 0)
                row_stop = min(row + 2, height)
                # Clipped window: 4 cells at corners, 6 on edges, 9 inside.
                cells = (row_stop - row_start) * (col_stop - col_start)
                total = int(img[row_start:row_stop, col_start:col_stop].sum())
                if total <= (cells // 2) * 245:
                    img[row, col] = 0
    cv2.imwrite('dest-interference_point.png', img)
    return img
def resize_img(img, resize=2):
    """Scale a PIL image by the factor ``resize``.

    :param img: PIL image (its ``size`` is read as ``(width, height)``).
    :param resize: scale factor; values above 1 enlarge, below 1 shrink.
    :return: the resized image.
    """
    width, height = img.size
    target_size = (int(width * resize), int(height * resize))
    # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same
    # resampling filter under its current name.
    return img.resize(target_size, Image.LANCZOS)
def enhance_img(img):
    """Apply a fixed chain of PIL enhancements to the image.

    The enhancers are applied in this order, each to the output of the
    previous one: Color with factor 0, Brightness with factor 2, Contrast
    with factor 8, Sharpness with factor 20.

    :param img: PIL image to enhance.
    :return: the enhanced image.
    """
    stages = (
        (ImageEnhance.Color, 0),
        (ImageEnhance.Brightness, 2),
        (ImageEnhance.Contrast, 8),
        (ImageEnhance.Sharpness, 20),
    )
    for enhancer_cls, factor in stages:
        img = enhancer_cls(img).enhance(factor)
    return img
# Full preprocessing pipeline: binarise, remove interference lines and
# points, enlarge, enhance, then run Tesseract on the result.
img = cv2.imread('src.png')
if img is None:
    # cv2.imread signals a missing or unreadable file by returning None.
    raise FileNotFoundError("cannot read image 'src.png'")
img = get_dynamic_binary_image(img)
img = interference_line(img)
img = interference_point(img)
# The steps above work on NumPy arrays, while resize_img/enhance_img use the
# PIL API (img.size as a tuple, img.resize, ImageEnhance), so convert here.
img = Image.fromarray(img)
img = resize_img(img, resize=2)
img = enhance_img(img)
img.save('dest.png')
text = pytesseract.image_to_string(img)
print(text.strip())