import math
import os
import random
import shutil
import time
from collections import Counter
from math import isnan

import numpy as np
import pytesseract
from PIL import Image
from PIL import ImageEnhance
from pylab import NaN
def is_number(s):
    """
    Check whether the recognised string can be interpreted as a number.

    Two interpretations are tried in order: ``float(s)`` and then
    ``unicodedata.numeric(s)``, which accepts a single numeric Unicode
    character (for example a CJK numeral).

    :param s: the recognised string
    :return: True when either interpretation succeeds, otherwise False
    """
    import unicodedata

    try:
        float(s)
        return True
    except (TypeError, ValueError):
        # TypeError covers non-string input such as None, which previously
        # escaped this function as an exception.
        pass
    try:
        unicodedata.numeric(s)
        return True
    except (TypeError, ValueError):
        pass
    return False
def border_color(img, border=4):
    """
    Count how often each pixel value occurs in the outer frame of the image.

    :param img: image exposing ``width``, ``height`` and ``getpixel``
        (the callers pass an 8-bit greyscale image, ``Image.convert('L')``)
    :param border: thickness of the frame in pixels
    :return: dict mapping each pixel value found in the frame to its count
    """
    width, height = img.width, img.height
    # A single counting pass replaces the former ``list.count`` call per
    # distinct value, which rescanned the whole pixel list every time.
    counts = Counter(
        img.getpixel((x, y))
        for x in range(width)
        for y in range(height)
        if x < border or x >= width - border or y < border or y >= height - border
    )
    return dict(counts)
def corners_color(img):
    """
    Detect a "mosaic" image by probing 3x3 windows near the image edges.

    Six windows are inspected: the four corners plus the middle of the top
    and bottom edges. A window is "white" when more than 7 of its pixels are
    above 245 and none is below 10, and "black" in the mirrored case.

    :param img: greyscale image exposing ``load()``, ``width`` and ``height``
    :return: True when at least one white and one black window were found,
        otherwise None
    """
    pixels = img.load()
    offsets = (-1, 0, 1)

    def tally(cx, cy):
        # Count the bright and the dark pixels of the 3x3 window in one pass.
        bright = 0
        dark = 0
        for dx in offsets:
            for dy in offsets:
                value = pixels[cx + dx, cy + dy]
                if value > 245:
                    bright += 1
                if value < 10:
                    dark += 1
        return bright, dark

    right = img.width - 2
    bottom = img.height - 2
    middle = int(img.width / 2)
    probes = [(2, 2), (2, bottom), (right, 2), (right, bottom),
              (middle, bottom), (middle, 2)]

    seen_white = False
    seen_black = False
    for cx, cy in probes:
        bright, dark = tally(cx, cy)
        if bright > 7 and dark == 0:
            seen_white = True
        if bright == 0 and dark > 7:
            seen_black = True
    if seen_white and seen_black:
        return True
    return None
# Remove the border
def clear_border(img, dot=3):
    """
    Paint a frame of ``dot`` pixels around the image white (255), in place.

    The far edges use ``>=`` so that exactly ``dot`` rows/columns are cleared
    on every side; the former ``>`` comparison cleared one row/column fewer
    on the bottom and right than on the top and left.

    :param img: greyscale image exposing ``load()``, ``width`` and ``height``
    :param dot: thickness of the frame in pixels
    :return: the same image object
    """
    pixels = img.load()
    width, height = img.width, img.height
    for x in range(width):
        for y in range(height):
            if x < dot or x >= width - dot or y < dot or y >= height - dot:
                pixels[x, y] = 255
    return img
# De-noise interference lines
def interference_line(img):
    """
    Whiten every interior pixel whose 3x3 window is mostly bright.

    Pixels at least 4 away from each edge are visited column by column. When
    more than 4 of the 9 pixels in the window (centre included) are above
    225, the centre is set to 255. The image is edited in place, so earlier
    changes influence later windows.

    :param img: greyscale image exposing ``load()``, ``width`` and ``height``
    :return: the same image object
    """
    pixels = img.load()
    window = [(dx, dy) for dx in (-1, 0, 1) for dy in (-1, 0, 1)]
    for x in range(4, img.width - 4):
        for y in range(4, img.height - 4):
            bright = sum(1 for dx, dy in window if pixels[x + dx, y + dy] > 225)
            if bright > 4:
                pixels[x, y] = 255
    return img
# Point convergence, de-noising
def homoplasy_image(img, reject='b', times=5):
    """
    Remove isolated dots by making a pixel agree with its neighbourhood.

    For every pixel at least 2 away from each edge, the 3x3 and 5x5 windows
    centred on it are examined. With ``reject='b'`` a black pixel (< 5) is
    turned white when more than 4 pixels of the 3x3 window, or more than 18
    of the 5x5 window, are white (> 245). ``reject='w'`` does the mirrored
    operation on white pixels. The image is edited in place.

    :param img: greyscale image exposing ``load()``, ``width`` and ``height``
    :param reject: ``'b'`` to remove black dots, ``'w'`` to remove white dots
    :param times: number of passes over the image
    :return: the same image object
    """
    pixels = img.load()
    margin = 2

    def bright_around(cx, cy, radius):
        span = range(-radius, radius + 1)
        return sum(1 for dx in span for dy in span if pixels[cx + dx, cy + dy] > 245)

    def dark_around(cx, cy, radius):
        span = range(-radius, radius + 1)
        return sum(1 for dx in span for dy in span if pixels[cx + dx, cy + dy] < 5)

    for _ in range(times):
        for row in range(margin, img.width - margin):
            for column in range(margin, img.height - margin):
                if reject == "w":
                    if pixels[row, column] > 245 and (
                            dark_around(row, column, 1) > 4
                            or dark_around(row, column, 2) > 18):
                        pixels[row, column] = 0
                elif reject == "b":
                    if pixels[row, column] < 5 and (
                            bright_around(row, column, 1) > 4
                            or bright_around(row, column, 2) > 18):
                        pixels[row, column] = 255
    return img
def gen_new_black_pic(img, threshold=25, grounding="b", show=True):
    """
    Build a new two-level greyscale image from ``img``.

    Every pixel of ``img`` above ``threshold`` is drawn in the foreground
    colour on a canvas filled with the background colour.

    :param img: source image exposing ``size`` and ``getpixel``
    :param threshold: pixels strictly above this value become foreground
    :param grounding: ``'b'`` for a black background with white foreground,
        anything else for a white background with black foreground
    :param show: when True (the previous, unconditional behaviour) the
        result is opened with ``Image.show()``; pass False to skip the viewer
    :return: the new image
    """
    if grounding == 'b':
        background, foreground = 0, 255
    else:
        background, foreground = 255, 0
    width, height = img.size
    im2 = Image.new("L", img.size, background)
    for y in range(height):
        for x in range(width):
            if img.getpixel((x, y)) > threshold:
                im2.putpixel((x, y), foreground)
    if show:
        im2.show()
    return im2
def gen_new_white_pic(img, threshold=18, show=True):
    """
    Build a new white greyscale image holding only the darkest pixels.

    Every pixel of ``img`` below ``threshold`` is drawn black (0) on a white
    (255) canvas of the same size.

    :param img: source image exposing ``size`` and ``getpixel``
    :param threshold: pixels strictly below this value are copied as black
    :param show: when True (the previous, unconditional behaviour) the
        result is opened with ``Image.show()``; pass False to skip the viewer
    :return: the new image
    """
    width, height = img.size
    im2 = Image.new("L", img.size, 255)
    for y in range(height):
        for x in range(width):
            if img.getpixel((x, y)) < threshold:
                im2.putpixel((x, y), 0)
    if show:
        im2.show()
    return im2
# Invert colours
def reverse_color(img):
    """
    Invert every pixel in place by replacing its value ``v`` with ``255 - v``.

    :param img: greyscale image exposing ``load()`` and ``size``
    :return: the same image object
    """
    pixels = img.load()
    width, height = img.size
    coordinates = ((x, y) for x in range(width) for y in range(height))
    for point in coordinates:
        pixels[point] = 255 - pixels[point]
    return img
# Binarisation
def binary_image(img, standard=157.5, tolerance=4):
    """
    Binarise the image in place around a reference value.

    This is a band test, not a plain threshold: a pixel strictly within
    ``tolerance`` of ``standard`` becomes white (255), and every other pixel
    becomes black (0).

    :param img: greyscale image exposing ``load()``, ``width`` and ``height``
    :param standard: reference pixel value (the callers pass the dominant
        border colour)
    :param tolerance: half-width of the band around ``standard`` (exclusive)
    :return: the same image object
    """
    pixels = img.load()
    lower = standard - tolerance
    upper = standard + tolerance
    for x in range(img.width):
        for y in range(img.height):
            pixels[x, y] = 255 if lower < pixels[x, y] < upper else 0
    return img
def max_value_of_border_color(dic):
    """
    Return the most frequent border colour.

    :param dic: dict mapping pixel value to its count
    :return: the ``(value, count)`` pair with the largest count (the first
        one on ties), or ``(0, 0)`` when no count is greater than zero
    """
    fallback = (0, 0)
    top = max(dic.items(), key=lambda entry: entry[1], default=fallback)
    return top if top[1] > fallback[1] else fallback
def max_value_of_pic_color(img):
    """
    Rank the 256 grey levels of the image by how often they occur.

    :param img: image exposing ``histogram()``
    :return: list of ``(level, count)`` pairs for levels 0..255, sorted by
        count in descending order (equal counts stay in ascending level order)
    """
    histogram = img.histogram()
    ranking = [(level, histogram[level]) for level in range(256)]
    ranking.sort(key=lambda pair: pair[1], reverse=True)
    return ranking
def file_list(root, suffix='png'):
    """
    List the files directly inside ``root`` whose name ends with ``suffix``.

    Sub-directories are not searched. A missing or unreadable ``root``
    yields an empty list.

    :param root: directory to inspect
    :param suffix: required file-name ending
    :return: list of paths (``root`` joined with each matching file name)
    """
    # os.walk yields the top directory first; taking only that entry avoids
    # descending into the whole tree just to discard the results.
    top = next(os.walk(root), None)
    if top is None:
        return []
    parent, _subdirs, files = top
    return [os.path.join(parent, name) for name in files if name.endswith(suffix)]
def create_dir(path):
    """
    Create ``path`` (including missing parents) if it does not exist yet.

    Nothing happens when ``path`` already exists, as before; the attempt is
    made directly instead of checking first, so a directory created in
    between by someone else no longer causes an error.

    :param path: directory to create
    """
    try:
        os.makedirs(path)
    except FileExistsError:
        pass
# Convert an image to a vector
def gen_vector(img):
    """
    Turn the pixel sequence of an image into an index -> value mapping.

    :param img: image exposing ``getdata()``
    :return: dict mapping each position in ``getdata()`` to its value
    """
    return dict(enumerate(img.getdata()))
# Norm of a vector
def modulo(vector):
    """
    Return the Euclidean length of a vector stored as a dict.

    :param vector: dict whose values are the vector components
    :return: square root of the sum of the squared components
    """
    return math.sqrt(sum(component ** 2 for component in vector.values()))
# Cosine of the angle between two vectors
def vector_compare(vector1, vector2):
    """
    Return the cosine similarity of two vectors stored as dicts.

    Only keys present in both dicts contribute to the dot product.

    :param vector1: dict mapping index to component
    :param vector2: dict mapping index to component
    :return: dot product divided by the product of both norms, or 0.0 when
        either vector has zero length (this used to raise ZeroDivisionError)
    """
    dot_product = sum(
        value * vector2[key] for key, value in vector1.items() if key in vector2
    )
    denominator = modulo(vector1) * modulo(vector2)
    if denominator == 0:
        return 0.0
    return dot_product / denominator
# Split a black-and-white image into letters
def vertical_cut(img):
    """
    Cut a black-and-white image into vertical strips, one per letter.

    A column belongs to a letter when it contains at least one pixel that is
    not 255. Consecutive such columns form one span ``(start, end)`` with
    ``end`` exclusive. A letter that reaches the right edge of the image is
    closed at the image width instead of being dropped.

    When exactly 5 spans are found, each one is cropped and saved as a GIF
    under ``pic/letter/`` (created if missing), named with the current
    timestamp plus a random number.

    :param img: image exposing ``size``, ``getpixel``, ``crop``
    :return: list of ``(start, end)`` column spans
    """
    width, height = img.size
    letters = []
    start = None
    for x in range(width):
        has_ink = any(img.getpixel((x, y)) != 255 for y in range(height))
        if has_ink and start is None:
            start = x
        elif not has_ink and start is not None:
            letters.append((start, x))
            start = None
    if start is not None:
        # The last letter touches the right edge: close it explicitly.
        letters.append((start, width))
    print(letters)
    # Example: [(27, 47), (48, 71), (73, 101), (102, 120), (122, 147)]
    # Only images that split into exactly 5 letters are saved.
    if len(letters) == 5:
        os.makedirs("pic/letter/", exist_ok=True)
        for left, right in letters:
            # crop box is (left, upper, right, lower)
            im3 = img.crop((left, 0, right, height))
            # random suffix in 1000..10000 (both ends inclusive) to reduce
            # name clashes within the same second
            a = random.randint(1000, 10000)
            im3.save("pic/letter/%s.gif" % (time.strftime('%Y%m%d%H%M%S', time.localtime()) + str(a)))
    return letters
def img_pretreatment(path):
    """
    Pre-process an image: open it, convert it to greyscale, classify it by
    its border colours, then binarise and de-noise it.

    :param path: path of the image file
    :return: the processed image, or None when the image is rejected
        (``corners_color`` is truthy, or the border holds more than 200
        distinct pixel values)
    """
    # TODO: refactor into a class later
    img = Image.open(path)  # open the image
    n = 0  # NOTE(review): not read anywhere else in this function
    img = img.convert("L")  # convert to greyscale
    if corners_color(img):  # mosaic image: give up
        return
    pix_dict = border_color(img)  # count the border colours
    if len(pix_dict) > 200:
        return
    print(pix_dict)
    if len(pix_dict) == 1:
        # Uniform border: binarise around that single border value.
        # img.show()
        pix_dict2 = max_value_of_pic_color(img)[:10]
        img = binary_image(img, pix_dict.popitem()[0])
        # Count how many of the 10 most frequent grey levels (measured
        # before binarisation) occur more than 100 times.
        count = 0
        for i in pix_dict2:
            if i[1] > 100:
                count += 1
        if count < 8:
            img = homoplasy_image(img, 'w', 20)
    else:
        # Mixed border: binarise around the most frequent border value,
        # then whiten mostly-bright neighbourhoods.
        img = binary_image(img, max_value_of_border_color(pix_dict)[0])
        img = interference_line(img)
        # Choose which dots to reject from the dominant grey level.
        pix_dict3 = max_value_of_pic_color(img)[:1]
        if pix_dict3[0][0] > 200:
            img = homoplasy_image(img, 'b', 20)
        else:
            img = homoplasy_image(img, 'w', 20)
    img = clear_border(img)
    return img
    # Commented-out OCR experiment (unreachable after the return above):
    # im = ImageEnhance.Contrast(img)
    # im = im.enhance(100)
    # text = pytesseract.image_to_string(im).strip()
    # print("Recognised characters: %s" % text)
    # if len(text) == 5 and is_number(text):
    # print(text)
def main():
    """
    Classify every cut-out letter in ``pic/letter/`` against the templates.

    Templates are read from ``iconic/<digit>/`` for the digits 0-9. Each GIF
    in ``pic/letter/`` is compared with every template by cosine similarity,
    the best match is printed, and the file is copied into the directory of
    the winning digit. A failure on one file is printed and does not stop
    the remaining files.
    """
    labels = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '0']
    templates = []
    for label in labels:
        for name in os.listdir("iconic/%s/" % label):
            # Skip the thumbnail/metadata files the OS drops in folders.
            if name in ("Thumbs.db", ".DS_Store"):
                continue
            # The context manager closes the file once its vector is built.
            with Image.open("iconic/%s/%s" % (label, name)) as template:
                templates.append((label, gen_vector(template)))
    path = "pic/letter/"
    for item in file_list(path, 'gif'):
        try:
            with Image.open(item) as im3:
                # Built once per letter instead of once per template.
                candidate = gen_vector(im3)
            guess = [(vector_compare(vector, candidate), label)
                     for label, vector in templates]
            guess.sort(reverse=True)
            print(len(guess))
            print("最佳匹配", guess[0])
            shutil.copy(item, "iconic/%s" % guess[0][1])
        except Exception as err:
            # Best effort per file: report the problem and carry on.
            print("异常情况%s" % err)
if __name__ == '__main__':
    # Script entry point.
    # NOTE(review): this loop has no exit condition and no pause between
    # calls, so main() runs back-to-back until the process is stopped --
    # confirm this is intended.
    while True:
        main()
    # Earlier experiments, kept commented out:
    # imge = Image.open('54031.png')
    # # img.show()
    # imge = imge.convert("L")
    # print(corners_color(imge))
    # for p in file_list(
    # r"D:\Administrator\Documents\GitHub\Anti-Anti-Spider-master\1.验证码\tensorflow_cnn\webmoney_png"):
    # print(p)
    # img = img_pretreatment(p)
    # if img:
    # # img.show()
    # vertical_cut(img)
    # imge = Image.open(p)
    # # img.show()
    # imge = imge.convert("L")
    # print(corners_color(imge))
    # # max_value_of_pic_color(imge)
    # # dic = border_color(img)
    # # max_value = max_value_of_border_color(dic)
    # # print(max_value)
# WebMoney login captcha character recognition.
# (Web-page residue: "latest recommended article, published 2021-09-28 16:59:30".)