# webmoney 登录字符识别 (WebMoney login captcha character recognition)
#
# 作者: 张煊信
# 分类专栏: Python文档
# 本文链接: https://blog.csdn.net/JDMXI/article/details/91188890
from PIL import Image
from PIL import ImageEnhance
import numpy as np
import pytesseract
from math import isnan
from pylab import NaN
import os
import math
import random
import time
import shutil


def is_number(s):
    """
    Check whether the recognised string can be interpreted as a number.

    Two checks are tried in order:

    1. ``float(s)`` - accepts everything ``float`` parses (e.g. "12", "1.5",
       "1e3", and also "nan" / "inf").
    2. ``unicodedata.numeric(s)`` - accepts a single Unicode character that
       has a numeric value.

    :param s: the recognised string; non-string input is tolerated
    :return: True if either check succeeds, otherwise False
    """
    import unicodedata

    try:
        float(s)
        return True
    except (TypeError, ValueError):
        # TypeError covers inputs float() cannot handle at all (e.g. None).
        pass
    try:
        unicodedata.numeric(s)
        return True
    except (TypeError, ValueError):
        pass
    return False


def border_color(img):
    """
    Count how often each pixel value occurs in the 4-pixel-wide outer frame.

    :param img: image exposing ``width``, ``height`` and ``getpixel((x, y))``;
        the original note says an 8-bit grayscale image (``convert('L')``)
    :return: dict mapping pixel value -> number of frame pixels with that value
    """
    width, height = img.width, img.height
    frame_pixels = []
    for x in range(width):
        for y in range(height):
            on_frame = (y < 4 or y >= height - 4) or (x < 4 or x >= width - 4)
            if on_frame:
                frame_pixels.append(img.getpixel((x, y)))

    # Single pass tally instead of calling list.count() once per distinct value.
    tally = {}
    for pix in frame_pixels:
        tally[pix] = tally.get(pix, 0) + 1

    # Emit keys in set-iteration order so the resulting dict order (which
    # decides tie-breaking for code iterating the dict) stays as before.
    return {pix: tally[pix] for pix in set(frame_pixels)}


def corners_color(img):
    """
    Detect a "mosaic" image by probing 3x3 windows near the image border.

    Six windows are probed: one near each of the four corners plus one at the
    middle of the top edge and one at the middle of the bottom edge. A window
    counts as white when more than 7 of its 9 pixels are > 245 and none is
    < 10; it counts as black when more than 7 pixels are < 10 and none is
    > 245. The image is reported as a mosaic when at least one white and at
    least one black window are found.

    :param img: image exposing ``width``, ``height`` and ``load()``; the
        comparisons assume scalar (grayscale) pixel values
    :return: True if both a white and a black window exist, otherwise False
    """
    pixels = img.load()
    width, height = img.width, img.height

    def window_counts(cx, cy):
        # Tally bright and dark pixels of the 3x3 window in a single scan.
        bright = dark = 0
        for dx in (-1, 0, 1):
            for dy in (-1, 0, 1):
                value = pixels[cx + dx, cy + dy]
                if value > 245:
                    bright += 1
                if value < 10:
                    dark += 1
        return bright, dark

    probes = [
        (2, 2),
        (2, height - 2),
        (width - 2, 2),
        (width - 2, height - 2),
        (int(width / 2), height - 2),
        (int(width / 2), 2),
    ]

    white = False
    black = False
    for cx, cy in probes:
        bright, dark = window_counts(cx, cy)
        if bright > 7 and dark == 0:
            white = True
        if bright == 0 and dark > 7:
            black = True
    return white and black


# Remove the border
def clear_border(img, dot=3):
    """
    Paint a frame of ``dot`` pixels along every edge of the image white (255).

    The frame has the same width on all four sides: columns ``0..dot-1`` and
    ``width-dot..width-1``, rows ``0..dot-1`` and ``height-dot..height-1``.

    :param img: image exposing ``width``, ``height`` and ``load()``; modified in place
    :param dot: frame width in pixels
    :return: the same image object
    """
    pixels = img.load()
    width, height = img.width, img.height
    for x in range(width):
        for y in range(height):
            # ">=" (not ">") so the right/bottom frame is as wide as the left/top one.
            if x < dot or x >= width - dot or y < dot or y >= height - dot:
                pixels[x, y] = 255
    return img


# Interference-line noise reduction
def interference_line(img):
    """
    Whiten pixels whose 3x3 window (the pixel itself included) is mostly bright.

    Every pixel at least 4 pixels away from each edge is visited column by
    column; when more than 4 of the 9 window pixels are > 225 the pixel is
    set to 255. The image is edited in place, so earlier edits are visible
    to later windows.

    :param img: image exposing ``width``, ``height`` and ``load()``
    :return: the same image object
    """
    px = img.load()
    window = [(dx, dy) for dx in (-1, 0, 1) for dy in (-1, 0, 1)]
    for col in range(4, img.width - 4):
        for line in range(4, img.height - 4):
            bright = sum(1 for dx, dy in window if px[col + dx, line + dy] > 225)
            if bright > 4:
                px[col, line] = 255
    return img


# Point convergence, noise reduction
def homoplasy_image(img, reject='b', times=5):
    """
    Remove isolated dots by making a pixel agree with its neighbourhood.

    For every pixel at least 2 pixels away from each edge:

    * ``reject='b'``: a dark pixel (< 5) becomes 255 when more than 4 pixels
      of its 3x3 window, or more than 18 pixels of its 5x5 window, are
      bright (> 245).
    * ``reject='w'``: a bright pixel (> 245) becomes 0 when more than 4
      pixels of its 3x3 window, or more than 18 pixels of its 5x5 window,
      are dark (< 5).

    Any other ``reject`` value leaves the image untouched. The image is
    edited in place, and the scan stops early once a full pass changes
    nothing, because further passes would be no-ops.

    :param img: image exposing ``width``, ``height`` and ``load()``
    :param reject: which dots to remove, 'b' for black or 'w' for white
    :param times: maximum number of passes over the image
    :return: the same image object
    """
    pixels = img.load()
    margin = 2

    if reject == "w":
        count_bright, replacement = False, 0
    elif reject == "b":
        count_bright, replacement = True, 255
    else:
        return img

    def count_neighbours(cx, cy, offset):
        # Number of bright (or dark) pixels in the (2*offset+1)^2 window.
        total = 0
        for dx in range(-offset, offset + 1):
            for dy in range(-offset, offset + 1):
                value = pixels[cx + dx, cy + dy]
                if count_bright:
                    if value > 245:
                        total += 1
                elif value < 5:
                    total += 1
        return total

    for _ in range(times):
        changed = False
        for row in range(margin, img.width - margin):
            for column in range(margin, img.height - margin):
                value = pixels[row, column]
                # Only dots of the rejected colour are candidates.
                is_candidate = value < 5 if count_bright else value > 245
                if not is_candidate:
                    continue
                if (count_neighbours(row, column, 1) > 4
                        or count_neighbours(row, column, 2) > 18):
                    pixels[row, column] = replacement
                    changed = True
        if not changed:
            break

    return img


def gen_new_black_pic(img, threshold=25, grounding="b"):
    """
    Build a two-level copy of ``img`` on a fresh "L" canvas and display it.

    With ``grounding='b'`` the canvas is black (0) and every pixel brighter
    than ``threshold`` is drawn white (255); with any other ``grounding`` the
    canvas is white and those pixels are drawn black.

    :param img: source image exposing ``size`` and ``getpixel((x, y))``
    :param threshold: pixels strictly above this value are drawn in the foreground colour
    :param grounding: 'b' for a black background, anything else for white
    :return: the new image (also shown via ``show()``)
    """
    background, foreground = (0, 255) if grounding == 'b' else (255, 0)

    canvas = Image.new("L", img.size, background)
    for row in range(img.size[1]):
        for col in range(img.size[0]):
            if img.getpixel((col, row)) > threshold:
                canvas.putpixel((col, row), foreground)
    canvas.show()
    return canvas


def gen_new_white_pic(img):
    """
    Build a white "L" canvas on which only the near-black pixels of ``img``
    (value < 18) are drawn black, display it, and return it.

    :param img: source image exposing ``size`` and ``getpixel((x, y))``
    :return: the new image (also shown via ``show()``)
    """
    canvas = Image.new("L", img.size, 255)
    for row in range(img.size[1]):
        for col in range(img.size[0]):
            if img.getpixel((col, row)) < 18:
                canvas.putpixel((col, row), 0)
    canvas.show()
    return canvas


# Invert colours
def reverse_color(img):
    """
    Invert the image in place by replacing every pixel ``v`` with ``255 - v``.

    :param img: image exposing ``size`` and ``load()``
    :return: the same image object
    """
    px = img.load()
    columns, lines = img.size[0], img.size[1]
    for col in range(columns):
        for line in range(lines):
            inverted = 255 - px[col, line]
            px[col, line] = inverted
    return img


# Binarisation
def binary_image(img, standard=157.5, tolerance=4):
    """
    Binarise the image in place around a reference grey level.

    Pixels strictly inside the open band
    ``(standard - tolerance, standard + tolerance)`` become 255 (white);
    every other pixel becomes 0 (black). This is a band test around
    ``standard``, not a simple above/below threshold.

    :param img: image exposing ``width``, ``height`` and ``load()``
    :param standard: reference grey level at the centre of the band
    :param tolerance: half-width of the band
    :return: the same image object
    """
    pixels = img.load()
    low = standard - tolerance
    high = standard + tolerance
    for x in range(img.width):
        for y in range(img.height):
            pixels[x, y] = 255 if low < pixels[x, y] < high else 0
    return img


def max_value_of_border_color(dic):
    """
    Pick the most frequent border colour.

    :param dic: mapping of pixel value -> occurrence count
    :return: the ``(value, count)`` pair with the highest count (the first
        such pair in iteration order on ties), or ``(0, 0)`` when the mapping
        is empty or no count is positive
    """
    fallback = (0, 0)
    best = max(dic.items(), key=lambda pair: pair[1], default=fallback)
    return best if best[1] > 0 else fallback


def max_value_of_pic_color(img):
    """
    Rank the 256 grey levels of the image by how often they occur.

    :param img: image exposing ``histogram()``; its first 256 bins are used
    :return: list of ``(level, count)`` pairs sorted by count, most frequent
        first; levels with equal counts keep ascending level order
    """
    bins = img.histogram()
    ranking = [(level, bins[level]) for level in range(256)]
    ranking.sort(key=lambda pair: pair[1], reverse=True)
    return ranking


def file_list(root, suffix='png'):
    """
    List the files directly inside ``root`` whose names end with ``suffix``.

    Sub-directories are not searched. Only the first entry produced by
    ``os.walk`` (the top-level directory itself) is consumed, so the rest of
    the tree is never traversed.

    :param root: directory to search
    :param suffix: file-name ending to match (plain ``str.endswith`` test)
    :return: list of matching paths, each joined with the directory; an empty
        list when ``os.walk`` yields nothing for ``root``
    """
    top_level = next(os.walk(root), None)
    if top_level is None:
        return []
    parent, _subdirs, names = top_level
    return [os.path.join(parent, name) for name in names if name.endswith(suffix)]


def create_dir(path):
    """
    Create ``path`` (including missing parent directories) if it does not exist.

    Nothing happens when ``path`` already exists. The creation is attempted
    directly instead of checking first, so a directory that appears between
    a check and the creation cannot cause a failure.

    :param path: directory path to create
    :return: None
    """
    try:
        os.makedirs(path)
    except FileExistsError:
        # Already present: same outcome as the "exists" case.
        pass


# Convert an image to a vector
def gen_vector(img):
    """
    Turn the image's flat pixel data into a ``{index: value}`` dictionary.

    :param img: image exposing ``getdata()``
    :return: dict mapping the position in the ``getdata()`` sequence to the
        value stored there
    """
    return dict(enumerate(img.getdata()))


# Vector norm
def modulo(vector):
    """
    Return the Euclidean length of a vector stored as a dictionary.

    :param vector: mapping whose values are the vector components (keys are ignored)
    :return: square root of the sum of squared components
    """
    squares = sum(component ** 2 for component in vector.values())
    return math.sqrt(squares)


# Cosine of the angle between two vectors
def vector_compare(vector1, vector2):
    """
    Compute the cosine similarity of two dictionary-encoded vectors.

    The dot product runs over the keys present in both vectors; each norm is
    taken over the whole vector via ``modulo``.

    :param vector1: first vector as ``{key: component}``
    :param vector2: second vector as ``{key: component}``
    :return: dot product divided by the product of the norms, or ``0.0`` when
        either vector has zero length (the cosine is undefined there, and
        dividing would raise ``ZeroDivisionError``)
    """
    dot = 0
    for word, count in vector1.items():
        if word in vector2:
            dot += count * vector2[word]

    denominator = modulo(vector1) * modulo(vector2)
    if denominator == 0:
        return 0.0
    return dot / denominator


def vertical_cut(img):
    """
    Split a black-and-white image into glyphs along empty columns.

    A column belongs to a glyph when it contains at least one pixel that is
    not 255. Consecutive glyph columns form a span ``(start, end)`` with
    ``end`` exclusive. A glyph that reaches the right edge of the image is
    closed at the image width instead of being dropped.

    The spans are printed. When exactly 5 spans are found, each one is
    cropped at full height and saved as a GIF under ``pic/letter/`` (created
    if missing), named after the current local time plus a random number.

    :param img: image exposing ``size``, ``getpixel((x, y))``, ``crop(box)``;
        the crops must expose ``save(path)``
    :return: list of ``(start, end)`` column spans
    """
    width, height = img.size[0], img.size[1]

    letters = []
    start = None  # first column of the glyph currently being scanned
    for x in range(width):
        has_ink = any(img.getpixel((x, y)) != 255 for y in range(height))
        if has_ink and start is None:
            start = x
        elif not has_ink and start is not None:
            letters.append((start, x))
            start = None
    if start is not None:
        # The last glyph touches the right edge: close it at the image width.
        letters.append((start, width))

    print(letters)

    # Only save when the expected number of glyphs was found.
    if len(letters) == 5:
        os.makedirs("pic/letter", exist_ok=True)
        for letter in letters:
            # crop box is (left, upper, right, lower)
            im3 = img.crop((letter[0], 0, letter[1], img.size[1]))
            # random integer in 1000..10000 (inclusive) appended to the timestamp
            a = random.randint(1000, 10000)
            im3.save("pic/letter/%s.gif" % (time.strftime('%Y%m%d%H%M%S', time.localtime()) + str(a)))

    return letters


def img_pretreatment(path):
    """
    Pre-process a captcha image: open, convert to grayscale, classify by its
    border colours, binarise and de-noise.

    Steps:

    1. Open the file and convert it to 8-bit grayscale ("L"); the file is
       closed as soon as the converted copy exists.
    2. Reject the image (return None) when ``corners_color`` reports it as a
       mosaic, or when its border contains more than 200 distinct values.
    3. Single border colour: binarise around that colour; when fewer than 8
       of the 10 most frequent grey levels (measured before binarising)
       occur more than 100 times, remove white dots.
    4. Several border colours: binarise around the most frequent one, run
       ``interference_line``, remove black dots if the dominant level is
       > 200 (otherwise white dots), then clear the border.

    :param path: path of the image file
    :return: the processed image, or None when the image was rejected
    """
    # TODO: fold this pipeline into a class later
    with Image.open(path) as source:
        img = source.convert("L")

    if corners_color(img):  # mosaic image: give up
        return None

    pix_dict = border_color(img)  # histogram of the border colours
    if len(pix_dict) > 200:
        return None
    print(pix_dict)

    if len(pix_dict) == 1:
        # Must be sampled before binarisation rewrites the pixels.
        top_levels = max_value_of_pic_color(img)[:10]
        img = binary_image(img, pix_dict.popitem()[0])
        frequent = sum(1 for _level, count in top_levels if count > 100)
        if frequent < 8:
            img = homoplasy_image(img, 'w', 20)
    else:
        img = binary_image(img, max_value_of_border_color(pix_dict)[0])
        img = interference_line(img)
        dominant = max_value_of_pic_color(img)[:1]
        if dominant[0][0] > 200:
            img = homoplasy_image(img, 'b', 20)
        else:
            img = homoplasy_image(img, 'w', 20)
        img = clear_border(img)
    return img

    # im = ImageEnhance.Contrast(img)
    # im = im.enhance(100)
    # text = pytesseract.image_to_string(im).strip()
    # print("识别的字符为%s" % text)
    # if len(text) == 5 and is_number(text):
    #     print(text)


def main():
    """
    Classify every cut-out glyph in ``pic/letter/`` against the digit
    templates and copy it into the matching ``iconic/<digit>`` folder.

    Every image under ``iconic/<digit>/`` (except ``Thumbs.db`` and
    ``.DS_Store``) is loaded as a template vector. Each ``.gif`` in
    ``pic/letter/`` is compared with all templates via ``vector_compare``;
    the label of the best score wins. A failure on one glyph is reported and
    the loop moves on to the next file.
    """
    iconic = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '0']
    ignored = ("Thumbs.db", ".DS_Store")

    # (label, vector) for every template image; files are closed right after
    # their pixel data has been copied into the vector.
    templates = []
    for num in iconic:
        for name in os.listdir("iconic/%s/" % num):
            if name in ignored:
                continue
            with Image.open("iconic/%s/%s" % (num, name)) as template:
                templates.append((num, gen_vector(template)))

    path = "pic/letter/"
    for item in file_list(path, 'gif'):
        try:
            with Image.open(item) as im3:
                # Vectorise the sample once instead of once per template.
                sample = gen_vector(im3)

            guess = [(vector_compare(vector, sample), num) for num, vector in templates]
            guess.sort(reverse=True)
            print(len(guess))
            print("最佳匹配", guess[0])
            shutil.copy(item, "iconic/%s" % guess[0][1])
        except Exception as err:
            # Best effort per file: report and continue with the next glyph.
            print("异常情况%s" % err)


if __name__ == '__main__':
    # Script entry point: runs the classification pass over and over.
    # NOTE(review): this loop has no exit condition and no delay between
    # passes, so it only stops when the process is interrupted - confirm
    # that continuous re-running is intended.
    while True:
        main()
        # imge = Image.open('54031.png')
        # # img.show()
        # imge = imge.convert("L")
        # print(corners_color(imge))
        # for p in file_list(
        #         r"D:\Administrator\Documents\GitHub\Anti-Anti-Spider-master\1.验证码\tensorflow_cnn\webmoney_png"):
        #     print(p)
        #     img = img_pretreatment(p)
        #     if img:
        #         # img.show()
        #         vertical_cut(img)

    # imge = Image.open(p)
    # # img.show()
    # imge = imge.convert("L")
    # print(corners_color(imge))
    # # max_value_of_pic_color(imge)
    # # dic = border_color(img)
    # # max_value = max_value_of_border_color(dic)
    # # print(max_value)