CNN handwritten Chinese character recognition
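
The script below trains a small convolutional network with TensorFlow 1.x to recognize 100 common handwritten Chinese characters, reading training and test samples from CASIA HWDB1.1 GNT files (paths configured below).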

import os
import numpy as np
import struct
import PIL.Image
import cv2
from sklearn.utils import shuffle
import tensorflow as tf

tf.app.flags.DEFINE_string("checkpoint", "ckpt/", "dir of checkpoint")
tf.app.flags.DEFINE_bool("restore", False, "restore from previous checkpoint")

FLAGS = tf.app.flags.FLAGS

# train_data_dir = "F:\HandWritingDatabases\HWDB1.1trn_gnt"
# test_data_dir = "F:\HandWritingDatabases\HWDB1.1tst_gnt"

train_data_dir = "../trn_gnt"
test_data_dir = "../tst_gnt"

# Use the 100 most common Chinese characters for this experiment
char_set = "的一是了我不人在他有这个上们来到时大地为子中你说生国年着就那和要她出也得里后自以会家可下而过天去能对小多然于心学么之都好看起发当没成只如事把还用第样道想作种开美总从无情己面最女但现前些所同日手又行意动"
print(len(char_set))
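# prints 100; each character's index in char_set serves as its class label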


# Read images and their corresponding character tag codes from GNT files
def read_from_gnt_dir(gnt_dir=train_data_dir):
    def one_file(f):
        header_size = 10
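        # Each GNT record: a 10-byte header (4-byte little-endian sample size,
        # 2-byte GB2312 tag code, 2-byte little-endian width and height),
        # followed by width * height bytes of grayscale pixels.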
        while True:
            header = np.fromfile(f, dtype='uint8', count=header_size)
            if not header.size: break
            sample_size = header[0] + (header[1] << 8) + (header[2] << 16) + (header[3] << 24)
            tagcode = header[5] + (header[4] << 8)
            width = header[6] + (header[7] << 8)
            height = header[8] + (header[9] << 8)
            if header_size + width * height != sample_size:
                break
            image = np.fromfile(f, dtype='uint8', count=width * height).reshape((height, width))
            yield image, tagcode

    for file_name in os.listdir(gnt_dir):
        # print(file_name)
        if file_name.endswith('.gnt'):
            file_path = os.path.join(gnt_dir, file_name)
            # print(file_path)
            with open(file_path, 'rb') as f:
                for image, tagcode in one_file(f):
                    yield image, tagcode


# Count the samples and dump a few sample images
def extractImage():
    # sample counters
    train_counter = 0
    test_counter = 0
    for image, tagcode in read_from_gnt_dir(gnt_dir=train_data_dir):
        tagcode_unicode = struct.pack('>H', tagcode).decode('gb2312')

        # save the first 1000 training images as PNGs
        if train_counter < 1000:
            im = PIL.Image.fromarray(image)
            im.convert('RGB').save('images/' + tagcode_unicode + str(train_counter) + '.png')
        else:
            break
        train_counter += 1

    for image, tagcode in read_from_gnt_dir(gnt_dir=test_data_dir):
        tagcode_unicode = struct.pack('>H', tagcode).decode('gb2312')
        test_counter += 1
    # sample counts (the training loop above stops after 1000 images)
    print(train_counter, test_counter)


def resize_and_normalize_image(img):
    # pad to a square with white (255) borders
    pad_size = abs(img.shape[0] - img.shape[1]) // 2
    if img.shape[0] < img.shape[1]:
        pad_dims = ((pad_size, pad_size), (0, 0))
    else:
        pad_dims = ((0, 0), (pad_size, pad_size))
    img = np.lib.pad(img, pad_dims, mode='constant', constant_values=255)
    # scale down to 56x56, then add a 4-pixel white margin on each side -> 64x64
    img = cv2.resize(img, (64 - 4 * 2, 64 - 4 * 2))
    img = np.lib.pad(img, ((4, 4), (4, 4)), mode='constant', constant_values=255)
    # assert img.shape == (64, 64)

    img = img.flatten()  # flatten to 1-D
    # map pixel values into [-1, 1] (cast to float first to avoid uint8 wraparound)
    img = (img.astype(np.float32) - 128) / 128
    return img


# One-hot encode a character by its index in char_set
def convert_to_one_hot(char):
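    # e.g. convert_to_one_hot('的') returns a length-100 vector with a 1 at
    # index 0, since '的' is the first character in char_set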
    vector = np.zeros(len(char_set))
    vector[char_set.index(char)] = 1
    return vector


# The data set is small, so it can all be loaded into RAM at once
train_data_x = []  # (m, 4096)
train_data_y = []  # (m, 100) one-hot labels, e.g. [1, 0, 0, ...]
train_data_count = 0
batch_size = 64  # images per training step  TODO: try 128
num_batch = 0


def preProcessImg(image):

    # grayscale the image if it still has color channels
    if len(image.shape) == 3:
        image1 = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        image1 = image

    # Otsu binarization (alternative: a fixed threshold such as
    # cv2.threshold(image1, 127, 255, cv2.THRESH_BINARY))
    ret, image2 = cv2.threshold(image1, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    # print("ret= ", ret)
    image3 = cv2.resize(image2, (64, 64))

    # flatten and scale to [0, 1]
    image = image3.flatten() / 255.0

    return image
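
# NOTE: preProcessImg scales pixels to [0, 1], while resize_and_normalize_image
# maps them to [-1, 1]; test-time preprocessing must match whatever the model
# was trained with.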



def load_train_data():
    global train_data_x
    global train_data_y
    global num_batch
    global train_data_count
    for image, tagcode in read_from_gnt_dir(gnt_dir=train_data_dir):
        tagcode_unicode = struct.pack('>H', tagcode).decode('gb2312')
        if tagcode_unicode in char_set:
            # print(tagcode_unicode)  # very noisy; uncomment to trace loading
            train_data_count += 1
            # image = preProcessImg(image)
            # train_data_x.append(image)
            train_data_x.append(resize_and_normalize_image(image))

            train_data_y.append(convert_to_one_hot(tagcode_unicode))

    # 33505 training samples for these 100 classes
    print(np.shape(train_data_x))
    print(np.shape(train_data_y))

    # train_data_x, train_data_y = shuffle(train_data_x, train_data_y, random_state=0)
    # (sklearn.utils.shuffle does accept random_state; the earlier
    # `TypeError: shuffle() takes no keyword arguments` came from
    # `from pylab import *` shadowing it with numpy.random.shuffle)

    num_batch = len(train_data_x) // batch_size  # floor division
    print("num_batch=", num_batch)


# Shuffle the training set; sklearn.utils.shuffle keeps x and y paired
def shuffleData():
    global train_data_x
    global train_data_y
    train_data_x, train_data_y = shuffle(train_data_x, train_data_y, random_state=0)


test_data_x = []  # test data
test_data_y = []
test_data_count = 0

# TODO: read directly from the folder of pre-extracted images instead
def load_test_data():
    global test_data_x  # test data
    global test_data_y
    global test_data_count
    for image, tagcode in read_from_gnt_dir(gnt_dir=test_data_dir):
        tagcode_unicode = struct.pack('>H', tagcode).decode('gb2312')
        if tagcode_unicode in char_set:
            test_data_count += 1
            # image = preProcessImg(image)
            # test_data_x.append(image)
            test_data_x.append(resize_and_normalize_image(image))

            test_data_y.append(convert_to_one_hot(tagcode_unicode))
    # shuffle the samples
    # test_data_x, test_data_y = shuffle(test_data_x, test_data_y, random_state=0)
    print(np.shape(test_data_x))
    print(np.shape(test_data_y))


X = tf.placeholder(tf.float32, [None, 64 * 64])
Y = tf.placeholder(tf.float32, [None, 100])
keep_prob = tf.placeholder(tf.float32)
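# X: flattened 64x64 grayscale images; Y: one-hot labels over the 100
# characters; keep_prob: dropout keep probability (use 1.0 for evaluation)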


def chinese_hand_write_cnn():
    x = tf.reshape(X, shape=[-1, 64, 64, 1])
    # two conv + pool layers (a third is sketched below but disabled)
    w_c1 = tf.Variable(tf.random_normal([3, 3, 1, 32], stddev=0.01))
    b_c1 = tf.Variable(tf.zeros([32]))
    conv1 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(x, w_c1, strides=[1, 1, 1, 1], padding='SAME'), b_c1))
    conv1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

    w_c2 = tf.Variable(tf.random_normal([3, 3, 32, 64], stddev=0.01))
    b_c2 = tf.Variable(tf.zeros([64]))
    conv2 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(conv1, w_c2, strides=[1, 1, 1, 1], padding='SAME'), b_c2))
    conv2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
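    # two 2x2 max-pools shrink 64x64 -> 32x32 -> 16x16, which gives the
    # 16*16*64 input width of the fully connected layer below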

    """

    w_c3 = tf.Variable(tf.random_normal([3, 3, 64, 128], stddev=0.01))
    b_c3 = tf.Variable(tf.zeros([128]))
    conv3 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(conv2, w_c3, strides=[1, 1, 1, 1], padding='SAME'), b_c3))
    conv3 = tf.nn.max_pool(conv3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    conv3 = tf.nn.dropout(conv3, keep_prob)
    """

    # fully connected layer
    w_d = tf.Variable(tf.random_normal([16 * 16 * 64, 1024], stddev=0.01))
    b_d = tf.Variable(tf.zeros([1024]))
    dense = tf.reshape(conv2, [-1, w_d.get_shape().as_list()[0]])
    dense = tf.nn.relu(tf.add(tf.matmul(dense, w_d), b_d))
    dense = tf.nn.dropout(dense, keep_prob)

    w_out = tf.Variable(tf.random_normal([1024, 100], stddev=0.01))
    b_out = tf.Variable(tf.zeros([100]))
    # out = tf.add(tf.matmul(dense, w_out), b_out)
    out = tf.nn.softmax(tf.add(tf.matmul(dense, w_out), b_out))

    return out


label_size = 100  # 100 characters
input_size = 64 * 64
batch_size = 64  # TODO: try 128?
hidden_size = 1024

# Simple fully connected (BP) network baseline
def bp_nn():
    # input -> hidden layer
    w1 = tf.Variable(tf.random_normal([input_size, hidden_size], stddev=0.1))
    b1 = tf.Variable(tf.constant(0.1, shape=[hidden_size]))
    hidden = tf.matmul(X, w1) + b1
    hidden = tf.nn.relu(hidden)

    # hidden -> output layer
    w2 = tf.Variable(tf.random_normal([hidden_size, label_size], stddev=0.1))
    b2 = tf.Variable(tf.constant(0.1, shape=[label_size]))

    # softmax probabilities, matching the cross-entropy loss used in training
    output = tf.matmul(hidden, w2) + b2
    output = tf.nn.softmax(output)

    return output



def train_hand_write_nn():
    output = chinese_hand_write_cnn()
    # output = bp_nn()

    loss = -tf.reduce_sum(Y * tf.log(tf.clip_by_value(output, 1e-15, 1.0)))  # clipping guards against loss=NaN (exploding gradients?)
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)  # learning rate 0.001 or 0.0001? TODO: compare
    # a loss of NaN was observed with learning rate 0.001

    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(output, 1), tf.argmax(Y, 1)), tf.float32))

    # TensorBoard visualization
    tf.summary.scalar("loss", loss)
    tf.summary.scalar("accuracy", accuracy)
    merged_summary_op = tf.summary.merge_all()

    saver = tf.train.Saver(max_to_keep=1)  # keep only the most recent model
    max_acc = 0  # TODO: persist max_acc to a file
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        step = 0

        if FLAGS.restore:
            # Get last checkpoint in checkpoint directory
            checkpoint = tf.train.latest_checkpoint(FLAGS.checkpoint)
            if checkpoint:
                # Restore data from checkpoint
                saver.restore(sess, checkpoint)
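                # checkpoint files are named nn-model.ckpt-<step>, so the
                # global step can be recovered from the file name suffix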
                step += int(checkpoint.split('-')[-1])
                print("step=",step)
                print("Train from checkpoint")

        # run `tensorboard --logdir=./log`, then open http://0.0.0.0:6006 in a browser
        summary_writer = tf.summary.FileWriter('./log', sess.graph)

        start_step = step  # offset from a restored checkpoint, if any
        for e in range(50):
            for i in range(num_batch):
                batch_x = train_data_x[i * batch_size: (i + 1) * batch_size]
                batch_y = train_data_y[i * batch_size: (i + 1) * batch_size]
                _, loss_, summary = sess.run([optimizer, loss, merged_summary_op],
                                             feed_dict={X: batch_x, Y: batch_y, keep_prob: 0.5})

                # log every iteration
                step = start_step + e * num_batch + i
                summary_writer.add_summary(summary, step)
                print(step, "loss=", loss_)

                if step % 10 == 0:
                    # evaluate on the first 100 test samples
                    # acc = accuracy.eval({X: test_data_x[:100], Y: test_data_y[:100], keep_prob: 1.})
                    acc = sess.run(accuracy, feed_dict={X: test_data_x[:100], Y: test_data_y[:100], keep_prob: 1.})
                    print(step, "accuracy=", acc)
                    if acc > max_acc:
                        max_acc = acc
                        saver.save(sess, 'ckpt/nn-model.ckpt', global_step=step+1)



# Accuracy over the full test set, returned for the genetic algorithm to use
def predict():
    return 0  # TODO: not yet implemented


def test(path):
    # Read the test picture; preProcessImg grayscales, binarizes and resizes it.
    # (alternative: tst_image = cv2.imread(path, cv2.IMREAD_GRAYSCALE))
    tst_image = cv2.imread(path)

    tst_image = preProcessImg(tst_image)
    # print(tst_image)  # debug

    # cv2.imwrite("newphoto.png", tst_image)

    # Feed the test picture into the network and estimate the probability distribution
    with tf.Session() as sess:
        output = chinese_hand_write_cnn()
        # output = bp_nn()
        predict = tf.nn.top_k(output, 10)  # values and indices of the 10 most likely classes
        saver = tf.train.Saver()
        saver.restore(sess=sess, save_path=tf.train.latest_checkpoint('ckpt-85/'))  # FLAGS.checkpoint
        value_topk, index_topk = sess.run(predict, feed_dict={X: [tst_image], keep_prob: 1.0})  # no dropout at inference


        index_topk = index_topk.flatten()
        value_topk = value_topk.flatten()
        print("value_topk:",value_topk)
        print("index_topk:",index_topk)
        for i in range(len(index_topk)):
            print("预测汉字是: ", char_set[index_topk[i]]," 概率是:",value_topk[i])



def main():
    print("main")
    # load_train_data()
    # load_test_data()
    # train_hand_write_nn()
    # test('testimages/yi.png')
    #test('testimages/shang.png')
    # test('testimages/xia.png')
    #test('testimages/wo2.png')
    test('testimages/ta.jpg')


if __name__ == '__main__':
    main()