A Basic TensorFlow Template for Training and Recognition

1. File Structure

The TF project folder:

Object             Description
data_real          Real-world data to be recognized
data_test          Test data, labels and tfrecords
data_train         Training data, labels and tfrecords
doc                Documentation and related papers
model              Saved model files (ckpt, pb)
networks           Network architecture definitions
_tfcore.py         TensorFlow input/output helpers
data_analyser.py   Visual analysis of the dataset
data_generator.py  Generates the tfrecords files
run.py             Training and recognition entry point
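
The pieces are meant to run in this order: generate the tfrecords, train and export, then recognize real images. A minimal sketch of that workflow (assuming all scripts are run from the project root):

# -*- coding: utf-8 -*-
# end-to-end workflow sketch; the entry points are the ones defined later in this article
from data_generator import data_generator
from run import train_and_test, predict_real
import matplotlib.pyplot as plt

data_generator('train')                            # build data_train/train.tfrecords
data_generator('test')                             # build data_test/test.tfrecords
train_and_test(1000, tag_steps=50, restore=False)  # train, evaluate and export the pb
predict_real('data_real/0.jpg')                    # classify one real image
plt.show()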

2. TensorFlow Input/Output Template

_tfcore.py:

# -*- coding: utf-8 -*-
import tensorflow as tf
import numpy as np


def TFRecordsWriteOne(writer,namespace,data_list,data_type,label_list):
    """
    Write one classification sample (features and label).
    writer = tf.python_io.TFRecordWriter('namespace.tfrecords')
    TFRecordsWriteOne(writer,'test',data_list=L_x,data_type='float' or 'bytes',label_list=L_y)
    writer.close()
    """
    if data_type=='float':   # for plain numeric data
        writer.write( tf.train.Example(features=tf.train.Features(feature={
            namespace+'_data':  tf.train.Feature(float_list=tf.train.FloatList(value=data_list)),
            namespace+'_label': tf.train.Feature(int64_list=tf.train.Int64List(value=label_list))
            })).SerializeToString())
    elif data_type=='bytes': # for images
        writer.write( tf.train.Example(features=tf.train.Features(feature={
            namespace+'_data':  tf.train.Feature(bytes_list=tf.train.BytesList(value=data_list)),
            namespace+'_label': tf.train.Feature(int64_list=tf.train.Int64List(value=label_list))
            })).SerializeToString())


def TFRecordsBatchReader(filename_list,namespace,shuffle,data_len,data_type,label_len,batch_size):
    """
    Return one batch of data/label tensors; each sess.run() call yields a new batch.
    train_data_batch, train_label_batch = TFRecordsBatchReader(['train.tfrecords'],'train',
        shuffle=True, data_len=1, data_type='bytes', label_len=1, batch_size=100)
    """
    _capacity=50000
    _min_after_dequeue=10000
    filename_queue = tf.train.string_input_producer(filename_list,shuffle=False)
    reader = tf.TFRecordReader() # the reader is itself a graph node; it only reads when run
    _, serialized_example = reader.read(filename_queue)
    if data_type=='float':
        features = tf.parse_single_example(serialized_example,features={
            namespace+'_data': tf.FixedLenFeature([data_len], tf.float32),
            namespace+'_label': tf.FixedLenFeature([label_len], tf.int64),
        })
    else:
        features = tf.parse_single_example(serialized_example,features={
            namespace+'_data': tf.FixedLenFeature([data_len], tf.string),
            namespace+'_label': tf.FixedLenFeature([label_len], tf.int64),
        })
    data =  features[namespace+'_data' ]
    label = features[namespace+'_label']
    if shuffle==True:
        data_batch,label_batch=tf.train.shuffle_batch([data,label], 
                batch_size=batch_size,capacity=_capacity,min_after_dequeue=_min_after_dequeue)
    else:
        data_batch,label_batch=tf.train.batch([data,label],batch_size=batch_size,capacity=_capacity)
    return data_batch, label_batch


if __name__ == '__main__':
    pass
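
A minimal round-trip sketch for the two helpers above (the file name toy.tfrecords and the sample values are only for illustration):

# -*- coding: utf-8 -*-
import tensorflow as tf
from _tfcore import TFRecordsWriteOne, TFRecordsBatchReader

# write two small float samples
writer = tf.python_io.TFRecordWriter('toy.tfrecords')
TFRecordsWriteOne(writer,'toy',data_list=[0.1,0.2],data_type='float',label_list=[0])
TFRecordsWriteOne(writer,'toy',data_list=[0.3,0.4],data_type='float',label_list=[1])
writer.close()

# read them back as a single batch of 2
data_batch, label_batch = TFRecordsBatchReader(['toy.tfrecords'],'toy',
    shuffle=False, data_len=2, data_type='float', label_len=1, batch_size=2)
with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    print(sess.run([data_batch, label_batch]))
    coord.request_stop()
    coord.join(threads)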

3. Data Visualization and Analysis Template

data_analyser.py:

# -*- coding: utf-8 -*-
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from _tfcore import TFRecordsBatchReader


def showImageByIndex_OriData(namespace,index):

    if namespace=='train':
        ImagesFile=open('data_train/train-images.idx3-ubyte','rb')
        LabelsFile=open('data_train/train-labels.idx1-ubyte','rb')
    else:
        ImagesFile=open('data_test/t10k-images.idx3-ubyte','rb')
        LabelsFile=open('data_test/t10k-labels.idx1-ubyte','rb')

    # MNIST idx headers: 16 bytes in the image file, 8 bytes in the label file
    image_index=index*28*28+16
    label_index=index+8
    ImagesFile.seek(image_index)
    LabelsFile.seek(label_index)
    imageBytes=ImagesFile.read(28*28)
    labelBytes=LabelsFile.read(1)
    img=np.zeros((28,28,3),dtype=np.uint8) # uint8 so imshow renders 0..255 correctly
    for h in range(0,28):
        for w in range(0,28):
            img[h,w,:]=imageBytes[h*28+w]
    plt.figure()
    plt.title('%s: %d' % (namespace,int(labelBytes[0])),color='red')
    plt.imshow(img)
    plt.show()
    ImagesFile.close()
    LabelsFile.close()


def showImageBytes28X28(imageBytes,labelInt):
    img=np.zeros((28,28,3),dtype=np.uint8) # uint8 so imshow renders 0..255 correctly
    for h in range(0,28):
        for w in range(0,28):
            img[h,w,:]=imageBytes[h*28+w]
    plt.title('label: %d' % (labelInt),color='red')
    plt.imshow(img)


def showImage_TFRecord(namespace):
    filename='data_'+namespace+'/'+namespace+'.tfrecords'
    data_batch, label_batch =TFRecordsBatchReader([filename],namespace,
        shuffle=True, data_len=1, data_type='bytes', label_len=1, batch_size=4)
    with tf.Session() as sess:
        coord = tf.train.Coordinator() # coordinator for the input threads
        threads = tf.train.start_queue_runners(sess=sess, coord=coord) # start the queue-runner threads
        data,label=sess.run([data_batch,label_batch])
        for i in range(0,4):
            plt.subplot(2,2,i+1)
            showImageBytes28X28(data[i,0],int(label[i,0]))
        plt.show()
        coord.request_stop()
        coord.join(threads)


if __name__ == '__main__':
    # valid index ranges: train 0..59999, test 0..9999
    showImageByIndex_OriData('test',785)
    #showImage_TFRecord('test')
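
When a record looks wrong, it can also help to inspect the serialized Example protos directly, with no graph or session; a small sketch using the standard tf.python_io iterator:

# -*- coding: utf-8 -*-
import tensorflow as tf

# print the label stored in the first record of the test set
for serialized in tf.python_io.tf_record_iterator('data_test/test.tfrecords'):
    example = tf.train.Example.FromString(serialized)
    print(example.features.feature['test_label'].int64_list.value)
    break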

4. Data Generation Template

Building tfrecords files makes large-scale training easier by cutting the IO time spent reading samples.
data_generator.py:

# -*- coding: utf-8 -*-
import tensorflow as tf
import numpy as np
from _tfcore import TFRecordsWriteOne


def data_generator(namespace,tag_steps=1000):

    # open the idx files for this namespace, create the writer, and set the sample count
    if namespace=='train':
        ImagesFile=open('data_train/train-images.idx3-ubyte','rb')
        LabelsFile=open('data_train/train-labels.idx1-ubyte','rb')
        writer = tf.python_io.TFRecordWriter('data_train/train.tfrecords')
        sampleLen=60000
    else:
        ImagesFile=open('data_test/t10k-images.idx3-ubyte','rb')
        LabelsFile=open('data_test/t10k-labels.idx1-ubyte','rb')
        writer = tf.python_io.TFRecordWriter('data_test/test.tfrecords')
        sampleLen=10000

    # iterate over every sample and write it out
    for index in range(0,sampleLen):

        # compute the idx byte offsets and build the data/label lists
        image_index=index*28*28+16
        label_index=index+8
        ImagesFile.seek(image_index)
        LabelsFile.seek(label_index)
        imageBytes=ImagesFile.read(28*28)
        labelBytes=LabelsFile.read(1)
        dataList=[imageBytes]
        labelList=[int(labelBytes[0])]

        # write the record and report progress
        TFRecordsWriteOne(writer,namespace,dataList,'bytes',labelList)
        if index%tag_steps==0 and index!=0:
            print('%s complete index %d' % (namespace,index))

    print('transfer finished')
    writer.close()


if __name__ == '__main__':
    data_generator('train')
    data_generator('test')
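
A quick sanity check after generation is to count the records that were actually written (60000 for train, 10000 for test); a sketch:

# -*- coding: utf-8 -*-
import tensorflow as tf

for name in ['data_train/train.tfrecords','data_test/test.tfrecords']:
    n = sum(1 for _ in tf.python_io.tf_record_iterator(name))
    print('%s: %d records' % (name,n))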

5. Training and Recognition Template

run.py:

# -*- coding: utf-8 -*-
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from _tfcore import TFRecordsBatchReader
from networks.TFNetwork_NPL import network_NLP
from networks.TFNetwork_CNN import network_CNN
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # silence the noisy TF warnings


def train_and_test(training_steps,tag_steps,restore):

    batch_size_train=1000
    batch_size_test=1000
    test_all_times=10 # the 10000 test samples are covered in 10 batches of 1000

    # data input nodes
    train_recordsname='data_train/train.tfrecords'
    test_recordsname='data_test/test.tfrecords'
    data_batch_train, label_batch_train =TFRecordsBatchReader([train_recordsname],'train',
        shuffle=True, data_len=1, data_type='bytes', label_len=1, batch_size=batch_size_train)
    data_batch_test, label_batch_test =TFRecordsBatchReader([test_recordsname],'test',
        shuffle=False, data_len=1, data_type='bytes', label_len=1, batch_size=batch_size_test)
    real_input=tf.placeholder(name='real_input',dtype=tf.float32,shape=[None,28*28])

    # reshape the data: decode the raw bytes into float vectors
    data_batch_train=tf.cast(tf.decode_raw(data_batch_train, tf.uint8),tf.float32)
    data_batch_train=tf.reshape(data_batch_train,[batch_size_train,28*28])
    data_batch_test=tf.cast(tf.decode_raw(data_batch_test, tf.uint8),tf.float32)
    data_batch_test=tf.reshape(data_batch_test,[batch_size_test,28*28])

    # reshape the labels
    label_batch_train=tf.reshape(label_batch_train,[batch_size_train])
    label_batch_test=tf.reshape(label_batch_test,[batch_size_test])

    # network: three heads over shared variables (train, test, and real input)
    output_train=network_CNN(data_batch_train,dropout=True,reuse=tf.AUTO_REUSE)
    output_test =network_CNN(data_batch_test,dropout=False,reuse=tf.AUTO_REUSE)
    _real_output=network_CNN(real_input,dropout=False,reuse=tf.AUTO_REUSE)
    real_output=tf.argmax(_real_output,1,name='real_output')

    # loss
    loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=output_train, labels=label_batch_train))

    # optimizer
    train_step = tf.train.AdamOptimizer(1e-3).minimize(loss)

    # evaluation metric
    correct_prediction_test = tf.equal(tf.argmax(output_test, 1), label_batch_test)
    accuracy_test = tf.reduce_mean(tf.cast(correct_prediction_test, tf.float32))

    saver = tf.train.Saver()
    with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
        sess.run(tf.global_variables_initializer())
        if restore==True:
            saver.restore(sess,'model/ckpt/model1.ckpt')
        coord = tf.train.Coordinator() # coordinator for the input threads
        threads = tf.train.start_queue_runners(sess=sess, coord=coord) # start the queue-runner threads

        # training loop
        for i in range(training_steps+1):
            sess.run([train_step])
            if i % tag_steps == 0:
                acc_sum=0.0
                for _ in range(test_all_times):
                    acc = sess.run([accuracy_test])
                    acc_sum+=float(acc[0])
                print("After %d training steps, acc is %g" % (i, acc_sum/test_all_times))
                saver.save(sess, 'model/ckpt/model1.ckpt')

        # export only the inference subgraph (real_input -> real_output) as a frozen pb
        output_graph_def= tf.graph_util.convert_variables_to_constants(
            sess,sess.graph_def,['real_input','real_output'])
        with tf.gfile.GFile('model/pb/model1.pb','wb') as f:
            f.write(output_graph_def.SerializeToString())

        coord.request_stop() # stop the input threads
        coord.join(threads)  # and wait for them to finish


def predict_real(filename):
    with tf.gfile.FastGFile(filename,'rb') as f:
        image_data = tf.image.decode_jpeg(f.read())

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            image_c3 = sess.run(image_data)
            # embed the (assumed 20x20) digit centered on a 28x28 canvas, flattened to one row
            image_hd=np.zeros([1,28*28],dtype=float)
            for i in range(4,24):
                for j in range(4,24):
                    image_hd[0,i*28+j]=float(image_c3[i-4,j-4,0])

            # rebuild a 28x28x3 image just for display
            imgx=np.zeros([28,28,3],dtype=int)
            for i in range(0,28):
                for j in range(0,28):
                    imgx[i,j,:]=int(image_hd[0,i*28+j])

            output_graph_def = tf.GraphDef()
            with open('model/pb/model1.pb', "rb") as pbf:
                output_graph_def.ParseFromString(pbf.read())
                _ = tf.import_graph_def(output_graph_def, name="")
                real_input=sess.graph.get_tensor_by_name("real_input:0")
                real_output=sess.graph.get_tensor_by_name("real_output:0")
                result=sess.run(real_output,feed_dict={real_input:image_hd})
                plt.title('%d' % (result[0]),color='red') 
                plt.imshow(imgx)


if __name__ == '__main__':
    # train_and_test(1000,tag_steps=50,restore=True)

    for i in range(0,10):
        plt.subplot(2,5,i+1)
        predict_real('data_real/%d.jpg' % (i)) 
    plt.show()
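
Because convert_variables_to_constants freezes the weights into the pb, inference also works in a completely fresh process with none of the training code. A minimal sketch (the all-zero input is only a stand-in for a real flattened image):

# -*- coding: utf-8 -*-
import numpy as np
import tensorflow as tf

graph_def = tf.GraphDef()
with tf.gfile.GFile('model/pb/model1.pb','rb') as f:
    graph_def.ParseFromString(f.read())
tf.import_graph_def(graph_def, name='')

with tf.Session() as sess:
    x = sess.graph.get_tensor_by_name('real_input:0')
    y = sess.graph.get_tensor_by_name('real_output:0')
    print(sess.run(y, feed_dict={x: np.zeros([1,28*28],dtype=np.float32)}))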

6. Network Architecture Template

networks/TFNetwork_CNN.py:

# -*- coding: utf-8 -*-
import tensorflow as tf
import numpy as np


def network_CNN(input,dropout,reuse):
    with tf.variable_scope('mnist_CNN',reuse=reuse):#tf.AUTO_REUSE

        # reshape input to [batch, in_height, in_width, in_channels]
        input=tf.reshape(input,shape=[-1,28,28,1])

        # conv + pool layers; kernel shape is [height, width, in_channels, out_channels]
        #input:  [batch, 28, 28, 1]
        W_conv = tf.get_variable('W_conv1',[5,5,1,16],initializer=tf.random_normal_initializer(0, 0.1))
        b_conv = tf.get_variable('b_conv1',[16],initializer=tf.constant_initializer(0.1))
        conv2d=tf.nn.conv2d(input=input,filter=W_conv,strides=[1,1,1,1],padding='SAME')
        h_conv=tf.nn.relu(tf.add(conv2d,b_conv))
        h_pool=tf.nn.max_pool(h_conv,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')

        #h_pool: [batch,14,14,16]
        W_conv = tf.get_variable('W_conv2',[5,5,16,32],initializer=tf.random_normal_initializer(0, 0.1))
        b_conv = tf.get_variable('b_conv2',[32],initializer=tf.constant_initializer(0.1))
        conv2d=tf.nn.conv2d(input=h_pool,filter=W_conv,strides=[1,1,1,1],padding='SAME')
        h_conv=tf.nn.relu(tf.add(conv2d,b_conv))
        h_pool=tf.nn.max_pool(h_conv,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')

        #h_pool: [batch,7,7,32]
        W_conv = tf.get_variable('W_conv3',[5,5,32,32],initializer=tf.random_normal_initializer(0, 0.1))
        b_conv = tf.get_variable('b_conv3',[32],initializer=tf.constant_initializer(0.1))
        conv2d=tf.nn.conv2d(input=h_pool,filter=W_conv,strides=[1,1,1,1],padding='SAME')
        h_conv=tf.nn.relu(tf.add(conv2d,b_conv))

        #h_conv: [batch,7,7,32]
        h_flat=tf.reshape(h_conv,shape=[-1,7*7*32])

        #h_flat: [batch,7*7*32]
        W_fc = tf.get_variable('W_fc1',[7*7*32,1024],initializer=tf.random_normal_initializer(0, 0.1))
        b_fc = tf.get_variable('b_fc1',[1024],initializer=tf.constant_initializer(0.1))
        h_fc = tf.nn.relu(tf.add(tf.matmul(h_flat,W_fc),b_fc))

        W_fc = tf.get_variable('W_fc2',[1024,512],initializer=tf.random_normal_initializer(0, 0.1))
        b_fc = tf.get_variable('b_fc2',[512],initializer=tf.constant_initializer(0.1))
        h_fc = tf.nn.relu(tf.add(tf.matmul(h_fc,W_fc),b_fc))

        #h_fc: [batch,512]
        if dropout==True:
            net = tf.nn.dropout(h_fc, 0.8)
        else:
            net = tf.nn.dropout(h_fc, 1.0)

        W_fc = tf.get_variable('W_fc3',[512,10],initializer=tf.random_normal_initializer(0, 0.1))
        b_fc = tf.get_variable('b_fc3',[10],initializer=tf.constant_initializer(0.1))
        net = tf.add(tf.matmul(net,W_fc),b_fc)

    return net
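
A quick shape smoke test for network_CNN, which can be appended to this file (the placeholder input is only for checking dimensions):

if __name__ == '__main__':
    x = tf.placeholder(tf.float32, [None, 28*28])
    logits = network_CNN(x, dropout=False, reuse=tf.AUTO_REUSE)
    print(logits.shape)  # expected: (?, 10)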

networks/TFNetwork_NPL.py:

# -*- coding: utf-8 -*-
import tensorflow as tf
import numpy as np


def network_NLP(input,dropout,reuse):
    with tf.variable_scope('mnist_NPL',reuse=reuse):#tf.AUTO_REUSE
        W = tf.get_variable('W1',[28*28,1024],initializer=tf.random_normal_initializer(0, 0.01))
        b = tf.get_variable('b1',[1,1024],initializer=tf.constant_initializer(0.1))
        net = tf.add(tf.matmul(input, W),b)
        net = tf.nn.relu(net)
        W = tf.get_variable('W2',[1024,1024],initializer=tf.random_normal_initializer(0, 0.01))
        b = tf.get_variable('b2',[1,1024],initializer=tf.constant_initializer(0.1))
        net = tf.add(tf.matmul(net, W),b)
        net = tf.nn.relu(net)
        if dropout==True:
            net = tf.nn.dropout(net, 0.9)
        else:
            net = tf.nn.dropout(net, 1.0)
        W = tf.get_variable('W3',[1024,10],initializer=tf.random_normal_initializer(0, 0.01))
        b = tf.get_variable('b3',[1,10],initializer=tf.constant_initializer(0.1))
        net = tf.add(tf.matmul(net, W) , b)
        #net = tf.nn.relu(net)
    return net
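
To train with this fully connected network instead of the CNN, swap the three network calls in run.py (network_NLP is already imported there):

output_train=network_NLP(data_batch_train,dropout=True,reuse=tf.AUTO_REUSE)
output_test =network_NLP(data_batch_test,dropout=False,reuse=tf.AUTO_REUSE)
_real_output=network_NLP(real_input,dropout=False,reuse=tf.AUTO_REUSE)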