TfRecord+inception-v3训练图像

最新推荐文章于 2021-06-11 10:05:00 发布

程序猿的小熊

最新推荐文章于 2021-06-11 10:05:00 发布

阅读量1.2k

点赞数

文章标签： inception-v3 tfrecord tensorflow

本文链接：https://blog.csdn.net/ahnu120705097/article/details/78316938

版权

最近在使用inception-v3训练模型时遇到了一个问题：我的数据是直接从tfrecord读入的，而之前参考的inception-v3使用代码是用gfile.FastGFile(image_path,'rb').read()直接读入jpg文件，不符合自己的需求。于是我在查阅资料之后，对inception-v3的使用方式做了改动。

    # Read the serialized Inception-v3 GraphDef from the model file.
    with gfile.FastGFile(os.path.join(MODEL_DIR,MODEL_FILE),'rb') as f:
        graph_def=tf.GraphDef()
        graph_def.ParseFromString(f.read())
    # Import the loaded Inception-v3 graph and get back two tensors: the one
    # that produces the bottleneck-layer result and the one that receives the
    # input data.
    bottleneck_tensor,jpeg_data_tensor=tf.import_graph_def(
            graph_def,
            return_elements=[BOTTLENECK_TENSOR_NAME,JPEG_DATA_TENSOR_NAME])

在上面的使用代码中，涉及到两个配置选项：一个是BOTTLENECK_TENSOR_NAME，它是Inception-v3中代表瓶颈层结果的张量名称，在谷歌提供的Inception-v3模型中，这个张量名称就是'pool_3/_reshape:0'；另一个是JPEG_DATA_TENSOR_NAME，是图像输入张量所对应的名称。在之前参考的代码中，由于是直接读入jpg文件，它的写法是DecodeJpeg/contents:0。查阅资料后发现，inception的输入部分依次包含以下节点：

(1)DecodeJpeg/contents --raw image data

(2)DecodeJpeg --uint8 image [h,w,channels=3]

(3)Cast --uint8->float32

(4)ExpandDims --add dimension [1,h,w,channels]

(5)ExpandDims/dim

(6)ResizeBilinear/size --299 x 299

(7)ResizeBilinear --resize image to 299 x 299

(8)Sub/y

(9)Sub -- -128 -normalize

(10)Mul/y

(11)Mul -- /128 -normalize

所以将JPEG_DATA_TENSOR_NAME设置为DecodeJpeg:0后，即可直接把解码后的图像矩阵（uint8，[h,w,channels=3]）输入inception。使用inception-v3和tfrecord的训练代码如下：

import os.path
import numpy as np
import tensorflow as tf
from tensorflow.python.platform import gfile

# Number of nodes in the Inception-v3 bottleneck layer.
BOTTLENECK_TENSOR_SIZE = 2048

# Name of the tensor holding the bottleneck-layer result. Tensor names have
# the form '<op_name>:<output_index>'; in the Inception-v3 graph published by
# Google the bottleneck tensor is 'pool_3/_reshape:0'. For a model you train
# yourself, the name can be read from tensor.name.
BOTTLENECK_TENSOR_NAME = 'pool_3/_reshape:0'

# Name of the image input tensor. 'DecodeJpeg:0' is the output of the JPEG
# decoder, so already-decoded image arrays can be fed here instead of raw
# JPEG bytes.
JPEG_DATA_TENSOR_NAME = 'DecodeJpeg:0'

# Directory containing the downloaded pre-trained Inception-v3 model.
MODEL_DIR = '/notebooks'

# File name of the downloaded pre-trained Inception-v3 model.
MODEL_FILE = 'tensorflow_inception_graph.pb'

# Percentage of the data used for validation.
VALIDATION_PERCENTAGE = 10
# Percentage of the data used for testing.
TEST_PERCENTAGE = 10

# Training hyper-parameters.
LEARNING_RATE = 0.01
STEPS = 4000
BATCH = 100


# Default number of records pulled from the queue for each kind of file.
_DEFAULT_LABELED_EXAMPLES = 20
_DEFAULT_UNLABELED_EXAMPLES = 85510


def _fetch_examples(fetches, count):
    """Evaluate `fetches` `count` times and return the results as a list.

    A temporary session is opened and the graph's queue runners are started
    so the input pipeline can produce records. The runners are stopped and
    joined, and the session is closed, even if a fetch raises.
    """
    results = []
    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord, sess=sess)
        try:
            for _ in range(count):
                results.append(sess.run(fetches))
        finally:
            coord.request_stop()
            coord.join(threads)
    return results


def read_and_decode(filename_queue, have, num_examples=None):
    """Read examples from a TFRecord filename queue into Python lists.

    Args:
        filename_queue: queue of TFRecord file names, as produced by
            tf.train.string_input_producer.
        have: True if the records carry a 'label' feature next to 'img_raw'.
            Labelled images are reshaped to [128, 128, 3], unlabelled ones
            to [30, 30, 3].
        num_examples: how many records to read. Defaults to 20 for labelled
            data and 85510 for unlabelled data.

    Returns:
        A tuple (image_list, label_list). label_list is empty when `have`
        is false, because those records contain no label.
    """
    # Reader that pulls one serialized example at a time from the queue.
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)

    if have:
        features = tf.parse_single_example(serialized_example, features={
            'label': tf.FixedLenFeature([], tf.int64),
            'img_raw': tf.FixedLenFeature([], tf.string),
        })
        label = tf.cast(features['label'], tf.int32)
        image = tf.decode_raw(features['img_raw'], tf.uint8)
        image = tf.reshape(image, [128, 128, 3])
        count = (_DEFAULT_LABELED_EXAMPLES
                 if num_examples is None else num_examples)
        # Fetch image and label together so each pair comes from one record.
        pairs = _fetch_examples([image, label], count)
        image_list = [pair[0] for pair in pairs]
        label_list = [pair[1] for pair in pairs]
        return image_list, label_list

    features = tf.parse_single_example(serialized_example, features={
        'img_raw': tf.FixedLenFeature([], tf.string),
    })
    image = tf.decode_raw(features['img_raw'], tf.uint8)
    image = tf.reshape(image, [30, 30, 3])
    count = (_DEFAULT_UNLABELED_EXAMPLES
             if num_examples is None else num_examples)
    # There is no label tensor for these records, so only the image is fetched.
    image_list = _fetch_examples(image, count)
    return image_list, []

def run_bottleneck_on_image(sess,image_data,image_data_tensor,bottleneck_tensor):
    """Compute the bottleneck feature vector of a single image.

    Feeds `image_data` into `image_data_tensor`, evaluates
    `bottleneck_tensor` in `sess`, and returns the result with all
    size-1 dimensions removed.
    """
    feed={image_data_tensor:image_data}
    return np.squeeze(sess.run(bottleneck_tensor,feed))

def get_or_create_bottleneck(
        sess,image_data,jpeg_data_tensor,bottleneck_tensor):
    """Return the Inception-v3 feature vector for one image.

    Despite the name, nothing is looked up or stored: the vector is
    computed on every call by run_bottleneck_on_image.
    """
    return run_bottleneck_on_image(
        sess,image_data,jpeg_data_tensor,bottleneck_tensor)

def get_random_cached_bottlenecks(
        sess,image_lists,label_lists,how_many,
        jpeg_data_tensor,bottleneck_tensor):
    """Build bottleneck vectors and one-hot labels for every given image.

    Each image in `image_lists` is paired with the label at the same index
    in `label_lists`. A label equal to 1 becomes [1, 0]; any other label
    becomes [0, 1].

    Note: `how_many` is accepted but not used, and nothing is sampled at
    random; all images are processed in order.

    Returns:
        A tuple (bottlenecks, ground_truths) of equal-length lists.
    """
    bottlenecks=[]
    ground_truths=[]
    for index,img in enumerate(image_lists):
        label_name=label_lists[index]
        feature=get_or_create_bottleneck(
            sess,img,jpeg_data_tensor,bottleneck_tensor)
        one_hot=np.zeros(2,dtype=np.float32)
        # Position 0 marks label 1; position 1 marks every other label.
        one_hot[0 if label_name==1 else 1]=1.0
        bottlenecks.append(feature)
        ground_truths.append(one_hot)
    return bottlenecks,ground_truths


def main(_):
    """Train a 2-class classifier on Inception-v3 features, then predict.

    Steps:
      1. Load the pre-trained Inception-v3 graph and look up its image
         input tensor and bottleneck tensor.
      2. Build one fully connected softmax layer on top of the bottleneck
         features and train it with gradient descent on the labelled
         examples from out/output.tfrecords.
      3. Run the trained layer on every image from out/output2015.tfrecords
         and write each prediction to its own text file under
         /notebooks/result_2015/.
    """
    model_path = os.path.join(MODEL_DIR, MODEL_FILE)
    print(model_path)
    with gfile.FastGFile(model_path, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    # Import the Inception-v3 graph and get the bottleneck tensor and the
    # image input tensor.
    bottleneck_tensor, jpeg_data_tensor = tf.import_graph_def(
        graph_def,
        return_elements=[BOTTLENECK_TENSOR_NAME, JPEG_DATA_TENSOR_NAME])

    # Input for the bottleneck features computed by Inception-v3.
    bottleneck_input = tf.placeholder(
        tf.float32, [None, BOTTLENECK_TENSOR_SIZE],
        name='BottleneckInputPlaceholder')
    # Input for the one-hot ground-truth labels (2 classes).
    ground_truth_input = tf.placeholder(
        tf.float32, [None, 2], name='GroundTruthInput')
    # A single fully connected layer is enough for the new task because
    # the pre-trained network already turns images into feature vectors
    # that are easy to classify.
    with tf.name_scope('final_training_ops'):
        weights = tf.Variable(tf.truncated_normal(
            [BOTTLENECK_TENSOR_SIZE, 2], stddev=0.001))
        biases = tf.Variable(tf.zeros([2]))
        logits = tf.matmul(bottleneck_input, weights) + biases
        final_tensor = tf.nn.softmax(logits)

    # Cross-entropy loss and the training op.
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        logits=logits, labels=ground_truth_input)
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    train_step = tf.train.GradientDescentOptimizer(
        LEARNING_RATE).minimize(cross_entropy_mean)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        print('读数据')
        print('训练开始')
        # Training data.
        filename_queue1 = tf.train.string_input_producer(
            ["out/output.tfrecords"])
        image_list1, label_list1 = read_and_decode(filename_queue1, True)
        # The training images never change and the Inception-v3 weights are
        # frozen, so the features are computed once instead of once per step.
        train_bottlenecks, train_ground_truth = get_random_cached_bottlenecks(
            sess, image_list1, label_list1, BATCH,
            jpeg_data_tensor, bottleneck_tensor)
        for i in range(STEPS):
            if i % 100 == 0:
                print(i)
            sess.run(train_step,
                     feed_dict={bottleneck_input: train_bottlenecks,
                                ground_truth_input: train_ground_truth})
        print('训练结束')

        # Prediction data: read_and_decode returns (images, labels); only
        # the images are needed here.
        filename_queue2 = tf.train.string_input_producer(
            ["out/output2015.tfrecords"])
        image_list2, unused_labels = read_and_decode(filename_queue2, False)

        # Images are laid out on a grid with this many columns; the grid
        # row and column of each image go into its output file name.
        column = 503
        for i, image_data in enumerate(image_list2):
            bottleneck_values = run_bottleneck_on_image(
                sess, image_data, jpeg_data_tensor, bottleneck_tensor)
            final = sess.run(
                final_tensor,
                feed_dict={bottleneck_input: [bottleneck_values]})
            # Floor division keeps the row index an integer on Python 3 too.
            np.savetxt('/notebooks/result_2015/result_2015_'
                       + str(i // column) + '_' + str(i % column) + '.txt',
                       final, fmt='%.2f')
                 
                
# Script entry point: hand control to tf.app.run(), which is expected to
# invoke main() defined above.
if __name__=='__main__':
    tf.app.run()