cifar-10 图片项目代码的改进

最新推荐文章于 2022-11-08 00:44:39 发布

AI小太阳

最新推荐文章于 2022-11-08 00:44:39 发布

阅读量332

点赞数

分类专栏：实战项目

本文链接：https://blog.csdn.net/xiaotaiyang222/article/details/81000005

版权

实战项目专栏收录该内容

2 篇文章 0 订阅

订阅专栏

说明：为了方便自己学习和使用，我对 CIFAR-10 示例代码进行了改进，不足之处请大家批评指正。

#data_processing

#coding:utf-8
#! /usr/bin/env python
import cv2
import numpy as np
import tensorflow as tf
import os
class DataProcess(object):
    """Turn labelled image folders into CIFAR-style binary records and batch them.

    Each record written by :meth:`read_image_to_file` is exactly
    ``label_bytes + image_height * image_width * num_channels`` bytes long:
    the label is stored in the first byte (any remaining label bytes are zero
    padding) followed by the uint8 pixels in channel-major (C, H, W) order.
    :meth:`read_cifar_files` decodes records with that same layout.
    """

    # File extensions treated as images (compared case-insensitively).
    IMAGE_EXTENSIONS = ('bmp', 'jpeg', 'gif', 'psd', 'png', 'jpg')
    # Default name of the binary record file written by read_image_to_file.
    RECORD_FILE_NAME = 'image_data.bin'

    def __init__(self, batch_size, image_height, image_width, crop_height, crop_width, label_bytes, num_channels, label_dict):
        """Store the pipeline configuration.

        Args:
            batch_size: Number of examples per batch.
            image_height: Height every image is resized to before storage.
            image_width: Width every image is resized to before storage.
            crop_height: Height of the crop/pad applied when reading records.
            crop_width: Width of the crop/pad applied when reading records.
            label_bytes: Number of bytes reserved for the label in a record.
            num_channels: Number of colour channels per image.
            label_dict: Maps a folder name to its integer label.
        """
        self.batch_size = batch_size
        self.image_height = image_height
        self.image_width = image_width
        self.crop_height = crop_height
        self.crop_width = crop_width
        self.label_bytes = label_bytes
        self.num_channels = num_channels
        self.label_dict = label_dict

    def _encode_record(self, rgb_image, label):
        """Serialise one resized RGB image and its label into a record.

        Args:
            rgb_image: Array of shape (image_height, image_width, num_channels).
            label: Integer label in the range 0..255.

        Returns:
            A bytes object of length ``label_bytes + H * W * C``.

        Raises:
            ValueError: If the image shape, label or label_bytes is invalid.
        """
        if self.label_bytes < 1:
            raise ValueError("label_bytes must be at least 1")
        if not 0 <= int(label) <= 255:
            raise ValueError("label {} does not fit in one byte".format(label))
        pixels = np.asarray(rgb_image, dtype=np.uint8)
        expected_shape = (self.image_height, self.image_width, self.num_channels)
        if pixels.shape != expected_shape:
            raise ValueError(
                "expected image of shape {}, got {}".format(expected_shape, pixels.shape))
        header = np.zeros(self.label_bytes, dtype=np.uint8)
        header[0] = int(label)
        # The reader reshapes the pixel bytes to (C, H, W), so store them
        # channel-major rather than in OpenCV's (H, W, C) order.
        planes = pixels.transpose(2, 0, 1)
        return header.tobytes() + planes.tobytes()

    def _collect_image_paths(self, path):
        """Return every image file at or below ``path`` in sorted order."""
        if os.path.isfile(path):
            extension = os.path.basename(path).split('.')[-1].lower()
            return [path] if extension in self.IMAGE_EXTENSIONS else []
        found = []
        for entry in sorted(os.listdir(path)):
            child = os.path.join(path, entry)
            if os.path.isdir(child) or os.path.isfile(child):
                found.extend(self._collect_image_paths(child))
        return found

    def read_image_to_file(self, path, output_file=None):
        """Write every image under ``path`` to one binary record file.

        The label of an image is looked up in ``label_dict`` using the name
        of the folder that directly contains it.

        Args:
            path: An image file or a directory that is searched recursively.
            output_file: Where to write the records. Defaults to
                ``image_data.bin`` inside ``path`` (or next to ``path`` when
                it is a single file).

        Returns:
            ``False`` if ``path`` is neither a file nor a directory,
            otherwise a one-element list holding the record file name.

        Raises:
            KeyError: If an image's folder name is not in ``label_dict``.
        """
        if not os.path.isdir(path) and not os.path.isfile(path):
            return False
        if output_file is None:
            root = path if os.path.isdir(path) else os.path.dirname(path)
            output_file = os.path.join(root, self.RECORD_FILE_NAME)
        image_paths = self._collect_image_paths(path)
        with open(output_file, 'wb') as fp:
            for image_path in image_paths:
                label_name = os.path.basename(os.path.dirname(image_path))
                label = self.label_dict[label_name]
                image = cv2.imread(image_path)
                if image is None:
                    # cv2.imread returns None for files it cannot decode.
                    print("Skipping unreadable image: {}".format(image_path))
                    continue
                # cv2.resize expects the target size as (width, height).
                resized = cv2.resize(image, (self.image_width, self.image_height))
                # OpenCV loads images as BGR; reverse the channel axis for RGB.
                rgb = resized[:, :, ::-1]
                fp.write(self._encode_record(rgb, label))
        return [output_file]

    def read_cifar_files(self, filename_queue, distort_images=True):
        """Build the ops that read and preprocess one record from the queue.

        Args:
            filename_queue: A TensorFlow queue of record file names.
            distort_images: When true, apply a random flip and random
                brightness/contrast changes before standardisation.

        Returns:
            A tuple ``(final_image, image_label, cropped_image)`` where
            ``final_image`` is the standardised float image, ``image_label``
            is an int32 tensor of shape [1] and ``cropped_image`` is the
            cropped/padded image before distortion and standardisation.
        """
        image_vec_length = self.image_height * self.image_width * self.num_channels
        record_length = self.label_bytes + image_vec_length
        # Every record has the same size, so a fixed-length reader fits.
        reader = tf.FixedLengthRecordReader(record_bytes=record_length)
        key, record_string = reader.read(filename_queue)
        record_bytes = tf.decode_raw(record_string, tf.uint8)

        # The label lives in the first byte of the record.
        image_label = tf.cast(tf.slice(record_bytes, [0], [1]), tf.int32)
        # The pixels start after the label bytes and are stored as (C, H, W).
        image_extracted = tf.reshape(
            tf.slice(record_bytes, [self.label_bytes], [image_vec_length]),
            [self.num_channels, self.image_height, self.image_width])
        # (C, H, W) -> (H, W, C)
        image_uint8image = tf.transpose(image_extracted, [1, 2, 0])
        reshaped_image = tf.cast(image_uint8image, tf.float32)
        # Centre-crop when the stored image is larger than the target and
        # zero-pad when it is smaller; the arguments are (height, width).
        final_image_1 = tf.image.resize_image_with_crop_or_pad(
            reshaped_image, self.crop_height, self.crop_width)

        final_image = final_image_1
        if distort_images:
            final_image = tf.image.random_flip_left_right(final_image)
            final_image = tf.image.random_brightness(final_image, max_delta=63)
            final_image = tf.image.random_contrast(final_image, lower=0.2, upper=1.8)
        # Standardise in both modes so the network sees the same value range.
        final_image = tf.image.per_image_standardization(final_image)
        return final_image, image_label, final_image_1

    def input_pipeline(self, path):
        """Convert the images under ``path`` and return shuffled batch tensors.

        Args:
            path: Image file or directory passed to read_image_to_file.

        Returns:
            A tuple ``(example_batch, label_batch, cropped_image)``.

        Raises:
            ValueError: If ``path`` does not exist.
        """
        filenames = self.read_image_to_file(path)
        if not filenames:
            raise ValueError("Image path does not exist: {}".format(path))
        # The reader consumes a queue of file names, not a Python list.
        filename_queue = tf.train.string_input_producer(filenames)
        image, label, temp_value = self.read_cifar_files(filename_queue)
        # Keep at least this many elements queued so shuffling mixes well.
        min_after_dequeue = 1000
        capacity = min_after_dequeue + 3 * self.batch_size
        example_batch, label_batch = tf.train.shuffle_batch(
            [image, label], self.batch_size, capacity, min_after_dequeue)
        return (example_batch, label_batch, temp_value)

#cnn_model

#coding:utf-8
import tensorflow as tf
import numpy as np
import pickle
class ModelCNN(object):
    """Two conv/pool/LRN stages followed by three fully connected layers.

    Attributes created by the constructor:
        input_x: float32 placeholder [batch_size, crop_height, crop_width, num_channels].
        input_y: int32 placeholder [batch_size, 1] holding class indices.
        final_output: unnormalised logits [batch_size, num_classes].
        y_pred: softmax of ``final_output``.
        predictions: int32 predicted class index per example; the op is
            named ``output/predictions`` so it can be fetched by name.
        loss: mean sparse softmax cross-entropy.
        accuracy: fraction of examples whose prediction equals the label.
    """

    def __init__(self, batch_size, num_classes, crop_height=30, crop_width=30, num_channels=3):
        """Build the graph.

        Args:
            batch_size: Fixed number of examples per batch.
            num_classes: Number of output classes.
            crop_height: Height of the input images.
            crop_width: Width of the input images.
            num_channels: Number of colour channels of the input images.
        """
        self.input_x = tf.placeholder(tf.float32, [batch_size, crop_height, crop_width, num_channels], name="input_x")
        self.input_y = tf.placeholder(tf.int32, [batch_size, 1], name="input_y")

        # NOTE: variable and scope names below are kept exactly as they were
        # so that the names stored in checkpoints do not change.

        # First convolution stage. Biases are variables so they get trained.
        with tf.name_scope('conv1'):
            conv1_kernel = tf.Variable(tf.truncated_normal(shape=[5, 5, num_channels, 64], stddev=0.05), dtype=tf.float32, name="conv1_kernel")
            # [1, 1, 1, 1] is the stride along each input dimension.
            conv1 = tf.nn.conv2d(self.input_x, conv1_kernel, [1, 1, 1, 1], padding='SAME', name='conv1')
            conv1_bias = tf.Variable(tf.zeros(shape=[64], dtype=tf.float32), name='conv_bias1')
            conv1_add_bias = tf.nn.bias_add(conv1, conv1_bias)
            relu_conv1 = tf.nn.relu(conv1_add_bias)
        pool1 = tf.nn.max_pool(relu_conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool_layer1')
        # Local response normalisation with a depth radius of 5.
        norm1 = tf.nn.lrn(pool1, depth_radius=5, bias=2.0, alpha=1e-3, beta=0.75, name='norm1')

        # Second convolution stage; 64 is the number of filters.
        with tf.variable_scope('conv2') as scope:
            conv2_kernel = tf.Variable(tf.truncated_normal(shape=[5, 5, 64, 64], stddev=0.05), dtype=tf.float32, name="conv1_kerne2")
            conv2 = tf.nn.conv2d(norm1, conv2_kernel, [1, 1, 1, 1], padding='SAME')
            conv2_bias = tf.Variable(tf.zeros(shape=[64], dtype=tf.float32), name='conv_bias2')
            conv2_add_bias = tf.nn.bias_add(conv2, conv2_bias)
            relu_conv2 = tf.nn.relu(conv2_add_bias)
        pool2 = tf.nn.max_pool(relu_conv2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool_layer2')
        norm2 = tf.nn.lrn(pool2, depth_radius=5, bias=2.0, alpha=1e-3, beta=0.75, name='norm2')

        # Flatten the feature maps for the fully connected layers.
        reshaped_output = tf.reshape(norm2, [batch_size, -1])
        reshaped_dim = reshaped_output.get_shape()[1].value

        # Three fully connected layers.
        with tf.variable_scope('ful1') as scope:
            full_weight1 = tf.Variable(tf.truncated_normal(shape=[reshaped_dim, 384], stddev=0.05), dtype=tf.float32, name="full_weight1")
            full_bias1 = tf.Variable(tf.zeros(shape=[384], dtype=tf.float32), name='full_bias1')
            full_layer1 = tf.nn.relu(tf.add(tf.matmul(reshaped_output, full_weight1), full_bias1))
        with tf.variable_scope('full2') as scope:
            full_weight2 = tf.Variable(tf.truncated_normal(shape=[384, 192], stddev=0.05), dtype=tf.float32, name="full_weight2")
            full_bias2 = tf.Variable(tf.zeros(shape=[192], dtype=tf.float32), name='full_bias2')
            full_layer2 = tf.nn.relu(tf.add(tf.matmul(full_layer1, full_weight2), full_bias2))
        with tf.variable_scope('full3') as scope:
            full_weight3 = tf.Variable(tf.truncated_normal(shape=[192, num_classes], stddev=0.05), dtype=tf.float32, name="full_weight3")
            full_bias3 = tf.Variable(tf.zeros(shape=[num_classes], dtype=tf.float32), name='full_bias3')
            self.final_output = tf.add(tf.matmul(full_layer2, full_weight3), full_bias3)
            self.y_pred = tf.nn.softmax(logits=self.final_output)

        # Sparse cross-entropy and the accuracy check both need labels of
        # shape [batch_size], so drop the trailing dimension of input_y.
        sparse_labels = tf.squeeze(self.input_y, axis=1)

        # Predicted class index, exposed as the op "output/predictions".
        with tf.name_scope("output"):
            self.predictions = tf.cast(tf.argmax(self.final_output, 1), tf.int32, name="predictions")

        # Loss computed directly from class indices (no one-hot encoding).
        with tf.name_scope("loss"):
            losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.final_output, labels=sparse_labels)
            self.loss = tf.reduce_mean(losses)

        # Accuracy
        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions, sparse_labels)
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32), name="accuracy")

#model train

#coding:utf-8
#! /usr/bin/env python
import tensorflow as tf
import numpy as np
import os
import time
import datetime
from data_processing import DataProcess
from cnn_model import ModelCNN
from tensorflow.contrib import learn
from sklearn.model_selection import train_test_split

# Parameters
# ==================================================
tf.flags.DEFINE_string("path", "imge/", "Main path for image storage.")
tf.flags.DEFINE_integer("image_height", 32, "The height of the image read in (default: 32)")
tf.flags.DEFINE_integer("image_width", 32, "The width of the image read in (default: 32)")
tf.flags.DEFINE_integer("crop_height", 30, "Capture the height of the image (default: 30)")
tf.flags.DEFINE_integer("crop_width", 30, "Capture the width of the image (default: 30)")
tf.flags.DEFINE_integer("num_channels", 3, "Color channel of the picture (default: 3)")
tf.flags.DEFINE_integer("label_bytes", 1, "Number of bytes used to store the label in each record (default: 1)")
tf.flags.DEFINE_integer("num_classes", 6, "Number of classes the images are divided into (default: 6)")
tf.flags.DEFINE_float("learning_rate", 0.1, "Initial learning rate (default: 0.1)")
tf.flags.DEFINE_float("lr_decay", 0.9, "Learning rate decay factor (default: 0.9)")
tf.flags.DEFINE_integer("num_gens_to_wait", 250, "Number of steps between learning rate decays (default: 250)")
tf.flags.DEFINE_integer("generations", 20000, "Number of training steps (default: 20000)")
tf.flags.DEFINE_integer("batch_size", 128, "Batch Size (default: 128)")
tf.flags.DEFINE_integer("eval_every", 500, "Evaluate model on dev set after this many steps (default: 500)")
tf.flags.DEFINE_integer("checkpoint_every", 500, "Save model after this many steps (default: 500)")
tf.flags.DEFINE_integer("num_checkpoints", 5, "Number of checkpoints to store (default: 5)")
# Session configuration flags
tf.flags.DEFINE_boolean("allow_soft_placement", True, "Allow device soft device placement")
tf.flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices")
FLAGS = tf.flags.FLAGS
try:
    # absl-based flags (TensorFlow >= 1.5) expose the parsed values here.
    flag_values = FLAGS.flag_values_dict()
except AttributeError:
    # Older TensorFlow releases only offer the private flag API.
    FLAGS._parse_flags()
    flag_values = FLAGS.__flags
# Print every flag name and its value.
print("\nParameters:")
for attr, value in sorted(flag_values.items()):
    print("{}={}".format(attr.upper(), value))
print("")

# Maps an image folder name to its integer label.
# NOTE(review): "airplne" looks like a typo of "airplane"; it is left
# unchanged because it has to match the folder name on disk - confirm.
label_dict = dict(cat = 0, dog = 1, airplne = 2, bird = 3, horse = 4, ship = 5)
#train
with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
      allow_soft_placement=FLAGS.allow_soft_placement,
      log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        data_process = DataProcess(
            batch_size = FLAGS.batch_size,
            image_height = FLAGS.image_height,
            image_width = FLAGS.image_width,
            crop_height = FLAGS.crop_height,
            crop_width = FLAGS.crop_width,
            label_bytes = FLAGS.label_bytes,
            num_channels = FLAGS.num_channels,
            label_dict = label_dict)
        # Batch tensors; they have to be evaluated with sess.run before
        # their values can be fed into the model placeholders.
        images, labels, _ = data_process.input_pipeline(path = FLAGS.path)
        # NOTE: the model is built from batch_size and num_classes only, so
        # its input size has to agree with the crop_* and num_channels flags.
        cnn = ModelCNN(
            batch_size = FLAGS.batch_size,
            num_classes = FLAGS.num_classes)
        # Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        # Staircase decay: multiply the rate by lr_decay every num_gens_to_wait steps.
        model_learning_rate = tf.train.exponential_decay(FLAGS.learning_rate,global_step,FLAGS.num_gens_to_wait,FLAGS.lr_decay,staircase=True)
        my_optimizer = tf.train.GradientDescentOptimizer(model_learning_rate)
        # Passing global_step makes the optimizer increment it on every
        # update, which drives the decay and the periodic eval/checkpoint.
        train_op = my_optimizer.minimize(cnn.loss, global_step=global_step)
        # Output directory for checkpoints, named after the start time.
        timestamp = str(int(time.time()))
        out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
        print("Writing to {}\n".format(out_dir))
        checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
        checkpoint_prefix = os.path.join(checkpoint_dir, "model")
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints)
        sess.run(tf.global_variables_initializer())
        # The coordinator manages the queue-runner threads.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        def train_step(x_batch, y_batch):
            """Run one optimisation step on a batch of numpy arrays."""
            feed_dict = {
              cnn.input_x: x_batch,
              cnn.input_y: y_batch
            }
            _, step, loss, accuracy = sess.run(
                [train_op, global_step, cnn.loss, cnn.accuracy],
                feed_dict)
            time_str = datetime.datetime.now().isoformat()
            print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))

        def dev_step(x_batch, y_batch):
            """Report loss and accuracy on a batch without updating weights."""
            feed_dict = {
              cnn.input_x: x_batch,
              cnn.input_y: y_batch
            }
            step, loss, accuracy = sess.run(
                [global_step, cnn.loss, cnn.accuracy],
                feed_dict)
            time_str = datetime.datetime.now().isoformat()
            print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))

        try:
            for i in range(FLAGS.generations):
                x_batch, y_batch = sess.run([images, labels])
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % FLAGS.eval_every == 0:
                    print("\nEvaluation:")
                    # A fresh batch from the same queue is used here; it is
                    # not a disjoint validation set.
                    # TODO: evaluate on held-out images stored separately.
                    x_dev, y_dev = sess.run([images, labels])
                    dev_step(x_dev, y_dev)
                    print("")
                if current_step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
        finally:
            # Always stop the reader threads, even when training fails.
            coord.request_stop()
            coord.join(threads)

#model test

#coding:utf-8
#! /usr/bin/env python
import tensorflow as tf
import numpy as np
import os
import math
import time
import datetime
from data_processing import DataProcess
from cnn_model import ModelCNN
from tensorflow.contrib import learn

# csv is needed for the prediction file written at the end of the script.
import csv

# Parameters
# ==================================================
tf.flags.DEFINE_string("path", "imge/", "Main path for image storage.")
tf.flags.DEFINE_integer("image_height", 32, "The height of the image read in (default: 32)")
tf.flags.DEFINE_integer("image_width", 32, "The width of the image read in (default: 32)")
tf.flags.DEFINE_integer("crop_height", 30, "Capture the height of the image (default: 30)")
tf.flags.DEFINE_integer("crop_width", 30, "Capture the width of the image (default: 30)")
tf.flags.DEFINE_integer("num_channels", 3, "Color channel of the picture (default: 3)")
tf.flags.DEFINE_integer("label_bytes", 1, "Number of bytes used to store the label in each record (default: 1)")
tf.flags.DEFINE_integer("batch_size", 128, "Batch Size (default: 128)")
# NOTE: num_examples is defined but not read anywhere in this script.
tf.flags.DEFINE_integer('num_examples', 10000, "Number of examples to evaluate (default: 10000)")
tf.flags.DEFINE_string("checkpoint_dir", "", "Checkpoint directory from the training run")
# Session configuration flags
tf.flags.DEFINE_boolean("allow_soft_placement", True, "Allow device soft device placement")
tf.flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices")
FLAGS = tf.flags.FLAGS
try:
    # absl-based flags (TensorFlow >= 1.5) expose the parsed values here.
    flag_values = FLAGS.flag_values_dict()
except AttributeError:
    # Older TensorFlow releases only offer the private flag API.
    FLAGS._parse_flags()
    flag_values = FLAGS.__flags
# Print every flag name and its value.
print("\nParameters:")
for attr, value in sorted(flag_values.items()):
    print("{}={}".format(attr.upper(), value))
print("")
# Test images are expected in a folder named "test"; the label is a dummy.
label_dict = dict(test = 0)
# Evaluation
# ==================================================
# Most recent checkpoint written to checkpoint_dir.
checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
if checkpoint_file is None:
    raise ValueError(
        "No checkpoint found in {!r}; pass --checkpoint_dir".format(FLAGS.checkpoint_dir))
graph = tf.Graph()
with graph.as_default():
    session_conf = tf.ConfigProto(
      allow_soft_placement=FLAGS.allow_soft_placement,
      log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        # Import the trained graph first so its op names ("input_x",
        # "output/predictions") are not renamed by later additions.
        saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
        saver.restore(sess, checkpoint_file)
        # NOTE(review): the meta graph is expected to carry the training
        # input queue runners; drop them so only the test pipeline built
        # below is started - confirm against the saved graph.
        graph.clear_collection(tf.GraphKeys.QUEUE_RUNNERS)

        data_process = DataProcess(
            batch_size = FLAGS.batch_size,
            image_height = FLAGS.image_height,
            image_width = FLAGS.image_width,
            crop_height = FLAGS.crop_height,
            crop_width = FLAGS.crop_width,
            label_bytes = FLAGS.label_bytes,
            num_channels = FLAGS.num_channels,
            label_dict = label_dict)
        test_images, labels, _ = data_process.input_pipeline(path = FLAGS.path)
        # The coordinator manages the queue-runner threads.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            # Get the placeholders from the graph by name
            input_x = graph.get_operation_by_name("input_x").outputs[0]

            # Tensors we want to evaluate
            predictions = graph.get_operation_by_name("output/predictions").outputs[0]
            # Placeholders accept numpy values, not tensors, so pull one
            # batch out of the queue before feeding it to the model.
            image_batch = sess.run(test_images)
            result_predictions = sess.run(predictions, {input_x: image_batch})
        finally:
            coord.request_stop()
            coord.join(threads)
# Save the evaluation to a csv
# Each row holds one flattened image followed by its predicted class.
flat_images = np.reshape(image_batch, (image_batch.shape[0], -1))
predictions_human_readable = np.column_stack((flat_images, result_predictions))
out_path = os.path.join(FLAGS.checkpoint_dir, "..", "prediction.csv")
print("Saving evaluation to {0}".format(out_path))
with open(out_path, 'w') as f:
    csv.writer(f).writerows(predictions_human_readable)