# ===== VGG16 network structure (imported by the scripts below as `vgg16`) =====
import os
import numpy as np
import tensorflow as tf
class vgg16():
    """VGG16 graph for TensorFlow 1.x with a small task-specific classifier head.

    Building an instance constructs the whole graph on top of ``imgs``:
    thirteen 3x3 convolutions in five pooled stages, followed by three fully
    connected layers.  Every variable is appended, in creation order, to
    ``self.paramerters`` so that ``load_weights`` can assign pretrained arrays
    by position.

    Attributes:
        paramerters: list of variables in creation order (the attribute name
            keeps its historical spelling because callers may rely on it).
        imgs: the input tensor the graph is built on.
        fc8: raw (un-activated) class scores of the last layer.
        probs: ``softmax(fc8)``.

    NOTE: the variable name "wetghts" and the scope name "f1" look like typos
    but are kept as-is, because renaming them would make previously saved
    checkpoints impossible to restore by name.
    """

    def __init__(self, imgs, num_classes=4):
        """Build the network.

        Args:
            imgs: input image tensor (the scripts in this listing feed a
                ``[None, 224, 224, 3]`` float32 placeholder).
            num_classes: width of the final layer; defaults to 4, the value
                that used to be hard-coded.
        """
        # Shared list of all layer variables, filled while the graph is built.
        self.paramerters = []
        self.imgs = imgs
        self.num_classes = num_classes
        self.convlayers()
        self.fc_layers()
        self.probs = tf.nn.softmax(self.fc8)

    def saver(self):
        """Return a new ``tf.train.Saver`` for the current graph."""
        return tf.train.Saver()

    def maxpool(self, name, input_data):
        """Apply 2x2 max pooling with stride 2 and SAME padding."""
        out = tf.nn.max_pool(input_data, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME', name=name)
        return out

    def conv(self, name, input_data, out_channel, trainable=False):
        """Add a 3x3, stride-1, SAME-padded convolution followed by ReLU.

        Args:
            name: variable scope (and output op name) of the layer.
            input_data: input tensor; its last dimension is the channel count.
            out_channel: number of output channels.
            trainable: whether the kernel and bias are trainable.  This flag
                used to be ignored (the variables were always frozen); every
                call in this class passes ``False``, so honouring it does not
                change the graph built here.

        Returns:
            The activated output tensor.
        """
        in_channel = input_data.get_shape()[-1]  # number of input channels
        with tf.variable_scope(name):
            kernel = tf.get_variable("wetghts", [3, 3, in_channel, out_channel],
                                     dtype=tf.float32, trainable=trainable)
            biases = tf.get_variable("biases", [out_channel],
                                     dtype=tf.float32, trainable=trainable)
            conv_res = tf.nn.conv2d(input_data, kernel, [1, 1, 1, 1], padding='SAME')
            res = tf.nn.bias_add(conv_res, biases)
            out = tf.nn.relu(res, name=name)
        # Record the variables so load_weights can address them by position.
        self.paramerters += [kernel, biases]
        return out

    def fc(self, name, input_data, out_channel, trainable=True, relu=True):
        """Add a fully connected layer, flattening the input first.

        Args:
            name: variable scope of the layer.
            input_data: rank-4 feature map or rank-2 activations.
            out_channel: number of output units.
            trainable: whether the weight and bias are trainable.
            relu: apply ReLU to the output (default, as before).  Pass
                ``False`` for a layer whose output is used as logits.

        Returns:
            The layer output tensor of shape ``[batch, out_channel]``.
        """
        shape = input_data.get_shape().as_list()
        if len(shape) == 4:
            # Flattened length of one feature map: height * width * channels.
            size = shape[-1] * shape[-2] * shape[-3]
        else:
            size = shape[1]
        input_data_flat = tf.reshape(input_data, [-1, size])
        with tf.variable_scope(name):
            weights = tf.get_variable(name="weight", shape=[size, out_channel],
                                      dtype=tf.float32, trainable=trainable)
            biases = tf.get_variable(name="biases", shape=[out_channel],
                                     dtype=tf.float32, trainable=trainable)
            res = tf.nn.bias_add(tf.matmul(input_data_flat, weights), biases)
            out = tf.nn.relu(res) if relu else res
        # Record the variables so load_weights can address them by position.
        self.paramerters += [weights, biases]
        return out

    def convlayers(self):
        """Build the five convolution/pooling stages (all convolutions frozen)."""
        self.conv1_1 = self.conv("conv1_1", self.imgs, 64, trainable=False)
        self.conv1_2 = self.conv("conv1_2", self.conv1_1, 64, trainable=False)
        self.pool1 = self.maxpool("pool1", self.conv1_2)

        self.conv2_1 = self.conv("conv2_1", self.pool1, 128, trainable=False)
        self.conv2_2 = self.conv("conv2_2", self.conv2_1, 128, trainable=False)
        self.pool2 = self.maxpool("pool2", self.conv2_2)

        self.conv3_1 = self.conv("conv3_1", self.pool2, 256, trainable=False)
        self.conv3_2 = self.conv("conv3_2", self.conv3_1, 256, trainable=False)
        self.conv3_3 = self.conv("conv3_3", self.conv3_2, 256, trainable=False)
        self.pool3 = self.maxpool("pool3", self.conv3_3)

        self.conv4_1 = self.conv("conv4_1", self.pool3, 512, trainable=False)
        self.conv4_2 = self.conv("conv4_2", self.conv4_1, 512, trainable=False)
        self.conv4_3 = self.conv("conv4_3", self.conv4_2, 512, trainable=False)
        self.pool4 = self.maxpool("pool4", self.conv4_3)

        self.conv5_1 = self.conv("conv5_1", self.pool4, 512, trainable=False)
        self.conv5_2 = self.conv("conv5_2", self.conv5_1, 512, trainable=False)
        self.conv5_3 = self.conv("conv5_3", self.conv5_2, 512, trainable=False)
        self.pool5 = self.maxpool("pool5", self.conv5_3)

    def fc_layers(self):
        """Build the classifier head; only the last layer is trainable."""
        self.fc6 = self.fc("f1", self.pool5, 4096, trainable=False)
        self.fc7 = self.fc("fc2", self.fc6, 4096, trainable=False)
        # The last layer yields raw class scores: a ReLU here would clip every
        # negative logit to zero before the softmax.  Models trained with the
        # earlier (ReLU-activated) head should be re-validated.
        self.fc8 = self.fc("fc3", self.fc7, self.num_classes, trainable=True, relu=False)

    def load_weights(self, weight_file, sess):
        """Assign pretrained arrays from an ``.npz`` archive to the variables.

        Archive keys are sorted and matched by position against
        ``self.paramerters``.  Positions 30 and 31 (the weight and bias of the
        last fully connected layer) are skipped so that the task-specific
        classifier keeps its own initialisation.

        Args:
            weight_file: path of the ``.npz`` archive.
            sess: session in which the assign ops are run.
        """
        skipped_positions = (30, 31)
        # The context manager closes the archive's file handle when done.
        with np.load(weight_file) as weights:
            for i, k in enumerate(sorted(weights.keys())):
                if i in skipped_positions:
                    continue
                sess.run(self.paramerters[i].assign(weights[k]))
        print("----------weights loads------------")
# ===== Data loading helpers (imported by the training script below as `utils`) =====
import tensorflow as tf
import numpy as np
import os
from time import time
# from vgg_preprocess import preprocess_for_train
# Module-level image size. The functions visible in this listing do not read
# these globals: get_batch takes parameters with the same names instead.
img_width=224
img_height=224
# Data input
def get_file(file_dir):
    """Collect image paths and integer labels from class-named sub-folders.

    The directory tree under ``file_dir`` is walked recursively.  A file gets
    its label from the name of the folder that directly contains it; files in
    folders with any other name are ignored, so paths and labels always stay
    aligned.  The folder name is taken with ``os.path.basename`` so the lookup
    works with both ``/`` and ``\\`` separators.

    Args:
        file_dir: root directory of the data set.

    Returns:
        ``(image_list, label_list)``: two equally long lists, shuffled with
        the same random permutation (drawn from ``np.random``).  Labels are
        plain ``int`` values.
    """
    label_of_folder = {'凸粉': 0, '擦花': 1, '漏底': 2, '碰凹': 3}

    samples = []
    for root, _sub_folders, files in os.walk(file_dir):
        # normpath drops a trailing separator so basename is never empty.
        label = label_of_folder.get(os.path.basename(os.path.normpath(root)))
        if label is None:
            continue
        samples.extend((os.path.join(root, name), label) for name in files)

    # Shuffle paths and labels together.
    order = np.random.permutation(len(samples))
    image_list = [samples[i][0] for i in order]
    label_list = [samples[i][1] for i in order]
    print(image_list)
    print(label_list)
    return image_list, label_list
def get_batch(image_list, label_list, img_width, img_height, batch_size, capacity):
    """Build a queue-based input pipeline that yields image/label batches.

    Each element is read from disk, decoded as a 3-channel JPEG and resized
    with nearest-neighbour interpolation to ``img_height`` x ``img_width``.
    The size used to be hard-coded to 224x224 regardless of the arguments.

    Args:
        image_list: list of image file paths.
        label_list: list of integer labels, aligned with ``image_list``.
        img_width: output image width in pixels.
        img_height: output image height in pixels.
        batch_size: number of samples per batch.
        capacity: maximum number of elements held in the batching queue.

    Returns:
        ``(image_batch, label_batch)`` tensors; ``label_batch`` is reshaped to
        ``[batch_size]``.  Queue runners must be started before evaluating
        them.
    """
    image = tf.cast(image_list, tf.string)
    label = tf.cast(label_list, tf.int32)
    input_queue = tf.train.slice_input_producer([image, label])
    label = input_queue[1]
    image_contents = tf.read_file(input_queue[0])
    image = tf.image.decode_jpeg(image_contents, channels=3)
    # resize_images expects the size as [new_height, new_width].
    image = tf.image.resize_images(image, [img_height, img_width],
                                   method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    image_batch, label_batch = tf.train.batch([image, label], batch_size=batch_size,
                                              num_threads=64, capacity=capacity)
    label_batch = tf.reshape(label_batch, [batch_size])
    return image_batch, label_batch
# One-hot encoding
def onehot(labels, n_class=None):
    """Convert integer class labels into a one-hot matrix.

    Args:
        labels: 1-D sequence or array of integer class ids.
        n_class: number of columns of the result.  When omitted, the width is
            ``max(labels) + 1`` as before; pass it explicitly whenever the
            result must have a fixed width, because a batch that happens not
            to contain the highest class would otherwise produce a narrower
            matrix.

    Returns:
        A float array of shape ``(len(labels), n_class)`` with a single 1 per
        row.  Empty input without ``n_class`` yields a ``(0, 0)`` array.
    """
    labels = np.asarray(labels, dtype=np.int64).reshape(-1)
    n_sample = labels.shape[0]
    if n_class is None:
        n_class = int(labels.max()) + 1 if n_sample else 0
    onehot_labels = np.zeros((n_sample, n_class))
    onehot_labels[np.arange(n_sample), labels] = 1
    return onehot_labels
# ===== Training script =====
import tensorflow as tf
import numpy as np
from time import time
import utils
import vgg16 as model
import os
def train_model(num_steps=4, checkpoint_every=20):
    """Fine-tune the last layer of VGG16 on the images under ./data/train.

    The pretrained weights are loaded from 'vgg16_weights.npz', then
    ``num_steps`` mini-batches of 32 images are trained with plain gradient
    descent.  Checkpoints are written to './model1/'.

    Args:
        num_steps: number of training steps (one batch each); defaults to the
            previously hard-coded 4.
        checkpoint_every: write an intermediate checkpoint every this many
            steps; defaults to the previously hard-coded 20.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"  # hide all GPUs: run on CPU
    batch_size = 32
    capacity = 256  # maximum number of samples held in the input queue
    num_classes = 4
    model_dir = './model1/'
    # NOTE(review): the VGG channel means [123.68, 116.779, 103.939] were
    # declared here but never subtracted from the images; the inference script
    # does not subtract them either.  Confirm whether that is intended.
    start_time = time()

    xs, ys = utils.get_file("./data/train")  # image paths and labels
    image_batch, label_batch = utils.get_batch(xs, ys, 224, 224, batch_size, capacity)
    print(len(xs), len(ys))

    x = tf.placeholder(tf.float32, [None, 224, 224, 3])
    y = tf.placeholder(tf.int32, [None, num_classes])  # one-hot labels
    vgg = model.vgg16(x)
    # The loss applies softmax itself, so it must receive the raw scores of
    # the last layer; feeding vgg.probs would apply softmax twice.
    loss_function = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(logits=vgg.fc8, labels=y))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.0001).minimize(loss_function)

    # Saver.save does not create missing directories.
    os.makedirs(model_dir, exist_ok=True)

    sess = tf.Session()
    coord = tf.train.Coordinator()
    threads = []
    try:
        sess.run(tf.global_variables_initializer())
        # Inject the pretrained weights from the npz archive.
        vgg.load_weights('vgg16_weights.npz', sess)
        saver = tf.train.Saver()
        threads = tf.train.start_queue_runners(coord=coord, sess=sess)

        epoch_start_time = time()
        for i in range(num_steps):
            print('开始训练......')
            images, labels = sess.run([image_batch, label_batch])
            # Fixed-width one-hot encoding: the width must match the
            # placeholder even when a batch lacks the highest class id.
            labels = np.eye(num_classes, dtype=np.int32)[labels]
            # One run for both the update and the loss avoids a second
            # forward pass per step.
            _, loss = sess.run([optimizer, loss_function], feed_dict={x: images, y: labels})
            print("loss:%f" % loss)
            epoch_end_time = time()
            print('time:', (epoch_end_time - epoch_start_time))
            epoch_start_time = epoch_end_time
            if (i + 1) % checkpoint_every == 0:
                saver.save(sess, os.path.join(model_dir, 'epoch{:06d}.ckpt'.format(i + 1)))
            print("------epoch%d finish------------" % (i + 1))

        saver.save(sess, './model1/')  # final model
        duration = time() - start_time
        print('finish all time:', "{:.2f}".format(duration))
    finally:
        # Always stop the input threads and release the session.
        coord.request_stop()
        coord.join(threads)
        sess.close()


if __name__ == '__main__':
    train_model()
# ===== Inference / test script =====
import tensorflow as tf
import numpy as np
from imageio import imread
import vgg16 as model
from PIL import Image
import matplotlib.pyplot as plt
import os
# Classify a single test image with the fine-tuned VGG16 and show the result.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
plt.rcParams["font.family"] = 'SimHei'  # font able to render the Chinese class names
tf.reset_default_graph()
# NOTE(review): these channel means are not subtracted from the image (the
# training script does not subtract them either); confirm that is intended.
means = [123.68, 116.779, 103.939]
# Class names indexed by label id (same mapping as the training folders).
class_names = ['凸粉', '擦花', '漏底', '碰凹']
x = tf.placeholder(tf.float32, [None, 224, 224, 3])
with tf.Session() as sess:
    vgg = model.vgg16(x)
    fc8_finetuining = vgg.probs
    saver = tf.train.Saver()
    print('restoring...')
    # NOTE(review): the training script in this listing saves to './model1/';
    # confirm that './model/' is the checkpoint meant to be restored here.
    saver.restore(sess, './model/')

    filepath = './test/碰凹.jpg'
    # Read the file through a context manager so the handle is closed.
    with tf.gfile.GFile(filepath, 'rb') as image_file:
        image_raw_data = image_file.read()
    # Force 3 channels so grayscale JPEGs still match the input placeholder.
    img_data = tf.image.decode_jpeg(image_raw_data, channels=3)
    plt.imshow(img_data.eval())

    image = tf.image.resize_images(img_data, [224, 224],
                                   method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    img = image.eval()
    print(img.shape)
    batch = img[np.newaxis]  # add the batch dimension
    print(batch.shape)

    prob = sess.run(fc8_finetuining, feed_dict={x: batch})
    max_index = int(np.argmax(prob))
    # prob has shape (1, n_classes); index a scalar for the format string.
    plt.title('The result: %s %.6f' % (class_names[max_index], prob[0, max_index]))
    plt.show()