最近在从头开始学习机器学习。看完了吴恩达老师的机器学习和深度学习视频,觉得无法联系实际,
看代码大概可以懂,于是决定自己开始写。
emmm……这让我深刻认识到“看懂”和“会写”是两码事,下面是代码。
数据集是网上下载的,猫狗图片,应该是kaggle的
==========================================================================
代码:猫狗大战之cnn
# -*- coding: utf-8 -*-
"""
Created on Thu Nov 12 09:41:04 2020
@author: sun
"""
from PIL import Image
import tensorflow as tf
import numpy as np
import cv2
import os
# ---- Training hyper-parameters and I/O paths ----
num_gens_to_wait=250  # steps between learning-rate decay events (see opt_step)
lr_decay=0.9  # multiplicative learning-rate decay factor
learing_rate=0.1  # initial learning rate (name is a typo of "learning_rate"; kept because opt_step references it)
CHECK_POINT_DIR='E:\\BaiduNetdiskDownload\\check\\'  # where model checkpoints are written
LOG_DIR='E:\\BaiduNetdiskDownload\\log'  # TensorBoard summary directory
IMG_W=28  # width images are crop/padded to in get_batches
IMG_H=28  # height images are crop/padded to in get_batches
batch_size=BATCH_SIZE=16  # images per training batch (two aliases; both are used below)
CAPACITY=20  # maximum number of elements the batching queue may hold
def get_files(file_path):
    """List the images under *file_path* and label them by filename.

    A filename containing 'cat' gets label 0, anything else label 1.
    The (path, label) pairs are shuffled together so each image keeps
    its own label.

    Returns:
        (image_list, label_list): list of path strings and list of ints.
    """
    paths = []
    labels = []
    for fname in os.listdir(file_path):
        paths.append(file_path + '/' + fname)
        labels.append(0 if 'cat' in fname else 1)
    # Stack paths and labels into one 2 x N array, transpose to N x 2,
    # and shuffle rows so the pairing survives the shuffle.  Note the
    # labels become strings inside the array and are converted back below.
    pairs = np.array([paths, labels])
    pairs = pairs.transpose()
    np.random.shuffle(pairs)
    image_list = list(pairs[:, 0])
    label_list = [int(v) for v in pairs[:, 1]]
    return image_list, label_list
def get(image, label, batch_size, i):
    """Load the i-th mini-batch as flattened, normalized 28*28 grayscale rows.

    Not referenced by the training code in this file (it uses get_batches
    instead), but kept as a NumPy/OpenCV alternative loader.

    Args:
        image: list of image file paths.
        label: list of integer labels aligned with *image*.
        batch_size: number of images per batch.
        i: batch index; images [batch_size*i, batch_size*(i+1)) are loaded.

    Returns:
        (imgs, labels): float32 array [batch_size, 784] of values in [0, 1]
        and uint8 array [batch_size, 1] of labels.
    """
    img_names = image[batch_size * i:batch_size * (i + 1)]
    # Bug fix: this buffer was uint8 — storing the /255-normalized floats
    # (all < 1.0) into it truncated every pixel to 0.  float32 keeps them.
    imgs = np.zeros((batch_size, 28 * 28), dtype=np.float32)
    labels = np.zeros((batch_size, 1), dtype=np.uint8)
    for idx in range(len(img_names)):
        img = cv2.imread(img_names[idx])
        # Take the first channel and resize to a single 784-pixel row.
        img = cv2.resize(img[:, :, 0], (1, 28 * 28)).flatten()
        imgs[idx] = img / 255
        labels[idx][0] = label[idx]
    return imgs, labels
#返回将是Tensor
def get_batches(image, label, resize_w, resize_h, batch_size, capacity):
    """Build a TF queue pipeline yielding (image_batch, labels_batch) tensors.

    Each file path is read, JPEG-decoded to an HxWx3 uint8 tensor,
    crop/padded to (resize_w, resize_h), standardized per image, and
    batched with a multi-threaded queue.

    Returns:
        image_batch: float32 [batch_size, resize_w, resize_h, 3].
        labels_batch: int64 [batch_size].
    """
    path_tensor = tf.cast(image, tf.string)
    label_tensor = tf.cast(label, tf.int64)
    # slice_input_producer feeds one (path, label) pair at a time into
    # the filename queue, shuffling by default.
    input_queue = tf.train.slice_input_producer([path_tensor, label_tensor])
    one_label = input_queue[1]
    raw_bytes = tf.read_file(input_queue[0])
    # Decode JPEG bytes into a uint8 3-D tensor (use decode_png for PNGs).
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize_image_with_crop_or_pad(decoded, resize_w, resize_h)
    standardized = tf.image.per_image_standardization(resized)
    image_batch, label_batch = tf.train.batch(
        [standardized, one_label],
        batch_size=batch_size, num_threads=64, capacity=capacity)
    image_batch = tf.cast(image_batch, tf.float32)
    labels_batch = tf.reshape(label_batch, [batch_size])
    return image_batch, labels_batch
def cnn_model(input_images, batch_size, train_logical=True):
    """Two conv/pool/LRN layers followed by three fully connected layers.

    Args:
        input_images: 4-D float tensor [batch, H, W, 3].
        batch_size: batch size, used to flatten the conv output.
        train_logical: unused; kept for interface compatibility.

    Returns:
        f3: unscaled logits of shape [batch_size, 2] (cat=0, dog=1).
    """
    def truncates_normal_var(name, shape, dtype):
        # Weight variable with truncated-normal initialization.
        return tf.get_variable(
            name=name, shape=shape, dtype=dtype,
            initializer=tf.truncated_normal_initializer(stddev=0.5))

    def zero_var(name, shape, dtype):
        # Bias variable initialized to zero.  Fix: the original used
        # truncated_normal_initializer(stddev=0.0), a confusing way to
        # say "zeros"; tf.zeros_initializer states the intent directly.
        return tf.get_variable(
            name=name, shape=shape, dtype=dtype,
            initializer=tf.zeros_initializer())

    with tf.variable_scope('conv1') as scope:
        conv1_kernel = truncates_normal_var(name='conv1_kernel', shape=[5, 5, 3, 64], dtype=tf.float32)
        conv1 = tf.nn.conv2d(input_images, conv1_kernel, [1, 1, 1, 1], padding='SAME')
        conv1_bias = zero_var('con1_b', [64], dtype=tf.float32)
        con1_add_bias = tf.nn.bias_add(conv1, conv1_bias)
        relu_conv1 = tf.nn.relu(con1_add_bias)
        pool1 = tf.nn.max_pool(relu_conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool_layer1')
        norma1 = tf.nn.lrn(pool1, depth_radius=5, bias=2.0, alpha=1e-3, beta=0.75, name='norm1')
    with tf.variable_scope('conv2') as scope:
        conv2_kernel = truncates_normal_var(name='conv2_kernel', shape=[5, 5, 64, 64], dtype=tf.float32)
        conv2 = tf.nn.conv2d(norma1, conv2_kernel, [1, 1, 1, 1], padding='SAME')
        conv2_bias = zero_var('con2_b', [64], dtype=tf.float32)
        con2_add_bias = tf.nn.bias_add(conv2, conv2_bias)
        relu_conv2 = tf.nn.relu(con2_add_bias)
        pool2 = tf.nn.max_pool(relu_conv2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool_layer2')
        norm2 = tf.nn.lrn(pool2, depth_radius=5, bias=2.0, alpha=1e-3, beta=0.75, name='norm2')
    # Flatten conv output to [batch_size, features] for the dense layers.
    reshaped_output = tf.reshape(norm2, [batch_size, -1])
    reshaped_dim = reshaped_output.get_shape()[1].value
    # Fully connected layers.
    with tf.variable_scope('f1') as scope:
        w1 = truncates_normal_var(name='w1', shape=[reshaped_dim, 384], dtype=tf.float32)
        b1 = zero_var(name='b1', shape=[384], dtype=tf.float32)
        f1 = tf.nn.relu(tf.matmul(reshaped_output, w1) + b1)
    with tf.variable_scope('f2') as scope:
        w2 = truncates_normal_var(name='w2', shape=[384, 192], dtype=tf.float32)
        b2 = zero_var(name='b2', shape=[192], dtype=tf.float32)
        f2 = tf.nn.relu(tf.matmul(f1, w2) + b2)
    with tf.variable_scope('f3') as scope:
        # Final linear layer: no activation, returns raw logits.
        w3 = truncates_normal_var(name='w3', shape=[192, 2], dtype=tf.float32)
        b3 = zero_var(name='b3', shape=[2], dtype=tf.float32)
        f3 = tf.matmul(f2, w3) + b3
    return f3
def opt_step(my_loss, gen_num):
    """Build the SGD training op with an exponentially decaying learning rate.

    Bug fix: `global_step=gen_num` must be passed to minimize(); without it
    the step counter never increments, so exponential_decay always returns
    the initial learning rate and no decay ever happens.

    Args:
        my_loss: scalar loss tensor to minimize.
        gen_num: non-trainable tf.Variable serving as the global step.

    Returns:
        The training op.
    """
    decayed_lr = tf.train.exponential_decay(
        learing_rate, gen_num, num_gens_to_wait, lr_decay, staircase=True)
    my_optimizer = tf.train.GradientDescentOptimizer(decayed_lr)
    return my_optimizer.minimize(my_loss, global_step=gen_num)
def my_loss(f3, targets):
    """Mean sparse softmax cross-entropy between logits and integer labels.

    Args:
        f3: logits tensor [batch_size, 2].
        targets: integer class labels; squeezed to a [batch_size] vector.

    Returns:
        Scalar mean cross-entropy loss.
    """
    int_targets = tf.squeeze(tf.cast(targets, tf.int32))
    per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=f3, labels=int_targets)
    return tf.reduce_mean(per_example)
def accuracy(f3, targets):
    """Fraction of rows whose argmax over logits equals the integer target.

    Args:
        f3: logits tensor [batch_size, 2].
        targets: integer class labels; squeezed to a [batch_size] vector.

    Returns:
        Scalar accuracy in [0, 1].
    """
    int_targets = tf.squeeze(tf.cast(targets, tf.int32))
    predicted = tf.cast(tf.argmax(f3, 1), tf.int32)
    hits = tf.equal(predicted, int_targets)
    return tf.reduce_mean(tf.cast(hits, tf.float32))
#训练程序
#if __name__=="__main__":
def train():
    """Train the cat/dog CNN for 1000 steps and checkpoint at the end.

    Builds the input pipeline, model, loss, optimizer and accuracy ops,
    starts the queue runners, and runs the training loop, printing loss
    and accuracy every 10 steps.
    """
    train_dir = 'E:\\BaiduNetdiskDownload\\train/'
    # File paths and integer labels (0 = cat, 1 = dog).
    image_paths, image_labels = get_files(train_dir)
    # Queue-backed batches of standardized images.
    X, Y_label = get_batches(image_paths, image_labels, IMG_W, IMG_H, BATCH_SIZE, CAPACITY)
    Y_pre = cnn_model(X, batch_size, train_logical=True)
    gen_num = tf.Variable(0, trainable=False)
    my_losss = my_loss(Y_pre, Y_label)
    my_opt_step = opt_step(my_losss, gen_num)
    my_accuracy = accuracy(Y_pre, Y_label)
    sess = tf.Session()
    # Bug fix: initialize_all_variables() is long deprecated.
    sess.run(tf.global_variables_initializer())
    # merge_all() returns None when the graph defines no summaries.
    summary_op = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(LOG_DIR, sess.graph)
    saver = tf.train.Saver()
    coord = tf.train.Coordinator()
    # Start the queue runners so get_batches actually produces data.
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        for step in np.arange(1000):
            if coord.should_stop():
                break
            _, tra_loss, tra_acc = sess.run([my_opt_step, my_losss, my_accuracy])
            if step % 10 == 0:
                # Bug fix: accuracy is a fraction; scale to percent for '%%'.
                print('Step %d, train loss=%.2f, train accuracy = %.2f%%' % (step, tra_loss, tra_acc * 100.0))
                # Bug fix: sess.run(None) raises when no summaries exist.
                if summary_op is not None:
                    summary_str = sess.run(summary_op)
                    train_writer.add_summary(summary_str, step)
            # Save the model once, at the final step.
            if (step + 1) == 1000:
                # Bug fix: save to a model file inside the checkpoint
                # directory; passing the bare directory produces nameless
                # checkpoint files.
                saver.save(sess, os.path.join(CHECK_POINT_DIR, 'model.ckpt'), global_step=step)
    except tf.errors.OutOfRangeError:
        print('Done training')
    finally:
        coord.request_stop()
        coord.join(threads)
        train_writer.close()
        sess.close()
train()
ERROR:当train到300的时候,出现Kernel died, restarting
查了查,发现有解决方法为:
参考网页:https://www.cnblogs.com/ahusun/p/6068695.html
修改Anaconda2\pkgs\spyder-3.0.1-py27_0\Lib\site-packages\spyder\plugins下面ipythonconsole.py文件,添加如下代码即可,这样打开spyder就不会出现烦人的kernel died. Restarting...
按照该方法修改后继续训练(我把其中的数值从 18 改成了 36),但毫无用处……
如果你有缘看到这里,并且有解决方法的话,麻烦告诉我……
后来我自己尝试,把 batch_size 和 capacity 改小,问题就解决了。
更改后网络:
from PIL import Image
import tensorflow as tf
import numpy as np
import cv2
import os
# ---- Training hyper-parameters and I/O paths (revised version) ----
num_gens_to_wait=250  # steps between learning-rate decay events (see opt_step)
lr_decay=0.9  # multiplicative learning-rate decay factor
learing_rate=0.1  # initial learning rate (name is a typo of "learning_rate"; kept because opt_step references it)
CHECK_POINT_DIR='E:\\BaiduNetdiskDownload\\check\\'  # where model checkpoints are written
LOG_DIR='E:\\BaiduNetdiskDownload\\log'  # TensorBoard summary directory
IMG_W=64  # width images are crop/padded to in get_batches
IMG_H=64  # height images are crop/padded to in get_batches
batch_size=BATCH_SIZE=16  # images per training batch (two aliases; both are used below)
CAPACITY=20  # maximum number of elements the batching queue may hold
def get_files(file_path):
    """List the images under *file_path* and label them by filename.

    A filename containing 'cat' gets label 0, anything else label 1.
    The (path, label) pairs are shuffled together so each image keeps
    its own label.

    Returns:
        (image_list, label_list): list of path strings and list of ints.
    """
    paths = []
    labels = []
    for fname in os.listdir(file_path):
        paths.append(file_path + '/' + fname)
        labels.append(0 if 'cat' in fname else 1)
    # Stack paths and labels into one 2 x N array, transpose to N x 2,
    # and shuffle rows so the pairing survives the shuffle.  Note the
    # labels become strings inside the array and are converted back below.
    pairs = np.array([paths, labels])
    pairs = pairs.transpose()
    np.random.shuffle(pairs)
    image_list = list(pairs[:, 0])
    label_list = [int(v) for v in pairs[:, 1]]
    return image_list, label_list
def get(image, label, batch_size, i):
    """Load the i-th mini-batch as flattened, normalized 28*28 grayscale rows.

    Not referenced by the training code in this file (it uses get_batches
    instead), but kept as a NumPy/OpenCV alternative loader.

    Args:
        image: list of image file paths.
        label: list of integer labels aligned with *image*.
        batch_size: number of images per batch.
        i: batch index; images [batch_size*i, batch_size*(i+1)) are loaded.

    Returns:
        (imgs, labels): float32 array [batch_size, 784] of values in [0, 1]
        and uint8 array [batch_size, 1] of labels.
    """
    img_names = image[batch_size * i:batch_size * (i + 1)]
    # Bug fix: this buffer was uint8 — storing the /255-normalized floats
    # (all < 1.0) into it truncated every pixel to 0.  float32 keeps them.
    imgs = np.zeros((batch_size, 28 * 28), dtype=np.float32)
    labels = np.zeros((batch_size, 1), dtype=np.uint8)
    for idx in range(len(img_names)):
        img = cv2.imread(img_names[idx])
        # Take the first channel and resize to a single 784-pixel row.
        img = cv2.resize(img[:, :, 0], (1, 28 * 28)).flatten()
        imgs[idx] = img / 255
        labels[idx][0] = label[idx]
    return imgs, labels
#返回将是Tensor
def get_batches(image, label, resize_w, resize_h, batch_size, capacity):
    """Build a TF queue pipeline yielding (image_batch, labels_batch) tensors.

    Each file path is read, JPEG-decoded to an HxWx3 uint8 tensor,
    crop/padded to (resize_w, resize_h), standardized per image, and
    batched with a multi-threaded queue.

    Returns:
        image_batch: float32 [batch_size, resize_w, resize_h, 3].
        labels_batch: int64 [batch_size].
    """
    path_tensor = tf.cast(image, tf.string)
    label_tensor = tf.cast(label, tf.int64)
    # slice_input_producer feeds one (path, label) pair at a time into
    # the filename queue, shuffling by default.
    input_queue = tf.train.slice_input_producer([path_tensor, label_tensor])
    one_label = input_queue[1]
    raw_bytes = tf.read_file(input_queue[0])
    # Decode JPEG bytes into a uint8 3-D tensor (use decode_png for PNGs).
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize_image_with_crop_or_pad(decoded, resize_w, resize_h)
    standardized = tf.image.per_image_standardization(resized)
    image_batch, label_batch = tf.train.batch(
        [standardized, one_label],
        batch_size=batch_size, num_threads=64, capacity=capacity)
    image_batch = tf.cast(image_batch, tf.float32)
    labels_batch = tf.reshape(label_batch, [batch_size])
    return image_batch, labels_batch
def cnn_model(input_images, batch_size, train_logical=True):
    """Two conv/pool/LRN layers followed by three fully connected layers.

    Revised version: 3x3 kernels and 128-wide dense layers (smaller than
    the first version, to reduce memory pressure).

    Args:
        input_images: 4-D float tensor [batch, H, W, 3].
        batch_size: batch size, used to flatten the conv output.
        train_logical: unused; kept for interface compatibility.

    Returns:
        f3: unscaled logits of shape [batch_size, 2] (cat=0, dog=1).
    """
    def truncates_normal_var(name, shape, dtype):
        # Weight variable with truncated-normal initialization.
        return tf.get_variable(
            name=name, shape=shape, dtype=dtype,
            initializer=tf.truncated_normal_initializer(stddev=0.5))

    def zero_var(name, shape, dtype):
        # Bias variable initialized to zero.  Fix: the original used
        # truncated_normal_initializer(stddev=0.0), a confusing way to
        # say "zeros"; tf.zeros_initializer states the intent directly.
        return tf.get_variable(
            name=name, shape=shape, dtype=dtype,
            initializer=tf.zeros_initializer())

    with tf.variable_scope('conv1') as scope:
        conv1_kernel = truncates_normal_var(name='conv1_kernel', shape=[3, 3, 3, 64], dtype=tf.float32)
        conv1 = tf.nn.conv2d(input_images, conv1_kernel, [1, 1, 1, 1], padding='SAME')
        conv1_bias = zero_var('con1_b', [64], dtype=tf.float32)
        con1_add_bias = tf.nn.bias_add(conv1, conv1_bias)
        relu_conv1 = tf.nn.relu(con1_add_bias)
        pool1 = tf.nn.max_pool(relu_conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool_layer1')
        norma1 = tf.nn.lrn(pool1, depth_radius=5, bias=2.0, alpha=1e-3, beta=0.75, name='norm1')
    with tf.variable_scope('conv2') as scope:
        conv2_kernel = truncates_normal_var(name='conv2_kernel', shape=[3, 3, 64, 64], dtype=tf.float32)
        conv2 = tf.nn.conv2d(norma1, conv2_kernel, [1, 1, 1, 1], padding='SAME')
        conv2_bias = zero_var('con2_b', [64], dtype=tf.float32)
        con2_add_bias = tf.nn.bias_add(conv2, conv2_bias)
        relu_conv2 = tf.nn.relu(con2_add_bias)
        pool2 = tf.nn.max_pool(relu_conv2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool_layer2')
        norm2 = tf.nn.lrn(pool2, depth_radius=5, bias=2.0, alpha=1e-3, beta=0.75, name='norm2')
    # Flatten conv output to [batch_size, features] for the dense layers.
    reshaped_output = tf.reshape(norm2, [batch_size, -1])
    reshaped_dim = reshaped_output.get_shape()[1].value
    # Fully connected layers.
    with tf.variable_scope('f1') as scope:
        w1 = truncates_normal_var(name='w1', shape=[reshaped_dim, 128], dtype=tf.float32)
        b1 = zero_var(name='b1', shape=[128], dtype=tf.float32)
        f1 = tf.nn.relu(tf.matmul(reshaped_output, w1) + b1)
    with tf.variable_scope('f2') as scope:
        w2 = truncates_normal_var(name='w2', shape=[128, 128], dtype=tf.float32)
        b2 = zero_var(name='b2', shape=[128], dtype=tf.float32)
        f2 = tf.nn.relu(tf.matmul(f1, w2) + b2)
    with tf.variable_scope('softmax_linear') as scope:
        # Final linear layer: no activation, returns raw logits.
        w3 = truncates_normal_var(name='w3', shape=[128, 2], dtype=tf.float32)
        b3 = zero_var(name='b3', shape=[2], dtype=tf.float32)
        f3 = tf.matmul(f2, w3) + b3
    return f3
def opt_step(my_loss, gen_num):
    """Build the SGD training op with an exponentially decaying learning rate.

    Bug fix: `global_step=gen_num` must be passed to minimize(); without it
    the step counter never increments, so exponential_decay always returns
    the initial learning rate and no decay ever happens.

    Args:
        my_loss: scalar loss tensor to minimize.
        gen_num: non-trainable tf.Variable serving as the global step.

    Returns:
        The training op.
    """
    decayed_lr = tf.train.exponential_decay(
        learing_rate, gen_num, num_gens_to_wait, lr_decay, staircase=True)
    my_optimizer = tf.train.GradientDescentOptimizer(decayed_lr)
    return my_optimizer.minimize(my_loss, global_step=gen_num)
def my_loss(f3, targets):
    """Mean sparse softmax cross-entropy between logits and integer labels.

    The squeeze/cast step from the earlier version is intentionally left
    out here — get_batches already yields a [batch_size] int64 label vector.

    Args:
        f3: logits tensor [batch_size, 2].
        targets: int64 label vector [batch_size].

    Returns:
        Scalar mean cross-entropy loss.
    """
    per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=f3, labels=targets)
    return tf.reduce_mean(per_example)
def accuracy(f3, targets):
    """Fraction of rows whose argmax over logits equals the integer target.

    Args:
        f3: logits tensor [batch_size, 2].
        targets: integer class labels; squeezed to a [batch_size] vector.

    Returns:
        Scalar accuracy in [0, 1].
    """
    int_targets = tf.squeeze(tf.cast(targets, tf.int32))
    predicted = tf.cast(tf.argmax(f3, 1), tf.int32)
    hits = tf.equal(predicted, int_targets)
    return tf.reduce_mean(tf.cast(hits, tf.float32))
#训练程序
#if __name__=="__main__":
def train():
    """Train the cat/dog CNN for 1000 steps, checkpointing at step 100.

    Builds the input pipeline, model, loss, optimizer and accuracy ops,
    starts the queue runners, and runs the training loop, printing loss
    and accuracy every 100 steps.
    """
    train_dir = 'E:\\BaiduNetdiskDownload\\train/'
    # File paths and integer labels (0 = cat, 1 = dog).
    image_paths, image_labels = get_files(train_dir)
    # Queue-backed batches of standardized images.
    X, Y_label = get_batches(image_paths, image_labels, IMG_W, IMG_H, BATCH_SIZE, CAPACITY)
    Y_pre = cnn_model(X, batch_size, train_logical=True)
    gen_num = tf.Variable(0, trainable=False)
    my_losss = my_loss(Y_pre, Y_label)
    my_opt_step = opt_step(my_losss, gen_num)
    my_accuracy = accuracy(Y_pre, Y_label)
    sess = tf.Session()
    # Bug fix: initialize_all_variables() is long deprecated.
    sess.run(tf.global_variables_initializer())
    # merge_all() returns None when the graph defines no summaries.
    summary_op = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(LOG_DIR, sess.graph)
    saver = tf.train.Saver()
    coord = tf.train.Coordinator()
    # Start the queue runners so get_batches actually produces data.
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        for step in np.arange(1000):
            if coord.should_stop():
                break
            _, tra_loss, tra_acc = sess.run([my_opt_step, my_losss, my_accuracy])
            if step % 100 == 0:
                # Bug fix: accuracy is a fraction; scale to percent for '%%'.
                print('Step %d, train loss=%.2f, train accuracy = %.2f%%' % (step, tra_loss, tra_acc * 100.0))
                # Bug fix: sess.run(None) raises when no summaries exist.
                if summary_op is not None:
                    summary_str = sess.run(summary_op)
                    train_writer.add_summary(summary_str, step)
            # Save the model once, at step 100.
            if (step + 1) == 100:
                # Bug fix: save to a model file inside the checkpoint
                # directory; passing the bare directory produces nameless
                # checkpoint files.
                saver.save(sess, os.path.join(CHECK_POINT_DIR, 'model.ckpt'), global_step=step)
    except tf.errors.OutOfRangeError:
        print('Done training')
    finally:
        coord.request_stop()
        coord.join(threads)
        train_writer.close()
        sess.close()
下面去预测:
def evaluate_one_image(image_array):
    """Classify a single 64x64x3 image array as cat (0) or dog (1).

    Restores the latest checkpoint from CHECK_POINT_DIR and returns a
    human-readable result string with the predicted class probability.

    Args:
        image_array: numpy array of shape (64, 64, 3).

    Returns:
        Result string describing the predicted class and probabilities.
    """
    with tf.Graph().as_default():
        BATCH_SIZE = 1  # evaluating one image, so batch size is 1
        # Match the training pipeline: float cast + per-image standardization.
        image = tf.cast(image_array, tf.float32)
        image = tf.image.per_image_standardization(image)
        # The model expects a 4-D tensor: [batch, height, width, channels].
        image = tf.reshape(image, [1, 64, 64, 3])
        # Bug fix: `inference` is undefined in this file — the network
        # builder defined above is cnn_model.
        logit = cnn_model(image, BATCH_SIZE)
        # cnn_model returns raw logits; softmax turns them into probabilities.
        logit = tf.nn.softmax(logit)
        saver = tf.train.Saver()
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            print ('从指定的路径中加载模型。。。。')
            ckpt = tf.train.get_checkpoint_state(CHECK_POINT_DIR)
            if ckpt and ckpt.model_checkpoint_path:
                # The trailing "-NNN" of the checkpoint file is the step count.
                global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
                saver.restore(sess, ckpt.model_checkpoint_path)
                print('模型加载成功, 训练的步数为 %s' % global_step)
            else:
                print ('模型加载失败,,,文件没有找到')
            prediction = sess.run(logit)
            max_index = np.argmax(prediction)
            print (prediction)
            # Bug fix: prediction[:, k] is an array, not a scalar; '%f' on an
            # array raises on modern NumPy.  Index the scalar directly.
            probs = ','.join(str(i) for i in prediction[0])
            if max_index == 0:
                result = ('这是猫的概率: %.6f, 预测结果是 [%s]' % (prediction[0, 0], probs))
            else:
                result = ('这是狗的概率: %.6f, 预测结果是 [%s]' % (prediction[0, 1], probs))
            return result
if __name__ == '__main__':
    # Bug fix: plt is used below but matplotlib was never imported anywhere
    # in this file; import it here so the preview actually works.
    import matplotlib.pyplot as plt
    # Open the image to classify.
    image = Image.open('E:\\BaiduNetdiskDownload\\test1/2.jpg')
    # Show it for a visual sanity check.
    plt.imshow(image)
    plt.show()
    # The network was trained on 64x64 inputs.
    image = image.resize([64, 64])
    image = np.array(image)
    print(evaluate_one_image(image))
不过 train 的效果并不好……