目录
- 模型的保存与重载
- 神经网络的基本概念
- 卷积的基本概念
下面的代码进行tensorflow模型的保存
# Save the variables of the current session to a checkpoint that lives in the
# '3-2MNIST' folder next to this script.
with tf.Session() as sess:
    saver = tf.train.Saver()
    # Named `base_dir` rather than `dir` so the builtin dir() is not shadowed.
    base_dir = os.path.dirname(os.path.realpath(__file__))
    # Join path components instead of concatenating strings by hand.
    model_path = os.path.join(base_dir, '3-2MNIST', 'model.ckpt')
    saver.save(sess=sess, save_path=model_path)
下面的代码用于重新加载已保存的模型
# Restore a previously saved checkpoint into the existing session, if present.
model_name = 'ckp-1000'
model_path = os.path.join(model_dir, model_name)
# The presence of '<prefix>.index' is used as the signal that the checkpoint
# exists on disk.
checkpoint_found = os.path.exists(model_path + '.index')
if not checkpoint_found:
    print('model {} does not exist'.format(model_path))
else:
    saver.restore(sess, model_path)
    print('model restore from {}'.format(model_path))
# Training then resumes in the previous session, so the accuracy keeps
# increasing.
def prediction_hand_wirte(nm):
    """Print the digit class predicted for ``nm`` by the saved softmax model.

    The network structure (a single softmax layer) has to be declared again
    here before the checkpoint can be restored into it.

    Args:
        nm: The new input data, fed to the ``[None, 784]`` float32
            placeholder (one flattened sample per row).

    Returns:
        None. The predicted class index of every row is printed.
    """
    # Named `base_dir` rather than `dir` so the builtin dir() is not shadowed.
    base_dir = os.path.dirname(os.path.realpath(__file__))
    model_path = os.path.join(base_dir, '3-2MNIST', 'model.ckpt')

    # Build the network in its own graph. In the shared default graph every
    # call would add two more auto-named variables, and the Saver would then
    # try to restore names that the checkpoint does not contain.
    with tf.Graph().as_default():
        x = tf.placeholder(tf.float32, [None, 784])
        W = tf.Variable(tf.zeros([784, 10]))
        b = tf.Variable(tf.zeros([10]))
        prediction = tf.nn.softmax(tf.matmul(x, W) + b)
        index = tf.argmax(prediction, 1)

        saver = tf.train.Saver()
        # The context manager closes the session, which was previously leaked.
        with tf.Session() as sess:
            saver.restore(sess=sess, save_path=model_path)
            print(sess.run(index, feed_dict={x: nm}))
下面的函数用于将自己手写的图片转化成MNIST手写数字数据集的矩阵形式。注意:这些函数不能直接转化手写后用手机拍摄的图片,因为手机拍出来的背景不是纯白色;只能使用在手机画图工具上写出来的样本。
import os
from PIL import Image, ImageFilter
import numpy as np
def imageprepare(argv):
    """Convert an image file into a normalised MNIST-style pixel row.

    The image is converted to greyscale, scaled so that its longer side is
    20 pixels (keeping the aspect ratio), sharpened, and pasted centred onto
    a white 28x28 canvas.

    Args:
        argv: Location of the image file (e.g. a PNG), as accepted by
            ``Image.open``.

    Returns:
        A NumPy array of shape ``(1, 784)`` with values in ``[0, 1]``, where
        0 is pure white and 1 is pure black.
    """
    # convert() returns an independent copy, so the file handle can be closed
    # straight away instead of being left open.
    with Image.open(argv) as source:
        im = source.convert('L')
    width = float(im.size[0])
    height = float(im.size[1])
    newImage = Image.new('L', (28, 28), 255)  # white canvas of 28x28 pixels

    # Image.LANCZOS is the filter formerly also exposed as Image.ANTIALIAS;
    # that alias was removed in Pillow 10.
    if width > height:
        # Width is the longer side: it becomes 20 pixels and the height is
        # scaled by the same ratio. max(1, ...) covers the rare case where
        # rounding would give 0 pixels.
        nheight = max(1, int(round(20.0 / width * height)))
        img = im.resize((20, nheight), Image.LANCZOS)
        img = img.filter(ImageFilter.SHARPEN)
        wtop = int(round((28 - nheight) / 2))  # vertical offset that centres it
        newImage.paste(img, (4, wtop))
    else:
        # Height is the longer side (or both are equal): it becomes 20 pixels
        # and the width is scaled by the same ratio, with a 1-pixel minimum.
        nwidth = max(1, int(round(20.0 / height * width)))
        img = im.resize((nwidth, 20), Image.LANCZOS)
        img = img.filter(ImageFilter.SHARPEN)
        wleft = int(round((28 - nwidth) / 2))  # horizontal offset that centres it
        newImage.paste(img, (wleft, 4))

    # Invert and scale in one vectorised step: white (255) -> 0, black (0) -> 1.
    pixels = np.asarray(newImage, dtype=np.float64)
    return ((255.0 - pixels) / 255.0).reshape(1, 784)
# 3
def getTestPicArray(filename):
    """Load an image as a ``(1, 784)`` float32 row scaled to ``[0, 1]``.

    The image is resized to 28x28 and converted to greyscale. If more pixels
    are brighter than the threshold than not, a message is printed, the image
    is inverted, and inverted pixels that fall below the threshold are set to
    0.

    Args:
        filename: Path of the image file to load.

    Returns:
        A float32 NumPy array of shape ``(1, 784)`` with the pixel values
        multiplied by ``1 / 255``.
    """
    x_s = 28
    y_s = 28
    threshold = 100

    # resize() returns an independent image, so the file handle can be closed
    # right after it. Image.LANCZOS replaces the Image.ANTIALIAS alias that
    # was removed in Pillow 10 (same filter).
    with Image.open(filename) as im:
        out = im.resize((x_s, y_s), Image.LANCZOS)
    im_arr = np.array(out.convert('L'))

    # Count bright versus dark pixels over the whole array at once.
    num255 = int(np.count_nonzero(im_arr > threshold))
    num0 = im_arr.size - num255

    if num255 > num0:
        print("convert!")
        im_arr = 255 - im_arr
        # Clear the faint values left over after the inversion.
        im_arr[im_arr < threshold] = 0

    nm = im_arr.reshape((1, 784)).astype(np.float32)
    return np.multiply(nm, 1.0 / 255.0)
神经网络最简单的结构
- 定义x,y,w,b
- 计算出矩阵y
- 计算出loss
- 使用优化器最小化loss
- 进行差别分析
- 收敛
- 遍历
常用且较复杂的api
- 多分类:tf.nn.softmax(),将神经网络输出变成概率值
- 求矩阵中最大值所在的下标:tf.argmax()(适用于求one_hot编码的输出,或者是求多分类输出中最大值对应的类别)
- 求均值:tf.reduce_mean()
- 数据类型转化:tf.cast(),如将float转化为整数
- 二分类:tf.nn.sigmoid(),将神经网络输出变成概率值
api tf.losses.sparse_softmax_cross_entropy()功能如下
loss = tf.losses.sparse_softmax_cross_entropy(labels=y, logits=y_)
# What the call above does, step by step:
#   y_ (logits) -> softmax probabilities
#   y  (labels) -> one-hot encoding
#   loss = -sum(y * log(y_))   # cross-entropy of the two
x = tf.placeholder(tf.float32, [None, 784]) 中,None 表示输入的样本数不确定
难点:数据的矩阵转化,结构中对矩阵的一致性和数据类型的一致性
神经网络反向传播
卷积神经网络
size的关系
计算
步长
填充使得输出的size不变
多核使得提取多特征
为什么要使用非线性函数来做神经网络的激活函数呢?
高级的层次和低级的层次是使用全连接进行连接的,相当于每个层次之间进行矩阵的操作。如果不使用非线性函数,而是使用线性函数作为激活函数,那么多个线性变换叠加之后仍然是一个线性变换,多层的神经网络就相当于单层的神经网络。
总结:
池化,最大值池化
平均值池化
优缺点:
tensorflow代码实现
# Reshape every flat sample into a 3-channel 32 x 32 picture, laid out
# channel-first: [batch, 3, 32, 32].
x_image = tf.reshape(x, [-1, 3, 32, 32])
# Move the channel axis to the end: [batch, 32, 32, 3].
x_image = tf.transpose(x_image, perm=[0, 2, 3, 1])

# Convolution layer 1. Its output is what is called a neuron map, feature map
# or output image. 'same' padding keeps the spatial size at 32 x 32.
conv1 = tf.layers.conv2d(
    inputs=x_image,
    filters=32,  # number of output channels
    kernel_size=(3, 3),
    padding='same',
    activation=tf.nn.relu,
    name='conv1',
)
# Pooling layer 1: 2 x 2 max pooling with stride 2 -> 16 x 16.
pooling1 = tf.layers.max_pooling2d(
    inputs=conv1,
    pool_size=(2, 2),
    strides=(2, 2),
    name='pool1',
)

# Convolution layer 2 keeps 16 x 16; pooling layer 2 halves it to 8 x 8.
conv2 = tf.layers.conv2d(
    inputs=pooling1,
    filters=32,  # number of output channels
    kernel_size=(3, 3),
    padding='same',
    activation=tf.nn.relu,
    name='conv2',
)
pooling2 = tf.layers.max_pooling2d(
    inputs=conv2,
    pool_size=(2, 2),
    strides=(2, 2),
    name='pool2',
)

# Convolution layer 3 keeps 8 x 8; pooling layer 3 halves it to 4 x 4 x 32.
conv3 = tf.layers.conv2d(
    inputs=pooling2,
    filters=32,  # number of output channels
    kernel_size=(3, 3),
    padding='same',
    activation=tf.nn.relu,
    name='conv3',
)
pooling3 = tf.layers.max_pooling2d(
    inputs=conv3,
    pool_size=(2, 2),
    strides=(2, 2),
    name='pool3',
)

# Flatten to a 2-D matrix of shape [None, 4 * 4 * 32].
flatten = tf.layers.flatten(inputs=pooling3)
# Fully connected layer producing 10 outputs per sample.
y_ = tf.layers.dense(inputs=flatten, units=10)
最后,将步长设置为小数,可以使图片变回原来的形状。
# Hand-written linear layer: y_ = x * w + b.
# NOTE(review): w is [input_dim, 1] and b is [1], i.e. a single output unit,
# while the dense layer these notes present as its equivalent has 10 units --
# confirm which output size is intended.
input_dim = x.get_shape()[-1]
w = tf.get_variable(
    name='w',
    shape=[input_dim, 1],
    initializer=tf.random_normal_initializer(mean=0, stddev=1),
)
b = tf.get_variable(
    name='b',
    shape=[1],
    initializer=tf.constant_initializer(value=0.0),
)
y_ = tf.matmul(x, w) + b
上面这段代码在结构上等同于
# Fully connected layer with 10 output units applied to x.
y_ = tf.layers.dense(inputs=x, units=10)