Keras实现神经网络
import numpy as np
np.random.seed(2017) #为了复现
from keras.datasets import mnist
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.optimizers import RMSprop
Using TensorFlow backend.
/Users/yuyin/anaconda/lib/python2.7/site-packages/pandas/computation/__init__.py:19: UserWarning: The installed version of numexpr 2.4.4 is not supported in pandas and will be not be used
UserWarning)
数据格式说明
- x为28x28的矩阵(60000train+10000test)
- y是0-9的数字
# Load the MNIST train/test split bundled with Keras.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Inputs: flatten every image into a single row (-1 lets NumPy work out the
# column count) and divide by 255 to bring the 0-255 pixel values into 0-1.
X_train = X_train.reshape(len(X_train), -1) / 255.
X_test = X_test.reshape(len(X_test), -1) / 255.

# Targets: turn the digit labels into one-hot rows with 10 classes.
y_train = np_utils.to_categorical(y_train, num_classes=10)
y_test = np_utils.to_categorical(y_test, num_classes=10)
# Build the network layer by layer: one 32-unit hidden layer with ReLU,
# followed by a 10-unit output layer with softmax.
model = Sequential()
model.add(Dense(32, input_dim=784))  # 784 inputs -> 32 outputs
model.add(Activation('relu'))
model.add(Dense(10))  # input size is taken from the previous layer's output
model.add(Activation('softmax'))
# Optimizer: RMSprop with explicitly spelled-out hyper-parameters.
rmsprop = RMSprop(
    lr=0.001,
    rho=0.9,
    epsilon=1e-08,
    decay=0.0
)

# Attach the loss (categorical cross-entropy) and the metric reported
# during training/evaluation (accuracy) to the model.
model.compile(
    loss='categorical_crossentropy',
    optimizer=rmsprop,
    metrics=['accuracy']
)
# Train for 2 epochs, feeding 32 samples per batch.
model.fit(X_train, y_train, batch_size=32, epochs=2)

# Evaluate on the test set; with metrics=['accuracy'] compiled in, the
# result unpacks into the loss followed by the accuracy.
test_metrics = model.evaluate(X_test, y_test)
loss, accuracy = test_metrics
print('test loss: ', loss)
print('test accuracy: ', accuracy)
Epoch 1/2
60000/60000 [==============================] - 4s - loss: 0.3545 - acc: 0.9015
Epoch 2/2
60000/60000 [==============================] - 4s - loss: 0.2012 - acc: 0.9425
9248/10000 [==========================>...] - ETA: 0s('test loss: ', 0.18858601193577051)
('test accuracy: ', 0.94530000000000003)
# Predict class probabilities for every test sample.
y_pre = model.predict(X_test)

# Convert each row of probabilities to a digit: the index of the largest
# entry in that row.
y_num = list(map(np.argmax, y_pre))
TensorFlow实现神经网络
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# Read the MNIST data set from the local 'MNIST_data' directory with the
# labels returned as one-hot vectors.
mnist = input_data.read_data_sets(train_dir='MNIST_data', one_hot=True)
/Users/yuyin/anaconda/lib/python2.7/site-packages/pandas/computation/__init__.py:19: UserWarning: The installed version of numexpr 2.4.4 is not supported in pandas and will be not be used
UserWarning)
Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
数据格式说明
- x为nx784的矩阵(55000train+10000test)
- y是nx10的矩阵(one-hot)
# Pull the image and label arrays out of the train and test splits.
X_train = mnist.train.images
y_train = mnist.train.labels
X_test = mnist.test.images
y_test = mnist.test.labels
# Accuracy helper
def compute_accuracy(x_val, y_val):
    """Return the fraction of samples in ``x_val`` that are classified correctly.

    Runs the module-level ``prediction`` tensor in the module-level session
    ``sess`` with ``x_val`` fed to the ``xs`` placeholder, then compares the
    index of the largest value in every predicted row with the index of the
    largest value in the matching row of ``y_val``.

    The comparison is done in NumPy on purpose. Building it from
    tf.equal / tf.argmax / tf.reduce_mean inside this function would add new
    nodes to the default graph on every call, so the graph would keep growing
    while accuracy is reported during training. It also avoids a second
    ``sess.run``.

    Args:
        x_val: batch of inputs fed to ``xs``.
        y_val: one-hot labels, one row per sample.

    Returns:
        The accuracy as a NumPy float32 scalar.
    """
    y_pre = sess.run(prediction, feed_dict={xs: x_val})
    # argmax along axis 1 gives the index of the largest entry in each row.
    hits = np.argmax(y_pre, axis=1) == np.argmax(y_val, axis=1)
    # Cast the booleans to float32 so the mean is the share of correct rows.
    return hits.astype(np.float32).mean()
# Randomly initialised parameters
def weight_variable(shape):
    """Return a tf.Variable of ``shape`` drawn from a truncated normal (stddev 0.1)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
def bias_variable(shape):
    """Return a tf.Variable of ``shape`` with every entry set to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))
# Input placeholders: any number of rows, 784 features / 10 one-hot classes.
xs = tf.placeholder(tf.float32, shape=[None, 784])
ys = tf.placeholder(tf.float32, shape=[None, 10])

# Hidden layer: 784 -> 32 with ReLU.
W_h1 = weight_variable([784, 32])
b_h1 = bias_variable([32])
z_h1 = tf.matmul(xs, W_h1) + b_h1
x_h1 = tf.nn.relu(z_h1)

# Output layer: 32 -> 10, turned into class probabilities by softmax.
W_h2 = weight_variable([32, 10])
b_h2 = bias_variable([10])
logits = tf.matmul(x_h1, W_h2) + b_h2
prediction = tf.nn.softmax(logits)
# Loss: cross-entropy between the one-hot labels and the predicted
# probabilities, summed over the classes of each row and averaged over rows.
# The probabilities are clipped away from 0 before the log: once a softmax
# output underflows to 0, tf.log returns -inf and 0 * -inf makes the loss
# (and every later gradient) NaN.
cross_entropy = tf.reduce_mean(
    -tf.reduce_sum(ys * tf.log(tf.clip_by_value(prediction, 1e-10, 1.0)),
                   reduction_indices=[1]))
# Training op: plain gradient descent with a learning rate of 0.5 that
# minimises the cross-entropy loss.
# (Alternative tried by the author: tf.train.AdamOptimizer(1e-4).)
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.5)
train_step = optimizer.minimize(cross_entropy)
# Op that initialises every variable created so far.
init = tf.global_variables_initializer()

# Open the session and run the initialiser in it.
sess = tf.Session()
sess.run(init)
# Training: one pass over the training set in consecutive mini-batches.
# The session plays the role of the model here: it holds the current
# values of the weights and biases.
batch_size = 100
n_chunk = len(X_train) // batch_size  # number of full batches
for i, start_index in enumerate(range(0, n_chunk * batch_size, batch_size)):
    end_index = start_index + batch_size
    batch_xs = X_train[start_index:end_index]
    batch_ys = y_train[start_index:end_index]
    sess.run(train_step, feed_dict={xs: batch_xs, ys: batch_ys})
    # Report test-set accuracy every 50 batches.
    if i % 50 == 0:
        print(compute_accuracy(X_test, y_test))
0.9226
0.9414
0.9438
0.9447
0.9464
0.9454
0.9504
0.9524
0.9405
0.9491
0.9538
axis维度
三维数组即是这些二维平面层叠(stacked)出来的结果。
对三维数组沿某个轴做运算(如 sum、argmax)时:
(axis=0)表示跨全部平面,对各平面上对应位置的元素进行运算,
(axis=1),在每一个平面内沿每一列(跨行)进行运算,
(axis=2),在每一个平面内沿每一行(跨列)进行运算。