Without dropout:
import tensorflow as tf
#the scikit-learn package must be installed before importing sklearn
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
#load data
digits=load_digits()
X=digits.data #8*8 images of the digits 0-9, flattened to 64 features
y=digits.target
y=LabelBinarizer().fit_transform(y) #one-hot encode y: 10 digits 0-9; e.g. if y is 1, the second element is 1
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=.3)
def add_layer(inputs,in_size,out_size,layer_name,activation_function=None):
    Weights=tf.Variable(tf.random_normal([in_size,out_size]))
    biases=tf.Variable(tf.zeros([1,out_size])+0.1)
    Wx_plus_b=tf.matmul(inputs,Weights)+biases
    if activation_function is None:
        outputs=Wx_plus_b
    else:
        outputs=activation_function(Wx_plus_b)
    tf.summary.histogram(layer_name+'/outputs',outputs) #required for the TensorBoard histograms
    return outputs
#define placeholder for inputs to network
xs=tf.placeholder(tf.float32,[None,64])#8*8
ys=tf.placeholder(tf.float32,[None,10])
#add hidden and output layers
l1=add_layer(xs,64,100,'l1',activation_function=tf.nn.tanh) #hidden layer
prediction=add_layer(l1,100,10,'l2',activation_function=tf.nn.softmax) #output layer
#the loss between prediction and the real data
cross_entropy=tf.reduce_mean(-tf.reduce_sum(ys*tf.log(prediction),reduction_indices=[1]))
tf.summary.scalar('loss',cross_entropy)
train_step=tf.train.GradientDescentOptimizer(0.6).minimize(cross_entropy)
sess=tf.Session()
merged=tf.summary.merge_all()
train_writer=tf.summary.FileWriter('/pycharmfiles/tensorboard/logs/train',sess.graph)
test_writer=tf.summary.FileWriter('/pycharmfiles/tensorboard/logs/test',sess.graph)
sess.run(tf.global_variables_initializer())
for i in range(1000):
    sess.run(train_step,feed_dict={xs:X_train,ys:y_train})
    if i%50==0:
        #record train and test loss
        train_result=sess.run(merged,feed_dict={xs:X_train,ys:y_train})
        test_result=sess.run(merged,feed_dict={xs:X_test,ys:y_test})
        train_writer.add_summary(train_result,i)
        test_writer.add_summary(test_result,i)
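To view the curves, launch TensorBoard pointed at the parent log directory, e.g. tensorboard --logdir /pycharmfiles/tensorboard/logs, and open the URL it prints; the train and test runs then show up as two curves on the loss chart.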
The TensorBoard plot (train vs. test loss):
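Besides the loss curves, the train/test gap can also be checked numerically. A minimal sketch, reusing the sess, prediction, xs and ys defined above; the accuracy op itself is an addition for illustration, not part of the original script:

correct_prediction=tf.equal(tf.argmax(prediction,1),tf.argmax(ys,1)) #compare predicted and true digit
accuracy=tf.reduce_mean(tf.cast(correct_prediction,tf.float32)) #fraction of correct predictions
print(sess.run(accuracy,feed_dict={xs:X_train,ys:y_train})) #train accuracy
print(sess.run(accuracy,feed_dict={xs:X_test,ys:y_test})) #test accuracy

A train accuracy noticeably higher than the test accuracy is the same overfitting that the diverging loss curves show.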
Adding dropout takes five changes (the full modified add_layer is sketched after step 5):
1. Add a placeholder for the keep probability, i.e. what fraction of the results is kept rather than dropped:
keep_prob=tf.placeholder(tf.float32) #fraction of results kept (not dropped)
2. During training, feed keep_prob 0.5, so each activation has a 0.5 probability of being dropped:
sess.run(train_step,feed_dict={xs:X_train,ys:y_train,keep_prob:0.5})
3. When recording summaries, feed keep_prob 1 so nothing is dropped:
train_result=sess.run(merged,feed_dict={xs:X_train,ys:y_train,keep_prob:1}) #drop nothing
test_result=sess.run(merged,feed_dict={xs:X_test,ys:y_test,keep_prob:1})
4. In add_layer, apply dropout to Wx_plus_b before the activation function:
Wx_plus_b=tf.nn.dropout(Wx_plus_b,keep_prob) #keep each unit with probability keep_prob
5. Too many hidden neurons can make the recorded outputs scatter, so reduce the hidden layer from 100 to 50 neurons:
l1=add_layer(xs,64,50,'l1',activation_function=tf.nn.tanh) #hidden layer
prediction=add_layer(l1,50,10,'l2',activation_function=tf.nn.softmax) #output layer
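For context, this is how step 4 fits into the add_layer function from the first listing; a sketch assuming keep_prob is the placeholder added in step 1, accessed as a module-level variable as in the original layout:

def add_layer(inputs,in_size,out_size,layer_name,activation_function=None):
    Weights=tf.Variable(tf.random_normal([in_size,out_size]))
    biases=tf.Variable(tf.zeros([1,out_size])+0.1)
    Wx_plus_b=tf.matmul(inputs,Weights)+biases
    Wx_plus_b=tf.nn.dropout(Wx_plus_b,keep_prob) #step 4: drop activations before the nonlinearity
    if activation_function is None:
        outputs=Wx_plus_b
    else:
        outputs=activation_function(Wx_plus_b)
    tf.summary.histogram(layer_name+'/outputs',outputs)
    return outputs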
Result:
The train and test curves now largely overlap, so there is no overfitting.
Notes:
- With too few hidden neurons the network underfits and simply fails to converge.
- If the curves are still unsatisfactory after adding dropout, try lowering the learning rate.
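Lowering the learning rate is a one-line change to the optimizer defined above; 0.1 here is just an illustrative value, not one from the original post:

train_step=tf.train.GradientDescentOptimizer(0.1).minimize(cross_entropy) #was 0.6; 0.1 is an illustrative smaller rate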