一、线性问题和非线性问题
1、线性问题
某医院想用神经网络对已经有的病例进行分类,数据样本特征x包括病人的年龄x1和肿瘤的大小x2,(x[x1,x2]),对应的标签为良性或恶性(0、1)
二分类:
(1)生成数据集
- import tensorflow as tf
- import matplotlib.pyplot as plt
- import numpy as np
- from sklearn.utils import shuffle
- #模拟数据点
- def generate(sample_size, mean, cov, diff,regression):
- num_classes = 2 #len(diff)
- samples_per_class = int(sample_size/2)
- X0 = np.random.multivariate_normal(mean, cov, samples_per_class)
- Y0 = np.zeros(samples_per_class)
- for ci, d in enumerate(diff):
- # ci=0 d=3
- X1 = np.random.multivariate_normal(mean+d, cov, samples_per_class)
- Y1 = (ci+1)*np.ones(samples_per_class)
- X0 = np.concatenate((X0,X1))
- Y0 = np.concatenate((Y0,Y1))
- if regression==False: #one-hot 0 into the vector "1 0
- class_ind = [Y==class_number for class_number in range(num_classes)]
- Y = np.asarray(np.hstack(class_ind), dtype=np.float32)
- X, Y = shuffle(X0, Y0)
- return X,Y
- input_dim = 2
- np.random.seed(10)
- num_classes =2
- mean = np.random.randn(num_classes)
- cov = np.eye(num_classes)
- X, Y = generate(1000, mean, cov, [3.0],True)
- colors = ['r' if l == 0 else 'b' for l in Y[:]]
- plt.scatter(X[:,0], X[:,1], c=colors)
- plt.xlabel("Scaled age (in yrs)")
- plt.ylabel("Tumor size (in cm)")
- plt.show()
(2)构建网络模型
- lab_dim = 1
- # tf Graph Input
- input_features = tf.placeholder(tf.float32, [None, input_dim])
- input_lables = tf.placeholder(tf.float32, [None, lab_dim])
- # Set model weights
- W = tf.Variable(tf.random_normal([input_dim,lab_dim]), name="weight")
- b = tf.Variable(tf.zeros([lab_dim]), name="bias")
- output =tf.nn.sigmoid( tf.matmul(input_features, W) + b)
- cross_entropy = -(input_lables * tf.log(output) + (1 - input_lables) * tf.log(1 - output))
- ser= tf.square(input_lables - output)
- loss = tf.reduce_mean(cross_entropy)
- err = tf.reduce_mean(ser)
- optimizer = tf.train.AdamOptimizer(0.04) #尽量用这个--收敛快,会动态调节梯度
- train = optimizer.minimize(loss) # let the optimizer train
(3)训练
- maxEpochs = 50
- minibatchSize = 25
- # 启动session
- with tf.Session() as sess:
- sess.run(tf.global_variables_initializer())
- for epoch in range(maxEpochs):
- sumerr=0
- for i in range(np.int32(len(Y)/minibatchSize)):
- x1 = X[i*minibatchSize:(i+1)*minibatchSize,:]
- y1 = np.reshape(Y[i*minibatchSize:(i+1)*minibatchSize],[-1,1])
- tf.reshape(y1,[-1,1])
- _,lossval, outputval,errval = sess.run([train,loss,output,err], feed_dict={input_features: x1, input_lables:y1})
- sumerr =sumerr+errval
- print ("Epoch:", '%04d' % (epoch+1), "cost=","{:.9f}".format(lossval),"err=",sumerr/minibatchSize)
(4)可视化
- train_X, train_Y = generate(100, mean, cov, [3.0],True)
- colors = ['r' if l == 0 else 'b' for l in train_Y[:]]
- plt.scatter(train_X[:,0], train_X[:,1], c=colors)
- #plt.scatter(train_X[:, 0], train_X[:, 1], c=train_Y)
- #plt.colorbar()
- # x1w1+x2*w2+b=0
- # x2=-x1* w1/w2-b/w2
- # a*x+b*y+c = 0
- x = np.linspace(-1,8,200)
- y=-x*(sess.run(W)[0]/sess.run(W)[1])-sess.run(b)/sess.run(W)[1]
- plt.plot(x,y, label='Fitted line')
- plt.legend()
- plt.show()
多分类:
- import tensorflow as tf
- import numpy as np
- import matplotlib.pyplot as plt
- from sklearn.utils import shuffle
- from matplotlib.colors import colorConverter, ListedColormap
- # 对于上面的fit可以这么扩展变成动态的
- from sklearn.preprocessing import OneHotEncoder
- def onehot(y,start,end):
- ohe = OneHotEncoder()
- a = np.linspace(start,end-1,end-start)
- b =np.reshape(a,[-1,1]).astype(np.int32)
- ohe.fit(b)
- c=ohe.transform(y).toarray()
- return c
- #
- def generate(sample_size, num_classes, diff,regression=False):
- np.random.seed(10)
- mean = np.random.randn(2)
- cov = np.eye(2)
- #len(diff)
- samples_per_class = int(sample_size/num_classes)
- X0 = np.random.multivariate_normal(mean, cov, samples_per_class)
- Y0 = np.zeros(samples_per_class)
- for ci, d in enumerate(diff):
- X1 = np.random.multivariate_normal(mean+d, cov, samples_per_class)
- Y1 = (ci+1)*np.ones(samples_per_class)
- X0 = np.concatenate((X0,X1))
- Y0 = np.concatenate((Y0,Y1))
- #print(X0, Y0)
- if regression==False: #one-hot 0 into the vector "1 0
- Y0 = np.reshape(Y0,[-1,1])
- #print(Y0.astype(np.int32))
- Y0 = onehot(Y0.astype(np.int32),0,num_classes)
- #print(Y0)
- X, Y = shuffle(X0, Y0)
- #print(X, Y)
- return X,Y
- # Ensure we always get the same amount of randomness
- np.random.seed(10)
- input_dim = 2
- num_classes =3
- X, Y = generate(2000,num_classes, [[3.0],[3.0,0]],False)
- aa = [np.argmax(l) for l in Y]
- colors =['r' if l == 0 else 'b' if l==1 else 'y' for l in aa[:]]
- plt.scatter(X[:,0], X[:,1], c=colors)
- plt.xlabel("Scaled age (in yrs)")
- plt.ylabel("Tumor size (in cm)")
- plt.show()
- lab_dim = num_classes
- # tf Graph Input
- input_features = tf.placeholder(tf.float32, [None, input_dim])
- input_lables = tf.placeholder(tf.float32, [None, lab_dim])
- # Set model weights
- W = tf.Variable(tf.random_normal([input_dim,lab_dim]), name="weight")
- b = tf.Variable(tf.zeros([lab_dim]), name="bias")
- output = tf.matmul(input_features, W) + b
- z = tf.nn.softmax( output )
- a1 = tf.argmax(tf.nn.softmax( output ), axis=1)#按行找出最大索引,生成数组
- b1 = tf.argmax(input_lables, axis=1)
- err = tf.count_nonzero(a1-b1) #两个数组相减,不为0的就是错误个数
- cross_entropy = tf.nn.softmax_cross_entropy_with_logits( labels=input_lables,logits=output)
- loss = tf.reduce_mean(cross_entropy)#对交叉熵取均值很有必要
- optimizer = tf.train.AdamOptimizer(0.04) #尽量用这个--收敛快,会动态调节梯度
- train = optimizer.minimize(loss) # let the optimizer train
- maxEpochs = 50
- minibatchSize = 25
- # 启动session
- with tf.Session() as sess:
- sess.run(tf.global_variables_initializer())
- for epoch in range(maxEpochs):
- sumerr=0
- for i in range(np.int32(len(Y)/minibatchSize)):
- x1 = X[i*minibatchSize:(i+1)*minibatchSize,:]
- y1 = Y[i*minibatchSize:(i+1)*minibatchSize,:]
- _,lossval, outputval,errval = sess.run([train,loss,output,err], feed_dict={input_features: x1, input_lables:y1})
- sumerr =sumerr+(errval/minibatchSize)
- print ("Epoch:", '%04d' % (epoch+1), "cost=","{:.9f}".format(lossval),"err=",sumerr/minibatchSize)
- train_X, train_Y = generate(200,num_classes, [[3.0],[3.0,0]],False)
- aa = [np.argmax(l) for l in train_Y]
- colors =['r' if l == 0 else 'b' if l==1 else 'y' for l in aa[:]]
- plt.scatter(train_X[:,0], train_X[:,1], c=colors)
- x = np.linspace(-1,8,200)
- y=-x*(sess.run(W)[0][0]/sess.run(W)[1][0])-sess.run(b)[0]/sess.run(W)[1][0]
- plt.plot(x,y, label='first line',lw=3)
- y=-x*(sess.run(W)[0][1]/sess.run(W)[1][1])-sess.run(b)[1]/sess.run(W)[1][1]
- plt.plot(x,y, label='second line',lw=2)
- y=-x*(sess.run(W)[0][2]/sess.run(W)[1][2])-sess.run(b)[2]/sess.run(W)[1][2]
- plt.plot(x,y, label='third line',lw=1)
- plt.legend()
- plt.show()
- print(sess.run(W),sess.run(b))
- train_X, train_Y = generate(200,num_classes, [[3.0],[3.0,0]],False)
- aa = [np.argmax(l) for l in train_Y]
- colors =['r' if l == 0 else 'b' if l==1 else 'y' for l in aa[:]]
- plt.scatter(train_X[:,0], train_X[:,1], c=colors)
- nb_of_xs = 200
- xs1 = np.linspace(-1, 8, num=nb_of_xs)
- xs2 = np.linspace(-1, 8, num=nb_of_xs)
- xx, yy = np.meshgrid(xs1, xs2) # create the grid
- # Initialize and fill the classification plane
- classification_plane = np.zeros((nb_of_xs, nb_of_xs))
- for i in range(nb_of_xs):
- for j in range(nb_of_xs):
- #classification_plane[i,j] = nn_predict(xx[i,j], yy[i,j])
- classification_plane[i,j] = sess.run(a1, feed_dict={input_features: [[ xx[i,j], yy[i,j] ]]} )
- # Create a color map to show the classification colors of each grid point
- cmap = ListedColormap([
- colorConverter.to_rgba('r', alpha=0.30),
- colorConverter.to_rgba('b', alpha=0.30),
- colorConverter.to_rgba('y', alpha=0.30)])
- # Plot the classification plane with decision boundary and input samples
- plt.contourf(xx, yy, classification_plane, cmap=cmap)
- plt.show()
2、非线性问题:利用隐藏层的神经网络拟合操作
- import tensorflow as tf
- import numpy as np
- # 网络结构:2维输入 --> 2维隐藏层 --> 1维输出
- learning_rate = 1e-4
- n_input = 2
- n_label = 1
- n_hidden = 2
- x = tf.placeholder(tf.float32, [None,n_input])
- y = tf.placeholder(tf.float32, [None, n_label])
- weights = {
- 'h1': tf.Variable(tf.truncated_normal([n_input, n_hidden], stddev=0.1)),
- 'h2': tf.Variable(tf.random_normal([n_hidden, n_label], stddev=0.1))
- }
- biases = {
- 'h1': tf.Variable(tf.zeros([n_hidden])),
- 'h2': tf.Variable(tf.zeros([n_label]))
- }
- layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights['h1']), biases['h1']))
- #y_pred = tf.nn.tanh(tf.add(tf.matmul(layer_1, weights['h2']),biases['h2']))
- #y_pred = tf.nn.relu(tf.add(tf.matmul(layer_1, weights['h2']),biases['h2']))#局部最优解
- #y_pred = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['h2']),biases['h2']))
- #Leaky relus 40000次 ok
- layer2 =tf.add(tf.matmul(layer_1, weights['h2']),biases['h2'])
- y_pred = tf.maximum(layer2,0.01*layer2)
- loss=tf.reduce_mean((y_pred-y)**2)
- train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
- #生成数据
- X=[[0,0],[0,1],[1,0],[1,1]]
- Y=[[0],[1],[1],[0]]
- X=np.array(X).astype('float32')
- Y=np.array(Y).astype('int16')
- #加载
- sess = tf.InteractiveSession()
- sess.run(tf.global_variables_initializer())
- #训练
- for i in range(10000):
- sess.run(train_step,feed_dict={x:X,y:Y} )
- #计算预测值
- print(sess.run(y_pred,feed_dict={x:X}))
- #输出:已训练100000次
- #查看隐藏层的输出
- print(sess.run(layer_1,feed_dict={x:X}))
二、网络模型训练过程中可能存在的问题
1、欠拟合
拟合效果没有完全拟合到想要得到的真实数据情况
解决办法:增加节点或增加神经层
2、过拟合
拟合程度过优
解决办法:early stopping 数据集扩增、正则化、dropout