机器学习笔记之Neural Networks (Deep Learning) MLPs
Here we only discuss some relatively simple methods, namely multilayer perceptrons (MLPs) for classification and regression.
MLPs are feed-forward neural networks.
MLPs在python当中可以通过MLPClassifier和MLPRegressor来实现
import mglearn
from sklearn.neural_network import MLPClassifier
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
# Two-moons toy dataset: 100 noisy samples, split with stratify=y so the
# train/test sets keep the same class balance.
# (Fixed: the dataset generation and split were duplicated verbatim.)
x, y = make_moons(n_samples=100, noise=0.25, random_state=3)
x_train, x_test, y_train, y_test = train_test_split(x, y, stratify=y, random_state=42)
# Default MLP (a single hidden layer of 100 units) with the L-BFGS solver,
# which works well on small datasets; plot its decision boundary.
mlp = MLPClassifier(solver='lbfgs', random_state=0).fit(x_train, y_train)
mglearn.plots.plot_2d_separator(mlp, x_train, fill=True, eps=.5, alpha=.3)
mglearn.discrete_scatter(x_train[:, 0], x_train[:, 1], y_train)
plt.xlabel('Feature0')
plt.ylabel('Feature1')
plt.show()
# By default the MLP uses 100 hidden units, which is more capacity than this
# small dataset needs; 10 units give a comparable decision boundary.
# (Fixed: this whole block was accidentally duplicated and ran twice;
# also fixed the "hideen" typo in the note.)
mlp = MLPClassifier(solver='lbfgs', random_state=0, hidden_layer_sizes=[10]).fit(x_train, y_train)
mglearn.plots.plot_2d_separator(mlp, x_train, fill=True, eps=.5, alpha=.3)
mglearn.discrete_scatter(x_train[:, 0], x_train[:, 1], y_train)
plt.xlabel('Feature0')
plt.ylabel('Feature1')
plt.show()
# Grid of decision boundaries: rows vary the hidden-layer width (10 vs 100
# units, two hidden layers each), columns vary the L2 penalty alpha.
fig, axes = plt.subplots(2, 4, figsize=(20, 8))
for axx, n_hidden_nodes in zip(axes, [10, 100]):
    for ax, alpha in zip(axx, [0.001, 0.01, 0.1, 1]):
        # BUG FIX: alpha appeared in the subplot title but was never passed
        # to the model, so every column trained the identical classifier.
        mlp = MLPClassifier(random_state=0, solver='lbfgs', alpha=alpha,
                            hidden_layer_sizes=[n_hidden_nodes, n_hidden_nodes])
        mlp.fit(x_train, y_train)
        # Note: the plotting 'alpha=.3' below is figure transparency, not
        # the regularization strength.
        mglearn.plots.plot_2d_separator(mlp, x_train, fill=True, eps=.5, alpha=.3, ax=ax)
        mglearn.discrete_scatter(x_train[:, 0], x_train[:, 1], y_train, ax=ax)
        ax.set_title('n_hidden = [%s,%s],alpha = %f' % (n_hidden_nodes, n_hidden_nodes, alpha))
plt.show()
# Baseline: fit the default MLP on the raw (unscaled) breast-cancer data
# to show how much feature scaling matters later.
cancer = load_breast_cancer()
x_train, x_test, y_train, y_test = train_test_split(
    cancer.data, cancer.target, random_state=0)
mlp = MLPClassifier(random_state=42)
mlp.fit(x_train, y_train)
train_acc = mlp.score(x_train, y_train)
test_acc = mlp.score(x_test, y_test)
print('Training score:%s, Testing score:%s' % (train_acc, test_acc))
'''做一个preprocessing'''
# Standardize each feature to zero mean / unit variance using statistics
# computed on the TRAINING set only; the same statistics are then applied
# to the test set to avoid information leakage.
train_mean = x_train.mean(axis=0)
train_std = x_train.std(axis=0)
x_train_scaled = (x_train - train_mean) / train_std
x_test_scaled = (x_test - train_mean) / train_std
# Refit the default MLP on the standardized data.
mlp = MLPClassifier(random_state=0)
mlp.fit(x_train_scaled, y_train)
scaled_scores = (mlp.score(x_train_scaled, y_train),
                 mlp.score(x_test_scaled, y_test))
print('Training score:%s, Testing score:%s' % scaled_scores)
# Raise the iteration cap so the default solver can converge fully on the
# scaled data instead of stopping at the default limit.
mlp = MLPClassifier(max_iter=1000, random_state=0)
mlp.fit(x_train_scaled, y_train)
iter_scores = (mlp.score(x_train_scaled, y_train),
               mlp.score(x_test_scaled, y_test))
print('Training score:%s, Testing score:%s' % iter_scores)
# Add stronger L2 regularization (alpha=1) on top of the longer training
# budget to reduce the train/test gap.
mlp = MLPClassifier(max_iter=1000, random_state=0, alpha=1)
mlp.fit(x_train_scaled, y_train)
reg_scores = (mlp.score(x_train_scaled, y_train),
              mlp.score(x_test_scaled, y_test))
print('Training score:%s, Testing score:%s' % reg_scores)
# Visualize the learned first-layer weight matrix (mlp.coefs_[0]): one row
# per input feature (the 30 breast-cancer features), one column per hidden
# unit.  NOTE: the original note called this a correlation heatmap, but
# these are raw connection weights, not correlations.
plt.figure(figsize=(20, 5))
plt.imshow(mlp.coefs_[0], interpolation='none', cmap='hsv')
plt.yticks(range(30), cancer.feature_names)
plt.xlabel('Columns in weight matrix')  # fixed typo: 'Colunms'
plt.ylabel('Input Feature')             # fixed typo: 'Inpur'
plt.colorbar()
plt.show()
- Strengths: can build incredibly complex models; able to capture
the information contained in large amounts of data.
- Weaknesses: takes a long time to train; works best with 'homogeneous'
data; requires careful pre-processing.