import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from sklearn.neural_network import MLPClassifier
from pandas import read_csv
# Train a small MLP on MNIST and visualize the first-layer weights.

# Print the module docstring only when one exists; this file defines none,
# so an unguarded print(__doc__) would emit the literal text "None".
if __doc__:
    print(__doc__)

# Load MNIST (https://www.openml.org/d/554) from a local CSV export.
# The same data can be downloaded instead with:
#   X, y = fetch_openml('mnist_784', version=1, return_X_y=True)
# Every column except the last holds a pixel value; the last column is the label.
df = read_csv('./mnist_784.csv')
X = df.iloc[:, :-1].values  # .values turns the pandas selection into a NumPy array
y = df.iloc[:, -1].values

# Rescale the features by 1/255 (assumes 8-bit pixel intensities — TODO confirm).
X = X / 255.

# Traditional train/test split: the first 60000 rows are used for training and
# all remaining rows for testing (10000 rows if the CSV holds the full
# 70000-sample MNIST set).
X_train, X_test = X[:60000], X[60000:]
y_train, y_test = y[:60000], y[60000:]

# solver='sgd' is stochastic gradient descent, alpha is the L2 penalty strength
# and tol is the optimization tolerance. verbose is documented as a bool; any
# truthy value (such as 10) turns on per-iteration progress messages.
# A larger configuration to experiment with:
#   hidden_layer_sizes=(100, 100), max_iter=400 (and no learning_rate_init).
mlp = MLPClassifier(
    hidden_layer_sizes=(50,),
    max_iter=10,
    alpha=1e-4,
    solver='sgd',
    verbose=10,
    tol=1e-4,
    random_state=1,
    learning_rate_init=.1,
)

# Fit the model to data matrix X and target(s) y.
mlp.fit(X_train, y_train)

# score() returns the mean accuracy on the given data and labels.
print("Training set score: %f" % mlp.score(X_train, y_train))
print("Test set score: %f" % mlp.score(X_test, y_test))

# 4 x 4 grid of subplots: zip() stops at the shorter sequence, so only the
# first 16 columns of the first-layer weight matrix are drawn.
fig, axes = plt.subplots(4, 4)

# Use the global min / max so all weights are shown on the same scale.
first_layer_weights = mlp.coefs_[0]
vmin, vmax = first_layer_weights.min(), first_layer_weights.max()
for coef, ax in zip(first_layer_weights.T, axes.ravel()):
    # Each transposed row holds the incoming weights of one hidden unit,
    # reshaped to the 28 x 28 image layout.
    ax.matshow(coef.reshape(28, 28), cmap=plt.cm.gray, vmin=.5 * vmin,
               vmax=.5 * vmax)
    ax.set_xticks(())
    ax.set_yticks(())

plt.show()
其中,读代码时遇到 coefs_ 这个属性(它是模型训练后得到的属性,而不是构造参数),一开始不明白是什么意思。
可以先 Go To Declaration, 查看该声明所在的包路径,再到 API 文档中找到对应的条目进行查看。
- 包路径
- API中对应的解释
sklearn英文API
sklearn中文API