MNIST数据集是机器学习领域中非常经典的一个数据集，由60000个训练样本和10000个测试样本组成，每个样本都是一张28×28像素的灰度手写数字图片。
import time
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import fetch_openml #从openml.org网站导入数据
from sklearn.utils import check_random_state
# Record the wall-clock start time. Nothing in this section reads it;
# presumably a later part of the script reports elapsed time -- confirm.
t0 = time.time()
# Not used in this section; presumably the training-subset size -- confirm.
train_samples = 5000

# Load the dataset from openml.org.
# as_frame=False forces NumPy arrays. Recent scikit-learn releases default to
# as_frame='auto', which returns a pandas DataFrame for this dataset; X[i]
# below would then be a column lookup (KeyError) instead of selecting row i.
X, y = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=False)
print(X.shape, y.shape)

# Visualize the samples: draw the first 6 rows of X on a 2x3 grid of axes.
fig, ax = plt.subplots(nrows=2, ncols=3, sharex='all', sharey='all')
ax = ax.flatten()  # 2-D grid of axes -> flat sequence indexable by 0..5
for i in range(6):
    # Each row of X is reshaped into a 28x28 image before display.
    img = X[i].reshape(28, 28)
    ax[i].matshow(img)
plt.show()