from mnist_tools import load_mnist, plot_images
from sklearn.metrics import accuracy_score
# Load the train/test arrays (presumably MNIST images and labels -- confirm
# against mnist_tools.load_mnist).
train_x, train_y, test_x, test_y = load_mnist()
# Flatten every sample into a row of 28*28 = 784 values, convert to float and
# divide by 255 (presumably scaling 8-bit pixel intensities into [0, 1]).
train_x, test_x = (
    images.reshape(-1, 28 * 28).astype("float") / 255
    for images in (train_x, test_x)
)
# Logistic regression
from sklearn.linear_model import LogisticRegression  # logistic regression

# Multinomial logistic regression solved with L-BFGS, fitted on the whole
# training set and evaluated on the test set.
# NOTE(review): `multi_class` is deprecated in recent scikit-learn releases --
# confirm against the installed version before upgrading.
lr = LogisticRegression(solver='lbfgs', multi_class='multinomial')
lr.fit(train_x, train_y)
lr_y = lr.predict(test_x)
# Prediction accuracy on the test set; the value is neither stored nor printed.
accuracy_score(test_y, lr_y)
# Save the trained model so it can be reused for prediction later
import pickle

# Persist the fitted logistic-regression model to disk so it can be reused
# later without retraining.
with open("lr.pkl", "wb") as f:
    pickle.dump(lr, f)  # write the model to the file

# Read the model back to check that the round trip works.
# NOTE: unpickling can execute arbitrary code -- only load pickle files that
# come from a trusted source (here: the file written just above).
with open("lr.pkl", "rb") as f:
    lr_pkl = pickle.load(f)  # restore the model from the file

# The restored model is used exactly like the original one.
lr_pkl_y = lr_pkl.predict(test_x)
# Accuracy of the restored model on the test set; the value is not stored.
accuracy_score(test_y, lr_pkl_y)
# k-nearest neighbors algorithm
from sklearn.neighbors import KNeighborsClassifier  # k-nearest neighbours

# 1-nearest-neighbour classifier fitted on every 10th training sample and
# evaluated on the full test set.
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(train_x[::10], train_y[::10])
knn_y = knn.predict(test_x)
# Test-set accuracy; the value is neither stored nor printed.
accuracy_score(test_y, knn_y)
# Decision tree algorithm
from sklearn.tree import DecisionTreeClassifier  # decision tree

# Decision tree with default settings, fitted on every 5th training sample
# and evaluated on the full test set.
dt = DecisionTreeClassifier()
dt.fit(train_x[::5], train_y[::5])
dt_y = dt.predict(test_x)
# Test-set accuracy; the value is neither stored nor printed.
accuracy_score(test_y, dt_y)
# Support vector machine algorithm
from sklearn.svm import SVC  # support vector machine

# Support vector classifier with default settings, fitted on every 100th
# training sample and evaluated on the full test set.
svm = SVC()
svm.fit(train_x[::100], train_y[::100])
svm_y = svm.predict(test_x)
# Test-set accuracy; the value is neither stored nor printed.
accuracy_score(test_y, svm_y)
# Random forest algorithm
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier

# Random forest with default settings, fitted on every 100th training sample
# and evaluated on the full test set.
rf = RandomForestClassifier()  # random forest
rf.fit(train_x[::100], train_y[::100])
rf_y = rf.predict(test_x)
# Test-set accuracy; the value is neither stored nor printed.
accuracy_score(test_y, rf_y)
# AdaBoostClassifier
# AdaBoost with default settings, fitted on the whole training set and
# evaluated on the full test set.
ada = AdaBoostClassifier()
ada.fit(train_x, train_y)
ada_y = ada.predict(test_x)
# Test-set accuracy; the value is neither stored nor printed.
accuracy_score(test_y, ada_y)
# Neural network algorithm
from sklearn.neural_network import MLPClassifier  # neural network

# Multi-layer perceptron with default settings, fitted on the whole training
# set and evaluated on the full test set.
mlp = MLPClassifier()
mlp.fit(train_x, train_y)
mlp_y = mlp.predict(test_x)
# Test-set accuracy; the value is neither stored nor printed.
accuracy_score(test_y, mlp_y)
# PCA
from sklearn.base import clone
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA

# Reduce each sample to 50 principal components before classification.
pca = PCA(n_components=50)
# Pipeline fits its steps in place (it does not copy them), so passing `lr`
# directly would overwrite the model already fitted on the raw 784-dimensional
# inputs.  clone() gives an unfitted estimator with the same parameters,
# leaving `lr` untouched.
pca_lr = Pipeline([('s1', pca), ('s2', clone(lr))])
pca_lr.fit(train_x, train_y)
pca_lr_y = pca_lr.predict(test_x)
# Test-set accuracy of PCA + logistic regression; the value is not stored.
accuracy_score(test_y, pca_lr_y)
# Pipeline
from sklearn.base import clone

# Pipeline fits its steps in place (it does not copy them).  Reusing the
# existing `pca` and `mlp` objects would refit the PCA that the `pca_lr`
# pipeline depends on and overwrite the MLP already fitted on the raw inputs,
# so unfitted copies with the same parameters are used instead.
pca_mlp = Pipeline([('s1', clone(pca)), ('s2', clone(mlp))])
pca_mlp.fit(train_x, train_y)
pca_mlp_y = pca_mlp.predict(test_x)
# Test-set accuracy of PCA + MLP; the value is neither stored nor printed.
accuracy_score(test_y, pca_mlp_y)