手写数字识别

from sklearn.decomposition import PCA
from sklearn.datasets import fetch_lfw_people
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

人脸识别

# Load the Labeled Faces in the Wild dataset. `faces` is used below but was
# never created in the original cell. min_faces_per_person=60 keeps only
# people with at least 60 pictures; this is consistent with the
# (1348, 62, 47) image-array shape echoed further down in this walkthrough.
# NOTE: the first call downloads the data and needs network access.
faces = fetch_lfw_people(min_faces_per_person=60)

# fig is the canvas; axes is a 4x5 array of subplot objects.
# Axis ticks are switched off because they carry no meaning for images.
fig, axes = plt.subplots(4, 5, figsize=(8, 4),
                         subplot_kw={"xticks": [], "yticks": []})
# axes.flat iterates the 2-D grid of subplots as a 1-D sequence,
# so the i-th subplot shows the i-th face image.
for i, ax in enumerate(axes.flat):
    ax.imshow(faces.images[i, :, :], cmap="gray")

![output_2_0](output_2_0.png)

axes.shape
(4, 5)
axes.flat
# 二维转一维
<numpy.flatiter at 0x1d2351a8080>
# Walk the flattened subplot grid and draw one grayscale face per subplot.
for idx, axis in enumerate(axes.ravel()):
    face_image = faces.images[idx]
    axis.imshow(face_image, cmap="gray")
faces.images.shape
(1348, 62, 47)
faces.data.shape
(1348, 2914)
faces.target
array([1, 3, 3, ..., 7, 3, 5], dtype=int64)
faces.data.shape
(1348, 2914)
# Dimensionality reduction: fit a 150-component PCA on the flattened faces.
pca = PCA(150)
pca.fit(faces.data)
# Each row of components_ is one principal axis in the original pixel space.
v = pca.components_
v.shape
(150, 2914)
# 3x8 grid of tick-less subplots, one per principal component to display.
hidden_ticks = {"xticks": [], "yticks": []}
fig, axes = plt.subplots(3, 8, figsize=(8, 4), subplot_kw=hidden_ticks)

# Reshape each component back to the 62x47 image layout and show it.
for idx, axis in enumerate(axes.ravel()):
    eigenface = v[idx].reshape(62, 47)
    axis.imshow(eigenface, cmap="gray")

![output_12_0](output_12_0.png)

数字识别

# Load the digit data set from a CSV file in the current working directory.
# Per the outputs echoed below, the table has 42000 rows and 785 columns:
# a `label` column followed by pixel0 ... pixel783.
data=pd.read_csv("digit recognizor.csv")

data.shape
(42000, 785)
# Column 0 holds the digit label; every remaining column is a pixel feature.
y = data.iloc[:, 0]
x = data.iloc[:, 1:]
x.shape
(42000, 784)
data.head()
   label  pixel0  pixel1  pixel2  pixel3  pixel4  pixel5  pixel6  pixel7  pixel8  ...  pixel774  pixel775  pixel776  pixel777  pixel778  pixel779  pixel780  pixel781  pixel782  pixel783
0      1       0       0       0       0       0       0       0       0       0  ...         0         0         0         0         0         0         0         0         0         0
1      0       0       0       0       0       0       0       0       0       0  ...         0         0         0         0         0         0         0         0         0         0
2      1       0       0       0       0       0       0       0       0       0  ...         0         0         0         0         0         0         0         0         0         0
3      4       0       0       0       0       0       0       0       0       0  ...         0         0         0         0         0         0         0         0         0         0
4      0       0       0       0       0       0       0       0       0       0  ...         0         0         0         0         0         0         0         0         0         0

5 rows × 785 columns

2 画累计方差贡献率,找最佳降维后维度的范围

# Fit a PCA that keeps every component so the full variance spectrum
# is available for the cumulative-variance curve.
pca_line = PCA()
pca_line.fit(x)
# Wide canvas for the curve.
plt.figure(figsize=(20, 5))
<Figure size 1440x360 with 0 Axes>




<Figure size 1440x360 with 0 Axes>
# Running total of the per-component explained-variance ratios: the value at
# position k is the share of variance retained by the first k+1 components.
cumulative_ratio = np.cumsum(pca_line.explained_variance_ratio_)
plt.plot(cumulative_ratio)
plt.xlabel("number of components after dimension reduction")
plt.ylabel("cumulative explained variance ratio")
plt.show()

![output_21_0](output_21_0.png)

x
pixel0  pixel1  pixel2  pixel3  pixel4  pixel5  pixel6  pixel7  pixel8  pixel9  ...  pixel774  pixel775  pixel776  pixel777  pixel778  pixel779  pixel780  pixel781  pixel782  pixel783

42000 rows × 784 columns

# Coarse learning curve: locate the rough range of good dimensionalities
# before narrowing the search.
# For each candidate number of components (1, 11, ..., 91), reduce x with PCA
# and record the mean 10-fold cross-validated score of a small random forest.
score=[]
for i in range(1,101,10):
    x_dr = PCA(i).fit_transform(x)
    once=cross_val_score(RFC(n_estimators=10,random_state=0)
                        ,x_dr,y,cv=10).mean()
    score.append(once)
plt.figure(figsize=(20,5))
# The original cell never drew the scores, so plt.show() displayed an empty
# figure; plot the mean score against the number of components tried.
plt.plot(range(1,101,10),score)
plt.show()
<Figure size 1440x360 with 0 Axes>
# Refined learning curve: evaluate every dimensionality from 10 to 24 to
# pick the best number of components after reduction.
n_components_grid = range(10, 25)
score = []
for n_components in n_components_grid:
    x_dr = PCA(n_components).fit_transform(x)
    forest = RFC(n_estimators=10, random_state=0)
    fold_scores = cross_val_score(forest, x_dr, y, cv=10)
    score.append(fold_scores.mean())

plt.figure(figsize=(20, 5))
plt.plot(n_components_grid, score)
plt.show()

![output_24_0](output_24_0.png)

# Reduce to the chosen dimensionality (23 components) and check how a larger
# forest performs: mean accuracy over 5-fold cross-validation.
x_dr = PCA(23).fit_transform(x)
final_forest = RFC(n_estimators=100, random_state=0)
cross_val_score(final_forest, x_dr, y, cv=5).mean()
0.9461904761904762

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值