import numpy as np
import matplotlib. pyplot as plt
from sklearn. neighbors import KNeighborsClassifier
加载样本数据
使用 matplotlib 读取图片,将每一张图片转化为一个数组
# Read one sample bitmap into an array, show its dimensions, then draw it
# with matplotlib's default colormap.
sample_path = 'digits/0/0_1.bmp'
bmp1 = plt.imread(sample_path)
display(bmp1.shape)
plt.imshow(bmp1)
(28, 28)
<matplotlib.image.AxesImage at 0x5d80c50>
# Same sample as before, this time drawn with the grayscale colormap.
sample_path = 'digits/0/0_1.bmp'
bmp1 = plt.imread(sample_path)
display(bmp1.shape)
plt.imshow(bmp1, cmap='gray')
(28, 28)
<matplotlib.image.AxesImage at 0x5dfdeb0>
尝试将两张图片转化为2行 28*28列
# Read a second sample, flatten both images, and stack them as the rows of a
# single 2-D array (one image per row).
bmp2 = plt.imread('digits/0/0_2.bmp')
digits = np.array([img.ravel() for img in (bmp1, bmp2)])
digits.shape
(2, 784)
尝试将数组中某一张图片进行显示
# Restore the first flattened row to 28x28 and draw it in grayscale.
plt. imshow( digits[ 0 ] . reshape( 28 , 28 ) , cmap= 'gray' )
<matplotlib.image.AxesImage at 0x4ca1e90>
批量化读取所有的图片
# Load every training bitmap: for each digit label 0-9, read the files
# digits/<label>/<label>_<index>.bmp for index 1..500, flatten each image into
# one row of `data`, and record its label at the same position in `target`.
data = []
target = []
for label in range(10):
    for index in range(1, 501):
        # The path template must not contain blanks around the placeholders;
        # otherwise it names files such as 'digits/ 0 / 0 _ 1 .bmp' that do
        # not match the layout used elsewhere ('digits/0/0_1.bmp').
        bmp_filename = f'digits/{label}/{label}_{index}.bmp'
        bmp = plt.imread(bmp_filename)
        data.append(bmp.ravel())
        target.append(label)

# Convert the accumulated rows/labels to arrays and show their shapes.
data = np.array(data)
target = np.array(target)
display(data.shape, target.shape)
(5000, 784)
(5000,)
创建分类模型
KNN 分类 邻近数量:5,7,9,11 weights: uniform,distance # 权重
# Baseline classifier: k-nearest neighbours with k=7; every other argument is
# left at its default. The string below records the constructor signature
# for reference only and has no effect at runtime.
knn = KNeighborsClassifier(n_neighbors=7)
"""
KNeighborsClassifier(
n_neighbors=5,
weights='uniform',
algorithm='auto',
leaf_size=30,
p=2,
metric='minkowski',
metric_params=None,
n_jobs=None,
**kwargs,
)
"""
from sklearn.model_selection import train_test_split as split

# Hold out 20% of the samples for scoring. A fixed random_state keeps the
# split (and therefore the scores compared afterwards) reproducible between
# runs, and stratifying on the labels keeps every digit represented in the
# same proportion in both parts.
X_train, X_test, y_train, y_test = split(
    data, target, test_size=0.2, random_state=0, stratify=target
)
使用拆分后的训练集进行训练
# Fit the k=7 model on the training part of the split.
knn. fit( X_train, y_train)
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
metric_params=None, n_jobs=None, n_neighbors=7, p=2,
weights='uniform')
# Score the fitted model on the held-out part of the split.
knn. score( X_test, y_test)
0.93
# Trial: k=5. set_params() and fit() both return the estimator, so the
# reconfigure / refit / score steps are chained into one expression.
knn.set_params(n_neighbors=5).fit(X_train, y_train).score(X_test, y_test)
0.929
# Trial: k=9. Reconfigure, refit on the training part, score on the held-out
# part, all in one chained expression.
knn.set_params(n_neighbors=9).fit(X_train, y_train).score(X_test, y_test)
0.922
# Trial: k=9 with distance-weighted votes instead of uniform ones.
(
    knn.set_params(n_neighbors=9, weights='distance')
    .fit(X_train, y_train)
    .score(X_test, y_test)
)
0.926
# Trial: k=11. Only n_neighbors is changed here, so the estimator keeps
# whatever `weights` setting it already had.
knn.set_params(n_neighbors=11).fit(X_train, y_train).score(X_test, y_test)
0.917
使用全部样本重新训练
# Refit on every sample to obtain the final model. The hyperparameters are
# set explicitly first: without this the estimator silently keeps whatever
# the last trial left behind (k=11 with distance weighting, the lowest score
# recorded), rather than the best-scoring combination (k=7, uniform weights).
knn.set_params(n_neighbors=7, weights='uniform')
knn.fit(data, target)
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
metric_params=None, n_jobs=None, n_neighbors=11, p=2,
weights='distance')
import os
加载测试样本
# Read one bitmap from the test folder and inspect its dimensions.
test1 = plt. imread( 'digits/test/4.bmp' )
test1. shape
(28, 28, 3)
plt. imshow( test1. mean( axis= - 1 ) )
<matplotlib.image.AxesImage at 0xada3b30>
plt. imshow( test1. mean( axis= - 1 ) , cmap= 'gray' )
<matplotlib.image.AxesImage at 0xaddf710>
# Load every file in digits/test: each image is reduced to one grayscale
# channel and flattened into a row of `test_data`; the file name without its
# extension is stored as that image's label in `test_target` (as a string).
test_data = []
test_target = []
# os.listdir() returns entries in arbitrary order; sorting keeps the rows,
# labels and later plots in a stable, predictable order.
for filename in sorted(os.listdir('digits/test')):
    print(filename)
    # No blanks around the placeholder: a stray space would become part of
    # the path and the file would not be found.
    bmp_file = f'digits/test/{filename}'
    bmp = plt.imread(bmp_file)
    if bmp.ndim == 3:
        # Average the first three channels only, so that a fourth channel
        # (presumably alpha in 4-channel files; verify) does not leak into
        # the brightness values.
        bmp = bmp[..., :3].mean(axis=-1)
    # A 2-D image is already single-channel and is flattened as-is.
    test_data.append(bmp.ravel())
    label, ext_name = os.path.splitext(filename)
    test_target.append(label)

test_data = np.array(test_data)
test_target = np.array(test_target)
display(test_data.shape, test_target.shape)
4.bmp
5.bmp
6.bmp
7.bmp
8.bmp
9.bmp
(6, 784)
(6,)
test_target
array(['4', '5', '6', '7', '8', '9'], dtype='<U1')
test_data[ 0 ] . shape
(784,)
test_data
array([[255., 255., 255., ..., 255., 255., 255.],
[255., 255., 255., ..., 255., 255., 255.],
[255., 255., 255., ..., 255., 255., 255.],
[255., 255., 255., ..., 255., 255., 255.],
[255., 255., 255., ..., 255., 255., 255.],
[255., 255., 255., ..., 255., 255., 255.]])
预测测试数据集的结果
# Classify every row of the loaded test set; y_ holds one prediction per row.
y_ = knn. predict( test_data)
y_
array([4, 4, 5, 1, 0, 5])
可视化的方式显示预测结果
# Draw each test image in a 2x3 grid, titled with its label taken from the
# file name ("True") and the model's prediction ("Pred").
plt.figure(figsize=(10, 12))
cells = zip(test_data, test_target, y_)
for position, (pixels, truth, guess) in enumerate(cells, start=1):
    plt.subplot(2, 3, position)
    plt.imshow(pixels.reshape(28, 28), cmap='gray')
    plt.title(f'True: {truth} Pred: {guess} ', size=20)
    plt.axis('off')
plt.show()
读取手写的数字
filename: digits/test/4_2.bmp
# Read the additional sample image and inspect its dimensions.
test_4_2 = plt. imread( 'digits/test/4_2.bmp' )
test_4_2. shape
(28, 28, 4)
# Classify the additional sample. The image has four channels, so only the
# first three are averaged into a grayscale value; averaging all four would
# mix the fourth channel (presumably alpha; verify) into every pixel and
# shift the brightness away from the scale of the single-channel training
# images.
gray_4_2 = test_4_2[..., :3].mean(axis=-1)
# predict() expects a 2-D array, so the image becomes one row of 784 values.
y_2 = knn.predict(gray_4_2.reshape(1, -1))
display(y_2)
array([1])
from pandas import Series
D:\yingyong\Anaconda3\lib\importlib\_bootstrap.py:219: RuntimeWarning: numpy.ufunc size changed, may indicate binary incompatibility. Expected 112 from C header, got 124 from PyObject
return f(*args, **kwds)
Series( test_4_2. ravel( ) ) . unique( )
array([255, 0], dtype=uint64)
Series( bmp1. ravel( ) ) . unique( )
array([255, 204, 96, 2, 205, 207, 17, 3, 18, 201, 28, 16, 22,
198, 249, 245, 195, 31, 53, 171, 133, 92, 159, 66, 88, 65,
141, 27, 208, 176, 0, 87, 76, 243, 180, 134, 234, 12, 217,
90, 47, 248, 77, 15, 184, 236, 227, 60, 192, 57, 59, 179,
9, 143, 107, 170, 25, 230, 120, 69, 32, 124, 30, 110, 82,
169, 93, 6, 109, 226, 199, 26, 40, 125, 56, 127, 114, 218],
dtype=uint64)
从训练集的 5000 张数字图片中随机抽取 50 张
# Consecutive positions 0..4999.
index = np.arange(0, 5000)
index
array([ 0, 1, 2, ..., 4997, 4998, 4999])
np. random. permutation( index)
array([4660, 2679, 3913, ..., 525, 3990, 146])
# Draw 50 row positions at random from 0..4999 (repeats are possible) and
# check the shape of the rows they select.
test_index = np.random.randint(0, 5000, size=50)
data[test_index].shape
(50, 784)
# Predict the sampled rows, then show their true labels for comparison.
# NOTE(review): the rows come from `data`, which the model was last fitted
# on, so this measures accuracy on training samples, not on unseen ones.
y_ = knn. predict( data[ test_index] )
target[ test_index]
array([3, 3, 7, 8, 4, 7, 4, 3, 7, 3, 6, 5, 9, 7, 1, 8, 8, 2, 1, 5, 6, 2,
5, 7, 9, 3, 0, 3, 9, 2, 6, 0, 0, 6, 0, 8, 6, 7, 6, 2, 1, 0, 4, 9,
9, 8, 5, 0, 4, 5])
y_
array([3, 3, 7, 8, 4, 7, 4, 3, 7, 3, 6, 5, 9, 7, 1, 8, 8, 2, 1, 5, 6, 2,
5, 7, 9, 3, 0, 3, 9, 2, 6, 0, 0, 6, 0, 8, 6, 7, 6, 2, 1, 0, 4, 9,
9, 8, 5, 0, 4, 5])
# Fraction of the sampled labels that the predictions match.
test_label = target[test_index]
np.count_nonzero(test_label == y_) / test_label.size
1.0
# Repeat the check with a fresh random sample of 50 rows: predict them and
# report the fraction of predictions that equal the true labels.
test_index = np.random.randint(0, 5000, size=50)
test_label = target[test_index]
y_ = knn.predict(data[test_index])
np.count_nonzero(test_label == y_) / test_label.size
1.0
# Same check, but positions are drawn from 0..999 only, i.e. from the first
# 1000 rows of `data`.
test_index = np.random.randint(0, 1000, size=50)
test_label = target[test_index]
y_ = knn.predict(data[test_index])
np.count_nonzero(test_label == y_) / test_label.size
1.0
# Draw the 50 sampled images in a 5x10 grid, each titled with its true label
# (T) and predicted label (P).
#
# The predictions in y_ were computed from data[test_index], so the image and
# the true label must be read from data/target at the same positions.
# Indexing X_test/y_test here would pair every prediction with an unrelated
# image and label.
plt.figure(figsize=(30, 20))
for i, (index, predicted) in enumerate(zip(test_index, y_), start=1):
    plt.subplot(5, 10, i)
    plt.imshow(data[index].reshape(28, 28), cmap='gray')
    plt.axis('off')
    plt.title(f'T: {target[index]} P: {predicted} ', size=30)
plt.show()
通过mean()方式实现降维效果
# Small 2x3x3 example array used to demonstrate mean() along each axis.
first_plane = [[3, 4, 5], [5, 6, 7], [3, 6, 9]]
second_plane = [[1, 2, 5], [3, 5, 7], [6, 8, 9]]
a = np.array([first_plane, second_plane])
a.shape
(2, 3, 3)
# Average across the first axis: the two 3x3 planes collapse into one 3x3.
a. mean( axis= 0 )
array([[2. , 3. , 5. ],
[4. , 5.5, 7. ],
[4.5, 7. , 9. ]])
# Average across the second axis: each plane's rows collapse, giving 2x3.
a. mean( axis= 1 )
array([[3.66666667, 5.33333333, 7. ],
[3.33333333, 5. , 7. ]])
# Average across the last axis: each innermost triple becomes one value,
# giving 2x3. This is the same reduction applied to the colour channels of
# the test images.
a. mean( axis= - 1 )
array([[4. , 6. , 6. ],
[2.66666667, 5. , 7.66666667]])