题目
代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
|
import
pandas as pd
import
numpy as np
# Load the two whitespace-separated data files as plain NumPy arrays.
# The last column of the training file is discarded (presumably an empty
# column produced by a trailing separator -- TODO confirm against the file);
# the test file is used as-is.
train_full = pd.read_csv('../zip.train', header=None, sep=' ', engine='c').values[:, :-1]
test_full = pd.read_csv('../zip.test', header=None, sep=' ', engine='c').values

# Keep only the rows whose first column is 2 or 3, with every "2" row placed
# before every "3" row.
train = np.vstack([
    train_full[train_full[:, 0] == 2],
    train_full[train_full[:, 0] == 3],
])
test = np.vstack([
    test_full[test_full[:, 0] == 2],
    test_full[test_full[:, 0] == 3],
])

# Column 0 is the target; the remaining columns are the features.
train_y, train_x = train[:, 0], train[:, 1:]
test_y, test_x = test[:, 0], test[:, 1:]
from sklearn.linear_model import LinearRegression

# Baseline model: ordinary least squares fitted directly on the numeric
# targets held in train_y.  ``fit`` returns the estimator itself, so the
# construction and the fit can be chained.
lrcf = LinearRegression().fit(train_x, train_y)

# ``dc`` collects (display name, fitted model) pairs for later evaluation.
dc = [('linear regression', lrcf)]
from sklearn.neighbors import KNeighborsClassifier

# Fit one k-nearest-neighbour classifier per neighbourhood size and register
# each of them in ``dc`` under a descriptive name.
for k in (1, 3, 5, 7, 15):
    knn = KNeighborsClassifier(n_neighbors=k).fit(train_x, train_y)
    dc.append(('%d-nearest neighbor' % k, knn))
def acc(clf, x, y, regressor_types=None):
    """Return the fraction of samples in ``x`` for which ``clf`` predicts ``y``.

    Models that are instances of ``regressor_types`` produce continuous
    outputs, so their predictions are first turned into class labels:
    values greater than 2.5 become 3, everything else (including a value of
    exactly 2.5) becomes 2.  Predictions of any other model are compared to
    ``y`` unchanged.

    Args:
        clf: Fitted model exposing ``predict(x)``.
        x: Feature matrix passed straight to ``clf.predict``.
        y: One-dimensional array-like of true labels.
        regressor_types: Class or tuple of classes whose predictions must be
            thresholded.  Defaults to ``(LinearRegression,)``.

    Returns:
        Accuracy as a float in ``[0, 1]``.
    """
    if regressor_types is None:
        # Resolved lazily so the default tracks the name imported by the file.
        regressor_types = (LinearRegression,)

    res = np.asarray(clf.predict(x))
    y = np.asarray(y)

    # isinstance (rather than an exact type comparison) also covers subclasses.
    if isinstance(clf, regressor_types):
        # A single np.where leaves no value unmapped: the former pair of
        # strict comparisons kept an exact 2.5 as-is, which could never match
        # a label.
        res = np.where(res > 2.5, 3, 2)

    n = y.shape[0]
    r = np.count_nonzero(res == y)
    return r * 1.0 / n
# Report the test-set accuracy of every registered model, one per line.
for name, model in dc:
    accr = acc(model, test_x, test_y)
    # A single-argument print(...) call emits the same text on Python 2 and
    # Python 3, unlike the Python-2-only print statement.
    print('%s: %.5f' % (name, accr))
|
结果
linear regression: 0.95879
1-nearest neighbor: 0.97527
3-nearest neighbor: 0.96978
5-nearest neighbor: 0.96978
7-nearest neighbor: 0.96703
15-nearest neighbor: 0.96154