在这个项目中,我使用下面给出的不同分类器来分类手语,如knn,svm,naive bayes等,并使用f1测量,准确度,精度和使用sklearn库调用来测量他们的分数,但它们都是相同的,即个别方法f1_measure =精度=精度=召回 . 请帮忙 .
import pandas as pd
from numpy._distributor_init import NUMPY_MKL
from sklearn import svm
from sklearn.naive_bayes import GaussianNB as nb
from sklearn.neighbors import KNeighborsClassifier as knn
from sklearn.linear_model import LogisticRegression as lr
import numpy as np
import sklearn.metrics as sm
train = pd.read_csv("train60.csv")
print(train.head())
y = np.array(train.pop('label'))
x = np.array(train)/255.
print("here1")
#train = dataset.iloc[:,1:].values
test = pd.read_csv("train40.csv")
label_test=np.array(test.pop('label'))
x_ = np.array(test)/255.
print("here2")
def calc_accuracy(method,label_test,pred):
print("accuracy score for ",method,sm.accuracy_score(label_test,pred))
print("precision_score for ",method,sm.precision_score(label_test,pred,average='micro'))
print("f1 score for ",method,sm.f1_score(label_test,pred,average='micro'))
print("recall score for ",method,sm.recall_score(label_test,pred,average='micro'))
def run_svm():
clf=svm.SVC(decision_function_shape='ovo')
print("svm started")
clf.fit(x,y)
#print clf.n_layers_
pred=clf.predict(x_)
#print(pred)
np.savetxt('submission_svm.csv', np.c_[range(1,len(test)+1),pred], delimiter=',', header = 'ImageId,Label', comments = '', fmt='%d')
calc_accuracy("SVM",label_test,pred)
def run_lr():
clf = lr()
print("lr started")
clf.fit(x,y)
#print clf.n_layers_
pred=clf.predict(x_)
#print(pred)
np.savetxt('submission_lr.csv', np.c_[range(1,len(test)+1),pred], delimiter=',', header = 'ImageId,Label', comments = '', fmt='%d')
calc_accuracy("Logistic regression",label_test,pred)
def run_nb():
clf = nb()
print("nb started")
clf.fit(x,y)
#print(clf.classes_)
#print clf.n_layers_
pred=clf.predict(x_)
#print(pred)
np.savetxt('submission_nb.csv', np.c_[range(1,len(test)+1),pred], delimiter=',', header = 'ImageId,Label', comments = '', fmt='%d')
calc_accuracy("Naive Bayes",label_test,pred)
def run_knn():
clf=knn(n_neighbors=3)
print("knn started")
clf.fit(x,y)
#print(clf.classes_)
#print clf.n_layers_
pred=clf.predict(x_)
#print(pred)
np.savetxt('submission_knn.csv', np.c_[range(1,len(test)+1),pred], delimiter=',', header = 'ImageId,Label', comments = '', fmt='%d')
calc_accuracy("K nearest neighbours",label_test,pred)
run_svm()
run_knn()
run_nb()
run_lr()
我在shell中的输出如下:
svm started
accuracy score for SVM 0.596358118361
precision_score for SVM 0.596358118361
f1 score for SVM 0.596358118361
recall score for SVM 0.596358118361
knn started
accuracy score for K nearest neighbours 0.656550328781
precision_score for K nearest neighbours 0.656550328781
f1 score for K nearest neighbours 0.656550328781
recall score for K nearest neighbours 0.656550328781
nb started
accuracy score for Naive Bayes 0.360647445625
precision_score for Naive Bayes 0.360647445625
f1 score for Naive Bayes 0.360647445625
recall score for Naive Bayes 0.360647445625
lr started
accuracy score for Logistic regression 0.601922104198
precision_score for Logistic regression 0.601922104198
f1 score for Logistic regression 0.601922104198
recall score for Logistic regression 0.601922104198
测试和训练中的第一列也分别是1977和2995图像的手语标记,每行都有图像标签,后面是单个图像的9126像素 .
>>> y
array([ 0, 0, 0, ..., 23, 23, 23], dtype=int64)
>>> len(y)
2995
>>> x
array([[ 1., 1., 1., ..., 1., 1., 1.],
[ 1., 1., 1., ..., 1., 1., 1.],
[ 1., 1., 1., ..., 1., 1., 1.],
...,
[ 1., 1., 1., ..., 1., 1., 1.],
[ 1., 1., 1., ..., 1., 1., 1.],
[ 1., 1., 1., ..., 1., 1., 1.]])
>>> len(x)
2995
>>> test.head()
pixel0 pixel1 pixel2 pixel3 pixel4 pixel5 pixel6 pixel7 pixel8 \
0 255 255 255 255 255 255 255 255 255
1 255 255 255 255 255 255 255 255 255
2 255 255 255 255 255 255 255 255 255
3 255 255 255 255 255 255 255 255 255
4 255 255 255 255 255 255 255 255 255
pixel9 ... pixel9206 pixel9207 pixel9208 pixel9209 pixel9210 \
0 255 ... 255 255 255 255 255
1 255 ... 255 255 255 255 255
2 255 ... 255 255 255 255 255
3 255 ... 255 255 255 255 255
4 255 ... 255 255 255 255 255
pixel9211 pixel9212 pixel9213 pixel9214 pixel9215
0 255 255 255 255 255
1 255 255 255 255 255
2 255 255 255 255 255
3 255 255 255 255 255
4 255 255 255 255 255
[5 rows x 9216 columns]