先上代码
# -*- coding: utf-8 -*-
from __future__ import division
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.cross_validation import train_test_split
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC
df = pd.read_csv('iris.csv')
# 分类器对象
clf = OneVsRestClassifier(SVC(kernel='linear')) # 支持向量机
##clf = RandomForestClassifier(max_depth=5, n_estimators=10)
X = df.ix[:,1:5]
y = df.ix[:,5]
# 训练集和测试集分类器
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3)
clf.fit(X_train,y_train)
# 用训练得到的分类器对测试集进行预测
y_pred = clf.predict(X_test)
# 用pandas建立一个表格
rf = pd.DataFrame(list(zip(y_pred, y_test)), columns=['predicted','actual'])
rf['correct'] = rf.apply(lambda r: 1 if r['predicted'] == r['actual'] else 0, axis=1)
print rf
print rf['correct'].sum()/rf['correct'].count()
结果如下