直接看代码
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import pandas as pd
import warnings
# Suppress every warning raised from this point on (presumably to keep the
# printed accuracy table uncluttered).
warnings.filterwarnings('ignore')

# Load the tab-separated data set. Every column except the last is used as a
# feature; the last column is used as the class label.
# NOTE(review): read_csv treats the first row as a header by default. If
# datingTestSet.txt has no header row, the first sample is silently consumed
# as column names and header=None should be passed -- confirm against the file.
date = pd.read_csv('datingTestSet.txt', sep='\t')
X = date.iloc[:, :-1]
y = date.iloc[:, -1]

# Hold out 25% of the rows for evaluation; random_state pins the split.
trainx, testx, trainy, testy = train_test_split(X, y, test_size=0.25, random_state=1)

# Three base classifiers, all with library-default hyper-parameters.
# NOTE(review): RandomForestClassifier is created without random_state, so its
# score (and the ensemble's) can differ between runs.
log = LogisticRegression()
rnd = RandomForestClassifier()
svm = SVC()

# voting='hard' combines the base estimators by majority vote on their
# predicted class labels.
voting = VotingClassifier(
    estimators=[('lr', log), ('rf', rnd), ('svm', svm)],
    voting='hard',
)

# Fit each model on the training split and print its test-set accuracy.
# The three base models are fitted on their own first; VotingClassifier.fit
# then trains its own internal clones of them.
for clf in (log, rnd, svm, voting):
    clf.fit(trainx, trainy)
    y_ = clf.predict(testx)
    # accuracy_score is documented as (y_true, y_pred); accuracy is symmetric,
    # so the printed value matches the original argument order.
    print(clf.__class__.__name__, accuracy_score(testy, y_))
运行结果:
LogisticRegression 0.868
RandomForestClassifier 0.972
SVC 0.808
VotingClassifier 0.952