常用的分类器包括SVM、KNN、贝叶斯、线性回归、逻辑回归、决策树、随机森林、xgboost、GBDT、boosting、神经网络NN。
代码如下:
import functools
from time import time

from sklearn.metrics import precision_recall_fscore_support
def timeDecor(func):
    """Decorator that reports the wall-clock run time of *func*.

    After each call it prints the elapsed seconds together with the wrapped
    function's name, then returns the wrapped function's result unchanged.
    """
    @functools.wraps(func)  # preserve __name__/__doc__ of the wrapped function
    def innerDef(*args, **kwargs):
        t1 = time()
        result = func(*args, **kwargs)
        t2 = time()
        t = t2 - t1
        # print() call syntax: the original used a Python 2 print statement,
        # which is a SyntaxError under Python 3.
        print("{0}函数部分运行时间 :{1}s".format(str(func.__name__), t))
        return result
    return innerDef
@timeDecor
def svm_classify(X_train, y_train, X_test, y_test):
    """Fit a default SVC on the training split and report test-set metrics.

    Prints the fit time and the precision/recall/F-score/support tuple
    computed on the test split. Returns None.
    """
    from sklearn import svm
    t0 = time()
    # NOTE(review): the original built an unused param_grid dict here; wrap
    # the estimator in GridSearchCV if a hyper-parameter search over
    # C/gamma/kernel is actually wanted.
    clf = svm.SVC()
    clf.fit(X_train, y_train)
    print("svm done in %0.3fs" % (time() - t0))
    pre_y_test = clf.predict(X_test)
    print("SVM Metrics : {0}".format(precision_recall_fscore_support(y_test, pre_y_test)))
@timeDecor
def rf_classify(X_train, y_train, X_test, y_test):
    """Fit a 500-tree random forest and report test-set metrics.

    Prints the fit time and precision/recall/F-score/support on the
    test split. Returns None.
    """
    from sklearn.ensemble import RandomForestClassifier
    t0 = time()
    # Fixed random_state keeps the forest reproducible across runs.
    clf = RandomForestClassifier(random_state=0, n_estimators=500)
    clf.fit(X_train, y_train)
    print("rf done in %0.3fs" % (time() - t0))
    pre_y_test = clf.predict(X_test)
    print("rf Metrics : {0}".format(precision_recall_fscore_support(y_test, pre_y_test)))
@timeDecor
def knn_classify(X_train, y_train, X_test, y_test):
    """Fit a 5-nearest-neighbours classifier and report test-set metrics.

    Prints the fit time and precision/recall/F-score/support on the
    test split. Returns None.
    """
    from sklearn.neighbors import KNeighborsClassifier
    t0 = time()
    clf = KNeighborsClassifier(n_neighbors=5)
    clf.fit(X_train, y_train)
    print("knn done in %0.3fs" % (time() - t0))
    pre_y_test = clf.predict(X_test)
    print("knn Metrics : {0}".format(precision_recall_fscore_support(y_test, pre_y_test)))
@timeDecor
def bagging_knn_classify(X_train, y_train, X_test, y_test):
    """Train a bagging ensemble of KNN base learners and print test metrics."""
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.ensemble import BaggingClassifier
    started = time()
    base_learner = KNeighborsClassifier()
    # Each ensemble member sees half of the samples and half of the features.
    ensemble = BaggingClassifier(base_learner, max_samples=0.5, max_features=0.5)
    ensemble.fit(X_train, y_train)
    elapsed = time() - started
    print("bagging_knn done in %0.3fs" % elapsed)
    predictions = ensemble.predict(X_test)
    scores = precision_recall_fscore_support(y_test, predictions)
    print("bagging_knn Metrics : {0}".format(scores))
@timeDecor
def lr_classify(X_train, y_train, X_test, y_test):
    """Fit a logistic regression and report test-set metrics.

    Prints the fit time and precision/recall/F-score/support on the
    test split. Returns None.
    """
    from sklearn.linear_model import LogisticRegression
    t0 = time()
    # C=1e5 makes the L2 regularization essentially negligible.
    clf = LogisticRegression(C=1e5)
    clf.fit(X_train, y_train)
    print("lr done in %0.3fs" % (time() - t0))
    pre_y_test = clf.predict(X_test)
    print("lr Metrics : {0}".format(precision_recall_fscore_support(y_test, pre_y_test)))
@timeDecor
def nb_classify(X_train, y_train, X_test, y_test):
    """Fit a Gaussian naive Bayes classifier and report test-set metrics.

    Prints the fit time and precision/recall/F-score/support on the
    test split. Returns None.
    """
    from sklearn.naive_bayes import GaussianNB
    t0 = time()
    clf = GaussianNB()
    clf.fit(X_train, y_train)
    print("nb done in %0.3fs" % (time() - t0))
    pre_y_test = clf.predict(X_test)
    print("nb Metrics : {0}".format(precision_recall_fscore_support(y_test, pre_y_test)))
@timeDecor
def da_classify(X_train, y_train, X_test, y_test):
    """Fit quadratic discriminant analysis and print test-set metrics."""
    from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
    started = time()
    model = QuadraticDiscriminantAnalysis()
    model.fit(X_train, y_train)
    print("da done in %0.3fs" % (time() - started))
    predicted = model.predict(X_test)
    print("da Metrics : {0}".format(precision_recall_fscore_support(y_test, predicted)))
@timeDecor
def decisionTree_classify(X_train, y_train, X_test, y_test):
    """Fit a depth-limited decision tree and print test-set metrics."""
    from sklearn.tree import DecisionTreeClassifier
    begin = time()
    # Depth cap of 5 limits overfitting of the single tree.
    tree = DecisionTreeClassifier(max_depth=5)
    tree.fit(X_train, y_train)
    print("DT done in %0.3fs" % (time() - begin))
    y_hat = tree.predict(X_test)
    print("DT Metrics : {0}".format(precision_recall_fscore_support(y_test, y_hat)))
@timeDecor
def xgboost_classify(X_train, y_train, X_test, y_test):
    """Fit an XGBoost classifier with default settings and print test metrics."""
    import xgboost
    begin = time()
    booster = xgboost.XGBClassifier()
    booster.fit(X_train, y_train)
    print("xgboost done in %0.3fs" % (time() - begin))
    y_hat = booster.predict(X_test)
    scores = precision_recall_fscore_support(y_test, y_hat)
    print("xgboost Metrics : {0}".format(scores))
@timeDecor
def GBDT_classify(X_train, y_train, X_test, y_test):
    """Fit a 200-stage gradient boosting classifier and print test metrics."""
    from sklearn.ensemble import GradientBoostingClassifier
    begin = time()
    booster = GradientBoostingClassifier(n_estimators=200)
    booster.fit(X_train, y_train)
    print("GBDT done in %0.3fs" % (time() - begin))
    y_hat = booster.predict(X_test)
    print("GBDT Metrics : {0}".format(precision_recall_fscore_support(y_test, y_hat)))
@timeDecor
def voting_classify(X_train, y_train, X_test, y_test):
    """Fit a soft-voting ensemble and report test-set metrics.

    Prints the fit time and precision/recall/F-score/support on the
    test split. Returns None.
    """
    from sklearn.ensemble import RandomForestClassifier, VotingClassifier
    t0 = time()
    # NOTE(review): with a single enabled estimator, soft voting degenerates
    # to that estimator alone — uncomment more members (and their imports)
    # for an actual ensemble. The original also instantiated unused
    # GradientBoostingClassifier and xgboost.XGBClassifier objects; those
    # dead constructions are removed here.
    rf = RandomForestClassifier(random_state=0, n_estimators=500)
    clf = VotingClassifier(
        estimators=[
            # ('gbdt', GradientBoostingClassifier(n_estimators=200)),
            ('rf', rf),
            # ('lr', LogisticRegression(random_state=1)),
            # ('nb', GaussianNB()),
            # ('xgboost', xgboost.XGBClassifier()),
        ],
        voting='soft',
    )
    clf.fit(X_train, y_train)
    print("voting done in %0.3fs" % (time() - t0))
    pre_y_test = clf.predict(X_test)
    print("voting Metrics : {0}".format(precision_recall_fscore_support(y_test, pre_y_test)))