# 1. KNN
# K-nearest-neighbors classification on the wine dataset.
from sklearn import datasets

wine = datasets.load_wine()
X, y = wine.data, wine.target

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=666
)

from sklearn.neighbors import KNeighborsClassifier

# Baseline KNN with all-default hyperparameters.
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
knn.score(X_test, y_test)

from sklearn.model_selection import GridSearchCV

# Exhaustive search over the main KNN hyperparameters.
param = {
    'n_neighbors': list(range(1, 10)),
    'weights': ['uniform', 'distance'],
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
}
gs_clf = GridSearchCV(knn, param_grid=param)
gs_clf.fit(X_train, y_train)
gs_clf.score(X_test, y_test)
# 2. Linear regression model
# Linear regression on the wine data (the target here is a class label,
# so the regression is illustrative rather than a good model choice).
import numpy as np
from sklearn import datasets

wine = datasets.load_wine()
X = wine.data
y = wine.target
X  # inspect the feature matrix

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=66
)

from sklearn.preprocessing import StandardScaler

# Standardize features; the scaler is fit on the training split only so
# the test split stays untouched by training statistics.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_td = scaler.transform(X_train)
X_test_td = scaler.transform(X_test)

from sklearn.linear_model import LinearRegression

linear = LinearRegression()
linear.fit(X_train_td, y_train)
linear.score(X_test_td, y_test)  # R^2 on the test split

from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error

# Compare the usual regression metrics on the same predictions.
y_pre = linear.predict(X_test_td)
mean_absolute_error(y_test, y_pre)
mean_squared_error(y_test, y_pre)
r2_score(y_test, y_pre)
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import GridSearchCV

# Scale -> polynomial expansion -> ordinary least squares.
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('poly', PolynomialFeatures()),
    ('linear', LinearRegression()),
])

# Search polynomial degrees 0..6.
param_grid = [{'poly__degree': list(range(7))}]

grid_clf = GridSearchCV(pipe, param_grid=param_grid)
grid_clf.fit(X_train, y_train)
grid_clf.best_estimator_
grid_clf.best_score_
from sklearn.linear_model import Ridge

# Same pipeline, but with L2-regularized (ridge) regression at the end.
piper = Pipeline([
    ('scaler', StandardScaler()),
    ('poly', PolynomialFeatures()),
    ('linear', Ridge()),
])

param_grid = [{'poly__degree': list(range(7))}]

grid_clf = GridSearchCV(piper, param_grid=param_grid)
grid_clf.fit(X_train, y_train)
grid_clf.best_estimator_
grid_clf.best_score_
from sklearn.linear_model import Lasso

# And once more with L1-regularized (lasso) regression.
pipel = Pipeline([
    ('scaler', StandardScaler()),
    ('poly', PolynomialFeatures()),
    ('linear', Lasso()),
])

param_grid = [{'poly__degree': list(range(7))}]

grid_clf = GridSearchCV(pipel, param_grid=param_grid)
grid_clf.fit(X_train, y_train)
grid_clf.best_score_
from sklearn.model_selection import cross_val_score

# Baseline: default-fold CV scores of plain linear regression on the raw
# training features (values displayed, not stored).
cross_val_score(LinearRegression(), X_train, y_train)

# FIX: the original code instantiated a GridSearchCV here and then
# immediately rebuilt it below without ever fitting it — that dead
# instantiation is removed.  Re-run the Ridge pipeline search with
# 8-fold cross-validation.
grid_clf = GridSearchCV(piper, param_grid=param_grid, cv=8)
grid_clf.fit(X_train, y_train)
grid_clf.best_score_
# 3. Logistic regression model
# Logistic regression on the wine dataset.
import numpy as np
from sklearn import datasets

wine = datasets.load_wine()
X = wine.data
y = wine.target
X.shape, y.shape  # quick sanity check of the dimensions
print(wine.DESCR)

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=66
)

from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.pipeline import Pipeline

# FIX: max_iter raised from the default 100 — the lbfgs solver does not
# converge on the unscaled wine features and emits a ConvergenceWarning
# otherwise, so the reported score came from an unconverged model.
logi = LogisticRegression(max_iter=5000)
logi.fit(X_train, y_train)
logi.score(X_test, y_test)
from sklearn.model_selection import GridSearchCV

# Polynomial expansion, then scaling, then logistic regression.
pipe = Pipeline([
    ('Poly', PolynomialFeatures(degree=2)),
    ('scaler', StandardScaler()),
    ('logi', LogisticRegression()),
])

# BUG FIX: the original C grid was np.arange(0.01, 0.11, 10) — a step of
# 10 yields the single value [0.01], so the regularization strength was
# never actually searched.  With step 0.01 the grid covers 0.01..0.10.
param_grid = [{
    'Poly__degree': list(range(6)),
    'logi__C': list(np.arange(0.01, 0.11, 0.01)),
    'logi__solver': ['lbfgs', 'liblinear'],
}]

grid = GridSearchCV(pipe, param_grid=param_grid)
grid.fit(X_train, y_train)
grid.best_estimator_
grid.score(X_test, y_test)
from sklearn.multiclass import OneVsRestClassifier

# Explicit one-vs-rest wrapper around logistic regression.
ovr = OneVsRestClassifier(LogisticRegression())
ovr.fit(X_train, y_train)
ovr.score(X_test, y_test)

# The equivalent built-in OvR mode of LogisticRegression itself.
model = LogisticRegression(multi_class='ovr')
model.fit(X_train, y_train)
model.score(X_test, y_test)

from sklearn.metrics import (
    confusion_matrix, precision_score, recall_score, accuracy_score,
)

# Multi-class metrics; 'weighted' averages per-class scores by support.
y_pre = model.predict(X_test)
confusion_matrix(y_test, y_pre)
precision_score(y_test, y_pre, average='weighted')
recall_score(y_test, y_pre, average='weighted')
accuracy_score(y_test, y_pre)
from sklearn.multiclass import OneVsOneClassifier

# One-vs-one wrapper: fits one classifier per pair of classes.
ovo = OneVsOneClassifier(LogisticRegression())
ovo.fit(X_train, y_train)
ovo.score(X_test, y_test)

# True multinomial (softmax) logistic regression; newton-cg supports it.
model = LogisticRegression(multi_class='multinomial', solver='newton-cg')
model.fit(X_train, y_train)
model.score(X_test, y_test)

from sklearn.metrics import (
    confusion_matrix, precision_score, recall_score, accuracy_score,
)

y_pre = model.predict(X_test)
wine.target  # glance at the full label vector
confusion_matrix(y_test, y_pre)
precision_score(y_test, y_pre, average='weighted')
recall_score(y_test, y_pre, average='weighted')
accuracy_score(y_test, y_pre)
# 4. Decision tree
# Decision-tree classification on the wine dataset.
from sklearn import datasets
from matplotlib import pyplot as plt

wine = datasets.load_wine()
X, y = wine.data, wine.target

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=666
)

from sklearn.tree import DecisionTreeClassifier

# Fully grown tree with default settings.
dtc = DecisionTreeClassifier()
dtc.fit(X_train, y_train)
dtc.score(X_test, y_test)
import graphviz
from sklearn import tree

# Export the fitted tree to DOT; Source(...) renders it in a notebook.
dot_data = tree.export_graphviz(
    dtc,
    feature_names=wine.feature_names,
    class_names=wine.target_names,
    filled=True,
    rounded=True,
)
graphviz.Source(dot_data)
# Depth-limited tree using information gain (entropy) as the criterion.
dct1 = DecisionTreeClassifier(
    criterion='entropy',
    random_state=30,
    max_depth=3,
    splitter='best',
)
dct1.fit(X_train, y_train)
dct1.score(X_test, y_test)

# Visualize the pruned entropy tree as well.
dot_data1 = tree.export_graphviz(
    dct1,
    feature_names=wine.feature_names,
    class_names=wine.target_names,
    filled=True,
    rounded=True,
)
graphviz.Source(dot_data1)
# Same depth-3 tree but with Gini impurity, for comparison.
dtc2 = DecisionTreeClassifier(
    criterion='gini',
    random_state=30,
    max_depth=3,
    splitter='best',
)
dtc2.fit(X_train, y_train)
dtc2.score(X_test, y_test)
# 5. Bagging
# Bagging ensembles on the wine dataset.
import numpy as np
from matplotlib import pyplot as plt
from sklearn import datasets

wine = datasets.load_wine()
X = wine.data
y = wine.target
X.shape, y  # quick look at the data

# Scatter the first two features, one color per class (colors cycle
# automatically, matching three separate scatter calls).
for label in (0, 1, 2):
    plt.scatter(X[y == label, 0], X[y == label, 1])

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=666
)

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier
# Bagging with 30-sample draws: compare 10 trees against 1000 trees.
bagging = BaggingClassifier(
    DecisionTreeClassifier(random_state=666),
    n_estimators=10,
    max_samples=30,
    random_state=42,
)
bagging.fit(X_train, y_train)
bagging.score(X_test, y_test)

bagging = BaggingClassifier(
    DecisionTreeClassifier(random_state=666),
    n_estimators=1000,
    max_samples=30,
    random_state=42,
)
bagging.fit(X_train, y_train)
bagging.score(X_test, y_test)
# Larger 100-sample bootstraps (sampling with replacement), 10 vs 1000 trees.
bagging = BaggingClassifier(
    DecisionTreeClassifier(random_state=666),
    n_estimators=10,
    max_samples=100,
    random_state=42,
    bootstrap=True,
)
bagging.fit(X_train, y_train)
bagging.score(X_test, y_test)

bagging = BaggingClassifier(
    DecisionTreeClassifier(random_state=666),
    n_estimators=1000,
    max_samples=100,
    random_state=42,
    bootstrap=True,
)
bagging.fit(X_train, y_train)
bagging.score(X_test, y_test)
# bootstrap=False samples WITHOUT replacement (pasting); 1000 vs 10 trees.
bagging = BaggingClassifier(
    DecisionTreeClassifier(random_state=666),
    n_estimators=1000,
    max_samples=100,
    random_state=42,
    bootstrap=False,
)
bagging.fit(X_train, y_train)
bagging.score(X_test, y_test)

bagging = BaggingClassifier(
    DecisionTreeClassifier(random_state=666),
    n_estimators=10,
    max_samples=100,
    random_state=42,
    bootstrap=False,
)
bagging.fit(X_train, y_train)
bagging.score(X_test, y_test)
# Feature subsampling: each tree sees only 3 of the features, first drawn
# without replacement, then with replacement.
bagging = BaggingClassifier(
    DecisionTreeClassifier(random_state=666),
    n_estimators=10,
    max_samples=100,
    random_state=42,
    bootstrap=False,
    max_features=3,
    bootstrap_features=False,
)
bagging.fit(X_train, y_train)
bagging.score(X_test, y_test)

bagging = BaggingClassifier(
    DecisionTreeClassifier(random_state=666),
    n_estimators=10,
    max_samples=100,
    random_state=42,
    bootstrap=False,
    max_features=3,
    bootstrap_features=True,
)
bagging.fit(X_train, y_train)
bagging.score(X_test, y_test)
# With bootstrap sampling, the out-of-bag samples give a free validation
# estimate, so the entire dataset can be used for fitting.
bagging = BaggingClassifier(
    DecisionTreeClassifier(random_state=666),
    n_estimators=10,
    max_samples=100,
    random_state=42,
    bootstrap=True,
    oob_score=True,
)
bagging.fit(X, y)
bagging.oob_score_
# FIX: "%%time" is an IPython cell magic and is a SyntaxError in a plain
# .py file; time the fit explicitly with the standard library instead.
import time

_t0 = time.perf_counter()
bagging = BaggingClassifier(
    DecisionTreeClassifier(random_state=666),
    n_estimators=10,
    max_samples=100,
    random_state=42,
    bootstrap=True,
    oob_score=True,
    n_jobs=-1,  # parallelize the per-estimator fits over all CPU cores
)
bagging.fit(X, y)
print(f"wall time (n_jobs=-1): {time.perf_counter() - _t0:.3f}s")
bagging.oob_score_
# FIX: "%%time" is an IPython cell magic and is a SyntaxError in a plain
# .py file; time the fit explicitly with the standard library instead.
import time

_t0 = time.perf_counter()
bagging = BaggingClassifier(
    DecisionTreeClassifier(random_state=666),
    n_estimators=10,
    max_samples=100,
    random_state=42,
    bootstrap=True,
    oob_score=True,
    n_jobs=3,  # limit the parallel fits to three worker processes
)
bagging.fit(X, y)
print(f"wall time (n_jobs=3): {time.perf_counter() - _t0:.3f}s")
bagging.oob_score_