#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Mar 14 14:39:19 2017
@author: dreamer
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
def plot_feature_importances_cancer(model):
    """Draw a horizontal bar chart of a fitted model's feature importances.

    One bar is drawn per column of the module-level ``cancer`` dataset, and
    each bar is labelled with the matching entry of ``cancer.feature_names``.
    The chart is drawn through ``matplotlib.pyplot``; the function neither
    returns a value nor calls ``plt.show()``.

    Args:
        model: Fitted estimator exposing a ``feature_importances_`` attribute
            with one value per column of ``cancer.data``.
    """
    n_features = cancer.data.shape[1]
    # Use the same positions for the bars and their tick labels so that each
    # feature name lines up with its own bar.
    positions = np.arange(n_features)
    plt.barh(positions, model.feature_importances_, align='center')
    plt.yticks(positions, cancer.feature_names)
    plt.xlabel("Feature importance")
    plt.ylabel("Feature")
# Load the scikit-learn breast cancer dataset and split it into training and
# test sets. The split is stratified on the target, and random_state=0 keeps
# it reproducible between runs.
cancer = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data, cancer.target, stratify=cancer.target, random_state=0)

# LogisticRegression
# Baseline: logistic regression with default hyper-parameters.
logreg = LogisticRegression().fit(X_train, y_train)
print("Training set score: {:.3f}".format(logreg.score(X_train, y_train)))
print("Test set score: {:.3f}".format(logreg.score(X_test, y_test)))
# Disabled experiment: fit one forest per n_estimators value in 1..199 and
# plot the resulting training and test accuracy curves.
# NOTE(review): the original indentation was lost; the two plt.plot calls are
# assumed to sit after the loop, not inside it.
# train = []
# test = []
# for i in range(1, 200):
#     forest = RandomForestClassifier(
#         n_estimators=i, random_state=0).fit(X_train, y_train)
#     train.append(forest.score(X_train, y_train))
#     test.append(forest.score(X_test, y_test))
# plt.plot(train)
# plt.plot(test)

# RandomForest
# 100 trees, at most 6 candidate features per split, trained on all available
# cores (n_jobs=-1); random_state=0 keeps the fit reproducible.
forest = RandomForestClassifier(
    n_estimators=100, random_state=0, n_jobs=-1,
    max_features=6).fit(X_train, y_train)
print("Training set score: {:.3f}".format(forest.score(X_train, y_train)))
print("Test set score: {:.3f}".format(forest.score(X_test, y_test)))

# Disabled: plot the feature importances of the fitted forest.
# feature = plot_feature_importances_cancer(forest)

# Disabled: export a decision tree to Graphviz format.
# NOTE(review): `tree` is not defined anywhere in the visible script; fit a
# tree estimator and bind it to that name before re-enabling this snippet.
# from sklearn.tree import export_graphviz
# export_graphviz(tree, out_file="tree.dot", class_names=["malignant", "benign"],
#                 feature_names=cancer.feature_names, impurity=False, filled=True)
# GradientBoostingClassifier
from sklearn.ensemble import GradientBoostingClassifier

# Gradient-boosted trees of depth 3 with learning rate 0.02; random_state=0
# keeps the fit reproducible.
gbrt = GradientBoostingClassifier(
    random_state=0, max_depth=3, learning_rate=0.02).fit(X_train, y_train)
print("Accuracy on training set: {:.3f}".format(gbrt.score(X_train, y_train)))
print("Accuracy on test set: {:.3f}".format(gbrt.score(X_test, y_test)))