# Load the Boston housing dataset and hold out 30% of it as a test set.
# NOTE(review): load_boston was removed in scikit-learn 1.2, so this import
# needs an older release -- confirm the version this script is pinned to.
from sklearn import linear_model
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split

boston = load_boston()
x_train, x_test, y_train, y_test = train_test_split(
    boston.data,
    boston.target,
    test_size=0.3,
    random_state=123,  # fixed seed keeps the split reproducible
)

# Linear regression fitted on the training portion only; fit() returns the
# estimator itself, so `reg` is the fitted model.
reg = linear_model.LinearRegression().fit(x_train, y_train)
# Validation: 10-fold cross-validation of a fresh linear model on the full
# dataset, with the rows left in their original order.
from sklearn.model_selection import cross_val_score

reg = linear_model.LinearRegression()
scores = cross_val_score(
    estimator=reg,
    X=boston.data,
    y=boston.target,
    cv=10,
)
# Bare expression: shows the per-fold scores in an interactive session and
# has no effect when the file is run as a script.
scores
# Randomly reorder the samples before cross-validating so that the folds
# are split evenly instead of following the dataset's original row order.
import numpy as np
from sklearn.model_selection import cross_val_score

X = boston.data
y = boston.target

# One shared permutation keeps each feature row paired with its target.
# No seed is set in the visible code, so the order differs between runs.
indices = np.arange(len(y))
np.random.shuffle(indices)
X = X[indices]
y = y[indices]

reg = linear_model.LinearRegression()
scores = cross_val_score(estimator=reg, X=X, y=y, cv=10)
# Bare expressions: they display the per-fold scores and their mean / standard
# deviation in an interactive session, and do nothing in a script run.
scores
scores.mean(), scores.std()
# Decision tree: fit a classifier on the full iris dataset, inspect it, and
# export the fitted tree in Graphviz DOT format.
from sklearn.datasets import load_iris
from sklearn.metrics import classification_report
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_graphviz

iris = load_iris()
ct = DecisionTreeClassifier()
ct.fit(iris.data, iris.target)

# Bare expressions: they display the fitted attributes and the predictions in
# an interactive session, and have no effect when run as a script.
ct.max_features_
ct.feature_importances_
ct.predict(iris.data)

# NOTE: the report is computed on the same samples the tree was fitted on,
# so it describes training performance, not generalization.
print(classification_report(iris.target, ct.predict(iris.data)))

# Write the tree to "tree.dot". The file name must use plain ASCII quotes;
# the typographic quotes used previously were a SyntaxError.
export_graphviz(
    ct,
    out_file="tree.dot",
    feature_names=iris.feature_names,
    class_names=iris.target_names,
)