Sklearn机器学习学习笔记(2)

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn import model_selection
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn import linear_model
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import r2_score
from sklearn.tree import export_graphviz

# Boston housing data: linear regression scored on a hold-out split and with
# 10-fold cross-validation.
# NOTE(review): datasets.load_boston was deprecated in scikit-learn 1.0 and
# removed in 1.2, so this section needs scikit-learn < 1.2 -- confirm the
# installed version before running.
boston = datasets.load_boston()
print(boston.data)

# Hold out 30% of the samples; random_state pins the split.
x_train, x_test, y_train, y_test = train_test_split(
    boston.data, boston.target, test_size=0.3, random_state=123
)
print(len(x_train), len(x_test), len(y_train), len(y_test))

# Fit on the training part and report R^2 on both parts.
reg = linear_model.LinearRegression()
reg.fit(x_train, y_train)
print(metrics.r2_score(y_train, reg.predict(x_train)))
print(metrics.r2_score(y_test, reg.predict(x_test)))

# 10-fold cross-validation on the data in its stored order, first with the
# default scoring and then with explained variance.
reg = linear_model.LinearRegression()
scores = cross_val_score(reg, boston.data, boston.target, cv=10)
print(scores)
print(scores.mean(), scores.std())

scores = cross_val_score(
    reg, boston.data, boston.target, scoring='explained_variance', cv=10
)
print(scores)

# Randomly reorder the samples so the folds are split evenly.  A seeded
# generator keeps the cross-validation scores reproducible between runs.
X, y = boston.data, boston.target
rng = np.random.RandomState(123)
indices = rng.permutation(y.shape[0])
X, y = X[indices], y[indices]

reg = linear_model.LinearRegression()
scores = cross_val_score(reg, X, y, cv=10)
print(scores)
print(scores.mean(), scores.std())

# cross_validate evaluates several metrics in one pass and returns a dict;
# the per-fold test scores are stored under 'test_<metric>'.
scoring = ['r2', 'explained_variance']
scores = cross_validate(
    reg, X, y, cv=10, scoring=scoring, return_train_score=False
)
print(scores)
print(scores['test_r2'].mean())

# Out-of-fold predictions for every sample, then a single R^2 over all of them.
pred = cross_val_predict(reg, X, y, cv=10)
print(pred[:10])

print(r2_score(y, pred))




# Iris data: fit a decision tree on the full dataset, inspect it, and export
# it in Graphviz format.
iris = datasets.load_iris()

# A fixed random_state makes the fitted tree (and therefore the importances
# and the exported file) reproducible between runs.
ct = DecisionTreeClassifier(random_state=123)
ct.fit(iris.data, iris.target)
print(ct.max_features_)
print(ct.feature_importances_)

# These predictions are on the same samples the tree was fitted on, so the
# report below describes training performance only.
train_pred = ct.predict(iris.data)
print(train_pred)
print(classification_report(iris.target, train_pred))

export_graphviz(
    ct,
    out_file='tree.dot',
    feature_names=iris.feature_names,
    class_names=iris.target_names,
)




用留出法(train_test_split,按 7:3 划分训练集和测试集)对iris数据进行拆分并评估,再建立决策树模型

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn import metrics

# Load the iris data.
iris = datasets.load_iris()
print(iris.data)

# Hold-out split: 30% of the samples go to the test part; random_state pins
# the split.
x_train, x_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.3, random_state=123
)
print(len(x_train), len(x_test), len(y_train), len(y_test))

# Evaluation: fit a linear regression to iris.target on the training part and
# report R^2 on the training and test parts.
reg = linear_model.LinearRegression()
reg.fit(x_train, y_train)
print(metrics.r2_score(y_train, reg.predict(x_train)))
print(metrics.r2_score(y_test, reg.predict(x_test)))

# Build the decision tree model.  It is fitted on the full dataset, so the
# predictions printed below are on training samples.  A fixed random_state
# makes the fitted tree reproducible between runs.
ct = DecisionTreeClassifier(random_state=123)
ct.fit(iris.data, iris.target)
print(ct.max_features_)
print(ct.feature_importances_)
print(ct.predict(iris.data))
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值