Python之Sklearn使用教程!!!

https://blog.csdn.net/XiaoYi_Eric/article/details/79952325#commentsedit
Python之Sklearn使用教程——这篇是重点,写得很好,优先看这个!
原博主文章应该来自视频课:
https://morvanzhou.github.io/
我也打算看看这个视频咋样!
使用sklearn做各种回归:
https://www.cnblogs.com/valorchang/p/11397928.html
https://blog.csdn.net/u010900574/article/details/52666291
代码如下:

import numpy as np
import matplotlib.pyplot as plt


# Generate data
def gen_data(x1, x2):
    """Return the noise-free target value for the feature pair (x1, x2).

    The target is sin(x1) / 2 + cos(x2) / 2 + 0.1 * x1, evaluated with the
    NumPy ``sin`` and ``cos`` functions.
    """
    sine_term = np.sin(x1) / 2
    cosine_term = np.cos(x2) / 2
    linear_term = 0.1 * x1
    return sine_term + cosine_term + linear_term


def load_data():
    """Build the synthetic regression training and test sets.

    Training features are 500 evenly spaced points (x1 over [0, 50], x2 over
    [-10, 10]); the training target is ``gen_data(x1, x2)`` plus uniform
    noise in [-0.5, 0.5). Test features are 100 evenly spaced points with a
    small random offset added, and the test target is the noise-free
    ``gen_data`` value.

    Returns:
        tuple: ``(data_train, data_test)``, float arrays of shape (500, 3)
        and (100, 3) whose columns are x1, x2 and y.
    """
    x1_train = np.linspace(0, 50, 500)
    x2_train = np.linspace(-10, 10, 500)
    # Draw the noise as one flat vector. Adding a shape-(1,) array per row
    # would make every row ragged ([scalar, scalar, array]), which recent
    # NumPy releases reject when building the 2-D array.
    noise = np.random.random(x1_train.size) - 0.5
    y_train = gen_data(x1_train, x2_train) + noise
    data_train = np.column_stack((x1_train, x2_train, y_train))

    x1_test = np.linspace(0, 50, 100) + np.random.random(100) * 0.5
    x2_test = np.linspace(-10, 10, 100) + 0.02 * np.random.random(100)
    y_test = gen_data(x1_test, x2_test)
    data_test = np.column_stack((x1_test, x2_test, y_test))
    return data_train, data_test


# Columns 0-1 of each data set are the features (x); column 2 is the target (y).
# The training targets carry random noise; the test targets do not.
train, test = load_data()
x_train = train[:, :2]
y_train = train[:, 2]
x_test = test[:, :2]
y_test = test[:, 2]


# Regression: fit one model, score it, and plot its predictions
def try_different_method(model, method):
    """Fit ``model`` on the training split and plot its test-set predictions.

    Reads the module-level ``x_train``, ``y_train``, ``x_test`` and
    ``y_test`` arrays.

    Args:
        model: Estimator object; its ``fit``, ``score`` and ``predict``
            methods are called here.
        method: Label for the model, shown in the figure title together
            with the value returned by ``model.score``.
    """
    model.fit(x_train, y_train)
    score = model.score(x_test, y_test)
    predicted = model.predict(x_test)
    # Both curves share the same x axis: the index of each test sample.
    sample_index = np.arange(len(predicted))
    plt.figure()
    plt.plot(sample_index, y_test, "go-", label="True value")
    plt.plot(sample_index, predicted, "ro-", label="Predict value")
    plt.title(f"method:{method}---score:{score}")
    plt.legend(loc="best")
    plt.show()


# Model selection: build one regressor per method to compare.
# 1. Decision tree regression
from sklearn import tree

model_decision_tree_regression = tree.DecisionTreeRegressor()

# 2. Linear regression
from sklearn.linear_model import LinearRegression

model_linear_regression = LinearRegression()

# 3. SVM regression
from sklearn import svm

model_svm = svm.SVR()

# 4. kNN regression
from sklearn import neighbors

model_k_neighbor = neighbors.KNeighborsRegressor()

# 5. Random forest regression
from sklearn import ensemble

model_random_forest_regressor = ensemble.RandomForestRegressor(n_estimators=20)  # use 20 decision trees

# 6. AdaBoost regression (the import repeats the one above)
from sklearn import ensemble

model_adaboost_regressor = ensemble.AdaBoostRegressor(n_estimators=50)  # use 50 decision trees here

# 7. GBRT (gradient boosting) regression
from sklearn import ensemble

model_gradient_boosting_regressor = ensemble.GradientBoostingRegressor(n_estimators=100)  # use 100 decision trees here

# 8. Bagging regression
from sklearn import ensemble

model_bagging_regressor = ensemble.BaggingRegressor()

# 9. ExtraTree (extremely randomized tree) regression
from sklearn.tree import ExtraTreeRegressor

model_extra_tree_regressor = ExtraTreeRegressor()

# Evaluate every regressor built above; each call fits the model and shows
# one figure titled with the label passed as the second argument.
try_different_method(model_decision_tree_regression, "DecisionTree")  # label typo fixed
try_different_method(model_linear_regression, "LinearRegression")
try_different_method(model_svm, "SvmRegression")
try_different_method(model_k_neighbor, "KNeighbor")
try_different_method(model_random_forest_regressor, "RandomForest")
try_different_method(model_adaboost_regressor, "AdaboostRegressor")
try_different_method(model_gradient_boosting_regressor, "GBRTRegression")
# The bagging model was constructed but never evaluated; include it.
try_different_method(model_bagging_regressor, "BaggingRegressor")
try_different_method(model_extra_tree_regressor, "ExtraTree")

需要用到的博客:
https://www.cnblogs.com/sunbigdata/p/8697729.html
讲解 sklearn.datasets.make_classification 的参数

# Inspect the parameters of the linear regression equation:
# coef_ holds the weights and intercept_ holds the intercept.
# NOTE(review): `model` is not defined in the visible file; presumably it is
# a fitted LinearRegression instance. Confirm before running this snippet.
print(model.coef_)
print(model.intercept_)

这里的 coef_ 和 intercept_ 是模型的两个属性(不是函数),分别表示权值(回归系数)和截距

参考文章:https://www.cnblogs.com/annebang/p/8759229.html
https://blog.csdn.net/u010624373/article/details/78023884

https://blog.csdn.net/u010900574/article/details/52669072
30分钟学会用scikit-learn的基本分类方法(决策树、SVM、KNN)和集成方法(随机森林,Adaboost和GBRT)
这个也不错 学习一下 看看

# Shuffle helper for paired data: takes x and y and shuffles them together
# so that matching rows stay aligned.
def shuffle_in_unison(a, b):
    """Return shuffled copies of ``a`` and ``b`` using one shared permutation.

    Element ``i`` of each input is written to index ``permutation[i]`` of the
    corresponding output, so ``a[i]`` and ``b[i]`` end up at the same index.
    The inputs themselves are not modified.

    Args:
        a: NumPy array (its ``shape`` and ``dtype`` are read).
        b: NumPy array with the same length as ``a``.

    Returns:
        tuple: ``(shuffled_a, shuffled_b)``, new arrays with the shapes and
        dtypes of ``a`` and ``b``.

    Raises:
        AssertionError: If ``len(a) != len(b)``.
    """
    assert len(a) == len(b)
    import numpy
    out_a = numpy.empty(a.shape, dtype=a.dtype)
    out_b = numpy.empty(b.shape, dtype=b.dtype)
    destinations = numpy.random.permutation(len(a))
    # Scatter every source row to its destination index in one step.
    out_a[destinations] = a
    out_b[destinations] = b
    return out_a, out_b

#下面我们导入Iris数据并打乱它,然后分为100个训练集和50个测试集
from sklearn.datasets import load_iris

iris = load_iris()


def load_data():
    """Shuffle the Iris data set and split it into training and test parts.

    Side effect: ``iris.data`` and ``iris.target`` are rebound to the
    shuffled arrays returned by ``shuffle_in_unison``.

    Returns:
        tuple: ``(x_train, y_train, x_test, y_test)``. The first 100 shuffled
        rows form the training split and the remaining rows the test split;
        the label arrays are reshaped to column vectors with
        ``reshape(-1, 1)``.
    """
    iris.data, iris.target = shuffle_in_unison(iris.data, iris.target)
    features, labels = iris.data, iris.target
    x_train = features[:100]
    x_test = features[100:]
    y_train = labels[:100].reshape(-1, 1)
    y_test = labels[100:].reshape(-1, 1)
    return x_train, y_train, x_test, y_test
#常用的分类方法一般有决策树, SVM, kNN, 朴素贝叶斯, 集成方法有随机森林,Adaboost和GBDT
from sklearn import tree, svm, naive_bayes,neighbors
from sklearn.ensemble import BaggingClassifier, AdaBoostClassifier, RandomForestClassifier, GradientBoostingClassifier


x_train, y_train, x_test, y_test = load_data()

# Classifiers to compare, keyed by a short display name. Entries are listed
# (and therefore later iterated) in this order.
clfs = {
    'svm': svm.SVC(),
    'decision_tree': tree.DecisionTreeClassifier(),
    'naive_gaussian': naive_bayes.GaussianNB(),
    'naive_mul': naive_bayes.MultinomialNB(),
    'K_neighbor': neighbors.KNeighborsClassifier(),
    'bagging_knn': BaggingClassifier(
        neighbors.KNeighborsClassifier(),
        max_samples=0.5,
        max_features=0.5,
    ),
    'bagging_tree': BaggingClassifier(
        tree.DecisionTreeClassifier(),
        max_samples=0.5,
        max_features=0.5,
    ),
    'random_forest': RandomForestClassifier(n_estimators=50),
    'adaboost': AdaBoostClassifier(n_estimators=50),
    'gradient_boost': GradientBoostingClassifier(
        n_estimators=50,
        learning_rate=1.0,
        max_depth=1,
        random_state=0,
    ),
}

def try_different_method(clf):
    """Fit ``clf`` on the training split and print its test-set score.

    Reads the module-level ``x_train``, ``y_train``, ``x_test`` and
    ``y_test``; the label arrays are flattened with ``ravel()`` before being
    passed to the classifier.

    Args:
        clf: Classifier object; its ``fit`` and ``score`` methods are called.
    """
    train_labels = y_train.ravel()
    clf.fit(x_train, train_labels)
    test_labels = y_test.ravel()
    score = clf.score(x_test, test_labels)
    print('the score is :', score)

# Evaluate each classifier in turn, printing its name before its score.
for clf_key, clf in clfs.items():
    print('the classifier is :', clf_key)
    try_different_method(clf)
  • 2
    点赞
  • 11
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

zqx951102

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值