监督学习。

 

广义线性模型

LinearRegression

from sklearn import linear_model

import numpy as np

# Training samples: single-feature inputs and their targets.
features = np.array([[3], [4], [8]])
targets = np.array([3, 5, 7])

# Fit an ordinary least-squares linear model to the samples.
model = linear_model.LinearRegression()
model.fit(features, targets)

print(model.coef_)       # slope
print(model.intercept_)  # intercept

在上面的例子的基础上, 我们可以进一步推演, 画出拟合得到的回归直线:

import numpy as np

from sklearn.linear_model import LinearRegression

import plotly.graph_objects as go


if __name__ == '__main__':
    # Create the linear-regression model.
    reg = LinearRegression()

    # Training data: single-feature inputs and their targets.
    data_x = np.array([[3], [4], [8]])
    data_y = np.array([3, 5, 7])

    # Fit the model.
    reg.fit(data_x, data_y)

    # Flatten the (n, 1) feature matrix to a 1-D array for plotting.
    x = data_x.ravel()

    # Let the fitted model compute the regression line; this is
    # equivalent to x * reg.coef_ + reg.intercept_ but uses the
    # model's own API instead of re-deriving the formula by hand.
    y_pred = reg.predict(data_x)

    # Plot the fitted line against the original sample points.
    fig = go.Figure(data=[
        go.Scatter(x=x, y=y_pred, mode='lines', name='线性回归'),
        go.Scatter(x=x, y=data_y, mode='markers', name='原点')
    ])

    # Show the chart.
    fig.show()

 

岭回归

相对于普通最小二乘法, 岭回归通过 L2 正则化可以防止过拟合, 模型更健壮。

 

import numpy as np

from sklearn import linear_model

import plotly.graph_objects as go


if __name__ == '__main__':
    # Generate random experimental data in one vectorized call
    # instead of 1000 separate randint(size=1) calls.
    data_x = np.random.randint(0, 1000, size=(1000, 1))
    data_y = np.random.randint(0, 1000, size=1000)

    # Ridge regression; tweak alpha to see how the strength of the
    # L2 penalty affects the fitted slope.
    reg = linear_model.Ridge(alpha=0.5)
    reg.fit(data_x, data_y)

    # Flatten the (n, 1) feature matrix to 1-D for plotting.
    x = data_x.ravel()
    print(reg.coef_)

    # Use the fitted model's predict() for the regression line
    # instead of recomputing x * coef_ + intercept_ by hand.
    fig = go.Figure(data=[
        go.Scatter(x=x, y=data_y, mode='markers', name='原点'),
        go.Scatter(x=x, y=reg.predict(data_x), mode='lines', name='岭回归')
    ])
    fig.show()

 3. Lasso

 

如果需要进行特征选择,Lasso可能是一个更好的选择

 

import numpy as np

from sklearn import linear_model

import plotly.graph_objects as go


if __name__ == '__main__':
    # Generate standard-normal experimental data in one vectorized
    # call instead of 1000 separate normal(size=1) calls.
    data_x = np.random.normal(loc=0.0, scale=1.0, size=(1000, 1))
    data_y = np.random.normal(loc=0.0, scale=1.0, size=1000)

    # Lasso regression; tweak alpha to see the effect of the L1
    # penalty (it can shrink coefficients to exactly zero, which is
    # what makes Lasso useful for feature selection).
    reg = linear_model.Lasso(alpha=0.5)
    reg.fit(data_x, data_y)

    # Flatten the (n, 1) feature matrix to 1-D for plotting.
    x = data_x.ravel()

    # Use predict() for the fitted line instead of recomputing
    # x * coef_ + intercept_ by hand.
    fig = go.Figure(data=[
        go.Scatter(x=x, y=data_y, mode='markers', name='原点'),
        go.Scatter(x=x, y=reg.predict(data_x), mode='lines', name='Lasso')
    ])
    fig.show()

 4. 贝叶斯岭回归

 

贝叶斯岭回归对病态问题(ill-posed)的鲁棒性要更好

import numpy as np

from sklearn import linear_model

import plotly.graph_objects as go


if __name__ == '__main__':
    # Generate standard-normal experimental data in one vectorized
    # call instead of 1000 separate normal(size=1) calls.
    data_x = np.random.normal(loc=0.0, scale=1.0, size=(1000, 1))
    data_y = np.random.normal(loc=0.0, scale=1.0, size=1000)

    # Bayesian ridge regression: the regularization weights are
    # estimated from the data instead of being fixed up front.
    reg = linear_model.BayesianRidge()
    reg.fit(data_x, data_y)

    # Flatten the (n, 1) feature matrix to 1-D for plotting.
    x = data_x.ravel()
    y1 = reg.predict(data_x)
    fig = go.Figure(data=[
        go.Scatter(x=x, y=data_y, mode='markers', name='原点'),
        go.Scatter(x=x, y=y1, mode='markers', name='预测')
    ])
    fig.show()

 

线性和二次判别分析(分类模型)

这里, 举个例子

 

from sklearn import datasets

from sklearn.model_selection import train_test_split

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis

from sklearn.metrics import classification_report


if __name__ == '__main__':
    # Iris data set: 4 numeric features, 3 classes.
    iris = datasets.load_iris()
    x = iris.data
    y = iris.target
    # random_state fixes the shuffle so the split is reproducible.
    train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=0.3, random_state=42)

    # LDA (the variable was misspelled `ida` in the original).
    lda = LinearDiscriminantAnalysis()
    lda.fit(train_x, train_y)
    y_pred_lda = lda.predict(test_x)

    # QDA: like LDA but fits a separate covariance matrix per class,
    # so it can learn quadratic decision boundaries.
    qda = QuadraticDiscriminantAnalysis()
    qda.fit(train_x, train_y)
    y_pred_qda = qda.predict(test_x)

    # Per-class precision / recall / F1 for the LDA model.
    print("LDA Classification Report:")
    print(classification_report(test_y, y_pred_lda, target_names=iris.target_names))

    # Same report for the QDA model.
    print("\nQDA Classification Report:")
    print(classification_report(test_y, y_pred_qda, target_names=iris.target_names))

结果: 两个模型的分类报告完全一致, 可能是因为这份数据太过简单, 体现不出二者的差别。

 

内核岭回归(非线性回归)

对于非线性回归问题非常有效

 

from sklearn.kernel_ridge import KernelRidge

from sklearn.model_selection import GridSearchCV

from sklearn.datasets import make_regression


# Build a synthetic single-feature regression data set.
X, y = make_regression(n_samples=100, n_features=1, noise=10, random_state=0)

# Base kernel-ridge model with an RBF kernel.
base_model = KernelRidge(kernel='rbf', gamma=0.1)

# Grid-search the regularization strength (alpha) and kernel width
# (gamma) with 5-fold cross-validation.
search = GridSearchCV(
    base_model,
    param_grid={
        'alpha': [1e0, 1e-1, 1e-2, 1e-3],
        'gamma': [0.1, 0.01, 0.001, 0.0001],
    },
    cv=5,
)
search.fit(X, y)

# Report the winning hyper-parameter combination and its CV score.
print("Best parameters: ", search.best_params_)
print("Best score: ", search.best_score_)

运行结果: 

 

下面用它来做一个拟合和预测:

 

from sklearn.kernel_ridge import KernelRidge

from sklearn.model_selection import GridSearchCV

from sklearn.datasets import make_regression

from sklearn.model_selection import train_test_split

import plotly.graph_objects as go

if __name__ == '__main__':
    # Build a synthetic single-feature regression data set.
    data_x, data_y = make_regression(n_samples=1000, n_features=1, noise=10, random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(data_x, data_y, test_size=0.3, random_state=42)
    kr = KernelRidge(kernel='rbf', gamma=0.1)

    # Grid-search alpha / gamma with 5-fold cross-validation.
    param_grid = {'alpha': [1e0, 1e-1, 1e-2, 1e-3],
                  'gamma': [0.1, 0.01, 0.001, 0.0001]}

    kr_grid = GridSearchCV(kr, param_grid=param_grid, cv=5)
    kr_grid.fit(X_train, y_train)
    print("Best parameters: ", kr_grid.best_params_)
    print("Best score: ", kr_grid.best_score_)

    # Predict over the full data set for visualization.
    y_pred = kr_grid.predict(data_x)

    # Flatten the (n, 1) feature matrix to 1-D for plotting.
    x = data_x.ravel()

    # BUG FIX: make_regression returns x in arbitrary order, so a
    # 'lines' trace drawn in sample order is a jumbled zigzag.
    # Sort the points by x before drawing the prediction curve.
    order = x.argsort()

    fig = go.Figure(data=[
        go.Scatter(x=x, y=data_y, mode='markers', name='原始点集'),
        go.Scatter(x=x[order], y=y_pred[order], mode='lines', name='预测')
    ])
    fig.show()

运行结果: 

 

可以看出,效果确实不错。

 

支持向量机(分类模型)

就对自带的鸢尾花(iris)数据集分类而言(0.9777777777777777), 效果没线性和二次判别分析(1.0)好; 支持向量机广泛应用于模式识别、图像分类、文本分类等领域。

 

from sklearn import datasets

from sklearn.svm import SVC

from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import train_test_split

from sklearn.metrics import classification_report, accuracy_score

if __name__ == '__main__':
    # Load the iris data set and hold out 30% as a test split.
    iris = datasets.load_iris()
    features, labels = iris.data, iris.target
    train_raw, test_raw, y_train, y_test = train_test_split(
        features, labels, test_size=0.3, random_state=42)

    # Standardize features: fit the scaler on the training split only,
    # then apply the same transform to the test split.
    scaler = StandardScaler()
    train_scaled = scaler.fit_transform(train_raw)
    test_scaled = scaler.transform(test_raw)

    # Linear-kernel support-vector classifier.
    svm_clf = SVC(kernel='linear')
    svm_clf.fit(train_scaled, y_train)

    # Evaluate on the held-out test split.
    predictions = svm_clf.predict(test_scaled)

    print(classification_report(y_test, predictions))
    print(accuracy_score(y_test, predictions))

运行结果: 

 

KNeighborsClassifier 例子

import pandas as pd

from sklearn.datasets import load_breast_cancer

from sklearn.model_selection import train_test_split, GridSearchCV

from sklearn.neighbors import KNeighborsClassifier

from sklearn.preprocessing import StandardScaler


if __name__ == '__main__':
    breast_cancer = load_breast_cancer()
    # Feature matrix.
    data_x = breast_cancer.data
    # Target labels.
    data_y = breast_cancer.target
    X_train, X_test, y_train, y_test = train_test_split(data_x, data_y, test_size=0.3, random_state=42)

    # Standardize: fit on the training split only so test-set
    # statistics do not leak into the model.
    transfer = StandardScaler()
    x_train = transfer.fit_transform(X_train)
    x_test = transfer.transform(X_test)

    # Give the base model its own name instead of reusing (and
    # shadowing) the `estimator` variable for both objects.
    knn = KNeighborsClassifier()
    # list(range(...)) instead of a redundant list comprehension.
    param_grid = {'n_neighbors': list(range(1, 21))}

    estimator = GridSearchCV(estimator=knn, param_grid=param_grid, cv=5)
    estimator.fit(x_train, y_train)
    print('estimator.best_score_---', estimator.best_score_)
    print('estimator.best_estimator_---', estimator.best_estimator_)
    print('estimator.best_params_---', estimator.best_params_)

    # Dump the full cross-validation results for offline inspection.
    myret = pd.DataFrame(estimator.cv_results_)
    myret.to_csv(path_or_buf='./mygridsearchcv.csv')

  • 4
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值