Common optimization algorithms in artificial intelligence (gradient descent, Newton's method, quasi-Newton methods, conjugate gradient)

1. Gradient descent: Gradient descent is an optimization algorithm for finding a local minimum of a function. It iteratively updates the parameters in the direction opposite to the gradient, decreasing the value of the objective function.
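
As a minimal sketch of the update rule itself (added for illustration; the toy objective f(x) = (x - 3)^2, the step size, and the iteration count are assumptions, not part of the original example), each step applies x <- x - eta * grad f(x):

# Gradient-descent sketch on the assumed toy objective f(x) = (x - 3)^2
def f_grad(x):
    return 2.0 * (x - 3.0)  # analytic gradient of (x - 3)^2

x = 0.0              # starting point
learning_rate = 0.1  # step size eta
for step in range(100):
    x -= learning_rate * f_grad(x)  # x <- x - eta * grad f(x)

print(x)  # approaches the minimizer x = 3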

Code (note: the grid-search example below tunes decision-tree hyperparameters and does not itself run gradient descent; the sketch above shows the core update):

from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
import numpy as np

# Load the iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the hyperparameter grid for the grid search
param_grid = {
    'max_depth': [3, 5, 7, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'criterion': ['gini', 'entropy']
}

# Initialize GridSearchCV
grid_search = GridSearchCV(estimator=DecisionTreeClassifier(), param_grid=param_grid, cv=5)

# Fit the grid search on the training set
grid_search.fit(X_train, y_train)

# Collect the grid-search results
results = grid_search.cv_results_
param_names = list(param_grid.keys())
params_combinations = results['params']
mean_test_scores = results['mean_test_score']

# Print the best hyperparameters
print("Best hyperparameters:", grid_search.best_params_)

# Retrieve the best model
best_clf = grid_search.best_estimator_

# Evaluate the best model on the test set
y_pred = best_clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Compute the confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)

# Plot mean CV accuracy against each hyperparameter's values
plt.figure(figsize=(12, 8))

for param in param_names:
    # Cast values to str so None and categorical values share one categorical axis
    param_values = [str(x[param]) for x in params_combinations]
    plt.plot(param_values, mean_test_scores, marker='o', label=f'{param}')

plt.title('Hyperparameter Tuning Results')
plt.xlabel('Hyperparameter Values')
plt.ylabel('Mean Test Score (Accuracy)')
plt.legend()
plt.grid(True)
plt.show()

# Plot true vs. predicted class labels
plt.figure(figsize=(10, 6))

plt.scatter(np.arange(len(y_test)), y_test, color='blue', label='True Values', alpha=0.5)
plt.scatter(np.arange(len(y_test)), y_pred, color='red', label='Predicted Values', alpha=1, marker='*')
plt.title('True vs Predicted Values')
plt.xlabel('Sample Index')
plt.ylabel('Class')
plt.legend()
plt.show()

# Print the confusion matrix
print("Confusion matrix:")
print(conf_matrix)

2. Newton's method: Newton's method is an iterative optimization algorithm that uses second-derivative information about the function to update the parameters, converging to a local minimum faster than first-order methods.
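
As a minimal sketch of the iteration (added for illustration; the one-dimensional toy objective f(x) = x^4 - 3x^3 + 2 and the starting point are assumptions), Newton's method repeatedly applies x <- x - f'(x) / f''(x):

# Newton's method sketch on the assumed toy objective f(x) = x^4 - 3x^3 + 2
def f_prime(x):
    return 4 * x**3 - 9 * x**2  # first derivative

def f_double_prime(x):
    return 12 * x**2 - 18 * x   # second derivative

x = 4.0  # starting point
for step in range(20):
    x -= f_prime(x) / f_double_prime(x)  # Newton update

print(x)  # converges to the local minimizer x = 2.25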

Code (note: the example below applies scipy's Newton-CG to discrete decision-tree hyperparameters, so the hand-written gradient is only a heuristic):

from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import minimize

# 加载鸢尾花数据集
iris = load_iris()
X = iris.data
y = iris.target

# 将数据集分割为训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# Define the cross-entropy loss and its (heuristic) gradient
def cross_entropy_loss(theta, X, y):
    # Unpack the decision-tree hyperparameters
    max_depth, min_samples_split, min_samples_leaf = theta

    # sklearn requires integer hyperparameters, so round the continuous values
    max_depth = max(1, int(max_depth))
    min_samples_split = max(2, int(min_samples_split))
    min_samples_leaf = max(1, int(min_samples_leaf))

    # Train the decision tree
    clf = DecisionTreeClassifier(max_depth=max_depth, min_samples_split=min_samples_split,
                                 min_samples_leaf=min_samples_leaf)
    clf.fit(X, y)

    # Compute the cross-entropy loss (probabilities clipped to avoid log(0))
    y_pred_proba = np.clip(clf.predict_proba(X), 1e-12, 1.0)
    loss = -np.sum(np.log(y_pred_proba[np.arange(len(y)), y])) / len(y)

    return loss


def cross_entropy_gradient(theta, X, y):
    # Unpack and round the hyperparameters as in the loss function
    max_depth, min_samples_split, min_samples_leaf = theta
    max_depth = max(1, int(max_depth))
    min_samples_split = max(2, int(min_samples_split))
    min_samples_leaf = max(1, int(min_samples_leaf))

    # Train the decision tree
    clf = DecisionTreeClassifier(max_depth=max_depth, min_samples_split=min_samples_split,
                                 min_samples_leaf=min_samples_leaf)
    clf.fit(X, y)

    # Compute the predicted class probabilities
    y_pred_proba = clf.predict_proba(X)

    # Heuristic logistic-regression-style gradient; the tree hyperparameters are
    # discrete, so this is not a true derivative of the loss with respect to theta
    grad = np.zeros(3)
    for i in range(len(X)):
        for k in range(3):
            if k == y[i]:
                grad[0] += -(1 - y_pred_proba[i, k]) * X[i, 0]
                grad[1] += -(1 - y_pred_proba[i, k]) * X[i, 1]
                grad[2] += -(1 - y_pred_proba[i, k]) * X[i, 2]
            else:
                grad[0] += y_pred_proba[i, k] * X[i, 0]
                grad[1] += y_pred_proba[i, k] * X[i, 1]
                grad[2] += y_pred_proba[i, k] * X[i, 2]

    grad /= len(y)

    return grad


# Initial hyperparameter guess: [max_depth, min_samples_split, min_samples_leaf]
theta_initial = [3, 2, 1]

# Minimize the cross-entropy loss with scipy's Newton-CG method
result = minimize(cross_entropy_loss, theta_initial, args=(X_train, y_train), jac=cross_entropy_gradient,
                  method='Newton-CG')

# Extract the optimized hyperparameters and round them to valid integers
max_depth_opt, min_samples_split_opt, min_samples_leaf_opt = result.x
max_depth_opt = max(1, int(max_depth_opt))
min_samples_split_opt = max(2, int(min_samples_split_opt))
min_samples_leaf_opt = max(1, int(min_samples_leaf_opt))

# Build the final model with the optimized hyperparameters
best_clf = DecisionTreeClassifier(max_depth=max_depth_opt, min_samples_split=min_samples_split_opt,
                                  min_samples_leaf=min_samples_leaf_opt)
best_clf.fit(X_train, y_train)

# Evaluate the final model
y_pred = best_clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Compute and print the confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion matrix:")
print(conf_matrix)

3. Quasi-Newton methods: Quasi-Newton methods are a family of optimization algorithms that approximate second-derivative information from first derivatives and the history of iterates. By updating a quasi-Newton matrix that approximates the Newton step, they avoid the cost of computing second derivatives directly.
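
As a minimal sketch of the idea on a smooth objective (added for illustration; the Rosenbrock function and the starting point are assumptions), scipy's BFGS quasi-Newton implementation needs only the function and its gradient:

import numpy as np
from scipy.optimize import minimize

# Rosenbrock function and its analytic gradient (assumed toy objective)
def rosenbrock(p):
    x, y = p
    return (1 - x)**2 + 100 * (y - x**2)**2

def rosenbrock_grad(p):
    x, y = p
    return np.array([-2 * (1 - x) - 400 * x * (y - x**2),
                     200 * (y - x**2)])

# BFGS builds its Hessian approximation from successive gradient differences
result = minimize(rosenbrock, x0=np.array([-1.0, 1.0]), jac=rosenbrock_grad, method='BFGS')
print(result.x)  # converges to the global minimum at (1, 1)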

Code (note: min_samples_split is an integer hyperparameter, so the loss below is piecewise constant and the finite-difference gradient is only a heuristic):

import numpy as np
from scipy.optimize import minimize
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Load the Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define cross-entropy loss function (probabilities clipped to avoid log(0))
def cross_entropy_loss(theta, X, y):
    clf = DecisionTreeClassifier(min_samples_split=max(2, int(theta[0])))
    clf.fit(X, y)
    y_pred = np.clip(clf.predict_proba(X), 1e-12, 1.0)
    loss = -np.mean(np.log(y_pred[np.arange(len(y)), y]))
    return loss

# Approximate the gradient with a central finite difference; the original version
# never refit the tree, so it always returned zero. The loss is piecewise constant
# in the integer hyperparameter, so this remains a heuristic.
def cross_entropy_gradient(theta, X, y):
    h = 1.0
    loss_plus = cross_entropy_loss(theta + h, X, y)
    loss_minus = cross_entropy_loss(theta - h, X, y)
    return np.array([(loss_plus - loss_minus) / (2 * h)])

# Initial guess for theta
theta_initial = np.array([2.0])  # Initial value for min_samples_split

# Perform optimization using BFGS
result = minimize(cross_entropy_loss, theta_initial, args=(X_train, y_train), jac=cross_entropy_gradient,
                  method='BFGS')

# Get the optimal value of theta
optimal_theta = result.x

# Train the classifier with the optimal min_samples_split value
optimal_min_samples_split = max(2, int(round(optimal_theta[0])))
optimal_clf = DecisionTreeClassifier(min_samples_split=optimal_min_samples_split)
optimal_clf.fit(X_train, y_train)

# Evaluate the classifier on the test data
accuracy = optimal_clf.score(X_test, y_test)
print("Optimal min_samples_split:", optimal_min_samples_split)
print("Accuracy:", accuracy)

4. Conjugate gradient: The conjugate gradient method is an optimization algorithm for solving large-scale linear systems or minimizing quadratic objectives. It exploits the conjugacy of search directions: at each iteration it finds a direction conjugate to the previous ones and updates the parameters along it, which speeds up convergence.
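
As a minimal sketch (added for illustration; the 2x2 system is an assumed toy example), the classic linear conjugate gradient iteration solves Ax = b for a symmetric positive-definite A:

import numpy as np

# Classic conjugate gradient for Ax = b with symmetric positive-definite A
def conjugate_gradient(A, b, tol=1e-10, max_iter=100):
    x = np.zeros_like(b)
    r = b - A @ x  # residual, i.e. the negative gradient of 0.5*x'Ax - b'x
    p = r.copy()   # first search direction
    for _ in range(max_iter):
        Ap = A @ p
        alpha = (r @ r) / (p @ Ap)  # exact line-search step along p
        x += alpha * p
        r_new = r - alpha * Ap
        if np.linalg.norm(r_new) < tol:
            break
        beta = (r_new @ r_new) / (r @ r)  # keeps successive directions A-conjugate
        p = r_new + beta * p
        r = r_new
    return x

# Assumed toy system
A = np.array([[4.0, 1.0], [1.0, 3.0]])
b = np.array([1.0, 2.0])
print(conjugate_gradient(A, b))  # matches np.linalg.solve(A, b)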

Code:

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
import numpy as np

# Load the iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the hyperparameter grid for the grid search
param_grid = {
    'C': [0.001, 0.01, 0.1, 1, 10, 100],  # inverse regularization strength
    'penalty': ['l2'],                     # L2 regularization
    'solver': ['newton-cg'],               # Newton-CG solves each Newton step with conjugate gradient
}

# Initialize GridSearchCV (max_iter raised so the solver converges on iris)
grid_search = GridSearchCV(estimator=LogisticRegression(max_iter=1000), param_grid=param_grid, cv=5)

# Fit the grid search on the training set
grid_search.fit(X_train, y_train)

# Collect the grid-search results
results = grid_search.cv_results_
param_names = list(param_grid.keys())
params_combinations = results['params']
mean_test_scores = results['mean_test_score']

# Print the best hyperparameters
print("Best hyperparameters:", grid_search.best_params_)

# Retrieve the best model
best_clf = grid_search.best_estimator_

# Evaluate the best model on the test set
y_pred = best_clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Compute the confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)

# Plot mean CV accuracy against each hyperparameter's values
plt.figure(figsize=(12, 8))

for param in param_names:
    # Cast values to str so numeric and categorical values share one categorical axis
    param_values = [str(x[param]) for x in params_combinations]
    plt.plot(param_values, mean_test_scores, marker='o', label=f'{param}')

plt.title('Hyperparameter Tuning Results')
plt.xlabel('Hyperparameter Values')
plt.ylabel('Mean Test Score (Accuracy)')
plt.legend()
plt.grid(True)
plt.show()

# Plot true vs. predicted class labels
plt.figure(figsize=(10, 6))

plt.scatter(np.arange(len(y_test)), y_test, color='blue', label='True Values', alpha=0.5)
plt.scatter(np.arange(len(y_test)), y_pred, color='red', label='Predicted Values', alpha=1, marker='*')
plt.title('True vs Predicted Values')
plt.xlabel('Sample Index')
plt.ylabel('Class')
plt.legend()
plt.show()

# Print the confusion matrix
print("Confusion matrix:")
print(conf_matrix)