求解函数最值的几种算法，梯度下降法python实现

最新推荐文章于 2024-03-02 15:48:54 发布

哇酷哇酷哇酷

最新推荐文章于 2024-03-02 15:48:54 发布

阅读量2.7k

点赞数 8

文章标签： python 算法 numpy

本文链接：https://blog.csdn.net/qq_53019168/article/details/125471530

版权

原理就不做过多介绍了，直接上代码。其中的一维线搜索写了三种方法（黄金分割法，Armijo线搜索，Wolfe-Powell线搜索），可以稍微对比以下。

一、黄金分割法

黄金分割法计算步骤

import numpy as np

def golden(f, x0, d, s):
    '''
    黄金分割法求最小值
    返回下降步长
    f: 需要求最小值的函数
    x0: x_0
    d: 方向 
    s: 步长上限
    '''
    t,e=0.618,1e-5
    a,b=0,s
    a1=a+(1-t)*(b-a)
    a2=a+t*(b-a)
    f1,f2=f(x0+a1*d),f(x0+a2*d)
    while abs(a-b)>=e:
        if f1<f2:
            a2,f2,b=a1,f1,a2
            a1=a+(1-t)*(b-a)
            f1=f(x0+a1*d)
        else:
            a,a1,f1=a1,a2,f2
            a2=a+t*(b-a)
            f2=f(x0+a2*d)
    a=(a1+a2)/2
    return a

二、Armijo线搜索

Armijo线搜索计算步骤

def armijo(f,df, x, d, sigma=0.1):
    '''
    Armijo非精确线搜索求最小值
    返回搜索步长
    f: 需要求最小值的函数
    x: x_0
    d: 搜索方向
    sigma: armijo准则中的参数
    '''
    alpha, p, k=1.0, 0.8, 1
    while f(x+alpha*d)>=f(x)+sigma*alpha*df(x).dot(d) and k<100:
        alpha *= p
        k += 1
    return alpha

三、Wolfe-Powell线搜索

Wolfe-Powell线搜索计算步骤

def wolfe_powell(f,df,x,d,sigma1=1e-3,sigma2=0.9):
    '''
    Wolfe-powell非精确线搜索求最小值
    返回搜索步长
    f: 需要求最小值的函数
    x: x_0
    d: 搜索方向
    sigma1, sigma2: Wolfe-powell准则中的参数
    '''
    a, b, alpha = 0, float('inf'), 1
    k = 0
    while k<1000:
        k += 1
        if f(x+alpha*d) > f(x)+sigma1*alpha*df(x).dot(d):       # 若不满足Wolfe-powell准则1,则缩小alpha
            b = alpha
            alpha = (alpha + a)/2   
        elif df(x+alpha*d).dot(d) < sigma2*df(x).dot(d):        # 若不满足Wolfe-powell准则2,则增大alpha
            a = alpha
            alpha = min(2*alpha, (alpha+b)/2)
        else:                                                   # 若准则1和2均满足，则跳出循环
            break
    return alpha

四、梯度下降法

梯度下降法计算步骤

def steepest_descent(f,df, x):
    '''
    最速下降法求最小值
    其中利用黄金分割法求步长
    f: 需要求最小值的函数
    df: 函数的导数
    x: 初始点
    '''
    k = 1
    while np.linalg.norm(df(x))>1e-5 and k<1000:
        d = -df(x)
    # 以下三种方法都是求一维线搜索的步长，根据需要任选一种，注释掉其余两行即可。
        #alpha = golden(f, x , d, 10)                # 黄金分割法求步长
        #alpha = armijo(f, df, x, d, sigma=0.1)    # armijo非精确搜索求步长
        alpha = wolfe_powell(f,df,x,d,sigma1=0.1,sigma2=0.9) # wolfe-powell非精确搜索求步长
        x = x + alpha*d
        k += 1
    return [x,f(x),k]       # 返回最小值点，函数最小值，迭代次数

五、实例以及执行结果

def testF1(x):
    return 4*(x[0]-2*x[1]-4)**2+(x[0]-2)**2

def testDF1(x):
    res = np.zeros([2])
    res[0] = 8*(x[0]-2*x[1]-4)+2*(x[0]-2)
    res[1] = -16*(x[0]-2*x[1]-4)
    return res

def testF2(x):
    return (x[0]-1)**2+(2*x[1]-1)**2+(4*x[2]-1)**2+(8*x[3]-1)**2

def testDF2(x):
    res = np.zeros([4])
    res[0] = 2*(x[0]-1)
    res[1] = 2*(2*x[1]-1)
    res[2] = 2*(4*x[2]-1)
    res[3] = 2*(8*x[3]-1)
    return res

problems = [
    {
        "f": testF1,
        "df": testDF1,
        "x0": np.array([1, 0])
    },
    {
        "f": testF2,
        "df": testDF2,
        "x0": np.array([0, 0, 0, 0])
    }
]

for problem in problems:
    '''
    problem成员描述

    f: 函数f(x)
    df: 函数的梯度df(x)
    x0: 给定的初始点
    '''
    #print("测试函数在初始点的函数值为{}，梯度为{}".format(problem["f"](problem["x0"]), problem["df"](problem["x0"])))
#steepest_descent1,2,3分别是用不同线搜索方法所写的梯度下降算法，详见上steepest_descent的代码块
    result1 = steepest_descent1(problem['f'],problem['df'],problem['x0'])
    result2 = steepest_descent2(problem['f'],problem['df'],problem['x0'])
    result3 = steepest_descent3(problem['f'],problem['df'],problem['x0'])
    print('利用梯度下降法及黄金分割法求得测试函数在点{}取得最小值{},迭代次数为{}'.format(result1[0], result1[1],result1[2]))
    print('利用梯度下降法及Armijo线搜索求得测试函数在点{}取得最小值{},迭代次数为{}'.format(result2[0], result2[1],result2[2]))
    print('利用梯度下降法及Wolfe-Powell线搜索求得测试函数在点{}取得最小值{},迭代次数为{}'.format(result3[0], result3[1],result3[2]),end='\n\n')