首先根据几种算法在求解beta语句上的不同,初步使用选择语句运行四种算法,察看迭代次数和运行时间
import numpy as np
import time
def convex1(x):
"""Convex1目标函数"""
return np.sum(np.exp(x) - x)
def grad_convex1(x):
"""Convex1函数的梯度"""
return np.exp(x) - 1
def armijo_line_search(f, grad_f, x, d, sigma=0.02, beta=0.5):
"""
Armijo线搜索算法来确定步长。
"""
alpha = 1.0
while f(x + alpha * d) > f(x) + sigma * alpha * np.dot(grad_f(x), d):
alpha *= beta
return alpha
def conjugate_gradient_method(f, grad_f, x0, method='PRP', tol=1e-6, max_iter=1000):
"""
通用共轭梯度法实现,支持不同的β计算方法(PRP, FR, PRP+, CD, DY)
并使用Armijo搜索算法确定步长。
"""
x = x0.copy()
g = grad_f(x)
d = -g
n = len(x)
iters = 0
f_vals = [f(x)]
for _ in range(max_iter):
alpha = armijo_line_search(f, grad_f, x, d)
x_new = x + alpha * d
g_new = grad_f(x_new)
if method == 'FR':
beta = np.dot(g_new, g_new) / np.dot(g, g)
elif method == 'PRP':
beta = np.dot(g_new, g_new - g) / np.dot(g, g)
elif method == 'PRP+':
beta_prp = np.dot(g_new, g_new - g) / np.dot(g, g)
beta = max(0, beta_prp)
elif method == 'CD':
beta = -np.dot(g_new, g_new) / np.dot(d, g_new)
elif method == 'DY':
beta = np.dot(g_new, g_new) / np.dot(d, g_new - g)
d = -g_new + beta * d
x = x_new
g = g_new
f_vals.append(f(x))
iters += 1
if np.linalg.norm(g) < tol or iters >= max_iter:
break
return x, f_vals, iters
# 初始条件
n = 1000
x0 = np.array([(i+1)/n for i in range(n)])
# 共轭梯度方法列表
methods = ['PRP', 'FR', 'PRP+', 'CD', 'DY']
results = {}
for method in methods:
start_time = time.time()
x_final, f_vals, iters = conjugate_gradient_method(convex1, grad_convex1, x0, method=method)
end_time = time.time()
results[method] = {
'iterations': iters,
'runtime': end_time - start_time,
'final_value': f_vals[-1]
}
# 输出结果
for method, result in results.items():
print(f"Method: {method}, Iterations: {result['iterations']}, Runtime: {result['runtime']:.4f} seconds, Final value: {result['final_value']:.4f}")
Method: PRP, Iterations: 10, Runtime: 0.0120 seconds, Final value: 1000.0000
Method: FR, Iterations: 13, Runtime: 0.0030 seconds, Final value: 1000.0000
Method: PRP+, Iterations: 6, Runtime: 0.0070 seconds, Final value: 1000.0000
Method: CD, Iterations: 45, Runtime: 0.0200 seconds, Final value: 1000.0000
Method: DY, Iterations: 24, Runtime: 0.0050 seconds, Final value: 1000.0000
下面使用Scipy中定义好的优化器进行优化,求解出最后值作为对照
import numpy as np
from scipy.optimize import minimize
import time
# 定义目标函数
def convex1(x):
return np.sum(np.exp(x) - x)
# 定义梯度
def grad_convex1(x):
return np.exp(x) - 1
# 定义初始点
n = 1000
x0 = np.array([(i+1)/n for i in range(n)])
# 共轭梯度方法的比较
methods = ['CG'] # SciPy 中的共轭梯度方法标记为 'CG'
results = {}
# 进行优化并测量时间
for method in methods:
start_time = time.time()
res = minimize(convex1, x0, method=method, jac=grad_convex1, options={'disp': True})
end_time = time.time()
results[method] = {
'iterations': res.nit,
'runtime': end_time - start_time,
'success': res.success,
'final_value': res.fun
}
results
Optimization terminated successfully.
Current function value: 1000.000000
Iterations: 6
Function evaluations: 13
Gradient evaluations: 13
{'CG': {'iterations': 6,
'runtime': 0.0037050247192382812,
'success': True,
'final_value': 1000.0}}
以二维为例进行可视化,察看迭代路线
import numpy as np
import matplotlib.pyplot as plt
# Define the convex1 function for two dimensions
def convex1_2d(x):
return np.exp(x[0]) + np.exp(x[1]) - x[0] - x[1]
# Define the gradient for two dimensions
def grad_convex1_2d(x):
return np.array([np.exp(x[0]) - 1, np.exp(x[1]) - 1])
# Implement the Armijo line search for two dimensions
def armijo_line_search_2d(f, grad_f, x, d, sigma=0.02, beta=0.5):
alpha = 1.0
while f(x + alpha * d) > f(x) + sigma * alpha * np.dot(grad_f(x), d):
alpha *= beta
return alpha
# Conjugate gradient method for two dimensions and visualization
def conjugate_gradient_2d(f, grad_f, x0, tol=1e-6, max_iter=100):
x = x0.copy()
g = grad_f(x)
d = -g
x_path = [x.copy()]
for i in range(max_iter):
alpha = armijo_line_search_2d(f, grad_f, x, d)
x += alpha * d
g_new = grad_f(x)
beta = np.dot(g_new, g_new - g) / np.dot(g, g)
d = -g_new + beta * d
g = g_new
x_path.append(x.copy())
if np.linalg.norm(g) < tol:
break
return x, x_path
# Initialize and run the conjugate gradient method
x0_2d = np.array([1.0, 1.0]) # Initial point for 2D
x_final_2d, x_path_2d = conjugate_gradient_2d(convex1_2d, grad_convex1_2d, x0_2d)
# Generate grid for contour plot
x_range = np.linspace(-2, 2, 400)
y_range = np.linspace(-2, 2, 400)
X, Y = np.meshgrid(x_range, y_range)
Z = convex1_2d([X, Y])
# Plotting
plt.figure(figsize=(10, 6))
cp = plt.contourf(X, Y, Z, levels=100, cmap='viridis')
plt.colorbar(cp)
x_path_2d = np.array(x_path_2d)
plt.plot(x_path_2d[:, 0], x_path_2d[:, 1], 'ro-', linewidth=2, markersize=5)
# Annotate each point with the iteration number
for i, (x, y) in enumerate(x_path_2d):
plt.text(x, y, str(i), color="white", fontsize=8)
plt.title('Contour Plot of convex1 Function and Optimization Path (PRP)')
plt.xlabel('X1')
plt.ylabel('X2')
plt.show()
# Using the previously defined functions and procedures, let's re-run the optimization and print the details
x_final_2d, x_path_2d = conjugate_gradient_2d(convex1_2d, grad_convex1_2d, x0_2d)
# Printing the number of iterations and the final result
iteration_count = len(x_path_2d) - 1 # Subtracting the initial point
final_position = x_path_2d[-1]
final_value = convex1_2d(final_position)
iteration_count, final_position, final_value
(11, array([-4.57555886e-08, -4.57555886e-08]), 2.000000000000002)
# Reusing the previously defined functions, with minor modifications for higher-dimensional case
def convex1_highdim(x):
"""High-dimensional Convex1 objective function"""
return np.sum(np.exp(x) - x)
def grad_convex1_highdim(x):
"""Gradient of the Convex1 function for high-dimensional case"""
return np.exp(x) - 1
def armijo_line_search_highdim(f, grad_f, x, d, sigma=0.02, beta=0.5):
"""
Armijo line search algorithm to determine step size in high-dimensional space.
"""
alpha = 1.0
while f(x + alpha * d) > f(x) + sigma * alpha * np.dot(grad_f(x), d):
alpha *= beta
return alpha
def conjugate_gradient_highdim(f, grad_f, x0, method='PRP', tol=1e-6, max_iter=1000):
"""
Conjugate gradient method for high-dimensional optimization, supporting different beta calculation methods
using the Armijo line search for step size determination.
"""
x = x0.copy()
g = grad_f(x)
d = -g
n = len(x)
f_vals = [f(x)]
for iters in range(max_iter):
alpha = armijo_line_search_highdim(f, grad_f, x, d)
x_new = x + alpha * d
g_new = grad_f(x_new)
# Beta calculation methods
if method == 'FR':
beta = np.dot(g_new, g_new) / np.dot(g, g)
elif method == 'PRP':
beta = np.dot(g_new, g_new - g) / np.dot(g, g)
elif method == 'PRP+':
beta_prp = np.dot(g_new, g_new - g) / np.dot(g, g)
beta = max(0, beta_prp)
elif method == 'CD':
beta = -np.dot(g_new, g_new) / np.dot(d, g_new)
elif method == 'DY':
beta = np.dot(g_new, g_new) / np.dot(d, g_new - g)
d = -g_new + beta * d
x = x_new
g = g_new
f_vals.append(f(x))
if np.linalg.norm(g) < tol:
break
return x, f_vals, iters + 1 # Adding 1 to account for the initial iteration
# Initialize conditions for high-dimensional case (n = 1000)
n = 1000
x0_highdim = np.array([(i + 1) / n for i in range(n)])
# List of methods to test
methods = ['PRP', 'FR', 'PRP+', 'CD', 'DY']
results_highdim = {}
# Perform optimization using different conjugate gradient methods
for method in methods:
x_final_highdim, f_vals_highdim, iters_highdim = conjugate_gradient_highdim(
convex1_highdim, grad_convex1_highdim, x0_highdim, method=method)
results_highdim[method] = {
'iterations': iters_highdim,
'final_value': f_vals_highdim[-1],
'f_vals': f_vals_highdim
}
# Visualization: plotting the function values against iterations for each method
plt.figure(figsize=(12, 8))
for method, result in results_highdim.items():
plt.plot(result['f_vals'], label=f"{method} ({result['iterations']} iterations)")
plt.yscale('log')
plt.xlabel('Iterations')
plt.ylabel('Function Value (log scale)')
plt.title('Conjugate Gradient Method Performance for Convex1 Function')
plt.legend()
plt.grid(True)
plt.show()
# Return a summary of results to display
summary = {method: {'iterations': result['iterations'], 'final_value': result['final_value']} for method, result in results_highdim.items()}
summary
{'PRP': {'iterations': 10, 'final_value': 1000.0},
'FR': {'iterations': 13, 'final_value': 1000.0},
'PRP+': {'iterations': 6, 'final_value': 1000.0},
'CD': {'iterations': 45, 'final_value': 1000.0000000000002},
'DY': {'iterations': 24, 'final_value': 1000.0}}
# Let's measure and print the runtime for each conjugate gradient method in the high-dimensional case
import time
runtime_results = {}
for method in methods:
start_time = time.time()
_, _, _ = conjugate_gradient_highdim(convex1_highdim, grad_convex1_highdim, x0_highdim, method=method)
end_time = time.time()
runtime_results[method] = end_time - start_time
runtime_results
{'PRP': 0.004979848861694336,
'FR': 0.0010178089141845703,
'PRP+': 0.0020499229431152344,
'CD': 0.0063097476959228516,
'DY': 0.0020499229431152344}
# Let's visualize the runtime of each method in a bar chart
plt.figure(figsize=(10, 6))
plt.bar(runtime_results.keys(), runtime_results.values(), color='dodgerblue')
# Adding the actual runtime above the bars
for method, runtime in runtime_results.items():
plt.text(method, runtime, f"{runtime:.4f}", ha='center', va='bottom')
plt.title('Runtime of Conjugate Gradient Methods (n=1000)')
plt.xlabel('Method')
plt.ylabel('Runtime in seconds')
plt.tight_layout()
plt.show()
# Function to run optimization for different scales and return results
def run_optimization_scales(scales, methods):
scale_results = {}
for n in scales:
x0_scale = np.array([(i + 1) / n for i in range(n)])
method_results = {}
for method in methods:
start_time = time.time()
x_final, f_vals, iters = conjugate_gradient_highdim(
convex1_highdim, grad_convex1_highdim, x0_scale, method=method)
end_time = time.time()
method_results[method] = {
'iterations': iters,
'runtime': end_time - start_time,
'final_value': f_vals[-1]
}
scale_results[n] = method_results
return scale_results
# Problem scales to test
scales = [100, 1000, 10000, 100000]
# Running the optimization for different problem scales
results_scales = run_optimization_scales(scales, methods)
# Visualization: creating subplots for runtimes and iteration counts for each scale
fig, axes = plt.subplots(2, 1, figsize=(14, 12))
# Runtime plot
axes[0].set_title('Runtime Comparison')
axes[0].set_xlabel('Problem Scale (n)')
axes[0].set_ylabel('Runtime in seconds (log scale)')
axes[0].set_xscale('log')
axes[0].set_yscale('log')
# Iteration plot
axes[1].set_title('Iteration Count Comparison')
axes[1].set_xlabel('Problem Scale (n)')
axes[1].set_ylabel('Iteration Count')
axes[1].set_xscale('log')
# Plotting the data
for method in methods:
runtimes = [results_scales[n][method]['runtime'] for n in scales]
iterations = [results_scales[n][method]['iterations'] for n in scales]
axes[0].plot(scales, runtimes, 'o-', label=method)
axes[1].plot(scales, iterations, 's-', label=method)
axes[0].legend()
axes[1].legend()
plt.tight_layout()
plt.show()
# Return a summary of results to display
summary_scales = {n: {method: {'iterations': result['iterations'], 'runtime': result['runtime']}
for method, result in scale_results.items()}
for n, scale_results in results_scales.items()}
summary_scales
{100: {'PRP': {'iterations': 10, 'runtime': 0.0030014514923095703},
'FR': {'iterations': 12, 'runtime': 0.0010068416595458984},
'PRP+': {'iterations': 6, 'runtime': 0.0010068416595458984},
'CD': {'iterations': 41, 'runtime': 0.003999948501586914},
'DY': {'iterations': 22, 'runtime': 0.0010001659393310547}},
1000: {'PRP': {'iterations': 10, 'runtime': 0.00402069091796875},
'FR': {'iterations': 13, 'runtime': 0.0010066032409667969},
'PRP+': {'iterations': 6, 'runtime': 0.0019996166229248047},
'CD': {'iterations': 45, 'runtime': 0.01191568374633789},
'DY': {'iterations': 24, 'runtime': 0.008279085159301758}},
10000: {'PRP': {'iterations': 10, 'runtime': 0.0578000545501709},
'FR': {'iterations': 13, 'runtime': 0.02043914794921875},
'PRP+': {'iterations': 6, 'runtime': 0.04025983810424805},
'CD': {'iterations': 49, 'runtime': 0.09395337104797363},
'DY': {'iterations': 25, 'runtime': 0.041649580001831055}},
100000: {'PRP': {'iterations': 10, 'runtime': 0.5221173763275146},
'FR': {'iterations': 14, 'runtime': 0.2383120059967041},
'PRP+': {'iterations': 6, 'runtime': 0.4488406181335449},
'CD': {'iterations': 1000, 'runtime': 14.281717538833618},
'DY': {'iterations': 27, 'runtime': 0.39960527420043945}}}