最优化实验
TIKHONOV 正则化模型用于图片去噪
import numpy as np
import matplotlib.pyplot as plt
from scipy.sparse import diags, eye, kron
from scipy.sparse.linalg import spsolve
from skimage import io, util, metrics, color
image_path = "C:\\Users\Lenovo\Desktop\_20240517100615.jpg"
image = util.img_as_float(io.imread(image_path))
image_gray = color.rgb2gray(image)
noise_level = 0.1
noisy_image = util.random_noise(image_gray, mode='gaussian', var=noise_level ** 2)
def tikhonov_denoising(image, alpha):
m, n = image.shape
dx = diags([1, -1], [0, 1], shape=(n, n))
dy = diags([1, -1], [0, 1], shape=(m, m))
D = kron(dx.T.dot(dx), eye(m)) + kron(eye(n), dy.T.dot(dy))
f = image.flatten()
ATA = D + alpha * eye(m * n)
ATb = D.dot(f)
solution = spsolve(ATA, ATb)
return solution.reshape(m, n)
lambda_values = [0.001, 0.01, 0.1,0.5,1]
psnr_values = []
denoised_images = []
for alpha in lambda_values:
denoised_image = tikhonov_denoising(noisy_image, alpha)
psnr = metrics.peak_signal_noise_ratio(image_gray, denoised_image)
denoised_images.append(denoised_image)
fig, axes = plt.subplots( 1 , 1+len(lambda_values), figsize=(10, 5))
axes[0].imshow(image_gray, cmap='gray')
axes[0].set_title('Original Image')
axes[0].axis('off')
for i, (alpha, denoised_image, psnr) in enumerate(zip(lambda_values, denoised_images, psnr_values)):
axes[i + 1].imshow(denoised_image, cmap='gray')
axes[i + 1].set_title(f'λ: {alpha}\np: {psnr:.2f} dB')
axes[i + 1].axis('off')
plt.tight_layout()
plt.show()
利用 L-BFGS 方法求解逻辑回归问题
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import minimize
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_curve, auc
def sigmoid(z):
return 1 / (1 + np.exp(-z))
def cost_function(theta, X, y):
m = len(y)
h = sigmoid(np.dot(X, theta))
epsilon = 1e-5
cost = -(1/m) * (np.dot(y, np.log(h + epsilon)) + np.dot((1 - y), np.log(1 - h + epsilon)))
return cost
def gradient_function(theta, X, y):
m = len(y)
h = sigmoid(np.dot(X, theta))
gradient = (1/m) * np.dot(X.T, (h - y))
return gradient
X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=1)
X = np.hstack((np.ones((X.shape[0], 1)), X))
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
initial_theta = np.zeros(X_train.shape[1])
result = minimize(fun=cost_function, x0=initial_theta, args=(X_train, y_train), method='L-BFGS-B', jac=gradient_function)
optimized_theta = result.x
y_train_pred = sigmoid(np.dot(X_train, optimized_theta)) >= 0.5
y_test_pred = sigmoid(np.dot(X_test, optimized_theta)) >= 0.5
train_accuracy = accuracy_score(y_train, y_train_pred)
test_accuracy = accuracy_score(y_test, y_test_pred)
print(f'Train Accuracy: {train_accuracy * 100:.2f}%')
print(f'Test Accuracy: {test_accuracy * 100:.2f}%')
fig, ax = plt.subplots(1, 2, figsize=(12, 6))
training_costs = []
for i in range(1, 1001):
res = minimize(fun=cost_function, x0=initial_theta, args=(X_train[:i], y_train[:i]), method='L-BFGS-B', jac=gradient_function)
training_costs.append(cost_function(res.x, X_train[:i], y_train[:i]))
ax[0].plot(range(1, 1001), training_costs)
ax[0].set_title('Learning Curve')
ax[0].set_xlabel('Number of training examples')
ax[0].set_ylabel('Cost')
y_test_proba = sigmoid(np.dot(X_test, optimized_theta))
fpr, tpr, _ = roc_curve(y_test, y_test_proba)
roc_auc = auc(fpr, tpr)
ax[1].plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
ax[1].set_xlim([0.0, 1.0])
ax[1].set_ylim([0.0, 1.05])
ax[1].set_xlabel('False Positive Rate')
ax[1].set_ylabel('True Positive Rate')
ax[1].set_title('Characteristic')
ax[1].legend(loc="lower right")
plt.tight_layout()
plt.show()
罚函数法解基追踪问题
import numpy as np
import matplotlib.pyplot as plt
def normalize(A):
return (A - np.mean(A, axis=0)) / np.std(A, axis=0)
def objective_function(A, x, b, mu):
return np.linalg.norm(x, 1) + (mu / 2) * np.linalg.norm(A @ x - b)**2
def gradient(A, x, b, mu):
return np.sign(x) + mu * A.T @ (A @ x - b)
def basis_pursuit(A, b, mu, learning_rate=0.001, iterations=5000):
x = np.zeros(A.shape[1])
for i in range(iterations):
grad = gradient(A, x, b, mu)
x = x - learning_rate * grad
x = np.clip(x, -1e10, 1e10)
return x
np.random.seed(42)
A = np.random.randn(100, 200)
A = normalize(A)
x_true = np.zeros(200)
indices = np.random.choice(np.arange(200), 20, replace=False)
x_true[indices] = np.random.randn(20)
b = A @ x_true
mu = 1.0
x_estimated = basis_pursuit(A, b, mu, learning_rate=0.001, iterations=5000)
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.stem(x_true, use_line_collection=True)
plt.title('Sparse Coefficients')
plt.subplot(1, 2, 2)
plt.stem(x_estimated, use_line_collection=True)
plt.title('Estimated Sparse Coefficients')
plt.tight_layout()
plt.show()
利用增广拉格朗日函数法解基追踪问题
import numpy as np
import matplotlib.pyplot as plt
def normalize(A):
return (A - np.mean(A, axis=0)) / np.std(A, axis=0)
def augmented_lagrangian(A, x, b, lambda_, mu):
return np.linalg.norm(x, 1) + lambda_.T @ (A @ x - b) + (mu / 2) * np.linalg.norm(A @ x - b)**2
def gradient(A, x, b, lambda_, mu):
return np.sign(x) + A.T @ lambda_ + mu * A.T @ (A @ x - b)
def basis_pursuit_alm(A, b, mu, learning_rate=0.001, iterations=5000):
m, n = A.shape
x = np.zeros(n)
lambda_ = np.zeros(m)
for i in range(iterations):
grad = gradient(A, x, b, lambda_, mu)
x = x - learning_rate * grad
x = np.clip(x, -1e10, 1e10)
lambda_ = lambda_ + mu * (A @ x - b)
return x
np.random.seed(42)
A = np.random.randn(100, 200)
A = normalize(A)
x_true = np.zeros(200)
indices = np.random.choice(np.arange(200), 10, replace=False)
x_true[indices] = np.random.randn(10)
b = A @ x_true
mu = 1.0
x_estimated = basis_pursuit_alm(A, b, mu, learning_rate=0.001, iterations=5000)
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.stem(x_true, use_line_collection=True)
plt.title('True Sparse Coefficients')
plt.subplot(1, 2, 2)
plt.stem(x_estimated, use_line_collection=True)
plt.title('Estimated Sparse Coefficients using ALM')
plt.tight_layout()
plt.show()
近似点梯度法和NESTEROV加速算法求解LASSO问题
import numpy as np
import matplotlib.pyplot as plt
np.random.seed(42)
n_samples, n_features = 100, 20
X = np.random.randn(n_samples, n_features)
true_beta = np.zeros(n_features)
true_beta[:5] = np.random.randn(5)
y = X @ true_beta + 0.01 * np.random.randn(n_samples)
def lasso_loss(beta, X, y, lam):
residual = X @ beta - y
return 0.5 / X.shape[0] * np.sum(residual**2) + lam * np.sum(np.abs(beta))
def lasso_gradient(beta, X, y, lam):
residual = X @ beta - y
grad = 1.0 / X.shape[0] * (X.T @ residual) + lam * np.sign(beta)
return grad
def proximal_gradient_descent(X, y, lam, lr=0.01, num_iters=1000):
beta = np.zeros(X.shape[1])
for i in range(num_iters):
grad = lasso_gradient(beta, X, y, lam)
beta = beta - lr * grad
beta = np.sign(beta) * np.maximum(0, np.abs(beta) - lr * lam)
return beta
def nesterov_accelerated_gradient(X, y, lam, lr=0.01, num_iters=1000):
beta = np.zeros(X.shape[1])
beta_prev = beta.copy()
for i in range(num_iters):
grad = lasso_gradient(beta + (i / (i + 3)) * (beta - beta_prev), X, y, lam)
beta_new = beta - lr * grad
beta_new = np.sign(beta_new) * np.maximum(0, np.abs(beta_new) - lr * lam)
beta_prev = beta
beta = beta_new
return beta
lam = 0.4
beta_pg = proximal_gradient_descent(X, y, lam)
beta_nag = nesterov_accelerated_gradient(X, y, lam)
plt.figure(figsize=(12, 6))
plt.plot(true_beta, 'o-', label='True Beta')
plt.plot(beta_pg, 's-', label='Proximal Gradient')
plt.plot(beta_nag, 'd-', label='Nesterov Accelerated Gradient')
plt.xlabel('Coefficient Index')
plt.ylabel('Coefficient Value')
plt.legend()
plt.title('Comparison of LASSO Solutions')
plt.show()
print("True Beta:\n", true_beta)
print("Proximal Gradient Beta:\n", beta_pg)
print("Nesterov Accelerated Gradient Beta:\n", beta_nag)
近似点算法解LASSO问题
import numpy as np
import matplotlib.pyplot as plt
np.random.seed(42)
n_samples, n_features = 100, 20
X = np.random.randn(n_samples, n_features)
true_beta = np.zeros(n_features)
true_beta[:5] = np.random.randn(5)
y = X @ true_beta + 0.01 * np.random.randn(n_samples)
def lasso_loss(beta, X, y, lam):
residual = X @ beta - y
return 0.5 / X.shape[0] * np.sum(residual ** 2) + lam * np.sum(np.abs(beta))
def lasso_gradient(beta, X, y, lam):
residual = X @ beta - y
grad = 1.0 / X.shape[0] * (X.T @ residual) + lam * np.sign(beta)
return grad
def proximal_point_algorithm(X, y, lam, lr=0.01, num_iters=1000, tol=1e-6, method='coordinate_descent'):
beta = np.zeros(X.shape[1])
for i in range(num_iters):
beta_old = beta.copy()
if method == 'coordinate_descent':
for j in range(len(beta)):
grad = lasso_gradient(beta, X, y, lam)
beta[j] -= lr * grad[j]
beta[j] = np.sign(beta[j]) * max(0, np.abs(beta[j]) - lr * lam)
elif method == 'random_gradient':
j = np.random.randint(len(beta))
grad = lasso_gradient(beta, X, y, lam)
beta[j] -= lr * grad[j]
beta[j] = np.sign(beta[j]) * max(0, np.abs(beta[j]) - lr * lam)
else:
raise ValueError("Unknown method: {}".format(method))
if np.linalg.norm(beta - beta_old) < tol:
print("Convergence reached at iteration {}".format(i))
break
return beta
lam = 0.1
beta_cd = proximal_point_algorithm(X, y, lam, method='coordinate_descent')
beta_rg = proximal_point_algorithm(X, y, lam, method='random_gradient')
plt.figure(figsize=(12, 6))
plt.plot(true_beta, 'o-', label='True Beta')
plt.plot(beta_cd, 's-', label='Coordinate Descent')
plt.plot(beta_rg, 'd-', label='Random Gradient')
plt.xlabel('Coefficient Index')
plt.ylabel('Coefficient Value')
plt.legend()
plt.title('Comparison of LASSO Solutions')
plt.show()
print("True Beta:\n", true_beta)
print("Coordinate Descent Beta:\n", beta_cd)
print("Random Gradient Beta:\n", beta_rg)