代码实现:
import numpy as np
import matplotlib.pyplot as plt

# Build a 2-D sample: x1 is uniform on [0, 100); x2 is a noisy linear
# function of x1 (slope 0.75, intercept 3, Gaussian noise with sigma = 10).
sample_count = 100
X = np.empty((sample_count, 2))
X[:, 0] = np.random.uniform(0.0, 100.0, size=sample_count)
X[:, 1] = 0.75 * X[:, 0] + 3.0 + np.random.normal(0, 10.0, size=sample_count)
plt.scatter(X[:, 0], X[:, 1])
plt.show()
结果:
#%% demean
def demean(X):
    """Center the data: subtract each column's mean so every feature averages 0."""
    column_means = np.mean(X, axis=0)
    return X - column_means
# Center the sample and visualize it: the cloud now sits around the origin.
X_demean = demean(X)
plt.scatter(*X_demean.T)
plt.show()
结果:
np.mean(X_demean[:,0])
3.552713678800501e-16
np.mean(X_demean[:,1])
-5.684341886080802e-16
即,两列的均值都近似为 0。
#%% 梯度上升法
def f(w, X):
    """Objective to maximize: mean squared length of the projections of X's rows onto w."""
    projections = X.dot(w)
    return projections.dot(projections) / len(X)
def df_math(w, X):
    """Analytic gradient of f with respect to w: (2/n) * X^T (X w)."""
    n = len(X)
    return (2.0 / n) * X.T.dot(X.dot(w))
def df_debug(w, X, epsilon=0.0001):
    """Numerical gradient of f via central differences.

    Slow but implementation-independent; used to sanity-check df_math.
    """
    grad = np.empty(len(w))
    for i in range(len(w)):
        w_plus = w.copy()
        w_plus[i] += epsilon
        w_minus = w.copy()
        w_minus[i] -= epsilon
        # Central difference along coordinate i.
        grad[i] = (f(w_plus, X) - f(w_minus, X)) / (2 * epsilon)
    return grad
def direction(w):
    """Return w rescaled to unit Euclidean length."""
    norm = np.linalg.norm(w)
    return w / norm
def gradient_ascent(df, X, initial_w, eta, n_iters=1e4, epsilon=1e-8):
    """Maximize f over unit vectors w by gradient ascent.

    df        : gradient function, df(w, X) (df_math or df_debug)
    X         : demeaned data matrix
    initial_w : starting vector; must NOT be the zero vector
    eta       : learning rate
    n_iters   : iteration cap
    epsilon   : stop when the objective changes by less than this
    """
    w = direction(initial_w)
    for _ in range(int(n_iters)):
        last_w = w
        # Ascend ('+' because we maximize) and renormalize: only the
        # direction of w matters, so keep it on the unit sphere.
        w = direction(last_w + eta * df(last_w, X))
        if abs(f(w, X) - f(last_w, X)) < epsilon:
            break
    return w
initial_w = np.random.random(X.shape[1]) # NOTE 2: must not start from the zero vector (its direction is undefined)
initial_w
eta = 0.001
# NOTE 3: do not standardize the data with StandardScaler here
有:
gradient_ascent(df_debug,X_demean,initial_w,eta)
array([0.78728038,0.61659517])
gradient_ascent(df_math,X_demean,initial_w,eta)
array([0.78728038, 0.61659517])
# Fit the first principal component and draw it as a red segment of
# length 50 starting at the origin, over the centered sample.
w = gradient_ascent(df_math, X_demean, initial_w, eta)
end_x, end_y = 50 * w[0], 50 * w[1]
plt.scatter(X_demean[:, 0], X_demean[:, 1])
plt.plot([0, end_x], [0, end_y], color='r')
plt.show()
去掉添加的随机噪音:
# Rebuild the sample without the random noise: the points fall exactly
# on the line x2 = 0.75 * x1 + 3.
sample_count2 = 100
X2 = np.empty((sample_count2, 2))
X2[:, 0] = np.random.uniform(0.0, 100.0, size=sample_count2)
X2[:, 1] = 0.75 * X2[:, 0] + 3.0
plt.scatter(X2[:, 0], X2[:, 1])
plt.show()
# Center the noise-free sample, refit the principal component, and plot:
# the red segment should lie exactly along the point cloud.
X2_demean = demean(X2)
w2 = gradient_ascent(df_math, X2_demean, initial_w, eta)
plt.scatter(X2_demean[:, 0], X2_demean[:, 1])
plt.plot([0, 50 * w2[0]], [0, 50 * w2[1]], color='r')
plt.show()