import numpy as np
import matplotlib.pyplot as plt
Dataset creation
m = 100000
x = np.random.normal(size=m)
X = x.reshape(-1, 1)
y = 4. * x + 3. + np.random.normal(0, 3, size=m)
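Since matplotlib is already imported, a quick scatter of a random subsample can confirm the noisy linear trend before training; the subsample size of 1,000 is my own arbitrary choice for readability.
sample = np.random.choice(m, 1000, replace=False)  # plotting all 100,000 points would be slow and cluttered
plt.scatter(x[sample], y[sample], s=4, alpha=0.5)
plt.xlabel('x')
plt.ylabel('y')
plt.show()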
Defining the loss function is of little practical use for SGD itself (only its gradient is ever evaluated), but leaving it undefined would break with the standard setup of the gradient descent algorithm.
Loss function (mean squared error)
def J(theta, X_b, y):
    try:
        return np.sum((y - X_b.dot(theta)) ** 2) / len(X_b)
    except Exception:  # guard against numeric overflow when theta diverges
        return float('inf')
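A quick sanity check, assuming X and y from above: at the true parameters theta = [3, 4] the residuals are pure noise with standard deviation 3, so J should come out close to 3**2 = 9. X_b_check is my own name for the augmented matrix.
X_b_check = np.hstack([np.ones((len(X), 1)), X])
print(J(np.array([3., 4.]), X_b_check, y))  # expect a value near 9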
Gradient of the loss with respect to theta, evaluated on a single random sample
def dj_sgd(theta, X_b_i, y_i):
    # X_b_i is one row of X_b; differentiating (X_b_i.dot(theta) - y_i)**2 gives this
    return X_b_i.T.dot(X_b_i.dot(theta) - y_i) * 2.
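To double-check the formula, a central finite difference on the one-sample loss should agree with dj_sgd; loss_one and the probe values below are my own illustrative choices.
def loss_one(theta, x_i, y_i):  # squared error of a single sample
    return (x_i.dot(theta) - y_i) ** 2

theta_probe = np.array([1., 2.])
x_i = np.array([1., 0.5])  # [bias term, feature value]
y_i = 4. * 0.5 + 3.
eps = 1e-6
num_grad = np.array([
    (loss_one(theta_probe + eps * np.eye(2)[j], x_i, y_i)
     - loss_one(theta_probe - eps * np.eye(2)[j], x_i, y_i)) / (2 * eps)
    for j in range(2)
])
print(dj_sgd(theta_probe, x_i, y_i), num_grad)  # the two vectors should match closely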
Low-level implementation
def sgd(X_b, y, initial_theta, n_iters=5):
    '''
    initial_theta: the starting parameter vector (intercept plus one entry per feature)
    n_iters: number of full passes (epochs) over the whole data set
    '''
    t0 = 5
    t1 = 50
    m = len(X_b)

    def learning_rate(t):
        # decaying step size: eta = t0 / (t + t1)
        return t0 / (t + t1)

    theta = initial_theta
    for cur_iter in range(n_iters):
        indexes = np.random.permutation(m)  # shuffle the indices so every sample is visited once per epoch
        X_b_new = X_b[indexes]              # reorder the samples with the shuffled indices
        y_b_new = y[indexes]
        for i in range(m):
            gradient = dj_sgd(theta, X_b_new[i], y_b_new[i])  # use the shuffled data, not the original order
            theta = theta - learning_rate(cur_iter * m + i) * gradient
    return theta
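To see what the annealing schedule t0 / (t + t1) buys over a fixed step, a minimal sketch of how the step size decays across one epoch (t0 = 5 and t1 = 50 taken from sgd above):
t0, t1 = 5, 50
for t in [0, 100, 10000, 99999]:
    print(t, t0 / (t + t1))
# the step starts at 0.1 and decays toward ~5e-5, so the noisy single-sample
# gradients late in training move theta less and the iterates settle down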
x_b = np.hstack([np.ones((len(X), 1)), X])
initial_theta = np.zeros(x_b.shape[1])
theta = sgd(x_b, y, initial_theta)
theta  # should come out close to [3., 4.], the true intercept and slope
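As a cross-check on the result, the closed-form least-squares solution via np.linalg.lstsq should land near the same parameters:
theta_exact, *_ = np.linalg.lstsq(x_b, y, rcond=None)
print(theta, theta_exact)  # both should be close to [3., 4.]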