一、解析式numpy实现
由中心极限定理,假设各样本误差 $\epsilon_i$ 独立同分布(IID),服从均值为 0、方差为定值 $\sigma^2$ 的高斯分布,即 $y_i = \theta^T x_i + \epsilon_i$。
似然函数:$L(\theta)=\prod_{i=1}^{n}\frac{1}{\sqrt{2\pi\sigma^2}}\exp\!\left(-\frac{(y_i-\theta^T x_i)^2}{2\sigma^2}\right)$,最大化对数似然等价于最小化平方误差,这正是最小二乘的来源。
Python实现:
import numpy as np
def fit(X, y, lamda):
    """Closed-form ridge regression: theta = (X^T X + lamda*I)^{-1} X^T y.

    X: (n, d) feature matrix WITHOUT the bias column; y: (n, 1) targets;
    lamda: L2 regularization strength.
    Returns theta of shape (d+1, 1) with the intercept first.
    NOTE(review): `lamda*np.eye(...)` regularizes the intercept as well;
    kept as in the original, but many formulations exclude the bias term.
    """
    X0 = np.ones([X.shape[0], 1])
    X = np.concatenate((X0, X), axis=1)  # prepend a column of ones for the bias
    Z = np.dot(X.T, X)
    Z = Z + lamda * np.eye(Z.shape[0])
    # BUG FIX: the original used `Z.I`, which exists only on np.matrix, not
    # np.ndarray (AttributeError).  Solving the linear system Z*theta = X^T y
    # is both correct here and numerically more stable than forming Z^{-1}.
    theta = np.linalg.solve(Z, np.dot(X.T, y))
    return theta
def predict(X_test, theta):
    """Return the linear predictions X_test @ theta.

    X_test must already contain the bias column if theta includes an
    intercept term (as returned by `fit`).
    """
    return X_test @ theta
二、迭代式python实现
mini-batch梯度下降算法:每次迭代只用一小批样本估计梯度,兼顾了批量梯度下降的稳定性与随机梯度下降的速度。
mini-batch梯度下降python实现:
import numpy as np
import math
def batch_generator(X_train, y_train, batch_size=64, shuffle=True):
    """Yield (X_batch, y_batch) mini-batches forever, cycling over the data.

    If shuffle is True the rows are permuted once, up front.  Samples in the
    tail remainder (n % batch_size) are dropped, as in the original.

    BUG FIXES vs. original:
    - shuffle=False no longer raises NameError (shuffled_X was only assigned
      inside the `if shuffle:` branch but always read at yield time);
    - on wrap-around the original yielded the stale previous slice once more
      (start/end were not recomputed after resetting batch_count), duplicating
      the last batch; now the cycle restarts cleanly at the first batch.
    """
    n = X_train.shape[0]
    batches = n // batch_size  # floor division: number of full batches
    if shuffle:
        permutation = np.random.permutation(n)  # random order of 0..n-1
        X_train = X_train[permutation, :]
        y_train = y_train[permutation]
    batch_count = 0
    while True:
        if batch_count >= batches:
            batch_count = 0  # wrap around and start a new pass
        start = batch_count * batch_size
        end = start + batch_size
        batch_count += 1
        yield X_train[start:end], y_train[start:end]
测试:
# Smoke test: 53 random samples (not divisible by the batch size) so the
# generator has to drop the remainder and wrap around.
x = np.random.random(size=[53, 2])
y = np.random.randint(2, size=[53, 1])
batch_size = 5
batch_gen = batch_generator(x, y, batch_size)
for _ in range(5):
    batch_x, batch_y = next(batch_gen)
    print(batch_x, batch_y)
测试结果:
[[0.32710853 0.67869134]
[0.61695603 0.86193659]
[0.48160635 0.98581035]
[0.29885042 0.20402179]
[0.08079625 0.69353351]]
[[1]
[0]
[1]
[1]
[1]]
[[0.73961139 0.07881719]
[0.18701115 0.3359902 ]
[0.51912644 0.55833118]
[0.9169518 0.16333718]
[0.3028686 0.85041637]]
[[0]
[0]
[1]
[0]
[0]]
[[0.58380997 0.94092658]
[0.52170738 0.10782448]
[0.81749353 0.58407863]
[0.28071959 0.7554074 ]
[0.47579202 0.21961333]]
[[0]
[0]
[1]
[1]
[0]]
[[0.17388744 0.24528241]
[0.8260477 0.36172547]
[0.79123657 0.43831317]
[0.2020071 0.09857524]
[0.0033098 0.90089072]]
[[0]
[1]
[0]
[1]
[1]]
[[0.86563554 0.17681578]
[0.65463518 0.78269928]
[0.15594294 0.93580118]
[0.98586356 0.24725982]
[0.88887412 0.58030348]]
[[0]
[1]
[1]
[1]
[0]]
迭代式Python实现
import numpy as np
import math
from mini_batch import batch_generator
def linear_regression(X_train, y_train, batch_size=64, lamda=0.1,
                      learning_rate=10**-3, iter_num=10**5, shuffle=True):
    """Fit L2-regularized linear regression by mini-batch gradient descent.

    X_train: (n, d) features (bias handled internally); y_train: (n, 1).
    Returns the weight vector (d+1, 1) that achieved the smallest observed
    batch loss, intercept first.

    BUG FIXES vs. original:
    - the loss included the penalty lamda*||w||^2 but the update step omitted
      its gradient 2*lamda*w, so the code was not descending the printed loss;
    - best_weight was recorded *after* the update, so it stored weights that
      did not actually produce mini_loss; now a pre-update copy is kept;
    - the if/else branches performed the identical update (duplication) and
      the locals n/batches were unused.
    """
    X0 = np.ones([X_train.shape[0], 1])
    X = np.concatenate((X0, X_train), axis=1)  # fold bias into the weights
    m = X.shape[1]
    weight = np.ones((m, 1))
    mini_loss = math.inf
    stop_num = 0
    best_weight = np.zeros((m, 1))
    batch_gen = batch_generator(X, y_train, batch_size=batch_size, shuffle=shuffle)
    for i in range(iter_num):
        batch_x, batch_y = next(batch_gen)
        y_hat = np.dot(batch_x, weight)
        residual = batch_y - y_hat
        loss = 0.5 * np.sum(np.square(residual)) + lamda * np.sum(np.square(weight))
        if loss < mini_loss:
            mini_loss = loss
            best_weight = weight.copy()  # snapshot the weights that achieved it
            stop_num = 0
        else:
            stop_num += 1
        # Gradient of the regularized loss: -X^T r + 2*lamda*w.
        weight = weight + learning_rate * (np.dot(batch_x.T, residual) - 2 * lamda * weight)
        if i % 50 == 0:
            print(mini_loss)
        if stop_num >= 50:  # 50 consecutive non-improving batches -> stop
            break
    return best_weight
测试:
# Synthetic check: data drawn from y = 3*x1 + 4*x2 + 6*x3 + 5 with no noise,
# so the learned weights should land close to [5, 3, 4, 6] (intercept first).
data_x = np.random.normal(0, 10, [50, 3])
Weights = np.array([[3, 4, 6]])
data_y = data_x @ Weights.T + 5
weight = linear_regression(data_x, data_y, batch_size=5, lamda=0.01)
print(weight)
测试结果:
9669.957861677509
22.876280215462273
13.429779858875358
8.646139075575832
5.418334519025948
3.551904684508223
2.4997854812640687
1.8215981889167516
1.441757054499888
1.1969684352959187
1.0613701617506808
0.9740169948553807
0.9261835202082394
0.894706580051378
0.8790383877029638
0.8693689014440267
0.8638901642825122
0.8613980612674257
0.8598047302877919
0.8592169923734799
0.8589452129883517
0.858920147112154
[[4.9789343 ]
[3.00044599]
[4.00017073]
[5.99940012]]
可以看出loss值不断减小,参数最终收敛到接近输入值;由于L2正则化项会把参数略微向0收缩,所以存在微小偏差,如果去掉L2正则化项,最终学习到的参数将与输入值完全相同。
线性回归的学习就到这里,不足之处欢迎指正,下期逻辑回归。