线性回归和正则化，最后实现不用sklearn的Softmax分类

瞳恩Dawn

已于 2022-09-07 17:50:45 修改

阅读量670

点赞数

文章标签：线性回归分类 python

于 2022-09-07 17:46:27 首次发布

本文链接：https://blog.csdn.net/weixin_44391984/article/details/126750751

版权

linear regression

生成一些数据

import numpy as np

import matplotlib.pyplot as plt

# Synthetic data for the linear model y = 4 + 3x + Gaussian noise.
# np.random.rand(100, 1) fills a (100, 1) array with samples drawn from a
# uniform distribution over [0, 1); doubling it spreads the feature over [0, 2).
X = np.random.rand(100, 1) * 2
y = 3 * X + 4 + np.random.randn(100, 1)

X[:5]    # note the shape: each sample is its own row, so X is 100 x 1

array([[1.18171039],
       [1.62333209],
       [0.59230184],
       [1.03828925],
       [1.3602664 ]])

# Peek at the first five targets; y is a (100, 1) column, matching X row for row.
y[0:5]

array([[ 6.45485124],
       [11.0588182 ],
       [ 5.36127661],
       [ 6.61603683],
       [ 7.76445136]])

$\hat{\theta} = (X^TX)^{-1}X^Ty$

上式是最小化 MSE 的正规方程（闭式解），可以直接求出 $\hat{\theta}$；后文的梯度下降同样以 MSE 作为损失函数。

# Closed-form least squares via the normal equation:
#     theta_best = (X_b^T X_b)^-1 X_b^T y
# np.c_ glues arrays together column-wise, so the column of ones (the bias
# feature x0) sits next to X and the first learned parameter is the intercept.
X_b = np.c_[np.ones((100, 1)), X]
# np.linalg.inv() computes the matrix inverse.
theta_best = np.linalg.inv(X_b.T @ X_b) @ X_b.T @ y

# First five rows of the design matrix: a leading column of ones, then the feature.
X_b[0:5]

array([[1.        , 1.18171039],
       [1.        , 1.62333209],
       [1.        , 0.59230184],
       [1.        , 1.03828925],
       [1.        , 1.3602664 ]])

theta_best # the data is noisy, but it was generated from y = 4 + 3X, so we hope to recover roughly [4, 3]

array([[3.9255906 ],
       [3.03536388]])

用 $\hat{\theta}$ 做出预测

# Predict at the two ends of the feature range, x = 0 and x = 2.
X_new = np.array([[0], [2]])
# np.c_ is indexed with square brackets (it is not a function call);
# prepend the bias column of ones exactly as was done for X_b.
X_new_b = np.c_[np.ones((2, 1)), X_new]
y_predict = X_new_b @ theta_best
y_predict

array([[3.9255906 ],
       [9.99631837]])

# Plot the noisy samples together with the fitted regression line.
plt.figure(figsize = (8, 6))
plt.plot(X, y, 'b.')  # training points as blue dots
plt.plot(X_new, y_predict, 'r-')  # fitted line in red
# plt.axis takes [xmin, xmax, ymin, ymax], i.e. it sets the x and y limits in one call.
plt.axis([0, 2, 0, 15])
plt.show()

在这里插入图片描述

使用sklearn进行线性回归

from sklearn.linear_model import LinearRegression # computed using SVD

lin_reg = LinearRegression()
lin_reg.fit(X, y)    # note: X, not X_b -- the estimator fits the intercept itself, so no bias column has to be added
# Print the intercept b and the weights W
print(lin_reg.intercept_, lin_reg.coef_)
# Predict at the new points
ans = lin_reg.predict(X_new)
ans

[3.9255906] [[3.03536388]]





array([[3.9255906 ],
       [9.99631837]])

使用梯度下降

# Batch gradient descent: every step uses the MSE gradient over all m samples,
#     gradient = (2 / m) * X_b^T (X_b theta - y)
lr = 0.1
n_iterations = 1000
m = 100
theta = np.random.randn(2, 1)  # random starting point
for _ in range(n_iterations):
    residuals = X_b @ theta - y
    gradients = (2 / m) * (X_b.T @ residuals)
    theta = theta - lr * gradients
theta

array([[3.9255906 ],
       [3.03536388]])

# np.random.randint
# randint(low, high=None, size=None, dtype=int)

# Return random integers from `low` (inclusive) to `high` (exclusive).

# Return random integers from the "discrete uniform" distribution of
# the specified dtype in the "half-open" interval [`low`, `high`). If
# `high` is None (the default), then results are from [0, `low`).
print(np.random.randint(6)) # [0, 6)
print(np.random.randint(low = 4, high = 11)) # [4, 11), i.e. one of the integers 4 through 10

4
7

# Hyperparameters for stochastic gradient descent.
n_epochs = 50
t0, t1 = 5, 50  # numerator and offset of the learning-rate schedule


def learning_schedule(t):
    """Return the learning rate for update step t, decaying as t0 / (t + t1)."""
    step_size = t0 / (t + t1)
    return step_size


theta = np.random.randn(2, 1)  # random starting point

# Sample of what X_b looks like:
# X_b
# array([[1.        , 1.82998312],
#        [1.        , 0.63093605],
#        [1.        , 0.64826421],
#        [1.        , 0.19033492],
#        [1.        , 1.97900686]])
# Shape probes answering: why slice [random_index:random_index + 1] instead of
# indexing with random_index directly? The slice keeps a 2-D (1, 2) row, while
# plain integer indexing drops the row axis and returns shape (2,).
print(len(X_b))
print(X_b[0:1].shape)
print(X_b[0].shape)    # integer index: the row axis is lost
print(X_b[0].reshape(1, 2).shape)
print(y[0:1].shape)

100
(1, 2)
(2,)
(1, 2)
(1, 1)

# Stochastic gradient descent: each update uses a single randomly drawn sample
# (batch size 1); m such updates make up one epoch, for n_epochs epochs in total.
for epoch in range(n_epochs):
    for i in range(m):
        pick = np.random.randint(m)  # uniform over [0, m), drawn with replacement
        # Slice rather than index so the sample stays 2-D: (1, 2) for xi, (1, 1) for yi.
        xi, yi = X_b[pick:pick + 1], y[pick:pick + 1]
        error = xi @ theta - yi
        gradients = 2 * (xi.T @ error)
        # The step size shrinks as the global step count (epoch * m + i) grows.
        lr = learning_schedule(epoch * m + i)
        theta = theta - lr * gradients
theta

array([[3.9128593 ],
       [3.01191738]])

用sklearn实现使用SGD的线性回归

# np.ndarray.ravel??
# Return a flattened array.
y.ravel().shape       # flattens the (100, 1) column into a 1-D array of shape (100,), the form passed to SGDRegressor.fit

(100,)

from sklearn.linear_model import SGDRegressor
# At most 1000 epochs; tol=1e-3 is the early-stopping tolerance on the loss
# improvement; penalty=None turns regularization off; eta0=0.1 is the initial
# learning rate.
sgd_reg = SGDRegressor(max_iter=1000, tol = 1e-3, penalty=None, eta0 = 0.1)
# Fit on X rather than X_b: SGDRegressor learns its own intercept
# (fit_intercept=True by default), so the extra column of ones in X_b would
# split the bias between intercept_ and coef_[0].
# y.ravel() is needed because fit expects a 1-D target; without it sklearn warns
# "A column-vector y was passed when a 1d array was expected."
sgd_reg.fit(X, y.ravel())

SGDRegressor(eta0=0.1, penalty=None)

# Show the learned intercept and coefficient vector.
sgd_reg.intercept_, sgd_reg.coef_

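(array([1.92913098]), array([1.92913098, 2.94632646]))

注：这组输出是用 X_b 拟合得到的。X_b 已经包含全 1 的偏置列，而 SGDRegressor 默认 fit_intercept=True 还会再学一个截距，于是截距被平分到 intercept_ 和 coef_[0] 上（1.929 + 1.929 ≈ 3.86，才是真正的截距）；若直接用 X 拟合，截距会完整地出现在 intercept_ 中。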

多项式回归

# Noisy quadratic data: y = 0.5 x^2 + x + 2 + Gaussian noise.
m = 100
# Use rand (uniform on [0, 1)), not randn: 6 * rand - 3 spreads the feature
# evenly over [-3, 3), whereas randn would give a normal with mean -3 and std 6.
X = 6 * np.random.rand(m, 1) - 3
y = 0.5 * X**2 + X + 2 + np.random.randn(m, 1)

# 发现单独取出一行就变成了没有维度的向量
print(X.shape)
print(X[0].shape, X[0])
print(y[0].shape, y[

最低0.47元/天解锁文章