Linear Model
# -*- coding: utf-8 -*-
import numpy as np
import matplotlib.pyplot as plt

x_data = [1.0, 2.0, 3.0]
y_data = [2.0, 4.0, 6.0]

def forward(x):
    return x * w

def loss(x, y):
    y_pred = forward(x)
    return (y_pred - y) * (y_pred - y)

# candidate weights
w_list = []
# loss value (MSE) for each candidate weight
mse_list = []
# enumerate w values and compute the corresponding MSE
for w in np.arange(0.0, 4.1, 0.1):
    print('w = ', w)
    loss_sum = 0
    for x_val, y_val in zip(x_data, y_data):
        # forward() is also evaluated inside loss(); we call it again just to print the prediction
        y_pred_val = forward(x_val)
        loss_val = loss(x_val, y_val)
        loss_sum += loss_val
        print('\t', x_val, y_val, y_pred_val, loss_val)
    print('MSE = ', loss_sum / 3)
    print('=' * 60)
    w_list.append(w)
    mse_list.append(loss_sum / 3)

# plot the loss curve: w on the x-axis, loss on the y-axis
plt.plot(w_list, mse_list)
plt.ylabel('Loss')
plt.xlabel('w')
plt.show()
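For reference, the same sweep can also be written without explicit loops using NumPy broadcasting; a vectorized sketch (an alternative formulation, not part of the original script):

import numpy as np

x = np.array([1.0, 2.0, 3.0])
y = np.array([2.0, 4.0, 6.0])
ws = np.arange(0.0, 4.1, 0.1)
preds = np.outer(ws, x)                  # row i holds the predictions under w = ws[i]
mses = ((preds - y) ** 2).mean(axis=1)   # MSE for every candidate w
print(ws[mses.argmin()])                 # ~2.0, the minimizing weight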
w = 0.0
1.0 2.0 0.0 4.0
2.0 4.0 0.0 16.0
3.0 6.0 0.0 36.0
MSE = 18.666666666666668
============================================================
w = 0.1
1.0 2.0 0.1 3.61
2.0 4.0 0.2 14.44
3.0 6.0 0.30000000000000004 32.49
MSE = 16.846666666666668
============================================================
w = 0.2
1.0 2.0 0.2 3.24
2.0 4.0 0.4 12.96
3.0 6.0 0.6000000000000001 29.160000000000004
MSE = 15.120000000000003
============================================================
w = 0.30000000000000004
1.0 2.0 0.30000000000000004 2.8899999999999997
2.0 4.0 0.6000000000000001 11.559999999999999
3.0 6.0 0.9000000000000001 26.009999999999998
MSE = 13.486666666666665
============================================================
w = 0.4
1.0 2.0 0.4 2.5600000000000005
2.0 4.0 0.8 10.240000000000002
3.0 6.0 1.2000000000000002 23.04
MSE = 11.946666666666667
============================================================
w = 0.5
1.0 2.0 0.5 2.25
2.0 4.0 1.0 9.0
3.0 6.0 1.5 20.25
MSE = 10.5
============================================================
w = 0.6000000000000001
1.0 2.0 0.6000000000000001 1.9599999999999997
2.0 4.0 1.2000000000000002 7.839999999999999
3.0 6.0 1.8000000000000003 17.639999999999993
MSE = 9.146666666666663
============================================================
w = 0.7000000000000001
1.0 2.0 0.7000000000000001 1.6899999999999995
2.0 4.0 1.4000000000000001 6.759999999999998
3.0 6.0 2.1 15.209999999999999
MSE = 7.886666666666666
============================================================
w = 0.8
1.0 2.0 0.8 1.44
2.0 4.0 1.6 5.76
3.0 6.0 2.4000000000000004 12.959999999999997
MSE = 6.719999999999999
============================================================
w = 0.9
1.0 2.0 0.9 1.2100000000000002
2.0 4.0 1.8 4.840000000000001
3.0 6.0 2.7 10.889999999999999
MSE = 5.646666666666666
============================================================
w = 1.0
1.0 2.0 1.0 1.0
2.0 4.0 2.0 4.0
3.0 6.0 3.0 9.0
MSE = 4.666666666666667
============================================================
w = 1.1
1.0 2.0 1.1 0.8099999999999998
2.0 4.0 2.2 3.2399999999999993
3.0 6.0 3.3000000000000003 7.289999999999998
MSE = 3.779999999999999
============================================================
w = 1.2000000000000002
1.0 2.0 1.2000000000000002 0.6399999999999997
2.0 4.0 2.4000000000000004 2.5599999999999987
3.0 6.0 3.6000000000000005 5.759999999999997
MSE = 2.986666666666665
============================================================
w = 1.3
1.0 2.0 1.3 0.48999999999999994
2.0 4.0 2.6 1.9599999999999997
3.0 6.0 3.9000000000000004 4.409999999999998
MSE = 2.2866666666666657
============================================================
w = 1.4000000000000001
1.0 2.0 1.4000000000000001 0.3599999999999998
2.0 4.0 2.8000000000000003 1.4399999999999993
3.0 6.0 4.2 3.2399999999999993
MSE = 1.6799999999999995
============================================================
w = 1.5
1.0 2.0 1.5 0.25
2.0 4.0 3.0 1.0
3.0 6.0 4.5 2.25
MSE = 1.1666666666666667
============================================================
w = 1.6
1.0 2.0 1.6 0.15999999999999992
2.0 4.0 3.2 0.6399999999999997
3.0 6.0 4.800000000000001 1.4399999999999984
MSE = 0.746666666666666
============================================================
w = 1.7000000000000002
1.0 2.0 1.7000000000000002 0.0899999999999999
2.0 4.0 3.4000000000000004 0.3599999999999996
3.0 6.0 5.1000000000000005 0.809999999999999
MSE = 0.4199999999999995
============================================================
w = 1.8
1.0 2.0 1.8 0.03999999999999998
2.0 4.0 3.6 0.15999999999999992
3.0 6.0 5.4 0.3599999999999996
MSE = 0.1866666666666665
============================================================
w = 1.9000000000000001
1.0 2.0 1.9000000000000001 0.009999999999999974
2.0 4.0 3.8000000000000003 0.0399999999999999
3.0 6.0 5.7 0.0899999999999999
MSE = 0.046666666666666586
============================================================
w = 2.0
1.0 2.0 2.0 0.0
2.0 4.0 4.0 0.0
3.0 6.0 6.0 0.0
MSE = 0.0
============================================================
w = 2.1
1.0 2.0 2.1 0.010000000000000018
2.0 4.0 4.2 0.04000000000000007
3.0 6.0 6.300000000000001 0.09000000000000043
MSE = 0.046666666666666835
============================================================
w = 2.2
1.0 2.0 2.2 0.04000000000000007
2.0 4.0 4.4 0.16000000000000028
3.0 6.0 6.6000000000000005 0.36000000000000065
MSE = 0.18666666666666698
============================================================
w = 2.3000000000000003
1.0 2.0 2.3000000000000003 0.09000000000000016
2.0 4.0 4.6000000000000005 0.36000000000000065
3.0 6.0 6.9 0.8100000000000006
MSE = 0.42000000000000054
============================================================
w = 2.4000000000000004
1.0 2.0 2.4000000000000004 0.16000000000000028
2.0 4.0 4.800000000000001 0.6400000000000011
3.0 6.0 7.200000000000001 1.4400000000000026
MSE = 0.7466666666666679
============================================================
w = 2.5
1.0 2.0 2.5 0.25
2.0 4.0 5.0 1.0
3.0 6.0 7.5 2.25
MSE = 1.1666666666666667
============================================================
w = 2.6
1.0 2.0 2.6 0.3600000000000001
2.0 4.0 5.2 1.4400000000000004
3.0 6.0 7.800000000000001 3.2400000000000024
MSE = 1.6800000000000008
============================================================
w = 2.7
1.0 2.0 2.7 0.49000000000000027
2.0 4.0 5.4 1.960000000000001
3.0 6.0 8.100000000000001 4.410000000000006
MSE = 2.2866666666666693
============================================================
w = 2.8000000000000003
1.0 2.0 2.8000000000000003 0.6400000000000005
2.0 4.0 5.6000000000000005 2.560000000000002
3.0 6.0 8.4 5.760000000000002
MSE = 2.986666666666668
============================================================
w = 2.9000000000000004
1.0 2.0 2.9000000000000004 0.8100000000000006
2.0 4.0 5.800000000000001 3.2400000000000024
3.0 6.0 8.700000000000001 7.290000000000005
MSE = 3.780000000000003
============================================================
w = 3.0
1.0 2.0 3.0 1.0
2.0 4.0 6.0 4.0
3.0 6.0 9.0 9.0
MSE = 4.666666666666667
============================================================
w = 3.1
1.0 2.0 3.1 1.2100000000000002
2.0 4.0 6.2 4.840000000000001
3.0 6.0 9.3 10.890000000000004
MSE = 5.646666666666668
============================================================
w = 3.2
1.0 2.0 3.2 1.4400000000000004
2.0 4.0 6.4 5.760000000000002
3.0 6.0 9.600000000000001 12.96000000000001
MSE = 6.720000000000003
============================================================
w = 3.3000000000000003
1.0 2.0 3.3000000000000003 1.6900000000000006
2.0 4.0 6.6000000000000005 6.7600000000000025
3.0 6.0 9.9 15.210000000000003
MSE = 7.886666666666668
============================================================
w = 3.4000000000000004
1.0 2.0 3.4000000000000004 1.960000000000001
2.0 4.0 6.800000000000001 7.840000000000004
3.0 6.0 10.200000000000001 17.640000000000008
MSE = 9.14666666666667
============================================================
w = 3.5
1.0 2.0 3.5 2.25
2.0 4.0 7.0 9.0
3.0 6.0 10.5 20.25
MSE = 10.5
============================================================
w = 3.6
1.0 2.0 3.6 2.5600000000000005
2.0 4.0 7.2 10.240000000000002
3.0 6.0 10.8 23.040000000000006
MSE = 11.94666666666667
============================================================
w = 3.7
1.0 2.0 3.7 2.8900000000000006
2.0 4.0 7.4 11.560000000000002
3.0 6.0 11.100000000000001 26.010000000000016
MSE = 13.486666666666673
============================================================
w = 3.8000000000000003
1.0 2.0 3.8000000000000003 3.240000000000001
2.0 4.0 7.6000000000000005 12.960000000000004
3.0 6.0 11.4 29.160000000000004
MSE = 15.120000000000005
============================================================
w = 3.9000000000000004
1.0 2.0 3.9000000000000004 3.610000000000001
2.0 4.0 7.800000000000001 14.440000000000005
3.0 6.0 11.700000000000001 32.49000000000001
MSE = 16.84666666666667
============================================================
w = 4.0
1.0 2.0 4.0 4.0
2.0 4.0 8.0 16.0
3.0 6.0 12.0 36.0
MSE = 18.666666666666668
============================================================
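The curve bottoms out at w = 2.0 with MSE = 0: the data satisfies y = 2x exactly, so the model fits perfectly there, and the cost grows quadratically as w moves away from 2.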
Autograd Mechanism
# -*- coding: utf-8 -*-
"""
Created on Fri Oct 15 21:07:32 2021
@author: 86493
"""
import torch

# requires_grad=True makes autograd track the computation history
x = torch.ones(2, 2, requires_grad=True)
print(x)
print('-' * 50)

# raise the tensor to a power
y = x ** 2
print(y)
# y is the result of an operation, so it has a grad_fn attribute
print(y.grad_fn)
print('-' * 50)

z = y * y * 3
out = z.mean()  # mean over all elements
print("z:", z)
print("out:", out)
print('-' * 50)

# requires_grad defaults to False
a = torch.randn(2, 2)
print("initial a:\n", a)
a = ((a * 3) / (a - 1))
print("a after the operation:\n", a)
print(a.requires_grad)  # False by default
a.requires_grad_(True)
print(a.requires_grad)
b = (a * a).sum()
print(b.grad_fn)  # b is the result of an operation, so it has a grad_fn attribute
print('-' * 50)

# ==================================
# compute gradients
out.backward()  # out is a scalar
print(x.grad)   # the derivative d(out)/dx
print('-' * 50)

# backpropagating again would accumulate into grad (one more gradient added)
# out2.backward()
# print(x.grad)
out3 = x.sum()
# zero the gradient before the next backward pass (to avoid accumulation)
x.grad.data.zero_()
out3.backward()
print(x.grad)
print('-' * 50)

# Jacobian-vector product
x = torch.randn(3, requires_grad=True)
print(x)
y = x * 2
i = 0
# norm() returns the L2 norm of the tensor
while y.data.norm() < 1000:
    y = y * 2
    i = i + 1
print("y:\n", y, '\n')
print("i:", i)
v = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float)
y.backward(v)
print("x.grad:\n", x.grad)

# wrapping a block in `with torch.no_grad():` stops autograd from
# tracking tensors that have requires_grad=True
print(x.requires_grad)
print((x ** 2).requires_grad)
with torch.no_grad():
    print((x ** 2).requires_grad)
print('-' * 50)

# to modify a tensor's values without autograd recording it
# (i.e. without affecting backpropagation), operate on tensor.data
x = torch.ones(1, requires_grad=True)
print("x: ", x)
print(x.data)  # still a tensor,
# but detached from the computation graph
print(x.data.requires_grad)
y = 2 * x
# only the value changes; nothing is recorded in the graph,
# so gradient propagation is unaffected
x.data *= 100
y.backward()
# changing .data also changes the tensor's value
print(x)
print(x.grad)
tensor([[1., 1.],
[1., 1.]], requires_grad=True)
--------------------------------------------------
tensor([[1., 1.],
[1., 1.]], grad_fn=<PowBackward0>)
<PowBackward0 object at 0x000001D74AEFBA30>
--------------------------------------------------
z: tensor([[3., 3.],
[3., 3.]], grad_fn=<MulBackward0>)
out: tensor(3., grad_fn=<MeanBackward0>)
--------------------------------------------------
initial a:
tensor([[ 0.1064, -1.0084],
[-0.2516, -0.4749]])
a after the operation:
tensor([[-0.3570, 1.5063],
[ 0.6030, 0.9660]])
False
True
<SumBackward0 object at 0x000001D745593FD0>
--------------------------------------------------
tensor([[3., 3.],
[3., 3.]])
--------------------------------------------------
tensor([[1., 1.],
[1., 1.]])
--------------------------------------------------
tensor([-0.8706, -1.1828, -0.8192], requires_grad=True)
y:
tensor([ -891.5447, -1211.1826, -838.8481], grad_fn=<MulBackward0>)
i: 9
x.grad:
tensor([1.0240e+02, 1.0240e+03, 1.0240e-01])
True
True
False
--------------------------------------------------
x: tensor([1.], requires_grad=True)
tensor([1.])
False
tensor([100.], requires_grad=True)
tensor([2.])
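A quick sanity check on the Jacobian-vector product above: after the loop, y = 2^(i+1) * x, so y.backward(v) yields x.grad = 2^(i+1) * v. Here i = 9, and 2^10 * (0.1, 1.0, 0.0001) = (102.4, 1024, 0.1024), which matches the printed x.grad.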
Gradient Descent
Loss function:
cost = \frac{1}{N}\sum_{n=1}^{N}(\hat{y}_n - y_n)^2
Update rule:

w = w - \alpha \frac{1}{N}\sum_{n=1}^{N} 2 x_n (x_n w - y_n)
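The update rule follows by differentiating the cost with respect to w (using \hat{y}_n = x_n w):

\frac{\partial \, cost}{\partial w} = \frac{1}{N}\sum_{n=1}^{N}\frac{\partial}{\partial w}(x_n w - y_n)^2 = \frac{1}{N}\sum_{n=1}^{N} 2 x_n (x_n w - y_n)

and then stepping against the gradient with learning rate \alpha.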
Plotting the loss curve
# -*- coding: utf-8 -*-
"""
Created on Sun Oct 17 14:42:34 2021
@author: 86493
"""
import numpy as np
import matplotlib.pyplot as plt

x_data = [1.0, 2.0, 3.0]
y_data = [2.0, 4.0, 6.0]
costlst = []
w = 1.0

# forward pass
def forward(x):
    return x * w

# cost over the whole dataset (MSE)
def cost(allx, ally):
    cost = 0
    for x, y in zip(allx, ally):
        y_predict = forward(x)
        cost += (y_predict - y) ** 2
    return cost / len(allx)

# gradient of the cost with respect to w
def gradient(allx, ally):
    grad = 0
    for x, y in zip(allx, ally):
        # forward pass
        temp = forward(x)
        # accumulate the gradient
        grad += 2 * x * (temp - y)
    return grad / len(allx)

# train
for epoch in range(100):
    # compute the cost
    cost_val = cost(x_data, y_data)
    costlst.append(cost_val)
    # compute the gradient
    grad_val = gradient(x_data, y_data)
    # update the parameter w
    w -= 0.01 * grad_val
    print("Epoch: ", epoch, "w = ", w, "loss = ", cost_val)
print("Predict(after training)", 4, forward(4))

# plot
plt.plot(range(100), costlst)
plt.ylabel("Cost")
plt.xlabel("Epoch")
plt.show()
Stochastic Gradient Descent (SGD)
An advantage of SGD: the noisy per-sample updates make it possible to escape saddle points.
SGD updates the weight from the gradient of each individual sample, whereas the batch method above updates it from the mean gradient over all samples.
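Concretely, for a single sample (x_n, y_n) the per-sample update is

w = w - \alpha \, 2 x_n (x_n w - y_n)

which is exactly what the gradient(x, y) function in the script below computes.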
# -*- coding: utf-8 -*-
"""
Created on Sun Oct 17 15:24:05 2021
@author: 86493
"""
import numpy as np
import matplotlib.pyplot as plt

x_data = [1.0, 2.0, 3.0]
y_data = [2.0, 4.0, 6.0]
lostlst = []
w = 1.0

# forward pass
def forward(x):
    return x * w

# cost over the whole dataset (MSE)
def cost(allx, ally):
    cost = 0
    for x, y in zip(allx, ally):
        y_predict = forward(x)
        cost += (y_predict - y) ** 2
    return cost / len(allx)

# loss of a single sample
def loss(x, y):
    y_predict = forward(x)
    return (y_predict - y) ** 2

"""
# batch gradient (kept for comparison)
def gradient(allx, ally):
    grad = 0
    for x, y in zip(allx, ally):
        # forward pass
        temp = forward(x)
        # accumulate the gradient
        grad += 2 * x * (temp - y)
    return grad / len(allx)
"""

# gradient of a single sample
def gradient(x, y):
    return 2 * x * (x * w - y)

"""
# batch training loop (kept for comparison)
for epoch in range(100):
    # compute the cost
    cost_val = cost(x_data, y_data)
    costlst.append(cost_val)
    # compute the gradient
    grad_val = gradient(x_data, y_data)
    # update the parameter w
    w -= 0.01 * grad_val
    print("Epoch: ", epoch, "w = ", w, "loss = ", cost_val)
print("Predict(after training)", 4, forward(4))
"""

# SGD: stochastic gradient descent
for epoch in range(100):
    for x, y in zip(x_data, y_data):
        # compute the gradient of one sample, then update immediately
        grad = gradient(x, y)
        w -= 0.01 * grad
        print("\tgrad: ", x, y, grad)
        l = loss(x, y)
        # print("l = ", l)
    print("progress: ", epoch, "w = ", w, "loss = ", l)
print("Predict(after training)", 4, forward(4))
Epoch: 0 w = 1.0933333333333333 loss = 4.666666666666667
Epoch: 1 w = 1.1779555555555554 loss = 3.8362074074074086
Epoch: 2 w = 1.2546797037037036 loss = 3.1535329869958857
Epoch: 3 w = 1.3242429313580246 loss = 2.592344272332262
Epoch: 4 w = 1.3873135910979424 loss = 2.1310222071581117
Epoch: 5 w = 1.4444976559288012 loss = 1.7517949663820642
Epoch: 6 w = 1.4963445413754464 loss = 1.440053319920117
........................
Epoch: 93 w = 1.9998999817997325 loss = 5.678969725349543e-08
Epoch: 94 w = 1.9999093168317574 loss = 4.66836551287917e-08
Epoch: 95 w = 1.9999177805941268 loss = 3.8376039345125727e-08
Epoch: 96 w = 1.9999254544053418 loss = 3.154680994333735e-08
Epoch: 97 w = 1.9999324119941766 loss = 2.593287985380858e-08
Epoch: 98 w = 1.9999387202080534 loss = 2.131797981222471e-08
Epoch: 99 w = 1.9999444396553017 loss = 1.752432687141379e-08
Predict(after training) 4 7.999777758621207
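Note that this log comes from the batch loop (the version kept inside the block comment above), not from the per-sample SGD loop: with y = 2x, each batch update multiplies the error (w - 2) by 1 - 0.01 * (2/3)(1^2 + 2^2 + 3^2) ≈ 0.9067, so the cost shrinks by about 0.9067^2 ≈ 0.822 per epoch, matching the ratio of consecutive printed losses (3.8362 / 4.6667 ≈ 0.822).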
Forward pass
Backpropagation
The computation graph of the linear model
# -*- coding: utf-8 -*-
"""
Created on Sun Oct 17 19:39:32 2021
@author: 86493
"""
import torch

x_data = [1.0, 2.0, 3.0]
y_data = [2.0, 4.0, 6.0]
w = torch.Tensor([1.0])
w.requires_grad = True

# forward pass
def forward(x):
    return x * w

# per-sample loss (training is done sample by sample, i.e. SGD)
def loss(x, y):
    y_pred = forward(x)
    return (y_pred - y) ** 2

print("predict (before training)", 4, forward(4).item())

# training loop, SGD
for epoch in range(100):
    for x, y in zip(x_data, y_data):
        # forward pass: build the graph and compute the loss
        l = loss(x, y)
        # compute the gradient of every tensor with requires_grad=True
        l.backward()
        print('\tgrad:', x, y, w.grad.item())
        w.data = w.data - 0.01 * w.grad.data
        # grad accumulates across backward() calls, so zero it after each update
        w.grad.data.zero_()
    print("progress:", epoch, l.item())

print("predict (after training)", 4, forward(4).item())
Notes:
(1) loss() actually builds a computation graph on every call, and the graph is released after each run.
(2) A Tensor's grad is itself a Tensor. In the weight update
w.data = w.data - 0.01 * w.grad.data
multiplying 0.01 by the grad tensor directly would build a computation graph; multiplying by w.grad.data builds none (we only want to change the value of w here, and its gradient still has to be computed later).
(3) w.grad.item() extracts the value of w.grad as a plain Python scalar (again avoiding the creation of a graph). In short, always go through .data when updating weights.
(4) If, instead of the loss of a single sample, you want the total loss (cost) over all samples and write sum += l, note that sum then becomes a computation graph over the tensor l; since backward() is never called on sum, the graph keeps growing as l is added again and again, and memory eventually blows up.
The correct way is sum += l.item(): never add the loss tensor itself into sum (see the sketch after these notes).
Tensors build a computation graph whenever they take part in an addition.
(5) Always remember to zero the gradient after backward(): w.grad.data.zero_().
(6) Training procedure: compute the loss (forward pass), call backward() to obtain the gradients, then update the parameters by gradient descent.
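A minimal sketch of the safe accumulation pattern from note (4), reusing the names from the training loop above (epoch_loss is an illustrative name, not from the original):

epoch_loss = 0.0
for x, y in zip(x_data, y_data):
    l = loss(x, y)            # a tensor that carries a computation graph
    l.backward()
    w.data = w.data - 0.01 * w.grad.data
    w.grad.data.zero_()
    epoch_loss += l.item()    # a plain Python float: keeps no graph alive
print("epoch loss:", epoch_loss)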
1. self.linear is a callable object: nn.Linear implements the __call__ member function, so self.linear(x) invokes the layer (a small sketch of the mechanism follows after these points).
2. Anything that needs to take part in the computation graph should subclass the Module class.
3. Procedure: compute y_pred; compute the loss; call backward; update the parameters.
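As a minimal illustration of the callable-object mechanism from point 1 (Multiply is a hypothetical toy class, standing in for the figure the original note referred to):

class Multiply:
    def __init__(self, k):
        self.k = k

    def __call__(self, x):
        # instance(x) is dispatched to __call__, just like model(x) in PyTorch
        return self.k * x

double = Multiply(2.0)
print(double(3.0))  # 6.0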
import torch
import torch.nn as nn
import matplotlib.pyplot as plt

# the x and y data must be matrices, hence entries like [1.0]
x_data = torch.Tensor([[1.0], [2.0], [3.0]])
y_data = torch.Tensor([[2.0], [4.0], [6.0]])
losslst = []

class LinearModel(nn.Module):
    def __init__(self):
        super(LinearModel, self).__init__()
        # instantiate a Linear layer
        self.linear = nn.Linear(1, 1)

    def forward(self, x):
        # the layer is a callable object, pythonic
        y_pred = self.linear(x)
        return y_pred

model = LinearModel()
# this MSE is not divided by N
# criterion = torch.nn.MSELoss(size_average=False)  # deprecated spelling
criterion = torch.nn.MSELoss(reduction='sum')
# model.parameters() yields the optimizable parameters of the instance;
# lr and the other arguments are the optimization options (learning rate, etc.)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# training
for epoch in range(100):
    y_pred = model(x_data)
    loss = criterion(y_pred, y_data)
    # printing the loss object calls __str__() automatically and builds no graph
    print(epoch, loss.item())
    losslst.append(loss.item())
    optimizer.zero_grad()
    # backpropagate after zeroing the gradients
    loss.backward()
    optimizer.step()

# plot
plt.plot(range(100), losslst)
plt.ylabel('Loss')
plt.xlabel('epoch')
plt.show()

# print weight and bias
# item() is optional, but without it you get a 1x1 matrix [[...]]
print('w = ', model.linear.weight.item())
print('b = ', model.linear.bias.item())
print('-' * 60)

# Test the model
# the input is a 1x1 matrix, so the output is a 1x1 matrix too
x_test = torch.Tensor([[4.0]])
y_test = model(x_test)
print('y_pred = ', y_test.data)
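The following is the standard usage example for nn.Linear, as given in the PyTorch documentation: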
>>> m = nn.Linear(20, 30)
>>> input = torch.randn(128, 20)
>>> output = m(input)
>>> print(output.size())
torch.Size([128, 30])
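nn.Linear(20, 30) stores a weight of shape (30, 20) and a bias of shape (30,), and computes output = input @ weight.T + bias, which is why the (128, 20) input yields a (128, 30) output.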