Linear Regression
Implementing a linear regression model from scratch
Import packages and modules
import torch
from IPython import display
from matplotlib import pyplot as plt
import numpy as np
import random
Generate the dataset
Generate a noisy linear dataset by hand, with the following linear relationship:
price = w_{area} \cdot area + w_{age} \cdot age + b
# set input feature number
num_inputs = 2
# set example number
num_examples = 1000
# set true weight and bias in order to generate the corresponding labels
true_w = [2, -3.4]  # true parameters
true_b = 4.2        # true parameters
features = torch.randn(num_examples, num_inputs, dtype=torch.float32)  # 1000 samples with 2 features each, drawn from a standard normal distribution, returned as a tensor
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b  # compute the ground-truth labels from the true parameters
labels += torch.tensor(np.random.normal(0, 0.01, size=labels.size()), dtype=torch.float32)  # add Gaussian noise to the labels
torch.randn(*size, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False) → Tensor
Returns a tensor of random numbers drawn from a normal distribution with mean 0 and variance 1 (also called the standard normal distribution).
Example
>>> torch.randn(4)
tensor([-2.1436, 0.9966, 2.3426, -0.6366])
>>> torch.randn(2, 3)
tensor([[ 1.5954, 2.8929, -1.0923],
[ 1.1719, -0.4709, -0.1996]])
np.random.normal(loc=0.0, scale=1.0, size=None)
Returns an array of samples drawn from a Gaussian distribution with mean loc and standard deviation scale (note that scale is the standard deviation, not the variance); the array has shape size.
Example
>>> np.random.normal(0, 0.01, (5,2))
array([[-0.00840117, 0.01345385],
[ 0.01409283, -0.00051939],
[-0.00374735, -0.00230632],
[-0.00743133, -0.00061798],
[ 0.00671899, -0.00639399]])
Visualize the generated data
plt.scatter(features[:, 1].numpy(), labels.numpy(), 1)
plt.scatter (from matplotlib) draws a scatter plot: the first argument gives the x-axis values, the second the y-axis values, and the third controls the marker size.
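As a quick check, here is a minimal sketch (the axis labels 'area' and 'age' are assumptions taken from the formula above) that plots each feature against the labels:
fig, axes = plt.subplots(1, 2, figsize=(8, 3))  # one subplot per feature
axes[0].scatter(features[:, 0].numpy(), labels.numpy(), 1)
axes[0].set_xlabel('area (feature 0)')
axes[1].scatter(features[:, 1].numpy(), labels.numpy(), 1)
axes[1].set_xlabel('age (feature 1)')
plt.show()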
Read the dataset
# build a generator that returns mini-batches of samples
def data_iter(batch_size, features, labels):
    num_examples = len(features)  # number of samples: 1000
    indices = list(range(num_examples))  # list of the indices 0 .. 999
    random.shuffle(indices)  # shuffle the list in place so samples are read in random order
    for i in range(0, num_examples, batch_size):
        # j is a LongTensor of the indices i .. i+batch_size-1; the last batch may be smaller
        j = torch.LongTensor(indices[i: min(i + batch_size, num_examples)])
        yield features.index_select(0, j), labels.index_select(0, j)  # index_select(dim, index); yield turns this function into a generator
# test: fetch one batch of 10 samples
batch_size = 10
for X, y in data_iter(batch_size, features, labels):
    print(X, '\n', y)
    break
yield
In short, yield turns a function into a generator: each time execution reaches yield, one value is handed back and the function suspends until the next iteration. (See: https://www.ibm.com/developerworks/cn/opensource/os-cn-python-yield/)
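For illustration, a minimal sketch of a generator (count_up_to is a made-up name for this example):
def count_up_to(n):
    """Yield 0, 1, ..., n-1 one value at a time."""
    i = 0
    while i < n:
        yield i  # suspend here; resume from this point on the next iteration
        i += 1

for value in count_up_to(3):
    print(value)  # prints 0, 1, 2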
Initialize model parameters
w = torch.tensor(np.random.normal(0, 0.01, (num_inputs, 1)), dtype=torch.float32)  # initialize w from a Gaussian with mean 0 and standard deviation 0.01
b = torch.zeros(1, dtype=torch.float32)  # initialize b to 0
w.requires_grad_(requires_grad=True)  # track gradients on w for the later backward pass
b.requires_grad_(requires_grad=True)
Initialize the weight w randomly and the bias b to zero. Calling requires_grad_(requires_grad=True) sets requires_grad=True, marking the tensor as a variable to differentiate with respect to, so that w and b receive gradients in the backward pass later.
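A minimal sketch of what requires_grad enables (the values are chosen only for illustration):
x = torch.tensor([2.0], requires_grad=True)
y = (x ** 2).sum()  # y = x^2
y.backward()        # autograd fills x.grad with dy/dx = 2x
print(x.grad)       # tensor([4.])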
Define the model
That is, implement the function price = w_{area} \cdot area + w_{age} \cdot age + b:
def linreg(X, w, b):
    return torch.mm(X, w) + b  # torch.mm performs matrix multiplication
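A quick shape sanity check for torch.mm here, a sketch using the tensors defined above:
X = features[:3]             # shape (3, 2)
print(torch.mm(X, w).shape)  # (3, 2) @ (2, 1) -> torch.Size([3, 1]); adding b broadcasts over the batch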
Define the loss function
Use the squared loss: l^{(i)}(\mathbf{w}, b) = \frac{1}{2} \left(\hat{y}^{(i)} - y^{(i)}\right)^2
def squared_loss(y_hat, y):
    return (y_hat - y.view(y_hat.size())) ** 2 / 2  # Tensor.view(*shape) reshapes y to match y_hat
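The view call matters: subtracting an (n,)-shaped label vector from an (n, 1)-shaped prediction would broadcast to an (n, n) matrix instead of taking an elementwise difference. A minimal sketch:
y_hat = torch.ones(3, 1)
y = torch.zeros(3)
print((y_hat - y).shape)                     # torch.Size([3, 3]) -- unintended broadcasting
print((y_hat - y.view(y_hat.size())).shape)  # torch.Size([3, 1]) -- elementwise, as intended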
Define the optimization function
Use mini-batch stochastic gradient descent; the parameter update rule is:
(\mathbf{w},b) \leftarrow (\mathbf{w},b) - \frac{\eta}{|\mathcal{B}|} \sum_{i \in \mathcal{B}} \partial_{(\mathbf{w},b)} l^{(i)}(\mathbf{w},b)
def sgd(params, lr, batch_size):
    for param in params:
        param.data -= lr * param.grad / batch_size  # use .data to update the parameter without the operation being tracked by autograd
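An equivalent way to perform the same update without going through .data is torch.no_grad(); a sketch, functionally the same as sgd above:
def sgd_no_grad(params, lr, batch_size):
    with torch.no_grad():  # temporarily disable gradient tracking
        for param in params:
            param -= lr * param.grad / batch_size  # in-place update, not recorded by autograd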
Training
# hyperparameter init
lr = 0.03  # learning rate
num_epochs = 5  # number of training epochs
net = linreg  # the model is the linear regression function defined above
loss = squared_loss  # the loss is the squared loss defined above
# training
for epoch in range(num_epochs):  # training repeats num_epochs times
    # in each epoch, every sample in the dataset is used once
    # X is the feature and y is the label of a batch of samples
    for X, y in data_iter(batch_size, features, labels):
        l = loss(net(X, w, b), y).sum()  # l = total loss over the batch_size samples
        # calculate the gradient of the batch-sample loss
        l.backward()  # autograd computes the gradient of l with respect to w and b
        # use mini-batch stochastic gradient descent to update the model parameters
        sgd([w, b], lr, batch_size)
        # reset the parameter gradients
        w.grad.data.zero_()  # backward accumulates gradients instead of overwriting, so they must be cleared manually
        b.grad.data.zero_()
    train_l = loss(net(features, w, b), labels)
    print('epoch %d, loss %f' % (epoch + 1, train_l.mean().item()))
tensor.backward(gradient=None, retain_graph=None, create_graph=False)
Computes the gradient of the current tensor and accumulates it into .grad. (See the official docs at https://pytorch.org/docs/stable/autograd.html?highlight=backward#torch.autograd.backward or https://blog.csdn.net/huyaoyu/article/details/81059315)
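A minimal sketch of why the gradients above must be cleared between iterations (backward accumulates into .grad rather than overwriting it):
x = torch.tensor([1.0], requires_grad=True)
(2 * x).sum().backward()
print(x.grad)  # tensor([2.])
(2 * x).sum().backward()
print(x.grad)  # tensor([4.]) -- the second gradient was added to the first
x.grad.data.zero_()
print(x.grad)  # tensor([0.])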
Concise implementation of a linear regression model with PyTorch
Import packages and modules
import torch
from torch import nn
import numpy as np
Generate the dataset
Same as above.
Read the dataset
import torch.utils.data as Data
batch_size = 10
# combine the features and labels of the dataset
dataset = Data.TensorDataset(features, labels)
# put the dataset into a DataLoader
data_iter = Data.DataLoader(
    dataset=dataset,        # torch TensorDataset format
    batch_size=batch_size,  # mini-batch size
    shuffle=True,           # whether to shuffle the data
    num_workers=2,          # number of worker subprocesses for loading the data
)
# test
for X, y in data_iter:
    print(X, '\n', y)
    break
Here PyTorch's own data utilities load the data: Data.TensorDataset(features, labels) wraps the tensors into a dataset object.
DataLoader then wraps the dataset and returns an iterator over mini-batches; its full signature is:
DataLoader(dataset, batch_size=1, shuffle=False, sampler=None,
batch_sampler=None, num_workers=0, collate_fn=None,
pin_memory=False, drop_last=False, timeout=0,
worker_init_fn=None)
See: https://pytorch.org/docs/stable/data.html?highlight=utils%20data#module-torch.utils.data
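Two options that often matter in practice are drop_last and the loader's length; a sketch, assuming the dataset built above:
loader = Data.DataLoader(dataset, batch_size=64, shuffle=True, drop_last=True)  # discard the final short batch
print(len(loader))  # batches per epoch: floor(1000 / 64) = 15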
Define the model
class LinearNet(nn.Module):
    def __init__(self, n_feature):
        super(LinearNet, self).__init__()  # call the parent class constructor
        self.linear = nn.Linear(n_feature, 1)  # function prototype: `torch.nn.Linear(in_features, out_features, bias=True)`
    def forward(self, x):
        y = self.linear(x)
        return y
net = LinearNet(num_inputs)
print(net)
The linear model is built by subclassing torch.nn.Module, the base class for all neural network modules; see the documentation for details.
torch.nn.Linear(in_features, out_features, bias=True)
Applies a linear transformation to the incoming data:
y = xA^T + b
in_features – size of each input sample
out_features – size of each output sample
bias – if set to False, the layer will not learn an additive bias
Examples
>>> m = nn.Linear(20, 30)
>>> input = torch.randn(128, 20)
>>> output = m(input)
>>> print(output.size())
torch.Size([128, 30])
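Note the A^T in the formula: the stored weight matrix has shape (out_features, in_features). A quick sketch:
m = nn.Linear(20, 30)
print(m.weight.shape)  # torch.Size([30, 20]) -- A has shape (out_features, in_features), applied as x @ A.T
print(m.bias.shape)    # torch.Size([30])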
Ways to initialize a multilayer network
# ways to init a multilayer network
# method one
net = nn.Sequential(
    nn.Linear(num_inputs, 1)
    # other layers can be added here
)
# method two
net = nn.Sequential()
net.add_module('linear', nn.Linear(num_inputs, 1))
# net.add_module ......
# method three
from collections import OrderedDict
net = nn.Sequential(OrderedDict([
    ('linear', nn.Linear(num_inputs, 1))
    # ......
]))
print(net)
print(net[0])
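With methods two and three the layer is registered under the name 'linear', so it can be retrieved by name as well as by index; a sketch, assuming the net built with method three above:
print(net[0])      # index access works for all three constructions
print(net.linear)  # named access works where the layer was registered via a name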
Initialize model parameters
from torch.nn import init
init.normal_(net[0].weight, mean=0.0, std=0.01)
init.constant_(net[0].bias, val=0.0) # or you can use `net[0].bias.data.fill_(0)` to modify it directly
torch.nn.init.normal_(tensor, mean=0.0, std=1.0)
Fills the tensor with values drawn from a normal (Gaussian) distribution.
tensor – an n-dimensional torch.Tensor
mean – the mean of the normal distribution
std – the standard deviation of the normal distribution
torch.nn.init.constant_(tensor, val)
Fills the tensor with the value val; used here to initialize the bias to a constant.
tensor – an n-dimensional torch.Tensor
val – the value to fill the tensor with
Define the loss function
Use the MSE (mean squared error) loss built into torch.nn:
loss = nn.MSELoss() # nn built-in squared loss function
# function prototype: `torch.nn.MSELoss(size_average=None, reduce=None, reduction='mean')`
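Note that nn.MSELoss with the default reduction='mean' averages over all elements and has no 1/2 factor, so its value differs from the scratch squared_loss by a constant factor. A quick sketch, assuming squared_loss from the from-scratch section above:
y_hat = torch.tensor([[1.0], [2.0]])
y = torch.tensor([[0.0], [0.0]])
print(loss(y_hat, y))                 # (1 + 4) / 2 = 2.5
print(squared_loss(y_hat, y).mean())  # extra 1/2 factor: 1.25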
Define the optimization function
Again use SGD, this time via the built-in optimizer in torch.optim:
import torch.optim as optim
optimizer = optim.SGD(net.parameters(), lr=0.03)  # built-in stochastic gradient descent
print(optimizer) # function prototype: `torch.optim.SGD(params, lr=, momentum=0, dampening=0, weight_decay=0, nesterov=False)`
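The learning rate can also be set per parameter group, or adjusted after construction; a sketch with illustrative values:
optimizer = optim.SGD([
    {'params': net[0].weight},            # uses the default lr below
    {'params': net[0].bias, 'lr': 0.01},  # its own lr (illustrative value)
], lr=0.03)
# decay the learning rate later, e.g. by a factor of 10
for param_group in optimizer.param_groups:
    param_group['lr'] *= 0.1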
Training
num_epochs = 3
for epoch in range(1, num_epochs + 1):
    for X, y in data_iter:
        output = net(X)
        l = loss(output, y.view(-1, 1))
        optimizer.zero_grad()  # reset gradients, equivalent to net.zero_grad()
        l.backward()
        optimizer.step()
    print('epoch %d, loss: %f' % (epoch, l.item()))
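As a final check, the learned parameters can be compared with the true ones used to generate the data; a sketch, assuming the Sequential net above so that net[0] is the linear layer:
dense = net[0]
print(true_w, dense.weight.data)  # should be close to [2, -3.4]
print(true_b, dense.bias.data)    # should be close to 4.2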