Brief Introduction
This article shows how to determine, by certain techniques, whether a model is underfitting or overfitting. The experiment is carried out with linear regression as its basis.
It is worth noting that as the amount of code grows, the code should be kept modular and completed module by module, with each module carefully tested as soon as it is finished. Leaving debugging until all the modules have been assembled makes it much harder.
See the blog for the source code, the exercise description, and the other files.
Linear Regression
Import the necessary packages and read the data
import torch
import torch.nn as nn
import torch.utils.data as Data
from scipy.io import loadmat
# define the root path
root_path = '../machine-learning-ex5/ex5/'
# define the data path
data_path = root_path + 'ex5data1.mat'
# read file
train_data_file = loadmat(data_path)
# show the file
train_data_file
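The line above simply displays everything loadmat returned. To see only the variable names and array shapes, a minimal check such as the one below can be used; it just skips the MATLAB metadata keys.
# optional check: list only the data variables and their shapes, skipping MATLAB metadata keys
for key, value in train_data_file.items():
    if not key.startswith('__'):
        print(key, value.shape)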
Data Preprocessing
# preprocess data, turn array to tensor
train_features = torch.tensor(train_data_file['X'],dtype=torch.float)
test_features = torch.tensor(train_data_file['Xtest'],dtype=torch.float)
validation_features = torch.tensor(train_data_file['Xval'],dtype=torch.float)
train_labels = torch.tensor(train_data_file['y'],dtype=torch.float)
test_labels = torch.tensor(train_data_file['ytest'],dtype=torch.float)
validation_labels = torch.tensor(train_data_file['yval'],dtype=torch.float)
# show the shape
print(train_features.shape)
print(test_features.shape)
print(validation_features.shape)
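The train function defined later iterates over mini-batches, so the training tensors need to be wrapped into a data iterator. A minimal sketch using the Data module imported above is shown here; the batch size is only an illustrative choice, not a value fixed by the exercise.
# a minimal sketch: wrap the training tensors into mini-batches for the train function defined below
batch_size = 12  # illustrative choice
train_dataset = Data.TensorDataset(train_features, train_labels)
train_iter = Data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)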
Implement the model initialization method and add the regularization term
# initialize the parameters with a uniform distribution
def InitParameters(net, epsilon_init=0.12):
    for m in net.modules():
        if isinstance(m, nn.Linear):
            weight_shape = m.weight.data.shape
            bias_shape = m.bias.data.shape
            # draw weights and biases uniformly from the range [-epsilon_init, epsilon_init]
            m.weight.data = torch.rand(weight_shape) * 2 * epsilon_init - epsilon_init
            m.bias.data = torch.rand(bias_shape) * 2 * epsilon_init - epsilon_init

# add the regularization term to the gradient
def add_regular_item(net, m, lm):
    # go through all the modules and find the linear layers whose gradients need to be modified
    for module in net.modules():
        if isinstance(module, nn.Linear):
            # gradient of the regularization term: λθ/m (lm = λ, module.weight = θ)
            module.weight.grad.data.add_(lm / m * module.weight.data)
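As a quick sanity check of the initializer, it can be applied to a small linear model and the parameter values inspected; the one-feature model below is only an illustrative placeholder, not necessarily the network used in the rest of the experiment.
# illustrative usage: initialize a one-feature linear model and check the parameter range
net = nn.Sequential(nn.Linear(1, 1))
InitParameters(net)
for p in net.parameters():
    print(p.data)  # every value should lie within [-0.12, 0.12]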
Implement the training and evaluation methods
# implement the training method
def train(net, train_iter, num_epochs, loss, optim, lm=0, print_frequence=0):
    for epoch in range(num_epochs):
        for X, y in train_iter:
            # m: the number of examples in the current batch
            m = y.shape[0]
            optim.zero_grad()
            y_hat = net(X)
            l = loss(y_hat, y).sum()
            l.backward()
            add_regular_item(net, m, lm)
            optim.step()
        if print_frequence > 0:
            if (epoch + 1) % print_frequence == 0:
                print("epoch: %d, average loss: %f"