- 本次打卡的主要内容包括:
- 预训练模型加载与修改
- 训练过程中的模型保存
- 伪标签训练
- 知识蒸馏
预训练模型加载与修改
- 预训练模型采用的为resnet18,基于torchvision的model中保存的模型加载参数,并将现有的参数固定,后接全连接层进行回归
- resnet18模型加载说明:
- 固定参数的是用来提取图片的数据特征
- resnet18模型最后一层的全连接层输出为进入softmax层之前的out_features
- 利用 model.parameters()的可迭代性的params.requires_grad = False 固定参数
- 全连接层的加载:
- 全连接层使用nn.Linear(),使用nn.Sequential转化为Sequential后与原始网络合并
- 代码如下:
def set_parameter_requires_grad(model, feature_extracting):
    """Freeze all parameters of *model* when *feature_extracting* is True.

    Frozen parameters are excluded from gradient computation, so the
    pretrained backbone acts as a fixed feature extractor.

    Args:
        model: any torch.nn.Module.
        feature_extracting: when True, set requires_grad = False on
            every parameter; when False, leave the model untouched.
    """
    if feature_extracting:
        # (removed the redundant no-op `model = model` assignment)
        for param in model.parameters():
            param.requires_grad = False
def resnet_model(out_feature, feature_extract = True):
    """Build a resnet18 backbone with a fresh linear regression head.

    Loads ImageNet-pretrained weights, optionally freezes the backbone
    via set_parameter_requires_grad, and swaps the final fully-connected
    layer for a new Linear head with ``out_feature`` outputs.
    """
    backbone = torchvision.models.resnet18(pretrained=True)
    set_parameter_requires_grad(backbone, feature_extract)
    head_in = backbone.fc.in_features
    backbone.fc = nn.Sequential(nn.Linear(head_in, out_feature))
    return backbone
训练过程中的模型保存
- 模型的训练函数,其中包括模型保存函数,构建了整体训练流程
from torch.optim.lr_scheduler import CosineAnnealingLR
def train_model(model, trainLoader, vaildLoader, params):
    """Train *model* with MSE loss, Adam and cosine-annealed LR.

    Prints mean losses and saves a checkpoint every second epoch.

    Args:
        model: torch module mapping X -> prediction.
        trainLoader: DataLoader of (X, y) training batches.
        vaildLoader: DataLoader of (X, y) validation batches.
        params: object with attributes ``lr``, ``epochs`` and
            ``save_path`` (a format string with an ``{epoch}`` field).
    """
    train_loss, vaild_loss = [], []
    loss_func = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=params.lr)
    scheduler = CosineAnnealingLR(optimizer, T_max=10)
    for i in range(params.epochs):
        valid_losses = []
        train_losses = []
        model.train()  # once per epoch is enough (was per-batch)
        for ibatch, (X, y) in enumerate(trainLoader):
            optimizer.zero_grad()
            out = model(X)
            loss = loss_func(out, y)  # (input, target) order per the docs
            loss.backward()
            optimizer.step()
            train_losses.append(loss.item())
            train_loss.append(loss.item())
        scheduler.step()  # bug fix: scheduler was created but never stepped
        model.eval()
        with torch.no_grad():  # no gradient bookkeeping during validation
            for iv, (X, y) in enumerate(vaildLoader):
                out = model(X)
                v_loss = loss_func(out, y)
                valid_losses.append(v_loss.item())
                vaild_loss.append(v_loss.item())
        if i%2 == 0:
            print("train loss: {}, vaild loss: {}".format(
                np.mean(train_losses), np.mean(valid_losses)))
            # bug fix: use the passed-in `params`, not the global `para`
            filename = params.save_path.format(epoch=i)
            save_torch(model, filename)
            print('epoch {} model saved'.format(i))
伪标签训练
- 伪标签训练(pseudo-labeled data learning)流程如下:
- 步骤:
-
将原有数据分为train set和valid set训练出 model1
-
使用 model1 对无标签的 test set 进行伪标签预测
-
对train set按比例进行随机抽样,与带有伪标签的test set合并为新的train set
-
使用新的train set对模型进行训练得到model2
-
再使用 model2 对 test set 进行预测,得到最终结果
-
代码如下:
def get_torh_modl(modelOri, filename):
    """Load the state_dict stored at *filename* into *modelOri* and return it."""
    modelOri.load_state_dict(torch.load(filename))
    return modelOri


def pseudo_label_creat(model1, model1_fn, test_input):
    """Predict pseudo labels for *test_input* with the checkpoint at *model1_fn*.

    Returns (test_input, pseudo_label), both detached from the graph.
    """
    model1 = get_torh_modl(model1, model1_fn)
    pseudo_label = model1(test_input)
    return test_input.detach(), pseudo_label.detach()


def pseudo_real_concat(Xtrain, ytrain, Xtest, ytest, rate):
    """Randomly keep a *rate* fraction of the real training pairs and
    concatenate them with the pseudo-labelled test pairs.
    """
    index = random.sample(range(0, len(ytrain)), int(len(ytrain) * rate))
    Xtrain = Xtrain[index, ...]
    ytrain = ytrain[index, ...]
    X = torch.cat((Xtrain, Xtest), dim=0)
    y = torch.cat((ytrain, ytest), dim=0)
    return X, y
-
知识蒸馏
- 对于回归问题的知识蒸馏
- 知识蒸馏主要思想是基于复杂模型的学到的数据结构对简单模型的结果进行优化
- 主要措施是对于训练损失函数的修改
- 刚开始应用于分类问题,之后文章:
- Knowledge Distillation for Regression with Teacher Bounds 将其应用于回归问题
- 更多关于知识蒸馏的知识,blog 如下:
- https://blog.csdn.net/nature553863/article/details/80568658
- https://blog.csdn.net/bryant_meng/article/details/104703438
- 在使用中采用如下公式设计了损失函数:
-
- 其中 $m$ 为 margin,设置为 1;$\nu$ 为权重,设置为 0.5
- $y_{reg}$ 为真实值
- $R_t$ 和 $R_s$ 分别为 teacher model 和 student model 学习得到的回归量
- $L$ 为损失函数,这里选择的为 SmoothL1 损失函数
- 代码如下:
def loss_fn_reg_kd(outputs, labels, teacher_output, margin=1.0, weight=0.5):
    """Teacher-bounded regression distillation loss.

    loss = SmoothL1(outputs, labels)
           + weight * mean_i [ MSE(outputs_i, labels_i)
                               if MSE(outputs_i, labels_i)
                                  > MSE(teacher_output_i, labels_i) + margin
                               else 0 ]

    The margin (1) and weight (0.5) were hard-coded; they are now
    keyword parameters with the same defaults, so existing callers
    see identical behaviour.

    Args:
        outputs: student predictions, indexable per sample.
        labels: ground-truth targets, same leading dimension.
        teacher_output: teacher predictions, same leading dimension.
        margin: slack added to the teacher error before penalising.
        weight: weight of the teacher-bound term.
    """
    mse = nn.MSELoss()  # hoisted: was re-instantiated three times per sample
    L_teacher_student = torch.zeros(1, 1)
    for i, _ in enumerate(labels):
        student_err = mse(outputs[i], labels[i])
        teacher_err = mse(teacher_output[i], labels[i])
        # penalise the student only when it is worse than teacher + margin
        if student_err > teacher_err + margin:
            L_teacher_student += student_err
    loss = nn.SmoothL1Loss()(outputs, labels) + weight * L_teacher_student / len(labels)
    return loss
def distill_model_train(
        teacher_moedl, student_model, trainLoader, vaildLoader, params):
    '''
    Knowledge-distillation training loop.

    The student is optimised with loss_fn_reg_kd against both the labels
    and the frozen teacher's predictions; validation uses plain MSE.
    Prints mean losses and saves a student checkpoint every second epoch.

    Args:
        teacher_moedl: trained teacher network (not updated here).
        student_model: network to train.
        trainLoader / vaildLoader: DataLoaders yielding (X, y) batches.
        params: object with attributes lr, epochs and save_path
            (format string with an ``{epoch}`` field).
    '''
    train_loss, vaild_loss = [], []
    loss_func = nn.MSELoss()
    optimizer = torch.optim.Adam(student_model.parameters(), lr=params.lr)
    scheduler = CosineAnnealingLR(optimizer, T_max=10)
    teacher_moedl.eval()  # bug fix: teacher must run in inference mode (BN/dropout)
    for i in range(params.epochs):
        valid_losses = []
        train_losses = []
        student_model.train()
        for ibatch, (X, y) in enumerate(trainLoader):
            optimizer.zero_grad()
            with torch.no_grad():  # bug fix: no gradients through the teacher
                t_out = teacher_moedl(X)
            s_out = student_model(X)
            loss = loss_fn_reg_kd(s_out, y, t_out)
            loss.backward()
            optimizer.step()
            train_losses.append(loss.item())
            train_loss.append(loss.item())
        scheduler.step()  # bug fix: scheduler was created but never stepped
        student_model.eval()
        with torch.no_grad():
            for iv, (X, y) in enumerate(vaildLoader):
                out = student_model(X)
                v_loss = loss_func(out, y)
                valid_losses.append(v_loss.item())
                vaild_loss.append(v_loss.item())
        if i%2 == 0:
            print("train loss: {}, vaild loss: {}".format(
                np.mean(train_losses), np.mean(valid_losses)))
            # bug fix: use the passed-in `params`, not the global `para`
            filename = params.save_path.format(epoch=i)
            save_torch(student_model, filename)
            print('epoch {} model saved'.format(i))
完整代码如下:
import os
import random
import torch
import torch.nn as nn
import torchvision
import torch.nn.functional as F
import pandas as pd
import numpy as np
import cv2
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, KFold
class MLP(nn.Module):
    """Small convolutional regressor used as the student network.

    Despite the name "MLP", this is a CNN: three stride-2 conv layers
    followed by four fully-connected layers.
    """
    def __init__(self, output_dims):
        # output_dims: number of regression targets
        super(MLP, self).__init__()
        #self.input_dims = input_dims
        self.output_dims = output_dims
        self.conv1 = nn.Conv2d(
            in_channels=3, out_channels=16,
            kernel_size=3,stride=2)
        self.conv2 = nn.Conv2d(
            in_channels=16, out_channels=32,
            kernel_size=3, stride=2)
        self.conv3 = nn.Conv2d(
            in_channels=32, out_channels=64,
            kernel_size=3, stride=2)
        # NOTE(review): conv4 is registered but never called in forward(),
        # so its parameters are created (and handed to the optimizer) but
        # unused — confirm whether it should be removed or wired in.
        self.conv4 = nn.Conv2d(
            in_channels=64, out_channels=64,
            kernel_size=3, stride=2)
        # 7744 == 64 * 11 * 11 — presumably assumes ~96x96 inputs after
        # the three stride-2 convs; TODO confirm against the dataset.
        self.fc1 = nn.Linear(7744, 1600)
        self.fc2 = nn.Linear(1600, 800)
        self.fc3 = nn.Linear(800, 100)
        self.fc4 = nn.Linear(100, self.output_dims)
    def forward(self, X):
        # three conv+ReLU stages, flatten, then the fully-connected head
        X = F.relu(self.conv1(X))
        X = F.relu(self.conv2(X))
        X = F.relu(self.conv3(X))
        X = X.reshape(X.shape[0],-1)
        X = F.relu(self.fc1(X))
        X = F.relu(self.fc2(X))
        X = F.relu(self.fc3(X))
        out = self.fc4(X)
        return out
def set_parameter_requires_grad(model, feature_extracting):
    """Freeze all parameters of *model* when *feature_extracting* is True.

    Frozen parameters are excluded from gradient computation, so the
    pretrained backbone acts as a fixed feature extractor.

    Args:
        model: any torch.nn.Module.
        feature_extracting: when True, set requires_grad = False on
            every parameter; when False, leave the model untouched.
    """
    if feature_extracting:
        # (removed the redundant no-op `model = model` assignment)
        for param in model.parameters():
            param.requires_grad = False
def resnet_model(out_feature, feature_extract = True):
    """Build a resnet18 backbone with a fresh linear regression head.

    Loads ImageNet-pretrained weights, optionally freezes the backbone
    via set_parameter_requires_grad, and swaps the final fully-connected
    layer for a new Linear head with ``out_feature`` outputs.
    """
    backbone = torchvision.models.resnet18(pretrained=True)
    set_parameter_requires_grad(backbone, feature_extract)
    head_in = backbone.fc.in_features
    backbone.fc = nn.Sequential(nn.Linear(head_in, out_feature))
    return backbone
def alexnet_model(out_feature, feature_extract = True):
    """Build an AlexNet backbone with a fresh regression head.

    Bug fixes versus the original: it loaded resnet18 instead of AlexNet,
    never attached a regression head, and returned None (missing return).

    Args:
        out_feature: number of regression outputs.
        feature_extract: when True, freeze the pretrained backbone.
    """
    model_ft = torchvision.models.alexnet(pretrained=True)
    set_parameter_requires_grad(model_ft, feature_extract)
    # AlexNet's head is classifier[6]; replace it with a regression layer
    num_ftrs = model_ft.classifier[6].in_features
    model_ft.classifier[6] = nn.Linear(num_ftrs, out_feature)
    return model_ft
class Params:
    """Plain attribute container for hyper-parameters (populated below)."""
    pass
# Global hyper-parameter bundle consumed by the loader/training helpers.
para = Params()
para.lr = 1e-3  # Adam learning rate
para.epochs = 10  # training epochs per run
para.batch_size = 32  # DataLoader batch size
para.save_path = './model/model_epoch_{epoch}.mdl'  # checkpoint path template
from torch.utils.data import Dataset, DataLoader, TensorDataset
def in_out_creat(inputData, outputData, batch_size=None, shuffle=True):
    """Wrap paired (input, output) tensors in a DataLoader.

    Generalized: ``batch_size`` and ``shuffle`` are now overridable;
    the defaults reproduce the original behaviour (global
    ``para.batch_size``, shuffled).
    """
    if batch_size is None:
        batch_size = para.batch_size  # original hard-coded dependency
    return DataLoader(TensorDataset(inputData, outputData),
                      batch_size=batch_size, shuffle=shuffle)
def save_torch(model, filename):
    ''' Save *model*'s state_dict to *filename*, creating parent dirs.

    Uses os.makedirs(exist_ok=True) directly: race-free, and safe when
    *filename* has no directory component (dirname == '').
    '''
    dirname = os.path.dirname(filename)
    if dirname:
        os.makedirs(dirname, exist_ok=True)
    torch.save(model.state_dict(), filename)
def makedir(fileName):
    ''' Create the parent directory of *fileName* if it does not exist.

    Fixes: exist_ok avoids the check-then-create race, and the empty
    dirname of a bare filename is guarded (os.makedirs('') raises).
    '''
    path = os.path.dirname(fileName)
    if path:
        os.makedirs(path, exist_ok=True)
from torch.optim.lr_scheduler import CosineAnnealingLR
def train_model(model, trainLoader, vaildLoader, params):
    """Train *model* with MSE loss, Adam and cosine-annealed LR.

    Prints mean losses and saves a checkpoint every second epoch.

    Args:
        model: torch module mapping X -> prediction.
        trainLoader: DataLoader of (X, y) training batches.
        vaildLoader: DataLoader of (X, y) validation batches.
        params: object with attributes ``lr``, ``epochs`` and
            ``save_path`` (a format string with an ``{epoch}`` field).
    """
    train_loss, vaild_loss = [], []
    loss_func = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=params.lr)
    scheduler = CosineAnnealingLR(optimizer, T_max=10)
    for i in range(params.epochs):
        valid_losses = []
        train_losses = []
        model.train()  # once per epoch is enough (was per-batch)
        for ibatch, (X, y) in enumerate(trainLoader):
            optimizer.zero_grad()
            out = model(X)
            loss = loss_func(out, y)  # (input, target) order per the docs
            loss.backward()
            optimizer.step()
            train_losses.append(loss.item())
            train_loss.append(loss.item())
        scheduler.step()  # bug fix: scheduler was created but never stepped
        model.eval()
        with torch.no_grad():  # no gradient bookkeeping during validation
            for iv, (X, y) in enumerate(vaildLoader):
                out = model(X)
                v_loss = loss_func(out, y)
                valid_losses.append(v_loss.item())
                vaild_loss.append(v_loss.item())
        if i%2 == 0:
            print("train loss: {}, vaild loss: {}".format(
                np.mean(train_losses), np.mean(valid_losses)))
            # bug fix: use the passed-in `params`, not the global `para`
            filename = params.save_path.format(epoch=i)
            save_torch(model, filename)
            print('epoch {} model saved'.format(i))
def k_fold_train(X, y, model, params):
    '''5-fold cross-validation training; saves one checkpoint per fold.

    NOTE(review): the same `model` instance is reused (and keeps its
    trained weights) across folds, so folds after the first do not start
    from scratch — confirm this is intended.
    '''
    skf = KFold(n_splits=5, random_state=233, shuffle=True)
    for ifold, (train_ind, valid_ind) in enumerate(skf.split(X, y)):
        X_train, y_train = X[train_ind], y[train_ind]
        X_valid, y_valid = X[valid_ind], y[valid_ind]
        train_loader = in_out_creat(X_train, y_train)
        valid_loader = in_out_creat(X_valid, y_valid)
        train_model(model, train_loader, valid_loader, params)
        # per-fold checkpoint ('kflod' spelling kept: paths are read elsewhere)
        filename = './model/kflod/kfold_{}.mdl'.format(ifold)
        save_torch(model, filename)
'''
pseudo-labeled data learning
步骤:
将原有数据分为train set和valid set训练出 model1
使用model1 对无标签的 test set进行进行伪标签预测
对train set按比例进行随机抽样,与带有伪标签的test set合并为新的train set
使用新的train set对模型进行训练得到model2
在使用model2 对test set进行预测,得到最终结果
'''
def get_torh_modl(modelOri, filename):
    """Restore the weights stored at *filename* into *modelOri* and return it."""
    state = torch.load(filename)
    modelOri.load_state_dict(state)
    return modelOri
def pseudo_label_creat(model1, model1_fn, test_input):
    '''
    Generate pseudo labels for *test_input* using the checkpoint at
    *model1_fn*.

    Bug fixes: the model is switched to eval mode (BatchNorm/Dropout
    must not run in training mode during inference) and the forward
    pass runs under torch.no_grad().

    Returns (test_input, pseudo_label), both detached from the graph.
    '''
    model1 = get_torh_modl(model1, model1_fn)
    model1.eval()
    with torch.no_grad():
        pseudo_label = model1(test_input)
    return test_input.detach(), pseudo_label.detach()
def pseudo_real_concat(Xtrain, ytrain, Xtest, ytest, rate):
    '''
    Sub-sample a fraction `rate` of the real training pairs and stack
    them together with the pseudo-labelled test pairs.
    '''
    n_keep = int(len(ytrain) * rate)
    keep = random.sample(range(len(ytrain)), n_keep)
    X = torch.cat((Xtrain[keep, ...], Xtest), dim=0)
    y = torch.cat((ytrain[keep, ...], ytest), dim=0)
    return X, y
'''
对于回归问题的知识蒸馏
知识蒸馏主要思想是基于复杂模型的学到的数据结构对简单模型的结果进行优化
主要措施是对于训练损失函数的修改
刚开始应用于分类问题,之后文章:
Knowledge Distillation for Regression with Teacher Bounds
将其应用于回归问题
'''
def loss_fn_reg_kd(outputs, labels, teacher_output, margin=1.0, weight=0.5):
    """Teacher-bounded regression distillation loss.

    loss = SmoothL1(outputs, labels)
           + weight * mean_i [ MSE(outputs_i, labels_i)
                               if MSE(outputs_i, labels_i)
                                  > MSE(teacher_output_i, labels_i) + margin
                               else 0 ]

    The margin (1) and weight (0.5) were hard-coded; they are now
    keyword parameters with the same defaults, so existing callers
    see identical behaviour.

    Args:
        outputs: student predictions, indexable per sample.
        labels: ground-truth targets, same leading dimension.
        teacher_output: teacher predictions, same leading dimension.
        margin: slack added to the teacher error before penalising.
        weight: weight of the teacher-bound term.
    """
    mse = nn.MSELoss()  # hoisted: was re-instantiated three times per sample
    L_teacher_student = torch.zeros(1, 1)
    for i, _ in enumerate(labels):
        student_err = mse(outputs[i], labels[i])
        teacher_err = mse(teacher_output[i], labels[i])
        # penalise the student only when it is worse than teacher + margin
        if student_err > teacher_err + margin:
            L_teacher_student += student_err
    loss = nn.SmoothL1Loss()(outputs, labels) + weight * L_teacher_student / len(labels)
    return loss
def distill_model_train(
        teacher_moedl, student_model, trainLoader, vaildLoader, params):
    '''
    Knowledge-distillation training loop.

    The student is optimised with loss_fn_reg_kd against both the labels
    and the frozen teacher's predictions; validation uses plain MSE.
    Prints mean losses and saves a student checkpoint every second epoch.

    Args:
        teacher_moedl: trained teacher network (not updated here).
        student_model: network to train.
        trainLoader / vaildLoader: DataLoaders yielding (X, y) batches.
        params: object with attributes lr, epochs and save_path
            (format string with an ``{epoch}`` field).
    '''
    train_loss, vaild_loss = [], []
    loss_func = nn.MSELoss()
    optimizer = torch.optim.Adam(student_model.parameters(), lr=params.lr)
    scheduler = CosineAnnealingLR(optimizer, T_max=10)
    teacher_moedl.eval()  # bug fix: teacher must run in inference mode (BN/dropout)
    for i in range(params.epochs):
        valid_losses = []
        train_losses = []
        student_model.train()
        for ibatch, (X, y) in enumerate(trainLoader):
            optimizer.zero_grad()
            with torch.no_grad():  # bug fix: no gradients through the teacher
                t_out = teacher_moedl(X)
            s_out = student_model(X)
            loss = loss_fn_reg_kd(s_out, y, t_out)
            loss.backward()
            optimizer.step()
            train_losses.append(loss.item())
            train_loss.append(loss.item())
        scheduler.step()  # bug fix: scheduler was created but never stepped
        student_model.eval()
        with torch.no_grad():
            for iv, (X, y) in enumerate(vaildLoader):
                out = student_model(X)
                v_loss = loss_func(out, y)
                valid_losses.append(v_loss.item())
                vaild_loss.append(v_loss.item())
        if i%2 == 0:
            print("train loss: {}, vaild loss: {}".format(
                np.mean(train_losses), np.mean(valid_losses)))
            # bug fix: use the passed-in `params`, not the global `para`
            filename = params.save_path.format(epoch=i)
            save_torch(student_model, filename)
            print('epoch {} model saved'.format(i))
class Params:
    # NOTE(review): duplicate of the Params class defined earlier in this
    # file; this redefinition shadows it and looks like a copy-paste
    # leftover — confirm and remove one of the two.
    pass
if __name__ == '__main__':
    # ---- data loading and preprocessing ----
    train_df = pd.read_csv('./人脸关键点检测挑战赛_数据集/train.csv')
    train_img = np.load('./人脸关键点检测挑战赛_数据集/train.npy/train.npy')
    test_img = np.load('./人脸关键点检测挑战赛_数据集/test.npy/test.npy')
    print(train_df.head())
    print(train_img.shape)
    #print(train_df.isnull().sum())
    train_df.fillna(method='ffill', inplace=True)  # forward-fill missing keypoints
    para = Params()
    para.lr = 1e-3
    para.epochs = 10
    para.batch_size = 32
    para.save_path = './model/model_epoch_{epoch}.mdl'
    model = resnet_model(8)
    # images appear stored with the sample axis last; move it first and
    # replicate the single grayscale channel to 3 for the resnet input
    Xinput = train_img.transpose(2, 0, 1)
    youtput = train_df.values.astype(np.float32)
    Xinput = torch.FloatTensor(Xinput).unsqueeze(1).repeat(1,3,1,1)
    youtput = torch.FloatTensor(youtput)
    #k_fold_train(Xinput, youtput, model, para)
    # ---- pseudo-label learning ----
    # bug fix: pseudo labels must come from the *test* images; the
    # original built Xtest from train_img and left test_img unused
    Xtest = test_img.transpose(2, 0, 1)
    Xtest = torch.FloatTensor(Xtest).unsqueeze(1).repeat(1,3,1,1)
    model1 = model
    model1_fn = './model/kflod/kfold_{}.mdl'.format(0)
    Xtest, ytest = pseudo_label_creat(model1, model1_fn, Xtest)
    X, y = pseudo_real_concat(Xinput, youtput, Xtest, ytest, rate=0.7)
    model = resnet_model(8)
    # bug fix: train on the merged real+pseudo set; the original passed
    # the untouched Xinput/youtput, discarding the pseudo labels entirely
    k_fold_train(X, y, model, para)
    # ---- knowledge distillation ----
    Xtrain, Xvalid, ytrain, yvalid = train_test_split(
        train_img.transpose(2, 0, 1),
        train_df.values.astype(np.float32),
        test_size=0.1)
    Xtrain = torch.FloatTensor(Xtrain).unsqueeze(1).repeat(1,3,1,1)
    ytrain = torch.FloatTensor(ytrain)
    Xvalid = torch.FloatTensor(Xvalid).unsqueeze(1).repeat(1,3,1,1)
    yvalid = torch.FloatTensor(yvalid)
    trainLoader = in_out_creat(Xtrain, ytrain)
    validLoader = in_out_creat(Xvalid, yvalid)
    model = resnet_model(8)
    model1_fn = './model/kflod/kfold_{}.mdl'.format(0)
    teacher_moedl = get_torh_modl(model, model1_fn)
    student_model = MLP(8)
    para.save_path = './model/students/model_epoch_{epoch}.mdl'
    distill_model_train(
        teacher_moedl, student_model, trainLoader, validLoader, para)