Dataset
import os

import pandas as pd
from torch.utils.data import Dataset


class My_DataSet(Dataset):
    def __init__(self, data_path, label_path):
        # Each row of the label CSV holds a sample name and its label
        _data = pd.read_csv(label_path).values
        self.file_list = []
        for d in _data:
            self.file_list.append([os.path.join(data_path, d[0] + '.hdf5'), d[1]])

    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, item):
        data_path, label = self.file_list[item]
        data = read_data(data_path)  # project-specific .hdf5 loader
        return data, label
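A quick usage sketch: the dataset drops straight into a DataLoader. The train_data/ directory and labels.csv path are hypothetical, and read_data is assumed to return a tensor or numpy array so the default collate works:

from torch.utils.data import DataLoader

dataset = My_DataSet('train_data', 'labels.csv')  # hypothetical paths
loader = DataLoader(dataset, batch_size=32, shuffle=True)

for data, label in loader:
    pass  # each batch is ready to feed to the model below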
Building the model
import torch
from torch import nn


class AlexNet(nn.Module):
    def __init__(self):
        super(AlexNet, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(1, 96, 11, 4),  # in_channels, out_channels, kernel_size, stride
            nn.ReLU(),
            nn.MaxPool2d(3, 2),  # kernel_size, stride
            nn.Conv2d(96, 256, 5, 1, 2),  # the trailing 2 is the padding
            nn.ReLU(),
            nn.MaxPool2d(3, 2),
            nn.Conv2d(256, 384, 3, 1, 1),
            nn.ReLU(),
            nn.Conv2d(384, 384, 3, 1, 1),
            nn.ReLU(),
            nn.Conv2d(384, 256, 3, 1, 1),
            nn.ReLU(),
            nn.MaxPool2d(3, 2)
        )
        self.fc = nn.Sequential(
            nn.Linear(256 * 5 * 5, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, 10),
        )

    def forward(self, img):
        feature = self.conv(img)
        # Flatten the feature map before the fully connected head
        output = self.fc(feature.view(img.shape[0], -1))
        return output
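The 256*5*5 flatten size assumes single-channel 224x224 inputs (the spatial size shrinks 224 → 54 → 26 → 12 → 5 through the conv/pool stages). A quick shape check:

net = AlexNet()
x = torch.randn(8, 1, 224, 224)  # batch of 8 single-channel images
print(net(x).shape)  # torch.Size([8, 10])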
Model initialization
class AlexNet(nn.Module):
    def __init__(self):
        super(AlexNet, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(1, 96, 11, 4),  # in_channels, out_channels, kernel_size, stride
            nn.ReLU(),
            nn.MaxPool2d(3, 2),  # kernel_size, stride
            nn.Conv2d(96, 256, 5, 1, 2),
            nn.ReLU(),
            nn.MaxPool2d(3, 2),
            nn.Conv2d(256, 384, 3, 1, 1),
            nn.ReLU(),
            nn.Conv2d(384, 384, 3, 1, 1),
            nn.ReLU(),
            nn.Conv2d(384, 256, 3, 1, 1),
            nn.ReLU(),
            nn.MaxPool2d(3, 2)
        )
        self.fc = nn.Sequential(
            nn.Linear(256 * 5 * 5, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, 10),
        )
        self.initialize_weights()

    def forward(self, img):
        feature = self.conv(img)
        output = self.fc(feature.view(img.shape[0], -1))
        return output
    def initialize_weights(self):
        for m in self.modules():
            # Check whether the module is a Conv2d
            if isinstance(m, nn.Conv2d):
                torch.nn.init.xavier_normal_(m.weight.data)
                # Check whether the layer has a bias
                if m.bias is not None:
                    torch.nn.init.constant_(m.bias.data, 0.3)
            elif isinstance(m, nn.Linear):
                torch.nn.init.normal_(m.weight.data, 0, 0.1)  # mean 0, std 0.1
                if m.bias is not None:
                    torch.nn.init.zeros_(m.bias.data)
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
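A quick check that the initializer actually ran (it is called from __init__): every Conv2d bias should be the constant 0.3.

net = AlexNet()
print(net.conv[0].bias.data.unique())  # tensor([0.3000])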
Visualization
import numpy as np
from visdom import Visdom

viz = Visdom()
viz.line([0.], [0.], win='Train_loss', opts=dict(title='Task loss'))
viz.line([0.], [0.], win='Learning_Rate', opts=dict(title='Learning Rate'))

# Update the curve inside the training loop: frame_recording_train holds the
# per-batch losses of the current frame, frame is the x-axis step counter
viz.line([np.mean(frame_recording_train)], [frame], win='Train_loss', update='append')
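Only the loss-curve update is shown above; the Learning_Rate window is appended to the same way. A minimal sketch, assuming an optimizer object and the frame counter from the training loop:

# optimizer and frame come from the surrounding training loop
curr_lr = optimizer.param_groups[0]['lr']
viz.line([curr_lr], [frame], win='Learning_Rate', update='append')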
Optimizer
The optimizer used here is Apollo, an adaptive parameter-wise diagonal quasi-Newton method: each step maintains a moving average of the gradient, a diagonal Hessian approximation B, and the previous update direction, with an optional linear learning-rate warmup.
from typing import Any, Callable, Dict, Iterable, Optional, Union

import torch
from torch.optim.optimizer import Optimizer

Params = Union[Iterable[torch.Tensor], Iterable[Dict[str, Any]]]
OptLossClosure = Optional[Callable[[], float]]


class Apollo(Optimizer):
    def __init__(
        self,
        params: Params,
        lr: float = 1e-2,
        beta: float = 0.9,
        eps: float = 1e-4,
        warmup: int = 0,
        init_lr: float = 0.01,
        weight_decay: float = 0,
    ):
        if lr <= 0.0:
            raise ValueError('Invalid learning rate: {}'.format(lr))
        if eps < 0.0:
            raise ValueError('Invalid epsilon value: {}'.format(eps))
        if not 0.0 <= beta < 1.0:
            raise ValueError('Invalid beta parameter: {}'.format(beta))
        if not 0.0 <= weight_decay:
            raise ValueError(
                'Invalid weight_decay value: {}'.format(weight_decay)
            )
        if not 0.0 <= warmup:
            raise ValueError('Invalid warmup updates: {}'.format(warmup))
        if not 0.0 <= init_lr <= 1.0:
            raise ValueError(
                'Invalid initial learning rate: {}'.format(init_lr)
            )
        defaults = dict(
            lr=lr,
            beta=beta,
            eps=eps,
            warmup=warmup,
            init_lr=init_lr,
            base_lr=lr,
            weight_decay=weight_decay,
        )
        super(Apollo, self).__init__(params, defaults)

    def step(self, closure: OptLossClosure = None):
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue

                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state['step'] = 0
                    # Exponential moving average of gradient values
                    state['exp_avg_grad'] = torch.zeros_like(
                        p, memory_format=torch.preserve_format
                    )
                    # Diagonal approximation of the Hessian
                    state['approx_hessian'] = torch.zeros_like(
                        p, memory_format=torch.preserve_format
                    )
                    # Previous update direction
                    state['update'] = torch.zeros_like(
                        p, memory_format=torch.preserve_format
                    )

                # Linear warmup from init_lr up to base_lr
                if state['step'] < group['warmup']:
                    curr_lr = (group['base_lr'] - group['init_lr']) * state[
                        'step'
                    ] / group['warmup'] + group['init_lr']
                else:
                    curr_lr = group['lr']

                # Perform optimization step
                grad = p.grad.data
                if group['weight_decay'] != 0:
                    grad = grad.add(p, alpha=group['weight_decay'])

                beta = group['beta']
                exp_avg_grad = state['exp_avg_grad']
                B = state['approx_hessian']
                d_p = state['update']

                state['step'] += 1
                bias_correction = 1 - beta ** state['step']
                alpha = (1 - beta) / bias_correction

                # Update the running average of the gradient
                delta_grad = grad - exp_avg_grad
                exp_avg_grad.add_(delta_grad, alpha=alpha)

                # Rescale the previous update direction by its 4-norm
                denom = d_p.norm(p=4).add(group['eps'])
                d_p.div_(denom)
                v_sq = d_p.mul(d_p)
                delta = (
                    delta_grad.div_(denom).mul_(d_p).sum().mul(-alpha)
                    - B.mul(v_sq).sum()
                )

                # Update the diagonal Hessian approximation
                B.addcmul_(v_sq, delta)

                # Preconditioned step: gradient average divided by |B| (clamped)
                denom = B.abs().clamp_(min=1)
                d_p.copy_(exp_avg_grad.div(denom))

                p.data.add_(d_p, alpha=-curr_lr)

        return loss
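Apollo plugs in like any torch.optim optimizer. A minimal training-step sketch, reusing the AlexNet and loader from the sections above (the warmup and init_lr values here are illustrative, not tuned):

net = AlexNet()
optimizer = Apollo(net.parameters(), lr=1e-2, warmup=500, init_lr=1e-3)
criterion = nn.CrossEntropyLoss()

for data, label in loader:
    optimizer.zero_grad()
    loss = criterion(net(data), label)
    loss.backward()
    optimizer.step()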