3.12权重衰减

%matplotlib inline
import gluonbook as gb
from mxnet import autograd,gluon,init,nd
from mxnet.gluon import data as gdata,loss as gloss,nn

# Synthetic linear-regression dataset: y = 0.01 * sum(x_i) + 0.05 + noise.
# Deliberately few training samples (20) versus many features (200) so the
# model overfits easily — the setting weight decay is meant to address.
n_train,n_test,num_inputs=20,100,200
true_w,true_b=nd.ones((num_inputs,1))*0.01,0.05
features=nd.random.normal(shape=(n_train+n_test,num_inputs))
labels=nd.dot(features,true_w)+true_b
print(features.shape)
print(true_w.shape)
# Add small Gaussian observation noise to the labels.
labels+=nd.random.normal(scale=0.01,shape=labels.shape)
# Split into training (first n_train rows) and test (remaining rows) sets.
train_features,test_features=features[:n_train,:],features[n_train:,:]
train_labels,test_labels=labels[:n_train],labels[n_train:]

# Initialize model parameters.
# Each parameter gets a gradient buffer attached so autograd can update it.
def init_params():
    """Return freshly initialized parameters [w, b] with gradients attached."""
    params = [nd.random.normal(scale=1, shape=(num_inputs, 1)),
              nd.zeros(shape=(1,))]
    for param in params:
        param.attach_grad()
    return params


# L2 norm penalty term; only the model weights are penalized, never the bias.
def l2_penalty(w):
    """Return half the sum of squared entries of *w*."""
    return (w * w).sum() / 2

# Training / evaluation setup.
# The L2 norm penalty is added to the loss only at training time (see below).
batch_size,num_epochs,lr=1,100,0.003
net,loss=gb.linreg,gb.squared_loss
train_iter=gdata.DataLoader(gdata.ArrayDataset(train_features,train_labels
                                              ),batch_size,shuffle=True)

def fit_and_plot(lambd):
    """Train linear regression from scratch with L2 penalty weight `lambd`,
    then plot train/test loss per epoch and print the learned weight norm.

    lambd=0 disables weight decay; larger values shrink the weights harder.
    """
    w,b=init_params()
    train_ls,test_ls=[],[]
    for _ in range(num_epochs):
        for X,y in train_iter:
            with autograd.record():
                # Training loss = data loss + lambd * L2 penalty on w only.
                l=loss(net(X,w,b),y)+lambd*l2_penalty(w)
            l.backward()
            gb.sgd([w,b],lr,batch_size)
        # Record mean train/test loss (without the penalty) after each epoch.
        train_ls.append(loss(net(train_features,w,b),train_labels).mean().asscalar())
        test_ls.append(loss(net(test_features,w,b),test_labels).mean().asscalar())
    gb.semilogy(range(1,num_epochs+1),train_ls,'epochs','loss',range(1,num_epochs+1),test_ls,['train','test'])
    print('l2 norm of w',w.norm().asscalar()) # norm() gives the L2 norm of w
fit_and_plot(1.2)
# Gluon implementation of the same experiment.

def fit_and_plot_gluon(wd):
    """Train with Gluon, applying weight decay `wd` via the Trainer's 'wd'
    hyperparameter (weights only), then plot losses and print the weight norm."""
    net=nn.Sequential()
    net.add(nn.Dense(1))
    net.initialize(init.Normal(sigma=1))
    # Decay the weight parameters only; parameter names end in "weight".
    # The 'wd' option tells the optimizer to apply weight decay.
    trainer_w=gluon.Trainer(net.collect_params('.*weight'),'sgd',{'learning_rate':lr,'wd':wd})
    # Do not decay the bias: a separate Trainer without 'wd' updates it.
    trainer_b=gluon.Trainer(net.collect_params('.*bias'),'sgd',{'learning_rate':lr})
    train_ls,test_ls=[],[]
    for _ in range(num_epochs):
        for X,y in train_iter:
            with autograd.record():
                l=loss(net(X),y)
            l.backward()
            # Step both trainers: each updates its own parameter subset.
            trainer_w.step(batch_size)
            trainer_b.step(batch_size)
        train_ls.append(loss(net(train_features),train_labels).mean().asscalar())
        test_ls.append(loss(net(test_features),test_labels).mean().asscalar())
    gb.semilogy(range(1,num_epochs+1),train_ls,'epochs','loss',range(1,num_epochs+1),test_ls,['train','test'])
    print('l2 norm of w',net[0].weight.data().norm().asscalar()) # norm() gives the L2 norm
fit_and_plot_gluon(3)

 

 

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值