An ArcLoss (ArcFace-style additive angular margin) experiment on MNIST

import torch
import torch.nn as nn
from torchvision import transforms
import torchvision
from torch import optim
import matplotlib.pyplot as plt
import torch.nn.functional as F
import math

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])

trainsets = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainsets, batch_size=100, shuffle=True)

class CenterLoss(torch.nn.Module):
    # Despite its name, this is an arc-margin softmax head, not center loss.
    # It is defined here for reference but unused below.

    def __init__(self, cls_num, feature_num):
        super(CenterLoss, self).__init__()
        self.W = nn.Parameter(torch.randn(feature_num, cls_num))

    def forward(self, feature):
        _W = F.normalize(self.W, dim=0)                    # unit class vectors, (feature_num, cls_num)
        _X = torch.norm(feature, dim=1, keepdim=True)      # feature norms, (n, 1)
        out = torch.matmul(feature, _W)                    # (n, cls_num)
        cosa = (out / _X).clamp(-1 + 1e-7, 1 - 1e-7)       # clamp so acos never sees values outside [-1, 1]
        a = torch.acos(cosa)
        top = torch.exp(_X * torch.cos(a + 0.1))           # margin m = 0.1 added to the angle
        _top = torch.exp(_X * torch.cos(a))
        bottom = torch.sum(torch.exp(out), dim=1, keepdim=True)

        return top / (bottom - _top + top)
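
# For the head above: per class j, with a_j the angle between feature x and
# the unit class vector w_j, margin m = 0.1, and the feature norm |x| acting
# as the scale, the returned probability is
#   P_j = exp(|x|*cos(a_j + m))
#         / (sum_k exp(x . w_k) - exp(|x|*cos(a_j)) + exp(|x|*cos(a_j + m)))
# i.e. an ArcFace-style softmax in which only class j's own logit carries the margin.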


class ArcMarginProduct(nn.Module):
    def __init__(self, s=256, m=0.01):
        super(ArcMarginProduct, self).__init__()
        self.in_feature = 2
        self.out_feature = 10
        self.s = s
        self.m = m
        self.weight = nn.Parameter(torch.Tensor(2, 10))  # (input, output)
        nn.init.xavier_uniform_(self.weight)
        self.weight.data.renorm_(2, 1, 1e-5).mul_(1e5)

        self.cos_m = math.cos(m)
        self.sin_m = math.sin(m)
        # To keep cos(theta + m) monotonically decreasing on [0, pi]:
        self.th = math.cos(math.pi - m)
        self.mm = math.sin(math.pi - m) * m

    def forward(self, x, label):
        cosine = F.normalize(x).mm(F.normalize(self.weight, dim=0))
        sine = torch.sqrt((1.0 - torch.pow(cosine, 2)).clamp(0, 1))
        phi = cosine * self.cos_m - sine * self.sin_m  # angle-sum identity: cos(theta + m)
        # Keep cos(theta + m) monotonically decreasing on [0, pi]:
        phi = torch.where((cosine - self.th) > 0, phi, cosine - self.mm)
        one_hot = torch.zeros_like(cosine)
        one_hot.scatter_(1, label.view(-1, 1), 1)
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output = output * self.s
        # The scale s matters here: if it is too small, training converges poorly
        # and the features of two classes tend to overlap. This is a weakness of
        # the additive margin relative to the multiplicative margin: it constrains
        # the norm of the feature vectors well enough, but not the distance
        # between the feature vectors of different classes.
        loss = F.cross_entropy(output, label)  # use the scaled margin logits, not the raw cosine

        return loss, output
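
# A minimal shape check for the head above (illustrative; the feature and
# class sizes 2 and 10 are hard-coded in the class). Runs only when this
# file is executed directly.
if __name__ == '__main__':
    _head = ArcMarginProduct()
    _x = torch.randn(4, 2)
    _y = torch.randint(0, 10, (4,))
    _loss, _logits = _head(_x, _y)
    print(_loss.item(), _logits.shape)  # scalar loss, torch.Size([4, 10])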

class MainNet(torch.nn.Module):

    def __init__(self):
        super(MainNet, self).__init__()
        self.hidden_layer = nn.Sequential(
            nn.Linear(784, 120),
            nn.PReLU(),
            nn.Linear(120, 2),  # 2-D bottleneck so the features can be plotted
        )

        self.output_layer = nn.Sequential(
            nn.Linear(2, 10),
        )

        self.center_loss_layer = ArcMarginProduct()  # despite the name, an arc-margin head

    def forward(self, xs):
        features = self.hidden_layer(xs)
        outputs = self.output_layer(features)
        return features, F.log_softmax(outputs, dim=1)

    def getloss(self, outputs, features, labels):
        # Returns (arc-margin loss, scaled margin logits); `outputs` is unused here.
        return self.center_loss_layer(features, labels)

def mscatter(x, y, ax=None, m=None, **kw):
    # Scatter plot that accepts a per-point list of markers `m`.
    import matplotlib.markers as mmarkers
    if not ax:
        ax = plt.gca()
    sc = ax.scatter(x, y, **kw)
    if (m is not None) and (len(m) == len(x)):
        paths = []
        for marker in m:
            if isinstance(marker, mmarkers.MarkerStyle):
                marker_obj = marker
            else:
                marker_obj = mmarkers.MarkerStyle(marker)
            path = marker_obj.get_path().transformed(
                marker_obj.get_transform())
            paths.append(path)
        sc.set_paths(paths)
    return sc
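
# Example use of mscatter with hypothetical data: color and marker per class.
#   import numpy as np
#   pts = np.random.randn(100, 2)
#   cls = np.random.randint(0, 3, 100)
#   markers = [['o', 's', '^'][i] for i in cls]
#   fig, ax = plt.subplots()
#   mscatter(pts[:, 0], pts[:, 1], ax=ax, c=cls, m=markers, cmap=plt.cm.RdYlBu)
#   plt.show()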

if __name__ == '__main__':
    net = MainNet()
    # net.load_state_dict(torch.load('net_param.pkl'))
    optimizer = optim.Adam(net.parameters())
    nllloss = nn.NLLLoss()  # everything stays on CPU; the net and data are never moved to CUDA
    count = 0
    for epoch in range(1000):
        for xs, ys in trainloader:
            xs = xs.view(-1, 784)
            feature, out = net(xs)
            loss1 = nllloss(out, ys)                  # classification loss on the log-softmax outputs
            loss2, _ = net.getloss(out, feature, ys)  # arc-margin loss on the 2-D features
            loss = loss1 + loss2
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            print(count, loss.item())
            count += 1
        torch.save(net.state_dict(), 'net3_param.pkl')  # checkpoint after every epoch
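
After training, the learned 2-D features can be visualized with the mscatter helper above. A sketch, assuming the definitions in this file and the net3_param.pkl checkpoint saved by the loop:

net = MainNet()
net.load_state_dict(torch.load('net3_param.pkl'))
net.eval()
with torch.no_grad():
    xs, ys = next(iter(trainloader))
    feats, out = net(xs.view(-1, 784))
fig, ax = plt.subplots()
mscatter(feats[:, 0].numpy(), feats[:, 1].numpy(), ax=ax,
         c=out.argmax(dim=1).numpy(), cmap=plt.cm.RdYlBu)
plt.show()

The second listing below is a standalone ArcSoftmax module, with two variants of the same idea.
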
import torch
import torch.nn as nn
import torch.nn.functional as F
import math

class ArcSoftmax(torch.nn.Module):

    def __init__(self, feature_dim, cls_dim):
        super(ArcSoftmax, self).__init__()
        self.W = nn.Parameter(torch.randn(feature_dim, cls_dim))  # (2, 10)

    def forward(self, feature):
        feature = F.normalize(feature, dim=1)
        _W = F.normalize(self.W, dim=0)  # unit class vectors, (2, 10)
        _X = torch.norm(feature, dim=1).view(-1, 1)  # (n, 1); all ones after the normalization above
        out = torch.matmul(feature, _W)  # (n, 10)
        cosa = (out / _X).clamp(-1 + 1e-7, 1 - 1e-7)  # (n, 10)
        a = torch.acos(cosa)  # (n, 10)

        # Margin m = 0.1 added to the angle, scale s = 10; since _X is 1 here,
        # multiplying by it changes nothing.
        top = torch.exp((_X * torch.cos(a + 0.1)) * 10)  # (n, 10)
        _top = torch.exp((_X * torch.cos(a)) * 10)  # (n, 10)
        bottom = torch.sum(torch.exp(out * 10), dim=1).view(-1, 1)  # (n, 1)
        return torch.log((top / (bottom - _top + top)) + 1e-10)  # (n, 10) log-probabilities
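
# A second variant of the same class follows; if both are kept in one file,
# it shadows the definition above. It works in double precision and clamps
# cos(a), another way of avoiding nan from acos.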
class ArcSoftmax(torch.nn.Module):

    def __init__(self, feature_dim, cls_dim):
        super(ArcSoftmax, self).__init__()
        self.W = nn.Parameter(torch.randn(feature_dim, cls_dim))

    def forward(self, feature):
        # Normalize both sides so their dot product is already cos(a).
        _W = F.normalize(self.W, dim=0)
        _X = F.normalize(feature, dim=1)
        out = torch.matmul(_X, _W)
        # Work in double precision and clamp: if cosa strays outside [-1, 1]
        # (e.g. because the inputs were not normalized), acos returns nan.
        # (.double() keeps the graph intact, unlike a detach/numpy round trip.)
        cosa = out.double().clamp(-1 + 1e-7, 1 - 1e-7)
        a = torch.acos(cosa)
        top = torch.exp(torch.cos(a + 0.1) * 10)
        _top = torch.exp(torch.cos(a) * 10)
        bottom = torch.sum(torch.exp(cosa * 10), dim=1, keepdim=True)
        return top / (bottom - _top + top)  # (N, 10) probabilities
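
A sketch of how either head plugs into training, assuming 2-D features from a backbone such as MainNet's hidden_layer. The first variant already returns log-probabilities; the second returns probabilities, so it needs torch.log before NLLLoss:

arc = ArcSoftmax(feature_dim=2, cls_dim=10)
nll = nn.NLLLoss()
feature = torch.randn(8, 2)        # stand-in for backbone features
label = torch.randint(0, 10, (8,))
log_p = torch.log(arc(feature))    # drop the torch.log for the first variant
loss = nll(log_p, label)
loss.backward()                    # gradients flow back into arc.W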

 
