import torch
import torch.nn as nn
from torchvision import transforms
import torchvision
from torch import optim
import matplotlib.pyplot as plt
import torch.nn.functional as F
import math
transform = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize([0.5], [0.5])
])
trainsets = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainsets, batch_size=100, shuffle=True)
class CenterLoss(torch.nn.Module):
    # Despite the name, this layer computes arc-margin softmax probabilities,
    # not the classic center loss.
    def __init__(self, cls_num, feature_num):
        super(CenterLoss, self).__init__()
        # one weight column per class: (feature_num, cls_num)
        self.W = nn.Parameter(torch.randn(feature_num, cls_num))

    def forward(self, feature):
        _W = F.normalize(self.W, dim=0)                     # unit-norm class columns
        _X = torch.norm(feature, dim=1).view(-1, 1)         # (N, 1) feature norms
        out = torch.matmul(feature, _W)                     # (N, cls_num) logits
        cosa = (out / _X).clamp(-1 + 1e-7, 1 - 1e-7)        # cosine of the angle; clamp keeps acos finite
        a = torch.acos(cosa)
        top = torch.exp(_X * torch.cos(a + 0.1))            # margined logit, additive angle m = 0.1
        _top = torch.exp(_X * torch.cos(a))                 # unmargined logit to swap out of the denominator
        bottom = torch.sum(torch.exp(out), dim=1).view(-1, 1)
        return top / (bottom - _top + top)                  # (N, cls_num) margined probabilities
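# The fraction above is the softmax probability of each class after that class's
# logit ||x||*cos(a) is swapped for the margined ||x||*cos(a + 0.1). A minimal
# sketch with made-up tensors (not part of the original script):
def _center_loss_demo():
    layer = CenterLoss(cls_num=10, feature_num=2)
    feats = torch.randn(5, 2)
    probs = layer(feats)                       # (5, 10), one margined probability per class
    labels = torch.randint(0, 10, (5,))
    loss = -torch.log(probs.gather(1, labels.view(-1, 1)) + 1e-10).mean()
    loss.backward()                            # gradients flow into layer.W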
class ArcMarginProduct(nn.Module):
    def __init__(self, s=256, m=0.01):
        super(ArcMarginProduct, self).__init__()
        self.in_feature = 2
        self.out_feature = 10
        self.s = s
        self.m = m
        self.weight = nn.Parameter(torch.Tensor(2, 10))  # (in_feature, out_feature)
        nn.init.xavier_uniform_(self.weight)
        self.weight.data.renorm_(2, 1, 1e-5).mul_(1e5)   # unit-norm each class column
        self.cos_m = math.cos(m)
        self.sin_m = math.sin(m)
        # To keep cos(theta + m) monotonically decreasing for theta in [0, pi]:
        self.th = math.cos(math.pi - m)
        self.mm = math.sin(math.pi - m) * m
    def forward(self, x, label):
        cosine = F.normalize(x).mm(F.normalize(self.weight, dim=0))
        sine = torch.sqrt((1.0 - torch.pow(cosine, 2)).clamp(0, 1))
        phi = cosine * self.cos_m - sine * self.sin_m  # angle-sum identity: cos(theta + m)
        # To keep cos(theta + m) monotonically decreasing for theta in [0, pi]:
        phi = torch.where((cosine - self.th) > 0, phi, cosine - self.mm)
        one_hot = torch.zeros_like(cosine)
        one_hot.scatter_(1, label.view(-1, 1), 1)
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output = output * self.s
        # The scale s is critical here: if it is too small, convergence is
        # difficult and the features of two classes easily overlap. This is a
        # weak point of the additive margin relative to the multiplicative one:
        # it compresses each class's feature vectors well enough, but does not
        # constrain the distance between the features of different classes.
        loss = F.cross_entropy(output, label)  # on the margined, scaled logits
        return loss, output
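# A minimal sanity check of ArcMarginProduct (a sketch with made-up tensors,
# not part of the original script): the margined logit of the true class never
# exceeds the plain cosine, since cos(theta + m) < cos(theta) on the monotonic
# range and the torch.where fallback subtracts mm > 0 elsewhere.
def _check_arc_margin():
    margin = ArcMarginProduct(s=1, m=0.2)
    feats = torch.randn(4, 2)
    labels = torch.tensor([0, 3, 7, 9])
    loss, logits = margin(feats, labels)
    cosine = F.normalize(feats).mm(F.normalize(margin.weight, dim=0))
    picked = logits.gather(1, labels.view(-1, 1))
    plain = cosine.gather(1, labels.view(-1, 1))
    assert (picked <= plain + 1e-6).all()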
class MainNet(torch.nn.Module):
def __init__(self):
super(MainNet, self).__init__()
self.hidden_layer = nn.Sequential(
nn.Linear(784, 120),
nn.PReLU(),
nn.Linear(120, 2),
)
self.output_layer = nn.Sequential(
nn.Linear(2, 10),
)
        self.center_loss_layer = ArcMarginProduct()  # despite the name, an arc-margin layer
        # self.crossEntropy = F.log_softmax()
def forward(self, xs):
features = self.hidden_layer(xs)
# features = features.view(-1,2)
outputs = self.output_layer(features)
return features, F.log_softmax(outputs, dim=1)
    def getloss(self, outputs, features, labels):
        # loss_cls = self.crossEntropy(outputs, labels)
        # ArcMarginProduct returns a (loss, margined_logits) tuple
        loss_center = self.center_loss_layer(features, labels)
        # loss = loss_cls + loss_center
        return loss_center
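# End-to-end shape check for MainNet (a sketch; the inputs are random tensors
# assumed only for illustration): 784-d pixels -> 2-d features -> 10 log-probs.
def _mainnet_shape_check():
    net = MainNet()
    xs = torch.randn(4, 784)
    feats, logp = net(xs)
    assert feats.shape == (4, 2) and logp.shape == (4, 10)
    loss, margined = net.getloss(logp, feats, torch.tensor([0, 1, 2, 3]))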
def mscatter(x, y, ax=None, m=None, **kw):
    """Scatter plot that, unlike plain plt.scatter, accepts one marker style per point."""
    import matplotlib.markers as mmarkers
if not ax: ax = plt.gca()
sc = ax.scatter(x, y, **kw)
if (m is not None) and (len(m) == len(x)):
paths = []
for marker in m:
if isinstance(marker, mmarkers.MarkerStyle):
marker_obj = marker
else:
marker_obj = mmarkers.MarkerStyle(marker)
path = marker_obj.get_path().transformed(
marker_obj.get_transform())
paths.append(path)
sc.set_paths(paths)
return sc
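# Example usage of mscatter (a sketch with made-up points): a per-point marker
# list, which a single plt.scatter call cannot do.
def _mscatter_demo():
    import numpy as np
    x, y = np.random.rand(2, 6)
    markers = ['o', 's', '^', 'v', 'D', '*']
    mscatter(x, y, m=markers, c=range(6), cmap=plt.cm.RdYlBu)
    plt.show()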
if __name__ == '__main__':
net = MainNet()
# net.load_state_dict(torch.load('net_param.pkl'))
optimizer = optim.Adam(net.parameters())
    nllloss = nn.NLLLoss()  # the whole script runs on the CPU, so no .cuda() here
count = 0
for epoch in range(1000):
        for xs, ys in trainloader:
            xs = xs.view(-1, 784)
            feature, out = net(xs)
            loss1 = nllloss(out, ys)
            loss2, _ = net.getloss(out, feature, ys)  # arc-margin loss on the 2-D features
            loss = loss1 + loss2
# feature = feature.detach().numpy()
# x = feature[:,0]
# y = feature[:,1]
# c = out.argmax(dim=1)
# fig, ax = plt.subplots()
# scatter = mscatter(x, y, c=c, ax=ax, cmap=plt.cm.RdYlBu)
# plt.show()
# plt.scatter(x,y)
# plt.pause(1)
# plt.clf()
# print(x,y)
# exit()
# loss = net.getloss(out,labels=ys,features=feature)
optimizer.zero_grad()
loss.backward()
optimizer.step()
            print(count, loss.item())
            count += 1
        if (epoch + 1) % 1 == 0:  # save a checkpoint every epoch
            torch.save(net.state_dict(), 'net3_param.pkl')
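# A sketch of the feature visualization that the commented-out code inside the
# training loop appears to be driving at (an assumption about intent, not part
# of the original script): run the trained net on one batch and scatter the
# 2-D features coloured by class.
def _plot_features(net, loader):
    net.eval()
    with torch.no_grad():
        xs, ys = next(iter(loader))
        feature, _ = net(xs.view(-1, 784))
    plt.scatter(feature[:, 0], feature[:, 1], c=ys, cmap=plt.cm.RdYlBu, s=5)
    plt.show()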
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
class ArcSoftmax(torch.nn.Module):
    def __init__(self, feature_dim, cls_dim):
        super(ArcSoftmax, self).__init__()
        self.W = nn.Parameter(torch.randn(feature_dim, cls_dim))  # (2, 10)

    def forward(self, feature):
        feature = F.normalize(feature, dim=1)
        _W = F.normalize(self.W, dim=0)                             # unit-norm class columns, (2, 10)
        _X = torch.norm(feature, dim=1).view(-1, 1)                 # (N, 1); all ones after normalization
        out = torch.matmul(feature, _W)                             # (N, 10) cosine logits
        # out = out / torch.max(out, dim=1)[0].view(-1, 1)
        cosa = (out / _X).clamp(-1 + 1e-7, 1 - 1e-7)                # (N, 10); clamp keeps acos finite
        a = torch.acos(cosa)                                        # (N, 10) angles
        top = torch.exp((_X * torch.cos(a + 0.1)) * 10)             # (N, 10); _X is 1 here, so the factor is a no-op
        _top = torch.exp((_X * torch.cos(a)) * 10)                  # (N, 10)
        bottom = torch.sum(torch.exp(out * 10), dim=1).view(-1, 1)  # (N, 1)
        return torch.log((top / (bottom - _top + top)) + 1e-10)     # (N, 10) log-probabilities
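# This version returns log-probabilities, so it pairs directly with NLLLoss.
# A minimal sketch with random tensors (not part of the original script); the
# default argument pins down this class even though the next definition below
# shadows the name ArcSoftmax.
def _arc_softmax_demo(layer_cls=ArcSoftmax):
    arc = layer_cls(feature_dim=2, cls_dim=10)
    feats = torch.randn(8, 2)
    labels = torch.randint(0, 10, (8,))
    log_probs = arc(feats)               # (8, 10)
    loss = F.nll_loss(log_probs, labels)
    loss.backward()                      # gradients flow into arc.W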
class ArcSoftmax(torch.nn.Module):
    # An earlier variant of the same layer, kept for comparison; note that it
    # shadows the definition above. It works in double precision because acos
    # is numerically touchy near |cos| = 1.
    def __init__(self, feature_dim, cls_dim):
        super(ArcSoftmax, self).__init__()
        self.W = nn.Parameter(torch.randn(feature_dim, cls_dim))

    def forward(self, feature):
        # normalize both sides so the matmul already yields the cosine;
        # without this, cosa can leave [-1, 1] and acos returns nan
        _W = F.normalize(self.W, dim=0)
        _X = F.normalize(feature, dim=1)
        out = torch.matmul(_X, _W).double()  # .double() keeps the graph, unlike round-tripping through numpy
        cosa = out.clamp(-1 + 1e-7, 1 - 1e-7)
        a = torch.acos(cosa)
        top = torch.exp(torch.cos(a + 0.1) * 10)
        _top = torch.exp(torch.cos(a) * 10)
        bottom = torch.sum(torch.exp(out * 10), dim=1).view(-1, 1)
        return top / (bottom - _top + top)  # (N, 10) probabilities (not logs)
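# A quick numeric illustration (a sketch; the tensors are made up): wherever the
# angle is below pi - m/2, adding the 0.1 margin can only shrink that class's
# probability relative to plain scaled softmax, which is the point of the penalty.
def _margin_effect_demo():
    arc = ArcSoftmax(feature_dim=2, cls_dim=10)
    feats = torch.randn(4, 2)
    with torch.no_grad():
        p_margin = arc(feats)
        cos = torch.matmul(F.normalize(feats, dim=1), F.normalize(arc.W, dim=0)).double()
        p_plain = torch.softmax(cos * 10, dim=1)
        mask = torch.acos(cos.clamp(-1 + 1e-7, 1 - 1e-7)) < math.pi - 0.05
    print((p_margin[mask] <= p_plain[mask] + 1e-6).all())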