混合密度模型用于处理一个x对应于多个y的情况。本文参考https://blog.csdn.net/jianbinzheng/article/details/86555869,使用pytorch实现混合密度模型
首先讨论神经网络拟合1个x仅对应1个y的情况: $y = 7.0\sin(0.75x) + 0.5x + \epsilon$, 其中 $\epsilon$ 为高斯噪声
造出训练数据
import matplotlib.pyplot as plt
import numpy as np
import math
import torch
import torch.nn as nn
import torch.distributions as D
NSAMPLE = 1000
# Training data for the single-valued case:
# x uniform on [-10.5, 10.5], y = 7 sin(0.75 x) + 0.5 x + Gaussian noise.
x_data = np.float32(np.random.uniform(-10.5, 10.5, (1, NSAMPLE))).T
r_data = np.float32(np.random.normal(size=(NSAMPLE, 1)))  # additive Gaussian noise
y_data = np.float32(np.sin(0.75 * x_data) * 7.0 + x_data * 0.5 + r_data * 1.0)
我们定义一个简单的单隐藏层(24个结点)神经网络。我们只是讨论模型的拟合性能,所以相当于只在训练集上做实验,不考虑泛化性能
class NET(nn.Module):
    """Single-hidden-layer MLP (1 -> 24 -> 1) with tanh activation for 1-D regression."""

    def __init__(self):
        super(NET, self).__init__()
        # One tanh hidden layer of 24 units, linear output head.
        self.linear_layer = nn.Sequential(
            nn.Linear(1, 24),
            nn.Tanh(),
            nn.Linear(24, 1),
        )

    def forward(self, x):
        """Map a (batch, 1) input to a (batch, 1) prediction."""
        return self.linear_layer(x)
# The numpy arrays are already float32, so from_numpy alone suffices
# (the original double-wrapped with torch.as_tensor, a no-op here).
data_train = torch.from_numpy(x_data)
label_train = torch.from_numpy(y_data)
dataset = torch.utils.data.TensorDataset(data_train, label_train)
epoch = 1000
Net = NET()
optimizer = torch.optim.Adam(Net.parameters(), lr=0.003, weight_decay=1e-4)
criterion = nn.MSELoss()  # MSE loss: implicitly assumes a single y per x
# Build the DataLoader ONCE; the original re-created it every epoch,
# which only adds per-epoch setup overhead without changing the batches.
trainLoader = torch.utils.data.DataLoader(dataset, batch_size=64)
train_loss = []
for epos in range(epoch):
    Net.train()
    train_loss_batch = []
    for x_train, y_train in trainLoader:
        # forward pass
        out = Net(x_train)
        # loss
        loss = criterion(out, y_train)
        # backward pass + parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss_batch.append(loss.item())
    # record the mean batch loss for this epoch
    train_loss.append(sum(train_loss_batch) / len(train_loss_batch))
plt.plot(range(1, len(train_loss) + 1), train_loss, 'ro', alpha=0.3)
训练结果如下图:
plot在训练集数据上模型的输出和数据标签之间的差距
接下来我们把上面的训练数据,x和y反转,就可以得到1个x对应多个y的情况
# Invert the roles of x and y: now one x can correspond to several y values.
NSAMPLE = 2500
y_data = np.float32(np.random.uniform(-10.5, 10.5, (1, NSAMPLE))).T
r_data = np.float32(np.random.normal(size=(NSAMPLE, 1)))  # Gaussian noise
x_data = np.float32(np.sin(0.75 * y_data) * 7.0 + y_data * 0.5 + r_data * 1.0)
feature_train = torch.as_tensor(torch.from_numpy(x_data), dtype=torch.float32)
label_train = torch.as_tensor(torch.from_numpy(y_data), dtype=torch.float32)
# Visualize the multi-valued relationship before training.
plt.figure(figsize=(8, 8))
plt.plot(x_data, y_data, 'ro', alpha=0.3)
plt.show()
dataset_mix = torch.utils.data.TensorDataset(feature_train, label_train)
得到的数据分布是这样的
如果使用上面的模型跑这个数据,会得到下面的结果(蓝色的是拟合结果),理论上就不解释了。但是和参考的博客里似乎pattern不太一样,不明白
下面使用混合密度模型来解决这个问题,具体的理论公式上面那篇博客的文章也有,不赘述了
class mix_linear(nn.Module):
    """MDN trunk: maps a scalar input to 3*mixture raw outputs.

    The output is later split into mixture weights, log-sigmas and means
    (one value of each per mixture component).
    """

    def __init__(self, mixture=24, n_hidden=24):
        super(mix_linear, self).__init__()
        self.mixture = mixture          # number of Gaussian components K
        self.n_out = 3 * mixture        # pi, sigma, mu for each component
        self.n_hidden = n_hidden
        self.linear_layer = nn.Sequential(
            nn.Linear(1, self.n_hidden),
            nn.Tanh(),
            nn.Linear(self.n_hidden, self.n_out),
        )

    def forward(self, x):
        """Return raw (batch, 3*mixture) MDN parameters for input (batch, 1)."""
        return self.linear_layer(x)
class get_lossfunc(nn.Module):
    """Negative log-likelihood of a 1-D Gaussian mixture (MDN loss).

    loss = -mean_i log( sum_k pi_ik * N(y_i | mu_ik, sigma_ik) )
    """

    def __init__(self):
        super(get_lossfunc, self).__init__()

    def forward(self, out_pi, out_sigma, out_mu, y):
        """Compute the mixture NLL.

        out_pi:    (N, K) mixture weights, rows sum to 1
        out_sigma: (N, K) strictly positive std-devs
        out_mu:    (N, K) component means
        y:         (N, 1) targets, broadcast against the K components
        returns a scalar tensor (mean NLL over the batch)
        """
        # Work in log space and reduce with logsumexp: the original
        # exp-then-log formulation underflows to log(0) = -inf for a
        # badly-fitting mixture, producing NaN gradients.
        z = (y - out_mu) / out_sigma
        log_component = (
            -0.5 * torch.square(z)
            - torch.log(out_sigma)
            # Gaussian normalizer 1/sqrt(2*pi) — missing in the original
            # (a constant offset, but it makes the reported loss a true NLL).
            - 0.5 * math.log(2.0 * math.pi)
        )
        log_mix = torch.logsumexp(torch.log(out_pi) + log_component, dim=1)
        return -torch.mean(log_mix)
model = mix_linear(24, 24)
model.train()
optimizer = torch.optim.Adam(model.parameters(), lr=0.003, weight_decay=1e-4)
criterion = get_lossfunc()  # build the loss module once, not per batch
n_epoch = 1000
# Full-batch loader (batch_size == dataset size); build it ONCE instead of
# re-creating it every epoch as the original did.
trainLoader = torch.utils.data.DataLoader(dataset_mix, batch_size=2500)
train_loss = []
for i in range(n_epoch):
    train_loss_batch = []
    for x_train, y_train in trainLoader:
        out = model(x_train)
        # Split the network output into the three MDN parameter groups.
        out_pi, out_sigma, out_mu = torch.split(out, model.mixture, dim=1)
        out_pi = nn.Softmax(dim=1)(out_pi)  # per-sample weights sum to 1 (dim=1)
        out_sigma = torch.exp(out_sigma)    # keep the std-devs strictly positive
        loss = criterion(out_pi, out_sigma, out_mu, y_train)
        optimizer.zero_grad()
        loss.backward()
        # BUG FIX: clip AFTER backward so the freshly computed gradients are
        # clipped; the original called clip_grad_norm_ before backward, which
        # clipped stale (or on the first step, nonexistent) gradients.
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)
        optimizer.step()
        train_loss_batch.append(loss.item())
    train_loss.append(sum(train_loss_batch) / len(train_loss_batch))
plt.plot(range(1, len(train_loss) + 1), train_loss, 'ro', alpha=0.3)
注意在testing的时候,不是代入一个x求出一个y,而是代入一个x,得到一个关于y的分布,然后对分布进行多次采样
def testing(model, x_test, M=5):
    """
    Sample from the predicted conditional distribution p(y | x).

    input: a trained MDN model (with a .mixture attribute) and test inputs
           x_test of shape (N, 1)
    output: tensor of shape (M, N) holding M sampled y values per input point
    """
    model.eval()
    # Inference only: no_grad skips autograd bookkeeping (the original
    # built an unnecessary computation graph for the forward pass).
    with torch.no_grad():
        out = model(x_test)
        out_pi, out_sigma, out_mu = torch.split(out, model.mixture, dim=1)
        out_pi = nn.Softmax(dim=1)(out_pi)  # per-sample weights sum to 1 (dim=1)
        out_sigma = torch.exp(out_sigma)    # std-devs must be positive
        # Assemble the Gaussian mixture and draw M samples per input point.
        mix = D.Categorical(out_pi)
        comp = D.Normal(out_mu, out_sigma)
        gmm = D.MixtureSameFamily(mix, comp)
        return gmm.sample([M])
# Draw M sampled y values for every training x and overlay the ground truth.
y = testing(model, feature_train)
y_out = y.numpy()
print(y_out.shape)
print(x_data.shape)
# One scatter layer per sampled draw (blue), real data in red underneath.
for sample_row in y_out:
    plt.scatter(x_data, sample_row, c="b", s=10)
plt.plot(x_data, y_data, 'ro', alpha=0.3)
plt.show()
看着效果还行