import torch
import torch.nn as nn
from sklearn.preprocessing import LabelBinarizer
from torch.nn import functional as F
class TextRNN(nn.Module):
    def __init__(self,
                 input_size=256,
                 hidden_size=128,
                 output_size=768,
                 n_layers=2,
                 dropout=0.5,
                 args=None):
        super(TextRNN, self).__init__()
        self.rnn = nn.LSTM(input_size=input_size,
                           hidden_size=hidden_size,
                           num_layers=n_layers,
                           bidirectional=True,
                           batch_first=True,
                           dropout=dropout)
        # Bidirectional RNN, so the projection takes hidden_size * 2; used for both arg1 and arg2.
        self.fc = nn.Linear(hidden_size * 2, output_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, input_ids):
        # input_ids: [batch, seq_len, input_size], e.g. [8, 80, 300]
        arg_out = self.dropout(input_ids)
        # out:    [batch, seq_len, hidden_size * 2]
        # hidden: [num_layers * 2, batch, hidden_size]
        # cell:   [num_layers * 2, batch, hidden_size]
        arg_out, (_, _) = self.rnn(arg_out)
        out = self.fc(arg_out)  # [batch, seq_len, output_size]
        return out
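
# Illustrative check of TextRNN input/output shapes (not part of the original
# training script; the batch size, sequence length, and feature size below are assumed):
# a [batch, seq_len, input_size] tensor maps to [batch, seq_len, output_size],
# because the bidirectional LSTM states are projected token-wise by self.fc.
#
#   rnn = TextRNN(input_size=768, hidden_size=128, output_size=768)
#   dummy = torch.randn(8, 80, 768)   # [batch, seq_len, input_size]
#   rnn(dummy).shape                  # torch.Size([8, 80, 768])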
class CVAEModel(nn.Module):
    def __init__(self,
                 input_size=256,
                 hidden_size=256,
                 output_size=768):
        super(CVAEModel, self).__init__()
        # expected input: [batch, seq_len, feature], e.g. [8, 256, 768]
        self.rnn01 = TextRNN(input_size=input_size, hidden_size=128, output_size=768)
        self.rnn02 = TextRNN(input_size=input_size, hidden_size=128, output_size=768)
        # project the encoder output down to hidden_size // 2 (mu and logvar heads)
        self.fc11 = nn.Linear(hidden_size, hidden_size // 2)
        self.fc12 = nn.Linear(hidden_size, hidden_size // 2)
        self.fc21 = nn.Linear(hidden_size // 2, hidden_size)
        self.fc22 = nn.Linear(hidden_size, hidden_size)
        self.layernorm = nn.LayerNorm(hidden_size)
        self.lb = LabelBinarizer()

    # One-hot encode the labels.
    # Note: with two classes, sklearn's LabelBinarizer returns a single 0/1 column,
    # so the result has shape [batch, 1].
    def to_categrical(self, y):
        y_n = y.cpu().detach()
        self.lb.fit(list(range(0, 2)))
        y_one_hot = self.lb.transform(y_n)
        # keep the encoded labels on the same device as the incoming label tensor
        y_one_hot = torch.FloatTensor(y_one_hot).to(y.device)
        return y_one_hot
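
    # Example of the encoding above (illustrative values only):
    #   LabelBinarizer().fit([0, 1]).transform([1, 0, 1]) -> [[1], [0], [1]]
    # so to_categrical(torch.tensor([1, 0, 1])) is a float tensor of shape [3, 1].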
    def encode(self, x, y=None, Training=False):
        if Training:
            con = x
            # y_c = self.to_categrical(y)
            # y_c = y_c.unsqueeze(1)
            # # concatenate the input sample with the one-hot vector of label y
            # con = con + y_c
            out = self.rnn01(con)
            return F.relu(self.fc11(out)), F.relu(self.fc12(out))
        else:
            return F.relu(self.rnn01(x))

    # Reparameterization trick: z = mu + exp(0.5 * logvar) * eps with eps ~ N(0, I),
    # which keeps the sampling step differentiable w.r.t. mu and logvar.
    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return eps.mul(std).add_(mu)

    def decode(self, z, y=None, Training=False):
        con = z
        # condition the latent code on the one-hot label
        y_c = self.to_categrical(y)
        y_c = y_c.unsqueeze(1)
        con = con + y_c
        out = self.fc21(con)
        out = self.rnn02(out)  # modified here
        return F.relu(out)
    @classmethod
    def loss_function(cls, recon_x, x, mu, logvar):
        bz = x.shape[0]
        # print(recon_x.shape, x.shape)
        # recon_x, x = recon_x.view(bz, -1), x.view(-1)
        # Reconstruction term (mean squared error is used here rather than binary cross-entropy).
        BCE = nn.MSELoss()(recon_x, x)
        # BCE = nn.CrossEntropyLoss(recon_x, x)
        # KL divergence, see Appendix B of the VAE paper:
        # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014
        # https://arxiv.org/abs/1312.6114
        # KLD = -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
        KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
        return BCE + KLD
    def forward(self, x, y=None, Training=False):
        # Training mode: run the full CVAE (encode, reparameterize, decode).
        if Training:
            # Encode
            mu, logvar = self.encode(x, y, Training)
            # Reparameterize
            z = self.reparameterize(mu, logvar)
            # Decode
            out = self.decode(z, y, Training)
            return out, mu, logvar
        else:
            # Inference mode: return the encoder representation only.
            out = self.encode(x)
            return out
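
# Illustrative round trip through CVAEModel (the sizes below are assumed for the
# example and are not taken from the original run configuration). During training
# the module returns the reconstruction together with mu and logvar, which feed
# loss_function; at inference only the encoder branch is used.
#
#   cvae = CVAEModel(input_size=768, hidden_size=768, output_size=768)
#   x = torch.randn(8, 80, 768)         # [batch, seq_len, feature]
#   y = torch.randint(0, 2, (8,))       # binary labels
#   recon, mu, logvar = cvae(x, y, Training=True)
#   loss = CVAEModel.loss_function(recon, x, mu, logvar)
#   enc = cvae(x)                       # inference: encoder output only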
# coding=utf-8
# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""PyTorch RoBERTa model. """
import math
import warnings
import torch
import torch.nn as nn
from torch.nn import CrossEntropyLoss, MSELoss
from transformers.activations import ACT2FN, gelu
from transformers.configuration_roberta import RobertaConfig
from transformers.file_utils import (
    add_code_sample_docstrings,
    add_start_docstrings,
    add_start_docstrings_to_callable,
    replace_return_docstrings,
)
from transformers.modeling_outputs import (
    BaseModelOutput,
    BaseModelOutputWithPooling,
    CausalLMOutput,
    MaskedLMOutput,
    MultipleChoiceModelOutput,
    QuestionAnsweringModelOutput,
    SequenceClassifierOutput,
    TokenClassifierOutput,
)
from transformers.modeling_utils import (
    PreTrainedModel,
    apply_chunking_to_forward,
    find_pruneable_heads_and_indices,
    prune_linear_layer,
)
from .CVAEModel import CVAEModel
from .Attention import AttentionInArgs
from .GATModel import GAT
import logging
logger = logging.getLogger(__name__)
_CONFIG_FOR_DOC = "RobertaConfig"
_TOKENIZER_FOR_DOC = "RobertaTokenizer"
ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST = [
    "roberta-base",
    "roberta-large",
    "roberta-large-mnli",
    "distilroberta-base",
    "roberta-base-openai-detector",
    "roberta-large-openai-detector",
    # See all RoBERTa models at https://huggingface.co/models?filter=roberta
]
class RobertaEmbeddings(nn.Module):
    """
    Same as BertEmbeddings with a tiny tweak for positional embeddings indexing.
    """

    # Copied from transformers.modeling_bert.BertEmbeddings.__init__
    def __init__(self, config):
        super().__init__()
        self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=config.pad_token_id)
        self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)
        self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)
        # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load
        # any TensorFlow checkpoint file
        self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        # position_ids (1, len position emb) is contiguous in memory and exported when serialized
        self.register_buffer("position_ids", torch.arange(config.max_position_embeddings).expand((1, -1)))
        # End copy
        self.padding_idx = config.pad_token_id
        self.position_embeddings = nn.Embedding(
            config.max_position_embeddings, config.hidden_size, padding_idx=self.padding_idx
        )

    def forward(self, input_ids=None, token_type_ids=None, position_ids=None, inputs_embeds=None):
        if position_ids is None:
            if input_ids is not None:
                # Create the position ids from the input token ids. Any padded tokens remain padded.
                position_ids = create_position_ids_from_input_ids(input_ids, self.padding_idx).to(input_ids.device)
            else:
                position_ids = self.create_position_ids_from_inputs_embeds(inputs_embeds)
        # Copied from transformers.modeling_bert.BertEmbeddings.forward
        if input_ids is not None:
            input_shape = input_ids.size()
        else:
            input_shape = inputs_embeds.size()[:-1]
        seq_length = input_shape[1]
        if position_ids is None:
            position_ids = self.position_ids[:, :seq_length]
        if token_type_ids is None:
            token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=self.position_ids.device)
        if inputs_embeds is None:
            inputs_embeds = self.word_embeddings(input_ids)
        position_embeddings = self.position_embeddings(position_ids)
        token_type_embeddings = self.token_type_embeddings(token_type_ids)
        embeddings = inputs_embeds + position_embeddings + token_type_embeddings
        embeddings = self.LayerNorm(embeddings)
        embeddings = self.dropout(embeddings)
        return embeddings

    def create_position_ids_from_inputs_embeds(self, inputs_embeds):
        """We are provided embeddings directly. We cannot infer which are padded so just generate
        sequential position ids.

        :param torch.Tensor inputs_embeds:
        :return torch.Tensor:
        """
        input_shape = inputs_embeds.size()[:-1]
        sequence_length = input_shape[1]
        position_ids = torch.arange(
            self.padding_idx + 1, sequence_length + self.padding_idx + 1, dtype=torch.long, device=inputs_embeds.device
        )
        return position_ids.unsqueeze(0).expand(input_shape)
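
# For reference: create_position_ids_from_input_ids, called in RobertaEmbeddings.forward
# above, is defined at module level in the upstream transformers modeling_roberta.py.
# A sketch of that helper is kept here as a comment so the definition is not duplicated;
# it assigns incremental position ids to non-padding tokens, starting at padding_idx + 1:
#
#   def create_position_ids_from_input_ids(input_ids, padding_idx):
#       mask = input_ids.ne(padding_idx).int()
#       incremental_indices = torch.cumsum(mask, dim=1).type_as(mask) * mask
#       return incremental_indices.long() + padding_idx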
# Copied from transformers.modeling_bert.BertSelfAttention with Bert->Roberta
class RobertaSelfAttention(nn.Module):
    def __init__(self, config):
        super().__init__()
        if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size