import torch
import torch.nn as nn
from sklearn.preprocessing import LabelBinarizer
from torch.nn import functional as F
class TextRNN(nn.Module):
    def __init__(self,
                 input_size=256,
                 hidden_size=128,
                 output_size=768,
                 n_layers=2,
                 dropout=0.5,
                 args=None):
        super(TextRNN, self).__init__()
        self.rnn = nn.LSTM(input_size=input_size,
                           hidden_size=hidden_size,
                           num_layers=n_layers,
                           bidirectional=True,
                           batch_first=True,
                           dropout=dropout)
        # Bidirectional RNN, so the projection takes hidden_size * 2; used for both arg1 and arg2.
        self.fc = nn.Linear(hidden_size * 2, output_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, input_ids):
        # input_ids: [batch, seq_len, input_size], e.g. [8, 80, 300]
        arg_out = self.dropout(input_ids)
        # out:    [batch, seq_len, hidden_size * 2]
        # hidden: [num_layers * 2, batch, hidden_size]
        # cell:   [num_layers * 2, batch, hidden_size]
        arg_out, (_, _) = self.rnn(arg_out)
        out = self.fc(arg_out)  # [batch, seq_len, output_size]
        return out
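
# Illustrative check of TextRNN input/output shapes (not part of the original
# training script; the batch size, sequence length, and feature size below are assumed):
# a [batch, seq_len, input_size] tensor maps to [batch, seq_len, output_size],
# because the bidirectional LSTM states are projected token-wise by self.fc.
#
#   rnn = TextRNN(input_size=768, hidden_size=128, output_size=768)
#   dummy = torch.randn(8, 80, 768)   # [batch, seq_len, input_size]
#   rnn(dummy).shape                  # torch.Size([8, 80, 768])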
class CVAEModel(nn.Module):
    def __init__(self,
                 input_size=256,
                 hidden_size=256,
                 output_size=768):
        super(CVAEModel, self).__init__()
        # expected input: [batch, seq_len, feature], e.g. [8, 256, 768]
        self.rnn01 = TextRNN(input_size=input_size, hidden_size=128, output_size=768)
        self.rnn02 = TextRNN(input_size=input_size, hidden_size=128, output_size=768)
        # project the encoder output down to hidden_size // 2 (mu and logvar heads)
        self.fc11 = nn.Linear(hidden_size, hidden_size // 2)
        self.fc12 = nn.Linear(hidden_size, hidden_size // 2)
        self.fc21 = nn.Linear(hidden_size // 2, hidden_size)
        self.fc22 = nn.Linear(hidden_size, hidden_size)
        self.layernorm = nn.LayerNorm(hidden_size)
        self.lb = LabelBinarizer()

    # One-hot encode the labels.
    # Note: with two classes, sklearn's LabelBinarizer returns a single 0/1 column,
    # so the result has shape [batch, 1].
    def to_categrical(self, y):
        y_n = y.cpu().detach()
        self.lb.fit(list(range(0, 2)))
        y_one_hot = self.lb.transform(y_n)
        # keep the encoded labels on the same device as the incoming label tensor
        y_one_hot = torch.FloatTensor(y_one_hot).to(y.device)
        return y_one_hot
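
    # Example of the encoding above (illustrative values only):
    #   LabelBinarizer().fit([0, 1]).transform([1, 0, 1]) -> [[1], [0], [1]]
    # so to_categrical(torch.tensor([1, 0, 1])) is a float tensor of shape [3, 1].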
    def encode(self, x, y=None, Training=False):
        if Training:
            con = x
            # y_c = self.to_categrical(y)
            # y_c = y_c.unsqueeze(1)
            # # concatenate the input sample with the one-hot vector of label y
            # con = con + y_c
            out = self.rnn01(con)
            return F.relu(self.fc11(out)), F.relu(self.fc12(out))
        else:
            return F.relu(self.rnn01(x))

    # Reparameterization trick: z = mu + exp(0.5 * logvar) * eps with eps ~ N(0, I),
    # which keeps the sampling step differentiable w.r.t. mu and logvar.
    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return eps.mul(std).add_(mu)

    def decode(self, z, y=None, Training=False):
        con = z
        # condition the latent code on the one-hot label
        y_c = self.to_categrical(y)
        y_c = y_c.unsqueeze(1)
        con = con + y_c
        out = self.fc21(con)
        out = self.rnn02(out)  # modified here
        return F.relu(out)
    @classmethod
    def loss_function(cls, recon_x, x, mu, logvar):
        bz = x.shape[0]
        # print(recon_x.shape, x.shape)
        # recon_x, x = recon_x.view(bz, -1), x.view(-1)
        # Reconstruction term (mean squared error is used here rather than binary cross-entropy).
        BCE = nn.MSELoss()(recon_x, x)
        # BCE = nn.CrossEntropyLoss(recon_x, x)
        # KL divergence, see Appendix B of the VAE paper:
        # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014
        # https://arxiv.org/abs/1312.6114
        # KLD = -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
        KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
        return BCE + KLD
    def forward(self, x, y=None, Training=False):
        # Training mode: run the full CVAE (encode, reparameterize, decode).
        if Training:
            # Encode
            mu, logvar = self.encode(x, y, Training)
            # Reparameterize
            z = self.reparameterize(mu, logvar)
            # Decode
            out = self.decode(z, y, Training)
            return out, mu, logvar
        else:
            # Inference mode: return the encoder representation only.
            out = self.encode(x)
            return out
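
# Illustrative round trip through CVAEModel (the sizes below are assumed for the
# example and are not taken from the original run configuration). During training
# the module returns the reconstruction together with mu and logvar, which feed
# loss_function; at inference only the encoder branch is used.
#
#   cvae = CVAEModel(input_size=768, hidden_size=768, output_size=768)
#   x = torch.randn(8, 80, 768)         # [batch, seq_len, feature]
#   y = torch.randint(0, 2, (8,))       # binary labels
#   recon, mu, logvar = cvae(x, y, Training=True)
#   loss = CVAEModel.loss_function(recon, x, mu, logvar)
#   enc = cvae(x)                       # inference: encoder output only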
# coding=utf-8
# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""PyTorch RoBERTa model. """
import math
import warnings
import torch
import torch.nn as nn
from torch.nn import CrossEntropyLoss, MSELoss
from transformers.activations import ACT2FN, gelu
from transformers.configuration_roberta import RobertaConfig
from transformers.file_utils import (
    add_code_sample_docstrings,
    add_start_docstrings,
    add_start_docstrings_to_callable,
    replace_return_docstrings,
)
from transformers.modeling_outputs import (
    BaseModelOutput,
    BaseModelOutputWithPooling,
    CausalLMOutput,
    MaskedLMOutput,
    MultipleChoiceModelOutput,
    QuestionAnsweringModelOutput,
    SequenceClassifierOutput,
    TokenClassifierOutput,
)
from transformers.modeling_utils import (
    PreTrainedModel,
    apply_chunking_to_forward,
    find_pruneable_heads_and_indices,
    prune_linear_layer,
)
from .CVAEModel import CVAEModel
from .Attention import AttentionInArgs
from .GATModel import GAT
import logging
logger = logging.getLogger(__name__)
_CONFIG_FOR_DOC = "RobertaConfig"
_TOKENIZER_FOR_DOC = "RobertaTokenizer"
ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST = [
    "roberta-base",
    "roberta-large",
    "roberta-large-mnli",
    "distilroberta-base",
    "roberta-base-openai-detector",
    "roberta-large-openai-detector",
    # See all RoBERTa models at https://huggingface.co/models?filter=roberta
]
class RobertaEmbeddings(nn.Module):
    """
    Same as BertEmbeddings with a tiny tweak for positional embeddings indexing.
    """

    # Copied from transformers.modeling_bert.BertEmbeddings.__init__
    def __init__(self, config):
        super().__init__()
        self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=config.pad_token_id)
        self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)
        self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)
        # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load
        # any TensorFlow checkpoint file
        self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        # position_ids (1, len position emb) is contiguous in memory and exported when serialized
        self.register_buffer("position_ids", torch.arange(config.max_position_embeddings).expand((1, -1)))
        # End copy
        self.padding_idx = config.pad_token_id
        self.position_embeddings = nn.Embedding(
            config.max_position_embeddings, config.hidden_size, padding_idx=self.padding_idx
        )

    def forward(self, input_ids=None, token_type_ids=None, position_ids=None, inputs_embeds=None):
        if position_ids is None:
            if input_ids is not None:
                # Create the position ids from the input token ids. Any padded tokens remain padded.
                position_ids = create_position_ids_from_input_ids(input_ids, self.padding_idx).to(input_ids.device)
            else:
                position_ids = self.create_position_ids_from_inputs_embeds(inputs_embeds)
        # Copied from transformers.modeling_bert.BertEmbeddings.forward
        if input_ids is not None:
            input_shape = input_ids.size()
        else:
            input_shape = inputs_embeds.size()[:-1]
        seq_length = input_shape[1]
        if position_ids is None:
            position_ids = self.position_ids[:, :seq_length]
        if token_type_ids is None:
            token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=self.position_ids.device)
        if inputs_embeds is None:
            inputs_embeds = self.word_embeddings(input_ids)
        position_embeddings = self.position_embeddings(position_ids)
        token_type_embeddings = self.token_type_embeddings(token_type_ids)
        embeddings = inputs_embeds + position_embeddings + token_type_embeddings
        embeddings = self.LayerNorm(embeddings)
        embeddings = self.dropout(embeddings)
        return embeddings

    def create_position_ids_from_inputs_embeds(self, inputs_embeds):
        """We are provided embeddings directly. We cannot infer which are padded so just generate
        sequential position ids.

        :param torch.Tensor inputs_embeds:
        :return torch.Tensor:
        """
        input_shape = inputs_embeds.size()[:-1]
        sequence_length = input_shape[1]
        position_ids = torch.arange(
            self.padding_idx + 1, sequence_length + self.padding_idx + 1, dtype=torch.long, device=inputs_embeds.device
        )
        return position_ids.unsqueeze(0).expand(input_shape)
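
# For reference: create_position_ids_from_input_ids, called in RobertaEmbeddings.forward
# above, is defined at module level in the upstream transformers modeling_roberta.py.
# A sketch of that helper is kept here as a comment so the definition is not duplicated;
# it assigns incremental position ids to non-padding tokens, starting at padding_idx + 1:
#
#   def create_position_ids_from_input_ids(input_ids, padding_idx):
#       mask = input_ids.ne(padding_idx).int()
#       incremental_indices = torch.cumsum(mask, dim=1).type_as(mask) * mask
#       return incremental_indices.long() + padding_idx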
# Copied from transformers.modeling_bert.BertSelfAttention with Bert->Roberta
class RobertaSelfAttention(nn.Module):
    def __init__(self, config):
        super().__init__()
        if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size