【NLP】Several Network Architectures for Text Classification


Taking a Tang poetry generation model as the running example: each model takes an input sequence of length input_length=20, uses an embedding dimension of emb_dim=128, and draws tokens from a vocabulary of at most max_word=10000 words, so predicting the next token becomes a 10000-way classification over the vocabulary. The models are built as follows.
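Before looking at the architectures, it helps to see how training pairs for such a next-token task might be built. The sketch below is an assumption, not the original post's preprocessing: `token_ids` stands in for the tokenized corpus, and a sliding window of 20 tokens predicts the 21st.

import numpy as np
from keras.utils import to_categorical

input_length = 20     # length of each input window
max_word = 10000      # vocabulary size

# `token_ids` stands in for the whole corpus encoded as word indices (dummy data here)
token_ids = np.random.randint(1, max_word, size=2000)

# Slide a 20-token window over the corpus; the token that follows is the target
X = np.array([token_ids[i:i + input_length]
              for i in range(len(token_ids) - input_length)])
y = to_categorical(token_ids[input_length:], num_classes=max_word)   # one-hot for categorical_crossentropy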

1. LSTM

# Build the network
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, Dropout
from keras.optimizers import Adam

model = Sequential()
model.add(Embedding(10000, 128, input_length=20))   # map token ids to 128-d vectors
model.add(LSTM(128, return_sequences=True))         # first LSTM returns the full sequence
model.add(Dropout(0.2))
model.add(LSTM(128))                                # second LSTM returns only the last step
model.add(Dropout(0.2))
model.add(Dense(10000, activation='softmax'))       # next-token distribution over the vocabulary

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=0.001), metrics=['accuracy'])
model.summary()

[Figure: model.summary() output for the LSTM model]
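As a quick sanity check of the input/output shapes (random dummy data, not the real corpus):

import numpy as np

dummy_x = np.random.randint(0, 10000, size=(32, 20))   # a batch of 32 sequences of 20 token ids
preds = model.predict(dummy_x)
print(preds.shape)   # (32, 10000): one probability distribution over the vocabulary per sequence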

2. TextCNN



# Conv1D and Flatten are new here; import them alongside the earlier layers
from keras.layers import Conv1D, Flatten

# Build a TextCNN-style model: three stacked 1-D convolutions over the embeddings
model = Sequential()
model.add(Embedding(10000, 128, input_length=20))
model.add(Conv1D(64, 3, padding='same', activation='relu'))   # 'same' padding keeps the sequence length at 20
model.add(Conv1D(32, 3, padding='same', activation='relu'))
model.add(Conv1D(16, 3, padding='same', activation='relu'))
model.add(Flatten())
model.add(Dense(10000, activation='softmax'))

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=0.001), metrics=['accuracy'])
model.summary()

[Figure: model.summary() output for the TextCNN model]
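Note that the model above stacks the convolutions sequentially. The canonical TextCNN (Kim, 2014) instead runs several convolutions with different kernel sizes in parallel and concatenates the max-pooled features. Here is a minimal sketch of that variant with the functional API (the kernel sizes and filter counts are illustrative choices, not values from the original post):

from keras.models import Model
from keras.layers import Input, Embedding, Conv1D, GlobalMaxPooling1D, Concatenate, Dense

inputs = Input(shape=(20,))
emb = Embedding(10000, 128, input_length=20)(inputs)

# One branch per kernel size; each captures n-gram features of a different width
branches = []
for kernel_size in (2, 3, 4):
    conv = Conv1D(64, kernel_size, activation='relu')(emb)
    branches.append(GlobalMaxPooling1D()(conv))

x = Concatenate()(branches)
outputs = Dense(10000, activation='softmax')(x)
textcnn = Model(inputs=inputs, outputs=outputs)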

3. Stacked MultiHeadAttention Layers

# Build the network
from keras.models import Model
from keras.layers import Dense, Embedding, Dropout, MultiHeadAttention, Input, Flatten
from keras.optimizers import Adam

# Import the positional-encoding layer from keras_nlp
from keras_nlp.layers import PositionEmbedding

inputs = Input(shape=(20,))
x = Embedding(10000, 128, input_length=20)(inputs)
pos = PositionEmbedding(sequence_length=20)(x)   # learned positional embeddings
x = x + pos
# Three stacked self-attention layers (query = value = x)
x = MultiHeadAttention(num_heads=3, key_dim=128)(x, x)
# x = Dropout(0.1)(x)
x = MultiHeadAttention(num_heads=3, key_dim=128)(x, x)
# x = Dropout(0.1)(x)
x = MultiHeadAttention(num_heads=3, key_dim=128)(x, x)
x = Flatten()(x)
x = Dense(10000, activation='softmax')(x)
# Build the model
model = Model(inputs=inputs, outputs=x)
# Compile the model
model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=0.001), metrics=['accuracy'])
model.summary()

[Figure: model.summary() output for the stacked-attention model]
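If you want to see what the heads attend to, Keras's MultiHeadAttention can also return the attention weights via return_attention_scores=True (a standalone sketch with dummy data, separate from the model above):

import numpy as np
from keras.layers import MultiHeadAttention

mha = MultiHeadAttention(num_heads=3, key_dim=128)
dummy = np.random.rand(2, 20, 128).astype('float32')   # (batch, seq_len, emb_dim)

out, scores = mha(dummy, dummy, return_attention_scores=True)
print(out.shape)      # (2, 20, 128): same shape as the input sequence
print(scores.shape)   # (2, 3, 20, 20): per-head attention over the 20 positions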

4. Transformer (Encoder)

# Build the network
from keras.models import Model
from keras.layers import Dense, Embedding, Input
from keras.optimizers import Adam
from keras import layers
import keras

# Import the positional-encoding layer from keras_nlp
from keras_nlp.layers import PositionEmbedding
class TransformerBlock(layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super(TransformerBlock, self).__init__(**kwargs)
        # Keep the constructor arguments so get_config can serialize them
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = keras.Sequential(
            [layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim)]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training=None):
        # Self-attention, then residual connection and layer norm
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        # Position-wise feed-forward network, then residual connection and layer norm
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):                               # 重写 get_config 方法,以便保存模型
            config = super().get_config().copy()
            config.update({
                'att': self.att,
                'ffn': self.ffn,
                'layernorm1': self.layernorm1,
                'layernorm2': self.layernorm2,
                'dropout1': self.dropout1,
                'dropout2': self.dropout2,
            })
            return config

inputs = Input(shape=(20,))
x = Embedding(10000, 128, input_length=20)(inputs)
pos = PositionEmbedding(sequence_length=20)(x)   # learned positional embeddings
x = x + pos

x = TransformerBlock(embed_dim=128, num_heads=8, ff_dim=256)(x)     # (None, 20, 128)

x = layers.GlobalAveragePooling1D()(x)                              # (None, 128)

x = Dense(10000, activation='softmax')(x)
# Build the model
model = Model(inputs=inputs, outputs=x)
# Compile the model
model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=0.001), metrics=['accuracy'])
model.summary()

[Figure: model.summary() output for the Transformer encoder model]
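With get_config overridden as above, the trained model can be saved and restored; the custom layer still needs to be registered through custom_objects when loading (a minimal sketch; the file name is an arbitrary choice):

# Save the model, then reload it with the custom layer registered
model.save('transformer_encoder.h5')

from keras.models import load_model
restored = load_model('transformer_encoder.h5',
                      custom_objects={'TransformerBlock': TransformerBlock})
restored.summary()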
