基于Transformer的预测示例(Transformer-based prediction on the Titanic dataset — note: despite the original "回归预测/regression" title, the model below is a binary classifier with a softmax head)

import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Layer, Dense, Dropout, LayerNormalization
from tensorflow.keras import Model
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Sinusoidal position encoding (fixed, not learned).
class PositionEmbedding(Layer):
    """Adds a fixed sinusoidal positional encoding to its input.

    Follows the "Attention Is All You Need" recipe, except that the sine and
    cosine halves are concatenated along the feature axis rather than
    interleaved (a common equivalent-capacity variant).
    """

    def __init__(self, max_len, embedding_dim, **kwargs):
        super(PositionEmbedding, self).__init__(**kwargs)
        # The table is constant for the layer's lifetime, so build it once.
        self.pos_encoding = self.positional_encoding(max_len, embedding_dim)

    def get_angles(self, pos, i, embedding_dim):
        # Angle rate decays geometrically with the (even-floored) feature index.
        rates = np.power(10000, (2 * (i // 2)) / np.float32(embedding_dim))
        return pos / rates

    def positional_encoding(self, max_len, embedding_dim):
        positions = np.arange(max_len)[:, np.newaxis]          # (max_len, 1)
        feature_idx = np.arange(embedding_dim)[np.newaxis, :]  # (1, embedding_dim)
        angle_rads = self.get_angles(positions, feature_idx, embedding_dim)

        # sin over even feature slots, cos over odd slots, concatenated.
        table = np.concatenate(
            [np.sin(angle_rads[:, 0::2]), np.cos(angle_rads[:, 1::2])],
            axis=-1,
        )
        # Leading axis of 1 lets the table broadcast over the batch dimension.
        return tf.cast(table[np.newaxis, ...], dtype=tf.float32)

    def call(self, inputs):
        # Slice to the actual sequence length and match the input dtype.
        table = tf.cast(self.pos_encoding, dtype=inputs.dtype)
        return inputs + table[:, :tf.shape(inputs)[1], :]

# Multi-head scaled dot-product self-attention, implemented from scratch.
class MultiHeadAttentionLayer(Layer):
    """Multi-head scaled dot-product self-attention.

    Projects the input into query/key/value spaces, splits them into
    `num_heads` independent heads, attends within each head, then recombines
    the heads with a final linear projection. No attention mask is applied
    (full bidirectional attention over the sequence).
    """

    def __init__(self, embedding_dim, num_heads, **kwargs):
        super(MultiHeadAttentionLayer, self).__init__(**kwargs)
        # Fail fast: separate_heads silently requires this divisibility, and
        # an incompatible pair would otherwise surface later as a cryptic
        # reshape error inside call().
        if embedding_dim % num_heads != 0:
            raise ValueError(
                f"embedding_dim ({embedding_dim}) must be divisible by "
                f"num_heads ({num_heads})"
            )
        self.num_heads = num_heads
        self.embedding_dim = embedding_dim
        self.projection_dim = embedding_dim // num_heads  # per-head width

        self.query_dense = Dense(embedding_dim)
        self.key_dense = Dense(embedding_dim)
        self.value_dense = Dense(embedding_dim)
        self.combine_heads = Dense(embedding_dim)

    def attention(self, query, key, value):
        """Scaled dot-product attention; returns (context, attention weights)."""
        score = tf.matmul(query, key, transpose_b=True)
        dim_key = tf.cast(tf.shape(key)[-1], tf.float32)
        # Scale by sqrt(d_k) to keep softmax logits in a stable range.
        scaled_score = score / tf.math.sqrt(dim_key)
        weights = tf.nn.softmax(scaled_score, axis=-1)
        output = tf.matmul(weights, value)
        return output, weights

    def separate_heads(self, x, batch_size):
        """Reshape (batch, seq, embed) -> (batch, heads, seq, embed/heads)."""
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.projection_dim))
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def call(self, inputs):
        batch_size = tf.shape(inputs)[0]
        query = self.query_dense(inputs)
        key = self.key_dense(inputs)
        value = self.value_dense(inputs)

        query = self.separate_heads(query, batch_size)
        key = self.separate_heads(key, batch_size)
        value = self.separate_heads(value, batch_size)

        attention_output, _ = self.attention(query, key, value)
        # Undo the head split: back to (batch, seq, embedding_dim).
        attention_output = tf.transpose(attention_output, perm=[0, 2, 1, 3])
        attention_output = tf.reshape(attention_output, (batch_size, -1, self.embedding_dim))
        return self.combine_heads(attention_output)

# Position-wise feed-forward block: expand -> ReLU -> project back.
class FeedForwardNetwork(Layer):
    """Two-layer MLP applied identically at every sequence position."""

    def __init__(self, embedding_dim, ff_dim, **kwargs):
        super(FeedForwardNetwork, self).__init__(**kwargs)
        self.dense1 = Dense(ff_dim, activation='relu')
        self.dense2 = Dense(embedding_dim)

    def call(self, inputs):
        # Widen to ff_dim, then project back down to the model width.
        return self.dense2(self.dense1(inputs))

# Transformer encoder block: self-attention + FFN, each with residual + norm.
class EncoderLayer(Layer):
    """One post-norm Transformer encoder block.

    Structure: x -> MHA -> dropout -> add & norm -> FFN -> dropout -> add & norm.
    """

    def __init__(self, embedding_dim, num_heads, ff_dim, **kwargs):
        super(EncoderLayer, self).__init__(**kwargs)
        self.attention = MultiHeadAttentionLayer(embedding_dim, num_heads)
        self.ffn = FeedForwardNetwork(embedding_dim, ff_dim)
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(0.1)
        self.dropout2 = Dropout(0.1)

    def call(self, inputs, training):
        # Self-attention sub-layer with residual connection.
        attended = self.dropout1(self.attention(inputs), training=training)
        normed = self.layernorm1(inputs + attended)

        # Feed-forward sub-layer with residual connection.
        transformed = self.dropout2(self.ffn(normed), training=training)
        return self.layernorm2(normed + transformed)

# Encoder-only Transformer: embed -> N encoder blocks -> mean-pool -> softmax.
class Transformer(Model):
    """Encoder-only Transformer classifier.

    NOTE(review): despite the file's "regression" title, the softmax head and
    `num_classes` output make this a classifier. `input_dim` is accepted for
    signature compatibility but never used (Dense infers the input width).
    """

    def __init__(self, num_layers, embedding_dim, num_heads, ff_dim, input_dim, max_len, num_classes, **kwargs):
        super(Transformer, self).__init__(**kwargs)
        self.embedding = Dense(embedding_dim)  # linear projection of raw features
        self.pos_encoding = PositionEmbedding(max_len, embedding_dim)
        self.enc_layers = [
            EncoderLayer(embedding_dim, num_heads, ff_dim)
            for _ in range(num_layers)
        ]
        self.dropout = Dropout(0.1)
        # NOTE(review): misleading name kept for compatibility — this is the
        # softmax classification head, not a flatten operation.
        self.flatten = Dense(num_classes, activation='softmax')

    def call(self, inputs, training):
        hidden = self.dropout(
            self.pos_encoding(self.embedding(inputs)), training=training
        )
        for encoder_block in self.enc_layers:
            hidden = encoder_block(hidden, training)

        # Mean-pool over the sequence axis before classifying.
        pooled = tf.reduce_mean(hidden, axis=1)
        return self.flatten(pooled)

# Load the Titanic dataset from a public mirror (network access required).
url = 'https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv'
data = pd.read_csv(url)

def preprocess_data(data):
    """Clean the Titanic DataFrame in place and return it.

    Fills missing Age/Fare with the median and Embarked with the mode, drops
    identifier-like columns, and integer-encodes the categorical columns.
    Mutates the caller's frame (fill/drop happen in place) and also returns it.
    """
    # Column-level `fillna(..., inplace=True)` is chained assignment — it is
    # deprecated and removed in pandas 3.0. Assigning the filled column back
    # keeps the same in-place mutation of the frame without the deprecation.
    data['Age'] = data['Age'].fillna(data['Age'].median())
    data['Embarked'] = data['Embarked'].fillna(data['Embarked'].mode()[0])
    data['Fare'] = data['Fare'].fillna(data['Fare'].median())
    # Drop columns that are unique identifiers or mostly missing.
    data.drop(columns=['Cabin', 'Ticket', 'Name', 'PassengerId'], inplace=True)
    # LabelEncoder is refit per column, so reusing one instance is safe here.
    le = LabelEncoder()
    data['Sex'] = le.fit_transform(data['Sex'])
    data['Embarked'] = le.fit_transform(data['Embarked'])
    return data

# Clean the frame, split features/target, and standardize the features.
data = preprocess_data(data)
X = data.drop(columns=['Survived'])
y = data['Survived']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)  # fit on the training split only (no leakage)
X_test = scaler.transform(X_test)

# Reshape to 3-D (samples, seq_len=1, features): the Transformer expects a sequence axis.
X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])

# Model hyperparameters.
num_layers = 2
embedding_dim = 32
num_heads = 2
ff_dim = 64
input_dim = X_train.shape[2]  # feature dimension of the input data
max_len = X_train.shape[1]    # sequence length (1 after the reshape above)
num_classes = 2

# Build and compile the model (sparse labels -> sparse categorical crossentropy).
transformer_model = Transformer(num_layers, embedding_dim, num_heads, ff_dim, input_dim, max_len, num_classes)
transformer_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model (20% of the training split held out for validation).
history = transformer_model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2)

# Evaluate on the held-out test split.
test_loss, test_accuracy = transformer_model.evaluate(X_test, y_test)
print(f'Test Accuracy: {test_accuracy:.4f}')

# Predict class labels: argmax over the softmax probabilities.
predictions = transformer_model.predict(X_test)
predictions = np.argmax(predictions, axis=1)

  • 2
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
Transformer回归预测是一种基于Transformer模型的机器学习方法,用于解决回归问题。Transformer模型是一种基于自注意力机制的神经网络模型,最初用于自然语言处理任务,如机器翻译和文本生成。但是,由于其强大的建模能力和并行计算的优势,Transformer模型也被应用于其他领域,包括回归预测。 在Transformer回归预测中,输入数据通常是一个向量或矩阵,表示待预测的特征。这些特征可以是时间序列数据、图像数据或其他类型的数据。Transformer模型通过多层的自注意力机制和前馈神经网络来学习输入数据之间的关系,并输出一个连续值作为预测结果。 与传统的回归方法相比,Transformer回归预测具有以下优势: 1. 并行计算:Transformer模型可以并行计算输入数据中不同位置的特征,从而加快训练和推理的速度。 2. 长程依赖建模:Transformer模型使用自注意力机制来捕捉输入数据中不同位置之间的长程依赖关系,有助于提高预测的准确性。 3. 可扩展性:Transformer模型可以通过增加层数和隐藏单元数来增加模型的容量,从而适应更复杂的回归任务。 然而,Transformer回归预测也存在一些挑战: 1. 数据量要求高:Transformer模型通常需要大量的训练数据来获得良好的性能,特别是在复杂的回归任务中。 2. 超参数选择:Transformer模型有许多超参数需要调整,如层数、隐藏单元数和学习率等,选择合适的超参数对于模型的性能至关重要。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值