注意力机制代码自己写的

一枚爱吃大蒜的程序员

于 2024-08-01 16:25:56 发布

阅读量304

点赞数 3

文章标签： python 机器学习 深度学习

本文链接：https://blog.csdn.net/qiqi_ai_/article/details/140851135

版权

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Data generation
def generate_synthetic_time_series(num_samples, seq_length):
    """Build sliding-window samples from a noisy sine wave.

    A sine curve is sampled at ``num_samples`` evenly spaced points on
    ``[0, 4*pi]`` and Gaussian noise (scaled by 0.1) is added. Every run of
    ``seq_length`` consecutive values becomes one input window, and the value
    immediately following the window is its target.

    Args:
        num_samples: Number of points in the underlying series.
        seq_length: Number of consecutive points in each input window.

    Returns:
        A tuple ``(windows, targets)`` of NumPy arrays. When
        ``num_samples > seq_length`` they have shapes
        ``(num_samples - seq_length, seq_length)`` and
        ``(num_samples - seq_length,)``; otherwise both are empty.
    """
    phase = np.linspace(0, 4 * np.pi, num_samples)
    noise = 0.1 * np.random.randn(num_samples)
    signal = np.sin(phase) + noise

    # One window per start position that still leaves a following target value.
    window_starts = range(num_samples - seq_length)
    windows = [signal[start:start + seq_length] for start in window_starts]
    targets = [signal[start + seq_length] for start in window_starts]

    return np.array(windows), np.array(targets)

# Model definitions
class Attention(nn.Module):
    """Additive attention of a single query vector over a sequence of states.

    Scores are computed as ``Va(tanh(Wa(query) + Ua(state_t)))`` for every
    time step ``t``, normalised with a softmax over the time axis, and used to
    form a weighted sum of the states.
    """

    def __init__(self, hidden_dim):
        """Create the three projection layers.

        Args:
            hidden_dim: Size of the query vector and of each encoder state.
        """
        super().__init__()
        self.Wa = nn.Linear(hidden_dim, hidden_dim)
        self.Ua = nn.Linear(hidden_dim, hidden_dim)
        self.Va = nn.Linear(hidden_dim, 1)

    def forward(self, hidden, encoder_outputs):
        """Attend over ``encoder_outputs`` using ``hidden`` as the query.

        Args:
            hidden: Query tensor of shape ``(batch, hidden_dim)`` or
                ``(batch, 1, hidden_dim)``.
            encoder_outputs: Tensor of shape ``(batch, seq_len, hidden_dim)``.

        Returns:
            A tuple ``(context, weights)`` where ``context`` has shape
            ``(batch, 1, hidden_dim)`` and ``weights`` has shape
            ``(batch, seq_len, 1)`` and sums to 1 along ``seq_len``.
        """
        # A 2-D query needs an explicit time axis; otherwise broadcasting
        # would line the batch axis up against the sequence axis.
        if hidden.dim() == 2:
            hidden = hidden.unsqueeze(1)

        # (batch, 1, H) + (batch, seq_len, H) -> (batch, seq_len, H)
        energy = torch.tanh(self.Wa(hidden) + self.Ua(encoder_outputs))
        scores = self.Va(energy)                 # (batch, seq_len, 1)
        weights = torch.softmax(scores, dim=1)   # normalise over time steps

        # bmm needs (batch, 1, seq_len) @ (batch, seq_len, H) for the weighted sum.
        context = torch.bmm(weights.transpose(1, 2), encoder_outputs)
        return context, weights

class LSTMWithAttention(nn.Module):
    """LSTM encoder followed by attention pooling and a linear output layer."""

    def __init__(self, input_dim, hidden_dim, output_dim):
        """Build the LSTM, the attention module and the output projection.

        Args:
            input_dim: Number of features per time step fed to the LSTM.
            hidden_dim: LSTM hidden size (also the attention dimension).
            output_dim: Number of values produced per input sequence.
        """
        super().__init__()
        self.lstm = nn.LSTM(input_size=input_dim, hidden_size=hidden_dim, batch_first=True)
        self.attention = Attention(hidden_dim)
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x):
        """Run the sequence through the LSTM and attend over its outputs.

        Args:
            x: Input passed straight to the batch-first LSTM, i.e. laid out
                as ``(batch, seq_len, input_dim)``.

        Returns:
            A tuple ``(prediction, attn_weights)``: the output of the final
            linear layer and the attention weights returned by the attention
            module.
        """
        encoder_states, (final_hidden, _final_cell) = self.lstm(x)
        # The final hidden state of the last LSTM layer is the attention query.
        query = final_hidden[-1]
        context, attn_weights = self.attention(query, encoder_states)
        prediction = self.fc(context.squeeze(1))
        return prediction, attn_weights

# Training
def train_model(model, criterion, optimizer, X_train, y_train, num_epochs=10, batch_size=64):
    """Train ``model`` with mini-batch gradient descent and report the loss.

    Args:
        model: Module whose forward pass returns a 2-tuple; the first element
            is the prediction used for the loss.
        criterion: Loss callable taking ``(prediction, target)``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        X_train: Array-like of training inputs (converted to float32).
        y_train: Array-like of training targets (converted to float32).
        num_epochs: Number of passes over the training data.
        batch_size: Mini-batch size used by the shuffling data loader.

    Returns:
        A list with the mean batch loss of each epoch, in order.
    """
    model.train()
    features = torch.as_tensor(X_train, dtype=torch.float32)
    labels = torch.as_tensor(y_train, dtype=torch.float32)
    dataloader = DataLoader(TensorDataset(features, labels), batch_size=batch_size, shuffle=True)

    epoch_losses = []
    for epoch in range(num_epochs):
        running_loss = 0.0
        for inputs, targets in dataloader:
            optimizer.zero_grad()
            outputs, _ = model(inputs)
            # Drop only the trailing size-1 output axis. A bare squeeze() would
            # also remove the batch axis of a single-sample batch, so the loss
            # would compare a scalar with a (1,) target and rely on broadcasting.
            loss = criterion(outputs.squeeze(-1), targets)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        mean_loss = running_loss / len(dataloader)
        epoch_losses.append(mean_loss)
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {mean_loss}')
    return epoch_losses

# Inference
def predict(model, X):
    """Run ``model`` on ``X`` without tracking gradients.

    The model is switched to evaluation mode and left in that mode.

    Args:
        model: Module whose forward pass returns a 2-tuple; the first element
            is the prediction tensor.
        X: Array-like of inputs, converted to a float tensor.

    Returns:
        The prediction tensor converted to a NumPy array.
    """
    model.eval()
    batch = torch.Tensor(X)
    with torch.no_grad():
        outputs, _attention = model(batch)
    return outputs.numpy()

# Evaluation metrics
def evaluate(y_true, y_pred):
    """Compute the mean squared error and its square root.

    Args:
        y_true: Ground-truth values.
        y_pred: Predicted values.

    Returns:
        A tuple ``(mse, rmse)``.
    """
    squared_error = mean_squared_error(y_true, y_pred)
    return squared_error, np.sqrt(squared_error)

# Main program
if __name__ == "__main__":
    # Generate the sliding-window dataset.
    num_samples = 1000
    seq_length = 10
    X, y = generate_synthetic_time_series(num_samples, seq_length)

    # Model dimensions.
    input_dim = 1
    hidden_dim = 64
    output_dim = 1

    # The generator returns windows shaped (num_windows, seq_length), but the
    # batch-first LSTM needs (batch, seq_len, input_dim): add the feature axis.
    X = X.reshape(len(X), seq_length, input_dim)

    # Chronological split: the first 80% of the windows train, the rest test.
    split_index = int(0.8 * len(X))
    X_train, X_test = X[:split_index], X[split_index:]
    y_train, y_test = y[:split_index], y[split_index:]

    # Model, loss and optimizer.
    model = LSTMWithAttention(input_dim, hidden_dim, output_dim)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Train.
    train_model(model, criterion, optimizer, X_train, y_train)

    # Predict; flatten to 1-D so the predictions line up with y_test.
    y_pred = predict(model, X_test).reshape(-1)

    # Evaluate.
    mse, rmse = evaluate(y_test, y_pred)
    print(f'MSE: {mse}, RMSE: {rmse}')

    # Plot. The target of window i is series element i + seq_length, and the
    # test windows start at split_index, so offset the x-axis by both.
    time_index = np.arange(len(y_test)) + split_index + seq_length
    plt.figure(figsize=(12, 6))
    plt.plot(time_index, y_test, label='True Signal')
    plt.plot(time_index, y_pred, label='Predicted Signal', linestyle='--')
    plt.legend()
    plt.show()