Knowledge Distillation: Code Implementation

For more in this series, see:
Knowledge Distillation: Fundamentals
Knowledge Distillation: Student Models
Knowledge Distillation: Code Implementation
Knowledge Distillation: Discussion

This post walks through a minimal implementation of knowledge distillation for a time-series model, intended as a tutorial.
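The student is trained with a weighted blend of the ground-truth loss and a teacher-matching loss (implemented as distillation_loss below):

$$\mathcal{L}_{\text{student}} = \alpha\,\mathrm{MSE}(\hat{y}_s,\, y) + (1 - \alpha)\,\mathrm{MSE}(\hat{y}_s,\, \hat{y}_t)$$

where $\hat{y}_s$ is the student's prediction, $\hat{y}_t$ the teacher's prediction, $y$ the ground truth, and $\alpha = 0.5$ by default.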
Dataset: AirQualityUCI.zip
Output:

Teacher Model MSE: 0.0026281994104216705
Student Model MSE: 0.002785646417517275
Teacher Model Training Time: 18.37 seconds
Student Model Training Time: 9.77 seconds
Teacher Model Memory Allocated: 37.50 MB
Student Model Memory Allocated: 28.73 MB

[Figure: true test values with the teacher and student predictions overlaid]
The student stays close to the teacher's MSE while training in roughly half the time and using less memory. The full code:

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import time
def load_and_preprocess_data(file_path, seq_length=24):
    # Read the data: the UCI file is ';'-separated, uses ',' as the decimal mark,
    # and encodes missing values as -200
    df = pd.read_csv(file_path, sep=';', decimal=',', na_values=-200)

    # Combine the date and time columns and parse them explicitly
    df['datetime'] = pd.to_datetime(df['Date'] + ' ' + df['Time'], format='%d/%m/%Y %H.%M.%S')

    # Handle missing values in the target column (the explicit replace is a
    # safeguard; na_values above already maps -200 to NaN)
    df['CO(GT)'] = df['CO(GT)'].replace(-200, np.nan)
    df.dropna(subset=['CO(GT)'], inplace=True)

    # Keep only the columns we need and sort by timestamp
    df = df[['datetime', 'CO(GT)']]
    df.sort_values('datetime', inplace=True)
    df.set_index('datetime', inplace=True)

    # Scale the target to [0, 1]
    scaler = MinMaxScaler(feature_range=(0, 1))
    df_scaled = scaler.fit_transform(df)

    # Build sliding-window sequences: each seq_length-step window predicts the next value
    def create_sequences(data, seq_length):
        xs, ys = [], []
        for i in range(len(data) - seq_length):
            x = data[i:i + seq_length]
            y = data[i + seq_length]
            xs.append(x)
            ys.append(y)
        return np.array(xs), np.array(ys)

    X, y = create_sequences(df_scaled, seq_length)
    # shuffle=False preserves temporal order in the train/test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

    return X_train, X_test, y_train, y_test, scaler
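To make the windowing step concrete, here is a small standalone sketch of the same create_sequences logic (the toy array is made up for illustration):

import numpy as np

def create_sequences(data, seq_length):
    # identical windowing to the helper inside load_and_preprocess_data
    xs, ys = [], []
    for i in range(len(data) - seq_length):
        xs.append(data[i:i + seq_length])
        ys.append(data[i + seq_length])
    return np.array(xs), np.array(ys)

demo = np.arange(100, dtype=np.float32).reshape(-1, 1)  # 100 fake scaled values
X_demo, y_demo = create_sequences(demo, seq_length=24)
print(X_demo.shape)  # (76, 24, 1): 76 windows of 24 steps, 1 feature
print(y_demo.shape)  # (76, 1): the value immediately after each window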
# Teacher: two stacked LSTM layers (128 and 64 hidden units) with a linear head
class TeacherModel(nn.Module):
    def __init__(self):
        super(TeacherModel, self).__init__()
        self.lstm1 = nn.LSTM(input_size=1, hidden_size=128, batch_first=True)
        self.lstm2 = nn.LSTM(input_size=128, hidden_size=64, batch_first=True)
        self.fc = nn.Linear(64, 1)

    def forward(self, x):
        x, _ = self.lstm1(x)
        x, _ = self.lstm2(x)
        x = self.fc(x[:, -1, :])
        return x
# Student: a single small LSTM (32 hidden units) with a linear head
class StudentModel(nn.Module):
    def __init__(self):
        super(StudentModel, self).__init__()
        self.lstm = nn.LSTM(input_size=1, hidden_size=32, batch_first=True)
        self.fc = nn.Linear(32, 1)

    def forward(self, x):
        x, _ = self.lstm(x)
        x = self.fc(x[:, -1, :])
        return x
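The size gap between the two architectures is easy to quantify with a quick parameter count (a sanity-check snippet, not part of the original script):

def n_params(model):
    # total number of trainable parameters
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f"Teacher parameters: {n_params(TeacherModel()):,}")  # 116,801
print(f"Student parameters: {n_params(StudentModel()):,}")  # 4,513

The student is roughly 26x smaller, which is what buys the shorter training time and lower memory footprint reported above.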
def train_teacher_model(X_train, y_train, X_test, y_test, seq_length=24, epochs=20, batch_size=32, device='cpu'):
    # Convert numpy arrays to tensors shaped (batch, seq_length, 1)
    X_train_t = torch.tensor(X_train, dtype=torch.float32).view(-1, seq_length, 1).to(device)
    y_train_t = torch.tensor(y_train, dtype=torch.float32).to(device)
    X_test_t = torch.tensor(X_test, dtype=torch.float32).view(-1, seq_length, 1).to(device)
    y_test_t = torch.tensor(y_test, dtype=torch.float32).to(device)

    # Build DataLoaders
    train_dataset = TensorDataset(X_train_t, y_train_t)
    test_dataset = TensorDataset(X_test_t, y_test_t)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Model, loss, and optimizer
    model = TeacherModel().to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Track training time and peak GPU memory
    start_time = time.time()
    if device == 'cuda':
        torch.cuda.reset_peak_memory_stats()

    # Training loop
    for epoch in range(epochs):
        model.train()
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()

        # Evaluate on the test set after each epoch
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for X_batch, y_batch in test_loader:
                outputs = model(X_batch)
                val_loss += criterion(outputs, y_batch).item()
        val_loss /= len(test_loader)
        print(f"Epoch {epoch + 1}/{epochs}, Validation Loss: {val_loss}")

    training_time = time.time() - start_time
    memory_allocated = torch.cuda.max_memory_allocated() if device == 'cuda' else 0

    return model, training_time, memory_allocated
def train_student_model(X_train, y_train, teacher_model, X_test, y_test, seq_length=24, epochs=20, batch_size=32, alpha=0.5, device='cpu'):
    # Convert numpy arrays to tensors shaped (batch, seq_length, 1)
    X_train_t = torch.tensor(X_train, dtype=torch.float32).view(-1, seq_length, 1).to(device)
    y_train_t = torch.tensor(y_train, dtype=torch.float32).to(device)
    X_test_t = torch.tensor(X_test, dtype=torch.float32).view(-1, seq_length, 1).to(device)
    y_test_t = torch.tensor(y_test, dtype=torch.float32).to(device)

    # Only a test DataLoader is needed here: training batches are sliced
    # manually below so they stay index-aligned with the teacher's predictions
    test_dataset = TensorDataset(X_test_t, y_test_t)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Precompute the teacher's soft targets over the whole training set
    teacher_model.eval()
    with torch.no_grad():
        teacher_predictions = teacher_model(X_train_t)

    # Model, loss, and optimizer
    model = StudentModel().to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    def distillation_loss(y_true, y_pred, teacher_pred, alpha):
        # Weighted blend: alpha scales the hard-label (ground-truth) loss,
        # (1 - alpha) scales the soft-label (teacher-matching) loss
        return alpha * criterion(y_pred, y_true) + (1 - alpha) * criterion(y_pred, teacher_pred)

    # Track training time and peak GPU memory
    start_time = time.time()
    if device == 'cuda':
        torch.cuda.reset_peak_memory_stats()

    # Training loop: batches are sliced by index so each student batch lines up
    # with the corresponding slice of teacher_predictions
    for epoch in range(epochs):
        model.train()
        for i in range(0, len(X_train_t), batch_size):
            X_batch = X_train_t[i:i + batch_size]
            y_batch = y_train_t[i:i + batch_size]
            teacher_batch = teacher_predictions[i:i + batch_size]

            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = distillation_loss(y_batch, outputs, teacher_batch, alpha)
            loss.backward()
            optimizer.step()

        # Evaluate on the test set after each epoch
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for X_batch, y_batch in test_loader:
                outputs = model(X_batch)
                val_loss += criterion(outputs, y_batch).item()
        val_loss /= len(test_loader)
        print(f"Epoch {epoch + 1}/{epochs}, Validation Loss: {val_loss}")

    training_time = time.time() - start_time
    memory_allocated = torch.cuda.max_memory_allocated() if device == 'cuda' else 0

    return model, training_time, memory_allocated
def evaluate_model(model, X_test, y_test, scaler, seq_length=24, device='cpu'):
    model.eval()
    X_test_t = torch.tensor(X_test, dtype=torch.float32).view(-1, seq_length, 1).to(device)
    y_test_t = torch.tensor(y_test, dtype=torch.float32).to(device)
    with torch.no_grad():
        y_pred = model(X_test_t).cpu().numpy()

    # MSE is computed on the scaled values; both series are inverse-transformed
    # below for plotting in the original units
    mse = mean_squared_error(y_test, y_pred)
    y_test_inv = scaler.inverse_transform(y_test.reshape(-1, 1))
    y_pred_inv = scaler.inverse_transform(y_pred)
    return mse, y_test_inv, y_pred_inv
def plot_predictions(y_test_inv, y_pred_teacher_inv, y_pred_student_inv):
    plt.figure(figsize=(14, 7))
    plt.plot(y_test_inv, label='True')
    plt.plot(y_pred_teacher_inv, label='Teacher Prediction', alpha=0.7)
    plt.plot(y_pred_student_inv, label='Student Prediction', alpha=0.7)
    plt.legend()
    plt.show()
def main():
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Load and preprocess the data
    csv_file = 'AirQualityUCI.csv'
    X_train, X_test, y_train, y_test, scaler = load_and_preprocess_data(csv_file)

    # Train the teacher model
    teacher_model, teacher_time, teacher_memory = train_teacher_model(X_train, y_train, X_test, y_test, device=device)

    # Train the distilled student model
    student_model, student_time, student_memory = train_student_model(X_train, y_train, teacher_model, X_test, y_test, device=device)

    # Evaluate both models
    mse_teacher, y_test_inv, y_pred_teacher_inv = evaluate_model(teacher_model, X_test, y_test, scaler, device=device)
    mse_student, _, y_pred_student_inv = evaluate_model(student_model, X_test, y_test, scaler, device=device)

    print(f"Teacher Model MSE: {mse_teacher}")
    print(f"Student Model MSE: {mse_student}")
    print(f"Teacher Model Training Time: {teacher_time:.2f} seconds")
    print(f"Student Model Training Time: {student_time:.2f} seconds")
    print(f"Teacher Model Memory Allocated: {teacher_memory / 1024 ** 2:.2f} MB")
    print(f"Student Model Memory Allocated: {student_memory / 1024 ** 2:.2f} MB")

    # Visualize the predictions
    plot_predictions(y_test_inv, y_pred_teacher_inv, y_pred_student_inv)


if __name__ == '__main__':
    main()
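A natural follow-up is to persist the distilled student for deployment. Here is a minimal sketch using the standard torch.save/torch.load API (the file name is made up, and these lines would go at the end of main()):

# Save the trained student's weights
torch.save(student_model.state_dict(), 'student_distilled.pt')

# Later: reload the compact model for CPU inference
model = StudentModel()
model.load_state_dict(torch.load('student_distilled.pt', map_location='cpu'))
model.eval()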