刘二大人《PyTorch深度学习实践》作业

享受这时光

已于 2024-06-14 21:21:43 修改

阅读量697

点赞数 10

文章标签：深度学习 pytorch 人工智能

于 2023-12-06 18:02:56 首次发布

本文链接：https://blog.csdn.net/qq_39804263/article/details/134837911

版权

P1 作业
代码：梯度下降法
代码：随机梯度下降
代码：反向传播
代码：反向传播
P5 作业：不同优化器比较
代码：Logistic回归
P7 作业：不同激活函数比较
代码：Dataloader
P8 作业：Kaggle数据集加载、训练和测试
代码：MNIST多分类任务
P9 作业：Kaggle Otto Group Product Classification Challenge

P1 作业

import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# Toy training set; the three points are not exactly collinear.
x_data = [1.0, 2.0, 3.0]
y_data = [3.0, 4.0, 6.0]


def forward(x, w, b):
    """Return the linear model prediction ``x * w + b``."""
    prediction = x * w + b
    return prediction


def loss(x, y, w, b):
    """Return the squared error of the prediction for one sample ``(x, y)``."""
    residual = forward(x, w, b) - y
    return residual ** 2

# Candidate parameters: w from 0.0 and b from -2.0, both in steps of 0.1.
w_list = np.arange(0.0, 4.1, 0.1)
b_list = np.arange(-2.0, 2.1, 0.1)

# mse_matrix[i, j] stores the mean squared error for (w_list[i], b_list[j]).
mse_matrix = np.zeros((len(w_list), len(b_list)))
n_samples = len(x_data)

for i, w in enumerate(w_list):
    for j, b in enumerate(b_list):
        total = sum(loss(x_val, y_val, w, b) for x_val, y_val in zip(x_data, y_data))
        mse_matrix[i, j] = total / n_samples

# meshgrid yields arrays of shape (len(b_list), len(w_list)), so the loss
# matrix is transposed to line up with W and B.
W, B = np.meshgrid(w_list, b_list)
fig = plt.figure('Linear Model Cost Value')
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(W, B, mse_matrix.T, cmap='viridis')

for set_label, text in ((ax.set_xlabel, 'w'), (ax.set_ylabel, 'b'), (ax.set_zlabel, 'loss')):
    set_label(text)

plt.show()

在这里插入图片描述

代码：梯度下降法

import numpy as np
import matplotlib.pyplot as plt

# Training samples of y = 2x.
x_data = [1.0, 2.0, 3.0]
y_data = [2.0, 4.0, 6.0]

# Initial weight guess and gradient-descent step size.
w = 1.0
learning_rate = 0.01


def forward(x, w):
    """Return the prediction of the bias-free linear model ``x * w``."""
    return x * w


def cost_fuction(xs, ys, w):
    """Return the mean squared error of the model over all samples in ``xs``/``ys``."""
    squared_errors = ((forward(x, w) - y) ** 2 for x, y in zip(xs, ys))
    return sum(squared_errors) / len(xs)

def gradient(xs, ys, w):
    """Return the derivative of the mean squared error with respect to ``w``.

    Each sample contributes ``2 * x * (x * w - y)``; the contributions are
    averaged over the number of samples.
    """
    per_sample = (2 * x * (x * w - y) for x, y in zip(xs, ys))
    return sum(per_sample) / len(xs)

print('predict (before training)', 4, forward(4, w))

# Per-epoch history for the cost curve.
epoch_list = []
cost_val_list = []

for epoch in range(100):
    # The cost is measured before the update; the printed w is the updated one.
    cost_val = cost_fuction(x_data, y_data, w)
    grad_val = gradient(x_data, y_data, w)
    w = w - learning_rate * grad_val
    epoch_list.append(epoch)
    cost_val_list.append(cost_val)
    print('Epoch: ', epoch, 'w=', w, 'loss=', cost_val)

print('predict (after training)', 4, forward(4, w))

# Plot the cost against the epoch index.
plt.plot(epoch_list, cost_val_list)
plt.ylabel('cost val')
plt.xlabel('epoch')
plt.show()

在这里插入图片描述

代码：随机梯度下降

import matplotlib.pyplot as plt

# Training samples of y = 2x.
x_data = [1.0, 2.0, 3.0]
y_data = [2.0, 4.0, 6.0]

# Initial weight guess and step size.
w = 1.0
learning_rate = 0.01


def forward(x, w):
    """Return the prediction of the bias-free linear model ``x * w``."""
    return x * w


def loss(x, y, w):
    """Return the squared error for the single sample ``(x, y)``."""
    return (y - forward(x, w)) ** 2


def gradient(x, y, w):
    """Return the derivative of the single-sample squared error with respect to ``w``."""
    error = x * w - y
    return 2 * x * error

print('predict (before training)', 4, forward(4, w))

# Per-epoch history for the loss curve.
epoch_list = []
loss_list = []

for epoch in range(100):
    for x, y in zip(x_data, y_data):
        # Stochastic update: the weight changes after every single sample.
        grad = gradient(x, y, w)
        w -= learning_rate * grad
        print('\tgrad: ', x, y, grad)
        # Loss of this sample, evaluated with the already-updated weight.
        l = loss(x, y, w)
    # Only the loss of the last sample of the epoch is logged and plotted.
    print('process: ', epoch, "w=", w, 'loss=', l)
    loss_list.append(l)
    epoch_list.append(epoch)

print('predict (after training)', 4, forward(4, w))

plt.plot(epoch_list, loss_list)
plt.ylabel('loss')
plt.xlabel('epoch')
plt.show()

在这里插入图片描述

代码：反向传播

# 如果是复杂的网络，没办法都自己写gradient的计算。
import torch
import matplotlib.pyplot as plt

# Training samples of y = 2x.
x_data = [1.0, 2.0, 3.0]
y_data = [2.0, 4.0, 6.0]

# Trainable weight; autograd tracks every operation that involves it.
w = torch.tensor([1.0], requires_grad=True)


def forward(x, w):
    """Return ``x * w``; the result is a tensor when ``w`` is a tensor."""
    return x * w


def loss(x, y, w):
    """Return the squared error for the single sample ``(x, y)``."""
    return (y - forward(x, w)) ** 2

# Use w.item() so the prediction is printed as a plain Python float.
print('predict (before training)', 4, forward(4, w.item()))

# Per-epoch history for the loss curve.
epoch_list = []
loss_list = []
learning_rate = 0.01

for epoch in range(100):
    for x, y in zip(x_data, y_data):
        l = loss(x, y, w)   # forward pass: builds the computation graph
        l.backward()        # backward pass: accumulates d(loss)/dw into w.grad
        print('\tgrad:', x, y, w.grad.item())
        # Update the leaf tensor in place without recording the step in the
        # graph; this replaces the legacy `.data` access.
        with torch.no_grad():
            w -= learning_rate * w.grad
        # Gradients accumulate across backward() calls, so clear them by hand.
        w.grad.zero_()
    # Only the loss of the last sample of the epoch is logged and plotted.
    print('process:', epoch, l.item())
    epoch_list.append(epoch)
    loss_list.append(l.item())

# Same float formatting as the "before training" line, not a tensor repr.
print('predict (after training)', 4, forward(4, w.item()))

plt.plot(epoch_list, loss_list)
plt.xlabel('epoch')
plt.ylabel('loss')
plt.show()

在这里插入图片描述

代码：反向传播（与上一段代码完全相同，原文重复）

# 如果是复杂的网络，没办法都自己写gradient的计算。
import torch
import matplotlib.pyplot as plt

# Training samples of y = 2x.
x_data = [1.0, 2.0, 3.0]
y_data = [2.0, 4.0, 6.0]

# Trainable weight; autograd tracks every operation that involves it.
w = torch.tensor([1.0], requires_grad=True)


def forward(x, w):
    """Return ``x * w``; the result is a tensor when ``w`` is a tensor."""
    return x * w


def loss(x, y, w):
    """Return the squared error for the single sample ``(x, y)``."""
    return (y - forward(x, w)) ** 2

# Use w.item() so the prediction is printed as a plain Python float.
print('predict (before training)', 4, forward(4, w.item()))

# Per-epoch history for the loss curve.
epoch_list = []
loss_list = []
learning_rate = 0.01

for epoch in range(100):
    for x, y in zip(x_data, y_data):
        l = loss(x, y, w)   # forward pass: builds the computation graph
        l.backward()        # backward pass: accumulates d(loss)/dw into w.grad
        print('\tgrad:', x, y, w.grad.item())
        # Update the leaf tensor in place without recording the step in the
        # graph; this replaces the legacy `.data` access.
        with torch.no_grad():
            w -= learning_rate * w.grad
        # Gradients accumulate across backward() calls, so clear them by hand.
        w.grad.zero_()
    # Only the loss of the last sample of the epoch is logged and plotted.
    print('process:', epoch, l.item())
    epoch_list.append(epoch)
    loss_list.append(l.item())

# Same float formatting as the "before training" line, not a tensor repr.
print('predict (after training)', 4, forward(4, w.item()))

plt.plot(epoch_list, loss_list)
plt.xlabel('epoch')
plt.ylabel('loss')
plt.show()

在这里插入图片描述

P5 作业：不同优化器比较

import torch
import matplotlib.pyplot as plt

# Step 1: prepare the dataset (three samples of y = 2x, one feature per row).
x_data = torch.Tensor([[1.0], [2.0], [3.0]])
y_data = torch.Tensor([[2.0], [4.0], [6.0]])


# Step 2: design the model.
class LinearModel(torch.nn.Module):
    """Single-feature linear regression model ``y = w * x + b``."""

    def __init__(self):
        super(LinearModel, self).__init__()
        self.linear = torch.nn.Linear(1, 1, bias=True)

    def forward(self, x):
        """Return the linear prediction for the batch ``x``."""
        y_pred = self.linear(x)
        return y_pred


# Optimizers under comparison. Models and optimizers are both derived from
# this one mapping so an optimizer can never be bound to another model's
# parameters (the hand-written dict bound Rprop to the RMSprop model).
optimizer_classes = {
    'SGD': torch.optim.SGD,
    'Adam': torch.optim.Adam,
    'Adagrad': torch.optim.Adagrad,
    'Adamax': torch.optim.Adamax,
    'ASGD': torch.optim.ASGD,
    'RMSprop': torch.optim.RMSprop,
    'Rprop': torch.optim.Rprop,
}

# One independent model per optimizer so the runs do not share parameters.
models = {name: LinearModel() for name in optimizer_classes}

# Step 3: construct the loss and the optimizers.
# reduction='sum' is the current spelling of the deprecated size_average=False.
criterion = torch.nn.MSELoss(reduction='sum')
optimizers = {
    name: optimizer_cls(models[name].parameters(), lr=0.01)
    for name, optimizer_cls in optimizer_classes.items()
}

loss_values = {name: [] for name in optimizers}

# Step 4: training cycle, 100 full-batch epochs per optimizer.
for opt_name, optimizer in optimizers.items():
    model = models[opt_name]
    for epoch in range(100):
        y_pred = model(x_data)              # forward: prediction
        loss = criterion(y_pred, y_data)    # forward: loss
        optimizer.zero_grad()               # clear gradients of the previous step
        loss.backward()                     # backward
        optimizer.step()                    # parameter update

        loss_values[opt_name].append(loss.item())

# One loss curve per optimizer, indexed by epoch.
plt.figure(figsize=(10, 5))
for opt_name in loss_values:
    plt.plot(loss_values[opt_name], label=opt_name)

plt.title("Loss by Optimization Algorithm")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.show()

在这里插入图片描述

代码：Logistic回归

import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt

# Three one-feature samples with binary labels.
x_data = torch.tensor([[1.0], [2.0], [3.0]])
y_data = torch.tensor([[0.0], [0.0], [1.0]])


class Logistic_Regression_Model(torch.nn.Module):
    """Single-feature logistic regression: a linear layer followed by a sigmoid."""

    def __init__(self):
        super().__init__()
        self.linear = torch.nn.Linear(1, 1, bias=True)

    def forward(self, x):
        """Return the sigmoid of the linear layer output for the batch ``x``."""
        logits = self.linear(x)
        return torch.sigmoid(logits)
    
# Optimizers under comparison. Models and optimizers are both derived from
# this one mapping so an optimizer can never be bound to another model's
# parameters (the hand-written dict bound Rprop to the RMSprop model).
optimizer_classes = {
    'SGD': torch.optim.SGD,
    'Adam': torch.optim.Adam,
    'Adagrad': torch.optim.Adagrad,
    'Adamax': torch.optim.Adamax,
    'ASGD': torch.optim.ASGD,
    'RMSprop': torch.optim.RMSprop,
    'Rprop': torch.optim.Rprop,
}

# One independent model per optimizer so the runs do not share parameters.
models = {name: Logistic_Regression_Model() for name in optimizer_classes}

# reduction='sum' is the current spelling of the deprecated size_average=False.
criterion = torch.nn.BCELoss(reduction='sum')
optimizers = {
    name: optimizer_cls(models[name].parameters(), lr=0.01)
    for name, optimizer_cls in optimizer_classes.items()
}

loss_values = {name: [] for name in optimizers}

# 1000 full-batch epochs per optimizer.
for opt_name, optimizer in optimizers.items():
    model = models[opt_name]
    for epoch in range(1000):
        y_pred = model(x_data)
        loss = criterion(y_pred, y_data)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        loss_values[opt_name].append(loss.item())

# One loss curve per optimizer, indexed by epoch.
plt.figure(figsize=(10, 5))
for opt_name in loss_values:
    plt.plot(loss_values[opt_name], label=opt_name)

plt.title("Loss by Optimization Algorithm")
plt.xlabel('Epoch')
plt.ylabel("Loss")
plt.legend()
plt.show()

在这里插入图片描述

P7 作业：不同激活函数比较

import torch
import numpy as np
import matplotlib.pyplot as plt

# np.loadtxt is given the .gz path directly; `delimiter` is the column separator.
xy = np.loadtxt('PyTorch深度学习实践/diabetes.csv.gz', delimiter=',', dtype=np.float32)
# Every column except the last is a feature.
x_data = torch.from_numpy(xy[:, :-1])
# Indexing with the list [-1] keeps the label column two-dimensional.
y_data = torch.from_numpy(xy[:, [-1]])

class Model(torch.nn.Module):
    """8-6-4-1 fully connected binary classifier with a configurable hidden activation.

    Args:
        activation_fn: Module applied after the first and second linear layers.
            Defaults to a new ``torch.nn.Sigmoid()`` for each instance.
    """

    def __init__(self, activation_fn=None):
        super(Model, self).__init__()
        self.linear1 = torch.nn.Linear(8, 6)
        self.linear2 = torch.nn.Linear(6, 4)
        self.linear3 = torch.nn.Linear(4, 1)
        # Build the default here: a module created in the signature is evaluated
        # once and would be shared by every Model constructed without an argument.
        self.activation_fn = torch.nn.Sigmoid() if activation_fn is None else activation_fn

    def forward(self, x):
        """Return one sigmoid output per row of ``x`` (8 features per row)."""
        x = self.activation_fn(self.linear1(x))
        x = self.activation_fn(self.linear2(x))
        # The output layer always uses a sigmoid, whatever the hidden activation is.
        x = torch.sigmoid(self.linear3(x))
        return x

# Hidden-layer activations under comparison.
activation_fns = {
    'Sigmoid': torch.nn.Sigmoid(),
    'ReLU': torch.nn.ReLU(),
    'Tanh': torch.nn.Tanh(),
    'Softplus': torch.nn.Softplus(),
}

criterion = torch.nn.BCELoss(reduction='mean')

# Per-activation loss history, one value per epoch.
loss_values = {name: [] for name in activation_fns}

for activation_name, activation_fn in activation_fns.items():
    # A new model and optimizer for every activation keeps the runs independent.
    model = Model(activation_fn=activation_fn)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    history = loss_values[activation_name]

    for epoch in range(100):
        optimizer.zero_grad()
        y_pred = model(x_data)
        loss = criterion(y_pred, y_data)
        loss.backward()
        optimizer.step()

        history.append(loss.item())

# One loss curve per hidden activation, indexed by epoch.
plt.figure(figsize=(10, 5))

for activation_name, losses in loss_values.items():
    plt.plot(losses, label=activation_name)

plt.xlabel('Epoch')
plt.ylabel('Loss')
# The curves compare activation functions, so the title says so.
plt.title('Loss by Activation Function')
plt.legend()
plt.show()

在这里插入图片描述

代码：Dataloader

import torch
import numpy as np
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.utils.data import random_split
import matplotlib.pyplot as plt

class Diabetes_Dataset(Dataset):
    """Map-style dataset over a comma-separated numeric file whose last column is the label."""

    def __init__(self, filepath):
        super().__init__()

        # Load the whole table into memory as float32.
        table = np.loadtxt(filepath, delimiter=',', dtype=np.float32)
        self.x_data = torch.from_numpy(table[:, :-1])
        # Indexing with [-1] keeps the label column two-dimensional.
        self.y_data = torch.from_numpy(table[:, [-1]])
        self.len = table.shape[0]

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        sample = (self.x_data[index], self.y_data[index])
        return sample

    def __len__(self):
        """Return the number of rows in the loaded file."""
        return self.len
    
class Logistic_Model(torch.nn.Module):
    """8-6-4-1 fully connected binary classifier with ReLU hidden activations."""

    def __init__(self):
        super().__init__()
        self.linear1 = torch.nn.Linear(8, 6)
        self.linear2 = torch.nn.Linear(6, 4)
        self.linear3 = torch.nn.Linear(4, 1)
        self.activate = torch.nn.ReLU()

    def forward(self, x):
        """Return one sigmoid output per row of ``x`` (8 features per row)."""
        hidden = x
        for layer in (self.linear1, self.linear2):
            hidden = self.activate(layer(hidden))
        return torch.sigmoid(self.linear3(hidden))
    
# Load the dataset and split it 80/20 into a training and a test subset.
dataset = Diabetes_Dataset('PyTorch深度学习实践/diabetes.csv.gz')
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

# num_workers=0 loads batches in the main process. This script runs at module
# level without an `if __name__ == '__main__':` guard, so worker processes
# would re-import and re-execute it on platforms that spawn them
# (Windows, macOS).
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True, num_workers=0)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False, num_workers=0)

# Initialise the model, the loss function and the optimizer.
model = Logistic_Model()
criterion = torch.nn.BCELoss(reduction='mean')
# NOTE(review): weight_decay=0.9 is a very strong L2 penalty and momentum=0.1 is
# unusually small - confirm these two values were not swapped by accident.
optimizer = torch.optim.SGD(model.parameters(), momentum=0.1, weight_decay=0.9, lr=0.01)

# Train the model and keep the mean batch loss of every epoch.
epoch_list = []
loss_list = []
num_batches = len(train_loader)

for epoch in range(100):
    total_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        y_pred = model(inputs)
        loss = criterion(y_pred, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    # Average over the number of mini-batches, not the number of samples.
    average_loss = total_loss / num_batches
    loss_list.append(average_loss)
    epoch_list.append(epoch)
    print("Epoch:", epoch, "average loss:", average_loss)

# Visualise the training run: mean batch loss per epoch.
plt.figure(figsize=(10, 5))
plt.plot(epoch_list, loss_list)
plt.title('Dataset & Dataloader')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.show()

# Evaluate the model on the held-out subset.
model.eval()
correct = 0
total = 0

with torch.no_grad():
    for inputs, labels in test_loader:
        # Threshold the predicted probability at 0.5 to get a 0/1 prediction.
        predicted = (model(inputs) > 0.5).float()
        hits = predicted.eq(labels)
        correct += hits.sum().item()
        total += labels.size(0)

accuracy = 100 * correct / total
print(f'Accuracy: {accuracy:.2f}%')

在这里插入图片描述

P8 作业：Kaggle数据集加载、训练和测试

数据下载地址：https://www.kaggle.com/c/titanic/data

当评估泰坦尼克号数据集中的特征与生还可能性的关联时，我们可以基于历史资料、统计学习和领域知识来进行一些推测：

Pclass（乘客等级）: 社会经济地位是一个重要因素，高等级乘客（如1等舱）可能有更高的生还几率。
Name（名字）: 虽然名字本身可能与生还率无关，但可以从名字中提取称谓（如 Mr., Mrs., Miss.），这可能反映了性别、婚姻状况和社会地位。
Sex（性别）: 历史记录显示，女性和儿童在灾难中的生还率更高，因为他们通常会被优先疏散。
Age（年龄）: 同样，儿童和年轻人可能有更高的生还几率。
SibSp（兄弟姐妹/配偶数量）和 Parch（父母/子女数量）: 这些特征反映了家庭结构，家庭成员可能会互相帮助，影响生还率。然而，太大的家庭可能在疏散时遇到困难。
Ticket（船票信息）: 船票信息可能隐含着有用的信息，比如团体旅行或位置信息，但这需要更深入的分析来决定其相关性。
Fare（票价）: 票价可能与 Pclass 相关，较高的票价可能意味着更高的社会经济地位和更高的生还几率。
Cabin（船舱号）: 船舱号可能与船上的位置有关，一些位置在船沉时可能更安全或者更容易疏散。
Embarked（登船口）: 登船口可能是一个次要因素，但如果某些登船口的乘客普遍属于特定的社会经济群体，这可能会间接影响生还率。

import pandas as pd
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder, StandardScaler
import matplotlib.pyplot as plt

# Custom PyTorch dataset that loads and preprocesses the Kaggle Titanic CSV files.
# Data download: https://www.kaggle.com/c/titanic/data
class TitanicDataset(Dataset):
    """Titanic passengers exposed as ``(features, label)`` float tensors.

    Args:
        filepath: Path of the CSV file to read.
        scaler: Fitted scaler exposing ``transform``. Required when ``is_train``
            is False; when ``is_train`` is True a new ``StandardScaler`` is
            fitted and stored in ``self.scaler`` instead.
        is_train: True for the labelled training file, False for a file
            without the ``Survived`` column.

    Raises:
        ValueError: If ``is_train`` is False and no ``scaler`` is given.
    """

    def __init__(self, filepath, scaler=None, is_train=True):
        super(TitanicDataset, self).__init__()

        # Fail early with a clear message instead of an AttributeError on
        # ``None.transform`` deep inside the preprocessing.
        if not is_train and scaler is None:
            raise ValueError("a fitted scaler is required when is_train is False")

        # Read the CSV file.
        self.dataframe = pd.read_csv(filepath)
        self.scaler = scaler
        # Preprocess the DataFrame in place and build features / labels.
        self.preprocess(self.dataframe, is_train)

    def preprocess(self, df, is_train):
        """Clean ``df`` in place and populate ``self.features`` and ``self.labels``."""
        # Drop the columns that are not used as features.
        df.drop(['PassengerId', 'Name', 'Ticket', 'Cabin'], axis=1, inplace=True)

        # Fill missing values. The result is assigned back to the column:
        # `df[col].fillna(..., inplace=True)` acts on an intermediate object and
        # does not reliably update `df` in current pandas versions.
        df['Age'] = df['Age'].fillna(df['Age'].mean())                  # mean age
        df['Fare'] = df['Fare'].fillna(df['Fare'].mean())               # mean fare
        df['Embarked'] = df['Embarked'].fillna(df['Embarked'].mode()[0])  # most frequent port

        # Encode sex and port of embarkation as integers with LabelEncoder,
        # which maps the labels to the range 0 .. n_classes-1.
        # NOTE(review): the encoder is fitted separately on every file, so the
        # training and test files must contain the same set of categories for
        # the codes to agree - confirm for new data.
        df['Sex'] = LabelEncoder().fit_transform(df['Sex'])
        df['Embarked'] = LabelEncoder().fit_transform(df['Embarked'])

        # One-hot encoding is the alternative for categories without any order:
        # it creates one binary column per category. Switching to it changes the
        # number of feature columns (input_features=10 according to the
        # original note):
        #   df = pd.get_dummies(df, columns=['Sex', 'Embarked'])

        if is_train:
            # Training file: fit a new StandardScaler on 'Age' and 'Fare' so
            # that features with a large value range do not dominate.
            self.scaler = StandardScaler()
            df[['Age', 'Fare']] = self.scaler.fit_transform(df[['Age', 'Fare']])

            # The 'Survived' column is the label; everything else is a feature.
            self.labels = df['Survived'].values
            self.features = df.drop('Survived', axis=1).values

        else:
            # Test file: reuse the scaler that was passed in.
            df[['Age', 'Fare']] = self.scaler.transform(df[['Age', 'Fare']])

            # No 'Survived' column is expected, so every remaining column is a feature.
            self.features = df.values
            self.labels = None

    def __len__(self):
        """Return the number of rows in the loaded file."""
        return len(self.dataframe)

    def __getitem__(self, index):
        """Return ``(features, label)`` for one sample.

        Without labels the second element is a zero tensor of size 1 used as a
        placeholder, so that batches can still be collated.
        """
        features = torch.tensor(self.features[index], dtype=torch.float)
        if self.labels is not None:
            return features, torch.tensor(self.labels[index], dtype=torch.float)
        return features, torch.zeros(1, dtype=torch.float)


# Custom binary classification model
class BinaryClassificationModel(torch.nn.Module):
    """Two hidden layers of 64 units followed by a single sigmoid output."""

    def __init__(self, input_features):
        super().__init__()
        self.linear1 = torch.nn.Linear(input_features, 64)
        self.linear2 = torch.nn.Linear(64, 64)
        self.linear3 = torch.nn.Linear(64, 1)

        # Dropout layer, intended to reduce overfitting.
        self.dropout = torch.nn.Dropout(p=0.1)

        # Batch-norm layers, intended to stabilise training.
        self.batchnorm1 = torch.nn.BatchNorm1d(64)
        self.batchnorm2 = torch.nn.BatchNorm1d(64)

    def forward(self, x):
        """Return one probability per row of ``x``."""
        # Each hidden stage is linear -> ReLU -> batch norm -> dropout.
        hidden = self.dropout(self.batchnorm1(F.relu(self.linear1(x))))
        hidden = self.dropout(self.batchnorm2(F.relu(self.linear2(hidden))))

        # Output layer followed by a sigmoid.
        logits = self.linear3(hidden)
        return torch.sigmoid(logits)

# Training
def train(models, train_loader, criterion, optimizers, num_epochs):
    """Train one model per optimizer and record the mean sample loss of every epoch.

    Args:
        models: Mapping from optimizer name to the model that optimizer updates.
        train_loader: Iterable of ``(inputs, labels)`` batches; its ``dataset``
            attribute must support ``len()``.
        criterion: Loss function; assumed to return the mean over the batch.
        optimizers: Mapping from optimizer name to the optimizer instance.
        num_epochs: Number of passes over ``train_loader`` for every model.

    Returns:
        Dict mapping every optimizer name to its list of per-epoch losses.
    """
    epoch_losses = {name: [] for name in optimizers}

    print('start training')

    for optim_name, optimizer in optimizers.items():
        model = models[optim_name]
        for epoch in range(num_epochs):
            model.train()
            running_loss = 0.0
            for inputs, labels in train_loader:
                optimizer.zero_grad()                       # clear gradients
                outputs = model(inputs)                     # forward pass
                # Give the output the shape of the labels. A bare squeeze()
                # would also remove the batch dimension of a size-1 batch and
                # make the loss reject the shapes.
                loss = criterion(outputs.reshape(labels.shape), labels)
                loss.backward()                             # backward pass
                optimizer.step()                            # parameter update
                # The criterion yields the batch mean; multiplying by the batch
                # size accumulates the total loss so that every sample counts
                # equally, whichever batch it is in.
                running_loss += loss.item() * inputs.size(0)
            epoch_loss = running_loss / len(train_loader.dataset)
            print(f'Epoch {epoch+1}/{num_epochs} Loss: {epoch_loss:.4f}')
            epoch_losses[optim_name].append(epoch_loss)
    return epoch_losses

# Inference
def test(model, test_loader, optimizers):
    """Predict 0/1 labels for every batch of ``test_loader`` with every model.

    Args:
        model: Mapping from optimizer name to the trained model. The parameter
            keeps its singular name for backward compatibility.
        test_loader: Iterable of ``(inputs, placeholder)`` batches; the second
            element is ignored.
        optimizers: Mapping whose keys select the models to evaluate.

    Returns:
        Dict mapping every optimizer name to a flat list of predictions (floats).
    """
    results = {}
    for optim_name in optimizers:
        # Use the mapping that was passed in, not a module-level global.
        net = model[optim_name]
        net.eval()

        predictions = []
        with torch.no_grad():   # no gradients needed: saves compute and memory
            for inputs, _ in test_loader:
                outputs = net(inputs)
                # Threshold the sigmoid output at 0.5. reshape(-1) keeps a 1-D
                # tensor even for a size-1 batch, so tolist() always yields a list.
                predicted = (outputs > 0.5).float().reshape(-1)
                predictions.extend(predicted.tolist())
        print("Predict result: ", predictions)
        results[optim_name] = predictions
    return results

    # With a labelled validation set the accuracy could be computed inside the
    # batch loop by counting `(predicted == labels).sum().item()` over
    # `labels.size(0)` samples; the test file used here has no labels.


# Load the data.
# Training set: no scaler is passed in, so the dataset fits a new one.
train_dataset = TitanicDataset('data/titanic/train.csv', scaler=None, is_train=True)
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True, num_workers=0)

# Test set: reuse the scaler fitted on the training set.
test_dataset = TitanicDataset('data/titanic/test.csv', scaler=train_dataset.scaler, is_train=False)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False, num_workers=0)

# The raw files describe 10 attributes (Pclass Name Sex Age SibSp Parch Ticket
# Fare Cabin Embarked); after preprocessing only 7 remain as features:
# Pclass Sex Age SibSp Parch Fare Embarked.
models = {name: BinaryClassificationModel(input_features=7) for name in ('Adam', 'SGD')}

# Loss function and one optimizer per model.
criterion = torch.nn.BCELoss(reduction='mean')
optimizers = {
    'Adam': torch.optim.Adam(
        models['Adam'].parameters(),
        lr=0.001,
        betas=(0.9, 0.999),
        eps=1e-8,
        weight_decay=0.0001,
    ),
    'SGD': torch.optim.SGD(
        models['SGD'].parameters(),
        lr=0.01,
        weight_decay=0.001,
        momentum=0.9,
    ),
}


# Train the models.
num_epochs = 100
losses = train(models, train_loader, criterion, optimizers, num_epochs)

# Reference labels for the test set.
# NOTE(review): gender_submission.csv is Kaggle's example submission (a simple
# gender-based baseline), not the real ground truth - the "accuracy" below is
# the agreement with that baseline. Confirm this is the intended reference.
labels_path = 'data/titanic/gender_submission.csv'
data_frame = pd.read_csv(labels_path)
data_frame.drop(['PassengerId'], axis=1, inplace=True)
labels = data_frame['Survived'].values
print('Test Dataset 正确结果: ', labels)

# Model predictions.
results = test(models, test_loader, optimizers)
print('Test Dataset 预测结果: ', results)

# Accuracy per optimizer.
for optimizer_name, predicted in results.items():
    # Convert the prediction list to an array explicitly instead of relying on
    # the implicit list-vs-ndarray comparison, and check the lengths first.
    predicted_array = pd.Series(predicted, dtype='float64').to_numpy()
    if predicted_array.shape != labels.shape:
        raise ValueError(
            f'{len(predicted_array)} predictions for {len(labels)} reference labels'
        )
    accuracy = 100 * (predicted_array == labels).sum() / len(predicted)
    print(f'Accuracy for {optimizer_name}: {accuracy:.2f}%')


plt.figure(figsize=(10, 5))
# The loop variable has its own name so that the `losses` dict returned by
# train() is not overwritten by the last curve.
for optim_name, epoch_losses in losses.items():
    plt.plot(epoch_losses, label=optim_name)
    # Explicit array conversion instead of an implicit list-vs-ndarray comparison.
    predicted_array = pd.Series(results[optim_name], dtype='float64').to_numpy()
    final_accuracy = 100 * (predicted_array == labels).sum() / len(predicted_array)
    # Annotate the end of the curve with the final accuracy of that model.
    plt.annotate(f'Final Acc: {final_accuracy:.2f}%', xy=(num_epochs - 1, epoch_losses[-1]), xytext=(-40, 10), textcoords='offset points', fontsize=10)

plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Titanic Dataset training Loss Curve')
plt.legend()
plt.show()

在这里插入图片描述

代码：MNIST多分类任务

import torch
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.optim as optim

# Prepare the dataset
batch_size = 64
mnist_root = 'data/MNIST/'

# Convert images to tensors, then normalise the single channel.
# 0.1307 / 0.3081 are presumably the MNIST mean and std - TODO confirm.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307, ), (0.3081, )),
])

# Training split, reshuffled every epoch.
train_dataset = datasets.MNIST(mnist_root, train=True, transform=transform, download=True)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

# Test split, kept in its original order.
test_dataset = datasets.MNIST(mnist_root, train=False, transform=transform, download=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Design the model
class Net(torch.nn.Module):
    """Fully connected classifier 784-512-256-128-64-10 with ReLU hidden layers."""

    def __init__(self):
        super().__init__()
        self.linear1 = torch.nn.Linear(784, 512)
        self.linear2 = torch.nn.Linear(512, 256)
        self.linear3 = torch.nn.Linear(256, 128)
        self.linear4 = torch.nn.Linear(128, 64)
        self.linear5 = torch.nn.Linear(64, 10)

    def forward(self, x):
        """Return raw class scores (logits), one row of 10 values per sample."""
        # Flatten every sample to a vector of 784 values.
        hidden = x.view(-1, 784)
        for layer in (self.linear1, self.linear2, self.linear3, self.linear4):
            hidden = F.relu(layer(hidden))
        # No activation on the last layer: CrossEntropyLoss applies
        # log-softmax followed by the negative log-likelihood itself.
        return self.linear5(hidden)


model = Net()

# Construct the loss and the optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

# Training
def train(epoch):
    """Run one pass over ``train_loader`` and print the mean loss of every 300 batches.

    Uses the module-level ``model``, ``criterion``, ``optimizer`` and
    ``train_loader``; ``epoch`` is only used for the progress message.
    """
    running_loss = 0.0
    for step, (inputs, target) in enumerate(train_loader, start=1):
        optimizer.zero_grad()

        loss = criterion(model(inputs), target)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Report and reset the running sum after every 300 batches.
        if step % 300 == 0:
            print('[%d, %5d] loss: %.3f' % (epoch+1, step, running_loss/300))
            running_loss = 0.0

# Evaluation
def test():
    """Print the accuracy of the module-level ``model`` over ``test_loader``."""
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            # The predicted class is the index of the largest score in each row.
            predicted = model(inputs).argmax(dim=1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)
    print('Accuracy on test set: %d %%' %(100*correct/total))


if __name__ == '__main__':
    # Evaluate after every epoch. With only 10 epochs the former
    # `epoch % 10 == 0` check ran the evaluation once, right after the first
    # epoch, and never on the fully trained model.
    for epoch in range(10):
        train(epoch)
        test()

P9 作业：Kaggle Otto Group Product Classification Challenge

数据下载地址：https://www.kaggle.com/c/otto-group-product-classification-challenge/data
百度网盘链接下载： https://pan.baidu.com/s/1g8rshQdwba7ctwLmzl69Qw?pwd=4nd4 提取码: 4nd4

import torch
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler        # pip install scikit-learn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt

class OttoDataset(Dataset):
    """Otto Group product data as standardised float features and integer labels.

    Args:
        feature_filepath: CSV whose first column is skipped as an id. In
            ``'train'`` mode the last column holds the class label.
        label_filepath: Optional CSV with one score column per class after the
            first column; only used in ``'test'`` mode.
        mode: ``'train'`` or ``'test'``.
        scaler: Fitted scaler exposing ``transform``; required in ``'test'``
            mode. In ``'train'`` mode a new ``StandardScaler`` is fitted and
            stored in ``self.scaler``.

    Raises:
        ValueError: If ``mode`` is unknown, or if ``mode`` is ``'test'`` and no
            ``scaler`` is given.
    """

    def __init__(self, feature_filepath, label_filepath=None, mode='train', scaler=None):
        super(OttoDataset, self).__init__()

        # Load the dataset into a pandas dataframe.
        data = pd.read_csv(feature_filepath)

        if mode == 'train':
            # Take the text after the last '_' of every label, convert it to an
            # integer and shift it to zero-based indexing.
            self.labels = torch.tensor(data.iloc[:, -1].apply(lambda x: int(x.split('_')[-1]) - 1).values, dtype=torch.long)

            # StandardScaler subtracts the mean of every feature column and
            # divides by its standard deviation.
            self.scaler = StandardScaler()

            # All columns except the first and the last are features.
            features = data.iloc[:, 1:-1].values
            self.features = torch.tensor(self.scaler.fit_transform(features), dtype=torch.float32)

        elif mode == 'test':
            # An unfitted scaler cannot transform anything, so a missing scaler
            # is reported here instead of deep inside scikit-learn.
            if scaler is None:
                raise ValueError("mode='test' requires the scaler fitted on the training set")

            features = data.iloc[:, 1:].values

            # Apply the training-set scaling to the test features.
            self.scaler = scaler
            self.features = torch.tensor(self.scaler.transform(features), dtype=torch.float32)

            if label_filepath is not None:
                label_data = pd.read_csv(label_filepath)
                # Assumes the columns after the first hold one score per class;
                # the label is the index of the largest value in every row.
                self.labels = torch.tensor(label_data.iloc[:, 1:].values.argmax(axis=1), dtype=torch.long)

            else:
                self.labels = None

        # Any other mode is an error.
        else:
            raise ValueError("Mode must be 'train' or 'test'")

        # Store the length of the dataset.
        self.len = len(self.features)

    def __len__(self):
        """Return the number of samples."""
        return self.len

    def __getitem__(self, index):
        """Return ``(features, label)``; the label is -1 when no labels were loaded."""
        label = self.labels[index] if self.labels is not None else -1
        return self.features[index], label
    

class FullyConnectedModel(torch.nn.Module):
    """Multi-class classifier with hidden layers of 128, 64 and 32 units."""

    def __init__(self, input_features, output_classes):
        super().__init__()

        # Network layers (more could be added).
        self.fc1 = torch.nn.Linear(input_features, 128)
        self.fc2 = torch.nn.Linear(128, 64)
        self.fc3 = torch.nn.Linear(64, 32)
        self.fc4 = torch.nn.Linear(32, output_classes)

        # Dropout layer, intended to reduce overfitting.
        self.dropout = torch.nn.Dropout(p=0.3)

        # Batch-norm layers, intended to stabilise training.
        self.batchnorm1 = torch.nn.BatchNorm1d(128)
        self.batchnorm2 = torch.nn.BatchNorm1d(64)
        self.batchnorm3 = torch.nn.BatchNorm1d(32)

    def forward(self, x):
        """Return raw class scores (logits) for every row of ``x``."""
        hidden_stages = (
            (self.fc1, self.batchnorm1),
            (self.fc2, self.batchnorm2),
            (self.fc3, self.batchnorm3),
        )
        # Each hidden stage is linear -> batch norm -> ReLU -> dropout.
        for linear, batchnorm in hidden_stages:
            x = self.dropout(F.relu(batchnorm(linear(x))))
        return self.fc4(x)
    

def train(epoch, train_loader, model, criterion, optimizer):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        epoch: Epoch index, used only in the progress message.
        train_loader: Iterable of ``(inputs, targets)`` batches supporting ``len()``.
        model: Network to train; batches are moved to the device of its parameters.
        criterion: Loss function called as ``criterion(outputs, targets)``.
        optimizer: Optimizer bound to the parameters of ``model``.

    Returns:
        The mean batch loss of the epoch as a float.
    """
    model.train()

    # Take the device from the model instead of a module-level `device`, so the
    # function also works when that global is not defined.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    running_loss = 0.0
    for batch_idx, data in enumerate(train_loader, 0):
        inputs, targets = data

        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        if batch_idx % 300 == 0:
            # running_loss is cumulative over the epoch, so the running mean
            # divides by the number of batches seen so far, not by 300.
            print('Epoch:[{}/{}], Loss:{:.4f}'.format(epoch, batch_idx, running_loss / (batch_idx + 1)))

    # Mean loss per batch over the whole epoch.
    average_loss = running_loss / len(train_loader)
    return average_loss


def test(test_loader, model):
    """Return the accuracy (in percent) of ``model`` over ``test_loader``.

    Args:
        test_loader: Iterable of ``(inputs, targets)`` batches.
        model: Network to evaluate; batches are moved to the device of its parameters.
    """
    model.eval()

    # Take the device from the model instead of a module-level `device`.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    correct = 0.0
    total = 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            # Move the batch to the model's device BEFORE the forward pass;
            # otherwise a model on the GPU receives CPU tensors and fails.
            inputs = inputs.to(device)
            targets = targets.to(device)
            outputs = model(inputs)
            # The predicted class is the index of the largest score in each row.
            _, predicted = torch.max(outputs, dim=1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
    accuracy = 100 * (correct / total)
    print("Accuracy on test data is {:.2f}".format(accuracy))
    return accuracy



if __name__ == '__main__':
    import os

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Prepare the datasets; the test set reuses the scaler fitted on the training set.
    train_dataset = OttoDataset(feature_filepath='data/Otto/train.csv', mode='train')
    scaler = train_dataset.scaler
    test_dataset = OttoDataset(feature_filepath='data/Otto/test.csv', label_filepath='data/Otto/otto_correct_submission.csv', mode='test', scaler=scaler)

    train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True, num_workers=0)
    test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False, num_workers=0)

    # Design the model
    model = FullyConnectedModel(input_features=93, output_classes=9).to(device)

    # Construct the loss, the optimizer and a scheduler that multiplies the
    # learning rate by 0.1 every 25 epochs.
    criterion = torch.nn.CrossEntropyLoss().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=25, gamma=0.1)

    # Train and test
    train_losses = []
    test_accuracies = []
    # Epochs at which the accuracy was measured. They are recorded explicitly so
    # the accuracy curve is plotted against the right x values (a fixed
    # range(0, 101, 2) labelled the final epoch 99 as 100 and only matched
    # num_epochs == 100).
    eval_epochs = []

    num_epochs = 100
    for epoch in range(num_epochs):
        train_loss = train(epoch, train_loader, model, criterion, optimizer)
        train_losses.append(train_loss)

        # Evaluate every second epoch and after the last one.
        if epoch % 2 == 0 or epoch == num_epochs - 1:
            test_accuracy = test(test_loader, model)
            test_accuracies.append(test_accuracy)
            eval_epochs.append(epoch)

        # Update the learning rate
        scheduler.step()

    # Save the model parameters for future use; create the target directory
    # first because torch.save does not create missing directories.
    save_path = 'model/09_kaggle_OttoDataset_model.pth'
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    torch.save(model.state_dict(), save_path)

    # Visualise
    plt.figure(figsize=(12, 5))

    # Loss curve
    plt.subplot(1, 2, 1)
    plt.plot(train_losses, label='Train Loss')
    plt.title('Training Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()

    # Accuracy curve, plotted against the epochs that were actually evaluated.
    plt.subplot(1, 2, 2)
    plt.plot(eval_epochs, test_accuracies, label='Test Accuracy')
    plt.title('Testing Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy (%)')
    plt.legend()

    plt.show()