Homework for 刘二大人's course 《PyTorch深度学习实践》 (PyTorch Deep Learning Practice)

P1 Homework

import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

x_data = [1.0, 2.0, 3.0]
y_data = [3.0, 4.0, 6.0]

def forward(x, w, b):
    return x * w + b

def loss(x, y, w, b):
    y_pred = forward(x, w, b)
    loss = (y_pred - y) ** 2
    return loss

w_list = np.arange(0.0, 4.1, 0.1)
b_list = np.arange(-2.0, 2.1, 0.1)
# mse_matrix stores the mean squared error for every (w, b) combination
mse_matrix = np.zeros((len(w_list), len(b_list)))

for i, w in enumerate(w_list):
    for j, b in enumerate(b_list):
        l_sum = 0
        for x_val, y_val in zip(x_data, y_data):
            l_sum += loss(x_val, y_val, w, b)
        mse_matrix[i, j] = l_sum / len(x_data)
    
W, B = np.meshgrid(w_list, b_list)
fig = plt.figure('Linear Model Cost Value')
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(W, B, mse_matrix.T, cmap='viridis')

ax.set_xlabel('w')
ax.set_ylabel('b')
ax.set_zlabel('loss')

plt.show()

[Figure: 3D surface of the MSE loss over the (w, b) grid]
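
To read the optimum off the grid, a small addition to the script above locates the cell with the smallest loss:

# find the (w, b) pair with the smallest MSE on the grid
i, j = np.unravel_index(np.argmin(mse_matrix), mse_matrix.shape)
print('best w = %.1f, best b = %.1f, mse = %.4f' % (w_list[i], b_list[j], mse_matrix[i, j]))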

Code: Gradient Descent

import numpy as np
import matplotlib.pyplot as plt

x_data = [1.0, 2.0, 3.0]
y_data = [2.0, 4.0, 6.0]

w = 1.0
learning_rate = 0.01

def forward(x, w):
    return x * w

def cost_function(xs, ys, w):
    cost = 0
    for x, y in zip(xs, ys):
        y_pred = forward(x, w)
        cost += (y_pred - y) ** 2
    return cost / len(xs)

def gradient(xs, ys, w):
    grad = 0
    for x, y in zip(xs, ys):
        grad += 2 * x * (x * w - y)
    return grad / len(xs)

print('predict (before training)', 4, forward(4, w))

epoch_list = []
cost_val_list = []

for epoch in range(100):
    cost_val = cost_function(x_data, y_data, w)
    grad_val = gradient(x_data, y_data, w)
    w -= learning_rate * grad_val
    print('Epoch: ', epoch, 'w=', w, 'loss=', cost_val)
    epoch_list.append(epoch)
    cost_val_list.append(cost_val)
print('predict (after training)', 4, forward(4, w))

plt.plot(epoch_list, cost_val_list)
plt.xlabel('epoch')
plt.ylabel('cost val')
plt.show()

[Figure: cost vs. epoch for batch gradient descent]
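
For reference, the update implemented by gradient() comes from differentiating the MSE cost with respect to w:

$$\frac{\partial\,\text{cost}}{\partial w}=\frac{\partial}{\partial w}\,\frac{1}{N}\sum_{n=1}^{N}(x_n w-y_n)^2=\frac{1}{N}\sum_{n=1}^{N}2\,x_n\,(x_n w-y_n)$$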

Code: Stochastic Gradient Descent

import matplotlib.pyplot as plt

x_data = [1.0, 2.0, 3.0]
y_data = [2.0, 4.0, 6.0]

w = 1.0
learning_rate = 0.01

def forward(x, w):
    return x * w

def loss(x, y, w):
    y_pred = forward(x, w)
    loss = (y - y_pred) ** 2
    return loss

def gradient(x, y, w):
    return 2 * x * (x * w - y)

print('predict (before training)', 4, forward(4, w))

epoch_list = []
loss_list = []

for epoch in range(100):
    for x, y in zip(x_data, y_data):
        # update w as soon as each single sample is seen
        grad = gradient(x, y, w)
        w = w - learning_rate * grad
        print('\tgrad: ', x, y, grad)
        l = loss(x, y, w)
    print('progress: ', epoch, 'w=', w, 'loss=', l)
    epoch_list.append(epoch)
    loss_list.append(l)
print('predict (after training)', 4, forward(4, w))

plt.plot(epoch_list, loss_list)
plt.xlabel('epoch')
plt.ylabel('loss')
plt.show()

[Figure: loss vs. epoch for stochastic gradient descent]
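
The loop above always visits the three samples in the same order; textbook SGD shuffles them every epoch. A minimal variation of the inner loop, using the same data and helpers as above:

import random

samples = list(zip(x_data, y_data))
random.shuffle(samples)  # visit the samples in a random order this epoch
for x, y in samples:
    w = w - learning_rate * gradient(x, y, w)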

Code: Backpropagation

# For a complex network we cannot hand-derive every gradient; let autograd do it.
import torch
import matplotlib.pyplot as plt

x_data = [1.0, 2.0, 3.0]
y_data = [2.0, 4.0, 6.0]

w = torch.Tensor([1.0])
w.requires_grad = True

def forward(x, w):
    return x * w

def loss(x, y, w):
    y_pred = forward(x, w)
    loss = (y - y_pred) ** 2
    return loss

print('predict (before training)', 4, forward(4, w.item()))

epoch_list = []
loss_list = []

for epoch in range(100):
    for x, y in zip(x_data, y_data):
        l = loss(x, y, w)
        l.backward()
        print('\tgrad:', x, y, w.grad.item())
        w.data = w.data - 0.01 * w.grad.data
        w.grad.data.zero_()
    print('progress:', epoch, l.item())
    epoch_list.append(epoch)
    loss_list.append(l.item())

print('predict (after training)', 4, forward(4, w).item())

plt.plot(epoch_list, loss_list)
plt.xlabel('epoch')
plt.ylabel('loss')
plt.show()

[Figure: loss vs. epoch when gradients come from autograd]
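
Updating through w.data works but sidesteps autograd's bookkeeping; an equivalent, more idiomatic form of the same update step (a sketch with identical semantics) is:

with torch.no_grad():
    w -= 0.01 * w.grad  # apply the update without recording it in the graph
w.grad.zero_()          # clear the accumulated gradient before the next backward()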

P5 Homework: Comparing Optimizers

import torch
import matplotlib.pyplot as plt

# step 1: Prepare Dataset
x_data = torch.Tensor([[1.0], [2.0], [3.0]])
y_data = torch.Tensor([[2.0], [4.0], [6.0]])

# step 2: Design Model
class LinearModel(torch.nn.Module):
    def __init__(self):
        super(LinearModel, self).__init__()
        self.linear = torch.nn.Linear(1, 1, bias=True)
    
    def forward(self, x):
        y_pred = self.linear(x)
        return y_pred

models = {
    'SGD': LinearModel(),
    'Adam': LinearModel(),
    'Adagrad': LinearModel(),
    'Adamax': LinearModel(),
    'ASGD': LinearModel(),
    'RMSprop': LinearModel(),
    'Rprop': LinearModel(),
}

# step 3: Construct Loss and Optimizer
criterion = torch.nn.MSELoss(reduction='sum')   # size_average=False is deprecated; reduction='sum' is equivalent
optimizers = {
    'SGD': torch.optim.SGD(models['SGD'].parameters(), lr=0.01),
    'Adam': torch.optim.Adam(models['Adam'].parameters(), lr=0.01),
    'Adagrad': torch.optim.Adagrad(models['Adagrad'].parameters(), lr=0.01),
    'Adamax': torch.optim.Adamax(models['Adamax'].parameters(), lr=0.01),
    'ASGD': torch.optim.ASGD(models['ASGD'].parameters(), lr=0.01),
    'RMSprop': torch.optim.RMSprop(models['RMSprop'].parameters(), lr=0.01),
    'Rprop': torch.optim.Rprop(models['Rprop'].parameters(), lr=0.01),
    }

loss_values = {k: [] for k in optimizers.keys()}

# step 4: Training Cycle
for opt_name, optimizer in optimizers.items():
    model = models[opt_name]
    for epoch in range(100):
        y_pred = model(x_data)              # forward predict
        loss = criterion(y_pred, y_data)    # forward loss
        optimizer.zero_grad()               # set the grad to zero
        loss.backward()                     # backward
        optimizer.step()                    # update

        loss_values[opt_name].append(loss.item())

plt.figure(figsize=(10, 5))
for opt_name, losses in loss_values.items():
    plt.plot(losses, label=opt_name)

plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.title("Loss by Optimization Algorithm")
plt.show()

[Figure: loss curves for each optimizer on the linear model]
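
Since the data follow y = 2x exactly, it is instructive to print the parameters each optimizer converged to; the weight should approach 2 and the bias 0:

for name, m in models.items():
    print(name, 'w =', m.linear.weight.item(), 'b =', m.linear.bias.item())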

Code: Logistic Regression

import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt

x_data = torch.Tensor([[1.0], [2.0], [3.0]])
y_data = torch.Tensor([[0], [0], [1]])

class Logistic_Regression_Model(torch.nn.Module):
    def __init__(self):
        super(Logistic_Regression_Model, self).__init__()
        self.linear = torch.nn.Linear(1, 1, bias=True)

    def forward(self, x):
        y_pred = torch.sigmoid(self.linear(x))  # F.sigmoid is deprecated in favor of torch.sigmoid
        return y_pred
    
models = {
    'SGD': Logistic_Regression_Model(),
    'Adam': Logistic_Regression_Model(),
    'Adagrad': Logistic_Regression_Model(),
    'Adamax': Logistic_Regression_Model(),
    'ASGD': Logistic_Regression_Model(),
    'RMSprop': Logistic_Regression_Model(),
    'Rprop': Logistic_Regression_Model(),
}

criterion = torch.nn.BCELoss(reduction='sum')   # size_average=False is deprecated; reduction='sum' is equivalent
optimizers = {
    'SGD': torch.optim.SGD(models['SGD'].parameters(), lr=0.01),
    'Adam': torch.optim.Adam(models['Adam'].parameters(), lr=0.01),
    'Adagrad': torch.optim.Adagrad(models['Adagrad'].parameters(), lr=0.01),
    'Adamax': torch.optim.Adamax(models['Adamax'].parameters(), lr=0.01),
    'ASGD': torch.optim.ASGD(models['ASGD'].parameters(), lr=0.01),
    'RMSprop': torch.optim.RMSprop(models['RMSprop'].parameters(), lr=0.01),
    'Rprop': torch.optim.Rprop(models['Rprop'].parameters(), lr=0.01),
    }

loss_values = {k: [] for k in optimizers.keys()}

for opt_name, optimizer in optimizers.items():
    model = models[opt_name]
    for epoch in range(1000):
        y_pred = model(x_data)
        loss = criterion(y_pred, y_data)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        loss_values[opt_name].append(loss.item())

plt.figure(figsize=(10, 5))

for opt_name, losses in loss_values.items():
    plt.plot(losses, label=opt_name)

plt.xlabel('Epoch')
plt.ylabel("Loss")
plt.legend()
plt.title("Loss by Optimization Algorithm")
plt.show()

[Figure: loss curves for each optimizer on logistic regression]
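
To turn one of the trained classifiers into a prediction, feed it a new input and read off the probability of the positive class (using the Adam model here as an example):

x_test = torch.Tensor([[4.0]])
with torch.no_grad():
    print('P(y=1 | x=4) =', models['Adam'](x_test).item())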

P7 Homework: Comparing Activation Functions

import torch
import numpy as np
import matplotlib.pyplot as plt

# np.loadtxt reads the gzipped file directly; delimiter is the field separator
xy = np.loadtxt('PyTorch深度学习实践/diabetes.csv.gz', delimiter=',', dtype=np.float32)
x_data = torch.from_numpy(xy[:, :-1])
y_data = torch.from_numpy(xy[:, [-1]])

class Model(torch.nn.Module):
    def __init__(self, activation_fn=torch.nn.Sigmoid()):
        super(Model, self).__init__()
        self.linear1 = torch.nn.Linear(8, 6)
        self.linear2 = torch.nn.Linear(6, 4)
        self.linear3 = torch.nn.Linear(4, 1)
        self.activation_fn = activation_fn
    def forward(self, x):
        x = self.activation_fn(self.linear1(x))
        x = self.activation_fn(self.linear2(x))
        x = torch.sigmoid(self.linear3(x))
        return x

activation_fns = {
    'Sigmoid': torch.nn.Sigmoid(),
    'ReLU': torch.nn.ReLU(),
    'Tanh': torch.nn.Tanh(),
    'Softplus': torch.nn.Softplus(),
}

criterion = torch.nn.BCELoss(reduction='mean')

loss_values = {k: [] for k in activation_fns.keys()}

for activation_name, activation_fn in activation_fns.items():
    model = Model(activation_fn=activation_fn)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

    for epoch in range(100):
        y_pred = model(x_data)
        loss = criterion(y_pred, y_data)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        loss_values[activation_name].append(loss.item())

plt.figure(figsize=(10, 5))

for activation_name, losses in loss_values.items():
    plt.plot(losses, label=activation_name)

plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Loss by Activation Function')
plt.legend()
plt.show()

[Figure: loss curves for each activation function on the diabetes dataset]
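
Loss alone is hard to interpret for a classifier, so a quick in-sample accuracy check helps; this sketch scores whichever model was trained last in the loop above (Softplus) on the training data:

with torch.no_grad():
    y_pred = model(x_data)
    accuracy = ((y_pred > 0.5).float() == y_data).float().mean().item()
print('train accuracy: %.2f%%' % (100 * accuracy))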

Code: Dataset & DataLoader

import torch
import numpy as np
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.utils.data import random_split
import matplotlib.pyplot as plt

class Diabetes_Dataset(Dataset):
    def __init__(self, filepath):
        super(Diabetes_Dataset, self).__init__()

        # load the data
        xy = np.loadtxt(filepath, delimiter=',', dtype=np.float32)
        self.len = xy.shape[0]
        self.x_data = torch.from_numpy(xy[:, :-1])
        self.y_data = torch.from_numpy(xy[:, [-1]])
    
    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]
    
    def __len__(self):
        return self.len
    
class Logistic_Model(torch.nn.Module):
    def __init__(self):
        super(Logistic_Model, self).__init__()
        self.linear1 = torch.nn.Linear(8, 6)
        self.linear2 = torch.nn.Linear(6, 4)
        self.linear3 = torch.nn.Linear(4, 1)
        self.activate = torch.nn.ReLU()
        
    def forward(self, x):
        x = self.activate(self.linear1(x))
        x = self.activate(self.linear2(x))
        x = torch.sigmoid(self.linear3(x))
        return x
    
# Load the dataset and split it into training and test sets
dataset = Diabetes_Dataset('PyTorch深度学习实践/diabetes.csv.gz')
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True, num_workers=2)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False, num_workers=2)

# Initialize the model, loss function, and optimizer
model = Logistic_Model()
criterion = torch.nn.BCELoss(reduction='mean')
optimizer = torch.optim.SGD(model.parameters(), momentum=0.1, weight_decay=0.9, lr=0.01)

# Train the model
epoch_list = []
loss_list = []
for epoch in range(100):
    total_loss = 0.0
    for i, data in enumerate(train_loader):
        inputs, labels = data
        y_pred = model(inputs)
        loss = criterion(y_pred, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    average_loss = total_loss / len(train_loader)
    epoch_list.append(epoch)
    loss_list.append(average_loss)
    print("Epoch:", epoch, "average loss:", average_loss)

# Visualize the training process
plt.figure(figsize=(10, 5))
plt.plot(epoch_list, loss_list)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Dataset & Dataloader')
plt.show()

# Evaluate the model
model.eval()
correct = 0
total = 0

with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        predicted = (outputs > 0.5).float()  # threshold output probabilities to 0 or 1
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Accuracy: {accuracy:.2f}%')

[Figures: training loss curve and test-set accuracy printout]
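
To reuse the trained classifier later without retraining, the usual pattern is to save and reload its state_dict (the file name here is just an example):

torch.save(model.state_dict(), 'diabetes_model.pth')    # persist the learned parameters

# later, or in another script:
model2 = Logistic_Model()
model2.load_state_dict(torch.load('diabetes_model.pth'))
model2.eval()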

P8 Homework: Loading, Training, and Testing a Kaggle Dataset

Data download: https://www.kaggle.com/c/titanic/data

When judging how each feature in the Titanic dataset might relate to the chance of survival, we can make some educated guesses based on historical accounts, statistics, and domain knowledge:

  1. Pclass (passenger class): Socioeconomic status matters; higher-class passengers (e.g., 1st class) likely had better odds of survival.
  2. Name: The name itself is probably irrelevant, but a title (Mr., Mrs., Miss., ...) can be extracted from it, which reflects sex, marital status, and social standing (see the sketch after this list).
  3. Sex: Historical records show that women and children survived at higher rates because they were usually evacuated first.
  4. Age: Likewise, children and younger passengers may have had better odds.
  5. SibSp (siblings/spouses aboard) and Parch (parents/children aboard): These describe family structure; relatives may help each other, which affects survival, though very large families may have struggled during evacuation.
  6. Ticket: The ticket string may carry hidden signal, such as group travel or location, but deciding its relevance would take deeper analysis.
  7. Fare: Fare correlates with Pclass; a higher fare may indicate higher socioeconomic status and better odds of survival.
  8. Cabin: The cabin number relates to location on the ship; some locations may have been safer or easier to evacuate from.
  9. Embarked (port of embarkation): Probably a minor factor, but if passengers from certain ports tended to belong to particular socioeconomic groups, it could matter indirectly.
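
As a taste of the feature engineering mentioned in item 2, the honorific can be pulled out of Name with a regular expression. A minimal pandas sketch, not used by the pipeline below (which drops Name entirely):

import pandas as pd

df = pd.read_csv('data/titanic/train.csv')
# 'Braund, Mr. Owen Harris' -> 'Mr'
df['Title'] = df['Name'].str.extract(r' ([A-Za-z]+)\.', expand=False)
print(df['Title'].value_counts())
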
import pandas as pd
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder, StandardScaler
import matplotlib.pyplot as plt

# Custom PyTorch Dataset that loads and preprocesses the Titanic data
# Data download: https://www.kaggle.com/c/titanic/data
class TitanicDataset(Dataset):
    def __init__(self, filepath, scaler=None, is_train=True):
        super(TitanicDataset, self).__init__()

        # Read the CSV file
        self.dataframe = pd.read_csv(filepath)
        self.scaler = scaler
        # Preprocess the DataFrame
        self.preprocess(self.dataframe, is_train)

    def preprocess(self, df, is_train):
        # Drop columns we do not use
        df.drop(['PassengerId', 'Name', 'Ticket', 'Cabin'], axis=1, inplace=True)

        # Handle missing values
        df['Age'] = df['Age'].fillna(df['Age'].mean())                    # fill missing Age with the column mean
        df['Fare'] = df['Fare'].fillna(df['Fare'].mean())                 # fill missing Fare with the column mean
        df['Embarked'] = df['Embarked'].fillna(df['Embarked'].mode()[0]) # fill missing Embarked with the mode

        # Use LabelEncoder to turn Sex and Embarked into numeric codes
        # LabelEncoder maps text labels to integers from 0 to n_classes-1; it suits categorical features with a natural order, e.g. low / medium / high.
        label_encoder = LabelEncoder()
        df['Sex'] = label_encoder.fit_transform(df['Sex'])
        df['Embarked'] = label_encoder.fit_transform(df['Embarked'])

        # Unlike LabelEncoder, one-hot encoding creates one binary column per category, with no implied ordering. It is the usual choice when categories have no rank.
        # Note: with one-hot encoding, input_features would be 10
        # df = pd.get_dummies(df, columns=['Sex', 'Embarked'])

        if is_train:
            # For the training set, create a new StandardScaler and fit_transform it to standardize the 'Age' and 'Fare' columns.
            # If feature scales differ widely, larger-scale features can dominate the fit and hurt model performance.
            self.scaler = StandardScaler()
            df[['Age', 'Fare']] = self.scaler.fit_transform(df[['Age', 'Fare']])

            # For training data, the 'Survived' column is the label
            self.labels = df['Survived'].values
            self.features = df.drop('Survived', axis=1).values

        else:
            # For the test set, apply the scaler that was fitted on the training data
            df[['Age', 'Fare']] = self.scaler.transform(df[['Age', 'Fare']])

            # Test data has no 'Survived' column, so the whole DataFrame is features
            self.features = df.values
            self.labels = None      # no labels available


    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, index):
        # Return one sample: the features plus the label, if one exists
        if self.labels is not None:
            return torch.tensor(self.features[index], dtype=torch.float), torch.tensor(self.labels[index], dtype=torch.float)
        # For unlabeled test data, return a placeholder tensor (a single zero)
        else:
            return torch.tensor(self.features[index], dtype=torch.float), torch.zeros(1, dtype=torch.float)


# Custom binary classification model
class BinaryClassificationModel(torch.nn.Module):
    def __init__(self, input_features):
        super(BinaryClassificationModel, self).__init__()
        self.linear1 = torch.nn.Linear(input_features, 64)
        self.linear2 = torch.nn.Linear(64, 64)
        self.linear3 = torch.nn.Linear(64, 1)       

        # Dropout layer to reduce overfitting
        self.dropout = torch.nn.Dropout(p=0.1)

        # BatchNorm layers to help stabilize training
        self.batchnorm1 = torch.nn.BatchNorm1d(64)
        self.batchnorm2 = torch.nn.BatchNorm1d(64)
        
    def forward(self, x):
        x = F.relu(self.linear1(x))     # first layer with ReLU activation
        x = self.batchnorm1(x)          # apply batch normalization
        x = self.dropout(x)             # apply dropout

        x = F.relu(self.linear2(x))     # second layer with ReLU activation
        x = self.batchnorm2(x)          # apply batch normalization
        x = self.dropout(x)             # apply dropout

        x = self.linear3(x)             # output layer
        return torch.sigmoid(x)         # sigmoid to produce a probability

# Training loop
def train(models, train_loader, criterion, optimizers, num_epochs):
    epoch_losses = {k: [] for k in optimizers.keys()}

    print('start training')

    for optim_name, optimizer in optimizers.items():
        model = models[optim_name]
        for epoch in range(num_epochs):
            model.train()
            running_loss = 0.0
            for batch_idx, (inputs, labels) in enumerate(train_loader):
                optimizer.zero_grad()                       # reset gradients
                outputs = model(inputs)                     # forward pass
                loss = criterion(outputs.squeeze(), labels) # squeeze to match the label shape
                loss.backward()                             # backward pass
                optimizer.step()                            # update weights
                # Multiplying by inputs.size(0) accumulates the batch's total loss rather than its mean:
                # criterion(outputs, labels) returns the average loss over the current batch, so scaling
                # by the batch size and later dividing by len(train_loader.dataset) gives every sample
                # an equal contribution to the epoch loss, regardless of which batch it was in.
                running_loss += loss.item() * inputs.size(0)
            epoch_loss = running_loss / len(train_loader.dataset)
            print(f'Epoch {epoch+1}/{num_epochs} Loss: {epoch_loss:.4f}')
            epoch_losses[optim_name].append(epoch_loss)
    return epoch_losses

# Testing
def test(models, test_loader, optimizers):
    results = {}
    for optim_name, _ in optimizers.items():
        model = models[optim_name]
        model.eval()
        
        predictions = []
        with torch.no_grad():   # no gradient tracking; saves compute and memory
            for inputs, _ in test_loader:
                outputs = model(inputs)
                # the test split has no labels, so we only output predictions
                predicted = (outputs > 0.5).float().squeeze()
                predictions.extend(predicted.tolist())  # extend + tolist appends each element of predicted to predictions
        print("Predict result: ", predictions)
        results[optim_name] = predictions
    return  results

    #         # With a labeled validation set we could also compute accuracy here, but our test split has no labels.
    #         # torch.max(outputs.data, 1) looks up, per sample, the class with the highest predicted probability.
    #         # torch.max returns two things: the max values and their indices. We only care about the indices
    #         # (the predicted classes), so _ discards the probability values and predicted keeps the indices.
    #         # _, predicted = torch.max(outputs.data, 1)

    #         # For binary classification, thresholding the sigmoid output (e.g. at 0.5) gives the predicted label.
    #         predicted = (outputs > 0.5).float().squeeze()
    #         total += labels.size(0)
    #         correct += (predicted == labels).sum().item()
    # accuracy = 100 * correct / total
    # print(f'Accuracy: {accuracy:.2f}%')


# Load the data
# Training dataset: no scaler is passed in, so a new one is created and fitted
train_dataset = TitanicDataset('data/titanic/train.csv', scaler=None, is_train=True)
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True, num_workers=0)

# Test dataset: pass in the scaler fitted on the training data
test_dataset = TitanicDataset('data/titanic/test.csv', scaler=train_dataset.scaler, is_train=False)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False, num_workers=0)

# Instantiate the models. The raw data has 10 feature columns: Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked,
# but after preprocessing only 7 remain: Pclass Sex Age SibSp Parch Fare Embarked
models = {
    'Adam': BinaryClassificationModel(input_features=7),
    'SGD': BinaryClassificationModel(input_features=7),
    }

# Define the loss function and optimizers
criterion = torch.nn.BCELoss(reduction='mean')
optimizers = {
    'Adam': torch.optim.Adam(models['Adam'].parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-8, weight_decay=0.0001),
    'SGD': torch.optim.SGD(models['SGD'].parameters(), lr=0.01, weight_decay=0.001, momentum=0.9)
}


# Train the models
num_epochs = 100
losses = train(models, train_loader, criterion, optimizers, num_epochs)

# Evaluate the models
# The ground-truth labels for the test set are in gender_submission.csv; read them to score the predictions
labels_path = 'data/titanic/gender_submission.csv'
data_frame = pd.read_csv(labels_path)
data_frame.drop(['PassengerId'], axis=1, inplace=True)
labels = data_frame['Survived'].values
print('Test Dataset ground truth: ', labels)

# Model predictions
results = test(models, test_loader, optimizers)
print('Test Dataset predictions: ', results)

# Compute accuracy
for optimizer_name, predicted in results.items():
    accuracy = 100 * (predicted == labels).sum() / len(predicted)
    print(f'Accuracy for {optimizer_name}: {accuracy:.2f}%')


plt.figure(figsize=(10, 5))
for optim_name, loss_curve in losses.items():
    plt.plot(loss_curve, label=optim_name)
    final_accuracy = 100 * (results[optim_name] == labels).sum() / len(results[optim_name])
    plt.annotate(f'Final Acc: {final_accuracy:.2f}%', xy=(num_epochs - 1, loss_curve[-1]), xytext=(-40, 10), textcoords='offset points', fontsize=10)

plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Titanic Dataset training Loss Curve')
plt.legend()
plt.show()

[Figure: Titanic training loss curves with final accuracy annotations]
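
To actually submit to Kaggle, the predictions must be paired with PassengerId and written to CSV. A minimal sketch appended to the script above, using the Adam model's predictions from results (the output file name is arbitrary):

test_df = pd.read_csv('data/titanic/test.csv')
submission = pd.DataFrame({
    'PassengerId': test_df['PassengerId'],
    'Survived': [int(p) for p in results['Adam']],
})
submission.to_csv('titanic_submission.csv', index=False)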

Code: MNIST Multi-class Classification

import torch
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.optim as optim

# prepare dataset
batch_size = 64

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307, ), (0.3081, ))
])

train_dataset = datasets.MNIST('data/MNIST/', train=True, transform=transform, download=True)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = datasets.MNIST('data/MNIST/', train=False, transform=transform, download=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# design model
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.linear1 = torch.nn.Linear(784, 512)
        self.linear2 = torch.nn.Linear(512, 256)
        self.linear3 = torch.nn.Linear(256, 128)
        self.linear4 = torch.nn.Linear(128, 64)
        self.linear5 = torch.nn.Linear(64, 10)

    def forward(self, x):
        x = x.view(-1, 784)
        x = F.relu(self.linear1(x))
        x = F.relu(self.linear2(x))
        x = F.relu(self.linear3(x))
        x = F.relu(self.linear4(x))
        x = self.linear5(x)  # no activation here: torch.nn.CrossEntropyLoss already combines LogSoftmax and NLLLoss
        return x
    
model = Net()

# construct loss and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

# training
def train(epoch):
    running_loss = 0.0
    for batch_idx, data in enumerate(train_loader, 0):
        inputs, target = data
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        if batch_idx % 300 == 299:
            print('[%d, %5d] loss: %.3f' % (epoch+1, batch_idx+1, running_loss/300))
            running_loss = 0.0

# test
def test():
    correct = 0
    total = 0
    with torch.no_grad():
        for data in test_loader:
            inputs, labels = data
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        print('Accuracy on test set: %d %%' %(100*correct/total))


if __name__ == '__main__':
    for epoch in range(10):
        train(epoch)
        test()  # evaluate after every epoch
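
After training, it can be reassuring to eyeball a single prediction. A small sketch, appended to the script above, that shows the first test image with its true and predicted digit:

import matplotlib.pyplot as plt

image, label = test_dataset[0]
with torch.no_grad():
    pred = model(image).argmax(dim=1).item()
plt.imshow(image.squeeze(), cmap='gray')
plt.title('label: %d, predicted: %d' % (label, pred))
plt.show()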

P9 Homework: Kaggle Otto Group Product Classification Challenge

Data download: https://www.kaggle.com/c/otto-group-product-classification-challenge/data
Baidu Netdisk mirror: https://pan.baidu.com/s/1g8rshQdwba7ctwLmzl69Qw?pwd=4nd4 (access code: 4nd4)

import torch
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler        # pip install scikit-learn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt

class OttoDataset(Dataset):
    def __init__(self, feature_filepath, label_filepath=None, mode='train', scaler=None):
        super(OttoDataset, self).__init__()

        # Load the dataset into a pandas dataframe.
        data = pd.read_csv(feature_filepath)

        if mode == 'train':
            # Extract the numeric part of the class labels, convert to integers, and shift to zero-based indexing.
            self.labels = torch.tensor(data.iloc[:, -1].apply(lambda x: int(x.split('_')[-1]) - 1).values, dtype=torch.long)
            
            # Initialize the StandardScaler.
            # StandardScaler will normalize the features (i.e., each column of the dataset)
            # by subtracting the mean and dividing by the standard deviation.
            # This centers the feature columns at mean 0 with standard deviation 1.
            self.scaler = StandardScaler()

            # Select all columns except 'id' and 'target' for features.
            # Then apply the scaler to standardize them.
            features = data.iloc[:, 1:-1].values
            self.features = torch.tensor(self.scaler.fit_transform(features), dtype=torch.float32)

        elif mode == 'test':
            features = data.iloc[:, 1:].values

            # Apply the same scaling as on the training set; a scaler fitted on the
            # training data must be provided, since an unfitted scaler cannot transform.
            if scaler is None:
                raise ValueError("mode='test' requires the scaler fitted on the training set")
            self.scaler = scaler
            self.features = torch.tensor(self.scaler.transform(features), dtype=torch.float32)
            
            if label_filepath is not None:
                label_data = pd.read_csv(label_filepath)
                # Assuming the first column after 'id' are one-hot encoded class labels,
                # find the index of the max value in each row which corresponds to the predicted class.
                self.labels = torch.tensor(label_data.iloc[:, 1:].values.argmax(axis=1), dtype=torch.long)

            else:
                self.labels = None

        # If neither 'train' nor 'test' mode is specified, raise an error.
        else:
            raise ValueError("Mode must be 'train' or 'test'")
        
        # Store the length of the dataset.
        self.len = len(self.features)

    def __len__(self):
        # When len(dataset) is called, return the length of the dataset.
        return self.len
    
    def __getitem__(self, index):
        # This method retrieves the features and label of a specified index.
        return self.features[index], self.labels[index] if self.labels is not None else -1
    

class FullyConnectedModel(torch.nn.Module):
    def __init__(self, input_features, output_classes):
        super(FullyConnectedModel, self).__init__()
        
        # Network layers
        self.fc1 = torch.nn.Linear(input_features, 128)
        self.fc2 = torch.nn.Linear(128, 64)
        self.fc3 = torch.nn.Linear(64, 32)
        self.fc4 = torch.nn.Linear(32, output_classes)

        # More layers could be added here

        # Dropout layer to reduce overfitting
        self.dropout = torch.nn.Dropout(p=0.3)

        # BatchNorm layers to help stabilize training
        self.batchnorm1 = torch.nn.BatchNorm1d(128)
        self.batchnorm2 = torch.nn.BatchNorm1d(64)
        self.batchnorm3 = torch.nn.BatchNorm1d(32)

    def forward(self, x):
        x = F.relu(self.batchnorm1(self.fc1(x)))
        x = self.dropout(x)
        x = F.relu(self.batchnorm2(self.fc2(x)))
        x = self.dropout(x)
        x = F.relu(self.batchnorm3(self.fc3(x)))
        x = self.dropout(x)
        x = self.fc4(x)
        return x
    

def train(epoch, train_loader, model, criterion, optimizer):
    model.train()
    running_loss = 0.0
    for batch_idx, data in enumerate(train_loader, 0):
        inputs, targets = data

        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        if batch_idx % 300 == 0:
            print('Epoch {}, Batch {}, Loss: {:.4f}'.format(epoch, batch_idx, loss.item()))

    # Average loss over all batches in this epoch
    average_loss = running_loss / len(train_loader)
    return average_loss


def test(test_loader, model):
    model.eval()
    correct = 0.0
    total = 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, dim=1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
    accuracy = 100 * (correct / total)
    print("Accuracy on test data is {:.2f}".format(accuracy))
    return accuracy



if __name__ == '__main__':
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Prepare dataset
    train_dataset = OttoDataset(feature_filepath='data/Otto/train.csv', mode='train')
    scaler = train_dataset.scaler
    test_dataset = OttoDataset(feature_filepath='data/Otto/test.csv', label_filepath='data/Otto/otto_correct_submission.csv', mode='test', scaler=scaler)

    train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True, num_workers=0)
    test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False, num_workers=0)
    
    # Design model
    model = FullyConnectedModel(input_features=93, output_classes=9).to(device)

    # Construct loss and optimizer
    criterion = torch.nn.CrossEntropyLoss().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=25, gamma=0.1)

    
    # Train and Test
    train_losses = []
    test_accuracies = []
    test_epochs = []        # epochs at which test accuracy was measured

    num_epochs = 100
    for epoch in range(num_epochs):
        train_loss = train(epoch, train_loader, model, criterion, optimizer)
        train_losses.append(train_loss)
        
        if epoch % 2 == 0 or epoch == num_epochs-1:
            test_accuracy = test(test_loader, model)
            test_accuracies.append(test_accuracy)
            test_epochs.append(epoch)


        # Update the learning rate
        scheduler.step()
        
    # Save model parameters for future use
    torch.save(model.state_dict(), 'model/09_kaggle_OttoDataset_model.pth')

    # Visualize
    plt.figure(figsize=(12, 5))

    # Loss Curve
    plt.subplot(1, 2, 1)
    plt.plot(train_losses, label='Train Loss')
    plt.title('Training Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()

    # Accuracy Curve
    plt.subplot(1, 2, 2)
    plt.plot(test_epochs, test_accuracies, label='Test Accuracy')  # plot against the epochs actually evaluated
    plt.title('Testing Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy (%)')
    plt.legend()

    plt.show()

[Figure: Otto training loss and test accuracy curves]
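
Kaggle scores this challenge on per-class probabilities, so a submission needs the softmax outputs in the sampleSubmission column layout (id, Class_1 .. Class_9). A minimal sketch, appended at the end of the script above:

model.eval()
all_probs = []
with torch.no_grad():
    for inputs, _ in test_loader:
        probs = F.softmax(model(inputs.to(device)), dim=1)  # convert logits to probabilities
        all_probs.append(probs.cpu())
probs = torch.cat(all_probs).numpy()

test_ids = pd.read_csv('data/Otto/test.csv')['id']
submission = pd.DataFrame(probs, columns=['Class_%d' % (i + 1) for i in range(9)])
submission.insert(0, 'id', test_ids)
submission.to_csv('otto_submission.csv', index=False)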
