目录
P1 作业
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
# Training inputs and their target values for the linear model y = x * w + b.
x_data = [1.0, 2.0, 3.0]
y_data = [3.0, 4.0, 6.0]
def forward(x, w, b):
    """Return the linear-model prediction ``x * w + b``."""
    prediction = x * w + b
    return prediction
def loss(x, y, w, b):
    """Return the squared error between the prediction for ``x`` and ``y``."""
    residual = forward(x, w, b) - y
    return residual ** 2
# Candidate slopes and intercepts that span the loss surface.
w_list = np.arange(0.0, 4.1, 0.1)
b_list = np.arange(-2.0, 2.1, 0.1)

# mse_matrix[i, j] stores the mean squared error for (w_list[i], b_list[j]).
mse_matrix = np.zeros((len(w_list), len(b_list)))
for i, w in enumerate(w_list):
    for j, b in enumerate(b_list):
        l_sum = 0
        for x_val, y_val in zip(x_data, y_data):
            l_sum += loss(x_val, y_val, w, b)
        mse_matrix[i, j] = l_sum / len(x_data)

# meshgrid returns arrays shaped (len(b_list), len(w_list)), hence the
# transpose of mse_matrix when drawing the surface.
W, B = np.meshgrid(w_list, b_list)

fig = plt.figure('Linear Model Cost Value')
ax = fig.add_subplot(1, 1, 1, projection='3d')
ax.plot_surface(W, B, mse_matrix.T, cmap='viridis')
ax.set_xlabel('w')
ax.set_ylabel('b')
ax.set_zlabel('loss')
plt.show()
代码:梯度下降法
import numpy as np
import matplotlib.pyplot as plt
# Training samples for the bias-free model y = x * w.
x_data = [1.0, 2.0, 3.0]
y_data = [2.0, 4.0, 6.0]
# Initial weight guess and the step size used by the update rule below.
w = 1.0
learning_rate = 0.01
def forward(x, w):
    """Return the prediction of the bias-free linear model, ``x * w``."""
    prediction = x * w
    return prediction
def cost_fuction(xs, ys, w):
    """Return the mean squared error of the model over all samples.

    ``xs`` and ``ys`` are parallel sequences of inputs and targets; ``w`` is
    the current weight.
    """
    squared_error_sum = 0
    for sample_x, sample_y in zip(xs, ys):
        residual = forward(sample_x, w) - sample_y
        squared_error_sum += residual ** 2
    return squared_error_sum / len(xs)
def gradient(xs, ys, w):
    """Return d(MSE)/dw averaged over all samples.

    Each sample contributes ``2 * x * (x * w - y)``.
    """
    accumulated = 0
    for sample_x, sample_y in zip(xs, ys):
        accumulated += 2 * sample_x * (sample_x * w - sample_y)
    return accumulated / len(xs)
print('predict (before training)', 4, forward(4, w))

epoch_list, cost_val_list = [], []
for epoch in range(100):
    # The cost is evaluated with the weight from before this epoch's update.
    cost_val = cost_fuction(x_data, y_data, w)
    grad_val = gradient(x_data, y_data, w)
    w = w - learning_rate * grad_val
    print('Epoch: ', epoch, 'w=', w, 'loss=', cost_val)
    epoch_list.append(epoch)
    cost_val_list.append(cost_val)

print('predict (after training)', 4, forward(4, w))

# Plot the cost against the epoch index.
plt.plot(epoch_list, cost_val_list)
plt.xlabel('epoch')
plt.ylabel('cost val')
plt.show()
代码:随机梯度下降
import matplotlib.pyplot as plt
# Training samples for the bias-free model y = x * w.
x_data = [1.0, 2.0, 3.0]
y_data = [2.0, 4.0, 6.0]
# Initial weight guess and the step size used by the per-sample update below.
w = 1.0
learning_rate = 0.01
def forward(x, w):
    """Return the prediction of the bias-free linear model, ``x * w``."""
    prediction = x * w
    return prediction
def loss(x, y, w):
    """Return the squared error of the prediction for a single sample."""
    residual = y - forward(x, w)
    return residual ** 2
def gradient(x, y, w):
    """Return the derivative of the single-sample squared error w.r.t. ``w``."""
    residual = x * w - y
    return 2 * x * residual
print('predict (before training)', 4, forward(4, w))

epoch_list, loss_list = [], []
for epoch in range(100):
    for x, y in zip(x_data, y_data):
        # Stochastic update: the weight changes after every single sample.
        grad = gradient(x, y, w)
        w -= learning_rate * grad
        print('\tgrad: ', x, y, grad)
        l = loss(x, y, w)
    # Only the loss of the epoch's last sample is logged and recorded.
    print('process: ', epoch, "w=", w, 'loss=', l)
    epoch_list.append(epoch)
    loss_list.append(l)

print('predict (after training)', 4, forward(4, w))

plt.plot(epoch_list, loss_list)
plt.xlabel('epoch')
plt.ylabel('loss')
plt.show()
代码:反向传播
# 如果是复杂的网络,没办法都自己写gradient的计算。
import torch
import matplotlib.pyplot as plt
# Training samples for the bias-free model y = x * w.
x_data, y_data = [1.0, 2.0, 3.0], [2.0, 4.0, 6.0]
# The weight is a tensor that tracks gradients so autograd can fill w.grad.
w = torch.tensor([1.0], requires_grad=True)
def forward(x, w):
    """Return ``x * w``; the result is a tensor whenever ``w`` is one."""
    prediction = x * w
    return prediction
def loss(x, y, w):
    """Return the squared error of the prediction for a single sample."""
    residual = y - forward(x, w)
    return residual ** 2
print('predict (before training)', 4, forward(4, w.item()))

epoch_list = []
loss_list = []
for epoch in range(100):
    for x, y in zip(x_data, y_data):
        l = loss(x, y, w)
        # Backpropagate through the graph built by loss(); fills w.grad.
        l.backward()
        print('\tgrad:', x, y, w.grad.item())
        # Update the weight outside autograd tracking. torch.no_grad() is the
        # supported replacement for poking at `.data`.
        with torch.no_grad():
            w -= 0.01 * w.grad
        # backward() accumulates gradients, so clear them before the next sample.
        w.grad.zero_()
    # Only the loss of the epoch's last sample is logged and recorded.
    print('process:', epoch, l.item())
    epoch_list.append(epoch)
    loss_list.append(l.item())

# Pass a plain float, as in the "before training" line, so a number is
# printed rather than a tensor carrying grad_fn.
print('predict (after training)', 4, forward(4, w.item()))

plt.plot(epoch_list, loss_list)
plt.xlabel('epoch')
plt.ylabel('loss')
plt.show()
代码:反向传播
# 如果是复杂的网络,没办法都自己写gradient的计算。
import torch
import matplotlib.pyplot as plt
# Training samples for the bias-free model y = x * w.
x_data, y_data = [1.0, 2.0, 3.0], [2.0, 4.0, 6.0]
# The weight is a tensor that tracks gradients so autograd can fill w.grad.
w = torch.tensor([1.0], requires_grad=True)
def forward(x, w):
    """Return ``x * w``; the result is a tensor whenever ``w`` is one."""
    prediction = x * w
    return prediction
def loss(x, y, w):
    """Return the squared error of the prediction for a single sample."""
    residual = y - forward(x, w)
    return residual ** 2
print('predict (before training)', 4, forward(4, w.item()))

epoch_list = []
loss_list = []
for epoch in range(100):
    for x, y in zip(x_data, y_data):
        l = loss(x, y, w)
        # Backpropagate through the graph built by loss(); fills w.grad.
        l.backward()
        print('\tgrad:', x, y, w.grad.item())
        # Update the weight outside autograd tracking. torch.no_grad() is the
        # supported replacement for poking at `.data`.
        with torch.no_grad():
            w -= 0.01 * w.grad
        # backward() accumulates gradients, so clear them before the next sample.
        w.grad.zero_()
    # Only the loss of the epoch's last sample is logged and recorded.
    print('process:', epoch, l.item())
    epoch_list.append(epoch)
    loss_list.append(l.item())

# Pass a plain float, as in the "before training" line, so a number is
# printed rather than a tensor carrying grad_fn.
print('predict (after training)', 4, forward(4, w.item()))

plt.plot(epoch_list, loss_list)
plt.xlabel('epoch')
plt.ylabel('loss')
plt.show()
P5 作业:不同优化器比较
import torch
import matplotlib.pyplot as plt
# step 1: Prepare Dataset
# Three samples, one feature each, stored as float32 column vectors.
x_data = torch.tensor([[1.0], [2.0], [3.0]])
y_data = torch.tensor([[2.0], [4.0], [6.0]])
# step 2: Design Model
class LinearModel(torch.nn.Module):
    """Single-feature linear regression: one ``Linear(1, 1)`` layer with bias."""

    def __init__(self):
        super().__init__()
        self.linear = torch.nn.Linear(1, 1, bias=True)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the prediction."""
        return self.linear(x)
# One separately initialised model per optimizer, so no run shares weights
# with another.
models = {
    'SGD': LinearModel(),
    'Adam': LinearModel(),
    'Adagrad': LinearModel(),
    'Adamax': LinearModel(),
    'ASGD': LinearModel(),
    'RMSprop': LinearModel(),
    'Rprop': LinearModel(),
}

# step 3: Construct Loss and Optimizer
# reduction='sum' is the current spelling of the deprecated size_average=False.
criterion = torch.nn.MSELoss(reduction='sum')
# Each optimizer must own the parameters of the model stored under the same
# key. Rprop was previously bound to the RMSprop model, so the Rprop model
# never trained and the RMSprop model was stepped by two optimizers.
optimizers = {
    'SGD': torch.optim.SGD(models['SGD'].parameters(), lr=0.01),
    'Adam': torch.optim.Adam(models['Adam'].parameters(), lr=0.01),
    'Adagrad': torch.optim.Adagrad(models['Adagrad'].parameters(), lr=0.01),
    'Adamax': torch.optim.Adamax(models['Adamax'].parameters(), lr=0.01),
    'ASGD': torch.optim.ASGD(models['ASGD'].parameters(), lr=0.01),
    'RMSprop': torch.optim.RMSprop(models['RMSprop'].parameters(), lr=0.01),
    'Rprop': torch.optim.Rprop(models['Rprop'].parameters(), lr=0.01),
}
loss_values = {k: [] for k in optimizers}

# step 4: Training Cycle
for opt_name, optimizer in optimizers.items():
    model = models[opt_name]
    for epoch in range(100):
        y_pred = model(x_data)            # forward: prediction
        loss = criterion(y_pred, y_data)  # forward: loss
        optimizer.zero_grad()             # clear gradients from the last step
        loss.backward()                   # backward
        optimizer.step()                  # update parameters
        loss_values[opt_name].append(loss.item())

# One loss curve per optimizer on a shared figure.
plt.figure(figsize=(10, 5))
for opt_name, losses in loss_values.items():
    plt.plot(losses, label=opt_name)
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.title("Loss by Optimization Algorithm")
plt.show()
代码:Logistic回归
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
# Three single-feature samples with binary targets, stored as float32 columns.
x_data = torch.tensor([[1.0], [2.0], [3.0]])
y_data = torch.tensor([[0.0], [0.0], [1.0]])
class Logistic_Regression_Model(torch.nn.Module):
    """Logistic regression on one feature: ``sigmoid(Linear(1, 1)(x))``."""

    def __init__(self):
        super().__init__()
        self.linear = torch.nn.Linear(1, 1, bias=True)

    def forward(self, x):
        """Return the predicted probability of the positive class for ``x``."""
        # torch.sigmoid matches the other models in this file and avoids the
        # deprecation warning older PyTorch releases emit for F.sigmoid.
        return torch.sigmoid(self.linear(x))
# One separately initialised model per optimizer, so no run shares weights
# with another.
models = {
    'SGD': Logistic_Regression_Model(),
    'Adam': Logistic_Regression_Model(),
    'Adagrad': Logistic_Regression_Model(),
    'Adamax': Logistic_Regression_Model(),
    'ASGD': Logistic_Regression_Model(),
    'RMSprop': Logistic_Regression_Model(),
    'Rprop': Logistic_Regression_Model(),
}

# reduction='sum' is the current spelling of the deprecated size_average=False.
criterion = torch.nn.BCELoss(reduction='sum')
# Each optimizer must own the parameters of the model stored under the same
# key. Rprop was previously bound to the RMSprop model, so the Rprop model
# never trained and the RMSprop model was stepped by two optimizers.
optimizers = {
    'SGD': torch.optim.SGD(models['SGD'].parameters(), lr=0.01),
    'Adam': torch.optim.Adam(models['Adam'].parameters(), lr=0.01),
    'Adagrad': torch.optim.Adagrad(models['Adagrad'].parameters(), lr=0.01),
    'Adamax': torch.optim.Adamax(models['Adamax'].parameters(), lr=0.01),
    'ASGD': torch.optim.ASGD(models['ASGD'].parameters(), lr=0.01),
    'RMSprop': torch.optim.RMSprop(models['RMSprop'].parameters(), lr=0.01),
    'Rprop': torch.optim.Rprop(models['Rprop'].parameters(), lr=0.01),
}
loss_values = {k: [] for k in optimizers}

for opt_name, optimizer in optimizers.items():
    model = models[opt_name]
    for epoch in range(1000):
        y_pred = model(x_data)
        loss = criterion(y_pred, y_data)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        loss_values[opt_name].append(loss.item())

# One loss curve per optimizer on a shared figure.
plt.figure(figsize=(10, 5))
for opt_name, losses in loss_values.items():
    plt.plot(losses, label=opt_name)
plt.xlabel('Epoch')
plt.ylabel("Loss")
plt.legend()
plt.title("Loss by Optimization Algorithm")
plt.show()
P7 作业:不同激活函数比较
import torch
import numpy as np
import matplotlib.pyplot as plt
# Load the gzip-compressed CSV with np.loadtxt; `delimiter` is the field separator.
xy = np.loadtxt('PyTorch深度学习实践/diabetes.csv.gz', delimiter=',', dtype=np.float32)
# Every column except the last is used as a feature.
x_data = torch.from_numpy(xy[:, :-1])
# Indexing with the list [-1] keeps the label column two-dimensional.
y_data = torch.from_numpy(xy[:, [-1]])
class Model(torch.nn.Module):
    """Three-layer binary classifier (8 -> 6 -> 4 -> 1) with a pluggable activation.

    Args:
        activation_fn: Module applied after the first two linear layers.
            ``None`` (the default) selects a fresh ``torch.nn.Sigmoid()``.
    """

    def __init__(self, activation_fn=None):
        super().__init__()
        self.linear1 = torch.nn.Linear(8, 6)
        self.linear2 = torch.nn.Linear(6, 4)
        self.linear3 = torch.nn.Linear(4, 1)
        # Build the default here rather than in the signature: a default
        # argument is evaluated once, so every model would share one module.
        if activation_fn is None:
            activation_fn = torch.nn.Sigmoid()
        self.activation_fn = activation_fn

    def forward(self, x):
        """Return the predicted probability for each row of ``x``."""
        x = self.activation_fn(self.linear1(x))
        x = self.activation_fn(self.linear2(x))
        # The output layer always uses sigmoid so BCELoss receives values in (0, 1).
        return torch.sigmoid(self.linear3(x))
# Hidden-layer activations to compare; a fresh model is trained for each.
activation_fns = {
    'Sigmoid': torch.nn.Sigmoid(),
    'ReLU': torch.nn.ReLU(),
    'Tanh': torch.nn.Tanh(),
    'Softplus': torch.nn.Softplus(),
}
criterion = torch.nn.BCELoss(reduction='mean')
loss_values = {k: [] for k in activation_fns}

for activation_name, activation_fn in activation_fns.items():
    model = Model(activation_fn=activation_fn)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    for epoch in range(100):
        # Full-batch training: the whole dataset is one batch per epoch.
        y_pred = model(x_data)
        loss = criterion(y_pred, y_data)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        loss_values[activation_name].append(loss.item())

plt.figure(figsize=(10, 5))
for activation_name, losses in loss_values.items():
    plt.plot(losses, label=activation_name)
plt.xlabel('Epoch')
plt.ylabel('Loss')
# The figure compares activation functions, so the title now says so
# (it used to read "Sigmoid Algorithm").
plt.title('Loss by Activation Function')
plt.legend()
plt.show()
代码:Dataloader
import torch
import numpy as np
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.utils.data import random_split
import matplotlib.pyplot as plt
class Diabetes_Dataset(Dataset):
    """Dataset backed by a comma-separated numeric file.

    Every column but the last is a feature; the last column is the label.
    """

    def __init__(self, filepath):
        super().__init__()
        # The whole file is loaded into memory as float32.
        table = np.loadtxt(filepath, delimiter=',', dtype=np.float32)
        self.len = table.shape[0]
        self.x_data = torch.from_numpy(table[:, :-1])
        # Indexing with [-1] keeps the labels as a column (n, 1).
        self.y_data = torch.from_numpy(table[:, [-1]])

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair at ``index``."""
        features = self.x_data[index]
        label = self.y_data[index]
        return features, label

    def __len__(self):
        """Return the number of rows loaded from the file."""
        return self.len
class Logistic_Model(torch.nn.Module):
    """Binary classifier: 8 -> 6 -> 4 -> 1 with ReLU hidden layers."""

    def __init__(self):
        super().__init__()
        self.linear1 = torch.nn.Linear(8, 6)
        self.linear2 = torch.nn.Linear(6, 4)
        self.linear3 = torch.nn.Linear(4, 1)
        self.activate = torch.nn.ReLU()

    def forward(self, x):
        """Return the predicted probability for each row of ``x``."""
        hidden = self.activate(self.linear1(x))
        hidden = self.activate(self.linear2(hidden))
        logits = self.linear3(hidden)
        return torch.sigmoid(logits)
# The loaders below use worker processes. On platforms that start workers
# with "spawn" (Windows, macOS) the module is re-imported in each worker, so
# the script must sit behind a __main__ guard or it recurses and fails.
if __name__ == '__main__':
    # Load the dataset and split it 80/20 into training and test subsets.
    dataset = Diabetes_Dataset('PyTorch深度学习实践/diabetes.csv.gz')
    train_size = int(len(dataset) * 0.8)
    test_size = len(dataset) - train_size
    train_dataset, test_dataset = random_split(dataset, [train_size, test_size])
    train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True, num_workers=2)
    test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False, num_workers=2)

    # Model, loss function, and optimizer.
    model = Logistic_Model()
    criterion = torch.nn.BCELoss(reduction='mean')
    # NOTE(review): weight_decay=0.9 with momentum=0.1 is unusually strong
    # regularisation; the two values may have been swapped. Left unchanged.
    optimizer = torch.optim.SGD(model.parameters(), momentum=0.1, weight_decay=0.9, lr=0.01)

    # Train the model.
    epoch_list = []
    loss_list = []
    for epoch in range(100):
        total_loss = 0.0
        for i, data in enumerate(train_loader):
            inputs, labels = data
            y_pred = model(inputs)
            loss = criterion(y_pred, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        # Mean of the per-batch mean losses for this epoch.
        average_loss = total_loss / len(train_loader)
        epoch_list.append(epoch)
        loss_list.append(average_loss)
        print("Epoch:", epoch, "average loss:", average_loss)

    # Visualise the training curve.
    plt.figure(figsize=(10, 5))
    plt.plot(epoch_list, loss_list)
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Dataset & Dataloader')
    plt.show()

    # Evaluate on the held-out subset.
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            predicted = (outputs > 0.5).float()  # threshold probabilities to 0/1
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    print(f'Accuracy: {accuracy:.2f}%')
P8 作业:Kaggle数据集加载、训练和测试
数据下载地址:https://www.kaggle.com/c/titanic/data
当评估泰坦尼克号数据集中的特征与生还可能性的关联时,我们可以基于历史资料、统计学习和领域知识来进行一些推测:
- Pclass(乘客等级): 社会经济地位是一个重要因素,高等级乘客(如1等舱)可能有更高的生还几率。
- Name(名字): 虽然名字本身可能与生还率无关,但可以从名字中提取称谓(如 Mr., Mrs., Miss.),这可能反映了性别、婚姻状况和社会地位。
- Sex(性别): 历史记录显示,女性和儿童在灾难中的生还率更高,因为他们通常会被优先疏散。
- Age(年龄): 同样,儿童和年轻人可能有更高的生还几率。
- SibSp(兄弟姐妹/配偶数量)和 Parch(父母/子女数量): 这些特征反映了家庭结构,家庭成员可能会互相帮助,影响生还率。然而,太大的家庭可能在疏散时遇到困难。
- Ticket(船票信息): 船票信息可能隐含着有用的信息,比如团体旅行或位置信息,但这需要更深入的分析来决定其相关性。
- Fare(票价): 票价可能与 Pclass 相关,较高的票价可能意味着更高的社会经济地位和更高的生还几率。
- Cabin(船舱号): 船舱号可能与船上的位置有关,一些位置在船沉时可能更安全或者更容易疏散。
- Embarked(登船口): 登船口可能是一个次要因素,但如果某些登船口的乘客普遍属于特定的社会经济群体,这可能会间接影响生还率。
import pandas as pd
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder, StandardScaler
import matplotlib.pyplot as plt
# Custom PyTorch dataset that loads and preprocesses the Titanic CSV files.
# Data: https://www.kaggle.com/c/titanic/data
class TitanicDataset(Dataset):
    """Titanic passengers as ``(features, label)`` float tensors.

    Args:
        filepath: Path of the CSV file to read.
        scaler: Fitted scaler reused when ``is_train`` is False. Ignored for
            training data, where a new ``StandardScaler`` is fitted.
        is_train: True for the labelled training file; False for a file
            without a ``Survived`` column.

    Raises:
        ValueError: If ``is_train`` is False and no ``scaler`` is given.
    """

    def __init__(self, filepath, scaler=None, is_train=True):
        super().__init__()
        self.dataframe = pd.read_csv(filepath)
        self.scaler = scaler
        # Preprocessing mutates self.dataframe in place and fills
        # self.features / self.labels.
        self.preprocess(self.dataframe, is_train)

    def preprocess(self, df, is_train):
        """Clean ``df`` in place and derive the feature and label arrays."""
        if not is_train and self.scaler is None:
            # Fail early with a clear message instead of an AttributeError on
            # None.transform further down.
            raise ValueError(
                "is_train=False requires the scaler fitted on the training set"
            )

        # Drop the columns that are not used as features.
        df.drop(['PassengerId', 'Name', 'Ticket', 'Cabin'], axis=1, inplace=True)

        # Fill missing values: mean for Age and Fare, mode for Embarked.
        # Assign the result back. `df[col].fillna(..., inplace=True)` works on
        # a temporary and leaves the frame unchanged under pandas Copy-on-Write.
        df['Age'] = df['Age'].fillna(df['Age'].mean())
        df['Fare'] = df['Fare'].fillna(df['Fare'].mean())
        df['Embarked'] = df['Embarked'].fillna(df['Embarked'].mode()[0])

        # Encode Sex and Embarked as integers 0..n_classes-1. LabelEncoder
        # numbers the classes in sorted order, so the codes carry no ordinal
        # meaning. The encoder is refitted on each file, so train and test
        # codes agree only while both files contain the same categories.
        label_encoder = LabelEncoder()
        df['Sex'] = label_encoder.fit_transform(df['Sex'])
        df['Embarked'] = label_encoder.fit_transform(df['Embarked'])
        # One-hot encoding is the usual choice for unordered categories.
        # It would widen the input, so the model would need input_features=10:
        # df = pd.get_dummies(df, columns=['Sex', 'Embarked'])

        if is_train:
            # Fit a new scaler on the training data. Standardising Age and
            # Fare keeps wide-ranged features from dominating the model.
            self.scaler = StandardScaler()
            df[['Age', 'Fare']] = self.scaler.fit_transform(df[['Age', 'Fare']])
            # 'Survived' is the label; every other column is a feature.
            self.labels = df['Survived'].values
            self.features = df.drop('Survived', axis=1).values
        else:
            # Reuse the scaler fitted on the training data.
            df[['Age', 'Fare']] = self.scaler.transform(df[['Age', 'Fare']])
            # No 'Survived' column is expected, so the whole frame is features.
            self.features = df.values
            self.labels = None

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, index):
        """Return ``(features, label)``; the label is a zero placeholder if unlabelled."""
        features = torch.tensor(self.features[index], dtype=torch.float)
        if self.labels is not None:
            return features, torch.tensor(self.labels[index], dtype=torch.float)
        # Unlabelled data: a size-1 zero tensor keeps batches collatable.
        return features, torch.zeros(1, dtype=torch.float)
# Custom binary-classification model.
class BinaryClassificationModel(torch.nn.Module):
    """MLP ``input_features -> 64 -> 64 -> 1`` with batch norm, dropout, and a sigmoid output."""

    def __init__(self, input_features):
        super().__init__()
        self.linear1 = torch.nn.Linear(input_features, 64)
        self.linear2 = torch.nn.Linear(64, 64)
        self.linear3 = torch.nn.Linear(64, 1)
        # Dropout reduces over-fitting.
        self.dropout = torch.nn.Dropout(p=0.1)
        # Batch normalisation helps stabilise training.
        self.batchnorm1 = torch.nn.BatchNorm1d(64)
        self.batchnorm2 = torch.nn.BatchNorm1d(64)

    def forward(self, x):
        """Return the predicted survival probability for each row of ``x``."""
        # Hidden block 1: linear -> ReLU -> batch norm -> dropout.
        hidden = self.dropout(self.batchnorm1(F.relu(self.linear1(x))))
        # Hidden block 2: same layout.
        hidden = self.dropout(self.batchnorm2(F.relu(self.linear2(hidden))))
        # Output layer followed by sigmoid.
        logits = self.linear3(hidden)
        return torch.sigmoid(logits)
# Training procedure.
def train(models, train_loader, criterion, optimizers, num_epochs):
    """Train each model with the optimizer stored under the same key.

    Args:
        models: Mapping of name -> model.
        train_loader: Iterable of ``(inputs, labels)`` batches; must expose
            ``.dataset`` so the epoch loss can be normalised per sample.
        criterion: Loss applied to ``(outputs, labels)``.
        optimizers: Mapping of name -> optimizer; its keys select the models.
        num_epochs: Number of passes over ``train_loader`` per model.

    Returns:
        Mapping of name -> list of per-epoch loss values.
    """
    epoch_losses = {k: [] for k in optimizers}
    print('start training')
    for optim_name, optimizer in optimizers.items():
        model = models[optim_name]
        for epoch in range(num_epochs):
            model.train()
            running_loss = 0.0
            for inputs, labels in train_loader:
                optimizer.zero_grad()
                outputs = model(inputs)
                # Drop only the trailing feature axis. A bare squeeze() would
                # also drop the batch axis of a size-1 batch and the loss
                # would reject the shape mismatch.
                loss = criterion(outputs.squeeze(-1), labels)
                loss.backward()
                optimizer.step()
                # Weight the batch loss by the batch size so a smaller last
                # batch does not count as much as a full one.
                running_loss += loss.item() * inputs.size(0)
            epoch_loss = running_loss / len(train_loader.dataset)
            print(f'Epoch {epoch+1}/{num_epochs} Loss: {epoch_loss:.4f}')
            epoch_losses[optim_name].append(epoch_loss)
    return epoch_losses
# Inference on unlabelled data.
def test(model, test_loader, optimizers):
    """Predict 0/1 labels with every model named in ``optimizers``.

    Args:
        model: Mapping of name -> trained model. The parameter keeps its
            singular name for compatibility; it used to be ignored in favour
            of a module-level ``models`` global.
        test_loader: Iterable of ``(inputs, _)`` batches; labels are ignored.
        optimizers: Mapping whose keys select which models to run.

    Returns:
        Mapping of name -> list of predictions (0.0 or 1.0 floats).
    """
    results = {}
    for optim_name in optimizers:
        net = model[optim_name]
        net.eval()
        predictions = []
        with torch.no_grad():  # no gradients needed; saves memory and time
            for inputs, _ in test_loader:
                outputs = net(inputs)
                # Threshold the probabilities at 0.5. Drop only the trailing
                # axis so a size-1 batch still yields a list, not a scalar
                # that extend() cannot consume.
                predicted = (outputs > 0.5).float().squeeze(-1)
                predictions.extend(predicted.tolist())
        print("Predict result: ", predictions)
        results[optim_name] = predictions
    return results


# The name matches pytest's default "test*" pattern; opt out of collection so
# this helper is never mistaken for a test case.
test.__test__ = False

# With a labelled validation set, accuracy can be computed in the same loop.
# For binary classification, threshold the sigmoid output directly
# (torch.max(outputs, 1) is the multi-class equivalent):
#     predicted = (outputs > 0.5).float().squeeze(-1)
#     total += labels.size(0)
#     correct += (predicted == labels).sum().item()
#     accuracy = 100 * correct / total
#     print(f'Accuracy: {accuracy:.2f}%')
# Load the data.
# Training set: no scaler is passed, so a new one is fitted.
train_dataset = TitanicDataset('data/titanic/train.csv', scaler=None, is_train=True)
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True, num_workers=0)
# Test set: reuse the scaler fitted on the training set.
test_dataset = TitanicDataset('data/titanic/test.csv', scaler=train_dataset.scaler, is_train=False)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False, num_workers=0)

# The raw file has 10 candidate columns (Pclass Name Sex Age SibSp Parch
# Ticket Fare Cabin Embarked); preprocessing keeps 7 of them:
# Pclass Sex Age SibSp Parch Fare Embarked.
models = {
    'Adam': BinaryClassificationModel(input_features=7),
    'SGD': BinaryClassificationModel(input_features=7),
}

# Loss function and optimizers.
criterion = torch.nn.BCELoss(reduction='mean')
optimizers = {
    'Adam': torch.optim.Adam(models['Adam'].parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-8, weight_decay=0.0001),
    'SGD': torch.optim.SGD(models['SGD'].parameters(), lr=0.01, weight_decay=0.001, momentum=0.9)
}

# Train the models.
num_epochs = 100
losses = train(models, train_loader, criterion, optimizers, num_epochs)

# Evaluate the models.
# NOTE: gender_submission.csv is Kaggle's sample submission (survival
# predicted from sex alone), not ground truth. The "accuracy" below measures
# agreement with that baseline rather than true test accuracy.
labels_path = 'data/titanic/gender_submission.csv'
data_frame = pd.read_csv(labels_path)
data_frame.drop(['PassengerId'], axis=1, inplace=True)
labels = data_frame['Survived'].values
print('Test Dataset 正确结果: ', labels)

# Model predictions.
results = test(models, test_loader, optimizers)
print('Test Dataset 预测结果: ', results)

# Compute each accuracy once, comparing element by element instead of
# relying on an implicit list-vs-array comparison.
accuracies = {}
for optimizer_name, predicted in results.items():
    matches = sum(int(p == t) for p, t in zip(predicted, labels))
    accuracy = 100 * matches / len(predicted)
    accuracies[optimizer_name] = accuracy
    print(f'Accuracy for {optimizer_name}: {accuracy:.2f}%')

plt.figure(figsize=(10, 5))
# Use a separate loop name so the `losses` dict is not rebound mid-iteration.
for optim_name, loss_curve in losses.items():
    plt.plot(loss_curve, label=optim_name)
    final_accuracy = accuracies[optim_name]
    plt.annotate(f'Final Acc: {final_accuracy:.2f}%', xy=(num_epochs - 1, loss_curve[-1]), xytext=(-40, 10), textcoords='offset points', fontsize=10)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Titanic Dataset training Loss Curve')
plt.legend()
plt.show()
代码:MNIST多分类任务
import torch
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.optim as optim
# prepare dataset
batch_size = 64
# Convert images to tensors, then normalise with the mean/std given below.
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.1307, ), (0.3081, ))]
)

train_dataset = datasets.MNIST(root='data/MNIST/', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root='data/MNIST/', train=False, transform=transform, download=True)

# Only the training loader is shuffled.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)
# design model
class Net(torch.nn.Module):
    """Fully connected classifier: 784 -> 512 -> 256 -> 128 -> 64 -> 10."""

    def __init__(self):
        super().__init__()
        self.linear1 = torch.nn.Linear(784, 512)
        self.linear2 = torch.nn.Linear(512, 256)
        self.linear3 = torch.nn.Linear(256, 128)
        self.linear4 = torch.nn.Linear(128, 64)
        self.linear5 = torch.nn.Linear(64, 10)

    def forward(self, x):
        """Return raw class scores (logits) for a batch of images."""
        # Flatten every image to a 784-element row.
        x = x.view(-1, 784)
        for hidden_layer in (self.linear1, self.linear2, self.linear3, self.linear4):
            x = F.relu(hidden_layer(x))
        # No activation on the output: CrossEntropyLoss applies
        # log-softmax + NLL loss itself.
        return self.linear5(x)
model = Net()

# construct loss and optimizer
# CrossEntropyLoss takes the raw logits that Net.forward returns.
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(params=model.parameters(), lr=0.01, momentum=0.5)
# training
def train(epoch):
    """Run one pass over ``train_loader``, logging the mean loss every 300 batches."""
    running_loss = 0.0
    for batch_idx, (inputs, target) in enumerate(train_loader, 0):
        optimizer.zero_grad()
        loss = criterion(model(inputs), target)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if batch_idx % 300 != 299:
            continue
        # Report the average of the last 300 batches, then start a new window.
        print('[%d, %5d] loss: %.3f' % (epoch+1, batch_idx+1, running_loss/300))
        running_loss = 0.0
# test
def test():
    """Print the accuracy of ``model`` over ``test_loader``."""
    correct, total = 0, 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            # torch.max along dim=1 returns (values, indices); the index of
            # the largest score is the predicted class.
            _, predicted = torch.max(outputs.data, dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print('Accuracy on test set: %d %%' %(100*correct/total))
if __name__ == '__main__':
    for epoch in range(10):
        train(epoch)
        # Evaluate after every epoch. The previous `epoch % 10 == 0` check
        # inside range(10) fired only for epoch 0, so the final model was
        # never tested.
        test()
P9 作业:Kaggle Otto Group Product Classification Challenge
数据下载地址:https://www.kaggle.com/c/otto-group-product-classification-challenge/data
百度网盘链接下载: https://pan.baidu.com/s/1g8rshQdwba7ctwLmzl69Qw?pwd=4nd4 提取码: 4nd4
import torch
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler # pip install scikit-learn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
class OttoDataset(Dataset):
    """Otto Group product-classification data as standardised float tensors.

    Args:
        feature_filepath: CSV file with an ``id`` column first. In 'train'
            mode the last column holds the class label as text ending in
            ``_<number>``.
        label_filepath: Optional CSV for 'test' mode whose columns after the
            first hold per-class scores; the arg-max column is the label.
        mode: 'train' or 'test'.
        scaler: Scaler fitted on the training set; required in 'test' mode.

    Raises:
        ValueError: For an unknown ``mode``, or 'test' mode without ``scaler``.
    """

    def __init__(self, feature_filepath, label_filepath=None, mode='train', scaler=None):
        super().__init__()
        data = pd.read_csv(feature_filepath)

        if mode == 'train':
            # Take the number after the last '_' in each label and shift it
            # to zero-based class indices.
            self.labels = torch.tensor(
                data.iloc[:, -1].apply(lambda x: int(x.split('_')[-1]) - 1).values,
                dtype=torch.long,
            )
            # Standardise every feature column (all columns except the first
            # and last) to mean 0 and standard deviation 1.
            self.scaler = StandardScaler()
            features = data.iloc[:, 1:-1].values
            self.features = torch.tensor(self.scaler.fit_transform(features), dtype=torch.float32)
        elif mode == 'test':
            if scaler is None:
                # An unfitted StandardScaler cannot transform. Fail with a
                # clear message instead of sklearn's NotFittedError.
                raise ValueError(
                    "mode='test' requires the scaler fitted on the training set"
                )
            self.scaler = scaler
            features = data.iloc[:, 1:].values
            # Apply the training-set scaling; never refit on test data.
            self.features = torch.tensor(self.scaler.transform(features), dtype=torch.float32)
            if label_filepath is not None:
                label_data = pd.read_csv(label_filepath)
                # Treat the columns after the first as per-class scores and
                # use the index of the largest one as the class.
                self.labels = torch.tensor(label_data.iloc[:, 1:].values.argmax(axis=1), dtype=torch.long)
            else:
                self.labels = None
        else:
            raise ValueError("Mode must be 'train' or 'test'")

        self.len = len(self.features)

    def __len__(self):
        return self.len

    def __getitem__(self, index):
        """Return ``(features, label)``; the label is -1 when none is available."""
        label = self.labels[index] if self.labels is not None else -1
        return self.features[index], label
class FullyConnectedModel(torch.nn.Module):
    """MLP ``input_features -> 128 -> 64 -> 32 -> output_classes`` returning logits."""

    def __init__(self, input_features, output_classes):
        super().__init__()
        # Linear layers (more could be added here).
        self.fc1 = torch.nn.Linear(input_features, 128)
        self.fc2 = torch.nn.Linear(128, 64)
        self.fc3 = torch.nn.Linear(64, 32)
        self.fc4 = torch.nn.Linear(32, output_classes)
        # Dropout reduces over-fitting.
        self.dropout = torch.nn.Dropout(p=0.3)
        # Batch normalisation helps stabilise training.
        self.batchnorm1 = torch.nn.BatchNorm1d(128)
        self.batchnorm2 = torch.nn.BatchNorm1d(64)
        self.batchnorm3 = torch.nn.BatchNorm1d(32)

    def forward(self, x):
        """Return unnormalised class scores for each row of ``x``."""
        hidden_blocks = (
            (self.fc1, self.batchnorm1),
            (self.fc2, self.batchnorm2),
            (self.fc3, self.batchnorm3),
        )
        # Each hidden block is linear -> batch norm -> ReLU -> dropout.
        for linear, norm in hidden_blocks:
            x = self.dropout(F.relu(norm(linear(x))))
        return self.fc4(x)
def train(epoch, train_loader, model, criterion, optimizer):
    """Train ``model`` for one epoch and return the mean per-batch loss.

    Args:
        epoch: Epoch index, used only in the progress message.
        train_loader: Iterable of ``(inputs, targets)`` batches.
        model: Network to optimise.
        criterion: Loss applied to ``(outputs, targets)``.
        optimizer: Optimizer bound to ``model``'s parameters.

    Returns:
        The sum of the batch losses divided by the number of batches.
    """
    model.train()
    # Take the device from the model itself, not from a module-level global,
    # so the function also works when imported elsewhere.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    running_loss = 0.0
    for batch_idx, (inputs, targets) in enumerate(train_loader, 0):
        inputs = inputs.to(device)
        targets = targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if batch_idx % 300 == 0:
            # running_loss sums every batch so far this epoch, so divide by
            # that count. The previous constant 300 printed a meaningless value.
            print('Epoch:[{}/{}], Loss:{:.4f}'.format(epoch, batch_idx, running_loss / (batch_idx + 1)))
    # Mean loss over the epoch.
    average_loss = running_loss / len(train_loader)
    return average_loss
def test(test_loader, model):
    """Return (and print) the accuracy of ``model`` on ``test_loader`` in percent.

    Args:
        test_loader: Iterable of ``(inputs, targets)`` batches.
        model: Network to evaluate.

    Returns:
        Accuracy as a float in [0, 100]; 0.0 if the loader yields no samples.
    """
    model.eval()
    # Take the device from the model itself, not from a module-level global.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    correct = 0.0
    total = 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            # Move the batch to the model's device before the forward pass.
            # Running the model first fails when the model is on the GPU.
            inputs = inputs.to(device)
            targets = targets.to(device)
            outputs = model(inputs)
            # The index of the largest score is the predicted class.
            _, predicted = torch.max(outputs.data, dim=1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
    # Guard against an empty loader instead of dividing by zero.
    accuracy = 100 * (correct / total) if total else 0.0
    print("Accuracy on test data is {:.2f}".format(accuracy))
    return accuracy


# The name matches pytest's default "test*" pattern; opt out of collection so
# this helper is never mistaken for a test case.
test.__test__ = False
if __name__ == '__main__':
    import os

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Prepare dataset. The test set reuses the scaler fitted on the training set.
    train_dataset = OttoDataset(feature_filepath='data/Otto/train.csv', mode='train')
    scaler = train_dataset.scaler
    test_dataset = OttoDataset(feature_filepath='data/Otto/test.csv', label_filepath='data/Otto/otto_correct_submission.csv', mode='test', scaler=scaler)
    train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True, num_workers=0)
    test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False, num_workers=0)

    # Design model
    model = FullyConnectedModel(input_features=93, output_classes=9).to(device)

    # Construct loss and optimizer. The learning rate is multiplied by 0.1
    # every 25 epochs.
    criterion = torch.nn.CrossEntropyLoss().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=25, gamma=0.1)

    # Train and Test
    train_losses = []
    test_accuracies = []
    # Record the epochs at which accuracy was measured, so the plot's x-axis
    # always matches the data. The old hard-coded range(0, 101, 2) put the
    # last point at 100 instead of 99 and broke if num_epochs changed.
    test_epochs = []
    num_epochs = 100
    for epoch in range(num_epochs):
        train_loss = train(epoch, train_loader, model, criterion, optimizer)
        train_losses.append(train_loss)
        # Evaluate every second epoch and after the final one.
        if epoch % 2 == 0 or epoch == num_epochs-1:
            test_accuracy = test(test_loader, model)
            test_accuracies.append(test_accuracy)
            test_epochs.append(epoch)
        # Update the learning rate
        scheduler.step()

    # Save model parameters for future use. Create the target directory
    # first, because torch.save does not create missing directories.
    os.makedirs('model', exist_ok=True)
    torch.save(model.state_dict(), 'model/09_kaggle_OttoDataset_model.pth')

    # Visualize
    plt.figure(figsize=(12, 5))
    # Loss Curve
    plt.subplot(1, 2, 1)
    plt.plot(train_losses, label='Train Loss')
    plt.title('Training Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    # Accuracy Curve
    plt.subplot(1, 2, 2)
    plt.plot(test_epochs, test_accuracies, label='Test Accuracy')
    plt.title('Testing Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy (%)')
    plt.legend()
    plt.show()