图像数据来自吴恩达深度学习,主要任务是分辨图像中人物是笑脸(1)还是非笑脸(0),是二分类的问题,使用交叉熵损失函数。
读取训练集和测试集:
# Build the training and test DataLoaders from the "happy face" HDF5 files.
# NOTE(review): Image_Data and DataLoader must already be defined/imported
# in this notebook session (the class cell appears after this one).
train_signs_data_path = '/content/drive/MyDrive/Colab Notebooks/吴恩达L4HW2KT/datasets/train_happy(1).h5'
test_signs_data_path = '/content/drive/MyDrive/Colab Notebooks/吴恩达L4HW2KT/datasets/test_happy.h5'

train_data = Image_Data(train_signs_data_path)
test_data = Image_Data(test_signs_data_path)

# Shuffle only the training split; keep evaluation order deterministic.
train_data_loader = DataLoader(train_data, batch_size=32, shuffle=True)
test_data_loader = DataLoader(test_data, batch_size=32, shuffle=False)
class Image_Data(Dataset):
    """Dataset wrapper around Andrew Ng's "happy face" HDF5 files.

    Each sample is a (64, 64, 3) float32 image scaled to [0, 1] and a
    float32 binary label (1 = smiling, 0 = not smiling).
    """

    def __init__(self, data_path):
        super(Image_Data, self).__init__()
        # Dispatch on the keys actually present in the file instead of
        # comparing against one hard-coded absolute path (the original
        # check silently read the wrong keys as soon as the training file
        # was moved or renamed). The context manager also closes the file
        # handle, which the original leaked.
        with h5py.File(data_path, "r") as dataset:
            if "train_set_x" in dataset:
                data_set_x_orig = np.array(dataset["train_set_x"][:])
                data_set_y_orig = np.array(dataset["train_set_y"][:])
            else:
                data_set_x_orig = np.array(dataset["test_set_x"][:])
                data_set_y_orig = np.array(dataset["test_set_y"][:])
        # Scale uint8 pixels to [0, 1]; labels stay float32 (the training
        # loop casts them to long for CrossEntropyLoss).
        data_set_x_orig = data_set_x_orig.astype("float32") / 255
        data_set_y_orig = data_set_y_orig.astype("float32")
        self.x_data = torch.from_numpy(data_set_x_orig)
        self.y_data = torch.from_numpy(data_set_y_orig)
        self.len = self.y_data.size()[0]

    def __getitem__(self, item):
        # One (image, label) pair; images are stored HWC, not CHW.
        return self.x_data[item], self.y_data[item]

    def __len__(self):
        return self.len

    def get_shape(self):
        # Convenience accessor: (x shape, y shape) as torch.Size objects.
        return self.x_data.size(), self.y_data.size()
number of training examples = 600
number of test examples = 150
X_train shape: (600, 64, 64, 3)
Y_train shape: (600, 1)
X_test shape: (150, 64, 64, 3)
Y_test shape: (150, 1)
三层卷积:
class Happymodel(torch.nn.Module):
    """Three-stage CNN for binary smile classification.

    Input:  (N, 3, 64, 64) float tensor.
    Output: (N, 2) scores fed to CrossEntropyLoss.
    """

    def __init__(self):
        super(Happymodel, self).__init__()
        # Three conv -> batch-norm -> ReLU -> 2x2 max-pool stages; the
        # spatial size halves at each pool: 64 -> 32 -> 16 -> 8. A final
        # Sigmoid squashes the 8x8x32 feature map before the linear head.
        self.cnn = torch.nn.Sequential(
            torch.nn.Conv2d(3, 8, kernel_size=3, stride=1, padding=1),    # (N, 8, 64, 64)
            torch.nn.BatchNorm2d(8),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0),       # (N, 8, 32, 32)
            torch.nn.Conv2d(8, 16, kernel_size=3, stride=1, padding=1),   # (N, 16, 32, 32)
            torch.nn.BatchNorm2d(16),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0),       # (N, 16, 16, 16)
            torch.nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),  # (N, 32, 16, 16)
            torch.nn.BatchNorm2d(32),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0),       # (N, 32, 8, 8)
            torch.nn.Sigmoid(),
        )
        # 32 channels * 8 * 8 spatial positions -> 2 class scores.
        self.fc = torch.nn.Linear(32 * 64, 2)

    def forward(self, x):
        features = self.cnn(x)
        # Flatten everything but the batch dimension before the head.
        flat = features.reshape(features.size(0), -1)
        return self.fc(flat)
# --- Training configuration ---
epoch_num = 60
learning_rate = 0.001
batch_size = 32
seed = 3          # NOTE(review): declared but never passed to torch.manual_seed
costs = []        # average training loss recorded per epoch

# --- Model, device placement, optimizer, loss ---
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
hpm = Happymodel()
hpm.to(device)
num = torch.cuda.device_count()  # number of visible GPUs (informational only)
optimizer = torch.optim.Adam(hpm.parameters(), lr=learning_rate)
loss_func = torch.nn.CrossEntropyLoss()
模型训练:
# Training loop: one Adam step per mini-batch; record the average epoch
# loss in `costs` for the learning-curve plot below.
hpm.train()
for epoch in range(epoch_num):
    cost = 0
    for i, data in enumerate(train_data_loader):
        img_data, img_label = data
        # The dataset yields NHWC (samples, height, width, channels);
        # Conv2d expects NCHW, so permute before the forward pass.
        img_data = img_data.permute(0, 3, 1, 2)
        img_data = img_data.to(device)
        img_label = img_label.to(device)
        optimizer.zero_grad()
        # Call the module itself (runs registered hooks) rather than
        # hpm.forward(). The original also ran an extra, unused
        # `hpm.cnn(img_data)` pass here, doubling per-batch compute —
        # removed.
        y_pred = hpm(img_data)
        # CrossEntropyLoss requires int64 class indices.
        loss = loss_func(y_pred, img_label.long())
        loss.backward()   # backpropagation
        optimizer.step()  # parameter update
        cost = cost + loss.item()
    costs.append(cost / (i + 1))
    if epoch % 5 == 0:
        print("epoch=" + str(epoch) + ": " + "loss=" + str(cost / (i + 1)))
plt.plot(costs)
plt.ylabel("cost")
plt.xlabel('iterations (per tens)')
plt.title("Learning rate =" + str(learning_rate))
plt.show()
epoch=0: loss=0.7684744847448248 epoch=5: loss=0.45560904553062037 epoch=10: loss=0.30206744764980514 epoch=15: loss=0.16170190353142588 epoch=20: loss=0.11984253086541828 epoch=25: loss=0.09111806475802471 epoch=30: loss=0.06678835596693189 epoch=35: loss=0.05601120044134165 epoch=40: loss=0.04703287594020367 epoch=45: loss=0.03911749812725343 epoch=50: loss=0.0324560205795263 epoch=55: loss=0.024705255796250544
测试集上的表现:
# Evaluate on the test split: collect predicted class indices per batch
# and track the running average cross-entropy loss.
y_test_pred = []
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
loss_func = torch.nn.CrossEntropyLoss()
costs = []
cost = 0
hpm.to(device)
hpm.eval()
# Disable autograd for the entire evaluation. The original wrapped only
# the loss computation in no_grad, so the forward pass still built a
# gradient graph on every batch for no benefit.
with torch.no_grad():
    for i, data in enumerate(test_data_loader):
        img_data, img_label = data
        # NHWC -> NCHW for Conv2d.
        img_data = img_data.permute(0, 3, 1, 2)
        img_data = img_data.to(device)
        img_label = img_label.to(device)
        y_pred = hpm(img_data)
        # argmax over the 2 class scores gives the predicted label.
        y_test_pred.append(torch.argmax(y_pred, dim=1))
        loss = loss_func(y_pred, img_label.long())
        cost = cost + loss.item()
        costs.append(cost / (i + 1))
print(y_pred.shape)
预测标签与真实标签比较,精确率为96.67%