A small school project:
Recognizing classical Japanese with a convolutional network
Download the Kuzushiji-MNIST dataset (this dataset focuses on cursive Japanese)
One important feature of classical Japanese, and a key difference from modern Japanese, is that it contains hentaigana (変体仮名, "variant kana"). Hentaigana are hiragana characters with multiple written forms, because they were derived from different kanji. As a result, a single hiragana class in Kuzushiji-MNIST or Kuzushiji-49 may have many distinct character shapes mapped to it. This is one reason the Kuzushiji-MNIST dataset is more challenging than MNIST.
-
Dataset
-
28x28 grayscale, 70,000 images in total: 10 classes with 7,000 images each, split 6:1 into training and test sets (60,000 training / 10,000 test).
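As a quick sanity check, these numbers can be verified directly with torchvision (a minimal sketch, assuming torchvision is installed; it downloads the data to ./data):

import torch
from torchvision import datasets, transforms

train = datasets.KMNIST(root='./data', train=True, download=True,
                        transform=transforms.ToTensor())
test = datasets.KMNIST(root='./data', train=False, download=True,
                       transform=transforms.ToTensor())

print(len(train), len(test))          # 60000 10000 -> the 6:1 split
print(train[0][0].shape)              # torch.Size([1, 28, 28]), one grayscale channel
print(torch.bincount(train.targets))  # expect 6,000 per class in the training split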
Evaluation
-
Accuracy
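Accuracy here means the fraction of test images whose top-scoring class matches the ground-truth label. A minimal self-contained sketch with fake data, mirroring the computation in eval() below:

import torch

log_ps = torch.randn(4, 10).log_softmax(dim=1)  # fake batch of log-probabilities
labels = torch.tensor([3, 1, 0, 7])             # fake ground-truth labels
preds = log_ps.argmax(dim=1)                    # top-scoring class per image
print((preds == labels).float().mean())         # fraction correct = accuracy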
Project overview
-
- First, the raw data is loaded and the image features are vectorized. A CNN based on the LeNet architecture is used with ReLU activations, and fully connected layers are attached after the convolution-activation-pooling stages. Dropout is used as a regularization trick (see the sketch below), and the network parameters are trained with mini-batch gradient descent, yielding a recognizer for handwritten classical Japanese.
- Recognition accuracy on the test set: 89%
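The overview mentions dropout, but the baseline code below does not include it; the following is a minimal sketch of where nn.Dropout could be inserted into the CNN (the class name and the rate p=0.5 are assumptions for illustration, not taken from the project code):

import torch
from torch import nn
import torch.nn.functional as F

class CNNWithDropout(nn.Module):
    """Hypothetical variant of the CNN below with dropout after the first FC layer."""
    def __init__(self, p=0.5):  # p=0.5 is an assumed rate
        super().__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 50, kernel_size=5)
        self.fc1 = nn.Linear(50 * 4 * 4, 256)
        self.drop = nn.Dropout(p)  # randomly zeroes activations during training
        self.fc2 = nn.Linear(256, 10)

    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(x.shape[0], -1)
        x = self.drop(F.relu(self.fc1(x)))
        return F.log_softmax(self.fc2(x), dim=1)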
Baseline
-
Feedforward network: Linear (256) -> ReLU -> Linear (64) -> ReLU -> Linear (10) -> ReLU -> LogSoftmax
CNN:
-
conv1 (channels = 10, kernel size = 5, stride = 1) -> ReLU -> max pool (kernel size = 2x2) ->
conv2 (channels = 50, kernel size = 5, stride = 1) -> ReLU -> max pool (kernel size = 2x2) ->
Linear (256) -> ReLU -> Linear (10) -> LogSoftmax
class CNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = torch.nn.Conv2d(in_channels=1, out_channels=10, kernel_size=5, stride=1)
        self.conv2 = torch.nn.Conv2d(in_channels=10, out_channels=50, kernel_size=5, stride=1)
        # Spatial size: 28 -> 24 (conv1) -> 12 (pool) -> 8 (conv2) -> 4 (pool),
        # so the flattened conv output has 50*4*4 = 800 features
        self.fc1 = nn.Linear(50 * 4 * 4, 256)
        self.fc2 = nn.Linear(256, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
        x = x.view(x.shape[0], -1)  # make sure inputs are flattened
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        x = F.log_softmax(x, dim=1)  # preserve batch dim
        return x
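The fc1 input size of 50*4*4 follows from tracing the spatial dimensions through the network: 28 -> 24 (conv1, no padding) -> 12 (pool) -> 8 (conv2) -> 4 (pool). A quick standalone check of that arithmetic:

import torch
import torch.nn.functional as F

x = torch.zeros(1, 1, 28, 28)                       # one dummy KMNIST image
x = F.max_pool2d(torch.nn.Conv2d(1, 10, 5)(x), 2)   # -> (1, 10, 12, 12)
x = F.max_pool2d(torch.nn.Conv2d(10, 50, 5)(x), 2)  # -> (1, 50, 4, 4)
print(x.shape)  # torch.Size([1, 50, 4, 4]): 50*4*4 = 800 inputs to fc1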
The complete code:
#!/usr/bin/env python3
import torch
from torchvision import datasets, transforms
from torch import nn, optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
class Linear(nn.Module):
    """
    Linear (10) -> ReLU -> LogSoftmax
    """
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 10)

    def forward(self, x):
        x = x.view(x.shape[0], -1)  # make sure inputs are flattened
        x = F.relu(self.fc1(x))
        x = F.log_softmax(x, dim=1)  # preserve batch dim
        return x
class FeedForward(nn.Module):
    """
    Linear (256) -> ReLU -> Linear (64) -> ReLU -> Linear (10) -> ReLU -> LogSoftmax
    """
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 64)
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        x = x.view(x.shape[0], -1)  # make sure inputs are flattened
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.log_softmax(x, dim=1)  # preserve batch dim
        return x
class CNN(nn.Module):
    """
    CNN network structure:
    conv1 (channels = 10, kernel size = 5, stride = 1) -> ReLU -> max pool (kernel size = 2x2) ->
    conv2 (channels = 50, kernel size = 5, stride = 1) -> ReLU -> max pool (kernel size = 2x2) ->
    Linear (256) -> ReLU -> Linear (10) -> LogSoftmax
    Note: outputs from the last conv layer are reshaped prior to feeding them into
    the linear layers.
    """
    def __init__(self):
        super().__init__()
        self.conv1 = torch.nn.Conv2d(in_channels=1, out_channels=10, kernel_size=5, stride=1)
        self.conv2 = torch.nn.Conv2d(in_channels=10, out_channels=50, kernel_size=5, stride=1)
        # Spatial size: 28 -> 24 (conv1) -> 12 (pool) -> 8 (conv2) -> 4 (pool)
        self.fc1 = nn.Linear(50 * 4 * 4, 256)
        self.fc2 = nn.Linear(256, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
        x = x.view(x.shape[0], -1)  # make sure inputs are flattened
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        x = F.log_softmax(x, dim=1)  # preserve batch dim
        return x
class NNModel:
    def __init__(self, network, learning_rate):
        """
        Load Data, initialize a given network structure and set learning rate
        DO NOT MODIFY
        """
        # Define a transform to normalize the data
        transform = transforms.Compose([transforms.ToTensor(),
                                        transforms.Normalize((0.5,), (0.5,))])

        # Download and load the training data
        trainset = datasets.KMNIST(root='./data', train=True, download=True, transform=transform)
        self.trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=False)

        # Download and load the test data
        testset = datasets.KMNIST(root='./data', train=False, download=True, transform=transform)
        self.testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False)

        self.model = network

        """
        The loss function is chosen so that learning is equivalent to minimizing the
        cross-entropy loss. The networks output log-softmax values rather than raw logits,
        so torch.nn.CrossEntropyLoss (which applies log-softmax internally) would be
        incorrect here; NLLLoss on log-probabilities is the matching choice.
        """
        self.lossfn = torch.nn.NLLLoss()
        self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)

        # Note: these count batches (len of the DataLoader), not individual samples
        self.num_train_samples = len(self.trainloader)
        self.num_test_samples = len(self.testloader)
    def view_batch(self):
        """
        Display the first batch of images from trainloader in an 8x8 grid
        Do not make calls to plt.imshow() here
        Return:
            1) A float32 numpy array (of dim [28*8, 28*8]) containing a tiling of the batch
               images: the first 8 images on the first row, the second 8 on the second row, etc.
            2) An int 8x8 numpy array of labels corresponding to this tiling
        """
        for images, labels in self.trainloader:
            # (64, 1, 28, 28) -> (8, 8, 28, 28): an 8x8 grid of 28x28 images
            images = images.view(8, 8, 28, 28)
            # Reorder axes to (grid row, pixel row, grid col, pixel col) so that a
            # single reshape produces the tiled image
            images = images.permute(0, 2, 1, 3)
            return (images.reshape(8 * 28, 8 * 28).numpy(),
                    labels.view(8, 8).numpy())
    def train_step(self):
        """
        Used for submission tests and may be useful for debugging
        DO NOT MODIFY
        """
        self.model.train()
        for images, labels in self.trainloader:
            log_ps = self.model(images)
            loss = self.lossfn(log_ps, labels)
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
        return
    def train_epoch(self):
        self.model.train()
        for images, labels in self.trainloader:
            log_ps = self.model(images)
            loss = self.lossfn(log_ps, labels)
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
        return
    def eval(self):
        self.model.eval()
        accuracy = 0
        with torch.no_grad():
            for images, labels in self.testloader:
                log_ps = self.model(images)
                ps = torch.exp(log_ps)
                top_p, top_class = ps.topk(1, dim=1)
                equals = top_class == labels.view(*top_class.shape)
                accuracy += torch.mean(equals.type(torch.FloatTensor))
        return accuracy / self.num_test_samples
def plot_result(results, names):
    """
    Take a 2D list/array, where each row is the accuracy at each epoch of training for a
    given model, and the names of each model, and display the training curves
    """
    for i, r in enumerate(results):
        plt.plot(range(len(r)), r, label=names[i])
    plt.legend()
    plt.title("KMNIST")
    plt.xlabel("Epoch")
    plt.ylabel("Test accuracy")
    plt.grid(True)
    plt.tight_layout()
    plt.savefig("./part_2_plot.png")  # save before show(), which clears the figure
    plt.show()
def main():
    models = [Linear(), FeedForward(), CNN()]  # Change during development
    epochs = 10
    results = []

    # Can comment the below out during development
    images, labels = NNModel(Linear(), 0.003).view_batch()
    print(labels)
    plt.imshow(images, cmap="Greys")
    plt.show()

    for model in models:
        print(f"Training {model.__class__.__name__}...")
        m = NNModel(model, 0.003)
        accuracies = [0]  # epoch 0: untrained model
        for e in range(epochs):
            m.train_epoch()
            accuracy = m.eval()
            print(f"Epoch: {e+1}/{epochs}.. Test Accuracy: {accuracy}")
            accuracies.append(accuracy)
        results.append(accuracies)

    plot_result(results, [m.__class__.__name__ for m in models])


if __name__ == "__main__":
    main()
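A side note on the loss choice in NNModel: NLLLoss applied to log-softmax outputs computes the same value as CrossEntropyLoss applied to raw logits, since CrossEntropyLoss is exactly log-softmax followed by NLLLoss; applying it on top of already-log-softmaxed outputs would normalize twice. A small self-contained check:

import torch
import torch.nn.functional as F

logits = torch.randn(4, 10)
labels = torch.tensor([3, 1, 0, 7])

ce = torch.nn.CrossEntropyLoss()(logits, labels)
nll = torch.nn.NLLLoss()(F.log_softmax(logits, dim=1), labels)
print(torch.allclose(ce, nll))  # True: the two losses coincide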