import torch
from torch import nn
from skorch import NeuralNetClassifier
import torch.nn.functional as F
# 1. Define the model class
class Torch_Model(nn.Module):
    """Small convolutional classifier: two conv layers, then two linear layers.

    The layer sizes assume single-channel 28x28 inputs: after two
    ``kernel_size=5`` convolutions, each followed by 2x2 max-pooling, the
    feature map is 20 x 4 x 4 = 320 values per sample, which is the input
    width of ``fc1``.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        """Return per-class log-probabilities with one row per input sample.

        Args:
            x: Batch of images shaped ``(batch, 1, 28, 28)``.

        Returns:
            Tensor of shape ``(batch, 10)`` holding ``log_softmax`` scores.

        Raises:
            RuntimeError: If the flattened feature size per sample does not
                match the 320 inputs expected by ``fc1``.
        """
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        # Flatten everything except the batch dimension. A hard-coded
        # ``view(-1, 320)`` would silently merge several samples into one row
        # whenever the per-sample feature count differs from 320; flattening
        # per sample makes such a mismatch fail loudly in ``fc1`` instead.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        # Functional dropout must be told explicitly whether we are training.
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)
# 2. Create the classifier
# Run on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Wrap the network in skorch's classifier. The model class itself (not an
# instance) is passed in, together with the loss and optimizer classes.
classifier = NeuralNetClassifier(
    Torch_Model,
    criterion=nn.CrossEntropyLoss,
    optimizer=torch.optim.Adam,
    # NOTE(review): presumably turns off skorch's internal validation split so
    # every supplied sample is used for training -- confirm in the skorch docs.
    train_split=None,
    verbose=1,
    device=device,
)
# 3. Load the data
import numpy as np
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor
from torchvision.datasets import MNIST

mnist_data = MNIST(root='data', download=True, transform=ToTensor())
# A single shuffled batch of 60000 pulls the data set into memory in one go.
dataloader = DataLoader(mnist_data, shuffle=True, batch_size=60000)
x, y = next(iter(dataloader))

# The modAL learner does not accept tensors, so convert both to NumPy arrays.
x, y = x.detach().cpu().numpy(), y.detach().cpu().numpy()

# Split the shuffled batch: the first 50000 samples are for training and the
# remainder is held out for the final evaluation.
x_train, y_train = x[:50000], y[:50000]
x_test, y_test = x[50000:], y[50000:]

# Make the (N, 1, 28, 28) layout explicit for the convolutional model.
x_train = x_train.reshape(50000, 1, 28, 28)
x_test = x_test.reshape(10000, 1, 28, 28)
# Draw the initial labelled sample: n_initial distinct random training indices.
n_initial = 1000
initial_idx = np.random.choice(np.arange(len(x_train)), size=n_initial, replace=False)
x_initial, y_initial = x_train[initial_idx], y_train[initial_idx]

# Build the pool: the training set minus the samples already used for
# initialisation.
x_pool, y_pool = (
    np.delete(arr, initial_idx, axis=0) for arr in (x_train, y_train)
)
# 4. Build the active learner
from modAL.models import ActiveLearner

# Initialise the learner with the skorch classifier and the initial labelled
# samples. No query strategy is passed, so modAL's default is used.
learner = ActiveLearner(
    estimator=classifier,
    X_training=x_initial,
    y_training=y_initial,
)
# 5. Query the pool
# Active-learning loop: each round asks the learner which pool samples it
# wants labelled, teaches it those samples, then removes them from the pool so
# they cannot be selected again.
n_queries = 10
for idx in range(n_queries):
    print('Query no. %d' % (idx + 1))
    # Only the selected indices are needed; the returned instances are the
    # same rows as x_pool[query_idx].
    query_idx, _ = learner.query(x_pool, n_instances=100)
    # Train with the newly labelled samples. only_new=False is passed so the
    # learner is not restricted to the new batch alone (see modAL's
    # ActiveLearner.teach documentation).
    learner.teach(
        X=x_pool[query_idx], y=y_pool[query_idx], only_new=False,
    )
    # Drop the queried samples from the pool.
    x_pool = np.delete(x_pool, query_idx, axis=0)
    y_pool = np.delete(y_pool, query_idx, axis=0)

# The final accuracy score on the held-out samples.
print(learner.score(x_test, y_test))
# Final result: using only 2,000 labelled samples (1,000 initial + 10 queries x 100 each), the model reached 95.84% accuracy.