问题
在 Windows 环境下运行 PyTorch 的 cifar10_tutorial 时,只要 DataLoader 的 num_workers 大于 0,就会遇到如下的错误:
......
......
......
RuntimeError:
An attempt has been made to start a new process before the
current process has finished its bootstrapping phase.
This probably means that you are not using fork to start your
child processes and you have forgotten to use the proper idiom
in the main module:
    if __name__ == '__main__':
        freeze_support()
        ...
The "freeze_support()" line can be omitted if the program
is not going to be frozen to produce an executable.
---------------------
......
......
......
BrokenPipeError: [Errno 32] Broken pipe
分析
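问题出现的原因是:Windows 不支持 fork,multiprocessing 库只能以 spawn 方式启动子进程(child process),每个子进程启动时都会重新导入主模块。如果创建 DataLoader(num_workers > 0)的代码直接写在模块顶层,子进程在导入时会再次执行这些代码并试图继续创建子进程,于是触发上面的 RuntimeError。用 if __name__ == '__main__': 把这些代码保护起来之后,子进程导入主模块时 __name__ 不等于 '__main__',被保护的代码不会再次执行,从而避免了循环生成子进程。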
参考:https://blog.csdn.net/tanmx219/article/details/86129623
解决方法
方法一
按照错误信息的提示,在主要执行代码(创建 DataLoader、训练、测试等)之前加入
if __name__ == '__main__':
并将这些代码全部缩进放在这行之下,作用类似于 C 语言中的 int main()。按 PyTorch 官网教程的代码重新排列之后,即可在 Windows 下直接运行。
附代码及较详细注释如下:
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
# functions to show an image
def imshow(img):
    """Display a normalized image tensor with matplotlib.

    Args:
        img: Image tensor laid out as (channels, height, width) whose values
            were normalized with mean 0.5 and std 0.5 per channel, as done by
            the ``transforms.Normalize`` call in this script.
    """
    img = img / 2 + 0.5  # undo the normalization: [-1, 1] -> [0, 1]
    # detach() and cpu() are no-ops for a plain CPU tensor, but keep the
    # conversion from failing when the tensor tracks gradients or is on a GPU.
    npimg = img.detach().cpu().numpy()
    # matplotlib expects (height, width, channels), so move the channel axis last.
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()  # show the image
class Net(nn.Module):
    """Small convolutional classifier used by the CIFAR-10 tutorial script.

    Two convolution + max-pool stages are followed by three fully connected
    layers that produce 10 scores per image. The size of the first linear
    layer (16 * 5 * 5) matches 3-channel 32x32 inputs:
    32 -> conv 5x5 -> 28 -> pool -> 14 -> conv 5x5 -> 10 -> pool -> 5.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)        # 3 input channels -> 6 feature maps, 5x5 kernel
        self.pool = nn.MaxPool2d(2, 2)         # 2x2 window, stride 2; reused after both convs
        self.conv2 = nn.Conv2d(6, 16, 5)       # 6 -> 16 feature maps, 5x5 kernel
        self.fc1 = nn.Linear(16 * 5 * 5, 120)  # fully connected layers
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)           # one output per class

    def forward(self, x):
        """Return the 10 class scores for each image in the batch ``x``."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten each image's 16 x 5 x 5 feature maps into one row of 400
        # values; the result has shape (batch, 400).
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
if __name__ == '__main__':
    # Everything that builds or iterates a DataLoader with num_workers > 0 is
    # kept under this guard. When worker processes are not started with fork
    # (as on Windows), they import this module again; the guard keeps them from
    # re-running the script and trying to start workers of their own.
    net = Net()
    # print(net)

    # The output of torchvision datasets are PILImage images of range [0, 1].
    # We transform them to Tensors of normalized range [-1, 1].
    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                            download=True, transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                              shuffle=True, num_workers=2)

    testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                           download=True, transform=transform)
    testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                             shuffle=False, num_workers=2)

    classes = ('plane', 'car', 'bird', 'cat',
               'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

    # get some random training images
    dataiter = iter(trainloader)
    # Use the built-in next(): DataLoader iterators no longer provide .next().
    images, labels = next(dataiter)

    # show images
    imshow(torchvision.utils.make_grid(images))
    # print labels (one per image in the batch, whatever the batch size is)
    print(' '.join('%5s' % classes[label] for label in labels.tolist()))

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

    for epoch in range(2):  # loop over the dataset multiple times
        running_loss = 0.0
        for i, data in enumerate(trainloader):
            # get the inputs
            inputs, labels = data

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()  # get a Python number from a tensor containing a single value
            if i % 2000 == 1999:  # print every 2000 mini-batches
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0

    print('Finished Training')

    dataiter = iter(testloader)
    images, labels = next(dataiter)

    # print images
    imshow(torchvision.utils.make_grid(images))
    print('GroundTruth: ', ' '.join('%5s' % classes[label] for label in labels.tolist()))

    outputs = net(images)
    # Take the maximum of each row of outputs (dim 1); the returned indices
    # are the predicted class ids.
    _, predicted = torch.max(outputs, 1)
    print('Predicted: ', ' '.join('%5s' % classes[pred]
                                  for pred in predicted.tolist()))

    # Overall accuracy on the test set.
    correct = 0
    total = 0
    with torch.no_grad():  # no gradients are needed for evaluation
        for data in testloader:
            images, labels = data
            outputs = net(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            # sum() counts the matching predictions; item() turns the
            # one-element tensor into a Python number.
            correct += (predicted == labels).sum().item()

    print('Accuracy of the network on the 10000 test images: %d %%' % (
        100 * correct / total))

    # Per-class accuracy: one counter pair per class.
    class_correct = [0.] * len(classes)
    class_total = [0.] * len(classes)
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            outputs = net(images)
            _, predicted = torch.max(outputs, 1)
            hits = predicted == labels
            # Pair every label with its hit flag so the loop follows the
            # actual batch length instead of assuming four images per batch.
            for label, hit in zip(labels.tolist(), hits.tolist()):
                class_correct[label] += hit  # bool counts as 0 or 1 in arithmetic
                class_total[label] += 1

    for i in range(len(classes)):
        print('Accuracy of %5s : %2d %%' % (
            classes[i], 100 * class_correct[i] / class_total[i]))
方法二
将 torch.utils.data.DataLoader 的参数 num_workers 设为 0,即只在主进程中加载数据、不再创建子进程,因此不会触发上述错误(代价是数据加载可能变慢)。
附相关代码如下:
# num_workers=0: batches are loaded in the main process, so no worker
# processes are started.
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=4, shuffle=True, num_workers=0)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=4, shuffle=False, num_workers=0)
参考:https://blog.csdn.net/qq_33666011/article/details/81873217