读取地基云图,并显示
%matplotlib inline
import gluonbook as gb
from mxnet import gluon,init,nd,image
from mxnet.gluon import data as gdata,loss as gloss,model_zoo,nn
import numpy as np
def read_voc_images(root=voc_dir, train=True):
    """Load every image / ground-truth pair listed in a split's index file.

    The index file is ``<root>/segmentation/train.txt`` when ``train`` is
    true and ``<root>/segmentation/val.txt`` otherwise.  Its content is split
    on whitespace and each token is used as a file stem.

    Args:
        root: Dataset root directory.
        train: Read the training index when true, the validation index
            otherwise.

    Returns:
        A ``(data, label)`` tuple of two equal-length lists.  ``data[i]`` is
        read from ``<root>/images/<stem>.png`` and ``label[i]`` from
        ``<root>/GTmaps/<stem>_GT.png`` for the i-th stem in the index.
    """
    split_file = 'train.txt' if train else 'val.txt'
    with open('%s/segmentation/%s' % (root, split_file), 'r') as index:
        stems = index.read().split()

    data = []
    label = []
    for stem in stems:
        # Read the image first and its ground truth second, pair by pair.
        data.append(image.imread('%s/images/%s.png' % (root, stem)))
        label.append(image.imread('%s/GTmaps/%s_GT.png' % (root, stem)))
    return data, label
# Load the training split, then preview the first n images followed by the
# n ground-truth maps that belong to them.
train_images, train_labels = read_voc_images()
n = 5
imgs = train_images[:n] + train_labels[:n]
gb.show_images(imgs, 2, n)
对图片进行剪切,从而增加数据集,并显示
def voc_rand_crop(data, label, height, width):
    """Cut the same randomly placed window out of an image and its label.

    Args:
        data: Image to crop.
        label: Ground-truth map that accompanies ``data``.
        height: Height of the crop window.
        width: Width of the crop window.

    Returns:
        A ``(data, label)`` tuple holding the two crops.
    """
    # random_crop takes its size as (width, height) and also reports the
    # rectangle it picked; reusing that rectangle keeps the label aligned.
    cropped_data, crop_rect = image.random_crop(data, (width, height))
    cropped_label = image.fixed_crop(label, *crop_rect)
    return cropped_data, cropped_label
# Take n random 224x224 crops of the first training pair.  Every call adds an
# (image, label) pair, so imgs alternates image, label, image, label, ...
imgs = []
for _ in range(n):
    imgs.extend(voc_rand_crop(train_images[0], train_labels[0], 224, 224))
# Even slots hold the image crops and odd slots the label crops.  The
# trailing semicolon is kept from the notebook cell this came from.
gb.show_images(imgs[0::2] + imgs[1::2], 2, n);
生成云图分割数据集
class VOCSegDataset(gdata.Dataset):
    """Segmentation dataset that serves one random crop per sample.

    All images of the chosen split are loaded eagerly in the constructor.
    Samples smaller than ``crop_size`` are dropped, the remaining images are
    normalised once up front, and a fresh random crop is taken on every
    ``__getitem__`` call.
    """

    def __init__(self, train, crop_size, voc_dir, colormap2label):
        """Load, filter and normalise the requested split.

        Args:
            train: Forwarded to ``read_voc_images`` to pick the split.
            crop_size: ``(height, width)`` of the crops to serve.
            voc_dir: Dataset root, forwarded to ``read_voc_images``.
            colormap2label: Lookup handed to ``voc_label_indices`` when a
                label crop is converted.
        """
        # Per-channel constants used by normalize_image.
        self.rgb_mean = nd.array([0.485, 0.456, 0.406])
        self.rgb_std = nd.array([0.229, 0.224, 0.225])
        # Must be set before filter() is called below.
        self.crop_size = crop_size
        images, targets = read_voc_images(root=voc_dir, train=train)
        # NOTE(review): images and labels are filtered independently; they
        # stay index-aligned only if each label has the same height and
        # width as its image -- confirm for this dataset.
        kept_images = self.filter(images)
        self.data = [self.normalize_image(img) for img in kept_images]
        self.label = self.filter(targets)
        self.colormap2label = colormap2label
        print('read ' + str(len(self.data)) + ' examples')

    def normalize_image(self, data):
        """Scale ``data`` by 1/255, then standardise with rgb_mean / rgb_std."""
        scaled = data.astype('float32') / 255
        return (scaled - self.rgb_mean) / self.rgb_std

    def filter(self, images):
        """Return the images whose first two dimensions cover ``crop_size``."""
        kept = []
        for im in images:
            if im.shape[0] < self.crop_size[0] or im.shape[1] < self.crop_size[1]:
                continue  # too small to crop from
            kept.append(im)
        return kept

    def __getitem__(self, idx):
        """Return a random crop of sample ``idx`` as ``(data, label_indices)``."""
        feature, label = voc_rand_crop(
            self.data[idx], self.label[idx], *self.crop_size)
        # Move the last axis to the front (presumably HWC -> CHW -- confirm).
        channels_first = feature.transpose((2, 0, 1))
        return channels_first, voc_label_indices(label, self.colormap2label)

    def __len__(self):
        """Number of samples that survived the size filter."""
        return len(self.data)
# Crop size served by both datasets, ordered (height, width).
output_shape = (224, 224)
voc_train = VOCSegDataset(
    train=True, crop_size=output_shape, voc_dir=voc_dir,
    colormap2label=colormap2label)
voc_test = VOCSegDataset(
    train=False, crop_size=output_shape, voc_dir=voc_dir,
    colormap2label=colormap2label)
import sys
batch_size = 4
# Use no loader workers when the platform string starts with 'win32',
# four everywhere else.
if sys.platform.startswith('win32'):
    num_workers = 0
else:
    num_workers = 4
# Both loaders discard a trailing incomplete batch; only the training
# loader shuffles.
train_iter = gdata.DataLoader(
    voc_train, batch_size, shuffle=True, last_batch='discard',
    num_workers=num_workers)
test_iter = gdata.DataLoader(
    voc_test, batch_size, last_batch='discard', num_workers=num_workers)
定义模型
class Resnet(nn.Block):
    """Encoder-decoder segmentation network on top of a pretrained backbone.

    The encoder reuses entries 0-8 of the module-level
    ``pretrained_net.features``.  The decoder is five transposed
    convolutions, each with kernel 4, padding 1 and stride 2, so each one
    doubles the spatial size.  Every layer is wrapped in its own
    ``nn.Sequential`` and all fourteen wrappers are chained in ``self.net``.

    Shapes noted by the original author for a 4x3x224x224 input:
    encoder output 4x512x7x7, then the decoder produces 256x14x14,
    128x28x28, 64x56x56, 64x112x112 and finally 3x224x224.
    """

    def __init__(self, verbose=False, **kwargs):
        """Assemble the encoder and decoder stages.

        Args:
            verbose: Stored on the instance as ``self.verbose``; not read
                anywhere inside this class.
            **kwargs: Forwarded to ``nn.Block.__init__``.
        """
        super(Resnet, self).__init__(**kwargs)
        num_encoder_stages = 9
        # Output channels of the decoder layers, first to last.  The last
        # layer emits 3 channels (the original inline comment said 64).
        decoder_channels = (256, 128, 64, 64, 3)

        # Blocks are created in the same order as before (wrapper first,
        # then the layer it holds, stage by stage, container last) so that
        # automatically generated block names do not change.
        stages = []
        for index in range(num_encoder_stages):
            stage = nn.Sequential()
            stage.add(pretrained_net.features[index])
            stages.append(stage)
        for channels in decoder_channels:
            stage = nn.Sequential()
            stage.add(nn.Conv2DTranspose(channels, kernel_size=4, padding=1,
                                         strides=2))
            stages.append(stage)

        self.verbose = verbose
        self.net = nn.Sequential()
        self.net.add(*stages)

    def forward(self, x):
        """Run ``x`` through all fourteen stages in order.

        The original body kept each stage's output in a local and carried
        commented-out additions of encoder outputs onto decoder outputs
        (skip connections).  Those were disabled, so the computation is a
        plain chain, which the sequential container performs directly.
        """
        return self.net(x)
# Build the network, initialise it, and push a random 4x3x224x224 batch
# through it so the notebook echoes the output shape.
net = Resnet(verbose=True)
net.initialize()
x = nd.random.normal(shape=(4, 3, 224, 224))
net(x).shape
训练模型
%matplotlib inline
import gluonbook as gb
from mxnet import gluon, init, nd, image
from mxnet.gluon import data as gdata, loss as gloss, model_zoo, nn
import numpy as np
import sys
# Place the parameters on the devices returned by gb.try_all_gpus().
ctx = gb.try_all_gpus()
net.collect_params().reset_ctx(ctx)
# axis=1 tells the loss which axis of the network output holds the
# per-class scores.
loss = gloss.SoftmaxCrossEntropyLoss(axis=1)
trainer = gluon.Trainer(
    net.collect_params(),
    'sgd',
    {'learning_rate': 0.1, 'wd': 1e-3},
)
gb.train(train_iter, test_iter, net, loss, trainer, ctx, num_epochs=50)
显示模型训练的效果
def display(img, out, thresh=0.5):
    """Show ``img`` and outline every detection scoring at least ``thresh``.

    Each kept detection is drawn as an unfilled rectangle with a text tag
    ``"<class name> <score>"`` just above its top-left corner.  All
    detections of one class share a randomly chosen colour.

    Args:
        img: Image passed to ``plt.imshow``.  ``img.shape[1]`` and
            ``img.shape[0]`` are used as the x and y scale factors.
        out: Iterable of detection rows read as
            ``[class_id, score, xmin, ymin, xmax, ymax, ...]``.  The four
            box values are multiplied by the scale factors, so they are
            presumably normalised to [0, 1] -- confirm against the caller.
            Each row must support slicing and ``.tolist()``.
        thresh: Detections with a score below this value are skipped.

    Relies on a module-level ``class_names`` sequence indexed by class id.
    """
    import random

    import matplotlib as mpl
    # The body used ``plt`` without importing it anywhere in this section;
    # import it here so the function works on its own.
    import matplotlib.pyplot as plt

    mpl.rcParams['figure.figsize'] = (10, 10)
    pens = {}
    plt.clf()
    plt.imshow(img)
    for det in out:
        cid = int(det[0])
        if cid < 0:
            # Rows with a negative class id are skipped.
            continue
        score = det[1]
        if score < thresh:
            continue
        if cid not in pens:
            pens[cid] = (random.random(), random.random(), random.random())
        # Scale order is (x, y, x, y), matching xmin, ymin, xmax, ymax.
        scales = [img.shape[1], img.shape[0]] * 2
        xmin, ymin, xmax, ymax = [
            int(p * s) for p, s in zip(det[2:6].tolist(), scales)]
        rect = plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                             fill=False, edgecolor=pens[cid], linewidth=3)
        plt.gca().add_patch(rect)
        text = class_names[cid]
        plt.gca().text(xmin, ymin - 2, '{:s} {:.3f}'.format(text, score),
                       bbox=dict(facecolor=pens[cid], alpha=0.5),
                       fontsize=12, color='white')
    plt.show()
# Show the detections of the first output entry on the image, with its last
# axis reordered by (2, 1, 0) (presumably BGR -> RGB -- confirm).
# NOTE(review): the imports in this file bind `image` to the mxnet image
# module, and `output` is not defined in this section; confirm both are
# reassigned to an array / prediction before this call runs.
display(image[:, :, (2, 1, 0)], output[0].asnumpy(), thresh=0.27)