百度飞桨图像分割课程 作业2:basic_seg_loss.py、basic_dataloader.py、basic_transforms.py、basic_train.py、train.py

在这里插入图片描述
代码列表:
在这里插入图片描述

  1. basic_transforms.py
import cv2
import numpy as np
import random
import matplotlib.pyplot as plt


class Compose(object):
    """Chain transforms; each one receives and returns (image, label)."""

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, label=None):
        for transform in self.transforms:
            image, label = transform(image, label)
        return image, label


class Normalize(object):
    """Standardize an image: optional rescale to [0, 1], then (x - mean) / std.

    Pass val_scale=1 when mean_val/std_val lie in (0, 1) — the image is first
    divided by 255.  Any other value assumes the statistics are already on the
    (0, 255) pixel scale and applies no rescaling.
    """

    def __init__(self, mean_val, std_val, val_scale=1):
        self.mean = np.array(mean_val, dtype=np.float32)
        self.std = np.array(std_val, dtype=np.float32)
        # Factor applied to raw pixel values before standardization.
        self.val_scale = 1 / 255.0 if val_scale == 1 else 1

    def __call__(self, image, label=None):
        scaled = image.astype(np.float32) * self.val_scale
        image = (scaled - self.mean) * (1 / self.std)
        return image, label


class ConvertDataType(object):
    """Cast the image to float32 and, when present, the label to int64."""

    def __call__(self, image, label=None):
        converted_label = None if label is None else label.astype(np.int64)
        return image.astype(np.float32), converted_label


# 增加边框,size指定为一个int类型,确定增加后图像的尺寸,方形;
# 若指定为一个tuple或list则宽高分别为list的值
class Pad(object):
    """Pad image (and label) with constant borders up to a target size.

    size: int for a square target, or a (height, width) tuple/list.
    ignore_label: constant used for the label's borders.
    mean_val: constant for the image's borders; scalar or per-channel list.
    val_scale: 1 when mean_val lies in (0, 1) (it is scaled up to 0-255);
               any other value when mean_val is already in (0, 255).
    """

    def __init__(self, size, ignore_label=255, mean_val=0, val_scale=1):
        # Factor mapping a (0, 1) mean onto the 0-255 pixel scale.
        factor = 255 if val_scale == 1 else 1

        if isinstance(size, int):
            self.size_height = self.size_width = size
        else:
            self.size_height, self.size_width = size[0], size[1]
        self.ignore_label = ignore_label
        if isinstance(mean_val, (tuple, list)):
            self.mean_val = [int(channel * factor) for channel in mean_val]
        else:
            self.mean_val = int(mean_val * factor)

    def __call__(self, image, label=None):
        h, w, _ = image.shape
        pad_h = max(self.size_height - h, 0)
        pad_w = max(self.size_width - w, 0)
        if pad_h == 0 and pad_w == 0:
            return image, label

        # Split padding evenly; odd remainders go to the bottom/right.
        top, left = pad_h // 2, pad_w // 2
        bottom, right = pad_h - top, pad_w - left

        image = cv2.copyMakeBorder(image,
                                   top=top,
                                   left=left,
                                   bottom=bottom,
                                   right=right,
                                   borderType=cv2.BORDER_CONSTANT,
                                   value=self.mean_val)
        if label is not None:
            label = cv2.copyMakeBorder(label,
                                       top=top,
                                       left=left,
                                       bottom=bottom,
                                       right=right,
                                       borderType=cv2.BORDER_CONSTANT,
                                       value=self.ignore_label)
        return image, label


# 输入为一个int类型的整数,或者元组,列表
class CenterCrop(object):
    """Crop the central region of the image (and label).

    crop_size: int for a square crop, or a (height, width) tuple/list —
    the module comment promised tuple/list support but the original only
    handled int; this generalizes it backward-compatibly.
    """

    def __init__(self, crop_size):
        if isinstance(crop_size, int):
            self.crop_h = self.crop_w = crop_size
        else:
            self.crop_h, self.crop_w = crop_size[0], crop_size[1]

    def __call__(self, image, label=None):
        h, w, c = image.shape
        # Integer division centers the crop (biased up-left on odd remainders).
        top = (h - self.crop_h) // 2
        left = (w - self.crop_w) // 2
        image = image[top:top + self.crop_h, left:left + self.crop_w, :]
        if label is not None:
            label = label[top:top + self.crop_h, left:left + self.crop_w]
        return image, label


# 缩放图像,输入尺寸可以是一个int类型,或一个tuple或list
class Resize(object):
    def __init__(self, size):
        self.size = size
    def __call__(self, image, label=None):
        image = cv2.resize(image, (self.size, self.size), interpolation=cv2.INTER_LINEAR)
        if label is not None:
            label = cv2.resize(label, (self.size, self.size), interpolation=cv2.INTER_LINEAR)
    
        return image, label


# 随机翻转:以 0.5 的概率对图像(及标签)做水平翻转(cv2.flip, flipCode=1)
class RandomFlip(object):
    """Horizontally mirror image (and label) with probability 0.5."""

    def __call__(self, image, label=None):
        if np.random.rand() > 0.5:
            # flipCode=1 mirrors around the vertical axis (left-right).
            image = cv2.flip(image, 1)
            label = label if label is None else cv2.flip(label, 1)
        return image, label


# 随机裁剪:在图片上随机位置裁剪出 crop_size x crop_size 的正方形区域
# 输入类型为 int(正方形裁剪边长)
class RandomCrop(object):
    """Crop a random crop_size x crop_size window from the image (and label).

    crop_size: int edge length of the square crop.
    Raises ValueError when the image is smaller than the crop in either
    dimension (the original used assert, which vanishes under ``python -O``).
    """

    def __init__(self, crop_size):
        self.crop_size = crop_size

    def __call__(self, image, label=None):
        h, w, c = image.shape
        if h < self.crop_size:
            raise ValueError("Error: crop_size > image height !")
        if w < self.crop_size:
            raise ValueError("Error: crop_size > image width !")

        # Bug fix: the original called np.random.uniform(h - crop_size),
        # which numpy parses as uniform(low=h - crop_size, high=1.0) — a
        # reversed range that could never sample offset 0 on large images.
        # randint draws the intended integer offset in [0, h - crop_size].
        top = np.random.randint(0, h - self.crop_size + 1)
        left = np.random.randint(0, w - self.crop_size + 1)

        image = image[top:top + self.crop_size, left:left + self.crop_size, :]
        if label is not None:
            label = label[top:top + self.crop_size, left:left + self.crop_size]
        return image, label


# 缩放,输入为一个float类型
class Scale(object):
    def __call__(self, image, label=None, scale=1.0):
        if not isinstance(scale, (list, tuple)):
            scale = [scale, scale]
        h, w, c = image.shape
        image = cv2.resize(image, (int(w*scale[0]), int(h*scale[1])), interpolation=cv2.INTER_LINEAR)
        if label is not None:
            label = cv2.resize(label, (int(w*scale[0]), int(h*scale[1])), interpolation=cv2.INTER_LINEAR)

        return image, label


# 随机缩放,输入为一个float类型,或tuple,list
class RandomScale(object):
    """Apply Scale with a factor drawn uniformly from [min_scale, max_scale].

    With step == 0 the factor comes from the continuous interval; otherwise
    it is drawn from the evenly spaced grid between min_scale and max_scale.
    The chosen factor is kept on self.random_scale for inspection.
    """

    def __init__(self, min_scale=0.5, max_scale=2.0, step=0.25):
        self.min_scale = min_scale
        self.max_scale = max_scale
        self.step = step
        self.scale = Scale()

    def __call__(self, image, label=None):
        if self.step == 0:
            self.random_scale = np.random.uniform(self.min_scale, self.max_scale, 1)[0]
        else:
            num_steps = int((self.max_scale - self.min_scale) / self.step + 1)
            candidates = np.linspace(self.min_scale, self.max_scale, num_steps)
            # Drawing one element uniformly == shuffling and taking the first.
            self.random_scale = np.random.choice(candidates)

        return self.scale(image, label, self.random_scale)


def main():
    # Demo: run the full augmentation pipeline on a sample image and show it.
    img = cv2.imread('./demo.jpg')

    crop_size = 1000
    augment = Compose([
                RandomScale(),
                RandomFlip(),
                # Pad with the ImageNet channel means (0-1 range; Pad rescales them).
                Pad(crop_size, mean_val=[0.485, 0.456, 0.406]),
                RandomCrop(crop_size),
                ConvertDataType(),
                # NOTE(review): Normalize(0, 1) with the default val_scale still
                # divides by 255, so the result lies in [0, 1].
                Normalize(0, 1)])

    # No label is passed, so each transform only touches the image.
    new_img, _ = augment(img)
    plt.imshow(new_img)
    plt.show()
    # NOTE(review): imwrite receives float data in [0, 1] here; the saved
    # file will look almost black — confirm intended.
    cv2.imwrite('tmp_new.jpg', new_img)


if __name__ == "__main__":
    main()
  1. basic_train.py
import os
import paddle
import paddle.fluid as fluid
from paddle.fluid.optimizer import AdamOptimizer
import numpy as np
import argparse
from utils import AverageMeter
from basic_model import BasicModel
from basic_dataloader import BasicDataLoader
from basic_seg_loss import Basic_SegLoss
from basic_data_preprocessing import TrainAugmentation


# Command-line configuration for the basic training script.
parser = argparse.ArgumentParser()
parser.add_argument('--net', type=str, default='basic')  # only 'basic' is supported in main()
parser.add_argument('--lr', type=float, default=0.001)  # Adam learning rate
parser.add_argument('--num_epochs', type=int, default=10)
parser.add_argument('--batch_size', type=int, default=4)
parser.add_argument('--image_folder', type=str, default='./dummy_data')
parser.add_argument('--image_list_file', type=str, default='./dummy_data/list.txt')
parser.add_argument('--checkpoint_folder', type=str, default='./output')
parser.add_argument('--save_freq', type=int, default=2)  # checkpoint every N epochs


args = parser.parse_args()

def train(dataloader, model, criterion, optimizer, epoch, total_batch):
    """Run one training epoch and return the epoch's average loss.

    dataloader yields (image, label) batches with images in NHWC layout;
    criterion is a callable (pred, label) -> scalar loss tensor.
    epoch/total_batch are only used for progress printing.
    """
    model.train()
    train_loss_meter = AverageMeter()
    for batch_id, data in enumerate(dataloader):
        image = data[0]
        label = data[1]

        # The network expects NCHW; the loader produces NHWC.
        image = fluid.layers.transpose(image, (0, 3, 1, 2))
        pred = model(image)
        loss = criterion(pred, label)

        # Dygraph update step: backprop, apply Adam, then drop gradients.
        loss.backward()
        optimizer.minimize(loss)
        model.clear_gradients()

        # Weight the running average by the actual batch size.
        n = image.shape[0]
        train_loss_meter.update(loss.numpy()[0], n)
        print(f"Epoch[{epoch:03d}/{args.num_epochs:03d}], " +
              f"Step[{batch_id:04d}/{total_batch:04d}], " +
              f"Average Loss: {train_loss_meter.avg:4f}")

    return train_loss_meter.avg



def main():
    # Step 0: preparation
    place = paddle.fluid.CUDAPlace(0)
    with fluid.dygraph.guard(place):
        # Step 1: Define training dataloader
        basic_augmentation = TrainAugmentation(image_size=256)
        basic_dataloader = BasicDataLoader(image_folder=args.image_folder,
                                           image_list_file=args.image_list_file,
                                           transform=basic_augmentation,
                                           shuffle=True)
        train_dataloader = fluid.io.DataLoader.from_generator(capacity=10,
                                                          use_multiprocess=True)
        train_dataloader.set_sample_generator(basic_dataloader,
                                              batch_size=args.batch_size,
                                              places=place)
        total_batch = int(len(basic_dataloader) / args.batch_size)
        
        # Step 2: Create model
        if args.net == "basic":
            model = BasicModel()
        else:
            raise NotImplementedError(f"args.net: {args.net} is not Supported!")

        # Step 3: Define criterion and optimizer
        criterion = Basic_SegLoss

        # create optimizer
        optimizer = AdamOptimizer(learning_rate=args.lr,
                                  parameter_list=model.parameters())
        # Step 4: Training
        for epoch in range(1, args.num_epochs+1):
            train_loss = train(train_dataloader,
                               model,
                               criterion,
                               optimizer,
                               epoch,
                               total_batch)
            print(f"----- Epoch[{epoch}/{args.num_epochs}] Train Loss: {train_loss:.4f}")

            if epoch % args.save_freq == 0 or epoch == args.num_epochs:
                model_path = os.path.join(args.checkpoint_folder, f"{args.net}-Epoch-{epoch}-Loss-{train_loss}")

                # save model and optmizer states
                model_dict = model.state_dict()
                fluid.save_dygraph(model_dict, model_path)
                optimizer_dict = optimizer.state_dict()
                fluid.save_dygraph(optimizer_dict, model_path)
                print(f'----- Save model: {model_path}.pdparams')
                print(f'----- Save optimizer: {model_path}.pdopt')


if __name__ == "__main__":
    main()

  1. utils.py

import numpy as np


class AverageMeter(object):
    """Track a running weighted average (e.g. of a per-batch loss)."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear the accumulated total, count and average."""
        self.total = 0
        self.avg = 0
        self.cnt = 0

    def update(self, val, n=1):
        """Fold in value `val` observed `n` times."""
        self.cnt = self.cnt + n
        self.total = self.total + val * n
        self.avg = self.total / self.cnt


def _fast_hist(label_true, label_pred, n_class):
    mask = (label_true >= 0) & (label_true < n_class)
    hist = np.bincount(
        n_class * label_true[mask].astype(int) +
        label_pred[mask], minlength=n_class ** 2).reshape(n_class, n_class)
    return hist


def label_accuracy_score(label_trues, label_preds, n_class):
    """Compute segmentation metrics over paired label maps.

    Returns a tuple of:
      - overall pixel accuracy
      - mean per-class accuracy
      - mean IoU
      - frequency-weighted IoU (fwavacc)
    """
    hist = np.zeros((n_class, n_class))
    for truth, pred in zip(label_trues, label_preds):
        hist += _fast_hist(truth.flatten(), pred.flatten(), n_class)

    diag = np.diag(hist)
    acc = diag.sum() / hist.sum()
    # Classes absent from the ground truth produce 0/0 -> NaN; nanmean skips them.
    with np.errstate(divide='ignore', invalid='ignore'):
        per_class_acc = diag / hist.sum(axis=1)
        iu = diag / (hist.sum(axis=1) + hist.sum(axis=0) - diag)
    acc_cls = np.nanmean(per_class_acc)
    mean_iu = np.nanmean(iu)
    freq = hist.sum(axis=1) / hist.sum()
    fwavacc = (freq[freq > 0] * iu[freq > 0]).sum()
    return acc, acc_cls, mean_iu, fwavacc

  1. vgg.py
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import to_variable
from paddle.fluid.dygraph import Conv2D
from paddle.fluid.dygraph import Dropout
from paddle.fluid.dygraph import BatchNorm
from paddle.fluid.dygraph import Pool2D
from paddle.fluid.dygraph import Linear

# Checkpoint base path for each pretrained backbone; entries stay commented
# out until the corresponding weight files are available locally.
model_path = {
        #'vgg16': './vgg16',
        'vgg16bn': './vgg16_bn',
        # 'vgg19': './vgg19',
        # 'vgg19bn': './vgg19_bn'
        }

class ConvBNLayer(fluid.dygraph.Layer):
    """Convolution optionally followed by BatchNorm.

    When use_bn is True the activation is applied by the BatchNorm layer
    and the convolution itself has no activation; otherwise the convolution
    applies the activation directly.
    """
    def __init__(self,
                 num_channels,
                 num_filters,
                 filter_size=3,
                 stride=1,
                 groups=1,
                 use_bn=True,
                 act='relu',
                 name=None):
        super(ConvBNLayer, self).__init__(name)

        self.use_bn = use_bn
        if use_bn:
            # Conv without activation; BN handles normalization + activation.
            self.conv = Conv2D(num_channels=num_channels,
                                num_filters=num_filters,
                                filter_size=filter_size,
                                stride=stride,
                                padding=(filter_size-1)//2,  # 'same' padding for odd kernels
                                groups=groups,
                                act=None,
                                bias_attr=None)
            self.bn = BatchNorm(num_filters, act=act)
        else:
            self.conv = Conv2D(num_channels=num_channels,
                                num_filters=num_filters,
                                filter_size=filter_size,
                                stride=stride,
                                padding=(filter_size-1)//2,
                                groups=groups,
                                act=act,
                                bias_attr=None)

    def forward(self, inputs):
        # conv -> (optional) batch norm.
        y = self.conv(inputs)
        if self.use_bn:
            y = self.bn(y)
        return y



class VGG(fluid.dygraph.Layer):
    """VGG-16/19 image classifier built from ConvBNLayer stages.

    layers: 16 or 19 (number of weight layers).
    use_bn: insert BatchNorm after every convolution.
    num_classes: size of the final fully-connected output.
    """
    def __init__(self, layers=16, use_bn=False, num_classes=1000):
        super(VGG, self).__init__()
        self.layers = layers
        self.use_bn = use_bn
        supported_layers = [16, 19]
        assert layers in supported_layers

        # Number of conv layers in each of the five stages.
        if layers == 16:
            depth = [2, 2, 3, 3, 3]
        elif layers == 19:
            depth = [2, 2, 4, 4, 4]

        # Input / output channel counts per stage.
        num_channels = [3, 64, 128, 256, 512]
        num_filters = [64, 128, 256, 512, 512]

        self.layer1 = fluid.dygraph.Sequential(*self.make_layer(num_channels[0], num_filters[0], depth[0], use_bn, name='layer1'))
        self.layer2 = fluid.dygraph.Sequential(*self.make_layer(num_channels[1], num_filters[1], depth[1], use_bn, name='layer2'))
        self.layer3 = fluid.dygraph.Sequential(*self.make_layer(num_channels[2], num_filters[2], depth[2], use_bn, name='layer3'))
        self.layer4 = fluid.dygraph.Sequential(*self.make_layer(num_channels[3], num_filters[3], depth[3], use_bn, name='layer4'))
        self.layer5 = fluid.dygraph.Sequential(*self.make_layer(num_channels[4], num_filters[4], depth[4], use_bn, name='layer5'))

        # Standard VGG head: two 4096-d FC+ReLU(+Dropout) layers, then logits.
        self.classifier = fluid.dygraph.Sequential(
                Linear(input_dim=512 * 7 * 7, output_dim=4096, act='relu'),
                Dropout(),
                Linear(input_dim=4096, output_dim=4096, act='relu'),
                Dropout(),
                Linear(input_dim=4096, output_dim=num_classes))

        # Flattened feature size after the 7x7 adaptive pooling in forward().
        self.out_dim = 512 * 7 * 7


    def forward(self, inputs):
        """Run the five conv stages (each followed by 2x2 pooling),
        adaptive-pool to 7x7, flatten, and classify."""
        x = self.layer1(inputs)
        x = fluid.layers.pool2d(x, pool_size=2, pool_stride=2)
        x = self.layer2(x)
        x = fluid.layers.pool2d(x, pool_size=2, pool_stride=2)
        x = self.layer3(x)
        x = fluid.layers.pool2d(x, pool_size=2, pool_stride=2)
        x = self.layer4(x)
        x = fluid.layers.pool2d(x, pool_size=2, pool_stride=2)
        x = self.layer5(x)
        x = fluid.layers.pool2d(x, pool_size=2, pool_stride=2)
        # Fixed 7x7 output regardless of input resolution.
        x = fluid.layers.adaptive_pool2d(x, pool_size=(7,7), pool_type='avg')
        x = fluid.layers.reshape(x, shape=[-1, self.out_dim])
        x = self.classifier(x)

        return x

    def make_layer(self, num_channels, num_filters, depth, use_bn, name=None):
        """Build one stage: `depth` ConvBNLayers; only the first changes the
        channel count."""
        layers = []
        layers.append(ConvBNLayer(num_channels, num_filters, use_bn=use_bn, name=f'{name}.0'))
        for i in range(1, depth):
            layers.append(ConvBNLayer(num_filters, num_filters, use_bn=use_bn, name=f'{name}.{i}'))
        return layers


# def VGG16(pretrained=False):
    # model = VGG(layers=16)
    # if pretrained:
    #     model_dict, _ = fluid.load_dygraph(model_path['vgg16'])
    #     model.set_dict(model_dict)
    # return model

def VGG16BN(pretrained=False):
    """Build VGG-16 with batch norm; optionally load local pretrained weights."""
    net = VGG(layers=16, use_bn=True)
    if not pretrained:
        return net
    state_dict, _ = fluid.load_dygraph(model_path['vgg16bn'])
    net.set_dict(state_dict)
    return net

# def VGG19(pretrained=False):
#     model =  VGG(layers=19)
#     if pretrained:
#         model_dict, _ = fluid.load_dygraph(model_path['vgg19'])
#         model.set_dict(model_dict)
#     return model

# def VGG19BN(pretrained=False):
#     model =  VGG(layers=19, use_bn=True)
#     if pretrained:
#         model_dict, _ = fluid.load_dygraph(model_path['vgg19bn'])
#         model.set_dict(model_dict)
#     return model



def main():
    """Smoke-test the VGG16-BN classifier on random NCHW input."""
    with fluid.dygraph.guard():
        x_data = np.random.rand(2, 3, 224, 224).astype(np.float32)
        x = to_variable(x_data)

        # model = VGG16()
        # model.eval()
        # pred = model(x)
        # print('vgg16: pred.shape = ', pred.shape)

        model = VGG16BN()
        # Inference mode so BatchNorm/Dropout behave deterministically.
        model.eval()
        pred = model(x)
        print('vgg16bn: pred.shape = ', pred.shape)

        # model = VGG19()
        # model.eval()
        # pred = model(x)
        # print('vgg19: pred.shape = ', pred.shape)

        # model = VGG19BN()
        # model.eval()
        # pred = model(x)
        # print('vgg19bn: pred.shape = ', pred.shape)

if __name__ == "__main__":
    main()

  1. basic_dataloader.py
import random
import cv2
import numpy as np
import paddle.fluid as fluid
import os
from basic_transforms import *


class Transform(object):
    """Simple preprocessing: pad, random-crop to `size`, cast, normalize."""

    def __init__(self, size=256):
        self.size = size
        # Bug fix: the original rebuilt this Compose pipeline (and all the
        # transform objects inside it) on every single __call__; build it
        # once here instead. Behaviour per sample is unchanged.
        self.augment = Compose([
                        Pad(0, mean_val=[0.485, 0.456, 0.406]),
                        RandomCrop(self.size),
                        ConvertDataType(),
                        Normalize(0, 1)])

    def __call__(self, input, label):
        return self.augment(input, label)


class BasicDataLoader(object):
    """Sample generator yielding (image, label) pairs for fluid's DataLoader.

    Each line of image_list_file holds "<image_rel_path> <label_rel_path>"
    relative to image_folder. Images are loaded as RGB; labels as
    single-channel maps with a trailing channel axis added.
    """

    def __init__(self,
                 image_folder,
                 image_list_file,
                 transform=None,
                 shuffle=True):
        self.image_folder = image_folder
        self.image_list_file = image_list_file
        self.transform = transform
        self.shuffle = shuffle
        self.data_list = self.read_list()

    def read_list(self):
        """Parse the list file into (image_path, label_path) tuples."""
        data_list = []
        with open(self.image_list_file) as infile:
            for line in infile:
                parts = line.split()
                if len(parts) < 2:
                    continue  # skip blank or malformed lines
                data_path = os.path.join(self.image_folder, parts[0])
                label_path = os.path.join(self.image_folder, parts[1])
                data_list.append((data_path, label_path))
        # Bug fix: the original shuffled here unconditionally, ignoring the
        # shuffle flag, and only once for the loader's lifetime. Shuffling
        # now happens per epoch in __call__, and only when requested.
        return data_list

    def preprocess(self, data, label):
        """Apply the optional transform and give the label a channel axis."""
        h, w, c = data.shape
        h_gt, w_gt = label.shape
        assert h == h_gt, "Error"
        assert w == w_gt, "Error"
        if self.transform:
            data, label = self.transform(data, label)
        label = label[:, :, np.newaxis]
        return data, label

    def __len__(self):
        return len(self.data_list)

    def __call__(self):
        # Re-shuffle at the start of every epoch so batch composition varies.
        if self.shuffle:
            random.shuffle(self.data_list)
        for data_path, label_path in self.data_list:
            data = cv2.imread(data_path, cv2.IMREAD_COLOR)
            data = cv2.cvtColor(data, cv2.COLOR_BGR2RGB)  # cv2 loads BGR
            label = cv2.imread(label_path, cv2.IMREAD_GRAYSCALE)
            data, label = self.preprocess(data, label)

            yield data, label


def main():
    """Smoke-test BasicDataLoader through fluid's DataLoader on CPU."""
    batch_size = 5
    place = fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        transform = Transform(256)
        # create BasicDataloader instance
        basic_dataloader = BasicDataLoader(
            image_folder = './dummy_data',
            image_list_file = './dummy_data/list.txt',
            transform = transform,
            shuffle = True
            )
        # create fluid.io.Dataloader instance
        dataloader = fluid.io.DataLoader.from_generator(capacity=1, use_multiprocess=False)

        # set sample generator for fluid dataloader
        dataloader.set_sample_generator(basic_dataloader,
                                        batch_size=batch_size,
                                        places=place)

        # Iterate two epochs just to show shapes of the produced batches.
        num_epoch = 2
        for epoch in range(1, num_epoch+1):
            print(f'Epoch [{epoch}/{num_epoch}]:')
            for idx, (data, label) in enumerate(dataloader):
                print(f'iter {idx}, Data shape: {data.shape}, Label shape:{label.shape}')

if __name__ == '__main__':
    main()
  1. basic_model.py
import paddle
import paddle.fluid as fluid
from paddle.fluid.dygraph import to_variable
import numpy as np
from paddle.fluid.dygraph import Conv2D, Pool2D

# Limit printed float precision so tensor dumps stay readable.
np.set_printoptions(precision=2)

class BasicModel(fluid.dygraph.Layer):
    """Minimal segmentation net: 2x2 pool, upsample back, 1x1 conv to classes.

    For an (N, 3, H, W) input, the output is (N, num_classes, H, W).
    """
    def __init__(self, num_classes=59):
        super(BasicModel, self).__init__()
        # 2x2, stride-2 pooling halves height and width.
        self.pool = Pool2D(pool_size=2, pool_stride=2)
        # 1x1 conv maps the 3 input channels to per-class score maps.
        self.conv = Conv2D(num_channels=3, num_filters=num_classes, filter_size=1)

    def forward(self, inputs):
        x = self.pool(inputs)
        # Upsample back to the input's (H, W) before classification.
        x = fluid.layers.interpolate(x, out_shape=inputs.shape[2::])
        x = self.conv(x)

        return x

def main():
    """Smoke-test BasicModel on random input (CPU)."""
    # 调用GPU或者CPU
    place = paddle.fluid.CPUPlace()

    with fluid.dygraph.guard(place):
        model = BasicModel(num_classes=59)
        # Evaluation mode (the original comment said "training mode" but the
        # code calls eval(), which is what a shape smoke-test wants).
        model.eval()
        input_data = np.random.rand(1, 3, 8, 8).astype(np.float32)
        print('Input data shape:', input_data.shape)
        input_data = to_variable(input_data)
        output_data = model(input_data)
        print(output_data)
        output_data = output_data.numpy()
        # Bug fix: the original message misspelled "shape" as "sahpe".
        print('Output data shape:', output_data.shape)

if __name__=='__main__':
    main()
  1. basic_data_preprocessing.py
from basic_transforms import *

class TrainAugmentation():
    """Training-time augmentation: random scale/flip, pad + crop, cast, normalize.

    image_size: edge length of the square crop fed to the network.
    mean_val / std_val: statistics forwarded to Normalize.
    """

    def __init__(self, image_size, mean_val=0, std_val=1.0):
        # Bug fix: image_size, mean_val and std_val were accepted but ignored
        # (crop size was hard-coded to 256 and Normalize(0, 1) was fixed).
        # The defaults reproduce the previous behaviour for existing callers.
        self.crop_size = image_size
        self.augment = Compose([
                        RandomScale(),
                        RandomFlip(),
                        # Pad with the ImageNet channel means so borders look neutral.
                        Pad(self.crop_size, mean_val=[0.485, 0.456, 0.406]),
                        RandomCrop(self.crop_size),
                        ConvertDataType(),
                        Normalize(mean_val, std_val)])

    def __call__(self, image, label):
        return self.augment(image, label)

  1. fcn8s.py
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import to_variable
from paddle.fluid.dygraph import Conv2D
from paddle.fluid.dygraph import Conv2DTranspose
from paddle.fluid.dygraph import Dropout
from paddle.fluid.dygraph import BatchNorm
from paddle.fluid.dygraph import Pool2D
from paddle.fluid.dygraph import Linear
from vgg import VGG16BN

# create fcn8s model
class FCN8s(fluid.dygraph.Layer):
    def __init__( self, num_classes=59):
        super(FCN8s, self).__init__()
        backbone = VGG16BN(pretrained=False)

        self.layer1 = backbone.layer1
        self.layer1[0].conv._padding = [100, 100]
        self.pool1 = Pool2D(pool_size=2, pool_stride=2, ceil_mode=True)
        self.layer2 = backbone.layer2
        self.pool2 = Pool2D(pool_size=2, pool_stride=2, ceil_mode=True)
        self.layer3 = backbone.layer3
        self.pool3 = Pool2D(pool_size=2, pool_stride=2, ceil_mode=True)
        self.layer4 = backbone.layer4
        self.pool4 = Pool2D(pool_size=2, pool_stride=2, ceil_mode=True)
        self.layer5 = backbone.layer5
        self.pool5 = Pool2D(pool_size=2, pool_stride=2, ceil_mode=True)

        self.fc6 = Conv2D(512, 4096, 7, act='relu')
        self.fc7 = Conv2D(4096, 4096, 1, act='relu')
        self.drop6 = Dropout()
        self.drop7 = Dropout()

        self.score = Conv2D(4096, num_classes, 1)
        self.score_pool3 = Conv2D(256, num_classes, 1)
        self.score_pool4 = Conv2D(512, num_classes, 1)

        self.up_output = Conv2DTranspose(num_channels=num_classes,
                                         num_filters=num_classes,
                                         filter_size=4,
                                         stride=2,
                                         bias_attr=False)
        
        self.up_pool4 = Conv2DTranspose(num_channels=num_classes,
                                         num_filters=num_classes,
                                         filter_size=4,
                                         stride=2,
                                         bias_attr=False)
        
        self.up_final = Conv2DTranspose(num_channels=num_classes,
                                         num_filters=num_classes,
                                         filter_size=16,
                                         stride=8,
                                         bias_attr=False)
    
    def forward(self, inputs):
        x = self.layer1(inputs)
        x = self.pool1(x) # 1/2
        x = self.layer2(x)
        x = self.pool2(x) # 1/4
        x = self.layer3(x)
        x = self.pool3(x) # 1/8
        pool3 = x
        x = self.layer4(x)
        x = self.pool4(x) # 1/16
        pool4 = x
        x = self.layer5(x)
        x = self.pool5(x) # 1/32

        x = self.fc6(x)
        x = self.drop6(x)
        x = self.fc7(x)
        x = self.drop7(x)

        x = self.score(x)
        x = self.up_output(x)

        up_output = x # 1/16
        x = self.score_pool4(pool4)

        x = x[:, :, 5:5+up_output.shape[2], 5:5+up_output.shape[3]]

        up_pool4 = x
        x = up_pool4 + up_output
        x = self.up_pool4(x)
        up_pool4 = x
        x = self.score_pool3(pool3)
        x = x[:, :, 9:9+up_pool4.shape[2], 9:9+up_pool4.shape[3]]
        up_pool3 = x # 1/8

        x = up_pool3 + up_pool4
        x = self.up_final(x)
        x = x[:, :, 31:31+inputs.shape[2], 31:31+inputs.shape[3]]

        return x


def main():
    """Smoke-test FCN8s: output spatial size must equal the 512x512 input."""
    with fluid.dygraph.guard():
        x_data = np.random.rand(2, 3, 512, 512).astype(np.float32)
        x = to_variable(x_data)
        model = FCN8s(num_classes=59)
        model.eval()
        pred = model(x)
        print(pred.shape)


if __name__ == '__main__':
    main()

  1. basic_seg_loss.py
import paddle
import paddle.fluid as fluid
import numpy as np
import cv2

# Guards against division by zero when every pixel is ignored (mask all zeros).
eps = 1e-8

def Basic_SegLoss(preds, labels, ignore_index=255):
    """Masked softmax cross-entropy averaged over non-ignored pixels.

    preds: NCHW logit tensor; labels: int64 class-id tensor (NHW1 in the
    demo below — TODO confirm the expected label layout with callers).
    Pixels whose label equals ignore_index contribute zero loss.
    """
    n, c, h, w = preds.shape

    # softmax_with_cross_entropy expects the class axis last -> NHWC.
    preds = fluid.layers.transpose(preds, [0, 2, 3, 1])

    # 1.0 where the pixel is valid, 0.0 where it should be ignored.
    mask = labels!=ignore_index
    mask = fluid.layers.cast(mask, 'float32')

    loss = fluid.layers.softmax_with_cross_entropy(preds, labels)
    loss = loss * mask
    # mean(loss)/mean(mask) == sum(loss)/count(valid); eps avoids 0/0.
    avg_loss = fluid.layers.mean(loss) / (fluid.layers.mean(mask) + eps)

    return avg_loss

def main():
    """Smoke-test Basic_SegLoss with a real label map and random logits."""
    label = cv2.imread('./dummy_data/GroundTruth_trainval_png/2008_000149.png')
    # Collapse to single-channel class ids; the loss needs int64 labels.
    label = cv2.cvtColor(label, cv2.COLOR_BGR2GRAY).astype(np.int64)
    # Random logits for 59 classes, laid out NCHW.
    pred = np.random.uniform(0, 1, (1, 59, label.shape[0], label.shape[1])).astype(np.float32)
    # Reshape label to NHW1 to match the transposed prediction in the loss.
    label = label[:,:,np.newaxis]
    label = label[np.newaxis, :, :, :]

    with fluid.dygraph.guard(fluid.CPUPlace()):
        pred = fluid.dygraph.to_variable(pred)
        label = fluid.dygraph.to_variable(label)
        loss = Basic_SegLoss(pred, label)
        print(loss)

if __name__ == "__main__":
    main()


  1. train.py
import os
import paddle
import paddle.fluid as fluid
from paddle.fluid.optimizer import AdamOptimizer
import numpy as np
import argparse
from utils import AverageMeter
from basic_model import BasicModel
from basic_dataloader import BasicDataLoader
from basic_seg_loss import Basic_SegLoss
from basic_data_preprocessing import TrainAugmentation
from basic_dataloader import Transform
import utils
from fcn8s import FCN8s


# Command-line configuration for full training with validation.
parser = argparse.ArgumentParser()
parser.add_argument('--net', type=str, default='fcn8s')  # 'fcn8s' or 'basic'
parser.add_argument('--lr', type=float, default=0.001)  # Adam learning rate
parser.add_argument('--num_epochs', type=int, default=100)
parser.add_argument('--batch_size', type=int, default=10)
parser.add_argument('--image_folder', type=str, default='./dummy_data')
parser.add_argument('--image_list_file', type=str, default='./dummy_data/list.txt')
parser.add_argument('--val_folder', type=str, default='./val_data')
parser.add_argument('--val_list_file', type=str, default='./val_data/list.txt')
parser.add_argument('--checkpoint_folder', type=str, default='./output')
parser.add_argument('--save_freq', type=int, default=2)  # NOTE(review): unused; main() saves every 25 epochs


args = parser.parse_args()

def train(dataloader, model, criterion, optimizer, epoch, total_batch):
    """Run one training epoch; returns (mean loss, mean pixel acc, mean IoU).

    Metrics come from utils.label_accuracy_score computed per batch and
    averaged over the epoch. n_class is fixed to 59 here.
    """
    model.train()
    train_loss_list = []
    train_acc_list = []
    train_iou_list = []
    train_loss_meter = AverageMeter()
    for batch_id, data in enumerate(dataloader):
        image = data[0]
        label = data[1]

        # NHWC (loader) -> NCHW (model).
        image = fluid.layers.transpose(image, (0, 3, 1, 2))
        pred = model(image)
        loss = criterion(pred, label)
        # Hard prediction: argmax over the class channel.
        label_pred = np.argmax(pred.numpy(), 1)
        label_true = label.numpy()
        acc, acc_cls, mean_iu, fwavacc = utils.label_accuracy_score(label_true, label_pred, n_class=59)

        loss.backward()
        optimizer.minimize(loss)
        model.clear_gradients()

        n = image.shape[0]
        train_loss_meter.update(loss.numpy()[0], n)
        print(f"Epoch[{epoch:03d}/{args.num_epochs:03d}], " +
              f"Step[{batch_id:04d}/{total_batch:04d}], " +
              f"Average Loss: {train_loss_meter.avg:4f}, " + 
              f"Average Acc: {acc:4f}, " + 
              f"Mean_iou: {mean_iu:4f}")
        # NOTE(review): this appends the running average rather than the
        # per-batch loss, so the returned mean is double-smoothed — confirm
        # that is the intended statistic.
        train_loss_list.append(train_loss_meter.avg)
        train_acc_list.append(acc)
        train_iou_list.append(mean_iu)

    return np.mean(train_loss_list), np.mean(train_acc_list), np.mean(train_iou_list)


def val(dataloader, model):
    """Evaluate on the validation set; returns (mean pixel acc, mean IoU).

    Metrics are averaged over batches via utils.label_accuracy_score with
    n_class fixed to 59.
    """
    model.eval()
    val_acc_list = []
    val_iou_list = []
    for batch_id, data in enumerate(dataloader):
        image = data[0]
        label = data[1]

        # NHWC (loader) -> NCHW (model).
        image = fluid.layers.transpose(image, (0, 3, 1, 2))
        pred = model(image)
        label_pred = np.argmax(pred.numpy(), 1)
        label_true = label.numpy()
        acc, acc_cls, mean_iu, fwavacc = utils.label_accuracy_score(label_true, label_pred, n_class=59)
        val_acc_list.append(acc)
        val_iou_list.append(mean_iu)

    return np.mean(val_acc_list), np.mean(val_iou_list)


def main():
    # Step 0: preparation
    place = paddle.fluid.CUDAPlace(0)
    with fluid.dygraph.guard(place):
        # Step 1: Define training dataloader
        basic_augmentation = TrainAugmentation(image_size=256)
        basic_dataloader = BasicDataLoader(image_folder=args.image_folder,
                                           image_list_file=args.image_list_file,
                                           transform=basic_augmentation,
                                           shuffle=True)
        train_dataloader = fluid.io.DataLoader.from_generator(capacity=10,
                                                          use_multiprocess=True)
        train_dataloader.set_sample_generator(basic_dataloader,
                                              batch_size=args.batch_size,
                                              places=place)
        total_batch = int(len(basic_dataloader) / args.batch_size)

        # Step 1: Define validation dataloader
        basic_augmentation = Transform()
        basic_dataloader = BasicDataLoader(image_folder=args.val_folder,
                                           image_list_file=args.val_list_file,
                                           transform=basic_augmentation,
                                           shuffle=True)
        val_dataloader = fluid.io.DataLoader.from_generator(capacity=5,
                                                          use_multiprocess=True)
        val_dataloader.set_sample_generator(basic_dataloader,
                                            batch_size=5,
                                            places=place)
        
        # Step 2: Create model
        if args.net == "fcn8s":
            model = FCN8s()
        elif args.net == "basic":
            model = BasicModel()
        else:
            raise NotImplementedError(f"args.net: {args.net} is not Supported!")

        # Step 3: Define criterion and optimizer
        criterion = Basic_SegLoss

        # create optimizer
        optimizer = AdamOptimizer(learning_rate=args.lr,
                                  parameter_list=model.parameters())
        # Step 4: Training
        for epoch in range(1, args.num_epochs+1):
            train_loss, train_acc, train_mean_iou = train(train_dataloader,
                               model,
                               criterion,
                               optimizer,
                               epoch,
                               total_batch)

            val_acc, val_mean_iou = val(val_dataloader, model)
            print(f"----- Epoch[{epoch}/{args.num_epochs}] Train Loss: {train_loss:.4f} Train Acc: {train_acc:.4f} Train Mean_iou: {train_mean_iou:.4f}")
            print(f"----- Epoch[{epoch}/{args.num_epochs}] Val Acc: {val_acc:.4f} Mean iou: {val_mean_iou:.4f}")

            if epoch % 25 == 0 or epoch == args.num_epochs:
                model_path = os.path.join(args.checkpoint_folder, f"{args.net}-Epoch-{epoch}-Loss-{train_loss}")

                # save model and optmizer states
                model_dict = model.state_dict()
                fluid.save_dygraph(model_dict, model_path)
                optimizer_dict = optimizer.state_dict()
                fluid.save_dygraph(optimizer_dict, model_path)
                print(f'----- Save model: {model_path}.pdparams')
                print(f'----- Save optimizer: {model_path}.pdopt')


if __name__ == "__main__":
    main()

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值