Polygonal Building Segmentation by Frame Field Learning
损失
CE loss和Dice loss:
自监督loss
运行流程
定义model
# Build the model. The online CUDA transforms are passed by keyword so they
# bind to the constructor's train_transform / eval_transform parameters
# (positional arguments may not follow a keyword argument in Python).
model = FrameFieldModel(
    config,
    backbone=backbone,
    train_transform=train_online_cuda_transform,
    eval_transform=eval_online_cuda_transform,
)
(1)获得feature
(2)分别预测interior seg map、edge seg map和frame field.
# Excerpt from the model constructor: build the two prediction heads.
# seg_channels defaults to 0 so the cross-field head can still be sized when
# the segmentation head is disabled in the config.
seg_channels = 0
if self.config["compute_seg"]:
    # Sum of the compute_vertex / compute_edge / compute_interior config entries.
    seg_channels = self.config["seg_params"]["compute_vertex"] \
        + self.config["seg_params"]["compute_edge"] \
        + self.config["seg_params"]["compute_interior"]
    # Segmentation head: 3x3 conv -> BN -> ELU -> 1x1 conv -> sigmoid.
    self.seg_module = torch.nn.Sequential(
        torch.nn.Conv2d(backbone_out_features, backbone_out_features, 3, padding=1),
        torch.nn.BatchNorm2d(backbone_out_features),
        torch.nn.ELU(),
        torch.nn.Conv2d(backbone_out_features, seg_channels, 1),
        torch.nn.Sigmoid(),
    )
if self.config["compute_crossfield"]:
    crossfield_channels = 4
    # Cross-field head: its input is the backbone features concatenated with
    # the segmentation output, hence the "+ seg_channels" input width.
    self.crossfield_module = torch.nn.Sequential(
        torch.nn.Conv2d(backbone_out_features + seg_channels, backbone_out_features, 3, padding=1),
        torch.nn.BatchNorm2d(backbone_out_features),
        torch.nn.ELU(),
        torch.nn.Conv2d(backbone_out_features, crossfield_channels, 1),
        torch.nn.Tanh(),
    )
outputs = {}
# --- Extract features for every pixel of the image with a U-Net --- #
backbone_features = self.backbone(image)["out"]
if self.config["compute_seg"]:
# --- Output a segmentation of the image --- #
seg = self.seg_module(backbone_features)
seg_to_cat = seg.clone().detach()
backbone_features = torch.cat([backbone_features, seg_to_cat], dim=1) # Add seg to image features
outputs["seg"] = seg
if self.config["compute_crossfield"]:
# --- Output a cross-field of the image --- #
crossfield = 2 * self.crossfield_module(backbone_features) # Outputs c_0, c_2 values in [-2, 2]
outputs["crossfield"] = crossfield
return outputs
注意,本文除了一个输入得到一个输出外,还提供了另一种方法(测试时增强,TTA):A->(model)->B,A_Flip->(model)->B_Flip->B,A_rotated->(model)->B_rotated->B,即把翻转/旋转后输入得到的输出做逆变换还原,然后将上面3个B求平均作为最后的结果
def tta_inference(model, xb, seg_threshold):
# Test-time augmentation (abridged excerpt): run the model on the original and
# on transformed copies of the image, undo the transform on each output, then
# aggregate the aligned outputs.
# NOTE(review): seg_threshold is not used in the lines shown here.
# Reference prediction on the untransformed image.
notrans_outputs = model.inference(xb["image"])
# Flip image
flipped_image = kornia.geometry.transform.vflip(xb["image"])
flipped_outputs = model.inference(flipped_image)
# Flip every output back so it lines up with the untransformed prediction.
# NOTE(review): output_keys and all_outputs are defined in code elided from
# this excerpt.
for key in output_keys:
reversed_output = kornia.geometry.transform.vflip(flipped_outputs[key])
all_outputs[key][1] = reversed_output
......
# Aggregate the per-transform outputs with aggr_mean.
final_outputs = aggr_mean(all_outputs)
将输入图像和标签(batch)送入网络,并计算损失
# Trainer call site: process one batch and get predictions, total loss and metrics.
pred, batch, total_loss, metrics_dict, loss_extra_dict, log_iou, nums = self.loss_batch(batch, opt=opt, epoch=epoch)
# NOTE(review): the next two statements look like the body of loss_batch — confirm.
# Forward pass: the model returns the prediction together with the batch.
pred, batch = self.model(batch)
# Combined loss on the prediction; also returns per-loss metrics and extra info.
loss, individual_metrics_dict, extra_dict = self.loss_func(pred, batch, epoch=epoch)
其中loss_func = losses.build_combined_loss(config).cuda(gpu),定义了8个损失
def build_combined_loss(config):
    """Build the combined training loss (abridged excerpt).

    The total loss is the weighted sum of the individual sub-losses; how each
    sub-loss is computed is described further below in these notes.

    :param config: configuration dict; the epoch thresholds are read from
        config["loss_params"]["multiloss"]["coefs"]["epoch_thresholds"]
    :return: the MultiLoss instance combining all sub-losses
    """
    # NOTE: loss_funcs, weights and pre_processes are assembled in code that is
    # elided from this excerpt.
    combined_loss = MultiLoss(
        loss_funcs,
        weights,
        epoch_thresholds=config["loss_params"]["multiloss"]["coefs"]["epoch_thresholds"],
        pre_processes=pre_processes,
    )
    # The caller chains .cuda(gpu) on the result, so the loss must be returned.
    return combined_loss
代码
backbone feature预测seg结果,然后将backbone+seg的结果concat,去预测frame field
def __init__(self, config: dict, backbone, train_transform=None, eval_transform=None):
    """
    Attach a segmentation head and/or a cross-field head to a backbone.

    :param config: configuration dict; reads "compute_seg", "compute_crossfield" and,
        when segmentation is enabled, "seg_params"
    :param backbone: A _SimpleSegmentationModel network, its output features will be
        used to compute seg and framefield.
    :param train_transform: transform applied to the inputs when self.training is True
    :param eval_transform: transform applied to the inputs when self.training is False
    """
    super(FrameFieldModel, self).__init__()
    assert config["compute_seg"] or config["compute_crossfield"], \
        "Model has to compute at least one of those:\n" \
        "\t- segmentation\n" \
        "\t- cross-field"
    assert isinstance(backbone, _SimpleSegmentationModel), \
        "backbone should be an instance of _SimpleSegmentationModel"

    self.config = config
    self.backbone = backbone
    self.train_transform = train_transform
    self.eval_transform = eval_transform

    feature_width = get_out_channels(self.backbone)

    # --- Optional heads, enabled through the config --- #
    # Stays 0 when the segmentation head is disabled so the cross-field head
    # below is sized on the backbone features alone.
    seg_channels = 0
    if self.config["compute_seg"]:
        seg_params = self.config["seg_params"]
        seg_channels = sum(
            seg_params[flag]
            for flag in ("compute_vertex", "compute_edge", "compute_interior")
        )
        seg_layers = [
            torch.nn.Conv2d(feature_width, feature_width, 3, padding=1),
            torch.nn.BatchNorm2d(feature_width),
            torch.nn.ELU(),
            torch.nn.Conv2d(feature_width, seg_channels, 1),
            torch.nn.Sigmoid(),
        ]
        self.seg_module = torch.nn.Sequential(*seg_layers)

    if self.config["compute_crossfield"]:
        crossfield_channels = 4
        # The head's input is the backbone features plus the segmentation channels.
        crossfield_layers = [
            torch.nn.Conv2d(feature_width + seg_channels, feature_width, 3, padding=1),
            torch.nn.BatchNorm2d(feature_width),
            torch.nn.ELU(),
            torch.nn.Conv2d(feature_width, crossfield_channels, 1),
            torch.nn.Tanh(),
        ]
        self.crossfield_module = torch.nn.Sequential(*crossfield_layers)
多个loss
seg bce loss和seg dice loss,对内部(interior)和边缘(edge)预测结果都使用同样的方式
class SegLoss(Loss):
    """Segmentation loss: weighted binary cross-entropy blended with a Dice loss."""

    def __init__(self, name, gt_channel_selector, bce_coef=0.5, dice_coef=0.5):
        """
        :param name: forwarded to the base Loss constructor
        :param gt_channel_selector: selects which channels of gt_polygons_image are
            compared to the predicted seg (see the docstring of compute())
        :param bce_coef: multiplier of the binary cross-entropy term
        :param dice_coef: multiplier of the Dice term
        """
        super(SegLoss, self).__init__(name)
        self.gt_channel_selector = gt_channel_selector
        self.bce_coef = bce_coef
        self.dice_coef = dice_coef

    def compute(self, pred_batch, gt_batch):
        """
        Compare the predicted segmentation with the selected ground-truth channels.

        The prediction and gt_polygons_image may have different channel counts, so
        gt_channel_selector picks the ground-truth channels to use. For instance,
        with C_pred=2 (interior and edge) and C_gt=3 (interior, edge and vertex),
        use gt_channel_selector=slice(0, 2).

        :param pred_batch: key "seg" is shape (N, C_pred, H, W)
        :param gt_batch: key "gt_polygons_image" is shape (N, C_gt, H, W); key
            "seg_loss_weights" is indexed with the same channel selector
        :return: bce_coef * mean weighted BCE + dice_coef * mean Dice loss
        """
        prediction = pred_batch["seg"]
        selector = self.gt_channel_selector
        target = gt_batch["gt_polygons_image"][:, selector, ...]
        pixel_weights = gt_batch["seg_loss_weights"][:, selector, ...]

        dice_term = torch.mean(measures.dice_loss(prediction, target))
        bce_term = F.binary_cross_entropy(
            prediction, target, weight=pixel_weights, reduction="mean"
        )
        return self.bce_coef * bce_term + self.dice_coef * dice_term
frame field smooth loss:Away from polygon boundaries, the frame field does not have any alignment constraints but is encouraged to be smooth and not collapse into a line field
smooth loss:用拉普拉斯算子计算梯度,边缘处不要求平滑(乘以1-gt_edge_map,这样边缘处的像素的权重为0)
class LaplacianPenalty:
    """Per-channel absolute response of a fixed 3x3 Laplacian-like filter.

    The kernel entries sum to zero, so constant regions give a zero penalty
    away from the zero-padded border.
    """

    def __init__(self, channels: int):
        """
        :param channels: number of channels of the tensors that will be filtered;
            the same 3x3 kernel is applied to every channel independently
        """
        self.channels = channels
        self.filter = torch.tensor([[0.5, 1.0, 0.5],
                                    [1.0, -6., 1.0],
                                    [0.5, 1.0, 0.5]]) / 12
        # Shape (channels, 1, 3, 3): one copy of the kernel per group.
        self.filter = self.filter[None, None, ...].expand(self.channels, -1, -1, -1)

    def laplacian_filter(self, tensor):
        """
        Filter each channel of ``tensor`` and return the absolute response.

        :param tensor: input of shape (N, channels, H, W)
        :return: tensor of the same shape (padding=1 keeps H and W)
        """
        kernel = self.filter.to(tensor.device)
        # Follow the input's floating-point dtype so float64 / half inputs do not
        # hit a dtype mismatch in conv2d; non-floating inputs keep the float32
        # kernel as before.
        if tensor.is_floating_point():
            kernel = kernel.to(tensor.dtype)
        penalty_tensor = F.conv2d(tensor, kernel, padding=1, groups=self.channels)
        return torch.abs(penalty_tensor)

    def __call__(self, tensor: torch.Tensor) -> torch.Tensor:
        """Alias for :meth:`laplacian_filter`."""
        return self.laplacian_filter(tensor)
class CrossfieldSmoothLoss(Loss):
    """Smoothness loss on the predicted cross-field, switched off on ground-truth edges."""

    def __init__(self, name):
        """
        :param name: forwarded to the base Loss constructor
        """
        super(CrossfieldSmoothLoss, self).__init__(name)
        # The cross-field prediction has 4 channels.
        self.laplacian_penalty = frame_field_utils.LaplacianPenalty(channels=4)

    def compute(self, pred_batch, gt_batch):
        """
        :param pred_batch: key "crossfield" holds the predicted cross-field
        :param gt_batch: key "gt_polygons_image"; channel 1 is used as the edge map
        :return: mean Laplacian penalty weighted by (1 - edge map)
        """
        crossfield = pred_batch["crossfield"]
        edge_map = gt_batch["gt_polygons_image"][:, 1, ...]
        # The weight falls to 0 where the edge map is 1, so edge pixels are not
        # asked to be smooth.
        off_edge_weight = 1 - edge_map
        roughness = self.laplacian_penalty(crossfield)
        return torch.mean(roughness * off_edge_weight[:, None, ...])
frame field align90 loss
class CrossfieldAlign90Loss(Loss):
    """Align the cross-field with the 90-degree-rotated ground-truth field on edges, vertices excluded."""

    def __init__(self, name):
        """
        :param name: forwarded to the base Loss constructor
        """
        super(CrossfieldAlign90Loss, self).__init__(name)

    def compute(self, pred_batch, gt_batch):
        """
        :param pred_batch: key "crossfield"; the first two channels are c0, the rest c2
        :param gt_batch: keys "gt_field" and "gt_polygons_image" (3 channels required)
        :return: mean alignment error weighted by the edges-minus-vertices mask
        """
        crossfield = pred_batch["crossfield"]
        c0, c2 = crossfield[:, :2], crossfield[:, 2:]

        field = gt_batch["gt_field"]
        # Rotated copy of the field: channels (a, b) become (-b, a).
        field_rot90 = torch.cat((-field[:, 1:2, ...], field[:, 0:1, ...]), dim=1)

        polygons = gt_batch["gt_polygons_image"]
        assert polygons.shape[1] == 3, \
            "gt_polygons_image should have 3 channels for interior, edges and vertices"
        # Edge pixels with the vertex pixels removed, clamped back into [0, 1].
        edges_without_vertices = (polygons[:, 1, ...] - polygons[:, 2, ...]).clamp(0, 1)

        error = frame_field_utils.framefield_align_error(c0, c2, field_rot90, complex_dim=1)
        return torch.mean(error * edges_without_vertices)
frame field align loss
class CrossfieldAlignLoss(Loss):
    """Align the predicted cross-field with the ground-truth field on edge pixels."""

    def __init__(self, name):
        """
        :param name: forwarded to the base Loss constructor
        """
        super(CrossfieldAlignLoss, self).__init__(name)

    def compute(self, pred_batch, gt_batch):
        """
        :param pred_batch: key "crossfield"; the first two channels are c0, the rest c2
        :param gt_batch: keys "gt_field" and "gt_polygons_image" (at least 2 channels)
        :return: mean alignment error weighted by the ground-truth edge map
        """
        crossfield = pred_batch["crossfield"]
        c0, c2 = crossfield[:, :2], crossfield[:, 2:]
        field = gt_batch["gt_field"]

        polygons = gt_batch["gt_polygons_image"]
        assert 2 <= polygons.shape[1], \
            "gt_polygons_image should have at least 2 channels for interior and edges"
        edge_map = polygons[:, 1, ...]

        error = frame_field_utils.framefield_align_error(c0, c2, field, complex_dim=1)
        mean_error = torch.mean(error * edge_map)

        # Store the ground-truth field in extra_info alongside the loss value.
        self.extra_info["gt_field"] = gt_batch["gt_field"]
        return mean_error
自监督loss,输出间的相互制约
Lint align和Ledge align
两个损失都使用相同的函数SegCrossfieldLoss,都先用ComputeSegGrads将seg的结果求边缘和做归一化(这里的归一化没有看明白),然后分别与frame field的结果计算损失
# Instantiation example: pred_channel selects which segmentation channel is
# coupled to the cross-field.
SegCrossfieldLoss(name="seg_interior_crossfield", pred_channel=pred_channel)
# --- Excerpt of the loss computation --- #
# Split the cross-field prediction: first two channels are c0, the rest c2.
c0 = pred_batch["crossfield"][:, :2]
c2 = pred_batch["crossfield"][:, 2:]
# Normalised gradient and gradient norm of the selected segmentation channel
# (presumably filled in by the ComputeSegGrads pre-process — confirm).
seg_slice_grads_normed = pred_batch["seg_grads_normed"][:, self.pred_channel, ...]
seg_slice_grad_norm = pred_batch["seg_grad_norm"][:, self.pred_channel, ...]
# Alignment error between the cross-field and the normalised seg gradient.
align_loss = frame_field_utils.framefield_align_error(c0, c2, seg_slice_grads_normed, complex_dim=1)
# The gradient norm acts as a per-pixel weight; it is detached so the weight
# itself receives no gradient.
avg_align_loss = torch.mean(align_loss * seg_slice_grad_norm.detach())
Lint edge
将interior的预测结果求归一化梯度,然后与边缘的预测结果做差。只在建筑物的边缘和建筑物的外部进行Linterior和Ledge的限制,这样是为了解决当建筑物相连时,interior的预测没有梯度,但edge的预测有边缘的情况
class SegEdgeInteriorLoss(Loss):
    """
    Push the predicted edge map towards the gradient norm of the predicted
    interior map, except inside buildings.
    """

    def __init__(self, name):
        """
        :param name: forwarded to the base Loss constructor
        """
        super(SegEdgeInteriorLoss, self).__init__(name)

    def compute(self, pred_batch, batch):
        """
        :param pred_batch: key "seg" (channel 0 interior, channel 1 edge) and key
            "seg_grad_norm" (channel 0 is the interior gradient norm)
        :param batch: not read by this loss
        :return: masked mean absolute difference between edge and interior gradient norm
        """
        seg = pred_batch["seg"]
        interior = seg[:, 0, ...]
        edge = seg[:, 1, ...]
        interior_grad_norm = pred_batch["seg_grad_norm"][:, 0, ...]

        discrepancy = torch.abs(edge - interior_grad_norm)

        # Apply the loss only on interior boundaries and outside of objects.
        # Cosine ramps: "outside" is 1 where interior is 0 and 0 where it is 1;
        # "boundary" is 0 where the gradient norm is 0 and 1 where it is 1.
        outside = (torch.cos(np.pi * interior) + 1) / 2
        boundary = (1 - torch.cos(np.pi * interior_grad_norm)) / 2
        mask = torch.max(outside, boundary).float()

        return torch.mean(discrepancy * mask)
其中,seg_interior是interior的预测结果,seg_edge是边缘的预测结果;seg_interior_grad_norm是归一化的边缘;outside_mask表示建筑物的外部(非建筑物)为1;boundary_mask表示建筑物的边缘为1;mask表示两者的并集