# IOU
"""
# Detection
A:
bottom-left corner (left_x, left_y)
top-right corner (right_x, right_y)
B:
bottom-left corner (left_x, left_y)
top-right corner (right_x, right_y)
"""
def IOU(A, B):
    """Return the intersection-over-union of two axis-aligned rectangles.

    Args:
        A: Object exposing ``left_x``/``left_y`` (bottom-left corner) and
            ``right_x``/``right_y`` (top-right corner) attributes.
        B: Second rectangle exposing the same four attributes.

    Returns:
        Overlap area divided by the area of the union of both rectangles;
        ``0`` when the rectangles are disjoint or merely touch.
    """
    overlap_w = min(A.right_x, B.right_x) - max(A.left_x, B.left_x)
    overlap_h = min(A.right_y, B.right_y) - max(A.left_y, B.left_y)
    # A non-positive extent on either axis means there is no overlapping area.
    if overlap_w <= 0 or overlap_h <= 0:
        return 0
    area_a = (A.right_x - A.left_x) * (A.right_y - A.left_y)
    area_b = (B.right_x - B.left_x) * (B.right_y - B.left_y)
    cross = overlap_w * overlap_h
    # The union counts the shared region once, hence the subtraction.
    return cross / (area_a + area_b - cross)
"""
# Segmentation
A:
bottom-left corner (left_x, left_y)
top-right corner (right_x, right_y)
B:
bottom-left corner (left_x, left_y)
top-right corner (right_x, right_y)
"""
def compute_ious(pred, label, classes):
    """Compute the mean IoU between one predicted mask and one label mask.

    Args:
        pred: Array of predicted class ids.
        label: Array of ground-truth class ids, compared element-wise
            with ``pred``.
        classes: Iterable of class ids to evaluate.

    Returns:
        Mean of the per-class IoU values, ignoring classes that occur in
        neither ``pred`` nor ``label`` (those contribute NaN and are skipped
        by ``np.nanmean``).
    """
    ious = []  # one IoU entry per requested class
    for c in classes:
        label_c = label == c  # boolean mask of pixels labelled c
        pred_c = pred == c  # boolean mask of pixels predicted as c
        intersection = np.logical_and(pred_c, label_c).sum()
        union = np.logical_or(pred_c, label_c).sum()
        if union == 0:
            # Class absent from both masks: IoU is undefined, record NaN.
            ious.append(float('nan'))
        else:
            ious.append(intersection / union)
    # Mean IoU over the classes that actually appear in this image.
    return np.nanmean(ious)
def compute_iou_batch(preds, labels, classes=None):
    """Compute the mean IoU over a batch of predicted and ground-truth masks.

    Args:
        preds: Batch of predicted masks; iterated along its first axis.
        labels: Batch of ground-truth masks, paired one-to-one with ``preds``.
        classes: Iterable of class ids to evaluate. When ``None`` the ids are
            taken to be every distinct value found in ``preds`` or ``labels``.

    Returns:
        NaN-ignoring mean of the per-image mean IoU values.
    """
    preds = np.copy(preds)  # work on a copy, as the original snippet insisted
    labels = np.array(labels)  # e.g. tensor -> ndarray
    if classes is None:
        # The declared default used to reach ``for c in classes`` unchanged
        # and raise TypeError; fall back to the classes present in the batch.
        classes = np.union1d(preds, labels)
    ious = [
        compute_ious(pred, label, classes)
        for pred, label in zip(preds, labels)
    ]
    return np.nanmean(ious)  # mean IoU of the whole batch
# mIOU
"""
# Segmentation
https://blog.csdn.net/weixin_43917574/article/details/117264163
"""
# Confusion matrix
def _fast_hist(label_true, label_pred, n_class):
    """Build the confusion matrix for flattened labels and predictions.

    Args:
        label_true: 1-D array of ground-truth class ids.
        label_pred: 1-D array of predicted class ids, aligned with
            ``label_true``.
        n_class: Number of classes.

    Returns:
        ``(n_class, n_class)`` integer array where ``hist[t][p]`` counts the
        elements whose true class is ``t`` and predicted class is ``p``.

    Note:
        Only ground-truth values are range-checked; predictions are expected
        to already lie in ``[0, n_class)``.
    """
    # Keep only positions whose ground-truth id is a valid class, which
    # drops "ignore" labels such as 255 or negative ids.
    mask = (label_true >= 0) & (label_true < n_class)
    # Encode each (true, pred) pair as its row-major offset into the flattened
    # n_class x n_class matrix. Both operands are cast to int because
    # np.bincount rejects float input.
    flat_index = (
        n_class * label_true[mask].astype(int) + label_pred[mask].astype(int)
    )
    # Count the occurrences of every offset, then fold back into 2-D.
    hist = np.bincount(flat_index, minlength=n_class ** 2).reshape(
        n_class, n_class
    )
    return hist
# Per-class IoU
def per_class_iu(hist):
    """Return the IoU of every class from a square confusion matrix.

    Args:
        hist: ``(n, n)`` array indexed as ``hist[true][pred]``.

    Returns:
        Float array of shape ``(n,)``: diagonal / (row sum + column sum -
        diagonal). Classes that appear in neither the labels nor the
        predictions have an undefined IoU and are reported as NaN.
    """
    true_pos = np.diag(hist)
    denom = hist.sum(1) + hist.sum(0) - true_pos
    # Divide only where the union is non-empty so that absent classes yield
    # NaN without triggering a divide-by-zero RuntimeWarning.
    return np.divide(
        true_pos,
        denom,
        out=np.full(denom.shape, np.nan),
        where=denom > 0,
    )
# mIoU: arithmetic mean of the per-class IoU values.
# NOTE(review): `hist` is not defined in this snippet; presumably it is the
# confusion matrix returned by _fast_hist. Confirm before running.
np.mean(per_class_iu(hist))
# NMS
import numpy as np
"""
Coordinates: bottom-left corner, top-right corner
"""
def nms(dets, thresh):
    """Greedy non-maximum suppression over scored boxes.

    Args:
        dets: Array of shape ``(n, 5)`` whose rows are
            ``[x1, y1, x2, y2, score]`` with ``(x1, y1)`` and ``(x2, y2)``
            the two opposite corners of the box.
        thresh: IoU threshold; a box is dropped when its IoU with an
            already kept, higher-scoring box is ``>= thresh``.

    Returns:
        List of row indices into ``dets`` that survive, ordered by
        descending score.
    """
    dets = np.asarray(dets, dtype=float)
    if dets.size == 0:
        return []

    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1) * (y2 - y1)

    order = scores.argsort()[::-1]  # candidate indices, best score first
    keep_list = []
    while order.size > 0:
        best = order[0]
        keep_list.append(int(best))
        rest = order[1:]

        # Clamp width and height separately: multiplying two negative
        # extents would otherwise report overlap for boxes that are disjoint
        # on both axes.
        inter_w = np.maximum(
            0.0, np.minimum(x2[best], x2[rest]) - np.maximum(x1[best], x1[rest])
        )
        inter_h = np.maximum(
            0.0, np.minimum(y2[best], y2[rest]) - np.maximum(y1[best], y1[rest])
        )
        # Same (no "+1") convention as `areas`, so IoU stays within [0, 1].
        intersection = inter_w * inter_h
        union = areas[best] + areas[rest] - intersection
        # Degenerate zero-area pairs get IoU 0 instead of dividing by zero.
        iou = np.divide(
            intersection,
            union,
            out=np.zeros_like(intersection),
            where=union > 0,
        )

        # Only candidates that overlap the kept box less than `thresh` remain.
        order = rest[iou < thresh]
    return keep_list
# Example detections: each row is [x1, y1, x2, y2, score].
dets = np.array([[100, 100, 210, 210, 0.72],
                 [250, 250, 420, 420, 0.8],
                 [220, 220, 320, 330, 0.92],
                 [100, 100, 210, 210, 0.72]])  # closing brackets were missing
nms(dets=dets, thresh=0.7)
# Dice Loss
# Binary classification
import torch
import torch.nn as nn
class BinaryDiceLoss(nn.Module):
    """Dice loss for a single foreground/background channel."""

    def __init__(self):
        super(BinaryDiceLoss, self).__init__()

    def forward(self, input, targets):
        """Return ``1 - mean(Dice coefficient)`` over the batch.

        Args:
            input: Predicted scores; the first dimension is the batch and
                all remaining dimensions are flattened per sample.
            targets: Ground-truth mask with the same number of elements per
                sample as ``input``.

        Returns:
            Scalar tensor holding the average Dice loss of the batch.
        """
        # Batch size N.
        N = targets.size()[0]
        # Smoothing term: keeps the ratio defined when both masks are empty.
        smooth = 1
        # Flatten every sample to one vector. reshape (unlike view) also
        # accepts non-contiguous inputs such as channel slices.
        input_flat = input.reshape(N, -1)
        targets_flat = targets.reshape(N, -1)
        # Soft intersection of prediction and target.
        intersection = input_flat * targets_flat
        dice_eff = (2 * intersection.sum(1) + smooth) / (
            input_flat.sum(1) + targets_flat.sum(1) + smooth
        )
        # Average loss per sample in the batch.
        loss = 1 - dice_eff.sum() / N
        return loss
# Multi-class classification
import torch
import torch.nn as nn
class MultiClassDiceLoss(nn.Module):
    """Dice loss averaged over the classes of a softmax segmentation output.

    ``weight``, ``ignore_index`` and any extra keyword arguments are stored
    on the instance but are not used by ``forward``.
    """

    def __init__(self, weight=None, ignore_index=None, **kwargs):
        super(MultiClassDiceLoss, self).__init__()
        self.weight = weight
        self.ignore_index = ignore_index
        self.kwargs = kwargs

    def forward(self, input, target):
        """Return the mean over classes of the per-class binary Dice loss.

        Args:
            input: Raw class scores of shape ``(N, C, H, W)``.
            target: Class-index tensor of shape ``(N, H, W)`` with values in
                ``[0, C)``.

        Returns:
            Scalar tensor: ``1 - mean(Dice coefficient)`` taken over every
            (sample, class) pair, which equals averaging the per-class
            batch-mean Dice losses.

        Raises:
            ValueError: If the one-hot target does not match ``input``'s shape.
        """
        nclass = input.shape[1]
        # One-hot encode the target: (N, H, W) -> (N, H, W, C) -> (N, C, H, W).
        target = nn.functional.one_hot(target.long(), nclass)
        target = target.permute(0, 3, 1, 2)
        if input.shape != target.shape:
            raise ValueError("predict & target shape do not match")

        # Normalise the scores into per-pixel class probabilities.
        probs = torch.softmax(input, dim=1)

        N = input.shape[0]
        smooth = 1  # keeps the ratio defined for classes absent from a sample
        probs_flat = probs.reshape(N, nclass, -1)
        target_flat = target.reshape(N, nclass, -1).to(probs.dtype)

        # Dice coefficient for every (sample, class) pair.
        intersection = (probs_flat * target_flat).sum(-1)
        dice_eff = (2 * intersection + smooth) / (
            probs_flat.sum(-1) + target_flat.sum(-1) + smooth
        )
        # Average Dice loss over batch and classes.
        return 1 - dice_eff.mean()
# BN (batch normalization)
def batch_norm(X, gamma, beta, moving_mean, moving_var, eps, momentum):
    """Apply batch normalization and return updated running statistics.

    Gradient tracking is used as the train/inference switch: with autograd
    enabled the batch statistics normalize ``X`` and are blended into the
    running averages; otherwise the supplied running averages are used as-is.

    Args:
        X: 2-D input (fully connected) or 4-D input (2-D convolution).
        gamma: Scale applied to the normalized input.
        beta: Shift added after scaling.
        moving_mean: Running mean.
        moving_var: Running variance.
        eps: Constant added to the variance before the square root.
        momentum: Weight given to the previous running statistics.

    Returns:
        Tuple ``(Y, moving_mean.data, moving_var.data)``.
    """
    if torch.is_grad_enabled():
        rank = len(X.shape)
        assert rank in (2, 4)
        if rank == 4:
            # Convolutional input: per-channel statistics (axis 1), keeping
            # the reduced dims so the result broadcasts against X.
            batch_mean = X.mean(dim=(0, 2, 3), keepdim=True)
            batch_var = ((X - batch_mean)**2).mean(dim=(0, 2, 3), keepdim=True)
        else:
            # Fully connected input: statistics over the batch dimension.
            batch_mean = X.mean(dim=0)
            batch_var = ((X - batch_mean)**2).mean(dim=0)
        # Training: normalize with the statistics of the current batch.
        normalized = (X - batch_mean) / torch.sqrt(batch_var + eps)
        # Blend the batch statistics into the running averages.
        moving_mean = momentum * moving_mean + (1.0 - momentum) * batch_mean
        moving_var = momentum * moving_var + (1.0 - momentum) * batch_var
    else:
        # Inference: normalize with the running averages passed in.
        normalized = (X - moving_mean) / torch.sqrt(moving_var + eps)
    # Scale and shift.
    Y = gamma * normalized + beta
    return Y, moving_mean.data, moving_var.data
# Conv2d
# coding:utf-8
import numpy as np
class Conv2D:
    """Naive NumPy 2-D convolution (cross-correlation) layer, forward only."""

    def __init__(self, in_channels, out_channels, kernel_size, padding, stride):
        """Store the hyper-parameters and draw random filters.

        Args:
            in_channels: Number of channels of the input.
            out_channels: Number of filters / output channels.
            kernel_size: Side length of the square kernel.
            padding: Zero padding added on every side of both spatial axes.
            stride: Step between neighbouring windows.
        """
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.padding = padding
        self.stride = stride
        # Filter bank laid out as (in_channels, out_channels, k, k).
        self.filters = np.random.rand(
            self.in_channels, self.out_channels, self.kernel_size, self.kernel_size
        )

    def __call__(self, x):
        """Convolve a batch with every filter.

        Args:
            x: Array of shape ``(B, C, H, W)``; ``H`` and ``W`` are simply the
                third and fourth axes and need not be equal.

        Returns:
            Array of shape ``(B, out_channels, out_h, out_w)`` with
            ``out = (size + 2 * padding - kernel_size) // stride + 1``
            computed independently for each spatial axis.
        """
        B, C, H, W = x.shape
        pad = self.padding
        out_h = (H + pad * 2 - self.kernel_size) // self.stride + 1
        out_w = (W + pad * 2 - self.kernel_size) // self.stride + 1
        out_features = np.zeros((B, self.out_channels, out_h, out_w))
        in_features = np.zeros((B, C, H + pad * 2, W + pad * 2))
        # Explicit end indices: a ``pad:-pad`` slice is empty when pad == 0.
        in_features[:, :, pad:pad + H, pad:pad + W] = x
        for batch_idx in range(B):
            for ch_idx in range(self.out_channels):
                out_features[batch_idx, ch_idx, :, :] = self._conv(
                    in_features[batch_idx],
                    out_features[batch_idx, ch_idx, :, :],
                    self.filters[:, ch_idx, :, :],
                )
        return out_features

    def _conv(self, x, out, kernel):
        """Fill ``out`` with the response of one filter on one padded sample.

        Args:
            x: Padded input of shape ``(C, H_pad, W_pad)``.
            out: ``(out_h, out_w)`` array written in place.
            kernel: Filter of shape ``(C, k, k)``.

        Returns:
            ``out``, each entry being the window/kernel product summed over
            channels and window, capped at 255.
        """
        h, w = out.shape
        k = self.kernel_size
        for i in range(h):
            top = i * self.stride
            for j in range(w):
                left = j * self.stride
                window = x[:, top:top + k, left:left + k] * kernel
                # Responses are saturated at 255, as in the original snippet.
                out[i, j] = min(window.sum(), 255)
        return out
# Usage example: 3 -> 5 channels, 3x3 kernel, padding 1, stride 1,
# applied to a random batch of four 3-channel 10x10 inputs.
con2d=Conv2D(in_channels=3, out_channels=5, kernel_size=3, padding=1, stride=1)
x=np.random.rand(4, 3, 10, 10) # B, C, W, H
con2d(x)
# MaxPool2D
# coding:utf-8
import numpy as np
class MaxPool2D:
    """Naive NumPy 2-D max pooling, forward only."""

    def __init__(self, pool_size, stride):
        """Store the side length of the square window and the stride."""
        self.pool_size = pool_size
        self.stride = stride

    def __call__(self, x):
        """Max-pool every channel of every sample.

        Args:
            x: Array of shape ``(B, C, H, W)``; ``H`` and ``W`` are the third
                and fourth axes and need not be equal.

        Returns:
            Array of shape ``(B, C, out_h, out_w)`` with
            ``out = (size - pool_size) // stride + 1`` per spatial axis.
        """
        # Output extents are derived from the same axes that are sliced
        # below, so rectangular inputs never produce empty windows.
        B, C, H, W = x.shape
        out_h = (H - self.pool_size) // self.stride + 1
        out_w = (W - self.pool_size) // self.stride + 1
        out_features = np.zeros((B, C, out_h, out_w))
        for i in range(out_h):
            top = i * self.stride
            for j in range(out_w):
                left = j * self.stride
                window = x[:, :, top:top + self.pool_size, left:left + self.pool_size]
                # Reduce the window for all samples and channels at once.
                out_features[:, :, i, j] = window.max(axis=(2, 3))
        return out_features
# Usage example: 2x2 max pooling with stride 2 on a random batch of four
# 3-channel 10x10 inputs.
maxpool2d=MaxPool2D(2,2)
x=np.random.rand(4, 3, 10, 10)
maxpool2d(x)
# AvgPool2D
# coding:utf-8
import numpy as np
class AvgPool2D:
    """Naive NumPy 2-D average pooling, forward only."""

    def __init__(self, pool_size, stride):
        """Store the side length of the square window and the stride."""
        self.pool_size = pool_size
        self.stride = stride

    def __call__(self, x):
        """Average-pool every channel of every sample.

        Args:
            x: Array of shape ``(B, C, H, W)``; ``H`` and ``W`` are the third
                and fourth axes and need not be equal.

        Returns:
            Array of shape ``(B, C, out_h, out_w)`` with
            ``out = (size - pool_size) // stride + 1`` per spatial axis.
        """
        # Output extents are derived from the same axes that are sliced
        # below, so rectangular inputs never produce empty (NaN) windows.
        B, C, H, W = x.shape
        out_h = (H - self.pool_size) // self.stride + 1
        out_w = (W - self.pool_size) // self.stride + 1
        out_features = np.zeros((B, C, out_h, out_w))
        for i in range(out_h):
            top = i * self.stride
            for j in range(out_w):
                left = j * self.stride
                window = x[:, :, top:top + self.pool_size, left:left + self.pool_size]
                # Reduce the window for all samples and channels at once.
                out_features[:, :, i, j] = window.mean(axis=(2, 3))
        return out_features
# Usage example: 2x2 average pooling with stride 2 on a random batch of four
# 3-channel 10x10 inputs.
avgpool2d=AvgPool2D(2,2)
x=np.random.rand(4, 3, 10, 10)
avgpool2d(x)