手写.

xiaocong1990

已于 2023-04-23 22:02:31 修改

阅读量112

点赞数

分类专栏：深度学习 计算机视觉 文章标签：numpy python 机器学习

于 2023-04-23 11:07:11 首次发布

本文链接：https://blog.csdn.net/xiaocong1990/article/details/130317297

版权

深度学习同时被 2 个专栏收录

62 篇文章 3 订阅

订阅专栏

计算机视觉

5 篇文章 0 订阅

订阅专栏

IOU

"""
#检测
A:
左下角坐标(left_x,left_y)
右上角坐标(right_x,right_y)
B:
左下角坐标(left_x,left_y)
右上角坐标(right_x,right_y)
"""
 
def IOU(A, B):
    """Return the intersection-over-union of two axis-aligned rectangles.

    Args:
        A: Object exposing ``left_x``/``left_y`` (bottom-left corner) and
            ``right_x``/``right_y`` (top-right corner).
        B: Second rectangle exposing the same four attributes.

    Returns:
        The IoU ratio, or ``0`` when the rectangles do not overlap
        (rectangles that merely touch count as non-overlapping).
    """
    overlap_w = min(A.right_x, B.right_x) - max(A.left_x, B.left_x)
    overlap_h = min(A.right_y, B.right_y) - max(A.left_y, B.left_y)
    # A non-positive extent on either axis means there is no shared area.
    if overlap_w <= 0 or overlap_h <= 0:
        return 0
    area_a = (A.right_x - A.left_x) * (A.right_y - A.left_y)
    area_b = (B.right_x - B.left_x) * (B.right_y - B.left_y)
    cross = overlap_w * overlap_h
    # Union = both areas minus the part that was counted twice.
    return cross / (area_a + area_b - cross)


"""
#分割
A:
左下角坐标(left_x,left_y)
右上角坐标(right_x,right_y)
B:
左下角坐标(left_x,left_y)
右上角坐标(right_x,right_y)
"""
def compute_ious(pred, label, classes):
    """Compute the mean IoU between one predicted mask and one ground-truth mask.

    Args:
        pred: Array of predicted class ids.
        label: Array of ground-truth class ids, same shape as ``pred``.
        classes: Iterable of class ids to evaluate.

    Returns:
        The mean IoU over the classes that occur in ``pred`` or ``label``;
        NaN when none of the requested classes occurs in either mask.
    """
    pred = np.asarray(pred)
    label = np.asarray(label)
    scores = []  # IoU of every class that is present in at least one mask
    for c in classes:
        label_c = label == c  # boolean mask of pixels labelled as class c
        pred_c = pred == c
        union = np.logical_or(pred_c, label_c).sum()
        if union == 0:
            # Class absent from both masks: undefined IoU, leave it out of the mean.
            continue
        intersection = np.logical_and(pred_c, label_c).sum()
        scores.append(intersection / union)
    if not scores:
        return float('nan')
    return np.mean(scores)
 
def compute_iou_batch(preds, labels, classes=None):
    """Compute the mean IoU over a batch of predicted and ground-truth masks.

    Args:
        preds: Batch of predicted masks (first axis is the batch axis).
        labels: Batch of ground-truth masks, aligned with ``preds``.
        classes: Iterable of class ids to evaluate. When ``None``, every id
            that occurs in ``preds`` or ``labels`` is used; ids occurring in
            neither would be ignored by the NaN-aware mean anyway.

    Returns:
        The NaN-ignoring mean of the per-image scores from ``compute_ious``;
        NaN for an empty batch.
    """
    preds = np.copy(preds)  # work on a copy so the caller's data is untouched
    labels = np.array(labels)  # convert tensor-like input to an ndarray
    if classes is None:
        # The original default crashed when iterating over None.
        classes = np.union1d(labels, preds)
    ious = [compute_ious(pred, label, classes) for pred, label in zip(preds, labels)]
    if not ious:
        return float('nan')
    return np.nanmean(ious)  # mean IoU of the whole batch

mIOU

"""
#分割

https://blog.csdn.net/weixin_43917574/article/details/117264163
"""
#混淆矩阵
def _fast_hist(label_true, label_pred, n_class):
    """
    label_true是转化为一维数组的真实标签，label_pred是转化为一维数组的预测结果，n_class是类别数
    hist是一个混淆矩阵
    hist是一个二维数组，可以写成hist[label_true][label_pred]的形式
    最后得到的这个数组的意义就是行下标表示的类别预测成列下标类别的数量
    """
    # mask在和label_true相对应的索引的位置上填入true或者false
    # label_true[mask]会把mask中索引为true的元素输出
    mask = (label_true >= 0) & (label_true < n_class)
    # n_class * label_true[mask].astype(int) + label_pred[mask]计算得到的是二维数组元素
    # 变成一位数组元素的时候的地址取值(每个元素大小为1)，返回的是一个numpy的list
    # np.bincount()会给出索引对应的元素个数
    hist = np.bincount(n_class * label_true[mask].astype(int) + label_pred[mask],
                       minlength=n_class ** 2).reshape(n_class, n_class)
    return hist

#每个类别IOU
def per_class_iu(hist):
    """Return the per-class IoU of a confusion matrix as an array of shape (n,)."""
    hits = np.diag(hist)  # correctly classified count of every class
    per_row = hist.sum(1)
    per_col = hist.sum(0)
    # IoU = diagonal / (row sum + column sum - diagonal)
    return hits / (per_row + per_col - hits)

# mIOU: the mean of the per-class IoU values.
# NOTE(review): `hist` is not defined in this snippet - presumably the
# confusion matrix returned by _fast_hist; confirm before running.
np.mean(per_class_iu(hist))

NMS

import numpy as np

"""
坐标：左下角，右上角
"""


def nms(dets, thresh):
    """Greedy non-maximum suppression over scored boxes.

    Args:
        dets: Array-like of shape (n, 5+) whose columns are
            ``x1, y1, x2, y2, score``; (x1, y1) is the bottom-left corner and
            (x2, y2) the top-right corner.
        thresh: A box is dropped when its IoU with an already kept,
            higher-scoring box is ``>= thresh``.

    Returns:
        List of row indices into ``dets`` of the kept boxes, ordered by
        descending score. Empty input yields an empty list.
    """
    dets = np.asarray(dets, dtype=float)
    if dets.size == 0:
        return []

    x1, y1, x2, y2, scores = (dets[:, col] for col in range(5))
    areas = (x2 - x1) * (y2 - y1)
    order = scores.argsort()[::-1]  # indices sorted by descending score

    keep_list = []
    while order.size > 0:
        best = order[0]
        keep_list.append(best)
        rest = order[1:]

        # Clamp each side separately: clamping only the product lets two
        # negative extents multiply into a bogus positive overlap for
        # boxes that are disjoint on both axes.
        inter_w = np.maximum(0.0, np.minimum(x2[best], x2[rest]) - np.maximum(x1[best], x1[rest]))
        inter_h = np.maximum(0.0, np.minimum(y2[best], y2[rest]) - np.maximum(y1[best], y1[rest]))
        inter = inter_w * inter_h
        # Areas and intersection use the same (no "+1") convention so that
        # the IoU stays within [0, 1].
        union = areas[best] + areas[rest] - inter

        iou = np.zeros_like(inter)
        # Degenerate zero-area pairs keep an IoU of 0 instead of dividing by zero.
        np.divide(inter, union, out=iou, where=union > 0)

        order = rest[iou < thresh]
    return keep_list

# Example: the first and the last box are identical, so only one of them is kept.
dets = np.array([
    [100, 100, 210, 210, 0.72],
    [250, 250, 420, 420, 0.8],
    [220, 220, 320, 330, 0.92],
    [100, 100, 210, 210, 0.72],
])
print(nms(dets=dets, thresh=0.7))

Dice Loss

# 二分类
import torch
import torch.nn as nn

class BinaryDiceLoss(nn.Module):
    """Dice loss for binary segmentation, averaged over the batch.

    Args:
        smooth: Constant added to numerator and denominator of the Dice
            coefficient so that empty masks do not divide by zero.
    """

    def __init__(self, smooth=1):
        super().__init__()
        self.smooth = smooth

    def forward(self, input, targets):
        """Return ``1 - mean Dice coefficient`` over the batch.

        Args:
            input: Predictions; the first axis is the batch axis.
            targets: Ground-truth mask with the same shape as ``input``.

        Returns:
            Scalar tensor holding the loss.
        """
        # Batch size N.
        N = targets.size()[0]
        # Flatten everything but the batch axis; reshape (unlike view) also
        # accepts non-contiguous slices.
        input_flat = input.reshape(N, -1)
        targets_flat = targets.reshape(N, -1)

        # Soft intersection per sample.
        intersection = (input_flat * targets_flat).sum(1)
        dice_eff = (2 * intersection + self.smooth) / (
            input_flat.sum(1) + targets_flat.sum(1) + self.smooth
        )
        # Average loss per sample of the batch.
        loss = 1 - dice_eff.sum() / N
        return loss

# 多分类
import torch
import torch.nn as nn

class MultiClassDiceLoss(nn.Module):
    """Dice loss for multi-class segmentation.

    The loss is the mean over classes of the binary Dice loss computed
    between each softmax channel and the matching one-hot target channel.

    NOTE(review): ``weight``, ``ignore_index`` and ``kwargs`` are stored but
    not used by ``forward``.
    """

    def __init__(self, weight=None, ignore_index=None, **kwargs):
        super().__init__()
        self.weight = weight
        self.ignore_index = ignore_index
        self.kwargs = kwargs

    def forward(self, input, target):
        """Return the Dice loss averaged over the classes.

        Args:
            input: Logits tensor of shape (N, C, H, W).
            target: Class-index tensor of shape (N, H, W).

        Returns:
            Scalar tensor holding the mean per-class Dice loss.
        """
        nclass = input.shape[1]
        # one_hot appends the class axis last; move it next to the batch axis
        # so the target becomes (N, C, H, W) like the input.
        onehot = nn.functional.one_hot(target.long(), nclass)
        target = onehot.permute(0, onehot.dim() - 1, *range(1, onehot.dim() - 1))
        target = target.to(input.dtype)

        assert input.shape == target.shape, "predict & target shape do not match"

        binaryDiceLoss = BinaryDiceLoss()
        total_loss = 0

        # Normalise the logits into per-class probabilities.
        probs = nn.functional.softmax(input, dim=1)
        C = target.shape[1]

        # Accumulate the binary Dice loss of every class channel.
        for i in range(C):
            total_loss += binaryDiceLoss(probs[:, i], target[:, i])

        # Mean Dice loss per class.
        return total_loss / C

def batch_norm(X, gamma, beta, moving_mean, moving_var, eps, momentum):
    """Apply batch normalisation to ``X``.

    With gradients enabled the batch statistics are used and the moving
    statistics are updated; with gradients disabled the supplied moving
    statistics are used unchanged.

    Returns:
        Tuple ``(Y, moving_mean.data, moving_var.data)`` where
        ``Y = gamma * X_hat + beta``.
    """
    if torch.is_grad_enabled():
        rank = len(X.shape)
        assert rank in (2, 4)
        if rank == 4:
            # Convolutional input: statistics per channel (axis 1), keeping
            # the reduced axes so the result broadcasts against X.
            batch_mean = X.mean(dim=(0, 2, 3), keepdim=True)
            batch_var = ((X - batch_mean) ** 2).mean(dim=(0, 2, 3), keepdim=True)
        else:
            # Fully connected input: statistics per feature.
            batch_mean = X.mean(dim=0)
            batch_var = ((X - batch_mean) ** 2).mean(dim=0)
        # Standardise with the statistics of the current batch.
        X_hat = (X - batch_mean) / torch.sqrt(batch_var + eps)
        # Update the moving averages of mean and variance.
        moving_mean = momentum * moving_mean + (1.0 - momentum) * batch_mean
        moving_var = momentum * moving_var + (1.0 - momentum) * batch_var
    else:
        # Gradients disabled: standardise with the moving statistics as given.
        X_hat = (X - moving_mean) / torch.sqrt(moving_var + eps)
    scaled = gamma * X_hat + beta  # scale and shift
    return scaled, moving_mean.data, moving_var.data

Conv2d

# coding:utf-8
import numpy as np
class Conv2D:
    """Naive 2-D convolution with zero padding, written with NumPy loops.

    ``filters`` has shape (in_channels, out_channels, kernel_size,
    kernel_size) and is initialised with uniform random values.
    Every output value is clamped to at most 255.
    """

    def __init__(self, in_channels, out_channels, kernel_size, padding, stride):
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.padding = padding
        self.stride = stride
        self.filters = np.random.rand(self.in_channels, self.out_channels, self.kernel_size, self.kernel_size)

    def __call__(self, x):
        """Convolve a batch of feature maps.

        Args:
            x: Array of shape (B, C, H, W) with ``C == in_channels``.

        Returns:
            Array of shape (B, out_channels, out_h, out_w).

        Raises:
            ValueError: If the channel count of ``x`` is not ``in_channels``.
        """
        B, C, H, W = x.shape
        if C != self.in_channels:
            raise ValueError(
                f"expected {self.in_channels} input channels, got {C}")
        out_h = (H + self.padding * 2 - self.kernel_size) // self.stride + 1
        out_w = (W + self.padding * 2 - self.kernel_size) // self.stride + 1
        out_features = np.zeros((B, self.out_channels, out_h, out_w))

        # np.pad also handles padding == 0, where the slice "0:-0" is empty.
        pad = self.padding
        in_features = np.pad(
            np.asarray(x, dtype=float),
            ((0, 0), (0, 0), (pad, pad), (pad, pad)),
            mode="constant",
        )
        for batch_idx in range(B):
            for ch_idx in range(self.out_channels):
                # _conv fills the output view in place.
                self._conv(
                    in_features[batch_idx],
                    out_features[batch_idx, ch_idx, :, :],
                    self.filters[:, ch_idx, :, :])
        return out_features

    def _conv(self, x, out, kernel):
        """Fill ``out`` (out_h, out_w) with the response of one kernel.

        ``x`` is one padded sample of shape (C, H, W) and ``kernel`` has shape
        (C, kernel_size, kernel_size). ``out`` is written in place and returned.
        """
        size, step = self.kernel_size, self.stride
        h, w = out.shape
        for i in range(h):
            top = i * step
            for j in range(w):
                left = j * step
                patch = x[:, top:top + size, left:left + size]
                # Sum over channels and window, clamped to 255.
                out[i, j] = min((patch * kernel).sum(), 255)
        return out

# Example: 3x3 convolution mapping 3 input channels to 5 output channels
# on a random batch of four 10x10 inputs.
con2d=Conv2D(in_channels=3, out_channels=5, kernel_size=3, padding=1, stride=1)
x=np.random.rand(4, 3, 10, 10) # B, C, W, H
con2d(x)

MaxPool2D

# coding:utf-8
import numpy as np
 
class MaxPool2D:
    """Max pooling over square windows of a 4-D array."""

    def __init__(self, pool_size, stride):
        self.pool_size = pool_size
        self.stride = stride

    def __call__(self, x):
        """Pool every channel of every sample.

        Args:
            x: Array of shape (B, C, H, W).

        Returns:
            Array of shape (B, C, out_h, out_w) holding the maximum of each
            ``pool_size x pool_size`` window.
        """
        B, C, H, W = x.shape
        out_h = (H - self.pool_size) // self.stride + 1
        out_w = (W - self.pool_size) // self.stride + 1
        out_features = np.zeros((B, C, out_h, out_w))

        for i in range(out_h):
            top = i * self.stride
            for j in range(out_w):
                left = j * self.stride
                window = x[:, :, top:top + self.pool_size, left:left + self.pool_size]
                # Reduce the window for all samples and channels at once.
                out_features[:, :, i, j] = window.max(axis=(2, 3))
        return out_features

# Example: 2x2 max pooling with stride 2 on a random (4, 3, 10, 10) batch.
maxpool2d=MaxPool2D(2,2)
x=np.random.rand(4, 3, 10, 10)
maxpool2d(x)

AvgPool2D

# coding:utf-8
import numpy as np
 
class AvgPool2D:
    """Average pooling over square windows of a 4-D array."""

    def __init__(self, pool_size, stride):
        self.pool_size = pool_size
        self.stride = stride

    def __call__(self, x):
        """Pool every channel of every sample.

        Args:
            x: Array of shape (B, C, H, W).

        Returns:
            Array of shape (B, C, out_h, out_w) holding the mean of each
            ``pool_size x pool_size`` window.
        """
        B, C, H, W = x.shape
        out_h = (H - self.pool_size) // self.stride + 1
        out_w = (W - self.pool_size) // self.stride + 1
        out_features = np.zeros((B, C, out_h, out_w))

        for i in range(out_h):
            top = i * self.stride
            for j in range(out_w):
                left = j * self.stride
                window = x[:, :, top:top + self.pool_size, left:left + self.pool_size]
                # Reduce the window for all samples and channels at once.
                out_features[:, :, i, j] = window.mean(axis=(2, 3))
        return out_features

# Example: 2x2 average pooling with stride 2 on a random (4, 3, 10, 10) batch.
avgpool2d=AvgPool2D(2,2)
x=np.random.rand(4, 3, 10, 10)
avgpool2d(x)