AI算法工程师-非leetcode题目总结

除了Leetcode你还需要这些

希望大家留言,我可以进行补充。持续更新~~~

实现iou

def iou(box1, box2):
    """Return the intersection-over-union (IoU) of two axis-aligned boxes.

    Args:
        box1: Indexable of four numbers read as ``(x1, y1, x2, y2)``.
        box2: Second box, same layout as ``box1``.

    Returns:
        ``intersection / union`` as a float. ``0.0`` is returned when the
        union area is not positive (e.g. two zero-area boxes), instead of
        raising ``ZeroDivisionError``.
    """
    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])

    # Corners of the overlap rectangle.
    inter_x1 = max(box1[0], box2[0])
    inter_y1 = max(box1[1], box2[1])
    inter_x2 = min(box1[2], box2[2])
    inter_y2 = min(box1[3], box2[3])

    # Clamp at 0 so disjoint boxes give an empty intersection, not a
    # negative (or spuriously positive) product.
    inter = max(0, inter_y2 - inter_y1) * max(0, inter_x2 - inter_x1)
    union = area1 + area2 - inter
    if union <= 0:
        return 0.0
    return inter / union
# Second box is fully inside the first: 25 / 100 -> 0.25.
# (The original x1 of 115 exceeded x2 = 20, which is not a valid box.)
print(iou([10, 10, 20, 20], [15, 15, 20, 20]))

实现nms


import numpy as np
def nms(dets, threshold):
    """Greedy non-maximum suppression.

    Args:
        dets: 2-D NumPy array; columns 0-3 are read as ``x1, y1, x2, y2``
            and column 4 as the score.
        threshold: A remaining box survives a round only if its IoU with
            the currently selected box is ``<= threshold``.

    Returns:
        List of row indices into ``dets`` that were kept, highest score
        first. Widths and heights use the ``+ 1`` (inclusive-pixel)
        convention.
    """
    left, top, right, bottom, scores = (dets[:, col] for col in range(5))
    areas = (right - left + 1) * (bottom - top + 1)

    kept = []
    remaining = scores.argsort()[::-1]  # indices by descending score
    while remaining.size > 0:
        best, rest = remaining[0], remaining[1:]
        kept.append(best)

        # Overlap of the selected box with every other remaining box.
        overlap_w = np.maximum(
            np.minimum(right[best], right[rest]) - np.maximum(left[best], left[rest]) + 1, 0
        )
        overlap_h = np.maximum(
            np.minimum(bottom[best], bottom[rest]) - np.maximum(top[best], top[rest]) + 1, 0
        )
        inter = overlap_w * overlap_h
        ratios = inter / (areas[best] + areas[rest] - inter)

        # Drop everything that overlaps the selected box too much.
        remaining = rest[ratios <= threshold]
    return kept


# Three candidate detections, one per row: x1, y1, x2, y2, score.
dets = np.array([
    [10, 10, 50, 50, 0.9],
    [20, 20, 50, 50, 0.8],
    [50, 50, 100, 100, 0.7],
])
print(nms(dets, 0.5))

旋转矩形IOU


import numpy as np
import cv2

# Each box row is: center point, rectangle w and h, rotation theta
# (in degrees, not radians).
def iou_rotate_calculate(boxes1, boxes2):
    """Pairwise IoU between two sets of rotated rectangles.

    Args:
        boxes1: 2-D array; each row is indexed as ``(cx, cy, w, h, theta)``.
        boxes2: 2-D array with the same row layout.

    Returns:
        ``np.float32`` array whose entry ``[i][j]`` is the IoU of
        ``boxes1[i]`` and ``boxes2[j]``; ``0.0`` where OpenCV reports no
        intersection points.
    """
    areas_a = boxes1[:, 2] * boxes1[:, 3]
    areas_b = boxes2[:, 2] * boxes2[:, 3]

    rows = []
    for a_idx, box_a in enumerate(boxes1):
        # OpenCV rotated-rect format: ((cx, cy), (w, h), angle).
        rect_a = ((box_a[0], box_a[1]), (box_a[2], box_a[3]), box_a[4])
        row = []
        for b_idx, box_b in enumerate(boxes2):
            rect_b = ((box_b[0], box_b[1]), (box_b[2], box_b[3]), box_b[4])

            corner_pts = cv2.rotatedRectangleIntersection(rect_a, rect_b)[1]
            if corner_pts is None:
                row.append(0.0)
                continue

            # Order the intersection vertices before measuring the area.
            hull = cv2.convexHull(corner_pts, returnPoints=True)
            overlap = cv2.contourArea(hull)
            row.append(overlap * 1.0 / (areas_a[a_idx] + areas_b[b_idx] - overlap))
        rows.append(row)
    return np.array(rows, dtype=np.float32)



手动实现BN

$$y = \frac{x - \mathrm{mean}}{\sqrt{\mathrm{var} + \epsilon}}, \qquad \epsilon = 10^{-6}$$
BN在训练的过程中和测试的过程中应该如何设置:
训练时的数据量大,分布更加稳定;eval数据量有限不建议大规模更改mean和var;
eval: training=False, track_running_stats=True。这个是期望中的测试阶段的设置,此时BN会用之前训练好的模型中的(假设已经保存下了)running_mean和running_var并且不会对其进行更新。一般来说,只需要对含有BN层的model调用model.eval(),即可实现这个功能。
train: training=True, track_running_stats=True。这个是期望中的训练阶段的设置,此时BN将会跟踪整个训练过程中batch的统计特性。
如果输入数据维度【B,C,H,W】对于BatchNorm来说,他的weight和bias都是C的维度,里面求的mean和std维度都是[1,C,1,1]。仔细看一看,该题目可以问的很细。


import numpy as np

class BatchNormalization4D:
    """Batch normalization for 4-D arrays, normalizing per channel (axis 1).

    Statistics are reduced over axes ``(0, 2, 3)``, so ``gamma``, ``beta``,
    ``running_mean`` and ``running_var`` all have shape ``(1, C, 1, 1)``.

    Running statistics follow the convention
    ``running = momentum * running + (1 - momentum) * batch``; they are
    seeded with the statistics of the first batch seen.
    """

    def __init__(self, epsilon=1e-5, momentum=0.9):
        """Store hyper-parameters; parameters are created lazily in forward().

        Args:
            epsilon: Constant added to the variance before the square root.
            momentum: Weight of the previous running statistics in the
                exponential moving average.
        """
        self.epsilon = epsilon
        self.momentum = momentum
        self.running_mean = None
        self.running_var = None
        self.gamma = None
        self.beta = None
        # Values cached by a training-mode forward() for use in backward().
        self.batch_size = None
        self.channels = None
        self.width = None
        self.height = None
        self.x_centered = None
        self.std = None

    def forward(self, x, training=True):
        """Normalize ``x`` and apply the affine transform ``gamma * x_hat + beta``.

        Args:
            x: 4-D array; axis 1 is treated as the channel axis.
            training: When True (default) the batch statistics are used, the
                running statistics are updated and intermediates are cached
                for backward(). When False the stored running statistics are
                used and nothing is updated or cached.

        Returns:
            Array with the same shape as ``x``.
        """
        reduce_axes = (0, 2, 3)

        if self.gamma is None:
            channels = x.shape[1]
            self.gamma = np.ones((1, channels, 1, 1))
            self.beta = np.zeros((1, channels, 1, 1))

        if training or self.running_mean is None:
            batch_mean = np.mean(x, axis=reduce_axes, keepdims=True)
            batch_var = np.var(x, axis=reduce_axes, keepdims=True)
            if self.running_mean is None:
                # First batch ever seen: seed the running statistics.
                self.running_mean = batch_mean
                self.running_var = batch_var
            else:
                keep = self.momentum
                self.running_mean = keep * self.running_mean + (1.0 - keep) * batch_mean
                self.running_var = keep * self.running_var + (1.0 - keep) * batch_var

        if training:
            mean, var = batch_mean, batch_var
        else:
            mean, var = self.running_mean, self.running_var

        x_centered = x - mean
        std = np.sqrt(var + self.epsilon)

        if training:
            self.batch_size, self.channels, self.width, self.height = x.shape
            self.x_centered = x_centered
            self.std = std

        return self.gamma * (x_centered / std) + self.beta

    def backward(self, dout, lr=1.0):
        """Back-propagate through the last training-mode forward() call.

        Besides computing the input gradient, this applies one gradient
        descent step to ``gamma`` and ``beta`` in place.

        Args:
            dout: Gradient of the loss w.r.t. the forward output.
            lr: Step size for the in-place ``gamma`` / ``beta`` update.

        Returns:
            Gradient of the loss w.r.t. the forward input ``x``.

        Raises:
            RuntimeError: If no training-mode forward() has been run yet.
        """
        if self.x_centered is None:
            raise RuntimeError("backward() called before a training-mode forward()")

        reduce_axes = (0, 2, 3)
        count = self.batch_size * self.width * self.height  # elements per channel

        dbeta = np.sum(dout, axis=reduce_axes, keepdims=True)
        dgamma = np.sum(self.x_centered / self.std * dout, axis=reduce_axes, keepdims=True)

        # Uses gamma as it was during forward(); the update happens below.
        dx_norm = dout * self.gamma
        dvar = -0.5 * np.sum(dx_norm * self.x_centered, axis=reduce_axes, keepdims=True) / self.std ** 3

        # Direct path plus the path through the variance ...
        dx = dx_norm / self.std + (2.0 / count) * self.x_centered * dvar
        # ... plus the path through the mean.
        dmean = -np.sum(dx, axis=reduce_axes, keepdims=True)
        dx = dx + dmean / count

        self.gamma -= lr * dgamma
        self.beta -= lr * dbeta

        return dx


x = np.random.randn(3, 4, 8, 8)
# Create the batch-normalization layer with its default hyper-parameters.
# The input array is passed to forward(), not to the constructor: the
# constructor's first positional parameter is epsilon.
bn = BatchNormalization4D()

手动实现CONV

使用了简化版本,类实现的太多了,背不上;

import numpy as np

def conv2d_numpy(input_data, kernel, stride=1, padding=0):
    """Slide a 2-D kernel over a 2-D array (no kernel flip, i.e. cross-correlation).

    Args:
        input_data: 2-D array.
        kernel: 2-D array.
        stride: Step between consecutive window positions, in both axes.
        padding: Number of zero rows/columns added on every side.

    Returns:
        2-D float array of shape
        ``((H + 2*padding - kh) // stride + 1, (W + 2*padding - kw) // stride + 1)``
        (clamped at 0), where each entry is ``sum(window * kernel)``.
    """
    kernel_height, kernel_width = kernel.shape

    # Zero-pad first so that all later sizes refer to the padded array.
    if padding > 0:
        input_data = np.pad(input_data, ((padding, padding), (padding, padding)), mode='constant')
    padded_height, padded_width = input_data.shape

    output_height = max((padded_height - kernel_height) // stride + 1, 0)
    output_width = max((padded_width - kernel_width) // stride + 1, 0)
    output_data = np.zeros((output_height, output_width))

    # Iterate over output positions; the window origin is position * stride.
    # (Looping over the unpadded input size would skip the padded border.)
    for out_row in range(output_height):
        top = out_row * stride
        for out_col in range(output_width):
            left = out_col * stride
            window = input_data[top:top + kernel_height, left:left + kernel_width]
            output_data[out_row, out_col] = np.sum(window * kernel)

    return output_data


# Example 4x4 image holding the values 1..16 row by row.
image = np.arange(1, 17, dtype=np.float32).reshape(4, 4)

# 2x2 convolution kernel (filter).
kernel = np.array([[1, 1], [0, -1]], dtype=np.float32)

# Run the hand-written convolution and show the result.
result = conv2d_numpy(image, kernel, stride=1, padding=0)

print(result)

实现CrossEntropyLoss

在这里插入图片描述

import numpy as np
import torch

def myCrossEntropyLoss(x, label):
    """Mean softmax cross-entropy of logits ``x`` against integer class labels.

    For each sample ``i`` the loss is ``logsumexp(x[i]) - x[i][label[i]]``;
    the mean over all samples is returned.

    Args:
        x: 2-D array-like of raw scores, one row per sample and one column
            per class.
        label: 1-D array-like of integer class indices, one per sample.

    Returns:
        The mean loss as a NumPy float.
    """
    logits = np.asarray(x, dtype=np.float64)
    targets = np.asarray(label)
    rows = np.arange(targets.shape[0])

    picked = logits[rows]
    # Subtract the row maximum before exponentiating: the loss is unchanged
    # mathematically, but exp() can no longer overflow for large logits.
    shifted = picked - picked.max(axis=1, keepdims=True)
    log_norm = np.log(np.exp(shifted).sum(axis=1))

    return np.mean(log_norm - shifted[rows, targets])

# Raw scores: one row per sample, one column per class.
x = np.array([
    [0.1545, -0.5706, -0.0739],
    [0.2990, 0.1373, 0.0784],
    [0.1633, 0.0226, 0.1038],
])

# Class labels, one per row of x.
label = np.array([0, 1, 0])

print("my CrossEntropyLoss output: %.4f"% myCrossEntropyLoss(x, label))

# Same inputs through PyTorch's implementation, for comparison.
x_tensor = torch.from_numpy(x)
label_tensor = torch.from_numpy(label)
loss = torch.nn.CrossEntropyLoss()
output = loss(x_tensor, label_tensor)
print("torch CrossEntropyLoss output: ", output)

实现kmeans算法

import numpy as np

def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between two equally-shaped arrays."""
    return np.sqrt(np.sum((x1 - x2) ** 2))


def kmeans(X, k, max_iters=100):
    """Cluster the rows of ``X`` into ``k`` groups with Lloyd's algorithm.

    Initial centroids are ``k`` distinct rows of ``X`` drawn with
    ``np.random.choice`` (seed NumPy's global RNG for reproducibility).

    Args:
        X: 2-D array-like, one sample per row.
        k: Number of clusters; must not exceed ``len(X)``.
        max_iters: Maximum number of assign/update rounds.

    Returns:
        ``(centroids, clusters)`` where ``centroids`` is a ``(k, n_features)``
        float array and ``clusters`` is a list of ``k`` lists holding the
        samples assigned to each centroid in the last round.
    """
    X = np.asarray(X, dtype=float)

    # Randomly pick k distinct samples as the starting centroids.
    centroids = X[np.random.choice(len(X), k, replace=False)]
    # Defined up front so the return value is valid even if max_iters == 0.
    clusters = [[] for _ in range(k)]

    for _ in range(max_iters):
        # Assignment step: each sample joins its nearest centroid's cluster.
        clusters = [[] for _ in range(k)]
        for point in X:
            distances = [euclidean_distance(point, centroid) for centroid in centroids]
            clusters[int(np.argmin(distances))].append(point)

        # Update step: move every centroid to the mean of its members.
        # A cluster that received no samples keeps its previous centroid;
        # taking the mean of an empty list would produce NaN.
        new_centroids = np.array([
            np.mean(members, axis=0) if members else centroids[idx]
            for idx, members in enumerate(clusters)
        ])

        # Stop as soon as the centroids no longer move.
        if np.array_equal(centroids, new_centroids):
            break

        centroids = new_centroids

    return centroids, clusters

# Small example data set: six 2-D points.
X = np.array([
    [1, 2],
    [5, 8],
    [1.5, 1.8],
    [8, 8],
    [1, 0.6],
    [9, 11],
])

# Number of clusters to split the data into.
k = 2

# Run k-means.
centroids, clusters = kmeans(X, k)

# Show the members of every cluster.
for i, cluster in enumerate(clusters):
    print(f"Cluster {i+1}:")
    for point in cluster:
        print(point)

# Show the final cluster centers.
print("Centroids:")
for centroid in centroids:
    print(centroid)

实现多头注意力机制


import torch
import torch.nn as nn

class MultiHeadAttention(nn.Module):
    """Multi-head scaled dot-product attention with learned Q/K/V/output projections.

    ``input_dim`` must be divisible by ``num_heads``; each head works on
    ``input_dim // num_heads`` features.
    """

    def __init__(self, input_dim, num_heads, dropout=0.1):
        super().__init__()

        assert input_dim % num_heads == 0

        self.input_dim = input_dim
        self.num_heads = num_heads
        self.head_dim = input_dim // num_heads

        # Input projections for queries, keys and values.
        self.W_q = nn.Linear(input_dim, input_dim)
        self.W_k = nn.Linear(input_dim, input_dim)
        self.W_v = nn.Linear(input_dim, input_dim)

        # Projection applied after the heads are merged again.
        self.W_o = nn.Linear(input_dim, input_dim)

        self.dropout = nn.Dropout(dropout)

    def forward(self, query, key, value, mask=None):
        """Attend from ``query`` to ``key`` / ``value``.

        Inputs are expected as ``(batch, seq, input_dim)`` tensors (see
        ``split_heads``). If ``mask`` is given, positions where
        ``mask == 0`` are set to ``-inf`` before the softmax; it must
        broadcast against the ``(batch, heads, q_len, k_len)`` score tensor.
        Returns a ``(batch, q_len, input_dim)`` tensor.
        """
        heads = self.num_heads

        # Project, then reshape to (batch, heads, seq, head_dim).
        q_heads = self.split_heads(self.W_q(query), heads)
        k_heads = self.split_heads(self.W_k(key), heads)
        v_heads = self.split_heads(self.W_v(value), heads)

        # Scaled dot-product scores.
        scale = torch.sqrt(torch.tensor(self.head_dim, dtype=torch.float32))
        logits = torch.matmul(q_heads, k_heads.transpose(-2, -1)) / scale

        if mask is not None:
            logits = logits.masked_fill(mask == 0, float("-inf"))

        weights = self.dropout(torch.softmax(logits, dim=-1))

        # Weighted sum of values, heads merged, then the output projection.
        context = torch.matmul(weights, v_heads)
        return self.W_o(self.combine_heads(context))

    def split_heads(self, tensor, num_heads):
        """Reshape ``(batch, seq, dim)`` into ``(batch, num_heads, seq, head_dim)``."""
        batch_size, seq_length = tensor.shape[0], tensor.shape[1]
        per_head = tensor.view(batch_size, seq_length, num_heads, self.head_dim)
        return per_head.permute(0, 2, 1, 3)

    def combine_heads(self, tensor):
        """Inverse of ``split_heads``: back to ``(batch, seq, num_heads * head_dim)``."""
        batch_size, num_heads, seq_length = tensor.shape[:3]

        # permute() yields a non-contiguous view; view() needs contiguous memory.
        merged = tensor.permute(0, 2, 1, 3).contiguous()
        return merged.view(batch_size, seq_length, num_heads * self.head_dim)

# Smoke test
input_dim, num_heads = 64, 8
seq_length, batch_size = 10, 32

# Random query / key / value batches, drawn in that order.
query, key, value = (
    torch.rand(batch_size, seq_length, input_dim) for _ in range(3)
)

multihead_attn = MultiHeadAttention(input_dim, num_heads)
output = multihead_attn(query, key, value)

print(output.size())  # prints: torch.Size([32, 10, 64])

Multi-Head Attention 的初衷是通过并行化处理、增强表示能力、减少过拟合等方式,提高模型在处理序列数据时的效率和性能。

self attention的实现

import torch
import torch.nn as nn
import torch.nn.functional as F

class SelfAttention(nn.Module):
    """Single-head scaled dot-product self-attention.

    Queries, keys and values are all linear projections of the same input,
    each of width ``input_dim``.
    """

    def __init__(self, input_dim, dropout=0.1):
        super().__init__()

        self.input_dim = input_dim
        # Scores are divided by sqrt(input_dim); computed once here rather
        # than building a tensor on every forward call.
        self.scale = input_dim ** 0.5

        self.W_q = nn.Linear(input_dim, input_dim)
        self.W_k = nn.Linear(input_dim, input_dim)
        self.W_v = nn.Linear(input_dim, input_dim)

        self.dropout = nn.Dropout(dropout)

    def forward(self, inputs, mask=None):
        """Apply self-attention to ``inputs``.

        Args:
            inputs: Tensor whose last dimension is ``input_dim``; the
                second-to-last dimension is the sequence axis.
            mask: Optional tensor broadcastable to the score tensor;
                positions where ``mask == 0`` are set to ``-inf`` before the
                softmax. A row that is masked everywhere yields NaN weights.

        Returns:
            ``(attention_output, attn_weights)``.
        """
        # Linear projections
        Q = self.W_q(inputs)
        K = self.W_k(inputs)
        V = self.W_v(inputs)

        # Scaled dot-product scores
        scores = torch.matmul(Q, K.transpose(-2, -1)) / self.scale

        if mask is not None:
            scores = scores.masked_fill(mask == 0, float("-inf"))

        attn_weights = F.softmax(scores, dim=-1)
        attn_weights = self.dropout(attn_weights)

        # Weighted sum of the values
        attention_output = torch.matmul(attn_weights, V)

        return attention_output, attn_weights

# Smoke test
input_dim, seq_length, batch_size = 64, 10, 32

inputs = torch.rand(batch_size, seq_length, input_dim)

self_attention = SelfAttention(input_dim)
output, attn_weights = self_attention(inputs)

print(output.size())         # prints: torch.Size([32, 10, 64])
print(attn_weights.size())   # prints: torch.Size([32, 10, 10])

cross attention 实现

import torch
import torch.nn as nn
import torch.nn.functional as F

class CrossAttention(nn.Module):
    """Single-head scaled dot-product attention between two sequences.

    Queries are projected from ``input_dim_query`` features and keys/values
    from ``input_dim_key_value`` features, all into ``output_dim``.
    """

    def __init__(self, input_dim_query, input_dim_key_value, output_dim, dropout=0.1):
        super().__init__()

        self.input_dim_query = input_dim_query
        self.input_dim_key_value = input_dim_key_value
        self.output_dim = output_dim

        self.W_q = nn.Linear(input_dim_query, output_dim)
        self.W_k = nn.Linear(input_dim_key_value, output_dim)
        self.W_v = nn.Linear(input_dim_key_value, output_dim)

        self.dropout = nn.Dropout(dropout)

    def forward(self, query, key, value, mask=None):
        """Attend from ``query`` to ``key`` / ``value``.

        If ``mask`` is given, positions where ``mask == 0`` are set to
        ``-inf`` before the softmax. Returns
        ``(attention_output, attn_weights)``.
        """
        projected_q = self.W_q(query)
        projected_k = self.W_k(key)
        projected_v = self.W_v(value)

        # Similarity of every query position with every key position,
        # divided by sqrt(output_dim).
        scale = torch.tensor(self.output_dim, dtype=torch.float32).sqrt()
        similarity = torch.matmul(projected_q, projected_k.transpose(-2, -1)) / scale

        if mask is not None:
            similarity = similarity.masked_fill(mask == 0, float("-inf"))

        attn_weights = self.dropout(F.softmax(similarity, dim=-1))

        # Weighted sum of the projected values.
        return torch.matmul(attn_weights, projected_v), attn_weights

# Smoke test
input_dim_query, input_dim_key_value, output_dim = 64, 128, 64
seq_length_query, seq_length_key_value = 10, 20
batch_size = 32

# The query sequence and the key/value sequence differ in both length
# and feature size.
query = torch.rand(batch_size, seq_length_query, input_dim_query)
kv_shape = (batch_size, seq_length_key_value, input_dim_key_value)
key = torch.rand(*kv_shape)
value = torch.rand(*kv_shape)

cross_attention = CrossAttention(input_dim_query, input_dim_key_value, output_dim)
output, attn_weights = cross_attention(query, key, value)

print(output.size())         # prints: torch.Size([32, 10, 64])
print(attn_weights.size())   # prints: torch.Size([32, 10, 20])

Cross-attention的输入来自不同的序列,Self-attention的输入来自同序列,也就是所谓的输入不同,但是除此之外,基本一致。

  • 9
    点赞
  • 7
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值