AI算法工程师-非leetcode题目总结
除了Leetcode你还需要这些
希望大家留言,我可以进行补充。持续更新~~~
实现iou
def iou(box1, box2):
    """Return the intersection-over-union of two axis-aligned boxes.

    Args:
        box1: Sequence ``[x1, y1, x2, y2]`` (two opposite corners, with
            ``x1 <= x2`` and ``y1 <= y2`` for a well-formed box).
        box2: Second box in the same format.

    Returns:
        The IoU as a float in ``[0, 1]``; ``0.0`` when the union has no area.
    """
    # Clamp every side at zero so an inverted box (x2 < x1 or y2 < y1)
    # contributes no area instead of a negative or spurious positive one.
    area1 = max(0, box1[2] - box1[0]) * max(0, box1[3] - box1[1])
    area2 = max(0, box2[2] - box2[0]) * max(0, box2[3] - box2[1])

    inter_w = max(0, min(box1[2], box2[2]) - max(box1[0], box2[0]))
    inter_h = max(0, min(box1[3], box2[3]) - max(box1[1], box2[1]))
    inter = inter_w * inter_h

    union = area1 + area2 - inter
    if union <= 0:
        # Both boxes are degenerate: avoid dividing by zero.
        return 0.0
    return inter / union
# Demo call for iou().
# NOTE(review): the second box has x1=115 > x2=20, i.e. it is inverted along
# x -- possibly a typo for 15; confirm the intended value.
print(iou ([10, 10, 20, 20], [115, 15, 20, 20]))
实现nms
import numpy as np
def nms(dets, threshold):
    """Greedy non-maximum suppression.

    Args:
        dets: 2-D array whose rows are ``[x1, y1, x2, y2, score]``.
        threshold: A remaining box is dropped when its IoU with the box
            just kept is strictly greater than this value.

    Returns:
        List of row indices into ``dets`` that were kept, ordered by
        descending score.
    """
    left, top, right, bottom, conf = (dets[:, col] for col in range(5))
    # Box sizes are computed as (x2 - x1 + 1) * (y2 - y1 + 1).
    box_area = (right - left + 1) * (bottom - top + 1)

    remaining = conf.argsort()[::-1]
    keep = []
    while remaining.size > 0:
        best, rest = remaining[0], remaining[1:]
        keep.append(best)

        # Overlap rectangle between the kept box and every other candidate.
        overlap_w = np.maximum(
            np.minimum(right[best], right[rest])
            - np.maximum(left[best], left[rest]) + 1,
            0,
        )
        overlap_h = np.maximum(
            np.minimum(bottom[best], bottom[rest])
            - np.maximum(top[best], top[rest]) + 1,
            0,
        )
        overlap = overlap_w * overlap_h
        ratio = overlap / (box_area[best] + box_area[rest] - overlap)

        # Only candidates that do not overlap too much survive this round.
        remaining = rest[ratio <= threshold]
    return keep
# Three scored boxes, one per row: [x1, y1, x2, y2, score].
dets = np.array([
    [10, 10, 50, 50, 0.9],
    [20, 20, 50, 50, 0.8],
    [50, 50, 100, 100, 0.7],
])
print(nms(dets, threshold=0.5))
旋转矩形IOU
import numpy as np
import cv2
# Each box row is: centre point (cx, cy), rectangle w and h, and the rotation
# theta given in degrees (not radians).
def iou_rotate_calculate(boxes1, boxes2):
    """Compute pairwise IoU between two sets of rotated rectangles.

    Args:
        boxes1: 2-D array whose rows are ``[cx, cy, w, h, theta]``.
        boxes2: 2-D array in the same format.

    Returns:
        ``float32`` array with one row per box in ``boxes1`` and one column
        per box in ``boxes2``; entries are 0.0 where OpenCV reports no
        intersection region.
    """
    areas_a = boxes1[:, 2] * boxes1[:, 3]
    areas_b = boxes2[:, 2] * boxes2[:, 3]
    # OpenCV rotated-rect tuples for the second set, built once up front.
    rects_b = [((b[0], b[1]), (b[2], b[3]), b[4]) for b in boxes2]

    rows = []
    for idx_a, a in enumerate(boxes1):
        rect_a = ((a[0], a[1]), (a[2], a[3]), a[4])
        row = []
        for idx_b, rect_b in enumerate(rects_b):
            region = cv2.rotatedRectangleIntersection(rect_a, rect_b)[1]
            if region is None:
                row.append(0.0)
                continue
            # Order the intersection vertices so contourArea sees a polygon.
            hull = cv2.convexHull(region, returnPoints=True)
            overlap = cv2.contourArea(hull)
            row.append(overlap * 1.0 / (areas_a[idx_a] + areas_b[idx_b] - overlap))
        rows.append(row)
    return np.array(rows, dtype=np.float32)
手动实现BN
$$y = \frac{x - \mathrm{mean}}{\sqrt{\mathrm{var} + \epsilon}}, \qquad \epsilon = 10^{-6}$$
其中 mean、var 是按通道统计的均值和方差,ε 是防止除零的小常数;归一化之后通常还要再做一次仿射变换 γ·y + β。
BN在训练的过程中和测试的过程中应该如何设置:
训练时的数据量大,分布更加稳定;eval数据量有限不建议大规模更改mean和var;
eval: training=False, track_running_stats=True。这是期望的测试阶段设置:此时BN会使用训练阶段保存下来的running_mean和running_var,并且不会再更新它们。一般来说,只要模型中含有BN层,调用model.eval()即可实现这个功能。
train: training=True, track_running_stats=True。这是期望的训练阶段设置:此时BN用当前batch的统计量做归一化,同时跟踪整个训练过程中batch的统计特性(更新running_mean和running_var)。
如果输入数据的维度是【B,C,H,W】,那么BatchNorm的weight和bias的维度都是C,求出的mean和std的维度都是[1,C,1,1](即在B、H、W三个维度上做统计)。这道题可以问得很细,建议仔细推一遍。
import numpy as np
class BatchNormalization4D:
    """Batch normalisation for 4-D arrays, normalising each channel (axis 1).

    Statistics are reduced over axes ``(0, 2, 3)`` and kept with shape
    ``(1, C, 1, 1)`` so they broadcast against the input.

    Args:
        epsilon: Small constant added to the variance before the square root.
        momentum: Weight kept by the old running statistic on every
            training step:
            ``running = momentum * running + (1 - momentum) * batch``.
    """

    def __init__(self, epsilon=1e-5, momentum=0.9):
        self.epsilon = epsilon
        self.momentum = momentum
        # Running statistics used in inference mode; created by the first
        # training-mode forward pass.
        self.running_mean = None
        self.running_var = None
        # Learnable scale and shift; created lazily once C is known.
        self.gamma = None
        self.beta = None
        # Gradients computed by the most recent backward() call.
        self.dgamma = None
        self.dbeta = None
        # Values cached by a training-mode forward() for backward().
        self.batch_size = None
        self.x_centered = None
        self.std = None

    def forward(self, x, training=True):
        """Normalise ``x`` and apply the per-channel scale and shift.

        Args:
            x: 4-D array; axis 1 is the channel axis.
            training: When true (default), normalise with the statistics of
                ``x`` and update the running statistics. When false,
                normalise with the stored running statistics and leave all
                state untouched.

        Returns:
            Array with the same shape as ``x``.

        Raises:
            RuntimeError: If called with ``training=False`` before any
                training-mode pass has produced running statistics.
        """
        channels = x.shape[1]
        if self.gamma is None:
            self.gamma = np.ones((1, channels, 1, 1))
            self.beta = np.zeros((1, channels, 1, 1))

        if not training:
            if self.running_mean is None:
                raise RuntimeError(
                    "no running statistics yet; run forward(x) in training "
                    "mode at least once first"
                )
            x_norm = (x - self.running_mean) / np.sqrt(self.running_var + self.epsilon)
            return self.gamma * x_norm + self.beta

        self.batch_size, self.channels, self.width, self.height = x.shape
        x_mean = np.mean(x, axis=(0, 2, 3), keepdims=True)
        x_var = np.var(x, axis=(0, 2, 3), keepdims=True)

        if self.running_mean is None:
            # The first batch seeds the running statistics.
            self.running_mean = x_mean
            self.running_var = x_var
        else:
            keep = self.momentum
            self.running_mean = keep * self.running_mean + (1.0 - keep) * x_mean
            self.running_var = keep * self.running_var + (1.0 - keep) * x_var

        self.x_centered = x - x_mean
        self.std = np.sqrt(x_var + self.epsilon)
        x_norm = self.x_centered / self.std
        return self.gamma * x_norm + self.beta

    def backward(self, dout, learning_rate=1.0):
        """Back-propagate through the last training-mode forward pass.

        The gradients for gamma and beta are stored in ``self.dgamma`` and
        ``self.dbeta`` and then applied in place as one gradient-descent
        step scaled by ``learning_rate``.

        Args:
            dout: Upstream gradient, same shape as the forward output.
            learning_rate: Step size for the gamma/beta update; the default
                of 1.0 subtracts the raw gradients.

        Returns:
            Gradient with respect to the forward input, same shape as
            ``dout``.
        """
        axes = (0, 2, 3)
        # Number of elements that contributed to each channel statistic.
        count = self.batch_size * self.width * self.height

        self.dbeta = np.sum(dout, axis=axes, keepdims=True)
        self.dgamma = np.sum(self.x_centered / self.std * dout, axis=axes, keepdims=True)

        dx_norm = dout * self.gamma
        # d(loss)/d(var): x_norm = x_centered * (var + eps) ** -0.5.
        dvar = -0.5 * np.sum(dx_norm * self.x_centered, axis=axes, keepdims=True) / self.std ** 3
        # Direct path through x_norm plus the path through the variance.
        dx_centered = dx_norm / self.std + (2.0 / count) * self.x_centered * dvar
        # Path through the mean, shared equally by every element.
        dmean = -np.sum(dx_centered, axis=axes, keepdims=True)
        dx = dx_centered + dmean / count

        self.gamma -= learning_rate * self.dgamma
        self.beta -= learning_rate * self.dbeta
        return dx
x = np.random.randn(3, 4, 8, 8)
# Build the layer with its default hyper-parameters. The sample array is the
# input for forward(), not a constructor argument: passed positionally it
# would be bound to `epsilon`.
bn = BatchNormalization4D()
手动实现CONV
使用了简化版本,类实现的太多了,背不上;
import numpy as np
def conv2d_numpy(input_data, kernel, stride=1, padding=0):
    """Slide a 2-D kernel over a 2-D array and sum the element-wise products.

    The kernel is applied as-is (it is not flipped).

    Args:
        input_data: 2-D array.
        kernel: 2-D array of weights.
        stride: Step between consecutive windows, in both directions.
        padding: Number of zero rows/columns added on every side.

    Returns:
        2-D float array with
        ``(size - kernel_size + 2 * padding) // stride + 1`` entries along
        each axis.
    """
    input_height, input_width = input_data.shape
    kernel_height, kernel_width = kernel.shape

    output_height = (input_height - kernel_height + 2 * padding) // stride + 1
    output_width = (input_width - kernel_width + 2 * padding) // stride + 1
    output_data = np.zeros((output_height, output_width))

    if padding > 0:
        input_data = np.pad(input_data, ((padding, padding), (padding, padding)), mode='constant')

    # Iterate over output positions so the windows cover the padded input;
    # bounding the loops by the unpadded size would leave most of the output
    # at zero whenever padding > 0.
    for out_row in range(output_height):
        top = out_row * stride
        for out_col in range(output_width):
            left = out_col * stride
            window = input_data[top:top + kernel_height, left:left + kernel_width]
            output_data[out_row, out_col] = np.sum(window * kernel)
    return output_data
# Sample 4x4 single-channel image holding the values 1..16 row by row.
image = np.arange(1, 17, dtype=np.float32).reshape(4, 4)
# 2x2 filter to slide over the image.
kernel = np.array([[1, 1], [0, -1]], dtype=np.float32)
# Run the hand-written convolution and show the result.
result = conv2d_numpy(image, kernel, stride=1, padding=0)
print(result)
实现CrossEntropyLoss
import numpy as np
import torch
def myCrossEntropyLoss(x, label):
    """Mean cross-entropy between raw logits and integer class labels.

    For each sample the loss is ``log(sum(exp(logits))) - logits[label]``.

    Args:
        x: 2-D array-like of logits, one row per sample.
        label: 1-D array-like of integer class indices, one per sample.

    Returns:
        The loss averaged over the labelled samples.
    """
    logits = np.asarray(x, dtype=np.float64)
    labels = np.asarray(label)
    rows = np.arange(labels.shape[0])

    # Subtract each row's maximum before exponentiating: the loss value is
    # unchanged, but exp() can no longer overflow for large logits.
    shifted = logits - logits.max(axis=1, keepdims=True)
    log_norm = np.log(np.exp(shifted).sum(axis=1))
    return np.mean(log_norm[rows] - shifted[rows, labels])
x = np.array([
    [0.1545, -0.5706, -0.0739],
    [0.2990, 0.1373, 0.0784],
    [0.1633, 0.0226, 0.1038],
])
# Class-index labels. The dtype is fixed to int64 because torch's
# CrossEntropyLoss expects 64-bit integer targets, while NumPy's default
# integer width depends on the platform.
label = np.array([0, 1, 0], dtype=np.int64)
print("my CrossEntropyLoss output: %.4f"% myCrossEntropyLoss(x, label))
# Cross-check the hand-written loss against the PyTorch implementation.
loss = torch.nn.CrossEntropyLoss()
x_tensor = torch.from_numpy(x)
label_tensor = torch.from_numpy(label)
output = loss(x_tensor, label_tensor)
print("torch CrossEntropyLoss output: ", output)
实现kmeans算法
import numpy as np
def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between ``x1`` and ``x2``."""
    delta = x1 - x2
    squared_total = np.sum(np.square(delta))
    return np.sqrt(squared_total)
def _assign_to_nearest(X, centroids):
    """Return, for every sample in X, the index of its closest centroid."""
    n_samples, n_clusters = len(X), len(centroids)
    diff = X[:, None] - centroids[None]
    # Flatten any trailing feature axes so the same code handles 1-D samples.
    squared = (diff ** 2).reshape(n_samples, n_clusters, -1).sum(axis=2)
    return np.argmin(np.sqrt(squared), axis=1)


def kmeans(X, k, max_iters=100):
    """Cluster the samples in ``X`` into ``k`` groups with Lloyd's algorithm.

    Initial centres are ``k`` distinct samples drawn with ``np.random``.

    Args:
        X: Array of samples, one per row.
        k: Number of clusters; must not exceed the number of samples.
        max_iters: Upper bound on the number of update rounds.

    Returns:
        ``(centroids, clusters)``: ``centroids`` is a float array with one
        row per cluster, and ``clusters`` is a list of ``k`` lists holding
        the samples assigned to each centroid.
    """
    X = np.asarray(X)
    # Randomly pick k distinct samples as the starting centres.
    start = np.random.choice(len(X), k, replace=False)
    centroids = X[start].astype(float)

    for _ in range(max_iters):
        labels = _assign_to_nearest(X, centroids)
        new_centroids = centroids.copy()
        for idx in range(k):
            members = X[labels == idx]
            # A cluster that attracted no samples keeps its previous centre;
            # averaging an empty set would produce NaN.
            if len(members):
                new_centroids[idx] = members.mean(axis=0)
        # Stop as soon as no centre moves.
        if np.array_equal(new_centroids, centroids):
            break
        centroids = new_centroids

    # Group samples against the centres that are actually returned.
    labels = _assign_to_nearest(X, centroids)
    clusters = [list(X[labels == idx]) for idx in range(k)]
    return centroids, clusters
# Six 2-D sample points to cluster.
X = np.array([
    [1, 2], [5, 8], [1.5, 1.8],
    [8, 8], [1, 0.6], [9, 11],
])
# Number of clusters to form.
k = 2
# Run the k-means routine on the samples.
centroids, clusters = kmeans(X, k)
# List the members of every cluster, numbering clusters from 1.
for i, cluster in enumerate(clusters):
    print(f"Cluster {i+1}:")
    for point in cluster:
        print(point)
# Finally show the cluster centres.
print("Centroids:")
for centroid in centroids:
    print(centroid)
实现多头注意力机制
import torch
import torch.nn as nn
class MultiHeadAttention(nn.Module):
    """Multi-head scaled dot-product attention with an output projection.

    Args:
        input_dim: Feature size of query/key/value and of the output; must
            be divisible by ``num_heads``.
        num_heads: Number of attention heads.
        dropout: Dropout probability applied to the attention weights.
    """

    def __init__(self, input_dim, num_heads, dropout=0.1):
        super().__init__()
        assert input_dim % num_heads == 0
        self.input_dim = input_dim
        self.num_heads = num_heads
        self.head_dim = input_dim // num_heads
        # Projections for query, key, value and the merged head output.
        self.W_q = nn.Linear(input_dim, input_dim)
        self.W_k = nn.Linear(input_dim, input_dim)
        self.W_v = nn.Linear(input_dim, input_dim)
        self.W_o = nn.Linear(input_dim, input_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, query, key, value, mask=None):
        """Attend from ``query`` to ``key``/``value``.

        Args:
            query: Tensor of shape ``(batch, query_len, input_dim)``.
            key: Tensor of shape ``(batch, key_len, input_dim)``.
            value: Tensor of shape ``(batch, key_len, input_dim)``.
            mask: Optional tensor broadcastable to the
                ``(batch, heads, query_len, key_len)`` score tensor;
                positions where it equals 0 are excluded from attention.

        Returns:
            Tensor of shape ``(batch, query_len, input_dim)``.
        """
        # Project, then move the head axis in front of the sequence axis.
        q_heads = self.split_heads(self.W_q(query), self.num_heads)
        k_heads = self.split_heads(self.W_k(key), self.num_heads)
        v_heads = self.split_heads(self.W_v(value), self.num_heads)

        # Dot-product scores scaled by sqrt(head_dim).
        scale = torch.sqrt(torch.tensor(self.head_dim, dtype=torch.float32))
        scores = torch.matmul(q_heads, k_heads.transpose(-2, -1)) / scale
        if mask is not None:
            scores = scores.masked_fill(mask == 0, float("-inf"))

        weights = self.dropout(torch.softmax(scores, dim=-1))

        # Weighted sum of values, heads merged back, final projection.
        merged = self.combine_heads(torch.matmul(weights, v_heads))
        return self.W_o(merged)

    def split_heads(self, tensor, num_heads):
        """Reshape ``(batch, seq, dim)`` into ``(batch, heads, seq, head_dim)``."""
        batch_size, seq_length = tensor.shape[0], tensor.shape[1]
        per_head = tensor.view(batch_size, seq_length, num_heads, self.head_dim)
        return per_head.permute(0, 2, 1, 3)

    def combine_heads(self, tensor):
        """Inverse of ``split_heads``: back to ``(batch, seq, heads * head_dim)``."""
        batch_size, num_heads, seq_length = tensor.shape[:3]
        # permute() returns a non-contiguous view; view() needs contiguity.
        seq_first = tensor.permute(0, 2, 1, 3).contiguous()
        return seq_first.view(batch_size, seq_length, num_heads * self.head_dim)
# Smoke test: random inputs, check the output shape.
input_dim, num_heads = 64, 8
seq_length, batch_size = 10, 32
query, key, value = (
    torch.rand(batch_size, seq_length, input_dim) for _ in range(3)
)
multihead_attn = MultiHeadAttention(input_dim, num_heads)
output = multihead_attn(query, key, value)
print(output.size())  # Expected: torch.Size([32, 10, 64])
Multi-Head Attention 的初衷是通过并行化处理、增强表示能力、减少过拟合等方式,提高模型在处理序列数据时的效率和性能。
self attention的实现
import torch
import torch.nn.functional as F
class SelfAttention(nn.Module):
    """Single-head scaled dot-product self-attention.

    Args:
        input_dim: Feature size of the input and of the output.
        dropout: Dropout probability applied to the attention weights.
    """

    def __init__(self, input_dim, dropout=0.1):
        super().__init__()
        self.input_dim = input_dim
        # Query, key and value projections all read the same input.
        self.W_q = nn.Linear(input_dim, input_dim)
        self.W_k = nn.Linear(input_dim, input_dim)
        self.W_v = nn.Linear(input_dim, input_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, inputs, mask=None):
        """Attend over ``inputs`` using queries, keys and values derived from it.

        Args:
            inputs: Tensor of shape ``(batch, seq, input_dim)``.
            mask: Optional tensor broadcastable to the ``(batch, seq, seq)``
                score tensor; positions where it equals 0 are excluded.

        Returns:
            ``(attention_output, attn_weights)`` with shapes
            ``(batch, seq, input_dim)`` and ``(batch, seq, seq)``.
        """
        queries = self.W_q(inputs)
        keys = self.W_k(inputs)
        values = self.W_v(inputs)

        # Dot-product scores scaled by sqrt(input_dim).
        scale = torch.sqrt(torch.tensor(self.input_dim, dtype=torch.float32))
        scores = torch.matmul(queries, keys.transpose(-2, -1)) / scale
        if mask is not None:
            scores = scores.masked_fill(mask == 0, float("-inf"))

        attn_weights = self.dropout(scores.softmax(dim=-1))
        return torch.matmul(attn_weights, values), attn_weights
# Smoke test: random inputs, check both output shapes.
input_dim, seq_length, batch_size = 64, 10, 32
inputs = torch.rand(batch_size, seq_length, input_dim)
self_attention = SelfAttention(input_dim)
output, attn_weights = self_attention(inputs)
print(output.size())  # Expected: torch.Size([32, 10, 64])
print(attn_weights.size())  # Expected: torch.Size([32, 10, 10])
cross attention 实现
import torch
import torch.nn as nn
import torch.nn.functional as F
class CrossAttention(nn.Module):
    """Single-head scaled dot-product attention between two sequences.

    Args:
        input_dim_query: Feature size of the query sequence.
        input_dim_key_value: Feature size of the key/value sequence.
        output_dim: Size every projection maps to; also the output size.
        dropout: Dropout probability applied to the attention weights.
    """

    def __init__(self, input_dim_query, input_dim_key_value, output_dim, dropout=0.1):
        super().__init__()
        self.input_dim_query = input_dim_query
        self.input_dim_key_value = input_dim_key_value
        self.output_dim = output_dim
        # Queries come from one sequence, keys and values from the other.
        self.W_q = nn.Linear(input_dim_query, output_dim)
        self.W_k = nn.Linear(input_dim_key_value, output_dim)
        self.W_v = nn.Linear(input_dim_key_value, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, query, key, value, mask=None):
        """Attend from ``query`` to ``key``/``value``.

        Args:
            query: Tensor of shape ``(batch, query_len, input_dim_query)``.
            key: Tensor of shape ``(batch, kv_len, input_dim_key_value)``.
            value: Tensor of shape ``(batch, kv_len, input_dim_key_value)``.
            mask: Optional tensor broadcastable to the
                ``(batch, query_len, kv_len)`` score tensor; positions where
                it equals 0 are excluded.

        Returns:
            ``(attention_output, attn_weights)`` with shapes
            ``(batch, query_len, output_dim)`` and
            ``(batch, query_len, kv_len)``.
        """
        queries = self.W_q(query)
        keys = self.W_k(key)
        values = self.W_v(value)

        # Dot-product scores scaled by sqrt(output_dim).
        scale = torch.sqrt(torch.tensor(self.output_dim, dtype=torch.float32))
        scores = torch.matmul(queries, keys.transpose(-2, -1)) / scale
        if mask is not None:
            scores = scores.masked_fill(mask == 0, float("-inf"))

        attn_weights = self.dropout(scores.softmax(dim=-1))
        return torch.matmul(attn_weights, values), attn_weights
# Smoke test: the query and key/value sequences differ in both length and
# feature size.
input_dim_query, input_dim_key_value, output_dim = 64, 128, 64
seq_length_query, seq_length_key_value = 10, 20
batch_size = 32
query = torch.rand(batch_size, seq_length_query, input_dim_query)
key, value = (
    torch.rand(batch_size, seq_length_key_value, input_dim_key_value)
    for _ in range(2)
)
cross_attention = CrossAttention(input_dim_query, input_dim_key_value, output_dim)
output, attn_weights = cross_attention(query, key, value)
print(output.size())  # Expected: torch.Size([32, 10, 64])
print(attn_weights.size())  # Expected: torch.Size([32, 10, 20])
Cross-attention的输入来自不同的序列,Self-attention的输入来自同序列,也就是所谓的输入不同,但是除此之外,基本一致。