获取视频数据的信息
import cv2

if __name__ == "__main__":
    video_path = "视频路径"
    cap = cv2.VideoCapture(video_path)
    # Total frame count (named constant instead of the magic number 7).
    frame_num = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    # Frames per second (named constant instead of the magic number 5).
    fps = cap.get(cv2.CAP_PROP_FPS)
    while True:
        ret, frame = cap.read()
        # The original looped forever and crashed on frame.shape once the
        # stream ended (frame is None when ret is False).
        if not ret:
            break
        print(f"视频帧率{fps} 视频总帧数{frame_num } 视频分辨率{frame.shape}")
    # Release the capture handle when done.
    cap.release()
通过上述代码就能获取到视频帧率、帧数、分辨率信息,根据需求自行确立采样的分辨率与帧数。
将视频数据转换为npy数据
npy数据的好处:读取速度快,省去了解码时间
坏处:增加了空间
这是一种空间换时间的策略
import cv2
import numpy as np

if __name__ == "__main__":
    video_path = "视频路径"
    cap = cv2.VideoCapture(video_path)
    # Source frame rate (CAP_PROP_FPS == 5).
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    frame_lists = []
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        # Downscale every frame to 224x224.
        frame = cv2.resize(frame, (224, 224))
        # Flatten HxWxC into a 1-D row so all frames stack into a 2-D array.
        frame_array = frame.reshape(-1)
        frame_lists.append(frame_array)
    cap.release()
    # Sub-sample down to ~20 fps. max(1, ...) guards against fps < 20,
    # where the original int(fps / 20) == 0 made the slice step raise
    # "ValueError: slice step cannot be zero".
    interval = max(1, int(fps / 20))
    frame_lists = np.array(frame_lists)[::interval]
    # Persist the sampled frames for fast, decode-free loading.
    np.save("npy文件保存地址", frame_lists)
数据加载模块
这是pytorch的数据加载器,这块掌握了基本就ok了,哪都用得上。
from torch.utils.data import Dataset, DataLoader
class MyDataset(Dataset):
    """Dataset of preprocessed video clips stored as .npy files.

    The index file ``txt`` lists one .npy path per line. Each .npy file
    holds an array of shape [20, 224*224*3 + 1]: 20 flattened 224x224x3
    frames, with the clip's class label appended as the last column of
    every row.
    """

    def __init__(self, txt: str, load_type: str = "test"):
        # load_type is kept for API compatibility; it is not used here.
        self.load_type = load_type
        with open(txt, "r") as file:
            self.data = file.readlines()

    def __getitem__(self, idx: int):
        video_path = self.data[idx].strip()
        data = np.load(video_path)
        # Stored layout is [20, 224*224*3 + 1] (the original comment's
        # "224*223*3" was a typo). Drop the label column and restore the
        # 20 frames of shape 224x224x3.
        imgs = data[:, :-1].reshape(20, 224, 224, 3)
        # Every row carries the same label; return it as a scalar.
        label = data[:, -1:].reshape(-1)
        return imgs, label[0]

    def __len__(self) -> int:
        return len(self.data)
训练模型
代码大致如下,需要掌握一些api,并且理解数据的流动。
from torch.utils.data import Dataset, DataLoader
# Load the model definition (project-local module).
from models.model import COMP_F
import torch
import numpy as np
from tqdm import tqdm
from time import sleep
import os
# Let cuDNN benchmark and cache the fastest convolution algorithms;
# this speeds training up when input shapes stay constant across batches.
torch.backends.cudnn.benchmark = True
def save_matrix(file, epoch, train_matrix, test_matrix, mode="a+"):
    """Append one epoch's train/test confusion matrices to a log file.

    Args:
        file: path of the log file.
        epoch: epoch index (or a placeholder string for the header line).
        train_matrix: training confusion matrix (dict of "pred_real" -> count).
        test_matrix: test confusion matrix, same format.
        mode: file open mode; "w" truncates, default "a+" appends.
    """
    with open(file, mode) as f:
        # write() is the right call for a single string; the original's
        # writelines() happens to work only because str is iterable,
        # writing it character by character.
        f.write(f"epoch: {epoch}\n")
        f.write(f"train_matrix: {train_matrix}\n")
        f.write(f"test_matrix: {test_matrix}\n")
def save_acc(file, epoch, acc, loss, lr, mode="a+"):
    """Append one epoch's accuracy, loss and learning-rate info to a log file.

    Args:
        file: path of the log file.
        epoch: epoch index (or a placeholder string for the header line).
        acc: accuracy summary string.
        loss: loss summary string.
        lr: learning-rate summary string.
        mode: file open mode; "w" truncates, default "a+" appends.
    """
    with open(file, mode) as f:
        # write() is the right call for a single string; writelines()
        # expects an iterable of strings.
        f.write(f"{epoch}\n")
        f.write(f"{acc}\n")
        f.write(f"{loss}\n")
        f.write(f"{lr}\n")
def calculate_matrix(predicted, label, matrix):
    """Accumulate a confusion matrix in place.

    Args:
        predicted: iterable of predicted class ids.
        label: iterable of ground-truth class ids (same length).
        matrix: dict mapping "pred_real" keys to counts; mutated in place.
    """
    for pre_y, real_y in zip(predicted, label):
        key = f"{pre_y}_{real_y}"
        # dict.get replaces the original's explicit `key in matrix.keys()`
        # membership test with a single lookup.
        matrix[key] = matrix.get(key, 0) + 1
class MyDataset(Dataset):
    """Loads video clips stored as .npy files listed in a text index.

    Each listed file holds an array of 20 rows, one flattened 224x224x3
    frame per row, with the clip label stored in the final column.
    """

    def __init__(self, txt, load_type="test"):
        self.load_type = load_type
        with open(txt, "r") as file:
            self.data = file.readlines()

    def __getitem__(self, idx):
        npy_path = self.data[idx].strip()
        record = np.load(npy_path)
        # Split pixel columns from the trailing label column.
        pixels, labels = record[:, :-1], record[:, -1:]
        # Restore the 20 frames to their 224x224x3 shape.
        imgs = pixels.reshape(20, 224, 224, 3)
        return imgs, labels.reshape(-1)[0]

    def __len__(self):
        return len(self.data)
if __name__ == "__main__":
    # Index files: one .npy sample path per line.
    train_path = "../txt/train.txt"
    test_path = "../txt/test.txt"
    # Data loaders.
    dataset = MyDataset(train_path, load_type="train")
    train_dataloader = DataLoader(dataset, batch_size=8, shuffle=True, num_workers=4, pin_memory=True)
    dataset = MyDataset(test_path, load_type="test")
    test_dataloader = DataLoader(dataset, batch_size=4, shuffle=False, pin_memory=True)
    Epochs = 500
    criterion = torch.nn.CrossEntropyLoss()
    device = "cuda"
    # Backbone size passed to the project model.
    model_type = "resnet18"
    os.makedirs(f"{model_type}", exist_ok=True)
    os.makedirs(f"{model_type}/log", exist_ok=True)
    model = COMP_F(model_type).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10, eta_min=1e-8)
    last_acc = 0
    # Truncate both log files at the start of a run.
    save_matrix(file=f"{model_type}/log/matrix.txt", epoch=" ", train_matrix=" ", test_matrix=" ", mode="w")
    save_acc(file=f"{model_type}/log/acc.txt", epoch=" ", acc=" ", loss=" ", lr=" ", mode="w")
    for epoch in range(Epochs):
        # ---- training pass ----
        model.train()
        train_loss = 0
        batch_num = 0
        train_correct = 0
        sample_num = 0
        train_matrix = {}
        with open(f"{model_type}/log/异常数据.txt", "a+") as error_file:
            for sample, label in tqdm(train_dataloader):
                # (B, 20, 224, 224, 3) uint8 -> (B, 3, 20, 224, 224) float in [0, 1].
                sample, label = torch.permute(sample.to(device) / 255, (0, 4, 1, 2, 3)), label.to(device)
                optimizer.zero_grad()
                output = model(sample)
                loss = criterion(output, label)
                loss.backward()
                optimizer.step()
                batch_num += 1
                train_loss += loss.item()
                _, predicted = torch.max(output, 1)
                sample_num += label.size(0)
                train_correct += (predicted == label).sum()
                # Accumulate the training confusion matrix.
                calculate_matrix(predicted.cpu().numpy(), label.cpu().numpy(), train_matrix)
        train_acc = train_correct.item() / sample_num
        train_loss = train_loss / batch_num
        # ---- evaluation pass ----
        model.eval()
        test_loss = 0
        batch_num = 0
        test_correct = 0
        sample_num = 0
        test_matrix = {}
        with torch.no_grad():
            for sample, label in tqdm(test_dataloader):
                # try/except is now per-batch: the original wrapped the whole
                # loop in a bare `except: pass`, so one bad batch silently
                # discarded the rest of the evaluation set. The useless
                # optimizer.zero_grad() inside no_grad() is dropped too.
                try:
                    sample, label = torch.permute(sample.to(device) / 255, (0, 4, 1, 2, 3)), label.to(device)
                    output = model(sample)
                    loss = criterion(output, label)
                    batch_num += 1
                    test_loss += loss.item()
                    _, predicted = torch.max(output, 1)
                    calculate_matrix(predicted.cpu().numpy(), label.cpu().numpy(), test_matrix)
                    sample_num += label.size(0)
                    test_correct += (predicted == label).sum()
                except Exception as e:
                    # Best-effort like the original, but no longer silent.
                    print(f"skipping bad test batch: {e}")
        # int() handles both cases: test_correct stays a plain 0 when every
        # batch failed (the original's .item() would raise then), and the
        # sample_num/batch_num guards avoid ZeroDivisionError in that case.
        test_acc = int(test_correct) / sample_num if sample_num else 0.0
        torch.save(model.state_dict(), f'./{model_type}/train_{epoch}_{test_acc:.4f}.pth')
        test_loss = test_loss / batch_num if batch_num else 0.0
        save_matrix(file=f"{model_type}/log/matrix.txt", epoch=epoch, train_matrix=train_matrix, test_matrix=test_matrix)
        save_acc(file=f"{model_type}/log/acc.txt", epoch=epoch, acc=f"train_acc: {train_acc:.4f} test_acc: {test_acc:.4f}"
                 , loss=f"training_loss:{train_loss:.5f} test_loss:{test_loss:.5f}", lr=f"Lr:{scheduler.get_last_lr()}")
        print(f"Epoch: {epoch} training_loss:{train_loss:.5f} test_loss:{test_loss:.5f}")
        print(f"train_acc: {train_acc:.4f} test_acc: {test_acc:.4f}")
        scheduler.step()
        adjusted_lr = scheduler.get_last_lr()
        print(f"Epoch Lr:{adjusted_lr}\n")
        sleep(1)
结尾
如果觉得文章对你有用请点赞、关注
群内交流更多技术
130856474