对Nor-behavior源码的阅读
源码准备
https://github.com/AIFARMS/NOR-behavior-recognition
源码结构
文件名称 | 文件作用 |
---|---|
extract_frame.py | 视频分割成帧 |
static.py | 截取或者说固定帧数30 or 60, 统计 |
models/TSM/main.py | 模型训练 |
annotate.py | 预测 |
groundtruth.py | 评价 |
步骤 | output size | 备注 |
---|---|---|
将本文中图片 (1920, 1080) crop成 (1220, 1020) 下采样成 (305, 255) | (305, 255) | list标签: 地址 帧数 标签; 数据集比例:3:1 |
稀疏采样 | list[img1, img2, img3] img:(305, 255) | 假设一个视频共有88帧,num_segments=3, 88/3 = 29, 取29以内的随机数3个构成列表; 假设随机数为[1, 7, 8]; 所取帧数为[1, 7+29, 8+29*2]->每个加1->[2 37 67]。 |
稀疏采样3张图片*RGB3通道 = 9, resize(224, 224) | (9, 224, 224) | 新的裁剪方式:GroupMultiScaleCrop[255,224,191,168]再resize成(224,224) |
数据放入model | (1, 2) | (3, 3, 224, 224) ->(1,2) |
计算loss, 梯度优化 | / | CrossEntropyLoss(); SGD |
数据预处理
extract_frame.py
(1) crop_and_downsample函数
将本文中图片 1920×1080 crop 成 1220×1020,再下采样成 305×255
def crop_and_downsample(frame):
    """Trim the border of a video frame, then shrink it to 1/4 size.

    Y_CROP rows are removed from the top and bottom, X_CROP columns from
    the left and right (module-level constants), and the remaining region
    is resized by a factor of 0.25 in both dimensions.
    """
    height, width, _ = frame.shape
    # Symmetric border crop: keep the central region only.
    trimmed = frame[Y_CROP:height - Y_CROP, X_CROP:width - X_CROP]
    # Downsample by a factor of 4 (fx/fy scale factors, target size (0,0)).
    return cv2.resize(trimmed, (0, 0), fx=0.25, fy=0.25)
(2) 输入标签,视频地址;输出视频分割成帧的地址
## Behaviour annotations: one row per scored event for every video.
ANNOTATIONS_DF = pd.read_csv('./analysis/action_annotation.csv')
## Directory containing the raw .mp4 recordings.
VIDEO_DIR = './videos'
## Pixels trimmed from each side of a frame before downsampling:
## 350 horizontally, 30 vertically.
X_CROP = 350
Y_CROP = 30
## Destination directory for the extracted per-clip frame folders.
FRAMES_DIR = 'compressed_action_frames'
create_dir(FRAMES_DIR)
(3) for循环主函数
这里要提到文章提供的标签:action_annotation.csv,由 video名;每次 invest 的开始帧;结束帧;invest 左or右;组成。
def _extract_clip(video_stream, dir_path, num_frames):
    """Read num_frames consecutive frames from video_stream and save them
    as <dir_path>/1.jpg ... <dir_path>/<num_frames>.jpg (cropped+downsampled)."""
    create_dir(dir_path)
    for frame_id in range(1, num_frames + 1):
        # NOTE(review): the read-success flag is ignored; a truncated video
        # would hand a None frame to crop_and_downsample — confirm inputs.
        _, frame = video_stream.read()
        cv2.imwrite('%s/%d.jpg' % (dir_path, frame_id), crop_and_downsample(frame))


for vf in video_files:
    video_name = vf.split('/')[-1].split('.')[0]
    video_stream = cv2.VideoCapture('%s/%s.mp4' % (VIDEO_DIR, video_name))
    print(video_name)
    ## Annotation rows belonging to this video.
    actions_df = ANNOTATIONS_DF[ANNOTATIONS_DF['Scoring'] == video_name]
    ## The first row must mark the start of the trial.
    assert actions_df.iloc[0]['Behaviour'] == 'Begin Trial'
    ## end_frame tracks the stream position: number of frames consumed so far.
    start_frame, end_frame = int(actions_df.iloc[0]['Start_Frame']), int(actions_df.iloc[0]['Start_Frame'])
    ## Skip the pre-trial frames so the next read() returns frame start_frame.
    ## (BUG FIX: the original built a throwaway list holding every decoded
    ## pre-trial frame in memory just to advance the stream.)
    for _ in range(start_frame):
        video_stream.read()
    actions_df = actions_df.iloc[1:]
    ## Start annotating actions.
    explore_count, investigate_count = 0, 0
    for index, row in actions_df.iterrows():
        action = row['Behaviour']
        astart_frame = int(row['Start_Frame'])
        ## Frames between the previous event and this one count as 'explore'.
        if end_frame < astart_frame:
            dir_path = os.path.join(FRAMES_DIR, '&explore-%d[%d-%d]' % (explore_count, end_frame, astart_frame))
            explore_count += 1
            _extract_clip(video_stream, dir_path, astart_frame - end_frame)
        if action == 'End Trial':
            break
        aend_frame = int(row['Stop_Frame'])
        ## First token of the behaviour label, lowercased — presumably the
        ## investigated side ('left'/'right'); verify against the CSV.
        action = action.split(' ')[0].lower()
        ## Frames covered by the annotation row count as 'investigate'.
        dir_path = os.path.join(FRAMES_DIR, '&investigate-%s-%d[%d-%d]' % (action, investigate_count, astart_frame, aend_frame))
        investigate_count += 1
        _extract_clip(video_stream, dir_path, aend_frame - astart_frame)
        start_frame, end_frame = astart_frame, aend_frame
static.py
该函数实现的是对explore和invest两种行为的次数与帧数的统计
(1) get_stats函数
展示种类class_count和分割帧个数clip_distribution的统计图
def get_stats(dir_path):
    """Count clips per action class and collect per-clip frame counts.

    Clip directories under dir_path are named '&<class>-...'; the class
    name is the text between the leading '&' and the first '-'.

    Returns:
        (class_count, clip_distribution): class_count maps class name to
        number of clips; clip_distribution maps class name to a list with
        one frame count per clip.
    """
    class_count = Counter()
    clip_distribution = defaultdict(list)
    for clip_name in os.listdir(dir_path):
        # Class name sits between '&' and the first '-'.
        start = clip_name.find('&') + 1
        stop = clip_name.find('-')
        class_name = clip_name[start:stop]
        class_count[class_name] += 1
        num_frames = len(os.listdir(os.path.join(dir_path, clip_name)))
        clip_distribution[class_name].append(num_frames)
    return class_count, clip_distribution
## Summarise the extracted clips and plot clip counts per action class.
class_count, clip_distribution = get_stats(ROOT_DIR)
print(class_count)
actions = list(class_count.keys())
counts = list(class_count.values())
plt.bar(actions, counts)
plt.xlabel('Action')
plt.ylabel('Number of clips')
plt.show()
统计图如下图所示:
(2) for循环实现固定帧
固定帧用于算法CNN+LSTM即LRCN。本文采用TSM算法,并不用该方法处理数据集
阈值可变,若=30则每30帧为一个clip。
## Split every clip into fixed-length sub-clips of `threshold` frames
## (clips shorter than the threshold are skipped; trailing frames dropped).
threshold = 60
THRESH_DIR = '%s-%d' % (ROOT_DIR, threshold)
create_directory(THRESH_DIR)
for clip_name in os.listdir(ROOT_DIR):
    clip_path = os.path.join(ROOT_DIR, clip_name)
    frame_count = len(os.listdir(clip_path))
    if frame_count < threshold:
        continue
    for split_idx in range(frame_count // threshold):
        new_clip_path = os.path.join(THRESH_DIR, '%s-%d' % (clip_name, split_idx))
        create_directory(new_clip_path)
        offset = threshold * split_idx
        # Copy frames offset+1 .. offset+threshold, renumbered from 1.
        for i in range(1, threshold + 1):
            src = os.path.join(clip_path, '%d.jpg' % (offset + i))
            dst = os.path.join(new_clip_path, '%d.jpg' % i)
            shutil.copy(src, dst)
for循环得出结果:
创建trainlist.txt和testlist.txt
鉴于TSM的数据集处理和训练方式,下列代码用于创建trainlist.txt和testlist.txt。
import random

random.seed(0)  # reproducible 75/25 split

train_list_dir = r'D:\NOR-behavior-recognition-master\data/trainlist.txt'
test_list_dir = r'D:\NOR-behavior-recognition-master\data/testlist.txt'

## Label map file: one "<1-based index> <class name>" pair per line.
## BUG FIX: the file handle was never closed; use a context manager.
with open(r'D:\NOR-behavior-recognition-master\models\TSM\ops\pigs_label_map.txt') as label_file:
    class_ind = [x.strip().split() for x in label_file]
class_mapping = {x[1]: int(x[0]) - 1 for x in class_ind}  # name -> 0-based label

train_list, test_list = list(), list()
## Snapshot the listing once so the sample and the loop see the same clips.
all_clips = os.listdir(ROOT_DIR)
trainList = set(random.sample(all_clips, int(0.75 * len(all_clips))))
for clip_name in all_clips:
    clip_path = os.path.join(ROOT_DIR, clip_name)
    frame_count = len(os.listdir(clip_path))
    ## Class name sits between the leading '&' and the first '-'.
    loc1 = clip_name.find('&')
    loc2 = clip_name.find('-')
    class_name = clip_name[(loc1 + 1): loc2]
    label = class_mapping[class_name]
    entry = '{} {} {}\n'.format(clip_path, frame_count, label)
    if clip_name in trainList:
        train_list.append(entry)
    else:
        test_list.append(entry)

## BUG FIX: the original reopened each list file in 'w' mode and rewrote it
## from scratch on every loop iteration; write each file exactly once here.
with open(train_list_dir, 'w') as file1:
    file1.writelines(train_list)
with open(test_list_dir, 'w') as file2:
    file2.writelines(test_list)