![](https://img-blog.csdnimg.cn/direct/12dffa5916e04c8bb2ed525fb5e9a9dc.png)
requirements.txt
absl-py==0.11.0
astunparse==1.6.3
backcall==0.2.0
cachetools==4.2.1
certifi==2020.12.5
chardet==4.0.0
cloudpickle==1.6.0
colorama==0.4.4
cycler==0.10.0
decorator==4.4.2
dlib==19.21.1
flatbuffers==1.12
gast==0.3.3
google-auth==1.24.0
google-auth-oauthlib==0.4.2
google-pasta==0.2.0
grpcio==1.32.0
h5py==2.10.0
helpdev==0.7.1
idna==2.10
imageio==2.9.0
ipykernel==5.3.4
ipython==7.22.0
ipython-genutils==0.2.0
jedi==0.17.0
joblib==1.0.1
jupyter-client==6.1.12
jupyter-core==4.7.1
Keras-Preprocessing==1.1.2
kiwisolver==1.3.1
Markdown==3.3.3
matplotlib==3.3.2
natsort==7.1.1
networkx==2.5.1
numpy==1.19.5
oauthlib==3.1.0
opencv-contrib-python-headless==4.5.1.48
opencv-python-headless==4.5.1.48
opt-einsum==3.3.0
pandas==1.1.3
parso==0.8.2
pickleshare==0.7.5
Pillow==8.2.0
pip==21.1.2
prompt-toolkit==3.0.17
protobuf==3.17.3
ptyprocess==0.7.0
pyasn1==0.4.8
pyasn1-modules==0.2.8
Pygments==2.9.0
pyparsing==2.4.7
PyQtWebEngine==5.12.1
python-dateutil==2.8.1
pytz==2021.1
PyWavelets==1.1.1
pywin32==227
pyzmq==20.0.0
requests==2.25.1
requests-oauthlib==1.3.0
rsa==4.7
scikit-image==0.17.2
scikit-learn==0.23.2
scipy==1.6.3
setuptools==52.0.0
six==1.15.0
spyder-kernels==1.10.0
tensorboard==2.5.0
tensorboard-data-server==0.6.1
tensorboard-plugin-wit==1.8.0
tensorflow-estimator==2.4.0
tensorflow-gpu==2.4.1
termcolor==1.1.0
threadpoolctl==2.1.0
tifffile==2021.6.14
tornado==6.1
traitlets==5.0.5
typing-extensions==3.7.4.3
urllib3==1.26.3
wcwidth==0.2.5
Werkzeug==1.0.1
wheel==0.36.2
wincertstore==0.2
wrapt==1.12.1
xlrd==1.2.0
pip install -r requirements.txt
extraction_preprocess.py
import numpy as np
import pandas as pd
import cv2
import dlib
def pol2cart(rho, phi):
    """Convert polar coordinates to Cartesian coordinates.

    Used to turn an optical-flow magnitude/angle pair back into its two
    components before the optical strain is computed.

    Args:
        rho: Radius (scalar or NumPy array).
        phi: Angle in radians (scalar or NumPy array).

    Returns:
        Tuple ``(x, y)`` with ``x = rho * cos(phi)`` and ``y = rho * sin(phi)``.
    """
    return rho * np.cos(phi), rho * np.sin(phi)
def computeStrain(u, v):
    """Compute the optical strain magnitude from the two flow components.

    Spatial derivatives are approximated by the difference between each
    element and its next neighbour along the given axis.  The last row and
    column have no neighbour and therefore come out as NaN; they are filled
    from the previous column first and then from the previous row.

    Args:
        u: 2-D array holding the first flow component.
        v: 2-D array holding the second flow component (same shape as ``u``).

    Returns:
        NumPy array with the same shape as ``u`` containing
        ``sqrt(u_x**2 + v_y**2 + 1/2 * (u_y + v_x)**2)``.
    """
    u_frame = pd.DataFrame(u)
    v_frame = pd.DataFrame(v)
    u_x = u - u_frame.shift(-1, axis=1)
    v_y = v - v_frame.shift(-1, axis=0)
    u_y = u - u_frame.shift(-1, axis=0)
    v_x = v - v_frame.shift(-1, axis=1)
    strain = np.sqrt(u_x**2 + v_y**2 + 1/2 * (u_y + v_x)**2)
    # `axis` is passed by keyword: recent pandas releases reject it as a
    # positional argument, while older releases accept both spellings.
    return np.array(strain.ffill(axis=1).ffill(axis=0))
def _eye_polygon(shape, first, margin=15, limit=128):
    """Return the six landmarks ``first`` .. ``first + 5`` as an enlarged polygon.

    Points 0 and 3 are pushed outwards horizontally, points 1-2 are moved up
    and points 4-5 are moved down by ``margin`` pixels; the moved coordinate is
    clamped to ``[0, limit]``.  The result is int32 because ``cv2.fillPoly``
    only accepts 32-bit integer points (NumPy's default integer width is
    platform dependent).
    """
    p = [shape.part(first + offset) for offset in range(6)]
    return np.array([
        (max(p[0].x - margin, 0), p[0].y),
        (p[1].x, max(p[1].y - margin, 0)),
        (p[2].x, max(p[2].y - margin, 0)),
        (min(p[3].x + margin, limit), p[3].y),
        (p[4].x, min(p[4].y + margin, limit)),
        (p[5].x, min(p[5].y + margin, limit)),
    ], dtype=np.int32)


def extract_preprocess(final_images, k):
    """Extract optical-flow features for every frame pair ``(i, i + k)``.

    For each video, landmarks are predicted once on a reference frame (the
    first frame in which dlib detects a face).  For every frame pair the
    TV-L1 optical flow and its optical strain are stacked into a 128x128x3
    array, the mean of a small window around landmark 28 (nose) is subtracted,
    the two eye polygons are zeroed, and two regions (eyebrows/eyes and mouth)
    are resized and stacked into one 42x42x3 array.

    Args:
        final_images: Sequence of videos; each video is indexed as
            ``video[frame]`` and exposes ``.shape[0]`` as its frame count.
            Frames are assumed to be 128x128 (the feature buffer is
            allocated with that size).
        k: Frame distance between the two images of a pair.

    Returns:
        List with one entry per video, each a list of ``shape[0] - k``
        arrays of shape (42, 42, 3).

    Raises:
        IndexError: If no face is detected in any frame of a video.
    """
    predictor_model = "Utils\\shape_predictor_68_face_landmarks.dat"
    face_detector = dlib.get_frontal_face_detector()
    face_pose_predictor = dlib.shape_predictor(predictor_model)
    # The estimator does not depend on the frames, so build it once instead
    # of once per frame pair.
    # optical_flow = cv2.DualTVL1OpticalFlow_create() #Depends on cv2 version
    optical_flow = cv2.optflow.DualTVL1OpticalFlow_create()

    dataset = []
    for video, frames in enumerate(final_images):
        OFF_video = []
        n_frames = frames.shape[0]
        for img_count in range(n_frames - k):
            img1 = frames[img_count]
            img2 = frames[img_count + k]

            if img_count == 0:
                # Walk forward until a frame with a detectable face is found.
                reference_img = img1
                detect = face_detector(reference_img, 1)
                next_img = 0
                while len(detect) == 0:
                    next_img += 1
                    if next_img >= n_frames:
                        raise IndexError(
                            'No face detected in any frame of video %d' % video)
                    reference_img = frames[next_img]
                    detect = face_detector(reference_img, 1)
                shape = face_pose_predictor(reference_img, detect[0])

                # Eye regions (landmarks 36-41 and 42-47), masked out later
                left_eye = _eye_polygon(shape, 36)
                right_eye = _eye_polygon(shape, 42)

                # ROI 1 + ROI 2 (both eyebrows down to below the eyes)
                brow_left = max(shape.part(17).x - 12, 0)
                brow_right = min(shape.part(26).x + 12, 128)
                brow_top = min(max(shape.part(19).y - 12, 0),
                               max(shape.part(24).y - 12, 0))
                brow_bottom = max(min(shape.part(41).y + 12, 128),
                                  min(shape.part(46).y + 12, 128))

                # ROI 3 (mouth)
                mouth_left = max(shape.part(60).x - 12, 0)
                mouth_top = max(shape.part(50).y - 12, 0)
                mouth_right = min(shape.part(64).x + 12, 128)
                mouth_bottom = min(shape.part(57).y + 12, 128)

                # Nose landmark
                nose_x = shape.part(28).x
                nose_y = shape.part(28).y

            # Compute optical flow features
            flow = optical_flow.calc(img1, img2, None)
            magnitude, angle = cv2.cartToPolar(flow[..., 0], flow[..., 1])
            u, v = pol2cart(magnitude, angle)
            strain = computeStrain(u, v)

            # Features concatenation into 128x128x3
            final = np.zeros((128, 128, 3))
            final[:, :, 0] = u
            final[:, :, 1] = v
            final[:, :, 2] = strain

            # Remove global head movement by subtracting the nose-region mean
            nose_rows = slice(nose_y - 5, nose_y + 6)
            nose_cols = slice(nose_x - 5, nose_x + 6)
            final[:, :, 0] = abs(final[:, :, 0] - final[nose_rows, nose_cols, 0].mean())
            final[:, :, 1] = abs(final[:, :, 1] - final[nose_rows, nose_cols, 1].mean())
            final[:, :, 2] = final[:, :, 2] - final[nose_rows, nose_cols, 2].mean()

            # Eye masking
            cv2.fillPoly(final, [left_eye], 0)
            cv2.fillPoly(final, [right_eye], 0)

            # ROI selection -> two 21x42 strips stacked into 42x42x3
            final_image = np.zeros((42, 42, 3))
            final_image[:21, :, :] = cv2.resize(
                final[brow_top:brow_bottom, brow_left:brow_right, :], (42, 21))
            final_image[21:42, :, :] = cv2.resize(
                final[mouth_top:mouth_bottom, mouth_left:mouth_right, :], (42, 21))
            OFF_video.append(final_image)

        dataset.append(OFF_video)
        print('Video', video, 'Done')
    print('All Done')
    return dataset
load_images.py
import os
import shutil
import glob
import natsort
import pickle
import dlib
import numpy as np
import cv2
def _first_face_box(face_detector, frame_paths):
    """Return ``(top, bottom, left, right)`` from the first frame with a face.

    Frames are tried in the given order.  When the detector reports several
    faces in a frame, the last one is used.  ``top`` and ``left`` are clamped
    to 0 because a negative start index would wrap around when slicing.
    Returns ``None`` when no frame yields a detection.
    """
    for frame_path in frame_paths:
        image = cv2.imread(frame_path)
        box = None
        for face_rect in face_detector(image, 1):
            box = (max(face_rect.top(), 0), face_rect.bottom(),
                   max(face_rect.left(), 0), face_rect.right())
        if box is not None:
            return box
    return None


def _crop_video_frames(face_detector, frame_paths, output_path_for):
    """Crop every frame of one video with a single face box and save it at 128x128."""
    box = _first_face_box(face_detector, frame_paths)
    if box is None:
        if frame_paths:
            print('No face detected, skipped:', os.path.dirname(frame_paths[0]))
        return
    top, bottom, left, right = box
    for frame_path in frame_paths:
        image = cv2.imread(frame_path)
        face = cv2.resize(image[top:bottom, left:right], (128, 128))
        cv2.imwrite(output_path_for(frame_path), face)


def crop_images(dataset_name):
    """Crop the face region of every frame and write it as a 128x128 image.

    The face box is detected once per video on its reference frame (the first
    frame in natural sort order; if no face is found there, the next frames
    are tried) and then applied to all frames of that video, so the detector
    is not run on every frame.

    * ``'CASME_sq'``: reads ``<dataset>/rawpic/<subject>/<video>/img*.jpg`` and
      writes ``<dataset>/rawpic_crop/<subject>/<video>/img<N>.jpg``.
    * ``'SAMMLV'``: reads ``<dataset>/SAMM_longvideos/<video>/*.jpg`` and writes
      ``<dataset>/SAMM_longvideos_crop/<video>/<last 4 chars of the name>.jpg``.

    Existing output directories of a subject/video are deleted and recreated.
    Any other ``dataset_name`` does nothing.

    Args:
        dataset_name: Dataset root directory name, ``'CASME_sq'`` or ``'SAMMLV'``.
    """
    face_detector = dlib.get_frontal_face_detector()

    if dataset_name == 'CASME_sq':
        crop_root = os.path.join(dataset_name, 'rawpic_crop')
        for subject_dir in glob.glob(os.path.join(dataset_name, 'rawpic', '*')):
            subject = os.path.basename(subject_dir)

            # Create the root of the cropped images on first use
            if not os.path.exists(crop_root):
                os.mkdir(crop_root)

            # Start from an empty directory for each subject
            subject_out = os.path.join(crop_root, subject)
            if os.path.exists(subject_out):
                shutil.rmtree(subject_out)
            os.mkdir(subject_out)
            print('Subject', subject)

            for vid in glob.glob(os.path.join(subject_dir, '*')):
                video_out = os.path.join(subject_out, os.path.basename(vid))
                if os.path.exists(video_out):
                    shutil.rmtree(video_out)
                os.mkdir(video_out)

                frame_paths = natsort.natsorted(glob.glob(os.path.join(vid, 'img*.jpg')))
                _crop_video_frames(
                    face_detector,
                    frame_paths,
                    # 'img001.jpg' -> image number '001' -> 'img001.jpg'
                    lambda path, out=video_out: os.path.join(
                        out, "img{}.jpg".format(os.path.basename(path)[3:-4])),
                )

    elif dataset_name == 'SAMMLV':
        crop_root = os.path.join(dataset_name, 'SAMM_longvideos_crop')
        if os.path.exists(crop_root):  # Delete dir if exist and create new dir
            shutil.rmtree(crop_root)
        os.mkdir(crop_root)

        for vid in glob.glob(os.path.join(dataset_name, 'SAMM_longvideos', '*')):
            dir_crop = os.path.join(crop_root, os.path.basename(vid))
            if os.path.exists(dir_crop):  # Delete dir if exist and create new dir
                shutil.rmtree(dir_crop)
            os.mkdir(dir_crop)
            print('Video', os.path.basename(vid))

            frame_paths = natsort.natsorted(glob.glob(os.path.join(vid, '*.jpg')))
            _crop_video_frames(
                face_detector,
                frame_paths,
                # Keep only the last four characters of the file stem (image number)
                lambda path, out=dir_crop: os.path.join(
                    out, "{}.jpg".format(os.path.basename(path).split('.')[0][-4:])),
            )
def load_images(dataset_name):
    """Load the cropped frames of every video as grayscale arrays.

    Directory and file names are handled with ``os.path`` so the function
    works with either path separator.

    * ``'CASME_sq'``: reads ``<dataset>/rawpic_crop/<subject>/<video>/img*.jpg``.
      The video code is the first four characters of the second
      ``_``-separated part of the video directory name
      (e.g. ``15_0101disgustingteeth`` -> ``0101``).
    * ``'SAMMLV'``: reads ``<dataset>/SAMM_longvideos_crop/<video>/*.jpg``.
      The subject is the part of the video directory name before the first
      ``_``; videos are grouped under the most recently added subject.

    Any other ``dataset_name`` yields three empty lists.

    Args:
        dataset_name: Dataset root directory name.

    Returns:
        Tuple ``(images, subjects, subjectsVideos)`` - note this order:
        ``images`` has one array of frames per video, ``subjects`` lists the
        subject names and ``subjectsVideos`` holds, per subject, the list of
        its video codes.
    """
    images = []
    subjects = []
    subjectsVideos = []

    if dataset_name == 'CASME_sq':
        subject_dirs = glob.glob(os.path.join(dataset_name, 'rawpic_crop', '*'))
        for dir_sub in natsort.natsorted(subject_dirs):
            subject = os.path.basename(dir_sub)
            print('Subject: ' + subject)
            subjects.append(subject)
            subjectsVideos.append([])
            for dir_sub_vid in natsort.natsorted(glob.glob(os.path.join(dir_sub, '*'))):
                # Ex: '15_0101disgustingteeth' -> '0101'
                subjectsVideos[-1].append(os.path.basename(dir_sub_vid).split('_')[1][:4])
                frame_paths = natsort.natsorted(glob.glob(os.path.join(dir_sub_vid, 'img*.jpg')))
                images.append(np.array([cv2.imread(path, 0) for path in frame_paths]))

    elif dataset_name == 'SAMMLV':
        video_dirs = glob.glob(os.path.join(dataset_name, 'SAMM_longvideos_crop', '*'))
        for dir_vid in natsort.natsorted(video_dirs):
            video_name = os.path.basename(dir_vid)
            subject = video_name.split('_')[0]
            print('Subject: ' + subject)
            if subject not in subjects:  # Only append unique subject name
                subjects.append(subject)
                subjectsVideos.append([])
            subjectsVideos[-1].append(video_name)

            frame_paths = natsort.natsorted(glob.glob(os.path.join(dir_vid, '*.jpg')))
            images.append(np.array([cv2.imread(path, 0) for path in frame_paths]))

    return images, subjects, subjectsVideos
def save_images_pkl(dataset_name, images, subjectsVideos, subjects):
    """Pickle the loaded data into three files prefixed with ``dataset_name``.

    Writes ``<dataset_name>_images_crop.pkl``,
    ``<dataset_name>_subjectsVideos_crop.pkl`` and
    ``<dataset_name>_subjects_crop.pkl``.  Each file is opened in a ``with``
    block so the handle is flushed and closed as soon as it is written.

    Args:
        dataset_name: Prefix (may include a directory) of the output files.
        images: Object stored in the ``_images_crop.pkl`` file.
        subjectsVideos: Object stored in the ``_subjectsVideos_crop.pkl`` file.
        subjects: Object stored in the ``_subjects_crop.pkl`` file.
    """
    payloads = (
        ("_images_crop.pkl", images),
        ("_subjectsVideos_crop.pkl", subjectsVideos),
        ("_subjects_crop.pkl", subjects),
    )
    for suffix, payload in payloads:
        with open(dataset_name + suffix, "wb") as handle:
            pickle.dump(payload, handle)
def load_images_pkl(dataset_name):
    """Load the three pickle files prefixed with ``dataset_name``.

    Reads ``<dataset_name>_images_crop.pkl``,
    ``<dataset_name>_subjectsVideos_crop.pkl`` and
    ``<dataset_name>_subjects_crop.pkl``; every file is closed right after it
    has been read.

    NOTE: ``pickle.load`` can execute arbitrary code, so only load files that
    were produced by a trusted source.

    Args:
        dataset_name: Prefix (may include a directory) of the pickle files.

    Returns:
        Tuple ``(images, subjectsVideos, subjects)`` - in this order.
    """
    loaded = []
    for suffix in ("_images_crop.pkl", "_subjectsVideos_crop.pkl", "_subjects_crop.pkl"):
        with open(dataset_name + suffix, "rb") as handle:
            loaded.append(pickle.load(handle))
    images, subjectsVideos, subjects = loaded
    return images, subjectsVideos, subjects
load_label.py
import numpy as np
import pandas as pd
def load_excel(dataset_name):
    """Load the annotation spreadsheet of a dataset into a DataFrame.

    * ``'CASME_sq'``: reads ``<dataset>/code_final.xlsx``.  The first sheet
      holds the annotations; the third sheet maps video names to video codes
      and the second sheet maps subjects to subject codes.
    * ``'SAMMLV'``: reads ``<dataset>/SAMM_LongVideos_V2_Release.xlsx`` (first
      sheet, first ten rows skipped).  ``videoCode`` is built from the first
      two ``_``-separated parts of ``Filename`` and ``subjectCode`` from the
      first part; the ``Type``/``Onset``/``Offset``/``Apex`` columns are renamed
      to lower case to match the CAS(ME)^2 column names.

    Args:
        dataset_name: ``'CASME_sq'`` or ``'SAMMLV'``; also the directory that
            contains the spreadsheet.

    Returns:
        DataFrame with (at least) the columns ``onset``, ``apex``, ``offset``,
        ``type``, ``videoCode`` and ``subjectCode``.

    Raises:
        ValueError: If ``dataset_name`` is not one of the supported datasets.
    """
    if dataset_name == 'CASME_sq':
        colsName = ['subject', 'video', 'onset', 'apex', 'offset', 'au', 'emotion', 'type', 'selfReport']
        # The context manager closes the workbook once all sheets are parsed.
        with pd.ExcelFile(dataset_name + '/code_final.xlsx') as xl:  # Specify directory of excel file
            codeFinal = xl.parse(xl.sheet_names[0], header=None, names=colsName)  # Get data
            naming1 = xl.parse(xl.sheet_names[2], header=None, converters={0: str})
            naming2 = xl.parse(xl.sheet_names[1], header=None)

        codeFinal['videoName'] = [videoName.split('_')[0] for videoName in codeFinal.iloc[:, 1]]

        dictVideoName = dict(zip(naming1.iloc[:, 1], naming1.iloc[:, 0]))
        codeFinal['videoCode'] = [dictVideoName[i] for i in codeFinal['videoName']]

        dictSubject = dict(zip(naming2.iloc[:, 2], naming2.iloc[:, 1]))
        codeFinal['subjectCode'] = [dictSubject[i] for i in codeFinal['subject']]

    elif dataset_name == 'SAMMLV':
        colsName = ['Subject', 'Filename', 'Inducement Code', 'Onset', 'Apex', 'Offset', 'Duration', 'Type', 'Action Units', 'Notes']
        with pd.ExcelFile(dataset_name + '/SAMM_LongVideos_V2_Release.xlsx') as xl:
            codeFinal = xl.parse(xl.sheet_names[0], header=None, names=colsName,
                                 skiprows=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

        name_parts = [str(videoName).split('_') for videoName in codeFinal.iloc[:, 1]]
        codeFinal['videoCode'] = [parts[0] + '_' + parts[1] for parts in name_parts]
        codeFinal['subjectCode'] = [parts[0] for parts in name_parts]
        # Synchronize the columns name with CAS(ME)^2
        codeFinal.rename(columns={'Type': 'type', 'Onset': 'onset', 'Offset': 'offset', 'Apex': 'apex'}, inplace=True)

    else:
        raise ValueError(
            "Unsupported dataset_name %r; expected 'CASME_sq' or 'SAMMLV'" % (dataset_name,))

    print('Data Columns:', codeFinal.columns)  # Final data column
    return codeFinal
def load_gt(dataset_name, expression_type, images, subjectsVideos, subjects, codeFinal):
    """Collect the ground-truth intervals and drop videos without any.

    The annotation rows are indexed once by ``(subjectCode, videoCode)``
    instead of rescanning the whole table for every video.

    For ``'SAMMLV'`` the expression type is translated to the spreadsheet
    labels (``'micro-expression'`` -> ``'Micro - 1/2'``, ``'macro-expression'``
    -> ``'Macro'``).  For every matching row the interval is
    ``[onset - 1, offset - 1]``; when ``offset`` is 0 the apex is used as the
    end instead.  ``'Macro'`` rows whose onset is 0 (and offset is not 0) are
    ignored.

    Args:
        dataset_name: ``'CASME_sq'`` or ``'SAMMLV'``.
        expression_type: ``'micro-expression'`` or ``'macro-expression'``.
        images: One entry per video, in the flattened order of
            ``subjectsVideos``.
        subjectsVideos: Per subject, the list of its video codes.
        subjects: Subject codes, parallel to ``subjectsVideos``.
        codeFinal: DataFrame with the columns ``subjectCode``, ``videoCode``,
            ``type``, ``onset``, ``apex`` and ``offset``.

    Returns:
        Tuple ``(final_images, final_videos, final_subjects, final_samples)``:
        the images of the kept videos, the kept video codes grouped by
        subject, the sorted unique subject codes (NumPy array) and the
        intervals grouped by subject and video.  Subjects without any kept
        video are dropped from the grouped lists.
    """
    dataset_expression_type = expression_type
    if dataset_name == 'SAMMLV':
        if expression_type == 'micro-expression':
            dataset_expression_type = 'Micro - 1/2'
        elif expression_type == 'macro-expression':
            dataset_expression_type = 'Macro'

    # Index the rows of the requested type once; values stay unconverted so
    # rows of videos that are never looked up cannot raise here.
    columns = ('subjectCode', 'videoCode', 'type', 'onset', 'apex', 'offset')
    rows_by_video = {}
    for subject_code, video_code, row_type, onset, apex, offset in zip(*(codeFinal[c] for c in columns)):
        if row_type == dataset_expression_type:  # Micro-expression or macro-expression
            rows_by_video.setdefault((subject_code, video_code), []).append((onset, apex, offset))

    final_images = []
    final_videos = []
    final_samples = []
    kept_subjects = []
    vid_count = 0
    for subject_index, sub_vid_each in enumerate(subjectsVideos):
        subject_videos = []
        subject_samples = []
        for videoCode in sub_vid_each:
            on_off = []
            # S15, S16... for CAS(ME)^2, 001, 002... for SAMMLV
            for onset, apex, offset in rows_by_video.get((subjects[subject_index], videoCode), ()):
                if offset == 0:  # Take apex if offset is 0
                    on_off.append([int(onset - 1), int(apex - 1)])
                elif dataset_expression_type != 'Macro' or int(onset) != 0:
                    # Ignore the samples that are extremely long in SAMMLV
                    on_off.append([int(onset - 1), int(offset - 1)])
            if on_off:  # Keep only the videos that contain at least one sample
                final_images.append(images[vid_count])
                subject_videos.append(videoCode)
                subject_samples.append(on_off)
                kept_subjects.append(subjects[subject_index])
            vid_count += 1
        if subject_videos:
            final_videos.append(subject_videos)
            final_samples.append(subject_samples)

    final_subjects = np.unique(kept_subjects)
    print('Total Videos:', len(final_images))
    return final_images, final_videos, final_subjects, final_samples
def cal_k(dataset_name, expression_type, final_samples):
    """Return ``k``, half of the average expression length (in frames).

    With ``N`` the mean of ``end - start`` over all samples,
    ``k = int((N + 1) / 2)``.

    Args:
        dataset_name: Unused; kept for interface compatibility.
        expression_type: Unused; kept for interface compatibility.
        final_samples: Nested list ``[subject][video][sample]`` where each
            sample is ``[start, end]``.

    Returns:
        The integer ``k``.

    Raises:
        ValueError: If ``final_samples`` contains no sample at all.
    """
    durations = [
        sample[1] - sample[0]
        for subject in final_samples
        for video in subject
        for sample in video
    ]
    if not durations:
        raise ValueError('final_samples contains no expression sample; cannot compute k')
    N = sum(durations) / len(durations)
    k = int((N + 1) / 2)
    print('k (Half of average length of expression) =', k)
    return k
main.py
import numpy as np
import pandas as pd
def load_excel(dataset_name):
    """Load the annotation spreadsheet of a dataset into a DataFrame.

    * ``'CASME_sq'``: reads ``<dataset>/code_final.xlsx``.  The first sheet
      holds the annotations; the third sheet maps video names to video codes
      and the second sheet maps subjects to subject codes.
    * ``'SAMMLV'``: reads ``<dataset>/SAMM_LongVideos_V2_Release.xlsx`` (first
      sheet, first ten rows skipped).  ``videoCode`` is built from the first
      two ``_``-separated parts of ``Filename`` and ``subjectCode`` from the
      first part; the ``Type``/``Onset``/``Offset``/``Apex`` columns are renamed
      to lower case to match the CAS(ME)^2 column names.

    Args:
        dataset_name: ``'CASME_sq'`` or ``'SAMMLV'``; also the directory that
            contains the spreadsheet.

    Returns:
        DataFrame with (at least) the columns ``onset``, ``apex``, ``offset``,
        ``type``, ``videoCode`` and ``subjectCode``.

    Raises:
        ValueError: If ``dataset_name`` is not one of the supported datasets.
    """
    if dataset_name == 'CASME_sq':
        colsName = ['subject', 'video', 'onset', 'apex', 'offset', 'au', 'emotion', 'type', 'selfReport']
        # The context manager closes the workbook once all sheets are parsed.
        with pd.ExcelFile(dataset_name + '/code_final.xlsx') as xl:  # Specify directory of excel file
            codeFinal = xl.parse(xl.sheet_names[0], header=None, names=colsName)  # Get data
            naming1 = xl.parse(xl.sheet_names[2], header=None, converters={0: str})
            naming2 = xl.parse(xl.sheet_names[1], header=None)

        codeFinal['videoName'] = [videoName.split('_')[0] for videoName in codeFinal.iloc[:, 1]]

        dictVideoName = dict(zip(naming1.iloc[:, 1], naming1.iloc[:, 0]))
        codeFinal['videoCode'] = [dictVideoName[i] for i in codeFinal['videoName']]

        dictSubject = dict(zip(naming2.iloc[:, 2], naming2.iloc[:, 1]))
        codeFinal['subjectCode'] = [dictSubject[i] for i in codeFinal['subject']]

    elif dataset_name == 'SAMMLV':
        colsName = ['Subject', 'Filename', 'Inducement Code', 'Onset', 'Apex', 'Offset', 'Duration', 'Type', 'Action Units', 'Notes']
        with pd.ExcelFile(dataset_name + '/SAMM_LongVideos_V2_Release.xlsx') as xl:
            codeFinal = xl.parse(xl.sheet_names[0], header=None, names=colsName,
                                 skiprows=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

        name_parts = [str(videoName).split('_') for videoName in codeFinal.iloc[:, 1]]
        codeFinal['videoCode'] = [parts[0] + '_' + parts[1] for parts in name_parts]
        codeFinal['subjectCode'] = [parts[0] for parts in name_parts]
        # Synchronize the columns name with CAS(ME)^2
        codeFinal.rename(columns={'Type': 'type', 'Onset': 'onset', 'Offset': 'offset', 'Apex': 'apex'}, inplace=True)

    else:
        raise ValueError(
            "Unsupported dataset_name %r; expected 'CASME_sq' or 'SAMMLV'" % (dataset_name,))

    print('Data Columns:', codeFinal.columns)  # Final data column
    return codeFinal
def load_gt(dataset_name, expression_type, images, subjectsVideos, subjects, codeFinal):
    """Collect the ground-truth intervals and drop videos without any.

    The annotation rows are indexed once by ``(subjectCode, videoCode)``
    instead of rescanning the whole table for every video.

    For ``'SAMMLV'`` the expression type is translated to the spreadsheet
    labels (``'micro-expression'`` -> ``'Micro - 1/2'``, ``'macro-expression'``
    -> ``'Macro'``).  For every matching row the interval is
    ``[onset - 1, offset - 1]``; when ``offset`` is 0 the apex is used as the
    end instead.  ``'Macro'`` rows whose onset is 0 (and offset is not 0) are
    ignored.

    Args:
        dataset_name: ``'CASME_sq'`` or ``'SAMMLV'``.
        expression_type: ``'micro-expression'`` or ``'macro-expression'``.
        images: One entry per video, in the flattened order of
            ``subjectsVideos``.
        subjectsVideos: Per subject, the list of its video codes.
        subjects: Subject codes, parallel to ``subjectsVideos``.
        codeFinal: DataFrame with the columns ``subjectCode``, ``videoCode``,
            ``type``, ``onset``, ``apex`` and ``offset``.

    Returns:
        Tuple ``(final_images, final_videos, final_subjects, final_samples)``:
        the images of the kept videos, the kept video codes grouped by
        subject, the sorted unique subject codes (NumPy array) and the
        intervals grouped by subject and video.  Subjects without any kept
        video are dropped from the grouped lists.
    """
    dataset_expression_type = expression_type
    if dataset_name == 'SAMMLV':
        if expression_type == 'micro-expression':
            dataset_expression_type = 'Micro - 1/2'
        elif expression_type == 'macro-expression':
            dataset_expression_type = 'Macro'

    # Index the rows of the requested type once; values stay unconverted so
    # rows of videos that are never looked up cannot raise here.
    columns = ('subjectCode', 'videoCode', 'type', 'onset', 'apex', 'offset')
    rows_by_video = {}
    for subject_code, video_code, row_type, onset, apex, offset in zip(*(codeFinal[c] for c in columns)):
        if row_type == dataset_expression_type:  # Micro-expression or macro-expression
            rows_by_video.setdefault((subject_code, video_code), []).append((onset, apex, offset))

    final_images = []
    final_videos = []
    final_samples = []
    kept_subjects = []
    vid_count = 0
    for subject_index, sub_vid_each in enumerate(subjectsVideos):
        subject_videos = []
        subject_samples = []
        for videoCode in sub_vid_each:
            on_off = []
            # S15, S16... for CAS(ME)^2, 001, 002... for SAMMLV
            for onset, apex, offset in rows_by_video.get((subjects[subject_index], videoCode), ()):
                if offset == 0:  # Take apex if offset is 0
                    on_off.append([int(onset - 1), int(apex - 1)])
                elif dataset_expression_type != 'Macro' or int(onset) != 0:
                    # Ignore the samples that are extremely long in SAMMLV
                    on_off.append([int(onset - 1), int(offset - 1)])
            if on_off:  # Keep only the videos that contain at least one sample
                final_images.append(images[vid_count])
                subject_videos.append(videoCode)
                subject_samples.append(on_off)
                kept_subjects.append(subjects[subject_index])
            vid_count += 1
        if subject_videos:
            final_videos.append(subject_videos)
            final_samples.append(subject_samples)

    final_subjects = np.unique(kept_subjects)
    print('Total Videos:', len(final_images))
    return final_images, final_videos, final_subjects, final_samples
def cal_k(dataset_name, expression_type, final_samples):
    """Return ``k``, half of the average expression length (in frames).

    With ``N`` the mean of ``end - start`` over all samples,
    ``k = int((N + 1) / 2)``.

    Args:
        dataset_name: Unused; kept for interface compatibility.
        expression_type: Unused; kept for interface compatibility.
        final_samples: Nested list ``[subject][video][sample]`` where each
            sample is ``[start, end]``.

    Returns:
        The integer ``k``.

    Raises:
        ValueError: If ``final_samples`` contains no sample at all.
    """
    durations = [
        sample[1] - sample[0]
        for subject in final_samples
        for video in subject
        for sample in video
    ]
    if not durations:
        raise ValueError('final_samples contains no expression sample; cannot compute k')
    N = sum(durations) / len(durations)
    k = int((N + 1) / 2)
    print('k (Half of average length of expression) =', k)
    return k
training.py
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import cv2
from skimage.util import random_noise
import random
from collections import Counter
from sklearn.model_selection import LeaveOneGroupOut
from scipy.signal import find_peaks
from Utils.mean_average_precision.mean_average_precision import MeanAveragePrecision2d
# Seed Python's built-in `random` module so the random.sample / random.shuffle
# calls made by the functions below give the same sequence on every run.
# NOTE(review): no NumPy or TensorFlow seed is set in the visible code.
random.seed(1)
def pseudo_labeling(final_images, final_samples, k):
    """Create one binary pseudo-label per sliding window of every video.

    Window ``i`` covers frames ``i .. i + k - 1`` and is labelled 1 when it
    shares at least one frame with a ground-truth interval (IoU > 0).  A
    sample ``[start, end]`` contributes the frames ``start + 1 .. end``.  The
    overlapping windows are exactly ``start + 2 - k .. end``, so that range is
    set directly instead of building the intersection/union of two index
    arrays for every window.

    Args:
        final_images: One entry per video; ``len(video)`` is its frame count.
        final_samples: Nested list ``[subject][video][sample]`` of
            ``[start, end]`` integer pairs, in the same video order as
            ``final_images``.
        k: Window length in frames.

    Returns:
        Flat list of 0/1 labels, ``len(video) - k`` per video (the last ``k``
        frames of each video are ignored), concatenated over all videos.
    """
    pseudo_y = []
    video_count = 0

    for subject in final_samples:
        for video in subject:
            n_windows = max(len(final_images[video_count]) - k, 0)  # Last k frames are ignored
            labels = np.zeros(n_windows, dtype=int)
            if k > 0:  # A window of length 0 cannot overlap anything
                for ME in video:
                    if ME[1] <= ME[0]:
                        continue  # Empty ground-truth interval
                    first = max(ME[0] + 2 - k, 0)
                    last = min(ME[1], n_windows - 1)
                    if first <= last:
                        labels[first:last + 1] = 1
            # Integrate all videos into one dataset
            pseudo_y.extend(labels.tolist())
            video_count += 1

    print('Total frames:', len(pseudo_y))
    return pseudo_y
def loso(dataset, pseudo_y, final_images, final_samples, k):
    """Flatten the dataset and build the per-frame subject labels.

    Every video contributes ``shape[0] - k`` frames.  Frames are assigned to
    subjects in order, using the number of videos each subject has in
    ``final_samples``.

    Args:
        dataset: Per video, the list of its feature frames.
        pseudo_y: Flat list of frame labels.
        final_images: Per video, an array whose ``shape[0]`` is the frame count.
        final_samples: Per subject, a list with one entry per video.
        k: Number of trailing frames ignored in every video.

    Returns:
        Tuple ``(X, y, groupsLabel)``: the flattened frames, the labels as a
        NumPy array and an array of the same shape/dtype holding the subject
        index of every frame.
    """
    # To split the dataset by subjects
    y = np.array(pseudo_y)
    groupsLabel = y.copy()

    # Get total frames of each video
    videos_len = [video.shape[0] - k for video in final_images]

    print('Frame Index for each subject:-')
    start_frame = 0
    videos_seen = 0
    for subject_id, subject_videos in enumerate(final_samples):
        videos_seen += len(subject_videos)
        end_frame = sum(videos_len[:videos_seen])
        groupsLabel[start_frame:end_frame] = subject_id
        print('Subject', subject_id, ':', start_frame, '->', end_frame)
        start_frame = end_frame

    X = []
    for video in dataset:
        X.extend(video)
    print('\nTotal X:', len(X), ', Total y:', len(y))
    return X, y, groupsLabel
def normalize(images):
    """Min-max normalize the first three channels of every image in place.

    Each channel is rescaled independently with ``cv2.normalize``
    (``NORM_MINMAX``, ``alpha=0``, ``beta=1``).

    Args:
        images: Sequence of arrays indexed as ``image[:, :, channel]``.

    Returns:
        The same ``images`` object, whose elements have been modified.
    """
    for image in images:
        for channel in range(3):
            plane = image[:, :, channel]
            image[:, :, channel] = cv2.normalize(
                plane, None, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX)
    return images
def generator(X, y, batch_size=12, epochs=1):
    """Yield normalized batches forever, cycling over ``X`` and ``y``.

    Each batch slice of ``X`` is normalized (and written back into ``X``),
    then split into its three channels, each reshaped to
    ``(num_images, 42, 42, 1)``.

    Args:
        X: Sequence of 42x42x3 feature arrays.
        y: Sequence of labels, parallel to ``X``.
        batch_size: Maximum number of samples per batch.
        epochs: Unused.

    Yields:
        ``([u, v, os], labels)`` where the three inputs are channels 0, 1 and
        2 of the batch and ``labels`` is a NumPy array.
    """
    while True:
        for start in range(0, len(X), batch_size):
            end = min(start + batch_size, len(X))
            num_images = end - start
            X[start:end] = normalize(X[start:end])
            batch = np.array(X[start:end])
            channels = [
                batch[:, :, :, channel].reshape(num_images, 42, 42, 1)
                for channel in range(3)
            ]
            yield channels, np.array(y[start:end])
def shuffling(X, y):
    """Shuffle ``X`` and ``y`` together so that pairs stay aligned.

    Uses Python's ``random`` module, so the order is reproducible after
    ``random.seed``.

    Args:
        X: Sequence of samples.
        y: Sequence of labels, parallel to ``X``.

    Returns:
        Tuple ``(X, y)`` of two new lists in the shuffled order.  Empty input
        yields two empty lists.
    """
    pairs = list(zip(X, y))
    if not pairs:
        # zip(*[]) yields nothing, so the two-way unpacking below would fail.
        return [], []
    random.shuffle(pairs)
    shuffled_X, shuffled_y = zip(*pairs)
    return list(shuffled_X), list(shuffled_y)
def data_augmentation(X, y):
    """Append three augmented copies of every sample labelled 1.

    The augmentations are, in order: horizontal flip, 7x7 Gaussian blur and
    ``random_noise``.  Only the samples present on entry are augmented; the
    new samples are appended to ``X`` and ``y`` in place.

    Args:
        X: List of 42x42x3 arrays (extended in place).
        y: List of labels, parallel to ``X`` (extended in place).

    Returns:
        The same ``X`` and ``y`` objects.
    """
    transformations = (
        lambda image: np.fliplr(image),
        lambda image: cv2.GaussianBlur(image, (7, 7), 0),
        lambda image: random_noise(image),
    )
    original_labels = y.copy()  # Snapshot, so appended samples are not revisited
    for index, label in enumerate(original_labels):
        if label != 1:  # Only augment on expression samples (label=1)
            continue
        for transform in transformations:
            img_transformed = transform(X[index]).reshape(42, 42, 3)
            X.append(np.array(img_transformed))
            y.append(1)
    return X, y
def SOFTNet():
    """Build and compile the three-stream SOFTNet model.

    Each stream takes a 42x42x1 input and applies a 5x5 ``same`` convolution
    (3, 5 and 8 filters respectively, ReLU) followed by 3x3 max pooling with
    stride 3.  The streams are concatenated, max-pooled 2x2, flattened and
    passed through a 400-unit ReLU layer to a single linear output.  The model
    is compiled with MSE loss, SGD (learning rate 0.0005) and the mean
    absolute error metric.

    Returns:
        The compiled Keras model taking the inputs ``[u, v, s]``.
    """
    def _stream(filters):
        # One input branch: Input -> Conv2D -> MaxPooling2D
        inputs = layers.Input(shape=(42, 42, 1))
        conv = layers.Conv2D(filters, (5, 5), padding='same', activation='relu')(inputs)
        pool = layers.MaxPooling2D(pool_size=(3, 3), strides=(3, 3))(conv)
        return inputs, pool

    inputs1, pool1 = _stream(3)
    # channel 2
    inputs2, pool2 = _stream(5)
    # channel 3
    inputs3, pool3 = _stream(8)

    # merge
    merged = layers.Concatenate()([pool1, pool2, pool3])
    # interpretation
    merged_pool = layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(merged)
    flat = layers.Flatten()(merged_pool)
    dense = layers.Dense(400, activation='relu')(flat)
    outputs = layers.Dense(1, activation='linear')(dense)

    # Takes input u,v,s
    model = keras.models.Model(inputs=[inputs1, inputs2, inputs3], outputs=outputs)

    # compile; `learning_rate` replaces the deprecated `lr` alias
    sgd = keras.optimizers.SGD(learning_rate=0.0005)
    model.compile(loss="mse", optimizer=sgd, metrics=[tf.keras.metrics.MeanAbsoluteError()])
    return model
def spotting(result, total_gt, final_samples, subject_count, dataset, k, metric_fn, p, show_plot):
    """Spot expression intervals in the videos of one subject.

    For every video of subject ``subject_count`` (1-based), the per-frame
    scores are smoothed with a window of ``2 * k`` frames, the first and last
    ``k`` frames are dropped, and peaks above the threshold
    ``mean + p * (max - mean)`` that are at least ``k`` frames apart are
    turned into predicted intervals ``[peak - k, peak + k]``.  Predictions and
    ground truth of every video are passed to ``metric_fn.add``.

    Args:
        result: Per-frame scores of this subject, indexed as ``result[frame]``
            with one score column.
        total_gt: Running count of ground-truth samples.
        final_samples: Nested list ``[subject][video][sample]`` of
            ``[start, end]`` pairs.
        subject_count: 1-based index of the subject.
        dataset: Per video, its frames; ``len`` gives the number of scores.
        k: Half of the average expression length.
        metric_fn: Object exposing ``add(preds, gt)``.
        p: Threshold factor.
        show_plot: Plot the smoothed scores, ground truth and threshold.

    Returns:
        Tuple ``(preds, gt, total_gt)``; ``preds`` and ``gt`` are those of the
        subject's last video.
    """
    # Number of videos that belong to the preceding subjects
    countVideo = sum(len(subject) for subject in final_samples[:subject_count-1])
    prev = 0
    for videoIndex, video in enumerate(final_samples[subject_count-1]):
        preds = []
        gt = []
        video_number = countVideo + videoIndex
        video_length = len(dataset[video_number])
        print('Video:', video_number)
        score_plot = np.array(result[prev:prev+video_length])  # Get related frames to each video
        score_plot_agg = score_plot.copy()

        # Score aggregation
        for x in range(len(score_plot[k:-k])):
            score_plot_agg[x+k] = score_plot[x:x+2*k].mean()
        score_plot_agg = score_plot_agg[k:-k]

        # Plot the result to see the peaks
        # Note: for some videos the ground truth samples are below frame index 0
        # due to the effect of aggregation, but this has no impact on the evaluation
        if show_plot:
            plt.figure(figsize=(15,4))
            plt.plot(score_plot_agg)
            plt.xlabel('Frame')
            plt.ylabel('Score')

        # Moilanen threshold technique
        agg_mean = score_plot_agg.mean()
        threshold = agg_mean + p * (max(score_plot_agg) - agg_mean)
        peaks, _ = find_peaks(score_plot_agg[:,0], height=threshold[0], distance=k)

        if len(peaks) == 0:
            # No peak detected: add a dummy value so mean_average_precision does not fail
            preds.append([0, 0, 0, 0, 0, 0])
        for peak in peaks:
            preds.append([peak-k, 0, peak+k, 0, 0, 0])  # Extend left and right side of peak by k frames

        for samples in video:
            gt.append([samples[0]-k, 0, samples[1]-k, 0, 0, 0, 0])
            total_gt += 1
            if show_plot:
                plt.axvline(x=samples[0]-k, color='r')
                plt.axvline(x=samples[1]-k+1, color='r')
                plt.axhline(y=threshold, color='g')
        if show_plot:
            plt.show()

        prev += video_length
        metric_fn.add(np.array(preds), np.array(gt))  # IoU = 0.5 according to MEGC2020 metrics
    return preds, gt, total_gt
def evaluation(preds, gt, total_gt, metric_fn):
    """Get TP, FP and FN at IoU 0.5 from the accumulated metric.

    The metric is evaluated once and both counts are read from that result.

    Args:
        preds: Unused; kept for interface compatibility.
        gt: Unused; kept for interface compatibility.
        total_gt: Total number of ground-truth samples seen so far.
        metric_fn: Object whose ``value(iou_thresholds=0.5)[0.5][0]`` holds the
            ``'tp'`` and ``'fp'`` sequences.

    Returns:
        Tuple ``(TP, FP, FN)`` with ``FN = total_gt - TP``.
    """
    class_result = metric_fn.value(iou_thresholds=0.5)[0.5][0]
    TP = int(sum(class_result['tp']))
    FP = int(sum(class_result['fp']))
    FN = total_gt - TP
    print('TP:', TP, 'FP:', FP, 'FN:', FN)
    return TP, FP, FN
def training(X, y, groupsLabel, dataset_name, expression_type, final_samples, k, dataset, train, show_plot):
    """Run leave-one-subject-out training (or inference) and spotting.

    For every held-out subject the model weights are either reset and trained
    on the remaining subjects (``train=True``) or loaded from
    ``SOFTNet_Weights\\<dataset_name>\\<expression_type>\\s<N>.hdf5``.  The
    held-out frames are then scored, spotted and added to the metric.

    When training, the label-0 samples are randomly reduced to half the size
    of the largest class, micro-expression data is augmented, and the training
    set is shuffled.

    Args:
        X: Flat list of 42x42x3 feature frames.
        y: Frame labels, parallel to ``X``.
        groupsLabel: Subject index of every frame.
        dataset_name: Dataset name, used in the weights path.
        expression_type: ``'micro-expression'`` or ``'macro-expression'``.
        final_samples: Ground-truth intervals ``[subject][video][sample]``.
        k: Half of the average expression length.
        dataset: Per video, its feature frames.
        train: Train from scratch if true, otherwise load saved weights.
        show_plot: Forwarded to ``spotting``.

    Returns:
        Tuple ``(TP, FP, FN, metric_fn)`` accumulated over all subjects.
    """
    import math  # Local import: only needed for the step counts below

    logo = LeaveOneGroupOut()
    logo.get_n_splits(X, y, groupsLabel)
    subject_count = 0
    epochs = 10
    batch_size = 12
    total_gt = 0
    metric_fn = MeanAveragePrecision2d(num_classes=1)
    p = 0.55  # From our analysis, 0.55 achieved the highest F1-Score
    model = SOFTNet()
    weight_reset = model.get_weights()  # Initial weights

    for train_index, test_index in logo.split(X, y, groupsLabel):  # Leave One Subject Out
        subject_count += 1
        print('Subject : ' + str(subject_count))

        X_train = [X[i] for i in train_index]  # Training set
        y_train = [y[i] for i in train_index]
        X_test = [X[i] for i in test_index]  # Testing set
        y_test = [y[i] for i in test_index]
        # The generators are infinite, so Keras needs whole-number step counts;
        # rounding up makes the last partial batch part of the pass.
        test_steps = math.ceil(len(X_test) / batch_size)

        print('------Initializing SOFTNet-------')  # To reset the model at every LOSO testing
        path = 'SOFTNet_Weights\\' + dataset_name + '\\' + expression_type + '\\s' + str(subject_count) + '.hdf5'

        if train:
            # Downsample the non-expression samples by 1/2 to reduce dataset bias
            print('Dataset Labels', Counter(y_train))
            _, uni_count = np.unique(y_train, return_counts=True)
            keep_count = int(uni_count.max()*1/2)

            # Randomly pick the non-expression samples (label 0) that are kept,
            # then keep every expression sample as well
            keep_index = random.sample([index for index, i in enumerate(y_train) if i==0], keep_count)
            keep_index += (index for index, i in enumerate(y_train) if i>0)
            keep_index.sort()
            X_train = [X_train[i] for i in keep_index]
            y_train = [y_train[i] for i in keep_index]
            print('After Downsampling Dataset Labels', Counter(y_train))

            # Data augmentation to the micro-expression samples only
            if expression_type == 'micro-expression':
                X_train, y_train = data_augmentation(X_train, y_train)
                print('After Augmentation Dataset Labels', Counter(y_train))

            # Shuffle the training set
            X_train, y_train = shuffling(X_train, y_train)

            # Reset weights to ensure the model does not have info about current subject
            model.set_weights(weight_reset)
            model.fit(
                generator(X_train, y_train, batch_size, epochs),
                steps_per_epoch=math.ceil(len(X_train) / batch_size),
                epochs=epochs,
                verbose=1,
                validation_data=generator(X_test, y_test, batch_size),
                validation_steps=test_steps,
                shuffle=True,
            )
        else:
            model.load_weights(path)  # Load Pretrained Weights

        # `predict` accepts generators directly; `predict_generator` is deprecated
        result = model.predict(
            generator(X_test, y_test, batch_size),
            steps=test_steps,
            verbose=1
        )

        preds, gt, total_gt = spotting(result, total_gt, final_samples, subject_count, dataset, k, metric_fn, p, show_plot)
        TP, FP, FN = evaluation(preds, gt, total_gt, metric_fn)
        print('Done Subject', subject_count)
    return TP, FP, FN, metric_fn
def final_evaluation(TP, FP, FN, metric_fn):
    """Print precision, recall, F1-score and COCO-style AP of the final result.

    A ratio whose denominator is zero (no predictions, no ground truth, or no
    true positive) is reported as 0.0 instead of raising ZeroDivisionError.

    Args:
        TP: Number of true positives.
        FP: Number of false positives.
        FN: Number of false negatives.
        metric_fn: Object whose ``value(iou_thresholds=..., mpolicy='soft')``
            returns a mapping with the key ``'mAP'``.
    """
    precision = TP / (TP + FP) if (TP + FP) > 0 else 0.0
    recall = TP / (TP + FN) if (TP + FN) > 0 else 0.0
    if (precision + recall) > 0:
        F1_score = (2 * precision * recall) / (precision + recall)
    else:
        F1_score = 0.0

    print('TP:', TP, 'FP:', FP, 'FN:', FN)
    print('Precision = ', round(precision, 4))
    print('Recall = ', round(recall, 4))
    print('F1-Score = ', round(F1_score, 4))
    iou_thresholds = np.round(np.arange(0.5, 1.0, 0.05), 2)
    print("COCO AP@[.5:.95]:", round(metric_fn.value(iou_thresholds=iou_thresholds, mpolicy='soft')['mAP'], 4))
# Results when the pre-trained weights are used; they differ slightly from those reported in the research paper
# Final Result for CASME_sq micro-expression
# TP: 18 FP: 327 FN: 39
# Precision = 0.0522
# Recall = 0.3158
# F1-Score = 0.0896
# COCO AP@[.5:.95]: 0.0069
# Final Result for CASME_sq macro-expression
# TP: 91 FP: 348 FN: 209
# Precision = 0.2073
# Recall = 0.3033
# F1-Score = 0.2463
# COCO AP@[.5:.95]: 0.0175
# Final Result for SAMMLV micro-expression
# TP: 41 FP: 323 FN: 118
# Precision = 0.1126
# Recall = 0.2579
# F1-Score = 0.1568
# COCO AP@[.5:.95]: 0.0092
# Final Result for SAMMLV macro-expression
# TP: 60 FP: 231 FN: 273
# Precision = 0.2062
# Recall = 0.1802
# F1-Score = 0.1923
# COCO AP@[.5:.95]: 0.0103