Micro-Expression Detection (Part 2) ---- SOFTNet (Part 2): Code

requirements.txt
absl-py==0.11.0
astunparse==1.6.3
backcall==0.2.0
cachetools==4.2.1
certifi==2020.12.5
chardet==4.0.0
cloudpickle==1.6.0
colorama==0.4.4
cycler==0.10.0
decorator==4.4.2
dlib==19.21.1
flatbuffers==1.12
gast==0.3.3
google-auth==1.24.0
google-auth-oauthlib==0.4.2
google-pasta==0.2.0
grpcio==1.32.0
h5py==2.10.0
helpdev==0.7.1
idna==2.10
imageio==2.9.0
ipykernel==5.3.4
ipython==7.22.0
ipython-genutils==0.2.0
jedi==0.17.0
joblib==1.0.1
jupyter-client==6.1.12
jupyter-core==4.7.1
Keras-Preprocessing==1.1.2
kiwisolver==1.3.1
Markdown==3.3.3
matplotlib==3.3.2
natsort==7.1.1
networkx==2.5.1
numpy==1.19.5
oauthlib==3.1.0
opencv-contrib-python-headless==4.5.1.48
opencv-python-headless==4.5.1.48
opt-einsum==3.3.0
pandas==1.1.3
parso==0.8.2
pickleshare==0.7.5
Pillow==8.2.0
pip==21.1.2
prompt-toolkit==3.0.17
protobuf==3.17.3
ptyprocess==0.7.0
pyasn1==0.4.8
pyasn1-modules==0.2.8
Pygments==2.9.0
pyparsing==2.4.7
PyQtWebEngine==5.12.1
python-dateutil==2.8.1
pytz==2021.1
PyWavelets==1.1.1
pywin32==227
pyzmq==20.0.0
requests==2.25.1
requests-oauthlib==1.3.0
rsa==4.7
scikit-image==0.17.2
scikit-learn==0.23.2
scipy==1.6.3
setuptools==52.0.0
six==1.15.0
spyder-kernels==1.10.0
tensorboard==2.5.0
tensorboard-data-server==0.6.1
tensorboard-plugin-wit==1.8.0
tensorflow-estimator==2.4.0
tensorflow-gpu==2.4.1
termcolor==1.1.0
threadpoolctl==2.1.0
tifffile==2021.6.14
tornado==6.1
traitlets==5.0.5
typing-extensions==3.7.4.3
urllib3==1.26.3
wcwidth==0.2.5
Werkzeug==1.0.1
wheel==0.36.2
wincertstore==0.2
wrapt==1.12.1
xlrd==1.2.0
Install the pinned dependencies with:
pip install -r requirements.txt
extraction_preprocess.py
import numpy as np
import pandas as pd
import cv2
import dlib

def pol2cart(rho, phi): #Convert polar coordinates to cartesian coordinates for computation of optical strain
    x = rho * np.cos(phi)
    y = rho * np.sin(phi)
    return (x, y)

def computeStrain(u, v):
    u_x = u - pd.DataFrame(u).shift(-1, axis=1)  # du/dx (forward difference along columns)
    v_y = v - pd.DataFrame(v).shift(-1, axis=0)  # dv/dy (forward difference along rows)
    u_y = u - pd.DataFrame(u).shift(-1, axis=0)  # du/dy
    v_x = v - pd.DataFrame(v).shift(-1, axis=1)  # dv/dx
    os = np.array(np.sqrt(u_x**2 + v_y**2 + 1/2 * (u_y+v_x)**2).ffill(axis=1).ffill(axis=0))  # optical strain magnitude; ffill fills the NaNs left by the shifts
    return os
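
For reference, computeStrain approximates the optical strain magnitude from the horizontal and vertical flow components u and v with forward differences (the DataFrame shift calls); the trailing ffill calls only fill the last row and column lost to the shifting. In the usual notation:

\varepsilon = \sqrt{\left(\frac{\partial u}{\partial x}\right)^{2} + \left(\frac{\partial v}{\partial y}\right)^{2} + \frac{1}{2}\left(\frac{\partial u}{\partial y} + \frac{\partial v}{\partial x}\right)^{2}}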

def extract_preprocess(final_images, k):
    predictor_model = "Utils\\shape_predictor_68_face_landmarks.dat"
    face_detector = dlib.get_frontal_face_detector()
    face_pose_predictor = dlib.shape_predictor(predictor_model)
    dataset = []
    for video in range(len(final_images)):
      OFF_video = []
      for img_count in range(final_images[video].shape[0]-k):
        img1 = final_images[video][img_count]
        img2 = final_images[video][img_count+k]
        if (img_count==0):
            reference_img = img1
            detect = face_detector(reference_img,1)
            next_img=0 #Advance through the frames until a face (and its landmarks) can be detected
            while (len(detect)==0):
                next_img+=1
                reference_img = final_images[video][img_count+next_img]
                detect = face_detector(reference_img,1)
            shape = face_pose_predictor(reference_img,detect[0])
            
            #Left Eye
            x11=max(shape.part(36).x - 15, 0)
            y11=shape.part(36).y 
            x12=shape.part(37).x 
            y12=max(shape.part(37).y - 15, 0)
            x13=shape.part(38).x 
            y13=max(shape.part(38).y - 15, 0)
            x14=min(shape.part(39).x + 15, 128)
            y14=shape.part(39).y 
            x15=shape.part(40).x 
            y15=min(shape.part(40).y + 15, 128)
            x16=shape.part(41).x 
            y16=min(shape.part(41).y + 15, 128)
            
            #Right Eye
            x21=max(shape.part(42).x - 15, 0)
            y21=shape.part(42).y 
            x22=shape.part(43).x 
            y22=max(shape.part(43).y - 15, 0)
            x23=shape.part(44).x 
            y23=max(shape.part(44).y - 15, 0)
            x24=min(shape.part(45).x + 15, 128)
            y24=shape.part(45).y 
            x25=shape.part(46).x 
            y25=min(shape.part(46).y + 15, 128)
            x26=shape.part(47).x 
            y26=min(shape.part(47).y + 15, 128)
            
            #ROI 1 (Left Eyebrow)
            x31=max(shape.part(17).x - 12, 0)
            y32=max(shape.part(19).y - 12, 0)
            x33=min(shape.part(21).x + 12, 128)
            y34=min(shape.part(41).y + 12, 128)
            
            #ROI 2 (Right Eyebrow)
            x41=max(shape.part(22).x - 12, 0)
            y42=max(shape.part(24).y - 12, 0)
            x43=min(shape.part(26).x + 12, 128)
            y44=min(shape.part(46).y + 12, 128)
            
            #ROI 3 #Mouth
            x51=max(shape.part(60).x - 12, 0)
            y52=max(shape.part(50).y - 12, 0)
            x53=min(shape.part(64).x + 12, 128)
            y54=min(shape.part(57).y + 12, 128)
            
            #Nose landmark
            x61=shape.part(28).x
            y61=shape.part(28).y
    
        #Compute Optical Flow Features
        # optical_flow = cv2.DualTVL1OpticalFlow_create() #Depends on cv2 version
        optical_flow = cv2.optflow.DualTVL1OpticalFlow_create()
        flow = optical_flow.calc(img1, img2, None)
        magnitude, angle = cv2.cartToPolar(flow[..., 0], flow[..., 1]) 
        u, v = pol2cart(magnitude, angle)
        os = computeStrain(u, v)
                
        #Features Concatenation into 128x128x3
        final = np.zeros((128, 128, 3))
        final[:,:,0] = u
        final[:,:,1] = v
        final[:,:,2] = os
        
        #Remove global head movement by subtracting the mean motion of the nose region
        final[:, :, 0] = abs(final[:, :, 0] - final[y61-5:y61+6, x61-5:x61+6, 0].mean())
        final[:, :, 1] = abs(final[:, :, 1] - final[y61-5:y61+6, x61-5:x61+6, 1].mean())
        final[:, :, 2] = final[:, :, 2] - final[y61-5:y61+6, x61-5:x61+6, 2].mean()
        
        #Eye masking
        left_eye = [(x11, y11), (x12, y12), (x13, y13), (x14, y14), (x15, y15), (x16, y16)]
        right_eye = [(x21, y21), (x22, y22), (x23, y23), (x24, y24), (x25, y25), (x26, y26)]
        cv2.fillPoly(final, [np.array(left_eye)], 0)
        cv2.fillPoly(final, [np.array(right_eye)], 0)
        
        #ROI Selection -> resample the eyebrow and mouth regions into a 42x42x3 image
        final_image = np.zeros((42, 42, 3))
        final_image[:21, :, :] = cv2.resize(final[min(y32, y42) : max(y34, y44), x31:x43, :], (42, 21))
        final_image[21:42, :, :] = cv2.resize(final[y52:y54, x51:x53, :], (42, 21))
        OFF_video.append(final_image)
        
      dataset.append(OFF_video)
      print('Video', video, 'Done')
    print('All Done')
    return dataset
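
A quick, hypothetical check of the extractor's output layout, assuming final_images is a list of (num_frames, 128, 128) grayscale arrays and k is the value later returned by cal_k:

import numpy as np
dataset = extract_preprocess(final_images, k)  # one list of feature maps per video
first_video = np.array(dataset[0])
print(first_video.shape)                       # (num_frames - k, 42, 42, 3); channels are u, v and optical strain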
    
    
    
load_images.py
import os
import shutil
import glob
import natsort
import pickle
import dlib
import numpy as np
import cv2

def crop_images(dataset_name):
    face_detector = dlib.get_frontal_face_detector()
    if(dataset_name == 'CASME_sq'):
        # Save the images into folder 'rawpic_crop'
        for subjectName in glob.glob(dataset_name + '\\rawpic\\*'):
            dataset_rawpic = dataset_name + '\\rawpic\\' + str(subjectName.split('\\')[-1]) + '\\*'
    
            # Create new directory for 'rawpic_crop'
            dir_crop = dataset_name + '\\rawpic_crop\\'
            if os.path.exists(dir_crop)==False:
              os.mkdir(dir_crop)
    
            #Create new directory for each subject
            dir_crop_sub = dataset_name + '\\rawpic_crop\\' + str(subjectName.split('\\')[-1]) + '\\'
            if os.path.exists(dir_crop_sub):
              shutil.rmtree(dir_crop_sub)
            os.mkdir(dir_crop_sub)
            print('Subject', subjectName.split('\\')[-1])
            for vid in glob.glob(dataset_rawpic):
              dir_crop_sub_vid = dir_crop_sub + vid.split('\\')[-1] #Get dir of video
              if os.path.exists(dir_crop_sub_vid): 
                  shutil.rmtree(dir_crop_sub_vid)
              os.mkdir(dir_crop_sub_vid)
              for dir_crop_sub_vid_img in natsort.natsorted(glob.glob(vid+'\\img*.jpg')): #Read images
                img = dir_crop_sub_vid_img.split('\\')[-1]
                count = img[3:-4] #Get img num Ex 001,002,...,2021
                # Load the image
                image = cv2.imread(dir_crop_sub_vid_img)
                # Run the HOG face detector on the image data
                detected_faces = face_detector(image, 1)
    
                if (count == '001'): #Use first frame as reference frame
                    for face_rect in detected_faces:
                        face_top = face_rect.top()
                        face_bottom = face_rect.bottom()
                        face_left = face_rect.left()
                        face_right = face_rect.right()
    
                face = image[face_top:face_bottom, face_left:face_right] #Crop the face region
                face = cv2.resize(face, (128, 128)) #Resize to 128x128
    
                cv2.imwrite(dir_crop_sub_vid + "\\img{}.jpg".format(count), face)
    
        
    elif(dataset_name == 'SAMMLV'):
        if os.path.exists(dataset_name + '\\SAMM_longvideos_crop'): #Delete dir if exist and create new dir
          shutil.rmtree(dataset_name + '\\SAMM_longvideos_crop')
        os.mkdir(dataset_name + '\\SAMM_longvideos_crop')
    
        for vid in glob.glob(dataset_name + '\\SAMM_longvideos\\*'):
            count = 0
            dir_crop = dataset_name + '\\SAMM_longvideos_crop\\' + vid.split('\\')[-1]
    
            if os.path.exists(dir_crop): #Delete dir if exist and create new dir
              shutil.rmtree(dir_crop)
            os.mkdir(dir_crop)
            print('Video', vid.split('\\')[-1])
            for dir_crop_img in natsort.natsorted(glob.glob(vid+'\\*.jpg')):
                img = dir_crop_img.split('\\')[-1].split('.')[0]
                count = img[-4:] #Get img num Ex 0001,0002,...,2021
                # Load the image
                image = cv2.imread(dir_crop_img)
    
                # Run the HOG face detector on the image data
                detected_faces = face_detector(image, 1)
    
                # Loop through each face we found in the image
                if (count == '0001'): #Use first frame as reference frame
                    for i, face_rect in enumerate(detected_faces):
                        face_top = face_rect.top()
                        face_bottom = face_rect.bottom()
                        face_left = face_rect.left()
                        face_right = face_rect.right()
    
                face = image[face_top:face_bottom, face_left:face_right]
                face = cv2.resize(face, (128, 128)) 
    
                cv2.imwrite(dir_crop + "\\{}.jpg".format(count), face)
    
    
def load_images(dataset_name):
    images = []
    subjects = []
    subjectsVideos = []
    
    if(dataset_name == 'CASME_sq'):
        for i, dir_sub in enumerate(natsort.natsorted(glob.glob(dataset_name + "\\rawpic_crop\\*"))):
          print('Subject: ' + dir_sub.split('\\')[-1])
          subjects.append(dir_sub.split('\\')[-1])
          subjectsVideos.append([])
          for dir_sub_vid in natsort.natsorted(glob.glob(dir_sub + "\\*")):
            subjectsVideos[-1].append(dir_sub_vid.split('\\')[-1].split('_')[1][:4]) # Ex: 'CASME_sq/rawpic_crop/s15/15_0101disgustingteeth' -> '0101'
            image = []
            for dir_sub_vid_img in natsort.natsorted(glob.glob(dir_sub_vid + "\\img*.jpg")):
              image.append(cv2.imread(dir_sub_vid_img, 0))
            images.append(np.array(image))
        
    elif(dataset_name == 'SAMMLV'):
        for i, dir_vid in enumerate(natsort.natsorted(glob.glob(dataset_name + "\\SAMM_longvideos_crop\\*"))):
          print('Subject: ' + dir_vid.split('\\')[-1].split('_')[0])
          subject = dir_vid.split('\\')[-1].split('_')[0]
          subjectVideo = dir_vid.split('\\')[-1]
          if (subject not in subjects): #Only append unique subject name
            subjects.append(subject)
            subjectsVideos.append([])
          subjectsVideos[-1].append(dir_vid.split('\\')[-1])
    
          image = []
          for dir_vid_img in natsort.natsorted(glob.glob(dir_vid + "\\*.jpg")):
            image.append(cv2.imread(dir_vid_img, 0))
          image = np.array(image)
          images.append(image)
    
    return images, subjects, subjectsVideos

def save_images_pkl(dataset_name, images, subjectsVideos, subjects):
    pickle.dump(images, open(dataset_name + "_images_crop.pkl", "wb") )
    pickle.dump(subjectsVideos, open(dataset_name + "_subjectsVideos_crop.pkl", "wb") )
    pickle.dump(subjects, open(dataset_name + "_subjects_crop.pkl", "wb") )

def load_images_pkl(dataset_name):
    images = pickle.load( open( dataset_name + "_images_crop.pkl", "rb" ) )
    subjectsVideos = pickle.load( open( dataset_name + "_subjectsVideos_crop.pkl", "rb" ) )
    subjects = pickle.load( open( dataset_name + "_subjects_crop.pkl", "rb" ) )
    return images, subjectsVideos, subjects
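
A minimal usage sketch for this module, assuming the dataset folders are laid out as in the code above (the dataset name is just one of the two supported options):

crop_images('CASME_sq')                                           # write 128x128 face crops to rawpic_crop
images, subjects, subjectsVideos = load_images('CASME_sq')        # load the cropped frames as grayscale arrays
save_images_pkl('CASME_sq', images, subjectsVideos, subjects)     # cache them as .pkl files
# images, subjectsVideos, subjects = load_images_pkl('CASME_sq')  # reload the cache on later runs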



load_label.py
import numpy as np
import pandas as pd

def load_excel(dataset_name):
    if(dataset_name == 'CASME_sq'):
        xl = pd.ExcelFile(dataset_name + '/code_final.xlsx') #Specify directory of excel file
    
        colsName = ['subject', 'video', 'onset', 'apex', 'offset', 'au', 'emotion', 'type', 'selfReport']
        codeFinal = xl.parse(xl.sheet_names[0], header=None, names=colsName) #Get data
    
        videoNames = []
        for videoName in codeFinal.iloc[:,1]:
            videoNames.append(videoName.split('_')[0])
        codeFinal['videoName'] = videoNames
    
        naming1 = xl.parse(xl.sheet_names[2], header=None, converters={0: str})
        dictVideoName = dict(zip(naming1.iloc[:,1], naming1.iloc[:,0]))
        codeFinal['videoCode'] = [dictVideoName[i] for i in codeFinal['videoName']]
    
        naming2 = xl.parse(xl.sheet_names[1], header=None)
        dictSubject = dict(zip(naming2.iloc[:,2], naming2.iloc[:,1]))
        codeFinal['subjectCode'] = [dictSubject[i] for i in codeFinal['subject']]
        
    elif(dataset_name=='SAMMLV'):
        xl = pd.ExcelFile(dataset_name + '/SAMM_LongVideos_V2_Release.xlsx')
    
        colsName = ['Subject', 'Filename', 'Inducement Code', 'Onset', 'Apex', 'Offset', 'Duration', 'Type', 'Action Units', 'Notes']
        codeFinal = xl.parse(xl.sheet_names[0], header=None, names=colsName, skiprows=[0,1,2,3,4,5,6,7,8,9])
    
        videoNames = []
        subjectName = []
        for videoName in codeFinal.iloc[:,1]:
            videoNames.append(str(videoName).split('_')[0] + '_' + str(videoName).split('_')[1])
            subjectName.append(str(videoName).split('_')[0])
        codeFinal['videoCode'] = videoNames
        codeFinal['subjectCode'] = subjectName
        #Synchronize the column names with CAS(ME)^2
        codeFinal.rename(columns={'Type':'type', 'Onset':'onset', 'Offset':'offset', 'Apex':'apex'}, inplace=True) 
        print('Data Columns:', codeFinal.columns) #Final data column
    return codeFinal
    
def load_gt(dataset_name, expression_type, images, subjectsVideos, subjects, codeFinal):
    dataset_expression_type = expression_type
    if(dataset_name == 'SAMMLV' and expression_type=='micro-expression'):
        dataset_expression_type = 'Micro - 1/2'
    elif(dataset_name == 'SAMMLV' and expression_type=='macro-expression'):
        dataset_expression_type = 'Macro'
        
    vid_need = []
    vid_count = 0
    ground_truth = []
    for sub_video_each_index, sub_vid_each in enumerate(subjectsVideos):
        ground_truth.append([])
        for videoIndex, videoCode in enumerate(sub_vid_each):
            on_off = []
            for i, row in codeFinal.iterrows():
                if (row['subjectCode']==subjects[sub_video_each_index]): #S15, S16... for CAS(ME)^2, 001, 002... for SAMMLV
                    if (row['videoCode']==videoCode):
                        if (row['type']==dataset_expression_type): #Micro-expression or macro-expression
                            if (row['offset']==0): #Take apex if offset is 0
                                on_off.append([int(row['onset']-1), int(row['apex']-1)])
                            else:
                                if(dataset_expression_type!='Macro' or int(row['onset'])!=0): #Ignore the samples that are extremely long in SAMMLV
                                    on_off.append([int(row['onset']-1), int(row['offset']-1)])
            if(len(on_off)>0):
                vid_need.append(vid_count) #To get the video that is needed
            ground_truth[-1].append(on_off) 
            vid_count+=1
    
    #Remove unused video
    final_samples = []
    final_videos = []
    final_subjects = []
    count = 0
    for subjectIndex, subject in enumerate(ground_truth):
        final_samples.append([])
        final_videos.append([])
        for samplesIndex, samples in enumerate(subject):
            if (count in vid_need):
                final_samples[-1].append(samples)
                final_videos[-1].append(subjectsVideos[subjectIndex][samplesIndex])
                final_subjects.append(subjects[subjectIndex])
            count += 1
    
    #Remove the empty data in array
    final_subjects = np.unique(final_subjects)
    final_videos = [ele for ele in final_videos if ele != []]
    final_samples = [ele for ele in final_samples if ele != []]
    final_images = [images[i] for i in vid_need]
    print('Total Videos:', len(final_images))
    return final_images, final_videos, final_subjects, final_samples
        
def cal_k(dataset_name, expression_type, final_samples):
    samples = [samples for subjects in final_samples for videos in subjects for samples in videos]
    total_duration = 0
    for sample in samples:
        total_duration += sample[1]-sample[0]
    N=total_duration/len(samples)
    k=int((N+1)/2)
    print('k (Half of average length of expression) =', k)
    return k
main.py
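A minimal driver sketch that wires the modules above together. It assumes each file is importable under its listed name; the dataset name, expression type, and the train/show_plot flags are illustrative choices:

from load_images import crop_images, load_images
from load_label import load_excel, load_gt, cal_k
from extraction_preprocess import extract_preprocess
from training import pseudo_labeling, loso, training, final_evaluation

dataset_name = 'CASME_sq'             # or 'SAMMLV'
expression_type = 'micro-expression'  # or 'macro-expression'

crop_images(dataset_name)                                      # crop and resize the raw frames to 128x128
images, subjects, subjectsVideos = load_images(dataset_name)   # load the cropped frames
codeFinal = load_excel(dataset_name)                           # read the annotation spreadsheet
final_images, final_videos, final_subjects, final_samples = load_gt(
    dataset_name, expression_type, images, subjectsVideos, subjects, codeFinal)
k = cal_k(dataset_name, expression_type, final_samples)        # half the average expression length
dataset = extract_preprocess(final_images, k)                  # optical flow + optical strain features
pseudo_y = pseudo_labeling(final_images, final_samples, k)     # frame-level pseudo labels
X, y, groupsLabel = loso(dataset, pseudo_y, final_images, final_samples, k)
TP, FP, FN, metric_fn = training(X, y, groupsLabel, dataset_name, expression_type,
                                 final_samples, k, dataset, train=True, show_plot=False)
final_evaluation(TP, FP, FN, metric_fn)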
training.py
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import cv2
from skimage.util import random_noise
import random
from collections import Counter
from sklearn.model_selection import LeaveOneGroupOut
from scipy.signal import find_peaks
from Utils.mean_average_precision.mean_average_precision import MeanAveragePrecision2d
random.seed(1)

def pseudo_labeling(final_images, final_samples, k):
    pseudo_y = []
    video_count = 0 
    
    for subject in final_samples:
        for video in subject:
            samples_arr = []
            if (len(video)==0):
                pseudo_y.append([0 for i in range(len(final_images[video_count])-k)]) #Last k frames are ignored
            else:
                pseudo_y_each = [0]*(len(final_images[video_count])-k)
                for ME in video:
                    samples_arr.append(np.arange(ME[0]+1, ME[1]+1))
                for ground_truth_arr in samples_arr: 
                    for index in range(len(pseudo_y_each)):
                        pseudo_arr = np.arange(index, index+k) 
                        # Equivalent to if IoU>0 then y=1, else y=0
                        if (pseudo_y_each[index] < len(np.intersect1d(pseudo_arr, ground_truth_arr))/len(np.union1d(pseudo_arr, ground_truth_arr))):
                            pseudo_y_each[index] = 1 
                pseudo_y.append(pseudo_y_each)
            video_count+=1
    
    # Integrate all videos into one dataset
    pseudo_y = [y for x in pseudo_y for y in x]
    print('Total frames:', len(pseudo_y))
    return pseudo_y
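
A tiny, hypothetical example of the IoU-based rule above: with k = 3 and a single ground-truth interval stored as [3, 7], every frame index whose k-frame window overlaps the interval's frames is labelled 1:

import numpy as np
final_images_demo = [np.zeros((12, 128, 128), dtype=np.uint8)]  # one 12-frame video; pixel content is irrelevant here
final_samples_demo = [[[[3, 7]]]]                               # [subject][video][sample] as [onset, offset]
print(pseudo_labeling(final_images_demo, final_samples_demo, 3))
# Expected: [0, 0, 1, 1, 1, 1, 1, 1, 0]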
    
def loso(dataset, pseudo_y, final_images, final_samples, k):
    #To split the dataset by subjects
    y = np.array(pseudo_y)
    videos_len = []
    groupsLabel = y.copy()
    prevIndex = 0
    countVideos = 0
    
    #Get total frames of each video
    for video_index in range(len(final_images)):
      videos_len.append(final_images[video_index].shape[0]-k)
    
    print('Frame Index for each subject:-')
    for video_index in range(len(final_samples)):
      countVideos += len(final_samples[video_index])
      index = sum(videos_len[:countVideos])
      groupsLabel[prevIndex:index] = video_index
      print('Subject', video_index, ':', prevIndex, '->', index)
      prevIndex = index
    
    X = [frame for video in dataset for frame in video]
    print('\nTotal X:', len(X), ', Total y:', len(y))
    return X, y, groupsLabel
    
def normalize(images):
    for index in range(len(images)):
        for channel in range(3):
            images[index][:,:,channel] = cv2.normalize(images[index][:,:,channel], None, alpha=0, beta=1,norm_type=cv2.NORM_MINMAX)
    return images

def generator(X, y, batch_size=12, epochs=1):
    while True:
        for start in range(0, len(X), batch_size):
            end = min(start + batch_size, len(X))
            num_images = end - start
            X[start:end] = normalize(X[start:end])
            u = np.array(X[start:end])[:,:,:,0].reshape(num_images,42,42,1)
            v = np.array(X[start:end])[:,:,:,1].reshape(num_images,42,42,1)
            os = np.array(X[start:end])[:,:,:,2].reshape(num_images,42,42,1)
            yield [u, v, os], np.array(y[start:end])
            
def shuffling(X, y):
    shuf = list(zip(X, y))
    random.shuffle(shuf)
    X, y = zip(*shuf)
    return list(X), list(y)

def data_augmentation(X, y):
    transformations = {
        0: lambda image: np.fliplr(image), 
        1: lambda image: cv2.GaussianBlur(image, (7,7), 0),
        2: lambda image: random_noise(image),
    }
    y1=y.copy()
    for index, label in enumerate(y1):
        if (label==1): #Only augment on expression samples (label=1)
            for augment_type in range(3):
                img_transformed = transformations[augment_type](X[index]).reshape(42,42,3)
                X.append(np.array(img_transformed))
                y.append(1)
    return X, y
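
A quick, hypothetical check of the augmentation: each positive (label 1) sample should gain three transformed copies (horizontal flip, Gaussian blur, random noise):

import numpy as np
X_demo = [np.random.rand(42, 42, 3).astype(np.float32)]  # one fake feature map with values in [0, 1]
y_demo = [1]
X_demo, y_demo = data_augmentation(X_demo, y_demo)
print(len(X_demo), y_demo)  # 4 [1, 1, 1, 1]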

def SOFTNet():
    inputs1 = layers.Input(shape=(42,42,1))
    conv1 = layers.Conv2D(3, (5,5), padding='same', activation='relu')(inputs1)
    pool1 = layers.MaxPooling2D(pool_size=(3, 3), strides=(3,3))(conv1)
    # channel 2
    inputs2 = layers.Input(shape=(42,42,1))
    conv2 = layers.Conv2D(5, (5,5), padding='same', activation='relu')(inputs2)
    pool2 = layers.MaxPooling2D(pool_size=(3, 3), strides=(3,3))(conv2)
    # channel 3
    inputs3 = layers.Input(shape=(42,42,1))
    conv3 = layers.Conv2D(8, (5,5), padding='same', activation='relu')(inputs3)
    pool3 = layers.MaxPooling2D(pool_size=(3, 3), strides=(3,3))(conv3)
    # merge
    merged = layers.Concatenate()([pool1, pool2, pool3])
    # interpretation
    merged_pool = layers.MaxPooling2D(pool_size=(2, 2), strides=(2,2))(merged)
    flat = layers.Flatten()(merged_pool)
    dense = layers.Dense(400, activation='relu')(flat)
    outputs = layers.Dense(1, activation='linear')(dense)
    #Takes input u,v,s
    model = keras.models.Model(inputs=[inputs1, inputs2, inputs3], outputs=outputs)
    # compile
    sgd = keras.optimizers.SGD(lr=0.0005)
    model.compile(loss="mse", optimizer=sgd, metrics=[tf.keras.metrics.MeanAbsoluteError()])
    return model
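
A quick sanity check of the three-stream network; this only builds the model and prints its layer shapes:

model = SOFTNet()
model.summary()  # three 42x42x1 inputs (u, v, optical strain), one linear output per sliding window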

def spotting(result, total_gt, final_samples, subject_count, dataset, k, metric_fn, p, show_plot):
    prev=0
    for videoIndex, video in enumerate(final_samples[subject_count-1]):
        preds = []
        gt = []
        countVideo = len([video for subject in final_samples[:subject_count-1] for video in subject])
        print('Video:', countVideo+videoIndex)
        score_plot = np.array(result[prev:prev+len(dataset[countVideo+videoIndex])]) #Get related frames to each video
        score_plot_agg = score_plot.copy()
        
        #Score aggregation
        for x in range(len(score_plot[k:-k])):
            score_plot_agg[x+k] = score_plot[x:x+2*k].mean()
        score_plot_agg = score_plot_agg[k:-k]
        
        #Plot the result to see the peaks
        #Note: for some videos the ground-truth markers fall below frame index 0 because of the aggregation offset; this has no impact on the evaluation
        if(show_plot):
            plt.figure(figsize=(15,4))
            plt.plot(score_plot_agg) 
            plt.xlabel('Frame')
            plt.ylabel('Score')
        threshold = score_plot_agg.mean() + p * (max(score_plot_agg) - score_plot_agg.mean()) #Moilanen threshold technique
        peaks, _ = find_peaks(score_plot_agg[:,0], height=threshold[0], distance=k)
        if(len(peaks)==0): #When no peak is detected, append a dummy prediction so mean_average_precision does not raise an exception
            preds.append([0, 0, 0, 0, 0, 0]) 
        for peak in peaks:
            preds.append([peak-k, 0, peak+k, 0, 0, 0]) #Extend left and right side of peak by k frames
        for samples in video:
            gt.append([samples[0]-k, 0, samples[1]-k, 0, 0, 0, 0])
            total_gt += 1
            if(show_plot):
                plt.axvline(x=samples[0]-k, color='r')
                plt.axvline(x=samples[1]-k+1, color='r')
                plt.axhline(y=threshold, color='g')
        if(show_plot):
            plt.show()
        prev += len(dataset[countVideo+videoIndex])
        metric_fn.add(np.array(preds),np.array(gt)) #IoU = 0.5 according to MEGC2020 metrics
    return preds, gt, total_gt
        
def evaluation(preds, gt, total_gt, metric_fn): #Get TP, FP, FN for final evaluation
    TP = int(sum(metric_fn.value(iou_thresholds=0.5)[0.5][0]['tp'])) 
    FP = int(sum(metric_fn.value(iou_thresholds=0.5)[0.5][0]['fp']))
    FN = total_gt - TP
    print('TP:', TP, 'FP:', FP, 'FN:', FN)
    return TP, FP, FN

def training(X, y, groupsLabel, dataset_name, expression_type, final_samples, k, dataset, train, show_plot):
    logo = LeaveOneGroupOut()
    logo.get_n_splits(X, y, groupsLabel)
    subject_count = 0
    epochs = 10
    batch_size = 12
    total_gt = 0
    metric_fn = MeanAveragePrecision2d(num_classes=1)
    p = 0.55 #From our analysis, 0.55 achieved the highest F1-Score
    model = SOFTNet()
    weight_reset = model.get_weights() #Initial weights
    
    for train_index, test_index in logo.split(X, y, groupsLabel): # Leave One Subject Out
        subject_count+=1
        print('Subject : ' + str(subject_count))
        
        X_train, X_test = [X[i] for i in train_index], [X[i] for i in test_index] #Get training set
        y_train, y_test = [y[i] for i in train_index], [y[i] for i in test_index] #Get testing set
        
        print('------Initializing SOFTNet-------') #The model is reset at every LOSO fold
        
        path = 'SOFTNet_Weights\\' + dataset_name + '\\' + expression_type + '\\s' + str(subject_count) + '.hdf5'
        if(train):
            #Downsample the non-expression samples (label 0) to half of their count to reduce dataset bias
            print('Dataset Labels', Counter(y_train))
            unique, uni_count = np.unique(y_train, return_counts=True) 
            rem_count = int(uni_count.max()*1/2)
            
            
            #Randomly keep only rem_count of the non-expression samples (label 0), plus every expression sample
            rem_index = random.sample([index for index, i in enumerate(y_train) if i==0], rem_count)
            rem_index += [index for index, i in enumerate(y_train) if i>0]
            rem_index.sort()
            X_train = [X_train[i] for i in rem_index]
            y_train = [y_train[i] for i in rem_index]
            print('After Downsampling Dataset Labels', Counter(y_train))
            
            #Data augmentation to the micro-expression samples only
            if (expression_type == 'micro-expression'):
                X_train, y_train = data_augmentation(X_train, y_train)
                print('After Augmentation Dataset Labels', Counter(y_train))
                
            #Shuffle the training set
            X_train, y_train = shuffling(X_train, y_train)
            model.set_weights(weight_reset) #Reset weights to ensure the model does not have info about current subject
            model.fit(
                generator(X_train, y_train, batch_size, epochs),
                steps_per_epoch = len(X_train)/batch_size,
                epochs=epochs,
                verbose=1,
                validation_data = generator(X_test, y_test, batch_size),
                validation_steps = len(X_test)/batch_size,
                shuffle=True,
            )
        else:
            model.load_weights(path)  #Load Pretrained Weights
        
        result = model.predict_generator(
            generator(X_test, y_test, batch_size),
            steps=len(X_test)/batch_size,
            verbose=1
        )
        
        preds, gt, total_gt = spotting(result, total_gt, final_samples, subject_count, dataset, k, metric_fn, p, show_plot)
        TP, FP, FN = evaluation(preds, gt, total_gt, metric_fn)
        
        print('Done Subject', subject_count)
    return TP, FP, FN, metric_fn

def final_evaluation(TP, FP, FN, metric_fn):
    precision = TP/(TP+FP)
    recall = TP/(TP+FN)
    F1_score = (2 * precision * recall) / (precision + recall)
    
    print('TP:', TP, 'FP:', FP, 'FN:', FN)
    print('Precision = ', round(precision, 4))
    print('Recall = ', round(recall, 4))
    print('F1-Score = ', round(F1_score, 4))
    print("COCO AP@[.5:.95]:", round(metric_fn.value(iou_thresholds=np.round(np.arange(0.5, 1.0, 0.05), 2), mpolicy='soft')['mAP'], 4))
    
    
# Results when the pre-trained weights are used; slightly different from the research paper

# Final Result for CASME_sq micro-expression
# TP: 18 FP: 327 FN: 39
# Precision =  0.0522
# Recall =  0.3158
# F1-Score =  0.0896
# COCO AP@[.5:.95]: 0.0069

# Final Result for CASME_sq macro-expression
# TP: 91 FP: 348 FN: 209
# Precision =  0.2073
# Recall =  0.3033
# F1-Score =  0.2463
# COCO AP@[.5:.95]: 0.0175

# Final Result for SAMMLV micro-expression
# TP: 41 FP: 323 FN: 118
# Precision =  0.1126
# Recall =  0.2579
# F1-Score =  0.1568
# COCO AP@[.5:.95]: 0.0092

# Final Result for SAMMLV macro-expression
# TP: 60 FP: 231 FN: 273
# Precision =  0.2062
# Recall =  0.1802
# F1-Score =  0.1923
# COCO AP@[.5:.95]: 0.0103
