A Non-Contact Blood Oxygen Estimation Method Based on RGB Signals
Note: this method is one attempt at estimating blood oxygen with an ordinary camera. It will keep being refined, and I hope to make progress together with others working in this field.
1. Data Preprocessing
To amplify the subtle but important color variations in the video, we use the EVM (Eulerian Video Magnification) algorithm.
For the algorithm's principles, see the original EVM paper.
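In a nutshell (summarizing the EVM paper, not deriving it): for color magnification, the pixel intensity time series $I(x, t)$ at a coarse pyramid level is temporally band-pass filtered to a signal $B(x, t)$ within a pass band $[f_{low}, f_{high}]$ Hz, scaled by an amplification factor $\alpha$, and added back:

$$\hat{I}(x, t) = I(x, t) + \alpha \, B(x, t)$$

This maps directly onto the code below: gaussian_video builds the coarse level, temporal_ideal_filter extracts $B$, amplify_video applies $\alpha$, and reconstract_video adds the result back onto the original frames.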
The full code is as follows:
import cv2
import numpy as np
import scipy.signal as signal
import scipy.fftpack as fftpack

# Convert BGR to YIQ.
# These two helpers convert images between the camera color space and YIQ.
# OpenCV loads frames in BGR order, and the matrices below are the standard
# RGB<->YIQ matrices with their columns reversed to match that order.
def rgb2ntsc(src):
    [rows, cols] = src.shape[:2]
    dst = np.zeros((rows, cols, 3), dtype=np.float64)
    T = np.array([[0.114, 0.587, 0.298], [-0.321, -0.275, 0.596], [0.311, -0.528, 0.212]])
    for i in range(rows):
        for j in range(cols):
            dst[i, j] = np.dot(T, src[i, j])
    return dst
# Convert YIQ back to BGR.
def ntsc2rbg(src):
    [rows, cols] = src.shape[:2]
    dst = np.zeros((rows, cols, 3), dtype=np.float64)
    T = np.array([[1, -1.108, 1.705], [1, -0.272, -0.647], [1, 0.956, 0.620]])
    for i in range(rows):
        for j in range(cols):
            dst[i, j] = np.dot(T, src[i, j])
    return dst
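# Note (optimization, not in the original code): since dst[i, j] = T @ src[i, j],
# the per-pixel double loops above are equivalent to the single vectorized
# expression
#     dst = src.astype(np.float64) @ T.T
# which NumPy evaluates far faster on full frames.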
# Build Gaussian and Laplacian pyramids.
# The Gaussian pyramid handles spatial downsampling; the Laplacian pyramid
# isolates the spatial detail bands used for motion magnification.
# Build Gaussian pyramid
def build_gaussian_pyramid(src, level=3):
    s = src.copy()
    pyramid = [s]
    for i in range(level):
        s = cv2.pyrDown(s)
        pyramid.append(s)
    return pyramid
# Build Laplacian pyramid
def build_laplacian_pyramid(src, levels=3):
    gaussianPyramid = build_gaussian_pyramid(src, levels)
    pyramid = []
    for i in range(levels, 0, -1):
        GE = cv2.pyrUp(gaussianPyramid[i])
        L = cv2.subtract(gaussianPyramid[i - 1], GE)
        pyramid.append(L)
    return pyramid
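# Shape example: for a 480x640 frame and levels=3, build_gaussian_pyramid
# returns 4 images of sizes 480x640, 240x320, 120x160 and 60x80 (the
# original plus three pyrDown steps), while build_laplacian_pyramid returns
# 3 detail images ordered coarse-to-fine: 120x160, 240x320, 480x640.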
# Load a video from file into a 4-D array (frames, height, width, channels).
def load_video(video_filename):
    cap = cv2.VideoCapture(video_filename)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    video_tensor = np.zeros((frame_count, height, width, 3), dtype='float')
    x = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if ret:
            video_tensor[x] = frame
            x += 1
        else:
            break
    cap.release()
    return video_tensor, fps
# Apply a temporal ideal band-pass filter to the Gaussian video: take the
# FFT along the time axis and zero every frequency bin outside [low, high] Hz.
def temporal_ideal_filter(tensor, low, high, fps, axis=0):
    fft = fftpack.fft(tensor, axis=axis)
    frequencies = fftpack.fftfreq(tensor.shape[0], d=1.0 / fps)
    bound_low = (np.abs(frequencies - low)).argmin()
    bound_high = (np.abs(frequencies - high)).argmin()
    fft[:bound_low] = 0
    fft[bound_high:-bound_high] = 0
    fft[-bound_low:] = 0
    iff = fftpack.ifft(fft, axis=axis)
    return np.abs(iff)
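# Worked example (hypothetical numbers): for a 300-frame clip at fps = 30,
# fftpack.fftfreq(300, d=1/30) spaces the bins 0.1 Hz apart, so low=0.4 and
# high=3 give bound_low=4 and bound_high=30; the filter keeps positive bins
# 4..29 (0.4-2.9 Hz, i.e. pulse rates of roughly 24-180 bpm) plus their
# negative-frequency mirrors, and zeroes everything else.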
# Build the Gaussian pyramid for every frame and keep only the top
# (smallest) level as the video to be filtered and amplified.
def gaussian_video(video_tensor, levels=3):
    for i in range(0, video_tensor.shape[0]):
        frame = video_tensor[i]
        pyr = build_gaussian_pyramid(frame, level=levels)
        gaussian_frame = pyr[-1]
        if i == 0:
            vid_data = np.zeros((video_tensor.shape[0], gaussian_frame.shape[0], gaussian_frame.shape[1], 3))
        vid_data[i] = gaussian_frame
    return vid_data
# Amplify the video
def amplify_video(gaussian_vid, amplification=50):
    return gaussian_vid * amplification
# Reconstruct the video: upsample the amplified Gaussian video back to full
# resolution and add it onto the original frames.
def reconstract_video(amp_video, origin_video, levels=3):
    final_video = np.zeros(origin_video.shape)
    for i in range(0, amp_video.shape[0]):
        img = amp_video[i]
        for x in range(levels):
            img = cv2.pyrUp(img)
        img = img + origin_video[i]
        final_video[i] = img
    return final_video
# Save a video tensor to file as Motion-JPEG AVI (output fps is hard-coded to 30).
def save_video(video_name, video_tensor):
    fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
    [height, width] = video_tensor[0].shape[0:2]
    writer = cv2.VideoWriter("%s_out.avi" % video_name[:-4], fourcc, 30, (width, height), 1)
    for i in range(0, video_tensor.shape[0]):
        writer.write(cv2.convertScaleAbs(video_tensor[i]))
    writer.release()
# Color magnification pipeline:
# load -> Gaussian video -> ideal temporal band-pass -> amplify -> reconstruct -> save.
def magnify_color(video_name, low, high, levels=3, amplification=20):
    t, f = load_video(video_name)
    gau_video = gaussian_video(t, levels=levels)
    filtered_tensor = temporal_ideal_filter(gau_video, low, high, f)
    amplified_video = amplify_video(filtered_tensor, amplification=amplification)
    final = reconstract_video(amplified_video, t, levels=levels)
    save_video(video_name, final)
# Build a Laplacian pyramid for every frame of the video; returns a list
# with one tensor per pyramid level.
def laplacian_video(video_tensor, levels=3):
    tensor_list = []
    for i in range(0, video_tensor.shape[0]):
        frame = video_tensor[i]
        pyr = build_laplacian_pyramid(frame, levels=levels)
        if i == 0:
            for k in range(levels):
                tensor_list.append(np.zeros((video_tensor.shape[0], pyr[k].shape[0], pyr[k].shape[1], 3)))
        for n in range(levels):
            tensor_list[n][i] = pyr[n]
    return tensor_list
# Butterworth band-pass filter applied along the time axis
def butter_bandpass_filter(data, lowcut, highcut, fs, order=5):
    omega = 0.5 * fs  # Nyquist frequency
    low = lowcut / omega
    high = highcut / omega
    b, a = signal.butter(order, [low, high], btype='band')
    y = signal.lfilter(b, a, data, axis=0)
    return y
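# Example: with fs = 30, lowcut = 0.4 and highcut = 3, the normalized
# cutoffs passed to signal.butter are [0.4/15, 3/15] = [0.027, 0.2],
# i.e. fractions of the 15 Hz Nyquist frequency.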
# Reconstruct the video from the filtered Laplacian pyramid tensors,
# collapsing the pyramid from the coarsest level up to the finest.
def reconstract_from_tensorlist(filter_tensor_list, levels=3):
    final = np.zeros(filter_tensor_list[-1].shape)
    for i in range(filter_tensor_list[0].shape[0]):
        up = filter_tensor_list[0][i]
        for n in range(levels - 1):
            up = cv2.pyrUp(up) + filter_tensor_list[n + 1][i]  # could be changed to up = cv2.pyrUp(up)
        final[i] = up
    return final
# Motion magnification pipeline:
# band-pass filter each Laplacian level over time, amplify, then reconstruct.
def magnify_motion(video_name, low, high, levels=3, amplification=20):
    t, f = load_video(video_name)
    lap_video_list = laplacian_video(t, levels=levels)
    filter_tensor_list = []
    for i in range(levels):
        filter_tensor = butter_bandpass_filter(lap_video_list[i], low, high, f)
        filter_tensor *= amplification
        filter_tensor_list.append(filter_tensor)
    recon = reconstract_from_tensorlist(filter_tensor_list)
    final = t + recon
    save_video(video_name, final)
if __name__ == "__main__":
    # magnify_color("baby.mp4", 0.4, 3)
    # magnify_motion("./guitar.mp4", 0.4, 3)
    magnify_color("row.mp4", 0.4, 3)
2. Building the Network Model
The model is inspired by ViT (Vision Transformer); it mainly uses a transformer to extract the latent features in the magnified facial video sequence.
The structure is as follows:
The model summary is as follows:
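As a rough illustration of this kind of architecture, here is a minimal ViT-style sketch in PyTorch. Everything in it (frame size, embedding width, number of layers and heads, clip length, and the single-value SpO2 regression head) is an assumption for illustration only, not the actual model:

import torch
import torch.nn as nn

class VideoTransformer(nn.Module):
    # Hypothetical sketch: each magnified face frame is flattened into one
    # token, a transformer encoder attends over time, and a linear head
    # regresses a single SpO2 value per clip.
    def __init__(self, in_dim=3 * 32 * 32, d_model=128, nhead=4, num_layers=4, seq_len=150):
        super().__init__()
        self.embed = nn.Linear(in_dim, d_model)                    # frame -> token
        self.pos = nn.Parameter(torch.zeros(1, seq_len, d_model))  # learned positions
        layer = nn.TransformerEncoderLayer(d_model, nhead, batch_first=True)
        self.encoder = nn.TransformerEncoder(layer, num_layers)
        self.head = nn.Linear(d_model, 1)                          # SpO2 regression head

    def forward(self, x):                                          # x: (B, T, C, H, W)
        tokens = self.embed(x.flatten(2)) + self.pos[:, :x.shape[1]]
        feats = self.encoder(tokens)                               # temporal self-attention
        return self.head(feats.mean(dim=1)).squeeze(-1)            # average over time

# quick shape check on random data
model = VideoTransformer()
clip = torch.randn(2, 150, 3, 32, 32)                              # 2 clips of 150 frames
print(model(clip).shape)                                           # torch.Size([2])

Mean-pooling the encoder output over time stands in for a classification token here; either choice works for a single regression target.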
Results
The fit to the overall trend still leaves something to be desired 🤣