The final implementation
The complete script is shown below. It extracts the audio track from a video, computes per-frame colour features (means of the gray, H, S, a, and b channels) and per-frame sound features (mean and standard deviation of spectrogram power in three frequency bands), feeds both into pre-trained classifiers, and plays the video next to a live bar chart of the resulting empathy scores.
# -*- coding: utf-8 -*-
import os
import pickle
import time
import wave

import cv2
import numpy as np
import matplotlib
matplotlib.use('TkAgg')  # select the backend before pyplot is imported
import matplotlib.pyplot as plt
from moviepy.editor import VideoFileClip
from sklearn import preprocessing

import nextpow2  # local helper module, not a PyPI package (see the sketch below)
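nextpow2 is a local helper module rather than something you can pip-install. A minimal sketch of what it needs to provide, assuming it mirrors MATLAB's nextpow2 (the file name and implementation are assumptions, not the author's code):

# nextpow2.py -- hypothetical sketch of the local helper
import numpy as np

def nextpow2(n):
    # Smallest integer p such that 2 ** p >= n (MATLAB-style nextpow2)
    return int(np.ceil(np.log2(n)))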
def video_sound(dir, dir_out):
    # Pull the audio track out of the video and save it as a WAV file
    video = VideoFileClip(dir)
    video.audio.write_audiofile(dir_out)
def extract_sound_from_videos(dir_out):
    # Per-frame mean and std of spectrogram power in three frequency bands
    fw = wave.open(dir_out, 'r')
    soundInfo = fw.readframes(-1)
    x = np.frombuffer(soundInfo, dtype=np.int16)  # np.fromstring is deprecated
    fs = fw.getframerate()
    len_ = int(fs / 14.5)      # frame size in samples (~14.5 frames per second)
    PERC = 5                   # window overlap as a percentage of the frame
    len1 = len_ * PERC // 100  # overlapping part
    len2 = len_ - len1         # non-overlapping part (hop size)
    win = np.hamming(len_)     # Hamming window
    nFFT = 2 * 2 ** nextpow2.nextpow2(len_)  # next power of two above len_ (computed but unused below)
    ki = 1
    Nframes = len(x) // len2 - 1
    Low_Power_M = []; Low_Power_S = []
    Middle_Power_M = []; Middle_Power_S = []
    High_Power_M = []; High_Power_S = []
    for _ in range(int(Nframes)):
        insign = win * x[ki - 1:ki + len_ - 1]  # windowed frame
        plt.subplot(111)
        spectrum, freqs, ts, fig = plt.specgram(insign, Fs=fs, scale_by_freq=True, sides='default')
        # Split the 129 frequency rows (matplotlib's default NFFT=256) into three bands;
        # store mean/std as floats so the downstream scaling does not choke on strings
        low_power = spectrum[0:43]
        Low_Power_M.append(float('%.4f' % np.mean(low_power)))
        Low_Power_S.append(float('%.4f' % np.std(low_power)))
        middle_power = spectrum[43:86]
        Middle_Power_M.append(float('%.4f' % np.mean(middle_power)))
        Middle_Power_S.append(float('%.4f' % np.std(middle_power)))
        high_power = spectrum[86:129]
        High_Power_M.append(float('%.4f' % np.mean(high_power)))
        High_Power_S.append(float('%.4f' % np.std(high_power)))
        ki = ki + len2
    fw.close()
    return Low_Power_M, Low_Power_S, Middle_Power_M, Middle_Power_S, High_Power_M, High_Power_S
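The slices 0:43, 43:86, and 86:129 split the spectrogram's frequency rows into three roughly equal bands; with matplotlib's default NFFT of 256, plt.specgram returns NFFT/2 + 1 = 129 bins. A quick way to see what those bands cover in Hz (the 44.1 kHz sample rate here is an assumption; substitute the WAV's real frame rate):

fs = 44100           # assumed sample rate
NFFT = 256           # matplotlib's specgram default
bin_hz = fs / NFFT   # width of one frequency bin in Hz
for name, lo, hi in [('low', 0, 43), ('middle', 43, 86), ('high', 86, 129)]:
    print('%s band: %.0f-%.0f Hz' % (name, lo * bin_hz, hi * bin_hz))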
def feature_unite():
    # Build one combined feature vector per frame:
    # [gray, H, S, a, b, low_M, low_S, mid_M, mid_S, high_M, high_S]
    Video_feature = []
    print('image processing ...')
    Image = extract_colors_from_videos(dir)  # image features
    print('sound processing ...')
    Low_M, Low_S, Middle_M, Middle_S, High_M, High_S = extract_sound_from_videos(dir_out)  # sound features
    # The two streams can differ slightly in length, so iterate over the shorter one
    for i in range(min(len(Image), len(Low_M))):
        video_feature = list(Image[i])
        video_feature.extend([Low_M[i], Low_S[i], Middle_M[i], Middle_S[i], High_M[i], High_S[i]])
        Video_feature.append(video_feature)
    return Video_feature
def mkdir(path_3):
    # Create the folder if it does not exist yet (helper, unused in this script)
    if not os.path.exists(path_3):
        os.makedirs(path_3)  # makedirs also creates any missing parent directories
        print("--- new folder... ---")
        print("--- OK ---")
    else:
        print("--- There is this folder! ---")
def extract_colors_from_videos(dir):
    # Per-frame means of five colour channels: gray, H and S (HSV), a and b (LAB)
    cap = cv2.VideoCapture(dir)
    gray_avg = []; h_avg = []; s_avg = []; alpha_avg = []; beta_avg = []
    while True:
        ret, frame = cap.read()  # capture one frame
        if not ret:
            break
        gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        hsv_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
        lab_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2LAB)
        # Split off the channels that are actually used as features
        h, s, v = cv2.split(hsv_frame)
        l, alpha, beta = cv2.split(lab_frame)
        # Mean of each channel over the whole frame
        gray_avg.append(gray_frame.mean())
        h_avg.append(h.mean())
        s_avg.append(s.mean())
        alpha_avg.append(alpha.mean())
        beta_avg.append(beta.mean())
    cap.release()
    # One five-element feature vector per frame
    Image = [[gray_avg[i], h_avg[i], s_avg[i], alpha_avg[i], beta_avg[i]]
             for i in range(len(gray_avg))]
    return Image
def video(dir, fm, e_image, n_image, e_video, n_video):
    # Play the video side by side with a live bar chart of the four scores
    fig = plt.figure(figsize=(3.6, 3.6), dpi=100)
    cap = cv2.VideoCapture(dir)
    for i in range(fm):
        # Redraw the four bars for the current frame
        plt.cla()
        plt.ylim(-0.5, 3.5)
        scores = [e_video[i], n_video[i], e_image[i], n_image[i]]
        plt.barh(range(4), scores, color='b', alpha=0.6)  # drawn bottom-up
        plt.yticks(range(4), ['Image+Sound\nEmpathy\nScore', 'Image+Sound\nNo\nEmpathy\nScore',
                              'Image\nEmpathy\nScore', 'Image\nNo\nEmpathy\nScore'],
                   color='yellowgreen', fontsize=8, ha='center', va='center')
        plt.xlim(0, 1)
        for idx, val in enumerate(scores):
            plt.text(val + 0.02, idx + 0.02, '%.2f%%' % (val * 100),
                     color='r', fontsize=10, ha='center', va='bottom')
        # Render the Matplotlib canvas and convert it to OpenCV's BGR layout
        fig.canvas.draw()
        img = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)  # np.fromstring is deprecated
        img = img.reshape(fig.canvas.get_width_height()[::-1] + (3,))
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        ret, frame = cap.read()
        frame = cv2.resize(frame, (640, 360))
        side_by_side = np.concatenate((img, frame), axis=1)  # horizontal concat
        cv2.imshow('Real Time Empathy Evaluation', side_by_side)
        k = cv2.waitKey(1) & 0xFF
        # if k == 27:  # uncomment to let Esc stop playback
        #     break
        if i == fm - 1:
            time.sleep(10)  # hold the last frame for 10 seconds
    cap.release()
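fig.canvas.tostring_rgb was deprecated and later removed in recent Matplotlib releases; if the conversion above fails on your version, the RGBA buffer can be used instead (a sketch, dropping the alpha channel before the BGR conversion):

# Replacement for the tostring_rgb line on recent Matplotlib versions
img = np.asarray(fig.canvas.buffer_rgba())[:, :, :3].copy()  # drop alpha
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)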
def image_pred(image_train_clf_knn):
    # Per-frame prediction from the image features alone
    Images = extract_colors_from_videos(dir)
    # Despite the variable name, the pickle path points at an SVM model
    with open(image_train_clf_knn, 'rb') as f:
        model = pickle.load(f)
    X_test = preprocessing.scale(Images)
    y_pred = model.predict(X_test)
    x = len(y_pred)
    empathy = 0; no_empathy = 0
    E_image = []; N_image = []; e = 0.0; n = 0.0
    for m in range(len(y_pred)):
        # Running ratios of empathy / no-empathy frames over the whole clip
        if str(y_pred[m]) == '1':
            empathy += 1
            e = float('%.2f' % (empathy / len(y_pred)))
        else:
            no_empathy += 1
            n = float('%.2f' % (no_empathy / len(y_pred)))
        E_image.append(e)
        N_image.append(n)
    return x, E_image, N_image
def video_pred(video_train_clf_knn):
    # Per-frame prediction from the combined image + sound features
    Video_feature = feature_unite()
    with open(video_train_clf_knn, 'rb') as f:
        model = pickle.load(f)
    X_test = preprocessing.scale(Video_feature)
    y_pred = model.predict(X_test)
    empathy = 0; no_empathy = 0
    E_video = []; N_video = []; e = 0.0; n = 0.0
    for m in range(len(y_pred)):
        if str(y_pred[m]) == '1':
            empathy += 1
            e = float('%.2f' % (empathy / len(y_pred)))
        else:
            no_empathy += 1
            n = float('%.2f' % (no_empathy / len(y_pred)))
        E_video.append(e)
        N_video.append(n)
    return E_video, N_video
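image_pred and video_pred repeat the same accumulation loop. Both could call a shared helper such as the following sketch (not part of the original script; the rounding matches the '%.2f' formatting above):

def cumulative_rates(y_pred):
    # Running empathy / no-empathy ratios over the whole clip, one pair per frame
    E, N = [], []
    empathy = 0
    total = len(y_pred)
    for m, label in enumerate(y_pred):
        if str(label) == '1':
            empathy += 1
        E.append(round(empathy / total, 2))
        N.append(round((m + 1 - empathy) / total, 2))
    return E, N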
if __name__ == '__main__':
    video_name = [str(35)]
    for n in range(len(video_name)):
        # NOTE: dir shadows the Python built-in; the name is kept to match the functions above
        dir = 'E:/ffmpeg-latest-win64-static/2019_11_12_experimental_data/test_video/video/' + video_name[n] + '.mp4'
        video_train_clf_knn = 'E:/ffmpeg-latest-win64-static/2019_11_12_experimental_data/peak_detetion(test)/sound_image_knn.pickle'
        image_train_clf_knn = 'E:/ffmpeg-latest-win64-static/2019_11_12_experimental_data/GAZE_DATA_backup/7_09_proceess/classifier/pickle/image_svm.pickle'
        dir_out = 'E:/ffmpeg-latest-win64-static/2019_11_12_experimental_data/stimuli/feature/image_7_15/realtime_sound/' + video_name[n] + '.wav'
        sound_rgb_out = 'E:/ffmpeg-latest-win64-static/2019_11_12_experimental_data/stimuli/feature/image_7_15/realtime_sound/' + video_name[n] + '/'  # unused below
        video_sound(dir, dir_out)  # write the WAV that the sound features are read from
        print('wait...')
        x, E_image, N_image = image_pred(image_train_clf_knn)
        E_video, N_video = video_pred(video_train_clf_knn)
        video(dir, x, E_image, N_image, E_video, N_video)
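The two pickles are loaded here but never created. For reference, the training side could look roughly like the sketch below; the classifier type, the n_neighbors value, and the train_and_pickle helper are assumptions, not the author's training code. The one hard requirement is that training scaled its features the same way this script scales the test features:

import pickle
from sklearn import preprocessing
from sklearn.neighbors import KNeighborsClassifier

def train_and_pickle(X, y, out_path):
    # Hypothetical training sketch -- fit on scaled features, then pickle the model
    clf = KNeighborsClassifier(n_neighbors=5)
    clf.fit(preprocessing.scale(X), y)
    with open(out_path, 'wb') as f:
        pickle.dump(clf, f)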