参考自《结合主成分分析和聚类的关键帧提取》
作者
许文竹,徐立鸿
聚类的复杂度是并不低的,所以我们需要通过降低数据的维度来进行计算。
主成分分析
简单介绍
即 PCA。这里我们使用 PCA 提取图像特征。
PCA 主要用于数据降维:对于高维向量,可以用 PCA 求出其投影矩阵,将特征从高维降到低维,并且仍能保证反映图像的特征。
具体做法
对于 $w\times h$ 的图像,进行扁平化得到一个 $(1, wh)$ 的向量,但维度较高。用 $X$ 表示包含所有帧的矩阵,则 $X$ 的维度是 $(n, wh)$。
我们求得总体的协方差矩阵
$$\Sigma=\frac{1}{N}\sum_{i=1}^{N}(X_i-u)(X_i-u)^T$$
其中 $u$ 表示平均帧图像。
求得协方差矩阵之后,可以求解 $\Sigma$ 的特征值和特征向量,用 QR 分解或者 SVD 都可以。
设特征值为 $\lambda_i$,则有:
$$\alpha \leq \frac{\sum_{i=1}^{L}\lambda_i}{\sum_{i=1}^{wh}\lambda_i}$$
其中 $L$ 等价于我们降到多少维,$\alpha$ 一般取 0.90 到 0.99。
但是由于协方差矩阵过大,我们无法显式计算特征值,需要通过 SVD 求得奇异值,用奇异值代替特征值进行选择。最终得到的维数要和图像的个数取最小(因为数据矩阵的秩不超过样本个数,超过会出错)。
聚类
得到图像的特征之后,我们对图像进行聚类。我们使用 kmeans 算法,但需要注意的是:
1、此时的聚类,并不是漫无目的的去找聚类中心,而是每次只找自己附近的聚类中心。
2、同时是 $k$ 的选择:当平均帧差 $\leq 3500$ 的时候,说明视频总体变换缓慢,但考虑到可能有局部剧烈运动的情况,取 $k=\max(k_1,k_2)$。其中 $k_1$ 是按比例求的关键帧个数,$k_1=n/100$;$k_2$ 是帧差大于阈值 $T$ 的帧的个数,这里的 $T$ 设置成 13000。
3、当平均帧差 $>3500$ 的时候,$k=k_1=n/50$。
4、为防止迭代次数过多,我们设置迭代次数上限为 100 次。
最后即可求得结果,写完了但没测。
代码如下:
import numpy as np
from sklearn.decomposition import PCA
import cv2
# Ground-truth key-frame intervals used for scoring: segment i spans
# absolute frames [ansl[i], ansr[i]] (inclusive).
# NOTE(review): the entry "322,255" breaks the ascending order — likely a
# typo for 355; confirm against the annotation source.
ansl = [1,94,132,154,162,177,222,236,252,268,286,310,322,255,373,401,
423,431,444,498,546,594,627,681,759,800,832,846,932,1235,1369,1438,1529,1581,1847]
ansr = [93,131,153,161,176,221,235,251,267,285,309,321,354,372,400,
422,430,443,497,545,593,626,680,758,799,831,845,931,1234,1368,1437,
1528,1580,1846,2139]# ground-truth key-frame intervals
ansl = np.array(ansl)
ansr = np.array(ansr)
cap = cv2.VideoCapture('D:/ai/CV/pyt/1.mp4')
Frame_rate = cap.get(5)# frames per second (CAP_PROP_FPS)
Frame_number = int(cap.get(7))# total frame count (CAP_PROP_FRAME_COUNT)
Frame_time = 1000 / Frame_rate;# milliseconds per frame (used with CAP_PROP_POS_MSEC)
len_windows = 0
local_windows = 0
def smooth(swift_img, windows):
    """Smooth each pixel's temporal sequence in place with a box filter.

    For every spatial position, the sequence of values across frames is
    convolved ('same' mode) with an all-ones kernel of length ``windows``.
    The kernel is not normalised, so values are scaled by the window size.
    Returns the mutated array.
    """
    kernel = np.ones(windows)
    n_rows, n_cols = swift_img.shape[1], swift_img.shape[2]
    for row in range(n_rows):
        for col in range(n_cols):
            swift_img[:, row, col] = np.convolve(swift_img[:, row, col], kernel, 'same')
    return swift_img
def get_block(img):
    """Flatten an image into a 1-D feature vector.

    The input is copied into a fresh numpy array first, so the returned
    vector never aliases the caller's data.
    """
    flattened = np.array(img).ravel()
    return flattened
def get_img(now_time = 0,get_number = Frame_number):# helper so the learning code can grab sub-ranges
    """Read up to `get_number` frames starting at `now_time` milliseconds.

    Each frame is sought via CAP_PROP_POS_MSEC, converted to grayscale and
    flattened; returns an (n, w*h) numpy array of the collected frames.
    NOTE(review): the default for `get_number` is bound at def time, so the
    later reassignment of the global Frame_number does not change it —
    confirm that is intended.
    """
    swift_img = []# flattened frames collected so far
    index = 0# number of frames grabbed
    time = now_time# current seek position in milliseconds
    while (cap.isOpened()):
        cap.set(cv2.CAP_PROP_POS_MSEC,time)
        ret,img = cap.read()# grab the frame at `time`
        if not ret:
            break
        img0 = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)# convert to grayscale
        img1 = get_block(img0)
        swift_img.append(img1)
        time += Frame_time
        index += 1
        if index >= get_number:
            break
        if index % 50 ==0:
            print("当前到达"+str(index))
    swift_img = np.array(swift_img)
    return swift_img
def get_key_frame(Change):
    """Select key frames from the (n_frames, n_features) array `Change`.

    1. Choose the cluster count k from the overall mean inter-frame
       difference (slow videos get fewer clusters, but bursts of motion
       counted by k2 can raise it).
    2. Run a locality-restricted k-means: a frame may only join the current
       cluster or the next one along the timeline.
    3. For each cluster, flag the member closest to the centroid.

    Returns a boolean numpy array of length Frame_number; True marks a
    selected key frame.  Reads the module global Frame_number.
    """
    diff_C = Change[1:] - Change[:-1]  # per-frame feature differences
    mid_d = np.zeros(Change.shape[1])
    for i in range(diff_C.shape[1]):
        mid_d[i] = np.mean(diff_C[:,i])
    mid_d = np.sum(np.abs(mid_d))  # overall mean frame difference
    k = 0
    k1 = 0
    k2 = 0
    T = 13000  # threshold for a "large" per-component difference
    # Choose the cluster count k.
    # mid_d <= 3500 means the content changes slowly overall; k2 catches
    # local bursts of motion.
    print(mid_d)
    if mid_d <= 3500:
        k1 = Frame_number / 100
        k2 = np.sum(diff_C >= T)
        k = max(k1,k2)
    else :
        k1 = Frame_number / 50
        k2 = np.sum(diff_C >= T)
        k = k1
    # FIX: k could be 0 (short, calm video), which crashed below with
    # ZeroDivisionError / empty cluster list.  Always keep at least one.
    k = max(int(k), 1)
    print(k)
    # Seed the clusters evenly along the timeline.
    Cluster = []
    set_cluster = []
    now = 0
    for i in range(k):
        if now >= Frame_number:
            now = Frame_number - 1  # FIX: clamp to the last valid index
        # FIX: was Change[now-1] — with now == 0 that seeded the first
        # cluster from the LAST frame (negative indexing).
        Cluster.append(Change[now])
        set_cluster.append({now})
        now += int(Frame_number / k)
    cnt = 0  # iteration counter to cap the number of k-means passes
    while True:
        cnt += 1
        now = 0  # index of the cluster currently being filled
        for i in range(k):
            set_cluster[i].clear()  # empty every cluster before re-assignment
        for i in range(Frame_number):
            l = now
            r = min(now + 1,k-1)
            ldiff = np.mean(abs(Cluster[l] - Change[i]))
            rdiff = np.mean(abs(Cluster[r] - Change[i]))
            if ldiff < rdiff:
                set_cluster[l].add(i)
            else :
                set_cluster[r].add(i)
                now = r  # advance only when a frame joins the next cluster
        ok = True
        # Recompute centroids; ok stays True when nothing moved (converged).
        for i in range(k):
            Len = len(set_cluster[i])
            if Len == 0:
                continue
            set_sum = np.zeros(Change.shape[1])
            for x in set_cluster[i]:
                set_sum = set_sum + Change[x]
            set_sum /= Len
            if np.mean(abs(Cluster[i]-set_sum)) < 1e-10:
                continue
            ok = False
            Cluster[i] = set_sum
        print("第"+str(cnt)+"次聚类")
        if cnt >= 100 or ok == True:
            break
    # Mark, per cluster, the member frame closest to its centroid.
    TL = []
    for i in range(int(Frame_number)):
        TL.append(False)
    for i in range(k):
        MIN = 1e20
        for x in set_cluster[i]:
            MIN = min(MIN,np.mean(np.abs(Change[x] - Cluster[i])))
        for x in set_cluster[i]:
            if abs(MIN - np.mean(np.abs(Change[x] - Cluster[i]))) < 1e-10:
                TL[x] = True
                break
    TL = np.array(TL)
    return TL
def preserve(L):
    """Write every frame flagged True in boolean array `L` to ./1.1/.

    Frame i is sought at i * Frame_time milliseconds and saved as a
    zero-padded JPEG numbered by its rank among the key frames.
    """
    num = 0  # running count of saved key frames (used in the file name)
    for i in range(L.shape[0]):
        if L[i] == False:
            continue
        num += 1
        # FIX: the timestamp must follow the frame index i.  Previously the
        # time variable advanced only for saved frames, so the first `num`
        # frames of the video were written instead of the selected key frames.
        cap.set(cv2.CAP_PROP_POS_MSEC, i * Frame_time)
        ret,img = cap.read()# grab the frame
        cv2.imwrite('./1.1/{0:05d}.jpg'.format(num),img)# save the key frame
def cal_ans(cal_L,l,r):
    """Score detections `cal_L` against ground-truth intervals l..r.

    cal_L is a boolean array indexed relative to ansl[l]: a True at index i
    means a key frame at absolute frame i + ansl[l].  Each interval hit at
    least once scores 6 points; duplicate hits accumulate a penalty `add`;
    up to 4 bonus points are granted in proportion to the fraction of
    intervals hit, scaled down by the duplicate ratio.  Returns the score.
    NOTE(review): if no interval index falls in [l, r], rate is empty and
    the division by rate.shape[0] fails — confirm callers guarantee l <= r.
    """
    rate = []
    add = 0   # number of redundant (duplicate) detections
    right = 0 # number of intervals hit at least once
    for j in range(ansl.shape[0]):
        num = 0  # detections falling inside interval j
        if not (l <= j and j <= r):
            continue
        ll = ansl[j]
        rr = ansr[j]
        for i in range(cal_L.shape[0]):
            if cal_L[i] == False:
                continue
            if j == 0 :
                print(i)  # debug: dump detected indices while scoring interval 0
            if i + ansl[l] >= ll and i + ansl[l] <= rr:
                num += 1
        if num == 0:
            rate.append(0.0)
        else:
            right += 1
            if num == 1:
                rate.append(6.0)
                continue
            add += num - 1  # every hit beyond the first is redundant
            rate.append(6.0)
    rate = np.array(rate)
    ret = np.sum(rate) / rate.shape[0]
    print("多余的个数:")
    print(add)
    add = add / (5 * (r - l + 1))
    add = min(add , 1)
    print("多余的占比:")
    print(add)
    print("正确的评分:")
    print(right)
    ret += 4 * (1 - add) * right / (r - l + 1)# only correctly detected intervals earn the bonus
    print("评分是:")
    print(ret)
    return ret
def study():
    """Grid-search smoothing parameters on a labelled sub-range of the video.

    Tries convolution windows 1..10 and (reserved) extremum windows 2..11,
    scoring each run with cal_ans, and returns the best (window, local) pair.
    """
    window = 1  # best convolution window found so far
    local = 2   # best extremum window found so far
    mmax = 0    # best score found so far
    lindex = 4  # first ground-truth interval of the training range
    rindex = 10 # last ground-truth interval of the training range
    for i in range(10):
        tmp = 1 + i
        for j in range(10):
            Tmp = 2 + j
            print("当前参数: "+"卷积窗口"+str(tmp)+"最值窗口"+str(Tmp))
            # NOTE(review): get_img expects (start_ms, frame_count) but is
            # given frame indices here — confirm the intended units.
            tmp_img = get_img(ansl[lindex],ansr[rindex])
            tmp_img = smooth(tmp_img,tmp)
            # FIX: get_key_frame() takes a single argument; passing Tmp as a
            # second positional argument raised TypeError.  Tmp stays in the
            # search loop for a future extremum-window variant.
            tmp_L = get_key_frame(tmp_img)
            ttmp = cal_ans(tmp_L,lindex,rindex)
            if ttmp > mmax:
                window = tmp
                local = Tmp
                mmax = ttmp
            print("分割线--------------------")
    return window,local
def PCA_get_feature(X):
    """Reduce the (n, wh) frame matrix X with PCA.

    The component count k is chosen so the leading singular values reach
    90% of the total singular-value mass (the alpha criterion), expressed
    as a fraction of min(n, wh) — the covariance matrix itself is too big
    to form explicitly, so singular values stand in for eigenvalues.
    Returns the (n, k) projected data.
    """
    # Centre the data (PCA assumes zero-mean features).
    mean_X = X.mean(axis = 0)
    X = X - mean_X
    k = 1
    U,S,V = np.linalg.svd(X,full_matrices = False)
    index = 0
    S_sum = np.sum(S)
    now = 0
    P = 0
    while True:
        now += S[index]
        index += 1
        if now >= 0.90 * S_sum:
            P = index / S.shape[0]
            # FIX: without this break the loop always ran to the end and P
            # was overwritten with 1.0, so k was never actually reduced.
            break
        if index == S.shape[0]:
            P = 1.0
            break
    # k may not exceed min(n_samples, n_features), or sklearn's PCA errors.
    k = int(P * min(X.shape[1],X.shape[0]))
    pca = PCA(n_components = k)
    # FIX: fit_transform alone both fits and projects; the previous separate
    # pca.fit(X) call did the expensive decomposition twice.
    new_x = pca.fit_transform(X)
    return new_x
# Driver: grab every frame, then cluster directly on raw pixel features.
swift_img = get_img()
# Re-bind Frame_number to the number of frames actually grabbed (may be
# smaller than the container's reported frame count).
Frame_number = int(swift_img.shape[0])
#Change = PCA_get_feature(swift_img)
cal_L = get_key_frame(swift_img)
print("结束")
cal_ans(cal_L,0,ansl.shape[0]-1)
初始评分为 4 分。
显然以分段设置聚类中心还是不够合理。
我们按比例设置聚类中心,可以达到6.01分.
按极值点设置聚类中心,可以达到7.91分
import numpy as np
from sklearn.decomposition import PCA
import cv2
# Ground-truth key-frame intervals used for scoring: segment i spans
# absolute frames [ansl[i], ansr[i]] (inclusive).
ansl = [1,94,132,154,162,177,222,236,252,268,286,310,322,355,373,401,
423,431,444,498,546,594,627,681,759,800,832,846,932,1235,1369,1438,1529,1581,1847]
ansr = [93,131,153,161,176,221,235,251,267,285,309,321,354,372,400,
422,430,443,497,545,593,626,680,758,799,831,845,931,1234,1368,1437,
1528,1580,1846,2139]# ground-truth key-frame intervals
ansl = np.array(ansl)
ansr = np.array(ansr)
cap = cv2.VideoCapture('D:/ai/CV/pyt/1.mp4')
Frame_rate = cap.get(5)# frames per second (CAP_PROP_FPS)
Frame_number = int(cap.get(7))# total frame count (CAP_PROP_FRAME_COUNT)
Frame_time = 1000 / Frame_rate;# milliseconds per frame (used with CAP_PROP_POS_MSEC)
len_windows = 0
local_windows = 0
def smooth(swift_img, windows):
    """Box-filter every pixel's time series in place.

    Each spatial location's sequence across frames is convolved ('same'
    mode) with an unnormalised all-ones kernel of length ``windows``; the
    result is written back into ``swift_img``, which is also returned.
    """
    box = np.ones(windows)
    height = swift_img.shape[1]
    width = swift_img.shape[2]
    for y in range(height):
        for x in range(width):
            series = np.convolve(swift_img[:, y, x], box, 'same')
            swift_img[:, y, x] = series
    return swift_img
def get_block(img):
    """Return a flattened 1-D copy of `img` as a numpy array."""
    arr = np.array(img)
    return arr.ravel()
def get_img(now_time = 0,get_number = Frame_number):# helper so the learning code can grab sub-ranges
    """Read up to `get_number` frames starting at `now_time` milliseconds.

    Each frame is sought via CAP_PROP_POS_MSEC, converted to grayscale and
    flattened; returns an (n, w*h) numpy array of the collected frames.
    NOTE(review): the default for `get_number` is bound at def time, so the
    later reassignment of the global Frame_number does not change it —
    confirm that is intended.
    """
    swift_img = []# flattened frames collected so far
    index = 0# number of frames grabbed
    time = now_time# current seek position in milliseconds
    while (cap.isOpened()):
        cap.set(cv2.CAP_PROP_POS_MSEC,time)
        ret,img = cap.read()# grab the frame at `time`
        if not ret:
            break
        img0 = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)# convert to grayscale
        img1 = get_block(img0)
        swift_img.append(img1)
        time += Frame_time
        index += 1
        if index >= get_number:
            break
        if index % 50 ==0:
            print("当前到达"+str(index))
    swift_img = np.array(swift_img)
    return swift_img
def get_key_frame(Change):
    """Select key frames from the (n_frames, n_features) array `Change`.

    1. Choose the cluster count k from the overall mean inter-frame
       difference (k2 counts large differences to catch motion bursts).
    2. Seed cluster centres along the timeline, packing them more densely
       where the local frame difference exceeds the mean.
    3. Run a locality-restricted k-means: a frame may only join the current
       cluster or the next one along the timeline.
    4. Flag, per cluster, the member frame closest to its centroid.

    Returns a boolean numpy array of length Frame_number; True marks a key
    frame.  Reads the module global Frame_number.
    NOTE(review): k == 0 (short, calm video) would divide by zero in the
    seeding loop — confirm the inputs guarantee k >= 1.
    NOTE(review): the listing's original indentation was lost; `now = r` is
    placed inside the else-branch so `now` advances only when a frame joins
    the next cluster — confirm against the author's source.
    """
    diff_C = Change[1:] - Change[:-1]  # per-frame feature differences
    mid_d = np.zeros(Change.shape[1])
    for i in range(diff_C.shape[1]):
        mid_d[i] = np.mean(diff_C[:,i])
    mid_d = np.sum(np.abs(mid_d))  # overall mean frame difference
    k = 0
    k1 = 0
    k2 = 0
    T = 13000  # threshold for a "large" per-component difference
    # Choose the cluster count k.
    # mid_d <= 3500 means the content changes slowly overall.
    print(mid_d)
    if mid_d <= 3500:
        k1 = Frame_number / 100
        k2 = np.sum(diff_C >= T)
        k = max(k1,k2)
    else :
        k1 = Frame_number / 50
        k2 = np.sum(diff_C >= T)
        k = k1
    k = int(k)
    print(k)
    # With k fixed, extract the key frames by clustering.
    Cluster = []
    set_cluster = []
    now = 0
    for i in range(k):
        if now >= Frame_number - 2:
            now = Frame_number - 2  # clamp so diff_C[now] stays in range
        Cluster.append(Change[now])
        set_cluster.append({now})
        # Step faster through calm stretches, slower through active ones.
        if(np.sum(np.abs(diff_C[now]))>mid_d):
            now += int(Frame_number / (3 * k))
        else:
            now += int(Frame_number / k)
    cnt = 0  # iteration counter to cap the number of k-means passes
    while True:
        cnt += 1
        now = 0  # index of the cluster currently being filled
        for i in range(k):
            set_cluster[i].clear();# empty every cluster before re-assignment
        for i in range(Frame_number):
            l = now
            r = min(now + 1,k-1)
            ldiff = np.mean(abs(Cluster[l] - Change[i]))
            rdiff = np.mean(abs(Cluster[r] - Change[i]))
            if ldiff < rdiff:
                set_cluster[l].add(i)
            else :
                set_cluster[r].add(i)
                now = r
        ok = True
        # Recompute centroids; ok stays True when nothing moved (converged).
        for i in range(k):
            Len = len(set_cluster[i])
            if Len == 0:
                continue
            set_sum = np.zeros(Change.shape[1])
            for x in set_cluster[i]:
                set_sum = set_sum + Change[x]
            set_sum /= Len
            if np.mean(abs(Cluster[i]-set_sum)) < 1e-10:
                continue
            ok = False
            Cluster[i] = set_sum
        print("第"+str(cnt)+"次聚类")
        if cnt >= 100 or ok == True:
            break
    # Mark, per cluster, the member frame closest to its centroid.
    TL = []
    for i in range(int(Frame_number)):
        TL.append(False)
    for i in range(k):
        MIN = 1e20
        for x in set_cluster[i]:
            MIN = min(MIN,np.mean(np.abs(Change[x] - Cluster[i])))
        for x in set_cluster[i]:
            if abs(MIN - np.mean(np.abs(Change[x] - Cluster[i]))) < 1e-10:
                TL[x] = True
                break
    TL = np.array(TL)
    return TL
def preserve(L):
    """Write every frame flagged True in boolean array `L` to ./1.1/.

    Frame i is sought at i * Frame_time milliseconds and saved as a
    zero-padded JPEG numbered by its rank among the key frames.
    """
    num = 0  # running count of saved key frames (used in the file name)
    for i in range(L.shape[0]):
        if L[i] == False:
            continue
        num += 1
        # FIX: the timestamp must follow the frame index i.  Previously the
        # time variable advanced only for saved frames, so the first `num`
        # frames of the video were written instead of the selected key frames.
        cap.set(cv2.CAP_PROP_POS_MSEC, i * Frame_time)
        ret,img = cap.read()# grab the frame
        cv2.imwrite('./1.1/{0:05d}.jpg'.format(num),img)# save the key frame
def cal_ans(cal_L,l,r):
    """Score detections `cal_L` against ground-truth intervals l..r.

    cal_L is a boolean array indexed relative to ansl[l]: a True at index i
    means a key frame at absolute frame i + ansl[l].  Each interval hit at
    least once scores 6 points; duplicate hits accumulate a penalty `add`;
    up to 4 bonus points are granted in proportion to the fraction of
    intervals hit, scaled down by the duplicate ratio.  Returns the score.
    NOTE(review): if no interval index falls in [l, r], rate is empty and
    the division by rate.shape[0] fails — confirm callers guarantee l <= r.
    """
    rate = []
    add = 0   # number of redundant (duplicate) detections
    right = 0 # number of intervals hit at least once
    for j in range(ansl.shape[0]):
        num = 0  # detections falling inside interval j
        if not (l <= j and j <= r):
            continue
        ll = ansl[j]
        rr = ansr[j]
        for i in range(cal_L.shape[0]):
            if cal_L[i] == False:
                continue
            if j == 0 :
                print(i)  # debug: dump detected indices while scoring interval 0
            if i + ansl[l] >= ll and i + ansl[l] <= rr:
                num += 1
        if num == 0:
            rate.append(0.0)
        else:
            right += 1
            if num == 1:
                rate.append(6.0)
                continue
            add += num - 1  # every hit beyond the first is redundant
            rate.append(6.0)
    rate = np.array(rate)
    ret = np.sum(rate) / rate.shape[0]
    print("多余的个数:")
    print(add)
    add = add / (5 * (r - l + 1))
    add = min(add , 1)
    print("多余的占比:")
    print(add)
    print("正确的评分:")
    print(right)
    ret += 4 * (1 - add) * right / (r - l + 1)# only correctly detected intervals earn the bonus
    print("评分是:")
    print(ret)
    return ret
def study():
    """Grid-search smoothing parameters on a labelled sub-range of the video.

    Tries convolution windows 1..10 and (reserved) extremum windows 2..11,
    scoring each run with cal_ans, and returns the best (window, local) pair.
    """
    window = 1  # best convolution window found so far
    local = 2   # best extremum window found so far
    mmax = 0    # best score found so far
    lindex = 4  # first ground-truth interval of the training range
    rindex = 10 # last ground-truth interval of the training range
    for i in range(10):
        tmp = 1 + i
        for j in range(10):
            Tmp = 2 + j
            print("当前参数: "+"卷积窗口"+str(tmp)+"最值窗口"+str(Tmp))
            # NOTE(review): get_img expects (start_ms, frame_count) but is
            # given frame indices here — confirm the intended units.
            tmp_img = get_img(ansl[lindex],ansr[rindex])
            tmp_img = smooth(tmp_img,tmp)
            # FIX: get_key_frame() takes a single argument; passing Tmp as a
            # second positional argument raised TypeError.  Tmp stays in the
            # search loop for a future extremum-window variant.
            tmp_L = get_key_frame(tmp_img)
            ttmp = cal_ans(tmp_L,lindex,rindex)
            if ttmp > mmax:
                window = tmp
                local = Tmp
                mmax = ttmp
            print("分割线--------------------")
    return window,local
def PCA_get_feature(X):
    """Reduce the (n, wh) frame matrix X with PCA.

    The component count k is chosen so the leading singular values reach
    90% of the total singular-value mass (the alpha criterion), expressed
    as a fraction of min(n, wh) — the covariance matrix itself is too big
    to form explicitly, so singular values stand in for eigenvalues.
    Returns the (n, k) projected data.
    """
    # Centre the data (PCA assumes zero-mean features).
    mean_X = X.mean(axis = 0)
    X = X - mean_X
    k = 1
    U,S,V = np.linalg.svd(X,full_matrices = False)
    index = 0
    S_sum = np.sum(S)
    now = 0
    P = 0
    while True:
        now += S[index]
        index += 1
        if now >= 0.90 * S_sum:
            P = index / S.shape[0]
            # FIX: without this break the loop always ran to the end and P
            # was overwritten with 1.0, so k was never actually reduced.
            break
        if index == S.shape[0]:
            P = 1.0
            break
    # k may not exceed min(n_samples, n_features), or sklearn's PCA errors.
    k = int(P * min(X.shape[1],X.shape[0]))
    pca = PCA(n_components = k)
    # FIX: fit_transform alone both fits and projects; the previous separate
    # pca.fit(X) call did the expensive decomposition twice.
    new_x = pca.fit_transform(X)
    return new_x
# Driver: grab every frame, then cluster directly on raw pixel features.
swift_img = get_img()
# Re-bind Frame_number to the number of frames actually grabbed (may be
# smaller than the container's reported frame count).
Frame_number = int(swift_img.shape[0])
#Change = PCA_get_feature(swift_img)
cal_L = get_key_frame(swift_img)
print("结束")
cal_ans(cal_L,0,ansl.shape[0]-1)