检测图片是否包含在视频中(粗略)
- 算法: 均值哈希算法
- 分别计算封面图片和每一视频帧的哈希值,然后对比两个哈希值得到相似度
- 逐帧读取视频,边读取边计算相似度
- 当相似度大于0.95时,停止读取视频,并将该视频帧保存到本地;若读完整个视频仍未超过0.95,则将相似度最高的视频帧保存到本地
- 使用四个进程(进程池为4),同时读取四个视频
import os
import cv2
import numpy as np
from tqdm import tqdm
from concurrent.futures import ProcessPoolExecutor
from multiprocessing import freeze_support
# Average hash (aHash)
def aHash(img, shape=(10, 10)):
    """Compute the average hash (aHash) of an image as a bit string.

    The image is resized to ``shape`` and converted to grayscale; every pixel
    is then compared against the mean gray level. Pixels brighter than the
    mean yield ``'1'``, all others ``'0'``.

    Args:
        img: Image array as handled by OpenCV. A 3-channel image is treated
            as BGR and a 4-channel image as BGRA; a 2-D image is used as
            grayscale without conversion.
        shape: ``(width, height)`` passed to ``cv2.resize``. The resulting
            hash has ``shape[0] * shape[1]`` characters.

    Returns:
        A string of ``'0'``/``'1'`` characters, one per pixel of the resized
        image, in row-major order.
    """
    small = cv2.resize(img, shape)

    # Pick the conversion that matches the channel layout so grayscale and
    # alpha-channel images do not fail in the BGR-only conversion.
    if small.ndim == 2:
        gray = small
    elif small.shape[2] == 4:
        gray = cv2.cvtColor(small, cv2.COLOR_BGRA2GRAY)
    else:
        gray = cv2.cvtColor(small, cv2.COLOR_BGR2GRAY)

    # mean() divides by the real pixel count (not a fixed 100) and
    # accumulates in floating point, so the sum cannot wrap around the way a
    # running total of uint8 scalars can.
    avg = gray.mean()

    # ravel() walks the pixels row by row, whatever the array's actual
    # (height, width) layout is.
    return ''.join('1' if bit else '0' for bit in (gray > avg).ravel())
# Hash comparison
def cmpHash(hash1, hash2, shape=(10, 10)):
    """Return the fraction of positions at which two hash strings agree.

    Args:
        hash1: First hash string.
        hash2: Second hash string.
        shape: Kept only for backward compatibility and no longer used. The
            score is normalised by the actual hash length, so hashes built
            with any resize shape are scored correctly.

    Returns:
        ``-1`` if the two hashes differ in length (argument error).
        Otherwise the number of matching positions divided by the hash
        length, a float between 0 and 1. Two empty hashes score ``0.0``.
    """
    # Different lengths mean the hashes are not comparable.
    if len(hash1) != len(hash2):
        return -1
    # Avoid dividing by zero; there is nothing to match in an empty hash.
    if not hash1:
        return 0.0
    matches = sum(a == b for a, b in zip(hash1, hash2))
    return matches / len(hash1)
def save_same_img(cover_url, mp4_path):
    """Find the video frame most similar to a cover image and save it.

    Frames of ``mp4_path`` are read one after another and compared with the
    cover image through ``aHash`` and ``cmpHash``. Scanning stops at the
    first frame whose similarity exceeds 0.95; if no frame does, the
    best-scoring frame of the whole video is kept. The chosen frame is
    written next to the cover as ``<cover name><similarity>.jpg`` unless a
    file with that name already exists.

    Args:
        cover_url: Path of the cover image file.
        mp4_path: Path of the video file to scan.

    Returns:
        None. Nothing is saved when the cover cannot be decoded or when no
        frame can be read from the video.
    """
    print(
        cover_url, mp4_path
    )

    # The file is read with np.fromfile and decoded in memory rather than
    # opened by path through OpenCV (presumably so non-ASCII paths work).
    # IMREAD_COLOR always yields a 3-channel BGR image, even for grayscale
    # or alpha-channel files.
    cover = cv2.imdecode(np.fromfile(cover_url, dtype=np.uint8), cv2.IMREAD_COLOR)
    if cover is None:
        print("cannot decode cover image:", cover_url)
        return
    cover_hash = aHash(cover)

    best_frame = None
    best_score = 0
    video_cap = cv2.VideoCapture(mp4_path)
    try:
        while True:
            ret, frame = video_cap.read()
            if not ret:
                # End of video, or the video could not be read at all.
                break
            score = cmpHash(cover_hash, aHash(frame))
            if best_frame is None or score > best_score:
                best_frame, best_score = frame, score
            if score > 0.95:
                # Close enough: stop early instead of scanning the rest.
                break
    finally:
        # Always free the capture handle, even if hashing a frame fails.
        video_cap.release()

    if best_frame is None:
        print("no frame could be read from video:", mp4_path)
        return

    # Insert the score before the extension only; str.replace would rewrite
    # every ".jpg" occurring anywhere in the path.
    img_path = os.path.splitext(cover_url)[0] + str(best_score) + ".jpg"
    print(best_score, img_path)
    if not os.path.exists(img_path):
        # Encode in memory and write with tofile, mirroring how the cover
        # image is read.
        ok, encoded = cv2.imencode('.jpg', best_frame)
        if ok:
            encoded.tofile(img_path)
        else:
            print("cannot encode frame for:", img_path)
if __name__ == '__main__':
    # Pair every "<name>.jpg" cover in dir_path with "<name>.mp4" and scan
    # the videos in a pool of four worker processes.
    freeze_support()
    dir_path = r"H:\CRTubeGet Downloaded7长视频\军事"
    with ProcessPoolExecutor(max_workers=4) as pool:
        jobs = []
        for file in os.listdir(dir_path):
            if not file.endswith(".jpg"):
                continue
            cover_path = os.path.join(dir_path, file)
            mp4_path = os.path.splitext(cover_path)[0] + ".mp4"
            # Skip images without a matching video. This also skips result
            # images saved by earlier runs, which end in ".jpg" too.
            if not os.path.isfile(mp4_path):
                continue
            jobs.append((pool.submit(save_same_img, cover_path, mp4_path), cover_path))

        # Wait on each future so the progress bar reflects finished work and
        # worker exceptions are reported instead of being silently dropped.
        for future, cover_path in tqdm(jobs):
            try:
                future.result()
            except Exception as exc:
                print("failed:", cover_path, exc)
效果