python判断图片相似 过滤相似图片

目录

一、skimage(scikit-image)图片相似

二、余弦距离

三、汉明距离(效率高,计算速度快)

过滤相似图片,小图片


一、skimage(scikit-image)图片相似

安装:

pip install scikit-image

from skimage.metrics import structural_similarity as ssim
import cv2

import numpy as np
 
# Load the two images to compare; cv2.imread returns None (no exception)
# when a file is missing or cannot be decoded.
img1 = cv2.imread('1.jpg')
img2 = cv2.imread('2.jpg')
if img1 is None or img2 is None:
    raise FileNotFoundError("could not read '1.jpg' and/or '2.jpg'")

# SSIM needs both inputs to have the same shape.  np.resize only repeats or
# truncates the flattened pixel buffer, which scrambles the picture, so the
# image is rescaled with cv2.resize instead.  Note that cv2.resize takes the
# target size as (width, height), i.e. (shape[1], shape[0]).
img2 = cv2.resize(img2, (img1.shape[1], img1.shape[0]))

print(img2.shape)
print(img1.shape)
# Store the result under its own name so the imported ssim() function is not
# overwritten by a float.
score = ssim(img1, img2, multichannel=True)

print(score)


有一次报错了:

    "win_size exceeds image extent.  If the input is a multichannel "
ValueError: win_size exceeds image extent.  If the input is a multichannel (color) image, set multichannel=True.

调试发现:SSIM 默认的滑动窗口 win_size 为 7,因此输入图片的宽和高都必须不小于 7(即大于 6);更小的图片需要手动指定更小的奇数 win_size。

随机数图片测试:

import numpy as np
import cv2
from skimage.metrics import structural_similarity as ssima

def main():
    """Draw random filled circles on a white canvas and print a small-patch SSIM.

    Two 7x7 patches are cut from the top-left region of the canvas and
    compared with ``ssima``; the score is printed and the canvas is shown in
    an OpenCV window until a key is pressed.
    """
    # 1. Create a white background image.
    d = 400
    img = np.ones((d, d, 3), np.uint8) * 255

    # 2. Draw 100 filled circles at random positions.
    for _ in range(100):
        # Random centre point.
        center_x = np.random.randint(0, high=d)
        center_y = np.random.randint(0, high=d)

        # Random radius and colour.  Integer division keeps the upper bound
        # an int (d / 5 would hand randint a float).
        radius = np.random.randint(5, high=d // 5)
        color = np.random.randint(0, high=256, size=(3, )).tolist()

        # Thickness -1 asks OpenCV for a filled circle.
        cv2.circle(img, (center_x, center_y), radius, color, -1)

    # Compare two adjacent 7x7 patches (7 is the side length used by this test).
    h_h = 7
    img1 = img[:h_h, :h_h]
    img2 = img[h_h:h_h * 2, h_h:h_h * 2]
    score = ssima(img1, img2, multichannel=True)

    print(score)

    # 3. Show the result.
    cv2.imshow("img", img)
    cv2.waitKey()
    cv2.destroyAllWindows()


if __name__ == '__main__':
    main()

二、余弦距离

余弦相似度用向量空间中两个向量夹角的余弦值作为衡量两个个体间差异的大小。两个向量越相似夹角越小,余弦值越接近1。相比距离度量,余弦相似度更加注重两个向量在方向上的差异,而非距离或长度上的差异。

import numpy as np
from scipy.spatial.distance import pdist
# Two random 10-element sample vectors.
x = np.random.random(10)
y = np.random.random(10)

# Method 1: cosine distance from the definition, 1 - (x . y) / (|x| * |y|).
norm_product = np.linalg.norm(x) * np.linalg.norm(y)
dist1 = 1 - np.dot(x, y) / norm_product
# Method 2: let SciPy compute the pairwise cosine distance of the two rows.
dist2 = pdist(np.vstack([x, y]), 'cosine')

# Print the inputs and both results, one labelled value per line.
for label, value in (('x', x), ('y', y), ('dist1', dist1), ('dist2', dist2)):
    print(label, value)

三、汉明距离(效率高,计算速度快)

汉明距离表示两个(相同长度)字对应位不同的数量,我们以d(x,y)表示两个字x,y之间的汉明距离。对两个字符串进行异或运算,并统计结果为1的个数,那么这个数就是汉明距离。

向量相似度越高,对应的汉明距离越小。如10001001和10010001有2位不同。

#比较两张图片的相似度
from PIL import Image
from functools import reduce
import time

# Compute the image hash
def phash(img):
    """Return a 64-bit hash of a PIL image as an int.

    The image is shrunk to 8x8 and converted to grayscale.  Bit ``i`` of the
    result is 1 when pixel ``i`` is at least as bright as the mean of the 64
    pixels, and 0 otherwise.

    NOTE: despite the name, this thresholds against the mean brightness
    (an "average hash"); no frequency transform is involved.
    """
    # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS names the same
    # resampling filter and exists in old and new Pillow releases alike.
    small = img.resize((8, 8), Image.LANCZOS).convert('L')
    # Materialise the pixels once instead of decoding them twice.
    pixels = list(small.getdata())
    avg = sum(pixels) / 64.
    # Each pixel owns a distinct bit position, so summing the set bits is
    # equivalent to OR-ing them together.
    return sum(1 << i for i, value in enumerate(pixels) if value >= avg)
# Compute the Hamming distance
def hamming_distance(a, b):
    """Return how many binary digits differ between the integers *a* and *b*."""
    differing_bits = bin(a ^ b)
    # XOR leaves a 1 exactly where the two inputs disagree; count those digits.
    return sum(1 for digit in differing_bits if digit == '1')
# Decide whether two images are similar
def is_imgs_similar(img1, img2, max_distance=5):
    """Return True when the hashes of *img1* and *img2* are close enough.

    Args:
        img1: First image, passed to ``phash``.
        img2: Second image, passed to ``phash``.
        max_distance: Largest Hamming distance between the two hashes that
            still counts as similar.  Defaults to 5.

    Returns:
        bool: True if the Hamming distance is at most *max_distance*.
    """
    return hamming_distance(phash(img1), phash(img2)) <= max_distance
if __name__ == '__main__':
    img1_path = 'F:\\project_py\\my_study\\Image\\003.jpg'
    img2_path = "F:\\project_py\\my_study\\Image\\006.jpg"

    # Open both images in a with-block so their file handles are closed even
    # if hashing raises.
    with Image.open(img1_path) as img1, Image.open(img2_path) as img2:
        # perf_counter is a monotonic clock intended for measuring intervals.
        start_time = time.perf_counter()
        a = is_imgs_similar(img1, img2)
        end_time = time.perf_counter()

    # Print the verdict followed by the elapsed time in seconds.
    print(a, end_time - start_time)

过滤相似图片,小图片

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2019/1/15 9:19
# @Author  : xiaodai
import os
import cv2
from skimage.measure import compare_ssim
import datetime
import shutil
def yidong(filename1, filename2):
    """Move the file at *filename1* to the destination *filename2*."""
    shutil.move(src=filename1, dst=filename2)
def delete(filename1):
    """Remove the file at *filename1* from disk."""
    os.unlink(filename1)
def get_time(now):
    """Print the time elapsed since the datetime *now*, prefixed with 'real_time:'."""
    elapsed = datetime.datetime.now() - now
    print('real_time:', elapsed)
def _collect_jpgs(img_dir):
    """Return the sorted paths of every ``.jpg`` file below *img_dir*."""
    # Sorting makes "neighbouring" images deterministic on every platform.
    return sorted(
        os.path.join(rootdir, name)
        for rootdir, _, names in os.walk(img_dir)
        for name in names
        if name.endswith('.jpg')
    )


def _load_thumbnail(filename):
    """Read *filename* and shrink it to 46x46; return None if it cannot be read."""
    img = cv2.imread(filename)
    if img is None:
        return None
    return cv2.resize(img, (46, 46), interpolation=cv2.INTER_CUBIC)


def _filter_images(img_files):
    """Delete tiny files and images that nearly duplicate their predecessor.

    Files smaller than 512 bytes are deleted immediately.  Every remaining
    image is compared (SSIM on 46x46 thumbnails) with the readable image
    before it; when the score exceeds 0.9 the later image is deleted once the
    whole list has been scanned.
    """
    duplicates = []
    prev_name, prev_img = None, None
    for filename in img_files:
        if not os.path.exists(filename):
            print('not exist', filename)
            continue
        # The size filter applies to every file, including the first one.
        if os.path.getsize(filename) < 512:
            delete(filename)
            continue
        img = _load_thumbnail(filename)
        if img is None:
            # Skip files OpenCV cannot decode instead of crashing in resize.
            print('unreadable', filename)
            continue
        if prev_img is not None:
            # NOTE(review): compare_ssim comes from the file's skimage.measure
            # import; newer scikit-image releases expose this function as
            # skimage.metrics.structural_similarity - confirm the installed version.
            ssim = compare_ssim(prev_img, img, multichannel=True)
            if ssim > 0.9:
                duplicates.append(filename)
                print(prev_name, filename, ssim)
            else:
                print('small_ssim', prev_name, filename, ssim)
        # A flagged duplicate still serves as the reference for the next
        # image, so comparisons stay between consecutive images.
        prev_name, prev_img = filename, img
    # Deletion is deferred so that no file disappears while it may still be
    # needed as a comparison reference.
    for image in duplicates:
        delete(image)
        print('delete', image)


if __name__ == '__main__':
    now = datetime.datetime.now()
    or_path = r'G:\_test_xinjiang\0625'
    # Visit each entry of or_path.  Iterating the string itself would yield
    # single characters, so the directory listing is used and joined properly.
    for di in os.listdir(or_path):
        path = os.path.join(or_path, di)
        for root, dirs, _ in os.walk(path):
            for dirc in dirs:
                # Only folders named 'JPEGImages' are cleaned.
                if dirc == 'JPEGImages':
                    _filter_images(_collect_jpgs(os.path.join(root, dirc)))
    get_time(now)


  • 0
    点赞
  • 15
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

AI算法网奇

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值