利用hash算法计算图片相似度

最新推荐文章于 2025-04-09 22:01:28 发布

AI松子666

最新推荐文章于 2025-04-09 22:01:28 发布

阅读量1k

点赞数

文章标签： opencv python cv 计算机视觉深度学习

本文链接：https://blog.csdn.net/qq_39523365/article/details/113619015

版权

计算两张图片的相似度：利用官方的 Python 图像哈希库 imagehash（https://github.com/zhangsong8/imagehash）测试图片相似度，各哈希算法的源代码都在该工程里面，使用方便，简单易懂。
下面是我自己写的 Python 测试脚本，用于批量计算图片之间的汉明距离。

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from PIL import Image
import imagehash
import os  
import random
import numpy
# Module-level accumulator of Hamming distances: hash_similarity1 and
# hash_similarity2 append to it, and the script section below copies it and
# clears it after each directory comparison.
hash_result = []
def get_files(dir, suffix):
    """Recursively collect files under *dir* whose extension is in *suffix*.

    Args:
        dir: Root directory to walk; subdirectories are included.
        suffix: Container of extensions with the leading dot
            (e.g. ``['.jpg']``). The comparison is case-sensitive.

    Returns:
        A list of paths (walked directory joined with the file name), in
        the order ``os.walk`` produces them.
    """
    matches = []
    for folder, _subdirs, names in os.walk(dir):
        matches.extend(
            os.path.join(folder, entry)
            for entry in names
            if os.path.splitext(entry)[1] in suffix
        )
    return matches

def hash_similarity1(image_path):
    """Print and record the wavelet-hash distance of every image pair.

    All ``.jpg`` files found under *image_path* (via ``get_files``) are
    shuffled, hashed once each with ``imagehash.whash`` and then compared
    pairwise. Every unordered pair is compared exactly once; the previous
    ``image_list[i - j]`` indexing skipped the comparison entirely when only
    two images were present and weighted some pairs twice.

    For each pair the two paths, the Hamming distance, the first image's
    hash and its type are printed, and the distance is appended to the
    module-level ``hash_result`` list.

    Args:
        image_path: Directory containing the images to compare.
    """
    image_list = get_files(image_path, ['.jpg'])
    random.shuffle(image_list)
    total_len = len(image_list)
    print('total_label_len', total_len)

    # Hash each image once up front instead of once per pair, and close the
    # file handle as soon as the hash has been computed.
    # Other available algorithms: average_hash, phash, dhash.
    hashes = []
    for path in image_list:
        with Image.open(path) as img:
            hashes.append(imagehash.whash(img))

    for i in range(total_len):
        for j in range(i + 1, total_len):
            # Subtracting two ImageHash objects yields their Hamming distance.
            distance = hashes[i] - hashes[j]
            print(image_list[i])
            print(image_list[j])
            print(distance)
            print(hashes[i])
            print(type(hashes[i]))
            hash_result.append(distance)

def hash_similarity2(image_path1, image_path2, hashfunc=imagehash.average_hash):
    """Print and record hash distances between two directories of images.

    Every ``.jpg`` under *image_path1* is compared with every ``.jpg`` under
    *image_path2*. For each pair the two paths and the Hamming distance are
    printed, and the distance is appended to the module-level
    ``hash_result`` list.

    Args:
        image_path1: Directory holding the first set of images.
        image_path2: Directory holding the second set of images.
        hashfunc: Callable that maps a PIL image to a hash object supporting
            subtraction (e.g. ``imagehash.average_hash``, ``phash``,
            ``dhash`` or ``whash``).
    """
    def _hash_all(paths):
        # Hash each file exactly once and release its file handle right away.
        hashes = []
        for path in paths:
            with Image.open(path) as img:
                hashes.append(hashfunc(img))
        return hashes

    image_list1 = get_files(image_path1, ['.jpg'])
    image_list2 = get_files(image_path2, ['.jpg'])
    random.shuffle(image_list1)
    random.shuffle(image_list2)
    print('total_label_len', len(image_list1))
    print('total_label_len', len(image_list2))

    # Hashing is loop-invariant: doing it here costs len1 + len2 image
    # decodes instead of 2 * len1 * len2 inside the nested loop.
    hashes1 = _hash_all(image_list1)
    hashes2 = _hash_all(image_list2)

    for path1, hash1 in zip(image_list1, hashes1):
        for path2, hash2 in zip(image_list2, hashes2):
            distance = hash1 - hash2
            print(path1)
            print(path2)
            print(distance)
            hash_result.append(distance)

def average_hash(image, hash_size=8, mean=numpy.mean):
    """Compute the average hash (aHash) of a PIL image.

    The image is converted to greyscale, shrunk to ``hash_size`` x
    ``hash_size`` pixels, and each pixel is compared with the mean
    brightness of the shrunken image.

    Args:
        image: PIL image to hash.
        hash_size: Edge length of the reduced image; the hash has
            ``hash_size ** 2`` bits.
        mean: Function that reduces the pixel array to the threshold value
            (``numpy.mean`` by default; ``numpy.median`` is an alternative).

    Returns:
        An ``imagehash.ImageHash`` wrapping the boolean array in which
        ``True`` marks a pixel brighter than the threshold.
    """
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    image = image.convert("L").resize((hash_size, hash_size), Image.LANCZOS)
    pixels = numpy.asarray(image)
    avg = mean(pixels)
    diff = pixels > avg
    return imagehash.ImageHash(diff)

if __name__ == '__main__':
    # Batch driver: compare one reference directory against the numbered
    # "class<N>" directories and report every distance plus the per-directory
    # mean distance.

    # Supported hash algorithms, keyed by short name.
    hash_methods = {
        'ahash': imagehash.average_hash,
        'phash': imagehash.phash,
        'dhash': imagehash.dhash,
        'whash': imagehash.whash,
    }
    hashmethod = 'ahash'
    hashfunc = hash_methods[hashmethod]

    # Prefix of the numbered directories: class1, class2, ... class7.
    image_path = r'C:\Users\Administrator\Desktop\similarity\small\class'
    reference_path = r'C:\Users\Administrator\Desktop\similarity\big\class4'
    image_i = 1
    hash_results = []
    ave_hash_results = []

    for j in range(image_i, 8):
        image_path2 = image_path + str(j)
        hash_similarity2(reference_path, image_path2, hashfunc=hashfunc)
        # Copy before clearing: hash_result is the shared module-level list.
        hash_results.append(hash_result[:])
        if hash_result:
            ave_hash_results.append(sum(hash_result) / len(hash_result))
        else:
            # A missing or empty directory yields no pairs; record NaN so the
            # averages stay aligned with the directory index instead of
            # failing with ZeroDivisionError.
            print('no image pairs found for', image_path2)
            ave_hash_results.append(float('nan'))
        hash_result.clear()

    # Exercise the local average_hash implementation on a single sample image.
    with Image.open(r'C:\Users\Administrator\Desktop\similarity\big\class1\22_001_2.jpg') as sample:
        average_hash(sample)

    for print_hash_result in hash_results:
        print(print_hash_result)
    print(ave_hash_results)

测试效果如下表：
在这里插入图片描述

下面是我自己用 C++ 实现的代码：

#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <iostream>
#include <vector>
using namespace std;
using namespace cv;


// Hamming distance between two unsigned integers: the number of bit
// positions in which x and y differ.
int hamming_distance(unsigned x, unsigned y)
{
    // XOR leaves a 1 exactly where the two inputs disagree.
    unsigned differing = x ^ y;
    int count = 0;

    // Each `differing &= differing - 1` clears the lowest set bit, so the
    // loop body runs once per differing bit.
    while (differing != 0)
    {
        differing &= differing - 1;
        ++count;
    }

    return count;
}
// Hamming distance between two NUL-terminated strings: the number of
// positions of x at which y holds a different character.
//
// The comparison walks the full length of x. If y is shorter than x, every
// position of x past the end of y counts as a difference and y is never read
// beyond its terminator. Characters of y past the end of x are ignored.
int hamming_distance2(const char* x, const char* y)
{
    int dist = 0;
    bool y_ended = false;

    for (int i = 0; x[i] != '\0'; i++)
    {
        // Once y's terminator has been seen, stop indexing y: reading
        // y[i] past it would be an out-of-bounds access.
        if (!y_ended && y[i] == '\0')
            y_ended = true;

        if (y_ended || x[i] != y[i])
            dist++;
    }

    return dist;
}

// Hash of an image as a string of '0'/'1' characters (called pHash in this
// file). The image is reduced to 8x8 grey samples and each sample is compared
// with the integer-truncated mean of the reduced image: '1' where the sample
// is >= that mean, '0' otherwise, in row-major order.
//
// Side effects kept from the original: the reduced image is written to
// "src.jpg" in the working directory and the mean values are printed.
string pHashValue(Mat &src0)
{
	// Work on a copy so the caller's image is never modified.
	Mat gray = src0.clone();

	// Three-channel input is reduced to a single grey channel first.
	if (gray.channels() == 3)
	{
		cvtColor(gray, gray, CV_BGR2GRAY);
	}

	// Convert to double precision, then shrink to 8x8 samples.
	Mat img = Mat_<double>(gray);
	resize(img, img, Size(8,8));

	// Debug dump of the reduced image.
	imwrite("src.jpg",img);
	waitKey(1);

	// The mean is computed once; the threshold is its integer truncation.
	const Scalar channel_means = cv::mean(img);
	int average = channel_means.val[0];

	printf("灰度图像的mean均值为：%lf %lf %lf\n", channel_means.val[0], channel_means.val[1], channel_means.val[2]);
	printf("灰度图像的mean均值为：%d \n", average);

	// Non-zero mask entries mark samples at or above the threshold.
	Mat mask= (img>=(uchar)average);

	// Emit one character per mask entry, row by row.
	string bits;
	for (int r = 0; r < mask.rows; ++r)
	{
		const uchar *row = mask.ptr<uchar>(r);
		for (int c = 0; c < mask.cols; ++c)
		{
			bits += (row[c] == 0) ? '0' : '1';
		}
	}

	return bits;
}


// Count the elements of src, read as uchar, whose value is greater than 10.
// Intended for binarised images, where every pixel is either 0 or 255.
int bSums(Mat src)
{
	int above = 0;

	// Walk every element through the uchar iterator interface.
	MatIterator_<uchar> cur = src.begin<uchar>();
	MatIterator_<uchar> stop = src.end<uchar>();
	while (cur != stop)
	{
		if (*cur > 10)
		{
			++above;
		}
		++cur;
	}

	return above;
}
// Image-block structure: one Mat plus two integer coordinates and a flag.
// NOTE(review): "RIO" is presumably a misspelling of ROI (region of
// interest) — confirm. Nothing in the visible part of this file uses it.
struct FraRIO
{
	// Pixel data of the block.
	Mat frameRIO;
	// Presumably the block's x position in its source frame — TODO confirm.
	int point_x;
	// Presumably the block's y position in its source frame — TODO confirm.
	int point_y;
	// Meaning is not visible in this file — TODO confirm.
	bool RIO_flag;

};

// Per-channel mean of an image whose elements are read as Vec3b
// (three 8-bit channels).
//
// c1, c2, c3 receive the mean of channel 0, 1 and 2 respectively.
// For an empty image all three are set to 0 instead of NaN.
void ImgMean(float& c1, float& c2, float& c3, Mat pImg)
{
	c1 = 0; c2 = 0; c3 = 0;

	const int nPixel = pImg.rows*pImg.cols;	// number of pixels in image
	if (nPixel <= 0)
	{
		// Nothing to average; avoid the 0/0 division.
		return;
	}

	// Accumulate in double: a float sum of 8-bit values stops being exact
	// after roughly 65k bright pixels, which skews the mean on large images.
	double sum0 = 0.0, sum1 = 0.0, sum2 = 0.0;

	MatConstIterator_<Vec3b> it = pImg.begin<Vec3b>();
	MatConstIterator_<Vec3b> itend = pImg.end<Vec3b>();
	for (; it != itend; ++it)
	{
		sum0 += (*it)[0];
		sum1 += (*it)[1];
		sum2 += (*it)[2];
	}

	// Turn the channel sums into means.
	c1 = static_cast<float>(sum0 / nPixel);
	c2 = static_cast<float>(sum1 / nPixel);
	c3 = static_cast<float>(sum2 / nPixel);
}
// Pixel-difference similarity check.
//
// Both images are resized to 30x30 and, when they have three channels,
// converted to grey. The absolute per-pixel difference is then taken and the
// pixels whose difference exceeds bSums' threshold are counted. More than 50
// such pixels is reported as "not similar".
//
// NOTE: src1 and src2 are resized and grey-converted in place, so the
// caller's images are modified (unchanged from the original behaviour).
//
// Returns the number of differing pixels. The function was declared int but
// previously returned nothing, which is undefined behaviour.
int algorithm(Mat &src1,Mat &src2)
{
	const int kDifferentPixelLimit = 50;
	Mat img1, img2;
	Mat tem_frame, diff8u;

	/* Step 1: bring both images to the same small size. */
	resize(src1, src1, Size(30,30));
	resize(src2, src2, Size(30,30));

	/* Step 2: convert colour input to greyscale, then to double precision. */
	if(src1.channels()==3)
	{
		cvtColor(src1,src1,CV_BGR2GRAY);
	}
	img1 = Mat_<double>(src1);

	if(src2.channels()==3)
	{
		cvtColor(src2,src2,CV_BGR2GRAY);
	}
	img2 = Mat_<double>(src2);

	// Absolute difference of the two frames.
	absdiff(img1, img2, tem_frame);

	// bSums walks the matrix as uchar, but tem_frame holds doubles; convert
	// to 8-bit first so each element is read as one pixel value.
	tem_frame.convertTo(diff8u, CV_8U);

	// Count once and reuse the value for the decision and the message.
	const int changed = bSums(diff8u);
	if (changed > kDifferentPixelLimit){
		printf("相似值%d  两张图片不相似\n",changed);
	}else{
		printf("不相似值%d  两张图片相似\n",changed);
	}

	return changed;
}


int main()
{
	int hash_threshold_value = 10;
	string src1_path = "/root/zhangsong/similarity/data/similarity/class1/22_001_2.jpg";
	string src2_path = "/root/zhangsong/similarity/data/similarity/class1/22_017_4.jpg"; //22_030_9.jpg 22_017_4.jpg
	//string src2_path = "/root/zhangsong/similarity/data/similarity/others/21_034_21.jpg";
	//string src2_path = "/root/zhangsong/similarity/data/similarity/others/21_034_21.jpg";//21_034_18.jpg   21_034_22.jpg  21_034_21.jpg
	Mat src1 = imread(src1_path);
	Mat src2 = imread(src2_path);
	#if 0
	algorithm(src1,src2);
	#else
	string src1_hash= pHashValue(src1);
	cout<<"**********"<<endl;
	string src2_hash= pHashValue(src2);
	cout<<src1_hash<<endl;
	cout<<src2_hash<<endl;
	cout<<"**********"<<endl;
	int hash_result = hamming_distance2(src1_hash.c_str(),src2_hash.c_str());
	if(hash_result > hash_threshold_value)
	{
		printf("明汉距离是：%d两张图片不相似\n",hash_result);
	}else
	{
		printf("明汉距离是：%d两张图片相似\n",hash_result);
	}
	#endif
	return 0;
}