计算两张图片的相似性:利用官方的 imagehash Python 代码(https://github.com/zhangsong8/imagehash)测试图片相似度。算法源代码都在该工程里面,使用方便,简单易懂。
下面是我自己写的 Python 测试脚本,用于批量计算图片之间的汉明距离。
#-*- coding: utf-8 -*-
#!/usr/bin/env python
from PIL import Image
import imagehash
import os
import random
import numpy
# Module-level accumulator: hash_similarity1() and hash_similarity2() append one
# hash difference (hash - otherhash) per compared image pair; the __main__
# script reads it and clears it after each class directory.
hash_result = []
def get_files(dir, suffix):
    """Recursively collect the files under ``dir`` whose extension is in ``suffix``.

    Args:
        dir: Root directory to walk. (The name shadows the built-in ``dir``;
            it is kept so existing keyword callers keep working.)
        suffix: Container of extensions including the leading dot,
            e.g. ``['.jpg']``. Matching is case-sensitive.

    Returns:
        A list of paths (walk root joined with the file name), in the order
        ``os.walk`` yields them. Empty when nothing matches.
    """
    res = []
    for root, _directories, files in os.walk(dir):
        for filename in files:
            _name, suf = os.path.splitext(filename)
            if suf in suffix:
                res.append(os.path.join(root, filename))
    return res
def hash_similarity1(image_path):
    """Compare every '.jpg' under ``image_path`` with every other one using whash.

    The file list is shuffled, then for each image the wavelet hash is
    compared against each of the other images. Every ordered pair is visited,
    so each unordered pair contributes two entries. For every comparison the
    two paths, the hash difference, the hash and its type are printed, and the
    difference is appended to the module-level ``hash_result`` list.

    Args:
        image_path: Directory that is searched recursively for '.jpg' files.
    """
    image_list = get_files(image_path, ['.jpg'])
    random.shuffle(image_list)
    total_len = len(image_list)
    print('total_label_len', total_len)
    for i in range(total_len):
        # The hash of image i does not depend on j, so compute it once per
        # outer iteration; the context manager closes the file afterwards.
        with Image.open(image_list[i]) as current_image:
            current_hash = imagehash.whash(current_image)
        # Offsets 1 .. total_len-1 reach every *other* image exactly once;
        # negative indices wrap around to the end of the list.
        for j in range(1, total_len):
            other_path = image_list[i - j]
            print(image_list[i])
            with Image.open(other_path) as other_image:
                other_hash = imagehash.whash(other_image)
            distance = current_hash - other_hash
            print(other_path)
            print(distance)
            print(current_hash)
            print(type(current_hash))
            hash_result.append(distance)
def hash_similarity2(image_path1, image_path2, hashfunc=imagehash.average_hash):
    """Compare every '.jpg' under ``image_path1`` with every '.jpg' under ``image_path2``.

    Both file lists are shuffled. For each (image1, image2) pair the two
    paths and the difference of their hashes are printed, and the difference
    is appended to the module-level ``hash_result`` list.

    Args:
        image_path1: Directory searched recursively for the first image set.
        image_path2: Directory searched recursively for the second image set.
        hashfunc: Callable that maps a PIL image to a hash object supporting
            subtraction (average_hash, phash, dhash, whash, ...).
    """
    image_list1 = get_files(image_path1, ['.jpg'])
    image_list2 = get_files(image_path2, ['.jpg'])
    random.shuffle(image_list1)
    random.shuffle(image_list2)
    print('total_label_len', len(image_list1))
    print('total_label_len', len(image_list2))

    def _hash_of(path):
        # Close the file as soon as its hash has been computed.
        with Image.open(path) as image:
            return hashfunc(image)

    # Hash each image of the second set once instead of once per pair;
    # skipped entirely when there is nothing to compare against.
    hashes2 = [_hash_of(path) for path in image_list2] if image_list1 else []

    for path1 in image_list1:
        hash1 = _hash_of(path1)
        for path2, hash2 in zip(image_list2, hashes2):
            distance = hash1 - hash2
            print(path1)
            print(path2)
            print(distance)
            hash_result.append(distance)
def average_hash(image, hash_size=8, mean=numpy.mean):
    """Local re-implementation of the average-hash algorithm.

    The image is converted to greyscale, shrunk to ``hash_size`` x
    ``hash_size`` pixels, and every pixel is compared with the mean of all
    pixels.

    Args:
        image: PIL image to hash.
        hash_size: Edge length of the reduced image; the hash has
            ``hash_size ** 2`` bits.
        mean: Function that reduces the pixel array to the threshold value.

    Returns:
        ``imagehash.ImageHash`` wrapping the boolean array ``pixels > mean``.
    """
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
    # under its current name.
    image = image.convert("L").resize((hash_size, hash_size), Image.LANCZOS)
    pixels = numpy.asarray(image)
    avg = mean(pixels)
    diff = pixels > avg
    return imagehash.ImageHash(diff)
if __name__ == '__main__':
    # Script entry point: compare the reference directory big\class4 with
    # small\class1 .. small\class7 and print, per class, every hash
    # difference plus the per-class average.

    # Selectable hash algorithms: ahash phash dhash whash
    hashmethod = 'ahash'
    hash_functions = {
        'ahash': imagehash.average_hash,
        'phash': imagehash.phash,
        'dhash': imagehash.dhash,
        'whash': imagehash.whash,
    }
    if hashmethod not in hash_functions:
        # Fail with a clear message instead of a NameError further down.
        raise ValueError('unknown hash method: %r' % (hashmethod,))
    hashfunc = hash_functions[hashmethod]

    image_path = r'C:\Users\Administrator\Desktop\similarity\small\class'
    image_i = 1
    hash_results = []
    ave_hash_results = []
    for j in range(image_i, 8):
        image_path2 = image_path + str(j)
        hash_similarity2(r'C:\Users\Administrator\Desktop\similarity\big\class4', image_path2, hashfunc=hashfunc)
        # hash_result is the shared accumulator filled by hash_similarity2;
        # snapshot it, then reset it for the next class directory.
        hash_results.append(hash_result[:])
        if hash_result:
            ave_hash_results.append(sum(hash_result) / len(hash_result))
        else:
            # No image pairs were found for this class; avoid dividing by zero.
            ave_hash_results.append(float('nan'))
        hash_result.clear()

    for print_hash_result in hash_results:
        print(print_hash_result)
    print(ave_hash_results)

    # Exercise the local average_hash() implementation on one sample image.
    # Done last so that a problem here cannot hide the results printed above.
    with Image.open(r'C:\Users\Administrator\Desktop\similarity\big\class1\22_001_2.jpg') as sample_image:
        average_hash(sample_image)
测试效果如下表:
下面是我自己用 C++(基于 OpenCV)实现的代码:
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <iostream>
#include <vector>
using namespace std;
using namespace cv;
// Hamming distance between two unsigned integers: the number of bit
// positions at which x and y differ.
int hamming_distance(unsigned x, unsigned y)
{
    int differing_bits = 0;
    // Every set bit of x ^ y marks a position where the inputs disagree.
    unsigned remaining = x ^ y;
    while (remaining != 0)
    {
        // Clear the lowest set bit; one loop iteration per differing bit.
        remaining &= remaining - 1;
        ++differing_bits;
    }
    return differing_bits;
}
// Hamming distance between two NUL-terminated strings: the number of
// character positions at which x and y differ.
//
// x, y   : NUL-terminated strings (must not be null), e.g. the '0'/'1' hash
//          strings produced by pHashValue().
// return : count of mismatching positions. When the strings differ in
//          length, every surplus character of the longer one counts as a
//          mismatch.
int hamming_distance2(const char* x, const char* y)
{
    int dist = 0;
    int i = 0;
    // Compare only while BOTH strings still have characters, so neither
    // string is ever read past its terminator.
    for (; x[i] != '\0' && y[i] != '\0'; i++)
    {
        if (x[i] != y[i])
            dist++;
    }
    // At most one of the strings has characters left; each of them is a
    // position with no counterpart in the other string.
    const char* tail = (x[i] != '\0') ? (x + i) : (y + i);
    for (; *tail != '\0'; ++tail)
    {
        dist++;
    }
    return dist;
}
// Image fingerprint as a string of '0'/'1' characters.
//
// Despite the "pHash" name no DCT is performed: the image is reduced to an
// 8x8 grey image and every pixel is compared with the image mean (an
// average-hash scheme).
//
// src0   : input image; it is cloned, so the caller's Mat is not modified.
//          3-channel input is converted from BGR to grey first.
// return : 64 characters in row-major order, '1' where the pixel is >= the
//          integer-truncated mean and '0' otherwise.
//
// Side effect: prints the channel means and the truncated mean to stdout.
// NOTE(review): the threshold is the mean truncated to int and the test is
// ">=", while the Python average_hash uses "pixel > mean" with the exact
// mean; kept as-is so existing hash values do not change.
string pHashValue(Mat &src0)
{
    Mat src = src0.clone();

    // Step 1: collapse colour input to a single grey channel.
    if (src.channels() == 3)
    {
        cvtColor(src, src, cv::COLOR_BGR2GRAY);
    }

    // Step 2: switch to double precision and shrink to 8x8.
    Mat img = Mat_<double>(src);
    resize(img, img, Size(8, 8));

    // Step 3: mean grey level (computed once and reused for the log lines).
    const Scalar channel_means = cv::mean(img);
    const int average = static_cast<int>(channel_means.val[0]);
    printf("灰度图像的mean均值为:%lf %lf %lf\n", channel_means.val[0], channel_means.val[1], channel_means.val[2]);
    printf("灰度图像的mean均值为:%d \n", average);

    // Step 4: threshold against the mean. Comparing with the int value
    // directly avoids the 8-bit wrap-around a uchar cast would cause for
    // images deeper than 8 bits.
    Mat mask = (img >= static_cast<double>(average));

    // Step 5: serialise the mask row by row.
    string hash_bits;
    hash_bits.reserve(mask.total());
    for (int i = 0; i < mask.rows; i++)
    {
        const uchar *row = mask.ptr<uchar>(i);
        for (int j = 0; j < mask.cols; j++)
        {
            hash_bits.push_back(row[j] == 0 ? '0' : '1');
        }
    }
    return hash_bits;
}
// Count the pixels of a single-channel image whose value is greater than 10.
//
// Intended for binarised images (pixels are 0 or 255), where this equals the
// number of non-zero pixels; the threshold of 10 tolerates small noise.
//
// src    : single-channel image of any depth (8-bit, double, ...).
// return : number of pixels > 10; 0 for an empty image.
int bSums(Mat src)
{
    if (src.empty())
        return 0;
    // Build the comparison mask with the matrix's real element type instead
    // of reinterpreting its bytes as uchar, then count the marked pixels.
    Mat above_threshold = src > 10;
    return countNonZero(above_threshold);
}
// Image-block record: an image plus a position and a flag.
// NOTE(review): "RIO" is presumably a misspelling of ROI (region of
// interest) — confirm before renaming. The struct is not used by the code
// visible in this file.
struct FraRIO
{
// Image data of the block.
Mat frameRIO;
// Presumably the x coordinate of the block within its source frame — TODO confirm.
int point_x;
// Presumably the y coordinate of the block within its source frame — TODO confirm.
int point_y;
// Status flag for the block; its meaning is not established in this file.
// Note: the int/bool members have no initialiser and start indeterminate.
bool RIO_flag;
};
void ImgMean(float& c1, float& c2, float& c3, Mat pImg)
{
int nPixel = pImg.rows*pImg.cols; // number of pixels in image
c1 = 0; c2 = 0; c3 = 0;
//累加各通道的值
MatConstIterator_<Vec3b> it = pImg.begin<Vec3b>();
MatConstIterator_<Vec3b> itend = pImg.end<Vec3b>();
while (it != itend)
{
c1 += (*it)[0];
c2 += (*it)[1];
c3 += (*it)[2];
it++;
}
//累加各通道的值
c1 = c1 / nPixel;
c2 = c2 / nPixel;
c3 = c3 / nPixel;
}
//统计学计算相似度
int algorithm(Mat &src1,Mat &src2)
{
Mat img1,img2;
Mat frame, tem_frame;
/* 第一步,缩放尺寸*/
resize(src1, src1, Size(30,30));
resize(src2, src2, Size(30,30));
/* 第二步,转换成灰度图*/
if(src1.channels()==3)
{
cvtColor(src1,src1,CV_BGR2GRAY);
img1 = Mat_<double>(src1);
}
else
{
img1 = Mat_<double>(src1);
}
if(src2.channels()==3)
{
cvtColor(src2,src2,CV_BGR2GRAY);
img2 = Mat_<double>(src2);
}
else
{
img2 = Mat_<double>(src2);
}
//两帧做差
absdiff(img1, img2, tem_frame);
//threshold(tem_frame, tem_frame, 2, 255, CV_THRESH_BINARY);
//找出像素大于0的区块
if (bSums(tem_frame)>50){
printf("相似值%d 两张图片不相似\n",bSums(tem_frame));
}else{
printf("不相似值%d 两张图片相似\n",bSums(tem_frame));
}
}
int main()
{
int hash_threshold_value = 10;
string src1_path = "/root/zhangsong/similarity/data/similarity/class1/22_001_2.jpg";
string src2_path = "/root/zhangsong/similarity/data/similarity/class1/22_017_4.jpg"; //22_030_9.jpg 22_017_4.jpg
//string src2_path = "/root/zhangsong/similarity/data/similarity/others/21_034_21.jpg";
//string src2_path = "/root/zhangsong/similarity/data/similarity/others/21_034_21.jpg";//21_034_18.jpg 21_034_22.jpg 21_034_21.jpg
Mat src1 = imread(src1_path);
Mat src2 = imread(src2_path);
#if 0
algorithm(src1,src2);
#else
string src1_hash= pHashValue(src1);
cout<<"**********"<<endl;
string src2_hash= pHashValue(src2);
cout<<src1_hash<<endl;
cout<<src2_hash<<endl;
cout<<"**********"<<endl;
int hash_result = hamming_distance2(src1_hash.c_str(),src2_hash.c_str());
if(hash_result > hash_threshold_value)
{
printf("明汉距离是:%d两张图片不相似\n",hash_result);
}else
{
printf("明汉距离是:%d两张图片相似\n",hash_result);
}
#endif
return 0;
}