为了进行图片相似度匹配,最近在做java的dHash,pHash和aHash等算法
读取网络图片ImageIO.read()之后,再读取每个像素点上的getRGB
发现每次得到的rgb值都会存在差异,导致hash算法对同一个url的图片每次生成的指纹都不一致
造成匹配结果有误差
import javax.imageio.ImageIO;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
/**
 * Difference hash (dHash) utility for image similarity matching.
 * <p>
 * Steps:
 * <ol>
 *   <li>scale the image down to 9x8 pixels;</li>
 *   <li>convert every pixel to a gray level;</li>
 *   <li>for each row compare horizontally adjacent pixels, giving 8x8 bits
 *       (1 when the left pixel is brighter than the right one, otherwise 0);</li>
 *   <li>concatenate the bits into the hash string.</li>
 * </ol>
 */
public class DHashUtil {

    /** Width of the scaled image; 9 columns give 8 adjacent-pixel differences per row. */
    private static final int SCALED_WIDTH = 9;

    /** Height of the scaled image, i.e. the number of rows in the hash. */
    private static final int SCALED_HEIGHT = 8;

    /**
     * Downloads the image at the given URL and computes its dHash.
     *
     * @param url address of the image
     * @return a 64-character string of '0'/'1', or {@code null} when the image could not
     *         be loaded ({@link #getHammingDistance(String, String)} reports -1 for a
     *         {@code null} hash)
     */
    public static String getDHash(String url) {
        return computeDHash(getRemoteBufferedImage(url));
    }

    /**
     * Computes the dHash of an image that is already in memory.
     *
     * @param srcImage image to hash, may be {@code null}
     * @return a 64-character string of '0'/'1', or {@code null} when {@code srcImage} is {@code null}
     */
    public static String computeDHash(BufferedImage srcImage) {
        if (srcImage == null) {
            return null;
        }
        // Scale to 9x8, the size commonly used for this algorithm.
        BufferedImage scaled = new BufferedImage(SCALED_WIDTH, SCALED_HEIGHT, BufferedImage.TYPE_INT_RGB);
        Graphics graphics = scaled.getGraphics();
        try {
            graphics.drawImage(
                    srcImage.getScaledInstance(SCALED_WIDTH, SCALED_HEIGHT, Image.SCALE_SMOOTH), 0, 0, null);
        } finally {
            // Release the graphics context instead of waiting for finalization.
            graphics.dispose();
        }

        StringBuilder figure = new StringBuilder((SCALED_WIDTH - 1) * SCALED_HEIGHT);
        for (int y = 0; y < SCALED_HEIGHT; y++) {
            // Only the gray level of the pixel to the left is needed for the comparison.
            int previousGray = toGray(scaled.getRGB(0, y));
            for (int x = 1; x < SCALED_WIDTH; x++) {
                int currentGray = toGray(scaled.getRGB(x, y));
                figure.append(previousGray > currentGray ? '1' : '0');
                previousGray = currentGray;
            }
        }
        return figure.toString();
    }

    /** Converts a packed RGB value to a gray level using integer weights 30/59/11 (percent). */
    private static int toGray(int rgb) {
        int r = (rgb >> 16) & 0xff;
        int g = (rgb >> 8) & 0xff;
        int b = rgb & 0xff;
        return (r * 30 + g * 59 + b * 11) / 100;
    }

    /**
     * Computes the Hamming distance between two hash strings.
     * <p>
     * Originally an error-detection/correction measure; here it is used as a similarity
     * score. According to the original author's rule of thumb, a distance below 5 means
     * the two hashes belong to the same picture.
     *
     * @param str1 first hash
     * @param str2 second hash
     * @return number of positions at which the strings differ, or -1 when either string is
     *         {@code null} or their lengths differ
     */
    public static long getHammingDistance(String str1, String str2) {
        if (str1 == null || str2 == null || str1.length() != str2.length()) {
            return -1;
        }
        int distance = 0;
        for (int i = 0; i < str1.length(); i++) {
            if (str1.charAt(i) != str2.charAt(i)) {
                distance++;
            }
        }
        return distance;
    }

    /**
     * Downloads and decodes a remote image.
     *
     * @param imageURL address of the image
     * @return the decoded image, or {@code null} when the URL is malformed, reading fails,
     *         or {@code ImageIO.read} returns {@code null} for the stream
     */
    public static BufferedImage getRemoteBufferedImage(String imageURL) {
        InputStream is = null;
        try {
            URL url = new URL(imageURL);
            is = url.openStream();
            return ImageIO.read(is);
        } catch (MalformedURLException e) {
            e.printStackTrace();
            System.out.println("imageURL: " + imageURL + ",无效!");
            return null;
        } catch (IOException e) {
            e.printStackTrace();
            System.out.println("imageURL: " + imageURL + ",读取失败!");
            return null;
        } finally {
            if (is != null) {
                try {
                    is.close();
                } catch (IOException e) {
                    // A failed close must not discard an image that was already decoded,
                    // so this is only reported; no return from the finally block.
                    e.printStackTrace();
                    System.out.println("imageURL: " + imageURL + ",流关闭异常!");
                }
            }
        }
    }

    /**
     * Tells whether two images have the same dimensions and identical RGB values at every pixel.
     */
    private static boolean hasSamePixels(BufferedImage first, BufferedImage second) {
        if (first.getWidth() != second.getWidth() || first.getHeight() != second.getHeight()) {
            return false;
        }
        for (int y = 0; y < first.getHeight(); y++) {
            for (int x = 0; x < first.getWidth(); x++) {
                if (first.getRGB(x, y) != second.getRGB(x, y)) {
                    return false;
                }
            }
        }
        return true;
    }

    /**
     * Demo: downloads the same URL twice and reports whether the two decoded images are
     * pixel-for-pixel identical. The command-line arguments are not used.
     */
    public static void main(String[] args) {
        String imageUrl = "https://wx.qlogo.cn/mmhead/ver_1/rWO0bKnehTEeH64ibicoWESgJSUOtQiaqJPwx0003aszhPlUhpTc8ZuGwhF3Rr1ibVmJhfKBN2DSHF7WG3vHHCHyZA/96";
        BufferedImage srcImage1 = getRemoteBufferedImage(imageUrl);
        BufferedImage srcImage2 = getRemoteBufferedImage(imageUrl);
        if (srcImage1 == null || srcImage2 == null) {
            System.out.println("图片读取失败,无法比较!");
            return;
        }
        if (hasSamePixels(srcImage1, srcImage2)) {
            System.out.println("相同!");
        } else {
            System.out.println("不相同!");
        }
    }
}
执行main的结果是"不相同",原因尚不清楚,网上也没有查到如何避免这一问题的相关资料,不知道有没有人能给出解答?