本文主要介绍如何比较图片的相似度:利用平均hash算法,结果越接近1,图片就越相近。由于java读取ico文件有些坑,需要先下载一个jar再处理,这样就可以一起比较.jpg、.png、.ico的文件了。代码综合了各方大神的实现,在此向大神们致敬!算是自己留个笔记吧。
1.ico文件读取的jar,地址:https://download.csdn.net/download/airyearth/13218304
2.需要比较的图片:
由于.ico的图片不让上传,只能截图,各位自己去下载吧,目前大图为png,小图为ico的图标
3.直接上代码:
import com.ctreber.aclib.image.ico.ICOFile;
import org.springframework.web.multipart.MultipartFile;
import sun.misc.BASE64Decoder;
import javax.imageio.ImageIO;
import javax.swing.*;
import java.awt.*;
import java.awt.color.ColorSpace;
import java.awt.image.BufferedImage;
import java.awt.image.ColorConvertOp;
import java.io.ByteArrayInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.Arrays;
/**
 * Average-hash fingerprint of an image.
 *
 * <p>An image is scaled to {@code HASH_SIZE x HASH_SIZE} pixels, converted to 8-bit gray and
 * every pixel is replaced by 1 when it is at least as bright as the mean gray level, 0 otherwise.
 * Two fingerprints are compared by the fraction of positions that hold the same bit, so a
 * similarity of {@code 1.0} means identical fingerprints.
 *
 * <p>Besides the formats understood by {@link ImageIO}, ICO files are decoded through
 * {@code ICOFile}; only the first image stored in an ICO file is used.
 */
public final class ImageHashUtil {
    /** Side length of the square thumbnail the fingerprint is computed from. */
    private static final int HASH_SIZE = 16;
    /** Number of bits (one per byte, each 0 or 1) in an expanded fingerprint. */
    private static final int HASH_LENGTH = HASH_SIZE * HASH_SIZE;

    /** Expanded fingerprint: one element per thumbnail pixel, row by row. */
    private final byte[] binaryzationMatrix;

    /**
     * Creates a fingerprint from its expanded form.
     *
     * @param hashValue array of exactly {@code HASH_SIZE*HASH_SIZE} elements; it is copied, so
     *                  later changes to the caller's array do not affect this object
     * @throws IllegalArgumentException if {@code hashValue} is null or has the wrong length
     */
    public ImageHashUtil(byte[] hashValue) {
        if (hashValue == null || hashValue.length != HASH_LENGTH) {
            throw new IllegalArgumentException(String.format("length of hashValue must be %d", HASH_LENGTH));
        }
        this.binaryzationMatrix = hashValue.clone();
    }

    /**
     * Creates a fingerprint from a string of {@code '0'}/{@code '1'} characters.
     * Whitespace (for example the line breaks produced by {@link #toString()}) is ignored.
     *
     * @param hashValue textual fingerprint
     * @throws IllegalArgumentException if a non-whitespace character is neither '0' nor '1',
     *                                  or the number of bits is wrong
     */
    public ImageHashUtil(String hashValue) {
        this(toBytes(hashValue));
    }

    /**
     * Creates the fingerprint of an uploaded image.
     *
     * @param multipartFile uploaded image file
     * @throws IllegalArgumentException if the upload cannot be read or decoded as an image
     */
    public ImageHashUtil(MultipartFile multipartFile) {
        this(hashValue(getImage(multipartFile)));
    }

    /**
     * Creates the fingerprint of an image downloaded from a URL.
     *
     * @param url address of the image
     * @param b   not read by this class; it distinguishes this constructor from
     *            {@link #ImageHashUtil(String)}
     * @throws IllegalArgumentException if the image cannot be downloaded or decoded
     */
    public ImageHashUtil(String url, Boolean b) {
        this(hashValue(getImage(url, b)));
    }

    /**
     * Creates the fingerprint of an in-memory image.
     *
     * @param src image to fingerprint
     * @throws IllegalArgumentException if {@code src} is null
     */
    public ImageHashUtil(BufferedImage src) {
        this(hashValue(src));
    }

    /**
     * Computes the expanded fingerprint of an image.
     *
     * @throws IllegalArgumentException if {@code src} is null, which is what the
     *                                  {@code getImage} loaders return on failure
     */
    private static byte[] hashValue(BufferedImage src) {
        if (src == null) {
            // Fail with a clear message instead of a NullPointerException deep inside resize().
            throw new IllegalArgumentException("image is null (it could not be loaded or decoded)");
        }
        BufferedImage thumbnail = resize(src, HASH_SIZE, HASH_SIZE);
        byte[] gray = (byte[]) toGray(thumbnail).getData().getDataElements(0, 0, HASH_SIZE, HASH_SIZE, null);
        return binaryzation(gray);
    }

    /**
     * Creates a fingerprint from the packed form produced by {@link #compact()}.
     *
     * @param compactValue packed fingerprint, 8 bits per byte
     * @throws IllegalArgumentException if the unpacked length is wrong
     */
    public static ImageHashUtil createFromCompact(byte[] compactValue) {
        return new ImageHashUtil(uncompact(compactValue));
    }

    /**
     * Tells whether the array is a well-formed expanded fingerprint, i.e. whether it would be
     * accepted by {@link #ImageHashUtil(byte[])} and contains only 0 and 1.
     *
     * @param hashValue candidate fingerprint, may be null
     * @return true if the array has {@code HASH_SIZE*HASH_SIZE} elements that are all 0 or 1
     */
    public static boolean validHashValue(byte[] hashValue) {
        if (hashValue == null || hashValue.length != HASH_LENGTH) {
            return false;
        }
        for (byte bit : hashValue) {
            if (bit != 0 && bit != 1) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tells whether the string is a well-formed textual fingerprint, i.e. whether it would be
     * accepted by {@link #ImageHashUtil(String)}. Whitespace is ignored, as in that constructor.
     *
     * @param hashValue candidate fingerprint, may be null
     * @return true if, after removing whitespace, the string consists of exactly
     *         {@code HASH_SIZE*HASH_SIZE} characters that are all '0' or '1'
     */
    public static boolean validHashValue(String hashValue) {
        if (hashValue == null) {
            return false;
        }
        String bits = hashValue.replaceAll("\\s", "");
        if (bits.length() != HASH_LENGTH) {
            return false;
        }
        for (int i = 0; i < bits.length(); ++i) {
            char c = bits.charAt(i);
            if (c != '0' && c != '1') {
                return false;
            }
        }
        return true;
    }

    /**
     * Packs this fingerprint into 8 bits per byte.
     *
     * @return packed fingerprint; bit {@code i} is stored in byte {@code i/8} at bit position
     *         {@code i%8}, counting from the least significant bit
     */
    public byte[] compact() {
        return compact(binaryzationMatrix);
    }

    /**
     * Packs an expanded fingerprint (one 0/1 value per byte) into 8 bits per byte.
     *
     * @throws IllegalArgumentException if an element is neither 0 nor 1
     */
    private static byte[] compact(byte[] hashValue) {
        byte[] result = new byte[(hashValue.length + 7) >> 3];
        byte b = 0;
        for (int i = 0; i < hashValue.length; ++i) {
            if (0 == (i & 7)) {
                // First bit of a new output byte.
                b = 0;
            }
            if (1 == hashValue[i]) {
                b |= 1 << (i & 7);
            } else if (hashValue[i] != 0) {
                throw new IllegalArgumentException("invalid hashValue,every element must be 0 or 1");
            }
            if (7 == (i & 7) || i == hashValue.length - 1) {
                // Output byte is complete, or the input ended in the middle of a byte.
                result[i >> 3] = b;
            }
        }
        return result;
    }

    /** Expands a packed fingerprint into one 0/1 value per byte; inverse of {@link #compact(byte[])}. */
    private static byte[] uncompact(byte[] compactValue) {
        byte[] result = new byte[compactValue.length << 3];
        for (int i = 0; i < result.length; ++i) {
            result[i] = (byte) ((compactValue[i >> 3] & (1 << (i & 7))) == 0 ? 0 : 1);
        }
        return result;
    }

    /**
     * Converts a textual fingerprint into its expanded form, ignoring whitespace.
     *
     * @throws IllegalArgumentException if a non-whitespace character is neither '0' nor '1'
     */
    private static byte[] toBytes(String hashValue) {
        hashValue = hashValue.replaceAll("\\s", "");
        byte[] result = new byte[hashValue.length()];
        for (int i = 0; i < result.length; ++i) {
            char c = hashValue.charAt(i);
            if ('0' == c) {
                result[i] = 0;
            } else if ('1' == c) {
                result[i] = 1;
            } else {
                throw new IllegalArgumentException("invalid hashValue String");
            }
        }
        return result;
    }

    /** Scales an image to the given size and returns it as a {@code TYPE_3BYTE_BGR} image. */
    private static BufferedImage resize(Image src, int width, int height) {
        BufferedImage result = new BufferedImage(width, height, BufferedImage.TYPE_3BYTE_BGR);
        Graphics g = result.getGraphics();
        try {
            // The scaling algorithm determines the fingerprint; changing it would make new
            // fingerprints incomparable with previously stored ones.
            g.drawImage(src.getScaledInstance(width, height, Image.SCALE_SMOOTH), 0, 0, null);
        } finally {
            g.dispose();
        }
        return result;
    }

    /** Returns the rounded mean of the elements, each read as an unsigned value (0..255). */
    private static int mean(byte[] src) {
        long sum = 0;
        for (byte b : src) {
            sum += (long) b & 0xff;
        }
        return (int) (Math.round((float) sum / src.length));
    }

    /** Returns a new array holding 1 where the unsigned element is at least the mean, 0 elsewhere. */
    private static byte[] binaryzation(byte[] src) {
        byte[] dst = src.clone();
        int mean = mean(src);
        for (int i = 0; i < dst.length; ++i) {
            // Compare as unsigned values.
            dst[i] = (byte) (((int) dst[i] & 0xff) >= mean ? 1 : 0);
        }
        return dst;
    }

    /** Converts an image to {@code TYPE_BYTE_GRAY}; an image that already has that type is returned as is. */
    private static BufferedImage toGray(BufferedImage src) {
        if (src.getType() == BufferedImage.TYPE_BYTE_GRAY) {
            return src;
        }
        BufferedImage grayImage = new BufferedImage(src.getWidth(), src.getHeight(),
                BufferedImage.TYPE_BYTE_GRAY);
        new ColorConvertOp(ColorSpace.getInstance(ColorSpace.CS_GRAY), null).filter(src, grayImage);
        return grayImage;
    }

    /** Returns the fingerprint as '0'/'1' characters with a line break after every row. */
    @Override
    public String toString() {
        return toString(true);
    }

    /**
     * Returns the fingerprint as '0'/'1' characters.
     *
     * @param multiLine when true, a line break is appended after every {@code HASH_SIZE} characters
     * @return textual fingerprint, accepted by {@link #ImageHashUtil(String)}
     */
    public String toString(boolean multiLine) {
        StringBuilder buffer = new StringBuilder(HASH_LENGTH + HASH_SIZE);
        int count = 0;
        for (byte b : this.binaryzationMatrix) {
            buffer.append(0 == b ? '0' : '1');
            if (multiLine && ++count % HASH_SIZE == 0) {
                buffer.append('\n');
            }
        }
        return buffer.toString();
    }

    /** Two fingerprints are equal when all their bits are equal. */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof ImageHashUtil)) {
            return false;
        }
        return Arrays.equals(this.binaryzationMatrix, ((ImageHashUtil) obj).binaryzationMatrix);
    }

    /** Consistent with {@link #equals(Object)}, so fingerprints work as hash-map keys. */
    @Override
    public int hashCode() {
        return Arrays.hashCode(this.binaryzationMatrix);
    }

    /**
     * Compares this fingerprint with a packed fingerprint.
     *
     * @param compactValue packed fingerprint as produced by {@link #compact()}
     * @return fraction of equal bits, between 0 and 1
     */
    public float compareCompact(byte[] compactValue) {
        return compare(createFromCompact(compactValue));
    }

    /**
     * Compares this fingerprint with a textual fingerprint.
     *
     * @param hashValue string of '0'/'1' characters, whitespace ignored
     * @return fraction of equal bits, between 0 and 1
     */
    public float compare(String hashValue) {
        return compare(new ImageHashUtil(hashValue));
    }

    /**
     * Compares this fingerprint with an expanded fingerprint.
     *
     * @param hashValue one 0/1 value per element
     * @return fraction of equal bits, between 0 and 1
     */
    public float compare(byte[] hashValue) {
        return compare(new ImageHashUtil(hashValue));
    }

    /**
     * Compares this fingerprint with the fingerprint of an image.
     *
     * @param image2 image to fingerprint and compare
     * @return fraction of equal bits, between 0 and 1
     */
    public float compare(BufferedImage image2) {
        return compare(new ImageHashUtil(image2));
    }

    /**
     * Compares this fingerprint with another one.
     *
     * @param src other fingerprint
     * @return fraction of equal bits, between 0 and 1
     * @throws IllegalArgumentException if the fingerprints differ in length
     */
    public float compare(ImageHashUtil src) {
        if (src.binaryzationMatrix.length != this.binaryzationMatrix.length) {
            throw new IllegalArgumentException("length of hashValue is mismatch");
        }
        return compare(binaryzationMatrix, src.binaryzationMatrix);
    }

    /** Returns the fraction of positions at which both arrays hold the same value. */
    private static float compare(byte[] f1, byte[] f2) {
        if (f1.length != f2.length) {
            throw new IllegalArgumentException("mismatch FingerPrint length");
        }
        int sameCount = 0;
        for (int i = 0; i < f1.length; ++i) {
            if (f1[i] == f2[i]) {
                ++sameCount;
            }
        }
        return (float) sameCount / f1.length;
    }

    /**
     * Compares two packed fingerprints.
     *
     * @param f1 first packed fingerprint
     * @param f2 second packed fingerprint
     * @return fraction of equal bits, between 0 and 1
     * @throws IllegalArgumentException if the arrays differ in length
     */
    public static float compareCompact(byte[] f1, byte[] f2) {
        return compare(uncompact(f1), uncompact(f2));
    }

    /**
     * Compares two images by their fingerprints.
     *
     * @param image1 first image
     * @param image2 second image
     * @return fraction of equal fingerprint bits, between 0 and 1
     */
    public static float compare(BufferedImage image1, BufferedImage image2) {
        return new ImageHashUtil(image1).compare(new ImageHashUtil(image2));
    }

    /**
     * Decodes an uploaded image. A file whose original name ends in {@code .ico} (any case) is
     * decoded as ICO, everything else through {@link ImageIO}.
     *
     * @param mFile uploaded file
     * @return decoded image, or null if the upload cannot be read or contains no decodable image
     */
    public static BufferedImage getImage(MultipartFile mFile) {
        InputStream in = null;
        try {
            in = mFile.getInputStream();
            // The original file name may be null; such uploads are handed to ImageIO.
            return decode(in, hasIcoExtension(mFile.getOriginalFilename()));
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        } finally {
            closeQuietly(in);
        }
    }

    /**
     * Downloads and decodes an image. A URL whose path ends in {@code .ico} (any case) is decoded
     * as ICO, everything else through {@link ImageIO}.
     *
     * @param url address of the image
     * @param b   not read by this method; kept so existing callers keep compiling
     * @return decoded image, or null if the URL is invalid, the download fails or the data
     *         contains no decodable image
     */
    public static BufferedImage getImage(String url, Boolean b) {
        InputStream in = null;
        try {
            URL addr = new URL(url);
            in = addr.openConnection().getInputStream();
            // Look at the path only, so a query string such as "?v=2" does not hide the extension.
            return decode(in, hasIcoExtension(addr.getPath()));
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        } finally {
            closeQuietly(in);
        }
    }

    /**
     * Reads and decodes an image from the local file system. A path ending in {@code .ico}
     * (any case) is decoded as ICO, everything else through {@link ImageIO}.
     *
     * @param path path of the image file
     * @return decoded image, or null if the file cannot be read or contains no decodable image
     */
    public static BufferedImage getImage(String path) {
        InputStream in = null;
        try {
            in = new java.io.FileInputStream(path);
            return decode(in, hasIcoExtension(path));
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        } finally {
            closeQuietly(in);
        }
    }

    /** Tells whether the text after the last '.' of the name is "ico", ignoring case; null yields false. */
    private static boolean hasIcoExtension(String name) {
        return name != null && "ico".equalsIgnoreCase(name.substring(name.lastIndexOf('.') + 1));
    }

    /**
     * Decodes an image from a stream without closing it.
     *
     * @param in  image data
     * @param ico true to decode as ICO (first contained image), false to use {@link ImageIO}
     * @return decoded image, or null if no image could be decoded
     */
    private static BufferedImage decode(InputStream in, boolean ico) throws IOException {
        if (!ico) {
            return ImageIO.read(in);
        }
        java.util.List<?> images = new ICOFile(in).getImages();
        if (images.isEmpty()) {
            return null;
        }
        return toBufferedImage((Image) images.get(0));
    }

    /** Closes a stream, ignoring null and close failures (the data has already been read). */
    private static void closeQuietly(InputStream in) {
        if (in != null) {
            try {
                in.close();
            } catch (IOException ignored) {
                // Nothing useful can be done about a failed close of a read-only stream.
            }
        }
    }

    /**
     * Converts any {@link Image} into an opaque {@link BufferedImage}.
     *
     * @param image image to convert; a {@code BufferedImage} is returned unchanged
     * @return opaque buffered copy of the image; transparency is not preserved
     */
    public static BufferedImage toBufferedImage(Image image) {
        if (image instanceof BufferedImage) {
            return (BufferedImage) image;
        }
        // ImageIcon waits until the image is loaded, so width and height are known below.
        image = new ImageIcon(image).getImage();
        int width = image.getWidth(null);
        int height = image.getHeight(null);

        BufferedImage bimage = null;
        try {
            GraphicsConfiguration gc = GraphicsEnvironment.getLocalGraphicsEnvironment()
                    .getDefaultScreenDevice().getDefaultConfiguration();
            bimage = gc.createCompatibleImage(width, height, Transparency.OPAQUE);
        } catch (HeadlessException ignored) {
            // No screen available (e.g. on a server): fall back to a plain RGB image below.
        }
        if (bimage == null) {
            bimage = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
        }
        Graphics g = bimage.createGraphics();
        try {
            g.drawImage(image, 0, 0, null);
        } finally {
            g.dispose();
        }
        return bimage;
    }

    /**
     * Decodes Base64 data and writes it to {@code destPath/fileName}; missing directories are
     * created. Failures are reported on standard error and otherwise ignored.
     *
     * <p>NOTE(review): {@code sun.misc.BASE64Decoder} is an internal JDK class that no longer
     * exists in JDK 9+; {@code java.util.Base64} is the supported replacement once the file-level
     * import can be changed.
     *
     * @param destPath target directory
     * @param base64   Base64 encoded file content; leading and trailing whitespace is ignored
     * @param fileName name of the file to create inside {@code destPath}
     */
    public static void base64ToFile(String destPath, String base64, String fileName) {
        java.io.OutputStream out = null;
        try {
            byte[] bytes = new BASE64Decoder().decodeBuffer(base64.trim());
            java.io.File dir = new java.io.File(destPath);
            if (!dir.isDirectory() && !dir.mkdirs()) {
                throw new IOException("cannot create directory " + destPath);
            }
            out = new java.io.FileOutputStream(new java.io.File(dir, fileName));
            out.write(bytes);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (out != null) {
                try {
                    out.close();
                } catch (IOException e) {
                    // A failed close may mean the data was not fully written, so report it.
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Computes the textual fingerprint of a Base64 encoded ICO file.
     *
     * @param base64 Base64 encoded ICO data; carriage returns are removed before decoding
     * @return fingerprint as a single line of '0'/'1' characters, or null if {@code base64} is
     *         null, cannot be decoded or contains no image
     */
    public static String getImageHashByBase64(String base64) {
        if (base64 == null) {
            return null;
        }
        try {
            byte[] bytes = new BASE64Decoder().decodeBuffer(base64.replaceAll("\r", ""));
            BufferedImage image = decode(new ByteArrayInputStream(bytes), true);
            if (image == null) {
                return null;
            }
            return new ImageHashUtil(image).toString(false);
        } catch (IOException ignored) {
            // Best effort by design: undecodable input is reported as "no fingerprint".
            return null;
        }
    }

    /** Demo: prints the fingerprints of two local icon files and their similarity. */
    public static void main(String[] args) {
        ImageHashUtil first = new ImageHashUtil(getImage("D:\\favicon.ico"));
        ImageHashUtil second = new ImageHashUtil(getImage("D:\\nnn.ico"));
        System.out.println(first.toString(false));
        System.out.println(second.toString(false));
        System.out.printf("sim=%f", first.compare(second));
    }
}
结果输出:sim=0.953125(其后的 "Disconnected from the target VM, address: '127.0.0.1:10464', transport: 'socket'" 是IDE调试器的提示信息,与比较结果无关)
结合ES进行以图搜图的开发:把图片的hash特征串存入ES字段中,再安装自定义的近似度匹配算法插件,查询即可。插件地址:https://download.csdn.net/download/airyearth/13253460
查询DSL相关度前五的数据:
{
"from": 0,
"size": 15,
"min_score": 0.5,//过滤小于0.5相关度的数据
"_source": {
"excludes": ""
},
"sort": {
"_score": {
"order": "desc"
}
},
"query": {
"function_score": {
"query": {
"exists": {
"field": "iconFeature.raw" //过滤空数据,提升性能
}
},
"functions": [
{
"script_score": {
"script": {
"inline": "icon_hash",
"lang": "native",
"params": {
"feature": "1100000000000011101101100000000100110110111100000000101100001000001100111110110000110101100011000000111111101100000010011000110000010101101011000011010111101100011100111001100001100000011100001000000000000001110000000000001111110011111111111111011111111111"
}
}
}
}
]
}
}
}
结果OK
{
  "took": 24,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 13,
    "max_score": 1,
    "hits": [
      {
        "_index": "icon",
        "_type": "hash",
        "_id": "AXYnWYiBN2lokOarnSV5",
        "_score": 1,
        "_source": {
          "feature": "1100000000000011101101100000000100110110111100000000101100001000001100111110110000110101100011000000111111101100000010011000110000010101101011000011010111101100011100111001100001100000011100001000000000000001110000000000001111110011111111111111011111111111",
          "url": "tb.ico"
        }
      },
      {
        "_index": "icon",
        "_type": "hash",
        "_id": "AXYmq8Kj9o24ztj5Nm5x",
        "_score": 1,
        "_source": {
          "feature": "1100000000000011101101100000000100110110111100000000101100001000001100111110110000110101100011000000111111101100000010011000110000010101101011000011010111101100011100111001100001100000011100001000000000000001110000000000001111110011111111111111011111111111"
        }
      },
      {
        "_index": "icon",
        "_type": "hash",
        "_id": "AXYmq5pm9o24ztj5Nm5w",
        "_score": 0.5859375,
        "_source": {
          "feature": "0000000000000000000000000000000000000000000000000001111111111000000111111111100000000001100000000000000110000000000000011000000000000001100000000000000110000000000000011000000000000001100000000000000110000000000000000000000000000000000000000000000000000000"
        }
      },
      {
        "_index": "icon",
        "_type": "hash",
        "_id": "AXYnV0yhN2lokOarnSV2",
        "_score": 0.5859375,
        "_source": {
          "feature": "0000000000000000000000000000000000000000000000000001111111111000000111111111100000000001100000000000000110000000000000011000000000000001100000000000000110000000000000011000000000000001100000000000000110000000000000000000000000000000000000000000000000000000",
          "url": "tmall.ico"
        }
      },
      {
        "_index": "icon",
        "_type": "hash",
        "_id": "AXYnWfhoN2lokOarnSV6",
        "_score": 0.51171875,
        "_source": {
          "feature": "0000000000000000000000000000000000000011111100000000111111111000000011111111100000011100000000000001110000000000000110000000000000011000000000000001110000000000000111000000000000001111011110000000111111111000000000111111000000000000000000000000000000000000",
          "url": "chezz.png"
        }
      }
    ]
  }
}
按照结果中的url聚合:url字段的mapping必须如下设置(开启fielddata):
{
"properties":{
"url":{
"type":"text",
"fielddata":true
}
}
}
dsl:
{
"from": 0,
"size": 0,
"min_score":0.5,
"_source": {
"excludes": ""
},
"sort": {
"_score": {
"order": "desc"
}
},
"query": {
"function_score": {
"query": {
"exists": {
"field": "iconFeature.raw"
}
},
"functions": [
{
"script_score": {
"script": {
"inline": "icon_hash",
"lang": "native",
"params": {
"feature": "1100000000000011101101100000000100110110111100000000101100001000001100111110110000110101100011000000111111101100000010011000110000010101101011000011010111101100011100111001100001100000011100001000000000000001110000000000001111110011111111111111011111111111"
}
}
}
}
]
}
},
"aggregations": {
"portCounts": {
"terms": {
"field": "url.keyword",
"size": 10,
"min_doc_count": 1,
"shard_min_doc_count": 0,
"show_term_doc_count_error": false,
"order": [
{
"_count": "desc"
}
]
}
}
}
}
结果:
{
  "took": 80,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 5,
    "max_score": 0,
    "hits": [ ]
  },
  "aggregations": {
    "portCounts": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "chezzfavicon.png",
          "doc_count": 1
        },
        {
          "key": "tb.ico",
          "doc_count": 1
        },
        {
          "key": "tmall.ico",
          "doc_count": 1
        }
      ]
    }
  }
}