java读取ico文件比较图片的相似度,利用平均hash算法,图片模糊搜索算法

本文主要介绍如何利用平均hash算法比较图片的相似度:结果越接近1,图片越相似。由于java读取ico文件有些坑,需要先下载一个jar再处理,这样就可以一起比较.jpg、.png、.ico的文件了。代码也是综合了各方大神的,在此向大神们致敬!算是自己留个笔记吧。

1.ico文件读取的jar,地址:https://download.csdn.net/download/airyearth/13218304

2.需要比较的图片:

由于.ico的图片不让上传,只能截图,各位自己去下载吧,目前大图为png,小图为ico的图标

3.直接上代码:



import com.ctreber.aclib.image.ico.ICOFile;
import org.springframework.web.multipart.MultipartFile;
import sun.misc.BASE64Decoder;

import javax.imageio.ImageIO;
import javax.swing.*;
import java.awt.*;
import java.awt.color.ColorSpace;
import java.awt.image.BufferedImage;
import java.awt.image.ColorConvertOp;
import java.io.ByteArrayInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.Base64;
import java.util.List;


/**
 * Average-hash (aHash) fingerprint of an image, used to estimate how similar two images look.
 *
 * <p>An image is scaled to {@code HASH_SIZE x HASH_SIZE} pixels, converted to 8-bit gray and
 * thresholded against its mean gray level. The fingerprint is the resulting sequence of
 * {@code 0}/{@code 1} bytes. Two fingerprints are compared by counting the positions at which
 * they agree, so a score of {@code 1.0} means the fingerprints are identical.
 *
 * <p>Instances are immutable: the fingerprint is copied on construction and never exposed.
 */
public final class ImageHashUtil {

    /** Edge length, in pixels, of the down-scaled image the fingerprint is computed from. */
    private static final int HASH_SIZE = 16;

    /** Number of elements in an uncompacted fingerprint. */
    private static final int HASH_LENGTH = HASH_SIZE * HASH_SIZE;

    /** The fingerprint: {@code HASH_LENGTH} bytes, each expected to be 0 or 1. */
    private final byte[] binaryzationMatrix;

    /**
     * Wraps an existing uncompacted fingerprint.
     *
     * @param hashValue {@code HASH_SIZE*HASH_SIZE} bytes, one per fingerprint bit
     * @throws IllegalArgumentException if the array has the wrong length
     */
    public ImageHashUtil(byte[] hashValue) {
        if (hashValue.length != HASH_LENGTH) {
            throw new IllegalArgumentException(
                    String.format("length of hashValue must be %d", HASH_LENGTH));
        }
        // Defensive copy so later changes to the caller's array cannot alter this fingerprint.
        this.binaryzationMatrix = hashValue.clone();
    }

    /**
     * Parses a fingerprint written as a string of {@code '0'} and {@code '1'} characters.
     * Whitespace (for example the line breaks produced by {@link #toString()}) is ignored.
     *
     * @param hashValue textual fingerprint
     * @throws IllegalArgumentException on any other character or on a wrong length
     */
    public ImageHashUtil(String hashValue) {
        this(toBytes(hashValue));
    }

    /**
     * Computes the fingerprint of an uploaded image.
     *
     * @param multipartFile uploaded image file
     * @throws NullPointerException if the upload could not be read or decoded as an image
     */
    public ImageHashUtil(MultipartFile multipartFile) {
        this(hashValue(getImage(multipartFile)));
    }

    /**
     * Computes the fingerprint of the image found at a URL.
     *
     * @param url image URL
     * @param b unused; only distinguishes this constructor from {@link #ImageHashUtil(String)}
     * @throws NullPointerException if the image could not be fetched or decoded
     */
    public ImageHashUtil(String url, Boolean b) {
        this(hashValue(getImage(url, b)));
    }

    /**
     * Computes the fingerprint of an in-memory image.
     *
     * @param src image to fingerprint
     * @throws NullPointerException if {@code src} is null
     */
    public ImageHashUtil(BufferedImage src) {
        this(hashValue(src));
    }

    /** Scales, grays and thresholds {@code src} into an uncompacted fingerprint. */
    private static byte[] hashValue(BufferedImage src) {
        if (src == null) {
            throw new NullPointerException("image is null: it could not be loaded or decoded");
        }
        BufferedImage scaled = resize(src, HASH_SIZE, HASH_SIZE);
        byte[] gray = (byte[]) toGray(scaled).getData()
                .getDataElements(0, 0, HASH_SIZE, HASH_SIZE, null);
        return binaryzation(gray);
    }

    /**
     * Builds a fingerprint from its bit-packed form as produced by {@link #compact()}.
     *
     * @param compactValue packed fingerprint, eight fingerprint bits per byte
     * @return the unpacked fingerprint
     * @throws IllegalArgumentException if the unpacked length is wrong
     */
    public static ImageHashUtil createFromCompact(byte[] compactValue) {
        return new ImageHashUtil(uncompact(compactValue));
    }

    /**
     * Tells whether an array is a well-formed uncompacted fingerprint.
     *
     * @param hashValue candidate fingerprint, may be null
     * @return true if it has {@code HASH_SIZE*HASH_SIZE} elements and each one is 0 or 1
     */
    public static boolean validHashValue(byte[] hashValue) {
        if (hashValue == null || hashValue.length != HASH_LENGTH) {
            return false;
        }
        for (byte bit : hashValue) {
            if (bit != 0 && bit != 1) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tells whether a string is a well-formed textual fingerprint, i.e. whether
     * {@link #ImageHashUtil(String)} would accept it. Whitespace is ignored, as in the constructor.
     *
     * @param hashValue candidate fingerprint, may be null
     * @return true if, ignoring whitespace, it is {@code HASH_SIZE*HASH_SIZE} characters of 0 or 1
     */
    public static boolean validHashValue(String hashValue) {
        if (hashValue == null) {
            return false;
        }
        String digits = stripWhitespace(hashValue);
        if (digits.length() != HASH_LENGTH) {
            return false;
        }
        for (int i = 0; i < digits.length(); ++i) {
            char c = digits.charAt(i);
            if (c != '0' && c != '1') {
                return false;
            }
        }
        return true;
    }

    /**
     * Packs this fingerprint into bits.
     *
     * @return a new array holding eight fingerprint elements per byte
     * @throws IllegalArgumentException if the fingerprint holds a value other than 0 or 1
     */
    public byte[] compact() {
        return compact(binaryzationMatrix);
    }

    /** Packs element {@code i} into bit {@code i & 7} of byte {@code i >> 3}. */
    private static byte[] compact(byte[] hashValue) {
        byte[] packed = new byte[(hashValue.length + 7) >> 3];
        for (int i = 0; i < hashValue.length; ++i) {
            if (hashValue[i] == 1) {
                packed[i >> 3] |= (byte) (1 << (i & 7));
            } else if (hashValue[i] != 0) {
                throw new IllegalArgumentException("invalid hashValue,every element must be 0 or 1");
            }
        }
        return packed;
    }

    /** Inverse of {@link #compact(byte[])}: expands every bit into one 0/1 byte. */
    private static byte[] uncompact(byte[] compactValue) {
        byte[] bits = new byte[compactValue.length << 3];
        for (int i = 0; i < bits.length; ++i) {
            bits[i] = (byte) ((compactValue[i >> 3] >> (i & 7)) & 1);
        }
        return bits;
    }

    /** Removes every whitespace character from a textual fingerprint. */
    private static String stripWhitespace(String hashValue) {
        return hashValue.replaceAll("\\s", "");
    }

    /** Converts a textual fingerprint to 0/1 bytes, ignoring whitespace. */
    private static byte[] toBytes(String hashValue) {
        String digits = stripWhitespace(hashValue);
        byte[] bits = new byte[digits.length()];
        for (int i = 0; i < bits.length; ++i) {
            char c = digits.charAt(i);
            if (c == '1') {
                bits[i] = 1;
            } else if (c != '0') {
                throw new IllegalArgumentException("invalid hashValue String");
            }
        }
        return bits;
    }

    /**
     * Scales {@code src} to {@code width x height} into a new BGR image.
     *
     * <p>The scaling call is intentionally kept as is: a different algorithm would shift the
     * resulting bits and make new fingerprints incomparable with ones computed earlier.
     */
    private static BufferedImage resize(Image src, int width, int height) {
        BufferedImage scaled = new BufferedImage(width, height, BufferedImage.TYPE_3BYTE_BGR);
        Graphics g = scaled.getGraphics();
        try {
            g.drawImage(src.getScaledInstance(width, height, Image.SCALE_SMOOTH), 0, 0, null);
        } finally {
            g.dispose();
        }
        return scaled;
    }

    /** Rounded arithmetic mean of the bytes in {@code src}, each read as an unsigned value. */
    private static int mean(byte[] src) {
        long sum = 0;
        for (byte b : src) {
            sum += b & 0xff;
        }
        return Math.round((float) sum / src.length);
    }

    /** Maps every gray value to 1 if it is at least the mean gray level, else to 0. */
    private static byte[] binaryzation(byte[] src) {
        int threshold = mean(src);
        byte[] bits = new byte[src.length];
        for (int i = 0; i < src.length; ++i) {
            // Compare as unsigned: gray levels above 127 are negative as Java bytes.
            bits[i] = (byte) ((src[i] & 0xff) >= threshold ? 1 : 0);
        }
        return bits;
    }

    /** Returns {@code src} itself if it is already 8-bit gray, otherwise a gray copy. */
    private static BufferedImage toGray(BufferedImage src) {
        if (src.getType() == BufferedImage.TYPE_BYTE_GRAY) {
            return src;
        }
        BufferedImage gray = new BufferedImage(src.getWidth(), src.getHeight(),
                BufferedImage.TYPE_BYTE_GRAY);
        new ColorConvertOp(ColorSpace.getInstance(ColorSpace.CS_GRAY), null).filter(src, gray);
        return gray;
    }

    /** Returns the fingerprint as text, with a line break after every {@code HASH_SIZE} digits. */
    @Override
    public String toString() {
        return toString(true);
    }

    /**
     * Returns the fingerprint as a string of {@code '0'} and {@code '1'} characters.
     *
     * @param multiLine whether to append a line break after every {@code HASH_SIZE} digits
     * @return textual fingerprint, accepted back by {@link #ImageHashUtil(String)}
     */
    public String toString(boolean multiLine) {
        StringBuilder text = new StringBuilder(HASH_LENGTH + HASH_SIZE);
        int written = 0;
        for (byte bit : binaryzationMatrix) {
            text.append(bit == 0 ? '0' : '1');
            if (multiLine && ++written % HASH_SIZE == 0) {
                text.append('\n');
            }
        }
        return text.toString();
    }

    /** Two instances are equal when their fingerprints are element-wise equal. */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof ImageHashUtil)) {
            return false;
        }
        return Arrays.equals(binaryzationMatrix, ((ImageHashUtil) obj).binaryzationMatrix);
    }

    /** Hash code consistent with {@link #equals(Object)}. */
    @Override
    public int hashCode() {
        return Arrays.hashCode(binaryzationMatrix);
    }

    /**
     * Compares this fingerprint with a bit-packed one.
     *
     * @param compactValue packed fingerprint as produced by {@link #compact()}
     * @return fraction of equal positions, between 0 and 1
     */
    public float compareCompact(byte[] compactValue) {
        return compare(createFromCompact(compactValue));
    }

    /**
     * Compares this fingerprint with a textual one.
     *
     * @param hashValue fingerprint as accepted by {@link #ImageHashUtil(String)}
     * @return fraction of equal positions, between 0 and 1
     */
    public float compare(String hashValue) {
        return compare(new ImageHashUtil(hashValue));
    }

    /**
     * Compares this fingerprint with an uncompacted one.
     *
     * @param hashValue fingerprint as accepted by {@link #ImageHashUtil(byte[])}
     * @return fraction of equal positions, between 0 and 1
     */
    public float compare(byte[] hashValue) {
        return compare(new ImageHashUtil(hashValue));
    }

    /**
     * Compares this fingerprint with the fingerprint of an image.
     *
     * @param image2 image to fingerprint and compare against
     * @return fraction of equal positions, between 0 and 1
     */
    public float compare(BufferedImage image2) {
        return compare(new ImageHashUtil(image2));
    }

    /**
     * Compares this fingerprint with another one.
     *
     * @param src other fingerprint
     * @return fraction of equal positions, between 0 and 1
     * @throws IllegalArgumentException if the two fingerprints differ in length
     */
    public float compare(ImageHashUtil src) {
        if (src.binaryzationMatrix.length != this.binaryzationMatrix.length) {
            throw new IllegalArgumentException("length of hashValue is mismatch");
        }
        return compare(binaryzationMatrix, src.binaryzationMatrix);
    }

    /** Fraction of indices at which {@code f1} and {@code f2} hold the same value. */
    private static float compare(byte[] f1, byte[] f2) {
        if (f1.length != f2.length) {
            throw new IllegalArgumentException("mismatch FingerPrint length");
        }
        int same = 0;
        for (int i = 0; i < f1.length; ++i) {
            if (f1[i] == f2[i]) {
                ++same;
            }
        }
        return (float) same / f1.length;
    }

    /**
     * Compares two bit-packed fingerprints without building instances.
     *
     * @param f1 first packed fingerprint
     * @param f2 second packed fingerprint
     * @return fraction of equal bits
     * @throws IllegalArgumentException if the two arrays differ in length
     */
    public static float compareCompact(byte[] f1, byte[] f2) {
        return compare(uncompact(f1), uncompact(f2));
    }

    /**
     * Fingerprints two images and compares them.
     *
     * @param image1 first image
     * @param image2 second image
     * @return fraction of equal fingerprint positions, between 0 and 1
     */
    public static float compare(BufferedImage image1, BufferedImage image2) {
        return new ImageHashUtil(image1).compare(new ImageHashUtil(image2));
    }

    /**
     * Decodes an uploaded image. Files whose original name ends in {@code .ico} (any case) are
     * parsed with the ICO library and the first contained image is used; everything else is
     * handed to {@link ImageIO}.
     *
     * @param mFile uploaded file
     * @return the decoded image, or null if reading fails or no decoder understands the data
     */
    public static BufferedImage getImage(MultipartFile mFile) {
        try (InputStream in = mFile.getInputStream()) {
            return readImage(in, mFile.getOriginalFilename());
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Downloads and decodes an image. The format is chosen from the extension of the URL path,
     * so a query string after the file name does not interfere.
     *
     * @param url image URL
     * @param b unused; kept so existing callers keep compiling
     * @return the decoded image, or null if the URL is invalid, the download fails or no decoder
     *         understands the data
     */
    public static BufferedImage getImage(String url, Boolean b) {
        try {
            URL addr = new URL(url);
            try (InputStream in = addr.openConnection().getInputStream()) {
                return readImage(in, addr.getPath());
            }
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Reads and decodes an image from the local file system.
     *
     * @param path path of the image file
     * @return the decoded image, or null if reading fails or no decoder understands the data
     */
    public static BufferedImage getImage(String path) {
        try (InputStream in = Files.newInputStream(Paths.get(path))) {
            return readImage(in, path);
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /** Decodes {@code in} as ICO when {@code name} has the extension "ico", else via ImageIO. */
    private static BufferedImage readImage(InputStream in, String name) throws IOException {
        boolean ico = name != null
                && "ico".equalsIgnoreCase(name.substring(name.lastIndexOf('.') + 1));
        return ico ? firstIcoImage(in) : ImageIO.read(in);
    }

    /** Returns the first image stored in an ICO stream, or null if it contains none. */
    private static BufferedImage firstIcoImage(InputStream in) throws IOException {
        List<?> images = new ICOFile(in).getImages();
        return images.isEmpty() ? null : toBufferedImage((Image) images.get(0));
    }

    /**
     * Converts any AWT image to a {@link BufferedImage}.
     *
     * <p>The copy is always {@code TYPE_INT_RGB}, so the conversion does not touch the local
     * screen device and behaves the same on servers without a display.
     *
     * @param image image to convert
     * @return {@code image} itself if it already is a BufferedImage, otherwise an opaque copy
     */
    public static BufferedImage toBufferedImage(Image image) {
        if (image instanceof BufferedImage) {
            return (BufferedImage) image;
        }
        // ImageIcon blocks until the pixels are loaded, so width and height are known below.
        Image loaded = new ImageIcon(image).getImage();
        BufferedImage copy = new BufferedImage(loaded.getWidth(null), loaded.getHeight(null),
                BufferedImage.TYPE_INT_RGB);
        Graphics g = copy.createGraphics();
        try {
            g.drawImage(loaded, 0, 0, null);
        } finally {
            g.dispose();
        }
        return copy;
    }

    /**
     * Decodes Base64 text and writes the bytes to {@code destPath/fileName}, creating the
     * directory if needed and replacing an existing file. Decoding and I/O failures are reported
     * on standard error and otherwise ignored.
     *
     * @param destPath target directory
     * @param base64 Base64 text; line breaks are tolerated
     * @param fileName name of the file to create inside {@code destPath}
     * @throws IllegalArgumentException if {@code fileName} would resolve outside {@code destPath}
     */
    public static void base64ToFile(String destPath, String base64, String fileName) {
        Path dir = Paths.get(destPath).toAbsolutePath().normalize();
        Path target = dir.resolve(fileName).normalize();
        // Refuse names such as "../x" that would escape the target directory.
        if (!target.startsWith(dir)) {
            throw new IllegalArgumentException("fileName must stay inside destPath: " + fileName);
        }
        byte[] bytes;
        try {
            bytes = Base64.getMimeDecoder().decode(base64.trim());
        } catch (IllegalArgumentException e) {
            e.printStackTrace();
            return;
        }
        try {
            Files.createDirectories(dir);
            Files.write(target, bytes);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Computes the textual fingerprint of a Base64-encoded ICO file.
     *
     * @param base64 Base64 text of an ICO file; line breaks are tolerated
     * @return the fingerprint as a single line of 0/1 digits, or null if the text is not valid
     *         Base64, cannot be parsed as ICO or contains no image
     */
    public static String getImageHashByBase64(String base64) {
        byte[] bytes;
        try {
            bytes = Base64.getMimeDecoder().decode(base64);
        } catch (IllegalArgumentException ignored) {
            // Not Base64: by contract that is reported as "no hash".
            return null;
        }
        try {
            BufferedImage image = firstIcoImage(new ByteArrayInputStream(bytes));
            return image == null ? null : new ImageHashUtil(image).toString(false);
        } catch (IOException ignored) {
            // Not a readable ICO file: by contract that is reported as "no hash".
            return null;
        }
    }

    /**
     * Demo: prints the fingerprints of two local icon files and their similarity.
     *
     * @param args optional paths of the two images; the sample paths are used when absent
     */
    public static void main(String[] args) {
        String firstPath = args.length > 0 ? args[0] : "D:\\favicon.ico";
        String secondPath = args.length > 1 ? args[1] : "D:\\nnn.ico";

        ImageHashUtil first = new ImageHashUtil(getImage(firstPath));
        ImageHashUtil second = new ImageHashUtil(getImage(secondPath));
        System.out.println(first.toString(false));
        System.out.println(second.toString(false));
        System.out.printf("sim=%f", first.compare(second));
    }
}


结果输出:sim=0.953125(由于printf末尾没有换行,IDE调试器断开连接的提示"Disconnected from the target VM, address: '127.0.0.1:10464', transport: 'socket'"会紧跟在同一行后面,它不属于程序输出)

结合ES进行以图搜图的开发:把hash数字串存入ES字段中,再自定义一个近似度匹配算法的插件,查询即可。插件地址:https://download.csdn.net/download/airyearth/13253460

查询DSL相关度前五的数据:

{
  "from": 0,
  "size": 15,
  "min_score": 0.5,//过滤小于0.5相关度的数据
  "_source": {
    "excludes": ""
  },
  "sort": {
    "_score": {
      "order": "desc"
    }
  },
  "query": {
    "function_score": {
      "query": {
        "exists": {
            "field": "iconFeature.raw" //过滤空数据,提升性能
          }

      },
      "functions": [
        {
          "script_score": {
            "script": {
              "inline": "icon_hash",
              "lang": "native",
              "params": {
                "feature": "1100000000000011101101100000000100110110111100000000101100001000001100111110110000110101100011000000111111101100000010011000110000010101101011000011010111101100011100111001100001100000011100001000000000000001110000000000001111110011111111111111011111111111"
              }
            }
          }
        }
      ]
    }
  }
}

结果OK

{

  • "took": 24,
  • "timed_out": false,
  • "_shards": {
    • "total": 5,
    • "successful": 5,
    • "failed": 0
    },
  • "hits": {
    • "total": 13,
    • "max_score": 1,
    • "hits": [
      • {
        • "_index": "icon",
        • "_type": "hash",
        • "_id": "AXYnWYiBN2lokOarnSV5",
        • "_score": 1,
        • "_source": {
          • "feature": "1100000000000011101101100000000100110110111100000000101100001000001100111110110000110101100011000000111111101100000010011000110000010101101011000011010111101100011100111001100001100000011100001000000000000001110000000000001111110011111111111111011111111111",
          • "url": "tb.ico"
          }
        }
      • ,
      • {
        • "_index": "icon",
        • "_type": "hash",
        • "_id": "AXYmq8Kj9o24ztj5Nm5x",
        • "_score": 1,
        • "_source": {
          • "feature": "1100000000000011101101100000000100110110111100000000101100001000001100111110110000110101100011000000111111101100000010011000110000010101101011000011010111101100011100111001100001100000011100001000000000000001110000000000001111110011111111111111011111111111"
          }
        }
      • ,
      • {
        • "_index": "icon",
        • "_type": "hash",
        • "_id": "AXYmq5pm9o24ztj5Nm5w",
        • "_score": 0.5859375,
        • "_source": {
          • "feature": "0000000000000000000000000000000000000000000000000001111111111000000111111111100000000001100000000000000110000000000000011000000000000001100000000000000110000000000000011000000000000001100000000000000110000000000000000000000000000000000000000000000000000000"
          }
        }
      • ,
      • {
        • "_index": "icon",
        • "_type": "hash",
        • "_id": "AXYnV0yhN2lokOarnSV2",
        • "_score": 0.5859375,
        • "_source": {
          • "feature": "0000000000000000000000000000000000000000000000000001111111111000000111111111100000000001100000000000000110000000000000011000000000000001100000000000000110000000000000011000000000000001100000000000000110000000000000000000000000000000000000000000000000000000",
          • "url": "tmall.ico"
          }
        }
      • ,
      • {
        • "_index": "icon",
        • "_type": "hash",
        • "_id": "AXYnWfhoN2lokOarnSV6",
        • "_score": 0.51171875,
        • "_source": {
          • "feature": "0000000000000000000000000000000000000011111100000000111111111000000011111111100000011100000000000001110000000000000110000000000000011000000000000001110000000000000111000000000000001111011110000000111111111000000000111111000000000000000000000000000000000000",
          • "url": "chezz.png"
          }
        }
      ]
    }

}

按照结果的url聚合:url字段的mapping必须如下(text类型并开启fielddata):

{
  "properties":{
    "url":{
      "type":"text",
      "fielddata":true
    }
  }
}

dsl:

{
  "from": 0,
  "size": 0,
  "min_score":0.5,
  "_source": {
    "excludes": ""
  },
  "sort": {
    "_score": {
      "order": "desc"
    }
  },
  "query": {
    "function_score": {
      "query": {
       "exists": {
            "field": "iconFeature.raw"
          }
      },
      "functions": [
        {
          "script_score": {
            "script": {
              "inline": "icon_hash",
              "lang": "native",
              "params": {
                "feature": "1100000000000011101101100000000100110110111100000000101100001000001100111110110000110101100011000000111111101100000010011000110000010101101011000011010111101100011100111001100001100000011100001000000000000001110000000000001111110011111111111111011111111111"
              }
            }
          }
        }
      ]
    }
  },
  "aggregations": {
    "portCounts": {
      "terms": {
        "field": "url.keyword",
        "size": 10,
        "min_doc_count": 1,
        "shard_min_doc_count": 0,
        "show_term_doc_count_error": false,
        "order": [
          {
            "_count": "desc"
          }
        ]
      }
    }
  }
}

结果:

{

  • "took": 80,
  • "timed_out": false,
  • "_shards": {
    • "total": 5,
    • "successful": 5,
    • "failed": 0
    },
  • "hits": {
    • "total": 5,
    • "max_score": 0,
    • "hits": [ ]
    },
  • "aggregations": {
    • "portCounts": {
      • "doc_count_error_upper_bound": 0,
      • "sum_other_doc_count": 0,
      • "buckets": [
        • {
          • "key": "chezzfavicon.png",
          • "doc_count": 1
          }
        • ,
        • {
          • "key": "tb.ico",
          • "doc_count": 1
          }
        • ,
        • {
          • "key": "tmall.ico",
          • "doc_count": 1
          }
        ]
      }
    }

}

  • 8
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 4
    评论
评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值