package com.demo.lsh;

import com.demo.config.Constant;
import com.demo.dao.FeatureDao;
import com.demo.dao.FeatureTableDao;
import com.demo.dao.HashTableDao;
import com.demo.entity.HashTable;
import com.demo.utils.MD5Util;
import com.demo.utils.MathUtil;

import org.opencv.core.Mat;
import org.springframework.util.StringUtils;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.*;
18 public classLSH {19 //维度大小,例如对于sift特征来说就是128
20 private int dimention =Constant.DIMENTION;21 //所需向量中元素可能的上限,譬如对于RGB来说,就是255
22 private int max =Constant.MAX;23 //哈希表的数量,用于更大程度地削减false positive
24 private int hashCount =Constant.HASHCOUNT;25 //LSH随机选取的采样位数,该值越小,则近似查找能力越大,但相应的false positive也越大;若该值等于size,则为由近似查找退化为精确匹配
26 private int bitCount =Constant.BITCOUNT;27 //转化为01字符串之后的位数,等于max乘以dimensions
28 private int size = dimention *max;29 //LSH哈希族,保存了随机采样点的INDEX
30 private int[][] hashFamily;31 privateHashTableDao hashTableDao;32 /**
33 * 构造函数34 */
35 publicLSH(HashTableDao hashTableDao) {36 this.hashTableDao =hashTableDao;37 dimention =Constant.DIMENTION;38 max =Constant.MAX;39 hashCount =Constant.HASHCOUNT;40 bitCount =Constant.BITCOUNT;41 size = dimention *max;42 hashFamily = new int[hashCount][bitCount];43 generataHashFamily();44 }45
46 /**
47 * 生成随机的投影点 ,在程序第一次执行时生成。投影点可以理解为后面去数组的索引值48 */
49 private voidgenerataHashFamily() {50 if (new File("/home/fanxuan/data/1.txt").exists()) {51 try{52 InputStream in = new FileInputStream("/home/fanxuan/data/1.txt");53 ObjectInputStream oin = newObjectInputStream(in);54 hashFamily = (int[][]) (oin.readObject());55 } catch(IOException e) {56 e.printStackTrace();57 } catch(ClassNotFoundException e) {58 e.printStackTrace();59 }60 }else{61 Random rd = newRandom();62 for (int i = 0; i < hashCount; i++) {63 for (int j = 0; j < bitCount; j++) {64 hashFamily[i][j] =rd.nextInt(size);65 }66 }67 try{68 OutputStream out = new FileOutputStream("/home/fanxuan/data/1.txt");69 ObjectOutputStream oout = newObjectOutputStream(out);70 oout.writeObject(hashFamily);71 } catch(FileNotFoundException e) {72 e.printStackTrace();73 } catch(IOException e) {74 e.printStackTrace();75 }76 }77 }78
79 //将向量转化为二进制字符串,比如元素的最大范围255,则元素65就被转化为65个1以及190个0
80 private int[] unAray(int[] data) {81 int unArayData[] = new int[size];82 for (int i = 0; i < data.length; i++) {83 for (int j = 0; j < data[i]; j++) {84 unArayData[i * max + j] = 1;85 }86 }87 returnunArayData;88 }89
90 /**
91 * 将向量映射为LSH中的key92 */
93 private String generateHashKey(int[] list, inthashNum) {94 StringBuilder sb = newStringBuilder();95 int[] tempData =unAray(list);96 int[] hashedData = new int[bitCount];97 //首先将向量转为二进制字符串
98 for (int i = 0; i < bitCount; i++) {99 hashedData[i] =tempData[hashFamily[hashNum][i]];100 sb.append(hashedData[i]);101 }102 //再用常规hash函数比如MD5对key进行压缩
103 MessageDigest messageDigest = null;104 try{105 messageDigest = MessageDigest.getInstance("MD5");106 }catch(NoSuchAlgorithmException e) {107
108 }109 byte[] binary =sb.toString().getBytes();110 byte[] hash =messageDigest.digest(binary);111 String hashV =MD5Util.bufferToHex(hash);112 returnhashV;113 }114
115 /**
116 * 将Sift特征点转换为Hash存表117 */
118 public void generateHashMap(String id, int[] vercotr, intfeatureId) {119 for (int j = 0; j < hashCount; j++) {120 String key =generateHashKey(vercotr, j);121 HashTable hashTableUpdateOrAdd = newHashTable();122 HashTable hashTable =hashTableDao.findHashTableByBucketId(key);123 if (hashTable != null) {124 String featureIdValue = hashTable.getFeatureId() + "," +featureId;125 hashTableUpdateOrAdd.setFeatureId(featureIdValue);126 hashTableUpdateOrAdd.setBucketId(key);127 hashTableDao.updateHashTableFeatureId(hashTableUpdateOrAdd);128 } else{129 hashTableUpdateOrAdd.setBucketId(key);130 hashTableUpdateOrAdd.setFeatureId(String.valueOf(featureId));131 hashTableDao.insertHashTable(hashTableUpdateOrAdd);132 }133 }134 }135
136 //查询与输入向量最接近(海明空间)的向量
137 public List queryList(int[] data) {138 List result = new ArrayList<>();139 for (int j = 0; j < hashCount; j++) {140 String key =generateHashKey(data, j);141 result.add(key);142 HashTable hashTable = hashTableDao.findHashTableByBucketId(key);143 if (!StringUtils.isEmpty(hashTable.getFeatureId())) {144 String[] str = hashTable.getFeatureId().split(",");145 for (String string : str) {146 result.add(string);147 }148 }
149 }150 returnresult;151 }152
153 }