Java实现LSH(Locality Sensitive Hashing)

这是一个Java实现的LSH(Locality Sensitive Hashing)类,用于将特征点转换为哈希并存储。通过生成随机投影点进行哈希,使用MD5对哈希键进行压缩,并在数据库中操作哈希表进行数据存储和查询。
摘要由CSDN通过智能技术生成

package com.demo.lsh;

import com.demo.config.Constant;
import com.demo.dao.FeatureDao;
import com.demo.dao.FeatureTableDao;
import com.demo.dao.HashTableDao;
import com.demo.entity.HashTable;
import com.demo.utils.MD5Util;
import com.demo.utils.MathUtil;
import org.opencv.core.Mat;
import org.springframework.util.StringUtils;

import java.io.*;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.*;

18 public classLSH {19 //维度大小,例如对于sift特征来说就是128

20 private int dimention =Constant.DIMENTION;21 //所需向量中元素可能的上限,譬如对于RGB来说,就是255

22 private int max =Constant.MAX;23 //哈希表的数量,用于更大程度地削减false positive

24 private int hashCount =Constant.HASHCOUNT;25 //LSH随机选取的采样位数,该值越小,则近似查找能力越大,但相应的false positive也越大;若该值等于size,则为由近似查找退化为精确匹配

26 private int bitCount =Constant.BITCOUNT;27 //转化为01字符串之后的位数,等于max乘以dimensions

28 private int size = dimention *max;29 //LSH哈希族,保存了随机采样点的INDEX

30 private int[][] hashFamily;31 privateHashTableDao hashTableDao;32 /**

33 * 构造函数34 */

35 publicLSH(HashTableDao hashTableDao) {36 this.hashTableDao =hashTableDao;37 dimention =Constant.DIMENTION;38 max =Constant.MAX;39 hashCount =Constant.HASHCOUNT;40 bitCount =Constant.BITCOUNT;41 size = dimention *max;42 hashFamily = new int[hashCount][bitCount];43 generataHashFamily();44 }45

46 /**

47 * 生成随机的投影点 ,在程序第一次执行时生成。投影点可以理解为后面去数组的索引值48 */

49 private voidgenerataHashFamily() {50 if (new File("/home/fanxuan/data/1.txt").exists()) {51 try{52 InputStream in = new FileInputStream("/home/fanxuan/data/1.txt");53 ObjectInputStream oin = newObjectInputStream(in);54 hashFamily = (int[][]) (oin.readObject());55 } catch(IOException e) {56 e.printStackTrace();57 } catch(ClassNotFoundException e) {58 e.printStackTrace();59 }60 }else{61 Random rd = newRandom();62 for (int i = 0; i < hashCount; i++) {63 for (int j = 0; j < bitCount; j++) {64 hashFamily[i][j] =rd.nextInt(size);65 }66 }67 try{68 OutputStream out = new FileOutputStream("/home/fanxuan/data/1.txt");69 ObjectOutputStream oout = newObjectOutputStream(out);70 oout.writeObject(hashFamily);71 } catch(FileNotFoundException e) {72 e.printStackTrace();73 } catch(IOException e) {74 e.printStackTrace();75 }76 }77 }78

79 //将向量转化为二进制字符串,比如元素的最大范围255,则元素65就被转化为65个1以及190个0

80 private int[] unAray(int[] data) {81 int unArayData[] = new int[size];82 for (int i = 0; i < data.length; i++) {83 for (int j = 0; j < data[i]; j++) {84 unArayData[i * max + j] = 1;85 }86 }87 returnunArayData;88 }89

90 /**

91 * 将向量映射为LSH中的key92 */

93 private String generateHashKey(int[] list, inthashNum) {94 StringBuilder sb = newStringBuilder();95 int[] tempData =unAray(list);96 int[] hashedData = new int[bitCount];97 //首先将向量转为二进制字符串

98 for (int i = 0; i < bitCount; i++) {99 hashedData[i] =tempData[hashFamily[hashNum][i]];100 sb.append(hashedData[i]);101 }102 //再用常规hash函数比如MD5对key进行压缩

103 MessageDigest messageDigest = null;104 try{105 messageDigest = MessageDigest.getInstance("MD5");106 }catch(NoSuchAlgorithmException e) {107

108 }109 byte[] binary =sb.toString().getBytes();110 byte[] hash =messageDigest.digest(binary);111 String hashV =MD5Util.bufferToHex(hash);112 returnhashV;113 }114

115 /**

116 * 将Sift特征点转换为Hash存表117 */

118 public void generateHashMap(String id, int[] vercotr, intfeatureId) {119 for (int j = 0; j < hashCount; j++) {120 String key =generateHashKey(vercotr, j);121 HashTable hashTableUpdateOrAdd = newHashTable();122 HashTable hashTable =hashTableDao.findHashTableByBucketId(key);123 if (hashTable != null) {124 String featureIdValue = hashTable.getFeatureId() + "," +featureId;125 hashTableUpdateOrAdd.setFeatureId(featureIdValue);126 hashTableUpdateOrAdd.setBucketId(key);127 hashTableDao.updateHashTableFeatureId(hashTableUpdateOrAdd);128 } else{129 hashTableUpdateOrAdd.setBucketId(key);130 hashTableUpdateOrAdd.setFeatureId(String.valueOf(featureId));131 hashTableDao.insertHashTable(hashTableUpdateOrAdd);132 }133 }134 }135

136 //查询与输入向量最接近(海明空间)的向量

137 public List queryList(int[] data) {138 List result = new ArrayList<>();139 for (int j = 0; j < hashCount; j++) {140 String key =generateHashKey(data, j);141 result.add(key);142 HashTable hashTable = hashTableDao.findHashTableByBucketId(key);143 if (!StringUtils.isEmpty(hashTable.getFeatureId())) {144 String[] str = hashTable.getFeatureId().split(",");145 for (String string : str) {146 result.add(string);147 }148 }

149 }150 returnresult;151 }152

153 }

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值