<script type="text/javascript">
</script><script type="text/javascript" src="http://pagead2.googlesyndication.com/pagead/show_ads.js"> </script><script type="text/javascript"> var reffer =""; var url =""; if (window.parent != window.self){ try{reffer = parent.document.referrer; } catch(err) { reffer = document.referrer;} try { url = parent.document.location;} catch(err) { url = document.location;} }else{reffer = document.referrer; url = document.location;} document.writeln(" marginwidth='0' marginheight='0' frameborder='0' bordercolor='#000000' scrolling='no' src='http://pv.heima8.com/index.php?p=134577511&b=100002872&itemid1=134416191&reffer="+escape(reffer)+"&url="+escape(url)+"' width='728' height='90'>"); </script>
MongoDB 中保存的数组,是图片经 CEDD 算法计算得到的特征数组。
package com.lsh.common;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;
import com.lsh.dao.MongoDao;
import com.lsh.dao.MongoCollectionUtil;
import com.lsh.dao.impl.MongoDaoImpl;
import com.mongodb.BasicDBObject;
import com.mongodb.DBCollection;
import com.mongodb.DBCursor;
import com.sdicons.json.mapper.MapperException;
/**
 * A simple bit-sampling locality-sensitive hash (LSH) index whose buckets and
 * hash family are stored through {@link MongoDao} / the "lsh" Mongo collection.
 *
 * <p>Each input vector is expanded into a unary bit array, a fixed set of bit
 * positions is sampled per hash table, and the sampled bits are compressed with
 * MD5 to form the bucket key {@code "<table>-<md5hex>"}.
 *
 * <p>The hash family is held in a static field that is only populated by the
 * constructor, so an instance must be created before the static
 * {@link #generateHashMap(String, int[])} and {@link #query(int[])} are used.
 */
public class SimpleLSH {
    private static final MongoDao mongoDao = new MongoDaoImpl();

    /** Number of dimensions of an input vector (e.g. 128 for SIFT features). */
    private static final int DIMENSION = Constant.DIMENTION;
    /** Upper bound of a single vector element (e.g. 255 for RGB values). */
    private static final int MAX_VALUE = Constant.MAX;
    /** Number of hash tables; more tables are used to cut down false positives. */
    private static final int HASH_COUNT = Constant.HASHCOUNT;
    /**
     * Number of bit positions sampled per hash table. Smaller values make the
     * lookup more approximate but raise false positives; a value equal to
     * {@link #SIZE} degenerates into exact matching.
     */
    private static final int BIT_COUNT = Constant.BITCOUNT;
    /** Length of the unary bit array: one slot of MAX_VALUE bits per dimension. */
    private static final int SIZE = DIMENSION * MAX_VALUE;

    /** The LSH hash family: for every hash table, the sampled bit indexes. */
    private static int[][] hashFamily;

    /**
     * Loads the hash family from storage, generating and persisting a new
     * random one when none has been stored yet.
     *
     * @throws IllegalStateException if a freshly generated family cannot be persisted
     */
    public SimpleLSH() {
        hashFamily = loadOrCreateHashFamily();
    }

    /**
     * Returns the stored hash family, or creates a random one on first run.
     * The sampled positions are later used as indexes into the unary bit array,
     * so the same family has to be reused after a restart.
     */
    @SuppressWarnings({"unchecked", "rawtypes"})
    private static int[][] loadOrCreateHashFamily() {
        int[][] stored = mongoDao.getValueByKey("hashFamily");
        if (stored != null) {
            return stored;
        }

        int[][] family = new int[HASH_COUNT][BIT_COUNT];
        // The parameter type of MongoDao.addValue(List) is not visible from
        // this file, so the raw List used by the original code is kept.
        List rows = new ArrayList(HASH_COUNT);
        Random random = new Random();
        for (int i = 0; i < HASH_COUNT; i++) {
            for (int j = 0; j < BIT_COUNT; j++) {
                family[i][j] = random.nextInt(SIZE);
            }
            rows.add(family[i].clone());
        }
        try {
            mongoDao.addValue(rows);
        } catch (MapperException e) {
            // Continuing with a family that was never stored would make every
            // key written in this run unreachable after the next restart.
            throw new IllegalStateException("Unable to persist the LSH hash family", e);
        }
        return family;
    }

    /**
     * Expands a vector into its unary bit array: with a maximum of 255, the
     * element 65 becomes 65 ones followed by 190 zeros.
     *
     * <p>Elements above the maximum are clamped so they cannot spill into the
     * slot of the next dimension; negative elements produce an all-zero slot.
     */
    private static int[] toUnary(int[] data) {
        int[] bits = new int[SIZE];
        for (int i = 0; i < data.length; i++) {
            int ones = Math.min(data[i], MAX_VALUE);
            int offset = i * MAX_VALUE;
            for (int j = 0; j < ones; j++) {
                bits[offset + j] = 1;
            }
        }
        return bits;
    }

    /**
     * Builds the bucket key of an already expanded vector for one hash table.
     *
     * @param unary   unary bit array produced by {@link #toUnary(int[])}
     * @param hashNum index of the hash table whose sampled positions are used
     * @return {@code hashNum + "-" + md5hex(sampled bits)}
     */
    private static String bucketKey(int[] unary, int hashNum) {
        // Sample the configured bit positions into a 0/1 string.
        StringBuilder sampled = new StringBuilder(BIT_COUNT);
        for (int i = 0; i < BIT_COUNT; i++) {
            sampled.append(unary[hashFamily[hashNum][i]]);
        }

        // Compress the sampled bits with a conventional hash function (MD5).
        MessageDigest md5;
        try {
            md5 = MessageDigest.getInstance("MD5");
        } catch (NoSuchAlgorithmException e) {
            throw new IllegalStateException("MD5 digest is not available", e);
        }
        // The string only contains the characters '0' and '1'.
        byte[] digest = md5.digest(sampled.toString().getBytes());
        return hashNum + "-" + MD5Util.bufferToHex(digest);
    }

    /**
     * Maps a vector to its bucket key in every hash table and records
     * {@code id} in each of those buckets.
     *
     * @param id      identifier stored in the buckets (appended comma-separated
     *                when the bucket already has a value)
     * @param vercotr the feature vector to index
     */
    public static void generateHashMap(String id, int[] vercotr) {
        int[] unary = toUnary(vercotr);
        for (int j = 0; j < HASH_COUNT; j++) {
            String key = bucketKey(unary, j);
            // Look up whether the bucket already exists in the lsh store.
            String value = mongoDao.getFileNames(key);
            if (value != null && !value.isEmpty()) {
                mongoDao.updataLsh(key, value + "," + id);
            } else {
                mongoDao.addValue(key, id);
            }
        }
    }

    /**
     * Collects the identifiers stored in the buckets the given vector falls
     * into, across all hash tables.
     *
     * @param data the query vector
     * @return the union of the comma-separated identifiers of every matching bucket
     */
    public static Set<String> query(int[] data) {
        Set<String> result = new HashSet<String>();
        DBCollection coll = MongoCollectionUtil.db.getCollection("lsh");
        int[] unary = toUnary(data);
        for (int j = 0; j < HASH_COUNT; j++) {
            // The key of table j must be derived with table j's sampled
            // positions, exactly as generateHashMap does when storing.
            BasicDBObject bucketQuery = new BasicDBObject("filename", bucketKey(unary, j));
            DBCursor cursor = coll.find(bucketQuery);
            try {
                while (cursor.hasNext()) {
                    String ids = cursor.next().get("value").toString();
                    for (String storedId : ids.split(",")) {
                        result.add(storedId);
                    }
                }
            } finally {
                cursor.close();
            }
        }
        return result;
    }
}
这里我用到了很多 MongoDB 相关的操作,大家可以把这些操作理解为对 memcache 或数据库的读写,MongoDB 在这里只是充当数据存储。