这是我之前写的一篇文章《Extremely Fast Codebook Learning for Landmark Recognition》。主要是利用《Random Projection Trees and Low Dimensional Manifolds》这篇文章的方法进行地标识别。论文的主要思想很简单:利用 random projection tree 随机选择一个方向进行投影,将投影之后的数据根据阈值划分到左右子树,这里的思想有点类似于 LSH 在欧几里得空间的做法,只不过这里的 bin 只有左右两个;分裂到一定深度(比如 10)即停止,再利用多棵树构成森林,消除随机性带来的不确定性,提高算法的泛化性。算法的速度非常快,在实际使用中也比其他无监督式的做法快得多。这里主要贴上个人写的主要 C++ 实现代码,具体代码放在 https://github.com/guoyilin/ERPF:
/**
 * One node of the random projection tree built by SpatialTree.
 *
 * save_rpTree_sub treats a node whose `index` is greater than -1 as a leaf
 * and serializes the vector `w` only for the remaining (internal) nodes.
 *
 * The default constructor puts every scalar and pointer member into a
 * defined state, so a node created with `new TreeNode` never exposes
 * indeterminate values (in particular dangling `left`/`right` pointers).
 * The chosen defaults equal what value-initialization (`new TreeNode()`)
 * already produced, so existing callers observe the same values as before.
 */
struct TreeNode {
    vector<int> indices;   // presumably ids of the samples routed to this node -- TODO confirm
    int index;             // > -1 marks a leaf (see save_rpTree_sub); callers must set it explicitly
    int height;            // NOTE(review): looks like the remaining/assigned depth -- confirm with splitF
    float thresholds[2];   // split thresholds on the projected value; exact meaning of the pair not visible here
    vector<float> w;       // vector serialized for internal nodes; presumably the projection direction
    TreeNode* left;        // left child, null when absent
    TreeNode* right;       // right child, null when absent

    // Zero/null-initialize everything that has no default constructor of its own.
    TreeNode() : index(0), height(0), left(0), right(0) {
        thresholds[0] = 0.0f;
        thresholds[1] = 0.0f;
    }
};
/**
 * Random projection tree over a collection of float feature vectors.
 *
 * A tree is either built from data (create_rpTree) or read back from a
 * file (load_rpTree); retrievalLeaf then maps a feature vector to a
 * vector<int> result.
 *
 * Only declarations live here; all definitions are in the implementation
 * file.
 */
class SpatialTree {
private:
    // Pointer to the sample matrix; ownership is not visible from this
    // declaration -- presumably borrowed from the caller, TODO confirm.
    vector<vector<float> > *data;

    // --- node splitting ---------------------------------------------------
    void splitF(TreeNode *node);
    void splitFbyFixed(TreeNode *node);

    // --- numeric helpers --------------------------------------------------
    // Matrix/vector overload; presumably one dot product per row of v1.
    vector<float> dot(const vector<vector<float> > &v1, const vector<float> &v2);
    // Vector/vector overload returning a single scalar.
    float dot(const vector<float> &v1, const vector<float> &v2);
    // NOTE(review): looks like a percentile lookup over the values in w -- confirm.
    float find_percentile(float percentage, const vector<float> &w);

    // --- serialization ----------------------------------------------------
    void save_rpTree(const string& rptree_file);
    // Per-node helper of save_rpTree: writes the leaf index for nodes with
    // index > -1 and otherwise builds a comma-separated string from w.
    void save_rpTree_sub(TreeNode *tree, ptree &pt);

public:
    // --- configuration and state (public data, declaration order kept) ----
    float spill;       // spill parameter passed to the constructor
    string rule;       // splitting rule name passed to the constructor
    TreeNode *root;    // root node of the tree
    int leaf_count;    // presumably the number of leaves created so far -- TODO confirm
    int min_items;     // NOTE(review): looks like a minimum node size for splitting -- confirm
    int samples_rp;    // NOTE(review): meaning not visible in this declaration
    int height;        // tree height passed to the constructor
    int dimension;     // presumably the feature dimensionality -- TODO confirm

    // --- construction -----------------------------------------------------
    SpatialTree();
    SpatialTree(vector<vector<float> > &data, const string &rule, float spill, int height);

    // --- tree lifecycle ---------------------------------------------------
    void create_rpTree(const string& rptree_file);
    void load_rpTree(const string& rptree_file);

    // --- queries and utilities --------------------------------------------
    vector<int> retrievalLeaf(vector<float> &feature);
    // Presumably the inverse of the comma-separated encoding used when saving w.
    vector<float> stringTovector(string& w);
    void print_rpTree(TreeNode *tree);
};
#endif
<pre name="code" class="cpp">void SpatialTree::save_rpTree_sub(TreeNode *tree, ptree &pt) {
if (tree->index > -1) leaf
pt.put("xmlattr.index", tree->index);
else {
not leaf
string w_string;
for (std::vector<float>::const_iterator iter = tree->w.begin(); iter
!= tree->w.end(); ++iter)
if (w_string.empty()) {
ostringstream ostr;
ostr << *iter;
w_string += ostr.str();
w_string += ",";
} else {