//fpTree.h
#ifndef FPTREE_H
#define FPTREE_H
#include <iostream>
#include <memory>
#include "itemset.h"
#include "transaction.h"
namespace fptSpace
{

/// Prefix tree over transactions, with one first-level subtree per starting
/// item (there is no single artificial root; see `roots`).
class fpTree
{
public :
    /// Builds the tree from every transaction in `db`, inserting the items of
    /// each transaction in the order given by `items`.
    fpTree ( ordered_itemset items ,
             transaction_database &db ) ;

    /// Copy constructor; only declared in this header.
    fpTree ( const fpTree & );

    /// Aggregate figures describing the shape of a tree.
    struct stats
    {
        stats () ;

        std::size_t size ;      ///< total number of nodes
        std::size_t n_leaves ;  ///< number of nodes without children
        std::size_t height ;    ///< depth of the deepest leaf (first level = 1)
    } ;

    /// Walks the whole tree and returns its node count, leaf count and height.
    stats get_stats () const ;

    class node ;
    using nodeptr = std::shared_ptr<node> ;

    /// Item ordering used when inserting transactions; fixed at construction.
    const ordered_itemset item_order ;

private:
    /// First-level nodes, keyed by the item they carry.
    std::unordered_map<item_type , nodeptr> roots ;

    /// Per-item pair of node pointers.
    /// NOTE(review): presumably the first/last node of a per-item node chain;
    /// nothing in the visible implementation fills it in - confirm.
    struct header
    {
        nodeptr head ;
        nodeptr tail ;
    } ;
    std::unordered_map<item_type,header> headerList ;
} ;

}
#endif //FPTREE_H
//fpTree.cpp
#include <unordered_set>
#include <limits>
#include <cassert>
#include <memory>
#include "fpTree.h"
using fptSpace::fpTree ;
/// A single FP-tree node: one item, how many inserted transactions passed
/// through it, and its links to parent and children.
///
/// Children are owned through shared_ptr while the parent link is a weak_ptr,
/// so the two directions never form an ownership cycle.
class fpTree::node : public std::enable_shared_from_this<node>
{
public :
    /// @param item_order  item ordering of the owning tree; stored by
    ///                    reference, so it must outlive this node.
    /// @param _item       item carried by this node.
    /// @param p           parent node; left empty for a first-level node.
    node ( const ordered_itemset &item_order ,
           item_type _item , std::weak_ptr<node> p = std::weak_ptr<node>() ) :
        item ( _item ), counter(0),
        // Initialise from the parameter `p`. The previous `parent(parent)`
        // copied the still-uninitialised member into itself.
        parent( p ),
        item_order(item_order)
    {}

    /// Returns the child carrying the given item, creating it if absent.
    nodeptr operator[] (item_type) ;

    /// Records one more transaction passing through this node.
    void increment_count ()
    {
        ++counter ;
    }

    item_type item ;                                      ///< item stored in this node
    std::size_t counter ;                                 ///< number of increment_count() calls
    std::weak_ptr<node> parent ;                          ///< non-owning link to the parent
    std::unordered_map<item_type, nodeptr > children ;    ///< owned children, keyed by item
    const ordered_itemset &item_order ;                   ///< ordering shared with the tree
};
/// Returns the child node that carries `item`, creating and linking a new
/// child (with this node as its parent) when none exists yet.
///
/// The parameter type is spelled `item_type` to match the in-class
/// declaration instead of hard-coding the underlying string type.
/// Because it calls shared_from_this(), the node it is invoked on must
/// already be owned by a shared_ptr.
fpTree::nodeptr fpTree::node::operator[] (item_type item )
{
    // operator[] on the map default-inserts an empty pointer for a new key;
    // the reference lets us fill that slot in place.
    nodeptr &child = children[item] ;
    if ( !child )
        child = std::make_shared<node>( item_order , item , shared_from_this() ) ;
    return child ;
}
/// Builds the tree by inserting every transaction of `db`.
///
/// For each transaction the items are visited in `item_order` sequence; only
/// items that occur in the transaction contribute to its path. The first such
/// item selects (or creates) a first-level node in `roots`, each following one
/// descends to (or creates) the matching child, and every node on the path has
/// its counter incremented once.
///
/// A transaction that contains none of the ordered items (for example because
/// all of its items were pruned, or it is empty) adds nothing to the tree and
/// is skipped. Previously this case tripped an assertion, or dereferenced the
/// end iterator when assertions were compiled out.
///
/// `headerList` is not touched by this constructor.
///
/// @param _items  item ordering to build with; moved into `item_order`.
/// @param db      transactions to insert; only iterated over here.
fpTree::fpTree (fptSpace::ordered_itemset _items , fptSpace::transaction_database &db ):
    item_order ( std::move(_items) )
{
    for ( const transaction &trans : db )
    {
        // Deepest node reached so far for this transaction; empty until the
        // first matching item has been found.
        nodeptr current ;

        for ( auto i = item_order.begin() ; i != item_order.end() ; ++i )
        {
            if ( trans.find(i->_item) == trans.end() )
                continue ;

            if ( !current )
            {
                // First matching item: start the path at the first level,
                // constructing that node if it does not exist yet.
                nodeptr &root = roots[i->_item] ;
                if ( !root )
                    root = nodeptr( new node (item_order, i->_item)) ;
                current = root ;
            }
            else
            {
                // Subsequent matching items extend the path downwards.
                current = (*current)[i->_item] ;
            }
            current->increment_count() ;
        }
    }
}
namespace
{
void traverse ( const fpTree::nodeptr ¤t_node ,
std::size_t height , fpTree::stats &stats )
{
++stats.size ;
if ( current_node->children.empty() )
{
//reach leaf
++stats.n_leaves ;
stats.height = std::max(stats.height , height ) ;
}
else
{
for ( const auto &i : current_node->children)
traverse ( i.second , height+1 , stats) ;
}
}
}
/// Computes node count, leaf count and height over all first-level subtrees.
/// Each first-level node is visited at depth 1.
fpTree::stats fpTree::get_stats() const
{
    fpTree::stats result ;
    for ( auto it = roots.begin() ; it != roots.end() ; ++it )
    {
        traverse( it->second , 1 , result ) ;
    }
    return result ;
}
fpTree::stats::stats() :
size(0) ,
n_leaves(0),height(0)
{}
//Main.cpp
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include "fileParser.h"
#include "parser.h"
#include "fpTree.h"
#include "memChecker.h"
using namespace std ;
/// Loads a transaction file, asks for a minimum support, prunes the item set
/// accordingly, builds the FP-tree and prints its statistics.
///
/// @return 0 on success, EXIT_FAILURE when the entered minimum support is not
///         a number in [0, 1).
int main ( void )
{
    std::string filename ("d:\\test\\a.dat") ;
    /*parser::transaction_database database ;
    database = parser::parseFile(filename) ;
    cout<<endl<<"here are all the items in transaction database "<<endl;
    for ( const auto &i : database ) //traverse vector < vector <string> > ; i is type of vector<string>
    for (const auto &j : i ) //traverse vector <string > ; j is the type of string , out put directly is ok
    cout<<j<<endl ;
    */
    fptSpace::transaction_database trans_db = fptSpace::parseFromFile(filename) ;
    fptSpace::itemset itemSet = trans_db.extract_itemset() ;
    // NOTE(review): the threshold below is scaled by itemSet.size(), not by
    // the number of transactions - confirm this is the intended base.
    const int total_item_counter = static_cast<int>( itemSet.size() ) ;

    double min_support = 0.0 ;
    std::cout<<"input min support ( min_sup < 1 )"<<std::endl;
    // Reject unparsable input and values outside the range the prompt asks
    // for; the negated form also rejects NaN.
    if ( !(std::cin >> min_support) ||
         !(min_support >= 0.0 && min_support < 1.0) )
    {
        std::cerr<<"invalid min support: expected a number in [0, 1)"<<std::endl ;
        return EXIT_FAILURE ;
    }

    int sup_threshold = static_cast<int>( min_support * total_item_counter ) ;
    itemSet.prune(sup_threshold) ;
    //fptSpace::ordered_itemset ordered_item_set = itemSet.get_ordered() ;
    fptSpace::fpTree tree ( itemSet.get_ordered() , trans_db) ;
    fptSpace::fpTree::stats stats = tree.get_stats() ;
    std::cout<<"File : "<<filename<< std::endl
             <<"Threshold: "<<min_support<<std::endl
             <<"Number of nodes: "<<stats.size<<std::endl
             <<"Number of leaves: "<<stats.n_leaves<<std::endl
             <<" Height: "<<stats.height <<std::endl
             <<"Max memory usage: "<<fptSpace::get_maxMem_kb()<<" kb "<<std::endl ;
    // Keeps the console window open (Windows shell command).
    std::system("pause") ;
    return 0 ;
}
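// TODO: add more comments when time allows. Smart pointers, namespaces and the
// more complex STL container types used here will each get their own
// dedicated study/experiment write-up.
// Exams are coming up, so it is time to hit the books.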