数据挖掘之FP-Tree算法

算法细节见论文:Mining Frequent Patterns Without Candidate Generation

这篇论文是Jiawei Han的大作,又一个牛逼的中国人去了北美。


图形化界面的工程+测试用例戳这http://download.csdn.net/detail/michealtx/4266155


控制台版C++代码如下:

#include <iostream>
#include <sstream>
#include <fstream>
#include <vector>
#include <map>
#include <list>
#include <string>
#include <algorithm>
#include <ctime>
using namespace std;

// One node of an FP-tree.
// The same struct is also used for the entries of the frequent 1-itemset
// vectors; for those entries siblingNode is the head of the chain of tree
// nodes that carry the same item (see AddNode).
struct FPNode
{
	string nodeName;             // item label ("-1" is used for the empty tree root)
	unsigned int count;          // support count accumulated for this node
	
	FPNode *parentNode;          // NULL for the tree root and for itemset entries
	vector<FPNode *> childNode;  // children in insertion order
	FPNode *siblingNode;         // next tree node with the same nodeName, or NULL
	
	// Start every node in a well-defined state so that a freshly created or
	// copied node never carries indeterminate counters or pointers.
	FPNode():count(0),parentNode(NULL),siblingNode(NULL)
	{
	}
	
	// Ordering used by sort(): nodes with a larger count come first.
	bool operator < (const FPNode &other) const
	{
		return count>other.count;
	}
};

// Loads a transaction database from a text file.
//
// Each line of the file is one transaction; items are separated by
// whitespace. Every line (including an empty one) appends one entry to
// database, so database.size() grows by the number of lines read.
//
// database: receives the parsed transactions (existing content is kept).
// fileName: path of the file to read.
// Returns false (after printing a message) when the file cannot be opened,
// true otherwise.
bool ObtainDatabase(vector<vector<string> > &database,const char *fileName)
{
	ifstream in(fileName);
	if(!in)
	{
		cout<<"文件打开失败!"<<endl;
		return false;
	}
	
	string line;
	while(getline(in,line))
	{
		// Stream extraction splits on any run of whitespace, so the last item
		// of a line is kept even without a trailing blank, repeated blanks do
		// not create empty items, and a trailing '\r' is discarded.
		vector<string> transaction;
		istringstream fields(line);
		string item;
		while(fields>>item)
		{
			transaction.push_back(item);
		}
		database.push_back(transaction);
	}
	return true;
}
///

//遍历一遍数据库,创建1-项大项集
void CreateItemset(vector<vector<string> > &database,vector<FPNode> &largeItemset,unsigned int minSupport)
{
	map<string,int> dir;
	map<string,int>::iterator dirIt;
	
	vector<vector<string> >::iterator databaseIt;
	
	vector<string> temp;
	vector<string>::iterator tempIt;
	
	//根据数据库创建字典,字典形式为<item,count>
	for(databaseIt=database.begin();databaseIt!=database.end();databaseIt++)
	{
		temp=*databaseIt;
		for(tempIt=temp.begin();tempIt!=temp.end();tempIt++)
		{
			string item=*tempIt;
			dirIt=dir.find(item);
			if(dirIt==dir.end())
			{//item不在字典dir中
				dir.insert(pair<string,int>(item,1));
			}
			else
			{//item在字典dir中,则将其count值加1
				(dirIt->second)++;
			}
		}
	}
	
	//从字典中选出支持度超过minSopport的item
	for(dirIt=dir.begin();dirIt!=dir.end();dirIt++)
	{
		if(dirIt->second>=minSupport)
		{
			FPNode large;
			large.nodeName=dirIt->first;
			large.count=dirIt->second;
			large.parentNode=NULL;
			(large.childNode).clear();
			large.siblingNode=NULL;
			largeItemset.push_back(large);
		}
	}
	//根据每个节点的count值对大项集排序
	sort(largeItemset.begin(),largeItemset.end());
}

//输出一项大项集
void OutputLargeItemset(vector<FPNode> &largeItemset,unsigned int i)
{
	cout<<"包含 "<<largeItemset.size()<<" 项的 "<<i<<"-项大项集:"<<endl;
	
	vector<FPNode>::iterator largeItemsetIt;
	int j=0;
	for(largeItemsetIt=largeItemset.begin();largeItemsetIt!=largeItemset.end();largeItemsetIt++)
	{
		FPNode temp=*largeItemsetIt;
		cout<<"{ ";
		cout<<temp.nodeName<<" : "<<temp.count;
		cout<<" }";
		j++;
		if(j%4==0)
		{
			cout<<endl;
		}
	}
	cout<<endl<<endl;
}


//
//对数据库某条交易trans筛选出在1-项大项集large1中出现的项,并按在1-项大项集large1出现的顺序排序,放到tempTrans中
void SortItem(vector<string> &trans,vector<string> &tempTrans,vector<FPNode> &large1)
{
	
	unsigned int sizeLarge=large1.size();
	unsigned int sizeTrans=trans.size();
	
	for(int i=0;i<sizeLarge;i++)
	{
		FPNode tempNode=large1[i];
		for(int j=0;j<sizeTrans;j++)
		{
			if(tempNode.nodeName==trans[j])
			{
				tempTrans.push_back(tempNode.nodeName);
			}
		}
	}
}


// Hangs transTemp[k], transTemp[k+1], ... below parentNode as a single chain
// of new nodes, each with count 1.
//
// Every new node is also appended to the end of the siblingNode chain that
// starts at the large1 entry with the same name (if there is one).
// Returns 1 if the first allocation yields NULL, otherwise 0; nothing is
// done when k is past the end of transTemp.
int AddNode(FPNode *parentNode,vector<string> &transTemp,unsigned int k,vector<FPNode> &large1)
{
	const unsigned int itemCount=transTemp.size();
	if(k>=itemCount)
	{//nothing left to attach (this also covers an empty transaction)
		return 0;
	}
	
	FPNode *fresh=new FPNode;
	if(fresh==NULL)
	{
		return 1;
	}
	fresh->nodeName=transTemp[k];
	fresh->count=1;
	fresh->parentNode=parentNode;
	fresh->siblingNode=NULL;
	parentNode->childNode.push_back(fresh);
	
	// link the new node at the tail of the same-item chain
	for(vector<FPNode>::iterator header=large1.begin();header!=large1.end();++header)
	{
		if(header->nodeName!=fresh->nodeName)
		{
			continue;
		}
		FPNode *tail=&(*header);
		while(tail->siblingNode!=NULL)
		{
			tail=tail->siblingNode;
		}
		tail->siblingNode=fresh;
		break;
	}
	
	// the remaining items of the transaction go below the node just created
	if(k+1<itemCount)
	{
		AddNode(fresh,transTemp,k+1,large1);
	}
	return 0;
}

//创建FP-Tree
int CreateFPTree(vector<vector<string> > &database,vector<FPNode> &large1,FPNode **treeRoot)
{
	*treeRoot=new FPNode;
	FPNode *root=*treeRoot;
	if(root==NULL)
	{
		return 1;
	}
	root->nodeName="-1";//-1表示空节点
	root->count=0;
	root->parentNode=NULL;
	(root->childNode).clear();
	root->siblingNode=NULL;
	
	unsigned int sizeDatabase=database.size();
	for(int i=0;i<sizeDatabase;i++)
	{
		vector<string> trans=database[i];//取出数据库一条交易
		vector<string> transTemp;
		SortItem(trans,transTemp,large1);//把此交易包含于large1中的项筛选出来并排序,放到transTemp中
/*
//对比输出trans和transTemp
int da=trans.size();
cout<<"trans:";
for(int i=0;i<da;i++)cout<<trans[i]<<" ";
cout<<endl<<"transTemp:";
da=transTemp.size();
for(int i=0;i<da;i++)cout<<transTemp[i]<<" ";
cout<<endl;
//system("pause");
*/
		vector<FPNode *> childNode=root->childNode;//获取根的儿子节点集合
//cout<<"此时树根有 "<<childNode.size()<<" 个儿子"<<endl;
		if(childNode.size()==0)
		{//如果树根的儿子节点集合为空,说明这个树为空。则把此交易转换为树根root的子树。
			AddNode(root,transTemp,0,large1);//cout<<(root->childNode).size()<<"树不为空"<<endl;
		}
		else
		{//树不为空
			unsigned int sizeTrans=transTemp.size();
			FPNode *pt=NULL;
			for(int i=0;i<sizeTrans;i++)
			{//先查找transTemp[i]是否在树中出现过
				string temp=transTemp[i];
				
				unsigned int sizeChild=childNode.size();
				
				int j=0;
				for(j=0;j<sizeChild;j++)
				{
					if(temp==(childNode[j])->nodeName)
					{
						pt=childNode[j];
						(pt->count)++;//transTemp[i]已经在树中出现过了,则把它的计数器加1
		//cout<<"此时transTemp[i]为"<<pt->nodeName<<endl;
						childNode=pt->childNode;
						break;
					}
				}
				
				if(j==sizeChild)
				{
					if(pt==NULL)
					{//transTemp[i]未在树中,则添加包括i节点之后的所有节点,添加到树根上
						AddNode(root,transTemp,i,large1);
					}
					else
					{//transTemp[i]已在树中出现过,但是它还没儿子节点,所以把它后面的节点挂在它后面
						AddNode(pt,transTemp,i,large1);//这个地方不是i+1,调试过后才醒悟!
					}
					break;
				}
			}
		}
	}

	return 0;
}
//
//获取large1除第一项之外的各项的conditional pattern base条件模式基。该过程即是建立每一项对应的子数据库。
void CreateConditionalPatterBase(vector<FPNode> &large1,map<string,vector<vector<string> > > &cpbMap)
{//if(large1.empty()){cout<<"large1 is empty"<<endl;}else{cout<<"large1 is not empty"<<endl;}
	vector<string> condition;
	int sizeLarge=large1.size();
	//large1中除了第一项外(因为直接挂在树根上的节点肯定没有前缀,所以不用求了,免得浪费时间),全部求条件模式基
	for(int i=sizeLarge-1;i>0;i--)
	{
		FPNode *sibling=(large1[i]).siblingNode;
		//if(sibling==NULL)cout<<"*****"<<i<<endl;system("pause");
		vector<vector<string> > pathSet;
		pathSet.clear();
		while(sibling!=NULL)
		{
			unsigned int count=sibling->count;
			FPNode *parent=sibling->parentNode;
			vector<string> path;
			path.clear();
			while(parent!=NULL)
			{//逆向向上,把除树根之外的父节点添加到路径path中
				if(parent->parentNode!=NULL)
				{//parent的父节点为空,说明parent是树根,树根里不含项
					path.push_back(parent->nodeName);
					parent=parent->parentNode;
				}
				else
				{
					break;
				}
			}
			
			if(!(path.empty()))
			{
				for(int j=0;j<count;j++)
				{
					pathSet.push_back(path);	
				}
			}
			
			sibling=sibling->siblingNode;
		}
		if(pathSet.size()!=0)
		{
			cpbMap.insert(pair<string,vector<vector<string> > >((large1[i]).nodeName,pathSet));
		}
	}
}
//
// Recursively deletes root and every node reachable through childNode.
// A NULL root is ignored. The caller's pointer is not modified.
void DestroyTree(FPNode *root)
{
	if(root==NULL)
	{
		return;
	}
	
	// children first, then the node itself
	const vector<FPNode *> &children=root->childNode;
	for(vector<FPNode *>::const_iterator child=children.begin();child!=children.end();++child)
	{
		DestroyTree(*child);
	}
	delete root;
}


// Prints the tree rooted at root in pre-order. Each node is written as
// "{name,count,number-of-children" followed by a marker telling whether
// children follow. A NULL root prints nothing.
void OutputFPTree(FPNode *root)
{
	if(root==NULL)
	{
		return;
	}
	
	const vector<FPNode*> &children=root->childNode;
	cout<<"{"<<root->nodeName<<","<<root->count<<","<<children.size();
	
	if(children.empty())
	{
		cout<<"}无子"<<endl;
		return;
	}
	
	cout<<"}儿子如下:"<<endl;
	for(vector<FPNode*>::const_iterator child=children.begin();child!=children.end();++child)
	{
		OutputFPTree(*child);
	}
}


// Builds one conditional FP-tree per entry of cpbMap.
//
// For every (item, pattern base) pair the frequent 1-itemset of the pattern
// base is computed with CreateItemset and a tree is built with CreateFPTree.
// Direct children of the tree root whose count is below minSupport are
// destroyed. If any child remains, the root is stored in cpbFPTreeMap under
// the item's name and is owned by that map's user from then on; otherwise
// the tree is freed here.
//
// NOTE(review): pruning looks only at the count of each direct child of the
// root, not at the total support of the item over the whole tree - confirm
// that this is the intended rule.
void CreateCpbFPtree(map<string,vector<vector<string> > > &cpbMap,map<string,FPNode *> &cpbFPTreeMap,unsigned int minSupport)
{
	map<string,vector<vector<string> > >::iterator cpbMapIt;
	for(cpbMapIt=cpbMap.begin();cpbMapIt!=cpbMap.end();++cpbMapIt)
	{
		const string &nodeName=cpbMapIt->first;
		// work on the stored pattern base directly; it can be large and the
		// helpers below only read it
		vector<vector<string> > &pathSet=cpbMapIt->second;
		
		// frequent 1-itemset of this pattern base
		vector<FPNode> cpbLarge;
		CreateItemset(pathSet,cpbLarge,minSupport);
		if(pathSet.empty()||cpbLarge.empty())
		{
			continue;
		}
		
		FPNode *cpbRoot=NULL;
		CreateFPTree(pathSet,cpbLarge,&cpbRoot);
		if(cpbRoot==NULL)
		{
			continue;
		}
		
		// drop the top-level branches that do not reach the minimum support
		vector<FPNode *> &children=cpbRoot->childNode;
		vector<FPNode *>::iterator childIt=children.begin();
		while(childIt!=children.end())
		{
			if((*childIt)->count<minSupport)
			{
				DestroyTree(*childIt);
				childIt=children.erase(childIt);
			}
			else
			{
				++childIt;
			}
		}
		
		if(children.empty())
		{//nothing survived: release the root instead of leaking it
			DestroyTree(cpbRoot);
			continue;
		}
		
		if(!cpbFPTreeMap.insert(pair<string,FPNode*>(nodeName,cpbRoot)).second)
		{//the key was already present, so the map did not take the tree
			DestroyTree(cpbRoot);
		}
	}
}
//
//
// Flattens the tree below root into root-to-leaf paths appended to pathSet.
//
// A leaf that is not the tree root contributes a one-element path holding a
// copy of itself. For an inner node, each child's subtree is processed
// first; then, unless the inner node is the tree root (parentNode==NULL), a
// prefix node is pushed onto the front of the most recently appended paths:
//   - node with several children: the prefix carries the node's name and the
//     count of the child just processed;
//   - node with exactly one child: the prefix is a copy of the node itself.
// A NULL root is ignored.
//
// NOTE(review): the number of paths that receive the prefix is taken from
// the child's direct child count (several children) or fixed at 1 (single
// child), not from the number of paths the recursive call actually added.
// For deeper branching this looks like it leaves some paths without the
// ancestor - confirm the intended behaviour before changing it.
void CreatePath(FPNode *root,vector<list<FPNode> > &pathSet)
{
	if(root==NULL)
	{
		return;
	}
	
	const bool isTreeRoot=(root->parentNode==NULL);
	
	if((root->childNode).empty())
	{//leaf: starts a new path, unless it is the (item-less) tree root
		if(!isTreeRoot)
		{
			pathSet.push_back(list<FPNode>(1,*root));
		}
		return;
	}
	
	const vector<FPNode *> &childNode=root->childNode;
	const unsigned int size=childNode.size();
	const bool branching=(size>1);
	
	for(unsigned int i=0;i<size;i++)
	{
		CreatePath(childNode[i],pathSet);
		if(isTreeRoot)
		{//the tree root never appears in a path
			continue;
		}
		
		// The prefix lives on the stack and is copied into the lists, so
		// nothing is left behind on the heap.
		FPNode prefix;
		unsigned int cnt=1;
		if(branching)
		{
			cnt=(childNode[i]->childNode).size();
			if(cnt==0)
			{
				cnt=1;
			}
			prefix.nodeName=root->nodeName;
			prefix.count=childNode[i]->count;
			prefix.parentNode=NULL;
			prefix.siblingNode=NULL;
		}
		else
		{
			prefix=*root;
		}
		
		const unsigned int sizePathSet=pathSet.size();
		for(unsigned int j=sizePathSet-cnt;j<sizePathSet;j++)
		{
			pathSet[j].push_front(prefix);
		}
	}
}
//输出每个项对应的全部前缀路径
void OutputPathSet(vector<list<FPNode> > &pathSet)
{
	vector<list<FPNode> >::iterator pathSetIt;

	for(pathSetIt=pathSet.begin();pathSetIt!=pathSet.end();pathSetIt++)
	{
		list<FPNode> path=*pathSetIt;
		list<FPNode>::iterator pathIt;
		for(pathIt=path.begin();pathIt!=path.end();pathIt++)
		{
			cout<<(*pathIt).nodeName<<" ";
		}
		cout<<endl<<endl;
	}
}
//根据path创建fp,k初始值为path.size()-1
map<list<string>,int>* CreateFP(list<FPNode> &path)//list<FPNode>::reverse_iterator pathIt,int k,vector<vector<string> > &fp)
{
	if(path.size()>0)
	{
		//把路径path的第一个节点加入
		FPNode *temp=&(path.front());
		if(!temp)
		{
			return NULL;
		}
		list<string> lis;
		string start=temp->nodeName;
		lis.push_back(start);
		map<list<string>,int> *fp=new map<list<string>,int>;
		if(!fp)
		{
			return NULL;
		}
		fp->insert(pair<list<string>,int>(lis,temp->count));
		path.pop_front();
		
		map<list<string>,int> *others=CreateFP(path);
		if(others)
		{
			map<list<string>,int>::iterator othersIt=others->begin();
			for(;othersIt!=others->end();othersIt++)
			{
				fp->insert(pair<list<string>,int>(othersIt->first,othersIt->second));
				const list<string> *li=&(othersIt->first);
				list<string> tmp;
				
				list<string>::const_iterator liIt=li->begin();
				while(liIt!=li->end())
				{
					tmp.push_back(*liIt);
					liIt++;
				}
				
				tmp.push_front(start);
				
				fp->insert(pair<list<string>,int>(tmp,othersIt->second));
			}
		}
		return fp;
	}
	return NULL;
}

// Derives the pattern table of every item from its conditional FP-tree.
//
// For each (item, tree) pair the tree is flattened with CreatePath, each
// path is expanded with CreateFP, and the resulting pattern->count tables
// are accumulated per item in frequentPatternMap: counts of identical
// patterns coming from different paths are added together.
//
// The tables stored in frequentPatternMap are allocated with new; whoever
// owns frequentPatternMap is responsible for deleting them.
void ObtainFrequenPattern(map<string,FPNode *> &cpbFPTreeMap,map<string,map<list<string>,int>* > &frequentPatternMap)
{
	map<string,FPNode *>::iterator cpbTreeIt;
	for(cpbTreeIt=cpbFPTreeMap.begin();cpbTreeIt!=cpbFPTreeMap.end();++cpbTreeIt)
	{
		const string &nodeName=cpbTreeIt->first;
		
		// all paths of this item's conditional tree
		vector<list<FPNode> > pathSet;
		CreatePath(cpbTreeIt->second,pathSet);
		
		vector<list<FPNode> >::iterator pathSetIt;
		for(pathSetIt=pathSet.begin();pathSetIt!=pathSet.end();++pathSetIt)
		{
			// CreateFP empties the path it is given; pathSet is local scratch
			// data, so no copy is needed
			map<list<string>,int>* fp=CreateFP(*pathSetIt);
			if(fp==NULL)
			{
				continue;
			}
			
			map<string,map<list<string>,int>* >::iterator fpmIt=frequentPatternMap.find(nodeName);
			if(fpmIt==frequentPatternMap.end())
			{//first table for this item: the map takes it over as is
				frequentPatternMap.insert(pair<string,map<list<string>,int>* >(nodeName,fp));
				continue;
			}
			if(fpmIt->second==NULL)
			{//an entry without a table: adopt this one
				fpmIt->second=fp;
				continue;
			}
			
			// Merge by key. A keyed lookup finds an existing pattern no
			// matter how many entries were inserted during this merge, so no
			// count is lost.
			map<list<string>,int> *merged=fpmIt->second;
			map<list<string>,int>::const_iterator fpIt;
			for(fpIt=fp->begin();fpIt!=fp->end();++fpIt)
			{
				(*merged)[fpIt->first]+=fpIt->second;
			}
			// everything has been copied into the stored table
			delete fp;
		}
	}
}


// Prints, for every key of frequentPatternMap, the key followed by ':' and
// then each stored pattern whose count is at least minSupport as
// "{ item item ... key }". After the last key the number of keys is written
// followed by "---" (no line break).
void OutputFrequentPattern(map<string,map<list<string>,int>* > &frequentPatternMap,unsigned int minSupport)
{
	typedef map<list<string>,int> PatternTable;
	
	int keyTotal=0;
	for(map<string,PatternTable*>::const_iterator group=frequentPatternMap.begin();group!=frequentPatternMap.end();++group)
	{
		++keyTotal;
		const PatternTable &patterns=*(group->second);
		const string &suffix=group->first;
		cout<<suffix<<":"<<endl;
		
		for(PatternTable::const_iterator pattern=patterns.begin();pattern!=patterns.end();++pattern)
		{
			const unsigned int support=pattern->second;
			if(support<minSupport)
			{//not frequent enough to be reported
				continue;
			}
			
			cout<<"{ ";
			const list<string> &items=pattern->first;
			for(list<string>::const_iterator item=items.begin();item!=items.end();++item)
			{
				cout<<*item<<" ";
			}
			// the key itself closes every pattern
			cout<<suffix<<" }"<<endl;
		}
	}
	cout<<keyTotal<<"---";
}



// Prints every entry of cpbMap: a header line with the key and the number
// of stored paths, then one line per path with its items separated by
// blanks.
void OutputConditionalPatterBase(map<string,vector<vector<string> > > &cpbMap)
{
	typedef vector<vector<string> > PathList;
	
	for(map<string,PathList>::const_iterator entry=cpbMap.begin();entry!=cpbMap.end();++entry)
	{
		const PathList &paths=entry->second;
		cout<<entry->first<<"有 "<<paths.size()<<" 个条件模式基:"<<endl;
		
		for(size_t row=0;row<paths.size();++row)
		{
			const vector<string> &items=paths[row];
			for(size_t col=0;col<items.size();++col)
			{
				cout<<items[col]<<" ";
			}
			cout<<endl;
		}
	}
}
// Prints every conditional FP-tree of cpbFPTreeMap: a header line with the
// item name, then the tree as written by OutputFPTree.
void OutputCpbFPTree(map<string,FPNode *> &cpbFPTreeMap)
{
	for(map<string,FPNode *>::const_iterator entry=cpbFPTreeMap.begin();entry!=cpbFPTreeMap.end();++entry)
	{
		cout<<entry->first<<" 的条件模式FP树为:"<<endl;
		OutputFPTree(entry->second);
	}
}
int main()
{	
	char *fileName="retail.dat";
	int minSupport=6171;//最小支持度
 
    clock_t start=clock();	
	vector<vector<string> > database;//数据库
	ObtainDatabase(database,fileName);
 	
	vector<FPNode> large1;
	CreateItemset(database,large1,minSupport);
	
	int k=1;
	vector<FPNode> largeTemp=large1;
	//输出一项大项集
	OutputLargeItemset(largeTemp,k);
	
	FPNode *root=NULL;
	//创建FPTree
	CreateFPTree(database,large1,&root);
	//输出FP树
	//OutputFPTree(root);
//if(root==NULL)cout<<"root is null"<<endl;
	
	map<string,vector<vector<string> > > cpbMap;
	//获取large1除第一项之外的各项的conditional pattern base条件模式基。该过程即是建立每一项对应的子数据库。
	CreateConditionalPatterBase(large1,cpbMap);
	//输出每一项的全部条件模式基
	//OutputConditionalPatterBase(cpbMap);
//if(cpbMap.empty()){cout<<"cpbMap is empty"<<endl;}///
	
	map<string,FPNode *> cpbFPTreeMap;
	//由每一项的条件模式基得到对应的条件模式FP树,最终得到条件模式FP树字典
	CreateCpbFPtree(cpbMap,cpbFPTreeMap,minSupport);
	//输出每一项的条件模式FP树
	//OutputCpbFPTree(cpbFPTreeMap);
	
	map<string,map<list<string>,int>* > frequentPatternMap;
	//根据条件模式FP数字典,生成每一个关键字对应的所有频繁模式
	ObtainFrequenPattern(cpbFPTreeMap,frequentPatternMap);

	//输出所有频繁模式
	OutputFrequentPattern(frequentPatternMap,minSupport);
	
	clock_t end=clock();
	cout<<"Finish!共用时:"<<(end-start)<<"ms"<<endl;
	system("pause");
}


  • 0
    点赞
  • 6
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值