// STL_Apriori_Algorithm

/**@author	Amiber
@date 2012-11-23
@brief: Apriori-Algorithm
*/

#pragma warning(disable:4786)

#include<algorithm>
#include<cstdio>
#include<fstream>
#include<iomanip>
#include<iostream>
#include<iterator>
#include<map>
#include<set>
#include<stdexcept>
#include<string>
#include<vector>

using namespace std;

// Maximum number of primes.
const int MAXPRIMNUM = 101;

// Names of the four output files opened by Apriori, indexed 0..3:
// [0] frequent 1-itemsets, [1] frequent itemsets of size 1-2,
// [2] all frequent itemsets, [3] association rules.
const string outputFiles[]= {"system.out.1","system.out.2","system.out.3","system.out.4"};

// One itemset together with the bookkeeping collected while mining it.
struct FreqItemSet
{
	// item ids that make up the set, kept in ascending order
	std::set<int> itemset;

	// ids of the transactions recorded for this itemset
	std::vector<int> transId;

	// integer key associated with the itemset by the mining code
	int hashId;
};


// Strict ordering for FreqItemSet: smaller itemsets come first; itemsets of
// equal size are ordered lexicographically by their item ids.
bool cmp(const FreqItemSet& frq1,const FreqItemSet& frq2)
{
	if(frq1.itemset.size() == frq2.itemset.size())
	{
		return frq1.itemset < frq2.itemset;
	}

	return frq1.itemset.size() < frq2.itemset.size();
}

// Two FreqItemSet values are equal when they hold the same items; the
// transaction list and hashId do not take part in the comparison.
bool operator ==(const FreqItemSet& frq1,const FreqItemSet& frq2)
{
	return frq1.itemset == frq2.itemset;
}

//define the apriori-algorithm-class
class Apriori
{
public :
	Apriori(const string& dataTransInput,const string& dataItemInput)
	{
	    //open the transInputFile
		try
		{
			fTrancinput.open(dataTransInput.c_str(),ios::in);
		}catch(...)
		{
			cerr<<"Trans file input error"<<std::endl;
		}

		//open the ItemInputFile
		try
		{
			fIteminput.open(dataItemInput.c_str(),ios::in);

		}catch(...)
		{
			cerr<<"Item file input error"<<std::endl;
		}


		//open the outputFile(4)
		for(int i=0;i<4;i++)
		{
			try
			{
				fout[i].open(outputFiles[i].c_str());
			}catch(...)
			{
				cerr<<"error write"<<endl;
			}
		}
	}

	void apriori(double minsup,double minconf)
	{
		//clear;
		reset();

		//get the prime according to item
		getPrime();
		
		//get the itemDat ,transDat
		doPrepare();


		vector<FreqItemSet> tmpSet;

		map<int,int>::iterator newIter;
		map<int,int>::iterator  transIter;

		//folowing ,calculate the 1-freq
		for(newIter = primItems.begin();newIter!=primItems.end();)
		{
			FreqItemSet freq;
			freq.itemset.insert(newIter->first);
			freq.hashId = newIter->second;
			
			// read the transDat,count the single-itemset
			for(transIter=trans.begin();transIter!=trans.end();++transIter)
			{
				if(transIter->second % freq.hashId ==0 )
				{
					freq.transId.push_back(transIter->first);
				}
			}
			
			if( static_cast<double>(freq.transId.size())/totalTrans >= minsup)
			{
				tmpSet.push_back(freq);
				freqSet.push_back(freq);
				newIter ++;
			}else
			{
				//if a is not freq,then ab is also
				newIter = primItems.erase(newIter);
			}
		}

		//following , k-freqitems -> k-1
		vector<FreqItemSet>::iterator iter;

		//if left in the tmpSet
		while(!tmpSet.empty())
		{
			iter = tmpSet.begin();
			
			vector<FreqItemSet> tmpKSet;

			//begin to do tmpSet
			while(iter!=tmpSet.end())
			{
				newIter = primItems.begin();
				//add the new item
				for(;newIter!=primItems.end();newIter++)
				{
					if((iter->itemset).find(newIter->first) == (iter->itemset).end())
					{
						FreqItemSet tmpFreq;
						tmpFreq.itemset = iter->itemset;
						tmpFreq.itemset.insert(newIter->first);
						tmpFreq.hashId = iter->hashId * newIter->second;

						//count the new-itemset
						for(transIter = trans.begin();transIter!=trans.end();++transIter)
						{
							if(transIter->second % tmpFreq.hashId == 0)
							{
								tmpFreq.transId.push_back(transIter->first);
							}
						}

						//satisfying the condition
						if(tmpFreq.transId.size()*1.0/totalTrans >= (minsup))
						{
							tmpKSet.push_back(tmpFreq);
							freqSet.push_back(tmpFreq);
						}
					}
				}

				iter = tmpSet.erase(iter);
			}

			//release the mem
			vector<FreqItemSet> tmpDele;
			tmpDele.swap(tmpSet);
			
			tmpSet = tmpKSet;
			
		}

		//uniq the same itemset
		sort(freqSet.begin(),freqSet.end(),cmp);
		freqSet.erase(unique(freqSet.begin(),freqSet.end()),freqSet.end());

		//output the freqSet(task1-3)
		outputFreqSet();
		
		//output the associate-rules(task4)
		outputAssociate();
	}
	

	~Apriori()
	{
		fIteminput.close();
		fTrancinput.close();

		for(int i=0;i<4;i++)
		{
			fout[i].close();
		}
	}
private :

	//trans-inputfile
	ifstream fTrancinput;

	//items-inputfile
	ifstream fIteminput;

	//output-file
	ofstream fout[4];
	
	//store the trans
	map<int,int> trans;

	//store the items
	map<int,string> items;

	//store exchange items
	map<int,int> primItems;

	//store the final frequset
	vector<FreqItemSet> freqSet;

	//the min-sup
	double minsup;

	//the min-conf
	double minconf;

	//store the usefull prime
	vector<int> prime;

	//the total-transId
	int totalTrans;

private :

	//clear in  order to next
	void reset()
	{
		primItems.clear();
		freqSet.clear();
	}

	//get the itemId,itemB from the inputfile
	void getIntData(const string& strline,int& itemId,int& itemB)
	{
		sscanf(strline.c_str(),"%d,%d",&itemId,&itemB);
	}

	//get the item-data
	void getItemData(const string& strline,int& item,string& itemName)
	{
		string::size_type index = strline.find(",");

		item = atoi(strline.substr(0,index).c_str());
		itemName = strline.substr(index+2,strline.size()-index-3);
	}

	//get the prime
	void getPrime(int primNumber = MAXPRIMNUM)
	{
		prime.reserve(primNumber);
		
		prime[0] = 2;
		int count = 1;
		for(int i=3;count<primNumber;i++)
		{
			bool flag = false;
			for(int j=2;j*j<=i&& !flag;j++)
			{
				if(i%j==0)
				{
					flag = true;
				}
			}

			if(!flag)
			{
				prime[count++] = i;
			}
		}
	}

	//prepare the data
	void doPrepare()
	{

		string strline;
		string itemName;
		int itemId;

		//read the item-input file
		/**
			Original-Format:
				itemId,itemName
			Finall-Format:
				itemId,itemName
				itemId,primItem
		*/
		while(getline(fIteminput,strline))
		{
			getItemData(strline,itemId,itemName);
			items.insert(make_pair<int,string>(itemId,itemName));
			primItems.insert(make_pair<int,int>(itemId,prime[itemId]));
		}

		//read the trans-input file
		/**
			Original-Format:
				tid,itemId
			Final-Format:
				tid,MultiSumValue
		*/

		int transCount = 0;
		while(getline(fTrancinput,strline))
		{
			int itemId,itemB;
			getIntData(strline,itemId,itemB);

			if(trans.find(itemId)==trans.end())
			{
				trans.insert(make_pair<int,int>(itemId,primItems[itemB]));
			}else
			{
				trans[itemId] *=primItems[itemB];
			}
		}

		totalTrans = trans.size();
	}

	//output the single freqitemset
	void OSingleFreq(const FreqItemSet& frq,ostream& os=cout)
	{
		//following print the itemset
		os<<"{";

		for(set<int,less<int> >::const_iterator iter = frq.itemset.begin();iter!=frq.itemset.end();++iter)
		{
			if(iter == frq.itemset.begin())
			{
				os<<items[*iter];
			}else
			{
				os<<","<<items[*iter];
			}
		}

		os<<"} , "<<fixed<<setprecision(5)<<static_cast<double>(frq.transId.size()*1.0/totalTrans*100)<<"%"<<endl;
	}

	//output the single associate-rules
	void OSingleAsso(const set<int,less<int> >& leftset,const set<int,less<int> >& rightset,double sup,double conf,ostream& os=cout)
	{
		//output the left
		os<<"{";
		for(set<int,less<int> >::const_iterator sIter = leftset.begin();sIter!=leftset.end();++sIter)
		{
			if(sIter ==leftset.begin())
			{
				os<<items[*sIter];
			}else
			{
				os<<","<<items[*sIter];
			}
		}
		os<<"}>{";
		//output the right
		for(sIter = rightset.begin();sIter!=rightset.end();++sIter)
		{
			if(sIter ==rightset.begin())
			{
				os<<items[*sIter];
			}else
			{
				os<<","<<items[*sIter];
			}
		}
		os<<"},";
		os<<"s="<<sup<<"%"<<",c="<<conf<<"%"<<endl;
	}


	//output the freqset
	void outputFreqSet()
	{
		for(vector<FreqItemSet>::const_iterator iter = freqSet.begin();iter!=freqSet.end();++iter)
		{
			//task-1
			if(iter->itemset.size()==1)
			{
				OSingleFreq(*iter,fout[0]);
			}
			
			//task-2
			if(iter->itemset.size()>=1 && iter->itemset.size()<=2)
			{
				OSingleFreq(*iter,fout[1]);
			}
			
			//task-3
			if(iter->itemset.size()>=1)
			{
				OSingleFreq(*iter,fout[2]);
			}
			
			
		}
	}
	
	//output the associate-rules
	void outputAssociate()
	{
		map< set<int,less<int> > ,set<int,less<int> > > tmpMap;
		for(vector<FreqItemSet>::iterator iter = freqSet.begin();iter!=freqSet.end();++iter)
		{
			tmpMap.insert(make_pair<set<int,less<int> > ,set<int,less<int> > >(iter->itemset,set<int,less<int> >(iter->transId.begin(),iter->transId.end())));
		}
		
		//X->Y => X,Y/X >=minconf
		
		map< set<int,less<int> > ,bool> existAsso;
		
		for(iter = freqSet.begin();iter!=freqSet.end();++iter)
		{
			vector<int> vct;
			
			for(set<int,less<int> >::iterator sIter = iter->itemset.begin();sIter!=iter->itemset.end();sIter++)
			{
				vct.push_back(*sIter);
			}
			
			if(vct.size()>=2)
			{
				while(next_permutation(vct.begin(),vct.end()))
				{
					bool flag = true;
					for(int i=1;flag && i< vct.size()-1;i++)
					{
						set<int,less<int> > leftset(vct.begin(),vct.end()-i);
						set<int,less<int> > rightset(vct.end()-i,vct.end());
						
						if(minconf <= static_cast<double>(iter->transId.size())/tmpMap[leftset].size()&& existAsso.find(leftset) == existAsso.end())
						{
							OSingleAsso(leftset,rightset,(iter->transId.size()*1.0)/totalTrans*100,iter->transId.size()*1.0/tmpMap[leftset].size()*100,fout[3]);
							existAsso.insert(make_pair< set<int,less<int> >,bool>(leftset,true));
						}else if(existAsso.find(leftset) != existAsso.end())
						{
							continue;
						}else
						{
							flag = false;
						}
					}
					
				}
			}
		}
	}

};

// Program entry: mines ./trans.dat and ./items.dat with fixed thresholds.
// The command-line arguments are not used.
int main(int argc,char* argv[])
{
	const double minsup = 0.005;	// minimum support (fraction of transactions)
	const double minconf = 0.01;	// minimum confidence (fraction)

	Apriori api("./trans.dat","./items.dat");
	api.apriori(minsup,minconf);

	return 0;
}

// Data file (1): trans.dat


// Data file (2): items.dat



  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值