/**@author Amiber
@date 2012-11-23
@brief: Apriori-Algorithm
*/
#pragma warning(disable:4786)
#include <cstdio>

#include <algorithm>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <map>
#include <set>
#include <stdexcept>
#include <string>
#include <vector>
using namespace std;
// Upper bound on the number of primes made available for encoding items.
const int MAXPRIMNUM = 101;

// Names of the four result files, in the order the miner opens them.
const string outputFiles[] =
{
    "system.out.1",
    "system.out.2",
    "system.out.3",
    "system.out.4"
};
// One itemset together with the bookkeeping the miner attaches to it.
struct FreqItemSet
{
    // ids of the items that make up the set (kept in ascending order)
    set<int> itemset;
    // ids of the transactions that were counted for this itemset
    vector<int> transId;
    // numeric code attached to the itemset by the code that builds it
    int hashId;
};
// Ordering used to sort FreqItemSets: the set with fewer items comes first;
// sets of equal size are ordered by comparing their item ids with set's operator<.
bool cmp(const FreqItemSet& frq1,const FreqItemSet& frq2)
{
    const set<int,less<int> >::size_type lhsCount = frq1.itemset.size();
    const set<int,less<int> >::size_type rhsCount = frq2.itemset.size();
    return (lhsCount == rhsCount) ? (frq1.itemset < frq2.itemset)
                                  : (lhsCount < rhsCount);
}
// Two FreqItemSets compare equal when they contain exactly the same items;
// transId and hashId take no part in the comparison.
bool operator ==(const FreqItemSet& frq1,const FreqItemSet& frq2)
{
    return frq1.itemset == frq2.itemset;
}
//define the apriori-algorithm-class
class Apriori
{
public :
Apriori(const string& dataTransInput,const string& dataItemInput)
{
//open the transInputFile
try
{
fTrancinput.open(dataTransInput.c_str(),ios::in);
}catch(...)
{
cerr<<"Trans file input error"<<std::endl;
}
//open the ItemInputFile
try
{
fIteminput.open(dataItemInput.c_str(),ios::in);
}catch(...)
{
cerr<<"Item file input error"<<std::endl;
}
//open the outputFile(4)
for(int i=0;i<4;i++)
{
try
{
fout[i].open(outputFiles[i].c_str());
}catch(...)
{
cerr<<"error write"<<endl;
}
}
}
void apriori(double minsup,double minconf)
{
//clear;
reset();
//get the prime according to item
getPrime();
//get the itemDat ,transDat
doPrepare();
vector<FreqItemSet> tmpSet;
map<int,int>::iterator newIter;
map<int,int>::iterator transIter;
//folowing ,calculate the 1-freq
for(newIter = primItems.begin();newIter!=primItems.end();)
{
FreqItemSet freq;
freq.itemset.insert(newIter->first);
freq.hashId = newIter->second;
// read the transDat,count the single-itemset
for(transIter=trans.begin();transIter!=trans.end();++transIter)
{
if(transIter->second % freq.hashId ==0 )
{
freq.transId.push_back(transIter->first);
}
}
if( static_cast<double>(freq.transId.size())/totalTrans >= minsup)
{
tmpSet.push_back(freq);
freqSet.push_back(freq);
newIter ++;
}else
{
//if a is not freq,then ab is also
newIter = primItems.erase(newIter);
}
}
//following , k-freqitems -> k-1
vector<FreqItemSet>::iterator iter;
//if left in the tmpSet
while(!tmpSet.empty())
{
iter = tmpSet.begin();
vector<FreqItemSet> tmpKSet;
//begin to do tmpSet
while(iter!=tmpSet.end())
{
newIter = primItems.begin();
//add the new item
for(;newIter!=primItems.end();newIter++)
{
if((iter->itemset).find(newIter->first) == (iter->itemset).end())
{
FreqItemSet tmpFreq;
tmpFreq.itemset = iter->itemset;
tmpFreq.itemset.insert(newIter->first);
tmpFreq.hashId = iter->hashId * newIter->second;
//count the new-itemset
for(transIter = trans.begin();transIter!=trans.end();++transIter)
{
if(transIter->second % tmpFreq.hashId == 0)
{
tmpFreq.transId.push_back(transIter->first);
}
}
//satisfying the condition
if(tmpFreq.transId.size()*1.0/totalTrans >= (minsup))
{
tmpKSet.push_back(tmpFreq);
freqSet.push_back(tmpFreq);
}
}
}
iter = tmpSet.erase(iter);
}
//release the mem
vector<FreqItemSet> tmpDele;
tmpDele.swap(tmpSet);
tmpSet = tmpKSet;
}
//uniq the same itemset
sort(freqSet.begin(),freqSet.end(),cmp);
freqSet.erase(unique(freqSet.begin(),freqSet.end()),freqSet.end());
//output the freqSet(task1-3)
outputFreqSet();
//output the associate-rules(task4)
outputAssociate();
}
~Apriori()
{
fIteminput.close();
fTrancinput.close();
for(int i=0;i<4;i++)
{
fout[i].close();
}
}
private :
//trans-inputfile
ifstream fTrancinput;
//items-inputfile
ifstream fIteminput;
//output-file
ofstream fout[4];
//store the trans
map<int,int> trans;
//store the items
map<int,string> items;
//store exchange items
map<int,int> primItems;
//store the final frequset
vector<FreqItemSet> freqSet;
//the min-sup
double minsup;
//the min-conf
double minconf;
//store the usefull prime
vector<int> prime;
//the total-transId
int totalTrans;
private :
//clear in order to next
void reset()
{
primItems.clear();
freqSet.clear();
}
//get the itemId,itemB from the inputfile
void getIntData(const string& strline,int& itemId,int& itemB)
{
sscanf(strline.c_str(),"%d,%d",&itemId,&itemB);
}
//get the item-data
void getItemData(const string& strline,int& item,string& itemName)
{
string::size_type index = strline.find(",");
item = atoi(strline.substr(0,index).c_str());
itemName = strline.substr(index+2,strline.size()-index-3);
}
//get the prime
void getPrime(int primNumber = MAXPRIMNUM)
{
prime.reserve(primNumber);
prime[0] = 2;
int count = 1;
for(int i=3;count<primNumber;i++)
{
bool flag = false;
for(int j=2;j*j<=i&& !flag;j++)
{
if(i%j==0)
{
flag = true;
}
}
if(!flag)
{
prime[count++] = i;
}
}
}
//prepare the data
void doPrepare()
{
string strline;
string itemName;
int itemId;
//read the item-input file
/**
Original-Format:
itemId,itemName
Finall-Format:
itemId,itemName
itemId,primItem
*/
while(getline(fIteminput,strline))
{
getItemData(strline,itemId,itemName);
items.insert(make_pair<int,string>(itemId,itemName));
primItems.insert(make_pair<int,int>(itemId,prime[itemId]));
}
//read the trans-input file
/**
Original-Format:
tid,itemId
Final-Format:
tid,MultiSumValue
*/
int transCount = 0;
while(getline(fTrancinput,strline))
{
int itemId,itemB;
getIntData(strline,itemId,itemB);
if(trans.find(itemId)==trans.end())
{
trans.insert(make_pair<int,int>(itemId,primItems[itemB]));
}else
{
trans[itemId] *=primItems[itemB];
}
}
totalTrans = trans.size();
}
//output the single freqitemset
void OSingleFreq(const FreqItemSet& frq,ostream& os=cout)
{
//following print the itemset
os<<"{";
for(set<int,less<int> >::const_iterator iter = frq.itemset.begin();iter!=frq.itemset.end();++iter)
{
if(iter == frq.itemset.begin())
{
os<<items[*iter];
}else
{
os<<","<<items[*iter];
}
}
os<<"} , "<<fixed<<setprecision(5)<<static_cast<double>(frq.transId.size()*1.0/totalTrans*100)<<"%"<<endl;
}
//output the single associate-rules
void OSingleAsso(const set<int,less<int> >& leftset,const set<int,less<int> >& rightset,double sup,double conf,ostream& os=cout)
{
//output the left
os<<"{";
for(set<int,less<int> >::const_iterator sIter = leftset.begin();sIter!=leftset.end();++sIter)
{
if(sIter ==leftset.begin())
{
os<<items[*sIter];
}else
{
os<<","<<items[*sIter];
}
}
os<<"}>{";
//output the right
for(sIter = rightset.begin();sIter!=rightset.end();++sIter)
{
if(sIter ==rightset.begin())
{
os<<items[*sIter];
}else
{
os<<","<<items[*sIter];
}
}
os<<"},";
os<<"s="<<sup<<"%"<<",c="<<conf<<"%"<<endl;
}
//output the freqset
void outputFreqSet()
{
for(vector<FreqItemSet>::const_iterator iter = freqSet.begin();iter!=freqSet.end();++iter)
{
//task-1
if(iter->itemset.size()==1)
{
OSingleFreq(*iter,fout[0]);
}
//task-2
if(iter->itemset.size()>=1 && iter->itemset.size()<=2)
{
OSingleFreq(*iter,fout[1]);
}
//task-3
if(iter->itemset.size()>=1)
{
OSingleFreq(*iter,fout[2]);
}
}
}
//output the associate-rules
void outputAssociate()
{
map< set<int,less<int> > ,set<int,less<int> > > tmpMap;
for(vector<FreqItemSet>::iterator iter = freqSet.begin();iter!=freqSet.end();++iter)
{
tmpMap.insert(make_pair<set<int,less<int> > ,set<int,less<int> > >(iter->itemset,set<int,less<int> >(iter->transId.begin(),iter->transId.end())));
}
//X->Y => X,Y/X >=minconf
map< set<int,less<int> > ,bool> existAsso;
for(iter = freqSet.begin();iter!=freqSet.end();++iter)
{
vector<int> vct;
for(set<int,less<int> >::iterator sIter = iter->itemset.begin();sIter!=iter->itemset.end();sIter++)
{
vct.push_back(*sIter);
}
if(vct.size()>=2)
{
while(next_permutation(vct.begin(),vct.end()))
{
bool flag = true;
for(int i=1;flag && i< vct.size()-1;i++)
{
set<int,less<int> > leftset(vct.begin(),vct.end()-i);
set<int,less<int> > rightset(vct.end()-i,vct.end());
if(minconf <= static_cast<double>(iter->transId.size())/tmpMap[leftset].size()&& existAsso.find(leftset) == existAsso.end())
{
OSingleAsso(leftset,rightset,(iter->transId.size()*1.0)/totalTrans*100,iter->transId.size()*1.0/tmpMap[leftset].size()*100,fout[3]);
existAsso.insert(make_pair< set<int,less<int> >,bool>(leftset,true));
}else if(existAsso.find(leftset) != existAsso.end())
{
continue;
}else
{
flag = false;
}
}
}
}
}
}
};
// Runs the miner on ./trans.dat and ./items.dat with fixed thresholds:
// minimum support 0.005 and minimum confidence 0.01, both given as fractions.
int main(int argc,char* argv[])
{
    const double minsup = 0.005;
    const double minconf = 0.01;

    Apriori api("./trans.dat","./items.dat");
    api.apriori(minsup,minconf);
    return 0;
}
// Data file (1): trans.dat
// Data file (2): items.dat