// PrefixSpan.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include<iostream>
#include<iomanip>
#include<vector>
#include<map>
#include<set>
#include<string>
using namespace std;
// Data model: an element is a list of item names, a sequence is an ordered
// list of elements, and a database is a list of sequences.
using strVect = vector<string>;
using strDVect = vector<strVect>;
using strTVect = vector<strDVect>;
// item name -> number of occurrences
using strIntMap = map<string, int>;
// set of item names
using strSet = set<string>;
// item name -> projected database for that item
using vectMap = map<string, strTVect>;
// Prints `datas` to standard output, one sequence per line.
// The items of each element are concatenated without separators and the
// result is written left-aligned in a field of width 5.
void showData(strTVect datas)
{
    for (const strDVect &sequence : datas)
    {
        for (const strVect &element : sequence)
        {
            string joined;
            for (const string &item : element)
            {
                joined.append(item);
            }
            // the field width only applies to the next formatted output,
            // so it has to be set again for every element
            cout << setiosflags(ios::left) << setw(5) << joined;
        }
        cout << endl;
    }
}
//get length-1 items
// Counts, for every name in `items`, how many sequences of `datas` contain it
// (an item is counted at most once per sequence) and returns only the entries
// whose count is at least `threshold`. Items that are not listed in `items`
// are ignored.
strIntMap getItems(const strTVect &datas, const strSet &items, int threshold)
{
    //item and occur times
    strIntMap itemsMap;
    for (const strDVect &sequence : datas)
    {
        // items already counted for the current sequence
        strSet counted;
        for (const strVect &element : sequence)
        {
            for (const string &item : element)
            {
                if (items.count(item) != 0 && counted.insert(item).second)
                {
                    ++itemsMap[item];
                }
            }
        }
    }
    //filt by threshold
    for (strIntMap::iterator it = itemsMap.begin(); it != itemsMap.end();)
    {
        if (it->second < threshold)
        {
            // erase returns the iterator following the removed entry
            it = itemsMap.erase(it);
        }
        else
        {
            ++it;
        }
    }
    return itemsMap;
}
// Removes from `datas` every item that is not a key of `itemsMap`, then drops
// elements that end up empty and sequences that end up without elements.
// The relative order of everything that is kept is preserved.
void filtDataByItems(strTVect &datas, strIntMap itemsMap)
{
    strTVect keptSequences;
    for (strTVect::size_type s = 0; s < datas.size(); ++s)
    {
        const strDVect &sequence = datas[s];
        strDVect keptElements;
        for (strDVect::size_type e = 0; e < sequence.size(); ++e)
        {
            const strVect &element = sequence[e];
            strVect keptItems;
            for (strVect::size_type i = 0; i < element.size(); ++i)
            {
                if (itemsMap.count(element[i]) > 0)
                {
                    keptItems.push_back(element[i]);
                }
            }
            if (!keptItems.empty())
            {
                keptElements.push_back(keptItems);
            }
        }
        if (!keptElements.empty())
        {
            keptSequences.push_back(keptElements);
        }
    }
    // replace the caller's data with the filtered copy
    datas.swap(keptSequences);
}
//divide search place by first prefix or get the project like <30 x>
// Builds, for every item x found in `datas`, the projected database obtained
// by appending x as a new element (sequence extension).
// For each sequence only the first occurrence of x is used; its projection is
//   - the items following x in the same element, stored as one element that
//     starts with the marker "_" (omitted when x is the last item), followed by
//   - all later elements of the sequence.
// An element that starts with "_" is skipped entirely, because its items
// belong to the element that already matched the prefix.
// Sequences with an empty projection are still recorded, since they count
// towards the support of x.
// Returns only the items whose projected database holds at least `threshold`
// sequences; a threshold of zero or less keeps every item.
vectMap simpleProjectData(const strTVect &datas, int threshold)
{
    vectMap pVectMap;
    for (const strDVect &sequence : datas)
    {
        // items already projected for the current sequence
        strSet seen;
        for (strDVect::const_iterator element = sequence.begin(); element != sequence.end(); ++element)
        {
            for (strVect::const_iterator item = element->begin(); item != element->end(); ++item)
            {
                //this element is the same as the last element of prefix
                if (*item == "_")
                {
                    break;
                }
                if (!seen.insert(*item).second)
                {
                    continue;
                }
                strDVect projected;
                //the last part of element
                if (item + 1 != element->end())
                {
                    strVect rest(1, "_");
                    rest.insert(rest.end(), item + 1, element->end());
                    projected.push_back(rest);
                }
                //the last part of sequence
                projected.insert(projected.end(), element + 1, sequence.end());
                pVectMap[*item].push_back(projected);
            }
        }
    }
    //filt project data by threshold
    for (vectMap::iterator it = pVectMap.begin(); it != pVectMap.end();)
    {
        // compare as unsigned only after ruling out non-positive thresholds,
        // which would otherwise wrap around to a huge value
        const bool tooRare = threshold > 0 &&
            it->second.size() < static_cast<strTVect::size_type>(threshold);
        if (tooRare)
        {
            it = pVectMap.erase(it);
        }
        else
        {
            ++it;
        }
    }
    return pVectMap;
}
//find prefix like <(30 x)>,<(_ x)>
// Returns the position in `element` from which items can be appended to the
// last element of the prefix, or element.end() when there is none.
//  - An element starting with "_" is the remainder of the element that already
//    matched the prefix, so every item after the marker qualifies.
//  - Otherwise all items of `lastElement` must occur in `element` in the same
//    order (not necessarily adjacent); the items after the last match qualify.
static strVect::const_iterator firstExtensionCandidate(const strVect &element, const strVect &lastElement)
{
    strVect::const_iterator item = element.begin();
    if (item == element.end())
    {
        return item;
    }
    if (*item == "_")
    {
        return item + 1;
    }
    strVect::const_iterator wanted = lastElement.begin();
    for (; item != element.end() && wanted != lastElement.end(); ++item)
    {
        if (*item == *wanted)
        {
            ++wanted;
        }
    }
    return wanted == lastElement.end() ? item : element.end();
}

// Builds, for every item x that can be added to the LAST element of `prefix`
// (itemset extension), the projected database of the extended prefix.
// For each sequence the first element offering x is used; the projection is
// the items following x in that element (as one element starting with "_",
// omitted when x is the last item) followed by all later elements.
// Every qualifying item of an element is considered, not only the first one.
// Returns only the items whose projected database holds at least `threshold`
// sequences; a threshold of zero or less keeps every item.
// Throws std::out_of_range (from vector::at) when `prefix` is empty.
// NOTE(review): assumes the items inside every element and inside the prefix
// are kept in one consistent order — confirm against how the data is loaded.
vectMap normalProjectData(const strTVect &datas, const strDVect &prefix, int threshold)
{
    vectMap pVectMap;
    //get the last element of prefix
    const strVect &lastElement = prefix.at(prefix.size() - 1);
    for (const strDVect &sequence : datas)
    {
        // items already projected for the current sequence
        strSet seen;
        for (strDVect::const_iterator element = sequence.begin(); element != sequence.end(); ++element)
        {
            strVect::const_iterator candidate = firstExtensionCandidate(*element, lastElement);
            for (; candidate != element->end(); ++candidate)
            {
                if (!seen.insert(*candidate).second)
                {
                    continue;
                }
                strDVect projected;
                //the last part of element
                if (candidate + 1 != element->end())
                {
                    strVect rest(1, "_");
                    rest.insert(rest.end(), candidate + 1, element->end());
                    projected.push_back(rest);
                }
                //the last part of sequence
                projected.insert(projected.end(), element + 1, sequence.end());
                pVectMap[*candidate].push_back(projected);
            }
        }
    }
    //filt project data by threshold
    for (vectMap::iterator it = pVectMap.begin(); it != pVectMap.end();)
    {
        // compare as unsigned only after ruling out non-positive thresholds,
        // which would otherwise wrap around to a huge value
        const bool tooRare = threshold > 0 &&
            it->second.size() < static_cast<strTVect::size_type>(threshold);
        if (tooRare)
        {
            it = pVectMap.erase(it);
        }
        else
        {
            ++it;
        }
    }
    return pVectMap;
}
//the principal function. two functions recursion
// Prints one pattern as "<...> :support". Elements holding more than one item
// are wrapped in parentheses; every item is followed by a single space.
static void printPattern(const strDVect &pattern, strTVect::size_type support)
{
    cout << "<";
    for (const strVect &element : pattern)
    {
        const bool grouped = element.size() > 1;
        if (grouped)
        {
            cout << "(";
        }
        for (const string &item : element)
        {
            cout << item << " ";
        }
        if (grouped)
        {
            cout << ") ";
        }
    }
    cout << "> :" << support << endl;
}

// Recursively grows `prefix` using the projected database `datas`.
// Each extension returned by simpleProjectData (item appended as a new
// element) and by normalProjectData (item appended to the last element) is
// printed together with its projected database, then mined further with that
// projected database as the new `datas`.
// The arguments are taken by const reference so that a recursion level does
// not copy the whole database and prefix.
void projectData(const strTVect &datas, const strDVect &prefix, int threshold)
{
    //find prefix like <30 x>
    const vectMap simpleMap = simpleProjectData(datas, threshold);
    for (const vectMap::value_type &entry : simpleMap)
    {
        strDVect extended = prefix;
        extended.push_back(strVect(1, entry.first));
        printPattern(extended, entry.second.size());
        showData(entry.second);
        projectData(entry.second, extended, threshold);
    }
    //find prefix like <(30 x)> or <(_ x)>
    const vectMap normalMap = normalProjectData(datas, prefix, threshold);
    for (const vectMap::value_type &entry : normalMap)
    {
        strDVect extended = prefix;
        // at() keeps the bounds check of the original code for an empty prefix
        extended.at(extended.size() - 1).push_back(entry.first);
        printPattern(extended, entry.second.size());
        showData(entry.second);
        projectData(entry.second, extended, threshold);
    }
}
//initial data source and save in datas
// Appends four hard-coded sample sequences to `datas` and inserts the item
// names "a" to "f" into `items`. Existing content of both containers is kept.
// Sequences (one per line, elements in parentheses):
//   (a b c)(a b c)(a c)(d)(c f)
//   (a d)(c)(a b c d)(a e)
//   (e f)(a b)(d f)(c)(b)
//   (e)(g)(a f)(c)(b)(c)
// NOTE(review): the element list previously documented for this function was
// a, abc, ac, d, cf | ad, c, bc, ae | ef, ab, df, c, b | e, g, af, c, b, c.
// The data below reproduces what the code actually inserts, which differs in
// two elements ((a b c) instead of (a), (a b c d) instead of (b c)) — confirm
// which one is intended.
// NOTE(review): "g" occurs in the last sequence but is not added to `items` —
// confirm this is intentional.
void initData(strTVect &datas,strSet &items)
{
    const strDVect sequence1 = { {"a", "b", "c"}, {"a", "b", "c"}, {"a", "c"}, {"d"}, {"c", "f"} };
    const strDVect sequence2 = { {"a", "d"}, {"c"}, {"a", "b", "c", "d"}, {"a", "e"} };
    const strDVect sequence3 = { {"e", "f"}, {"a", "b"}, {"d", "f"}, {"c"}, {"b"} };
    const strDVect sequence4 = { {"e"}, {"g"}, {"a", "f"}, {"c"}, {"b"}, {"c"} };
    datas.push_back(sequence1);
    datas.push_back(sequence2);
    datas.push_back(sequence3);
    datas.push_back(sequence4);

    const char *const knownItems[] = { "a", "b", "c", "d", "e", "f" };
    for (const char *name : knownItems)
    {
        items.insert(name);
    }
}
// Program entry point. Loads the sample data, removes items whose support is
// below the threshold, prints the projected database of every frequent
// length-1 prefix and then mines each of them recursively with projectData.
int _tmain(int argc, _TCHAR* argv[])
{
    strTVect datas;
    strSet items;
    initData(datas,items);
    showData(datas);
    const int threshold = 2;
    // keep only the items whose support reaches the threshold
    const strIntMap itemsMap = getItems(datas,items,threshold);
    filtDataByItems(datas,itemsMap);
    cout << "***********************************" << endl;
    showData(datas);
    const vectMap pVectMap = simpleProjectData(datas,threshold);
    //show the projected database of every frequent length-1 prefix
    cout << "***********************************" << endl;
    for (const vectMap::value_type &entry : pVectMap)
    {
        cout << entry.first << " " << endl;
        showData(entry.second);
    }
    // grow every frequent length-1 prefix
    for (const vectMap::value_type &entry : pVectMap)
    {
        cout << "***************************" << endl;
        cout << "<" << entry.first << "> :" << entry.second.size() << endl;
        const strDVect prefix(1, strVect(1, entry.first));
        projectData(entry.second,prefix,threshold);
    }
    return 0;
}
// Source: http://blog.sina.com.cn/s/blog_6e85bf420100o6cp.html