连续序列匹配
先暂时上个代码,回头补论述
// Maps a byte width to the fixed-width signed integer of that width.
// Only 4- and 8-byte widths are defined; any other width leaves
// SignedOfWidth<N> incomplete, so using Type<T>::type for such a T is a
// compile-time error.
template<size_t Bytes>
struct SignedOfWidth;
template<>
struct SignedOfWidth<4>
{
    typedef int32_t type;
};
template<>
struct SignedOfWidth<8>
{
    typedef int64_t type;
};
// Type<T>::type is the signed integer type that has the same size as T
// (int32_t for 4-byte T, int64_t for 8-byte T).
//
// The lookup is keyed on sizeof(T) rather than on the exact type so that
// Type<size_t> resolves even when size_t is a distinct type from both
// uint32_t and uint64_t (for example `unsigned long` versus
// `unsigned long long` of the same width).
// Type<uint32_t>::type and Type<uint64_t>::type are still int32_t / int64_t.
template<typename T>
struct Type
{
    typedef typename SignedOfWidth<sizeof(T)>::type type;
};
// Knuth-Morris-Pratt matcher for one fixed pattern over sequences of T.
// T only needs operator== between elements.
template<typename T>
class KMP
{
private:
    // Builds the failure table of `pattern`.
    //
    // table[x] is the length of the longest proper prefix of
    // pattern[0..x] that is also a suffix of pattern[0..x].
    // An empty pattern yields an empty table (nothing is indexed).
    static std::vector<size_t> longestPrePost(std::vector<T> const& pattern)
    {
        size_t const Len(pattern.size());
        std::vector<size_t> table(Len, 0);
        // `border` enters iteration i holding table[i - 1].
        size_t border = 0;
        for (size_t i = 1; i < Len; ++i)
        {
            // Fall back through successively shorter borders until one of
            // them can be extended by pattern[i], or none is left.
            while (border > 0 && !(pattern[i] == pattern[border]))
                border = table[border - 1];
            if (pattern[i] == pattern[border])
                ++border;
            table[i] = border;
        }
        return table;
    }
    // Result of a mismatch lookup (see Inc::operator[]).
    struct pair
    {
        size_t i_inc; // how far the pattern alignment moves forward in the text
        size_t j_inc; // pattern index at which comparison resumes
    };
protected:
    // Failure table plus the mismatch lookup derived from it.
    struct Inc
    {
    public:
        std::vector<size_t> const jump_table;
    public:
        Inc(std::vector<T> const& pat) : jump_table(longestPrePost(pat)) {}
        // For a mismatch at pattern index i (i elements already matched):
        //   i == 0 -> shift the alignment by one, resume at index 0;
        //   i  > 0 -> with x = jump_table[i - 1], shift the alignment by
        //             i - x and resume at index x.
        // Requires i <= jump_table.size().
        pair operator[](size_t i) const
        {
            if (i == 0) return pair{ 1, 0 };
            size_t const x = jump_table[i - 1];
            return pair{ i - x, x };
        }
    };
    // Declared before `pat`, so it is built from the constructor argument.
    Inc const inc;
    std::vector<T> const pat;
public:
    KMP(std::vector<T> const& pat_) : inc(pat_), pat(pat_) {}
    // Finds the first occurrence of the pattern in [begin, end).
    //
    // On success sets `hit` to true and returns the offset of the match
    // relative to `begin`. Otherwise (no occurrence, empty pattern, or
    // begin > end) sets `hit` to false and returns static_cast<size_t>(-1).
    size_t match(
        typename std::vector<T>::const_iterator const& begin,
        typename std::vector<T>::const_iterator const& end,
        bool& hit) const
    {
        size_t const Len_pat = inc.jump_table.size();
        if (begin > end || Len_pat < 1)
        {
            hit = false;
            return static_cast<size_t>(-1);
        }
        typename std::vector<T>::const_iterator i = begin; // text cursor
        size_t j = 0;                                      // pattern cursor
        while (true)
        {
            // Extend the current partial match as far as it goes.
            while (i != end && j < Len_pat && *i == pat[j]) ++i, ++j;
            if (j >= Len_pat)
            {
                hit = true;
                return static_cast<size_t>(i - begin) - Len_pat;
            }
            if (i == end) break;
            if (j == 0)
            {
                // Nothing matched at this text element: step over it.
                ++i;
            }
            else
            {
                // The text cursor already sits on the mismatching element,
                // so it must stay put; only the pattern cursor falls back to
                // the longest border of the part matched so far. Moving the
                // text cursor here would skip elements and could run past
                // `end`.
                j = inc[j].j_inc;
            }
        }
        hit = false;
        return static_cast<size_t>(-1);
    }
};
//连续序列匹配器
template<typename T>
class SerialMatcher
{
public:
enum APPROACH {approach_KMP,approach_BRUTEFORCE};
private:
std::shared_ptr<KMP<T> > const pKmp;
public:
SerialMatcher(std::vector<T>const&pat):pKmp(new KMP<T>(pat)){}
size_t match(std::vector<T>const&src,bool&hit,APPROACH approach = APPROACH::approach_KMP)const//if return==-1,means nothing is matched
{
if (approach == approach_KMP)
{
return pKmp->match(src.cbegin(), src.cend(),hit);
}
// cerr << "" << endl;
hit=false;
return -1;
}
size_t next(std::vector<T>const&src, size_t last_offset, bool&hit,APPROACH approach = APPROACH::approach_KMP)const//if return==-1,means nothing is matched
{
if (hit == false)return -1;
if (approach == approach_KMP)
{
size_t x= pKmp->match(src.cbegin()+last_offset+1, src.cend(),hit);
if (hit)
{
return last_offset + 1 + x;
}
}
//
hit=false;
return -1;
}
std::vector<size_t>matchAll(std::vector<T>const&src, APPROACH approach = APPROACH::approach_KMP)const
{
std::vector<size_t> res;
bool hit = false;
size_t x = match(src, hit,approach);
if (hit)
{
res.push_back(x);
L: x=next(src, x, hit,approach);
if (hit)
{
res.push_back(x);
goto L;
}
}
return res;
}
};