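// Adapted from the original blog author's implementation with minor changes; intended to support Chinese, English, emoji and punctuation.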
#include <string>
#include <iostream>
#include <set>
#include <string>
#include <vector>
#include <queue>
#include <iostream>
#include <map>
using namespace std;
#define VISIBLE_NUMBER 224
//定义节点结构
struct StateNode {
bool finish_{false};
int state_{0};
int num_{0};
string pattern_{};
//goto table
vector<StateNode *> transition_table_{vector<StateNode *>(VISIBLE_NUMBER)};
};
//定义一个树的类
class TrieAc {
private:
StateNode *start_node_;
int state_count_;
vector<StateNode *> corresponding_node_;
vector<StateNode *> fail_;
public:
TrieAc() : start_node_{new StateNode()}, state_count_{0} {
//state0 is start_node_
corresponding_node_.push_back(start_node_);
}
~TrieAc() {};
//read all patterns and produce the goto table
void load_pattern(const vector<string> &_Patterns);
//produce fail function
void dispose();
//search matching
void match(const string &_Str, set<int> &matched);
};
// Groups a TrieAc matcher with a tag table and a pattern list.
// The constructor and quick_find_tags() are only declared here; their
// definitions are not part of this view.
class TreeModels {
public:
    TreeModels();
    ~TreeModels() {}

    // NOTE(review): presumably looks up tags inside `query` — confirm against
    // the definition.
    void quick_find_tags(std::string &query);

    TrieAc trieac;
    std::map<std::string, int> tagcollect;
    std::vector<std::string> patterns;
};
void TrieAc::load_pattern(const vector<string> &_Patterns) {
int latest_state = 1;
int count = 0;
for (const auto &pattern : _Patterns) {
auto *p = start_node_;
for (int i = 0; i < pattern.size(); ++i) {
auto *next_node = p->transition_table_[0x20 - pattern[i]];
if (next_node == nullptr) {
next_node = new StateNode();
}
if (next_node->state_ == 0) {
next_node->state_ = latest_state++;
corresponding_node_.push_back(next_node);
}
p->transition_table_[0x20 - pattern[i]] = next_node;
p = next_node;
}
p->finish_ = true;
p->pattern_ = pattern;
p->num_ = count++;
}
int aaa = 0;
int bbb = 0;
for (int i = 0; i < 224; ++i) {
if (start_node_->transition_table_[i] == nullptr) {
start_node_->transition_table_[i] = start_node_;
aaa++;
}else{
bbb++;
}
}
cout<<aaa<<endl;
cout<<bbb<<endl;
state_count_ = latest_state;
}
//produce fail function
void TrieAc::dispose() {
queue<StateNode *> q;
fail_ = std::move(vector<StateNode *>(state_count_));
for (const auto nxt : start_node_->transition_table_) {
if (nxt && nxt->state_ != 0) {
fail_[nxt->state_] = start_node_;
q.push(nxt);
}
}
while (!q.empty()) {
auto known = q.front();
q.pop();
for (int i = 0; i < 224; ++i) {
auto nxt = known->transition_table_[i];
if (nxt && nxt->state_ != 0) {
auto p = fail_[known->state_];
while (!p->transition_table_[i]) {
p = fail_[p->state_];
}
fail_[nxt->state_] = p->transition_table_[i];
q.push(nxt);
}
}
}
}
//search matching
void TrieAc::match(const string &_Str, set<int> &matched) {
int trans = 0;
auto p = start_node_;
for (int i = 0; i < _Str.size(); ++i) {
trans = 0x20 - _Str[i];
p = p->transition_table_[trans] ? p->transition_table_[trans] : (--i, fail_[p->state_]);
cout << "p:" << p << endl;
if (p && p->finish_) {
matched.insert(p->num_);
} else {
if (p == nullptr) {
p = start_node_;
}
}
cout << "matched.size:" << matched.size() << endl;
}
}
// Demo: loads four patterns, scans a sample paragraph and prints every
// pattern that was found, followed by "end".
int main() {
    const std::vector<std::string> patterns{"躺平", "毫无波澜", "顺从心理", "加班"};
    const std::string query = "躺平,网络流行词,指无论对方做出什么反应,你内心都毫无波澜,对此不会有任何反应或者反抗,表示顺从心理。另外在部分语境中表示为:瘫倒在地,不再鸡血沸腾、渴求成功了。躺平看似是妥协、放弃,但其实是“向下突破天花板”,选择最无所作为的方式反叛裹挟。年轻人选择躺平,就是选择走向边缘,超脱于加班、升职、挣钱、买房的主流路径之外,用自己的方式消解外在环境对个体的规训。";

    // Build the automaton: goto table first, then failure links.
    TrieAc trieac;
    trieac.load_pattern(patterns);
    trieac.dispose();

    // Collect the numbers of the patterns that occur in the query.
    std::set<int> matched;
    trieac.match(query, matched);

    // Pattern numbers are positions in `patterns`.
    for (std::set<int>::const_iterator it = matched.begin(); it != matched.end(); ++it) {
        std::cout << patterns[*it] << std::endl;
    }
    std::cout << "end" << std::endl;
    return 0;
}