// Uses a trie (prefix tree) structure so that the matching process can be done in O(n).
#ifndef MATCH_USER_AGENT_H_
#define MATCH_USER_AGENT_H_
#include <string>
#include <vector>
using std::string;
using std::vector;
// Number of child slots in every trie node. It has to be larger than the
// biggest code assigned by UserAgentInfo::TrietreeTransInit() (45); code 0 is
// shared by all characters that have no explicit mapping.
#define UA_TREE_WIDTH 50
// Number of entries in the character-to-code translation table, which is
// indexed by character value.
#define UA_TRIETREE_TRANS_ARR_LEN 128
class TreeNode {
public:
TreeNode *next_[UA_TREE_WIDTH];
bool end_flag_;
};
class UserAgentInfo {
public :
/// 对字符串进行转码,节省trietree树空间
void TrietreeTransInit();
/// 初始化,trietree根节点等初始化
/// @return true: 初始化成功 false: 初始化失败
bool UAInit(const string &user_agent_file);
/// 向UserAgent所使用的trietree中插入节点
/// @param[in] keyword 插入的字符串
/// @return true: 插入成功 false: 插入失败
bool UATreeNodeInsert(const char* keyword);
/// 构造失败指针,利用失败指针匹配时可以在O(N)内搞定,即不回溯
void UAFaildBuild();
/// 删除UA Tree
/// @return true: 删除成功 false: 删除失败
bool UATreeDestory();
/// 递归删除UA Tree_Node
/// @param[in] tree根节点
/// @return true: 删除成功 false: 删除失败
bool UATreeNodeDestory(TreeNode* root);
/// 清空UA 相关存储结构
/// @return true: 删除成功 false: 删除失败
/// @当需要动态载入UA时才需要调此函数
bool UATreeClean();
/// 重新载入数据
/// @return true: 载入成功 false: 载入失败
/// @当需要动态载入UA时才需要调此函数
bool UATreeDynamicLoad(const std::string &user_agent_file);
/// user_agent匹配函数
/// @param[in] user_agent:请求用户的user_agent
/// @return true: 匹配上UA false: 没有匹配上UA
bool UAMatch(const char* user_agent);
/// 读取user_agent文件
bool ReadUAFile(const std::string &user_agent_file, vector<string> &result);
private:
TreeNode* ua_root_; //trietree根节点
char ua_trans_arr_[UA_TRIETREE_TRANS_ARR_LEN];
};
#endif
#include "cpc_user_agent.h"
/// Reads the keyword file into `result` by delegating to LoadFile().
/// @param[in]  user_agent_file file to read
/// @param[out] result          filled by LoadFile(); presumably one keyword
///                             per entry - confirm against LoadFile()
/// @return true when LoadFile() reports success, false otherwise
bool UserAgentInfo::ReadUAFile(const std::string &user_agent_file, vector<string> &result) {
  if (!LoadFile(user_agent_file, 0, &result)) {
    // The period goes before the newline; the previous "\n." left a stray
    // '.' at the start of the following log line.
    printf("Failed to load user_agent_file %s.\n", user_agent_file.c_str());
    return false;
  }
  return true;
}
/// Builds the table that maps a character to the child index used in the trie.
///
/// Layout: digits -> 1..10, letters -> 11..36 (upper and lower case share a
/// code, which makes lookups case-insensitive), a few punctuation characters
/// -> 37..45, and everything else -> 0.
void UserAgentInfo::TrietreeTransInit() {
  // Every character that is not listed below keeps code 0.
  memset(ua_trans_arr_, 0, sizeof(char) * UA_TRIETREE_TRANS_ARR_LEN);

  const char digits[] = "0123456789";
  for (int pos = 0; digits[pos] != '\0'; ++pos) {
    ua_trans_arr_[(int)digits[pos]] = static_cast<char>(1 + pos);
  }

  const char lower[] = "abcdefghijklmnopqrstuvwxyz";
  const char upper[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
  for (int pos = 0; lower[pos] != '\0'; ++pos) {
    const char code = static_cast<char>(11 + pos);
    ua_trans_arr_[(int)lower[pos]] = code;
    ua_trans_arr_[(int)upper[pos]] = code;
  }

  // '#' and ' ' deliberately appear with the same code (39), so the two are
  // interchangeable during insertion and matching.
  // NOTE(review): presumably lets a keyword file spell a space as '#' - confirm.
  const char symbols[] = "*.# ;()/-_";
  const char symbol_codes[] = {37, 38, 39, 39, 40, 41, 42, 43, 44, 45};
  for (int pos = 0; symbols[pos] != '\0'; ++pos) {
    ua_trans_arr_[(int)symbols[pos]] = symbol_codes[pos];
  }
}
bool UserAgentInfo::UAInit(const string &user_agent_file) {
ua_root_ = new (std::nothrow) TreeNode();
if (NULL == ua_root_) {
printf("new TreeNode err: ua_root_\n");
return false;
}
TrietreeTransInit();
vector<string> keywords;
if (!ReadUAFile(user_agent_file, keywords)) {
return false;
}
vector<string>::const_iterator it = keywords.begin();
for (; it != keywords.end(); ++it) {
if (!UATreeNodeInsert((*it).c_str())) {
printf("in UAInit insert TreeNode err\n");
return false;
}
}
return true;
}
bool UserAgentInfo::UATreeNodeInsert(const char* keyword) {
TreeNode* curr = ua_root_;
TreeNode* new_node = NULL;
if (NULL == keyword || NULL == curr) {
printf("insert TreeNode err\n");
return false;
}
int str_len = strlen(keyword);
for (int i = 0; i < str_len; ++i) {
if (NULL == curr->next_[ua_trans_arr_[(int)keyword[i]]]) {
new_node = new (std::nothrow) TreeNode();
if (NULL == new_node) {
printf("insert TreeNode err. new node err\n");
return false;
}
curr->next_[ua_trans_arr_[(int)keyword[i]]] = new_node;
}
curr = curr->next_[ua_trans_arr_[(int)keyword[i]]];
if (i == str_len - 1) {
if ( 0 == curr->end_flag_) {
curr->end_flag_ = true;
}
else {
printf("insert UA Duplicate: %s\n", keyword);
}
}
}
return true;
}
/*
void UserAgentInfo::UAFaildBuild() {
TreeNode* temp = NULL;
TreeNode* p = NULL;
ua_root_->fail_ = NULL;
queue_nodes_[queue_head_++] = ua_root_;
while (queue_head_ != queue_tail_) {
temp = queue_nodes_[queue_tail_++];
for(int i = 0; i < UA_TREE_WIDTH; ++i) {
if(NULL != temp->next_[i]) {
if(temp == ua_root_) {
temp->next_[i]->fail_ = ua_root_;
}
else {
p = temp->fail_;
while (NULL != p) {
if(NULL != p->next_[i]) {
temp->next_[i]->fail_ = p->next_[i];
break;
}
p = p->fail_;
}
if(NULL == p) {
temp->next_[i]->fail_ = ua_root_;
}
}
queue_nodes_[queue_head_++] = temp->next_[i];
}
}
}
}
*/
/// Recursively frees the subtree below `root` and then `root` itself, except
/// when `root` is the trie root, which is kept so that keywords can be
/// inserted again afterwards.
///
/// Fixes over the previous version:
///  - returns true on success; it used to return false unconditionally, which
///    made the recursion stop after the first leaf had been freed and made
///    every caller report failure;
///  - clears each child pointer once the child has been freed, so the
///    surviving root never holds dangling pointers.
/// @param[in] root node at which to start; must not be null
/// @return true when the subtree was released, false when `root` is null or a
///         nested call failed
bool UserAgentInfo::UATreeNodeDestory(TreeNode* root) {
  if (root == NULL) {
    printf("UATreeNodeDestory fail_d: root is null\n");
    return false;
  }

  for (int i = 0; i < UA_TREE_WIDTH; ++i) {
    if (NULL != root->next_[i]) {
      if (!UATreeNodeDestory(root->next_[i])) {
        printf("UATreeNodeDestory faild\n");
        return false;
      }
      // The child has been deleted by the recursive call.
      root->next_[i] = NULL;
    }
  }

  if (root != ua_root_) {
    delete root;
  }
  return true;
}
/// Runs UATreeNodeDestory() on the trie root.
/// @return the result of UATreeNodeDestory(ua_root_)
bool UserAgentInfo::UATreeDestory() {
  const bool destroyed = UATreeNodeDestory(ua_root_);
  return destroyed;
}
/// Clears the keyword storage by calling UATreeDestory(), logging when that
/// call reports failure.
/// @return true when UATreeDestory() succeeded, false otherwise
bool UserAgentInfo::UATreeClean() {
  if (!UATreeDestory()) {
    printf("UATreeDestory faild\n");
    return false;
  }
  // Disabled together with the failure-pointer build: reset of its node queue.
  /*
  memset(queue_nodes_, 0, sizeof(TreeNode*) * (UA_TREE_WIDTH * UA_TRIETREE_TRANS_ARR_LEN));
  queue_head_ = 1;
  queue_tail_ = 1;
  */
  return true;
}
/// Reads keywords from `user_agent_file` and inserts them into the existing
/// trie. Nothing is removed here; keywords that are already present stay.
/// On an insertion failure the keywords inserted so far remain in the trie.
/// @param[in] user_agent_file file to read the keywords from
/// @return true when every keyword was inserted, false when the file could not
///         be read or an insertion failed
bool UserAgentInfo::UATreeDynamicLoad(const std::string &user_agent_file) {
  vector<string> keywords;
  if (!ReadUAFile(user_agent_file, keywords)) {
    return false;
  }

  for (vector<string>::const_iterator it = keywords.begin(); it != keywords.end(); ++it) {
    if (!UATreeNodeInsert(it->c_str())) {
      // Name this function in the log; the message used to claim "UAInit".
      printf("in UATreeDynamicLoad insert TreeNode err\n");
      return false;
    }
  }
  return true;
}
bool UserAgentInfo::UAMatch(const char* user_agent) {
TreeNode* curr = ua_root_;
if (NULL == ua_root_) {
printf("UAMatch err: ua_root_ is null\n");
return false;
}
if (NULL == user_agent) {
printf("UAMatch err: user_agent is null\n");
return false;
}
for (unsigned int i = 0; i < strlen(user_agent); ++i) {
if (NULL != curr->next_[ua_trans_arr_[(int)user_agent[i]]]) {
curr = curr->next_[ua_trans_arr_[(int)user_agent[i]]];
if (curr->end_flag_) {
return true;
}
}
else {
if (curr->end_flag_) {
return true;
}
else {
curr = ua_root_;
}
}
}
return false;
}