利用trie树(字典树)匹配user_agent

利用trie树(字典树)结构存储关键字,目标是使匹配过程可以在O(n)时间内完成

 

#ifndef MATCH_USER_AGENT_H_
#define MATCH_USER_AGENT_H_

#include <string>
#include <vector>

using std::string;
using std::vector;
/// Number of child slots per trie node. Must be larger than the largest slot
/// code assigned by UserAgentInfo::TrietreeTransInit (currently 45).
#define UA_TREE_WIDTH              50
/// Size of the character -> slot translation table (one entry per 7-bit
/// ASCII code).
#define UA_TRIETREE_TRANS_ARR_LEN  128


/// One node of the user-agent keyword trie.
class TreeNode {
 public:
  /// Creates a node with no children that does not terminate a keyword.
  /// Initialising here means a node is in a valid state no matter how it is
  /// created (`new TreeNode`, `new TreeNode()`, or a stack object), instead of
  /// relying on every allocation site using value-initialisation.
  TreeNode() : next_(), end_flag_(false) {}

  /// Child pointers indexed by translated character code; NULL means the
  /// child does not exist.
  TreeNode *next_[UA_TREE_WIDTH];
  /// True when the path from the root to this node spells a complete keyword.
  bool end_flag_;
};

class UserAgentInfo {
 public :
  /// 对字符串进行转码,节省trietree树空间
  void TrietreeTransInit();

  /// 初始化,trietree根节点等初始化
  /// @return true: 初始化成功       false: 初始化失败
  bool UAInit(const string &user_agent_file);

  /// 向UserAgent所使用的trietree中插入节点
  /// @param[in] keyword 插入的字符串
  /// @return true: 插入成功       false: 插入失败
  bool UATreeNodeInsert(const char* keyword);

  /// 构造失败指针,利用失败指针匹配时可以在O(N)内搞定,即不回溯
  void UAFaildBuild();

  /// 删除UA Tree 
  /// @return true: 删除成功       false: 删除失败
  bool UATreeDestory();

  /// 递归删除UA Tree_Node
  /// @param[in] tree根节点
  /// @return true: 删除成功       false: 删除失败
  bool UATreeNodeDestory(TreeNode* root);

  /// 清空UA 相关存储结构  
  /// @return true: 删除成功       false: 删除失败
  /// @当需要动态载入UA时才需要调此函数
  bool UATreeClean();

  /// 重新载入数据
  /// @return true: 载入成功       false: 载入失败
  /// @当需要动态载入UA时才需要调此函数
  bool UATreeDynamicLoad(const std::string &user_agent_file);

  /// user_agent匹配函数
  /// @param[in] user_agent:请求用户的user_agent
  /// @return true: 匹配上UA       false: 没有匹配上UA
  bool UAMatch(const char* user_agent);

  /// 读取user_agent文件
  bool ReadUAFile(const std::string &user_agent_file, vector<string> &result);
 private:
  TreeNode* ua_root_;  //trietree根节点
  char ua_trans_arr_[UA_TRIETREE_TRANS_ARR_LEN];
};


#endif


 

 

#include "cpc_user_agent.h"


/// Reads the user-agent keyword file into `result` by delegating to
/// LoadFile(user_agent_file, 0, &result).
/// NOTE(review): LoadFile is declared outside this file; the meaning of its
/// second argument and the exact layout of `result` (presumably one entry per
/// line) should be confirmed against its declaration.
/// @param[in]  user_agent_file path of the keyword file
/// @param[out] result          receives the loaded entries
/// @return true when LoadFile succeeds, false (after logging) otherwise
bool UserAgentInfo::ReadUAFile(const std::string &user_agent_file, vector<string> &result) {
  if (!LoadFile(user_agent_file, 0, &result)) {
    // The period belongs before the newline; the old format string emitted a
    // stray '.' at the start of the following log line.
    printf("Failed to load user_agent_file %s.\n", user_agent_file.c_str());
    return false;
  }
  return true;
}

/// Builds the character -> child-slot translation table.
///
/// Resulting codes:
///   - '0'..'9'            -> 1..10
///   - 'a'..'z' / 'A'..'Z' -> 11..36 (upper and lower case share a code, so
///                            lookups through this table ignore letter case)
///   - punctuation         -> 37..45 ('#' and ' ' deliberately share code 39)
///   - everything else     -> 0
void UserAgentInfo::TrietreeTransInit() {
  static const char kDigits[] = "0123456789";
  static const char kLower[] = "abcdefghijklmnopqrstuvwxyz";
  static const char kUpper[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
  static const struct {
    char ch;
    char code;
  } kPunct[] = {
    {'*', 37}, {'.', 38}, {'#', 39}, {' ', 39}, {';', 40},
    {'(', 41}, {')', 42}, {'/', 43}, {'-', 44}, {'_', 45},
  };

  // Every character not listed below keeps code 0.
  memset(ua_trans_arr_, 0, sizeof(ua_trans_arr_));

  // Digits occupy codes 1..10.
  for (int pos = 0; kDigits[pos] != '\0'; ++pos) {
    ua_trans_arr_[(int)kDigits[pos]] = static_cast<char>(1 + pos);
  }

  // Letters occupy codes 11..36, identical for both cases.
  for (int pos = 0; kLower[pos] != '\0'; ++pos) {
    const char code = static_cast<char>(11 + pos);
    ua_trans_arr_[(int)kLower[pos]] = code;
    ua_trans_arr_[(int)kUpper[pos]] = code;
  }

  // Supported punctuation occupies codes 37..45.
  const int punct_count = static_cast<int>(sizeof(kPunct) / sizeof(kPunct[0]));
  for (int pos = 0; pos < punct_count; ++pos) {
    ua_trans_arr_[(int)kPunct[pos].ch] = kPunct[pos].code;
  }
}

bool UserAgentInfo::UAInit(const string &user_agent_file) {
  ua_root_ = new (std::nothrow) TreeNode(); 
  if (NULL == ua_root_) {
    printf("new TreeNode err: ua_root_\n");
    return false;
  }
  TrietreeTransInit();
  vector<string> keywords;
  if (!ReadUAFile(user_agent_file, keywords)) {
    return false;
  }
  vector<string>::const_iterator it = keywords.begin();
  for (; it != keywords.end(); ++it) {
    if (!UATreeNodeInsert((*it).c_str())) {
      printf("in UAInit insert TreeNode err\n");
      return false;
    }
  }
  return true;
}


bool UserAgentInfo::UATreeNodeInsert(const char* keyword) {
  TreeNode* curr = ua_root_;
  TreeNode* new_node = NULL;
  if (NULL == keyword || NULL == curr) {
    printf("insert TreeNode err\n");
    return false;
  }

  int str_len = strlen(keyword);
  for (int i = 0; i < str_len; ++i) {
    if (NULL == curr->next_[ua_trans_arr_[(int)keyword[i]]]) {
      new_node = new (std::nothrow) TreeNode();
      if (NULL == new_node) {
        printf("insert TreeNode err. new node err\n");
        return false;
      }
      curr->next_[ua_trans_arr_[(int)keyword[i]]] = new_node;
    }
    curr = curr->next_[ua_trans_arr_[(int)keyword[i]]];
    if (i == str_len - 1) {
      if ( 0 == curr->end_flag_) {
        curr->end_flag_ = true;
      }
      else {
        printf("insert UA Duplicate: %s\n", keyword);
      }
    }
  }
  return true;
}

/*
   void UserAgentInfo::UAFaildBuild() {
   TreeNode* temp = NULL;
   TreeNode* p = NULL;
   ua_root_->fail_ = NULL;
   queue_nodes_[queue_head_++] = ua_root_;

   while (queue_head_ != queue_tail_) {
   temp = queue_nodes_[queue_tail_++];
   for(int i = 0; i < UA_TREE_WIDTH; ++i) {
   if(NULL != temp->next_[i]) {
   if(temp == ua_root_) {
   temp->next_[i]->fail_ = ua_root_;
   }
   else {
   p = temp->fail_;
   while (NULL != p) {
   if(NULL != p->next_[i]) {
   temp->next_[i]->fail_ = p->next_[i];
   break;
   }
   p = p->fail_;
   }
   if(NULL == p) {
   temp->next_[i]->fail_ = ua_root_;
   }
   }
   queue_nodes_[queue_head_++] = temp->next_[i];
   }
   }
   }
   }
 */
/// Recursively frees every node below `root`, then `root` itself unless it is
/// the trie root (ua_root_), which is kept so the trie can be refilled.
///
/// Each child slot is reset to NULL once its subtree has been freed, so no
/// dangling pointers remain in a node that survives the call.
/// @param[in] root root of the (sub)tree to destroy
/// @return true when the subtree was destroyed; false when `root` is NULL or
///         a nested call reported failure
bool UserAgentInfo::UATreeNodeDestory(TreeNode* root) {
  if (root == NULL) {
    printf("UATreeNodeDestory fail_d: root is null\n");
    return false;
  }

  for (int i = 0; i < UA_TREE_WIDTH; ++i) {
    if (NULL == root->next_[i]) {
      continue;
    }
    if (!UATreeNodeDestory(root->next_[i])) {
      printf("UATreeNodeDestory faild\n");
      return false;
    }
    // The child has been deleted; clear the slot so later inserts/matches
    // never follow a freed pointer.
    root->next_[i] = NULL;
  }

  if (root != ua_root_) {
    delete root;
  }
  // Success. This used to return false unconditionally, which made the
  // caller's recursion stop after the first leaf and leak the rest.
  return true;
}

bool UserAgentInfo::UATreeDestory() {
  return UATreeNodeDestory(ua_root_);
}

/// Clears the UA matching structures by destroying the trie.
/// Logs and returns false when UATreeDestory() reports failure.
/// @return true when the trie was destroyed, false otherwise
bool UserAgentInfo::UATreeClean() {
  if (!UATreeDestory()) {
    printf("UATreeDestory faild\n");
    return false;
  }
  return true;
}

/// Reads the keyword file and inserts every keyword into the existing trie.
///
/// This function does not clear the trie or rebuild the translation table
/// itself; it only reads and inserts.
/// @param[in] user_agent_file path of the keyword file
/// @return false when the file cannot be read or an insertion fails; true
///         otherwise
bool UserAgentInfo::UATreeDynamicLoad(const std::string &user_agent_file) {
  vector<string> keywords;
  if (!ReadUAFile(user_agent_file, keywords)) {
    return false;
  }

  for (vector<string>::const_iterator it = keywords.begin();
       it != keywords.end(); ++it) {
    if (!UATreeNodeInsert(it->c_str())) {
      // Name this function in the log; the message used to claim the failure
      // happened in UAInit.
      printf("in UATreeDynamicLoad insert TreeNode err\n");
      return false;
    }
  }
  return true;
}

bool UserAgentInfo::UAMatch(const char* user_agent) {
  TreeNode* curr = ua_root_;
  if (NULL == ua_root_) {
    printf("UAMatch err: ua_root_ is null\n");
    return false;
  }
  if (NULL == user_agent) {
    printf("UAMatch err: user_agent is null\n");
    return false;
  }

  for (unsigned int i = 0; i < strlen(user_agent); ++i) {
    if (NULL != curr->next_[ua_trans_arr_[(int)user_agent[i]]]) {
      curr = curr->next_[ua_trans_arr_[(int)user_agent[i]]];
      if (curr->end_flag_) {
        return true;
      }
    }
    else {
      if (curr->end_flag_) {
        return true;
      }
      else {
        curr = ua_root_;
      }
    }
  }
  return false;
}



 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值