老规矩,推荐博客:
Trie树详解
代码如下:
//
// Created by Administrator on 2020/2/18.
//
#include <iostream>
#include <map>
#include <string>
#include <cstdio>
#include <fstream>
#include <vector>
#include <cstring>
using namespace std;
/// One node of the trie.
///
/// `c` is the character on the edge leading into this node, `occurances`
/// counts how many times a string ending exactly at this node has been stored,
/// and `children` maps the next character to the corresponding child node.
/// The scalar members carry default initializers so that a freshly created
/// node never holds indeterminate values.
typedef struct TrieNode {
    char c = '\0';
    int occurances = 0;
    std::map<char, TrieNode*> children;
} *lpTnode, TNode;
/// Splits `str` into tokens separated by any character contained in `dlim`.
///
/// Every character of `dlim` acts as a separator, runs of consecutive
/// separators are collapsed, and empty tokens are never produced (the same
/// tokenisation rules as `strtok`). An empty `dlim` yields the whole string as
/// a single token.
///
/// @param str  text to tokenise
/// @param dlim set of separator characters
/// @return the tokens in order of appearance; empty when `str` is empty or
///         consists solely of separators
std::vector<std::string> split(const std::string& str, const std::string& dlim) {
    std::vector<std::string> res;
    // Work directly on the string: no heap buffers to leak and no hidden
    // global state as with strtok.
    std::string::size_type start = str.find_first_not_of(dlim);
    while (start != std::string::npos) {
        const std::string::size_type stop = str.find_first_of(dlim, start);
        if (stop == std::string::npos) {
            // Last token runs to the end of the input.
            res.push_back(str.substr(start));
            break;
        }
        res.push_back(str.substr(start, stop - start));
        start = str.find_first_not_of(dlim, stop);
    }
    return res;
}
class TrieTree {
lpTnode root;
int size;//字符串数量;
public:
TrieTree();
int insert(const string& str);
bool remove(const string& str);
bool contains(const string& str);
int frequency(const string& str);
int getSize(){
return this->size;
}
};
/// Builds an empty trie: a root node labelled '\0' with a zero occurrence
/// count, and a string counter of zero.
TrieTree::TrieTree() : root(new TNode), size(0) {
    root->occurances = 0;
    root->c = '\0';
}
int TrieTree::insert(const string& str) {
if(str.length()<=0) {
cout << "string is null" << endl;
return 0;
}
int strLen = str.length();
lpTnode atr = root;
int i;
for(i=0;i<strLen;i++){//每一个字符
if(atr->children.find(str[i])!=atr->children.end()){
atr = atr->children.at(str[i]);
} else {//没有 则建立节点
auto tn = new TNode;
tn->c = str[i];
tn->occurances =0;
atr->children[str[i]] = tn;
atr = atr->children[str[i]];
}
}
atr->occurances++;// 无论是找到还是插入 最后一个字符都++;
if(atr->occurances==1) this->size++;
return 1;
}
bool TrieTree::remove(const string &str) {
if(str.length()<=0) {
cout << "string is null" << endl;
return false;
}
int strLen = str.length();
lpTnode atr = root;
int i;
for(i=0;i<strLen;i++){//每一个字符
if(atr->children.find(str[i])!=atr->children.end()){
atr = atr->children.at(str[i]);
} else {//没有存在的字符;atr.children 为空了即树找完了但字符串还没完
return false;
}
}
// 如果完整运行到这:一是有这个字符串,二是有以该字符串为前缀的字符串
bool residue = atr->occurances > 0; // 简化表达式
if(residue) {
atr->occurances--;
this->size--;
return true;
}
return false;
}
bool TrieTree::contains(const string &str) {
if(str.length()<=0) {
cout << "string is null" << endl;
return false;
}
int strLen = str.length();
lpTnode atr = root;
int i;
for(i=0;i<strLen;i++){//每一个字符
if(atr->children.find(str[i])!=atr->children.end()){
atr = atr->children.at(str[i]);
} else {//没包含字符
return false;
}
}
return atr->occurances > 0; // 简化表达式
}
int TrieTree::frequency(const string &str) {
if(str.length()<=0) {
cout << "string is null" << endl;
return 0;
}
int strLen = str.length();
lpTnode atr = root;
int i;
for(i=0;i<strLen;i++){//每一个字符
if(atr->children.find(str[i])!=atr->children.end()){
atr = atr->children.at(str[i]);
} else {//没包含字符
return 0;
}
}
return atr->occurances; // 简化表达式
}
/// Demo driver: loads a text file into a trie word by word, then prints the
/// number of distinct words, the frequency of a few sample words, and the
/// frequencies after removing one occurrence of some of them.
///
/// The input path may be given as the first command-line argument; without
/// one the original hard-coded path is used. Returns 1 when the file cannot
/// be opened, 0 otherwise.
int main(int argc, char* argv[]) {
    const std::string delimiter = " ";
    const std::string fileName =
        argc > 1 ? argv[1] : R"(D:\hiicy\tempDownload\bible.txt)";

    std::ifstream infile(fileName, std::ios::in);  // open the input file
    if (!infile.is_open()) {
        // Without this check a missing file never reaches EOF and the read
        // loop would spin forever.
        std::cerr << "cannot open file: " << fileName << std::endl;
        return 1;
    }

    TrieTree tt;
    std::string buffer;
    // Loop on the result of getline itself so a failed read ends the loop.
    while (std::getline(infile, buffer)) {
        for (const auto& word : split(buffer, delimiter)) {
            tt.insert(word);
        }
    }

    int size = tt.getSize();
    std::cout << "这个trie树共有: " << size << " 种不同单词" << std::endl;

    std::string words[] = {"and", "And", "no", "my", "you", "the"};
    for (const auto& word : words) {
        int freq = tt.frequency(word);
        std::cout << word << std::endl
                  << "出现次数为" << freq << "次" << std::endl;
    }

    std::cout << tt.contains("die") << std::endl;
    tt.remove("and");
    std::cout << "and: " << tt.frequency("and") << std::endl;
    tt.remove("the");
    std::cout << "the: " << tt.frequency("the") << std::endl;
    tt.remove("no");
    std::cout << "no: " << tt.frequency("no") << std::endl;
    return 0;
}