目录
HDU 1075 What Are You Talking About
一,字典树
字典树,又称前缀树。
前缀树的3个基本性质:
- 根节点不包含字符,除根节点外每一个节点都只包含一个字符。
- 从根节点到某一节点,路径上经过的字符连接起来,为该节点对应的字符串。
- 每个节点的所有子节点包含的字符都不相同。
如图,没画出来的代表空指针。
节点11对应的字符串就是aba,节点15对应的字符串就是caaa
应用场景:存储大量字符串并频繁查找
// Array-backed trie (prefix tree) over a small contiguous alphabet.
//
// Nodes are identified by their index in `v`; node 0 is the root. T must be
// `char` (alphabet 'a'..'z') or `int` (alphabet 0..9): those are the two types
// for which MinValue/ValueNum overloads exist at the bottom of the class.
template<typename T>
class Trie
{
public:
    // v[node][0] is the accumulated weight of the strings ending at `node`.
    // v[node][k], k >= 1, is the index of the child reached through the k-th
    // alphabet symbol, or 0 when that child does not exist.
    std::vector<std::vector<int>> v;
    std::map<int, int> isEnd; // how many inserted strings end at a node (the node need not be a leaf)
    std::map<int, int> deep;  // depth of every node created by push(); the root has no entry

    Trie()
    {
        v.push_back(std::vector<int>(ValueNum(T{}) + 1, 0));
    }

    // Inserts the string {s, len} with weight `value`. Inserting the same
    // string again is allowed: both its weight and its isEnd count accumulate.
    // A string containing a symbol outside the alphabet is ignored entirely,
    // because such a symbol would index past the end of a node row.
    void push(const T* s, int len, int value = 0)
    {
        for (int i = 0; i < len; i++)
        {
            if (SlotOf(s[i]) < 0) return;
        }
        int node = 0;
        for (int i = 0; i < len; i++)
        {
            const int slot = SlotOf(s[i]);
            if (v[node][slot] == 0)
            {
                // Record the new index before growing `v`; no reference into
                // `v` is held across the push_back.
                const int fresh = static_cast<int>(v.size());
                v[node][slot] = fresh;
                v.push_back(std::vector<int>(ValueNum(T{}) + 1, 0));
                deep[fresh] = i + 1;
            }
            node = v[node][slot];
        }
        v[node][0] += value;
        isEnd[node]++;
    }

    // Walks {s, len} from the root.
    // maxDeep receives the number of leading symbols that were matched.
    // Every visited node at which some inserted string ends is appended to
    // `ends` (the vector is not cleared first).
    // Returns the accumulated weight stored at the final node, or 0 when the
    // walk falls off the trie (including on a symbol outside the alphabet).
    int find(const T* s, int len, int &maxDeep, std::vector<int>&ends) const
    {
        int node = 0;
        maxDeep = 0;
        for (int i = 0; i < len; i++)
        {
            const int slot = SlotOf(s[i]);
            if (slot < 0 || v[node][slot] == 0) return 0;
            node = v[node][slot];
            maxDeep++;
            // Look up without operator[] so that a query never inserts
            // entries into isEnd.
            const auto hit = isEnd.find(node);
            if (hit != isEnd.end() && hit->second) ends.push_back(node);
        }
        return v[node][0];
    }

    // Convenience overload: returns only the accumulated weight of {s, len}.
    int find(const T* s, int len) const
    {
        int maxDeep;
        std::vector<int> ends;
        return find(s, len, maxDeep, ends);
    }

private:
    // Column of `symbol` inside a node row (1-based), or -1 when the symbol is
    // outside the supported alphabet.
    static int SlotOf(T symbol)
    {
        const int offset = symbol - MinValue(T{});
        return (offset >= 0 && offset < ValueNum(T{})) ? offset + 1 : -1;
    }
    // Smallest symbol of the alphabet for each supported element type.
    static char MinValue(char)
    {
        return 'a';
    }
    static int MinValue(int)
    {
        return 0;
    }
    // Number of symbols in the alphabet for each supported element type.
    static int ValueNum(char) {
        return 26;
    }
    static int ValueNum(int) {
        return 10;
    }
};
二,OJ实战
CSU 1115 最短的名字
题目:
在一个奇怪的村子中,很多人的名字都很长,比如aaaaa, bbb and abababab。
名字这么长,叫起全名来显然很不方便。所以村民之间一般只叫名字的前缀。比如叫'aaaaa'的时候可以只叫'aaa',因为没有第二个人名字的前三个字母是'aaa'。不过你不能叫'a',因为有两个人的名字都以'a'开头。村里的人都很聪明,他们总是用最短的称呼叫人。输入保证村里不会有一个人的名字是另外一个人名字的前缀(作为推论,任意两个人的名字都不会相同)。
如果村里的某个人要叫所有人的名字(包括他自己),他一共会说多少个字母?
输入第一行为数据组数T (T<=10)。每组数据第一行为一个整数n(1<=n<=1000),即村里的人数。以下n行每行为一个人的名字(仅有小写字母组成)。输入保证一个村里所有人名字的长度之和不超过1,000,000。
对于每组数据,输出所有人名字的字母总数。
1
3
aaaaa
bbb
abababab
5
思路:字典树
ans[i][j]表示第i个节点的第j个孩子的编号,j>0
ans[i][0]表示第i个节点的所有后代中叶子节点的个数
代码:
#include <iostream>
#include <string>
#include <map>
#include <vector>
using namespace std;
// Trie shared by f() and main(); main() assigns a fresh instance for every test case.
Trie<char>t;
// Counts the names stored in the subtree of trie node `k` and adds to `s` the
// number of letters needed to call each of them by its shortest unique prefix.
//
// A subtree holding exactly one name is identified by the prefix that ends at
// `k`, so it costs deep[k] letters. A subtree holding several names costs the
// sum of what its children report.
// Returns the number of names in the subtree of `k`.
int f(int k, int &s)
{
    if (t.isEnd[k]) {
        // A name that ends here costs its own depth. The previous code read
        // t.deep[t.v[k][0]], i.e. the depth of the node whose index equals the
        // stored weight, which under-counts names that hang directly below a
        // branching node (e.g. "ab" and "ac" gave 2 instead of 4).
        s += t.deep[k];
        return t.isEnd[k];
    }
    int names = 0, childCost = 0;
    for (int i = 1; i <= 26; i++)
        if (t.v[k][i]) names += f(t.v[k][i], childCost);
    // With a single name below, the children's cost is discarded: the shorter
    // prefix ending at this node already identifies that name.
    s += (names > 1) ? childCost : t.deep[k];
    return names;
}
int main()
{
int T, n;
string s;
cin >> T;
while (T--)
{
cin >> n;
t= Trie<char>();
while (n--)
{
cin >> s;
t.push(s.data(), s.length(),1);
}
int su = 0;
f(0, su);
cout << su << endl;
}
return 0;
}
HDU 1075 What Are You Talking About
Ignatius is so lucky that he met a Martian yesterday. But he didn't know the language the Martians use. The Martian gave him a history book of Mars and a dictionary when it left. Now Ignatius wants to translate the history book into English. Can you help him?
Input
The problem has only one test case, the test case consists of two parts, the dictionary part and the book part. The dictionary part starts with a single line containing the string "START", this string should be ignored, then some lines follow, each line contains two strings, the first one is a word in English, the second one is the corresponding word in Martian's language. A line with a single string "END" indicates the end of the dictionary part, and this string should be ignored. The book part starts with a single line containing the string "START", this string should be ignored, then an article written in Martian's language. You should translate the article into English with the dictionary. If you find the word in the dictionary you should translate it and write the new word into your translation, if you can't find the word in the dictionary you do not have to translate it, and just copy the old word to your translation. Space(' '), tab('\t'), enter('\n') and all the punctuation should not be translated. A line with a single string "END" indicates the end of the book part, and that's also the end of the input. All the words are in lowercase, and each word will contain at most 10 characters, and each line will contain at most 3000 characters.
Output
In this problem, you have to output the translation of the history book.
Sample Input
START
from fiwo
hello difh
mars riwosf
earth fnnvk
like fiiwj
END
START
difh, i'm fiwo riwosf.
i fiiwj fnnvk!
END
Sample Output
hello, i'm from mars.
i like earth!
AC代码一(Map实现):
#include <iostream>
#include<string>
#include<map>
using namespace std;
// Translates the Martian article read from stdin into English (HDU 1075) and
// writes it to stdout. The dictionary that precedes the article is held in a
// map from Martian word to English word.
int main()
{
    string english, martian;
    map<string, string> toEnglish;
    cin >> english; // skip the leading "START"
    while (cin >> english >> martian)
    {
        // The pair ("END", "START") closes the dictionary and opens the book.
        if (english[0] == 'E') break;
        toEnglish[martian] = english;
    }
    cin.get(); // drop the character left behind after the second "START"

    // Lines are read into a std::string: nothing to free, no pointer to
    // advance and restore, and no fixed upper bound on the line length.
    string line, word;
    while (getline(cin, line))
    {
        // All words are lowercase, so a leading 'E' can only be the final "END".
        if (!line.empty() && line[0] == 'E') break;
        size_t pos = 0;
        while (pos < line.size())
        {
            // Collect one maximal run of lowercase letters (possibly empty).
            word.clear();
            while (pos < line.size() && line[pos] >= 'a' && line[pos] <= 'z') word += line[pos++];
            const auto hit = toEnglish.find(word);
            cout << (hit == toEnglish.end() ? word : hit->second);
            // Copy everything up to the next word verbatim.
            while (pos < line.size() && (line[pos] < 'a' || line[pos] > 'z')) cout << line[pos++];
        }
        cout << '\n';
    }
    return 0;
}
AC代码二(字典树实现):
#include <iostream>
#include<string>
#include<string.h>
#include<vector>
using namespace std;
string a[1000000], b;
int ka = 0;
int main()
{
char *ch = new char[3005], *p;
cin >> b;
Trie<char> t;
while (cin >> a[++ka] >> b)
{
if (a[ka][0] == 'E')break;
t.push(b.data(),b.length(),ka);
}
cin.get();
while (cin.getline(ch, 3005))
{
p = ch;
if (ch[0] == 'E')break;
while (ch[0] != '\0')
{
b = "";
while (ch[0] >= 'a' && ch[0] <= 'z')b += ch[0], ch++;
if (t.find(b.data(), b.length()) == 0)cout << b;
else cout << a[t.find(b.data(), b.length())];
while (ch[0] != '\0' && ch[0] < 'a' || ch[0] > 'z')
{
cout << ch[0];
ch++;
}
}
cout << endl;
ch = p;
}
return 0;
}
PS:若用定长数组保存英文单词,数组不能开小了:10万是不够的,100万是够的。
力扣 1804. 实现 Trie (前缀树) II
前缀树(trie ,发音为 "try")是一个树状的数据结构,用于高效地存储和检索一系列字符串的前缀。前缀树有许多应用,如自动补全和拼写检查。
实现前缀树 Trie 类:
Trie() 初始化前缀树对象。
void insert(String word) 将字符串 word 插入前缀树中。
int countWordsEqualTo(String word) 返回前缀树中字符串 word 的实例个数。
int countWordsStartingWith(String prefix) 返回前缀树中以 prefix 为前缀的字符串个数。
void erase(String word) 从前缀树中移除字符串 word 。
示例 1:
输入
["Trie", "insert", "insert", "countWordsEqualTo", "countWordsStartingWith", "erase", "countWordsEqualTo", "countWordsStartingWith", "erase", "countWordsStartingWith"]
[[], ["apple"], ["apple"], ["apple"], ["app"], ["apple"], ["apple"], ["app"], ["apple"], ["app"]]
输出
[null, null, null, 2, 2, null, 1, 1, null, 0]
解释
Trie trie = new Trie();
trie.insert("apple"); // 插入 "apple"。
trie.insert("apple"); // 插入另一个 "apple"。
trie.countWordsEqualTo("apple"); // 有两个 "apple" 实例,所以返回 2。
trie.countWordsStartingWith("app"); // "app" 是 "apple" 的前缀,所以返回 2。
trie.erase("apple"); // 移除一个 "apple"。
trie.countWordsEqualTo("apple"); // 现在只有一个 "apple" 实例,所以返回 1。
trie.countWordsStartingWith("app"); // 返回 1
trie.erase("apple"); // 移除 "apple"。现在前缀树是空的。
trie.countWordsStartingWith("app"); // 返回 0
提示:
1 <= word.length, prefix.length <= 2000
word 和 prefix 只包含小写英文字母。
insert、 countWordsEqualTo、 countWordsStartingWith 和 erase 总共调用最多 3 * 10^4 次。
保证每次调用 erase 时,字符串 word 总是存在于前缀树中。
// Counting trie over lowercase words (LeetCode 1804).
//
// Nodes live in a table that grows on demand, so the number of distinct
// prefixes is not limited by a fixed array size. Every node also stores how
// many words pass through it, which makes prefix counting a single walk.
// Precondition: all words and prefixes consist of 'a'..'z' only.
class Trie {
public:
    Trie() : ans(1, vector<int>(27, 0)) {}

    // Adds one instance of `s`.
    void insert(const string& s)
    {
        int node = 0;
        ans[0][0]++;
        for (char c : s)
        {
            const int slot = c - 'a' + 1;
            if (ans[node][slot] == 0)
            {
                // Take the new index before growing the table; no reference
                // into `ans` is held across the push_back.
                const int fresh = static_cast<int>(ans.size());
                ans.push_back(vector<int>(27, 0));
                ans[node][slot] = fresh;
            }
            node = ans[node][slot];
            ans[node][0]++;
        }
        m[node]++;
    }

    // Returns the node reached by spelling `s` from the root, or 0 when the
    // path does not exist (0 is also returned for the empty string).
    int find(const string& s) const
    {
        int node = 0;
        for (char c : s)
        {
            node = ans[node][c - 'a' + 1];
            if (node == 0) return 0;
        }
        return node;
    }

    // Number of instances of exactly `s` currently stored.
    int countWordsEqualTo(const string& s) const {
        const int node = find(s);
        if (node == 0) return 0;
        // Look up without operator[] so that a query never inserts into m.
        const auto hit = m.find(node);
        return hit == m.end() ? 0 : hit->second;
    }

    // Number of stored words in the subtree rooted at node `id`. Kept under
    // its original name for existing callers; the count is maintained
    // incrementally, so no traversal is needed.
    int dfs(int id) const
    {
        return ans[id][0];
    }

    // Number of stored words that have `s` as a prefix.
    int countWordsStartingWith(const string& s) const {
        const int node = find(s);
        return node == 0 ? 0 : ans[node][0];
    }

    // Removes one instance of `s`. Does nothing when `s` is not stored, so a
    // stray call cannot drive any counter negative.
    void erase(const string& s) {
        if (countWordsEqualTo(s) == 0) return;
        int node = 0;
        ans[0][0]--;
        for (char c : s)
        {
            node = ans[node][c - 'a' + 1];
            ans[node][0]--;
        }
        m[node]--;
    }

    // ans[node][0] is the number of stored words passing through `node`;
    // ans[node][k], k in 1..26, is the child for letter 'a' + k - 1 (0 = none).
    vector<vector<int>> ans;
    // m[node] is the number of stored words that end exactly at `node`.
    map<int, int>m;
};
力扣 139. 单词拆分
给你一个字符串 s 和一个字符串列表 wordDict 作为字典。请你判断是否可以利用字典中出现的单词拼接出 s 。
注意:不要求字典中出现的单词全部都使用,并且字典中的单词可以重复使用。
示例 1:
输入: s = "leetcode", wordDict = ["leet", "code"] 输出: true 解释: 返回 true 因为 "leetcode" 可以由 "leet" 和 "code" 拼接成。
示例 2:
输入: s = "applepenapple", wordDict = ["apple", "pen"] 输出: true 解释: 返回 true 因为 "applepenapple" 可以由 "apple" "pen" "apple" 拼接成。 注意,你可以重复使用字典中的单词。
示例 3:
输入: s = "catsandog", wordDict = ["cats", "dog", "sand", "and", "cat"] 输出: false
提示:
1 <= s.length <= 300
1 <= wordDict.length <= 1000
1 <= wordDict[i].length <= 20
s 和 wordDict[i] 仅由小写英文字母组成
wordDict 中的所有字符串 互不相同
class Solution {
public:
bool has(string s)
{
if (s.empty())return true;
if (dp[s.length()] == 1)return true;
if (dp[s.length()] == 2)return false;
int maxDeep;
vector<int>ends;
t.find(s.data(), s.length(), maxDeep, ends);
for (auto id : ends) {
if (has(s.substr(t.deep[id], s.length() - t.deep[id]))) {
dp[s.length()] = 1;
return true;
}
}
dp[s.length()] = 2;
return false;
}
bool wordBreak(string s, vector<string>& wordDict) {
for (auto s : wordDict)t.push(s.data(), s.length());
return has(s);
}
Trie<char>t;
map<int, int>dp;
};
力扣 140. 单词拆分 II
给定一个字符串 s 和一个字符串字典 wordDict ,在字符串 s 中增加空格来构建一个句子,使得句子中所有的单词都在词典中。以任意顺序 返回所有这些可能的句子。
注意:词典中的同一个单词可能在分段中被重复使用多次。
示例 1:
输入:s = "catsanddog", wordDict = ["cat","cats","and","sand","dog"]
输出:["cats and dog","cat sand dog"]
示例 2:
输入:s = "pineapplepenapple", wordDict = ["apple","pen","applepen","pine","pineapple"] 输出:["pine apple pen apple","pineapple pen apple","pine applepen apple"] 解释: 注意你可以重复使用字典中的单词。
示例 3:
输入:s = "catsandog", wordDict = ["cats","dog","sand","and","cat"] 输出:[]
提示:
1 <= s.length <= 20
1 <= wordDict.length <= 1000
1 <= wordDict[i].length <= 10
s 和 wordDict[i] 仅由小写英文字母组成
wordDict 中所有字符串都 不同
class Solution {
public:
vector<string> solve(string s)
{
if (s == "")return vector<string>(1);
if (dp.find(s.length())!=dp.end())return dp[s.length()];
int maxDeep;
vector<int>ends;
t.find(s.data(), s.length(), maxDeep, ends);
for (auto id : ends) {
vector<string> v = solve(s.substr(t.deep[id], s.length() - t.deep[id]));
for (auto &vi : v) {
dp[s.length()].push_back(s.substr(0, t.deep[id]) + " " + vi);
}
}
return dp[s.length()];
}
vector<string> wordBreak(string s, vector<string>& wordDict) {
for (auto s : wordDict)t.push(s.data(), s.length());
vector<string> v= solve(s);
for (auto &vi : v)vi.erase(vi.begin() + vi.length() - 1);
return v;
}
Trie<char>t;
map<int, vector<string>>dp;
};