哦,不!你不小心把一个长篇文章中的空格、标点都删掉了,并且大写也弄成了小写。像句子"I reset the computer. It still didn’t boot!"已经变成了"iresetthecomputeritstilldidntboot"。在处理标点符号和大小写之前,你得先把它断成词语。当然了,你有一本厚厚的词典dictionary,不过,有些词没在词典里。假设文章用sentence表示,设计一个算法,把文章断开,要求未识别的字符最少,返回未识别的字符数。
注意:本题相对原题稍作改动,只需返回未识别的字符数
示例:
输入:
dictionary = ["looked","just","like","her","brother"]
sentence = "jesslookedjustliketimherbrother"
输出: 7
解释: 断句后为"jess looked just like tim her brother",共7个未识别字符。
来源:力扣(LeetCode)
暴力动态规划:
/// Brute-force dynamic programming solution to the "re-space" problem.
class Solution {
public:
    /// @brief Splits `sentence` into dictionary words so that as few characters
    ///        as possible are left outside any word.
    /// @param dictionary Known words; it is only read, never modified.
    /// @param sentence   Text with spaces and punctuation removed.
    /// @return The minimum number of characters not covered by a dictionary word.
    ///
    /// For every prefix length the whole dictionary is scanned, so the running
    /// time is O(sentence length * total dictionary length).
    int respace(std::vector<std::string>& dictionary, std::string sentence) {
        const std::size_t n = sentence.size();

        // dp[i] = fewest unrecognized characters in the prefix sentence[0, i).
        // A std::vector replaces the non-standard variable-length array so the
        // table lives on the heap regardless of how long the sentence is.
        std::vector<int> dp(n + 1, 0);

        for (std::size_t i = 1; i <= n; ++i) {
            // Baseline: treat character i-1 as unrecognized.
            dp[i] = dp[i - 1] + 1;

            for (const std::string& word : dictionary) {
                const std::size_t len = word.size();
                // An empty word covers nothing; a word longer than the prefix
                // cannot end at position i.
                if (len == 0 || len > i) {
                    continue;
                }
                // Compare in place instead of materializing a substring.
                if (sentence.compare(i - len, len, word) == 0) {
                    dp[i] = std::min(dp[i], dp[i - len]);
                }
            }
        }
        return dp[n];
    }
};
字典树:
/// Trie node over the letters 'a'..'z'.
///
/// Words are inserted back to front, so following child links from the root
/// spells a word from its last character to its first. This lets a caller
/// that scans a sentence leftwards from some end position find every
/// dictionary word ending there in a single walk.
///
/// Each node owns its children and frees them in the destructor. Destruction
/// recurses once per trie level, i.e. as deep as the longest inserted word.
class Trie {
public:
    Trie *next[26] = { nullptr };  ///< Child for each letter, or nullptr.
    bool isEnd = false;            ///< True if a (reversed) word ends at this node.

    Trie() = default;

    ~Trie() {
        for (Trie* child : next) {
            delete child;  // deleting nullptr is a no-op
        }
    }

    // Nodes own their children through raw pointers, so a member-wise copy
    // would lead to a double delete.
    Trie(const Trie&) = delete;
    Trie& operator=(const Trie&) = delete;

    /// @brief Inserts `w` in reverse character order.
    /// @param w Word to add. A word containing any character outside
    ///          'a'..'z' is ignored, because such a character has no slot in
    ///          `next` and indexing with it would be out of bounds.
    void insert(const std::string& w) {
        for (const char c : w) {
            if (c < 'a' || c > 'z') {
                return;
            }
        }
        Trie* cur = this;
        for (auto it = w.rbegin(); it != w.rend(); ++it) {
            const int slot = *it - 'a';
            if (cur->next[slot] == nullptr) {
                cur->next[slot] = new Trie();
            }
            cur = cur->next[slot];
        }
        cur->isEnd = true;
    }
};

/// Trie-accelerated dynamic programming solution to the "re-space" problem.
class Solution {
public:
    /// @brief Splits `sentence` into dictionary words so that as few characters
    ///        as possible are left outside any word.
    /// @param dictionary Known words; it is only read, never modified.
    /// @param sentence   Text with spaces and punctuation removed.
    /// @return The minimum number of characters not covered by a dictionary word.
    int respace(std::vector<std::string>& dictionary, std::string sentence) {
        // The root has automatic storage, so the whole trie is released when
        // this function returns instead of being leaked on every call.
        Trie root;
        for (const std::string& word : dictionary) {
            root.insert(word);
        }

        const int n = static_cast<int>(sentence.size());

        // dp[i] = fewest unrecognized characters in the prefix sentence[0, i).
        std::vector<int> dp(n + 1, 0);

        for (int i = 1; i <= n; ++i) {
            // Baseline: treat character i-1 as unrecognized.
            dp[i] = dp[i - 1] + 1;

            // Walk leftwards from position i-1; every node flagged isEnd marks
            // a dictionary word occupying sentence[j-1, i).
            const Trie* cur = &root;
            for (int j = i; j >= 1; --j) {
                const int slot = sentence[j - 1] - 'a';
                if (slot < 0 || slot >= 26) {
                    break;  // no stored word can contain this character
                }
                cur = cur->next[slot];
                if (cur == nullptr) {
                    break;  // no word has this suffix
                }
                if (cur->isEnd) {
                    dp[i] = std::min(dp[i], dp[j - 1]);
                }
            }
        }
        return dp[n];
    }
};