616. Add Bold Tag in String
Given a string s and a list of strings dict, you need to add a closed pair of bold tags <b> and </b> to wrap the substrings in s that exist in dict. If two such substrings overlap, you need to wrap them together with only one pair of closed bold tags. Also, if two substrings wrapped by bold tags are consecutive, you need to combine them.
Example 1:
Input:
s = “abcxyz123”
dict = [“abc”,“123”]
Output:
“<b>abc</b>xyz<b>123</b>”
Example 2:
Input:
s = “aaabbcc”
dict = [“aaa”,“aab”,“bc”]
Output:
“<b>aaabbc</b>c”
Note:
The given dict won’t contain duplicates, and its length won’t exceed 100.
All the strings in input have length in range [1, 1000].
Three steps:
(1) Use each dict word to match the string, and put each matched string interval, i.e. pair<int, int> for {start, end}, to a set which is sorted by the interval’s first element (the interval’s start position). Since there can be overlaps among these intervals, we call these raw intervals.
(2) Re-arrange the raw intervals by merging overlapped ones (e.g. b.s <= a.e), and combine contiguous ones (e.g. b.s == a.e + 1).
|==== interval a |
a.s--------------------a.e
-------------| interval b ====|
-------------b.s--------------------b.e
Therefore, if b.s <= a.e + 1, we merge/combine them by updating a: a.e = max(a.e, b.e)
(3) With the merged and refined intervals, we insert tags into the string based on each interval’s start and end position. Each earlier interval has already added 7 characters (“<b>” plus “</b>”) to the string, so for the ith interval (0-indexed) the positions to insert are:
open tag: interval[i].s + 7 * i
close tag: interval[i].e + 7 * i + 4
方法1: summary range
reference: Bold Words in String
思路:
和这道题一样的解法,双指针或者set。
方法2: merge intervals
reference: 56. Merge Intervals,
思路:
或者可以用56. Merge Intervals,在第二步中合并出所有bold区间,最后根据这些区间统一插入tags。
// Approach: merge intervals.
class Solution {
public:
    /**
     * Wraps every substring of `s` that equals some word of `dict` in
     * "<b>" ... "</b>". Matches that overlap or touch each other share a
     * single pair of tags.
     *
     * @param s    text to annotate (taken by value; the result is a new string)
     * @param dict words to search for; empty words are ignored
     * @return     `s` with the bold tags inserted
     */
    std::string addBoldTag(std::string s, std::vector<std::string>& dict) {
        // Step 1: record every occurrence of every word as an inclusive
        // [first, last] index range.
        std::vector<std::pair<int, int>> intervals;
        for (const std::string& word : dict) {
            if (word.empty()) {
                continue;  // an empty word would produce a range with last < first
            }
            // Advance by one after each hit so overlapping occurrences are found too.
            for (std::string::size_type pos = s.find(word);
                 pos != std::string::npos;
                 pos = s.find(word, pos + 1)) {
                intervals.emplace_back(static_cast<int>(pos),
                                       static_cast<int>(pos + word.size() - 1));
            }
        }

        // Step 2: sort by start, then fold each range into the previous one
        // when it overlaps it or begins right after it ends.
        std::sort(intervals.begin(), intervals.end());
        std::vector<std::pair<int, int>> merged;
        for (const std::pair<int, int>& iv : intervals) {
            if (!merged.empty() && iv.first <= merged.back().second + 1) {
                merged.back().second = std::max(merged.back().second, iv.second);
            } else {
                merged.push_back(iv);
            }
        }

        // Step 3: build the output left to right, copying the plain text
        // between ranges and wrapping each merged range in tags.
        std::string res;
        res.reserve(s.size() + merged.size() * 7);  // "<b>" + "</b>" = 7 chars per range
        std::string::size_type cursor = 0;          // first index of s not yet copied
        for (const std::pair<int, int>& iv : merged) {
            const std::string::size_type first = static_cast<std::string::size_type>(iv.first);
            const std::string::size_type last = static_cast<std::string::size_type>(iv.second);
            res.append(s, cursor, first - cursor);
            res += "<b>";
            res.append(s, first, last - first + 1);
            res += "</b>";
            cursor = last + 1;
        }
        res.append(s, cursor, std::string::npos);
        return res;
    }
};
方法3: kmp
// Approach: KMP search for each word, marking matched characters in a flag array.
class Solution {
public:
    /**
     * Builds the KMP failure table for pattern `p`.
     *
     * @param p pattern to preprocess
     * @param s unused; kept so existing callers keep compiling
     * @return  table where entry i is the length of the longest proper prefix
     *          of p[0..i] that is also a suffix of p[0..i]
     */
    std::vector<int> preprocessKMP(const std::string& p, const std::string& s) {
        (void)s;
        const int m = static_cast<int>(p.size());
        std::vector<int> ret(p.size());
        for (int i = 1; i < m; ++i) {
            int len = ret[i - 1];
            // Fall back through shorter borders until one can be extended by p[i].
            while (len > 0 && p[i] != p[len]) {
                len = ret[len - 1];
            }
            if (p[i] == p[len]) {
                ret[i] = len + 1;
            }  // otherwise ret[i] keeps its initial value 0
        }
        return ret;
    }

    /**
     * Wraps every substring of `s` that equals some word of `dict` in
     * "<b>" ... "</b>"; overlapping or adjacent matches share one pair of tags.
     *
     * @param s    text to annotate
     * @param dict words to search for; empty words are ignored
     * @return     `s` with the bold tags inserted
     */
    std::string addBoldTag(std::string s, std::vector<std::string>& dict) {
        if (s.empty()) {
            return s;
        }
        const int n = static_cast<int>(s.size());
        std::vector<bool> bold(s.size());

        for (const std::string& word : dict) {
            if (word.empty()) {
                continue;  // an empty pattern would index the failure table at -1
            }
            const std::vector<int> kmp = preprocessKMP(word, s);
            const int m = static_cast<int>(word.size());
            int idx = 0;    // number of pattern characters currently matched
            int last = -1;  // last index already marked bold for this word
            for (int i = 0; i < n; ++i) {
                while (idx > 0 && s[i] != word[idx]) {
                    idx = kmp[idx - 1];
                }
                if (s[i] == word[idx]) {
                    ++idx;
                }
                if (idx == m) {
                    // Full match ending at i; skip the part marked by the previous match.
                    const int start = std::max(last + 1, i - m + 1);
                    std::fill(bold.begin() + start, bold.begin() + i + 1, true);
                    last = i;
                    idx = kmp[idx - 1];  // keep going so overlapping matches are found
                }
            }
        }

        // Emit tags wherever the bold flag changes between neighbouring characters.
        std::string ret;
        bool inBold = false;
        for (int i = 0; i < n; ++i) {
            if (bold[i] && !inBold) {
                ret += "<b>";
                inBold = true;
            } else if (!bold[i] && inBold) {
                ret += "</b>";
                inBold = false;
            }
            ret.push_back(s[i]);
        }
        if (inBold) {
            ret += "</b>";
        }
        return ret;
    }
};
方法4: trie
// Trie node. A node owns its children: destroying a node destroys its subtree.
struct Node {
    bool isEnd;                               // true when a stored word ends at this node
    std::unordered_map<char, Node*> children; // next character -> owned child node

    Node() : isEnd(false) {}

    ~Node() {
        for (auto& entry : children) {
            delete entry.second;
        }
    }

    // Copying would leave two nodes deleting the same children.
    Node(const Node&) = delete;
    Node& operator=(const Node&) = delete;
};

// Character trie over the inserted words. Owns every node it allocates.
class Trie {
    Node* root;
public:
    Trie() : root(new Node()) {}

    ~Trie() {
        delete root;  // Node's destructor releases the rest of the tree
    }

    // Copying would leave two tries deleting the same root.
    Trie(const Trie&) = delete;
    Trie& operator=(const Trie&) = delete;

    // Inserts the word `s`, creating nodes for characters not yet present.
    void add(const std::string& s) {
        Node* node = root;
        for (char ch : s) {
            Node*& child = node->children[ch];  // default-inserts a null slot if absent
            if (child == nullptr) {
                child = new Node();
            }
            node = child;
        }
        node->isEnd = true;
    }

    // Walks the trie along s[idx], s[idx + 1], ... and returns the index of
    // the last character of the longest stored word that starts at `idx`,
    // or -1 when no stored word starts there (including an out-of-range idx).
    int search(int idx, const std::string& s) {
        if (idx < 0) {
            return -1;
        }
        const Node* node = root;
        int ret = -1;
        for (std::string::size_type pos = static_cast<std::string::size_type>(idx);
             pos < s.size(); ++pos) {
            const auto it = node->children.find(s[pos]);
            if (it == node->children.end()) {
                break;
            }
            node = it->second;
            if (node->isEnd) {
                ret = static_cast<int>(pos);
            }
        }
        return ret;
    }
};
// Approach: trie lookup from every start position, then tag from a flag array.
class Solution {
public:
    /**
     * Wraps every substring of `s` that equals some word of `dict` in
     * "<b>" ... "</b>"; overlapping or adjacent matches share one pair of tags.
     *
     * @param s    text to annotate
     * @param dict words to search for
     * @return     `s` with the bold tags inserted
     */
    std::string addBoldTag(std::string s, std::vector<std::string>& dict) {
        if (s.empty()) {
            return s;
        }

        // Load every dictionary word into the trie.
        Trie trie;
        for (std::string& word : dict) {
            trie.add(word);
        }

        // A position is bold when some match starting at or before it reaches
        // at least that far, so tracking the furthest match end seen so far is
        // enough to decide each flag.
        const int n = static_cast<int>(s.size());
        std::vector<bool> bold(s.size());
        int boldEnd = -1;
        for (int i = 0; i < n; ++i) {
            const int matchEnd = trie.search(i, s);
            if (matchEnd > boldEnd) {
                boldEnd = matchEnd;
            }
            bold[i] = (i <= boldEnd);
        }

        // Emit a tag each time the flag differs from the current tag state.
        std::string ret;
        bool inBold = false;
        for (int i = 0; i < n; ++i) {
            if (bold[i] != inBold) {
                ret += inBold ? "</b>" : "<b>";
                inBold = !inBold;
            }
            ret.push_back(s[i]);
        }
        if (inBold) {
            ret += "</b>";
        }
        return ret;
    }
};