leetcode 2213 — 由单个字符重复的最长子字符串
一、题目描述
二、题目分析
这道题还是比较简单的。每次替换一个字母都有可能造成最长子串的更新,那么什么情况下会更新呢?这取决于被替换的字符和新加入的字符与其左右位置元素是否相同:
关键点1:如果我们将连续单个字母视为一个区间,那么每次替换最多影响三个区间:替换字符所在区间,其前一个元素所在区间,其后一个元素所在区间。
那么什么时候最长单字母子串会发生变化呢?
关键点2: 只有发生区间合并和最长子串所在区间分割时才有可能造成某次替换后的最长子串长度发生变化。
前者需要比较合并后的区间与当前最长子串,后者需要重新遍历求出最长子串。
因此,我们可以预处理得到所有区间,每次只需要更新区间并在上述情况下更新最长子串。
三、算法
1、初版实现
初版实现用了基本的 unordered_map 用于存储元素区间:
// First-version solution for LeetCode 2213: every maximal run of equal
// characters is tracked in two hash maps (run start -> length, run end -> length)
// and each query only touches the runs around the replaced position.
class Solution {
public:
    /**
     * Applies the queries one by one and reports the longest single-character
     * run after each of them.
     *
     * @param s               the initial string; it must not contain 'A', which is
     *                        used as a sentinel on both sides (LeetCode guarantees
     *                        lowercase letters)
     * @param queryCharacters queryCharacters[i] is the character written by query i
     * @param queryIndices    queryIndices[i] is the 0-based position written by query i
     * @return ret[i] = length of the longest run of one repeated character after
     *         query i has been applied; all zeros when s is empty
     */
    vector<int> longestRepeating(string s, string queryCharacters, vector<int>& queryIndices) {
        vector<int> ret(queryIndices.size());
        if (s.empty()) {
            // No run exists, so there is nothing to search for a maximum.
            return ret;
        }

        // 1. Sentinels: positions 0 and size-1 never equal a real character, so
        //    index - 1 / index + 1 are always valid and never extend a run.
        s.insert(0, 1, 'A');
        s.push_back('A');
        const int paddedLength = static_cast<int>(s.length());

        // 2. left[start] and right[end] both hold the length of the run [start, end].
        unordered_map<int, int> left, right;
        int length = 1;
        for (int i = 2; i < paddedLength; ++i) {
            if (s[i] == s[i - 1]) {
                ++length;
            }
            else {
                left[i - length] = length;
                right[i - 1] = length;
                length = 1;
            }
        }

        // 3. Longest run of the initial string: [pos, pos + length).
        const auto shorterRun = [](const pair<const int, int>& lhs, const pair<const int, int>& rhs) {
            return lhs.second < rhs.second;
        };
        const auto initialLongest = max_element(left.begin(), left.end(), shorterRun);
        int pos = initialLongest->first;
        length = initialLongest->second;

        // 4. Replay the queries.
        for (size_t i = 0; i < queryIndices.size(); ++i) {
            // 4.1 Write the new character (directly into the string, not through c_str()).
            const int index = queryIndices[i] + 1;
            const char replace = queryCharacters[i];
            const char original = s[index];
            s[index] = replace;
            ret[i] = length;

            // 4.2 Nothing changes when the character is replaced by itself.
            if (replace == original) {
                continue;
            }

            // 4.3 Remove position `index` from the run of the old character.
            //     All lengths are copied into locals before a map is modified,
            //     because an insertion may rehash and invalidate iterators.
            const bool sameAsPrev = original == s[index - 1];
            const bool sameAsNext = original == s[index + 1];
            if (sameAsPrev && sameAsNext) {
                // `index` was strictly inside a run: locate its start (linear in
                // the number of runs), then split it in two.
                int start = index - 1;
                for (const auto& run : left) {
                    if (run.first <= index - 1 && run.first + run.second > index) {
                        start = run.first;
                        break;
                    }
                }
                const int end = start + left.at(start) - 1;
                const int leftLength = index - start;
                const int rightLength = end - index;
                left[start] = leftLength;
                right[index - 1] = leftLength;
                left[index + 1] = rightLength;
                right[end] = rightLength;
            }
            else if (sameAsNext) {
                // `index` was the first character of its run.
                const int oldLength = left.at(index);
                const int end = index + oldLength - 1;
                left.erase(index);
                left[index + 1] = oldLength - 1;
                right[end] = oldLength - 1;
            }
            else if (sameAsPrev) {
                // `index` was the last character of its run.
                const int oldLength = right.at(index);
                const int start = index - oldLength + 1;
                right.erase(index);
                right[index - 1] = oldLength - 1;
                left[start] = oldLength - 1;
            }
            else {
                // 4.4 The old character formed a run of its own.
                left.erase(index);
                right.erase(index);
            }

            // 4.3.1 If the run that was cut is the recorded longest one, the
            //       maximum has to be searched again among the remaining runs.
            if ((sameAsPrev || sameAsNext) && pos <= index && index < pos + length) {
                const auto longest = max_element(left.begin(), left.end(), shorterRun);
                pos = longest->first;
                length = longest->second;
            }

            // 4.5 / 4.6 Insert the new character, merging it with whichever
            //           neighbouring runs carry the same character. With no
            //           matching neighbour this registers a run of length 1.
            const bool joinsPrev = replace == s[index - 1];
            const bool joinsNext = replace == s[index + 1];
            const int prevLength = joinsPrev ? right.at(index - 1) : 0;
            const int nextLength = joinsNext ? left.at(index + 1) : 0;
            const int newStart = index - prevLength;
            const int newEnd = index + nextLength;
            const int newLength = prevLength + 1 + nextLength;
            if (joinsPrev) {
                right.erase(index - 1);  // old end of the left neighbour
            }
            if (joinsNext) {
                left.erase(index + 1);   // old start of the right neighbour
            }
            left[newStart] = newLength;
            right[newEnd] = newLength;
            if (length < newLength) {
                pos = newStart;
                length = newLength;
            }

            ret[i] = length;
        }
        return ret;
    }
};
这里的主要耗时操作为 unordered_map 的插入、更新,以及重新搜索最大元素。
2、线段树
(1)结构
与树状数组类似,线段树也是用于维护区间信息的数据结构。不同的是,线段树中直接储存了原数组的所有信息,并将它们作为叶子结点:
不难发现,若数组元素个数为 n,其对应线段树的高度为 $\lceil \log_2 n \rceil + 1$,总元素个数不超过 $2^{\lceil \log_2 n \rceil + 1}$。如果我们对线段树以树根为首元素从1开始编号,并将数组元素从1开始编号,则可以得到树中每个元素的下标及其所管理的区间:
这里需要注意区间管理数组中可能有某些中间节点不保存任何区间信息。例如,保存长度为6的数组时,节点10,11不保存任何数据。
(2)实现
// Basic segment tree supporting range queries.
//   T - type of the elements of the source array
//   U - type stored in every node; a leaf is assigned from a T, an inner node
//       is rangeFunc(left child, right child)
// Nodes are numbered from 1 (root) and manage 1-based inclusive ranges; the
// public interface uses 0-based indices.
template<class T, class U>
class SegmentTree {
    // 1-based inclusive range managed by a node.
    struct Range {
        int startIndex;
        int endIndex;
    };
    vector<T> origin;
    vector<pair<Range, U>> tree;
    function<U(U, U)> rangeFunc; // combines the values of two adjacent ranges

    int getLeftChildIndex(int index) const {
        return (index << 1);
    }
    int getRightChildIndex(int index) const {
        return (index << 1) + 1;
    }
    int getMidIndex(int startIndex, int endIndex) const {
        return (startIndex + endIndex) >> 1;
    }

    // Recursively fills node `nodeIndex` so that it manages [startIndex, endIndex].
    void build(int startIndex, int endIndex, int nodeIndex) {
        tree[nodeIndex].first.startIndex = startIndex;
        tree[nodeIndex].first.endIndex = endIndex;
        if (startIndex == endIndex) {
            // Leaf: converted from the source element.
            tree[nodeIndex].second = origin[startIndex - 1];
            return;
        }
        const int midIndex = getMidIndex(startIndex, endIndex);
        build(startIndex, midIndex, getLeftChildIndex(nodeIndex));      // left subtree
        build(midIndex + 1, endIndex, getRightChildIndex(nodeIndex));   // right subtree
        tree[nodeIndex].second = rangeFunc(tree[getLeftChildIndex(nodeIndex)].second,
                                           tree[getRightChildIndex(nodeIndex)].second);
    }

    // Combines the values of all nodes below `nodeIndex` that lie inside the
    // 1-based inclusive range [queryStartIndex, queryendIndex].
    U query(int queryStartIndex, int queryendIndex, int nodeIndex) const {
        const int manageStartIndex = tree[nodeIndex].first.startIndex;
        const int manageEndIndex = tree[nodeIndex].first.endIndex;
        if (queryStartIndex <= manageStartIndex && queryendIndex >= manageEndIndex) {
            // The managed range is a subset of the queried range: use it as is.
            return tree[nodeIndex].second;
        }
        const int midIndex = getMidIndex(manageStartIndex, manageEndIndex);
        // Value-initialised so that scalar U starts from zero instead of an
        // indeterminate value; U{} has to be neutral for rangeFunc.
        U res{};
        // Visit the left child if it intersects the queried range.
        if (queryStartIndex <= midIndex) {
            res = rangeFunc(res, query(queryStartIndex, queryendIndex, getLeftChildIndex(nodeIndex)));
        }
        // Visit the right child if it intersects the queried range.
        if (queryendIndex > midIndex) {
            res = rangeFunc(res, query(queryStartIndex, queryendIndex, getRightChildIndex(nodeIndex)));
        }
        return res;
    }

public:
    // Builds the tree over `origin`; `rangeFunc` merges two adjacent ranges.
    // An empty `origin` yields a tree without nodes, which must not be queried.
    SegmentTree(const vector<T>& origin, const function<U(U, U)>& rangeFunc) :
        origin(origin),
        tree(vector<pair<Range, U>>((highestBit(origin.size())) << 2)),
        rangeFunc(rangeFunc)
    {
        // build(1, 0, 1) would never reach a leaf, so skip it for empty input.
        if (!origin.empty()) {
            build(1, origin.size(), 1);
        }
    }

    // Returns the combined value of the 0-based inclusive range [startIndex, endIndex].
    U query(int startIndex, int endIndex) const {
        return query(startIndex + 1, endIndex + 1, 1);
    }
};
对于区间查询操作,进行区间拆分后,树的每层最多包含两个需要直接返回的区间。因此,时间复杂度为 $O(\log n)$。
(3)测试代码
// Node value used by the sum demo: a thin wrapper around one int.
struct RangeElement {
    int val;

    // Deliberately not explicit, so that a plain int converts to a RangeElement;
    // without an argument the value is 0.
    RangeElement(int initial = 0) : val(initial) {}
};
// Adds the wrapped values of two RangeElements.
RangeElement operator+(const RangeElement& lhs, const RangeElement& rhs) {
    const int sum = lhs.val + rhs.val;
    return RangeElement(sum);
}
// Smoke test of the basic segment tree with addition as the range function.
// query() takes 0-based inclusive bounds.
void test()
{
    // The constructor takes the source array and a single combiner over
    // RangeElement, so exactly one functor is passed.
    SegmentTree<int, RangeElement> tree1(vector<int> {2, 33, 15, 6, 9, 4, 27}, plus<RangeElement>());
    cout << tree1.query(2, 5).val << endl; // expected 34 (15 + 6 + 9 + 4)
    cout << tree1.query(4, 6).val << endl; // expected 40 (9 + 4 + 27)
    SegmentTree<int, RangeElement> tree2(vector<int> {2, 33, 15, 9, 4, 27}, plus<RangeElement>());
    cout << tree2.query(0, 5).val << endl; // expected 90 (whole array)
    cout << tree2.query(3, 4).val << endl; // expected 13 (9 + 4)
}
(4)区间更新与懒惰标记
除了单点更新与区间查询,线段树还支持区间更新。我们可以简单地将区间更新实现为遍历区间的单点更新,也可以使用懒惰标记提高效率。懒惰标记实则是一种读时更新的策略。在更新区间时,我们更新所有最大子区间的懒惰标记;在读取时,如果发现某个区间有懒惰标记且所查询的区间包含此区间,则下放该懒惰标记。我们使用 t 表示懒惰标记数组:
如果我们更新区间 [3,5] 的值为5,则:
当我们查询区间 [3, 3] 时,发现区间 [3,4] 有未下放的标记,则会下放该标记:
此时查询到的结果将是20。这种优化可以在两次查询过程中,多次更新相同的区间时,有效减少更新次数。
(5)改进实现
// Segment tree extended with range updates and lazy flags.
// A range update applies rangeFunc(value, val) once per covered element, and a
// flag remembers the amount that has not yet been handed to the children.
// Lines consisting only of "..." are elisions kept from the original snippet.
template<class T, class U>
class SegmentTree {
    struct Range {
        int startIndex;
        int endIndex;
    };
    vector<T> origin;
    mutable vector<pair<Range, U>> tree; // mutable: const queries push flags down
    mutable vector<T> flags; // lazy flag per node; 0 means nothing is pending
    ...
    // Pushes the pending flag of `nodeIndex` down to both children: each child
    // value receives the flag once per element it manages, and each child flag
    // accumulates it for its own descendants.
    void pushFlags(int manageStartIndex, int manageEndIndex, int midIndex, int nodeIndex) const {
        if (flags[nodeIndex] != 0) {
            for (int i = manageStartIndex; i <= midIndex; ++i) {
                tree[getLeftChildIndex(nodeIndex)].second = rangeFunc(tree[getLeftChildIndex(nodeIndex)].second, flags[nodeIndex]);
            }
            for (int i = midIndex + 1; i <= manageEndIndex; ++i) {
                tree[getRightChildIndex(nodeIndex)].second = rangeFunc(tree[getRightChildIndex(nodeIndex)].second, flags[nodeIndex]);
            }
            flags[getLeftChildIndex(nodeIndex)] += flags[nodeIndex];
            flags[getRightChildIndex(nodeIndex)] += flags[nodeIndex];
            flags[nodeIndex] = 0;
        }
    }
    ...
    // Range update on the 1-based inclusive range [queryStartIndex, queryendIndex].
    void update(int queryStartIndex, int queryendIndex, int nodeIndex, T val) {
        int manageStartIndex = tree[nodeIndex].first.startIndex;
        int manageEndIndex = tree[nodeIndex].first.endIndex;
        if (queryStartIndex <= manageStartIndex && queryendIndex >= manageEndIndex) {
            // The managed range is a subset of the updated range: record the
            // flag and update this node's own value.
            // Accumulate instead of overwrite: a flag received from an ancestor
            // (or from an earlier update) may still be pending here and must
            // not be lost for the children.
            flags[nodeIndex] += val;
            for (int i = 0; i <= manageEndIndex - manageStartIndex; ++i) {
                tree[nodeIndex].second = rangeFunc(tree[nodeIndex].second, val);
            }
            // Recompute every ancestor from its two children.
            nodeIndex >>= 1;
            while (nodeIndex > 0) {
                tree[nodeIndex].second = rangeFunc(tree[getLeftChildIndex(nodeIndex)].second, tree[getRightChildIndex(nodeIndex)].second);
                nodeIndex >>= 1;
            }
            return;
        }
        int midIndex = getMidIndex(manageStartIndex, manageEndIndex);
        // Children are about to be visited, so they must see pending flags first.
        pushFlags(manageStartIndex, manageEndIndex, midIndex, nodeIndex);
        // Update the left child if it intersects the updated range.
        if (queryStartIndex <= midIndex) {
            update(queryStartIndex, queryendIndex, getLeftChildIndex(nodeIndex), val);
        }
        // Update the right child if it intersects the updated range.
        if (queryendIndex > midIndex) {
            update(queryStartIndex, queryendIndex, getRightChildIndex(nodeIndex), val);
        }
    }
    // Range query; identical to the basic version except that pending flags are
    // pushed down before the children are visited.
    U query(int queryStartIndex, int queryendIndex, int nodeIndex) const {
        ...
        U res;
        pushFlags(manageStartIndex, manageEndIndex, midIndex, nodeIndex);
        ...
    }
public:
    SegmentTree(const vector<T>& origin, const function<U(U, U)>& rangeFunc) :
        ...
        flags(tree.size()) // one flag per node; `tree` is declared before `flags`
    {
        build(1, origin.size(), 1);
    }
    // Applies `val` to every element of the 0-based inclusive range [startIndex, endIndex].
    void update(int startIndex, int endIndex, T val) {
        return update(startIndex + 1, endIndex + 1, 1, val);
    }
    ...
};
(6)测试代码
void test() {
SegmentTree<int, RangeElement> tree1(vector<int> {2, 33, 15, 6, 9, 4, 27}, plus<RangeElement>());
cout << tree1.query(2, 5).val << endl;
tree1.update(0, 6, 4);
cout << tree1.query(2, 5).val << endl;
tree1.update(2, 5, 5);
cout << tree1.query(4, 6).val << endl;
SegmentTree<int, RangeElement> tree2(vector<int> {2, 33, 15, 9, 4, 27}, plus<RangeElement>());
cout << tree2.query(0, 5).val << endl;
tree2.update(3, 5, 5);
cout << tree2.query(3, 4).val << endl;
}
3、改进实现
根据线段树,我们可以对前面的实现进行改进。线段树上的每个节点需要保存三个数据:该区间最左侧开始连续单字母的长度,该区间最右侧开始连续单字母的长度,该区间最长连续单字母的长度。保存这些数据的原因在于这些数据可以递归进行计算。以下的大部分实现是线段树的模板,其中的核心函数为 mergeChildren:
// Per-node summary for the longest-run problem. Every field defaults to 1,
// which is the correct summary of a single character.
struct RangeElement {
    int leftSingle{1};   // length of the run that starts at the left edge of the range
    int rightSingle{1};  // length of the run that ends at the right edge of the range
    int maxSingle{1};    // length of the longest run anywhere inside the range
};
class SegmentTree {
...
// 只保存这两项数据
string origin;
vector<pair<Range, RangeElement>> tree;
...
void build(int startIndex, int endIndex, int nodeIndex) {
...
mergeChildren(nodeIndex); // 构造当前节点
}
void mergeChildren(int nodeIndex) {
RangeElement lhs = tree[getLeftChildIndex(nodeIndex)].second;
RangeElement rhs = tree[getRightChildIndex(nodeIndex)].second;
RangeElement &result = tree[nodeIndex].second;
Range leftIndex = tree[getLeftChildIndex(nodeIndex)].first;
Range rightIndex = tree[getRightChildIndex(nodeIndex)].first;
result.leftSingle = lhs.leftSingle;
result.rightSingle = rhs.rightSingle;
result.maxSingle = max(lhs.maxSingle, rhs.maxSingle);
if (origin[leftIndex.endIndex - 1] == origin[rightIndex.startIndex - 1]) {
result.maxSingle = max(result.maxSingle, rhs.leftSingle + lhs.rightSingle);
if (leftIndex.endIndex - leftIndex.startIndex + 1 == result.leftSingle) {
result.leftSingle += rhs.leftSingle;
}
if (rightIndex.endIndex - rightIndex.startIndex + 1 == result.rightSingle) {
result.rightSingle += lhs.rightSingle;
}
}
}
// update
void update(int queryStartIndex, int queryendIndex, int nodeIndex, char val) {
int manageStartIndex = tree[nodeIndex].first.startIndex;
int manageEndIndex = tree[nodeIndex].first.endIndex;
if (manageEndIndex == manageStartIndex) {
origin[manageStartIndex - 1] = val;
return;
}
....
mergeChildren(nodeIndex);
}
public:
SegmentTree(const string& origin) :
origin(origin),
tree(vector<pair<Range, RangeElement>>((highestBit(origin.size())) << 2))
{
build(1, origin.size(), 1);
}
void update(int index, char val) {
update(index + 1, index + 1, 1, val);
}
RangeElement getTotal() const {
return tree[1].second;
}
};
public:
// Segment-tree based solution: applies each query as a point update and reads
// the longest-run length from the root summary afterwards.
vector<int> longestRepeating(string s, string queryCharacters, vector<int>& queryIndices) {
    SegmentTree tree(s);
    const size_t queryCount = queryIndices.size();
    vector<int> ret;
    ret.reserve(queryCount);
    for (size_t q = 0; q < queryCount; ++q) {
        tree.update(queryIndices[q], queryCharacters[q]);
        ret.push_back(tree.getTotal().maxSingle);
    }
    return ret;
}