【字典树】【KMP】【C++算法】3045统计前后缀下标对 II

最新推荐文章于 2024-11-05 17:16:24 发布

闻缺陷则喜何志丹

最新推荐文章于 2024-11-05 17:16:24 发布

阅读量851

点赞数 24

文章标签： c++ 算法 LeetCode 字典树 KMP 字符串前缀

本文链接：https://blog.csdn.net/he_zhidan/article/details/136153645

版权

困难算法题专栏收录该内容

540 篇文章 4 订阅

订阅专栏

作者推荐

动态规划的时间复杂度优化

本文涉及知识点

字符串字典树 KMP 前后缀

LeetCode:3045统计前后缀下标对 II

给你一个下标从 0 开始的字符串数组 words 。
定义一个布尔函数 isPrefixAndSuffix ，它接受两个字符串参数 str1 和 str2 ：
当 str1 同时是 str2 的前缀（prefix）和后缀（suffix）时，isPrefixAndSuffix(str1, str2) 返回 true，否则返回 false。
例如，isPrefixAndSuffix(“aba”, “ababa”) 返回 true，因为 “aba” 既是 “ababa” 的前缀，也是 “ababa” 的后缀，但是 isPrefixAndSuffix(“abc”, “abcd”) 返回 false。
以整数形式，返回满足 i < j 且 isPrefixAndSuffix(words[i], words[j]) 为 true 的下标对 (i, j) 的数量。
示例 1：
输入：words = [“a”,“aba”,“ababa”,“aa”]
输出：4
解释：在本示例中，计数的下标对包括：
i = 0 且 j = 1 ，因为 isPrefixAndSuffix(“a”, “aba”) 为 true 。
i = 0 且 j = 2 ，因为 isPrefixAndSuffix(“a”, “ababa”) 为 true 。
i = 0 且 j = 3 ，因为 isPrefixAndSuffix(“a”, “aa”) 为 true 。
i = 1 且 j = 2 ，因为 isPrefixAndSuffix(“aba”, “ababa”) 为 true 。
因此，答案是 4 。
示例 2：

输入：words = [“pa”,“papa”,“ma”,“mama”]
输出：2
解释：在本示例中，计数的下标对包括：
i = 0 且 j = 1 ，因为 isPrefixAndSuffix(“pa”, “papa”) 为 true 。
i = 2 且 j = 3 ，因为 isPrefixAndSuffix(“ma”, “mama”) 为 true 。
因此，答案是 2 。
示例 3：

输入：words = [“abab”,“ab”]
输出：0
解释：在本示例中，唯一有效的下标对是 i = 0 且 j = 1 ，但是 isPrefixAndSuffix(“abab”, “ab”) 为 false 。
因此，答案是 0 。
提示：
1 <= words.length <= 10⁵
1 <= words[i].length <= 10⁵
words[i] 仅由小写英文字母组成。
所有 words[i] 的长度之和不超过 5 * 10⁵ 。

分析

利用KMP 计算那些前缀等于后缀，然后在字典树中查询，此前缀（后缀）是否存在，如果存在根据编号查询出现数量。
注意：前缀不能为空，可以等于本串。

代码

核心代码

template<class TData = char, int iTypeNum = 26, TData cBegin = 'a'>
class CTrieNode
{
public:
	CTrieNode* AddChar(TData ele, int& iMaxID)
	{
#ifdef _DEBUG
		if ((ele < cBegin) || (ele >= cBegin + iTypeNum))
		{
			return nullptr;
		}
#endif
		const int index = ele - cBegin;
		auto ptr = m_vPChilds[ele - cBegin];
		if (!ptr)
		{
			m_vPChilds[index] = new CTrieNode();
#ifdef _DEBUG
			m_vPChilds[index]->m_iID = ++iMaxID;
			m_childForDebug[ele] = m_vPChilds[index];
#endif
		}
		return m_vPChilds[index];
	}
	CTrieNode* GetChild(TData ele)const
	{
#ifdef _DEBUG
		if ((ele < cBegin) || (ele >= cBegin + iTypeNum))
		{
			return nullptr;
		}
#endif
		return m_vPChilds[ele - cBegin];
	}
protected:
#ifdef _DEBUG
	int m_iID = -1;
	std::unordered_map<TData, CTrieNode*> m_childForDebug;
#endif
public:
	int m_iLeafIndex = -1;
protected:
	CTrieNode* m_vPChilds[iTypeNum] = { nullptr };
};

template<class TData = char, int iTypeNum = 26, TData cBegin = 'a'>
class CTrie
{
public:
	int GetLeadCount()
	{
		return m_iLeafCount;
	}
	template<class IT>
	int Add(IT begin, IT end)
	{
		auto pNode = &m_root;
		for (; begin != end; ++begin)
		{
			pNode = pNode->AddChar(*begin, m_iMaxID);
		}
		if (-1 == pNode->m_iLeafIndex)
		{
			pNode->m_iLeafIndex = m_iLeafCount++;
		}
		return pNode->m_iLeafIndex;
	}
	template<class IT>
	CTrieNode<TData, iTypeNum, cBegin>* Search(IT begin, IT end)
	{
		auto ptr = &m_root;
		for (; begin != end; ++begin)
		{
			ptr = ptr->GetChild(begin);
			if (nullptr == ptr)
			{
				return nullptr;
			}
		}
		return ptr;
	}
	CTrieNode<TData, iTypeNum, cBegin> m_root;
protected:
	int m_iMaxID = 0;
	int m_iLeafCount = 0;
};


class KMP
{
public:
	virtual int Find(const string& s, const string& t)
	{
		CalLen(t);
		m_vSameLen.assign(s.length(), 0);
		for (int i1 = 0, j = 0; i1 < s.length(); )
		{
			for (; (j < t.length()) && (i1 + j < s.length()) && (s[i1 + j] == t[j]); j++);
			//i2 = i1 + j 此时s[i1,i2)和t[0,j)相等 s[i2]和t[j]不存在或相等
			m_vSameLen[i1] = j;
			//t[0,j)的结尾索引是j-1，所以最长公共前缀为m_vLen[j-1]，简写为y 则t[0,y)等于t[j-y,j)等于s[i2-y,i2)
			if (0 == j)
			{
				i1++;
				continue;
			}
			const int i2 = i1 + j;
			j = m_vLen[j - 1];
			i1 = i2 - j;//i2不变
		}

		for (int i = 0; i < m_vSameLen.size(); i++)
		{//多余代码是为了增加可测试性
			if (t.length() == m_vSameLen[i])
			{
				return i;
			}
		}
		return -1;
	}
	vector<int> m_vSameLen;//m_vSame[i]记录 s[i...]和t[0...]最长公共前缀，增加可调试性
	static vector<int> Next(const string& s)
	{
		const int len = s.length();
		vector<int> vNext(len, -1);
		for (int i = 1; i < len; i++)
		{
			int next = vNext[i - 1];
			while ((-1 != next) && (s[next + 1] != s[i]))
			{
				next = vNext[next];
			}
			vNext[i] = next + (s[next + 1] == s[i]);
		}
		return vNext;
	}
protected:
	void CalLen(const string& str)
	{
		m_vLen.resize(str.length());
		for (int i = 1; i < str.length(); i++)
		{
			int next = m_vLen[i - 1];
			while (str[next] != str[i])
			{
				if (0 == next)
				{
					break;
				}
				next = m_vLen[next-1];
			}
			m_vLen[i] = next + (str[next] == str[i]);
		}
	}
	int m_c;
	vector<int> m_vLen;//m_vLen[i] 表示t[0,i]的最长公共前后缀	
};

class Solution {
public:
	long long countPrefixSuffixPairs(vector<string>& words) {
		CTrie<> trie;
		unordered_map<int, int> mNoNum;
		long long llRet = 0;
		for (const auto& str : words)
		{			
			KMP kmp;
			kmp.Find(str, str);
			queue<int> indexs;
			for (int i = str.length()-1; i >= 0 ; i--)
			{
				if (kmp.m_vSameLen[i] == (str.length() - i))
				{
					indexs.emplace(str.length() - i);
				}
			}
			
			auto ptr = &trie.m_root;
			for (int i = 0; i < str.length(); i++)
			{
				ptr = ptr->GetChild(str[i]);
				if (nullptr == ptr)
				{
					break;
				}
				if ((-1 != ptr->m_iLeafIndex)&&indexs.size()&&( indexs.front()==i+1 ))
				{
					llRet += mNoNum[ptr->m_iLeafIndex];					
				}
				while (indexs.size() && (indexs.front() == i + 1))
				{
					indexs.pop();
				}
			}
			mNoNum[trie.Add(str.begin(), str.end())]++;
		}
		return llRet;
	}
};

扩展阅读

视频课程

有效学习：明确的目标及时的反馈拉伸区（难度合适），可以先学简单的课程，请移步CSDN学院，听白银讲师（也就是鄙人）的讲解。
https://edu.csdn.net/course/detail/38771

如何你想快速形成战斗了，为老板分忧，请学习C#入职培训、C++入职培训等课程
https://edu.csdn.net/lecturer/6176

我想对大家说的话
闻缺陷则喜是一个美好的愿望，早发现问题，早修改问题，给老板节约钱。
子墨子言之：事无终始，无务多业。也就是我们常说的专业的人做专业的事。
如果程序是一条龙，那算法就是他的是睛