1.什么是二叉搜索树?
二叉搜索树又称二叉排序树,它或者是一棵空树,或者是具有以下性质的二叉树:
(1)若它的左子树不为空,则左子树上所有节点的值都小于根节点的值
(2)若它的右子树不为空,则右子树上所有节点的值都大于根节点的值
(3)它的左右子树也分别为二叉搜索树
2.二叉搜索树的操作
2.1二叉搜索树的查找
2.2二叉搜索树的插入
(1)如果树为空:直接插入,然后它就是树的根
(2)如果树不为空:按二叉搜索树性质查找插入位置,插入新节点
2.3二叉搜索树的删除
首先查找元素是否在二叉搜索树中,如果不存在,则返回,否则要删除的结点可能分下面三种情况:
(1)要删除的结点无孩子结点
(2)要删除的节点只有左孩子或右孩子节点
(3)要删除的节点有左孩子和右孩子节点
对于情况(1)来说,直接删除该节点,然后被删除节点的双亲指向空指针即可;
对于情况(2)来说,直接删除该节点,然后被删除节点的双亲指向被删除节点的左孩子或者右孩子即可;
对于情况(3)来说,用它左子树中的最大节点或右子树中的最小节点的值替换该节点的值,然后删除那个被用来替换的节点即可。(例如:在它的右子树中寻找中序下的第一个结点(关键码最小),用它的值填补到被删除节点中,再来处理该结点的删除问题;该结点没有左孩子,因此它的删除属于情况(1)或情况(2))
2.4二叉搜索树的实现
// Node of a key-only binary search tree: one stored key plus the links to
// the left and right children.
template<class K>
struct BSTreeNode
{
    BSTreeNode* m_ipLeft = nullptr;   // left child, null when absent
    BSTreeNode* m_ipRight = nullptr;  // right child, null when absent
    K m_iKey;                         // key stored in this node

    // Creates a node without children that holds a copy of Key.
    BSTreeNode(const K& Key)
        : m_iKey(Key)
    {
    }
};
template<class K>
class BSTree
{
typedef BSTreeNode<K> Node;
public:
BSTree()
:m_ipRoot(nullptr)
{}
//插入节点
bool Insert(K& Key)
{
if (m_ipRoot == nullptr)
{
m_ipRoot = new Node(Key);
return true;
}
Node* ipParent = nullptr;
Node* ipCur = m_ipRoot;
while(ipCur)
{
ipParent = ipCur;
if (ipCur->m_iKey < Key)
{
ipCur = ipCur->m_ipRight;
}
else if (ipCur->m_iKey > Key)
{
ipCur = ipCur->m_ipLeft;
}
else
{
return false;
}
}
if (ipParent->m_iKey < Key)
{
ipParent->m_ipRight = new Node(Key);
}
else if (ipParent->m_iKey > Key)
{
ipParent->m_ipLeft = new Node(Key);
}
return true;
}
//中序遍历
void InOrder()
{
InOrderTemp(m_ipRoot);
cout << endl;
}
void InOrderTemp(Node* iRoot)
{
if (iRoot == nullptr)
{
return;
}
InOrderTemp(iRoot->m_ipLeft);
cout<< iRoot->m_iKey <<" ";
InOrderTemp(iRoot->m_ipRight);
}
//查找
bool Find(K& iKey)
{
Node* ipCur = m_ipRoot;
while (ipCur)
{
if (ipCur->m_iKey < iKey)
{
ipCur = ipCur->m_ipRight;
}
else if (ipCur->m_iKey > iKey)
{
ipCur = ipCur->m_ipLeft;
}
else
{
return true;
}
return false;
}
}
//删除
bool Erase(const K& iKey)
{
if (m_ipRoot == nullptr)
{
return false;
}
Node* ipCur = m_ipRoot;
//记录父亲节点
Node* ipParent = nullptr;
while (ipCur)
{
if (ipCur->m_iKey == iKey)
{
break;
}
if (ipCur->m_iKey > iKey)
{
ipParent = ipCur;
ipCur = ipCur->m_ipLeft;
}
else if (ipCur->m_iKey < iKey)
{
ipParent = ipCur;
ipCur = ipCur->m_ipRight;
}
}
if (ipCur == nullptr)
{
return false;
}
// 当前节点只有右孩子---可直接删除
if (ipCur->m_ipLeft == nullptr)
{
if (ipParent == nullptr)
{
m_ipRoot = ipCur->m_ipRight;
delete ipCur;
return true;
}
if (ipParent->m_ipLeft == ipCur)
{
ipParent->m_ipLeft = ipCur->m_ipRight;
delete ipCur;
return true;
}
else
{
ipParent->m_ipRight = ipCur->m_ipRight;
delete ipCur;
return true;
}
}
// 当前节点只有左孩子---可直接删除
else if (ipCur->m_ipRight == nullptr)
{
if (ipParent == nullptr)
{
m_ipRoot = ipCur->m_ipLeft;
delete ipCur;
return true;
}
if (ipParent->m_ipLeft == ipCur)
{
ipParent->m_ipLeft = ipCur->m_ipLeft;
delete ipCur;
return true;
}
else
{
ipParent->m_ipRight = ipCur->m_ipLeft;
delete ipCur;
return true;
}
}
else
{
//被删除节点的两个孩子都存在的时候,采用替换法,找到右子树的最左孩子与被删除节点进行替换
Node* ipMinParent = ipCur;//记录父节点,至于为什么这个初始的父节点不是空节点,因为用的是替换法删除,需要父节点的指针,而前面用的是删除法,就直接删除就可以了,所以父亲节点可以为空节点
Node* ipMin = ipMinParent->m_ipRight;
while (ipMin->m_ipLeft)
{
ipMinParent = ipMin;
ipMin = ipMin->m_ipLeft;
}
ipCur->m_iKey = ipMin->m_iKey;
if (ipMinParent->m_ipLeft == ipMin)
{
ipMinParent->m_ipLeft = ipMin->m_ipRight;
}
else
{
ipMinParent->m_ipRight = ipMin->m_ipRight;
}
delete ipMin;
return true;
}
return false;
}
//用递归法查找指定值
Node* FindRTemp(const K& iKey, Node* ipRoot)
{
if (ipRoot == nullptr)
{
return nullptr;
}
if (ipRoot->m_iKey < iKey)
{
return FindRTemp(iKey, ipRoot->m_ipRight);
}
else if (ipRoot->m_iKey > iKey)
{
return FindRTemp(iKey, ipRoot->m_ipLeft);
}
else
{
return ipRoot;
}
}
Node* FindR(const K& iKey)
{
return FindRTemp(iKey, m_ipRoot);
}
//用递归法删除指定值
//参数中的引用作用非常大,它表示被删除节点父亲的别名
bool EraseRTemp(const K& iKey, Node*& ipRoot)
{
if (ipRoot == nullptr)
{
return false;
}
if (ipRoot->m_iKey < iKey)
{
return EraseRTemp(iKey, ipRoot->m_ipRight);
}
else if (ipRoot->m_iKey > iKey)
{
return EraseRTemp(iKey, ipRoot->m_ipLeft);
}
if (ipRoot->m_ipLeft == nullptr)
{
ipRoot = ipRoot->m_ipRight;
delete ipRoot;
return true;
}
else if (ipRoot->m_ipRight == nullptr)
{
ipRoot = ipRoot->m_ipLeft;
delete ipRoot;
return true;
}
else
{
Node* ipMinParent = ipRoot;
Node* ipMin = ipMinParent->m_ipRight;
while (ipMin->m_ipLeft)
{
ipMinParent = ipMin;
ipMin = ipMin->m_ipLeft;
}
swap(ipMin->m_iKey, ipRoot->m_iKey);
return EraseRTemp(ipMin->m_iKey, ipRoot->m_ipRight);
}
}
bool EraseR(const K& iKey)
{
return EraseRTemp(iKey, m_ipRoot);
}
private:
Node* m_ipRoot;
};
2.5二叉搜索树的应用
(1)K模型:K模型即只有key作为关键码,结构中只需要存储Key即可,关键码即为需要搜索到的值。比如:给一个单词word,判断该单词是否拼写正确,具体方式如下:
1.以单词集合中的每个单词作为key,构建一棵二叉搜索树
2.在二叉搜索树中检索该单词是否存在,存在则拼写正确,不存在则拼写错误。
(2)KV模型:每一个关键码key,都有与之对应的值Value,即<Key, Value>的键值对。该种方式在现实生活中非常常见:比如英汉词典就是英文与中文的对应关系,通过英文可以快速找到与其对应的中文,英文单词与其对应的中文<word, chinese>就构成一种键值对;再比如统计单词次数,统计成功后,给定单词就可快速找到其出现的次数,单词与其出现次数就是<word, count>就构成一种键值对。比如:实现一个简单的英汉词典dict,可以通过英文找到与其对应的中文,具体实现方式如下:
1.以<单词,中文含义>为键值对构造二叉搜索树,注意:二叉搜索树需要比较,键值对比较时只比较Key
2.查询英文单词时,只需给出英文单词,就可快速找到与其对应的中文含义(Value)
// Node of a key/value binary search tree: a key, the value mapped to it,
// and the links to the left and right children.
template<class K, class V>
struct BSTreeNode
{
    BSTreeNode* m_ipLeft = nullptr;   // left child, null when absent
    BSTreeNode* m_ipRight = nullptr;  // right child, null when absent
    K m_iKey;                         // key stored in this node
    V m_iValue;                       // value stored alongside the key

    // Creates a node without children that holds copies of Key and Value.
    BSTreeNode(const K& Key, const V& Value)
        : m_iKey(Key)
        , m_iValue(Value)
    {
    }
};
template<class K, class V>
class BSTree
{
typedef BSTreeNode<K, V> Node;
public:
BSTree()
:m_ipRoot(nullptr)
{}
//插入节点
bool Insert(const K& Key, const V& Value)
{
if (m_ipRoot == nullptr)
{
m_ipRoot = new Node(Key, Value);
return true;
}
Node* ipParent = nullptr;
Node* ipCur = m_ipRoot;
while (ipCur)
{
ipParent = ipCur;
if (ipCur->m_iKey < Key)
{
ipCur = ipCur->m_ipRight;
}
else if (ipCur->m_iKey > Key)
{
ipCur = ipCur->m_ipLeft;
}
else
{
return false;
}
}
if (ipParent->m_iKey < Key)
{
ipParent->m_ipRight = new Node(Key, Value);
}
else if (ipParent->m_iKey > Key)
{
ipParent->m_ipLeft = new Node(Key, Value);
}
return true;
}
//中序遍历
void InOrder()
{
InOrderTemp(m_ipRoot);
cout << endl;
}
void InOrderTemp(Node* iRoot)
{
if (iRoot == nullptr)
{
return;
}
InOrderTemp(iRoot->m_ipLeft);
cout << iRoot->m_iKey << ":" << iRoot->m_iValue<<" ";
InOrderTemp(iRoot->m_ipRight);
}
//查找
Node* Find(K& iKey)
{
Node* ipCur = m_ipRoot;
while (ipCur)
{
if (ipCur->m_iKey < iKey)
{
ipCur = ipCur->m_ipRight;
}
else if (ipCur->m_iKey > iKey)
{
ipCur = ipCur->m_ipLeft;
}
else
{
return ipCur;
}
}
return nullptr;
}
//删除
bool Erase(const K& iKey)
{
if (m_ipRoot == nullptr)
{
return false;
}
Node* ipCur = m_ipRoot;
//记录父亲节点
Node* ipParent = nullptr;
while (ipCur)
{
if (ipCur->m_iKey == iKey)
{
break;
}
if (ipCur->m_iKey > iKey)
{
ipParent = ipCur;
ipCur = ipCur->m_ipLeft;
}
else if (ipCur->m_iKey < iKey)
{
ipParent = ipCur;
ipCur = ipCur->m_ipRight;
}
}
if (ipCur == nullptr)
{
return false;
}
// 当前节点只有右孩子---可直接删除
if (ipCur->m_ipLeft == nullptr)
{
if (ipParent == nullptr)
{
m_ipRoot = ipCur->m_ipRight;
delete ipCur;
return true;
}
if (ipParent->m_ipLeft == ipCur)
{
ipParent->m_ipLeft = ipCur->m_ipRight;
delete ipCur;
return true;
}
else
{
ipParent->m_ipRight = ipCur->m_ipRight;
delete ipCur;
return true;
}
}
// 当前节点只有左孩子---可直接删除
else if (ipCur->m_ipRight == nullptr)
{
if (ipParent == nullptr)
{
m_ipRoot = ipCur->m_ipLeft;
delete ipCur;
return true;
}
if (ipParent->m_ipLeft == ipCur)
{
ipParent->m_ipLeft = ipCur->m_ipLeft;
delete ipCur;
return true;
}
else
{
ipParent->m_ipRight = ipCur->m_ipLeft;
delete ipCur;
return true;
}
}
else
{
//被删除节点的两个孩子都存在的时候,采用替换法,找到右子树的最左孩子与被删除节点进行替换
Node* ipMinParent = ipCur;//记录父节点,至于为什么这个初始的父节点不是空节点,因为用的是替换法删除,需要父节点的指针,而前面用的是删除法,就直接删除就可以了,所以父亲节点可以为空节点
Node* ipMin = ipMinParent->m_ipRight;
while (ipMin->m_ipLeft)
{
ipMinParent = ipMin;
ipMin = ipMin->m_ipLeft;
}
ipCur->m_iKey = ipMin->m_iKey;
if (ipMinParent->m_ipLeft == ipMin)
{
ipMinParent->m_ipLeft = ipMin->m_ipRight;
}
else
{
ipMinParent->m_ipRight = ipMin->m_ipRight;
}
delete ipMin;
return true;
}
return false;
}
//用递归法查找指定值
Node* FindRTemp(const K& iKey, Node* ipRoot)
{
if (ipRoot == nullptr)
{
return nullptr;
}
if (ipRoot->m_iKey < iKey)
{
return FindRTemp(iKey, ipRoot->m_ipRight);
}
else if (ipRoot->m_iKey > iKey)
{
return FindRTemp(iKey, ipRoot->m_ipLeft);
}
else
{
return ipRoot;
}
}
Node* FindR(const K& iKey)
{
return FindRTemp(iKey, m_ipRoot);
}
//用递归法删除指定值
//参数中的引用作用非常大,它表示被删除节点父亲的别名
bool EraseRTemp(const K& iKey, Node*& ipRoot)
{
if (ipRoot == nullptr)
{
return false;
}
if (ipRoot->m_iKey < iKey)
{
return EraseRTemp(iKey, ipRoot->m_ipRight);
}
else if (ipRoot->m_iKey > iKey)
{
return EraseRTemp(iKey, ipRoot->m_ipLeft);
}
if (ipRoot->m_ipLeft == nullptr)
{
ipRoot = ipRoot->m_ipRight;
delete ipRoot;
return true;
}
else if (ipRoot->m_ipRight == nullptr)
{
ipRoot = ipRoot->m_ipLeft;
delete ipRoot;
return true;
}
else
{
Node* ipMinParent = ipRoot;
Node* ipMin = ipMinParent->m_ipRight;
while (ipMin->m_ipLeft)
{
ipMinParent = ipMin;
ipMin = ipMin->m_ipLeft;
}
swap(ipMin->m_iKey, ipRoot->m_iKey);
return EraseRTemp(ipMin->m_iKey, ipRoot->m_ipRight);
}
}
bool EraseR(const K& iKey)
{
return EraseRTemp(iKey, m_ipRoot);
}
private:
Node* m_ipRoot;
};
2.6 二叉搜索树的性能分析
插入和删除操作都必须先查找,查找效率代表了二叉搜索树中各个操作的性能。
对有n个结点的二叉搜索树,若每个元素查找的概率相等,则二叉搜索树平均查找长度是结点在二叉搜索树的深度的函数,即结点越深,则比较次数越多。
但对于同一个关键码集合,如果各关键码插入的次序不同,可能得到不同结构的二叉搜索树:
最优情况下,二叉搜索树为完全二叉树,其平均比较次数为:logN
最差情况下,二叉搜索树退化为单支树,其平均比较次数为:N/2