二叉查找树
能够将链表插入的灵活性和有序数组查找的高效性结合起来的符号表实现(链表)。具体来说,就是使用每个结点含有两个链接的二叉查找树来高效地实现符号表。
定义一些术语:数据结构由结点组成,结点包含的链接可以指向空或者其他结点。在二叉树中,每个结点只能有一个父节点(根结点例外),每个结点只有左右两个链接,分别指向左子节点和右子节点。在二查找树中,每个节点还包含了一个键和一个值,键之间有顺序之分以支持高效的查找。
基本实现
代码(很全):
注意:左链接指向一棵小于该节点的所有键组成的二叉查找树,右链接指向一棵由大于该节点的所有键组成的二叉查找树。
难点:注意deleteMin、deleteMax、Ceiling、Floor写法(有点绕)
public class BST<Key extends Comparable<Key>, Value> {
private Node root; // root of BST
private class Node {
private Key key; // sorted by key
private Value val; // associated data
private Node left, right; // left and right subtrees
private int N; // number of nodes in subtree
public Node(Key key, Value val, int N) {
this.key = key;
this.val = val;
this.N = N;
}
}
public BST() {
}
public boolean isEmpty() {
return size() == 0;
}
public int size() {
return size(root);
}
// return number of key-value pairs in BST rooted at x
private int size(Node x) {
if (x == null) return 0;
else return x.N;
}
public boolean contains(Key key) {
if (key == null) throw new NullPointerException("argument to contains() is null");
return get(key) != null;
}
public Value get(Key key) {
return get(root, key);
}
private Value get(Node x, Key key) {
if (x == null) return null;
int cmp = key.compareTo(x.key);
if (cmp < 0) return get(x.left, key);
else if (cmp > 0) return get(x.right, key);
else return x.val;
}
public void put(Key key, Value val) {
if (key == null) throw new NullPointerException("first argument to put() is null");
if (val == null) {
delete(key);
return;
}
root = put(root, key, val);
assert check();
}
private Node put(Node x, Key key, Value val) {
if (x == null) return new Node(key, val, 1);
int cmp = key.compareTo(x.key);
if (cmp < 0) x.left = put(x.left, key, val);
else if (cmp > 0) x.right = put(x.right, key, val);
else x.val = val;
x.N = 1 + size(x.left) + size(x.right);
return x;
}
public void deleteMin() {
if (isEmpty()) throw new NoSuchElementException("Symbol table underflow");
root = deleteMin(root);
assert check();
}
private Node deleteMin(Node x) {
if (x.left == null) return x.right;
x.left = deleteMin(x.left);
x.N = size(x.left) + size(x.right) + 1;
return x;
}
public void deleteMax() {
if (isEmpty()) throw new NoSuchElementException("Symbol table underflow");
root = deleteMax(root);
assert check();
}
private Node deleteMax(Node x) {
if (x.right == null) return x.left;
x.right = deleteMax(x.right);
x.N = size(x.left) + size(x.right) + 1;
return x;
}
public void delete(Key key) {
if (key == null) throw new NullPointerException("argument to delete() is null");
root = delete(root, key);
assert check();
}
private Node delete(Node x, Key key) {
if (x == null) return null;
int cmp = key.compareTo(x.key);
if (cmp < 0) x.left = delete(x.left, key);
else if (cmp > 0) x.right = delete(x.right, key);
else {
if (x.right == null) return x.left;
if (x.left == null) return x.right;
Node t = x;
x = min(t.right);
x.right = deleteMin(t.right);
x.left = t.left;
}
x.N = size(x.left) + size(x.right) + 1;
return x;
}
public Key min() {
if (isEmpty()) throw new NoSuchElementException("called min() with empty symbol table");
return min(root).key;
}
private Node min(Node x) {
if (x.left == null) return x;
else return min(x.left);
}
public Key max() {
if (isEmpty()) throw new NoSuchElementException("called max() with empty symbol table");
return max(root).key;
}
private Node max(Node x) {
if (x.right == null) return x;
else return max(x.right);
}
//return Node.key<=key
public Key floor(Key key) {
if (key == null) throw new NullPointerException("argument to floor() is null");
if (isEmpty()) throw new NoSuchElementException("called floor() with empty symbol table");
Node x = floor(root, key);
if (x == null) return null;
else return x.key;
}
private Node floor(Node x, Key key) {
if (x == null) return null;
int cmp = key.compareTo(x.key);
if (cmp == 0) return x;
if (cmp < 0) return floor(x.left, key);
Node t = floor(x.right, key);
if (t != null) return t;
else return x;
}
public Key ceiling(Key key) {
if (key == null) throw new NullPointerException("argument to ceiling() is null");
if (isEmpty()) throw new NoSuchElementException("called ceiling() with empty symbol table");
Node x = ceiling(root, key);
if (x == null) return null;
else return x.key;
}
//return Node.key>=key
private Node ceiling(Node x, Key key) {
if (x == null) return null;
int cmp = key.compareTo(x.key);
if (cmp == 0) return x;
if (cmp < 0) {
Node t = ceiling(x.left, key);
if (t != null) return t;
else return x;
}
return ceiling(x.right, key);
}
public Key select(int k) {
if (k < 0 || k >= size()) throw new IllegalArgumentException();
Node x = select(root, k);
return x.key;
}
// Return key of rank k.
private Node select(Node x, int k) {
if (x == null) return null;
int t = size(x.left);
if (t > k) return select(x.left, k);
else if (t < k) return select(x.right, k-t-1);
else return x;
}
public int rank(Key key) {
if (key == null) throw new NullPointerException("argument to rank() is null");
return rank(key, root);
}
// Number of keys in the subtree less than key.
private int rank(Key key, Node x) {
if (x == null) return 0;
int cmp = key.compareTo(x.key);
if (cmp < 0) return rank(key, x.left);
else if (cmp > 0) return 1 + size(x.left) + rank(key, x.right);
else return size(x.left);
}
public Iterable<Key> keys() {
return keys(min(), max());
}
public Iterable<Key> keys(Key lo, Key hi) {
if (lo == null) throw new NullPointerException("first argument to keys() is null");
if (hi == null) throw new NullPointerException("second argument to keys() is null");
Queue<Key> queue = new Queue<Key>();
keys(root, queue, lo, hi);
return queue;
}
private void keys(Node x, Queue<Key> queue, Key lo, Key hi) {
if (x == null) return;
int cmplo = lo.compareTo(x.key);
int cmphi = hi.compareTo(x.key);
if (cmplo < 0) keys(x.left, queue, lo, hi);
if (cmplo <= 0 && cmphi >= 0) queue.enqueue(x.key);
if (cmphi > 0) keys(x.right, queue, lo, hi);
}
public int size(Key lo, Key hi) {
if (lo == null) throw new NullPointerException("first argument to size() is null");
if (hi == null) throw new NullPointerException("second argument to size() is null");
if (lo.compareTo(hi) > 0) return 0;
if (contains(hi)) return rank(hi) - rank(lo) + 1;
else return rank(hi) - rank(lo);
}
public int height() {
return height(root);
}
private int height(Node x) {
if (x == null) return -1;
return 1 + Math.max(height(x.left), height(x.right));
}
public Iterable<Key> levelOrder() {
Queue<Key> keys = new Queue<Key>();
Queue<Node> queue = new Queue<Node>();
queue.enqueue(root);
while (!queue.isEmpty()) {
Node x = queue.dequeue();
if (x == null) continue;
keys.enqueue(x.key);
queue.enqueue(x.left);
queue.enqueue(x.right);
}
return keys;
}
private boolean check() {
if (!isBST()) StdOut.println("Not in symmetric order");
if (!isSizeConsistent()) StdOut.println("Subtree counts not consistent");
if (!isRankConsistent()) StdOut.println("Ranks not consistent");
return isBST() && isSizeConsistent() && isRankConsistent();
}
private boolean isBST() {
return isBST(root, null, null);
}
private boolean isBST(Node x, Key min, Key max) {
if (x == null) return true;
if (min != null && x.key.compareTo(min) <= 0) return false;
if (max != null && x.key.compareTo(max) >= 0) return false;
return isBST(x.left, min, x.key) && isBST(x.right, x.key, max);
}
// are the size fields correct?
private boolean isSizeConsistent() { return isSizeConsistent(root); }
private boolean isSizeConsistent(Node x) {
if (x == null) return true;
if (x.N != size(x.left) + size(x.right) + 1) return false;
return isSizeConsistent(x.left) && isSizeConsistent(x.right);
}
// check that ranks are consistent
private boolean isRankConsistent() {
for (int i = 0; i < size(); i++)
if (i != rank(select(i))) return false;
for (Key key : keys())
if (key.compareTo(select(rank(key))) != 0) return false;
return true;
}
public static void main(String[] args) {
BST<String, Integer> st = new BST<String, Integer>();
for (int i = 0; !StdIn.isEmpty(); i++) {
String key = StdIn.readString();
st.put(key, i);
}
for (String s : st.levelOrder())
StdOut.println(s + " " + st.get(s));
StdOut.println();
for (String s : st.keys())
StdOut.println(s + " " + st.get(s));
}
}
部分讲解:
二叉查找树键的投影是一组有序的键列:
查找示意:
插入示意:
递归的理解:
每一次递归相当于沿着树向下搜索,递归的return就是沿着树向上更新数据
分析:
命题C说明二叉查找树中查找随机键的成本比二分查找(上一节)高约39%,命题D说明这些成本是值得的(插入成本低,二分查找构造数组是平方级别的)
跟有序性有关的API解释:
floor示意:
select示意:
删除最小示意:
删除示意:(用后继结点替换被删除结点,也就是竖直投影刚好比被删除点大的结点)
一直使用后继结点代替的缺点:使树不平衡了,因此会产生性能问题
keys的示意:
性能分析: