3.1 Elementary Symbol Tables
http://algs4.cs.princeton.edu/31elementary/
定义:符号表是一种存储键值对的数据结构,支持两种操作:插入(put),查找(get)
符号表(或称字典)最主要的目的就是将一个键和值联系起来。用例能够将一个键值对插入符号表并希望在之后能够从符号表的所有键值对中按照键直接找到对应的值。
API
一种简单的泛型符号表API
有序符号表
典型的应用中,键都是 Comparable 的对象
许多符号表的实现都利用了 Comparable 接口带来的键的有序性来更好地实现 put() 和 get() 方法。
一种有序的泛型符号表的API
例子 FrequencyCounter
统计给定文本中的单词出现的频率,打印频率最高的单词
/*
* % java FrequencyCounter 1 < tinyTale.txt
* it 10
*
* % java FrequencyCounter 8 < tale.txt
* business 122
*
* % java FrequencyCounter 10 < leipzig1M.txt
* government 24763
*/
public class FrequencyCounter {
// Do not instantiate.
private FrequencyCounter() { }
/**
* Reads in a command-line integer and sequence of words from
* standard input and prints out a word (whose length exceeds
* the threshold) that occurs most frequently to standard output.
* It also prints out the number of words whose length exceeds
* the threshold and the number of distinct such words.
*
* @param args the command-line arguments
*/
public static void main(String[] args) {
int distinct = 0, words = 0;
int minlen = Integer.parseInt(args[0]);
ST<String, Integer> st = new ST<String, Integer>();
// compute frequency counts
while (!StdIn.isEmpty()) {
String key = StdIn.readString();
if (key.length() < minlen) continue;
words++;
if (st.contains(key)) {
st.put(key, st.get(key) + 1);
}
else {
st.put(key, 1);
distinct++;
}
}
// find a key with the highest frequency count
String max = "";
st.put(max, 0);
for (String word : st.keys()) {
if (st.get(word) > st.get(max))
max = word;
}
StdOut.println(max + " " + st.get(max));
StdOut.println("distinct = " + distinct);
StdOut.println("words = " + words);
}
}
研究符号表处理大型文本的性能要考虑两个方面的因素:单词总量,不同单词的总数
FrequencyCounter 是一种极为常见的应用的代表,它的这些特性也是许多其他符号表应用的共性:
- 混合使用查找和插入的操作
- 大量的不同键
- 查找操作比插入操作多得多
- 虽然不可预测,但查找和插入操作的使用模式并非随机
顺序查找
public class SequentialSearchST<Key, Value> {
private int n; // number of key-value pairs
private Node first; // the linked list of key-value pairs
// a helper linked list data type
private class Node {
private Key key;
private Value val;
private Node next;
public Node(Key key, Value val, Node next) {
this.key = key;
this.val = val;
this.next = next;
}
}
/**
* Initializes an empty symbol table.
*/
public SequentialSearchST() {
}
/**
* Returns the number of key-value pairs in this symbol table.
*
* @return the number of key-value pairs in this symbol table
*/
public int size() {
return n;
}
/**
* Returns true if this symbol table is empty.
*
* @return {@code true} if this symbol table is empty;
* {@code false} otherwise
*/
public boolean isEmpty() {
return size() == 0;
}
/**
* Returns true if this symbol table contains the specified key.
*
* @param key the key
* @return {@code true} if this symbol table contains {@code key};
* {@code false} otherwise
* @throws IllegalArgumentException if {@code key} is {@code null}
*/
public boolean contains(Key key) {
if (key == null) throw new IllegalArgumentException("argument to contains() is null");
return get(key) != null;
}
/**
* Returns the value associated with the given key in this symbol table.
*
* @param key the key
* @return the value associated with the given key if the key is in the symbol table
* and {@code null} if the key is not in the symbol table
* @throws IllegalArgumentException if {@code key} is {@code null}
*/
public Value get(Key key) {
if (key == null) throw new IllegalArgumentException("argument to get() is null");
for (Node x = first; x != null; x = x.next) {
if (key.equals(x.key))
return x.val;
}
return null;
}
/**
* Inserts the specified key-value pair into the symbol table, overwriting the old
* value with the new value if the symbol table already contains the specified key.
* Deletes the specified key (and its associated value) from this symbol table
* if the specified value is {@code null}.
*
* @param key the key
* @param val the value
* @throws IllegalArgumentException if {@code key} is {@code null}
*/
public void put(Key key, Value val) {
if (key == null) throw new IllegalArgumentException("first argument to put() is null");
if (val == null) {
delete(key);
return;
}
for (Node x = first; x != null; x = x.next) {
if (key.equals(x.key)) {
x.val = val;
return;
}
}
first = new Node(key, val, first);
n++;
}
/**
* Removes the specified key and its associated value from this symbol table
* (if the key is in this symbol table).
*
* @param key the key
* @throws IllegalArgumentException if {@code key} is {@code null}
*/
public void delete(Key key) {
if (key == null) throw new IllegalArgumentException("argument to delete() is null");
first = delete(first, key);
}
// delete key in linked list beginning at Node x
// warning: function call stack too large if table is large
private Node delete(Node x, Key key) {
if (x == null) return null;
if (key.equals(x.key)) {
n--;
return x.next;
}
x.next = delete(x.next, key);
return x;
}
/**
* Returns all keys in the symbol table as an {@code Iterable}.
* To iterate over all of the keys in the symbol table named {@code st},
* use the foreach notation: {@code for (Key key : st.keys())}.
*
* @return all keys in the symbol table
*/
public Iterable<Key> keys() {
Queue<Key> queue = new Queue<Key>();
for (Node x = first; x != null; x = x.next)
queue.enqueue(x.key);
return queue;
}
/**
* Unit tests the {@code SequentialSearchST} data type.
*
* @param args the command-line arguments
*/
public static void main(String[] args) {
SequentialSearchST<String, Integer> st = new SequentialSearchST<String, Integer>();
for (int i = 0; !StdIn.isEmpty(); i++) {
String key = StdIn.readString();
st.put(key, i);
}
for (String s : st.keys())
StdOut.println(s + " " + st.get(s));
}
}
二分查找
public class BinarySearchST<Key extends Comparable<Key>, Value> {
private static final int INIT_CAPACITY = 2;
private Key[] keys;
private Value[] vals;
private int n = 0;
/**
* Initializes an empty symbol table.
*/
public BinarySearchST() {
this(INIT_CAPACITY);
}
/**
* Initializes an empty symbol table with the specified initial capacity.
* @param capacity the maximum capacity
*/
public BinarySearchST(int capacity) {
keys = (Key[]) new Comparable[capacity];
vals = (Value[]) new Object[capacity];
}
// resize the underlying arrays
private void resize(int capacity) {
assert capacity >= n;
Key[] tempk = (Key[]) new Comparable[capacity];
Value[] tempv = (Value[]) new Object[capacity];
for (int i = 0; i < n; i++) {
tempk[i] = keys[i];
tempv[i] = vals[i];
}
vals = tempv;
keys = tempk;
}
/**
* Returns the number of key-value pairs in this symbol table.
*
* @return the number of key-value pairs in this symbol table
*/
public int size() {
return n;
}
/**
* Returns true if this symbol table is empty.
*
* @return {@code true} if this symbol table is empty;
* {@code false} otherwise
*/
public boolean isEmpty() {
return size() == 0;
}
/**
* Does this symbol table contain the given key?
*
* @param key the key
* @return {@code true} if this symbol table contains {@code key} and
* {@code false} otherwise
* @throws IllegalArgumentException if {@code key} is {@code null}
*/
public boolean contains(Key key) {
if (key == null) throw new IllegalArgumentException("argument to contains() is null");
return get(key) != null;
}
/**
* Returns the value associated with the given key in this symbol table.
*
* @param key the key
* @return the value associated with the given key if the key is in the symbol table
* and {@code null} if the key is not in the symbol table
* @throws IllegalArgumentException if {@code key} is {@code null}
*/
public Value get(Key key) {
if (key == null) throw new IllegalArgumentException("argument to get() is null");
if (isEmpty()) return null;
int i = rank(key);
if (i < n && keys[i].compareTo(key) == 0) return vals[i];
return null;
}
/**
* Returns the number of keys in this symbol table strictly less than {@code key}.
*
* @param key the key
* @return the number of keys in the symbol table strictly less than {@code key}
* @throws IllegalArgumentException if {@code key} is {@code null}
*/
public int rank(Key key) {
if (key == null) throw new IllegalArgumentException("argument to rank() is null");
int lo = 0, hi = n-1;
while (lo <= hi) {
int mid = lo + (hi - lo) / 2;
int cmp = key.compareTo(keys[mid]);
if (cmp < 0) hi = mid - 1;
else if (cmp > 0) lo = mid + 1;
else return mid;
}
return lo;
}
/**
* Inserts the specified key-value pair into the symbol table, overwriting the old
* value with the new value if the symbol table already contains the specified key.
* Deletes the specified key (and its associated value) from this symbol table
* if the specified value is {@code null}.
*
* @param key the key
* @param val the value
* @throws IllegalArgumentException if {@code key} is {@code null}
*/
public void put(Key key, Value val) {
if (key == null) throw new IllegalArgumentException("first argument to put() is null");
if (val == null) {
delete(key);
return;
}
int i = rank(key);
// key is already in table
if (i < n && keys[i].compareTo(key) == 0) {
vals[i] = val;
return;
}
// insert new key-value pair
if (n == keys.length) resize(2*keys.length);
for (int j = n; j > i; j--) {
keys[j] = keys[j-1];
vals[j] = vals[j-1];
}
keys[i] = key;
vals[i] = val;
n++;
assert check();
}
/**
* Removes the specified key and associated value from this symbol table
* (if the key is in the symbol table).
*
* @param key the key
* @throws IllegalArgumentException if {@code key} is {@code null}
*/
public void delete(Key key) {
if (key == null) throw new IllegalArgumentException("argument to delete() is null");
if (isEmpty()) return;
// compute rank
int i = rank(key);
// key not in table
if (i == n || keys[i].compareTo(key) != 0) {
return;
}
for (int j = i; j < n-1; j++) {
keys[j] = keys[j+1];
vals[j] = vals[j+1];
}
n--;
keys[n] = null; // to avoid loitering
vals[n] = null;
// resize if 1/4 full
if (n > 0 && n == keys.length/4) resize(keys.length/2);
assert check();
}
/**
* Removes the smallest key and associated value from this symbol table.
*
* @throws NoSuchElementException if the symbol table is empty
*/
public void deleteMin() {
if (isEmpty()) throw new NoSuchElementException("Symbol table underflow error");
delete(min());
}
/**
* Removes the largest key and associated value from this symbol table.
*
* @throws NoSuchElementException if the symbol table is empty
*/
public void deleteMax() {
if (isEmpty()) throw new NoSuchElementException("Symbol table underflow error");
delete(max());
}
/***************************************************************************
* Ordered symbol table methods.
***************************************************************************/
/**
* Returns the smallest key in this symbol table.
*
* @return the smallest key in this symbol table
* @throws NoSuchElementException if this symbol table is empty
*/
public Key min() {
if (isEmpty()) throw new NoSuchElementException("called min() with empty symbol table");
return keys[0];
}
/**
* Returns the largest key in this symbol table.
*
* @return the largest key in this symbol table
* @throws NoSuchElementException if this symbol table is empty
*/
public Key max() {
if (isEmpty()) throw new NoSuchElementException("called max() with empty symbol table");
return keys[n-1];
}
/**
* Return the kth smallest key in this symbol table.
*
* @param k the order statistic
* @return the {@code k}th smallest key in this symbol table
* @throws IllegalArgumentException unless {@code k} is between 0 and
* <em>n</em>–1
*/
public Key select(int k) {
if (k < 0 || k >= size()) {
throw new IllegalArgumentException("called select() with invalid argument: " + k);
}
return keys[k];
}
/**
* Returns the largest key in this symbol table less than or equal to {@code key}.
*
* @param key the key
* @return the largest key in this symbol table less than or equal to {@code key}
* @throws NoSuchElementException if there is no such key
* @throws IllegalArgumentException if {@code key} is {@code null}
*/
public Key floor(Key key) {
if (key == null) throw new IllegalArgumentException("argument to floor() is null");
int i = rank(key);
if (i < n && key.compareTo(keys[i]) == 0) return keys[i];
if (i == 0) return null;
else return keys[i-1];
}
/**
* Returns the smallest key in this symbol table greater than or equal to {@code key}.
*
* @param key the key
* @return the smallest key in this symbol table greater than or equal to {@code key}
* @throws NoSuchElementException if there is no such key
* @throws IllegalArgumentException if {@code key} is {@code null}
*/
public Key ceiling(Key key) {
if (key == null) throw new IllegalArgumentException("argument to ceiling() is null");
int i = rank(key);
if (i == n) return null;
else return keys[i];
}
/**
* Returns the number of keys in this symbol table in the specified range.
*
* @param lo minimum endpoint
* @param hi maximum endpoint
* @return the number of keys in this symbol table between {@code lo}
* (inclusive) and {@code hi} (inclusive)
* @throws IllegalArgumentException if either {@code lo} or {@code hi}
* is {@code null}
*/
public int size(Key lo, Key hi) {
if (lo == null) throw new IllegalArgumentException("first argument to size() is null");
if (hi == null) throw new IllegalArgumentException("second argument to size() is null");
if (lo.compareTo(hi) > 0) return 0;
if (contains(hi)) return rank(hi) - rank(lo) + 1;
else return rank(hi) - rank(lo);
}
/**
* Returns all keys in this symbol table as an {@code Iterable}.
* To iterate over all of the keys in the symbol table named {@code st},
* use the foreach notation: {@code for (Key key : st.keys())}.
*
* @return all keys in this symbol table
*/
public Iterable<Key> keys() {
return keys(min(), max());
}
/**
* Returns all keys in this symbol table in the given range,
* as an {@code Iterable}.
*
* @param lo minimum endpoint
* @param hi maximum endpoint
* @return all keys in this symbol table between {@code lo}
* (inclusive) and {@code hi} (inclusive)
* @throws IllegalArgumentException if either {@code lo} or {@code hi}
* is {@code null}
*/
public Iterable<Key> keys(Key lo, Key hi) {
if (lo == null) throw new IllegalArgumentException("first argument to keys() is null");
if (hi == null) throw new IllegalArgumentException("second argument to keys() is null");
Queue<Key> queue = new Queue<Key>();
if (lo.compareTo(hi) > 0) return queue;
for (int i = rank(lo); i < rank(hi); i++)
queue.enqueue(keys[i]);
if (contains(hi)) queue.enqueue(keys[rank(hi)]);
return queue;
}
/***************************************************************************
* Check internal invariants.
***************************************************************************/
private boolean check() {
return isSorted() && rankCheck();
}
// are the items in the array in ascending order?
private boolean isSorted() {
for (int i = 1; i < size(); i++)
if (keys[i].compareTo(keys[i-1]) < 0) return false;
return true;
}
// check that rank(select(i)) = i
private boolean rankCheck() {
for (int i = 0; i < size(); i++)
if (i != rank(select(i))) return false;
for (int i = 0; i < size(); i++)
if (keys[i].compareTo(select(rank(keys[i]))) != 0) return false;
return true;
}
/**
* Unit tests the {@code BinarySearchST} data type.
*
* @param args the command-line arguments
*/
public static void main(String[] args) {
BinarySearchST<String, Integer> st = new BinarySearchST<String, Integer>();
for (int i = 0; !StdIn.isEmpty(); i++) {
String key = StdIn.readString();
st.put(key, i);
}
for (String s : st.keys())
StdOut.println(s + " " + st.get(s));
}
}
符号表的各种实现的优缺点