自己理解三叉树TernarySearchTrie

最新推荐文章于 2024-08-22 16:41:33 发布

foamflower

最新推荐文章于 2024-08-22 16:41:33 发布

阅读量5.4k

点赞数 1

分类专栏：搜索引擎文章标签： null string integer exception object search

本文链接：https://blog.csdn.net/foamflower/article/details/6055008

版权

本文介绍了作者花费一天半时间研究三叉树的过程，分享了个人对三叉搜索树的理解，并在代码中做了注释，同时提出了存在的疑问，期待专业人士指导。

摘要由CSDN通过智能技术生成

花了差不多一天半的时间，终于把一棵三叉树的实现看完了，不过其中还有一些地方没有完全弄懂。下面的代码注释里写上了我自己的理解，同时也标出了尚存的疑问，欢迎了解的朋友指出其中的错误，并解答这些疑问。

 
 package org.apache.spell; 
 
 
 import java.io.BufferedReader; 
 
 
 import java.io.BufferedWriter; 
 
 
 import java.io.File; 
 
 
 import java.io.FileInputStream; 
 
 
 import java.io.FileReader; 
 
 
 import java.io.FileWriter; 
 
 
 import java.io.IOException; 
 
 
 import java.io.InputStreamReader; 
 
 
 import java.util.ArrayList; 
 
 
 import java.util.Enumeration; 
 
 
 import java.util.List; 
 
 
 import java.util.Stack; 
 
 
 import java.util.StringTokenizer; 
 
 
 import java.util.Vector; 
 
 
 import java.util.zip.GZIPInputStream; 
 
 
 /**
 *  三叉搜索树实现：字符串排序数据结构
 *  Implementation of a Ternary Search Trie, a data structure for storing <code>String</code> objects
 *  that combines the compact size of a binary search tree with the speed of a digital search trie, and is 
 *  therefore ideal for practical use in sorting and searching data.</p> <p>
 * 
 *  This data structure is faster than hashing for many typical search problems, and supports
 *  a broader range of useful problems and operations. Ternary searches are faster than
 *  hashing and more powerful, too.</p> <p>
 * 
 *  The theory of ternary search trees was described at a symposium in 1997 (see "Fast 
 *  Algorithms for Sorting and Searching Strings," by J.L. Bentley and R. Sedgewick,
 *  Proceedings of the 8th Annual ACM-SIAM Symposium on Discrete Algorithms, January 1997).
 *  Algorithms in C, Third Edition, by Robert Sedgewick (Addison-Wesley, 1998) provides 
 *  yet another view of ternary search trees. 
 * 
 * @author Bruno Martins
 *
 */ 
 
 
 public 
 class TernarySearchTrie { 
 
  
 public 
 static 
 void main(String[] args) 
 throws Exception { 
 
  String triefile 
 = 
 "E://Java Projects//ses//src//test//lucene//dic//spell//trie.txt"; 
 
  TernarySearchTrie dictionary 
 = 
 new TernarySearchTrie( 
 new File(triefile)); 
 
    
 //benchMark(args[0]); 
 
   
 //benchMark(args[1]); 
 
 } 
 
  
  
 
  
 // 三叉树的创建，节点创建，删除等 
 
  
  
 
  
 /**
  *  An inner class of Ternary Search Trie that represents a node in the trie.
  *  三叉树内部类代表树中的一个节点
  */ 
 
  
 public 
 static 
 final 
 class TSTNode { 
 
   
 /** Index values for accessing relatives array. */ 
 
   
 protected 
 final 
 static 
 int PARENT 
 = 
 0, LOKID 
 = 
 1, EQKID 
 = 
 2, HIKID 
 = 
 3; 
 //父，左，中，右(相对节点数组访问下标) 
 
   
 /** The key to the node. */ 
 
   
 protected Object data;   
 //节点的值 
 
   
 /** The relative nodes. */ 
 
   
 protected TSTNode[] relatives 
 = 
 new TSTNode[ 
 4];   
 //存放父，左，中，右4个相对节点 
 
   
 /** The char used in the split. */ 
 
   
 protected 
 char splitchar; 
 //字符 
 
   
 /**
   *  Constructor method.
   *
   *@param  splitchar  The char used in the split.  字符
   *@param  parent     The parent node. 父节点
   */ 
 
   
 protected TSTNode( 
 char splitchar, TSTNode parent) { 
 
    
 this.splitchar 
 = splitchar; 
 
   relatives[PARENT] 
 = parent; 
 
  } 
 
   
 public String toString() 
 
  { 
 
    
 return String.valueOf(splitchar) 
 + 
 ":" 
 +data; 
 
  } 
 
 } 
 
  
 
  
 /**
  * 节点元素，仅仅保存当前节点值，即文件中存的一行数据：word:interger
  * key:word
  * data:interger
  * @author shentingting
  *
  */ 
 
  
 protected 
 static 
 class TSTItem { 
 
   
 /** 节点的值. */ 
 
   
 protected Object data; 
 
   
 /** 节点对应的key.即目标字符串 */ 
 
   
 protected String key; 
 
   
 /**
   *  Constructor method.
   *
   *@param  key 当前节点索引的key.
   *@param  data 当前节点的数值.
   */ 
 
   
 protected TSTItem(String key, Object data) { 
 
    
 this.key 
 = key; 
 
    
 this.data 
 = data; 
 
  } 
 
 } 
 
  
 
  
 /**
  *  Compares characters by alphabetical order.
  *  按字母顺序比较字符
  *@param  cCompare2  The first char in the comparison. 第一个字符
  *@param  cRef      The second char in the comparison. 第二个字符
  *@return           A negative number, 0 or a positive number if the second
  *      char is less, equal or greater.
  *      当第二个字符小于第一个字符 返回 负数
  *      当第二个字符等于第一个字符 返回 0
  *      当第二个字符大于第一个字符 返回 正数
  *      ASCII码对应值：
  *      A-Z 65-90 
  *      a-z 97-122
  * 其中忽略了a-x之间字符的大小写敏感度，经过下面的处理后其比较字符表从Ascii码转换成
  * A   a  B  b C c .... X    x  Y  Z    y  z
  * 65 66 67 68.........111 112 113 114 121 122
  * 至于为何YZyz四个字符没有作相同处理暂时还不知其原由
  */ 
 
  
 private 
 static 
 int compareCharsAlphabetically( 
 int cCompare2, 
 int cRef) { 
 
   
 int cCompare 
 = 
 0; 
 
   
 if (cCompare2 
 > 
 = 
 65) { 
 //从A开始 
 
    
 if (cCompare2 
 < 
 89) { 
 //A-Y之间的字符(不包含Y) 
 
    cCompare 
 = ( 
 2 
 * cCompare2) 
 - 
 65; 
 
   } 
 else 
 if (cCompare2 
 < 
 97) { 
 //在Y-a之间的字符(不包含a) 
 
    cCompare 
 = cCompare2 
 + 
 24; 
 
   } 
 else 
 if (cCompare2 
 < 
 121) { 
 //在a-y之间的字符(不包含y) 
 
    cCompare 
 = ( 
 2 
 * cCompare2) 
 - 
 128; 
 
   } 
 else 
 
    cCompare 
 = cCompare2; 
 
  } 
 else 
 //A之前的字符(不包含A) 
 
   cCompare 
 = cCompare2; 
 
   
 if (cRef 
 < 
 65) { 
 
    
 return cCompare 
 - cRef;   
 
  } 
 
   
 if (cRef 
 < 
 89) { 
 
    
 return cCompare 
 - (( 
 2 
 * cRef) 
 - 
 65);  
 
  } 
 
   
 if (cRef 
 < 
 97) { 
 
    
 return cCompare 
 - (cRef 
 + 
 24); 
 
  } 
 
   
 if (cRef 
 < 
 121) { 
 
    
 return cCompare 
 - (( 
 2 
 * cRef) 
 - 
 128); 
 
  } 
 
   
 return cCompare 
 - cRef; 
 
 } 
 
  
 /**  The default number of values returned by the <code>matchAlmost</code> method. */ 
 
  
 private 
 int defaultNumReturnValues 
 = 
 - 
 1;   
 // default value is -1 
 
  
 /** the number of differences allowed in a call to the <code>matchAlmostKey</code> method. */ 
 
  
 private 
 int matchAlmostDiff; 
 // 
 
  
 /** The base node in the trie. */ 
 
  
 private TSTNode rootNode;   
 // root node 
 
  
 /**
  *  Constructs an empty Ternary Search Trie.
  */ 
 
  
 public TernarySearchTrie() { 
 
 } 
 
  
 /**
  * Creates a Ternary Search Trie and loads the contents of a plain text file into it.
  *
  * <p>Each line of the file has the form {@code word : integer}. This constructor
  * delegates to the two-argument constructor with compression switched off.
  *
  * @param file the file with the data to load into the trie
  * @throws IOException if a problem occurs while reading the data
  */
 public TernarySearchTrie(File file) throws IOException {
     this(file, false);
 }
 
  
 
  
 /**
  * 从文件中载入数据到字典树
  * 一个普通文本文档每行的格式：word : integer
  *  Constructs a Ternary Search Trie and loads data from a <code>File</code> into the Trie. 
  *  The file is a normal text document, where each line is of the form " word : integer".
  *
  *@param  file              The <code>File</code> with the data to load into the Trie.
     *@param compression If true, the file is compressed with the GZIP algorithm, and if false, 
     *                                  the file is a normal text document.
     *                   true：文件根据GZIP算法压缩
     *                   false：普通的文本文档
  *@exception  IOException  A problem occured while reading the data.
  */ 
 
  
 public TernarySearchTrie(File file, 
 boolean compression) 
 throws IOException { 
 
   
 this(); 
 
  BufferedReader in; 
 
   
 //如果是压缩文件则通过建立解压缩输出流 
 
   
 if(compression) in 
 = 
 new BufferedReader( 
 new InputStreamReader( 
 new GZIPInputStream( 
 new FileInputStream(file)))); 
 
   
 else in 
 = 
 new BufferedReader( 
 new InputStreamReader(( 
 new FileInputStream(file)))); 
 
  String word; 
 
   
 int pos; 
 
   
 int occur;