package com.chipmunk.algorithm.trie;
public class Branch {
private char word;
private byte status = 0;//0词语未结束1词语结束
private Branch[] branches = null;
public Branch(char word) {
super();
this.word = word;
}
public Branch(char word, byte status) {
super();
this.word = word;
this.status = status;
}
/**
*
* @param b
* @return
*/
public Branch add(Branch b){
if (branches==null) {
branches=new Branch[1];
branches[0] = b;
return branches[0];
}else {
char w1=b.getWord();
//判断是否已经有相等的字
int i = -1;
for (int j = 0; j < branches.length; j++) {
Branch bb =branches[j];
char w2 = bb.getWord();
if (w1==w2) {
i=j;
//如果原来这个字不是结束符,更改为结束符
if (b.getStatus()==1&&bb.getStatus()==0) {
bb.setStatus(b.getStatus());
}
break;
}
}
if (i>-1) {//字已经添加过了
return branches[i];
}else {//添加一个字
Branch[]branches2=new Branch[branches.length+1];
System.arraycopy(branches,0,branches2,0,branches.length);
branches2[branches2.length-1]=b;
branches=branches2;
return branches[branches.length-1];
}
}
}
public char getWord() {
return word;
}
public void setWord(char word) {
this.word = word;
}
public byte getStatus() {
return status;
}
public void setStatus(byte status) {
this.status = status;
}
public Branch[] getBranches() {
return branches;
}
public void setBranches(Branch[] branches) {
this.branches = branches;
}
}
package com.chipmunk.algorithm.trie;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
public class TrieTree {
/**
* 向trie树添加新词
* @param root
* @param text
*/
private static void addWords(Branch root,String text){
Branch branch_grow = root;
char[] chars = text.toCharArray();
for (int i = 0; i < chars.length; i++) {
byte status = 0;
if (i == chars.length-1) {
status = 1;
}
Branch b = branch_grow.add(new Branch(chars[i], status));
branch_grow=b;
}
}
/**
* 搜索一个词---全匹配
* @param root 树根
* @param text 搜索词
* @return
*/
private static boolean searchTerm(Branch root,String text){
char[]cs = text.toCharArray();
Branch finder = root;
for (int i = 0; i < cs.length; i++) {
char c = cs[i];
boolean bool_equal = false;
boolean bool_last = i==cs.length-1;//最后一个字
Branch[]bs = finder.getBranches();
if (bs!=null) {
for (Branch branch : bs) {
char w = branch.getWord();
if (c==w) {
finder=branch;
if (bool_last) {
byte status = branch.getStatus();
if (status==0) {//是否是词语最后一个字
bool_equal = false;
}else {
bool_equal = true;
}
}else {
bool_equal = true;
}
break;
}
}
}
if (!bool_equal) {
return false;
}
}
return true;
}
/**
* 搜索一个词---部分匹配
* @param root 树根
* @param text 搜索词
* @return
*/
public static List<String> searchTermPart(Branch root,String text){
char[]cs = text.toCharArray();
List<String> list = new ArrayList<String>();
StringBuffer text_temp = new StringBuffer();
Branch finder = root;
for (int i = 0; i < cs.length; i++) {
char c = cs[i];
boolean bool_equal = false;//是否相等
Branch[]bs = finder.getBranches();
if (bs!=null) {
for (Branch branch : bs) {
char w = branch.getWord();
if (c==w) {
finder=branch;
byte status = branch.getStatus();
text_temp.append(c);
// System.out.println(status+"--"+c);
if (status==1) {//词语结尾标示
list.add(text_temp.toString());
}
bool_equal = true;
break;
}
}
}
if (!bool_equal&&text_temp.length()>0) {
return list;
}
}
return list;
}
public static List<String> searchAllTerms(Branch root,String text){
char[]cs = text.toCharArray();
List<String> list = new ArrayList<String>();
StringBuffer text_temp = new StringBuffer();
Branch finder = root;
for (int i = 0; i < cs.length; i++) {
// System.out.println(i);
char c = cs[i];
boolean bool_equal = false;//是否相等
Branch[]bs = finder.getBranches();
if (bs!=null) {
for (Branch branch : bs) {
char w = branch.getWord();
if (c==w) {
finder=branch;
byte status = branch.getStatus();
text_temp.append(c);
// System.out.println(status+"--"+c+"--"+i);
if (status==1) {
list.add(text_temp.toString());
}
bool_equal = true;
break;
}
}
}
//到了不等的文字节点
if (!bool_equal&&text_temp.length()>0) {
i=i-text_temp.length();
text_temp=new StringBuffer();//临时匹配文字组归零
finder=root;//文字查找器回trie树根部
}
}
return list;
}
/**
* 种树--建立trie树
* @param words
* @return
*/
public static Branch plantTree(Collection<String> words){
Branch root = new Branch(' ');
for (String w : words) {
addWords(root, w);
}
return root;
}
/**
* 打印所有树枝字
* @param tree
*/
public static void print(Branch tree){
Branch[] bs = tree.getBranches();
if (bs!=null) {
for (Branch branch : bs) {
System.out.print(branch.getWord());
if (branch.getStatus()==1) {
System.out.println();
}
print(branch);
}
}
}
public static void main(String[] args) {
List<String> words = new ArrayList<String>();
words.add("中国人");
words.add("中华民族");
words.add("中华");
words.add("大中华区");
words.add("中华人民共和国");
words.add("中华民国");
words.add("华人");
words.add("国歌");
Branch tree = plantTree(words);
System.out.println(searchTerm(tree, "中华人民共和国"));
System.out.println(searchAllTerms(tree, "唱中华人民共和国国歌是"));
// 01234 56789 10
// print(tree);
}
}