
package treeroot.util.wordcount;

/**
 * The class is the element of the Word Set returned by WordCount.
 * Each instance holds the word value and its number of appearances.
 * <strong>NOTE</strong>: words are compared ignoring case,
 * so the words "hello", "Hello" and "HELLO" are
 * the same word.
 * @author  treeroot
 * @version 1.0, 04/12/06
 * @see WordCount
 */
public class Word{
 //the word text,stored in lower case so that comparison ignores case
 private final String value;
 //number of appearances;a Word is created on its first appearance
 private int count=1;

 /**
  * Construct a Word object,has the count 1.
  * The text is converted to lower case,which is what makes
  * {@link #equals(Object)} and {@link #hashCode()} ignore case.
  * @param value the text of the word,must not be null.
  * @throws NullPointerException if value is null.
  */
 public Word(String value){
  if(value==null){
   throw new NullPointerException("value");
  }
  //a fixed locale keeps the folding of A-Z independent of the platform default
  this.value=value.toLowerCase(java.util.Locale.ENGLISH);
 }

 //this method is only invoked by WordCount class
 protected void increase(){
  count++;
 }

 /**
  * @return the word as the lower case.
  */
 public String getWord(){
  return value;
 }

 /**
  * @return the appearance times of this word.
  */
 public int getCount(){
  return count;
 }

 /**
  * The count is not part of the comparison,only the lower-cased text is.
  * @return if the word was the same ignore case,return true.
  */
 public boolean equals(Object o){
  if(!(o instanceof Word)){
   return false;
  }
  return ((Word)o).value.equals(value);
 }

 /**
  * @return the hashCode of the lower-cased word,consistent with equals.
  */
 public int hashCode(){
  return value.hashCode();
 }
}

package treeroot.util.wordcount;
/**
 * WordCount provides static methods to count the words of a text.
 * You can supply the hyphens that a word may use to connect letters;
 * otherwise the default hyphens are used.
 * The default hyphens are '-', '_' and ''', but the first character of a
 * word must be an English letter (a-z, A-Z).
 * So a-b, a_b and it's are words, but -ab, _ab and 'as aren't words.
 * You can use a comparator to sort the Set, by dictionary or by frequency;
 * if you don't give a comparator, the dictionary comparator is used.
 */

import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class WordCount
 //the default hyphen collection.
 private static String regex="//-_'";
     * return the words as a Set by default comparator and hyphens
     * @see #getWordCount(String,String,Comparator)
 public static Set getWordCount(String text){
  return getWordCount(text,regex,WordCount.DICTIONARY_ORDER);
     * return the words as a Set by the default comparator
     * @see #getWordCount(String,String,Comparator)
 public static Set getWordCount(String text,String regex){
  return getWordCount(text,regex,WordCount.DICTIONARY_ORDER);  
  * return the words as a Set by the default hyphens
     * @see #getWordConut(String,String,Comparator)
 public static Set getWordCount(String text,Comparator order){
  return getWordCount(text,regex,order); 
  *  return the words as a Set by the text,the word are all changed to
  *  lower case.
     *  @param text  the English text you want to split.
     *  @param regex the hyphens that the word can use.
     *  @param order the order of the Set returned by.
     *  @return the word Set that the text contains.
 public static Set getWordCount(String text,String regex,Comparator order){
  Map map=new HashMap();
  String split1="[^a-zA-Z"+regex+"]+";
  String split2="[^a-zA-Z]+"+regex+"[^a-zA-Z]*";
  String split3="[^a-zA-Z]*"+regex+"[^a-zA-Z]+";
  String reg = "("+split2+")|("+split3+")|("+split1+")";
  String[] words = text.split(reg);
  for(int i=0;i<words.length;i++){
   Object o=new Word(words[i]);
   else {
  Set sort=new TreeSet(order);
  return Collections.unmodifiableSet(sort);
     * the sort constant of DICTIONARY,the default sort contant.
 public static final Comparator DICTIONARY_ORDER=new Comparator(){
  public int compare(Object o1,Object o2){
   Word w1=(Word)o1;
   Word w2=(Word)o2;
   return w1.getWord().compareTo(w2.getWord());
     * the sort contant of FREQUENCY,the words was sorted by the apperance
     * times in the Set.
 public static final Comparator FREQUENCY_ORDER =new Comparator(){
  public int compare(Object o1,Object o2){
   Word w1=(Word)o1;
   Word w2=(Word)o2;
   int i=w2.getCount()-w1.getCount();
    return w1.getWord().compareTo(w2.getWord());
   return i;






当前余额3.43前往充值 >
领取后你会自动成为博主和红包主的粉丝 规则
钱包余额 0


