package ustc.wordcount;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.Map.Entry;
/**
 * Counts the words of a fixed sample sentence, prints the total number of
 * words and then every distinct word with its frequency, most frequent first.
 */
public class wc {

    /** Characters treated as word separators when tokenizing. */
    private static final String DELIMITERS = ", ?.!:\"\"''\n";

    /**
     * Tokenizes the sample sentence, prints the total word count and then the
     * frequency table produced by {@link #sort(Map)}.
     *
     * @param arg command-line arguments; not used
     */
    public static void main(String arg[]) {
        String sentence = "hello,my name is Tom,what is your name?he said:\"my name is John\"";
        Map<String, Integer> map = countWords(sentence);

        // Every token contributes exactly one to some word's count, so the
        // total number of words is the sum of all frequencies.
        int wordCount = 0;
        for (Integer occurrences : map.values()) {
            wordCount += occurrences;
        }

        System.out.println("总共单词数:" + wordCount);
        sort(map); // sorts and prints
    }

    /**
     * Builds a word-to-frequency map for the given text.
     *
     * The delimiters are handed to the tokenizer's constructor so that
     * {@code hasMoreTokens()} and {@code nextToken()} always agree on what a
     * token is. A text made only of punctuation therefore yields an empty map
     * instead of a {@link java.util.NoSuchElementException}.
     *
     * @param sentence text to split into words
     * @return map from each distinct word to the number of times it occurs
     */
    private static Map<String, Integer> countWords(String sentence) {
        Map<String, Integer> counts = new HashMap<String, Integer>();
        StringTokenizer tokens = new StringTokenizer(sentence, DELIMITERS);
        while (tokens.hasMoreTokens()) {
            String word = tokens.nextToken();
            Integer seen = counts.get(word);
            counts.put(word, seen == null ? 1 : seen + 1);
        }
        return counts;
    }

    /**
     * Prints every entry of {@code map} as {@code key:value}, one per line,
     * ordered by descending value. The sort is stable, so entries with equal
     * values keep the iteration order of {@code map}.
     *
     * @param map word frequencies to print; not modified
     */
    public static void sort(Map<String, Integer> map) {
        List<Map.Entry<String, Integer>> entries =
                new ArrayList<Map.Entry<String, Integer>>(map.entrySet());
        Collections.sort(entries, new Comparator<Map.Entry<String, Integer>>() {
            @Override
            public int compare(Map.Entry<String, Integer> o1,
                               Map.Entry<String, Integer> o2) {
                // compareTo instead of subtraction: a difference of two ints can
                // overflow and flip the sign of the result.
                return o2.getValue().compareTo(o1.getValue());
            }
        });
        for (Map.Entry<String, Integer> entry : entries) {
            System.out.println(entry.getKey() + ":" + entry.getValue());
        }
    }
}
// wc2.java
package ustc.wordcount;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.Set;
import javax.swing.JOptionPane;
/**
 * Asks the user for a line of text in a dialog, counts how often each
 * whitespace-separated word occurs and prints the words ordered by frequency,
 * first ascending and then descending.
 */
public class wc2 {

    /**
     * Prompts until a non-blank text is entered, then prints the word
     * frequencies in ascending and in descending order.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        // Keep asking until the user enters something other than whitespace.
        String input = JOptionPane.showInputDialog("Please input character!");
        while (null == input || "".equals(input.trim())) {
            JOptionPane.showMessageDialog(null, "Please input character!");
            input = JOptionPane.showInputDialog("Please input character!");
        }

        Map<Integer, String> map = groupByCount(extractWords(input));

        // ascending by number of occurrences
        sort(map, true);
        // descending by number of occurrences
        sort(map, false);
    }

    /**
     * Splits {@code input} on whitespace and drops tokens that consist solely
     * of non-word characters (for example a lone punctuation mark).
     *
     * The input is trimmed first: splitting a string with leading whitespace
     * would otherwise produce an empty first token that gets counted as a word.
     *
     * @param input raw text entered by the user
     * @return the words in order of appearance, duplicates included
     */
    private static List<String> extractWords(String input) {
        List<String> words = new ArrayList<String>();
        for (String token : input.trim().split("\\s+")) {
            if (token.length() > 0 && !token.matches("\\W+")) {
                words.add(token);
            }
        }
        return words;
    }

    /**
     * Groups words by how often they occur.
     *
     * @param words all words, duplicates included
     * @return map from occurrence count to the space-separated words having
     *         that count
     */
    private static Map<Integer, String> groupByCount(List<String> words) {
        // Single pass tally instead of repeatedly searching and removing from
        // the list, which was quadratic in the number of words.
        Map<String, Integer> tally = new HashMap<String, Integer>();
        for (String word : words) {
            Integer seen = tally.get(word);
            tally.put(word, seen == null ? 1 : seen + 1);
        }

        // The set removes duplicates; its size is the number of distinct words.
        Set<String> distinct = new HashSet<String>(words);
        Map<Integer, String> grouped = new HashMap<Integer, String>();
        for (String word : distinct) {
            Integer count = tally.get(word);
            String sameCount = grouped.get(count);
            // Words sharing a count are stored in one string, separated by spaces.
            grouped.put(count, sameCount == null ? word : word + " " + sameCount);
        }
        return grouped;
    }

    /**
     * Prints the words of {@code map} ordered by occurrence count.
     *
     * @param map
     *            occurrence count mapped to the space-separated words that
     *            occur that many times
     * @param sortType
     *            sort direction: {@code true} for ascending, {@code false} for
     *            descending
     */
    public static void sort(Map<Integer, String> map, boolean sortType) {
        List<String> list = new ArrayList<String>();
        // The priority queue hands the counts back in ascending order.
        Queue<Integer> queue = new PriorityQueue<Integer>(map.keySet());
        while (!queue.isEmpty()) {
            Integer count = queue.poll();
            for (String word : map.get(count).split(" ")) {
                list.add(word + "有" + count + " 个");
            }
        }
        if (!sortType) {
            // Descending output is exactly the ascending list reversed.
            java.util.Collections.reverse(list);
        }
        System.out.println(list);
    }
}
// wc3.java
package ustc.wordcount;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.*;
/**
 * Reads a text file, counts how often each word occurs and prints the result
 * in three formats, ordered by the natural ordering of {@code WordEntity}.
 */
public class wc3 {

    /** File that is read when no path is given on the command line. */
    private static final String DEFAULT_PATH = "D:\\test.txt";

    /** Characters treated as word separators. */
    private static final String DELIMITERS = ",.! \n";

    /**
     * Counts the words of a file and prints the frequencies.
     *
     * @param args optional; when present, {@code args[0]} is the path of the
     *             file to read, otherwise {@code D:\test.txt} is used
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
        try {
            Map<String, Integer> map = countWords(path);

            // The TreeSet keeps the entries sorted via WordEntity.compareTo.
            Set<WordEntity> set = new TreeSet<WordEntity>();
            for (Map.Entry<String, Integer> entry : map.entrySet()) {
                set.add(new WordEntity(entry.getKey(), entry.getValue()));
            }

            // Format 1: build the output string by hand.
            System.out.println("输出形式一:");
            for (WordEntity w : set) {
                System.out.println("单词:" + w.getKey() + " 出现的次数为: " + w.getCount());
            }

            // Format 2: print the WordEntity itself and rely on its toString().
            System.out.println("输出形式二:");
            for (WordEntity w : set) {
                System.out.println(w);
            }

            // Format 3: only the first three entries of the sorted set.
            System.out.println("输出形式三:");
            int rank = 1;
            for (WordEntity w : set) {
                System.out.println("第" + rank + "名为单词:" + w.getKey() + " 出现的次数为: "
                        + w.getCount());
                if (rank == 3) { // stop after three entries
                    break;
                }
                rank++;
            }
        } catch (FileNotFoundException e) {
            System.out.println("文件未找到~!");
        } catch (IOException e) {
            System.out.println("文件读异常~!");
        }
    }

    /**
     * Reads the file line by line and tallies every word.
     *
     * Each line is tokenized on its own. {@code readLine()} drops the line
     * terminator, so joining the lines without a separator would glue the last
     * word of one line to the first word of the next.
     *
     * @param path path of the file to read
     * @return map from each distinct word to its number of occurrences
     * @throws IOException if the file cannot be opened or read
     */
    private static Map<String, Integer> countWords(String path) throws IOException {
        Map<String, Integer> counts = new HashMap<String, Integer>();
        BufferedReader reader = new BufferedReader(new FileReader(path));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                StringTokenizer tokens = new StringTokenizer(line, DELIMITERS);
                while (tokens.hasMoreTokens()) {
                    String word = tokens.nextToken();
                    Integer seen = counts.get(word);
                    counts.put(word, seen == null ? 1 : seen + 1);
                }
            }
        } finally {
            // Close the reader even when reading fails part-way through.
            reader.close();
        }
        return counts;
    }
}
// WordEntity.java file
/**
 * A word together with the number of times it occurs.
 *
 * The natural ordering puts higher counts first and breaks ties by the word
 * itself in ascending order, so a sorted collection such as a {@code TreeSet}
 * lists the most frequent words first. The class does not override
 * {@code equals}, so this ordering is not consistent with equals.
 */
class WordEntity implements Comparable<WordEntity> {
    private final String key;
    private final Integer count;

    /**
     * @param key   the word
     * @param count how often the word occurs
     */
    public WordEntity (String key, Integer count) {
        this.key = key;
        this.count = count;
    }

    /**
     * Orders by descending count, then by ascending word.
     *
     * To sort by ascending count instead, swap the two operands of the count
     * comparison.
     *
     * @param o the entity to compare with
     * @return a negative value, zero or a positive value as this entity sorts
     *         before, equal to or after {@code o}
     */
    @Override
    public int compareTo(WordEntity o) {
        // compareTo rather than subtraction: the difference of two ints can
        // overflow and produce a result with the wrong sign.
        int byCountDescending = o.count.compareTo(count);
        return byCountDescending != 0 ? byCountDescending : key.compareTo(o.key);
    }

    /** Returns the word followed by its occurrence count. */
    @Override
    public String toString() {
        return key + " 出现的次数为:" + count;
    }

    /** Returns the word. */
    public String getKey() {
        return key;
    }

    /** Returns the number of occurrences. */
    public Integer getCount() {
        return count;
    }
}