package com.icinfo; import java.io.*; import java.util.*; /** * 统计一个文件中各词出现的频率,并打印 */ public class FileWordCount { // 使用HashMap来存储单词的频率 Map<String, Integer> wordCount = new HashMap<>(); public static void main(String[] args) { HashMap<String, Integer> map = (HashMap<String, Integer>) new FileWordCount() .wordCount("C:/Users/hzhb/Desktop/test.txt"); // 自定义排序 List<Map.Entry<String, Integer>> list = new LinkedList<>(); list.addAll(map.entrySet()); list.sort(Comparator.comparingInt(e -> e.getValue())); list.forEach(System.out::println); } /** * @param fileName 文件名(将英文文章复制到一个文件中去) */ public Map<String, Integer> wordCount(String fileName) { File file = new File(fileName); FileInputStream fis = null; try { fis = new FileInputStream(file); } catch (FileNotFoundException e) { System.out.println("文件不存在!"); } BufferedReader bufr = new BufferedReader(new InputStreamReader(fis)); String s; try { while ((s = bufr.readLine()) != null) { // 移除字符串的前导空白和后尾部空白 s = s.trim(); // 正则表达式:以非字母或者是数字为分隔符,进行分割 // 英文单词以空格为分隔符,将单词分隔 String[] str = s.split("(\\s+\\W+)|[\\s+\\W+]"); for (int i = 0; i < str.length; i++) { //并将所有大写字母转换为小写 String currentStr = str[i].toLowerCase(); // 如果HashMap中已有该值,将值加1 if (wordCount.containsKey(currentStr)) { wordCount.put(currentStr, wordCount.get(currentStr) + 1); } else { // 默认初始化该单词的出现次数为1 wordCount.put(currentStr, 1); } } } } catch (IOException e) { e.printStackTrace(); } // 移除HashMap中的""空字符串 wordCount.remove(""); return wordCount; } }
统计一篇文章中各英语单词出现的频数
最新推荐文章于 2022-10-11 19:46:24 发布