统计目录下所有文本单词出现次数并生成次数排序文本(递归)

package doc.com;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.lang.annotation.Annotation;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;

public class TxtReader {

	/** A "word" is a run of three or more ASCII letters; compiled once and reused for every file. */
	private static final Pattern WORD_PATTERN = Pattern.compile("[a-zA-Z]{3,}");

	/**
	 * Recursively collects the paths of all regular files found under {@code path}.
	 *
	 * <p>If {@code path} is a directory, its tree is walked and the path of every
	 * non-directory entry is appended to {@code fileNameList}. If {@code path} is not
	 * a directory, the path itself is appended, so each entry of the result can be
	 * passed straight to {@link #readAll(String)}.
	 *
	 * @param path         file or directory to scan
	 * @param fileNameList accumulator the discovered paths are appended to
	 * @return the same {@code fileNameList} instance that was passed in
	 */
	public static ArrayList<String> readFiles1(String path, ArrayList<String> fileNameList) {

		File file = new File(path);

		if (!file.isDirectory()) {
			// Keep the full path (not just the base name) so the entry can be opened
			// later, and so the result does not depend on the platform's separator.
			fileNameList.add(file.getPath());
			return fileNameList;
		}

		File[] children = file.listFiles();
		if (children == null) {
			// listFiles() returns null when the directory cannot be read
			// (permission denied, I/O error); skip it instead of failing with an NPE.
			return fileNameList;
		}

		for (File child : children) {
			if (child.isDirectory()) {
				readFiles1(child.getPath(), fileNameList);
			} else {
				fileNameList.add(child.getPath());
			}
		}
		return fileNameList;
	}

	/**
	 * Counts how often each word occurs in all files under a fixed directory and
	 * writes the counts, sorted by descending frequency, as a JSON array to
	 * {@code F://countWord.txt}.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {

		String filePath = "D:\\";
		Map<String, Integer> wordCountMap = new HashMap<String, Integer>();
		try {
			// Recursively collect the path of every file under the directory.
			ArrayList<String> fileNameList = readFiles1(filePath, new ArrayList<String>());
			System.out.println(fileNameList.size());
			for (String fileName : fileNameList) {
				System.out.println("读取文件:" + fileName);
				// Count each file on its own: this avoids buffering the whole tree in
				// memory and prevents the last word of one file from fusing with the
				// first word of the next.
				countWords(readAll(fileName), wordCountMap);
			}

		} catch (Exception e) {
			// Best effort: report the failure and still write whatever was counted so far.
			e.printStackTrace();
		}

		Comparator<Map.Entry<String, Integer>> valueComparator = new Comparator<Map.Entry<String, Integer>>() {
			public int compare(Entry<String, Integer> o1, Entry<String, Integer> o2) {
				return o1.getValue().compareTo(o2.getValue());
			}
		};
		// Copy the map entries into a list so they can be sorted by count.
		List<Map.Entry<String, Integer>> list = new ArrayList<Map.Entry<String, Integer>>(wordCountMap.entrySet());
		// Sort ascending by count, then reverse to get the most frequent words first.
		Collections.sort(list, valueComparator);
		Collections.reverse(list);
		JSONArray array = JSONArray.parseArray(JSON.toJSONString(list));
		GenJSONFile.WriteStringToFile5(array.toJSONString(), "F://countWord.txt");

	}

	/**
	 * Adds the occurrences of every word found in {@code text} to {@code counts}.
	 *
	 * @param text   text to scan
	 * @param counts word-to-occurrence map that is updated in place
	 */
	private static void countWords(CharSequence text, Map<String, Integer> counts) {
		Matcher matcher = WORD_PATTERN.matcher(text);
		while (matcher.find()) {
			String word = matcher.group();
			Integer previous = counts.get(word);
			counts.put(word, previous == null ? 1 : previous + 1);
		}
	}

	/**
	 * Reads the whole file and returns its content decoded with the platform
	 * default charset.
	 *
	 * <p>Errors are reported with {@code printStackTrace()} and an empty string is
	 * returned, so a single unreadable file does not abort a batch run.
	 *
	 * @param filename path of the file to read
	 * @return the file content, or {@code ""} if the file could not be read
	 */
	public static String readAll(String filename) {
		String text = "";
		FileInputStream filein = null;
		try {
			filein = new FileInputStream(filename);
			InputStreamReader reader = new InputStreamReader(filein);
			StringBuilder content = new StringBuilder();
			char[] buffer = new char[8192];
			int read;
			// Read until end of stream: available() is only an estimate and a single
			// read() call is not guaranteed to fill the buffer.
			while ((read = reader.read(buffer)) != -1) {
				content.append(buffer, 0, read);
			}
			text = content.toString();
		} catch (IOException e1) {
			// Also covers FileNotFoundException.
			e1.printStackTrace();
		} finally {
			closeQuietly(filein);
		}
		return text;
	}

	/**
	 * Reads a UTF-8 encoded text file line by line.
	 *
	 * <p>Errors are reported with {@code printStackTrace()}; the lines read before
	 * the failure (possibly none) are returned.
	 *
	 * @param filename path of the file to read
	 * @return the lines of the file, without line terminators
	 */
	public static List<String> readLine(String filename) {
		List<String> lines = new ArrayList<String>();
		FileInputStream fis = null;
		try {
			// Open the file the caller asked for.
			fis = new FileInputStream(filename);
			BufferedReader br = new BufferedReader(new InputStreamReader(fis, "UTF-8"));
			String line;
			while ((line = br.readLine()) != null) {
				lines.add(line);
			}
		} catch (IOException e) {
			// Also covers FileNotFoundException and UnsupportedEncodingException.
			e.printStackTrace();
		} finally {
			closeQuietly(fis);
		}
		return lines;
	}

	/**
	 * Closes {@code in} if it is non-null, reporting (but not propagating) a failure to close.
	 *
	 * @param in stream to close, may be {@code null}
	 */
	private static void closeQuietly(FileInputStream in) {
		if (in == null) {
			return;
		}
		try {
			in.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值