package doc.com;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.lang.annotation.Annotation;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
/**
 * Counts how often each word occurs in the files below a directory and writes the
 * counts, most frequent first, to an output file as a JSON array.
 *
 * <p>A "word" is any run of three or more ASCII letters; matching is case-sensitive.
 */
public class TxtReader {

    /** Matches runs of three or more ASCII letters; every match is counted as one word. */
    private static final Pattern WORD_PATTERN = Pattern.compile("[a-zA-Z]{3,}");

    /** Path scanned by {@link #main(String[])} when no first argument is supplied. */
    private static final String DEFAULT_INPUT_PATH = "D:\\";

    /** File written by {@link #main(String[])} when no second argument is supplied. */
    private static final String DEFAULT_OUTPUT_PATH = "F://countWord.txt";

    /** File read by {@link #readLine(String)} when it is given a null or empty name. */
    private static final String DEFAULT_LINE_FILE = "F:\\xx.txt";

    /** Size, in chars, of the buffer used when reading a whole file. */
    private static final int READ_BUFFER_SIZE = 8192;

    /** Orders entries by descending count; equal counts are ordered by word so output is stable. */
    private static final Comparator<Map.Entry<String, Integer>> BY_COUNT_DESCENDING =
            new Comparator<Map.Entry<String, Integer>>() {
                @Override
                public int compare(Entry<String, Integer> o1, Entry<String, Integer> o2) {
                    int byCount = o2.getValue().compareTo(o1.getValue());
                    return byCount != 0 ? byCount : o1.getKey().compareTo(o2.getKey());
                }
            };

    /**
     * Recursively collects the paths of all files below {@code path}.
     *
     * <p>If {@code path} is a directory, the path of every non-directory entry beneath it
     * (at any depth) is appended to {@code fileNameList}. Otherwise {@code path} itself is
     * appended, as a full path, so the entry can be passed straight to
     * {@link #readAll(String)}. Directories that cannot be listed are reported on
     * {@code System.err} and skipped.
     *
     * @param path         file or directory to start from
     * @param fileNameList list that receives the collected paths; it is modified in place
     * @return the same {@code fileNameList} instance that was passed in
     */
    public static ArrayList<String> readFiles1(String path, ArrayList<String> fileNameList) {
        File file = new File(path);
        if (!file.isDirectory()) {
            fileNameList.add(file.getPath());
            return fileNameList;
        }

        File[] children = file.listFiles();
        if (children == null) {
            // listFiles() returns null (not an empty array) when the directory cannot be
            // read, e.g. access denied; skip it instead of failing the whole traversal.
            System.err.println("Skipping unreadable directory: " + file.getPath());
            return fileNameList;
        }

        for (File child : children) {
            if (child.isDirectory()) {
                readFiles1(child.getPath(), fileNameList);
            } else {
                fileNameList.add(child.getPath());
            }
        }
        return fileNameList;
    }

    /**
     * Counts word frequencies across all files below a path and writes them, sorted by
     * descending frequency, to an output file.
     *
     * @param args optional; {@code args[0]} is the file or directory to scan (default
     *             {@code D:\}) and {@code args[1]} is the output file (default
     *             {@code F://countWord.txt})
     */
    public static void main(String[] args) {
        String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;
        String outputPath = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT_PATH;

        Map<String, Integer> wordCountMap = new HashMap<String, Integer>();
        try {
            // Recursively collect the path of every file below the input path.
            ArrayList<String> fileNameList = readFiles1(inputPath, new ArrayList<String>());
            System.out.println(fileNameList.size());
            for (String fileName : fileNameList) {
                System.out.println("读取文件:" + fileName);
                // Count file by file: words cannot fuse across file boundaries and only
                // one file's text is held in memory at a time.
                countWords(readAll(fileName), wordCountMap);
            }
        } catch (Exception e) {
            // Top-level boundary: report the failure and still write whatever was counted.
            e.printStackTrace();
        }

        // Copy the map entries into a list so they can be sorted by count.
        List<Map.Entry<String, Integer>> list =
                new ArrayList<Map.Entry<String, Integer>>(wordCountMap.entrySet());
        Collections.sort(list, BY_COUNT_DESCENDING);

        JSONArray array = JSONArray.parseArray(JSON.toJSONString(list));
        GenJSONFile.WriteStringToFile5(array.toJSONString(), outputPath);
    }

    /** Adds one to the count of every {@link #WORD_PATTERN} match found in {@code text}. */
    private static void countWords(CharSequence text, Map<String, Integer> wordCountMap) {
        Matcher matcher = WORD_PATTERN.matcher(text);
        while (matcher.find()) {
            String word = matcher.group();
            Integer previous = wordCountMap.get(word);
            wordCountMap.put(word, previous == null ? 1 : previous + 1);
        }
    }

    /**
     * Reads the entire content of a file as text, decoded with the platform default charset.
     *
     * @param filename path of the file to read
     * @return the file's content, or an empty string if the file cannot be opened or read
     *         (the stack trace is printed in that case)
     */
    public static String readAll(String filename) {
        StringBuilder text = new StringBuilder();
        try {
            InputStreamReader reader = new InputStreamReader(new FileInputStream(filename));
            try {
                // Read until end of stream: a single read() call is allowed to return
                // fewer bytes than requested, and available() is not the file size.
                char[] buffer = new char[READ_BUFFER_SIZE];
                int read;
                while ((read = reader.read(buffer)) != -1) {
                    text.append(buffer, 0, read);
                }
            } finally {
                reader.close();
            }
        } catch (FileNotFoundException e1) {
            e1.printStackTrace();
            return "";
        } catch (IOException e1) {
            e1.printStackTrace();
            return "";
        }
        return text.toString();
    }

    /**
     * Reads a UTF-8 text file line by line.
     *
     * @param filename path of the file to read; when null or empty, the legacy default
     *                 {@code F:\xx.txt} is read instead
     * @return the lines read, without line terminators; if the file cannot be opened the
     *         list is empty, and if reading fails part-way it holds the lines read so far
     *         (the stack trace is printed in both cases)
     */
    public static List<String> readLine(String filename) {
        List<String> lines = new ArrayList<String>();
        String target = (filename == null || filename.length() == 0) ? DEFAULT_LINE_FILE : filename;
        try {
            BufferedReader reader = new BufferedReader(
                    new InputStreamReader(new FileInputStream(target), "UTF-8"));
            try {
                String line;
                while ((line = reader.readLine()) != null) {
                    lines.add(line);
                }
            } finally {
                reader.close();
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return lines;
    }
}
// 统计目录下所有文本单词出现次数并生成次数排序文本(递归)
// 最新推荐文章于 2022-06-30 15:12:18 发布