一、实验目的与内容
1. 实验目的
掌握Java流的主要方法及运用,自学文本、字符串处理方法。
2. 实验内容
扫描指定文件夹里面的所有文件,给出各个文件的属性信息。另外,针对每个文件(如A.txt)统计里面文章中单词的出现频次并输出统计结果到文件Sta_A.txt。Sta_A.txt文件包含A.txt文件里面每个单词出现的频次。
3. 实验要求:
- 指定文件夹里面所有的文件的属性都得列出,输出到property.txt。
- 指定文件夹里面每个文件的单词频次都得统计,并保存到对应输出文件。输出文件中单词按频次降序排列。
- 统计程序运行时间,包括总体运行时间和各个文件单词频次统计所花时间。
package test;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.regex.*;

/**
 * Scans a folder, writes the properties of every entry to a property file, and writes a
 * word-frequency report ({@code Sta_<name>}) for every regular file in that folder.
 *
 * <p>Input files are read, and all output files are written, as UTF-16LE. Words are maximal
 * runs of the letters {@code a-z} after lower-casing; every other character is a separator.
 * Each report lists {@code word count} pairs in descending order of count.
 *
 * <p>The program also prints how long the property listing, each file's word count, and the
 * whole run took.
 */
public class WordsCount {

    /** Folder scanned when no command-line argument is given. */
    private static final String DEFAULT_INPUT_DIR = "D:/work/Internet coding/Week3 datasets";

    /** Name of the property listing; by default it is created next to the input folder. */
    private static final String PROPERTY_FILE_NAME = "property.txt";

    /** Prefix prepended to an input file's name to form its report's name. */
    private static final String RESULT_PREFIX = "Sta_";

    /** Everything that is not a lower-case ASCII letter separates words. Compiled once. */
    private static final Pattern NON_LETTERS = Pattern.compile("[^a-z]+");

    /**
     * Program entry point.
     *
     * @param args optional; {@code args[0]} is the folder to scan (defaults to
     *     {@link #DEFAULT_INPUT_DIR}) and {@code args[1]} is the property file to write
     *     (defaults to {@code property.txt} in the parent of the scanned folder). With no
     *     arguments the same files are used as before these parameters existed.
     */
    public static void main(String[] args) {
        // Start timing the whole run as early as possible.
        long totalStart = System.nanoTime();
        String[] options = (args == null) ? new String[0] : args;

        File folder = new File(options.length > 0 ? options[0] : DEFAULT_INPUT_DIR);
        if (!folder.isDirectory()) {
            System.out.println("Invalid folder path");
            return;
        }
        File propertyFile = options.length > 1
                ? new File(options[1])
                : new File(folder.getAbsoluteFile().getParentFile(), PROPERTY_FILE_NAME);

        // listFiles() returns null when the directory cannot be read.
        File[] files = folder.listFiles();
        if (files != null) {
            reportProperties(files, propertyFile);

            // NOTE(review): reports are written into the scanned folder itself, so running the
            // program again will also list and count the Sta_* files of the previous run.
            for (File file : files) {
                if (!file.isFile()) {
                    // Only regular files contain text to count; skip sub-directories.
                    continue;
                }
                long start = System.nanoTime();
                try {
                    List<Map.Entry<String, Integer>> sorted = sortByFrequency(countWords(file));
                    writeWordCounts(sorted, new File(folder, RESULT_PREFIX + file.getName()));
                    System.out.println("Word count statistics for " + file.getName() + " is done");
                } catch (IOException e) {
                    // Keep going: one unreadable file must not stop the remaining ones.
                    System.err.println("Failed to count words in " + file + ": " + e);
                }
                System.out.println("It took a total of " + elapsedMillis(start)
                        + " milliseconds to process the file " + file.getName() + "\n");
            }
        }

        System.out.println("The whole running time: " + elapsedMillis(totalStart) + " milliseconds");
    }

    /** Writes the property listing, prints progress and timing, and reports I/O failures. */
    private static void reportProperties(File[] files, File propertyFile) {
        System.out.println("Write file properties into " + propertyFile.getName() + ".");
        long start = System.nanoTime();
        try {
            writeProperties(files, propertyFile);
            System.out.println("File properties have been written to " + propertyFile.getName() + ".");
            System.out.println("It cost " + elapsedMillis(start) + " millisecond.\n");
        } catch (IOException e) {
            System.err.println("Failed to write " + propertyFile + ": " + e);
        }
    }

    /** Writes one block of properties per entry, separated by a blank line. */
    private static void writeProperties(File[] files, File propertyFile) throws IOException {
        // try-with-resources flushes and closes the writer even if an entry fails midway.
        try (BufferedWriter out = newWriter(propertyFile)) {
            for (File file : files) {
                writeLine(out, "File absolute path: " + file.getAbsolutePath());
                writeLine(out, "File canonical path: " + file.getCanonicalPath());
                writeLine(out, "File path: " + file.getPath());
                writeLine(out, "File name: " + file.getName());
                writeLine(out, "File length: " + file.length() + "bytes");
                writeLine(out, "File can read: " + file.canRead());
                writeLine(out, "File can write: " + file.canWrite());
                writeLine(out, "File can execute: " + file.canExecute());
                writeLine(out, "File is hidden: " + file.isHidden());
                // Raw value of File.lastModified(): milliseconds since the epoch.
                writeLine(out, "File last modified: " + file.lastModified());
                out.newLine();
            }
        }
    }

    /** Counts how often each lower-cased {@code a-z} word occurs in the given file. */
    private static Map<String, Integer> countWords(File file) throws IOException {
        Map<String, Integer> counts = new HashMap<>();
        try (BufferedReader in = new BufferedReader(
                new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_16LE))) {
            String line;
            while ((line = in.readLine()) != null) {
                // Locale.ROOT keeps the lower-casing independent of the machine's locale.
                for (String word : NON_LETTERS.split(line.toLowerCase(Locale.ROOT))) {
                    // split() yields an empty first element when the line starts with a separator.
                    if (!word.isEmpty()) {
                        counts.merge(word, 1, Integer::sum);
                    }
                }
            }
        }
        return counts;
    }

    /** Returns the entries ordered by descending count; equal counts are ordered by word. */
    private static List<Map.Entry<String, Integer>> sortByFrequency(Map<String, Integer> counts) {
        List<Map.Entry<String, Integer>> entries = new ArrayList<>(counts.entrySet());
        entries.sort((a, b) -> {
            int byCount = Integer.compare(b.getValue(), a.getValue());
            // The alphabetical tie-break makes the report reproducible between runs.
            return byCount != 0 ? byCount : a.getKey().compareTo(b.getKey());
        });
        return entries;
    }

    /** Writes one {@code word count} line per entry to the report file. */
    private static void writeWordCounts(List<Map.Entry<String, Integer>> sorted, File resultFile)
            throws IOException {
        // Closing the writer is what pushes the buffered lines to disk.
        try (BufferedWriter out = newWriter(resultFile)) {
            for (Map.Entry<String, Integer> entry : sorted) {
                writeLine(out, entry.getKey() + " " + entry.getValue());
            }
        }
    }

    /** Opens a buffered UTF-16LE writer (the encoding the author saw reported by Notepad). */
    private static BufferedWriter newWriter(File target) throws IOException {
        return new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(target), StandardCharsets.UTF_16LE));
    }

    /** Writes the text followed by the platform line separator. */
    private static void writeLine(BufferedWriter out, String text) throws IOException {
        out.write(text);
        out.newLine();
    }

    /** Milliseconds elapsed since the given {@link System#nanoTime()} reading. */
    private static long elapsedMillis(long startNanos) {
        return (System.nanoTime() - startNanos) / 1_000_000L;
    }
}