掌握Java流以及多线程的主要方法及运用,自学文本、字符串处理方法。
实验内容与要求:
扫描指定文件夹里面的所有文件,给出各个文件的属性信息。另外,针对每个文件(如A.txt)统计里面文章中单词的出现频次并输出统计结果到文件Sta_A.txt。Sta_A.txt文件包含A.txt文件里面每个单词出现的频次。运用多线程的方法编程实现每个文件创建一个线程进行单词频次统计,并且对比无多线程的计算时间。
数据集:
(1) 4个大文件,每个文件大小在1GB左右;
(2)100个小文件,每个文件大小在1MB左右。
实验要求:
1. 指定文件夹里面每个文件的单词频次都得统计,并保存到对应输出文件。输出文件中单词按频次降序排列。
2. 统计程序运行时间,包括总体运行时间和各个文件处理所花时间。
3. 对比多线程以及单线程情况下的计算时间。
4. 尽可能快地完成所有文件的单词频次统计。优化代码!
额外探索:对于大文件,尝试运用多线程来统计单个文件里面的单词频次,并与单线程做对比。对结果进行分析,思考多线程在此应用下的优劣。
main方法:
//只求能跑
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
/**
 * Entry point of the word-frequency experiment.
 *
 * <p>Lists every entry of the input folder, writes the attributes of each entry to
 * {@code property.txt}, then submits one {@code threadRunnable} task per regular file
 * to a fixed-size thread pool and reports the total elapsed time once all tasks finish.
 */
public class Multithreading {
    /** Folder whose files are scanned and counted. */
    private static final String INPUT_PATH = "D:/work/Internet coding/4BigFiles";
    /** File that receives the attribute report. */
    private static final String PROPERTY_FILE = "D:/work/Internet coding/property.txt";
    /** Name prefix of the result files produced by the counting tasks. */
    private static final String RESULT_PREFIX = "Sta_";
    /** Number of worker threads used for counting. */
    private static final int THREAD_COUNT = 4;

    /**
     * Runs the attribute report followed by the multi-threaded word count.
     *
     * @param args ignored
     */
    public static void main(String []args){
        File folder = new File(INPUT_PATH);
        File[] files = folder.listFiles();
        if (files == null) {
            // listFiles() returns null when the path is not a directory or cannot be read.
            System.err.println("Cannot list files in " + INPUT_PATH);
            return;
        }
        writeProperties(files);
        countWords(files);
    }

    /**
     * Writes the attributes of every entry to the property file and prints how long it took.
     * A failure is reported on stderr and does not prevent the word count from running.
     */
    private static void writeProperties(File[] files) {
        long start = System.currentTimeMillis();
        // try-with-resources closes (and therefore flushes) the writer even if a write fails.
        try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(PROPERTY_FILE), StandardCharsets.UTF_8))) {
            System.out.println("Write file properties into property.txt.");
            for (File file : files) {
                writeLine(writer, "File absolute path: " + file.getAbsolutePath());
                writeLine(writer, "File canonical path: " + file.getCanonicalPath());
                writeLine(writer, "File path: " + file.getPath());
                writeLine(writer, "File name: " + file.getName());
                writeLine(writer, "File length: " + file.length() + "bytes");
                writeLine(writer, "File can read: " + file.canRead());
                writeLine(writer, "File can write: " + file.canWrite());
                writeLine(writer, "File can execute: " + file.canExecute());
                writeLine(writer, "File is hidden: " + file.isHidden());
                writeLine(writer, "File last modified: " + file.lastModified());
                writer.newLine(); // blank line between entries
            }
        } catch (IOException | SecurityException e) {
            e.printStackTrace();
            return;
        }
        long different = System.currentTimeMillis() - start;
        System.out.println("File properties have been written to property.txt.");
        System.out.println("It cost " + different + " millisecond.\n");
    }

    /** Writes one line of text followed by a line separator. */
    private static void writeLine(BufferedWriter writer, String text) throws IOException {
        writer.write(text);
        writer.newLine();
    }

    /**
     * Submits one counting task per regular file, waits for all of them and prints the
     * total elapsed time.
     */
    private static void countWords(File[] files) {
        ExecutorService pool = Executors.newFixedThreadPool(THREAD_COUNT);
        long start = System.currentTimeMillis();
        try {
            for (File file : files) {
                // Directories cannot be read as text, and result files written into the
                // scanned folder by an earlier run must not be counted again.
                if (!file.isFile() || file.getName().startsWith(RESULT_PREFIX)) {
                    continue;
                }
                pool.submit(new threadRunnable(file));
            }
        } finally {
            // Without shutdown() the pool's worker threads keep the JVM alive forever.
            pool.shutdown();
        }
        try {
            pool.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
        } catch (InterruptedException e) {
            pool.shutdownNow();
            Thread.currentThread().interrupt();
        }
        long different = System.currentTimeMillis() - start;
        System.out.println("All files: It cost " + different + " millisecond.\n");
    }
}
实现Runnable接口的类:
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.*;
/**
 * Task that counts how often each word occurs in one text file and writes the counts,
 * sorted by descending frequency, to {@code Sta_<file name>} in the output directory.
 *
 * <p>The input is read as UTF-8. A word is a maximal run of the letters {@code a-z}
 * after lower-casing; every other character is a separator. The result file is encoded
 * as UTF-16LE with one {@code "word count"} pair per line.
 */
public class threadRunnable implements Runnable{
    /** Output directory used when the caller does not supply one. */
    private static final String DEFAULT_OUTPUT_DIR = "D:/work/Internet coding/4BigFiles";
    /** Separator pattern, compiled once instead of once per line. */
    private static final java.util.regex.Pattern NON_LETTERS =
            java.util.regex.Pattern.compile("[^a-z]+");

    private final File file;
    private final File outputDir;

    /**
     * Creates a task that writes its result into the default output directory.
     *
     * @param file text file to analyse
     */
    public threadRunnable(File file){
        this(file, new File(DEFAULT_OUTPUT_DIR));
    }

    /**
     * Creates a task that writes its result into the given directory.
     *
     * @param file text file to analyse
     * @param outputDir directory that receives {@code Sta_<file name>}
     */
    public threadRunnable(File file, File outputDir){
        this.file = file;
        this.outputDir = outputDir;
    }

    /**
     * Counts the words, writes the result file and prints progress and elapsed time.
     * Failures are printed to stderr; the timing lines are printed in either case.
     */
    @Override
    public void run() {
        long start = System.currentTimeMillis();
        try {
            System.out.println(file.getName() + " is start.");
            writeResult(sortByFrequency(countWords()));
            System.out.println("Word count statistics for " + file.getName() + " is done");
        } catch (IOException | RuntimeException e) {
            // run() is the thread boundary: report here, otherwise the executor hides the failure.
            e.printStackTrace();
        }
        long different = System.currentTimeMillis() - start;
        System.out.println(file.getName() + " is over!");
        System.out.println(file.getName() + ": It cost " + different + " millisecond.\n");
    }

    /** Reads the file line by line and returns the number of occurrences of each word. */
    private Map<String, Integer> countWords() throws IOException {
        Map<String, Integer> wordMap = new HashMap<>();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // Locale.ROOT keeps the lower-casing independent of the default locale.
                String lower = line.toLowerCase(Locale.ROOT);
                for (String word : NON_LETTERS.split(lower)) {
                    // A line that starts with a separator yields a leading empty token.
                    if (!word.isEmpty()) {
                        wordMap.merge(word, 1, Integer::sum);
                    }
                }
            }
        }
        return wordMap;
    }

    /** Returns the entries ordered by descending count; ties are ordered alphabetically. */
    private static List<Map.Entry<String, Integer>> sortByFrequency(Map<String, Integer> wordMap) {
        List<Map.Entry<String, Integer>> wordList = new ArrayList<>(wordMap.entrySet());
        wordList.sort((left, right) -> {
            int byCount = right.getValue().compareTo(left.getValue());
            return byCount != 0 ? byCount : left.getKey().compareTo(right.getKey());
        });
        return wordList;
    }

    /** Writes one {@code "word count"} line per entry to the result file. */
    private void writeResult(List<Map.Entry<String, Integer>> wordList) throws IOException {
        File resultFile = new File(outputDir, "Sta_" + file.getName());
        // Closing the writer flushes its buffer; without it the tail of the output is lost.
        try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(resultFile), StandardCharsets.UTF_16LE))) {
            for (Map.Entry<String, Integer> entry : wordList) {
                writer.write(entry.getKey() + " " + entry.getValue());
                writer.newLine();
            }
        }
    }
}