大致思路:
1:读取文件行数
2:为每个线程分配读取行数
分配算法:近似平均分配(每个线程分得 总行数 / 线程数 行,除不尽的余数行由最后一个线程读取)
| 线程数 | 文件总行数 | 线程分配的行数 |
| --- | --- | --- |
| 2 | 10 | thread1 = 5,thread2 = 5 |
| 2 | 9 | thread1 = 4,thread2 = 5 |
| 3 | 10 | thread1 = 3,thread2 = 3,thread3 = 4 |
3:启动线程读取文件
4:合并文件内容
5:校验文件完整性
核心文件已贴出,UploadService、ReadFileThread,欢迎大家交流讨论
UploadService
package cn.spring.ssm.service.impl;
import cn.spring.ssm.job.ReadFileThread;
import cn.spring.ssm.model.FileThreadVO;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang.StringUtils;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;
import javax.annotation.Resource;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Future;
/**
 * Reads an uploaded line-oriented text file by splitting it into contiguous
 * line ranges, reading every range on the shared executor, and merging the
 * pieces back together in their original order.
 *
 * <p>Limitations that follow from the implementation:
 * <ul>
 *   <li>The file is decoded as UTF-8.</li>
 *   <li>Segments are joined with {@code "\r\n"} and no trailing terminator, and
 *       the integrity check compares byte lengths. A file that uses bare
 *       {@code "\n"} terminators, or that ends with a line terminator, will
 *       therefore normally be reported as incomplete.</li>
 *   <li>Every task opens its own stream and skips forward to its first line, so
 *       the file is re-read from the start once per task.</li>
 * </ul>
 */
@Service
@Slf4j
public class UploadService {

    /** Charset used to decode the upload and to measure the merged result. */
    private static final Charset UTF_8 = Charset.forName("utf-8");

    /** Separator inserted between the segments returned by the reader tasks. */
    private static final String LINE_SEPARATOR = "\r\n";

    /** Maximum number of reader tasks per upload, injected from {@code file.thread.num}. */
    @Value("${file.thread.num}")
    private Integer threadNum;

    /** Executor on which the reader tasks run. */
    @Resource(name = "asyncServiceExecutor")
    private ThreadPoolTaskExecutor executor;

    /**
     * Reads the uploaded file with several concurrent tasks and returns its
     * merged content.
     *
     * <p>Lines are distributed as evenly as integer division allows; the last
     * task additionally takes the remainder. When the file has fewer lines than
     * {@code threadNum}, one task per line is used instead.
     *
     * <p>All tasks are submitted before any result is awaited, so up to
     * {@code threadNum} tasks are handed to the executor at once.
     *
     * @param file the uploaded file
     * @return the merged content, or {@code null} when {@code file} is empty
     * @throws IllegalStateException if {@code threadNum} is missing or less than 1
     * @throws Exception if reading fails, a reader task fails, the calling
     *                   thread is interrupted, or the integrity check fails
     */
    public String uploadByThread(MultipartFile file) throws Exception {
        if (file.isEmpty()) {
            return null;
        }
        if (threadNum == null || threadNum < 1) {
            throw new IllegalStateException(
                    "file.thread.num must be a positive integer but was " + threadNum);
        }

        int lines = getLineNum(file.getInputStream());
        // Never start more tasks than there are lines to read.
        int taskCount = Math.min(threadNum, lines);
        int linesPerTask = (taskCount == 0) ? 0 : lines / taskCount;

        List<FileThreadVO> threadVOS = new ArrayList<>(taskCount);
        List<Future<String>> futures = new ArrayList<>(taskCount);
        try {
            // Phase 1: submit every range so the reads can overlap.
            int startLine = 1;
            for (int i = 1; i <= taskCount; i++) {
                // The last task runs to the end of the file and absorbs the remainder.
                int endLine = (i == taskCount) ? lines : startLine + linesPerTask - 1;
                InputStream stream = file.getInputStream();

                FileThreadVO fileThreadVO = new FileThreadVO();
                fileThreadVO.setStart_line(startLine);
                fileThreadVO.setIs(stream);
                fileThreadVO.setEnd_line(endLine);
                threadVOS.add(fileThreadVO);

                try {
                    futures.add(executor.submit(new ReadFileThread(startLine, endLine, stream)));
                } catch (RuntimeException e) {
                    // The task never started, so nobody else will close this stream.
                    stream.close();
                    throw e;
                }
                startLine = endLine + 1;
            }

            // Phase 2: collect results in submission order to keep the line order.
            for (int i = 0; i < futures.size(); i++) {
                threadVOS.get(i).setResult(futures.get(i).get());
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw e;
        } finally {
            // No effect on completed tasks; stops the remaining ones after a failure.
            for (Future<String> future : futures) {
                future.cancel(true);
            }
        }

        // Join with separators *between* segments rather than appending one after
        // each segment and trimming, which would also strip real whitespace at the
        // start or end of the file content.
        List<String> segments = new ArrayList<>(threadVOS.size());
        for (FileThreadVO record : threadVOS) {
            segments.add(record.getResult());
        }
        String mergeStr = String.join(LINE_SEPARATOR, segments);

        if (!isComplete(file, mergeStr)) {
            log.error("###uploadByThread### 文件完整性校验失败!");
            throw new Exception("The file is incomplete!");
        }
        return mergeStr;
    }

    /**
     * Counts the lines in the given stream, decoding it as UTF-8.
     *
     * <p>The stream is consumed and closed by this method, including when
     * reading fails.
     *
     * @param is the stream to read; closed on return
     * @return the number of lines as defined by {@link BufferedReader#readLine()}
     * @throws IOException if reading fails
     */
    public int getLineNum(InputStream is) throws IOException {
        int line = 0;
        // Closing the reader also closes the wrapped stream.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(is, UTF_8))) {
            while (reader.readLine() != null) {
                line++;
            }
        }
        return line;
    }

    /**
     * Checks that the merged content has the same size in bytes as the upload.
     *
     * <p>This is a length comparison only; the content itself is not compared.
     *
     * @param file the original upload
     * @param data the merged content, measured as UTF-8
     * @return {@code true} when both byte lengths are equal
     * @throws IOException declared for compatibility with existing callers
     */
    public boolean isComplete(MultipartFile file, String data) throws IOException {
        // getSize() avoids copying the whole upload into memory just to measure it.
        long originSize = file.getSize();
        long resultSize = data.getBytes(UTF_8).length;
        return originSize == resultSize;
    }
}
ReadFileThread
package cn.spring.ssm.job;
import lombok.extern.slf4j.Slf4j;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.util.concurrent.Callable;
/**
 * Task that reads one contiguous range of lines from an input stream.
 *
 * <p>Line numbers are 1-based and the range is inclusive at both ends. The
 * stream is decoded as UTF-8 and is always closed when {@link #call()} returns,
 * whether it succeeds or fails.
 */
@Slf4j
public class ReadFileThread implements Callable<String> {

    /** 1-based number of the first line to read. */
    private final int start_index;

    /** 1-based number of the last line to read (inclusive). */
    private final int end_index;

    /** Stream to read from, positioned at the first line of the file. */
    private final InputStream is;

    /**
     * @param start_index 1-based number of the first line to read
     * @param end_index   1-based number of the last line to read (inclusive)
     * @param is          stream positioned at the first line; closed by {@link #call()}
     */
    public ReadFileThread(int start_index, int end_index, InputStream is) {
        this.start_index = start_index;
        this.end_index = end_index;
        this.is = is;
    }

    /**
     * Skips the lines before {@code start_index}, then reads up to and including
     * {@code end_index}.
     *
     * <p>At least the line at {@code start_index} is read when it exists, even
     * if {@code end_index} is smaller. If the stream ends early, only the lines
     * actually present are returned; nothing is appended for missing lines.
     *
     * @return the lines of the range joined with {@code "\r\n"}, without a
     *         trailing separator; empty when the range lies beyond the end of
     *         the stream
     * @throws Exception if reading from the stream fails
     */
    @Override
    public String call() throws Exception {
        StringBuilder result = new StringBuilder();
        // Closing the reader also closes the wrapped stream, on success and on failure.
        try (BufferedReader reader =
                     new BufferedReader(new InputStreamReader(is, Charset.forName("utf-8")))) {
            int loc = 1;
            boolean endOfStream = false;
            // Skip everything in front of the requested range.
            while (!endOfStream && loc < start_index) {
                endOfStream = reader.readLine() == null;
                loc++;
            }
            if (!endOfStream) {
                String lineText = reader.readLine();
                // readLine() yields null at end of stream; stop instead of appending "null".
                while (lineText != null) {
                    result.append(lineText);
                    if (loc >= end_index) {
                        break;
                    }
                    lineText = reader.readLine();
                    if (lineText != null) {
                        result.append("\r\n");
                    }
                    loc++;
                }
            }
        }
        String strResult = result.toString();
        log.info("###ReadFileThread###FILE {} IS COMPLETE result = {} size = {}", Thread.currentThread().getName(), strResult, strResult
                .getBytes(Charset.forName("utf-8")).length);
        return strResult;
    }
}
FileThreadVO
package cn.spring.ssm.web.model;
import lombok.Data;
import lombok.experimental.Accessors;
import java.io.InputStream;
/**
 * Value object describing one segment of a file that is read by multiple
 * threads: the stream handed to the reader, the line range assigned to it and
 * the text that was read.
 *
 * <p>Accessors, {@code equals}/{@code hashCode} and {@code toString} are
 * generated by Lombok from the field declarations below, so the field names
 * determine the accessor names. Setters are chainable.
 */
@Data
@Accessors(chain = true)
public class FileThreadVO {
// Input stream assigned to the reader of this segment.
private InputStream is;
// Number of the first line of this segment.
private Integer start_line;
// Number of the last line of this segment.
private Integer end_line;
// Text read for this segment.
private String result;
}
存在的问题
1:若文件行数小于线程数,多出来的线程分不到任何行,会造成线程浪费;因此该方案更适用于行数较多的数据报文
2:文件分段的方式:目前的方案是按文件行数分段,改成按字节分段会更合理;但按字节分段存在多字节字符被截断的问题——中文字符在 GBK 下占 2 个字节、在 UTF-8 下通常占 3 个字节,若分段边界(例如第 50 个字节)恰好落在一个中文字符的中间,读出来就是乱码