JAVA实现多线程分段读取文件

1 篇文章 0 订阅
1 篇文章 0 订阅

大致思路:

     1:读取文件行数

     2:为每个线程分配读取行数

       分配算法:近似平均分配(每个线程分配 总行数 / 线程数 行,除不尽的余数行全部分给最后一个线程)

       

| 线程数 | 文件总行数 | 线程分配的行数 |
| --- | --- | --- |
| 2 | 10 | thread1 = 5,thread2 = 5 |
| 2 | 9 | thread1 = 4,thread2 = 5 |
| 3 | 10 | thread1 = 3,thread2 = 3,thread3 = 4 |

     

     3:启动线程读取文件

     4:合并文件内容

     5:校验文件完整性

 核心文件已贴出,UploadService、ReadFileThread,欢迎大家交流讨论

UploadService

package cn.spring.ssm.service.impl;

import cn.spring.ssm.job.ReadFileThread;
import cn.spring.ssm.model.FileThreadVO;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang.StringUtils;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;

import javax.annotation.Resource;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Future;

/**
 * Created with IntelliJ IDEA.
 * Package: cn.spring.ssm.service.impl
 * User: 25414
 * Date: 2019/11/14
 * Time: 16:10
 * Description: reads an uploaded file with several pool tasks, each handling one contiguous
 * range of lines, then merges the pieces and checks the merged size against the upload.
 */
@Service
@Slf4j
public class UploadService {

    /** Separator placed after every piece when the task results are merged. */
    private static final String LINE_SEPARATOR = "\r\n";

    @Value("${file.thread.num}")
    private Integer threadNum; // configured number of reader tasks

    @Resource(name = "asyncServiceExecutor")
    private ThreadPoolTaskExecutor executor;  // pool that runs the reader tasks

    /**
     * Reads the uploaded file in line ranges using several reader tasks and returns the
     * merged text.
     * <p>
     * Every task gets {@code lines / tasks} lines; the last task additionally takes the
     * remainder. When the file has fewer lines than {@code threadNum}, one task per line is
     * used. All tasks are submitted before any result is awaited, so they can run
     * concurrently; results are merged in range order.
     * <p>
     * The merged text joins the pieces with CRLF and is trimmed before the size check, so a
     * file that uses bare LF line endings, or that has leading/trailing whitespace (including
     * a final line break), fails the check.
     *
     * @param file the uploaded file
     * @return the merged file content, or {@code null} when {@code file} is empty
     * @throws IllegalStateException if {@code file.thread.num} is missing or not positive
     * @throws Exception if a reader task fails or the merged size differs from the upload size
     */
    public String uploadByThread(MultipartFile file) throws Exception {
        if (file.isEmpty()) {
            return null;
        }
        if (threadNum == null || threadNum < 1) {
            throw new IllegalStateException(
                    "file.thread.num must be a positive integer, but was " + threadNum);
        }

        int lines = getLineNum(file.getInputStream());  // total number of lines
        int workers = Math.min(threadNum, lines);        // never more tasks than lines
        int linesPerWorker = (workers == 0) ? 0 : lines / workers;

        List<FileThreadVO> threadVOS = new ArrayList<>(workers);
        List<Future<String>> futures = new ArrayList<>(workers);

        // Submit every range first; waiting inside this loop would serialize the reads.
        int startLine = 1;
        for (int i = 1; i <= workers; i++) {
            // The last task runs to the end of the file and so absorbs the remainder.
            int endLine = (i == workers) ? lines : startLine + linesPerWorker - 1;
            InputStream stream = file.getInputStream();

            FileThreadVO fileThreadVO = new FileThreadVO();
            fileThreadVO.setStart_line(startLine);
            fileThreadVO.setEnd_line(endLine);
            fileThreadVO.setIs(stream);
            threadVOS.add(fileThreadVO);

            try {
                futures.add(executor.submit(new ReadFileThread(startLine, endLine, stream)));
            } catch (RuntimeException e) {
                // The task never started, so nobody else will close this stream.
                stream.close();
                throw e;
            }
            startLine = endLine + 1;
        }

        // Collect in submission order so the pieces are merged in file order.
        StringBuilder data = new StringBuilder();
        for (int i = 0; i < futures.size(); i++) {
            String piece = futures.get(i).get();
            threadVOS.get(i).setResult(piece);
            data.append(piece).append(LINE_SEPARATOR);
        }

        String mergeStr = data.toString().trim();
        if (!isComplete(file, mergeStr)) {
            log.error("###uploadByThread### 文件完整性校验失败!");
            throw new Exception("The file is incomplete!");
        }
        return mergeStr;
    }

    /**
     * Counts the lines of a stream, decoding it as UTF-8 like the reader tasks do.
     * The stream is always closed before this method returns.
     *
     * @param is the stream to count; consumed and closed by this call
     * @return the number of lines read
     * @throws IOException if reading fails
     */
    public int getLineNum(InputStream is) throws IOException {
        int line = 0;
        // Closing the reader also closes the wrapped stream, even when readLine throws.
        try (BufferedReader reader =
                     new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8))) {
            while (reader.readLine() != null) {
                line++;
            }
        }
        return line;
    }

    /**
     * Checks integrity by comparing the upload size with the UTF-8 encoded size of the
     * merged text.
     *
     * @param file the uploaded file
     * @param data the merged text
     * @return {@code true} when both byte counts are equal
     * @throws IOException declared for compatibility with existing callers
     */
    public boolean isComplete(MultipartFile file, String data) throws IOException {
        // getSize() reports the upload size in bytes without copying the content into memory.
        long originSize = file.getSize();
        long resultSize = data.getBytes(StandardCharsets.UTF_8).length;
        return originSize == resultSize;
    }

}

ReadFileThread 

package cn.spring.ssm.job;

import lombok.extern.slf4j.Slf4j;

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.util.concurrent.Callable;

/**
 * Created with IntelliJ IDEA.
 * Package: cn.spring.ssm.job
 * User: 25414
 * Date: 2019/11/14
 * Time: 8:51
 * Description: task that reads one contiguous range of lines from a stream.
 */
@Slf4j
public class ReadFileThread implements Callable<String> {

    /** Charset used to decode the stream and to measure the result size for the log line. */
    private static final Charset UTF_8 = Charset.forName("utf-8");

    /** Separator placed between the lines of the returned text. */
    private static final String LINE_SEPARATOR = "\r\n";

    private final int start_index;    // 1-based number of the first line to return
    private final int end_index;      // 1-based number of the last line to return
    private final InputStream is;     // stream to read; closed by call()

    /**
     * @param start_index 1-based number of the first line to return
     * @param end_index   1-based number of the last line to return
     * @param is          stream positioned at the start of the file; {@link #call()} closes it
     */
    public ReadFileThread(int start_index, int end_index, InputStream is) {
        this.start_index = start_index;
        this.end_index = end_index;
        this.is = is;
    }

    /**
     * Skips the lines before {@code start_index}, then returns the lines up to and including
     * {@code end_index}, joined with CRLF and without a trailing separator.
     * <p>
     * At least one line is attempted even when {@code end_index} is smaller than
     * {@code start_index}. Reading stops at the end of the stream, so a range that extends
     * past the last line returns only the lines that exist (an empty string if none do).
     * The stream is closed whether or not reading succeeds.
     *
     * @return the text of the requested line range
     * @throws Exception if the stream cannot be read
     */
    @Override
    public String call() throws Exception {
        StringBuilder result = new StringBuilder();
        // try-with-resources closes the reader (and the wrapped stream) on every exit path.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(is, UTF_8))) {
            int loc = 1;
            // Discard everything before the first wanted line; give up early at end of stream.
            while (loc < start_index && reader.readLine() != null) {
                loc++;
            }

            boolean first = true;
            do {
                String text = reader.readLine();
                if (text == null) {
                    // End of stream: stop instead of appending the literal text "null".
                    break;
                }
                if (!first) {
                    result.append(LINE_SEPARATOR);
                }
                result.append(text);
                first = false;
                loc++;
            } while (loc <= end_index);
        }

        String strResult = result.toString();
        log.info("###ReadFileThread###FILE {} IS COMPLETE result = {} size = {}", Thread.currentThread().getName(), strResult, strResult
                .getBytes(UTF_8).length);
        return strResult;
    }
}

FileThreadVO  

package cn.spring.ssm.web.model;

import lombok.Data;
import lombok.experimental.Accessors;

import java.io.InputStream;

/**
 * Created with IntelliJ IDEA.
 * Package: cn.spring.ssm.web.model
 * User: 25414
 * Date: 2019/11/14
 * Time: 16:45
 * Description: value object describing one reader task of the multi-threaded file read:
 * the stream it reads, the line range assigned to it, and the text it produced.
 * Accessors are generated by Lombok; with {@code chain = true} the setters return
 * {@code this}.
 */
@Data
@Accessors(chain = true)
public class FileThreadVO {
    /** Input stream handed to the reader task for this range. */
    private InputStream is;
    /** Number of the first line assigned to the task (UploadService counts from 1). */
    private Integer start_line;
    /** Number of the last line assigned to the task, inclusive. */
    private Integer end_line;
    /** Text returned by the reader task for this range. */
    private String result;
}

存在的问题

1:若文件行数小于线程数会造成线程浪费 ,适用于多行的数据报文

2:文件分段的方式,目前的方案是根据文件行数,若改成按字节分段更合理;但按字节分段会存在多字节字符被截断的问题:中文字符在 UTF-8 下通常占 3 个字节(GBK 下占 2 个字节),若分段边界(例如第 50 个字节)恰好落在某个中文字符的中间,该字符会被拆到两个分段里,这样读出来是乱码

  • 1
    点赞
  • 16
    收藏
    觉得还不错? 一键收藏
  • 5
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 5
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值