多线程读单线程写文件工具类ReadWriteTaskUtil

背景

经常有这样的需求:读取一个文件并用多线程处理,再将处理结果保存到另一个文件。如果等到全部处理完成后再一次性保存,容易内存溢出,所以写了一个工具类:多线程处理数据的同时,用单线程把结果持续写入文件。
使用方法

        // Source file that is read line by line, and the file that receives the results.
        String inputPath = "/data/test.txt";
        String outputPath = "/data/out.txt";
        // 30 worker threads; every input line is passed through unchanged and
        // written back followed by a newline.
        int queuedCount = ReadWriteTaskUtil.startCompute(
                inputPath,
                outputPath,
                30,
                raw -> Arrays.asList(raw),
                item -> Arrays.asList(String.format("%s\n", item)));
        System.out.println(queuedCount);

ReadWriteTaskUtil代码如下:

import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Function;

/**
 * 多线程处理数据,单线程写文件
 * @author angryfun
 * @date 2021/10/9
 */
@Slf4j
public class ReadWriteTaskUtil {

    /**
     * 多线程处理单线程写入文件
     * @param inputPath 本地路径源文件路径
     * @param outputPath 本地文件输出路径
     * @param threadNum 启动多少个线程处理数据
     * @param computeFunction 计算函数,入参是行字符串,出参是往队列添加的List<Object>
     * @param writeFunction 写函数,入参是队列返回的结果Object,出参是写入文件的List<String>
     * @param <T>
     * @return 写入的行数
     */
    public static <T> int startCompute(String inputPath, String outputPath, int threadNum, Function<String, List<T>> computeFunction, Function<T, List<String>> writeFunction) {
        try (FileInputStream inputStream = new FileInputStream(inputPath)) {
            return startCompute(inputStream, outputPath, threadNum, computeFunction, writeFunction);
        } catch (Exception ex) {
            log.error("computeError", ex);
        }
        return -1;
    }

    /**
     * 多线程处理单线程写入文件
     * @param is 输入文件流
     * @param outputPath 本地文件输出路径
     * @param threadNum 启动多少个线程处理数据
     * @param computeFunction 计算函数,入参是行字符串,出参是往队列添加的List<Object>
     * @param writeFunction 写函数,入参是队列返回的结果Object,出参是写入文件的List<String>
     * @param <T>
     * @return 写入的行数
     */
    public static <T> int startCompute(InputStream is, String outputPath, int threadNum, Function<String, List<T>> computeFunction, Function<T, List<String>> writeFunction) {
        LinkedBlockingQueue<T> queue = new LinkedBlockingQueue<>(10000);
        ExecutorService writeExecutor = ThreadUtil.getBlockThreadPoolExecutor(1, "MutliReadSingleWriteTaskWriteThread");
        writeExecutor.execute(new FileWrite(outputPath, queue, writeFunction));

        ExecutorService executorService = ThreadUtil.getBlockThreadPoolExecutor(threadNum, "MutliReadSingleWriteTasKReadThread");
        int total = 0;
        AtomicInteger valid = new AtomicInteger();
        try (
                Reader r = new InputStreamReader(is);
                BufferedReader reader = new BufferedReader(r)
        ) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (StringUtils.isBlank(line)) {
                    continue;
                }
                executorService.execute(new Compute(line, valid, queue, computeFunction));
                total += 1;
                if (total % 1000 == 0) {
                    log.info(String.format("read %d lines, valid %d", total, valid));
                }
            }
        } catch (IOException e) {
            log.error(e.getMessage(), e);
        }

        try {
            executorService.shutdown();
            executorService.awaitTermination(1, TimeUnit.DAYS);
        } catch (Exception e) {
            log.error(e.getMessage(), e);
        }

        try {
            writeExecutor.shutdownNow();
            executorService.awaitTermination(1, TimeUnit.DAYS);
        } catch (Exception e) {
            log.error(e.getMessage(), e);
        }
        return valid.get();
    }

    @AllArgsConstructor
    private static class Compute<T> implements Runnable {
        private String line;
        private AtomicInteger valid;
        private LinkedBlockingQueue<T> queue;
        private Function<String, List<T>> function;

        @Override
        public void run() {
            try {
                List<T> result = function.apply(line);
                for (T t : result) {
                    queue.put(t);
                }
                valid.addAndGet(result.size());
            } catch (Exception e) {
                log.error(e.getMessage(), e);
            }
        }
    }

    @AllArgsConstructor
    private static class FileWrite<T> implements Runnable {
        private String path;
        private LinkedBlockingQueue<T> queue;
        private Function<T, List<String>> function;

        @Override
        public void run() {
            try (BufferedWriter writer = new BufferedWriter(new FileWriter(path))) {
                log.info("write result");
                T result = null;
                boolean interrrupt = false;
                while (true) {
                    try {
                        result = queue.poll(10, TimeUnit.SECONDS);
                        if (null == result && interrrupt) {
                            break;
                        }
                        if (null == result) {
                            continue;
                        }
                        List<String> resultList = function.apply(result);
                        for (String str : resultList) {
                            writer.write(str);
                        }
                    } catch (Exception e) {
                        if (e instanceof InterruptedException) {
                            interrrupt = true;
                        }
                        log.error(e.getMessage(), e);
                    }
                }
            } catch (IOException e) {
                log.error(e.getMessage(), e);
            }
        }
    }

    public static void main(String[] args) {
        String inputPath = "/data/test.txt";
        String outputPath = "/data/out.txt";
        int valid = startCompute(inputPath, outputPath, 30,
                line -> Arrays.asList(line), line -> Arrays.asList(String.format("%s\n", line)));
        System.out.println(valid);
    }
}

ThreadUtil代码如下:

import com.google.common.util.concurrent.ThreadFactoryBuilder;
import lombok.extern.slf4j.Slf4j;

import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.RejectedExecutionHandler;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;


/**
 * Factory for fixed-size thread pools whose submitters block, instead of being
 * rejected, when the pool's work queue is full.
 *
 * @author angryfun
 * @date 2021/4/15
 */
@Slf4j
public class ThreadUtil {
    /**
     * Creates a fixed-size pool with a bounded work queue of 50 tasks. When the queue is
     * full, {@link BlockHandler} makes the submitting thread wait for free space.
     *
     * @param poolSize   number of threads in the pool (used as both core and maximum size)
     * @param threadName prefix of the thread names; threads are named
     *                   {@code <threadName>-thread-<n>}
     * @return the new executor; the caller is responsible for shutting it down
     */
    public static ExecutorService getBlockThreadPoolExecutor(int poolSize, String threadName) {
        ThreadFactory threadFactory = new ThreadFactoryBuilder().
                setNameFormat(threadName + "-thread-%d")
                .setUncaughtExceptionHandler(new ThreadUtil.DefaultUncaughtExceptionHandler())
                .build();
        return new ThreadPoolExecutor(poolSize, poolSize, 0,
                TimeUnit.MINUTES, new ArrayBlockingQueue<>(50),
                threadFactory, new ThreadUtil.BlockHandler());
    }

    /** Logs any exception that escapes a task running on a pool thread. */
    static class DefaultUncaughtExceptionHandler implements Thread.UncaughtExceptionHandler {
        @Override
        public void uncaughtException(Thread t, Throwable e) {
            log.error("uncaught exception occurred in {}", t.getName(), e);
        }
    }

    /**
     * Rejection handler that blocks the submitting thread until the work queue has room.
     * A task rejected because the executor is already shut down is dropped silently.
     */
    static class BlockHandler implements RejectedExecutionHandler {
        @Override
        public void rejectedExecution(Runnable r, ThreadPoolExecutor executor) {
            if (executor.isShutdown()) {
                return;
            }
            try {
                // put() blocks until space is available in the queue.
                executor.getQueue().put(r);
            } catch (InterruptedException e) {
                // Restore the flag so the submitter can observe the interruption;
                // the task is not enqueued in this case.
                Thread.currentThread().interrupt();
                log.error("线程中断", e);
            }
        }
    }
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值