JAVA技巧---多线程容器式处理数据——主线程开启多线程处理数据并等待

最新推荐文章于 2025-03-11 21:00:00 发布

我是何

最新推荐文章于 2025-03-11 21:00:00 发布

阅读量1k

点赞数

分类专栏： Java常见细节问题 Java 文章标签：多线程处理数据容器式处理数据 BlockingQueue 线程等待蓝何忠

本文链接：https://blog.csdn.net/lanhezhong/article/details/83417969

版权

Java 同时被 2 个专栏收录

18 篇文章

订阅专栏

Java常见细节问题

2 篇文章

订阅专栏

假如你无意中有幸看到这篇文章，那么恭喜你，你将注定与别人不一样。

我将介绍如何用一段简单的代码，实现指定数量的多线程共同处理一件可分割的事情，并进行线程等待。或者说实现一个容器，这个容器里有8个线程，这8个线程共同处理一份较大的数据，并且主线程会等待所有数据执行完再往下执行。

我估计上面的话你应该看不明白，只怪我没学好语文，语文老师要是知道我语文这么烂，不知道还认不认我这个学生。不过不要紧，下面的你一定能明白：

问题：

假设有一份较大的数据，然后由于某种原因，这份数据只能一小段一小段的处理，然后我们考虑用多线程来处理这份数据。设想是这样的：

方法：

处理这个问题，我用了一个BlockingQueue，简单解释这个队列的2个方法：

void put(E e) throws InterruptedException

该方法将元素设置到队列中，如果队列中没有多余的空间，该方法会一直阻塞，直到队列中有多余的空间

E take() throws InterruptedException

该方法从队列中获取值，如果队列中没有值，线程会一直阻塞，直到队列中有值，并且该方法取得了该值。

假设要处理的10万个数据已经放在一个list中。

看下面的ThreadEating.java

package com.lan.Test_Project;

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class ThreadEating {
	//缓存线程池
	private static ExecutorService cachedThreadPool = Executors.newCachedThreadPool();
	
	//主方法测试
	public static void main(String[] args) {
		List<String> list = new ArrayList<String>();
		for(int i=0; i<100000; i++){
			list.add("data-"+i);
		}
		new ThreadEating().doHandelListUsingThread(list, 8, 100);
	}
	
	//这个方法是真正的处理数据的方法，我写个简单的打印
	private void doHandelList(List list){
		for(Object s:list){
			System.out.println(s);
		}
	}
	
	//关键方法，分割数据，然后用多线程同时处理
	public void doHandelListUsingThread(List list, int threadCount, int handelSize){
		if(list.size()<=handelSize || threadCount<=1){
			doHandelList(list);
			return;
		}
		final BlockingQueue<String> queue = new ArrayBlockingQueue<String>(threadCount);
		for(int i=0; i<list.size(); i+=handelSize){
			final List subList;
			if(i+handelSize<list.size()){
				subList = list.subList(i, i+handelSize);
			}else{
				subList = list.subList(i, list.size());
			}
			
			try {
				//**1--关键代码。该方法将元素设置到队列中，如果队列中没有多余的空间，该方法会一直阻塞，直到队列中有多余的空间**//
				queue.put("");
			} catch (InterruptedException e) {
				e.printStackTrace();
			}
			cachedThreadPool.execute(new Runnable() {
				public void run() {
					doHandelList(subList);
					try {
						//**2--关键代码。该方法从队列中取出元素**//
						queue.take();
					} catch (InterruptedException e) {
						e.printStackTrace();
					}
				}
			});
		}
		for(int i=0; i<threadCount; i++){
			try {
				//**3--关键代码。该方法将元素设置到队列中，如果队列中没有多余的空间，该方法会一直阻塞，直到队列中有多余的空间**//
				queue.put("");
			} catch (InterruptedException e) {
				e.printStackTrace();
			}
		}
	}
	
}

这个类主要方法是doHandelListUsingThread方法：

1、该方法把list一次次截取出长度为100的subList；

2、关键代码1，往BlockingQueue中添加一个元素，假如8个线程全满，则这里会进行阻塞等待；

3、关键代码2，线程池线程每处理完一段数据后，从BlockingQueue中取出元素；然后上一步的关键代码1又可以向线程池中添加数据任务；

4、关键代码3，这里是主线程等待线程池全部处理结束；当主线程已经完全把数据塞进去后，主线程会在这里阻塞等待，线程池中每处理完一段数据，这里就put方法就会执行一次，直到线程池全部处理结束，这里也刚好put结束，接着往后执行。

到这里代码和原理就已经讲解结束，再三考虑，难道我们每次写这样的功能都要来抄这段代码进行修改吗？假如处理的数据不是一个List，而是文件txt、excel、数据库、大文件系统等等，那是不是把代码写得不成样子了？

总结方便应用：

一、为了方便使用，我们把这个类抽象化，让它完成它该完成的任务，实际上要实现的功能由继承它的类来实现；

ThreadEating.java

package com.lan.Test_Project;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
/**
 * 继承此类，重写相应的方法，可实现多线程处理数据
 * @author LAN
 * @date 2018年11月6日
 */
public abstract class ThreadEating {
	
	private static ExecutorService cachedThreadPool = Executors.newCachedThreadPool();
	
	/**
	 * 使用该方法需重写doHandelList方法
	 * @author LAN
	 * @date 2018年11月6日
	 * @param list
	 * @param threadCount
	 * @param handelSize
	 */
	protected void doHandelListUsingThread(List list, int threadCount, int handelSize){
		if(list.size()<=handelSize || threadCount<=1){
			doHandelList(list);
			return;
		}
		final BlockingQueue<String> queue = new ArrayBlockingQueue<String>(threadCount);
		for(int i=0; i<list.size(); i+=handelSize){
			final List subList;
			if(i+handelSize<list.size()){
				subList = list.subList(i, i+handelSize);
			}else{
				subList = list.subList(i, list.size());
			}
			
			try {
				queue.put("");
			} catch (InterruptedException e) {
				e.printStackTrace();
			}
			cachedThreadPool.execute(new Runnable() {
				public void run() {
					doHandelList(subList);
					try {
						queue.take();
					} catch (InterruptedException e) {
						e.printStackTrace();
					}
				}
			});
		}
		for(int i=0; i<threadCount; i++){
			try {
				queue.put("");
			} catch (InterruptedException e) {
				e.printStackTrace();
			}
		}
	}
	
	/**
	 * 使用该方法需重写doHandelArray方法
	 * @author LAN
	 * @date 2018年11月6日
	 * @param objArray
	 * @param threadCount
	 * @param handelSize
	 */
	protected void doHandelArrayUsingThread(Object[] objArray, int threadCount, int handelSize){
		if(objArray.length<=handelSize || threadCount<=1){
			doHandelArray(objArray);
			return;
		}
		final BlockingQueue<String> queue = new ArrayBlockingQueue<String>(threadCount);
		for(int i=0; i<objArray.length; i+=handelSize){
			final Object[] subArray;
			if(i+handelSize<objArray.length){
				subArray = Arrays.copyOfRange(objArray, i, i+handelSize);
			}else{
				subArray = Arrays.copyOfRange(objArray, i, objArray.length);
			}
			
			try {
				queue.put("");
			} catch (InterruptedException e) {
				e.printStackTrace();
			}
			cachedThreadPool.execute(new Runnable() {
				public void run() {
					doHandelArray(subArray);
					try {
						queue.take();
					} catch (InterruptedException e) {
						e.printStackTrace();
					}
				}
			});
		}
		for(int i=0; i<threadCount; i++){
			try {
				queue.put("");
			} catch (InterruptedException e) {
				e.printStackTrace();
			}
		}
	}
	
	/**
	 * 使用该方法需重写readSource 和 doHandelList方法
	 * @author LAN
	 * @date 2018年11月6日
	 * @param source
	 * @param threadCount
	 * @param handelSize
	 */
	protected void doHandelSourceUsingThread(Object source, int threadCount, int handelSize){
		boolean hasNext = true;
		final BlockingQueue<String> queue = new ArrayBlockingQueue<String>(threadCount);
		int fromIndex=0;
		while (hasNext) {
			final List subList = new ArrayList();
			hasNext = readSource(source, subList, fromIndex, handelSize);
			fromIndex += handelSize;
			if(subList.size()<=0){
				break;
			}
			try {
				queue.put("");
			} catch (InterruptedException e) {
				e.printStackTrace();
			}
			cachedThreadPool.execute(new Runnable() {
				public void run() {
					doHandelList(subList);
					try {
						queue.take();
					} catch (InterruptedException e) {
						e.printStackTrace();
					}
				}
			});
		}
		for(int i=0; i<threadCount; i++){
			try {
				queue.put("");
			} catch (InterruptedException e) {
				e.printStackTrace();
			}
		}
	}
	
	//重写该方法实现所需的数据处理功能
	protected void doHandelList(List list){};
	
	//重写该方法实现所需的数据处理功能
	protected void doHandelArray(Object[] objArray){};
	
	//重写该方法实现从源读取数据，如从文件中、网络、硬件设备等读取数据
	protected boolean readSource(Object source, List intoList, int fromIndex, int size){
		return false;
	};
	
}

这个抽象类主要是三个方法

1、doHandelListUsingThread，这个方法分段多线程处理List数据，实际的处理数据逻辑只需重写doHandelList方法；

2、doHandelArrayUsingThread，这个方法分段多线程处理array数据，实际的处理数据逻辑只需重写doHandelArray方法；

3、doHandelSourceUsingThread，这个方法可实现从各种数据源自定义读取数据，并开启多线程分段交给doHandelList方法进行处理。使用该方法需重写readSource方法分段读取数据放到intoList中，并重写doHandelList实现实际的数据处理；

二、写一个类继承上面的抽象类，进行测试

MyEating.java

package com.lan.Test_Project;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class MyEating extends ThreadEating{

	@Override
	protected void doHandelList(List list) {
		//处理小段数据，根据需要实现所需功能
		for(Object obj:list){
			System.out.println(obj);
		}
	}
	
	@Override
	protected void doHandelArray(Object[] objArray) {
		//处理小段数据，根据需要实现所需功能
		for(Object obj:objArray){
			System.out.println(obj);
		}
	}
	
	//重写读取数据源方法，这里从txt文件读取
	@Override
	protected boolean readSource(Object source, List intoList, int fromIndex, int size) {
		BufferedReader br = (BufferedReader)source;
        String s = null;
        int i=0;
        try{
	        while((s = br.readLine())!=null){//使用readLine方法，一次读一行
	        	intoList.add(s);
	            i++;
	            if(i>=size){
	            	return true;
	            }
	        }
        }catch(IOException e){
        	e.printStackTrace();
        }
        return false;
	}
	
	//测试
	public static void main(String[] args) {
		//1、多线程分段处理list数据
		List<String> list = new ArrayList<String>();
		for(int i=0; i<1001; i++){
			list.add("data-"+i);
		}
		new MyEating().doHandelListUsingThread(list, 8, 100);
		
		//2、多线程分段处理array数据
		Object[] array = list.toArray();
		new MyEating().doHandelArrayUsingThread(array, 8, 99);
		
		//3、多线程分段处理从txt文件中读取数据
		File file = new File("D:\\测试字.txt");
        BufferedReader br;
		try {
			br = new BufferedReader(new FileReader(file));//构造一个BufferedReader类来读取文件
			new MyEating().doHandelSourceUsingThread(br, 8, 15);
			br.close();
		} catch (FileNotFoundException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
}

用main方法分别测试List、array数组、txt文件数据的处理，结果都非常不错。

总结：

上面这些技巧能用在哪些地方呢？我设计这个的初衷原来是用来比对数据的，需要把大量excel数据一条一条与数据库进行比对，这个比对过程是比较慢的，并且一次比对数据量太大，会导致系统卡死，而电脑恰好是8核的，于是我就考虑开固定的8线程做这个事情。其实可以用这个技巧来进行大数据的回归排序、生物基因比对、大数据分段存储、自定义数据搜索等等。

再回到最初的那句话：你将注定与别人不一样，为何呢？因为每一个人都注定与他人不一样。

author：蓝何忠

email：lanhezhong@163.com