文章目录
1、场景一:MySQL批量导入数据到ES
场景: 需要将库里的1000万左右的数据量,导入到ES索引库中
实现思路: 分批处理,防止OOM,使用线程池 + CountDownLatch(控制线程协作,等所有线程都干完活儿后,才能继续往下走)
1.1 CountDownLatch
将MySQL的数据按总量分批,一批批的提交到线程池去处理
提交完后,可以选择直接返回,不去等进入线程池阻塞队列的任务都执行完。也可以使用CountDownLatch,线程池的线程每处理一个提交的任务,就让CountDownLatch减一(CountDownLatch初始化等待计数等于批次数),阻塞请求过来的那个线程,直到CountDownLatch减为0,最后统计下总耗时,返回给前端
1.2 流程图
调用ES同步接口 ⇒ 查库分批 ⇒ 提交任务到线程池 ⇒ CountDownLatch阻塞当前http请求对应的线程 ⇒ CountDownLatch计数减为0后,http请求对应的线程继续执行,统计总耗时
1.3 代码实现
Service层:
public interface ApArticleService {
    /**
     * Bulk-imports all articles from MySQL into the Elasticsearch index.
     * Batches are submitted to a thread pool; the calling thread blocks on a
     * CountDownLatch until every batch has been written, then logs total time.
     */
    void importAll(); // interface methods are implicitly public; redundant modifier removed
}
Service层实现:
import com.alibaba.fastjson.JSON;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.xcontent.XContentType;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
@Service
@Transactional
@Slf4j
public class ApArticleServiceImpl implements ApArticleService {
//数据层对象
@Autowired
private ApArticleMapper apArticleMapper;
//ES客户端操作对象,自定义的Bean
@Autowired
private RestHighLevelClient client;
//线程池对象,自定义的Bean
@Autowired
private ExecutorService executorService;
private static final String ARTICLE_ES_INDEX = "app_info_article";
//每批2000条
private static final int PAGE_SIZE = 2000;
/**
* 批量导入
*/
@SneakyThrows
@Override
public void importAll() {
//总条数
int count = apArticleMapper.selectCount();
//总页数
int totalPageSize = count % PAGE_SIZE == 0 ? count / PAGE_SIZE : count / PAGE_SIZE + 1;
//开始执行时间
long startTime = System.currentTimeMillis();
//一共有多少页,就创建多少个CountDownLatch的计数
CountDownLatch countDownLatch = new CountDownLatch(totalPageSize);
int fromIndex;
List<SearchArticleVo> articleList = null;
for (int i = 0; i < totalPageSize; i++) {
//起始分页条数
fromIndex = i * PAGE_SIZE;
//查询文章
articleList = apArticleMapper.loadArticleList(fromIndex, PAGE_SIZE);
//创建线程,做批量插入es数据操作,TaskThread实现Runnable,定义了写入ES的逻辑
TaskThread taskThread = new TaskThread(articleList, countDownLatch);
//提交任务给线程池中的线程执行
executorService.execute(taskThread);
}
//调用await()方法,用来等待计数归零
countDownLatch.await();
long endTime = System.currentTimeMillis();
log.info("es索引数据批量导入共:{}条,共消耗时间:{}秒", count, (endTime - startTime) / 1000);
}
//任务,遍历每一批里的一条条数据,用BulkRequest批量导入ES
class TaskThread implements Runnable {
List<SearchArticleVo> articleList;
CountDownLatch cdl;
public TaskThread(List<SearchArticleVo> articleList, CountDownLatch cdl) {
this.articleList = articleList;
this.cdl = cdl;
}
@SneakyThrows
@Override
public void run() {
//批量导入
BulkRequest bulkRequest = new BulkRequest(ARTICLE_ES_INDEX);
for (SearchArticleVo searchArticleVo : articleList) {
bulkRequest.add(new IndexRequest().id(searchArticleVo.getId().toString())
.source(JSON.toJSONString(searchArticleVo), XContentType.JSON));
}
//发送请求,批量添加数据到es索引库中
client.bulk(bulkRequest, RequestOptions.DEFAULT);
//让计数减一
cdl.countDown();
}
}
}
ES客户端的Bean定义:
import lombok.Getter;
import lombok.Setter;
import org.apache.http.HttpHost;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
@Getter
@Setter
@Configuration
@ConfigurationProperties(prefix = "elasticsearch")
public class ElasticSearchConfig {

    /** ES hostname, bound from the "elasticsearch.host" property. */
    private String host;

    /** ES port, bound from the "elasticsearch.port" property. */
    private int port;

    /**
     * Builds the high-level REST client used by the application to talk to
     * Elasticsearch over plain HTTP.
     */
    @Bean
    public RestHighLevelClient client() {
        HttpHost esEndpoint = new HttpHost(host, port, "http");
        return new RestHighLevelClient(RestClient.builder(esEndpoint));
    }
}
线程池的Bean定义:
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
@Configuration
public class ThreadPoolConfig {
    /**
     * 核心线程池大小
     */
    private static final int CORE_POOL_SIZE = 17;
    /**
     * 最大可创建的线程数
     */
    private static final int MAX_POOL_SIZE = 50;
    /**
     * 队列最大长度
     */
    private static final int QUEUE_CAPACITY = 1000;
    /**
     * 线程池维护线程所允许的空闲时间(秒)
     */
    private static final int KEEP_ALIVE_SECONDS = 500;

    /**
     * Shared worker pool used by the ES import (scenario 1), the parallel
     * fan-out queries (scenario 2) and @Async history writes (scenario 3).
     */
    @Bean("taskExecutor")
    public ExecutorService executorService() {
        // Counter used only to give worker threads readable, unique names.
        AtomicInteger c = new AtomicInteger(1);
        LinkedBlockingQueue<Runnable> queue = new LinkedBlockingQueue<Runnable>(QUEUE_CAPACITY);
        return new ThreadPoolExecutor(
                CORE_POOL_SIZE,
                MAX_POOL_SIZE,
                KEEP_ALIVE_SECONDS,
                // BUG FIX: the constant is named (and documented as) seconds but was
                // passed with TimeUnit.MILLISECONDS, giving a 0.5s keep-alive.
                TimeUnit.SECONDS,
                queue,
                r -> new Thread(r, "sync-es-pool-" + c.getAndIncrement()),
                // BUG FIX: DiscardPolicy silently drops tasks when the queue is
                // full; in scenario 1 a dropped batch never counts the latch
                // down, so importAll() hangs on await() forever. CallerRunsPolicy
                // applies back-pressure instead of losing work.
                new ThreadPoolExecutor.CallerRunsPolicy()
        );
    }
}
1.4 效果
// Integration test: boots the Spring context and runs the full MySQL -> ES import.
// NOTE(review): this hits real MySQL and ES instances — it is a manual/driver
// test, not a unit test; total time is reported by importAll()'s own log line.
@SpringBootTest(classes = CDLApplication.class)
@RunWith(SpringRunner.class)
public class ApArticleServiceImplTest {
@Autowired
private ApArticleService apArticleService;
// Triggers the batch import and blocks until all batches finish (CountDownLatch).
@Test
public void importAll() {
apArticleService.importAll();
}
}
2、场景二:数据汇总
2.1 流程图
在一个电商网站中,用户下单之后,需要查询数据,数据包含了三部分:订单信息、包含的商品、物流信息。但现在这三个信息各自在其他三个微服务中,如果用一个线程分别去查,再汇总,则耗时为 Time a + Time b + Time c,如下,耗时1800ms
可优化为:改为三个查询并行处理,总耗时800ms
2.2 代码实现
模拟下订单信息、包含的商品、物流信息的接口:
// 商品信息,sleep 800毫秒
@RestController
@RequestMapping("/product")
public class ProductController {

    /**
     * Returns mock product data for the given id, sleeping 800 ms to simulate
     * a slow downstream product service.
     */
    @SneakyThrows
    @GetMapping("/get/{id}")
    public Map<String, Object> product(@PathVariable int id) {
        HashMap<String, Object> map = new HashMap<>();
        switch (id) {
            case 1:
                map.put("name", "小爱音箱");
                map.put("price", 300);
                break;
            case 2:
                map.put("name", "小米手机");
                map.put("price", 2000);
                break;
            default:
                // unknown product: respond with the id only
                break;
        }
        map.put("id", id);
        Thread.sleep(800);
        return map;
    }
}
//订单信息,sleep 500毫秒
@RestController
@RequestMapping("/order")
public class OrderController {

    /**
     * Returns mock order data for the given id, sleeping 500 ms to simulate a
     * slow downstream order service.
     */
    @SneakyThrows
    @GetMapping("/get/{id}")
    public Map<String, Object> order(@PathVariable int id) {
        HashMap<String, Object> result = new HashMap<>();
        result.put("total", "2300.00");
        result.put("id", id);
        Thread.sleep(500);
        return result;
    }
}
//快递信息,sleep 500毫秒
@RestController
@RequestMapping("/logistics")
public class LogisticsController {

    /**
     * Returns mock shipping data for the given id, sleeping 500 ms to simulate
     * a slow downstream logistics service.
     */
    @SneakyThrows
    @GetMapping("/get/{id}")
    public Map<String, Object> logistics(@PathVariable int id) {
        HashMap<String, Object> result = new HashMap<>();
        result.put("name", "中通快递");
        result.put("id", id);
        Thread.sleep(500);
        return result;
    }
}
性能低的实现:一个线程里分别做三次远程调用
/**
 * Slow, sequential variant: the three remote calls run one after another on
 * the request thread, so total latency is roughly 500 + 800 + 500 ms.
 *
 * BUG FIX: the URL template declares {id} but the method took no parameter and
 * hardcoded 1 in every downstream call — the path variable is now bound and used.
 */
@SneakyThrows
@GetMapping("/get/detail/{id}")
public Map<String, Object> getOrderDetail(@PathVariable int id) {
    long startTime = System.currentTimeMillis();
    // Three blocking calls in sequence — latencies add up.
    Map<String, Object> order = restTemplate.getForObject("http://localhost:9991/order/get/{id}", Map.class, id);
    Map<String, Object> product = restTemplate.getForObject("http://localhost:9991/product/get/{id}", Map.class, id);
    Map<String, Object> logistics = restTemplate.getForObject("http://localhost:9991/logistics/get/{id}", Map.class, id);
    long endTime = System.currentTimeMillis();
    Map<String, Object> resultMap = new HashMap<>();
    resultMap.put("order", order);
    resultMap.put("product", product);
    resultMap.put("logistics", logistics);
    log.info("接口调用共耗时:{}毫秒",endTime-startTime);
    return resultMap;
}
耗时与计算的近似:
优化:提交三个远程调用的任务,并行去查,使用Future去get获取线程执行的结果
public class OrderDetailController {
//@Autowired
//private RestTemplate restTemplate;
@Autowired
private ExecutorService executorService;
/**
 * Parallel variant: the three independent remote calls are submitted to the
 * shared thread pool and joined via Future.get(), so total latency is roughly
 * max(500, 800, 500) ms instead of their sum.
 *
 * BUG FIX: the URL template declares {id} but the method took no parameter and
 * hardcoded 1 in every downstream call — the path variable is now bound and used.
 */
@SneakyThrows
@GetMapping("/get/detail_new/{id}")
public Map<String, Object> getOrderDetailNew(@PathVariable int id) {
    long startTime = System.currentTimeMillis();
    // Fan out: each call runs on its own pool thread.
    Future<Map<String, Object>> f1 = executorService.submit(() ->
            restTemplate.getForObject("http://localhost:9991/order/get/{id}", Map.class, id));
    Future<Map<String, Object>> f2 = executorService.submit(() ->
            restTemplate.getForObject("http://localhost:9991/product/get/{id}", Map.class, id));
    Future<Map<String, Object>> f3 = executorService.submit(() ->
            restTemplate.getForObject("http://localhost:9991/logistics/get/{id}", Map.class, id));
    // Join: get() blocks until each result is ready; failures propagate via
    // ExecutionException, rethrown unchecked by @SneakyThrows.
    Map<String, Object> resultMap = new HashMap<>();
    resultMap.put("order", f1.get());
    resultMap.put("product", f2.get());
    resultMap.put("logistics", f3.get());
    long endTime = System.currentTimeMillis();
    log.info("接口调用共耗时:{}毫秒",endTime-startTime);
    return resultMap;
}
耗时约800ms:
汇总数据时,如果所调用的接口之间没有依赖关系,则可使用线程池 + future 来优化性能,如报表汇总 。 近期刚好做了一个资产全景的接口
3、场景三:异步调用
3.1 需求
搜索时,保存历史搜索记录。用户搜索一个keyword时,存历史搜索记录,不能影响到查询的性能,用异步去处理,将存历史记录的任务提交到线程池
3.2 代码实现
Controller层:
@RestController
@RequestMapping("/api/v1/article")
public class ArticleSearchController {

    @Autowired
    private ArticleSearchService searchService;

    /**
     * Keyword search endpoint; all query logic lives in the service layer.
     */
    @GetMapping("/search")
    public List<Map> search(String keyword) {
        return searchService.search(keyword);
    }
}
Service层:注意调用 insert(userId,keyword) 异步写入:
import com.alibaba.fastjson.JSON;
import lombok.extern.slf4j.Slf4j;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.Operator;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.sort.SortOrder;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
@Service
@Slf4j
public class ArticleSearchServiceImpl implements ArticleSearchService {

    @Autowired
    private RestHighLevelClient client;

    private static final String ARTICLE_ES_INDEX = "app_info_article";

    // Hardcoded demo user; a real system would take this from the session/token.
    private int userId = 1102;

    @Autowired
    private ApUserSearchService apUserSearchService;

    /**
     * Searches articles by keyword (match-all when keyword is empty), newest
     * first, first 20 hits. Non-empty keywords also trigger an asynchronous
     * save of the user's search history, which must not slow the query down.
     *
     * @param keyword search term; null/empty means "list everything"
     * @return the matching documents as generic maps
     */
    @Override
    public List<Map> search(String keyword) {
        try {
            SearchRequest request = new SearchRequest(ARTICLE_ES_INDEX);
            if (null == keyword || "".equals(keyword)) {
                // No keyword: return everything (paged below).
                request.source().query(QueryBuilders.matchAllQuery());
            } else {
                request.source().query(QueryBuilders.queryStringQuery(keyword).field("title").defaultOperator(Operator.OR));
                // Fire-and-forget history write (@Async on the implementation);
                // it runs on the shared pool and never blocks this request.
                apUserSearchService.insert(userId, keyword);
            }
            // First page of 20, newest articles first.
            request.source().from(0);
            request.source().size(20);
            request.source().sort("publishTime", SortOrder.DESC);
            SearchResponse response = client.search(request, RequestOptions.DEFAULT);
            // Flatten the ES hits into plain maps for the controller.
            SearchHit[] hits = response.getHits().getHits();
            List<Map> resultList = new ArrayList<>();
            for (SearchHit hit : hits) {
                resultList.add(JSON.parseObject(hit.getSourceAsString(), Map.class));
            }
            return resultList;
        } catch (IOException e) {
            // BUG FIX: the original dropped the cause, hiding the real ES
            // failure from logs; the cause is now preserved.
            throw new RuntimeException("搜索失败", e);
        }
    }
}
@Async异步写入用户的搜索记录(注意启动类加@EnableAsync)
@Service
@Slf4j
public class ApUserSearchServiceImpl implements ApUserSearchService {

    /**
     * Persists one search-history record for the user.
     * Runs asynchronously on the "taskExecutor" pool (defined in scenario 1's
     * ThreadPoolConfig) so the caller's search request is never blocked;
     * requires @EnableAsync on the application class.
     *
     * @param userId  the searching user's id
     * @param keyword the search term to record
     */
    @Async("taskExecutor")
    @Override
    public void insert(Integer userId, String keyword) {
        // Real implementation would write to MongoDB or MySQL here.
        log.info("用户搜索记录保存成功,用户id:{},关键字:{}",userId,keyword);
    }
}