异步入库es方法的编写

Survive by day and develop by night.
Talk for import biz, show your perfect code, full busy, skip hardness, make a better result, wait for change, challenge Survive.
Happy for hardness to solve dependencies.

目录

在这里插入图片描述

概述

网络爬虫是一个非常常见的需求。

需求:

设计思路

实现思路分析

1.加上异步注解

2.控制层调用

下载器我们可以使用建立HTTP请求把界面的URL元素下载下来。实质就是
下载器。

3.注入Spring的bean

爬虫调度器就是利用多线程机制进行调度,使得网页爬取更快。

4.常见代码:

package com.georsoft.search.controller.provider;


import com.alibaba.fastjson.JSONArray;
import com.georsoft.common.core.utils.DateUtils;

import com.georsoft.search.es.entity.ArcDocument;
import com.georsoft.search.index.convert.ArcDocCreateConvert;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;

import org.apache.http.HttpHost;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.CredentialsProvider;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.support.WriteRequest;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestClientBuilder;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.xcontent.XContentType;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Component;

import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

@Component
@Slf4j
public class AsyncTaskEs {

    @Autowired
    private ArcDocCreateConvert arcDocumentConvert;

    @SneakyThrows
    @Async
    public void doTask1(List<Map<String, Object>> addEsDataMapList) {
        long t1 = System.currentTimeMillis();
        addElasticsearchData(addEsDataMapList);
        long t2 = System.currentTimeMillis();
        log.info("task1 cost {} ms" , t2-t1);

    }


    @Value("${spring.elasticsearch.rest.nodes:10.100.13.105}")
    private String host;
    @Value("${spring.elasticsearch.rest.port:8083}")
    private String port;
    @Value("${spring.elasticsearch.rest.indexName:test}")
    private String arc_document;//索引名称

    /**
     * 创建连接 高级客户端
     */
    public RestHighLevelClient restHighLevelClient() {
//        RestHighLevelClient client = new RestHighLevelClient(
//                RestClient.builder(new HttpHost(host, Integer.parseInt(port), "http")));
        HttpHost host = new HttpHost("10.100.13.105",  8083, HttpHost.DEFAULT_SCHEME_NAME);

        RestClientBuilder builder = RestClient.builder(host);

        CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
        credentialsProvider.setCredentials(AuthScope.ANY, new UsernamePasswordCredentials("es", "111111"));
        builder.setHttpClientConfigCallback(f -> f.setDefaultCredentialsProvider(credentialsProvider));
        RestHighLevelClient client = new RestHighLevelClient(builder);
        return  client;
    }



        /**
         * Elasticsearch数据导入
         */
        public void addElasticsearchData(List<Map<String, Object>> addEsDataMapList) {
            //获取连接
            RestHighLevelClient client = restHighLevelClient();
            try {
                //创建请求
                BulkRequest bulkRequest = new BulkRequest();
                //创建index请求 千万注意,这个写在循环外侧,否则UDP协议会有丢数据的情况,看运气
                IndexRequest requestData = null;
                Map<Object,Object> dataMap=new HashMap<>();
                for (Map<String, Object> addEsDataMap : addEsDataMapList) {//添加数据
                        for (Map.Entry<String, Object> entry : addEsDataMap.entrySet()) {
                            dataMap.put(lineToHump(entry.getKey()),  entry.getValue());
                            dataMap.put("description","描述");
                            dataMap.put("year",2008);
                            dataMap.put("trxId","交易ID");
                            dataMap.put("contractNo","12332131");
                            dataMap.put("deadline",12332L);
                        }

                    ArcDocument arcDocument =arcDocumentConvert.convert(dataMap);

                    requestData=new IndexRequest(arc_document, "_doc", dataMap.get("arcId").toString()).source(arcDocument, XContentType.JSON);
                    bulkRequest.add(requestData);
                }
                log.info("es同步数据数量:{}", bulkRequest.numberOfActions());
                //设置索引刷新规则
                bulkRequest.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE);
                //分批次提交,数量控制
                if (bulkRequest.numberOfActions() >= 1) {
    //                BulkResponse bulkResponse = client.bulk(bulkRequest, RequestOptions.DEFAULT);
    //                log.info("es同步数据结果:{}", bulkResponse.hasFailures());

                    BulkResponse bulkResponse = client.bulk(bulkRequest, RequestOptions.DEFAULT);
                    if(bulkResponse.hasFailures()){
                        log.info("数据写入失败:{}",bulkResponse.buildFailureMessage());
                    }else {
                        log.info("实时消息es写入成功");

                    }
                }
            } catch (Exception e) {
                e.printStackTrace();
                log.error("es同步数据执行失败:{}", addEsDataMapList);
            } finally {
                try {
                    client.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }


    public static String lineToHump(String str) {
        str = str.toLowerCase();
        Pattern linePattern = Pattern.compile("_(\\w)");
        Matcher matcher = linePattern.matcher(str);
        StringBuffer sb = new StringBuffer();
        while (matcher.find()) {
            matcher.appendReplacement(sb, matcher.group(1).toUpperCase());
        }
        matcher.appendTail(sb);
        return sb.toString();
    }


}

性能参数测试:

参考资料和推荐阅读

  1. 暂无

欢迎阅读,各位老铁,如果对你有帮助,点个赞加个关注呗!~

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

执于代码

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值