MyBatis流式查询并同步数据到ES

最新推荐文章于 2024-08-21 09:46:38 发布

code monkey

最新推荐文章于 2024-08-21 09:46:38 发布

阅读量1.2k

点赞数

分类专栏： elasticsearch JAVA

原文链接：https://blog.csdn.net/weixin_43221845/article/details/84871362

版权

JAVA 同时被 2 个专栏收录

15 篇文章 0 订阅

订阅专栏

elasticsearch

5 篇文章 0 订阅

订阅专栏

1.配置MyBatis（百度很多，省略）

2.配置ES

引入依赖

<!-- Elasticsearch transport client 5.2.0. Its transitive org.elasticsearch:elasticsearch
     artifact is excluded here and declared explicitly below with the same version
     (presumably to pin the core version; confirm against the project's dependency management). -->
<dependency>
            <groupId>org.elasticsearch.client</groupId>
            <artifactId>transport</artifactId>
            <version>5.2.0</version>
            <exclusions>
                <exclusion>
                    <groupId>org.elasticsearch</groupId>
                    <artifactId>elasticsearch</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <!-- Elasticsearch core, declared explicitly at the version matching the transport client. -->
        <dependency>
            <groupId>org.elasticsearch</groupId>
            <artifactId>elasticsearch</artifactId>
            <version>5.2.0</version>
        </dependency>

ESConfig

/**
 * Spring configuration that exposes an Elasticsearch {@link TransportClient} bean.
 */
@Configuration
public class ESConfig {
    /**
     * Builds a transport client for the cluster named "elasticsearch" and registers the
     * node at localhost:9300 with it.
     *
     * @return a client with the single transport address added
     * @throws UnknownHostException if "localhost" cannot be resolved
     */
    @Bean
    public TransportClient client() throws UnknownHostException {
        // Resolve the node address before constructing the client, so that a failed
        // lookup cannot leave an already-constructed client behind unclosed.
        InetSocketTransportAddress node =
                new InetSocketTransportAddress(InetAddress.getByName("localhost"), 9300);
        Settings settings = Settings.builder().put("cluster.name", "elasticsearch").build();
        TransportClient client = new PreBuiltTransportClient(settings);
        client.addTransportAddress(node);
        return client;
    }
}

3.Mapper.xml

<!-- Query consumed row by row through a ResultHandler. fetchSize is passed to the JDBC
     driver as a hint for how many rows to fetch per batch. The resultType "xxxxx" and
     the table "xx" appear to be placeholders; replace them with the real type and table. -->
<select id="list" resultType="xxxxx" fetchSize="1000">
        select * from xx
    </select>

4.自定义ResultHandler来分批处理结果集

其中Data为我自定义的数据类，根据需要进行替换

list.clear()很重要：不清除的话，list会不断增长，每一批都会把之前已经同步过的数据重新提交一遍，同步会越来越慢

/**
 * MyBatis {@link ResultHandler} that buffers the rows of a streamed query and indexes
 * them into Elasticsearch with bulk requests of {@link #BATCH_SIZE} documents.
 *
 * <p>Usage: set the target index and type, pass the handler to {@code SqlSession.select},
 * then call {@link #end()} to flush the last partial batch and finally {@link #close()}.
 * {@code Data} is the row type; each row is serialized with {@code JSONUtil.toJsonStr}
 * and indexed under the id {@code data.getPrimaryKey()}.
 */
public class GxidResultHandler implements ResultHandler<Data>, AutoCloseable {
    /** Number of rows buffered before a bulk request is sent. */
    private static final int BATCH_SIZE = 10000;

    private final TransportClient client;
    /** True when this handler created the client itself and is responsible for closing it. */
    private final boolean ownsClient;
    /** Temporary buffer holding the rows of the current batch. */
    private final List<Data> list = new ArrayList<>();
    private String index;
    private String type;
    /** Number of rows received so far. */
    private int total;

    /**
     * Creates a handler with its own client built by {@code new ESConfig().client()}.
     * Call {@link #close()} when done so that this client is released.
     *
     * @throws UnknownHostException if the Elasticsearch host cannot be resolved
     */
    public GxidResultHandler() throws UnknownHostException {
        this.client = new ESConfig().client();
        this.ownsClient = true;
    }

    /**
     * Creates a handler that reuses an existing client (for example the Spring-managed
     * bean). The supplied client is not closed by {@link #close()}.
     *
     * @param client the client used for all bulk requests; must not be null
     * @throws IllegalArgumentException if {@code client} is null
     */
    public GxidResultHandler(TransportClient client) {
        if (client == null) {
            throw new IllegalArgumentException("client must not be null");
        }
        this.client = client;
        this.ownsClient = false;
    }

    /**
     * Receives one row of the streamed query, buffers it, and sends a bulk request once
     * {@link #BATCH_SIZE} rows have accumulated.
     *
     * @param resultContext context carrying the current row
     * @throws IllegalStateException if a bulk request reports indexing failures
     */
    @Override
    public void handleResult(ResultContext<? extends Data> resultContext) {
        list.add(resultContext.getResultObject());
        total++;
        if (list.size() >= BATCH_SIZE) {
            handle();
        }
    }

    /**
     * Sends the buffered rows as one bulk request and empties the buffer.
     * Does nothing when the buffer is empty, because Elasticsearch rejects a bulk
     * request that contains no actions.
     */
    private void handle() {
        if (list.isEmpty()) {
            return;
        }
        BulkRequestBuilder bulkRequest = client.prepareBulk();
        for (Data data : list) {
            // Any per-row business processing can be done here before indexing.
            IndexRequest request = client
                    .prepareIndex(this.index, this.type, data.getPrimaryKey() + "")
                    .setSource(JSONUtil.toJsonStr(data))
                    .request();
            bulkRequest.add(request);
        }
        BulkResponse bulkResponse = bulkRequest
                .setRefreshPolicy(WriteRequest.RefreshPolicy.NONE)
                .execute()
                .actionGet();
        // The buffer must be cleared after every batch; otherwise it keeps growing and
        // each later batch would resend rows that were already submitted.
        list.clear();
        // A bulk call can succeed as a whole while individual documents fail, so the
        // response has to be inspected instead of being discarded.
        if (bulkResponse.hasFailures()) {
            throw new IllegalStateException(
                    "Bulk indexing into " + this.index + "/" + this.type + " failed: "
                            + bulkResponse.buildFailureMessage());
        }
    }

    /**
     * Flushes the last, possibly partial, batch. Call this after the query has finished.
     *
     * @return the total number of rows received by this handler
     * @throws IllegalStateException if the final bulk request reports indexing failures
     */
    public Integer end() {
        this.handle();
        return total;
    }

    /**
     * Closes the Elasticsearch client if it was created by this handler; a client passed
     * in through the constructor is left open for its owner to close.
     */
    @Override
    public void close() {
        if (ownsClient) {
            client.close();
        }
    }

    /** @return the name of the target index */
    public String getIndex() {
        return index;
    }

    /** @param index the name of the target index */
    public void setIndex(String index) {
        this.index = index;
    }

    /** @return the target document type */
    public String getType() {
        return type;
    }

    /** @param type the target document type */
    public void setType(String type) {
        this.type = type;
    }
}

5.使用方式

// Build the data source configuration from the caller-supplied URLs and database names.
SQLServerDataSourceConfig config = new SQLServerDataSourceConfig(params.getUrls(), params.getDatabases());
// Obtain the SqlSession manually
SqlSessionFactory sqlSessionFactory = config.sqlServerSqlSessionFactory(config.masterDataSource());
sqlSession = sqlSessionFactory.openSession();
GxidResultHandler handler = new GxidResultHandler();
handler.setIndex("索引");
handler.setType("类型");
// Pass the fully qualified statement id of the DAO/mapper method to query;
// every returned row is handed to the handler.
sqlSession.select("xxx.xxx.xxx.dao.XxxMapper.list", handler);
// end() flushes the last partial batch and returns the total number of rows handled.
num = handler.end();
// NOTE(review): sqlSession is not closed within this snippet; confirm the surrounding code closes it.