Reading Elasticsearch (ES) with Java Flink

Runtime environment: Flink standalone mode

1. The main entry point

```java
package es;

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.util.FileUtils;

import java.io.File;
import java.util.List;
import java.util.Map;

public class EsReadTest {

    private static EsRestClientService esRestClientService = new EsRestClientService();

    public static void main(String[] args) throws Exception {
        // Set up the batch execution environment.
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        String scrollId = null;
        DataSet<Tuple3<String, String, Integer>> dataSet = null;
        List<Tuple3<String, String, Integer>> dataList = null;
        int count = 0;

        // Page through ES with the scroll cursor; "none" is the sentinel used
        // when the service could not return a scroll id.
        while (!"none".equals(scrollId)) {
            Map<String, Object> map = esRestClientService.queryDeviceListPage(scrollId);
            if (map.get("tupleList") instanceof List) {
                dataList = (List<Tuple3<String, String, Integer>>) map.get("tupleList");
            }
            Object sid = map.get("scrollId");
            scrollId = (sid == null) ? "none" : sid.toString();

            // Union this page with the pages read so far (fromCollection cannot
            // infer a type from an empty list, hence the guard).
            if (dataList != null && !dataList.isEmpty()) {
                DataSet<Tuple3<String, String, Integer>> dataSetTemp = env.fromCollection(dataList);
                dataSet = (dataSet == null) ? dataSetTemp : dataSet.union(dataSetTemp);
            }
            // A page smaller than the scroll size (10000) means the cursor is
            // exhausted; count > 3 caps the number of pages for this demo.
            if (dataList == null || dataList.size() < 10000 || count > 3) {
                break;
            }
            ++count;
        }
        if (dataSet == null) {
            System.out.println("no data read from ES");
            return;
        }

        // Group by field 0 (name) and sum field 2 (the per-record counter).
        dataSet = dataSet.groupBy(0).sum(2);
        String output = "/opt/flink-data/esoutput2.txt";
        FileUtils.deleteFileOrDirectory(new File(output));
        dataSet.writeAsText(output);
        env.execute("read es");
    }
}
```
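To make the aggregation step concrete, here is a minimal, self-contained sketch of what `groupBy(0).sum(2)` does to these tuples (the class name and sample values are invented for illustration):

```java
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple3;

public class GroupBySumDemo {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple3<String, String, Integer>> demo = env.fromElements(
                new Tuple3<>("zhangsan", "beijing", 1),
                new Tuple3<>("zhangsan", "shanghai", 1),
                new Tuple3<>("lisi", "beijing", 1));
        // Records sharing the same f0 (name) form one group; f2 is summed.
        // Prints (zhangsan,?,2) and (lisi,beijing,1) - the non-grouped,
        // non-aggregated field f1 keeps an arbitrary value from the group.
        demo.groupBy(0).sum(2).print();
    }
}
```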

2. Reading ES with a scroll cursor

```java
package es;

import com.alibaba.fastjson.JSONObject;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.http.HttpHost;
import org.elasticsearch.action.search.*;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestClientBuilder;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * ES service running on an Alibaba Cloud server.
 *
 * @author lizixian
 * @date 2020/3/16 10:41
 */
public class EsRestClientService {

    private String host = "172.168.0.138:9200";
    private String scheme = "http";
    private String index = "es_index";
    private String type = "es_type";
    private RestClientBuilder builder = null;
    private RestHighLevelClient client = null;

    public void init() {
        String[] nodeIpInfos = host.split(":");
        builder = RestClient.builder(new HttpHost(nodeIpInfos[0], Integer.parseInt(nodeIpInfos[1]), scheme))
                .setRequestConfigCallback(requestConfigBuilder -> {
                    // Generous 10-minute timeouts to accommodate large scroll pages.
                    requestConfigBuilder.setConnectTimeout(10 * 60 * 1000);
                    requestConfigBuilder.setSocketTimeout(10 * 60 * 1000);
                    requestConfigBuilder.setConnectionRequestTimeout(10 * 60 * 1000);
                    return requestConfigBuilder;
                }).setMaxRetryTimeoutMillis(10 * 60 * 1000);
        client = new RestHighLevelClient(builder);
    }

    /**
     * Queries the device app-install list page by page, using a scroll cursor.
     *
     * @author lizixian
     * @date 2020/5/10 18:01
     */
    public Map<String, Object> queryDeviceListPage(String scrollId) {
        String brand = "CH";
        // Page size: 10000 documents per scroll batch.
        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        sourceBuilder.size(10000);
        BoolQueryBuilder bool = QueryBuilders.boolQuery();
        // Platform filter, e.g.:
        // bool.must(QueryBuilders.termQuery("brand", brand));
        sourceBuilder.query(bool);
        return queryDeviceListPageResult(sourceBuilder, scrollId);
    }

    private Map<String, Object> queryDeviceListPageResult(SearchSourceBuilder sourceBuilder, String scrollId) {
        SearchRequest searchRequest = new SearchRequest(index)
                .types(type)
                .scroll("2m")
                .source(sourceBuilder);
        if (client == null) {
            init();
        }
        Map<String, Object> resultMap = new HashMap<>(5);
        List<Tuple3<String, String, Integer>> tupleList = new ArrayList<>();
        try {
            SearchResponse response;
            if (scrollId != null) {
                // Follow-up page: continue the existing scroll cursor.
                SearchScrollRequest scrollRequest = new SearchScrollRequest(scrollId).scroll("2m");
                response = client.searchScroll(scrollRequest);
            } else {
                // First page: open a new scroll cursor.
                response = client.search(searchRequest);
            }
            if (response.status().getStatus() == RestStatus.OK.getStatus()) {
                SearchHit[] hits = response.getHits().getHits();
                scrollId = response.getScrollId();
                if (hits != null) {
                    for (SearchHit hit : hits) {
                        JSONObject json = JSONObject.parseObject(hit.getSourceAsString());
                        tupleList.add(new Tuple3<>(json.getString("name"), json.getString("city"), 1));
                    }
                }
            } else {
                // Clear the scroll cursor on failure; setScrollIds() can clear several ids at once.
                ClearScrollRequest clearScrollRequest = new ClearScrollRequest();
                clearScrollRequest.addScrollId(scrollId);
                ClearScrollResponse clearScrollResponse = client.clearScroll(clearScrollRequest);
                boolean succeeded = clearScrollResponse.isSucceeded();
            }
            resultMap.put("scrollId", scrollId);
            resultMap.put("tupleList", tupleList);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return resultMap;
    }
}
```
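One gap worth noting: the happy path never releases the last scroll context, so ES holds it until the 2m keep-alive expires. Below is a hedged sketch of a cleanup helper that could be added to EsRestClientService once the caller has finished paging; the method name `clearScroll` is my own, but it reuses the same ClearScrollRequest API the error branch above already calls:

```java
// Hypothetical addition to EsRestClientService: release a scroll cursor
// explicitly instead of waiting for the keep-alive to lapse.
public boolean clearScroll(String scrollId) throws IOException {
    if (client == null) {
        init();
    }
    ClearScrollRequest request = new ClearScrollRequest();
    request.addScrollId(scrollId);
    ClearScrollResponse response = client.clearScroll(request);
    return response.isSucceeded();
}
```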
Apache Flink is a distributed stream-processing engine that can process both streaming and batch data, and is designed for efficiency, high availability, and fault tolerance. Elasticsearch (ES) is a distributed search engine for real-time search and analytics that stores, searches, and analyzes large volumes of data efficiently. Flink and ES can be integrated for real-time data processing and analysis. Common scenarios include:

1. Real-time synchronization: stream data processed by Flink is synchronized into ES in real time for fast search and analysis.
2. Real-time analytics: Flink processes the stream and writes the results into ES in real time for analysis and visualization.
3. Real-time alerting: Flink evaluates the stream against specific rules and conditions in real time and writes the detection results into ES for real-time alerting and handling.

To integrate Flink with ES, use Flink's Elasticsearch Connector. It writes data processed by Flink into ES in real time and supports bulk submission, filtering, and transformation, so it adapts flexibly to different scenarios. Example code:

```java
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);

DataStream<String> stream = env.socketTextStream("localhost", 9999);

// Write the stream into Elasticsearch.
List<HttpHost> httpHosts = Arrays.asList(new HttpHost("localhost", 9200, "http"));
ElasticsearchSink.Builder<String> esSinkBuilder = new ElasticsearchSink.Builder<>(
        httpHosts,
        new ElasticsearchSinkFunction<String>() {
            public IndexRequest createIndexRequest(String element) {
                Map<String, String> json = new HashMap<>();
                json.put("data", element);
                return Requests.indexRequest()
                        .index("my-index")
                        .type("my-type")
                        .source(json);
            }

            @Override
            public void process(String element, RuntimeContext ctx, RequestIndexer indexer) {
                indexer.add(createIndexRequest(element));
            }
        });

stream.addSink(esSinkBuilder.build());
env.execute("Flink and Elasticsearch Example");
```

This example reads data from a socket, processes it with Flink, and writes it into Elasticsearch through an ElasticsearchSink. The `httpHosts` parameter specifies the ES address and port, `createIndexRequest` builds the document to write, and `process` hands each element to the request indexer.
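Since the passage mentions bulk submission, here is a sketch of the flush knobs, assuming the `ElasticsearchSink.Builder` API from the flink-connector-elasticsearch6 module; the values are illustrative, so check the connector version you actually use:

```java
// Bulk flushing knobs on ElasticsearchSink.Builder (illustrative values):
esSinkBuilder.setBulkFlushMaxActions(1000); // flush after 1000 buffered actions
esSinkBuilder.setBulkFlushMaxSizeMb(5);     // ...or after 5 MB of buffered data
esSinkBuilder.setBulkFlushInterval(5000);   // ...or at least every 5 seconds
```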
