Flink ElasticsearchSink

Maven dependency (Elasticsearch 6 connector, Scala 2.12 build):
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-elasticsearch6_2.12</artifactId>
    <version>1.14.4</version>
</dependency>

The SinkFunction implementation class:

import com.alibaba.fastjson.JSONObject
import org.apache.flink.api.common.functions.RuntimeContext
import org.apache.flink.streaming.connectors.elasticsearch.{ElasticsearchSinkFunction, RequestIndexer}
import org.elasticsearch.action.index.IndexRequest
import org.elasticsearch.client.Requests

// JSONObject is assumed to be com.alibaba.fastjson.JSONObject (getLongValue is a fastjson API).
class ElasticIndexSinkFunction(indexType: String) extends ElasticsearchSinkFunction[JSONObject] {

  override def process(element: JSONObject, runtimeContext: RuntimeContext, requestIndexer: RequestIndexer): Unit = {
    try {
      // Use the record's unique key as the document id, so a replay overwrites instead of duplicating.
      val uniqueKey = element.getString("uniqueKey")
      // The target index rolls weekly and is derived from the record's startTime.
      val indexName = EsIndexName.getWeekMondayIndexName(indexType, element.getLongValue("startTime"))

      val request: IndexRequest = Requests.indexRequest
        .index(indexName)
        .`type`("_doc")
        .id(uniqueKey)
        .source(element) // fastjson JSONObject implements java.util.Map, so it can be passed directly as the document source

      requestIndexer.add(request)
    } catch {
      case e: Exception => e.printStackTrace()
    }
  }
}
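
The EsIndexName.getWeekMondayIndexName helper is project-specific and not shown in the original post. A minimal sketch of what it might look like, assuming the index is rolled weekly and named after the Monday of the week that contains startTime (only the object and method names come from the call above; the naming format is an assumption):

import java.time.format.DateTimeFormatter
import java.time.temporal.TemporalAdjusters
import java.time.{DayOfWeek, Instant, LocalDate, ZoneId}

object EsIndexName {

  private val DayFormat = DateTimeFormatter.ofPattern("yyyy.MM.dd")

  // Every startTime in the same Monday-to-Sunday week maps to the same index name,
  // e.g. "user_event-2024.01.01" for any timestamp in that week (the format is an assumption).
  def getWeekMondayIndexName(indexType: String, startTime: Long): String = {
    val day: LocalDate = Instant.ofEpochMilli(startTime).atZone(ZoneId.systemDefault()).toLocalDate
    val monday: LocalDate = day.`with`(TemporalAdjusters.previousOrSame(DayOfWeek.MONDAY))
    s"$indexType-${monday.format(DayFormat)}"
  }
}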

Writing the data into ES:

import com.alibaba.fastjson.JSONObject
import org.apache.commons.configuration2.builder.fluent.Configurations
import org.apache.flink.streaming.api.scala.DataStream
import org.apache.flink.streaming.connectors.elasticsearch6.ElasticsearchSink
import org.apache.http.HttpHost

import java.util
import java.util.UUID

object EsSlinkUtil {

  def sendData(indexType: String, data: DataStream[JSONObject], jobType: String): Unit = {
    // Read the Elasticsearch host from SysConfig.properties on the classpath.
    val configs = new Configurations()
    val configuration = configs.properties("SysConfig.properties")
    val esIP = configuration.getString("eSPort_export")

    val httpHosts = new util.ArrayList[HttpHost]()
    httpHosts.add(new HttpHost(esIP, 9200, "http"))

    try {
      // The builder is given the ES hosts and the ElasticsearchSinkFunction defined above.
      val elasticSink: ElasticsearchSink.Builder[JSONObject] =
        new ElasticsearchSink.Builder[JSONObject](httpHosts, new ElasticIndexSinkFunction(indexType))

      data.addSink(elasticSink.build())
        .name(indexType + "-" + UUID.randomUUID().toString.replaceAll("-", ""))
        .uid(indexType + "-" + jobType) // stable uid, so operator state can be matched on restore
    } catch {
      case e: Exception => e.printStackTrace()
    }
  }
}
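
A hedged end-to-end usage sketch. The job object, the sample record, the "user_event" index type, and the "realtime" jobType string are placeholders invented for the example; only EsSlinkUtil.sendData and the fastjson types come from the code above:

import com.alibaba.fastjson.{JSON, JSONObject}
import org.apache.flink.streaming.api.scala._

object EsSinkDemoJob {

  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // Placeholder source; a real job would read from Kafka, CDC, files, etc.
    val events: DataStream[JSONObject] = env
      .fromElements("""{"uniqueKey":"k-1","startTime":1700000000000}""")
      .map(JSON.parseObject(_))

    // Route the stream into Elasticsearch through the helper above.
    EsSlinkUtil.sendData("user_event", events, "realtime")

    env.execute("es-sink-demo")
  }
}

For throughput tuning, ElasticsearchSink.Builder also exposes bulk-flush settings such as setBulkFlushMaxActions and setBulkFlushInterval before build(); the utility above leaves them at their defaults.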
