A simple Scala Flink example: reading from Kafka and writing to Elasticsearch
1. Add the pom dependencies
<dependency>
    <groupId>org.apache.kafka</groupId>
    <artifactId>kafka_2.11</artifactId>
    <version>1.1.0</version>
</dependency>
<dependency>
    <groupId>org.apache.kafka</groupId>
    <artifactId>kafka-clients</artifactId>
    <version>1.1.0</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-kafka_2.11</artifactId>
    <version>1.11.3</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-elasticsearch6_2.11</artifactId>
    <version>1.11.3</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-streaming-scala_2.11</artifactId>
    <version>1.11.3</version>
</dependency>
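If more Flink artifacts are added later, the Flink version and the Scala binary version can be pulled into Maven properties so they stay in sync; a minimal sketch (the property names are my own, not part of the original pom):

<properties>
    <flink.version>1.11.3</flink.version>
    <scala.binary.version>2.11</scala.binary.version>
</properties>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-streaming-scala_${scala.binary.version}</artifactId>
    <version>${flink.version}</version>
</dependency>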
2. Elasticsearch index template (mapping)
The mapping is delivered as an index template: it applies to indices matching xiaojin_*, maps dynamically detected strings to keyword, disables _all, and declares a few typed fields (including a geo_point and a double).
{
  "order": 0,
  "index_patterns": [
    "xiaojin_*"
  ],
  "settings": {
    "index": {
      "number_of_shards": "3",
      "translog": {
        "flush_threshold_size": "1g"
      },
      "number_of_replicas": "1"
    }
  },
  "mappings": {
    "_default_": {
      "dynamic_templates": [
        {
          "string_as_keyword": {
            "mapping": {
              "type": "keyword"
            },
            "match_mapping_type": "string",
            "match": "*"
          }
        }
      ],
      "_all": {
        "enabled": false
      },
      "properties": {
        "originalMsg": {
          "index": false,
          "type": "text"
        },
        "index": {
          "index": false,
          "type": "keyword"
        },
        "location": {
          "type": "geo_point"
        },
        "id": {
          "type": "keyword"
        },
        "table": {
          "index": false,
          "type": "keyword"
        },
        "speed": {
          "type": "double"
        }
      }
    }
  },
  "aliases": {}
}
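The template has to be installed in Elasticsearch before the indices are created, for example with a PUT to the _template endpoint (the template name xiaojin_template and the file name below are placeholders I chose; the host is the one used in the job below). Note that this template only matches indices named xiaojin_*, while the Flink job below writes to an index called zp, so one of the two would need to be aligned for the mapping to take effect:

curl -X PUT "http://slave1cs:9200/_template/xiaojin_template" \
  -H 'Content-Type: application/json' \
  -d @xiaojin_template.json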
3. Flink job: read from Kafka, write to Elasticsearch
The job consumes the es topic as plain strings, splits each message on commas into three fields (wei, jing, time), and indexes them into the zp index.
package com.test

import java.util.Properties

import org.apache.flink.api.common.functions.RuntimeContext
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.elasticsearch.{ElasticsearchSinkFunction, RequestIndexer}
import org.apache.flink.streaming.connectors.elasticsearch6.ElasticsearchSink
import org.apache.flink.streaming.connectors.kafka._
import org.apache.http.HttpHost
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.elasticsearch.action.index.IndexRequest
import org.elasticsearch.client.Requests

object DataSink_es {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // Checkpoint every 5 s; the Kafka source commits its offsets back to Kafka on completed checkpoints.
    env.enableCheckpointing(5000)

    // Kafka consumer configuration.
    val props = new Properties()
    props.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "mastercs:9092,slave1cs:9092,slave2cs:9092")
    props.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest")
    props.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "g1")

    // Consume the "es" topic as plain strings, starting from the latest offsets.
    val consumer = new FlinkKafkaConsumer[String]("es", new SimpleStringSchema(), props)
    consumer.setStartFromLatest()
    val stream = env.addSource(consumer)
    stream.print()

    // Elasticsearch node(s) the sink connects to.
    val httpHosts = new java.util.ArrayList[HttpHost]
    httpHosts.add(new HttpHost("slave1cs", 9200, "http"))

    val esSinkBuilder = new ElasticsearchSink.Builder[String](
      httpHosts,
      new ElasticsearchSinkFunction[String] {
        // Split one comma-separated message into three fields and build an index request.
        def createIndexRequest(element: String): IndexRequest = {
          val fields = element.split(",")
          val json = new java.util.HashMap[String, String]
          json.put("wei", fields(0))   // latitude
          json.put("jing", fields(1))  // longitude
          json.put("time", fields(2))
          Requests.indexRequest()
            .index("zp")
            .`type`("es")
            .source(json)
        }

        override def process(element: String, runtimeContext: RuntimeContext, requestIndexer: RequestIndexer): Unit = {
          requestIndexer.add(createIndexRequest(element))
        }
      }
    )

    // Flush after every single request so documents are visible immediately (fine for a demo).
    esSinkBuilder.setBulkFlushMaxActions(1)
    stream.addSink(esSinkBuilder.build())

    env.execute("DataSink_es")
  }
}
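Flushing after every single action is handy for checking the demo but far too chatty for real traffic. The same builder also exposes batching and retry settings; a possible tuning sketch for the esSinkBuilder defined above (the concrete values are only illustrative):

import org.apache.flink.streaming.connectors.elasticsearch.util.RetryRejectedExecutionFailureHandler

esSinkBuilder.setBulkFlushMaxActions(1000)  // flush once 1000 requests are buffered ...
esSinkBuilder.setBulkFlushInterval(5000)    // ... or at the latest every 5 seconds
esSinkBuilder.setBulkFlushBackoff(true)     // back off and retry bulk requests rejected by Elasticsearch
esSinkBuilder.setBulkFlushBackoffRetries(3)
esSinkBuilder.setFailureHandler(new RetryRejectedExecutionFailureHandler)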
4. A Scala Kafka producer for test data
package com.test.kafka_producer

import java.util.Properties

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord, RecordMetadata}

object KafkaProducerDemo {
  def main(args: Array[String]): Unit = {
    val prop = new Properties
    prop.put("bootstrap.servers", "mastercs:9092,slave1cs:9092,slave2cs:9092")
    prop.put("acks", "all")  // wait for all in-sync replicas to acknowledge each record
    prop.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    prop.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    prop.put("request.timeout.ms", "60000")

    val producer = new KafkaProducer[String, String](prop)

    // Send 100 comma-separated test messages to the "es" topic, one every 500 ms.
    for (i <- 1 to 100) {
      val msg = s"${i},this is a,linys ${i} kafka data"
      println("send --> " + msg)
      // send() is asynchronous; get() blocks until the broker acknowledges the record.
      val rmd: RecordMetadata = producer.send(new ProducerRecord[String, String]("es", msg)).get()
      println(rmd.toString)
      Thread.sleep(500)
    }
    producer.close()
  }
}
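Blocking on get() after every send() keeps the console output ordered but serializes all requests. If throughput matters more than that ordering, a callback can be passed to send() instead; a minimal sketch reusing the producer and msg from the loop above:

import org.apache.kafka.clients.producer.Callback

// Asynchronous send: the callback fires once the broker has acknowledged (or rejected) the record.
producer.send(new ProducerRecord[String, String]("es", msg), new Callback {
  override def onCompletion(metadata: RecordMetadata, exception: Exception): Unit = {
    if (exception != null) exception.printStackTrace()
    else println(s"acked: partition=${metadata.partition()}, offset=${metadata.offset()}")
  }
})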
5. A Scala Kafka consumer for verification
package com.test.kafka_consumer

import java.util.{Collections, Properties}

import org.apache.kafka.clients.consumer.{ConsumerRecords, KafkaConsumer}

object KafkaConsumerDemo {
  def main(args: Array[String]): Unit = {
    val prop = new Properties
    prop.put("bootstrap.servers", "mastercs:9092,slave1cs:9092,slave2cs:9092")
    prop.put("group.id", "g1")
    prop.put("auto.offset.reset", "earliest")
    prop.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
    prop.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
    prop.put("enable.auto.commit", "true")
    prop.put("session.timeout.ms", "30000")

    val kafkaConsumer = new KafkaConsumer[String, String](prop)
    kafkaConsumer.subscribe(Collections.singletonList("es"))

    // Poll the "es" topic forever and print every record.
    while (true) {
      val msgs: ConsumerRecords[String, String] = kafkaConsumer.poll(2000)
      val it = msgs.iterator()
      while (it.hasNext) {
        val msg = it.next()
        println(s"partition: ${msg.partition()}, offset: ${msg.offset()}, key: ${msg.key()}, value: ${msg.value()}")
      }
    }
  }
}
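With enable.auto.commit set to true, offsets are committed in the background whether or not the polled records were actually handled. For at-least-once processing the auto commit can be switched off and the offsets committed explicitly after the records have been processed; a minimal sketch using the same consumer setup as above:

prop.put("enable.auto.commit", "false")

while (true) {
  val msgs = kafkaConsumer.poll(2000)
  val it = msgs.iterator()
  while (it.hasNext) {
    val msg = it.next()
    println(s"value: ${msg.value()}")  // handle the record here
  }
  // Commit the offsets returned by the last poll only after the records were processed.
  kafkaConsumer.commitSync()
}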