前提:准备好 Java 环境(也可使用 Docker)。Elasticsearch 使用 2.3.4 版本(高版本存在连接器索引兼容问题),因此使用低版本连接器;测试时为单节点运行。
发送到kafka的日志格式必须是json格式
启动zookeeper
zookeeper
# Unpack the ZooKeeper distribution and start a single-node server.
tar -xzf zookeeper-*.tar.gz
cd zookeeper-*
bin/zkServer.sh start
# To shut it down later: bin/zkServer.sh stop
启动kafka
# Unpack the Kafka distribution and start the broker as a background daemon.
# (Fixed: archive name is "kafka_*.tar.gz" — the stray space before "tar.gz"
# made the original tar command fail to find the archive.)
tar -zxf kafka_*.tar.gz
cd kafka_*
bin/kafka-server-start.sh -daemon config/server.properties
启动elasticsearch
# Elasticsearch 2.3.4 is distributed as a zip archive — tar cannot extract it.
unzip elasticsearch-2.3.4.zip
cd elasticsearch-2.3.4
# Edit config/elasticsearch.yml: data/log paths, bind address, other tuning
# Create the data/log directories and grant ownership to a dedicated user
bin/elasticsearch -d
构建kafka-connect-elasticsearch包
高版本可下载已有包,或切换分支自己构建,需要准备好maven、Java、Git环境,maven配置好国内仓库源
# Check out the 0.10.0.0 branch of the connector and build it with Maven.
git clone --branch 0.10.0.0 https://github.com/confluentinc/kafka-connect-elasticsearch.git
cd kafka-connect-elasticsearch
mvn clean package
复制target/kafka-connect-elasticsearch-3.2.0-SNAPSHOT-package/share/java/kafka-connect-elasticsearch/目录下的所有jar包至 kafka/libs/ 目录,删除 guava-18.jar 文件,否则启动失败。
修改kafka配置文件
正式环境配置按需修改
elasticsearch-connect.properties
单机节点使用该配置
# elasticsearch-connect.properties
# Connector instance name (used in REST API paths, e.g. /connectors/elasticsearch-sink)
name=elasticsearch-sink
# Sink connector implementation from the kafka-connect-elasticsearch build
connector.class=io.confluent.connect.elasticsearch.ElasticsearchSinkConnector
# Maximum parallel tasks; 1 is enough for a single-node test
tasks.max=1
# Kafka topic(s) to consume from
topics=logs
# Route records from topic "logs" into Elasticsearch index "logs_index"
topic.index.map=logs:logs_index
# Elasticsearch HTTP endpoint
connection.url=http://localhost:9200
# Elasticsearch mapping type for written documents (required by ES 2.x)
type.name=log
# Ignore record keys when writing documents
key.ignore=true
# Do not derive index mappings from Connect schemas (records are schemaless JSON)
schema.ignore=true
connect-standalone.properties
单机节点使用该配置
# connect-standalone.properties
# Kafka broker(s) the Connect worker talks to
bootstrap.servers=localhost:9092
# Convert record keys and values as JSON
key.converter=org.apache.kafka.connect.json.JsonConverter
value.converter=org.apache.kafka.connect.json.JsonConverter
# Records are plain JSON without an embedded schema envelope
key.converter.schemas.enable=false
value.converter.schemas.enable=false
# Converters for Connect's internal bookkeeping data (offsets, configs)
internal.key.converter=org.apache.kafka.connect.json.JsonConverter
internal.value.converter=org.apache.kafka.connect.json.JsonConverter
internal.key.converter.schemas.enable=false
internal.value.converter.schemas.enable=false
# File where standalone mode persists source offsets
offset.storage.file.filename=/tmp/connect.offsets
# Offset flush interval in milliseconds
offset.flush.interval.ms=10000
connect-distributed.properties
分布式集群下使用该配置
#connect-distributed.properties
# Kafka broker(s) the Connect worker talks to
bootstrap.servers=localhost:9092
# Convert record keys and values as JSON
key.converter=org.apache.kafka.connect.json.JsonConverter
value.converter=org.apache.kafka.connect.json.JsonConverter
# Records are plain JSON without an embedded schema envelope
key.converter.schemas.enable=false
value.converter.schemas.enable=false
# Converters for Connect's internal bookkeeping data (offsets, configs)
internal.key.converter=org.apache.kafka.connect.json.JsonConverter
internal.value.converter=org.apache.kafka.connect.json.JsonConverter
internal.key.converter.schemas.enable=false
internal.value.converter.schemas.enable=false
# Offset flush interval in milliseconds
offset.flush.interval.ms=10000
# Consumer group shared by all workers in this Connect cluster
group.id=connect-cluster
# Kafka topics used to store cluster state (offsets, connector configs, status)
offset.storage.topic=connect-offsets
config.storage.topic=connect-configs
status.storage.topic=connect-status
# NOTE(review): placeholder — point this at the directory holding connector jars
plugin.path=/plugin
# NOTE(review): placeholder — replace "ip" with the host address other workers
# and REST clients should use to reach this worker
rest.advertised.host.name=ip
启动kafka-connect
单机节点使用此命令
# Standalone mode: worker config and connector config both passed on the command line
bin/connect-standalone.sh config/connect-standalone.properties config/elasticsearch-connect.properties
分布式集群下使用此命令
# Distributed mode: only the worker config; connectors are created later via the REST API
bin/connect-distributed.sh config/connect-distributed.properties
您可能已经注意到,与以独立模式运行 Kafka Connect 相比有一个区别——我们没有提供连接器本身的配置。这不是错误!在分布式模式下使用 Kafka Connect 时,需要通过 REST API 创建连接器。
分布式集群下使用REST API创建连接器
# Create the sink connector through the Connect REST API (distributed mode).
# Fixed: "type.name" was "true" (copy-paste slip from the ignore flags below);
# it must be the ES mapping type, matching the standalone config's type.name=log.
curl -XPOST -H 'Content-Type: application/json' 'localhost:8083/connectors' -d '{
  "name" : "second_es_sink",
  "config" : {
    "connector.class" : "io.confluent.connect.elasticsearch.ElasticsearchSinkConnector",
    "tasks.max" : "1",
    "topics" : "logs",
    "topic.index.map" : "logs:logs_index",
    "connection.url" : "http://localhost:9200",
    "type.name" : "log",
    "key.ignore" : "true",
    "schema.ignore" : "true"
  }
}'
测试
发送示例数据
# Start a console producer on the "logs" topic and type one JSON record per line
bin/kafka-console-producer.sh --topic logs --broker-list localhost:9092
{"name":"Testlog", "severity": "INFO"}
{"name":"Testlog2", "severity": "WARN"}
ES查询
# Query Elasticsearch to confirm the records arrived.
# Fixed: topic.index.map routes topic "logs" into index "logs_index",
# so the search must target "logs_index", not "logs".
curl -XGET 'localhost:9200/logs_index/_search?pretty'
Kafka Connect REST API
了解有关Kafka Connect REST API的所有选项,请参阅:http://kafka.apache.org/documentation.html#connect_rest
# Check that the worker is running; returns the build version and commit hash
curl 'localhost:8083'
# List the currently running connectors
curl 'localhost:8083/connectors'
# List the connector plugins installed in this Connect cluster
curl 'localhost:8083/connector-plugins'
# Retrieve a connector's configuration
curl 'localhost:8083/connectors/elasticsearch-sink/config'
# Check the status of a given connector
curl 'localhost:8083/connectors/elasticsearch-sink/status'
# List the tasks run by a given connector
curl 'localhost:8083/connectors/elasticsearch-sink/tasks'
# Pause, resume, and restart a connector
curl -XPUT 'localhost:8083/connectors/elasticsearch-sink/pause'
curl -XPUT 'localhost:8083/connectors/elasticsearch-sink/resume'
curl -XPOST 'localhost:8083/connectors/elasticsearch-sink/restart'
# Delete a connector, identified by its name
curl -XDELETE 'localhost:8083/connectors/second_es_sink'