kafka+logstash+elasticsearch环境搭建
环境要求
1.系统环境:centos7.4
2.logstash、elasticsearch、kafka 均使用当时的最新版本(本文以 kafka_2.12-1.1.0 为例)
3.logstash,elasticsearch不使用root运行
kafka环境搭建
#下载kafka,并解压到相应目录
#下载地址http://mirror.bit.edu.cn/apache/kafka/1.1.0/kafka_2.12-1.1.0.tgz
#我解压到了/usr/local/kafka下
#修改配置文件,kafka启动需要zookeeper,所以必须先启动zookeeper,再启动kafka
# Single-node ZooKeeper config shipped with Kafka (used only to back this broker)
cat zookeeper.properties
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# the directory where the snapshot is stored.
dataDir=/usr/local/kafka/zookeeper
# transaction log directory, kept separate from the snapshot dir
# NOTE(review): both directories must exist and be writable by the user
# that starts ZooKeeper — confirm they were created beforehand
dataLogDir=/usr/local/kafka/logs/zookeeper
#dataDir=/tmp/zookeeper
# the port at which the clients will connect
clientPort=2181
# disable the per-ip limit on the number of connections since this is a non-production config
maxClientCnxns=0
#=====================
# Kafka broker config for a single-node setup.
# (Comments must be on their own lines: a trailing "# ..." in a .properties
# file becomes part of the value, not a comment.)
cat server.properties
# Unique id of this broker within the cluster
broker.id=0
num.network.threads=3
num.io.threads=8
socket.send.buffer.bytes=102400
socket.receive.buffer.bytes=102400
socket.request.max.bytes=104857600
# Store topic data on a persistent path. The stock default /tmp/kafka-logs
# is cleared on reboot / by tmpwatch, silently destroying all topic data;
# keep it under /usr/local/kafka like the ZooKeeper dirs above.
log.dirs=/usr/local/kafka/logs/kafka
num.partitions=1
num.recovery.threads.per.data.dir=1
# Single broker: internal topics can only have replication factor 1
offsets.topic.replication.factor=1
transaction.state.log.replication.factor=1
transaction.state.log.min.isr=1
# Keep log segments for 7 days
log.retention.hours=168
log.segment.bytes=1073741824
log.retention.check.interval.ms=300000
zookeeper.connect=localhost:2181
zookeeper.connection.timeout.ms=6000
group.initial.rebalance.delay.ms=0
# Let "kafka-topics.sh --delete" actually remove topics instead of only marking them
delete.topic.enable=true
# After editing the configs, start ZooKeeper first, then Kafka
# (-daemon runs each server in the background).
# Use absolute config paths: the original relative "config/..." only
# resolved when the current directory happened to be /usr/local/kafka.
/usr/local/kafka/bin/zookeeper-server-start.sh -daemon /usr/local/kafka/config/zookeeper.properties
/usr/local/kafka/bin/kafka-server-start.sh -daemon /usr/local/kafka/config/server.properties
# Smoke test
# Create a topic
kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic test
# List topics
kafka-topics.sh --list --zookeeper localhost:2181
# Produce messages (type lines on stdin, Ctrl-C to stop)
kafka-console-producer.sh --broker-list localhost:9092 --topic test
# Consume; --from-beginning reads the topic from the start.
# The old "--zookeeper" consumer is deprecated in Kafka 1.1 and removed in
# 2.0 — connect to the broker with --bootstrap-server instead.
kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic test --from-beginning
elasticsearch,logstash环境搭建(非root账号执行)
elasticsearch
# Edit the config; show only the effective (non-comment) settings
cat elasticsearch.yml | grep -v "#"
# NOTE(review): 0.0.0.0 binds every interface — make sure this host is
# firewalled, since ES has no authentication out of the box
network.host: 0.0.0.0
http.port: 9200
# memory_lock/system_call_filter disabled, presumably to get past the
# bootstrap checks on CentOS 7 — confirm this is acceptable for production
bootstrap.memory_lock: false
bootstrap.system_call_filter: false
node.master: true
node.data: true
# Allow cross-origin requests from any site (e.g. for elasticsearch-head);
# "*" is deliberately permissive
http.cors.allow-origin: "*"
# Start in the background (-d = daemonize); must run as a non-root user
./bin/elasticsearch -d
logstash
# Create the Logstash config and per-pipeline data directories.
# -p creates any missing parents (/data, /data/elk, ...) and is a no-op if
# they already exist — the original plain mkdir fails on a fresh host where
# /data/elk does not exist yet.
mkdir -p /data/elk/logstash/kafka_conf /data/elk/logstash/kafka_data/test
# Create the pipeline config file
vim /data/elk/logstash/kafka_conf/test.conf
input{
kafka {
bootstrap_servers => "127.0.0.1:9092" # Kafka broker address
topics => "test" # Kafka topic(s) to read
group_id => "logstash" # consumer group id; change it to re-read the topic from the beginning
auto_offset_reset => "earliest" # with no committed offset, start from the earliest message
}
}
filter {
json { # messages are JSON — parse them into top-level fields so ES can query them
source => "message"
}
mutate { # drop fields we do not need in ES
remove_field => "@timestamp" # note: the date filter below re-creates @timestamp from the event's own time
remove_field => "message"
remove_field => "@version"
}
date {
# NOTE(review): "loginDat" looks like a typo for "loginDate" — verify
# against the actual JSON field name; a mismatch tags events _dateparsefailure
match => [ "loginDat", "yyyy-MM-dd HH:mm:ss"]
}
}
output {
#stdout { codec => rubydebug }
elasticsearch { # Elasticsearch output settings
hosts => "127.0.0.1:9200"
index => "test_kafka"
document_type => "test"
}
}
# Start Logstash. --path.data must be unique per pipeline instance when
# running several at once, otherwise startup fails with a locked-data-dir
# error; --log.level error keeps the console output quiet.
# Use absolute paths (the original relative ones only worked from
# /data/elk/logstash) and nohup so the process survives the shell/session
# exiting — a bare "&" job is killed with the login session.
nohup /data/elk/logstash/bin/logstash -f /data/elk/logstash/kafka_conf/test.conf --path.data=/data/elk/logstash/kafka_data/test --log.level error &