What is missing is not thinking, but the follow-through after thinking
Introduction
Many of our data services now run on k8s, so their logs are scattered across different machines. Since this is the production environment we try to keep things decoupled, and to developers the k8s cluster is a black box: ordinary developers are not allowed onto the machines to read logs. At first we used Flume to ship logs to Kafka and Hive (a company-wide requirement; as a small company we had few resources for testing), but the Hive sync runs only once an hour, and with the sync delay on top of that, problems could not be spotted in time. We were asked to fix this as fast as possible, and in under 3 hours ELK solved the logging problem nicely.
Setup
es (I deployed 12 nodes; three are used as the example here)
docker-compose-server.yml (create the relevant directories first)
version: '3.7'
services:
  elasticsearch-server:
    image: elasticsearch:6.7.0
    restart: always
    network_mode: "host"
    container_name: "elasticsearch"
    # with network_mode: host the container shares the host's network stack,
    # so this ports mapping is redundant and only documents the ports in use
    ports:
      - "9200:9200"
      - "9300:9300"
    volumes:
      - "/xxx/elasticsearch/config/elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml"
      - "/xxx/elasticsearch/config/jvm.options:/usr/share/elasticsearch/config/jvm.options"
      - "/xxx/elasticsearch/plugins:/usr/share/elasticsearch/plugins"
      - "/xxx/elasticsearch/log:/usr/share/elasticsearch/log"
      - "/xxx/elasticsearch/data:/usr/share/elasticsearch/data"
      - "/etc/localtime:/etc/localtime:ro"
Modify the kernel settings
vim /etc/sysctl.conf
Add: vm.max_map_count=655360
sysctl -p /etc/sysctl.conf
If necessary, also:
chmod 777 /xxx/elasticsearch/data
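To confirm the kernel setting took effect, and as a tighter alternative to chmod 777 (the official image runs Elasticsearch as UID 1000, to the best of my knowledge):

## verify the new limit is active
sysctl vm.max_map_count
## hand the data dir to the container user instead of opening it to everyone
chown -R 1000:1000 /xxx/elasticsearch/data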
elasticsearch.yml
#cluster name
cluster.name: data-es-cluster
#name of this node
node.name: es01
#whether this node is eligible to be elected master
node.master: true
#whether this node stores data
node.data: true
#max number of nodes allowed to share this data path on one machine (not the cluster size)
node.max_local_storage_nodes: 3
#data storage path
path.data: /usr/share/elasticsearch/data
#log storage path
path.logs: /usr/share/elasticsearch/log
# ---------------------------------- Network -----------------------------------
#startup kept failing with an invalid-IP error and port 9300 could not be bound; 0.0.0.0 fixes this
network.host: 0.0.0.0
#the address other nodes use to reach this node; if unset it is auto-detected, and it must be a
#real, routable IP. Set it to the physical host's address so that under Docker the node publishes
#this configured IP rather than the Docker gateway IP
network.publish_host: 10.10.30.203
#HTTP port
http.port: 9200
#port for internal node-to-node transport
transport.tcp.port: 9300
# --------------------------------- Discovery ----------------------------------
#defaults to 127.0.0.1:9300; list the other hosts when forming a multi-node cluster.
#Note: discovery.seed_hosts and cluster.initial_master_nodes only exist from 7.0 on;
#on 6.7 the equivalent setting is discovery.zen.ping.unicast.hosts
discovery.zen.ping.unicast.hosts: ["es01:9300","es02:9300","es03:9300"]
#with three master-eligible nodes a quorum of 2 is needed to avoid split-brain
discovery.zen.minimum_master_nodes: 2
# ---------------------------------- Gateway -----------------------------------
gateway.recover_after_nodes: 2
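After all three nodes are up, cluster formation can be verified over the HTTP port; a quick check, assuming es01 resolves from wherever you run it:

## list the nodes that have joined the cluster
curl http://es01:9200/_cat/nodes?v
## overall cluster health; status should reach green once all shards are allocated
curl http://es01:9200/_cluster/health?pretty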
logstash
docker run -itd -p 5044:5044 -p 9600:9600 --name logstash -v /xxx/logstash/data:/usr/share/logstash/data --privileged=true logstash:7.5.1 /bin/bash
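Note that the container is started with a plain bash shell, so the Logstash process itself is launched by hand in step 3 below. Since /xxx/logstash/data is mounted to /usr/share/logstash/data, the pipeline file only needs to be dropped there on the host, e.g.:

## the file then appears as data/logstage.conf inside the container
cp logstage.conf /xxx/logstash/data/logstage.conf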
kibana
docker-compose.yml
version: '3.7'
services:
  kibana:
    image: kibana:6.7.0
    restart: always
    network_mode: "host"
    container_name: "kibana"
    ports:
      - "5601:5601"
    volumes:
      - "/xxxx/kibana/config/kibana.yml:/usr/share/kibana/config/kibana.yml"
      - "/etc/localtime:/etc/localtime:ro"
kibana.yml
server.name: "data-kibana"
server.host: "0"
elasticsearch.hosts: ["http://es01:9200","http://es02:9200","http://es03:9200"]
xpack.monitoring.ui.container.elasticsearch.enabled: true
i18n.locale: "zh-CN"
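Kibana is brought up the same way; a minimal sketch of starting it and checking that the UI answers, assuming the compose file above sits in the current directory:

## start kibana
docker-compose up -d
## the UI should respond on port 5601 once it has connected to the cluster
curl -I http://localhost:5601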
Example
1. Log data
2020/11/26 18:49:37 ◊ INFO ◊ ****/common/endpointkit/baseRequestEndpoint.go ◊ (*BaseRequestEndpoint):60 ◊ {"api":"/****/v1/******?/******?=/******?","caller":"******?","method":"POST","logid":"20201126184937-36283567-df060e15-ed8d-48c4-9a95-095f788ea102","ts":"1606387777765628577"} ◊ {"id":"3907944","limit":"1","pool_level":"0"} ◊ {"status":200,"message":"","data":{"id":[243715],"comments":null}}
2. logstage.conf
grok syntax is a real hurdle for beginners, so here it is solved in the most brute-force way possible:
input {
  kafka {
    bootstrap_servers => "kafka01:9092,kafka02:9092,kafka03:9092"
    group_id => "es"
    topics => ["service_log"]
    consumer_threads => 3
    decorate_events => true
  }
}
filter {
  # brute-force split of the line on the " ◊ " separator
  grok {
    match => { "message" => "%{GREEDYDATA:ctime}\ [◊]\ %{GREEDYDATA:level}\ [◊]\ %{GREEDYDATA:filename}\ [◊]\ %{GREEDYDATA:point}\ [◊]\ %{GREEDYDATA:base_json}\ [◊]\ %{GREEDYDATA:request_json}\ [◊]\ %{GREEDYDATA:response_json}" }
    remove_field => "message"
    remove_field => "point"
  }
  # use the log's own timestamp as @timestamp (the sample uses slashes, not dashes)
  date {
    match => ["ctime", "yyyy/MM/dd HH:mm:ss"]
    target => "@timestamp"
  }
  # keep only the date part of ctime
  mutate {
    split => ["ctime", " "]
    add_field => {
      "date" => "%{[ctime][0]}"
    }
    remove_field => ["ctime"]
  }
  # turn 2020/11/26 into 2020.11.26 for the index name
  mutate {
    split => ["date", "/"]
    add_field => {
      "idx" => "%{[date][0]}.%{[date][1]}.%{[date][2]}"
    }
    remove_field => ["date"]
  }
}
output {
  elasticsearch {
    hosts => ["es01:9200","es02:9200","es03:9200"]
    index => "service_log_%{idx}"
    document_type => "doc"
  }
}
Note: the resulting index names look like service_log_yyyy.MM.dd.
The benefit of this naming is that it makes deleting old indexes on a schedule easy.
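For instance, a daily cron job can drop the index from N days ago through the delete index API; a rough sketch, assuming GNU date and a 7-day retention (both my own choices):

## delete the service_log index from 7 days ago
OLD=$(date -d "7 days ago" +%Y.%m.%d)
curl -XDELETE "http://es01:9200/service_log_${OLD}"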
3. Run
## enter the logstash container
docker exec -it logstash bash
## run the pipeline
bin/logstash -f data/logstage.conf --path.data tmp/log
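Once the pipeline starts consuming from Kafka, new daily indexes should begin to appear; a quick check from any host that can reach the cluster:

## confirm today's index is being written
curl http://es01:9200/_cat/indices?v | grep service_log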