- Logstash Introduction
Logstash is an open-source data collection, processing, and forwarding system that runs on the JVM. It does just three things: input, filter (cleaning/transformation), and output.
Logstash collects and forwards data according to the input, filter, and output rules written in its configuration file.
Create a configuration file under the conf/ directory of the installation, e.g. logstash_indexer.conf, structured as follows:
# input
input {
...
}
# filter / clean
filter {
...
}
# output
output {
...
}
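Once the file is in place, Logstash is started against it. A typical invocation, assuming a standard tarball layout (the path and file name follow the example above):

bin/logstash -f conf/logstash_indexer.conf --configtest   # validate the config first (flag in the 1.x/2.x releases this config targets)
bin/logstash -f conf/logstash_indexer.conf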
1. input example
Using Kafka as the data source, with the following settings:
zk_connect: ZooKeeper connection string
group_id: consumer group ID
topic_id: Kafka topic
consumer_id: consumer ID; corresponds to the Owner shown in Kafka, with logstash_<topic> as prefix and the thread number as suffix
input {
# PV (page views)
kafka {
zk_connect => "172.16.5.7:2180" # string (optional), default: "localhost:2181"
group_id => "logstash_pvBolt" # string (optional), default: "logstash"
topic_id => "pvBolt" # string (optional), default: "test"
reset_beginning => false # boolean (optional), default: false
consumer_threads => 5 # number (optional), default: 1
queue_size => 5000 # number (optional), default: 20
rebalance_max_retries => 4 # number (optional), default: 4
rebalance_backoff_ms => 2000 # number (optional), default: 2000
consumer_timeout_ms => -1 # number (optional), default: -1
consumer_restart_on_error => true # boolean (optional), default: true
consumer_restart_sleep_ms => 0 # number (optional), default: 0
decorate_events => true # boolean (optional), default: false
consumer_id => "172.16.5.7 kafka consumer" # string (optional) default: nil
fetch_message_max_bytes => 1048576 # number (optional) default: 1048576
}
# ads
kafka {
zk_connect => "172.16.5.7:2180" # string (optional), default: "localhost:2181"
group_id => "logstash_adVisitBolt" # string (optional), default: "logstash"
topic_id => "adVisitBolt" # string (optional), default: "test"
reset_beginning => false # boolean (optional), default: false
consumer_threads => 5 # number (optional), default: 1
queue_size => 5000 # number (optional), default: 20
rebalance_max_retries => 4 # number (optional), default: 4
rebalance_backoff_ms => 2000 # number (optional), default: 2000
consumer_timeout_ms => -1 # number (optional), default: -1
consumer_restart_on_error => true # boolean (optional), default: true
consumer_restart_sleep_ms => 0 # number (optional), default: 0
decorate_events => true # boolean (optional), default: false
consumer_id => "172.16.5.7 kafka consumer" # string (optional) default: nil
fetch_message_max_bytes => 1048576 # number (optional) default: 1048576
}
...
kafka { }
}
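Note that decorate_events => true is what adds Kafka metadata such as [kafka][topic] to each event; the per-topic branching in the filter below relies on that field.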
2. filter example
filter {
mutate {
replace => [ "fromhost", "172.16.5.7" ] # Kafka broker IP
remove_field => [ "host" ]
}
# PV
if [kafka][topic] == "pvBolt" { # PV topic
ruby {
code => "new_event = LogStash::Event.new
new_event['index_name'] = 'uservisit' # suffix of the ES index name; joined with the index setting in output
# assign fields
new_event['appId'] = event.getString('appId', '') # read a field from the Kafka message; if missing, fall back to the second argument
new_event['terminal'] = event.getTerminal('terminal', 'os')
new_event['customer_key'] = event.getString('customer_key', '')
new_event['udid'] = event.getString('udid', '')
new_event['date'] = event.getInteger('date', 0)
new_event['timestamp'] = event.getInteger('timestamp', 0)
new_event['createTime'] = event.getString('createTime', '')
new_event['channel'] = event.getString('channel', '')
new_event['pageCode'] = event.getString('pageCode', '')
new_event['version'] = event.getString('version', '')
new_event['ip'] = event.getString('ip', '')
new_event['osDetail'] = event.getString('osDetail', '')
new_event['osVersion'] = event.getString('osVersion', '')
new_event['deviceVersion'] = event.getString('deviceVersion', '')
new_event['operator'] = event.getString('operator', '')
new_event['network'] = event.getString('network', '')
# overwrite the event
event.overwrite(new_event) # fixed pattern
"
}
}
# ads
else if [kafka][topic] == "adVisitBolt" {
ruby {
code => "new_event = LogStash::Event.new
new_event['index_name'] = 'advisit'
# assign fields
new_event['appId'] = event.getString('appId', '')
new_event['terminal'] = event.getTerminal('terminal', 'os')
new_event['customer_key'] = event.getString('customer_key', '')
new_event['udid'] = event.getString('udid', '')
new_event['date'] = event.getInteger('date', 0)
new_event['timestamp'] = event.getInteger('timestamp', 0)
new_event['createTime'] = event.getString('createTime', '')
new_event['adId'] = event.getString('adId', '')
new_event['viewtype'] = event.getInteger('type', 0)
# overwrite the event
event.overwrite(new_event)
"
}
}
...
}
3. output example
output {
stdout {
codec => rubydebug
}
elasticsearch {
hosts => ["172.16.5.6:9200"] # ES address
index => "logstash-%{index_name}" # ES index name: the "logstash-" prefix joined with the index_name set in the filter above, e.g. logstash-uservisit
workers => 40
template_overwrite => true
flush_size => 1000
idle_flush_time => 10
}
}
- Creating an ES template
Before indexing data into ES, create a template (analogous to creating a table in MySQL) that defines the mapping, i.e. the data types of the incoming data.
Create a template like this:
curl -XPUT "192.168.1.10:9200/_template/template_storehouse_news" -d '~~JSON body here~~'
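As a rough sketch of what the JSON body might look like, here is a minimal template for the uservisit data from the filter above, written in the ES 1.x/2.x template format that matches the legacy Logstash options used here; the field list comes from the filter, but the types and settings are illustrative assumptions, not the actual template:

{
  "template": "logstash-uservisit*",
  "settings": { "number_of_shards": 5 },
  "mappings": {
    "_default_": {
      "properties": {
        "appId":     { "type": "string", "index": "not_analyzed" },
        "udid":      { "type": "string", "index": "not_analyzed" },
        "channel":   { "type": "string", "index": "not_analyzed" },
        "date":      { "type": "integer" },
        "timestamp": { "type": "long" },
        "ip":        { "type": "string", "index": "not_analyzed" }
      }
    }
  }
}

Any index matching logstash-uservisit* created after this PUT picks up the mapping automatically.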
List all ES templates:
http://10.23.214.56:9200/_template?pretty
View a specific template:
http://10.23.214.56:9200/_template/template_storehouse_news?pretty