Flume: Implementing Your Own Real-Time Logging (5)

This post walks through how to configure and extend Flume so that it ships real-time logs to Elasticsearch. It covers the configuration file, plugin development, dependency setup, the sink implementation, and index creation in Elasticsearch, and ends with a complete configuration example. With this in place, the logs of different systems can be monitored and searched in real time.

(1) Define the configuration file

First, decide which parameters should be exposed in the external configuration file. Here we keep it to the most basic ones: the sink type, the cluster address, the index name, the batch size, and the channel.

agent.sinks.elasticsearch_sink01.type = elasticsearch_high
agent.sinks.elasticsearch_sink01.hostNames = 192.168.88.130:9200
agent.sinks.elasticsearch_sink01.indexName = flume_yz
agent.sinks.elasticsearch_sink01.batchSize = 500
agent.sinks.elasticsearch_sink01.channel = elasticsearch_channel01

(2) Developing the plugin

Add an ELASTICSEARCH_HIGH entry for the new ElasticSearchHighSink to the SinkType enum. If you do not register it there, the fully-qualified class name has to be used in the configuration instead (the same way a Kafka sink can be configured); here we use the type-name approach.

ELASTICSEARCH_HIGH("org.apache.flume.sink.elasticsearch.high.ElasticSearchHighSink");

Also add the corresponding configuration class to SinkConfiguration:

ELASTICSEARCH_HIGH("org.apache.flume.sink.elasticsearch.high.ElasticSearchHighSinkConfiguration")

(3) Add the dependency

<properties>
    <elasticsearch.version>6.4.3</elasticsearch.version>
</properties>

<dependency>
    <groupId>org.elasticsearch.client</groupId>
    <artifactId>elasticsearch-rest-high-level-client</artifactId>
    <version>${elasticsearch.version}</version>
</dependency>

Note: after upgrading the Elasticsearch version, many transitive dependencies will no longer match and have to be replaced by hand; the complete project on GitHub can be used as a reference.

(4) Code implementation

This is a simple sink written around our own conventions and it differs quite a bit from the official Elasticsearch sink; adapt it as needed. The core idea is to split each log line into two fields before writing it to ES: the system of origin (serviceName) and the full log body. The serviceName is prepended to every log line automatically by the TAILDIR source we customized earlier in this series.
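The post only shows the individual methods; for context, here is a sketch of the class declaration and the fields they rely on, inferred from the method bodies (the default values are assumptions, not taken from the original project):

public class ElasticSearchHighSink extends AbstractSink implements Configurable {

    private static final Logger logger =
            LoggerFactory.getLogger(ElasticSearchHighSink.class);

    private String[] serverAddresses;                      // ES nodes as host:port
    private String indexName;                              // target index
    private int batchSize = 100;                           // assumed default
    private final Charset charset = Charset.forName("UTF-8");

    private RestHighLevelClient client;                    // built in start()
    private final ObjectMapper objectMapper = new ObjectMapper();

    private SinkCounter sinkCounter;
    private final CounterGroup counterGroup = new CounterGroup();

    // configure(), start(), bulkExecute(), process() and stop() follow
}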

1 - configure(): read the configuration during initialization

public void configure(Context context) {
    logger.info("configure elasticsearch......");
    // read the configuration
    if (StringUtils.isNotBlank(context.getString(ElasticSearchHighSinkConstants.HOSTNAMES))) {
        serverAddresses = StringUtils.deleteWhitespace(
                context.getString(ElasticSearchHighSinkConstants.HOSTNAMES)).split(",");
    }
    Preconditions.checkState(serverAddresses != null
            && serverAddresses.length > 0, "Missing Param:" + ElasticSearchHighSinkConstants.HOSTNAMES);

    if (StringUtils.isNotBlank(context.getString(INDEX_NAME))) {
        this.indexName = context.getString(INDEX_NAME);
    }
    if (StringUtils.isNotBlank(context.getString(BATCH_SIZE))) {
        this.batchSize = Integer.parseInt(context.getString(BATCH_SIZE));
    }
    logger.info("ES configuration loaded, address:" + StringUtils.join(serverAddresses)
            + ",index:" + indexName + ",batchSize:" + batchSize);

    if (sinkCounter == null) {
        sinkCounter = new SinkCounter(getName());
    }
    Preconditions.checkState(StringUtils.isNotBlank(indexName),
            "Missing Param:" + INDEX_NAME);
    Preconditions.checkState(batchSize >= 1, BATCH_SIZE
            + " must be greater than 0");
}
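The keys read above (HOSTNAMES, INDEX_NAME, BATCH_SIZE) and the DEFAULT_PORT used in start() live in ElasticSearchHighSinkConstants, which the post does not show; a minimal sketch consistent with the property names used in section (1) would be:

public class ElasticSearchHighSinkConstants {
    // property keys, matching the keys used in the agent configuration of section (1)
    public static final String HOSTNAMES = "hostNames";
    public static final String INDEX_NAME = "indexName";
    public static final String BATCH_SIZE = "batchSize";
    // fallback port when a host is given without ":port" (assumed ES HTTP default)
    public static final int DEFAULT_PORT = 9200;
}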

2 - start(): set up the sink and initialize the ES client

@Override
public synchronized void start() {
    logger.info("start elasticsearch sink......");
    HttpHost[] httpHosts = new HttpHost[serverAddresses.length];
    for (int i = 0; i < serverAddresses.length; i++) {
        String[] hostPort = serverAddresses[i].trim().split(":");
        String host = hostPort[0].trim();
        int port = hostPort.length == 2 ? Integer.parseInt(hostPort[1].trim())
                : DEFAULT_PORT;
        logger.info("elasticsearch host:{},port:{}", host, port);
        httpHosts[i] = new HttpHost(host, port, "http");
    }
    client = new RestHighLevelClient(
            RestClient.builder(httpHosts));
    sinkCounter.start();
    super.start();
}

// bulk-index a batch of events into ES
public void bulkExecute(List<Event> events) throws Exception {
    // build one bulk request for the whole batch
    BulkRequest request = new BulkRequest();
    for (Event event : events) {
        // parse the event body
        String body = new String(event.getBody(), charset);
        EventIndex eventIndex;
        if (body.contains("INFO") || body.contains("WARN") || body.contains("ERROR")
                || body.contains("DEBUG")) {
            // serviceName , body
            eventIndex = new EventIndex(body.substring(0, body.indexOf(" ")),
                    body.substring(body.indexOf(" ") + 1));
        } else {
            // default system name is "sys"
            eventIndex = new EventIndex("sys", body);
        }
        request.add(new IndexRequest(indexName, indexName)
                .source(objectMapper.writeValueAsString(eventIndex), XContentType.JSON));
    }
    BulkResponse bulkResponse = client.bulk(request, RequestOptions.DEFAULT);
    // the bulk response provides a method to quickly check whether one or more operations failed
    if (bulkResponse.hasFailures()) {
        logger.warn("bulk request has failures");
    }
    TimeValue took = bulkResponse.getTook();
    logger.info("[bulk insert took]:" + took + "," + took.getMillis() + "," + took.getSeconds());
    // iterate over all item responses if per-item failure handling is needed
    /*for (BulkItemResponse bulkItemResponse : bulkResponse) {
        if (bulkItemResponse.isFailed()) {
            BulkItemResponse.Failure failure = bulkItemResponse.getFailure();
        }
    }*/
}
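bulkExecute serializes an EventIndex object with Jackson before indexing it; the post does not show that class, but a minimal sketch consistent with the usage above and with the mapping created in section (5) is:

public class EventIndex {
    private String serviceName;  // system of origin, mapped as keyword in section (5)
    private String body;         // full log line, mapped as text with the ik_smart analyzer

    public EventIndex() {
        // no-args constructor for Jackson
    }

    public EventIndex(String serviceName, String body) {
        this.serviceName = serviceName;
        this.body = body;
    }

    public String getServiceName() { return serviceName; }
    public String getBody() { return body; }
}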

3 - process(): take events from the channel and write them to ES

public Status process() throws EventDeliveryException {
    logger.info("process elasticsearch sink......");
    Status status = Status.READY;
    Channel channel = getChannel();
    Transaction txn = channel.getTransaction();
    List<Event> events = Lists.newArrayList();
    try {
        txn.begin();
        int count;
        for (count = 0; count < batchSize; ++count) {
            // take an event from the channel; internally this is event = queue.poll(),
            // and when the channel is empty the exception is caught and null is returned
            Event event = channel.take();
            if (event == null) {
                break;
            }
            // collect the event for the bulk write to ES
            events.add(event);
        }
        // commit the batch once it has been collected
        if (count <= 0) {
            sinkCounter.incrementBatchEmptyCount();
            counterGroup.incrementAndGet("channel.underflow");
            status = Status.BACKOFF;
        } else {
            if (count < batchSize) {
                sinkCounter.incrementBatchUnderflowCount();
                status = Status.BACKOFF;
            } else {
                sinkCounter.incrementBatchCompleteCount();
            }
            sinkCounter.addToEventDrainAttemptCount(count);
            // flush the current batch to ES
            bulkExecute(events);
        }
        txn.commit();
        sinkCounter.addToEventDrainSuccessCount(count);
        counterGroup.incrementAndGet("transaction.success");
    } catch (Throwable ex) {
        try {
            txn.rollback();
            counterGroup.incrementAndGet("transaction.rollback");
        } catch (Exception ex2) {
            logger.error(
                    "Exception in rollback. Rollback might not have been successful.",
                    ex2);
        }
        if (ex instanceof Error || ex instanceof RuntimeException) {
            logger.error("Failed to commit transaction. Transaction rolled back.",
                    ex);
            Throwables.propagate(ex);
        } else {
            logger.error("Failed to commit transaction. Transaction rolled back.",
                    ex);
            throw new EventDeliveryException(
                    "Failed to commit transaction. Transaction rolled back.", ex);
        }
    } finally {
        txn.close();
    }
    return status;
}

4 - stop(): release resources

public synchronized void stop() {
    logger.info("stop elasticsearch sink......");
    if (client != null) {
        try {
            client.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    sinkCounter.stop();
    super.stop();
}

(5) Create the index in ES

PUT flume_yz
{
  "settings": {
    "number_of_replicas": 0,
    "number_of_shards": 5,
    "index.store.type": "niofs",
    "index.query.default_field": "title",
    "index.unassigned.node_left.delayed_timeout": "5m"
  },
  "mappings": {
    "flume_yz": {
      "dynamic": "strict",
      "properties": {
        "serviceName": {
          "type": "keyword"
        },
        "body": {
          "type": "text",
          "analyzer": "ik_smart",
          "search_analyzer": "ik_smart"
        }
      }
    }
  }
}

(6) Complete configuration file

[avro-es.conf]

agent.sources = elasticsearch_sources01
agent.channels = elasticsearch_channel01
agent.sinks = elasticsearch_sink01

agent.sources.elasticsearch_sources01.type = avro
agent.sources.elasticsearch_sources01.bind = 192.168.88.129
agent.sources.elasticsearch_sources01.port = 4545
agent.sources.elasticsearch_sources01.channels = elasticsearch_channel01

agent.channels.elasticsearch_channel01.type = memory
agent.channels.elasticsearch_channel01.capacity = 1000000
agent.channels.elasticsearch_channel01.transactionCapacity = 6000

agent.sinks.elasticsearch_sink01.type = elasticsearch_high
agent.sinks.elasticsearch_sink01.hostNames = 192.168.88.130:9200
agent.sinks.elasticsearch_sink01.indexName = flume_yz
agent.sinks.elasticsearch_sink01.batchSize = 500
agent.sinks.elasticsearch_sink01.channel = elasticsearch_channel01

(7) Start Flume and start your ES

Start the consumer agent:

./bin/flume-ng agent --conf conf -f /home/elasticsearch/data/flume/avro-es.conf -n agent -Dflume.root.logger=INFO,console

Start the producer agent:

./bin/flume-ng agent --conf conf -f /home/hadoop/app/data/taildir-avro.conf -n agent -Dflume.root.logger=INFO,console

(8) Produce log data

echo "2019-02-12 10:33:26 [com.faya.data.controller.LoginController]-[INFO] 用户登陆入参:userId = 10" >log.txt

You can check in the ES head plugin or in Kibana whether the data has arrived, and query the logs of the different systems by serviceName. This gives us a near real-time view of the logs of each system.
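For example, a term query on the serviceName keyword field pulls out the logs of a single system; "sys" below is the default name the sink assigns when a line has no recognizable log level, so substitute the name your TAILDIR source prepends:

GET flume_yz/_search
{
  "query": {
    "term": {
      "serviceName": "sys"
    }
  }
}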
