Flume: Implementing Your Own Real-Time Logging (5)

This post walks through how to configure and extend Flume so that it ships real-time logs to Elasticsearch. It covers the configuration file, plugin development, dependency setup, the sink implementation, and index creation in Elasticsearch, and ends with a complete configuration example. With this in place, the logs of different systems can be monitored and searched in real time.

(1) Define the configuration file

First, decide which parameters should be exposed in the external configuration file. Here we keep it to the most basic ones: the sink type, the cluster address, the index name, the batch size, and the channel.

agent.sinks.elasticsearch_sink01.type = elasticsearch_high
agent.sinks.elasticsearch_sink01.hostNames = 192.168.88.130:9200
agent.sinks.elasticsearch_sink01.indexName = flume_yz
agent.sinks.elasticsearch_sink01.batchSize = 500
agent.sinks.elasticsearch_sink01.channel = elasticsearch_channel01

(2) Developing the plugin

Add an ELASTICSEARCH_HIGH entry for the new ElasticSearchHighSink to the SinkType enum. If you do not register it there, the fully-qualified class name has to be used in the configuration instead (the same way a Kafka sink can be configured); here we use the type-name approach.

ELASTICSEARCH_HIGH("org.apache.flume.sink.elasticsearch.high.ElasticSearchHighSink");

Also add the corresponding configuration class to SinkConfiguration:

ELASTICSEARCH_HIGH("org.apache.flume.sink.elasticsearch.high.ElasticSearchHighSinkConfiguration")

(3) Add the dependency

<properties>
    <elasticsearch.version>6.4.3</elasticsearch.version>
</properties>

<dependency>
    <groupId>org.elasticsearch.client</groupId>
    <artifactId>elasticsearch-rest-high-level-client</artifactId>
    <version>${elasticsearch.version}</version>
</dependency>

Note: after upgrading the Elasticsearch version, many transitive dependencies will no longer match and have to be replaced by hand; the complete project on GitHub can be used as a reference.

(4) Code implementation

This is a simple sink written around our own conventions and it differs quite a bit from the official Elasticsearch sink; adapt it as needed. The core idea is to split each log line into two fields before writing it to ES: the system of origin (serviceName) and the full log body. The serviceName is prepended to every log line automatically by the TAILDIR source we customized earlier in this series.
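The post only shows the individual methods; for context, here is a sketch of the class declaration and the fields they rely on, inferred from the method bodies (the default values are assumptions, not taken from the original project):

public class ElasticSearchHighSink extends AbstractSink implements Configurable {

    private static final Logger logger =
            LoggerFactory.getLogger(ElasticSearchHighSink.class);

    private String[] serverAddresses;                      // ES nodes as host:port
    private String indexName;                              // target index
    private int batchSize = 100;                           // assumed default
    private final Charset charset = Charset.forName("UTF-8");

    private RestHighLevelClient client;                    // built in start()
    private final ObjectMapper objectMapper = new ObjectMapper();

    private SinkCounter sinkCounter;
    private final CounterGroup counterGroup = new CounterGroup();

    // configure(), start(), bulkExecute(), process() and stop() follow
}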

1 - configure(): read the configuration during initialization

public void configure(Context context) {
    logger.info("configure elasticsearch......");
    // read the configuration
    if (StringUtils.isNotBlank(context.getString(ElasticSearchHighSinkConstants.HOSTNAMES))) {
        serverAddresses = StringUtils.deleteWhitespace(
                context.getString(ElasticSearchHighSinkConstants.HOSTNAMES)).split(",");
    }
    Preconditions.checkState(serverAddresses != null
            && serverAddresses.length > 0, "Missing Param:" + ElasticSearchHighSinkConstants.HOSTNAMES);

    if (StringUtils.isNotBlank(context.getString(INDEX_NAME))) {
        this.indexName = context.getString(INDEX_NAME);
    }
    if (StringUtils.isNotBlank(context.getString(BATCH_SIZE))) {
        this.batchSize = Integer.parseInt(context.getString(BATCH_SIZE));
    }
    logger.info("ES configuration loaded, address:" + StringUtils.join(serverAddresses)
            + ",index:" + indexName + ",batchSize:" + batchSize);

    if (sinkCounter == null) {
        sinkCounter = new SinkCounter(getName());
    }
    Preconditions.checkState(StringUtils.isNotBlank(indexName),
            "Missing Param:" + INDEX_NAME);
    Preconditions.checkState(batchSize >= 1, BATCH_SIZE
            + " must be greater than 0");
}
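The keys read above (HOSTNAMES, INDEX_NAME, BATCH_SIZE) and the DEFAULT_PORT used in start() live in ElasticSearchHighSinkConstants, which the post does not show; a minimal sketch consistent with the property names used in section (1) would be:

public class ElasticSearchHighSinkConstants {
    // property keys, matching the keys used in the agent configuration of section (1)
    public static final String HOSTNAMES = "hostNames";
    public static final String INDEX_NAME = "indexName";
    public static final String BATCH_SIZE = "batchSize";
    // fallback port when a host is given without ":port" (assumed ES HTTP default)
    public static final int DEFAULT_PORT = 9200;
}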

2 - start(): set up the sink and initialize the ES client

@Override
public synchronized void start() {
    logger.info("start elasticsearch sink......");
    HttpHost[] httpHosts = new HttpHost[serverAddresses.length];
    for (int i = 0; i < serverAddresses.length; i++) {
        String[] hostPort = serverAddresses[i].trim().split(":");
        String host = hostPort[0].trim();
        int port = hostPort.length == 2 ? Integer.parseInt(hostPort[1].trim())
                : DEFAULT_PORT;
        logger.info("elasticsearch host:{},port:{}", host, port);
        httpHosts[i] = new HttpHost(host, port, "http");
    }
    client = new RestHighLevelClient(
            RestClient.builder(httpHosts));
    sinkCounter.start();
    super.start();
}

// bulk-index a batch of events into ES
public void bulkExecute(List<Event> events) throws Exception {
    // build one bulk request for the whole batch
    BulkRequest request = new BulkRequest();
    for (Event event : events) {
        // parse the event body
        String body = new String(event.getBody(), charset);
        EventIndex eventIndex;
        if (body.contains("INFO") || body.contains("WARN") || body.contains("ERROR")
                || body.contains("DEBUG")) {
            // serviceName , body
            eventIndex = new EventIndex(body.substring(0, body.indexOf(" ")),
                    body.substring(body.indexOf(" ") + 1));
        } else {
            // default system name is "sys"
            eventIndex = new EventIndex("sys", body);
        }
        request.add(new IndexRequest(indexName, indexName)
                .source(objectMapper.writeValueAsString(eventIndex), XContentType.JSON));
    }
    BulkResponse bulkResponse = client.bulk(request, RequestOptions.DEFAULT);
    // the bulk response provides a method to quickly check whether one or more operations failed
    if (bulkResponse.hasFailures()) {
        logger.warn("bulk request has failures");
    }
    TimeValue took = bulkResponse.getTook();
    logger.info("[bulk insert took]:" + took + "," + took.getMillis() + "," + took.getSeconds());
    // iterate over all item responses if per-item failure handling is needed
    /*for (BulkItemResponse bulkItemResponse : bulkResponse) {
        if (bulkItemResponse.isFailed()) {
            BulkItemResponse.Failure failure = bulkItemResponse.getFailure();
        }
    }*/
}
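bulkExecute serializes an EventIndex object with Jackson before indexing it; the post does not show that class, but a minimal sketch consistent with the usage above and with the mapping created in section (5) is:

public class EventIndex {
    private String serviceName;  // system of origin, mapped as keyword in section (5)
    private String body;         // full log line, mapped as text with the ik_smart analyzer

    public EventIndex() {
        // no-args constructor for Jackson
    }

    public EventIndex(String serviceName, String body) {
        this.serviceName = serviceName;
        this.body = body;
    }

    public String getServiceName() { return serviceName; }
    public String getBody() { return body; }
}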

3 - process(): take events from the channel and write them to ES

public Status process() throws EventDeliveryException {
    logger.info("process elasticsearch sink......");
    Status status = Status.READY;
    Channel channel = getChannel();
    Transaction txn = channel.getTransaction();
    List<Event> events = Lists.newArrayList();
    try {
        txn.begin();
        int count;
        for (count = 0; count < batchSize; ++count) {
            // take an event from the channel; internally this is event = queue.poll(),
            // and when the channel is empty the exception is caught and null is returned
            Event event = channel.take();
            if (event == null) {
                break;
            }
            // collect the event for the bulk write to ES
            events.add(event);
        }
        // commit the batch once it has been collected
        if (count <= 0) {
            sinkCounter.incrementBatchEmptyCount();
            counterGroup.incrementAndGet("channel.underflow");
            status = Status.BACKOFF;
        } else {
            if (count < batchSize) {
                sinkCounter.incrementBatchUnderflowCount();
                status = Status.BACKOFF;
            } else {
                sinkCounter.incrementBatchCompleteCount();
            }
            sinkCounter.addToEventDrainAttemptCount(count);
            // flush the current batch to ES
            bulkExecute(events);
        }
        txn.commit();
        sinkCounter.addToEventDrainSuccessCount(count);
        counterGroup.incrementAndGet("transaction.success");
    } catch (Throwable ex) {
        try {
            txn.rollback();
            counterGroup.incrementAndGet("transaction.rollback");
        } catch (Exception ex2) {
            logger.error(
                    "Exception in rollback. Rollback might not have been successful.",
                    ex2);
        }
        if (ex instanceof Error || ex instanceof RuntimeException) {
            logger.error("Failed to commit transaction. Transaction rolled back.",
                    ex);
            Throwables.propagate(ex);
        } else {
            logger.error("Failed to commit transaction. Transaction rolled back.",
                    ex);
            throw new EventDeliveryException(
                    "Failed to commit transaction. Transaction rolled back.", ex);
        }
    } finally {
        txn.close();
    }
    return status;
}

4 - stop(): release resources

public synchronized void stop() {
    logger.info("stop elasticsearch sink......");
    if (client != null) {
        try {
            client.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    sinkCounter.stop();
    super.stop();
}

(5) Create the index in ES

PUT flume_yz
{
  "settings": {
    "number_of_replicas": 0,
    "number_of_shards": 5,
    "index.store.type": "niofs",
    "index.query.default_field": "title",
    "index.unassigned.node_left.delayed_timeout": "5m"
  },
  "mappings": {
    "flume_yz": {
      "dynamic": "strict",
      "properties": {
        "serviceName": {
          "type": "keyword"
        },
        "body": {
          "type": "text",
          "analyzer": "ik_smart",
          "search_analyzer": "ik_smart"
        }
      }
    }
  }
}

(6) Complete configuration file

[avro-es.conf]

agent.sources = elasticsearch_sources01
agent.channels = elasticsearch_channel01
agent.sinks = elasticsearch_sink01

agent.sources.elasticsearch_sources01.type = avro
agent.sources.elasticsearch_sources01.bind = 192.168.88.129
agent.sources.elasticsearch_sources01.port = 4545
agent.sources.elasticsearch_sources01.channels = elasticsearch_channel01

agent.channels.elasticsearch_channel01.type = memory
agent.channels.elasticsearch_channel01.capacity = 1000000
agent.channels.elasticsearch_channel01.transactionCapacity = 6000

agent.sinks.elasticsearch_sink01.type = elasticsearch_high
agent.sinks.elasticsearch_sink01.hostNames = 192.168.88.130:9200
agent.sinks.elasticsearch_sink01.indexName = flume_yz
agent.sinks.elasticsearch_sink01.batchSize = 500
agent.sinks.elasticsearch_sink01.channel = elasticsearch_channel01

(7) Start Flume and start your ES

Start the consumer agent:

./bin/flume-ng agent --conf conf -f /home/elasticsearch/data/flume/avro-es.conf -n agent -Dflume.root.logger=INFO,console

Start the producer agent:

./bin/flume-ng agent --conf conf -f /home/hadoop/app/data/taildir-avro.conf -n agent -Dflume.root.logger=INFO,console

(8) Produce log data

echo "2019-02-12 10:33:26 [com.faya.data.controller.LoginController]-[INFO] 用户登陆入参:userId = 10" >log.txt

You can check in the ES head plugin or in Kibana whether the data has arrived, and query the logs of the different systems by serviceName. This gives us a near real-time view of the logs of each system.
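For example, a term query on the serviceName keyword field pulls out the logs of a single system; "sys" below is the default name the sink assigns when a line has no recognizable log level, so substitute the name your TAILDIR source prepends:

GET flume_yz/_search
{
  "query": {
    "term": {
      "serviceName": "sys"
    }
  }
}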
