MapReduce jobs can use Hadoop's log-aggregation feature, but the aggregated files also pull in large volumes of framework INFO/WARN output, which makes it hard to find the log lines a program prints deliberately.
The most common scenario for this is ETL troubleshooting: inspecting raw input records that were filtered out because they did not match the expected format.
So I surveyed the open-source components for asynchronous log collection in a distributed environment, and fluentd looked like the best fit.
Flume or Logstash could certainly do the job as well, but the volume of logs a MapReduce job prints deliberately is usually small, so a lightweight solution like fluentd suits this case very well.
Usage is as follows:
1) Install the fluentd component
curl -L https://toolbelt.treasuredata.com/sh/install-redhat-td-agent2.sh | sh
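If the install script succeeds, the td-agent service is registered on the machine; you should be able to confirm the bundled fluentd version with:
td-agent --version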
2) Start fluentd
I recommend managing it with supervisor.
[root@com supervisor]# cat fluentd.conf
[program:fluentd-ui]
command = /usr/sbin/td-agent-ui start
autostart = true
startsecs = 5
autorestart = true
numprocs = 1
user = root
redirect_stderr = true
stdout_logfile = /data/logs/fluentd-ui.log
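Note that the entry above manages fluentd-ui, the optional web console. The collector daemon itself can be supervised the same way; a minimal sketch, assuming the default td-agent2 RPM paths:
[program:td-agent]
command = /usr/sbin/td-agent -c /etc/td-agent/td-agent.conf
autostart = true
startsecs = 5
autorestart = true
numprocs = 1
user = root
redirect_stderr = true
stdout_logfile = /data/logs/td-agent.log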
3) Configure fluentd to write to HDFS
# accepts events from fluent-logger clients (used by the Java code below)
<source>
  type forward
  port 24224
</source>
# accepts events over plain HTTP
<source>
  type http
  port 8888
</source>
# exposes internal metrics for monitoring
<source>
  type monitor_agent
  port 24220
</source>
# allows attaching a debugger
<source>
  type debug_agent
  port 24230
</source>
# echo debug.* events to stdout, handy for smoke tests
<match debug.*>
  type stdout
</match>
# route hdfs.*.* events to HDFS through WebHDFS
<match hdfs.*.*>
  type webhdfs
  host com.hunantv.hadoopnamenode
  port 50070
  path /data/etl/logs/%Y%m%d/etl.fluentd.%Y%m%d_%H.${hostname}.log
  username root
  flush_interval 10s
</match>
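The webhdfs output comes from fluent-plugin-webhdfs, which is bundled with td-agent; writing through WebHDFS also requires it to be enabled on the HDFS side (dfs.webhdfs.enabled and dfs.support.append in hdfs-site.xml). Once td-agent is running, the http source together with the debug.* match gives a quick smoke test; the event should show up in td-agent's stdout log:
curl -X POST -d 'json={"msg":"hello fluentd"}' http://localhost:8888/debug.test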
4) The Java plugin
4.1 The plugin class
package com.hunantv.bigdata.etl.common.log;

import org.fluentd.logger.FluentLogger;

import java.io.IOException;
import java.util.Properties;

/**
 * Created by lzphoenix on 15/10/29.
 */
public class FluentdLogAppender {

    private static final String fluentd_host;
    private static final int fluentd_port;
    private static final String fluentd_tag;
    private static final String fluentd_db_tag;

    // Load connection settings once, from fluentd.properties on the classpath.
    // Note: if loading fails, getProperty returns null and Integer.parseInt
    // below will throw, so the properties file must be packaged with the job.
    static {
        String filePath = "/fluentd.properties";
        Properties properties = new Properties();
        try {
            properties.load(FluentdLogAppender.class.getResourceAsStream(filePath));
        } catch (IOException e) {
            e.printStackTrace();
        }
        fluentd_host = properties.getProperty("fluentd_host");
        fluentd_port = Integer.parseInt(properties.getProperty("fluentd_port"));
        fluentd_tag = properties.getProperty("fluentd_tag");
        fluentd_db_tag = properties.getProperty("fluentd_db_tag");
    }

    // Logger whose events carry the hdfs.etl tag prefix and therefore
    // match the <match hdfs.*.*> rule, ending up in HDFS via webhdfs.
    public static FluentLogger getHdfsLogger() {
        return FluentLogger.getLogger(fluentd_tag, fluentd_host, fluentd_port);
    }

    // Logger for events under the http.db tag prefix.
    public static FluentLogger getHdfsDBLogger() {
        return FluentLogger.getLogger(fluentd_db_tag, fluentd_host, fluentd_port);
    }
}
4.2 The configuration file
fluentd.properties has to sit at the root of the classpath (for example, packaged into the job jar) so that getResourceAsStream("/fluentd.properties") can find it:
fluentd_host=10.100.1.46
fluentd_port=24224
fluentd_tag=hdfs.etl
fluentd_db_tag=http.db
4.3 Usage
Obtain a logger and send each rejected line to it, as in the fuller sketch below:
private FluentLogger fluentLogger = FluentdLogAppender.getHdfsLogger();
fluentLogger.log(fluentdLogTag, "wrong_line", lineContent);
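A minimal sketch of this pattern inside a Mapper; EtlMapper, jobTag, and isValid are illustrative names I made up, not from the original code:
package com.hunantv.bigdata.etl.common.log;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.fluentd.logger.FluentLogger;

public class EtlMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    private final FluentLogger fluentLogger = FluentdLogAppender.getHdfsLogger();
    // With fluentd_tag=hdfs.etl, the emitted tag becomes hdfs.etl.<jobTag>,
    // which the <match hdfs.*.*> rule routes to webhdfs.
    private final String jobTag = "play_log";

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        if (!isValid(line)) {
            // Ship the rejected raw line to fluentd asynchronously instead of
            // burying it in the task's own INFO/WARN output.
            fluentLogger.log(jobTag, "wrong_line", line);
            return;
        }
        context.write(value, NullWritable.get());
    }

    // Placeholder format check; the real validation depends on the ETL schema.
    private boolean isValid(String line) {
        return line.split("\t").length >= 3;
    }
}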