收集的数据为json格式,需要解析成以|分隔的字符串
参考:http://blog.csdn.net/zfszhangyuan/article/details/52449060
pom:
<dependency>
<groupId>org.apache.flume</groupId>
<artifactId>flume-ng-core</artifactId>
<version>1.7.0</version>
</dependency>
代码:
package flume;
import com.google.common.base.Charsets;
import com.google.common.collect.Lists;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.interceptor.Interceptor;
import java.text.SimpleDateFormat;
import java.util.List;
/**
 * Flume interceptor that flattens a JSON log record into a {@code |}-delimited line.
 *
 * <p>The body is deliberately processed as plain text rather than with a JSON parser so
 * that the columns come out in exactly the order in which the fields appear in the input.
 * Example input body:
 *
 * <pre>
 * {"f_time":1515058629,"f_server_lan_address":"","f_server_wan_address":"101.37.75.140",
 *  "f_params":{"f_dept":"2","f_character_grade":"20", ...},"f_game_id":1,"f_log_name":"log_jinbi"}
 * </pre>
 *
 * <p>Processing steps: every <code>{</code> and <code>}</code> is removed, the text is split on
 * {@code ,}, each fragment is split on {@code :} and its second token (with double quotes
 * removed) becomes one column. Because the braces are removed first, the fragment that opens
 * a nested object (for example {@code "f_params":{"f_dept":"2"}) yields the nested key name
 * ({@code f_dept}) as its column. Values that themselves contain {@code ,} or {@code :} are
 * split/truncated by this scheme.
 *
 * <p>The first column is treated as a Unix timestamp in seconds and rewritten as
 * {@code yyyy-MM-dd HH:mm:ss}.
 *
 * <p>Instances are created through {@link Builder}.
 */
public class LogAnalysis implements Interceptor {

    /** Output pattern of the timestamp column. */
    private static final String TIME_PATTERN = "yyyy-MM-dd HH:mm:ss";

    /** Separator written between output columns. */
    private static final char COLUMN_SEPARATOR = '|';

    private LogAnalysis() {
    }

    @Override
    public void initialize() {
        // NO-OP: the interceptor keeps no state.
    }

    @Override
    public void close() {
        // NO-OP: nothing to release.
    }

    /**
     * Rewrites the body of a single event from JSON text to a {@code |}-delimited line.
     *
     * @param event the event whose body is decoded as UTF-8 and replaced in place
     * @return the same event instance, with its body re-encoded as UTF-8
     */
    @Override
    public Event intercept(Event event) {
        String body = new String(event.getBody(), Charsets.UTF_8);
        // Encode with the same charset used for decoding; a bare getBytes() would use the
        // platform default and could corrupt non-ASCII values such as character names.
        event.setBody(toDelimited(body).getBytes(Charsets.UTF_8));
        return event;
    }

    /**
     * Applies {@link #intercept(Event)} to every event of a batch.
     *
     * @param events the batch to process
     * @return a new list holding the processed events in their original order
     */
    @Override
    public List<Event> intercept(List<Event> events) {
        List<Event> intercepted = Lists.newArrayListWithCapacity(events.size());
        for (Event event : events) {
            Event interceptedEvent = intercept(event);
            if (interceptedEvent != null) {
                intercepted.add(interceptedEvent);
            }
        }
        return intercepted;
    }

    /**
     * Converts the JSON text of one record into its {@code |}-delimited form.
     *
     * @param json raw event body
     * @return one column per comma-separated fragment of {@code json}, in input order
     */
    private static String toDelimited(String json) {
        String[] fragments = json.replaceAll("\\{|\\}", "").split(",");
        StringBuilder line = new StringBuilder();
        for (int i = 0; i < fragments.length; i++) {
            String[] tokens = fragments[i].split(":");
            // A fragment without ':' produces an empty column instead of an
            // ArrayIndexOutOfBoundsException, so the column count is preserved.
            String column = tokens.length > 1 ? tokens[1].replace("\"", "") : "";
            if (i == 0) {
                // Only the leading column is the timestamp; converting it here (rather than
                // replacing the digits throughout the finished line) keeps other fields that
                // happen to contain the same digits intact.
                column = formatEpochSeconds(column);
            } else {
                line.append(COLUMN_SEPARATOR);
            }
            line.append(column);
        }
        return line.toString();
    }

    /**
     * Formats a Unix timestamp given in seconds using {@link #TIME_PATTERN} and the JVM's
     * default time zone.
     *
     * @param rawSeconds the timestamp text, e.g. {@code 1515058629}
     * @return the formatted time, or {@code rawSeconds} unchanged when it is blank or not a
     *         valid number
     */
    private static String formatEpochSeconds(String rawSeconds) {
        String digits = rawSeconds.trim();
        if (digits.isEmpty()) {
            return rawSeconds;
        }
        long millis;
        try {
            // Appending three zeros turns seconds into milliseconds; an out-of-range value
            // then fails to parse instead of silently overflowing.
            millis = Long.parseLong(digits + "000");
        } catch (NumberFormatException e) {
            return rawSeconds;
        }
        // SimpleDateFormat is not thread-safe, so a fresh instance is used per call.
        return new SimpleDateFormat(TIME_PATTERN).format(Long.valueOf(millis));
    }

    /** Builder through which Flume instantiates the interceptor. */
    public static class Builder implements Interceptor.Builder {

        @Override
        public Interceptor build() {
            return new LogAnalysis();
        }

        @Override
        public void configure(Context context) {
            // NO-OP: the interceptor takes no configuration.
        }
    }
}
conf:
# Flume agent configuration file
# Components of the agent named "agent": one source, one channel, one sink.
agent.sources=httpSrc
agent.channels=c1
agent.sinks=k1
# HTTP source bound to 172.16.90.62:55555, writing into channel c1.
agent.sources.httpSrc.type=http
agent.sources.httpSrc.bind=172.16.90.62
agent.sources.httpSrc.port=55555
agent.sources.httpSrc.channels=c1
# Interceptor i2 is the custom LogAnalysis interceptor, created via its nested Builder class.
agent.sources.httpSrc.interceptors = i2
agent.sources.httpSrc.interceptors.i2.type = flume.LogAnalysis$Builder
# NOTE(review): LogAnalysis.Builder.configure(Context) has an empty body, so the regex and
# serializers keys below are not read by this interceptor. They look like leftovers from a
# regex-extractor style interceptor configuration -- confirm before removing.
agent.sources.httpSrc.interceptors.i2.regex = ([^+]*)log_
agent.sources.httpSrc.interceptors.i2.serializers = s1 s2
agent.sources.httpSrc.interceptors.i2.serializers.s1.name = data
agent.sources.httpSrc.interceptors.i2.serializers.s2.name = type
# In-memory channel between the source and the sink.
agent.channels.c1.type=memory
agent.channels.c1.capacity=100000
agent.channels.c1.transactionCapacity=100000
# Custom sink class flume.MySinks (not shown in this file), reading from channel c1.
# NOTE(review): sink.directory / sink.rollInterval are presumably the output directory and the
# file roll interval in seconds -- verify against the MySinks implementation.
agent.sinks.k1.type = flume.MySinks
agent.sinks.k1.sink.directory=/data/flume_data/
agent.sinks.k1.channel=c1
agent.sinks.k1.sink.rollInterval=300
2017-05-18 03:40:05||120.27.248.131|f_dept|66|-245486|37|182.134.22.141|冷冷 雪儿|1|17|0|37|2bb3fd5f623eaa2f|37000270|1|log_jinbi
数据虽然是json格式,本可以用阿里的fastjson解析;但我们要求输出字段的顺序与原始数据保持一致、不能打乱,因此这里按普通字符串处理。如果不要求字段顺序,可以直接使用fastjson处理。
public Event intercept(Event event):此方法是针对单个event处理,其它业务处理也可以写在此方法体内
public List<Event> intercept(List<Event> events) :此方法是针对批event处理,其它业务处理也可以写在此方法体内