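// Extract the date and hour from the file name to decide which day/hour
// partition directory on HDFS the data is sent to.
// This requires a custom interceptor.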
package interceptor;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.interceptor.Interceptor;
import org.apache.flume.interceptor.RegexExtractorInterceptorPassThroughSerializer;
import org.apache.flume.interceptor.RegexExtractorInterceptorSerializer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Charsets;
import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import com.google.common.collect.Lists;
/**
* Interceptor that extracts matches using a specified regular expression and
* appends the matches to the event headers using the specified serializers.
* <p>
* Note that all regular expression matching occurs through Java's built-in
* {@code java.util.regex} package.
* <p>
* Properties:
* <p>
* regex: The regex to use
* <p>
* serializers: Specifies the group the serializer will be applied to, and the
* name of the header that will be added. If no serializer is specified for a
* group the default {@link RegexExtractorInterceptorPassThroughSerializer} will
* be used
* <p>
* Sample config:
* <p>
* <code>
* agent.sources.r1.channels = c1
* <p>
* agent.sources.r1.type = SEQ
* <p>
* agent.sources.r1.interceptors = i1
* <p>
* agent.sources.r1.interceptors.i1.type = REGEX_EXTRACTOR
* <p>
* agent.sources.r1.interceptors.i1.regex = (WARNING)|(ERROR)|(FATAL)
* <p>
* agent.sources.r1.interceptors.i1.serializers = s1 s2
* agent.sources.r1.interceptors.i1.serializers.s1.type = com.blah.SomeSerializer
* agent.sources.r1.interceptors.i1.serializers.s1.name = warning
* agent.sources.r1.interceptors.i1.serializers.s2.type = org.apache.flume.interceptor.RegexExtractorInterceptorTimestampSerializer
* agent.sources.r1.interceptors.i1.serializers.s2.name = error
* agent.sources.r1.interceptors.i1.serializers.s2.dateFormat = yyyy-MM-dd
* </code>
* </p>
* <pre>
* Example 1:
*
* EventBody: 1:2:3.4foobar5
*
* Configuration: agent.sources.r1.interceptors.i1.regex = (\\d):(\\d):(\\d)
*
* agent.sources.r1.interceptors.i1.serializers = s1 s2 s3
* agent.sources.r1.interceptors.i1.serializers.s1.name = one
* agent.sources.r1.interceptors.i1.serializers.s2.name = two
* agent.sources.r1.interceptors.i1.serializers.s3.name = three
*
* results in an event with the following
*
* body: 1:2:3.4foobar5 headers: one=>1, two=>2, three=>3
*
* Example 2:
*
* EventBody: 1:2:3.4foobar5
*
* Configuration: agent.sources.r1.interceptors.i1.regex = (\\d):(\\d):(\\d)
* <p>
* agent.sources.r1.interceptors.i1.serializers = s1 s2
* agent.sources.r1.interceptors.i1.serializers.s1.name = one
* agent.sources.r1.interceptors.i1.se