A friend asked how to set up a Flume plugin development environment. Here is a pom.xml to get you started; that is about as much as I can help with.
```xml
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.hupu.dace</groupId>
    <artifactId>flume-plugins</artifactId>
    <version>1.0</version>
    <packaging>jar</packaging>
    <name>kafka message key interceptor</name>
    <url>http://maven.apache.org</url>

    <!-- Repository for the CDH artifacts used below -->
    <repositories>
        <repository>
            <id>cloudera</id>
            <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
        </repository>
    </repositories>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>3.8.1</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.5.0-cdh5.3.0</version>
            <!--<scope>provided</scope>-->
        </dependency>
        <!--<dependency>-->
            <!--<groupId>org.apache.hadoop</groupId>-->
            <!--<artifactId>hadoop-hdfs</artifactId>-->
            <!--<version>2.5.0-cdh5.3.0</version>-->
            <!--<scope>provided</scope>-->
        <!--</dependency>-->
        <dependency>
            <groupId>org.apache.flume</groupId>
            <artifactId>flume-ng-core</artifactId>
            <version>1.5.0-cdh5.3.0</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flume.flume-ng-sources</groupId>
            <artifactId>flume-kafka-source</artifactId>
            <version>1.5.0-cdh5.3.0</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.jvnet.hudson</groupId>
            <artifactId>ganymed-ssh2</artifactId>
            <version>build210-hudson-1</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-dependency-plugin</artifactId>
                <executions>
                    <execution>
                        <id>copy</id>
                        <phase>package</phase>
                        <goals>
                            <goal>copy-dependencies</goal>
                        </goals>
                        <configuration>
                            <outputDirectory>${project.build.directory}/lib/</outputDirectory>
                            <includeScope>compile</includeScope>
                            <includeArtifactIds>ganymed-ssh2</includeArtifactIds>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
        <pluginManagement>
            <plugins>
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-compiler-plugin</artifactId>
                    <version>2.3.2</version>
                    <configuration>
                        <source>1.6</source>
                        <target>1.6</target>
                    </configuration>
                </plugin>
            </plugins>
        </pluginManagement>
    </build>
</project>
```
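After `mvn package`, the plugin jar lands in `target/`, and the `maven-dependency-plugin` execution above copies the `ganymed-ssh2` dependency into `target/lib/`. One way to deploy both onto a Flume agent is the standard `plugins.d` layout; the directory and plugin names below are just placeholders:

```
$FLUME_HOME/plugins.d/
└── flume-plugins/                               # arbitrary plugin directory name
    ├── lib/
    │   └── flume-plugins-1.0.jar                # the jar built by this pom
    └── libext/
        └── ganymed-ssh2-build210-hudson-1.jar   # copied from target/lib/
```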
And here is a custom interceptor implementation:
```java
package com.hupu.dace.flume.interceptors;

import com.google.common.collect.Lists;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.interceptor.Interceptor;

import java.util.List;
import java.util.Map;

/**
 * Splits the Kafka message key on a configured separator and writes each
 * fragment into its own event header, so that a sink can reference the
 * fragments with the %{} syntax.
 */
public class KafkaMessageKeyInterceptor implements Interceptor {

    private final String splitChar;

    /**
     * Only {@link com.hupu.dace.flume.interceptors.KafkaMessageKeyInterceptor.Builder} can build me
     */
    private KafkaMessageKeyInterceptor(String splitChar) {
        this.splitChar = splitChar;
    }

    public void initialize() {
        // no-op
    }

    /**
     * Modifies events in-place.
     */
    public Event intercept(Event event) {
        Map<String, String> headers = event.getHeaders();
        if (headers.containsKey(Constants.KAFKA_MESSAGE_KEY)) {
            // Read the Kafka message key from the event header
            String kafkaMessageKey = headers.get(Constants.KAFKA_MESSAGE_KEY);
            // Split the key on the configured separator
            String[] values = kafkaMessageKey.split(splitChar);
            for (int i = 0; i < values.length; i++) {
                // Write each fragment into a header named with the configured prefix plus its index
                headers.put(Constants.SPLIT_KEY_PREFIX + i, values[i]);
            }
            return event;
        } else {
            // Events without a Kafka message key are dropped
            return null;
        }
    }

    /**
     * Delegates to {@link #intercept(Event)} in a loop.
     *
     * @param events the events to intercept
     * @return the intercepted events, with dropped events filtered out
     */
    public List<Event> intercept(List<Event> events) {
        List<Event> out = Lists.newArrayList();
        for (Event event : events) {
            Event outEvent = intercept(event);
            if (outEvent != null) {
                out.add(outEvent);
            }
        }
        return out;
    }

    public void close() {
        // no-op
    }

    public static class Builder implements Interceptor.Builder {
        private String splitChar = Constants.DEFAULT_SPLIT_CHAR;

        public Interceptor build() {
            return new KafkaMessageKeyInterceptor(splitChar);
        }

        public void configure(Context context) {
            splitChar = context.getString(Constants.SPLIT_CHAR, Constants.DEFAULT_SPLIT_CHAR);
        }
    }

    public static class Constants {
        // The Kafka source writes the message key into an event header named "key"
        public static final String KAFKA_MESSAGE_KEY = "key";
        public static final String SPLIT_CHAR = "split";
        public static final String DEFAULT_SPLIT_CHAR = ":";
        public static final String SPLIT_KEY_PREFIX = "s";
    }
}
```
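To actually use the interceptor, register it on the source in the agent configuration and reference the split-out headers in the sink. Below is a minimal sketch, not a complete agent config: the agent/source/sink names, the example key, and the HDFS path are made up for illustration.

```properties
# Hypothetical agent a1 with a source r1 and an HDFS sink k1
a1.sources.r1.interceptors = i1
a1.sources.r1.interceptors.i1.type = com.hupu.dace.flume.interceptors.KafkaMessageKeyInterceptor$Builder
# "split" is the property read by Builder.configure(); ":" is already the default
a1.sources.r1.interceptors.i1.split = :

# If the Kafka message key is e.g. "web:login", the interceptor writes headers
# s0=web and s1=login, which the sink can reference via %{...}
a1.sinks.k1.type = hdfs
a1.sinks.k1.hdfs.path = hdfs://nameservice1/flume/%{s0}/%{s1}/%Y%m%d
a1.sinks.k1.hdfs.useLocalTimeStamp = true
```

Note that events whose Kafka message key is missing are dropped by this interceptor, so make sure the upstream Kafka producer always sets a key if you rely on the %{} escapes in the sink path.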
http://blog.csdn.net/xiao_jun_0820/article/details/50628349