# Flume agent "a1": tails a local text file and writes the events to HDFS.
#
# NOTE: this file is read as a Java .properties file. A '#' only starts a
# comment at the BEGINNING of a line; anything after a value on the same line
# becomes part of that value. Keep every comment on its own line.

# Name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1

# Describe/configure the source
# "exec" makes Flume run the given command and read events from its output.
a1.sources.r1.type = exec
# Use tail -F to follow the data file.
a1.sources.r1.command = tail -F /opt/bigdata/flume/default/conf/data.txt

# Describe the sink
# Write events to HDFS; the sink type determines which parameters below apply.
a1.sinks.k1.type = hdfs
# Target directory in HDFS. The escape sequences are expanded from the event
# time, so the output directory name changes over time instead of being fixed.
a1.sinks.k1.hdfs.path = /data/flume/logs/%y-%m-%d/%H%M/
# Prefix of the generated file names.
a1.sinks.k1.hdfs.filePrefix = logs-
# Round the timestamp down before expanding the directory escapes
# (true = enabled) ...
a1.sinks.k1.hdfs.round = true
# ... to a multiple of 1 ...
a1.sinks.k1.hdfs.roundValue = 1
# ... minute, i.e. the directory changes once per minute.
a1.sinks.k1.hdfs.roundUnit = minute
# Roll (close the current file and start a new one) after 3 seconds,
a1.sinks.k1.hdfs.rollInterval = 3
# or once the file is larger than 20 bytes,
a1.sinks.k1.hdfs.rollSize = 20
# or after 5 events have been written, whichever comes first.
# These thresholds are demo-sized and produce many tiny files.
a1.sinks.k1.hdfs.rollCount = 5
# Number of events written to the file before it is flushed to HDFS.
a1.sinks.k1.hdfs.batchSize = 10
# Use the agent's local time for the path escapes instead of a timestamp
# header on the event (the exec source does not add one).
a1.sinks.k1.hdfs.useLocalTimeStamp = true
# Output file type. The default is SequenceFile; DataStream writes plain text.
a1.sinks.k1.hdfs.fileType = DataStream

# Use a channel which buffers events in memory
# Events buffered in memory are lost on restart; a file channel is more
# reliable but slower than the memory channel.
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100

# Bind the source and sink to the channel
# A source may list several channels ("channels"); a sink connects to exactly
# one ("channel").
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
启动测试
启动命令(需在 Flume 安装目录下执行,因为 -c conf 和 -f conf/file-to-hdfs.conf 都是相对路径);运行日志位于 Flume 安装目录下的 logs 目录中
$FLUME_HOME/bin/flume-ng agent -c conf -f conf/file-to-hdfs.conf -n a1
[root@ecs-6531-0002 default]# bin/flume-ng agent -c conf -f conf/file-to-hdfs.conf -n a1
Info: Including Hadoop libraries found via (/opt/bigdata/hadoop/default/bin/hadoop)for HDFS access
Info: Including Hive libraries found via (/opt/bigdata/hive/default)for Hive access
+ exec /usr/local/java_1.8.0_121/bin/java -Xmx20m -cp '/opt/bigdata/flume/default/conf:/opt/bigdata/flume/default/lib/*:/opt/bigdata/hadoop/default/etc/hadoop:/opt/bigdata/hadoop/default/share/hadoop/common/lib/*:/opt/bigdata/hadoop/default/share/hadoop/common/*:/opt/bigdata/hadoop/default/share/hadoop/hdfs:/opt/bigdata/hadoop/default/share/hadoop/hdfs/lib/*:/opt/bigdata/hadoop/default/share/hadoop/hdfs/*:/opt/bigdata/hadoop/default/share/hadoop/mapreduce/lib/*:/opt/bigdata/hadoop/default/share/hadoop/mapreduce/*:/opt/bigdata/hadoop/default/share/hadoop/yarn:/opt/bigdata/hadoop/default/share/hadoop/yarn/lib/*:/opt/bigdata/hadoop/default/share/hadoop/yarn/*:/opt/bigdata/hive/default/lib/*' -Djava.library.path=:/opt/bigdata/hadoop/default/lib org.apache.flume.node.Application -f conf/file-to-hdfs.conf -n a1
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in[jar:file:/opt/bigdata/flume/apache-flume-1.9.0-bin/lib/slf4j-log4j12-1.7.25.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in[jar:file:/opt/bigdata/hadoop/hadoop-3.2.0/share/hadoop/common/lib/slf4j-log4j12-1.7.25.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in[jar:file:/opt/bigdata/hive/apache-hive-2.3.5-bin/lib/log4j-slf4j-impl-2.6.2.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type[org.slf4j.impl.Log4jLoggerFactory]