1. 下载安装
官网下载压缩包,解压至指定目录即可,略。
2. 配置环境变量
打开并编辑文件:/etc/profile
添加如下:
# Java environment variables
export JAVA_HOME="/usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64/jre"
# Search the current directory first, then any existing CLASSPATH, then the
# JRE libraries. ${CLASSPATH:+...} avoids an empty "::" entry when CLASSPATH
# is unset or empty.
export CLASSPATH=".:${CLASSPATH:+${CLASSPATH}:}${JAVA_HOME}/lib"
# Values are quoted so that shells which word-split `export` arguments
# do not break on spaces in the existing PATH.
export PATH="${PATH}:${JAVA_HOME}/bin"
# Flume environment variables
export FLUME_HOME="/usr/flume/apache-flume-1.3.1-bin"
export FLUME_CONF_DIR="${FLUME_HOME}/conf"
export PATH="${PATH}:${FLUME_HOME}/bin"
3. 配置文件修改
进入flume的配置文件目录:conf,添加文件agent.conf,其内容如下:
# Names of the agent's source, sink and channel
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# exec source definition (alternative, disabled)
#a1.sources.r1.type = exec
#a1.sources.r1.command = tail -F /usr/dymq/log/report.log
#a1.sources.r1.restart = true
#a1.sources.r1.logStdErr = true
#a1.sources.r1.batchSize = 100
# Spooling-directory source definition (active)
a1.sources.r1.type = spooldir
# Whether to add the file name to each event's headers
a1.sources.r1.fileHeader = false
# Header key under which the file name is stored when fileHeader is true
a1.sources.r1.fileHeaderKey = filename
# Directory that is watched for files to ingest
a1.sources.r1.spoolDir = /usr/dymq/log
# Maximum length, in characters, of a line committed to the buffer
a1.sources.r1.bufferMaxLineLength = 60000
a1.sources.r1.batchSize = 50
# Maximum number of lines committed to the buffer at a time
a1.sources.r1.bufferMaxLines = 50
# avro source definition (alternative, disabled)
#a1.sources.r1.type = avro
#a1.sources.r1.bind = 0.0.0.0
#a1.sources.r1.port = 4141
# netcat source definition (alternative, disabled)
#a1.sources.r1.type = netcat
#a1.sources.r1.bind = localhost
#a1.sources.r1.port = 44444
# Describe the sink
# HDFS sink definition (active)
a1.sinks.k1.type = hdfs
a1.sinks.k1.hdfs.path = hdfs://192.168.18.1:9000/flume/
a1.sinks.k1.hdfs.filePrefix = flumefile
a1.sinks.k1.hdfs.fileSuffix = .txt
a1.sinks.k1.hdfs.writeFormat = Text
a1.sinks.k1.hdfs.fileType = DataStream
# NOTE(review): the two "sink.serializer" keys below follow the file_roll
# sink's naming; the HDFS sink appears to read "serializer" / "serializer.*"
# instead, so these lines may have no effect. Confirm against the Flume user
# guide before renaming, because appendNewline = false would then take effect.
a1.sinks.k1.sink.serializer = TEXT
a1.sinks.k1.sink.serializer.appendNewline = false
# File roll triggers: size, event count and interval
# (presumably bytes, events and seconds; confirm against the Flume user guide)
a1.sinks.k1.hdfs.rollSize = 20000000
a1.sinks.k1.hdfs.rollCount = 150000
a1.sinks.k1.hdfs.rollInterval = 60
# file_roll sink definition (alternative, disabled)
#a1.sinks.k1.type = file_roll
#a1.sinks.k1.sink.directory=/usr/logs
#a1.sinks.k1.sink.serializer = text
#a1.sinks.k1.sink.serializer.appendNewline = true
# Console-logging sink definition (alternative, disabled)
# NOTE(review): this line names sink k2, but a1.sinks declares only k1;
# confirm the intended sink name before enabling it.
#a1.sinks.k2.type = logger
# In-memory channel definition (active)
a1.channels.c1.type = memory
a1.channels.c1.capacity = 20000
a1.channels.c1.transactionCapacity = 5000
a1.channels.c1.keep-alive = 10
# File-backed channel definition (alternative, disabled)
#a1.channels.c1.type = file
#a1.channels.c1.capacity = 1000000
#a1.channels.c1.transactionCapacity = 100000
# Attach the sink and the source to the channel
a1.sinks.k1.channel = c1
a1.sources.r1.channels = c1