# Architecture
## source: spoolDir (monitors a directory for new files)
## channel: Kafka
## sink: HDFS
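## Flow (sketch): log files --> spoolDir source --> Kafka channel (topic) --> HDFS sink --> HDFS path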
---------------------------------------------------------------------------------
# Installation and configuration
#1. vi flume-env.sh
cp flume-env.sh.template flume-env.sh
vi flume-env.sh
export JAVA_HOME=/opt/bigdata/jdk1.8
export JAVA_OPTS="-Xms1000m -Xmx2000m -Dcom.sun.management.jmxremote"
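# Optional sanity check (not in the original steps): confirm Flume starts with this JDK
bin/flume-ng version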
#2. vi flume-kafka.conf
# agent component names
agent-kafka.sources = ssp
agent-kafka.channels = ckafka
agent-kafka.sinks = shdfs
#source
agent-kafka.sources.ssp.type = spooldir
agent-kafka.sources.ssp.channels = ckafka
agent-kafka.sources.ssp.spoolDir=/opt/bigdata/logDataDir
agent-kafka.sources.ssp.deserializer.maxLineLength=2048000
agent-kafka.sources.ssp.deserializer.outputCharset=UTF-8
agent-kafka.sources.ssp.includePattern=^.*ing$
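# Quick smoke test (file name is hypothetical): the name must match includePattern
# (ends with "ing"), and a file must be complete before it lands in the spool dir;
# Flume renames processed files with a .COMPLETED suffix.
echo "test event" > /tmp/sample.ing && mv /tmp/sample.ing /opt/bigdata/logDataDir/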
#sink
agent-kafka.sinks.shdfs.type = hdfs
agent-kafka.sinks.shdfs.channel = ckafka
agent-kafka.sinks.shdfs.hdfs.path = hdfs://${namenode}:9000/data/erh/click_events/
agent-kafka.sinks.shdfs.hdfs.filePrefix = click_events
#agent-kafka.sinks.shdfs.hdfs.round=true
#agent-kafka.sinks.shdfs.hdfs.roundValue=24
#agent-kafka.sinks.shdfs.hdfs.roundUnit=hour
#agent-kafka.sinks.shdfs.hdfs.useLocalTimeStamp=true
agent-kafka.sinks.shdfs.hdfs.rollSize=134217728
agent-kafka.sinks.shdfs.hdfs.rollCount=0
agent-kafka.sinks.shdfs.hdfs.fileType=DataStream
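## Note: hdfs.rollInterval defaults to 30s, so files still roll by time;
## to roll only on the 128 MB rollSize above, also set:
#agent-kafka.sinks.shdfs.hdfs.rollInterval=0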
#channel
agent-kafka.channels.ckafka.type = org.apache.flume.channel.kafka.KafkaChannel
agent-kafka.channels.ckafka.kafka.bootstrap.servers=kafka-1:9092,kafka-2:9092
agent-kafka.channels.ckafka.kafka.topic=spark
agent-kafka.channels.ckafka.kafka.consumer.group.id=flume-consumer
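## If topic auto-creation is disabled, create the channel topic first (flags assume
## Kafka 2.2+; on older brokers use --zookeeper; the counts are placeholders):
bin/kafka-topics.sh --create --bootstrap-server kafka-1:9092 --partitions 3 --replication-factor 2 --topic spark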
#3. Jar dependencies (the HDFS sink needs the Hadoop client jars on Flume's classpath)
cd /opt/bigdata/apache-flume
cp /opt/bigdata/hadoop/share/hadoop/common/*.jar lib
cp /opt/bigdata/hadoop/share/hadoop/common/lib/*.jar lib
cp /opt/bigdata/hadoop/share/hadoop/hdfs/*.jar lib
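## Alternatively, if the hadoop command is on PATH, the flume-ng launcher can usually
## pick up the Hadoop classpath by itself, making the copies above unnecessary.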
# Start the agent
nohup bin/flume-ng agent --conf conf --conf-file conf/flume-kafka.conf --name agent-kafka -Dflume.root.logger=INFO,console > flume.out 2>&1 &
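# Check the agent log, then (after the first roll) the output files in HDFS:
tail -f flume.out
hdfs dfs -ls /data/erh/click_events/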
# Extension: Hive sink
vi flume-hive.conf
# agent component names
agent-kafka-hive.sources = source-kafka
agent-kafka-hive.channels = channel-kafka
agent-kafka-hive.sinks = sink-hive
#source
agent-kafka-hive.sources.source-kafka.type = spooldir
agent-kafka-hive.sources.source-kafka.channels = channel-kafka
agent-kafka-hive.sources.source-kafka.spoolDir=/opt/bigdata/logDataDir
agent-kafka-hive.sources.source-kafka.deserializer.maxLineLength=2048000
agent-kafka-hive.sources.source-kafka.deserializer.outputCharset=UTF-8
agent-kafka-hive.sources.source-kafka.includePattern=^.*txt$
#sink
agent-kafka-hive.sinks.sink-hive.type = hive
agent-kafka-hive.sinks.sink-hive.channel = channel-kafka
agent-kafka-hive.sinks.sink-hive.hive.metastore = thrift://master:9083
agent-kafka-hive.sinks.sink-hive.hive.database = erh
agent-kafka-hive.sinks.sink-hive.hive.table = userinfo
agent-kafka-hive.sinks.sink-hive.useLocalTimeStamp = false
agent-kafka-hive.sinks.sink-hive.round = true
agent-kafka-hive.sinks.sink-hive.roundValue = 10
agent-kafka-hive.sinks.sink-hive.roundUnit = minute
agent-kafka-hive.sinks.sink-hive.serializer = DELIMITED
agent-kafka-hive.sinks.sink-hive.serializer.delimiter = ,
agent-kafka-hive.sinks.sink-hive.serializer.serdeSeparator = ,
agent-kafka-hive.sinks.sink-hive.serializer.fieldnames = id,name,age,tel
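## The Hive sink writes via Hive streaming ingest, so the target table must be bucketed,
## stored as ORC, and transactional. A matching DDL sketch (the bucket count is an assumption):
CREATE TABLE erh.userinfo (id INT, name STRING, age INT, tel STRING)
CLUSTERED BY (id) INTO 4 BUCKETS
STORED AS ORC
TBLPROPERTIES ('transactional' = 'true');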
#channel
agent-kafka-hive.channels.channel-kafka.type = org.apache.flume.channel.kafka.KafkaChannel
agent-kafka-hive.channels.channel-kafka.kafka.bootstrap.servers=master:9092,slave1:9092
agent-kafka-hive.channels.channel-kafka.kafka.topic=flume-hive-channel
agent-kafka-hive.channels.channel-kafka.kafka.consumer.group.id=erh_group
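## The agent also needs the Hadoop client configs on its classpath: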
cp ${hadoop_home}/etc/hadoop/core-site.xml conf/
cp ${hadoop_home}/etc/hadoop/hdfs-site.xml conf/
## If the agent fails to start, copy whichever jars the error message reports missing into the lib directory.
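## Start this agent the same way as the first one (the output file name is a placeholder):
nohup bin/flume-ng agent --conf conf --conf-file conf/flume-hive.conf --name agent-kafka-hive -Dflume.root.logger=INFO,console > flume-hive.out 2>&1 &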