flume1.9 安装配置

#架构图

 

##source :使用spoolDir 监控目录

##channel :使用kafka

##sink  :使用hdfs

---------------------------------------------------------------------------------
#安装配置

#1.vi flume-env.sh

# Create flume-env.sh from the shipped template.
# (fix: the template is named "flume-env.sh.template", not "...templte")
cp flume-env.sh.template flume-env.sh

# Edit flume-env.sh and add the two exports below.
vi flume-env.sh

# JDK used by the Flume agent JVM.
export JAVA_HOME=/opt/bigdata/jdk1.8

# Heap sizing (1g initial / 2g max) plus JMX remote monitoring.
export JAVA_OPTS="-Xms1000m -Xmx2000m -Dcom.sun.management.jmxremote"

#2. vi flume-kafka.conf

# Summary: agent "agent-kafka" wires spooldir source -> Kafka channel -> HDFS sink.
# (fix: stripped trailing whitespace from keys/values; stray spaces are never
# meaningful here and can trip up strict property parsing.)
agent-kafka.sources = ssp
agent-kafka.channels = ckafka
agent-kafka.sinks = shdfs

# source: watch a local directory for completed log files
agent-kafka.sources.ssp.type = spooldir
agent-kafka.sources.ssp.channels = ckafka
agent-kafka.sources.ssp.spoolDir=/opt/bigdata/logDataDir
# allow very long event lines (bytes) before the deserializer truncates
agent-kafka.sources.ssp.deserializer.maxLineLength=2048000
agent-kafka.sources.ssp.deserializer.outputCharset=UTF-8
# only ingest files whose names end in "ing"
# NOTE(review): "^.*ing$" is an unusual suffix filter -- confirm intended pattern
agent-kafka.sources.ssp.includePattern=^.*ing$

# sink: write events to HDFS as plain text (DataStream)
agent-kafka.sinks.shdfs.type = hdfs
agent-kafka.sinks.shdfs.channel = ckafka
# ${namenode} must be resolvable/substituted at deploy time
agent-kafka.sinks.shdfs.hdfs.path = hdfs://${namenode}:9000/data/erh/click_events/
agent-kafka.sinks.shdfs.hdfs.filePrefix = click_events
#agent-kafka.sinks.shdfs.hdfs.round=true
#agent-kafka.sinks.shdfs.hdfs.roundValue=24
#agent-kafka.sinks.shdfs.hdfs.roundUnit=hour
#agent-kafka.sinks.shdfs.hdfs.useLocalTimeStamp=true
# roll files at 128 MiB; disable event-count-based rolling
agent-kafka.sinks.shdfs.hdfs.rollSize=134217728
agent-kafka.sinks.shdfs.hdfs.rollCount=0
agent-kafka.sinks.shdfs.hdfs.fileType=DataStream

# channel: buffer events in a Kafka topic between source and sink
agent-kafka.channels.ckafka.type = org.apache.flume.channel.kafka.KafkaChannel
agent-kafka.channels.ckafka.kafka.bootstrap.servers=kafka-1:9092,kafka-2:9092
agent-kafka.channels.ckafka.kafka.topic=spark
agent-kafka.channels.ckafka.kafka.consumer.group.id=flume-consumer

#3. jar包

# Copy the Hadoop client jars the HDFS sink depends on into Flume's lib/.
cd /opt/bigdata/apache-flume

hadoop_share=/opt/bigdata/hadoop/share/hadoop
for jar_dir in "$hadoop_share/common" "$hadoop_share/common/lib" "$hadoop_share/hdfs"; do
  cp "$jar_dir"/*.jar lib
done

 

#启动服务

# Start the agent in the background; all output is captured in flume.out.
nohup bin/flume-ng agent \
  --conf conf \
  --conf-file conf/flume-kafka.conf \
  --name agent-kafka \
  -Dflume.root.logger=INFO,console > flume.out 2>&1 &

# 扩展hive sink

vi flume-hive.conf

#sum
# Agent "agent-kafka-hive": spooldir source -> Kafka channel -> Hive sink.
agent-kafka-hive.sources = source-kafka
agent-kafka-hive.channels = channel-kafka
agent-kafka-hive.sinks = sink-hive

#source
# Watch the same spool directory, but only ingest files ending in "txt".
agent-kafka-hive.sources.source-kafka.type = spooldir
agent-kafka-hive.sources.source-kafka.channels = channel-kafka
agent-kafka-hive.sources.source-kafka.spoolDir=/opt/bigdata/logDataDir
agent-kafka-hive.sources.source-kafka.deserializer.maxLineLength=2048000
agent-kafka-hive.sources.source-kafka.deserializer.outputCharset=UTF-8
agent-kafka-hive.sources.source-kafka.includePattern=^.*txt$

#sink
# Stream rows into Hive via the metastore thrift endpoint on master:9083.
# NOTE(review): the Flume Hive sink requires a transactional target table
# (bucketed, stored as ORC) -- confirm erh.userinfo was created that way.
agent-kafka-hive.sinks.sink-hive.type = hive
agent-kafka-hive.sinks.sink-hive.channel = channel-kafka
agent-kafka-hive.sinks.sink-hive.hive.metastore = thrift://master:9083
agent-kafka-hive.sinks.sink-hive.hive.database = erh
agent-kafka-hive.sinks.sink-hive.hive.table = userinfo
agent-kafka-hive.sinks.sink-hive.useLocalTimeStamp = false
agent-kafka-hive.sinks.sink-hive.round = true
agent-kafka-hive.sinks.sink-hive.roundValue = 10
agent-kafka-hive.sinks.sink-hive.roundUnit = minute
# Input lines are comma-delimited; map the fields onto the table columns.
# NOTE(review): Flume docs show delimiter/serdeSeparator wrapped in quotes
# (e.g. ",") -- verify the unquoted form is parsed as intended.
agent-kafka-hive.sinks.sink-hive.serializer = DELIMITED
agent-kafka-hive.sinks.sink-hive.serializer.delimiter = ,
agent-kafka-hive.sinks.sink-hive.serializer.serdeSeparator = ,
agent-kafka-hive.sinks.sink-hive.serializer.fieldnames = id,name,age,tel

#channel
# Kafka-backed channel shared by the spooldir source and the Hive sink.
agent-kafka-hive.channels.channel-kafka.type = org.apache.flume.channel.kafka.KafkaChannel
agent-kafka-hive.channels.channel-kafka.kafka.bootstrap.servers=master:9092,slave1:9092
agent-kafka-hive.channels.channel-kafka.kafka.topic=flume-hive-channel
agent-kafka-hive.channels.channel-kafka.kafka.consumer.group.id=erh_group

# Make Hadoop's client configuration visible to Flume so the HDFS sink can
# locate the cluster.
# (fix: quote the expansion and fail fast via ${var:?} if hadoop_home is
# unset, instead of silently expanding to "/etc/hadoop/...")
cp "${hadoop_home:?hadoop_home is not set}/etc/hadoop/core-site.xml" conf/
cp "${hadoop_home:?hadoop_home is not set}/etc/hadoop/hdfs-site.xml" conf/

## 启动报错时,根据报错信息将缺失的 jar 包拷贝到 lib 目录下即可

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

吃火锅的胖纸

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值