Simple Flume Examples

1. netcat
vim flume01.conf
# declare components
a1.sources = s1
a1.channels = c1
a1.sinks = k1
# netcat source
a1.sources.s1.type = netcat
a1.sources.s1.bind = 111.111.111.111
a1.sources.s1.port = 6666
# memory channel
a1.channels.c1.type = memory
a1.channels.c1.capacity = 100
a1.channels.c1.transactionCapacity = 10
# logger sink
a1.sinks.k1.type = logger
# bind source and sink to the channel
a1.sources.s1.channels = c1
a1.sinks.k1.channel = c1
# run the following from the Flume root directory
flume-ng agent -n a1 -c conf/ -f /root/flume_job/logconf/flume01.conf -Dflume.root.logger=INFO,console
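To verify the agent, open another terminal and send a few lines with nc; each line should show up in the agent's console log, and the netcat source answers OK for every accepted event. A minimal sketch, assuming the nc utility is installed:
nc 111.111.111.111 6666
hello flume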
2. spooldir -> hdfs
# declare components
a1.sources = s1
a1.channels = c1
a1.sinks = k1
# spooldir source
a1.sources.s1.type = spooldir
a1.sources.s1.spoolDir = /root/data/flume
a1.sources.s1.ignorePattern = ^(.)*\\.bak$
a1.sources.s1.fileSuffix = .bak
# file channel
a1.channels.c1.type = file
a1.channels.c1.checkpointDir = /opt/software/flume190/mydata/checkpoint
a1.channels.c1.dataDirs = /opt/software/flume190/mydata/data
a1.channels.c1.capacity = 100000
a1.channels.c1.transactionCapacity = 10000
# hdfs sink
a1.sinks.k1.type = hdfs
a1.sinks.k1.hdfs.path = hdfs://192.168.181.180:9820/flume/events/fakeorder/%Y-%m-%d/%H
a1.sinks.k1.hdfs.round = true
a1.sinks.k1.hdfs.roundValue = 10
a1.sinks.k1.hdfs.roundUnit = minute
a1.sinks.k1.hdfs.filePrefix = log_%Y%m%d_%H
a1.sinks.k1.hdfs.fileSuffix = .log
a1.sinks.k1.hdfs.useLocalTimeStamp = true
a1.sinks.k1.hdfs.writeFormat = Text
a1.sinks.k1.hdfs.rollCount = 0
a1.sinks.k1.hdfs.rollSize = 1000
a1.sinks.k1.hdfs.threadsPoolSize = 10
a1.sinks.k1.hdfs.idleTimeout = 0
a1.sinks.k1.hdfs.minBlockReplicas = 1
# bind source and sink to the channel
a1.sources.s1.channels = c1
a1.sinks.k1.channel = c1
# run the following from the Flume root directory
flume-ng agent -n a1 -c conf/ -f /root/flume_job/logconf/flume02.conf -Dflume.root.logger=INFO,console
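To exercise the agent, drop a file into the spool directory; once it is fully ingested, Flume renames it with the configured .bak suffix so it will not be re-read, and ignorePattern skips such renamed files. A minimal sketch, using a hypothetical file name orders.csv:
cp /root/orders.csv /root/data/flume/
ls /root/data/flume                 # shows orders.csv.bak after ingestion
hdfs dfs -ls /flume/events/fakeorder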
3. avro
# declare components
a1.sources = s1
a1.channels = c1
a1.sinks = k1
# avro source
a1.sources.s1.type = avro
a1.sources.s1.bind = 192.168.140.180
a1.sources.s1.port = 7777
a1.sources.s1.threads = 5
# file channel
a1.channels.c1.type = file
a1.channels.c1.checkpointDir = /opt/software/flume190/mydata/checkpoint
a1.channels.c1.dataDirs = /opt/software/flume190/mydata/data
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# hdfs sink
a1.sinks.k1.type = hdfs
a1.sinks.k1.hdfs.path = hdfs://192.168.140.180:9820/flume/events/avroevent/%Y-%m-%d/%H
a1.sinks.k1.hdfs.round = true
a1.sinks.k1.hdfs.roundValue = 10
a1.sinks.k1.hdfs.roundUnit = minute 
a1.sinks.k1.hdfs.filePrefix = log_%Y%m%d_%H
a1.sinks.k1.hdfs.fileSuffix = .log
a1.sinks.k1.hdfs.useLocalTimeStamp = true
a1.sinks.k1.hdfs.writeFormat = Text
a1.sinks.k1.hdfs.rollCount = 0
a1.sinks.k1.hdfs.rollSize = 134217728
a1.sinks.k1.hdfs.rollInterval = 0
a1.sinks.k1.hdfs.batchSize = 100
a1.sinks.k1.hdfs.threadsPoolSize = 10
a1.sinks.k1.hdfs.idleTimeout = 0
a1.sinks.k1.hdfs.minBlockReplicas = 1
# bind source and sink to the channel
a1.sources.s1.channels = c1
a1.sinks.k1.channel = c1
# run the following from the Flume root directory
flume-ng agent -n a1 -c conf/ -f /root/flume_job/logconf/flume03.conf -Dflume.root.logger=INFO,console
# in another terminal, run the following to send a file of 1000 records
flume-ng avro-client -H 192.168.140.180 -p 7777 -c conf/ -F /root/data/flume/prohead1000.copy
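Once the client finishes, the events should land under the sink's HDFS path, partitioned by date and hour; a quick check (path taken from the config above):
hdfs dfs -ls -R /flume/events/avroevent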
4. taildir source
# declare components
a1.sources = s1
a1.channels = c1
a1.sinks = k1
# taildir source
a1.sources.s1.type = TAILDIR
a1.sources.s1.filegroups = f1 f2
a1.sources.s1.filegroups.f1 = /root/data/flume/tail01/pro.*\\.log
a1.sources.s1.filegroups.f2 = /root/data/flume/tail02/.*\\.log
a1.sources.s1.positionFile = /opt/software/flume190/data/taildir/taildir_position.json
# file channel
a1.channels.c1.type = file
a1.channels.c1.checkpointDir = /opt/software/flume190/mydata/checkpoint
a1.channels.c1.dataDirs = /opt/software/flume190/mydata/data
a1.channels.c1.capacity = 10000
a1.channels.c1.transactionCapacity = 1000
# hdfs sink
a1.sinks.k1.type = hdfs
a1.sinks.k1.hdfs.path = hdfs://192.168.140.180:9820/flume/events/tailevent/%Y-%m-%d/%H
a1.sinks.k1.hdfs.round = true
a1.sinks.k1.hdfs.roundValue = 10
a1.sinks.k1.hdfs.roundUnit = minute
a1.sinks.k1.hdfs.filePrefix = log_%Y%m%d_%H
a1.sinks.k1.hdfs.fileSuffix = .log
a1.sinks.k1.hdfs.useLocalTimeStamp = true
a1.sinks.k1.hdfs.writeFormat = Text
a1.sinks.k1.hdfs.rollCount = 0
a1.sinks.k1.hdfs.rollSize = 134217728
a1.sinks.k1.hdfs.rollInterval = 0
a1.sinks.k1.hdfs.batchSize = 1000
a1.sinks.k1.hdfs.threadsPoolSize = 4
a1.sinks.k1.hdfs.idleTimeout = 0
a1.sinks.k1.hdfs.minBlockReplicas = 1
# bind source and sink to the channel
a1.sources.s1.channels = c1
a1.sinks.k1.channel = c1
# run the following from the Flume root directory
flume-ng agent -n a1 -c conf/ -f /root/flume_job/logconf/flume04.conf -Dflume.root.logger=INFO,console
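Appending to any file that matches the two file groups triggers ingestion, and read offsets are persisted in taildir_position.json so the source resumes where it left off after a restart. A minimal sketch, with a hypothetical file name pro01.log that matches f1's pattern:
echo "tail test line" >> /root/data/flume/tail01/pro01.log
cat /opt/software/flume190/data/taildir/taildir_position.json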
5. hive sink
# enable Hive transaction support (run these in the Hive session)
SET hive.support.concurrency = true;
SET hive.enforce.bucketing = true;
SET hive.exec.dynamic.partition.mode = nonstrict;
SET hive.txn.manager = org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
SET hive.compactor.initiator.on = true;
SET hive.compactor.worker.threads = 1;
# create the Hive table
create table familyinfo(
family_id int,
family_name string,
family_age int,
family_gender string
)
partitioned by(intime string)
clustered by(family_gender) into 2 buckets
row format delimited
fields terminated by ','
lines terminated by '\n'
stored as orc
tblproperties('transactional'='true');
# manually add a partition (autoCreatePartitions is disabled below)
alter table familyinfo add partition(intime='21-07-05-15');
# copy the Hive HCatalog jars that the Flume Hive sink depends on
cp /opt/software/hive312/hcatalog/share/hcatalog/*.jar /opt/software/flume190/lib/
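A quick sanity check that the jars were copied:
ls /opt/software/flume190/lib | grep hcatalog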
# vim flume05.conf
# declare components
a1.sources = s1
a1.channels = c1
a1.sinks = k1
# taildir source
a1.sources.s1.type = TAILDIR
a1.sources.s1.filegroups = f1
a1.sources.s1.filegroups.f1 = /root/data/flume/tail03/.*\\.log
a1.sources.s1.positionFile = /opt/software/flume190/data/taildir/taildir_position.json
a1.sources.s1.batchSize = 10
# file channel
a1.channels.c1.type = file
a1.channels.c1.checkpointDir = /opt/software/flume190/mydata/checkpoint
a1.channels.c1.dataDirs = /opt/software/flume190/mydata/data
a1.channels.c1.capacity = 100
a1.channels.c1.transactionCapacity = 10
# hive sink
a1.sinks.k1.type = hive
a1.sinks.k1.hive.metastore = thrift://192.168.140.180:9083
a1.sinks.k1.hive.database = test
a1.sinks.k1.hive.table = familyinfo
a1.sinks.k1.hive.partition = %y-%m-%d-%H
a1.sinks.k1.useLocalTimeStamp = true
a1.sinks.k1.autoCreatePartitions = false
a1.sinks.k1.batchSize = 10
a1.sinks.k1.round = true
a1.sinks.k1.roundValue = 10
a1.sinks.k1.roundUnit = minute
a1.sinks.k1.serializer = DELIMITED
a1.sinks.k1.serializer.delimiter = ","
a1.sinks.k1.serializer.serdeSeparator = ','
a1.sinks.k1.serializer.fieldnames = family_id,family_name,family_age,family_gender
# bind source and sink to the channel
a1.sources.s1.channels = c1
a1.sinks.k1.channel = c1
# run the following from the Flume root directory
flume-ng agent -n a1 -c conf/ -f /root/flume_job/logconf/flume05.conf -Dflume.root.logger=INFO,console
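To test end to end, append comma-delimited rows whose fields match serializer.fieldnames to a file under tail03, then query the table. A minimal sketch with hypothetical sample values and file name:
echo "1,Tom,25,man" >> /root/data/flume/tail03/family01.log
# then, in the Hive CLI:
select * from familyinfo;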
6. hbase sink
# create the HBase table (namespace:table, one column family)
create 'test:stubs', 'base'
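Note that the test namespace must exist before a table can be created in it; if it does not, create it first in the HBase shell:
create_namespace 'test'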
# vim flume06.conf
#initialize
a1.sources = s1
a1.channels = c1
a1.sinks = k1
#taildir source 
a1.sources.s1.type = TAILDIR
a1.sources.s1.filegroups = f1
a1.sources.s1.filegroups.f1 = /root/data/flume/tail04/.*\\.log
a1.sources.s1.positionFile = /opt/software/flume190/data/taildir/taildir_position.json
a1.sources.s1.batchSize = 10
#file channel
a1.channels.c1.type = file
a1.channels.c1.checkpointDir = /opt/software/flume190/mydata/checkpoint
a1.channels.c1.dataDirs = /opt/software/flume190/mydata/data
a1.channels.c1.capacity = 100
a1.channels.c1.transactionCapacity = 10
#hbase sink
a1.sinks.k1.type = hbase2
a1.sinks.k1.table = test:stubs
a1.sinks.k1.columnFamily = base
a1.sinks.k1.serializer = org.apache.flume.sink.hbase2.RegexHBase2EventSerializer
a1.sinks.k1.serializer.regex = (.*),(.*),(.*),(.*)
a1.sinks.k1.serializer.colNames = ROW_KEY,name,age,gender
a1.sinks.k1.serializer.rowKeyIndex = 0
a1.sinks.k1.batchSize = 10
# bind source and sink to the channel
a1.sources.s1.channels = c1
a1.sinks.k1.channel = c1
# run the following from the Flume root directory
flume-ng agent -n a1 -c conf/ -f /root/flume_job/logconf/flume06.conf -Dflume.root.logger=INFO,console
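To test, append a line matching the serializer regex to a file under tail04; the first capture group becomes the row key (rowKeyIndex = 0) and the remaining groups are written to the base column family. A sketch with hypothetical sample values and file name:
echo "1001,Tom,25,man" >> /root/data/flume/tail04/stu01.log
# then, in the HBase shell:
scan 'test:stubs'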