在 HBase 中建一张表:
# HBase shell: create table 'stuflumehbasesink' in namespace 'test' with column family 'base' (ASCII quotes required)
create 'test:stuflumehbasesink','base'
创建 Flume 配置文件 test06_taildir_file_hive.conf(taildir source → file channel → HBase sink):
# Declare the components of agent a1: one source (s1), one channel (c1), one sink (k1)
a1.sinks = k1
a1.channels = c1
a1.sources = s1
# taildir source
# The files matched by file group f1 hold the field values to be inserted into the HBase table.
# Comments must stay on their own line: a .properties file has no inline comments,
# so any "#..." text after a value becomes part of that value.
a1.sources.s1.type = taildir
a1.sources.s1.filegroups = f1
a1.sources.s1.filegroups.f1 = /root/data/flume/tail04/.*.log
a1.sources.s1.positionFile = /opt/software/flume190/data/taildir/daildir_position.json
a1.sources.s1.batchSize = 10
# File channel c1: type, sizing limits, then on-disk locations
a1.channels.c1.type = file
a1.channels.c1.capacity = 100
a1.channels.c1.transactionCapacity = 10
a1.channels.c1.dataDirs = /opt/software/flume190/mydata/data
a1.channels.c1.checkpointDir = /opt/software/flume190/mydata/checkpoint02
# hive sink (DISABLED)
# Leftover from the Hive example. Sink k1 is defined again below as an hbase2 sink,
# and in a .properties file the later value of a repeated key wins, so the
# type / serializer / batchSize set here never took effect. The remaining keys are
# Hive-specific. Kept commented out for reference only.
#a1.sinks.k1.type = hive
#a1.sinks.k1.hive.metastore = thrift://192.168.75.245:9083
#a1.sinks.k1.hive.database = test
#a1.sinks.k1.hive.table = familyinfo
#a1.sinks.k1.hive.partition = %y-%m-%d/%H
#a1.sinks.k1.useLocalTimeStamp = true
#a1.sinks.k1.autoCreatePartitions = false
#a1.sinks.k1.batchSize = 10
#a1.sinks.k1.round = true
#a1.sinks.k1.roundValue = 10
#a1.sinks.k1.roundUnit = minute
#a1.sinks.k1.serializer = DELIMITED
#a1.sinks.k1.serializer.delimiter = ","
#a1.sinks.k1.serializer.serdeSeparator = ','
#a1.sinks.k1.serializer.fieldnames = family_id,family_name,family_age,family_gender
# hbase sink (HBase 2.x): writes each event into table test:stuflumehbasesink, column family base
a1.sinks.k1.type = hbase2
a1.sinks.k1.table = test:stuflumehbasesink
a1.sinks.k1.columnFamily = base
# The hbase2 sink needs a serializer from the hbase2 package; the
# org.apache.flume.sink.hbase.* serializers belong to the HBase 1.x sink.
a1.sinks.k1.serializer = org.apache.flume.sink.hbase2.RegexHBase2EventSerializer
# Each event body is split into four comma-separated capture groups ...
a1.sinks.k1.serializer.regex = (.*),(.*),(.*),(.*)
# ... which map positionally onto these names; ROW_KEY marks the row-key group
a1.sinks.k1.serializer.colNames = ROW_KEY,name,age,gender
# Index into colNames of the ROW_KEY entry
a1.sinks.k1.serializer.rowKeyIndex = 0
a1.sinks.k1.batchSize = 10
# Bind sink k1 and source s1 to channel c1
a1.sinks.k1.channel = c1
a1.sources.s1.channels = c1
运行命令:
# Start agent a1 with the test06 config file created above, logging at INFO level to the console
flume-ng agent -n a1 -c conf/ -f /opt/software/flume190/flume-conf-files/test06_taildir_file_hive.conf -Dflume.root.logger=INFO,console