注:这个模拟实际上也相当于是将 Flume 日志输出到Hdfs中,然后再通过Hive外部表关联Hdfs对应的路径而已。
- 配置文件:
# Name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# Describe/configure the source: tail the demo log file continuously
a1.sources.r1.type = exec
a1.sources.r1.command = tail -F /usr/hdp/2.5.3.0-37/flume/conf/demo/test/log.log
# Describe the sink: write events to HDFS as plain text
a1.sinks.k1.type = hdfs
a1.sinks.k1.hdfs.path = hdfs://hdp39:8020/tmp/flumetest
a1.sinks.k1.hdfs.fileType = DataStream
a1.sinks.k1.hdfs.writeFormat = Text
# Roll to a new file once the current one reaches 10 KB
a1.sinks.k1.hdfs.rollSize = 10240
# FIX: this property was originally written as
#   tier1.sinks.sink1.hdfs.idleTimeout=60
# which addresses a nonexistent agent ("tier1") and sink ("sink1"), so Flume
# silently ignored it. With the correct prefix, files idle for 60 s are
# closed (renamed away from their in-progress .tmp name) so Hive can read them.
a1.sinks.k1.hdfs.idleTimeout = 60
# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
- 启动 flume agent
flume-ng agent -n a1 -c ../conf -f exec2hive -Dflume.root.logger=DEBUG,console
往log.log中追加数据,模拟日志的生成
echo hello,flume! >> log.log
echo hello,flume!! >> log.log
（注意：在交互式 bash 中，未加引号的 !! 会触发历史展开，把上一条命令的文本插入进来——这正是下文查询结果中出现 "hello,flumemore log.log" 等乱行的原因。若要原样写入两个感叹号，应使用单引号：echo 'hello,flume!!' >> log.log）
创建hive 外部表
-- External table over the Flume HDFS sink directory: Hive reads whatever
-- files Flume has closed there; dropping the table leaves the data in place.
CREATE EXTERNAL TABLE flume1 (info STRING)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
LOCATION '/tmp/flumetest/';
查询hive
hive> select * from flume1;
OK
hello,flume!
hello,flume!
hello,flumemore log.log more log.log
hello,flumemore log.log more log.log
hello