将 CentOS 7 虚拟机上“文件中的表信息”用 Flume 导入到 Hive 表中

flume hive sink要求hive的表格式

hive table
partition
bucket
orc

查看端口

netstat -nl | grep 9083    # 9083 是 Hive metastore 服务的端口

flume对hive hcatalog依赖

# The Flume Hive sink depends on Hive's HCatalog jars: copy them into Flume's lib directory.
cd /opt/software/flume190/lib/
cp /opt/software/hive312/hcatalog/share/hcatalog/*.jar ./

开启hive事务支持 在hive中运行以下命令

-- Enable Hive transaction support for the current session.
-- Concurrency and transaction manager:
set hive.support.concurrency=true;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
-- Bucketing enforcement and non-strict dynamic partitioning:
set hive.enforce.bucketing=true;
set hive.exec.dynamic.partition.mode=nonstrict;
-- Compactor settings.
-- NOTE(review): these are normally metastore-side settings; confirm that a session-level SET is sufficient.
set hive.compactor.initiator.on=true;
set hive.compactor.worker.threads=1;

创建hive表 在hive中创建表

-- Target table for the Flume Hive sink: partitioned by intime, bucketed by
-- family_gender into 2 buckets, stored as ORC and marked transactional.
-- String literals use plain ASCII single quotes; typographic quotes are not
-- valid string delimiters in HiveQL.
create table familyinfo(
    family_id int,
    family_name string,
    family_age int,
    family_gender string
)
partitioned by(intime string)
clustered by(family_gender) into 2 buckets
row format delimited
fields terminated by ','
lines terminated by '\n'
stored as orc
tblproperties('transactional'='true');

根据当前日期时间手动给表添加分区（分区值格式为“年-月-日-时”，例如 21-07-05-16，需与 Flume 配置中的 hive.partition 保持一致）

-- Manually create the partition the sink will write to.
-- The value is presumably year-month-day-hour (two digits each); adjust it to the current date and hour.
alter table familyinfo add partition(intime='21-07-05-16');

创建flume配置文件test05_taildir_file_hive.conf

# Flume agent a1: taildir source (s1) -> file channel (c1) -> Hive sink (k1).

# Component names
a1.sources = s1
a1.channels = c1
a1.sinks = k1

# Taildir source: follows files in /root/data/flume/tail03 whose names match the regex .*.log
a1.sources.s1.type = taildir
a1.sources.s1.filegroups = f1
a1.sources.s1.filegroups.f1 = /root/data/flume/tail03/.*.log
# NOTE(review): "daildir" looks like a typo for "taildir"; it is only a file name, so it is kept unchanged.
a1.sources.s1.positionFile = /opt/software/flume190/data/taildir/daildir_position.json
a1.sources.s1.batchSize = 10

# File channel
a1.channels.c1.type = file
a1.channels.c1.checkpointDir = /opt/software/flume190/mydata/checkpoint02
a1.channels.c1.dataDirs = /opt/software/flume190/mydata/data
a1.channels.c1.capacity = 100
a1.channels.c1.transactionCapacity = 10

# Hive sink: writes into test.familyinfo through the metastore at port 9083
a1.sinks.k1.type = hive
a1.sinks.k1.hive.metastore = thrift://192.168.75.245:9083
a1.sinks.k1.hive.database = test
a1.sinks.k1.hive.table = familyinfo
# One value for the single partition column (intime). It must have the same layout as the
# manually created partition (e.g. 21-07-05-16), because autoCreatePartitions is false.
a1.sinks.k1.hive.partition = %y-%m-%d-%H
# Use the agent's local time for the escape sequences above instead of an event timestamp header.
a1.sinks.k1.useLocalTimeStamp = true
a1.sinks.k1.autoCreatePartitions = false

a1.sinks.k1.batchSize = 10
# Timestamp rounding applied to the escape sequences (10-minute steps).
a1.sinks.k1.round = true
a1.sinks.k1.roundValue = 10
a1.sinks.k1.roundUnit = minute
# Parse each event as comma-delimited text and map the fields to the table columns in order.
a1.sinks.k1.serializer = DELIMITED
a1.sinks.k1.serializer.delimiter = ","
a1.sinks.k1.serializer.serdeSeparator = ','
a1.sinks.k1.serializer.fieldnames = family_id,family_name,family_age,family_gender

# Wire source and sink to the channel
a1.sources.s1.channels = c1
a1.sinks.k1.channel = c1

运行命令

# Start agent a1 with the Hive-sink configuration, logging at INFO level to the console.
flume-ng agent \
  --name a1 \
  --conf conf/ \
  --conf-file /opt/software/flume190/flume-conf-files/test05_taildir_file_hive.conf \
  -Dflume.root.logger=INFO,console

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值