flume收集日志到hive
- 切换到root用户
# Copy the Hadoop client jars that Flume's HDFS/Hive sinks depend on
cp /usr/local/hadoop/share/hadoop/common/lib/hadoop-auth-2.7.7.jar /usr/local/flume/lib
cp /usr/local/hadoop/share/hadoop/common/hadoop-common-2.7.7.jar /usr/local/flume/lib
cp /usr/local/hadoop/share/hadoop/hdfs/hadoop-hdfs-2.7.7.jar /usr/local/flume/lib
cp /usr/local/hadoop/share/hadoop/tools/lib/htrace-core-3.1.0-incubating.jar /usr/local/flume/lib
cp /usr/local/hadoop/share/hadoop/tools/lib/commons-io-2.4.jar /usr/local/flume/lib
# Copy the HCatalog jars required by the Flume hive sink
cd /usr/local/hive/hcatalog/share/hcatalog
cp hive-hcatalog-core-2.1.0.jar /usr/local/flume/lib
# fixed: the original command was missing the ".0.jar" suffix on this file name
cp hive-hcatalog-server-extensions-2.1.0.jar /usr/local/flume/lib
cp hive-hcatalog-pig-adapter-2.1.0.jar /usr/local/flume/lib
cp hive-hcatalog-streaming-2.1.0.jar /usr/local/flume/lib
cd /usr/local/hive/conf
-
在hive中加入以下配置:
vim hive-site.xml
<!-- Point Hive clients (and the Flume hive sink below) at the metastore
     thrift service; host/port must match the node where
     "hive --service metastore" is started (9083 is the default port) -->
<property>
<name>hive.metastore.uris</name>
<value>thrift://master:9083</value>
</property>
- 启动Hadoop集群并启动Tomcat
cd /usr/local/tomcat/bin
# "./" is required: the current directory is normally not on $PATH,
# so a bare "startup.sh" would fail with "command not found"
./startup.sh
- 启动hive:
cd /usr/local/hive/bin
# Run the metastore in the background: in the foreground it blocks this
# shell, but the following steps (hiveserver2, hive CLI) need the same terminal
hive --service metastore &
- 启动hiveserver2服务:
hive --service hiveserver2
- 启动hive客户端
hive
- 配置文件
cd /usr/local/flume/conf
# Create the agent configuration file and open it for editing
# (its contents are listed below)
touch tomcat_to_hive.conf
vim tomcat_to_hive.conf
- 在tomcat_to_hive.conf中写入以下内容
-
文件配置:
# Flume agent "a1": tail the Tomcat access log (source r1) through an
# in-memory channel (c1) into the Hive streaming sink (k1).
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# exec source running "tail -F": follows the log file across truncation/rotation
a1.sources.r1.type=exec
a1.sources.r1.command=tail -F /usr/local/tomcat/logs/localhost_access_log.2023-03-28.txt
a1.sources.r1.shell = /bin/bash -c
a1.sources.r1.batchSize=100
# memory channel: fast, but buffered events are lost if the agent dies
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# hive sink: streams events into the transactional table tomcat.weblogs
# via the metastore thrift service (must match hive.metastore.uris)
a1.sinks.k1.type = hive
a1.sinks.k1.batchSize = 50
a1.sinks.k1.hive.metastore = thrift://master:9083
a1.sinks.k1.hive.database = tomcat
a1.sinks.k1.hive.table = weblogs
a1.sinks.k1.autoCreatePartitions = false
# DELIMITED serializer: split each log line on spaces and map the pieces to
# the table columns h,l,u,t,r,s,b in order
# NOTE(review): names suggest the Apache access-log fields (%h %l %u %t %r %s %b)
# — confirm against the Tomcat access-log pattern in use
a1.sinks.k1.serializer = DELIMITED
a1.sinks.k1.serializer.delimiter = " "
a1.sinks.k1.serializer.serdeSeparator = ' '
a1.sinks.k1.serializer.fieldnames = h,l,u,t,r,s,b
# wire the source and the sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
- 进hive客户端
-- Database and table that the Flume hive sink streams into.
-- IF NOT EXISTS makes the setup script safe to re-run.
CREATE DATABASE IF NOT EXISTS tomcat;
USE tomcat;

-- Hive streaming ingest requires the target table to be bucketed,
-- stored as ORC, and marked transactional (ACID).
CREATE TABLE IF NOT EXISTS weblogs (
    h STRING,
    l STRING,
    u STRING,
    t STRING,
    r STRING,
    s STRING,
    b STRING
)
CLUSTERED BY (h) INTO 5 BUCKETS
STORED AS ORC
TBLPROPERTIES ('transactional' = 'true');
- 开启事务会话(在hive客户端中执行以下设置)
-- Session settings needed for ACID / streaming ingest.
-- NOTE(review): hive.enforce.bucketing was removed in Hive 2.x (bucketing is
-- always enforced), so this SET may be rejected on the Hive 2.1.0 used here — confirm.
set hive.enforce.bucketing = true;
set hive.exec.dynamic.partition.mode = nonstrict;
-- DbTxnManager is required for transactional tables
set hive.txn.manager = org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
-- NOTE(review): compactor settings are normally configured in hive-site.xml on
-- the metastore service; setting them in a client session may have no effect — verify.
set hive.compactor.initiator.on = true;
set hive.compactor.worker.threads = 5;
7. 启动flume
cd /usr/local/flume/bin
# --conf must point at Flume's conf directory (flume-env.sh, log4j.properties);
# relative to bin/ that is ../conf — the original "--conf conf" resolved to a
# nonexistent bin/conf directory. Console logging added so errors are visible.
flume-ng agent --name a1 --conf ../conf --conf-file ../conf/tomcat_to_hive.conf -Dflume.root.logger=INFO,console
8. 查看hive表(例如在hive客户端执行: select * from weblogs limit 10;)