flume收集日志到hive
- 切换到root用户
# Copy the Hadoop client jars that Flume's HDFS/Hive sinks depend on
cp /usr/local/hadoop/share/hadoop/common/lib/hadoop-auth-2.7.7.jar /usr/local/flume/lib
cp /usr/local/hadoop/share/hadoop/common/hadoop-common-2.7.7.jar /usr/local/flume/lib
cp /usr/local/hadoop/share/hadoop/hdfs/hadoop-hdfs-2.7.7.jar /usr/local/flume/lib
cp /usr/local/hadoop/share/hadoop/tools/lib/htrace-core-3.1.0-incubating.jar /usr/local/flume/lib
cp /usr/local/hadoop/share/hadoop/tools/lib/commons-io-2.4.jar /usr/local/flume/lib
# Copy the HCatalog jars required by the Flume hive sink
cd /usr/local/hive/hcatalog/share/hcatalog
cp hive-hcatalog-core-2.1.0.jar /usr/local/flume/lib
# fixed: the original command was missing the ".0.jar" suffix on this file name
cp hive-hcatalog-server-extensions-2.1.0.jar /usr/local/flume/lib
cp hive-hcatalog-pig-adapter-2.1.0.jar /usr/local/flume/lib
cp hive-hcatalog-streaming-2.1.0.jar /usr/local/flume/lib
cd /usr/local/hive/conf
-
在hive中加入以下配置:
vim hive-site.xml
<!-- Point Hive clients (and the Flume hive sink below) at the metastore
     thrift service; host/port must match the node where
     "hive --service metastore" is started (9083 is the default port) -->
<property>
<name>hive.metastore.uris</name>
<value>thrift://master:9083</value>
</property>
- 启动Hadoop集群并启动Tomcat
cd /usr/local/tomcat/bin
# "./" is required: the current directory is normally not on $PATH,
# so a bare "startup.sh" would fail with "command not found"
./startup.sh
- 启动hive:
cd /usr/local/hive/bin
# Run the metastore in the background: in the foreground it blocks this
# shell, but the following steps (hiveserver2, hive CLI) need the same terminal
hive --service metastore &
- 启动hiveserver2服务:
hive --service hiveserver2
- 启动hive客户端
hive
- 配置文件
cd /usr/local/flume/conf
# Create the agent configuration file and open it for editing
# (its contents are listed below)
touch tomcat_to_hive.conf
vim tomcat_to_hive.conf
- 在tomcat_to_hive.conf中写入以下内容
-
文件配置:
# Flume agent "a1": tail the Tomcat access log (source r1) through an
# in-memory channel (c1) into the Hive streaming sink (k1).
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# exec source running "tail -F": follows the log file across truncation/rotation
a1.sources.r1.type=exec
a1.sources.r1.command=tail -F /usr/local/tomcat/logs/localhost_access_log.2023-03-28.txt
a1.sources.r1.shell = /bin/bash -c
a1.sources.r1.batchSize=100
# memory channel: fast, but buffered events are lost if the agent dies
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# hive sink: streams events into the transactional table tomcat.weblogs
# via the metastore thrift service (must match hive.metastore.uris)
a1.sinks.k1.type = hive
a1.sinks.k1.batchSize = 50
a1.sinks.k1.hive.metastore = thrift://master:9083
a1.sinks.k1.hive.database = tomcat
a1.sinks.k1.hive.table = weblogs
a1.sinks.k1.autoCreatePartitions = false
# DELIMITED serializer: split each log line on spaces and map the pieces to
# the table columns h,l,u,t,r,s,b in order
# NOTE(review): names suggest the Apache access-log fields (%h %l %u %t %r %s %b)
# — confirm against the Tomcat access-log pattern in use
a1.sinks.k1.serializer = DELIMITED
a1.sinks.k1.serializer.delimiter = " "
a1.sinks.k1.serializer.serdeSeparator = ' '
a1.sinks.k1.serializer.fieldnames = h,l,u,t,r,s,b
# wire the source and the sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
- 进hive客户端
-- Database and table that the Flume hive sink streams into.
-- IF NOT EXISTS makes the setup script safe to re-run.
CREATE DATABASE IF NOT EXISTS tomcat;
USE tomcat;

-- Hive streaming ingest requires the target table to be bucketed,
-- stored as ORC, and marked transactional (ACID).
CREATE TABLE IF NOT EXISTS weblogs (
    h STRING,
    l STRING,
    u STRING,
    t STRING,
    r STRING,
    s STRING,
    b STRING
)
CLUSTERED BY (h) INTO 5 BUCKETS
STORED AS ORC
TBLPROPERTIES ('transactional' = 'true');
- 开启事务会话(在hive客户端中执行以下设置)
-- Session settings needed for ACID / streaming ingest.
-- NOTE(review): hive.enforce.bucketing was removed in Hive 2.x (bucketing is
-- always enforced), so this SET may be rejected on the Hive 2.1.0 used here — confirm.
set hive.enforce.bucketing = true;
set hive.exec.dynamic.partition.mode = nonstrict;
-- DbTxnManager is required for transactional tables
set hive.txn.manager = org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
-- NOTE(review): compactor settings are normally configured in hive-site.xml on
-- the metastore service; setting them in a client session may have no effect — verify.
set hive.compactor.initiator.on = true;
set hive.compactor.worker.threads = 5;
7. 启动flume
cd /usr/local/flume/bin
# --conf must point at Flume's conf directory (flume-env.sh, log4j.properties);
# relative to bin/ that is ../conf — the original "--conf conf" resolved to a
# nonexistent bin/conf directory. Console logging added so errors are visible.
flume-ng agent --name a1 --conf ../conf --conf-file ../conf/tomcat_to_hive.conf -Dflume.root.logger=INFO,console
8. 查看hive表(例如在hive客户端执行: select * from weblogs limit 10;)