Flume Configuration Examples (Part 4): Hive

Official documentation (Chinese)

Flume 1.9 User Guide, Chinese edition — probably the most complete translation currently available

Official documentation (English)

Flume 1.9.0 User Guide — Apache Flume

exec-hive

1. Prerequisite: copy the HCatalog jars

Copy all of the jars under /usr/local/soft/apache-hive-3.1.1-bin/hcatalog/share/hcatalog/ into /usr/local/soft/apache-flume-1.9.0-bin/lib/, as shown below.
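A minimal shell sketch of this step, using the install paths from this guide (the Flume Hive sink needs Hive's HCatalog streaming classes on its classpath, which is why these jars are required):

cp /usr/local/soft/apache-hive-3.1.1-bin/hcatalog/share/hcatalog/*.jar /usr/local/soft/apache-flume-1.9.0-bin/lib/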

2. Add configuration to hive-site.xml

The complete Hive configuration file used here is as follows:

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?><!--
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements.  See the NOTICE file distributed with
   this work for additional information regarding copyright ownership.
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License.  You may obtain a copy of the License at
       http://www.apache.org/licenses/LICENSE-2.0
   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
-->
<configuration>
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.cj.jdbc.Driver</value>
    <description>Driver class name for a JDBC metastore</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>root</value>
    <description>Username to use against metastore database</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>root123</value>
    <description>password to use against metastore database</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://hadoop100:3306/hive?useUnicode=true&amp;characterEncoding=utf8&amp;useSSL=false&amp;serverTimezone=GMT</value>
    <description>
      JDBC connect string for a JDBC metastore.
      To use SSL to encrypt/authenticate the connection, provide database-specific SSL flag in the connection URL.
      For example, jdbc:postgresql://myhost/db?ssl=true for postgres database.
    </description>
  </property>
  <property>
    <name>datanucleus.schema.autoCreateAll</name>
    <value>true</value>
    <description>Auto creates necessary schema on a startup if one doesn't exist. Set this to false, after creating it once.To enable auto create also set hive.metastore.schema.verification=false. Auto creation is not recommended for production use cases, run schematool command instead.</description>
  </property>
  <property>
    <name>hive.metastore.schema.verification</name>
    <value>false</value>
    <description>
      Enforce metastore schema version consistency.
      True: Verify that version information stored in is compatible with one from Hive jars.  Also disable automatic
            schema migration attempt. Users are required to manually migrate schema after Hive upgrade which ensures
            proper metastore schema migration. (Default)
      False: Warn if the version information stored in metastore doesn't match with one from in Hive jars.
    </description>
  </property>
  <property>
    <name>hive.exec.local.scratchdir</name>
    <value>/usr/local/soft/apache-hive-3.1.1-bin/tmp/${user.name}</value>
    <description>Local scratch space for Hive jobs</description>
  </property>
  <property>
    <name>system:java.io.tmpdir</name>
    <value>/usr/local/soft/apache-hive-3.1.1-bin/iotmp</value>
    <description/>
  </property>
  <property>
    <name>hive.downloaded.resources.dir</name>
    <value>/usr/local/soft/apache-hive-3.1.1-bin/tmp/${hive.session.id}_resources</value>
    <description>Temporary local directory for added resources in the remote file system.</description>
  </property>
  <property>
    <name>hive.querylog.location</name>
    <value>/usr/local/soft/apache-hive-3.1.1-bin/tmp/${system:user.name}</value>
    <description>Location of Hive run time structured log file</description>
  </property>
  <property>
    <name>hive.server2.logging.operation.log.location</name>
    <value>/usr/local/soft/apache-hive-3.1.1-bin/tmp/${system:user.name}/operation_logs</value>
    <description>Top level directory where operation logs are stored if logging functionality is enabled</description>
  </property>
  <property>
    <name>hive.metastore.db.type</name>
    <value>mysql</value>
    <description>
      Expects one of [derby, oracle, mysql, mssql, postgres].
      Type of database used by the metastore. Information schema &amp; JDBCStorageHandler depend on it.
    </description>
  </property>
  <property>
    <name>hive.cli.print.current.db</name>
    <value>true</value>
    <description>Whether to include the current database in the Hive prompt.</description>
  </property>
  <property>
    <name>hive.cli.print.header</name>
    <value>true</value>
    <description>Whether to print the names of the columns in query output.</description>
  </property>
  <property>
    <name>hive.metastore.warehouse.dir</name>
    <value>/user/hive/warehouse</value>
    <description>location of default database for the warehouse</description>
  </property>
  
  <property>
    <name>hive.metastore.uris</name>
    <value>thrift://192.168.1.100:9083</value>
  </property>
  <property>
    <name>hive.metastore.event.db.notification.api.auth</name>
    <value>false</value>
  </property>
  <property>
    <name>hive.server2.thrift.bind.host</name>
    <value>hadoop100</value>
  </property>
  <property>
    <name>hive.server2.thrift.port</name>
    <value>10000</value>
  </property>
  <property>
    <name>hive.aux.jars.path</name>
    <value>file:///usr/local/soft/apache-hive-3.1.1-bin/lib/json-serde-1.3.8-jar-with-dependencies.jar,file:///usr/local/soft/apache-hive-3.1.1-bin/lib/hiveUDF-app_logs_hive.jar</value>
  </property>
  <property>
    <name>hive.exec.compress.output</name>
    <value>false</value>
  </property>
  <property>
    <name>hive.support.concurrency</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.txn.manager</name>
    <value>org.apache.hadoop.hive.ql.lockmgr.DbTxnManager</value>
  </property>
  <property>
    <name>hive.compactor.initiator.on</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.metastore.local</name>
    <value>false</value>
  </property>
  <property>
    <name>hive.exec.dynamic.partition.mode</name>
    <value>nonstrict</value>
  </property>
  <property>
    <name>hive.compactor.worker.threads</name>
    <value>1</value>
  </property>
  <property>
    <name>hive.enforce.bucketing</name>
    <value>true</value>
  </property>
</configuration>
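As the datanucleus.schema.autoCreateAll description above notes, auto-creation is not recommended for production. An alternative (my suggestion, not part of the original steps) is to initialize the metastore schema once with Hive's bundled schematool; the -dbType matches the MySQL connection settings above:

schematool -dbType mysql -initSchema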

3. Start Hive

Start the metastore service:

hive --service metastore

Start HiveServer2:

hiveserver2

Connect with the Hive CLI:

hive
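Both services run in the foreground and occupy the terminal. A common alternative (my suggestion, not from the original write-up) is to background them and keep their logs:

nohup hive --service metastore > /tmp/metastore.log 2>&1 &
nohup hiveserver2 > /tmp/hiveserver2.log 2>&1 &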

4. Create the Hive table

With Flume 1.9, the Hive sink writes through Hive's streaming API, so the target table must be transactional: partitioned, bucketed (clustered by), stored as ORC, and created with tblproperties('transactional'='true'). Without all of these, the sink will fail.

create table flumehive(nid int, name string, phone string)
  partitioned by (ftime string)
  clustered by (nid) into 3 buckets
  row format delimited fields terminated by ','
  stored as orc
  tblproperties('transactional'='true');


select * from flumehive;
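Before pointing Flume at the table, it can help to confirm it really was created as a bucketed, transactional ORC table. This check is a suggestion of mine; hive -e just runs one statement non-interactively:

hive -e "describe formatted flumehive;" | grep -i -E 'transactional|bucket|orc'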

5. Create the Flume configuration file flume-exec-hive.properties

a1.sources = r2
a1.channels = c2
a1.sinks = k2

# Exec source: continuously tail the log file
a1.sources.r2.type = exec
a1.sources.r2.command = tail -F /usr/local/data/flumehive.log
a1.sources.r2.shell = /bin/bash -c


a1.sinks.k2.type = hive
a1.sinks.k2.hive.metastore = thrift://hadoop100:9083
a1.sinks.k2.hive.database = default
a1.sinks.k2.hive.table = flumehive
a1.sinks.k2.batchSize = 3000

# Partition value comes from the event's local timestamp, rounded down to 10-minute buckets
a1.sinks.k2.hive.partition = %y-%m-%d-%H-%M
a1.sinks.k2.useLocalTimeStamp = true
a1.sinks.k2.round = true
a1.sinks.k2.roundValue = 10
a1.sinks.k2.roundUnit = minute
# DELIMITED serializer: split incoming lines on "," and map the fields to table columns
a1.sinks.k2.serializer = DELIMITED
a1.sinks.k2.serializer.delimiter = ","
a1.sinks.k2.serializer.serdeSeparator = ','
a1.sinks.k2.serializer.fieldnames = nid,name,phone

a1.channels.c2.type = memory
a1.channels.c2.capacity = 10000
a1.channels.c2.transactionCapacity = 5000

# Wire the source and sink to the channel
a1.sources.r2.channels = c2
a1.sinks.k2.channel = c2


Start the agent:

cd /usr/local/soft/apache-flume-1.9.0-bin/conf/
flume-ng agent -n a1 -c . -f flume-exec-hive.properties -Dflume.root.logger=DEBUG,console
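Once the agent is up, delivery can also be checked from another terminal by listing the table's warehouse directory (the path comes from hive.metastore.warehouse.dir above; streaming writes show up as delta files under the partition directories):

hdfs dfs -ls -R /user/hive/warehouse/flumehive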

6. Add test data

Create a flumehive.log file under /usr/local/data/.

Append a record such as: 1,superjean,18311315869
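From the shell, for example (the tail -F source above picks up the appended line; the follow-up query just verifies it reached the table):

echo "1,superjean,18311315869" >> /usr/local/data/flumehive.log
hive -e "select * from flumehive;"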
