一、准备
1、版本
apache-hive-3.1.3-bin
引用的hadoop安装参考:hadoop 3.3.6 HA安装_hadoop安装包3.3.6-CSDN博客
2、服务角色
No. | ip | role1 | role2 | remark |
1 | hadoop01 | hive meta | hive server | |
2 | hadoop02 | hive meta | hive server | |
3 | hadoop03 | hive meta | hive server | |
3、创建用户
useradd hdfs
二、服务部署
1、安装
cd /BigData/install
tar xvf apache-hive-3.1.3-bin.tar.gz
ln -snf /BigData/install/apache-hive-3.1.3-bin /BigData/run/hive
2、定义环境变量
cat > /etc/profile.d/hive.sh << 'EOF'
#!/bin/bash
#HIVE
export HIVE_HOME=/BigData/run/hive
export HCAT_HOME=$HIVE_HOME/hcatalog
export HIVE_CONF=$HIVE_HOME/conf
export PATH=$HIVE_HOME/bin:$PATH
export HADOOP_USER_CLASSPATH_FIRST=true
# User specific aliases and functions
#export HIVE_AUX_JARS_PATH=/BigData/run/hive/lib/hive-contrib-1.1.0-cdh5.5.0.jar,/data/dbcenter/tcyarn/hive_udf_jar/DCHive.jar,/data/dbcenter/tcyarn/hive_udf_jar/hive-format.jar
EOF
3、创建hive 元数据库(基于mariadb)
create database hivemeta;
CREATE USER 'hivemeta'@'localhost' IDENTIFIED BY 'hivemeta';
GRANT ALL PRIVILEGES ON hivemeta.* TO 'hivemeta'@'localhost';
CREATE USER 'hivemeta'@'hadoop01.jedy.com.cn' IDENTIFIED BY 'hivemeta';
GRANT ALL PRIVILEGES ON hivemeta.* TO 'hivemeta'@'hadoop01.jedy.com.cn';
CREATE USER 'hivemeta'@'%' IDENTIFIED BY 'hivemeta';
GRANT ALL PRIVILEGES ON hivemeta.* TO 'hivemeta'@'%';
FLUSH PRIVILEGES;
4、初始化hive meta数据库(执行前需先完成下面第5步的hive-site.xml配置和第6步的jdbc连接器安装)
cd /BigData/run/hive/; bin/schematool -dbType mysql -initSchema -verbose
5、修改配置
hive-env.sh
[root@hadoop01 conf]# more hive-env.sh| grep -vE '^$|^#'
export HADOOP_CLIENT_OPTS=-Xmx5120m
if [ "$SERVICE" = "hiveserver2" ]; then
export HADOOP_HEAPSIZE=2000
export HADOOP_OPTS="$HADOOP_OPTS -XX:MaxPermSize=512m -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:/data/logs/hive/hiveserver_gc.log -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=64M -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/data/logs/hive/hiveserver.dump -XX:OnOutOfMemoryError=/script/killparent.sh"
fi
if [ "$SERVICE" = "metastore" ]; then
export HADOOP_HEAPSIZE=1000
export HADOOP_OPTS="$HADOOP_OPTS -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:/data/logs/hive/metastore_gc.log -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=64M -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/data/logs/hive/metastore.dump -XX:OnOutOfMemoryError=/script/killparent.sh"
fi
hive-site.xml
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<!-- jdbc连接的url-->
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://hadoop01.jedy.com.cn:3306/hivemeta?createDatabaseIfNotExist=true&amp;useSSL=false</value>
</property>
<!-- jdbc连接的Driver-->
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
</property>
<!-- jdbc连接的username-->
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>hivemeta</value>
</property>
<!-- jdbc连接的password-->
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>hivemeta</value>
</property>
<!-- Hive元数据存储版本的验证 -->
<property>
<name>hive.metastore.schema.verification</name>
<value>false</value>
</property>
<!-- 元数据存储授权-->
<property>
<name>hive.metastore.event.db.notification.api.auth</name>
<value>false</value>
</property>
<!-- Hive默认在HDFS的工作目录-->
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/user/hive/warehouse</value>
</property>
<!-- 指定存储元数据要连接的地址 -->
<property>
<name>hive.metastore.uris</name>
<value>thrift://hadoop01.jedy.com.cn:9083,thrift://hadoop02.jedy.com.cn:9083,thrift://hadoop03.jedy.com.cn:9083</value>
</property>
<!-- 指定hiveserver2连接的host-->
<property>
<name>hive.server2.thrift.bind.host</name>
<value>0.0.0.0</value>
</property>
<!-- 指定hiveserver2连接的端口号-->
<property>
<name>hive.server2.thrift.port</name>
<value>10000</value>
</property>
<!-- 打印表头信息-->
<property>
<name>hive.cli.print.header</name>
<value>true</value>
</property>
<!-- 打印当前数据库信息-->
<property>
<name>hive.cli.print.current.db</name>
<value>true</value>
</property>
<property>
<name>datanucleus.schema.autoCreateAll</name>
<value>true</value>
</property>
<property>
<name>hive.exec.scratchdir</name>
<value>/user/hive/tmp</value>
</property>
<!-- 日志目录 -->
<property>
<name>hive.querylog.location</name>
<value>/user/hive/log</value>
</property>
</configuration>
6、附加jdbc连接器
将 mysql-connector-java-5.1.49.jar(自行下载)复制到 /BigData/run/hive/lib/ 目录下
三、服务管理
1、启动hive meta
su - hdfs -c 'cd /BigData/run/hive ; nohup ./bin/hive --service metastore &'
2、启动hive server
su - hdfs -c 'cd /BigData/run/hive ; nohup ./bin/hive --service hiveserver2 &'
四、验证
1、hive
[hdfs@hadoop01 ~]$ hive
hive> show databases;
OK
default
Time taken: 0.914 seconds, Fetched: 1 row(s)
hive> show tables;
OK
Time taken: 0.33 seconds
hive>
2、beeline
[hdfs@hadoop01 ~]$ beeline
Beeline version 3.1.3 by Apache Hive
beeline> !connect jdbc:hive2://172.31.52.81:10000
Connecting to jdbc:hive2://172.31.52.81:10000
Enter username for jdbc:hive2://172.31.52.81:10000: hivemeta
Enter password for jdbc:hive2://172.31.52.81:10000: ********
Connected to: Apache Hive (version 3.1.3)
Driver: Hive JDBC (version 3.1.3)
Transaction isolation: TRANSACTION_REPEATABLE_READ
0: jdbc:hive2://172.31.52.81:10000> show tables;
INFO : Compiling command(queryId=hdfs_20240607153107_256a070e-ed40-452d-8105-af546c73f9e5): show tables
INFO : Concurrency mode is disabled, not creating a lock manager
INFO : Semantic Analysis Completed (retrial = false)
INFO : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:tab_name, type:string, comment:from deserializer)], properties:null)
INFO : Completed compiling command(queryId=hdfs_20240607153107_256a070e-ed40-452d-8105-af546c73f9e5); Time taken: 0.028 seconds
INFO : Concurrency mode is disabled, not creating a lock manager
INFO : Executing command(queryId=hdfs_20240607153107_256a070e-ed40-452d-8105-af546c73f9e5): show tables
INFO : Starting task [Stage-0:DDL] in serial mode
INFO : Completed executing command(queryId=hdfs_20240607153107_256a070e-ed40-452d-8105-af546c73f9e5); Time taken: 0.011 seconds
INFO : OK
INFO : Concurrency mode is disabled, not creating a lock manager
+-----------+
| tab_name |
+-----------+
| my_table |
+-----------+
1 row selected (0.146 seconds)
0: jdbc:hive2://172.31.52.81:10000> !quit
五、遇到的问题
1、hiveserver启动后进程在,但是10000端口没有启动
报错:
没有明显日志
解决方案:
在 hive-site.xml 中增加如下配置,然后重启 hive meta 和 hive server
<property>
<name>hive.metastore.event.db.notification.api.auth</name>
<value>false</value>
</property>