Experiment environment
master:192.168.10.131
slave1:192.168.10.129
slave2:192.168.10.130
OS: ubuntu-16.04.3
hadoop-2.7.1
zookeeper-3.4.8
hive-2.1.1
Experiment steps
1. Install Hive
Download and install:
wget https://archive.apache.org/dist/hive/hive-2.1.1/apache-hive-2.1.1-bin.tar.gz
tar -zxvf apache-hive-2.1.1-bin.tar.gz -C /opt/Data
Add the environment variables (e.g. to /etc/profile):
#hive
export HIVE_HOME=/opt/Data/apache-hive-2.1.1-bin
export PATH=$HIVE_HOME/bin:$PATH
export HIVE_CONF_DIR=$HIVE_HOME/conf
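After appending these lines (assuming they went into /etc/profile), reload the shell configuration so they take effect in the current session:
source /etc/profile
echo $HIVE_HOME    # should print /opt/Data/apache-hive-2.1.1-bin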
2. Install MySQL
Install the server:
apt-get install mysql-server    # choose yes at the prompt and enter the root password twice
Verify the installation: mysql -u root -p
Create a dedicated MySQL user for Hive and grant it sufficient privileges. Once the metastore account exists, remember to log in with the hive account you created:
create user 'hive'@'%' identified by 'hive'; # user name hive, password hive; '%' allows both remote and local access; the Hive configuration file below references these credentials
grant all privileges on *.* to 'hive'@'%' identified by 'hive'; # grant all privileges to the hive user; *.* means every table in every database
flush privileges; # make the changes take effect immediately
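As an optional sanity check (the metastore database name hive matches the JDBC URL configured below; createDatabaseIfNotExist=true would also create it automatically), log in as the new user and pre-create the database:
mysql -u hive -p    # password: hive
mysql> create database hive;
mysql> show databases;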
Modify my.cnf:
vim /etc/mysql/my.cnf
Comment out this line: # bind-address = 127.0.0.1
Restart MySQL:
/etc/init.d/mysql restart
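To confirm MySQL now accepts remote connections (a quick check; assumes the slaves can reach the master on port 3306), try connecting from one of the slave nodes using the master IP from the environment above:
mysql -h 192.168.10.131 -u hive -p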
Add the JDBC driver
Download the MySQL driver mysql-connector-java-8.0.18.jar and put it in Hive's lib directory.
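For example, assuming the jar was downloaded to the current directory:
cp mysql-connector-java-8.0.18.jar $HIVE_HOME/lib/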
3. Modify the configuration
Edit the configuration files in the conf directory.
- Modify hive-site.xml
cp hive-default.xml.template hive-site.xml
Delete the existing properties in the file and replace them with the following:
<configuration>
<!-- Hive Execution Parameters -->
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://master:3306/hive?createDatabaseIfNotExist=true&amp;useSSL=false&amp;serverTimezone=GMT</value>
<description>JDBC connect string for a JDBC metastore</description>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.cj.jdbc.Driver</value>
<description>Driver class name for a JDBC metastore</description>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>hive</value>
<description>username to use against metastore database</description>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>hive</value>
<description>password to use against metastore database</description>
</property>
<property>
<name>datanucleus.autoCreateSchema</name>
<value>true</value>
</property>
<property>
<name>datanucleus.autoCreateTables</name>
<value>true</value>
</property>
<property>
<name>datanucleus.autoCreateColumns</name>
<value>true</value>
</property>
<!-- Location of the Hive warehouse on HDFS -->
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/hive</value>
<description>location of default database for the warehouse</description>
</property>
<!-- Temporary storage location for added resources -->
<property>
<name>hive.downloaded.resources.dir</name>
<value>/opt/Data/apache-hive-2.1.1-bin/tmp/resources</value>
<description>Temporary local directory for added resources in the remote file system.</description>
</property>
<!-- Before Hive 0.9, hive.exec.dynamic.partition had to be set to true explicitly; since 0.9 it defaults to true -->
<property>
<name>hive.exec.dynamic.partition</name>
<value>true</value>
</property>
<property>
<name>hive.exec.dynamic.partition.mode</name>
<value>nonstrict</value>
</property>
<!-- Change the log locations -->
<property>
<name>hive.exec.local.scratchdir</name>
<value>/opt/Data/apache-hive-2.1.1-bin/tmp/HiveJobsLog</value>
<description>Local scratch space for Hive jobs</description>
</property>
<property>
<name>hive.querylog.location</name>
<value>/opt/Data/apache-hive-2.1.1-bin/tmp/HiveRunLog</value>
<description>Location of Hive run time structured log file</description>
</property>
<property>
<name>hive.server2.logging.operation.log.location</name>
<value>/opt/Data/apache-hive-2.1.1-bin/tmp/OperationLog</value>
<description>Top level directory where operation logs are stored if logging functionality is enabled</description>
</property>
<!-- Configure the HWI interface -->
<property>
<name>hive.hwi.war.file</name>
<value>lib/hive-hwi-2.1.1.jar</value>
<description>This sets the path to the HWI war file, relative to ${HIVE_HOME}. </description>
</property>
<property>
<name>hive.hwi.listen.host</name>
<value>0.0.0.0</value>
<description>This is the host address the Hive Web Interface will listen on</description>
</property>
<property>
<name>hive.hwi.listen.port</name>
<value>9999</value>
<description>This is the port the Hive Web Interface will listen on</description>
</property>
<!-- HiveServer2 no longer needs hive.metastore.local: if hive.metastore.uris is empty the metastore runs locally, otherwise it is remote. For a remote metastore, just configure hive.metastore.uris: -->
<!-- property>
<name>hive.metastore.uris</name>
<value>thrift://master:9083</value>
<description>Thrift URI for the remote metastore. Used by metastore client to connect to remote metastore.</description>
</property -->
<property>
<name>hive.server2.thrift.bind.host</name>
<value>master</value>
</property>
<property>
<name>hive.server2.thrift.port</name>
<value>10000</value>
</property>
<property>
<name>hive.server2.thrift.http.port</name>
<value>10001</value>
</property>
<property>
<name>hive.server2.thrift.http.path</name>
<value>cliservice</value>
</property>
<!-- HiveServer2 web UI -->
<property>
<name>hive.server2.webui.host</name>
<value>master</value>
</property>
<property>
<name>hive.server2.webui.port</name>
<value>10002</value>
</property>
<property>
<name>hive.scratch.dir.permission</name>
<value>755</value>
</property>
<!-- For hive.aux.jars.path below, if the jar path is local remember to prefix it with file://, otherwise the jars will not be found and you will get an org.apache.hadoop.hive.contrib.serde2.RegexSerDe error
<property>
<name>hive.aux.jars.path</name>
<value>file://</value>
</property>
-->
<property>
<name>hive.server2.enable.doAs</name>
<value>false</value>
</property>
<!-- property>
<name>hive.server2.authentication</name>
<value>NOSASL</value>
</property -->
<property>
<name>hive.auto.convert.join</name>
<value>false</value>
</property>
<property>
<name>spark.dynamicAllocation.enabled</name>
<value>true</value>
<description>Enable dynamic resource allocation</description>
</property>
<!-- When running Hive on Spark, omitting the following setting can cause an out-of-memory exception -->
<property>
<name>spark.driver.extraJavaOptions</name>
<value>-XX:PermSize=128M -XX:MaxPermSize=512M</value>
</property>
</configuration>
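The configuration above references several local tmp directories and an HDFS warehouse path, and some of these are not created automatically. A minimal sketch for creating them (paths follow the values in hive-site.xml; adjust if you changed them):
mkdir -p /opt/Data/apache-hive-2.1.1-bin/tmp/resources
mkdir -p /opt/Data/apache-hive-2.1.1-bin/tmp/HiveJobsLog
mkdir -p /opt/Data/apache-hive-2.1.1-bin/tmp/HiveRunLog
mkdir -p /opt/Data/apache-hive-2.1.1-bin/tmp/OperationLog
hdfs dfs -mkdir -p /hive
hdfs dfs -chmod g+w /hive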
- Modify hive-env.sh
Add the following:
JAVA_HOME=/opt/jdk1.8.0_221
HADOOP_HOME=/opt/Data/hadoop-2.7.1
HIVE_HOME=/opt/Data/apache-hive-2.1.1-bin
export HIVE_CONF_DIR=$HIVE_HOME/conf
export CLASSPATH=$CLASSPATH:$JAVA_HOME/lib:$HADOOP_HOME/lib:$HIVE_HOME/lib
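Note: a fresh install only ships hive-env.sh.template; if hive-env.sh does not exist yet, create it from the template first:
cp hive-env.sh.template hive-env.sh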
- Modify hive-log4j2.properties
Set the log storage path (the tmp directory must be created manually; see the mkdir commands above):
hive.log.dir=/opt/Data/apache-hive-2.1.1-bin/tmp
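As with hive-env.sh, this file may only exist as a template in a fresh install; copy it first if needed:
cp hive-log4j2.properties.template hive-log4j2.properties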
4. Initialize Hive
Initialize the Hive metastore schema (stored in MySQL); schematool lives in the bin directory:
schematool -initSchema -dbType mysql
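On success, schematool should report something like "schemaTool completed". You can also confirm that the metastore tables were created in MySQL (assuming the database is named hive, as configured above):
mysql -u hive -p -e 'use hive; show tables;'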
5. Verify Hive
hive
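If the CLI starts without errors, a short smoke test (t1 is a hypothetical table name) confirms the metastore and HDFS warehouse are wired up:
hive> show databases;
hive> create table t1 (id int);
hive> show tables;
hive> drop table t1;
To exercise the HiveServer2 settings configured above, you can also start the server and connect with beeline:
hiveserver2 &
beeline -u jdbc:hive2://master:10000 -n hive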