1. Install MySQL
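The MySQL installation itself is only summarized here. A minimal sketch, assuming a CentOS host with the MySQL server/client RPMs already uploaded to /opt/software/mysql-libs; the package filenames and the initial-password handling depend on your MySQL version and are illustrative:
# Remove the conflicting pre-installed mysql-libs package, if any
[root@master mysql-libs]# rpm -qa | grep mysql
[root@master mysql-libs]# rpm -e --nodeps mysql-libs
# Install the MySQL server and client RPMs (filenames are illustrative)
[root@master mysql-libs]# rpm -ivh MySQL-server-5.6.24-1.el6.x86_64.rpm
[root@master mysql-libs]# rpm -ivh MySQL-client-5.6.24-1.el6.x86_64.rpm
# Start the service, log in with the initial root password, then set it to 123456 (the value used in hive-site.xml below)
[root@master mysql-libs]# service mysql start
[root@master mysql-libs]# mysql -uroot -p
mysql> SET PASSWORD = PASSWORD('123456');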
2. Upload and extract the Hive installation package
Use SecureCRT to upload the Hive 1.2.1 package (apache-hive-1.2.1-bin.tar.gz) to the /opt/software directory.
# Change the owner and group
[root@master software]# chown dendan:dendan apache-hive-1.2.1-bin.tar.gz
# Extract the archive
[root@master software]# tar -zxvf apache-hive-1.2.1-bin.tar.gz -C /opt/module/
# Change to the target directory
[root@master software]# cd /opt/module/
[root@master module]# ll
# Rename the directory
[root@master module]# mv apache-hive-1.2.1-bin hive-1.2.1
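Optionally, export HIVE_HOME so the hive command can be run from any directory (a convenience step, not required by the rest of this guide):
# Append to /etc/profile, then reload it
export HIVE_HOME=/opt/module/hive-1.2.1
export PATH=$PATH:$HIVE_HOME/bin
[root@master module]# source /etc/profile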
3. Add the MySQL JDBC driver to Hive
[root@master hive-1.2.1]# mv /opt/software/mysql-libs/mysql-connector-java-5.1.27 ./lib/
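Hive only picks up jar files placed directly under lib/, so make sure the connector jar itself ends up there rather than a wrapping directory (the exact jar name varies with the connector version):
# The MySQL connector jar should be listed here
[root@master hive-1.2.1]# ls lib/ | grep mysql-connector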
4. Hadoop cluster configuration
Hive stores its table data on HDFS, so the required directories must be created on HDFS in advance.
- Start HDFS and YARN.
- Create the /tmp directory
bin/hdfs dfs -mkdir /tmp
- Create the /user/hive/warehouse directory
bin/hdfs dfs -mkdir -p /user/hive/warehouse
- Grant group write permission on the /tmp and /user/hive/warehouse directories.
bin/hdfs dfs -chmod g+w /tmp
bin/hdfs dfs -chmod g+w /user/hive/warehouse
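A quick check that both directories exist with group write permission (not part of the original steps):
bin/hdfs dfs -ls -d /tmp /user/hive/warehouse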
5. Modify the configuration files
# Work in the Hive conf directory
[root@master conf]# pwd
/opt/module/hive-1.2.1/conf
hive-env.sh
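hive-env.sh is shipped as a template in the conf directory, so create it first and then set the two variables below:
[root@master conf]# mv hive-env.sh.template hive-env.sh
[root@master conf]# vi hive-env.sh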
# Set HADOOP_HOME to point to a specific hadoop install directory
export HADOOP_HOME=/opt/module/hadoop-2.7.2
# Hive Configuration Directory can be controlled by:
export HIVE_CONF_DIR=/opt/module/hive-1.2.1/conf
hive-site.xml
[root@master conf]# touch hive-site.xml
[root@master conf]# vi hive-site.xml
Add the following content:
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<!-- JDBC connection string -->
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://master:3306/metastore?createDatabaseIfNotExist=true</value>
<description>JDBC connect string for a JDBC metastore</description>
</property>
<!-- MySQL JDBC driver class -->
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
<description>Driver class name for a JDBC metastore</description>
</property>
<!-- Username -->
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
<description>username to use against metastore database</description>
</property>
<!-- Password -->
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>123456</value>
<description>password to use against metastore database</description>
</property>
<!-- Whether to show the current database in the Hive prompt -->
<property>
<name>hive.cli.print.current.db</name>
<value>true</value>
<description>Whether to include the current database in the Hive prompt.</description>
</property>
<!-- Whether to print column headers in query results -->
<property>
<name>hive.cli.print.header</name>
<value>false</value>
<description>Whether to print the names of the columns in query output.</description>
</property>
<!-- Warehouse directory location -->
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/user/hive/warehouse</value>
<description>location of default database for the warehouse</description>
</property>
</configuration>
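Hive connects to MySQL on master as root with the password configured above. If the root account is restricted to localhost logins, grant it access from other hosts before starting Hive (a sketch for MySQL 5.x; tighten the host pattern to match your security requirements):
[root@master conf]# mysql -uroot -p123456
mysql> GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' IDENTIFIED BY '123456';
mysql> FLUSH PRIVILEGES;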
Configure where the runtime log is saved by modifying hive-log4j.properties:
[root@master conf]# mv hive-log4j.properties.template hive-log4j.properties
[root@master conf]# vi hive-log4j.properties
Modify hive.log.dir:
# Location where the runtime log is saved
hive.log.dir=/opt/module/hive-1.2.1/logs
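If the logs directory does not exist yet, it can be created up front so log4j can write the log file there:
[root@master conf]# mkdir -p /opt/module/hive-1.2.1/logs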
6. Connection test
[dendan@master hive-1.2.1]$ bin/hive
Logging initialized using configuration in file:/opt/module/hive-1.2.1/conf/hive-log4j.properties
hive (default)> show databases;
OK
default
Time taken: 1.582 seconds, Fetched: 1 row(s)
hive (default)> create database test;
OK
Time taken: 0.539 seconds
hive (default)> show databases;
OK
default
test
Time taken: 0.014 seconds, Fetched: 2 row(s)
hive (default)>
The test.db directory is created on HDFS:
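This can also be verified from the command line on the Hadoop side (run from the Hadoop installation directory; the listing depends on your cluster):
bin/hdfs dfs -ls /user/hive/warehouse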
The metastore database now appears in MySQL:
mysql> show databases;
+--------------------+
| Database |
+--------------------+
| information_schema |
| metastore |
| mysql |
| performance_schema |
| test |
+--------------------+
5 rows in set (0.06 sec)
mysql>
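For a further check, the Hive databases themselves are recorded in the metastore's DBS table (column names follow the standard Hive metastore schema):
mysql> use metastore;
mysql> select DB_ID, NAME, DB_LOCATION_URI from DBS;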