1、准备工作
# Basic preparation
# Hive: fetch the 1.2.1 binary release from the permanent Apache archive
# (mirror sites drop old releases, the archive keeps them).
wget https://archive.apache.org/dist/hive/hive-1.2.1/apache-hive-1.2.1-bin.tar.gz
tar -zxvf apache-hive-1.2.1-bin.tar.gz
# Make sure the parent directory exists, otherwise the mv below fails.
mkdir -p /www
mv apache-hive-1.2.1-bin /www/hive
# Create the symlink /opt/hive pointing at /www/hive
ln -s /www/hive /opt/hive
2、hive文件配置
# Scratch directory; hive-site.xml below points system:java.io.tmpdir at it.
mkdir -p /data/hive/iotmp
# Copy the jline 2.12 jar shipped with Hive into Hadoop's YARN lib directory.
# NOTE(review): presumably this avoids a clash with an older jline bundled with Hadoop - confirm.
cp /opt/hive/lib/jline-2.12.jar /opt/hadoop/share/hadoop/yarn/lib
# mysql-connector-java-5.1.39-bin.jar has to be downloaded manually.
# Download page: http://dev.mysql.com/downloads/connector/j/
# Unpack the archive and locate mysql-connector-java-5.1.39-bin.jar, then
# move it into Hive's lib directory (absolute path: HIVE_HOME is not set anywhere in this guide).
mv mysql-connector-java-5.1.39-bin.jar /opt/hive/lib
# The remaining steps use paths relative to the Hive installation root.
cd /opt/hive
cp conf/hive-env.sh.template conf/hive-env.sh
cp conf/hive-default.xml.template conf/hive-site.xml
2.1、vim conf/hive-env.sh
# Hadoop installation root that Hive should use.
export HADOOP_HOME=/opt/hadoop
# Directory containing the Hadoop configuration files.
export HADOOP_CONF_DIR=/opt/hadoop/etc/hadoop
# Directory containing Hive's own configuration (hive-env.sh, hive-site.xml).
export HIVE_CONF_DIR=/opt/hive/conf
2.2、vim conf/hive-site.xml(请对着修改value值)
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  hive-site.xml: metastore JDBC connection, remote metastore URIs,
  scratch/tmp settings and HiveServer2 authentication + ZooKeeper discovery.
  Adjust every <value> to match your own hosts and credentials.
-->
<configuration>

  <!-- Metastore backing database (MySQL over JDBC) -->
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://apachecn.org-18:3306/adhive?createDatabaseIfNotExist=true</value>
    <description>
      JDBC connect string for a JDBC metastore.
      To use SSL to encrypt/authenticate the connection, provide database-specific SSL flag in the connection URL.
      For example, jdbc:postgresql://myhost/db?ssl=true for postgres database.
    </description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.jdbc.Driver</value>
    <description>Driver class name for a JDBC metastore</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>datasys</value>
    <description>Username to use against metastore database</description>
  </property>
  <!-- NOTE(review): the password is stored in plain text; restrict read access to this file. -->
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>datasys</value>
    <description>password to use against metastore database</description>
  </property>

  <!-- Hive metastore HA: clients are given several metastore URIs -->
  <property>
    <name>hive.metastore.uris</name>
    <value>thrift://apachecn.org-16:9083,thrift://apachecn.org-17:9083</value>
    <description>Thrift URI for the remote metastore. Used by metastore client to connect to remote metastore.</description>
  </property>

  <!-- Values for the system:java.io.tmpdir and system:user.name variables -->
  <property>
    <name>system:java.io.tmpdir</name>
    <value>/data/hive/iotmp</value>
  </property>
  <property>
    <name>system:user.name</name>
    <value>hadoopuser</value>
  </property>

  <property>
    <name>hive.insert.into.multilevel.dirs</name>
    <value>true</value>
    <description>
      Where to insert into multilevel directories like
      "insert directory '/HIVEFT25686/chinna/' from table"
    </description>
  </property>

  <!-- HiveServer2 authentication mode, set so Python clients can connect -->
  <property>
    <name>hive.server2.authentication</name>
    <value>NOSASL</value>
    <description>
      Expects one of [nosasl, none, ldap, kerberos, pam, custom].
      Client authentication types.
      NONE: no authentication check
      LDAP: LDAP/AD based authentication
      KERBEROS: Kerberos/GSSAPI authentication
      CUSTOM: Custom authentication provider
      (Use with property hive.server2.custom.authentication.class)
      PAM: Pluggable authentication module
      NOSASL: Raw transport
    </description>
  </property>

  <!-- HiveServer2 HA: instances register in ZooKeeper under the given namespace -->
  <property>
    <name>hive.server2.support.dynamic.service.discovery</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.server2.zookeeper.namespace</name>
    <value>hiveserver2</value>
  </property>
  <property>
    <name>hive.zookeeper.quorum</name>
    <value>apachecn.org-19:2181,apachecn.org-20:2181,apachecn.org-21:2181</value>
  </property>

</configuration>
2.3、初始化数据库
./bin/schematool -initSchema -dbType mysql
[datasys@mtcloud-distributeddev-01 bin]$ schematool -initSchema -dbType mysql
which: no hbase in (.:.:/opt/jdk/bin:/opt/storm/bin:/opt/zookeeper/bin:/opt/kafka/bin:/usr/local/bin:/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/sbin:/opt/hadoop/bin:/opt/hadoop/sbin:/home/datasys/bin)
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/opt/hive/lib/log4j-slf4j-impl-2.4.1.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/opt/hadoop/share/hadoop/common/lib/slf4j-log4j12-1.7.5.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory]
Metastore connection URL: jdbc:mysql://mtcloud-distributeddev-01:3306/hive?createDatabaseIfNotExist=true
Metastore Connection Driver : com.mysql.jdbc.Driver
Metastore connection User: datasys
Starting metastore schema initialization to 2.1.0
Initialization script hive-schema-2.1.0.mysql.sql
Initialization script completed
schemaTool completed
3、权限分配
# Distribute the Hive installation to the hosts listed in ip_slave.txt
# NOTE(review): only /www/hive is copied; the /opt/hive symlink used by later commands
# must also exist on those hosts - confirm.
pscp -A -h ~/jzl/ip_slave.txt -t 0 -e ./error/ -l root -r /www/hive /www
# Distribute the Hive data/log directory
pscp -A -h ~/jzl/ip_slave.txt -t 0 -e ./error/ -l root -r /data/hive /data
# Hand ownership of both trees to hadoopuser on every host in ip_list.txt
# (owner:group form; the "owner.group" dot separator is deprecated in chown).
pssh -A -h ~/jzl/ip_list.txt -e ./error/ -l root "chown -R hadoopuser:hadoopuser /www/hive;chown -R hadoopuser:hadoopuser /data/hive"
4、批启动脚本
sh start_hive_master.sh
# Start the metastore on the master hosts (it may run on any node of the Hadoop cluster;
# nodes running it act as the Hive servers, all other nodes are Hive clients).
# nohup + output redirection detach the service from the pssh/ssh session, so pssh
# returns immediately and the service keeps running after the session closes.
# NOTE(review): log files are written under /data/hive; the login user must be able to write there.
pssh -h ~/jzl/ip_master.txt -e ~/jzl/error/ "nohup /opt/hive/bin/hive --service metastore -p 9083 > /data/hive/metastore.log 2>&1 &"
# Start hiveserver2 (used for remote/web access), detached the same way.
pssh -h ~/jzl/ip_master.txt -e ~/jzl/error/ "nohup /opt/hive/bin/hive --service hiveserver2 --hiveconf hive.server2.thrift.port=10000 > /data/hive/hiveserver2.log 2>&1 &"
5、测试入库
# 通过zk连接hiveserver2
> bin/beeline
beeline> !connect jdbc:hive2://apachecn.org-19:2181,apachecn.org-20:2181,apachecn.org-21:2181/;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2;auth=noSasl hadoopuser
vi /home/hadoopuser/jzl/hive-test.txt
1 hadoop
2 hive
3 hbase
4 hello
hive (直接回车就可以启动)
-- Demo table for the load test: an integer id and a word per row.
-- Fields are separated by one space and rows by a newline, the layout of hive-test.txt.
CREATE TABLE IF NOT EXISTS tmp_words (
    id   INT,
    word STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY " "
    LINES TERMINATED BY "\n";
hive> truncate table tmp_words;
hive> load data local inpath '/home/hadoopuser/jzl/hive-test.txt' overwrite into table tmp_words;
Loading data to table default.tmp_words
OK
Time taken: 0.998 seconds
hive> select * from tmp_words;
OK
1 hadoop
2 hive
3 hbase
4 hello
插入数据 (这个时候我们可以看到程序运行了一个mr作业)
-- Append four sample rows. The explicit column list (supported since Hive 1.2.0)
-- keeps the statement correct even if the table's column order changes.
INSERT INTO tmp_words (id, word)
VALUES
    (5, 'nihao'),
    (6, 'nihao'),
    (7, 'nihao'),
    (8, 'nihao');