Hive的安装和使用
一、安装mysql
下载安装包
wget http://dev.mysql.com/get/mysql-community-release-el7-5.noarch.rpm
安装rpm仓库包(rpm -ivh是安装,不是解压):
rpm -ivh mysql-community-release-el7-5.noarch.rpm
安装:
yum install mysql-community-server
重启mysql服务:
service mysqld restart
为root用户设置密码root:
mysql -u root
mysql> set password for 'root'@'localhost' = password('root');
配置文件/etc/my.cnf:
vi /etc/my.cnf
加上编码配置:
[mysql]
default-character-set=utf8
授权root用户从任意主机访问所有数据库:
grant all privileges on *.* to 'root'@'%' identified by 'root';
刷新权限:
flush privileges;
二、hive的安装及配置
官网下载安装包:http://mirror.bit.edu.cn/apache/hive/
利用xftp上传到虚拟机
解压安装到指定目录下/opt/module
修改etc/profile文件,添加HIVE_HOME安装路径
source /etc/profile
使其生效
配置hive-env.sh:
cp hive-env.sh.template hive-env.sh
修改Hadoop的安装路径
HADOOP_HOME=/opt/module/hadoop-2.7.3
修改Hive的conf目录的路径
export HIVE_CONF_DIR=/opt/module/hive/conf
配置hive-site.xml:
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://127.0.0.1:3306/hive?characterEncoding=UTF-8&amp;serverTimezone=GMT%2B8</value>
<description>
JDBC connect string for a JDBC metastore.
To use SSL to encrypt/authenticate the connection, provide database-specific SSL flag in the connection URL.
For example, jdbc:postgresql://myhost/db?ssl=true for postgres database.
</description>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.cj.jdbc.Driver</value>
<description>Driver class name for a JDBC metastore</description>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
<description>Username to use against metastore database</description>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>123456</value>
<description>password to use against metastore database</description>
</property>
<property>
<name>hive.exec.local.scratchdir</name>
<value>/usr/local/hive/apache-hive-2.3.4-bin/tmp/${user.name}</value>
<description>Local scratch space for Hive jobs</description>
</property>
<property>
<name>hive.downloaded.resources.dir</name>
<value>/usr/local/hive/apache-hive-2.3.4-bin/iotmp/${hive.session.id}_resources</value>
<description>Temporary local directory for added resources in the remote file system.</description>
</property>
<property>
<name>hive.querylog.location</name>
<value>/usr/local/hive/apache-hive-2.3.4-bin/iotmp/${system:user.name}</value>
<description>Location of Hive run time structured log file</description>
</property>
<property>
<name>hive.server2.logging.operation.log.location</name>
<value>/usr/local/hive/apache-hive-2.3.4-bin/iotmp/${system:user.name}/operation_logs</value>
<description>Top level directory where operation logs are stored if logging functionality is enabled</description>
</property>
<property>
<name>hive.server2.thrift.bind.host</name>
<value>bigdata</value>
<description>Bind host on which to run the HiveServer2 Thrift service.</description>
</property>
<property>
<name>system:java.io.tmpdir</name>
<value>/usr/local/hive/apache-hive-2.3.4-bin/iotmp</value>
<description/>
</property>
初始化:
schematool -dbType mysql -initSchema
三、 编写wordcount程序(上传文件到hdfs)
vim 1.txt
hdfs dfs -mkdir /input
hdfs dfs -put 1.txt /input
hdfs dfs -ls /input
-- Staging table: one row per line of raw text.
CREATE TABLE words (line STRING);

-- Load the uploaded file from HDFS (INPATH moves the file into the table's location).
LOAD DATA INPATH '/input/1.txt' OVERWRITE INTO TABLE words;

-- Split each line on spaces, explode the tokens into rows,
-- then count occurrences of every distinct word.
CREATE TABLE wordcount AS
SELECT
    word,
    COUNT(1) AS count
FROM (
    SELECT explode(split(line, ' ')) AS word
    FROM words
) w
GROUP BY word
ORDER BY word;

-- Inspect the result.
SELECT * FROM wordcount;