hive 原理与安装
hive 是把类似SQL的语句变成相关的mapreduce任务进行计算得到结果,对于结构化数据,可以不用写mapreduce程序就可以进行大数据统计分析.
hive 使用mysql、Derby作为hive元数据的存储
将数据文件放入hive建立的表目录中,hive就可以使用SQL语句进行查询了(
如:hadoop fs -put /home/hadoop/testFile/t_boy.data /user/hive/warehouse/test001.db/t_boy ,其中t_boy是建立的一张hive表,在test001.db(数据库)中)
t_boy.data数据:
1 huang 18 32
2 yong 19 34
3 xing 20 36
4 ming 21 38
5 ling 25 40
//安装
[url]https://blog.csdn.net/t1dmzks/article/details/72026876[/url] (安装)
[url]https://blog.csdn.net/just4you/article/details/79981202[/url] (安装)
[url]https://www.yiibai.com/hive/hive_installation.html[/url] (hive教程)
cd /home/hadoop/
tar -zxvf apache-hive-1.2.2-bin.tar.gz -C /home/hadoop/
设置环境变量
vim /etc/profile
export HIVE_HOME=/home/hadoop/apache-hive-1.2.2-bin
export PATH=$PATH:$HIVE_HOME/bin
//配置
cd $HIVE_HOME/conf/
cp hive-default.xml.template hive-site.xml
cp hive-env.sh.template hive-env.sh
cp hive-exec-log4j.properties.template hive-exec-log4j.properties
cp hive-log4j.properties.template hive-log4j.properties
//hive-site.xml
<configuration>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<!--mysql默认端口3306-->
<value>jdbc:mysql://ubuntuHadoop:3306/hive</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<!--创建的hive用户-->
<value>root</value>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<!--创建hive用户时设置的密码-->
<value>123456</value>
</property>
</configuration>
//运行
./hive
//若运行时出现如下错误
[ERROR] Terminal initialization failed; falling back to unsupported
java.lang.IncompatibleClassChangeError: Found class jline.Terminal, but interface was expected
//处理
将 /home/hadoop/hadoop-2.6.5/share/hadoop/yarn/ 中的 jline-0.9.94.jar 删除,
替换为hive目录中lib里的jlineXXXX.jar
//测试
show databases;
create database test001;
use test001;
在mysql中先设置字符集,否则会报message:For direct MetaStore DB connections, we don't support retries at the client level.
mysql> alter database hive character set latin1;
create table t_boy(id int,name string,age int,size string);
show tables;
create table t_boy(id int,name string,age int,size string)
row format delimited
fields terminated by "\t";
create table t_baby(id int,name string,age int,size string)
row format delimited
fields terminated by "\t";
hadoop fs -put /home/hadoop/testFile/t_boy.data /user/hive/warehouse/test001.db/t_boy
use test001;
select * from t_boy;
select * from t_boy where age<20;
select count(*) from t_boy; //时间会长一点,因为要启动MapReduce进行计算
hive 是把类似SQL的语句变成相关的mapreduce任务进行计算得到结果,对于结构化数据,可以不用写mapreduce程序就可以进行大数据统计分析.
hive 使用mysql、Derby作为hive元数据的存储
将数据文件放入hive建立的表目录中,hive就可以使用SQL语句进行查询了(
如:hadoop fs -put /home/hadoop/testFile/t_boy.data /user/hive/warehouse/test001.db/t_boy ,其中t_boy是建立的一张hive表,在test001.db(数据库)中)
t_boy.data数据:
1 huang 18 32
2 yong 19 34
3 xing 20 36
4 ming 21 38
5 ling 25 40
//安装
[url]https://blog.csdn.net/t1dmzks/article/details/72026876[/url] (安装)
[url]https://blog.csdn.net/just4you/article/details/79981202[/url] (安装)
[url]https://www.yiibai.com/hive/hive_installation.html[/url] (hive教程)
cd /home/hadoop/
tar -zxvf apache-hive-1.2.2-bin.tar.gz -C /home/hadoop/
设置环境变量
vim /etc/profile
export HIVE_HOME=/home/hadoop/apache-hive-1.2.2-bin
export PATH=$PATH:$HIVE_HOME/bin
//配置
cd $HIVE_HOME/conf/
cp hive-default.xml.template hive-site.xml
cp hive-env.sh.template hive-env.sh
cp hive-exec-log4j.properties.template hive-exec-log4j.properties
cp hive-log4j.properties.template hive-log4j.properties
//hive-site.xml
<configuration>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<!--mysql默认端口3306-->
<value>jdbc:mysql://ubuntuHadoop:3306/hive</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<!--创建的hive用户-->
<value>root</value>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<!--创建hive用户时设置的密码-->
<value>123456</value>
</property>
</configuration>
//运行
./hive
//若运行时出现如下错误
[ERROR] Terminal initialization failed; falling back to unsupported
java.lang.IncompatibleClassChangeError: Found class jline.Terminal, but interface was expected
//处理
将 /home/hadoop/hadoop-2.6.5/share/hadoop/yarn/ 中的 jline-0.9.94.jar 删除,
替换为hive目录中lib里的jlineXXXX.jar
//测试
show databases;
create database test001;
use test001;
在mysql中先设置字符集,否则会报message:For direct MetaStore DB connections, we don't support retries at the client level.
mysql> alter database hive character set latin1;
create table t_boy(id int,name string,age int,size string);
show tables;
create table t_boy(id int,name string,age int,size string)
row format delimited
fields terminated by "\t";
create table t_baby(id int,name string,age int,size string)
row format delimited
fields terminated by "\t";
hadoop fs -put /home/hadoop/testFile/t_boy.data /user/hive/warehouse/test001.db/t_boy
use test001;
select * from t_boy;
select * from t_boy where age<20;
select count(*) from t_boy; //时间会长一点,因为要启动MapReduce进行计算