hive安裝
curl -O https://mirrors.tuna.tsinghua.edu.cn/apache/hive/hive-2.3.4/apache-hive-2.3.4-bin.tar.gz
tar -xzvf apache-hive-2.3.4-bin.tar.gz -C apps/
ln -s apache-hive-2.3.4-bin apps/hive
cd apps/apache-hive-2.3.4-bin
修改conf目錄下的配置文件:
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://10.141.170.56:3306/hive123?createDatabaseIfNotExist=true&amp;useSSL=false</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>dddd</value>
</property>
切換到bin目錄下
查看是否安装成功
bin/hive -v
./schematool -dbType mysql -initSchema
-rwxr-xr-x 1 root root 881 11月 1 05:10 beeline
drwxr-xr-x 3 root root 4096 1月 7 19:12 ext
-rwxr-xr-x 1 root root 9838 11月 1 05:10 hive
-rwxr-xr-x 1 root root 1900 11月 1 05:10 hive-config.sh
-rwxr-xr-x 1 root root 885 11月 1 05:10 hiveserver2
-rwxr-xr-x 1 root root 880 11月 1 05:10 hplsql
-rwxr-xr-x 1 root root 832 11月 1 05:10 metatool
-rwxr-xr-x 1 root root 884 11月 1 05:10 schematool
Hive不是关系数据库,不是OLTP(联机事务处理)系统,而是OLAP(联机分析处理)系统,不支持实时查询和行级更新。
-----------------------------------------------------------------------------
bin目录当中的脚本最终执行的都是hive中的命令 hive --service来开启相应的服务,默认是 hive --service cli。
hiveserver提供以JDBC、ODBC等方式的远程访问,监听10000端口
./hiveserver2
./beeline
$beeline>!help //查看帮助
$beeline>!quit //退出
$beeline>!connect jdbc:hive2://localhost:10000/mydb //连接到mydb数据库
-----------------------------------------------------------------------------
使用jdbc方式远程连接hiveserver
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-jdbc</artifactId>
<version>2.3.4</version>
</dependency>
修改hadoop 配置文件 etc/hadoop/core-site.xml,加入如下配置项
<property>
<name>hadoop.proxyuser.root.hosts</name>
<value>*</value>
<description>The superuser root can connect from any host (*) to impersonate a user</description>
</property>
<property>
<name>hadoop.proxyuser.root.groups</name>
<value>*</value>
<description>Allow the superuser root to impersonate any member of any group (*)</description>
</property>
/**
 * Minimal example: query HiveServer2 over JDBC and print each row of t1.
 * Requires the hive-jdbc dependency (declared above) on the classpath and a
 * HiveServer2 instance listening on hdp01:10000.
 */
public static void main(String[] args) throws Exception {
    // Register the Hive JDBC driver.
    Class.forName("org.apache.hive.jdbc.HiveDriver");
    // try-with-resources closes rs/st/conn even if an exception is thrown
    // mid-iteration; the original close() calls were skipped on error,
    // leaking the server-side session.
    try (Connection conn = DriverManager.getConnection("jdbc:hive2://hdp01:10000/person_db");
         Statement st = conn.createStatement();
         ResultSet rs = st.executeQuery("select id , name ,age from t1")) {
        while (rs.next()) {
            // Print all three selected columns (age was fetched but never read).
            System.out.println(rs.getInt(1) + "," + rs.getString(2) + "," + rs.getInt(3));
        }
    }
}
-----------------------------------------------------------------------------
建表:
hive> create [external] table if not exists custom(
> id int ,
> name string,
> age int comment "user age"
> )comment "custom table"
> row format delimited
> fields terminated by ","
> STORED AS TEXTFILE;
分区表:分区相当于粗粒度的索引,按目录裁剪扫描范围;year,month会被映射成为表中的字段。
hive> create table t1 (
> id int ,
> name string ,
> age int
> ) partitioned by(year int ,month int)
> row format delimited
> fields terminated by ","
> stored as textfile;
hive> show partitions t1; //查询分区
OK
year=2014/month=12
添加分区
hive> alter table t1 add partition(year=2014,month=12); //会生成目录/user/hive/warehouse/person_db.db/t1/year=2014/month=12
hive> load data local inpath "/root/ori/custom.txt" into table t1 partition(year=2014,month=12); //加载数据到分区
查詢(避免查詢所有分區),只扫描指定分区下的数据
hive> select * from t1 where year=2014 and month =12;
桶表:执行插入时会有三个reduce task
hive> create table if not exists t2(
> id int ,
> name string,
> age int
> ) clustered by(id) into 3 buckets
> row format delimited
> fields terminated by ","
> stored as textfile;
//查询桶表
hive>select * from t2 where id =2;
//load方式不会分桶,需要set hive.strict.checks.bucketing to false and that hive.mapred.mode is not set to 'strict' 才能加载成功
hive> load data local inpath "/root/ori/custom.txt" into table t2 ;
//会生成三个文件,本质是运行了三个reduce task
hive> insert into t2 select id,name,age from t1;
hive> dfs -ls /user/hive/warehouse/person_db.db/t2;
Found 3 items
-rwxr-xr-x 1 root supergroup 8 2019-01-08 00:41 /user/hive/warehouse/person_db.db/t2/000000_0
-rwxr-xr-x 1 root supergroup 8 2019-01-08 00:41 /user/hive/warehouse/person_db.db/t2/000001_1
-rwxr-xr-x 1 root supergroup 8 2019-01-08 00:41 /user/hive/warehouse/person_db.db/t2/000002_0
//创建视图
$hive>create view v1 as select a.id aid,a.name ,b.id bid , b.order from customers a left outer join default.tt b on a.id = b.cid ;
加载数据,其实是创建新文件:
load data [local] inpath "/root/ori/custom.txt" [overwrite] into table custom; //overwrite 会删除之前的内容,local表示从本地文件系统上传数据
-----------------------------------------------------------------------------
hive严格模式
1.分区表必须指定分区进行查询。
2.order by时必须使用limit子句。
3.不允许笛卡尔积。
-----------------------------------------------------------------------------
Hive的动态分区
//临时设为非严格模式
hive> set hive.exec.dynamic.partition.mode=nonstrict;
hive>insert into t5 partition(country,province) select (包含分区的字段信息就可以) from orders;
如果当前模式是严格模式,则要求至少有一列的字段是静态的,且静态字段必须出现在最前面
hive>insert into t5 partition(country='china',province) select (包含分区的字段信息就可以) from orders;
-----------------------------------------------------------------------------
Hive的排序
select .......from ...... order by 字段;//按照这个字段全排序
select .......from ...... sort by 字段; //按照这个字段在每个reducer内局部有序
select 字段.....from ...... distribute by 字段;//按照这个字段分区