最新版Hive的基本语法及常用操作

最新推荐文章于 2022-01-24 10:28:43 发布

夏小白.

最新推荐文章于 2022-01-24 10:28:43 发布

阅读量757

点赞数

分类专栏： hive基本语法文章标签： hive

本文链接：https://blog.csdn.net/xia1140418216/article/details/109050402

版权

hive基本语法专栏收录该内容

1 篇文章 0 订阅

订阅专栏

HIVE基本操作命令

 创建数据库

create database db_name;
create database if not exists db_name; -- 仅当数据库不存在时才创建（已存在则跳过，不报错）
查看数据库
show databases;
选择性查看数据库
show databases like ‘f.*’;
查看某一个数据库的详细信息
describe database db_name;
删除非空数据库
drop database db_name CASCADE;
创建数据库时，指定数据库位置
create database db_name location '/home/database/';
创建数据库的时候希望能够给数据库增加一些描述性东西
create database db_name comment ‘helloworld’;
创建数据库的时候，需要为数据库增加属性信息，可以使用with dbproperties信息
create database db_name with dbproperties('creator'='hello','date'='2018-3-3');
如果要使用自己已经存在的数据库
use db_name;
修改数据库的属性信息
alter database db_name set dbproperties(‘edited-by’=‘hello’);
创建表
create table tab_name(id int,name string) row format delimited fields terminated by ‘\t’;
创建一个表，该表和已有的某一个表的结构一样（复制表结构）
create table if not exists emp like employeel;
查看当前数据库下的所有表
show tables;
删除一个已经存在的表
drop table employee;
修改一个表的表名（重命名）
alter table old_name rename to new_name;
将本地文件系统上的文件导入到hive表中（/home/bigdata代表文件在本地文件系统上的位置；带local关键字表示从本地加载，去掉local则表示从HDFS加载）。使用该命令时一定要注意：txt文件中字段与字段之间必须用Tab分隔，与建表时指定的分隔符保持一致
load data local inpath ‘/home/bigdata’ into table hive.dep;
修改某一个表的某一列的信息
alter table tab_name change column key key_1 int comment ‘h’ after value;
给某一个表增加某一列的信息
alter table tab_name add columns(value1 string,value2 string);
如果想替换表的列定义（注意：replace columns会用新的列清单整体替换表中原有的所有列，而不是只替换某一列）
alter table tab_name replace columns(values string,value11 string);
修改表的属性信息（tblproperties是表级别的属性，而不是某一列的属性）
alter table tab_name set tblproperties(‘value’=‘hello’);
hive成批向某一表插入数据
insert overwrite table tab_name select * from tab_name2;
将查询结果保留到一个新表中去
create table tab_name as select * from t_name2;
将查询结果保存到指定的文件目录（可以是本地，也可以HDFS）
insert overwrite local directory ‘/home/hadoop/test’ select *from t_name;
insert overwrite directory ‘/aaa/bbb/’ select *from t_p;
两表内连
select *from dual a join dual b on a.key=b.key;
将hive查询结果输出到本地特定目录
insert overwrite local directory ‘/home/bigdata1/mydir’ select *from test;
将hive查询结果输出到HDFS特定目录
insert overwrite directory ‘/home/mydir’ select *from test;

修改内部表student2为外部表

alter table student2 set tblproperties('EXTERNAL'='TRUE');

查询表的类型

desc formatted student2

分区表实际上就是对应HDFS文件系统上的一个独立的文件夹，该文件夹下是该分区的所有数据文件

create external table if not exists stup(id int,name string,gender string,birthday date,phone string,loc string)
partitioned by (clazz string) -- 按照什么分区（clazz为分区字段，string为其类型）
row format delimited fields terminated by '\t'; -- 字段分隔符

#为分区加载数据
load data [local] inpath '/opt/module/datas/student.txt' [overwrite] into table student [partition(partcol1=val1,…)];

#创建一张表
create table student (id int,name string) row format delimited fields terminated by ‘\t’;

#加载本地文件到hive
load data local inpath ‘/opt/module/datas/student.txt’ into table default.student;

#加载HDFS文件到hive中
#先上传文件到HDFS
dfs -put /opt/module/datas/student.txt /user/atguigu/hive;

#加载HDFS上数据
load data inpath '/user/atguigu/hive/student.txt' into table default.student;

#加载数据覆盖表中已有的数据
#上传文件到HDFS
dfs -put /opt/module/datas/student.txt /user/atguigu/hive;

#加载数据覆盖表中已有的数据
load data inpath '/user/atguigu/hive/student.txt' overwrite into table default.student;

#分区查询
#单查询
select * from dept_partition where month=‘200109’;

#联合查询
select * from dept_partition where month='20200930' union select * from dept_partition where month='20000901';

#增加分区
#创建单个分区
alter table dept_partition add partition(month=‘19990429’);

#同时创建多个分区

alter table dept_partition add partition(month='19710218') partition(month='19720623');

6.删除分区

#删除单个分区

alter table dept_partition drop partition(month=‘19990429’);

同时删除多个分区

alter table dept_partition drop partition(month=‘19710218’),partition(month=‘19720623’);

7.查看分区表有多少个分区

show partitions dept_partition;

8. 查看分区表结构

desc formatted dept_partition;

#########通过查询语句向表中插入数据

1．创建一张分区表

create table student (id int,name string) partitioned by (month string) row format delimited fields terminated by '\t';

2.基本插入数据

insert into table student partition (month='20191128') values (22,'夏飞飞');

3.基本模式插入(根据单张表查询结果)

insert overwrite table student partition (month=‘20200930’) select id,name from student where month=‘20201001’;

#######select语句
#列别名：别名紧跟在列名之后，也可以在列名和别名之间加入关键字'AS'

4．案例实操

#查询名称和部门

select ename as name,deptno dn from emp;

SELECT [ALL | DISTINCT] select_expr, select_expr, …
FROM table_reference
[WHERE where_condition]
[GROUP BY col_list]
[HAVING having_condition]
[CLUSTER BY col_list | [DISTRIBUTE BY col_list] [SORT BY col_list]]
[LIMIT number];

1.求总行数（count）

select count(*) cnt from emp;

2.求工资的最大值

select max(sal)max_sal from emp;

3.求工资的最小值

select min(sal)min_sal from emp;

4.求工资的总和

select sum(sal)sum_sal from emp;

5.求工资的平均值

select avg(sal)avg_sal from emp;

6.1.5 Limit语句

#典型的查询会返回多行数据。LIMIT子句用于限制返回的行数。

select * from emp limit 5;

########### where

1. 查询出薪水等于5000的所有员工

select * from emp where sal=5000;

2.查询工资在500-1000的所有员工

select * from emp where sal between 500 and 1000;

3.查询comm为空的所有员工

select * from emp where comm is null;

4.查询工资是4500或5000的员工

select * from emp where sal=4500 or sal=5000;
select * from emp where sal in (4500,5000);

Like和RLike

#1）使用LIKE运算选择类似的值
#2）选择条件可以包含字符或数字:
#% 代表零个或多个字符(任意个字符)。
#_ 代表一个字符。
#3）RLIKE子句是Hive中这个功能的一个扩展，其可以通过Java的正则表达式这个更强大的语言来指定匹配条件。
#4）案例实操
#（1）查找以2开头薪水的员工信息

hive (default)> select * from emp where sal LIKE ‘2%’;
1
#(2）查找第二个数值为2的薪水的员工信息

hive (default)> select * from emp where sal LIKE ‘_2%’;
1
#(3）查找薪水中含有2的员工信息

hive (default)> select * from emp where sal RLIKE ‘[2]’;

####### 排序 order by

ORDER BY
SELECT [ALL | DISTINCT] select_expr, select_expr, …
FROM table_reference
[WHERE where_condition]
[GROUP BY col_list]
[HAVING having_condition]
[ORDER BY col_list]
[LIMIT number];
#按照字典序以某列为排序对象对数据排序
#对输入做全局排序，只有一个reducer（且不能通过改配置优化这一点）
#因此当输入规模较大时，需要较长的计算时间

select id,name,phone from emp order by phone desc;

Having语句
#1．having与where不同点
#（1）where针对表中的列发挥作用，查询数据；having针对查询结果中的列发挥作用，筛选数据。
#（2）where后面不能写分组函数，而having后面可以使用分组函数。
#（3）having只用于group by分组统计语句。
#2．案例实操
#（1）求每个部门的平均薪水大于2000的部门
select deptno,avg(sal) avg_sal from emp group by deptno having avg_sal>2000;

SELECT [ALL | DISTINCT] select_expr, select_expr, …
FROM table_reference
[WHERE where_condition]
[GROUP BY col_list]
[HAVING having_condition]
[ORDER BY col_list]
[LIMIT number];

按照某些字段的值进行分组，有相同值放到一起。

使用了reduce操作，受限于reduce数量，设置reduce参数mapred.reduce.tasks

输出文件个数与reduce数相同，文件大小与reduce处理的数据量有关。

示例：

select phone,count(*) from emp group by phone;

DISTINCT

hive> select distinct name from test;
…

OK
zhao
Time taken: 37.047 seconds, Fetched: 1 row(s)

hive> select distinct name,age from test;
OK
zhao 14
zhao 15
zhao 16
Time taken: 39.131 seconds, Fetched: 3 row(s)

hive> select distinct(name),age from test;
OK
zhao 14
zhao 15
zhao 16
Time taken: 37.739 seconds, Fetched: 3 row(s)

select mid,money,name from

夏小白.

关注

0
点赞
踩
3

收藏

觉得还不错? 一键收藏
0
评论
最新版Hive的基本语法及常用操作

HIVE基本操作命令创建数据库create database db_name;create database if not exists db_name;//创建一个不存在的数据库final查看数据库show databases;选择性查看数据库show databases like ‘f.*’;查看某一个数据库的详细信息describe database db_name;删除非空数据库drop database db_name CASCADE;创建数据库时，指定数据库位置c
复制链接

扫一扫