总结: HQL语句

Rouer_jia

已于 2024-03-27 09:57:40 修改

阅读量1k

点赞数 21

分类专栏： Linux/大数据云南经贸授课笔记文章标签： oracle 数据库

于 2024-03-21 14:49:35 首次发布

本文链接：https://blog.csdn.net/rouer_jia/article/details/136907181

版权

Linux/大数据同时被 2 个专栏收录

5 篇文章 0 订阅

订阅专栏

云南经贸授课笔记

5 篇文章 0 订阅

订阅专栏

总结: HQL语句

Part1 数据库的操作

查看数据库: show databases;
创建数据库: create database if not exists 数据库名
使用数据库: use 数据库名;
查看数据库详细信息: desc database 数据库名
创建数据库并指定HDFS的存储路径: create database 数据库名 location 'HDFS path'
删除空的数据库: drop database 数据库名
强制删除数据库: drop database 数据库名 cascade

Part2 数据表的操作

1. 创建普通表

create table 表名(
	  列名  类型
      id int,
      name string
      ....
)

删除表 : drop table 表名;

2. 内外部表

1. 内部表

CREATE table mytest.stu1(
	id int,
	name string,
	gender string
) row format delimited fields terminated by '\t';

外部表

在Linux上创建新文件，test_external.txt，并填入如下内容

**两种方式 - 方式1 **
先创建外部表，然后移动数据到LOCATION目录

首先检查：hadoop fs -ls /tmp，确认不存在/tmp/test_ext1目录
创建外部表：create external table test_ext1(id int, name string) row format delimited fields terminated by ‘\t’ location ‘/tmp/test_ext1’;
可以看到，目录/tmp/test_ext1被创建
select * from test_ext1，空结果，无数据
上传数据： hadoop fs -put test_external.txt /tmp/test_ext1/
select * from test_ext1，即可看到数据结果

两种方式 - 方式2

先存在数据，后创建外部表

hadoop fs -mkdir /tmp/test_ext2
hadoop fs -put test_external.txt /tmp/test_ext2/
create external table test_ext2(id int, name string) row format delimited fields terminated by ‘\t’ location ‘/tmp/test_ext2’;
select * from test_ext2;

3. 内外部表转换

查看表类型: desc formatted 表名

内部表转外部表
alter table stu set tblproperties(‘EXTERNAL’=‘TRUE’);
外部表转内部表
alter table stu set tblproperties(‘EXTERNAL’=‘FALSE’);
要注意：('EXTERNAL'='FALSE') 或 ('EXTERNAL'='TRUE')为固定写法，区分大小写！！！

4. 数据的加载和导出

数据加载 - LOAD语法

-- 从本地导入数据  -- 本地文件依旧存在
LOAD data local inpath '/home/hadoop/search_log.txt' into table mytest.test_load ;

select * from mytest.test_load;
-- 从HDFS系统中导入(追加) -- HDFS系统的文件不存在
load data inpath '/tmp/search_log.txt' into table mytest.test_load ;

-- 覆盖
load data local inpath '/home/hadoop/search_log.txt' overwrite into table mytest.test_load;

导入数据 - INSERT SELECT

INSERT into table mytest.test_load2 select * from mytest.test_load;

数据导出 - INSERT OVERWRITE

-- 将查询的结果导出到本地 - 使用默认列分隔符
INSERT overwrite local directory '/home/hadoop/export1' SELECT * from mytest.test_load ;

-- 将查询的结果导出到本地 - 指定列分隔符
INSERT overwrite local directory '/home/hadoop/export2' row format delimited fields terminated by '#' SELECT * from mytest.test_load;

-- 将查询的结果导出到HDFS上(不带local关键字)
INSERT overwrite directory '/tmp/export_to_hdfs' row format delimited fields terminated by '\t' SELECT * from mytest.test_load;

在这里插入图片描述

5. 分区表

创建单分区表

create table tablename(
	列名 列类型
) partitioned by (分区列 列类型, ......) 
row format delimited fields terminated by '\t';

导入数据

load data local inpath '/home/hadoop/score.txt' into table tabname partition(month='202401');

创建一个表带有多个分区

CREATE table tablename(
   列名 列类型
) partitioned by(year string,month string,day string)
row FORMAT DELIMITED FIELDS TERMINATED by '\t';

导入数据

load data local inpath '/home/hadoop/score.txt' into table score2 
partition(year='2024',month='01',day='10');

其他语句

-- 查看分区
show partitions score2;

-- 添加分区
alter table score add partition(month='202403');

alter table score2 add partition(year='2023',month='03',day='30');

-- 同时添加多个分区
alter table score add partition(month='202404') partition(month='202405') partition(month='202406') ;

--删除分区
ALTER table score drop partition(month='202406');

6. 分桶表

分桶表创建

--1. 开启自动优化
set hive.enforce.bucketing=true;
--2. 创建分桶表
CREATE table course(
	c_id string,
	c_name string,
	t_id string
) clustered by(c_id) into 3 buckets 
row format delimited fields terminated by ' \t ';

为分桶表导入数据

-- 为分桶表导入数据 insert select语法(从其他表中导入数据)
-- 1.创建临时表保存数据
CREATE table course_tmp(
	c_id string,
	c_name string,
	t_id string
)row format delimited fields terminated by '\t';
-- 2. 将数据导入到临时表中
load data local inpath '/home/hadoop/course.txt' into table course_tmp;
select * from course_tmp;
--3. 通过 insert select 将course_tmp中的数据导入到 course中
insert overwrite table course select * from course_tmp cluster by(c_id);

SELECT * from course;

7. 修改表

-- 修改表名
ALTER table mytest.stu rename to mytest.stu_new;
-- 查看表属性
desc formatted mytest.stu_new;
-- 修改表属性
ALTER table stu_new set TBLPROPERTIES ('EXTERNAL'='TRUE');
-- 注释
ALTER table stu_new set TBLPROPERTIES ('comment'='this is a student table');

属性: https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL#LanguageManualDDL-listTableProperties

-- 添加分区
ALTER table score add partition(month='202406');
-- 修改分区
ALTER table score partition(month='202401') rename to partition(month='202312'); 
-- 删除分区
ALTER table score drop partition(month='202403');
ALTER table score drop partition(month='202404');
ALTER table score drop partition(month='202405');
ALTER table score drop partition(month='202406');

-- 添加列
ALTER table score add columns(v1 int,v2 string);
-- 修改列名(修改类型,注意类型转换)
ALTER table score change v1 v1new string;  -- 可以
ALTER table score change v2 v2new string;  -- 报错
--清空表(内部表)
TRUNCATE TABLE score;
--清空表(外部表)  -- 报错,不能清空外部表
TRUNCATE table stu_new;

8. 复杂类型操作

array类型

在某一列中, 存储多条(一组)数据, 用array类型

create table mytest.test_array(
	name string,
	work_locations array<string>
)row format delimited fields terminated  by '\t'
COLLECTION ITEMS TERMINATED by ','     -- 指定数组中的分隔符是什么

[‘beijing’, ‘shanghai’,‘tianjin’]
[‘xiaohong’#‘xiaoming’#‘xiaohuang’]

-- 查询
SELECT * from test_array;
-- 查询work_locations数组中的第一个元素
-- ['beijing','shanghai','tianjin','hangzhou'];
--    0           1          2          3
select name, work_locations[0] loc from mytest.test_array;
-- 查询work_locations数组中元素的个数
SELECT name, size(work_locations) loc from mytest.test_array;
-- 查询work_locations数组中包含tianjin的信息
SELECT name from mytest.test_array where ARRAY_CONTAINS(work_locations,'beijing');

map类型

键值对: key-value ;
电脑信息:
color: grey
brand: 联想
size: 16寸

-- map类型
drop table mytest.test_map;
CREATE table mytest.test_map(
  id int,
  name string,
  members map<string,string>,
  age int
)row format delimited fields terminated  by ','
COLLECTION ITEMS TERMINATED BY '#'   -- 键值对#键值对#键值对 
MAP KEYS TERMINATED BY ':'; -- 键和值之间的符号

--members : father:xiaoming#mother:xiaohuang#brother:xiaoxu
--获取father值:xiaoming   members['father'] 
--获取mother值:xiaohuang  members['mother']
--获取所有键名: map_keys(members)
--获取所有值: map_values(members)
--判断是否包含某个keys: ARRAY_CONTAINS(map_keys(members),'brother');
--判断是否包含某个值('xiaoming'):ARRAY_CONTAINS(map_values(members),'xiaoming')
load data local inpath '/home/hadoop/data_for_map.txt' into table mytest.test_map;
--查询全部
SELECT * from mytest.test_map;
--查看成员中, 每个人的父亲, 母亲是谁
SELECT id, name, members['father'] father,members['mother'] mother from mytest.test_map;

--查询全部map的key，使用map_keys函数，结果是array类型
SELECT map_keys(members) from mytest.test_map;
SELECT map_values(members) from mytest.test_map;
SELECT size(members) from mytest.test_map;
-- 查询map的key中有brother的数据
SELECT * FROM mytest.test_map WHERE ARRAY_CONTAINS(map_keys(members),'brother');

struct类型

-- struct类型
CREATE table mytest.test_struct(
	id string,
	info struct<name:string, age:int>
)row format delimited fields terminated by '#'
COLLECTION ITEMS TERMINATED BY ':'; -- 表示struct中二级列的数据之间的分隔符
--导入数据
load data local inpath '/home/hadoop/data_for_struct.txt' into table mytest.test_struct;
-- 查询
SELECT * from mytest.test_struct;
SELECT id, info.name from mytest.test_struct;
SELECT id, info.age from mytest.test_struct;
SELECT id, info.name,info.age from mytest.test_struct;