2021-04-27

最新推荐文章于 2022-06-30 00:41:10 发布

The Great Ant

最新推荐文章于 2022-06-30 00:41:10 发布

阅读量78

点赞数

分类专栏： hive

本文链接：https://blog.csdn.net/qq_37698495/article/details/116211391

版权

hive 专栏收录该内容

10 篇文章 0 订阅

订阅专栏

Hive中的DDL和DML语言合集

一.DDL(数据定义语言)

1.1 库的ddl

1.1.1 创建库的语法

CREATE DATABASE [IF NOT EXISTS] database_name   --IF NOT EXISTS 增强代码的健壮性
[COMMENT database_comment]                      --库的注释   这个库你将来拿来干嘛
[LOCATION hdfs_path]                            --可以指定当前库存在hdfs的具体位置
[WITH DBPROPERTIES (property_name=property_value, ...)]; --对于库加一些属性一点用没有(鸡肋)

1.1.2 增

-- Database with a description and custom properties (no LOCATION given).
CREATE DATABASE db_hive
    COMMENT 'this in my first db'
    WITH DBPROPERTIES ('dbtype'='hive', 'owner'='heihei');

-- Database stored at an explicit HDFS location.
CREATE DATABASE db_hive2
    LOCATION '/db_hive2';

-- The directory name does not have to match the database name.
CREATE DATABASE db_hive3
    LOCATION '/dsadsadsasd';

-- IF NOT EXISTS turns a repeated create of db_hive2 into a no-op instead of an error.
CREATE DATABASE IF NOT EXISTS db_hive2
    LOCATION '/db_hive2';

--在你不指定location的情况下 默认在hdfs的/user/hive/warehouse下创建一个名为database_name.db的文件夹 来当做库的目录
--在你指定location的情况下 直接把指定路径(最后一级目录)当做库的目录 库名仍然是database_name 目录名可以和库名不同(例如上面的db_hive3)

1.1.3 查

-- List every database.
SHOW DATABASES;
-- List databases whose names match a wildcard pattern.
SHOW DATABASES LIKE 'db_hive*';
-- Describe a database (does not show its DBPROPERTIES).
-- Syntax: DESC DATABASE <database_name>;
DESC DATABASE db_hive;
-- Describe a database in detail (also shows its DBPROPERTIES as parameters).
-- Syntax: DESC DATABASE EXTENDED <database_name>;
DESC DATABASE EXTENDED db_hive;

1.1.4 切换数据库

-- Syntax: USE <database_name>;
-- Switch the current session to db_hive2.
USE db_hive2;

1.1.5 改

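用户可以使用ALTER DATABASE命令为某个数据库的DBPROPERTIES设置键-值对属性值，来描述这个数据库的属性信息。数据库名是不可更改的；在较新的Hive版本中，ALTER DATABASE还可以修改所有者(SET OWNER)和默认目录位置(SET LOCATION，Hive 2.2.1/2.4.0及以后)，但SET LOCATION不会移动库中已有的数据，只影响之后新建表的默认父目录。除此之外的其他元数据信息都是不可更改的。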

-- Overwrite the value of an existing property.
ALTER DATABASE db_hive
    SET DBPROPERTIES ('dbtype'='db');
-- Add a property that was not set before.
ALTER DATABASE db_hive
    SET DBPROPERTIES ('createtime'='2020-08-19');

1.1.6 删

-- Syntax: DROP DATABASE [IF EXISTS] <database_name> [CASCADE];
DROP DATABASE db_hive2;

-- IF EXISTS makes the statement safe to run when the database is already gone.
DROP DATABASE IF EXISTS db_hive2;

-- CASCADE force-drops a database that still contains tables.
-- Use with care: only when every table in it is known to be unused.
DROP DATABASE db_hive CASCADE;

1.2 表的ddl

1.2.1创建表的语法

CREATE [EXTERNAL] TABLE [IF NOT EXISTS] table_name --external 表示创建的表是否为外部表
[(col_name data_type [COMMENT col_comment], ...)]--列名 列的类型 列的注释(这个列拿来描述什么东西)
[COMMENT table_comment]                          --表的注释 这个表你拿来描述什么业务
[PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)] --分区表
[CLUSTERED BY (col_name, col_name, ...)                          --分桶表
[SORTED BY (col_name [ASC|DESC], ...)] INTO num_buckets BUCKETS] --桶内排序字段 分几个桶
[
 ROW FORMAT DELIMITED                          --一行数据的分隔符
 [FIELDS TERMINATED BY char]                   --一行数据字段分隔符是什么
 --songsong,bingbing_lili,xiao song:18_xiaoxiao song:19,hui long guan_beijing_10010
 --这一行数据而言 他的各字段分隔符是 "," 
 --对于分隔符是有默认值的 ascii 码表对应第二个  ^A     ctrl+v ctrl+a
 [COLLECTION ITEMS TERMINATED BY char]         --集合(map array struct)各元素分隔符
 --bingbing_lili   xiao song:18_xiaoxiao song:19 hui long guan_beijing_10010
 --对于这些集合各元素分隔符 "_"
 --元素分隔符也有默认值 ascii 码表对应第三个  ^B     ctrl+v ctrl+b
 [MAP KEYS TERMINATED BY char]                 --map的kv分隔符
 --xiao song:18   xiaoxiao song:19
 --这个 map的kv分隔符 是":"
 --map的kv分隔符也有默认值 ascii 码表对应第四个  ^C     ctrl+v ctrl+c 
 [LINES TERMINATED BY char]                    --每一行数据的分隔符 '\n'
] 
[STORED AS file_format]                      --指定表所对应的文件的存储格式  默认是 textfile
[LOCATION hdfs_path]                         --指定当前表的存储hdfs路径
[TBLPROPERTIES (property_name=property_value, ...)]--表的属性(用处很大)
[AS select_statement]                        --根据查询结果创建一张表 包括表的结构和数据
[LIKE table_name]                            --模仿一张表 但是只有结构没有数据

1.2.2 增

1.2.2.1 增加内部表(管理表)

内部表的含义:hive掌握着表的数据的生命周期,当在Hive里删除表的时候,会一并把hdfs上数据给删了
用的少  1.中间表 2.测试表
-- Managed-table test: tab-delimited fields.
CREATE TABLE student (id INT, name STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';
-- Default-delimiter test: no ROW FORMAT clause, so the default field delimiter applies.
CREATE TABLE test2 (id INT, name STRING);
-- CTAS copies the schema and data of the query result, but not the source
-- table's delimiter; the new table uses the default delimiter.
CREATE TABLE student2 AS SELECT * FROM student;
-- CTAS with an explicit delimiter, so the new table uses the same format as student.
CREATE TABLE student3 ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' AS SELECT * FROM student;
-- LIKE copies only the table definition (including its delimiter); no data is copied.
CREATE TABLE student4 LIKE student;

1.2.2.2 增加外部表

外部表的含义:hive不掌握着表的数据生命周期,当在Hive里删除表的时候,不会一并把hdfs上数据给删了，只会删除元数据
除了上述内部表的情况 全是外部表
-- External department table; its data lives under /company/dept.
CREATE EXTERNAL TABLE IF NOT EXISTS dept (
    deptno INT,
    dname  STRING,
    loc    INT
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/company/dept';

-- External employee table; its data lives under /company/emp/.
CREATE EXTERNAL TABLE IF NOT EXISTS emp (
    empno    INT,
    ename    STRING,
    job      STRING,
    mgr      INT,
    hiredate STRING,
    sal      DOUBLE,
    comm     DOUBLE,
    deptno   INT
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/company/emp/';

1.2.2.3 内部表和外部表相互转换

Table Type:            EXTERNAL_TABLE       
Table Parameters:      EXTERNAL            TRUE 
表是否为内部表还是外部表是由Table Parameters 里面的EXTERNAL属性来控制 包括TRUE和FALSE 都得大写
-- Managed -> external. The EXTERNAL key and its TRUE/FALSE value are written in upper case.
ALTER TABLE student4
    SET TBLPROPERTIES ('EXTERNAL'='TRUE');
-- External -> managed.
ALTER TABLE emp
    SET TBLPROPERTIES ('EXTERNAL'='FALSE');

1.2.3 查

-- List all tables in the current database.
SHOW TABLES;
-- Show the table's description.
DESCRIBE student;
-- Show the detailed table description.
DESCRIBE FORMATTED student;

1.2.4 删

-- 1. Dropping tables
-- 1.1 Drop a managed table.
DROP TABLE student;
-- 1.2 Drop an external table: only the metadata is removed; the data on HDFS stays.
DROP TABLE dept;
-- 1.3 Empty a table.
TRUNCATE TABLE student3;
-- Truncate test on an external table: expected to fail while emp is external,
-- because external tables cannot be truncated.
TRUNCATE TABLE emp;

1.2.5 改

-- Rename a table; the table's HDFS directory is renamed along with it.
ALTER TABLE student3 RENAME TO student2;

-- Change a column. The data type may only stay the same or be widened.
-- Syntax: ALTER TABLE table_name CHANGE [COLUMN] col_old_name col_new_name column_type [COMMENT col_comment] [FIRST|AFTER column_name]
ALTER TABLE stu2 CHANGE COLUMN id id INT;
ALTER TABLE stu2 CHANGE COLUMN id id TINYINT; -- intentionally wrong: narrows the type
ALTER TABLE stu2 CHANGE COLUMN id ids BIGINT;
-- The column is called ids after the previous statement, so rename from ids (not id).
ALTER TABLE stu2 CHANGE COLUMN ids idss BIGINT;

-- Add columns.
-- Syntax: ALTER TABLE table_name ADD COLUMNS (col_name data_type [COMMENT col_comment], ...)
ALTER TABLE stu2 ADD COLUMNS (weight DOUBLE, hair BIGINT);

-- Replace columns.
-- Syntax: ALTER TABLE table_name REPLACE COLUMNS (col_name data_type [COMMENT col_comment], ...)
-- Replace with fewer columns: the remaining columns must still satisfy the
-- type-widening rule against the columns they replace.
ALTER TABLE stu2 REPLACE COLUMNS (id BIGINT, name STRING);
-- Replace with more columns: the added columns have no type constraint; the
-- columns that line up with existing ones must satisfy the type-widening rule.
ALTER TABLE stu2 REPLACE COLUMNS (id BIGINT, name STRING, height DOUBLE, hair BIGINT);

二.DML（数据操作语言）

2.1 数据导入

2.1.1 load 装载数据

load data [local] inpath '数据的path' [overwrite] into table student [partition (partcol1=val1,…)];
测试表
CREATE TABLE student (id INT, name STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';
-- Append from the local filesystem; the local file is copied into the table.
LOAD DATA LOCAL INPATH '/opt/module/hive/datas/student.txt'
INTO TABLE student;
-- Load from the local filesystem, replacing the table's existing data.
LOAD DATA LOCAL INPATH '/opt/module/hive/datas/student1.txt'
OVERWRITE INTO TABLE student;
-- Load from HDFS; the source file is moved (cut), not copied, into the table.
LOAD DATA INPATH '/student.txt'
INTO TABLE student;

2.1.2 insert 插入数据

-- Append literal rows.
INSERT INTO TABLE student2
VALUES (1, 'banzhang'), (2, 'haiwangbin');
-- Replace the table's contents with literal rows.
INSERT OVERWRITE TABLE student
VALUES (1, 'banzhang'), (2, 'haiwangbin');
-- Append a query result. The target table must already exist, and the selected
-- columns must match the target table's column count.
INSERT INTO TABLE student
SELECT id, name FROM student3;
-- Replace the table's contents with a query result.
INSERT OVERWRITE TABLE student
SELECT id, name FROM student3;

2.1.3 as select

-- CTAS: build student3 from the id and name columns of student.
CREATE TABLE IF NOT EXISTS student3 AS
SELECT id, name
FROM student;

create as select, insert into table table_name select这两个就是拿来创建中间表

[AS select_statement]                        --根据查询结果创建一张表 包括表的结构和数据
[LIKE table_name]                            --模仿一张表 但是只有结构没有数据

2.1.4 location

-- Create a table whose storage is an explicit HDFS path.
-- The LOCATION must be a directory, not a file.
CREATE TABLE IF NOT EXISTS student4 (
    id   INT,
    name STRING
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/student4';

2.1.5 import 导入（必须是export导出并且导入的表不能存在）

-- Import data that was written by EXPORT; the target table (student6) must not exist yet.
IMPORT TABLE student6 FROM '/user/hive/warehouse/export/student';

2.2 数据导出(少)

2.2.1 insert 导出

-- Export to the local filesystem without specifying a row format.
INSERT OVERWRITE LOCAL DIRECTORY '/opt/module/hive/datas/export/student1'
SELECT * FROM student;
-- Export to the local filesystem as tab-delimited text.
INSERT OVERWRITE LOCAL DIRECTORY '/opt/module/hive/datas/export/student1'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
SELECT * FROM student;
-- Without LOCAL the target directory is on HDFS.
INSERT OVERWRITE DIRECTORY '/opt/module/hive/datas/export/student1'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
SELECT * FROM student;

2.2.2 hadoop 下载

# Download the table's data file from HDFS to the local export directory.
# The trailing backslash keeps the source and destination in one command.
hadoop fs -get /user/hive/warehouse/student/student.txt \
    /opt/module/hive/datas/export/student3.txt

2.2.3 hive 的shell命令

# Run one HiveQL statement from the shell and redirect its output to a local file.
hive -e 'select * from default.student;' \
    > /opt/module/hive/datas/export/student4.txt

2.2.4 export 导出

-- Export the student table to the /student directory.
EXPORT TABLE student
TO '/student';

The Great Ant

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
2021-04-27

Hive中的DDL和DML语言合集一.DDL(数据定义语言)1.1 库的ddl1.1.1 创建库的语法CREATE DATABASE [IF NOT EXISTS] database_name --IF NOT EXISTS 增强代码的健壮性[COMMENT database_comment] --库的注释这个库你将来拿来干嘛[LOCATION hdfs_path] --可以指定当前库存在h
复制链接

扫一扫