【博学谷学习记录】超强总结，用心分享|狂野大数据课程【Hive的数据库和表操作】

Garyodd

已于 2023-03-22 01:09:53 修改

阅读量110

点赞数

分类专栏：博学谷It技术支持 java 文章标签：学习 Powered by 金山文档

于 2023-03-11 17:16:07 首次发布

本文链接：https://blog.csdn.net/GaryNB/article/details/129465929

版权

博学谷It技术支持同时被 2 个专栏收录

25 篇文章 1 订阅

订阅专栏

java

25 篇文章 1 订阅

订阅专栏

操作数据库

-- 1. Creating a database makes Hive automatically create a matching database
--    directory under /user/hive/warehouse. That base directory is set by a
--    property in hive-site.xml (hive.metastore.warehouse.dir).
CREATE DATABASE IF NOT EXISTS myhive;
SHOW DATABASES;

-- 2. Map a database to an explicitly chosen directory instead of the default one.
CREATE DATABASE IF NOT EXISTS myhive2
LOCATION '/myhive2';

SHOW DATABASES;

-- 3. Show the metadata of a database.
DESCRIBE DATABASE myhive;

-- 4. Dropping databases
-- 4.1 An empty database can be dropped directly.
DROP DATABASE myhive2;

-- 4.2 A database that still contains tables can only be dropped with the
--     CASCADE keyword; a throwaway table is created first to demonstrate this.
USE myhive;
CREATE TABLE A (id INT);
DROP DATABASE myhive CASCADE;

操作表

介绍

1、Hive创建表的操作就是指定：表名、表的列名、表的列类型

2、Hive创建表内部的结构和传统的数据库的SQL除了语法相似，内部原理完全不同

3、Hive表文件字段之间默认的分隔符是'\001'

Hive的表数据类型

整型：   int
浮点型： float / double / decimal(10,2)
字符串： string   mysql一般是varchar
日期类型：
   年月日：date
   年月日-时分秒：timestamp
   说明：Hive没有单独的time类型，也没有date_time（datetime）类型
   注意：如果是日期或者时间，则使用字符串可以避免一些不必要的兼容问题
复杂类型：
  array：数组，集合
  map  ：键值对集合
  struct： 类

表分类

1、Hive表分为两类，一个是内部表，一个是外部表

内部表（管理表）

 语法
  create  table 表名(字段信息);

 特点
  1、内部表认为该表独占表数据文件，该文件不能共享
  2、内部表对表文件有绝对的控制权
  3、当删除内部表时，表文件会跟着一起删除（同时删除元数据和表数据）
  4、所有的非共享表都可以设置为内部表

外部表

  语法
    create  external table 表名(字段信息);

  特点
    1、外部表认为该表不能独占表数据文件，文件可以共享
    2、外部表对表文件没有绝对的控制权
    3、当删除外部表时，表文件不会跟着一起删除（只会删除元数据（映射信息），不会删除表数据）
    4、所有的非共享表都可以设置为内部表
    5、如果表数据需要被共享，则可以设置为外部表

内部表操作

-- The CASCADE demo earlier in this file drops myhive, and the load targets
-- below assume the table lives in myhive.db, so make sure the database exists
-- and is selected before creating the table.
CREATE DATABASE IF NOT EXISTS myhive;
USE myhive;

-- 1. Create the managed (internal) table.
CREATE TABLE t_covid (
    dt          STRING COMMENT '日期',
    county      STRING COMMENT '县',
    state       STRING COMMENT '州',
    county_code STRING COMMENT '县编码',
    cases       INT    COMMENT '确诊人数',
    deaths      INT    COMMENT '死亡人数'
)
COMMENT '美国新冠数据'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';  -- custom delimiter between fields

-- 2. Load data from the local Linux filesystem (the file is COPIED).
--    The local file is copied into the table directory:
--    /user/hive/warehouse/myhive.db/t_covid
LOAD DATA LOCAL INPATH '/root/test/covid19.dat' INTO TABLE t_covid;

-- 3. Load data from HDFS (the file is MOVED).
--    The HDFS file is moved into the table directory:
--    /user/hive/warehouse/myhive.db/t_covid
--    INTO (without OVERWRITE) appends, so running both loads stores the rows twice.
LOAD DATA INPATH '/input/covid19.dat' INTO TABLE t_covid;

-- 4. Inspect the loaded data.
SELECT * FROM t_covid;

外部表操作

-- 1. Create the external table (any previous t_covid is dropped first).
DROP TABLE IF EXISTS t_covid;
CREATE EXTERNAL TABLE t_covid (
    dt          STRING COMMENT '日期',
    county      STRING COMMENT '县',
    state       STRING COMMENT '州',
    county_code STRING COMMENT '县编码',
    cases       INT    COMMENT '确诊人数',
    deaths      INT    COMMENT '死亡人数'
)
COMMENT '美国新冠数据'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';  -- custom delimiter between fields

-- 2. Load data into the external table from the local Linux filesystem (the file is COPIED).
--    The local file is copied into the table directory:
--    /user/hive/warehouse/myhive.db/t_covid
LOAD DATA LOCAL INPATH '/root/test/covid19.dat' INTO TABLE t_covid;

-- 3. Load data into the external table from HDFS (the file is MOVED).
--    The HDFS file is moved into the table directory:
--    /user/hive/warehouse/myhive.db/t_covid
--    NOTE(review): because this load moves the source file, /input/covid19.dat
--    must be uploaded to HDFS again if the managed-table demo already consumed it — confirm.
LOAD DATA INPATH '/input/covid19.dat' INTO TABLE t_covid;

-- 4. Inspect the loaded data.
SELECT * FROM t_covid;



-- ------ Demo: several tables sharing the same data files ------
-- 1. Create external table 1 on top of the shared directory /input/data
--    (presumably the directory holding covid19.dat — confirm).
-- NOTE(review): the columns are spelled country / country_code here, while
-- their comments ('县', '县编码') and the t_covid table say county / county_code.
-- The names are kept unchanged so existing queries keep working; rename if confirmed.
DROP TABLE IF EXISTS t_covid1;
CREATE EXTERNAL TABLE t_covid1 (
    dt           STRING COMMENT '日期',
    country      STRING COMMENT '县',
    state        STRING COMMENT '州',
    country_code STRING COMMENT '县编码',
    cases        INT    COMMENT '确诊人数',
    deaths       INT    COMMENT '死亡人数'
)
COMMENT '美国新冠数据'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','  -- custom delimiter between fields
LOCATION '/input/data';

SELECT * FROM t_covid1;

-- 2. Create external table 2 on top of the same shared directory /input/data
--    (presumably the directory holding covid19.dat — confirm).
-- NOTE(review): country / country_code look like misspellings of
-- county / county_code (see the column comments); names kept for compatibility.
DROP TABLE IF EXISTS t_covid2;
CREATE EXTERNAL TABLE t_covid2 (
    dt           STRING COMMENT '日期',
    country      STRING COMMENT '县',
    state        STRING COMMENT '州',
    country_code STRING COMMENT '县编码',
    cases        INT    COMMENT '确诊人数',
    deaths       INT    COMMENT '死亡人数'
)
COMMENT '美国新冠数据'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','  -- custom delimiter between fields
LOCATION '/input/data';

SELECT * FROM t_covid2;

-- 3. Create external table 3 on top of the same shared directory /input/data
--    (presumably the directory holding covid19.dat — confirm).
-- NOTE(review): country / country_code look like misspellings of
-- county / county_code (see the column comments); names kept for compatibility.
DROP TABLE IF EXISTS t_covid3;
CREATE EXTERNAL TABLE t_covid3 (
    dt           STRING COMMENT '日期',
    country      STRING COMMENT '县',
    state        STRING COMMENT '州',
    country_code STRING COMMENT '县编码',
    cases        INT    COMMENT '确诊人数',
    deaths       INT    COMMENT '死亡人数'
)
COMMENT '美国新冠数据'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','  -- custom delimiter between fields
LOCATION '/input/data';

SELECT * FROM t_covid3;

-- 4. Drop test
-- Dropping an external table removes only its metadata, not the data files.
-- The SELECT on a table that was just dropped is therefore expected to fail
-- (the table no longer exists), while t_covid2 can still read the shared data.

DROP TABLE t_covid1;
SELECT * FROM t_covid1;
SELECT * FROM t_covid2;

DROP TABLE t_covid3;
SELECT * FROM t_covid3;

其他操作

-- To tell whether a table is managed (internal) or external, inspect its metadata.
DESCRIBE FORMATTED t_covid;

-- Look for one of the following lines in the output:
--   Table Type:         ,EXTERNAL_TABLE     -- external table
--   Table Type:         ,MANAGED_TABLE      -- managed (internal) table

表的元数据信息存放在MySQL（Hive的元数据库）中，也可以直接通过MySQL查看

复杂类型操作

-- ----------- Hive complex types: ARRAY ------------
-- 1. Sample data
/*
 zhangsan      beijing,shanghai,tianjin,hangzhou
 wangwu       changchun,chengdu,wuhan,beijing
 */
-- 2. Create the table
-- The CASCADE demo earlier in this file drops myhive, so recreate it if it is
-- missing; otherwise USE would fail because the database does not exist.
CREATE DATABASE IF NOT EXISTS myhive;
USE myhive;
CREATE EXTERNAL TABLE hive_array (
    name           STRING,
    work_locations ARRAY<STRING>
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'  -- delimiter between fields
COLLECTION ITEMS TERMINATED BY ',';             -- delimiter between array elements

-- 3. Load the data
LOAD DATA LOCAL INPATH '/root/hive_data/array.txt' OVERWRITE INTO TABLE hive_array;  -- OVERWRITE replaces existing data

-- 4. Query the data
-- All rows
SELECT * FROM hive_array;

-- First element of the work_locations array
SELECT name, work_locations[0] AS location FROM hive_array;

-- Number of elements in the work_locations array
SELECT name, size(work_locations) AS location_size FROM hive_array;

-- Rows whose work_locations array contains 'tianjin'
SELECT * FROM hive_array WHERE array_contains(work_locations, 'tianjin');

-- ----------- Hive complex types: MAP ------------
-- 1. Sample data (fields split by ',', map entries by '#', key and value by ':')
/*
1,zhangsan,father:xiaoming#mother:xiaohuang#brother:xiaoxu,28
2,lisi,father:mayun#mother:huangyi#brother:guanyu,22
3,wangwu,father:wangjianlin#mother:ruhua#sister:jingtian,29
4,mayun,father:mayongzhen#mother:angelababy,26
 */
-- 2. Create the table
CREATE TABLE hive_map (
    id      INT,
    name    STRING,
    members MAP<STRING, STRING>,
    age     INT
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    COLLECTION ITEMS TERMINATED BY '#'
    MAP KEYS TERMINATED BY ':';

-- 3. Load the data (OVERWRITE replaces whatever the table already holds)
LOAD DATA LOCAL INPATH '/root/hive_data/map.txt' OVERWRITE INTO TABLE hive_map;

-- 4. Query the data
-- All rows
SELECT * FROM hive_map;

-- Look up values by key
SELECT
    id,
    name,
    members['father'] AS father,
    members['mother'] AS mother,
    age
FROM hive_map;

-- All keys of the map
SELECT id, name, map_keys(members) AS relation FROM hive_map;

-- All values of the map
SELECT id, name, map_values(members) AS relation FROM hive_map;

-- Number of key/value pairs
SELECT id, name, size(members) AS num FROM hive_map;

-- Rows whose map has a given key
SELECT * FROM hive_map WHERE array_contains(map_keys(members), 'brother');

-- Rows that have the 'brother' key, together with the value stored under it
SELECT
    id,
    name,
    members['brother'] AS brother
FROM hive_map
WHERE array_contains(map_keys(members), 'brother');

-- ----------- Hive complex types: STRUCT ------------
-- 1. Sample data (ip and info split by '#', struct members by ':')
/*
192.168.1.1#zhangsan:40:男
192.168.1.2#lisi:50:女
192.168.1.3#wangwu:60:女
192.168.1.4#zhaoliu:70:男
 */
-- 2. Create the table
CREATE TABLE hive_struct (
    ip   STRING,
    info STRUCT<name:STRING, age:INT, gender:STRING>
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '#'
    COLLECTION ITEMS TERMINATED BY ':';


-- 3. Load the data (OVERWRITE replaces whatever the table already holds)
LOAD DATA LOCAL INPATH '/root/hive_data/struct.txt' OVERWRITE INTO TABLE hive_struct;

-- 4. Query the data
-- All rows
SELECT * FROM hive_struct;

-- Individual struct members are addressed with dot notation
SELECT
    ip,
    info.name,
    info.gender
FROM hive_struct;

Garyodd

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
【博学谷学习记录】超强总结，用心分享|狂野大数据课程【Hive的数据库和表操作】

1、Hive创建表的操作就是指定：表名、表的列名、表的列类型。2、Hive创建表内部的结构和传统的数据库的SQL除了语法相似，内部原理完全不同。Hive的表数据类型——整型：int；浮点型：float / double / decimal(10,2)；字符串：string；日期类型：年月日：date，年月日-时分秒：timestamp（Hive没有单独的time类型，也没有date_time类型）。注意：如果是日期或者时间，则使用字符串可以避免一些不必要的兼容问题。复杂类型：array：数组，集合；map：键值对集合；struct：类。表分类。
复制链接

扫一扫