【博学谷学习记录】超强总结，用心分享 | Hive普通表操作

Onzswhite

已于 2023-06-12 10:12:43 修改

阅读量866

点赞数

文章标签： hive hadoop 大数据

于 2022-10-20 15:14:34 首次发布

本文链接：https://blog.csdn.net/Onzswhite/article/details/127426956

版权

#博学谷IT技术支持#

一、内部表和外部表

Hive表文件默认分隔符是'\001'
Hive默认不允许对数据进行删除和修改，即不支持update和delete
Hive中创建表，自动在HDFS数据库目录创建对应的表目录，默认表目录的名字和表名相同。

1.内部表

内部表是私有表：一旦给表加载数据，内部表就认为这份数据由它独占；表一旦删除，表的数据文件也会随之全部删除。如果在应用中数据仅供部门内部或个人使用，则可以将表设置为内部表，这样不会对其他人造成影响。
内部表创建语法：create table 表名

示例

-- 1. Internal table that relies on the default field delimiter '\001'.
create table stu (
    id   int,
    name string
);

-- 2. Internal table with an explicitly chosen field delimiter: ','.
create table stu2 (
    id   int,
    name string
)
row format delimited
    fields terminated by ',';

-- 3. Build new tables from an existing one.
-- CTAS: copies both the column layout and the rows of stu2.
create table stu3 as
select *
from stu2;

-- LIKE: copies only the table definition, no rows.
create table stu4 like stu2;

-- 4. Inspect table metadata.
describe stu2;            -- brief view: column information only
describe formatted stu2;  -- detailed metadata

-- 5. Drop a table.
-- Dropping an internal table removes both its data and its metadata.
drop table stu2;

select *
from stu;

-- 6. Load data into a table (the standard way) from the local filesystem; the file is copied.
create table stux (
    id   int,
    name string
)
row format delimited
    fields terminated by '\t';

-- Local load: the source file is copied into the table.
load data local inpath '/export/data/hivedatas/1.txt'
into table stux;

select *
from stux;

2.外部表

外部表是公有表：一旦给表加载数据，外部表认为这份数据是大家共有的；表一旦删除，表数据文件不会被删除，只会删除表和文件之间的映射关系。如果在应用中数据由各部门共享，则可以将表设置为外部表，此时你的表只拥有对文件的访问权。
外部表创建语法：create external table 表名

多张外部表共享数据示例

-- External table over the comma-delimited files stored under /input/covid.
create external table covid1 (
    date_val string,
    country string,
    state string,
    code string,
    cases int,
    deaths int
)
row format delimited
    fields terminated by ','
location '/input/covid';

select *
from covid1;

-- Second external table declared over the directory /input/covid,
-- using the same column layout and the same ',' field delimiter.
create external table covid2 (
    date_val string,
    country string,
    state string,
    code string,
    cases int,
    deaths int
)
row format delimited
    fields terminated by ','
location '/input/covid';

select *
from covid2;

-- Drop covid1, then query covid2 again.
drop table covid1;

select *
from covid2;

-- Drop covid2 as well.
drop table covid2;

-- NOTE: covid2 was dropped by the statement above, so this query
-- references a table that no longer exists.
select *
from covid2;

二、Hive复杂类型

array类型

数组类型中的数据可以使用索引值访问。

-- 1、准备数据
zhangsan    beijing,shanghai,tianjin,hangzhou
wangwu  changchun,chengdu,wuhan,beijing

-- 2. Create the table.
-- Fields are separated by a tab; elements inside the array are separated by ','.
create external table hive_array (
    name string,
    work_locations array<string>
)
row format delimited
    fields terminated by '\t'
    collection items terminated by ',';

-- 3. Load the data from the local filesystem.
load data local inpath '/export/data/hivedatas/work_locations.txt'
into table hive_array;

select *
from hive_array;

-- First element of the work_locations array.
select
    name,
    work_locations[0] as location
from hive_array;

-- Number of elements in the work_locations array.
select
    name,
    size(work_locations) as location_size
from hive_array;

-- Rows whose work_locations array contains 'tianjin'.
select *
from hive_array
where array_contains(work_locations, 'tianjin');

map类型

map类型中value值使用map名['key']的形式访问。
使用map_keys(map名)获取所有的键。
使用map_values(map名)获取所有的值。
使用array_contains(map_keys(map名),'键')获取包含指定键的行。

-- 1、准备数据
1,zhangsan,father:xiaoming#mother:xiaohuang#brother:xiaoxu,28
2,lisi,father:mayun#mother:huangyi#brother:guanyu,22
3,wangwu,father:wangjianlin#mother:ruhua#sister:jingtian,29
4,mayun,father:mayongzhen#mother:angelababy,26

-- 2. Create the table.
-- Delimiters used by this table:
--   ','  separates the top-level fields,
--   '#'  separates the entries inside the members map,
--   ':'  separates each map key from its value.
create table hive_map
(
    id      int,
    name    string,
    -- A map type needs both a key type and a value type.
    members map<string,string>,
    age     int
)
row format delimited fields terminated by ','
collection items terminated by '#'
map keys terminated by ':';

-- 3. Load the data from the local filesystem.
load data local inpath '/export/data/hivedatas/hive_map.txt'
into table hive_map;

select *
from hive_map;

-- 4. Query examples.
-- Look up values by key.
select
    id,
    name,
    members['father'] as father,
    members['mother'] as mother,
    age
from hive_map;

-- All keys of the map.
select
    id,
    name,
    map_keys(members) as relation
from hive_map;

-- All values of the map.
select
    id,
    name,
    map_values(members) as relation
from hive_map;

-- Number of key/value pairs in the map.
select
    id,
    name,
    size(members) as num
from hive_map;

-- Rows that have a given key:
-- find the rows whose members map contains the key 'brother'.
select *
from hive_map
where array_contains(map_keys(members), 'brother');

-- Same filter, additionally returning the value stored under 'brother'.
select
    id,
    name,
    members['brother'] as brother
from hive_map
where array_contains(map_keys(members), 'brother');

struct类型

struct类型可以看作java中的类，struct类型中的数据使用struct名称.变量名的形式访问。

-- 1、准备数据
192.168.1.1#zhangsan:40
192.168.1.2#lisi:50
192.168.1.3#wangwu:60
192.168.1.4#zhaoliu:70

-- 2. Create the table.
-- '#' separates ip from info; ':' separates the members of the info struct.
create table hive_struct (
    ip string,
    info struct<name:string, age:int>
)
row format delimited
    fields terminated by '#'
    collection items terminated by ':';

-- 3. Load the data from the local filesystem.
load data local inpath '/export/data/hivedatas/hive_struct.txt'
into table hive_struct;

select *
from hive_struct;

-- 4. Query the table.
-- Struct members are read with dot notation.
select
    ip,
    info.name,
    info.age
from hive_struct;

-- The struct column can also be selected as a whole.
select
    ip,
    info
from hive_struct;