Hive的表操作

最新推荐文章于 2024-09-05 12:30:40 发布

陈万君Allen

最新推荐文章于 2024-09-05 12:30:40 发布

阅读量1k

点赞数

分类专栏： Java和大数据文章标签： hive 大数据 hadoop 数据仓库

本文链接：https://blog.csdn.net/weixin_53280379/article/details/127353637

版权

Java和大数据专栏收录该内容

43 篇文章 6 订阅

订阅专栏

Hive系列

注：大家觉得博客好的话，别忘了点赞收藏呀，本人每周都会更新关于人工智能和大数据相关的内容，内容多为原创，Python Java Scala SQL 代码，CV NLP 推荐系统等，Spark Flink Kafka Hbase Hive Flume等等~写的都是纯干货，各种顶会的论文解读，一起进步。
今天继续和大家分享一下Hive的表操作
#博学谷IT学习技术支持

前言

在这里插入图片描述
1、Hive是数仓管理工具,用来管理数仓
2、Hive可以将数仓存在HDFS上的文件变成一张张的表
3、Hive提供一种HiveSQL，可以对表进行分析处理
4、HiveSQL底层默认是MapReduce，以后可以换成其他的引擎（Spark），我们写HiveSQL会去匹配底层的MR模板，匹配上则执行，否则不能执行

在这里插入图片描述

一、Hive表操作1-内部表和外部表

1、内部表是私有表，一旦给表加载数据之后，内部表认为这份数据就是它独占的，表一旦删除，表数据文件会跟着全部删除，如果在应用中，数据是部门内部的，或者个人的，则表可以设置为内部表，不会对其他人造成影响。

2、内部表创建语法： create table 表

3、外部表是公有表，一旦给表加载数据之后，外部表认为这份数据是大家的，表一旦删除，表数据文件不会删除，只删除表和文件之间的映射关系，如果在应用中，数据是各部门共享，则可以设置为外部表，你的表只是对文件有访问权。

4、外部表创建语法： create external table 表

-- Step 1: define two external tables; every row is one tab-separated text line.
create external table teacher (
    tid   string,
    tname string
)
row format delimited
    fields terminated by '\t';

create external table student (
    sid    string,
    sname  string,
    sbirth string,
    ssex   string
)
row format delimited
    fields terminated by '\t';

-- Step 2: load the local text files into the tables.
load data local inpath '/export/data/hivedatas/student.txt'
    into table student;
load data local inpath '/export/data/hivedatas/teacher.txt'
    into table teacher;

-- Step 3: check what was loaded.
select * from student;
select * from teacher;


-- Step 4: drop the external table.
-- Only the metadata is removed; the table's data files are left in place.
drop table teacher;

外部表可以实现共享一份数据

-- Demo: several external tables mapped onto the same directory share one copy of the data.
drop table covid1;

create external table covid1 (
    date_val string,
    country  string,
    state    string,
    code     string,
    cases    int,
    deaths   int
)
row format delimited
    fields terminated by ','
location '/input/covid';

select * from covid1;


-- Second table pointing at the very same location as covid1.
create external table covid2 (
    date_val string,
    country  string,
    state    string,
    code     string,
    cases    int,
    deaths   int
)
row format delimited
    fields terminated by ','
location '/input/covid';

select * from covid2;

-- Drop covid1; covid2 is queried again to show the shared data is still readable.
drop table covid1;
select * from covid2;

-- Drop covid2 as well.
-- NOTE: the query below targets a table that no longer exists, so it is expected to
-- fail; it only demonstrates that the table definition itself is gone.
drop table covid2;
select * from covid2;

二、Hive的复杂类型

1.array类型

-- 1、准备数据（work_locations.txt，姓名与地点列表之间以制表符 \t 分隔，地点之间以逗号分隔）
zhangsan    beijing,shanghai,tianjin,hangzhou
wangwu  changchun,chengdu,wuhan,beijing

-- Step 2: create the table.
-- Columns are split on tab; the elements of the array column are split on ','.
create external table hive_array (
    name           string,
    work_locations array<string>
)
row format delimited
    fields terminated by '\t'
    collection items terminated by ',';

-- Step 3: load the local data file.
load data local inpath '/export/data/hivedatas/work_locations.txt'
    into table hive_array;

select * from hive_array;

-- Step 4: query the array column.
-- All rows.
select * from hive_array;

-- First element of work_locations (array indexes start at 0).
select
    name,
    work_locations[0] as location
from hive_array;

-- Number of elements in work_locations.
select
    name,
    size(work_locations) as location_size
from hive_array;

-- Rows whose work_locations array contains 'tianjin'.
select *
from hive_array
where array_contains(work_locations, 'tianjin');

2.map类型

-- 1、准备数据

1,zhangsan,father:xiaoming#mother:xiaohuang#brother:xiaoxu,28
2,lisi,father:mayun#mother:huangyi#brother:guanyu,22
3,wangwu,father:wangjianlin#mother:ruhua#sister:jingtian,29
4,mayun,father:mayongzhen#mother:angelababy,26

-- Step 2: create the table.
-- Fields are split on ',', map entries on '#', and each key from its value on ':'.
create table hive_map
(
    id      int,
    name    string,
    members map<string,string>,  -- key: relation (e.g. father), value: that relative's name
    age     int
)
row format delimited fields terminated by ','
collection items terminated by '#'
map keys terminated by ':';

-- Step 3: load the local data file.

load data local inpath '/export/data/hivedatas/hive_map.txt' into table hive_map;
select * from hive_map;

-- Step 4: query the map column.
select * from hive_map;

-- Look up values by key; a key that is absent from a row's map yields NULL.
select
    id,
    name,
    members['father'] as father,
    members['mother'] as mother,
    age
from hive_map;

-- All keys of the map, returned as an array.
select id, name, map_keys(members) as relation from hive_map;

-- All values of the map, returned as an array.
select id, name, map_values(members) as relation from hive_map;

-- Number of key/value pairs in the map.
select id, name, size(members) as num from hive_map;

-- Rows that have a given key:
-- which rows list a 'brother' among their family members?
select *
from hive_map
where array_contains(map_keys(members), 'brother');

-- Same filter, additionally returning the value stored under 'brother'.
select
    id,
    name,
    members['brother'] as brother
from hive_map
where array_contains(map_keys(members), 'brother');

3.struct类型

class  类名{
   String name,
   int  age ,
   double score
}

-- 1、准备数据
192.168.1.1#zhangsan:40
192.168.1.2#lisi:50
192.168.1.3#wangwu:60
192.168.1.4#zhaoliu:70

-- Step 2: create the table.
-- '#' separates ip from info; ':' separates the members inside the struct.
create table hive_struct (
    ip   string,
    info struct<name:string, age:int>
)
row format delimited
    fields terminated by '#'
    collection items terminated by ':';

-- Step 3: load the local data file.

load data local inpath '/export/data/hivedatas/hive_struct.txt'
    into table hive_struct;

select * from hive_struct;


-- Step 4: query the struct column.
-- Individual struct members are addressed with dot notation.
select
    ip,
    info.name,
    info.age
from hive_struct;

-- The whole struct returned as a single column.
select
    ip,
    info
from hive_struct;