DDL建表(三)

一、内部表和外部表

1、内部表

(1)source file

1,罗大佑,60
2,田馥甄,40

(2)DDL

-- Managed (internal) table: Hive owns both metadata and data files,
-- so DROP TABLE removes the underlying files as well.
CREATE TABLE super_star (
    id   INT,
    name STRING,
    age  INT
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE;

(3)加载数据

0: jdbc:hive2://master:10000> load data local inpath '/root/SuperStar.txt' into table test.super_star;

(4)查询数据

-- Name the columns explicitly instead of SELECT *: the query keeps
-- working (and keeps meaning the same thing) if the schema grows.
select id, name, age from test.super_star;

 2、外部表

(1)source file

1,周杰伦,43
2,林俊杰,42

(2)DDL

-- External table: Hive manages only the metadata; dropping the table
-- leaves the data files in place (demonstrated below at step (5)/(6)).
CREATE EXTERNAL TABLE pop_singer (
    id   INT,
    name STRING,
    age  INT
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE;

(3)加载数据

0: jdbc:hive2://master:10000> load data local inpath '/root/PopSinger.txt' into table test.pop_singer;

(4)查询数据

-- Qualify the database: the LOAD above targeted test.pop_singer, so an
-- unqualified name only works if the session happens to be in `test`.
-- Also list the columns explicitly instead of SELECT *.
select id, name, age from test.pop_singer;

(5)Drop table

-- IF EXISTS makes the drop idempotent; qualifying the database avoids
-- depending on the session's current database (the table lives in `test`).
drop table if exists test.pop_singer;

(6)重建表,数据仍然还在

-- Recreating the external table over the same default location makes the
-- previously loaded files visible again — nothing was deleted by DROP.
CREATE EXTERNAL TABLE test.pop_singer (
    id   INT,
    name STRING,
    age  INT
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE;


select * from test.pop_singer;

二、分区表

1、静态分区

(1)数据源

1,Maple,网球-手游,age:29-gender:male
2,Max,足球-抠脚,age:30-gender:male

(2)DDL

-- Static-partition demo table. Delimiters match the sample data:
-- fields by ',', array items by '-', map entries keyed by ':'.
create table partition_table(
    id      int
   ,name    string
   ,hobby   array<string>
   -- `add` is a keyword in HiveQL; backticks keep the original column
   -- name while making the DDL parse reliably on all Hive versions.
   -- (Also normalized `String` -> `string` for consistent casing.)
   ,`add`   map<string,string>
)
partitioned by (pt_d string)
row format delimited fields terminated by ','
collection items terminated by '-'
map keys terminated by ':'
;

(3)加载数据

0: jdbc:hive2://master:10000> load data local inpath '/root/partitionData.txt' into table test.partition_table partition (pt_d = '20230501');

(4)查询数据

-- Qualify the database: the LOAD above targeted test.partition_table,
-- so an unqualified name resolves against the session's current database.
select * from test.partition_table;

 2、动态分区

  • 加载方式:insert into 分区表 select *,需要的分区字段 from 普通表

(1)数据源

u001    ZSS    23    M    beijing
u002    YMM    33    F    nanjing
u003    LSS    43    M    beijing
u004    ZY    23    F    beijing
u005    ZM    23    M    beijing
u006    CL    23    M    dongjing
u007    LX    23    F    beijing
u008    YZ    23    M    beijing
u009    YM    23    F    nanjing
u010    XM    23    M    beijing
u011    XD    23    F    beijing
u012    LH    23    M    dongjing
u013    FTM    23    F    dongjing

(2)普通表DDL

-- Staging table for the raw person records; the address column later
-- supplies the dynamic-partition value for tb_person_p.
CREATE TABLE IF NOT EXISTS tb_person (
    uid     STRING,
    name    STRING,
    age     INT,
    gender  STRING,
    address STRING
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY "\t";

(3)加载数据

0: jdbc:hive2://master:10000> load data local inpath '/root/tbPerson.txt' into table test.tb_person;

 (4)查询数据

 (5)创建分区表

-- Partitioned target table: same columns as tb_person, plus the
-- partition column addr (deliberately distinct from the data
-- column address, which is kept in the row as well).
CREATE TABLE IF NOT EXISTS tb_person_p (
    uid     STRING,
    name    STRING,
    age     INT,
    gender  STRING,
    address STRING
)
PARTITIONED BY (addr STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY "\t";

 (6)数据动态插入分区表

-- Enable dynamic partitioning for this session.
set hive.exec.dynamic.partition=true;
-- Fix: the valid value is "nonstrict" (the original "nonstrick" is a
-- typo); nonstrict allows every partition to be determined dynamically,
-- without at least one static partition column.
set hive.exec.dynamic.partition.mode=nonstrict;

-- Dynamic-partition insert: the trailing SELECT expression (address,
-- selected a second time) feeds the partition column addr; the
-- preceding columns map onto the table columns by position.
INSERT INTO tb_person_p PARTITION (addr)
SELECT uid, name, age, gender, address, address
FROM tb_person;

 (7)查询分区表数据

-- Explicit column list instead of SELECT * (the partition column addr
-- is a regular queryable column and is listed last).
select uid, name, age, gender, address, addr from tb_person_p;

 

三、Row Formats

1、Array 

(1)Source File

class01,maple:kelly:max,3
class02,Avery:Paul:jack:Vina,4

(2)DDL

-- ARRAY column demo: fields split on ',', array elements on ':'
-- (matches the sample rows above).
CREATE TABLE array_table (
    class STRING,
    stus  ARRAY<STRING>,
    num   INT
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
COLLECTION ITEMS TERMINATED BY ':'
STORED AS TEXTFILE;

(3)数据加载

0: jdbc:hive2://master:10000> load data local inpath '/root/ArrayData.txt' into table test.array_table;

(4)数据查询

-- Array elements are read with zero-based indexing; per the Hive
-- language manual, an out-of-range index yields NULL, not an error.
SELECT
    class,
    stus[0] AS first_stu,
    stus[1] AS second_stu,
    stus[2] AS third_stu
FROM array_table;

2、Map

(1)Source File

1,name:maple#gender:male,10
2,name:max#gender:female,20

(2)DDL

-- MAP column demo: fields split on ',', map entries on '#',
-- keys from values on ':' (matches the sample rows above).
CREATE TABLE map_table (
    id   INT,
    stus MAP<STRING, STRING>,
    age  INT
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
COLLECTION ITEMS TERMINATED BY '#'
MAP KEYS TERMINATED BY ':'
STORED AS TEXTFILE;

(3) 数据加载

0: jdbc:hive2://master:10000> load data local inpath '/root/MapData.txt' into table test.map_table;

(4) 数据查询

-- Map values are read by key with the subscript operator.
SELECT
    id,
    stus['name']   AS stu_name,
    stus['gender'] AS stu_gender,
    age
FROM map_table;

 3、Struct

(1) Source File

1,maple:male:18
2,max:female:22

(2)DDL

-- STRUCT column demo: the struct's fields are separated by the
-- collection-items delimiter ':' (matches the sample rows above).
CREATE TABLE struct_table (
    id  INT,
    stu STRUCT<name:STRING, gender:STRING, age:INT>
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
COLLECTION ITEMS TERMINATED BY ':'
STORED AS TEXTFILE;

(3) 数据加载

0: jdbc:hive2://master:10000> load data local inpath '/root/StuctData.txt' into table test.struct_table;

(4) 数据查询

-- Struct fields are dereferenced with dot notation.
SELECT
    id,
    stu.name,
    stu.gender,
    stu.age
FROM struct_table;

以上三种Row Format其实是一种简化写法,底层是通过定义SERDE类实现的,对于文本类型,对应的SERDE类包括org.apache.hadoop.hive.serde2.OpenCSVSerde、org.apache.hadoop.hive.serde2.RegexSerDe、org.apache.hive.hcatalog.data.JsonSerDe,如果source 数据源不方便直接通过简化方式直接定义,可以考虑通过定义SERDE类的方式实现,以下举例Json SERDE和正则SERDE。

4、Json SERDE

(1)Source File

{"name":"Maple","city":"北京","gender":"male"}
{"name":"Kelly","city":"上海","gender":"female"}
{"name":"max","city":"广州","gender":"male"}

(2)DDL

-- Hive folds unquoted identifiers to lower case, so "JsonTable" is stored
-- as "jsontable" — the LOAD below already says test.jsontable. Spell the
-- name in lower case so DDL and later references match visibly.
-- One JSON object per input line; the SerDe maps keys to columns by name.
create table jsontable(
    name string,
    city string,
    gender string
)
row format serde 'org.apache.hive.hcatalog.data.JsonSerDe'
stored as textfile;

(3)数据加载

0: jdbc:hive2://master:10000> load data local inpath '/root/JsonData' into table test.jsontable;

(4) 数据查询

 5、RegEx SERDE

(1)Source File

zhangsan_29:beijing|Java
lisi_30:beijing|Java

(2)DDL

-- RegexSerDe demo: each input line has the shape name_age:city|job_title;
-- the four capture groups map positionally onto the four columns.
CREATE TABLE regex_table (
    name      STRING,
    age       INT,
    city      STRING,
    job_title STRING
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
    "input.regex" = "(.*)_(.*):(.*)\\|(.*)"
)
STORED AS TEXTFILE;

(3)数据加载

0: jdbc:hive2://master:10000> load data local inpath '/root/RegExData.txt' into table test.regex_table;

(4)数据查询

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值