1、创建数据库并且使用
create database demo; use demo;
2、内部表的创建
create table table_1(
id int,
name varchar(8)
);
3、外部表的创建
create external table table_2(
id int,
name varchar(8)
);
补充:指定格式用于文件的导入
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ’,’
COLLECTION ITEMS TERMINATED BY ‘-’
MAP KEYS TERMINATED BY ‘:’ :
LINES TERMINATED BY ‘\n’
文件的导入
1、load data local inpath '/root/test.txt' into table demo.table_1;
将本地的数据导入到hive中
2、load data inpath 'hdfs://node01:9000/user/tes.txt' into table demo.table_1;
从hdfs集群导入数据(首先上传test.txt到hdfs中)3、insert into---内外部表,不适应于分区
4、from table1
insert into(overwrite) tables2
select id ,name
4、分区表的创建
1)、静态分区创建
单分区
create table table_3 (id int, name varchar(8)) partitioned by (day int);
上传数据:
load data local inpath '/root/tes.txt' into table demo.table_3 partition (day=10);
双分区建表语句:
create table table_4(id int, name varchar(8)) partitioned by (day int, hour int);
上传数据:
load data local inpath '/root/tes.txt' into table demo.table_4 partition (day=10,hour=20);
增加分区
alter table table_3 add partition(day=10,hour=40);
删除分区
alert table table_4 drop partiton(day=10,hour=20)
2)动态分区创建
修改权限的方式:
1、conf/hive-site.xml
2、在hive内部使用set进行相应的设置
3、hive启动的时候设置 hive --hiveconf hive.exec.dynamic.partiton=true
1、修改权限
set hive.exec.dynamic.partiton=true //开启动态分区
2、修改默认状态
set hive.exec.dynamic.partiton.mode=nostrict //默认strict。至少有一个静态分区
创建分区:
create table table_11(
id int,
name varchar(8),
)
partitioned by (day int ,hour int)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ' '
LINES TERMINATED BY '\n'
;
写入数据:
insert into table table_11 partiton (day,hour)
select * table_3;
5、分桶表
开启分桶
set hive.enforce.bucketing=true
创建桶
create table table_111 (
id int,
name varchar(8),
)
clustered by (id) into 4 buckets
row format delimited
fields terminated by ' '
加载数据
insert into table table_111 select id,name from table_2;
抽样
select * from table_111 tablesample(bucket 1 out of 4 on id);