动态分区和静态分区的区别
1.动态分区和静态分区都需要自己去指定分区的字段,静态分区同时需要去指定分区的内容
2.动态分区不能用load data的方法进行数据的导入,而静态分区可以通过load data和insert overwrite两种方法导入数据
3.进行insert overwrite 导入的时候,静态不需要将分区字段的内容在select中查询出来的,但是动态需要查询出这个字段
4.动态分区在使用之前,需要打开动态分区的开关,并且要打开非严格模式的开光。
分桶使用的场景
分桶在表链接的时候使用,可以加快两张表联合查询的速度,且只有当联合表格的分桶数量相等,或者分桶数量是倍数关系的时候,才有加速效果
分桶可以让表链接加速,是因为分桶可以让表连接的笛卡尔积数据量变少。
#!/bin/bash
echo 用shell脚本实现分区的加载
sleep 2s
#删除已存在的文件
rm -rvf /root/log*.txt
#1、生成log1.txt
echo 100,登录,20210510>>/root/log1.txt
echo 101,下单,20210510>>/root/log1.txt
echo 102,退出,20210510>>/root/log1.txt
echo 103,登录,20210510>>/root/log1.txt
echo 104,退出,20210510>>/root/log1.txt
echo 105,登录,20210510>>/root/log1.txt
echo 106,下单,20210510>>/root/log1.txt
echo 107,退出,20210510>>/root/log1.txt
#2、生成log2.txt
echo 1100,登录,20210511>>/root/log2.txt
echo 1101,下单,20210511>>/root/log2.txt
echo 1102,退出,20210511>>/root/log2.txt
echo 1103,登录,20210511>>/root/log2.txt
echo 1104,退出,20210511>>/root/log2.txt
echo 1105,登录,20210511>>/root/log2.txt
echo 1106,下单,20210511>>/root/log2.txt
echo 1107,退出,20210511>>/root/log2.txt
#3、生成log3.txt
echo 2100,登录,20210512>>/root/log3.txt
echo 2101,下单,20210512>>/root/log3.txt
echo 2102,退出,20210512>>/root/log3.txt
echo 2103,登录,20210512>>/root/log3.txt
echo 2104,退出,20210512>>/root/log3.txt
echo 2105,登录,20210512>>/root/log3.txt
echo 2106,下单,20210512>>/root/log3.txt
echo 2107,退出,20210512>>/root/log3.txt
sleep 3s
#新建目录并上传文件
#先删除已经存在的目录
hadoop fs -rm -r /logs
#新建目录
hadoop fs -mkdir /logs
hadoop fs -mkdir /logs/20210510
hadoop fs -mkdir /logs/20210511
hadoop fs -mkdir /logs/20210512
#切换路径
cd /root
hadoop fs -put log1.txt /logs/20210510
hadoop fs -put log2.txt /logs/20210511
hadoop fs -put log3.txt /logs/20210512
#执行hive3.hql脚本
hive -f /root/birth/hive3.hql
--用vi新建hive3.hql脚本
--切换数据库
use bigdate007;
--删除表
drop table if exists ext_logs_p;
--在bigdata 数据库中建表
create external table bigdate007.ext_logs_p(
id int,
name string,
operate string)
partitioned by(date_time string)
row format delimited fields terminated by ','
location '/logs';
--挂载分区
alter table bigdate007.ext_logs_p add partition (date_time='20210510') location '/logs/20210510';
alter table bigdate007.ext_logs_p add partition (date_time='20210511') location '/logs/20210511';
alter table bigdate007.ext_logs_p add partition (date_time='20210512') location '/logs/20210512';
1.先创建一个带有分桶属性的表格
create table s_cl(
id int,
name string,
sex string,
age int)
clustered by(age) into 4 buckets
row format delimited fields terminated by ',';
insert into s_cl values(15,'wangxiaoer15','男',15);
--2.复制一个相同结构的备份表格
create table s_cl_tmp like s_cl;
--3.将文件的内容映射到备份表中
load data local inpath '/root/s.txt' into table bigdate007.s_cl_tmp;
--4.使用查询的方式对备份表进行分桶
insert overwrite table s_cl select * from s_c1_tmp cluster by age;
#!/bin/bash
echo 开始运行脚本
cd /root/
rm -vf stu_c.txt
sleep 2s
echo 1001,lilei,男,18>>stu_c.txt
echo 1002,lucy,女,16>>stu_c.txt
echo 1003,tom,男,17>>stu_c.txt
echo 1004,jack,男,18>>stu_c.txt
echo 1005,eve,女,18>>stu_c.txt
echo 1006,allen,男,18>>stu_c.txt
echo 1007,miller,男,19>> stu_c.txt
echo 1008,flower,女,16>> stu_c.txt
echo 1009,adam,男,17>> stu_c.txt
echo 1010,toly,男,16>> stu_c.txt
echo 1011,steven,男,17>> stu_c.txt
echo 1012,bob,男,18>> stu_c.txt
echo 1013,lucky,女,17>> stu_c.txt
sleep 2s
hadoop fs -rm -r /data
hadoop fs -mkdir /data
hadoop fs -put stu_c.txt /data
sleep 2s
#执行hive5.hql
hive -f /root/hive5.hql
echo 脚本执行完毕!!!
hive5.hql脚本
--1.删除表
use bigdate007;
drop table if exists stu_info_c;
drop table if exists stu_info_c_tmp;
--2.新建表
create table stu_info_c(
id int,
name string,
sex string,
age int)
clustered by(age) into 4 buckets
row format delimited fields terminated by ',';
--3.再复制一个和分桶表相同结构的临时表,只有结构不会有内容
create table stu_info_c_tmp like stu_info_c;
--4.在临时表中添加表格的所有的数据
load data inpath '/data/stu_c.txt' into table bigdate007.stu_info_c_tmp;
--5.使用insert复制表格的内容到分桶表中,复制的同时进行数据的分桶,cluster by (age)
insert overwrite table stu_info_c
select * from stu_info_c_tmp cluster by (age);