Some notes on Hive (from the 黑马程序员 Hive course)

------ Session settings
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.dynamic.partition=true;
set hive.exec.parallel=true;
set hive.support.concurrency=false;
set mapreduce.map.memory.mb=1536;
show databases ;
drop database data cascade ; -- force-drop the database together with its tables
------ CREATE TABLE syntax tree
create table if not exists k_v(
    id int comment "baba",
    name  string  comment "xxx",
    skip  map<string,int>
)
comment "this is a table"  -----对表的说明/注释 (可选)
partitioned by (main string)   -----分区(可选)(后面还可以加字段,多发区表)
clustered by (name,id) into 4 buckets      -----分桶(into 4 buckets 以name把数据分为几桶)
                                                            ------ (name,id)括号里面可多选
----[sorted by (id desc)]   --- sorts the rows inside each bucket; written between "clustered by (...)" and "into N buckets"
row format delimited fields terminated by ","  --- character that separates fields
----[COLLECTION ITEMS TERMINATED BY char]   --- separator between collection items (map / array / struct)
 ---  [MAP KEYS TERMINATED BY char]  --- separator between a map key and its value
 --   [LINES TERMINATED BY char] --- line (row) terminator
--[STORED AS file_format]       --- file format used for storage
   -- [LOCATION hdfs_path]     --- HDFS location of this table
--[TBLPROPERTIES (property_name=property_value, ...)]
--[AS select_statement]  --- create the table from a query (CTAS)
------LIKE copies an existing table's structure but not its data.
------4,铠,52,龙域领主:288-曙光守护者:1776
use hive_learn;
        create table if not exists k_v(
            id int,
            name string,
            price int,
            skip map<string,int>,
            test array<string>
        )
row format delimited fields terminated by ","
collection items terminated by "-"
map keys terminated by ":";
show tables ;
load data local inpath "/root/hot_hero_skin_price.txt" into table k_v;
select * from k_v;
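---- Examples of the "as select" and "like" options mentioned above (a sketch; the copy-table names are made up for illustration):
create table if not exists k_v_like like k_v;                      -- copies only the structure of k_v, no data
create table if not exists k_v_ctas as select id, name from k_v;   -- creates and fills a table from a query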
---------------------- Create an external table
---------- sample rows (tab-separated):
------- 10  ACCOUNTING  1700
------- 20  RESEARCH    1800
------- 30  SALES       1900
------- 40  OPERATIONS  1700

create external  table  if not exists waibu_table(
        id int,
        depament string,
        count int
)
row format delimited fields terminated by "\t";
select * from waibu_table;
drop table waibu_table;
---------- Partitioned tables
-------1   后羿 5986   1784   396    336    remotely   archer
create table no_fenqv_table(
    id int,
    name string,
    max_sm int,
    min_sm int,
    max_gj  int,
    min_gj  int,
    nokonw string,
    main_local string
)

row format delimited fields terminated by "\t";
drop table no_fenqv_table;
---- When loading into a partitioned table, the target partition must be specified, otherwise the load fails
---- Partitioning improves query efficiency: queries that filter on the partition column avoid a full table scan
----- Static load (partition value written out explicitly):
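----- The partitioned table fenqv_table loaded below is not created anywhere in these notes; a minimal sketch of what it presumably looks like (assumption: same columns as no_fenqv_table plus partition column main):
create table if not exists fenqv_table(
    id int,
    name string,
    max_sm int,
    min_sm int,
    max_gj  int,
    min_gj  int,
    nokonw string,
    main_local string
)
partitioned by (main string)
row format delimited fields terminated by "\t";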
load data local inpath "/root/*.txt" into table no_fenqv_table; partition (main ="archer");
load data local inpath "/root/assassin.txt" into table fenqv_table partition (main ="assassin");
load data local inpath "/root/tank.txt" into table fenqv_table partition (main ="tank");
load data local inpath "/root/mage.txt" into table fenqv_table partition (main ="mage");
load data local inpath "/root/support.txt" into table fenqv_table partition (main ="support");
load data local inpath "/root/warrior.txt" into table fenqv_table partition (main ="warrior");
set hive.exec.dynamic.partition=true;
select * from no_fenqv_table;
select name from fenqv_table where main="tank" and max_sm>9000;
-------- Dynamic partition insert: the partition value is not hard-coded when the data is written
set hive.exec.dynamic.partition.mode = nonstrict;
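----- fenqv_table_dynamic is not created in these notes either; presumably it has the same layout as fenqv_table (a sketch, using the LIKE shortcut mentioned above):
create table if not exists fenqv_table_dynamic like fenqv_table;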
insert into table fenqv_table_dynamic partition (main)  ----- main is the partition column
select no_fenqv_table.*,main_local from no_fenqv_table;   ------ the partition value is taken from the last column of the query result
desc formatted fenqv_table_dynamic;
select  * from fenqv_table_dynamic;
---------- Bucketed tables: useful for sampling queries (see the TABLESAMPLE example below)
----2021-01-28,Autauga,Alabama,01001,5554,69
create table if not exists fentong_table(
    date_ string,
    local_ string,
    author string,
    daihao  string,
    ganran  bigint,
    dath  int
)
clustered by (author) into 5 buckets
row format delimited fields terminated by ",";
load data local inpath "/root/us-covid19-counties.dat" into table fentong_table;
select *from fentong_table;
select *from fentong_table where author="New York";
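---- Sampling query on the bucketed table (a sketch using Hive's TABLESAMPLE on the bucketing column):
select * from fentong_table tablesample(bucket 1 out of 5 on author);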
drop table fentong_table;
----- When no delimiter is specified, Hive uses \001 (Ctrl+A) by default
create table if not exists no_format(
    id int,
    name_team string,
    name_player string
);
load data local inpath "/root/team_ace_player.txt" into table no_format;
select *from no_format;
------ Views: a view can hide sensitive columns from users; it is defined with "as select" over a query
----20223307,lihua7,2406@qq.com,11107,12345612
create table if not exists view_test(
    id string,
    name string,
    emil string,
    phone string,
     passwd string
)
row format delimited fields terminated by ",";
load data local inpath "/root/view.txt" into table view_test;
select *from view_test;
create view safe_user as select id,name,emil from view_test;
select * from safe_user;
show views ;
--------- Materialized views, a new feature in Hive 3.0
--------- The syntax is almost the same as creating a table, except that "materialized view" is added
--------- It is also defined with "as select" over a query
create materialized view if not exists wuhua_view
as select   name,passwd from view_test;
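--------- Note (assumption based on standard Hive 3 behaviour): a materialized view normally requires its source table to be a transactional (ACID) table, so the statement above may fail on a plain table.
show materialized views;
-- a materialized view can be refreshed after the source data changes:
-- alter materialized view wuhua_view rebuild;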
-------- Truncate: remove all rows but keep the table
truncate table view_test;
------ A database created with a custom LOCATION does not appear under the default warehouse directory on HDFS
create database if not exists db_test
comment "this is a db"
location "/output/DB";
drop database db_test cascade ;
show databases ;
describe database db_test;
use db_test;
create table if not exists test_
(
    id string
);
describe database db_test;
----- Change the database owner
alter database db_test set owner user some_user;   -- syntax: alter database db SET OWNER [USER|ROLE] name; "some_user" is just a placeholder
-------------===========   SHOW-related statements   ============
-------- List all partitions of a table
show partitions fenqv_table_dynamic;
------- Show the CREATE statement that would rebuild a table
show create table fenqv_table_dynamic;
------ List Hive functions (built-in and user-defined); an optional LIKE pattern filters the list
show functions like "explode";
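------ details of a single function (the built-in explode shown as an example):
describe function extended explode;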
----------====== LOAD =============
--- load data local inpath "/root/*.txt" into table no_fenqv_table;
------- INSERT INTO ... VALUES is very slow and not recommended: Hive runs a MapReduce job underneath
insert into table no_format(id, name_team) values (10,"xswl");
select * from no_format;
-------us-covid19-counties.dat
---- date, county, state, postal code, confirmed cases, deaths
--2021-01-28,Autauga,Alabama,01001,5554,69
create table if not exists covid3(
    date_ string,
    count string,
    zhou  string,
    youbian string,
    ganran  bigint,
    death  bigint
)
row format delimited fields terminated by ",";
create table if not exists stu(
                                  id bigint,
                                  name string,
                                  sex string,
                                  age  string,
                                  xibie  string
)
    row format delimited fields terminated by ",";
load data local inpath "/root/student.txt" into table stu;
select *from stu;
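---- covid_nofenqv is not created anywhere in these notes; a minimal sketch assuming the same layout as covid3:
create table if not exists covid_nofenqv like covid3;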
load data local inpath "/root/us-covid19-counties.dat" into table covid_nofenqv;
select  zhou from covid_nofenqv group by zhou;
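---- covid_fenqv (partitioned by state) is not created in these notes either; a sketch (assumption):
create table if not exists covid_fenqv(
    date_ string,
    count string,
    zhou  string,
    youbian string,
    ganran  bigint,
    death  bigint
)
partitioned by (zhou_ string)
row format delimited fields terminated by ",";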
set hive.exec.dynamic.partition.mode=nonstrict;
insert into table covid_fenqv partition (zhou_)
select covid_nofenqv.*,zhou from covid_nofenqv;
select * from covid_fenqv limit 3;
----- INSERT ... SELECT has several uses. First, one query result can feed data into several tables at once;
---- second, it enables dynamic partitioning, the partition value being the last column of the query result; third, it can export data.
------- First use: multi-insert, one scan of the source table feeding several inserts; syntax:
--from table_name
---insert into table table_name1
---select ...
---insert into table table_name2
---select ...
---insert into table table_name3
---select ...
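---- covid1 and covid2 are not created in these notes; a sketch using LIKE (assumption: same layout as covid3):
create table if not exists covid1 like covid3;
create table if not exists covid2 like covid3;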
from covid_nofenqv
insert into table covid1
select * where zhou="Puerto Rico"
insert into table covid2
select * where zhou="Alabama"
insert into table covid3
    select * where zhou="California";
select * from covid3;
-----.......
---- Second use: dynamic partitioning, with the partition value taken from the last column of the query result
----insert  into table fenqv_table_dynamic partition (main)  ----- main is the partition column
-----select no_fenqv_table.*,main_local from no_fenqv_table;
insert into table covid_fenqv partition (zhou_)
select covid_nofenqv.*,zhou from covid_nofenqv;
----- Third use: exporting data. The export OVERWRITES the target directory, so be very careful not to write the wrong path
----- Syntax:
----insert overwrite [local] directory "path"
-----row format delimited fields terminated by "<delimiter>"
----stored as <file_format>
----  select ... from ...
insert overwrite local directory "/root/covid1.txt"   -- the path is treated as a directory, even though it is named like a file
row format delimited fields terminated by "\t"
select * from covid3;
create table if not exists address (
                                      id string,
                                      xian string,
                                      shi string,
                                      shen string
)
    row format delimited fields terminated by "\t";
load data local inpath "/root/lianxi.txt" into table lianxi;
select  *from lianxi limit 5;
-----------------========== Hive query syntax below =========----------
---------------- First, SELECT syntax
---DISTINCT deduplicates on the whole selected row and is relatively slow
select  distinct zhou,count from covid_nofenqv limit 2;
-----where takes a condition: rows for which it is true are returned, the rest are dropped (filters data before grouping)
select * from covid3 where 2<1;
select * from covid3 where 2>1;
----use where to find rows whose county name (column "count") is longer than 10 characters
select *from covid3 where length(count)>10;
-----where also supports subqueries, but aggregate functions such as count or sum cannot be used directly in it
select *from covid_nofenqv where length(count)>10 and zhou in (select zhou from covid3);
------ group by  groups rows and is typically used with aggregate functions
select count(*) as xs from covid_nofenqv group by zhou;
select zhou,sum (death) as zhongji from covid_nofenqv group by zhou;
------having  complements where: it can use aggregate functions (filters data after grouping)
select zhou,sum(death) as shiman from covid_nofenqv group by zhou having shiman>10000;
------  limit  controls how many rows are returned
select *from covid3 limit 5;   --- one argument: the number of rows to return
select *from covid3 limit 2,5;  --- two arguments: the offset of the first row to return (the first row is 0) and the number of rows to return
---------======= Logical execution order of a query:
-----  from ---> where --->  group by ----->  having ----->  select ----> order by ----> limit
----------  order by: global sort; asc ascending, desc descending
--------the global sort runs through a single reducer, so use it together with limit, otherwise a large data set can overwhelm it
select count,ganran,death from covid3 order by death asc  limit 5;
select count,ganran,death from covid3 order by death desc  limit  5;
--------cluster by  distributes the rows and sorts them within each reducer, ascending order only
select * from covid3 cluster by count ;
select *from stu cluster by age;
-------distribute by  controls which reducer each row goes to; sort by  sorts within each reducer
------distribute by + sort by usually appear together: the pair splits cluster by into its two parts and extends it, allowing descending sorts
select * from stu distribute by sex sort by age desc ;
--------  union  combines the results of several queries into one result set; the queries must return the same number of compatible columns
select name,sex from stu  union  select count,zhou from covid3;
-------the queries inside a union can themselves use conditions, subqueries, etc.
select count,death from (select * from covid1 where length(zhou)>6 limit 30) res1
union select count,death from covid3  limit 10;
-------"select * from covid1 where length(zhou)>6 limit 30" is a temporary result set, named res1 as shown above
-------the final "limit 10" restricts the combined result to 10 rows
-----   from subquery: the subquery result must be given an alias, for example:
-----   from (select * from covid1 where length(zhou)>6 limit 30) res1
-----  a from subquery may itself contain anything from the select syntax tree, including union
select count,death from (select * from covid1 where length(zhou)>6
union select * from covid3  limit 10) res1;
------  where subqueries: in, not in, like, ... (common on production data)
------  CTE (common table expression, "with ...") can be used with insert into, create table as select, select, and create view as select
with res1 as ( select name,age,id from stu where xibie = "MA")
select name,age from res1 where id=95003;
------create a view using a CTE
create view v_ as with res1 as ( select * from yuangong where sal >9000)
select res1.id,res1.name,lianxi.phno from res1,lianxi where lianxi.id = res1.id;
select * from v_;
----------join usage below; inner join and left join are the most common. Note: if the id is empty (an empty string) in both tables, those rows will still be joined, since empty strings compare equal
--------   inner join: returns only the rows that match in both tables (non-matching rows are dropped, not null-filled)
with res2 as ( select * from yuangong where sal > 18000) select * from res2;   -- a CTE must be followed by a query; it cannot end with just a semicolon
select  yuangong.id,yuangong.name,address.id ,address.shen,address.shi,address.xian
from yuangong join address  on  yuangong.id = address.id;
-------  left join: returns every row of the left table, matched against the right table where possible
select id,name,phon,emil from
(select  yuangong.id,yuangong.name,lianxi.phon,lianxi.emil
from yuangong left join lianxi on yuangong.id = lianxi.id ) res
    where length(phon)<11 or length(emil) < 6;
---clear the data in these tables
truncate table lianxi;
truncate table yuangong;
truncate table address;
select  * from lianxi;
load data local inpath "/root/xingxi.txt" into table yuangong;
load data local inpath "/root/lianxi.txt" into table lianxi;
load data local inpath "/root/address.txt" into table address;
------  right join: returns every row of the right table, the mirror image of a left join
select lianxi.id,lianxi.phon,lianxi.emil,yuangong.name from  yuangong right join lianxi on yuangong.id = lianxi.id;
truncate table lianxi;
truncate table yuangong;
truncate table address;
------  full outer join: returns all rows from both tables
select yuangong.*,lianxi.* from yuangong full join lianxi on yuangong.id = lianxi.id;
------  left semi join: returns only the left-table rows whose key matches a row in the right table, and only the left table's columns (similar to an IN/EXISTS filter)
select yuangong.* from yuangong left semi join address on yuangong.id = address.id;
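------ roughly equivalent to an IN filter (a sketch for comparison):
select * from yuangong where yuangong.id in (select id from address);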
------------   join is executed before where