Some notes on Hive (from the 黑马程序员 Hive course)

------ Session settings
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.dynamic.partition=true;
set hive.exec.parallel=true;
set hive.support.concurrency=false;
set mapreduce.map.memory.mb=1536;
show databases ;
drop database data cascade ; -- force-drop the database together with its tables
------ CREATE TABLE syntax tree
create table if not exists k_v(
    id int comment "baba",
    name  string  comment "xxx",
    skip  map<string,int>
)
comment "this is a table"  -----对表的说明/注释 (可选)
partitioned by (main string)   -----分区(可选)(后面还可以加字段,多发区表)
clustered by (name,id) into 4 buckets      -----分桶(into 4 buckets 以name把数据分为几桶)
                                                            ------ (name,id)括号里面可多选
----[sorted by (id desc)]   --- sorts the rows inside each bucket; written between "clustered by (...)" and "into N buckets"
row format delimited fields terminated by ","  --- character that separates fields
----[COLLECTION ITEMS TERMINATED BY char]   --- separator between collection items (map / array / struct)
 ---  [MAP KEYS TERMINATED BY char]  --- separator between a map key and its value
 --   [LINES TERMINATED BY char] --- line (row) terminator
--[STORED AS file_format]       --- file format used for storage
   -- [LOCATION hdfs_path]     --- HDFS location of this table
--[TBLPROPERTIES (property_name=property_value, ...)]
--[AS select_statement]  --- create the table from a query (CTAS)
------LIKE copies an existing table's structure but not its data.
------4,铠,52,龙域领主:288-曙光守护者:1776
use hive_learn;
        create table if not exists k_v(
            id int,
            name string,
            price int,
            skip map<string,int>,
            test array<string>
        )
row format delimited fields terminated by ","
collection items terminated by "-"
map keys terminated by ":";
show tables ;
load data local inpath "/root/hot_hero_skin_price.txt" into table k_v;
select * from k_v;
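---- Examples of the "as select" and "like" options mentioned above (a sketch; the copy-table names are made up for illustration):
create table if not exists k_v_like like k_v;                      -- copies only the structure of k_v, no data
create table if not exists k_v_ctas as select id, name from k_v;   -- creates and fills a table from a query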
---------------------- Create an external table
---------- sample rows (tab-separated):
------- 10  ACCOUNTING  1700
------- 20  RESEARCH    1800
------- 30  SALES       1900
------- 40  OPERATIONS  1700

create external  table  if not exists waibu_table(
        id int,
        depament string,
        count int
)
row format delimited fields terminated by "\t";
select * from waibu_table;
drop table waibu_table;
---------- Partitioned tables
-------1   后羿 5986   1784   396    336    remotely   archer
create table no_fenqv_table(
    id int,
    name string,
    max_sm int,
    min_sm int,
    max_gj  int,
    min_gj  int,
    nokonw string,
    main_local string
)

row format delimited fields terminated by "\t";
drop table no_fenqv_table;
---- When loading into a partitioned table, the target partition must be specified, otherwise the load fails
---- Partitioning improves query efficiency: queries that filter on the partition column avoid a full table scan
----- Static load (partition value written out explicitly):
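----- The partitioned table fenqv_table loaded below is not created anywhere in these notes; a minimal sketch of what it presumably looks like (assumption: same columns as no_fenqv_table plus partition column main):
create table if not exists fenqv_table(
    id int,
    name string,
    max_sm int,
    min_sm int,
    max_gj  int,
    min_gj  int,
    nokonw string,
    main_local string
)
partitioned by (main string)
row format delimited fields terminated by "\t";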
load data local inpath "/root/*.txt" into table no_fenqv_table; partition (main ="archer");
load data local inpath "/root/assassin.txt" into table fenqv_table partition (main ="assassin");
load data local inpath "/root/tank.txt" into table fenqv_table partition (main ="tank");
load data local inpath "/root/mage.txt" into table fenqv_table partition (main ="mage");
load data local inpath "/root/support.txt" into table fenqv_table partition (main ="support");
load data local inpath "/root/warrior.txt" into table fenqv_table partition (main ="warrior");
set hive.exec.dynamic.partition=true;
select * from no_fenqv_table;
select name from fenqv_table where main="tank" and max_sm>9000;
-------- Dynamic partition insert: the partition value is not hard-coded when the data is written
set hive.exec.dynamic.partition.mode = nonstrict;
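----- fenqv_table_dynamic is not created in these notes either; presumably it has the same layout as fenqv_table (a sketch, using the LIKE shortcut mentioned above):
create table if not exists fenqv_table_dynamic like fenqv_table;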
insert into table fenqv_table_dynamic partition (main)  ----- main is the partition column
select no_fenqv_table.*,main_local from no_fenqv_table;   ------ the partition value is taken from the last column of the query result
desc formatted fenqv_table_dynamic;
select  * from fenqv_table_dynamic;
---------- Bucketed tables: useful for sampling queries (see the TABLESAMPLE example below)
----2021-01-28,Autauga,Alabama,01001,5554,69
create table if not exists fentong_table(
    date_ string,
    local_ string,
    author string,
    daihao  string,
    ganran  bigint,
    dath  int
)
clustered by (author) into 5 buckets
row format delimited fields terminated by ",";
load data local inpath "/root/us-covid19-counties.dat" into table fentong_table;
select *from fentong_table;
select *from fentong_table where author="New York";
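---- Sampling query on the bucketed table (a sketch using Hive's TABLESAMPLE on the bucketing column):
select * from fentong_table tablesample(bucket 1 out of 5 on author);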
drop table fentong_table;
----- When no delimiter is specified, Hive uses \001 (Ctrl+A) by default
create table if not exists no_format(
    id int,
    name_team string,
    name_player string
);
load data local inpath "/root/team_ace_player.txt" into table no_format;
select *from no_format;
------ Views: a view can hide sensitive columns from users; it is defined with "as select" over a query
----20223307,lihua7,2406@qq.com,11107,12345612
create table if not exists view_test(
    id string,
    name string,
    emil string,
    phone string,
     passwd string
)
row format delimited fields terminated by ",";
load data local inpath "/root/view.txt" into table view_test;
select *from view_test;
create view safe_user as select id,name,emil from view_test;
select * from safe_user;
show views ;
--------- Materialized views, a new feature in Hive 3.0
--------- The syntax is almost the same as creating a table, except that "materialized view" is added
--------- It is also defined with "as select" over a query
create materialized view if not exists wuhua_view
as select   name,passwd from view_test;
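--------- Note (assumption based on standard Hive 3 behaviour): a materialized view normally requires its source table to be a transactional (ACID) table, so the statement above may fail on a plain table.
show materialized views;
-- a materialized view can be refreshed after the source data changes:
-- alter materialized view wuhua_view rebuild;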
-------- Truncate: remove all rows but keep the table
truncate table view_test;
------ A database created with a custom LOCATION does not appear under the default warehouse directory on HDFS
create database if not exists db_test
comment "this is a db"
location "/output/DB";
drop database db_test cascade ;
show databases ;
describe database db_test;
use db_test;
create table if not exists test_
(
    id string
);
describe database db_test;
----- Change the database owner
alter database db_test set owner user some_user;   -- syntax: alter database db SET OWNER [USER|ROLE] name; "some_user" is just a placeholder
-------------===========   SHOW-related statements   ============
-------- List all partitions of a table
show partitions fenqv_table_dynamic;
------- Show the CREATE statement that would rebuild a table
show create table fenqv_table_dynamic;
------ List Hive functions (built-in and user-defined); an optional LIKE pattern filters the list
show functions like "explode";
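------ details of a single function (the built-in explode shown as an example):
describe function extended explode;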
----------====== LOAD =============
--- load data local inpath "/root/*.txt" into table no_fenqv_table;
------- INSERT INTO ... VALUES is very slow and not recommended: Hive runs a MapReduce job underneath
insert into table no_format(id, name_team) values (10,"xswl");
select * from no_format;
-------us-covid19-counties.dat
---- date, county, state, postal code, confirmed cases, deaths
--2021-01-28,Autauga,Alabama,01001,5554,69
create table if not exists covid3(
    date_ string,
    count string,
    zhou  string,
    youbian string,
    ganran  bigint,
    death  bigint
)
row format delimited fields terminated by ",";
create table if not exists stu(
                                  id bigint,
                                  name string,
                                  sex string,
                                  age  string,
                                  xibie  string
)
    row format delimited fields terminated by ",";
load data local inpath "/root/student.txt" into table stu;
select *from stu;
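---- covid_nofenqv is not created anywhere in these notes; a minimal sketch assuming the same layout as covid3:
create table if not exists covid_nofenqv like covid3;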
load data local inpath "/root/us-covid19-counties.dat" into table covid_nofenqv;
select  zhou from covid_nofenqv group by zhou;
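---- covid_fenqv (partitioned by state) is not created in these notes either; a sketch (assumption):
create table if not exists covid_fenqv(
    date_ string,
    count string,
    zhou  string,
    youbian string,
    ganran  bigint,
    death  bigint
)
partitioned by (zhou_ string)
row format delimited fields terminated by ",";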
set hive.exec.dynamic.partition.mode=nonstrict;
insert into table covid_fenqv partition (zhou_)
select covid_nofenqv.*,zhou from covid_nofenqv;
select * from covid_fenqv limit 3;
----- INSERT ... SELECT has several uses. First, one query result can feed data into several tables at once;
---- second, it enables dynamic partitioning, the partition value being the last column of the query result; third, it can export data.
------- First use: multi-insert, one scan of the source table feeding several inserts; syntax:
--from table_name
---insert into table table_name1
---select ...
---insert into table table_name2
---select ...
---insert into table table_name3
---select ...
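---- covid1 and covid2 are not created in these notes; a sketch using LIKE (assumption: same layout as covid3):
create table if not exists covid1 like covid3;
create table if not exists covid2 like covid3;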
from covid_nofenqv
insert into table covid1
select * where zhou="Puerto Rico"
insert into table covid2
select * where zhou="Alabama"
insert into table covid3
    select * where zhou="California";
select * from covid3;
-----.......
---- Second use: dynamic partitioning, with the partition value taken from the last column of the query result
----insert  into table fenqv_table_dynamic partition (main)  ----- main is the partition column
-----select no_fenqv_table.*,main_local from no_fenqv_table;
insert into table covid_fenqv partition (zhou_)
select covid_nofenqv.*,zhou from covid_nofenqv;
----- Third use: exporting data. The export OVERWRITES the target directory, so be very careful not to write the wrong path
----- Syntax:
----insert overwrite [local] directory "path"
-----row format delimited fields terminated by "<delimiter>"
----stored as <file_format>
----  select ... from ...
insert overwrite local directory "/root/covid1.txt"   -- the path is treated as a directory, even though it is named like a file
row format delimited fields terminated by "\t"
select * from covid3;
create table if not exists address (
                                      id string,
                                      xian string,
                                      shi string,
                                      shen string
)
    row format delimited fields terminated by "\t";
load data local inpath "/root/lianxi.txt" into table lianxi;
select  *from lianxi limit 5;
-----------------========== Hive query syntax below =========----------
---------------- First, SELECT syntax
---DISTINCT deduplicates on the whole selected row and is relatively slow
select  distinct zhou,count from covid_nofenqv limit 2;
-----where takes a condition: rows for which it is true are returned, the rest are dropped (filters data before grouping)
select * from covid3 where 2<1;
select * from covid3 where 2>1;
----use where to find rows whose county name (column "count") is longer than 10 characters
select *from covid3 where length(count)>10;
-----where also supports subqueries, but aggregate functions such as count or sum cannot be used directly in it
select *from covid_nofenqv where length(count)>10 and zhou in (select zhou from covid3);
------ group by  groups rows and is typically used with aggregate functions
select count(*) as xs from covid_nofenqv group by zhou;
select zhou,sum (death) as zhongji from covid_nofenqv group by zhou;
------having  complements where: it can use aggregate functions (filters data after grouping)
select zhou,sum(death) as shiman from covid_nofenqv group by zhou having shiman>10000;
------  limit  controls how many rows are returned
select *from covid3 limit 5;   --- one argument: the number of rows to return
select *from covid3 limit 2,5;  --- two arguments: the offset of the first row to return (the first row is 0) and the number of rows to return
---------======= Logical execution order of a query:
-----  from ---> where --->  group by ----->  having ----->  select ----> order by ----> limit
----------  order by: global sort; asc ascending, desc descending
--------the global sort runs through a single reducer, so use it together with limit, otherwise a large data set can overwhelm it
select count,ganran,death from covid3 order by death asc  limit 5;
select count,ganran,death from covid3 order by death desc  limit  5;
--------cluster by  distributes the rows and sorts them within each reducer, ascending order only
select * from covid3 cluster by count ;
select *from stu cluster by age;
-------distribute by  controls which reducer each row goes to; sort by  sorts within each reducer
------distribute by + sort by usually appear together: the pair splits cluster by into its two parts and extends it, allowing descending sorts
select * from stu distribute by sex sort by age desc ;
--------  union  combines the results of several queries into one result set; the queries must return the same number of compatible columns
select name,sex from stu  union  select count,zhou from covid3;
-------the queries inside a union can themselves use conditions, subqueries, etc.
select count,death from (select * from covid1 where length(zhou)>6 limit 30) res1
union select count,death from covid3  limit 10;
-------"select * from covid1 where length(zhou)>6 limit 30" is a temporary result set, named res1 as shown above
-------the final "limit 10" restricts the combined result to 10 rows
-----   from subquery: the subquery result must be given an alias, for example:
-----   from (select * from covid1 where length(zhou)>6 limit 30) res1
-----  a from subquery may itself contain anything from the select syntax tree, including union
select count,death from (select * from covid1 where length(zhou)>6
union select * from covid3  limit 10) res1;
------  where subqueries: in, not in, like, ... (common on production data)
------  CTE (common table expression, "with ...") can be used with insert into, create table as select, select, and create view as select
with res1 as ( select name,age,id from stu where xibie = "MA")
select name,age from res1 where id=95003;
------create a view using a CTE
create view v_ as with res1 as ( select * from yuangong where sal >9000)
select res1.id,res1.name,lianxi.phno from res1,lianxi where lianxi.id = res1.id;
select * from v_;
----------join usage below; inner join and left join are the most common. Note: if the id is empty (an empty string) in both tables, those rows will still be joined, since empty strings compare equal
--------   inner join: returns only the rows that match in both tables (non-matching rows are dropped, not null-filled)
with res2 as ( select * from yuangong where sal > 18000) select * from res2;   -- a CTE must be followed by a query; it cannot end with just a semicolon
select  yuangong.id,yuangong.name,address.id ,address.shen,address.shi,address.xian
from yuangong join address  on  yuangong.id = address.id;
-------  left join: returns every row of the left table, matched against the right table where possible
select id,name,phon,emil from
(select  yuangong.id,yuangong.name,lianxi.phon,lianxi.emil
from yuangong left join lianxi on yuangong.id = lianxi.id ) res
    where length(phon)<11 or length(emil) < 6;
---clear the data in these tables
truncate table lianxi;
truncate table yuangong;
truncate table address;
select  * from lianxi;
load data local inpath "/root/xingxi.txt" into table yuangong;
load data local inpath "/root/lianxi.txt" into table lianxi;
load data local inpath "/root/address.txt" into table address;
------  right join: returns every row of the right table, the mirror image of a left join
select lianxi.id,lianxi.phon,lianxi.emil,yuangong.name from  yuangong right join lianxi on yuangong.id = lianxi.id;
truncate table lianxi;
truncate table yuangong;
truncate table address;
------  full outer join: returns all rows from both tables
select yuangong.*,lianxi.* from yuangong full join lianxi on yuangong.id = lianxi.id;
------  left semi join: returns only the left-table rows whose key matches a row in the right table, and only the left table's columns (similar to an IN/EXISTS filter)
select yuangong.* from yuangong left semi join address on yuangong.id = address.id;
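------ roughly equivalent to an IN filter (a sketch for comparison):
select * from yuangong where yuangong.id in (select id from address);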
------------   join is executed before where