------开启 set hive.exec.dynamic.partition.mode=nonstrict; set hive.exec.dynamic.partition=true; set hive.exec.parallel=true; set hive.support.concurrency=false; set mapreduce.map.memory.mb=1536; show databases ; drop database data cascade ;--强制删除数据库 ------表的语法树 create table if not exists k_v( id int comment "baba", name string comment "xxx", skip map<string,int> ) comment "this is a table" -----对表的说明/注释 (可选) partitioned by (main string) -----分区(可选)(后面还可以加字段,多发区表) clustered by (name,id) into 4 buckets -----分桶(into 4 buckets 以name把数据分为几桶) ------ (name,id)括号里面可多选 ----stored by ( id desc ) into 4 buckets ---对分桶的表进行排序 row format delimited fields terminated by "\n" ---字段以什么进行切割 ----[COLLECTION ITEMS TERMINATED BY char] 集合-------->map Arrays struce --- [MAP KEYS TERMINATED BY char] ---Map键和值用什么切割 -- [LINES TERMINATED BY char] --- 行用什么切割 --STORED AS file_format] 存储的文件的格式 -- [LOCATION hdfs_path] 这张表的位置 --[TBLPROPERTIES (property_name=property_value, ...)] --[AS select_statement] 用as创建表 ------like允许用户复制现有的表结构,但是不复制数据。 ------4,铠,52,龙域领主:288-曙光守护者:1776 use hive_learn; create table if not exists k_v( id int, name string, price int, skip map<string,int>, test array<string> ) row format delimited fields terminated by "," collection items terminated by "-" map keys terminated by ":" show tables ; load data local inpath "/root/hot_hero_skin_price.txt" into table k_v; select * from k_v; ----------------------建立外部表 ----------10 ACCOUNTING 1700 -------20 RESEARCH 1800 ------------30 SALES 1900 -------------40 OPERATIONS 1700 create external table if not exists waibu_table( id int, depament string, count int ) row format delimited fields terminated by "\t"; select * from waibu_table; drop table waibu_table; ----------分区 -------1 后羿 5986 1784 396 336 remotely archer create table no_fenqv_table( id int, name string, max_sm int, min_sm int, max_gj int, min_gj int, nokonw string, main_local string ) row format delimited fields terminated by "\t"; drop table no_fenqv_table; ----分区表导入数据时应该在后面表面分区,否则无法导入 ----分区的作用就是提高查询效率,不用全表扫描 -----静态导入数据: load data local inpath "/root/*.txt" into table no_fenqv_table; partition (main ="archer"); load data local inpath "/root/assassin.txt" into table fenqv_table partition (main ="assassin"); load data local inpath "/root/tank.txt" into table fenqv_table partition (main ="tank"); load data local inpath "/root/mage.txt" into table fenqv_table partition (main ="mage"); load data local inpath "/root/support.txt" into table fenqv_table partition (main ="support"); load data local inpath "/root/warrior.txt" into table fenqv_table partition (main ="warrior"); set hive.exec.dynamic.partition=true; select name from fenselect *from no_fenqv_table;qv_table where main="tank"and max_sm>9000; --------动态导入数据,即导入数据时分区后面没写死 set hive.exec.dynamic.partition.mode = nonstrict; insert into table fenqv_table_dynamic partition (main) -----这里的main是分区的字段 select no_fenqv_table.*,main_local from no_fenqv_table; ------插入,分区字段的值是查询得到的最后一列数据 desc formatted fenqv_table_dynamic; select * from fenqv_table_dynamic; ----------分桶表,可以于抽样查询 ----2021-01-28,Autauga,Alabama,01001,5554,69 create table if not exists fentong_table( date_ string, local_ string, author string, daihao string, ganran bigint, dath int ) clustered by (author) into 5 buckets row format delimited fields terminated by ","; load data local inpath "/root/us-covid19-counties.dat" into table fentong_table; select *from fentong_table; select *from fentong_table where author="New York"; drop table fentong_table; -----没有指定分割符号时,hive默认为\001 create table if not exists no_format( id int, name_team string, name_player string ); load data local inpath "/root/team_ace_player.txt" into table no_format; select *from no_format; ------视图,通过视图可以保护用户隐私,后接as select 通过查询得到表 ----20223307,lihua7,2406@qq.com,11107,12345612 create table if not exists view_test( id string, name string, emil string, phone string, passwd string ) row format delimited fields terminated by ","; load data local inpath "/root/view.txt" into table view_test; select *from view_test; create view safe_user as select id,name,emil from view_test; select * from safe_user; show views ; ---------物化视图 hive3.0新特性 ---------几乎和创建表的语法相同,只是要在前面要加上materialized view ---------也后接as select,通过查询得到表 create materialized view if not exists wuhua_view as select name,passwd from view_test; --------清空数据,但不删除表 truncate table view_test; ------这里建立数据库在HDFS上面不可见 create database if not exists db_test comment "this is a db" location "/output/DB"; drop database db_test cascade ; show databases ; describe db_test; use db_test; create table if not exists test_ ( id string ); describe database db_test; -----更改数据库的所有者 alter database db_test set owner user; -------------=========== show相关语法 ============ --------查询表的所有分区消息 show partitions fenqv_table_dynamic; -------显示表创建的语法 show create table fenqv_table_dynamic; ------显示hive自定义的函数 show functions explode ; ----------======Load============= ---load data local inpath "/root/*.txt" into table no_fenqv_table; partition (main ="archer"); -------insert into 插入数据非常慢,不推荐,hive底层跑mapreduce insert into table no_format(id, name_team) values (10,"xswl"); select * from no_format; -------us-covid19-counties.dat ----日期,县,州,邮政编码,确诊,死亡 --2021-01-28,Autauga,Alabama,01001,5554,69 create table if not exists covid3( date_ string, count string, zhou string, youbian string, ganran bigint, death bigint ) row format delimited fields terminated by ","; create table if not exists stu( id bigint, name string, sex string, age string, xibie string ) row format delimited fields terminated by ","; load data local inpath "/root/student.txt" into table stu; select *from stu; load data local inpath "/root/us-covid19-counties.dat" into table covid_nofenqv; select zhou from covid_nofenqv group by zhou; set hive.exec.dynamic.partition.mode=nonstrict insert into table covid_fenqv partition (zhou_) select covid_nofenqv.*,zhou from covid_nofenqv; select * from covid_fenqv limit 3; -----insert select的多个作用,其一可以利用查询结果来导入数据到多张表中 ---- 其二可以实现动态分区,分区值为查询结果的最后一列,其三可以导出数据 -------其一:多重插入,一次查询,多次使用,语法如下: --from table_name ---insert into table table_name1 ---select 语句 ---insert into table table_name2 ---select 语句 ---insert into table table_name3 ---select 语句 from covid_nofenqv insert into table covid1 select * where zhou="Puerto Rico" insert into table covid2 select * where zhou="Alabama" insert into table covid3 select * where zhou="California"; select * from covid3; -----....... ---- 其二:可以实现动态分区,分区值为查询结果的最后一列 ----insert into table fenqv_table_dynamic partition (main) -----这里的main是分区的字段 -----select no_fenqv_table.*,main_local from no_fenqv_table; insert into table covid_fenqv partition (zhou_) select covid_nofenqv.*,zhou from covid_nofenqv; -----其三:导出数据,导出数据是一个覆盖操作,一定不要写错路径 -----语法如下: ----insert overwrite [local] directory "paths" -----row format delimited fields terminated by "分割符"; ----stored as"文件存储格式"; ---- select from insert overwrite local directory "/root/covid1.txt" row format delimited fields terminated by "\t" select * from covid3; create table if not exists address ( id string, xian string, shi string, shen string ) row format delimited fields terminated by "\t"; load data local inpath "/root/lianxi.txt" into table lianxi; select *from lianxi limit 5; -----------------==========下面为Hive的语法=========---------- ----------------第一个为select的语法 ---distinct去重较慢,整体去重 select distinct zhou,count from covid_nofenqv limit 2; -----where 后面接条件语句,语句为真,执行,否则不执行 (在分组之前对数据进行过滤) select * from covid3 where 2<1; select * from covid3 where 2>1; ----使用where查询count名字长度大于10的数据 select *from covid3 where length(count)>10; -----where还支持子查询,但不能和聚合函数一起使用,如count,sum select *from covid_nofenqv where length(count)>10 and zhou in (select zhou from covid3); ------ group by 分组,可和聚合函数使用 select count(*) as xs from covid_nofenqv group by zhou; select zhou,sum (death) as zhongji from covid_nofenqv group by zhou; ------having 对where无法使用聚合函数的补充,它可以使用聚合函数 (在分组之后对数据进行过滤) select zhou,sum(death) as shiman from covid_nofenqv group by zhou having shiman>10000; ------ limit 返回多少数据 select *from covid3 limit 5; ---一个参数,表示返回几行数据 select *from covid3 limit 2,5; ---两个参数,第一个表示从第几行开始返回(第一行为 0),第二个参数表示返回几行, ---------=======查询语句执行顺序如下: ----- from ---> where ---> group by -----> having -----> order ----> select ---------- order by 排序 全局排序,asc 升序,desc 倒序 --------只能排序一列,建议和limit 一起使用,否则可能数据量过大导致系统崩溃 select count,ganran,death from covid3 order by death asc limit 5; select count,ganran,death from covid3 order by death desc limit 5; --------cluster by 对结果分区排序,只能是正序 select * from covid3 cluster by count ; select *from stu cluster by age; -------distribute by 负责分组,sort by 负责排序 ------distribute by + sort by 一般配套出现,是对 cluster by 功能的拆分,也是补充,使得排序能够倒序 select * from stu distribute by sex sort by age desc ; -------- union 联合查询 将多个表的结果汇合为一个结果集合,这些表的列应该相同 select name,sex from stu union select count,zhou from covid3; -------union 里面还可以使用条件查询...... select count,death from (select * from covid1 where length(zhou)>6 limit 30) res1 union select count,death from covid3 limit 10; -------select * from covid1 where length(zhou)>6 limit 30这是一个临时结果并给结果取名为res1,写法如上 -------最后的limit 10 是限制整体结果返回10行 ----- from 子查询,子查询得到的结果必须有名称,如下: ----- from (select * from covid1 where length(zhou)>6 limit 30) res1 ----- 同时from 子查询里面也可以有 select 语法树里面的所有东西 还有 union select count,death from (select * from covid1 where length(zhou)>6 union select * from covid3 limit 10) res1; ------ where 子查询,in not in like ........ (生产数据) ------ CTE表达式:公用表达式 ( with )可以在 insert into, creat table as select , select ,creat view as select with res1 as ( select name,age,id from stu where xibie = "MA") select name,age from res1 where id=95003; ------利用CTE表达式创建视图 create view v_ as with res1 as ( select * from yuangong where sal >9000) select res1.id,res1.name,lianxi.phno from res1,lianxi where lianxi.id = res1.id; select * from v_; ----------以下为 join 用法,其内连接与左连接比较常用,若两张表都有 id 为空,也会连接 -------- inner join 内连接 只查询两张表共有的部分,没有的部分用 null 填充 with res2 as ( select * from yuangong where sal > 18000); select yuangong.id,yuangong.name,address.id ,address.shen,address.shi,address.xian from yuangong join address on yuangong.id = address.id; ------- left join 连接, 左连接 查询左表所有的东西。 select id,name,phon,emil from (select yuangong.id,yuangong.name,lianxi.phon,lianxi.emil from yuangong left join lianxi on yuangong.id = lianxi.id ) res where length(phon)<11 or length(emil) < 6; ---清空表里面的数据 truncate table lianxi; truncate table yuangong; truncate table address; select * from lianxi; load data local inpath "/root/xingxi.txt" into table yuangong; load data local inpath "/root/lianxi.txt" into table lianxi; load data local inpath "/root/address.txt" into table address; ------ right join 右连接 查询右表所以的东西,和左连接刚好相反 select lianxi.id,lianxi.phon,lianxi.emil,yuangong.name from yuangong right join lianxi on yuangong.id = lianxi.id; truncate table lianxi; truncate table yuangong; truncate table address; ------ full outer join 全外连接 返回全部数据 select yuangong.*,lianxi.* from yuangong full join lianxi on yuangong.id = lianxi.id; ------ left semi join 返回左边表的部分,不包括重合部分 select yuangong.* from yuangong left semi join address on yuangong.id = address.id; ------------ jion 在 where 之前
关于Hive的部分笔记(黑马程序员hive课)
最新推荐文章于 2024-01-20 19:38:20 发布