hive的SQL操作记录

 

partition:

1、删除表、创建外部表和分区、根据分区导入数据

dfs -rmr /tmp/ext/tab_name1; --删除hadoop文件系统中的目录及文件
dfs -mkdir /tmp/ext/tab_name1;--重新创建目录。建表前需要先创建与表名称对应的目录才行
drop table if exists tab_name1;
create external table  if not exists tab_name1(id int ,name string) partitioned by (aa string) row format delimited fields terminated by ',' location '/tmp/ext/tab_name1';
load data local inpath '/home/conkeyn/jar/a.txt' into table tab_name1 partition(aa='1');
load data local inpath '/home/conkeyn/jar/b.txt' into table tab_name1 partition(aa='2');
dfs -ls /tmp/ext/tab_name1;

 2、根据分页查询

select * from tab_name1 where aa='1';

clustered

1、创建表簇

 

--创建普通表
drop table if exists tab_cdr;
create table if not exists tab_cdr(oaddr string,oareacode string,daddr string,dareacode string,ts string,type string) row format delimited fields terminated by ',';
load data local inpath '/home/conkeyn/jar/output/cdr01.txt' into table tab_cdr;
select count(*) from tab_cdr;
--创建表簇
drop table if exists tab_cdr_buc;
create table if not exists tab_cdr_buc(oaddr string,oareacode string,daddr string,dareacode string,ts string,type string) clustered by (oaddr) into 10 buckets;
--强制使用批量
set hive.enforce.bucketing=true;
--添加数据
insert into table tab_cdr_buc select * from tab_cdr;

 file_format(文件类型)

需要依赖:安装hadoop-lzo压缩库

创建普通表,待会儿需要从普通表上查询记录,并插入到压缩表中

drop table if exists tab_cdr;
create table if not exists tab_cdr(oaddr string,oareacode string,daddr string,dareacode string,ts string,type string) row format delimited fields terminated by ',';
load data local inpath '/home/conkeyn/jar/output/ab.txt' into table tab_cdr;
dfs -ls /user/hive/warehouse/tab_cdr;
select count(*) from tab_cdr;

 

1、sequence file

 

--sequencefile
set hive.exec.compress.output=true;
set mapred.output.compress=true;
set mapred.output.compression.type=BLOCK;
set mapred.output.compression.codec=com.hadoop.compression.lzo.LzoCodec;
drop table if exists tab_cdr_seq;
create table if not exists tab_cdr_seq(oaddr string,oareacode string,daddr string,dareacode string,ts string,type string) row format delimited fields terminated by ',' stored as sequencefile;
insert overwrite  table tab_cdr_seq select * from tab_cdr;
dfs -ls /user/hive/warehouse/tab_cdr_seq;
select count(*) from tab_cdr_seq;

 

2、text file

3、rcf file

--rcfile
set hive.exec.compress.output=true;
set mapred.output.compress=true;
set mapred.output.compression.type=BLOCK;
set mapred.output.compression.codec=com.hadoop.compression.lzo.LzoCodec;
drop table if exists tab_cdr_rc;
create table if not exists tab_cdr_rc(oaddr string,oareacode string,daddr string,dareacode string,ts string,type string) row format delimited fields terminated by ',' stored as rcfile;
insert overwrite  table tab_cdr_rc select * from tab_cdr;
dfs -ls /user/hive/warehouse/tab_cdr_rc;
select count(*) from tab_cdr_rc;

4、input file

5、gz

--使用压缩文件格式
drop table if exists tab_name;
create table  if not exists tab_name(id int ,name string) row format delimited fields terminated by ',';
load data local inpath '/home/conkeyn/jar/a.txt.gz' into table tab_name;
dfs -ls /user/hive/warehouse/tab_name;
--在查询时,后台会自动为我们解压
select * from tab_name;

 6、字段的复杂类型

(1)、Array类型

   数据准备:

  

rob,bob,steven  1,2,3
amy,andy        11,22
jac     11,22,33,44,55

   SQL操作:

--field array type
drop table if exists tab_arr;
create table if not exists tab_arr(a array<string>,b array<int>) 
row format delimited fields terminated by '\t'
collection items terminated by ',';
load data local inpath '/home/conkeyn/jar/arr.txt' into table tab_arr;
select a[2] from tab_arr;
select * from tab_arr where array_contains(a,'rob');
select * from tab_arr where array_contains(b,22);
insert into table tab_arr select array(oaddr,oareacode),array(0) from tab_cdr;

 (2)Map类型

 数据准备:

rob     age:10,tel:87654321
amy     age:17,tel:09876543,addr:shanghai
bob     age:18,tel:98765432,addr:beijing

 SQL操作

drop table if exists tab_map;
create table if not exists tab_map(name string,info map<string,string>) 
row format delimited fields terminated by '\t'
collection items terminated by ','
map keys terminated by ':';
load data local inpath '/home/conkeyn/jar/map.txt' into table tab_map;
select info['age'] from tab_map;
insert into table tab_map select oaddr,map('oareacode',oareacode,'daddr',daddr) from tab_cdr;
select * from tab_map limit 10;

 (3)、Struct类型

数据准备:

rob     10,87654321
amy     17,09876543,shanghai
bob     18,98765432beijing

 SQL操作

drop table if exists tab_struct;
create table if not exists tab_struct(name string,info struct<age:int,tel:string,addr:string>) 
row format delimited fields terminated by '\t'
collection items terminated by ',';
load data local inpath '/home/conkeyn/jar/struct.txt' into table tab_struct;
select * from tab_struct limit 10;
select info.age from tab_struct;
insert into table tab_struct select oaddr,named_struct('age',0,'tel',daddr,'addr',dareacode) from tab_cdr;

 7、查询

(1)join(联接)

 

--表联接
drop table if exists tab_areaname;
create table if not exists tab_areaname(code string,name string) 
row format delimited fields terminated by '\t';
load data local inpath '/home/conkeyn/jar/areaname.txt' overwrite into table tab_areaname;
select * from tab_areaname;

drop table if exists tab_res1;
create table if not exists tab_res1(oaddr string ,cityname string);
insert into table tab_res1 select a.oaddr,b.name from tab_cdr a inner join tab_areaname b on a.oareacode = b.code;
-- map join
--mapjoin使用条件是,mapjoin(table_name)的table_name这张表的数据量要比较小时才能显现出效果。
insert into table tab_res1 select /*+ mapjoin(a) */ b.oaddr,a.name from  tab_cdr b inner join tab_areaname a on a.code = b.oareacode;

--三张表联接
drop table if exists tab_user;
create table tab_user(addr string);
insert overwrite table tab_user select distinct(oaddr) from tab_cdr limit 100;
insert into table tab_res1 select /*+ mapjoin(a) */ b.oaddr,a.name from  tab_cdr b inner join tab_areaname a on a.code = b.oareacode inner join tab_user c on c.addr = b.oaddr;

 

从同张表中取出数据分别插入不同表(以下示例是使用同一张表不同分区)中。

drop table if exists tab_indb_tmp; 
create table tab_indb_tmp(oaddr string,oareacode string,daddr string,dareacode string ,ts string, type string) row format delimited fields terminated by ',';
drop table if exists tab_indb; 
create table tab_indb(oaddr string,oareacode string,daddr string,dareacode string ,ts string, type string) partitioned by (date string) stored as rcfile;
--按分区插入数据语法
from tab_indb_tmp insert into table tab_indb partition(date="1st") select * where substr(ts,7,2)<="10"
insert into table tab_indb partition(date="2ed") select * where substr(ts,7,2) between "11" and "20"
insert into table tab_indb partition(date="3rd") select * where substr(ts,7,2)>"20";

 

d

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值