hive常用sql语句(1)

set hive.cli.print.header=true;

CREATE TABLE page_view(viewTime INT, userid BIGINT,
     page_url STRING, referrer_url STRING,
     ip STRING COMMENT 'IP Address of the User')
 COMMENT 'This is the page view table'
 PARTITIONED BY(dt STRING, country STRING)
 ROW FORMAT DELIMITED
   FIELDS TERMINATED BY '\001'
STORED AS SEQUENCEFILE;

//sequencefile
create table tab_ip_seq(id int,name string,ip string,country string) 
    row format delimited
    fields terminated by ','
    stored as sequencefile;
insert overwrite table tab_ip_seq select * from tab_ext;


//create & load
create table tab_ip(id int,name string,ip string,country string) 
    row format delimited
    fields terminated by ','
    stored as textfile;
load data local inpath '/home/hadoop/ip.txt' into table tab_ext;

//external   创建一个外部表
CREATE EXTERNAL TABLE tab_ip_ext(id int, name string,
     ip STRING,
     country STRING)
 ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
 STORED AS TEXTFILE
 LOCATION '/external/hive';
 

// CTAS   通过已存在的表来新建一个表定义,只建立表结构,并不会插入数据
CREATE TABLE tab_ip_ctas
   AS
SELECT id new_id, name new_name, ip new_ip,country new_country
FROM tab_ip_ext
SORT BY new_id;
如果要通过select的方式批量插入数据
insert into table tab_ip_ctas select * from tab_ip;


//CLUSTER <--相对高级一点,你可以放在有精力的时候才去学习>
create table tab_ip_cluster(id int,name string,ip string,country string)
clustered by(id) into 3 buckets;

load data local inpath '/home/hadoop/ip.txt' overwrite into table tab_ip_cluster;
set hive.enforce.bucketing=true;
insert into table tab_ip_cluster select * from tab_ip;

select * from tab_ip_cluster tablesample(bucket 2 out of 3 on id); 

//PARTITION
create table tab_ip_part(id int,name string,ip string,country string) 
    partitioned by (part_flag string)
    row format delimited fields terminated by ',';
    
load data local inpath '/home/hadoop/ip.txt' overwrite into table tab_ip_part
     partition(part_flag='part1');
    
    
load data local inpath '/home/hadoop/ip_part2.txt' overwrite into table tab_ip_part
     partition(part_flag='part2');

select * from tab_ip_part;

select * from tab_ip_part  where part_flag='part2';
select count(*) from tab_ip_part  where part_flag='part2';


alter table tab_ip change id id_alter string;
ALTER TABLE tab_cts ADD PARTITION (partCol = 'dt') location '/external/hive/dt';

show partitions tab_ip_part;

//insert from select
create table tab_ip_like like tab_ip;
insert overwrite table tab_ip_like
    select * from tab_ip;
   
//write to hdfs
insert overwrite local directory '/home/hadoop/hivetemp/test.txt' select * from tab_ip_part where part_flag='part1';    
insert overwrite directory '/hiveout.txt' select * from tab_ip_part where part_flag='part1';

//cli shell  在linux的shell界面下直接执行hql语句
hive -S -e 'select country,count(*) from tab_ext' > /home/hadoop/hivetemp/e.txt  

select * from tab_ext sort by id desc limit 5;

select a.ip,b.book from tab_ext a join tab_ip_book b on(a.name=b.name);


//array 
create table tab_array(a array<int>,b array<string>)
row format delimited
fields terminated by '\t'
collection items terminated by ',';

select a[0] from tab_array;
select * from tab_array where array_contains(b,'word');
insert into table tab_array select array(0),array(name,ip) from tab_ext t; 

//map
create table tab_map(name string,info map<string,string>)
row format delimited
fields terminated by '\t'
collection items terminated by ','
map keys terminated by ':';

load data local inpath '/home/hadoop/hivetemp/tab_map.txt' overwrite into table tab_map;
insert into table tab_map select name,map('name',name,'ip',ip) from tab_ext; 

//struct
create table tab_struct(name string,info struct<age:int,tel:string,addr:string>)
row format delimited
fields terminated by '\t'
collection items terminated by ','

load data local inpath '/home/hadoop/hivetemp/tab_st.txt' overwrite into table tab_struct;
insert into table tab_struct select name,named_struct('age',id,'tel',name,'addr',country) from tab_ext;

//UDF
select if(id=1,first,no-first),name from tab_ext;

hive>add jar /home/hadoop/myudf.jar;
hive>CREATE TEMPORARY FUNCTION my_lower AS 'org.dht.Lower';
select my_upper(name) from tab_ext;  


 

转载于:https://my.oschina.net/newchaos/blog/1600938

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值