hive连接Hadoop和sql,使用sql语句完成对Hadoop的操作
describe function substr;查看函数用法
create table tbname(id int, name string)stored as textfile; 创建表,并按分隔符分割行中的字段值
create external table extbname(id int, name string);创建外部表
create table tbname(id int,name string )partitioned by(ds string);创建外部表,并建立单分区字段ds
create table tbname(id int, name string)partitioned by(ds string,hour string )创建双分区
alter table tbname add columns(new_col int); 表增加一列
alter table tbname add columns(new_col int comment 'a comment');增加一列并增加列字段注释
alert table tbname rename to tbname_new;更改表名
drop table tbname; 删除表(删除表的元数据,如果是托管表还会删除表数据)
dfs-rmr'warehouse/my-table';只删除内容,而保留元数据
alter table tbname drop partition (dt='2008-08-08',hour='09');删除分区,分区的元数据和数据一并删除
load data local inpath'mytext.txt' overwrite into table tbname; 将文件中的数据加载到表中
alter table tbname add partition(ds='20120701')location'/user/hadoop/record/20120701';在已创建的表上添加单分区并指定数据
alert table tbname add partition(ds='2008-08-08,hour='08')location'path/pv1.txt'partition(id='',hour='')location'';在已创建的表上添加双分区并指定数据
alter table tbname add partition(ds='2013-12-12');加载本地数据,根据给定分区列信息
describe tbname;hive> desc tbname;查看表结构
show tables;显示所有表
show tables '.*s'; show tables’*_*’;按正条件(正则表达式)显示表
select * from tbName;查询表数据不会做mapreduce操作
select a.id from tbname a ;查询一列数据,会做mapreduce操作
select tbname2.* from tbname2 a where a.ds='2013-12-12' ;基于分区的查询的语句
show partitions tbname2;查看分区语句
CREATE VIEW valid_records AS SELECT * FROM records2 WHERE temperature !=9999;创建视图
DESCRIBE EXTENDED valid_records;查看视图详细信息
insert overwrite directory '/tmp/hdfs_out' select a.* from tbname2 a where a.ds='2013-12-12';将查询数据输出至目录
将查询结果输出至本地目录
insert overwrite local directory '/tmp/local_out' select ds,count(1) from tbname group by ds;
insert overwrite table events select a.* from tbname a where a.id < 100;
insert overwrite local directory '/tmp/sum' select sum(a.pc) from tbpc a ;
将一个表的统计结果插入另一个表中
from tbname a insert overwrite table events select a.bar,count(1) where a.foo > 0 group by a.bar;
insert overwrite table events select a.bar,count(1) from tbname a where a.foo > 0 group by a.bar;
JOIN:
hive> from tbname t1 join tbname2 t2 on (t1.id = t2.id) insert overwrite table events select t1.id,t1.name,t2,ds;
将多表数据插入到同一表中
FROM src
INSERT OVERWRITE TABLE dest1 SELECT src.* WHERE src.key < 100
INSERT OVERWRITE TABLE dest2 SELECT src.key, src.value WHERE src.key >= 100 and src.key < 200
INSERT OVERWRITE TABLE dest3 PARTITION(ds='2008-04-08', hr='12') SELECT src.key WHERE src.key >= 200 and src.key < 300
INSERT OVERWRITE LOCAL DIRECTORY '/tmp/dest4.out' SELECT src.value WHERE src.key >= 300;
将文件流直接插入文件
FROM invites a INSERT OVERWRITE TABLE events SELECT TRANSFORM(a.foo, a.bar) AS (oof, rab) USING '/bin/cat' WHERE a.ds > '2008-08-09';