创建数据库
hive> create database base1;
使用数据库
hive> use base1;
创建表
hive> create table table1(cols string,cols1 string);
创建一个表与当前hive存在的表结构一样
hive> create table table1 like table2;
创建分区表
hive> create table table1(cols string,cols2 string) partitioned by (cols3 string,cols4 string);
加载分区表数据
hive> load data local inpath '/home/hadoop/input/hive/partitions/file1' into table table1 partition (cols3='1998-01-01', cols4='SZ');
从本地文件加载数据
hive> load data local inpath '/d:/123/123.txt' overwrite into table table1;
展示表中分区
hive> show partitions table1;
展示所有数据库
hive> show databases;
展示所有表
hive> show tables;
展示名称以s结尾的表
hive> show tables '.*s';
显示表结构
hive> describe table1;
重命名表名称
hive> alter table table1 rename to table2;
添加新一列
hive> alter table table1 add columns (other string comment '其他');
删除表
hive> drop table table1;
删除表中的数据,但保留表结构(表定义)
hive> dfs -rmr /home/hive/base1/table1;
显示所有函数
hive> show functions;
查看函数用法
hive> describe function xx;
查看数组,map,结构
hive> select cols[0], cols1['a'], cols2.b from complex;
内连接
hive> select sales.*, things.* from sales join things on (sales.id = things.id);
外连接
hive> select sales.*, things.* from sales left outer join things on (sales.id = things.id);
hive> select sales.*, things.* from sales right outer join things on (sales.id = things.id);
hive> select sales.*, things.* from sales full outer join things on (sales.id = things.id);
in查询
hive> select * from things left semi join sales on (sales.id = things.id);
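map连接(hive可以把较小的表放入每个mapper的内存来执行连接)
hive> select /*+ mapjoin(things) */ sales.*, things.* from sales join things on (sales.id = things.id);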
insert overwrite table ... select:新表预先存在
hive> from records2
insert overwrite table stations_by_year select year, count(distinct station) group by year
insert overwrite table records_by_year select year, count(1) group by year
insert overwrite table good_records_by_year select year, count(1) where temperature != 9999 and (quality = 0 or quality = 1 or quality = 4 or quality = 5 or quality = 9) group by year;
create table ... as select:新表预先不存在
hive> create table target as select cols1, cols2 from source;
创建视图
hive> create view valid_records as select * from records2 where temperature != 9999;
查看视图详细信息
hive> describe extended valid_records;
查看hive为某个查询使用多少个mapreduce作业
hive> explain select sales.*, things.* from sales join things on (sales.id = things.id);