hive 命令

最新推荐文章于 2022-07-06 19:05:01 发布

一只小菜鸟(*￣︶￣)

最新推荐文章于 2022-07-06 19:05:01 发布

阅读量171

点赞数

分类专栏： Hive 文章标签： hive hive 基本命令 hive 视图 hive 事务处理 hive 数据导出

本文链接：https://blog.csdn.net/mao502010435/article/details/88879800

版权

Hive 专栏收录该内容

5 篇文章 0 订阅

订阅专栏

//创建分区表，设置 ， 为字段分隔符
CREATE TABLE IF NOT EXISTS tablename(
id int COMMENT '主键',
name string  COMMENT '名称',
age int COMMENT '年龄')
COMMENT '表名' 
PARTITIONED BY (Year INT COMMENT '年分区',
Month INT COMMENT '月分区')
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' 
STORED AS TEXTFILE ;

//创建表,external 外部表
	$hive>CREATE external TABLE IF NOT EXISTS t2(id int,name string,age int)
	COMMENT 'xx' ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE ; 

	//查看表数据
	$hive>desc t2 ;
	$hive>desc formatted t2 ;

	//加载数据到hive表
	$hive>load data local inpath '/home/centos/customers.txt' into table t2 ;	//local上传文件
	$hive>load data inpath '/user/centos/customers.txt' [overwrite] into table t2 ;	//移动文件

	//复制表
	mysql>create table tt as select * from users ;		//携带数据和表结构
	mysql>create table tt like users ;			//不带数据，只有表结构

	hive>create table tt as select * from users ;	
	hive>create table tt like users ;	


	//count()查询要转成mr
	$hive>select count(*) from t2 ;
	$hive>select id,name from t2 ;


	//
	$hive>select * from t2 order by id desc ;				//MR
		
	//启用/禁用表
	$hive>ALTER TABLE t2 ENABLE NO_DROP;	//不允许删除
	$hive>ALTER TABLE t2 DISABLE NO_DROP;	//允许删除


	//分区表,优化手段之一，从目录的层面控制搜索数据的范围。
	//创建分区表.
	$hive>CREATE TABLE t3(id int,name string,age int) PARTITIONED BY (Year INT, Month INT) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' ;

	//显式表的分区信息
	$hive>SHOW PARTITIONS t3;

	//添加分区,创建目录
	$hive>alter table t3 add partition (year=2014, month=12);
	
	//删除分区
	hive>ALTER TABLE employee_partitioned DROP IF EXISTS PARTITION (year=2014, month=11);

	//分区结构
	hive>/user/hive/warehouse/mydb2.db/t3/year=2014/month=11
	hive>/user/hive/warehouse/mydb2.db/t3/year=2014/month=12


	//加载数据到分区表
	hive>load data local inpath '/home/centos/customers.txt' into table t3 partition(year=2014,month=11);

	//创建桶表
	$hive>CREATE TABLE t4(id int,name string,age int) CLUSTERED BY (id) INTO 3 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' ;
	
	//加载数据不会进行分桶操作
	$hive>load data local inpath '/home/centos/customers.txt' into table t4 ;
	
	//查询t3表数据插入到t4中。
	$hive>insert into t4 select id,name,age from t3 ;

	//桶表的数量如何设置?
	//评估数据量，保证每个桶的数据量block的2倍大小。


	//连接查询
	$hive>CREATE TABLE customers(id int,name string,age int) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' ;
	$hive>CREATE TABLE orders(id int,orderno string,price float,cid int) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' ;

	//加载数据到表
	//内连接查询
	hive>select a.*,b.* from customers a , orders b where a.id = b.cid ;
	//左外
	hive>select a.*,b.* from customers a left outer join orders b on a.id = b.cid ;
	hive>select a.*,b.* from customers a right outer join orders b on a.id = b.cid ;
	hive>select a.*,b.* from customers a full outer join orders b on a.id = b.cid ;

	//explode,炸裂,表生成函数。
	//使用hive实现单词统计
	//1.建表
	$hive>CREATE TABLE doc(line string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' ;

export 数据导出
---------
	$hive>EXPORT TABLE customers TO '/user/centos/tmp.txt';		//导出表结构+数据。

	//order全排序
	$hive>select * from orders order by id asc ;

	//sort,map端排序,本地有序。,组内排序
	$hive>select * from orders sort by id asc ;

	//DISTRIBUTE BY类似于mysql的group by,进行分区操作。
	//select cid , ... from orders distribute by cid sort by name ;			//注意顺序.
	$hive>select id,orderno,cid from orders distribute by cid sort by cid desc ;

	//cluster by ===>  distribute by cid sort by cid
    select id,orderno,cid from orders distribute by cid sort by cid desc ; 
    <===>select id,orderno,cid from orders cluster by cid ;

函数
----------------
	mysql>select concat('tom',1000) ;
	$hive>select current_database(),current_user() ;
	$hive>tab								//查看帮助

设置作业参数
---------------
	 $hive>set hive.exec.reducers.bytes.per.reducer=xxx			//设置reducetask的字节数。
	 $hive>set hive.exec.reducers.max=0				//设置reduce task的最大任务数	
	 $hive>set mapreduce.job.reduces=0				//设置reducetask个数。

动态分区
---------------
	动态分区模式:strict-严格模式，插入时至少指定一个静态分区，nonstrict-非严格模式-可以不指定静态分区。
	set hive.exec.dynamic.partition.mode=nonstrict			//设置非严格模式
	$hive>INSERT OVERWRITE TABLE employees PARTITION (country, state) SELECT ..., se.cnty, se.st FROM staged_employees se WHERE se.cnty = 'US';

hive事务处理在>0.13.0之后支持行级事务。
---------------------------------------
	1.所有事务自动提交。
	2.只支持orc格式文件。
	3.使用bucket表。
	4.配置hive参数，使其支持事务。
        $hive>SET hive.support.concurrency = true;				
        $hive>SET hive.enforce.bucketing = true;					
        $hive>SET hive.exec.dynamic.partition.mode = nonstrict;	
        $hive>SET hive.txn.manager = org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
        $hive>SET hive.compactor.initiator.on = true;
        $hive>SET hive.compactor.worker.threads = 1;

	5.使用事务性操作
	$>CREATE TABLE tx(id int,name string,age int) CLUSTERED BY (id) INTO 3 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' stored as orc TBLPROPERTIES ('transactional'='true');

聚合处理
---------------
	$hive>select cid,count(*) c ,max(price) from orders group by cid having c > 1 ;

wordcount 单词统计
----------------
	$hive>select t.word,count(*) c from ((select explode(split(line, ' ')) as word from doc) as t) group by t.word order by c desc limit 2 ;

创建新表:stats(word string,c int) ;
	将查询结果插入到指定表中。

$hive>create table states as select t.word,count(*) c from ((select explode(split(line, ' ')) as word from doc) as t) group by t.word order by c desc limit 2 ;

-- 直接将查询结果写入表states

view:视图,虚表
-----------
	//创建视图
	$hive>create view v1 as select a.id aid,a.name ,b.id bid , b.order from customers a left outer join default.tt b on a.id = b.cid ;

	//查看视图
	$hive>show tables ;
	$hive>select * from v1 ;

Map端连接
-------------------
	$hive>set hive.auto.convert.join=true			//设置自动转换连接,默认开启了。
	//使用mapjoin连接暗示实现mapjoin
	$hive>select /*+ mapjoin(customers) */ a.*,b.* from customers a left outer join orders b on a.id = b.cid ;

一只小菜鸟(*￣︶￣)

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
hive 命令

//创建分区表，设置，为字段分隔符CREATE TABLE IF NOT EXISTS tablename(id int COMMENT '主键',name string COMMENT '名称',age int COMMENT '年龄')COMMENT '表名' PARTITIONED BY (Year INT COMMENT '年分区',Month INT COMMENT...
复制链接

扫一扫

专栏目录