hive命令

#4、mysql驱动拷贝
在D:\Maven\jar_mgr\apache-maven-3.5.4\repository\mysql\mysql-connector-java\5.1.47找到mysql-connector-java-5.1.47.jar包将其复制到/opt/software/hive312/lib下
#5、初始化
cd /opt/software/hive312
./schematool -dbType mysql -initSchema
#6、启动关闭hive服务
#[后台启动]
nohup hive --service metastore>/dev/null 2>&1 &
nohup hive --service hiveserver2>/dev/null 2>&1 &
# 查看
jps -ml
# [关闭]
kill -9

#释放权限
hdfs dfs -chmod -R 777 /

#查看端口号,做侦听
netstat -anp|grep 10000

#beeline [hql]
beeline -u jdbc:hive2://192.168.171.200:10000 [-n root]
#退出
ctrl d 

#7、hive执行流程

#8、hive数据类型
java mysql hive

字符 char[]/String char(n)/varchar(n) string
数值 byte/short/int/long tinyint/smallint/int/bigint int
BigInteger decimal(38,18) decimal(38,18)
float/double
BigDecimal
布尔 boolean bit boolean
日期 Date date/datetime/timestamp date/datetime/timestamp
Calender
SimpleDateFormat 取值
结构 ArrayList ‘aa,bb,cc’ array f[n]
HashMap ‘key1,key2,key3;v1,v2,v3’ map<KEY_TYPE,VALUE_TYPE> f[‘KEY’]
class MyClass{} structf1:TYPE,f2:TYPE,... f.f1

#9、orc
#列式文件存储格式
#很高的压缩比
#支持分裂(切片)
#查询优化
#为每一个字段建立一个轻量级的索引(字段索引:行偏移量)
#查询时先通过索引确认,不匹配直接跳行
#数据存储量少,传输量少,减少了Task数量

#10、内部表和外部表
create [external] table TABLE_NAME(…)…;
drop table TABLE_NAME;
#内部表:同时删除mysql中hive312内的表结构,和hdfs上存储的映射数据

	#外部表:只能删除mysql中hive312内的表结构,如需hdfs上的数据文件需要使用hdfs dfs -rm ... 命令

#11、分区表和分桶表
分区表 partitioned by … partition by order by 虚列 文件夹
create [external] table TABLE_NAME(

)
partitioned by (name type,name2 type2,…) #(左为父文件夹,右为子文件夹)
…;

分桶表	clusted by ... into N buckets				实列	文件

#12、常用函数
算数运算符 + - * / % & | ~ ^
关系运算符 == <> > >= < <=
逻辑运算符 and or not
数学函数
#1、 pow()
#2、 mod()
#3、 pmod(±n,m)
---------------------------------------------------------
select pmod(-3,5);
(-3+n*5)%5
# n为正整数取值为从1开始带入至表达式第一次为正值时的值
---------------------------------------------------------
#4、 rand()、rand(seed) 伪随机,值固定
#5、 sqrt(num) 取平方根
#6、 sign(num) 取符号{-1,0,1}
#7、 positive(num) 取本身
#8、 negative(num) 取相反数
#9、 abs(num) 取绝对值
#10、ceil(num) 向上取整 ceiling()
#11、floor(num) 向下取整
#12、round(num) 四舍五入 round(num,3) 保留小数点后3位
#13、bround(decimal) 银行家舍入法:bround(2.5)=2 bround(3.5)=4
#14、bin() 十进制转为二进制
#15、binary() 返回字符串
#16、conv(num,from_base,to_base) 进制转换
#17、greatest(T…ts) 返回最大值
#18、least(T,…ts) 返回最小值
#19、factorial 阶乘
#20、shiftleft(bigint|int,int) 左移
#21、shiftright(bigint|int,int) 右移
字符串函数
#1、concat() 字符串拼接
#2、concat_ws(sep,string,array()) 指定分隔符拼接
#3、collect_list 取集合
select stu_gender,collect_list(stu_name) names from stuscore group by stu_gender;
±------------±---------------------------------------------------+
| stu_gender | names |
±------------±---------------------------------------------------+
| female | [“henry2”,“henry6”,“henry8”] |
| male | [“henry”,“henry1”,“henry3”,“henry4”,“henry5”,“henry7”,“henry9”] |
±------------±---------------------------------------------------+

	select stu_gender,concat_ws(',',collect_list(stu_name)) names from stuscore group by stu_gender;
	+-------------+--------------------------------------------------+
	| stu_gender  |                      names                       |
	+-------------+--------------------------------------------------+
	| female      | henry2,henry6,henry8                             |
	| male        | henry,henry1,henry3,henry4,henry5,henry7,henry9  |
	+-------------+--------------------------------------------------+

	#4、substring(string,pos int[,len int])	
	select substring('henry.chen@hotmail.com',6);
	+--------------------+
	|        _c0         |
	+--------------------+
	| .chen@hotmail.com  |
	+--------------------+
	
	select substring('henry.chen@hotmail.com',instr('henry.chen@hotmail.com','@')+1);
	+--------------+
	|     _c0      |
	+--------------+
	| hotmail.com  |
	+--------------+

	#5、locate(substr,str[,pos])		从pos开始找到substr首次出现首字母的位置,否则返回0
	select locate('@','henry.chen@hotmail.com');
	+------+
	| _c0  |
	+------+
	| 11   |
	+------+

	#6、instr(str,substr)				找到substr在str中首次出现的首字母的位置,否则返回0
	select instr('henry.chen@hotmail.com','@');
	+------+
	| _c0  |
	+------+
	| 11   |
	+------+

	#7、replace(str,substr,rep)			将str中所有substr转换为rep						
	select replace('123abc456abc789','abc','');
	+------------+
	|    _c0     |
	+------------+
	| 123456789  |
	+------------+
	
	#8、regexp_replace(str,regex,rep)	将str中满足regex规则的内容替换为rep	
	select regexp_replace('123abc456def789','[a-z]+','');
	+------------+
	|    _c0     |
	+------------+
	| 123456789  |
	+------------+
	
	#9、sentences(str)				
	#将str以标点符号和空格分别作为第1、2个维度的分隔符,将str拆分成一个二维数组
	select sentences('how are you? fine, thank you! and you');
	+----------------------------------------------------+
	|                        _c0                         |
	+----------------------------------------------------+
	| [["how","are","you"],["fine","thank","you"],["and","you"]] |
	+----------------------------------------------------+

	#10、ngrams(array<array<string>> arr,int n,int topk)
	#针对arr中连续n个单词做词频统计并倒序排序,将topk个统计
	select ngrams(sentences('how are you? fine, thank you! and you'),1,2);
	+----------------------------------------------------+
	|                        _c0                         |
	+----------------------------------------------------+
	| [{"ngram":["you"],"estfrequency":3.0},{"ngram":["how"],"estfrequency":1.0}] |
	+----------------------------------------------------+

	select ngrams(sentences('how are you? fine, thank you! and you'),2,2);
	+----------------------------------------------------+
	|                        _c0                         |
	+----------------------------------------------------+
	| [{"ngram":["how","are"],"estfrequency":1.0},{"ngram":["thank","you"],"estfrequency":1.0}] |
	+----------------------------------------------------+

	#11、context_ngrams(array<array<string>> arr,array<string> cnt,int topk)
	#针对arr中连续size(cnt)个单词组合以cnt中非null内容匹配统计,按数量倒序排列,将topk个结果返回
	select context_ngrams(sentences('how are you? fine, thank you! and you'),array(null,null),2);
	+----------------------------------------------------+
	|                        _c0                         |
	+----------------------------------------------------+
	| [{"ngram":["how","are"],"estfrequency":1.0},{"ngram":["thank","you"],"estfrequency":1.0}] |
	+----------------------------------------------------+
	
	select context_ngrams(sentences('how are you? fine, are you ok? are they friends?'),array('are',null),2);
	+----------------------------------------------------+
	|                        _c0                         |
	+----------------------------------------------------+
	| [{"ngram":["you"],"estfrequency":2.0},{"ngram":["they"],"estfrequency":1.0}] |
	+----------------------------------------------------+

	#12、encode(string cnt,string encode)
	#将cnt转换为encode编码内容
	
	#13、cast(exp as type)				将exp转为type类型的值
	
	#14、get_json_object(string json,string path)
	#提取json格式字符串中指定key的值,一次解析一项,但可多层
	#path(key:value): '$.key' '$.arraykey[index]' '$.objkey.subkey'
	select get_json_object('{"name":"henry","hobbies":["s","a","c"],"address":{"province":"js","city":"nj"}}','$.name');
	+--------+
	|  _c0   |
	+--------+
	| henry  |
	+--------+

	select get_json_object('{"name":"henry","hobbies":["s","a","c"],"address":{"province":"js","city":"nj"}}','$.hobbies');
	+----------------+
	|      _c0       |
	+----------------+
	| ["s","a","c"]  |
	+----------------+

	select get_json_object('{"name":"henry","hobbies":["s","a","c"],"address":{"province":"js","city":"nj"}}','$.hobbies[0]');
	+------+
	| _c0  |
	+------+
	| s    |
	+------+

	select get_json_object('{"name":"henry","hobbies":["s","a","c"],"address":{"province":"js","city":"nj"}}','$.address.city');
	+------+
	| _c0  |
	+------+
	| nj   |
	+------+


	#15、json_tuple(string json,string...ps)
	#提取json字符串中指定key列表的值,一次解析多个但仅限一层
	select json_tuple('{"name":"henry","hobbies":["s","a","c"],"address":{"province":"js","city":"nj"}}','name');
	+--------+
	|   c0   |
	+--------+
	| henry  |
	+--------+

	select json_tuple('{"name":"henry","hobbies":["s","a","c"],"address":{"province":"js","city":"nj"}}','hobbies');
	+----------------+
	|       c0       |
	+----------------+
	| ["s","a","c"]  |
	+----------------+

	select json_tuple('{"name":"henry","hobbies":["s","a","c"],"address":{"province":"js","city":"nj"}}','hobbies[0]');
	+-------+
	|  c0   |
	+-------+
	| NULL  |
	+-------+

	select json_tuple('{"name":"henry","hobbies":["s","a","c"],"address":{"province":"js","city":"nj"}}','name','hobbies','address');
	+--------+----------------+--------------------------------+
	|   c0   |       c1       |               c2               |
	+--------+----------------+--------------------------------+
	| henry  | ["s","a","c"]  | {"province":"js","city":"nj"}  |
	+--------+----------------+--------------------------------+

	#16、in_file(string cnt,string filepath)
	#返回内容是否在文件filepath中,必须整行
	select in_file('asfaf','/root/kb12/test.log');
	+-------+
	|  _c0  |
	+-------+
	| true  |
	+-------+

	#17、parse_url(string url,string part[,string key])
	#解析url根据part提取内容,当part为query时添加key进行单独键的值提取
	#part:PROTOCOL,HOST,QUERY,REF,PATH,USERINFO		必须大写
	select parse_url('http://192.168.171.200:9870/explorer.html#/user/hive/warehouse/test.db/stuscorecopy/class_name=kb12','HOST');
	+------------------+
	|       _c0        |
	+------------------+
	| 192.168.171.200  |
	+------------------+

	#18、printf(string format,T...t)		格式化字符串
	select printf('%s,%d,%.2f','henry',18,23456.234);
	+--------------------+
	|        _c0         |
	+--------------------+
	| henry,18,23456.23  |
	+--------------------+

	#19、str like #%	左侧有%时失效
	
	#20、str rlike regex
	select '{"name":"henry","age":"18","gender":"male"}' rlike '^.*?"age":"\\d+",.*?$';
	+-------+
	|  _c0  |
	+-------+
	| true  |
	+-------+
	
	#21、regexp_extract(str,group_regex,pos)
	#根据group_regex匹配str提取第pos个元素
	select regexp_extract('{"name":"henry","age":"18","gender":"male"}','\\{"name":"(.*?)","age":"(.*?)","gender":"(.*?)"\\}',1);
	+--------+
	|  _c0   |
	+--------+
	| henry  |
	+--------+

	#22、array<string> split(string src,string regex)
	#根据正则regex分割字符串,支持多字符分割
	select split('aa,bb.cc',',|\\.');
	+-------------------+
	|        _c0        |
	+-------------------+
	| ["aa","bb","cc"]  |
	+-------------------+
	
	#23、str_to_map(string cnt,string kvsSep,string kvSep)
	#将字符串cnt使用kvsSep作为键值对之间分隔符kvSep作为键和值得分隔符转化为map对象
	select str_to_map('name:henry,age:18,gender:male',',',':');
	+----------------------------------------------+
	|                     _c0                      |
	+----------------------------------------------+
	| {"name":"henry","age":"18","gender":"male"}  |
	+----------------------------------------------+

	#24、translate(str,from,to)
	#用to来替换str中from的内容
	select translate('abc123aacc1123','a','');
	+--------------+
	|     _c0      |
	+--------------+
	| bc123cc1123  |
	+--------------+

	select translate('abc123aacc1123','a1','');
	+-----------+
	|    _c0    |
	+-----------+
	| bc23cc23  |
	+-----------+
	
	select translate('abc123aacc1123','a1','A');
	+--------------+
	|     _c0      |
	+--------------+
	| Abc23AAcc23  |
	+--------------+

	select translate('abc123aacc1123','a1','AB');
	+-----------------+
	|       _c0       |
	+-----------------+
	| AbcB23AAccBB23  |
	+-----------------+

	#25、initcap()				首字母大写
	select initcap('abc');
	+------+
	| _c0  |
	+------+
	| Abc  |
	+------+

	#26、md5(concat('salt_prefix',field,'salt_sufffix')) 加密
	select md5('kb12');
	+-----------------------------------+
	|                _c0                |
	+-----------------------------------+
	| fc32ce27d478041efedc9e988a2c0b0e  |
	+-----------------------------------+

	select md5(concat('kgc','kb12','henry'));	#加严
	+-----------------------------------+
	|                _c0                |
	+-----------------------------------+
	| 27add61faa561923d81bdd68c6437f0f  |
	+-----------------------------------+

	#27、base64(binary(str))	将str字符串使用base64加密
	select base64(binary('kb12'));
	+-----------+
	|    _c0    |
	+-----------+
	| a2IxMg==  |
	+-----------+

	#28、unbase64(str)			将str字符串使用base64解密
	select unbase64('a2IxMg==');
	+-------+
	|  _c0  |
	+-------+
	| kb12  |
	+-------+
	
	#29、substring_index
	
	#30、base64(aes_decrypt(string cnt,string secretKey))	复杂加密
	#secretKey的长度 16+n(0~正整数)*8
	select base64(aes_encrypt('I Love You','kb12202106160890')); 
	+---------------------------+
	|            _c0            |
	+---------------------------+
	| P48HxSlhsjK8/QUrDFvTSg==  |
	+---------------------------+

	#31、aes_decrypt(unbase64(strAfterEnc),string secretKey)	解密
	select aes_decrypt(unbase64('P48HxSlhsjK8/QUrDFvTSg=='),'kb12202106160890');
	+-------------+
	|     _c0     |
	+-------------+
	| I Love You  |
	+-------------+

集合函数
	#1、size(array/map)					返回集合元素的数量
	
	#2、array_contains(array,item)		返回数组中包含元素item行
	
	#3、array(item1,...,itemn)			返回多个元素的数组
	
	#4、map(k1,v1,...,kn,vn)			返回多个元素的键值对
	select map('name','henry','age',18);
	+------------------------------+
	|             _c0              |
	+------------------------------+
	| {"name":"henry","age":"18"}  |
	+------------------------------+

	#5、struct(v1,...,vn)				返回多个元素的结构体(自动追加列名col1,...,coln)
	select struct('henry',18);
	+-----------------------------+
	|             _c0             |
	+-----------------------------+
	| {"col1":"henry","col2":18}  |
	+-----------------------------+

	#6、map_keys(map)					返回键值对的键集合
	select map_keys(scores) from student;
	+----------------------------+
	|            _c0             |
	+----------------------------+
	| ["java","mysql","hadoop"]  |
	| ["java","mysql","hadoop"]  |
	| ["java","mysql","hadoop"]  |
	+----------------------------+

	#7、map_values(map)					返回键值对的值集合
	select map_values(scores) from student;
	+-------------+
	|     _c0     |
	+-------------+
	| [88,76,83]  |
	| [78,75,73]  |
	| [82,89,80]  |
	+-------------+

	#8、sort_array(array)				返回数组升序排序后的结果
	
	
日期函数
	#1、current_date()					返回系统当前日期
		#current_timestamp()			返回系统当前日期的完整格式
		#unix_timestamp([string|date|datetime|timestamp[,date_format]])
		#返回系统当前时间戳,单位秒
		#如果只有参数1,返回参数1的完整格式对应的时间戳
		#如果有两个参数,返回参数1对应参数2格式的时间戳
	select current_timestamp();
	+--------------------------+
	|           _c0            |
	+--------------------------+
	| 2021-06-17 09:53:27.456  |
	+--------------------------+

	select unix_timestamp();
	+-------------+
	|     _c0     |
	+-------------+
	| 1623895026  |
	+-------------+

	#2、date_add(date start_date,int days)		返回start_date之后
		#add_months(string|date|datetime|timestamp start_date,int months)
		#返回start_date之后第months月的日期
	select date_add(current_date(),3);
	+-------------+
	|     _c0     |
	+-------------+
	| 2021-06-20  |
	+-------------+

	select date_add('2020-10-01',-3);
	+-------------+
	|     _c0     |
	+-------------+
	| 2020-09-28  |
	+-------------+
	
	
	select date_sub('2020-10-01',3);
	+-------------+
	|     _c0     |
	+-------------+
	| 2020-09-28  |
	+-------------+

	select date_sub('2020-10-01',-3);
	+-------------+
	|     _c0     |
	+-------------+
	| 2020-10-04  |
	+-------------+
	
	select add_months(current_date(),6);
	+-------------+
	|     _c0     |
	+-------------+
	| 2021-12-17  |
	+-------------+

	#3、datediff(big_date,small_date)	返回两日期的天数差
	#4、from_unixtime(bigint[,date_format])
	#返回时间戳对应的date_format格式字符串信息
	select from_unixtime(134541241421);
	+----------------------+
	|         _c0          |
	+----------------------+
	| 6233-06-11 07:03:41  |
	+----------------------+

	select from_unixtime(134541241421,'yyyy-MM');
	+----------+
	|   _c0    |
	+----------+
	| 6233-06  |
	+----------+

	#5、date_format(string|date|datetime|timestamp,date_format)
	#返回日期对应的date_format格式
	select date_format('6233-06-11 07:03:41','yyyy/MM');
	+----------+
	|   _c0    |
	+----------+
	| 6233/06  |
	+----------+

	#6、to_date(string|datetime|timestamp)
	#返回日期的年月日
	select to_date('6233-06-11 07:03:41');
	+-------------+
	|     _c0     |
	+-------------+
	| 6233-06-11  |
	+-------------+

	#7、next_day(string|datetime|timestamp,weekday)		weekday是英文星期的前两位
	#距离当前日期最近的下一个weekday
	
	select next_day(current_date(),'mo');
	+-------------+
	|     _c0     |
	+-------------+
	| 2021-06-21  |
	+-------------+

	#8、last_day(string|datetime|timestamp)	返回参数日期所属月份最后一天的日期
	select last_day(current_date());
	+-------------+
	|     _c0     |
	+-------------+
	| 2021-06-30  |
	+-------------+

	#9、trunc(string|datetime|timestamp,part)
	#返回参数1对应参数2单位的第一天日期
	#part : YY=> 年		Q=> 季		MM=>月
	#date_add(next_day(date,'MO'),-7) 
	#返回参数日期所属周的第一天(星期一)日期
	select trunc(current_date(),'YY');
	+-------------+
	|     _c0     |
	+-------------+
	| 2021-01-01  |
	+-------------+

	select trunc(current_date(),'Q');
	+-------------+
	|     _c0     |
	+-------------+
	| 2021-04-01  |
	+-------------+

	select trunc(current_date(),'MM');
	+-------------+
	|     _c0     |
	+-------------+
	| 2021-06-01  |
	+-------------+

	select date_add(next_day(current_date(),'MO'),-7);
	+-------------+
	|     _c0     |
	+-------------+
	| 2021-06-14  |
	+-------------+

	#年最后一天
		select concat(year(current_date()),'-12-31');
		+-------------+
		|     _c0     |
		+-------------+
		| 2021-12-31  |
		+-------------+
	#季最后一天
		select date_sub(add_months(trunc(current_date(),'Q'),3),1);
		+-------------+
		|     _c0     |
		+-------------+
		| 2021-06-30  |
		+-------------+
	#周最后一天
		select date_sub(next_day(current_date(),'MO'),1);
		+-------------+
		|     _c0     |
		+-------------+
		| 2021-06-20  |
		+-------------+
	
	#10、months_between(big_date,small_date)
	#返回两个日期之间的月数差
	select months_between(current_date(),'2021-3-15');
	+-------------+
	|     _c0     |
	+-------------+
	| 3.06451613  |
	+-------------+

条件函数
	#1、if(condition,if_true_val,if_false_val);
	#双重分支,相当于三元运算符
		#nvl(field,default_val)		field==null?default_val:field
		
	#2、case [ field when CONST_V1 ] then V1 ... else VN end
	#等值判断多分支
	
	#3、case [ when field>=CONST_V1 ] then V1 ... else VN end
	#区间判断多分支
	
	#4、coalesce(T,...vs)
	#返回参数列表中第一个非空值
	
	#5、ifnull(field)
	#返回field列值是否为空
	
	#6、ifnotnull(field)
	#返回field列值是否为不空

侧视图
	#select ...,item_alias from TABLE_NAME lateral view func(field) LV_alias as item_alias;
	
转换函数
聚合函数
	#1、 sum([distinct] field)
	#2、 avg([distinct] field)
	#3、 count([distinct] field)
	#4、 min(field)
	#5、 max(field)
	#6、 collect_list(field)				列举
	#7、 collect_set(field)					去重
	#8、 var_pop(numField)					方差
	#9、 stddev_pop(numField)				标准差
	#10、covar_pop(numBase,numField)		协方差
	
	
窗口函数(统计函数) 不支持window子句
	row_number()							行号
	rank()									排序
	dense_rank()
	ntile(n)								
	lag(field,nth,defaultVal)				下一行
	lead(field,nth,defaultVal)				上一行
	first_value(field)
	last_value(field)
	nth_value(field,nth)
	
over重句
	over([partition by field[ order by field]] rows between ... and ...)
		#	...	window 子句		rows between ... and ...
		unbounded_preceding
		n preceding
		current row
		n following
		unbounded_following
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值