-- List the partitions currently registered for t_order_wk.
SHOW PARTITIONS t_order_wk;
新增分区:
-- Add a new partition. The original line was missing the ADD keyword
-- and the trailing semicolon, so it was not a valid HQL statement.
alter table t_order_wk add partition(month="201402");
把查询结果输出:
-- Export query results to a local directory (despite the ".txt" name this
-- path is a directory; Hive writes 000000_0-style files under it).
-- Fixes: "seletc" typo, and the filter must use the partition column name
-- "month" — "partition" is a reserved word, not a column of this table.
insert overwrite local directory "hiveout.txt"
select * from t_order_wk where month="201401";
支持集合类型,但是需要加每个集合字段的分隔符
01,13812345678,18 tom,beijing,fengtai
02,13512345678,23 merry,shanghai,putong
-- Array columns need an element type (array<string>). The field delimiter
-- is a literal tab "\t" — the original "/t" is a forward slash plus the
-- letter t — and the keywords are "terminated" / "items" (not "ternimated"
-- / "iterms"). Per the sample rows above, fields are tab-separated and
-- array elements are comma-separated.
create table t_order_wk(a array<string>, b array<string>)
partitioned by(month string)
row format delimited
fields terminated by "\t"
collection items terminated by ","
;
支持map类型数据
fengjie age:18,size:36A,addr:USA
furong age:20,size:37C,addr:beijing,weight:100KG
-- Map columns need key and value types, and "name" needs a type too.
-- Per the sample rows above (age:18,size:36A,...), map entries are
-- comma-separated and keys are separated from values by ":" — the
-- original ";" does not match the data. Also fixes "/t" -> "\t" and
-- "iterms" -> "items".
create table tab_map(name string, info map<string,string>)
row format delimited
fields terminated by "\t"
collection items terminated by ","
map keys terminated by ":"
;
shell脚本输出HQL
# -S = silent mode, -e = execute the quoted HQL string and exit.
# The original closed the quotes before the statement ("" select ...),
# so the query was never passed to -e.
hive -S -e "select * from wk110.t_order_wk;"
在shell中同样也会显示出查询的结果,在实际生产中就是使用shell脚本
有了这种执行机制,就使得我们可以利用脚本语言(bash shell,python)进行hql语句的批量执行
自定义函数UDF
1.写出java类实现逻辑功能
需要导入hive下的lib
package cn.itcast.bigdata;
import java.util.HashMap;
//写的这个java类需要符合一定的规范,继承extends
// Hive UDF: must extend UDF and expose one or more public evaluate() overloads;
// Hive resolves the overload per row by argument type.
public class PhoneNumberToArea extends UDF{
	// Prefix -> area lookup; static so it is built once per JVM, not per row.
	// Parameterized map (the original raw HashMap compiled with unchecked
	// warnings and returned Object from get()).
	public static HashMap<String, String> areaMap = new HashMap<String, String>();
	static {
		areaMap.put("1398","北京");
		areaMap.put("1378","上海");
		areaMap.put("1368","深圳");
		areaMap.put("1358","苏州");
	}

	/**
	 * Called by Hive once per row; must be public to be invokable.
	 *
	 * @param pnb phone number; its first four digits select the area
	 * @return pnb + area name, or pnb + "huoxing" when the prefix is unknown;
	 *         a NULL input yields NULL (Hive passes SQL NULL as null)
	 */
	public String evaluate(String pnb) {
		// Guard: the original threw NullPointerException /
		// StringIndexOutOfBoundsException on null or short values,
		// which aborts the whole query in Hive.
		if (pnb == null) {
			return null;
		}
		if (pnb.length() < 4) {
			return pnb + "huoxing";
		}
		// Look up once instead of twice as in the original.
		String area = areaMap.get(pnb.substring(0, 4));
		return area == null ? (pnb + "huoxing") : (pnb + area);
	}
}
2.打成jar包上传到linux系统中,然后在hive会话中执行 add jar /path/to/udf.jar; 把它加入classpath(或直接拷贝到hive安装目录的lib下)
3.创建一个函数与jar包关联
-- Bind a callable session-scoped function name to the UDF class
-- (the jar must already be on the session classpath).
CREATE TEMPORARY FUNCTION getArea AS "cn.itcast.bigdata.PhoneNumberToArea";
创建表,导入数据
-- The original third column "upflowstring" lost its separator; assuming it
-- was meant to be a second flow column ("downflow") -- TODO confirm against
-- the actual layout of flow.data.
create table t_flower(phoneNumber string, upflow string, downflow string)
row format delimited
fields terminated by ",";
-- "load data local inpath" needs an absolute path; the leading "/" was missing.
load data local inpath "/home/hadoop/flow.data" into table t_flower;
使用UDF自定义函数来操作hive
-- NOTE(review): "upflow" is selected twice; the second occurrence was
-- probably meant to be the table's third column -- confirm intent against
-- the t_flower DDL before relying on this output.
select getArea(phoneNumber),upflow,upflow from t_flower;
cluster 分桶