1.数据格式实例:
-------------------------------------
1,zhangsan:18:beijing|nan|it,2000
2,lisi:28:nanjing|nan|it,4000
3,xiaowu:38:qingdao|nan|it,1000
--------------------------------
创建表,然后load到hive中:
-- Table for the raw user records. Each input line has three comma-separated
-- fields: id, a packed info string, and salary.
-- The info string is stored as a single column (e.g. zhangsan:18:beijing|nan|it)
-- and has to be taken apart at query time.
-- "if not exists" makes the statement safe to re-run.
create table if not exists userinfo (
    id     int,
    info   string,
    salary int
)
row format delimited
fields terminated by ',';
---------------------------
-- Load the sample file from the local filesystem into userinfo.
-- No "overwrite" is given, so the rows are added to whatever the table already holds.
load data local inpath '/home/darren/hive/qipa.data'
into table userinfo;
-- A conventional query would look like this, but it does not work here:
-- userinfo has no name/age/sex columns, because those values are packed
-- into the single irregular info string.
select
    id,
    name,
    age,
    sex
from userinfo
where salary > 1000;

-- Instead, use a custom function (fun1) to pull individual fields out of the
-- irregular info string by position.
-- NOTE(review): fun1 must be registered as a Hive function before this runs;
-- presumably it is backed by the Java UDF written in section 2 — confirm.
select
    id,
    fun1(info, 1) as name,
    fun1(info, 2) as age,
    fun1(info, 3) as city
from userinfo
where salary > 1000;
2.编写一个 Hive UDF(自定义函数)的 Java 程序:
package com.darren.wang;
import org.apache.hadoop.hive.ql.exec.UDF;
public class Personinfo extends UDF {
public St