Hive总结3(自定义函数,jdbc连接)

1.自定义UDF函数

举例:实现add()函数

1.继承一个类:UDF

2.在里面开发多个重载的方法:evaluate()

3.注册这个函数,将打包的jar包放到hdfs上

4.通过create function创建这个函数,drop function删除这个函数。删除后会有缓存,需再重新登录一次。

1.1新建maven项目

添加依赖:

<!-- https://mvnrepository.com/artifact/org.apache.hive/hive-service -->

<dependency>

<groupId>org.apache.hive</groupId>

<artifactId>hive-service</artifactId>

<version>1.2.2</version>

</dependency>

<dependency>

<groupId>org.apache.hadoop</groupId>

<artifactId>hadoop-client</artifactId>

<version>2.7.7</version>

</dependency>

1.2开发一个类继承UDF

package cn.udf;

import org.apache.hadoop.hive.ql.exec.UDF;

//1:

/**
 * Hive UDF implementing an integer add function.
 *
 * <p>Registered in Hive via: CREATE FUNCTION fadd AS 'cn.udf.AddUDF'.
 * The legacy {@code UDF} base class resolves calls against overloaded
 * {@code evaluate()} methods by argument types — add more overloads to
 * support more signatures.
 */
public class AddUDF extends UDF {

    /**
     * Adds two ints.
     *
     * @param a first operand
     * @param b second operand
     * @return a + b
     */
    public int evaluate(int a, int b) {
        System.out.println("实现加运算:" + a + "," + b);
        return a + b;
    }
}

1.3生成jar包上传到linux上

生成的jar包命名为hive.jar

把jar包从本地移动到hdfs上$:hdfs dfs -moveFromLocal hive.jar /udf/

1.4启动Hive,添加jar包

hive> add jar hdfs://hadoop31:8020/udf/hive.jar;

1.5创建function

hive>create function fadd as 'cn.udf.AddUDF';

测试 hive>select fadd(2,3); >>5

第四步和第五步可以合成一步

hive>create function fadd as 'cn.udf.AddUDF'

>using jar 'hdfs://hadoop31:8020/udf/hive.jar';

2.UDAF(聚合函数)

模拟开发sum函数需要开发五个方法,对应mapreduce五个过程:

以类继承自GenericUDAFEvaluator类

 init   iterate   terminatePartial   merge   terminate

 MapReduce: init -> mapper(iterate) -> 分区局部结果(terminatePartial) -> shuffle -> reducer(merge -> terminate)

2.1:开发一个类,继承UDAF函数(已过时)

package cn.udf;

 

import org.apache.hadoop.hive.ql.exec.UDAF;

import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;

import org.apache.hadoop.hive.ql.metadata.HiveException;

import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;

import org.apache.hadoop.io.LongWritable;

 

@SuppressWarnings("deprecation") //去掉警告

public class FSumUDAF extends UDAF {

//2:开发一个内部类,继承

public static class FSumEval implements UDAFEvaluator {

private static LongWritable sum;

 

@Override

public void init() {

System.out.println("1:初始化"); //在合并数据时调用 - shuffle之前

sum = new LongWritable(0);

}

 

//开发其他的四个方法mapper,每读取一行,就进行求和

//第二个方法,用于计算某一个分区中的数据

public boolean iterate(LongWritable value) throws HiveException {

System.out.println("2:接收到:" +sum+","+value);

if (value == null) {

return true;//必须总是返回true,否则会阻止后面的执行

}

if(sum==null){

sum=new LongWritable(0);//为什么会是null

}

sum.set(sum.get() + value.get());

return true;

}

//第三个方法,用于合并每一个分区中的最后结果。返回这个分区的数据

public LongWritable terminatePartial() throws HiveException {

System.out.println("3:合并一个分区中的数据,返回这个分区中的数据");

return sum;

}

//合并多个分区计算的结果

public boolean merge(LongWritable value) throws HiveException {

System.out.println("4:在merge合并数据:" + value);

if(value==null){

return true;

}

sum.set(sum.get() + value.get());

return true;

}

//第5个方法,用于返回最后的结果

public LongWritable terminate() throws HiveException {

System.out.println("5:返回最后的数据");

return sum;

}

}

}

2.2使用新的API去开发UDAF:

请用:

UDAF

Abstract..

- fcount

- favg

package cn.udf;

import org.apache.hadoop.hive.ql.metadata.HiveException;

import org.apache.hadoop.hive.ql.parse.SemanticException;

import org.apache.hadoop.hive.ql.udf.generic.AbstractGenericUDAFResolver;

import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;

import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;

import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;

import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;

 

//1:继承类

public class FSumUDAF2 extends AbstractGenericUDAFResolver {

@Override

public GenericUDAFEvaluator getEvaluator(TypeInfo[] info) throws SemanticException {

//2:添加 这个方法

return new MySumEval();

}

 

//AggregationBuffer - 中间计算的结果的缓存对象

//3:开发GenericUDAFEvaluator接口的子类

@SuppressWarnings("deprecation")

public static class MySumEval extends GenericUDAFEvaluator {

 

//4:声明一个对象,用于保存中间的结果

public static class MyBuffer implements AggregationBuffer {

Long sum;

 

@Override

public String toString() {

return "MyBuffer{" +

"sum=" + sum +

'}';

}

}

 

//声明类型

//5:声明基本的类型,用于获取数据的

private PrimitiveObjectInspector objectInspector;

 

@Override

public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {

System.out.println("1:初始化");

//声明返回的类型

objectInspector = (PrimitiveObjectInspector) parameters[0];

return PrimitiveObjectInspectorFactory.javaLongObjectInspector;

}

 

//6:获取一个新的对象

@Override

public AggregationBuffer getNewAggregationBuffer() throws HiveException {

System.out.println("新的缓存的对象");

MyBuffer myBuffer = new MyBuffer();

myBuffer.sum = 0L;

return myBuffer;

}

 

@Override

public void reset(AggregationBuffer agg) throws HiveException {

MyBuffer my = (MyBuffer) agg;

my.sum = 0L;

my = null;

}

 

//null,34

@Override

public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {

System.out.println("获取数据:" + agg + "," + parameters[0]);

Object value = parameters[0];

if (value != null) {

//用基本类型的工具类获取数据

long val = PrimitiveObjectInspectorUtils.getLong(value, objectInspector);

//进行加

MyBuffer my = (MyBuffer) agg;

my.sum += val;

}

}

 

@Override

public Object terminatePartial(AggregationBuffer agg) throws HiveException {

System.out.println("每一个分区合并数据:" + agg);

MyBuffer my = (MyBuffer) agg;

return my.sum;

}

 

@Override

public void merge(AggregationBuffer agg, Object partial) throws HiveException {

System.out.println("shuffle...");

if (partial != null) {

//用基本类型的工具类获取数据

long val = PrimitiveObjectInspectorUtils.getLong(partial, objectInspector);

//进行加

MyBuffer my = (MyBuffer) agg;

my.sum += val;

}

}

 

@Override

public Object terminate(AggregationBuffer agg) throws HiveException {

System.out.println("最后输出:" + agg);

MyBuffer my = (MyBuffer) agg;

return my.sum;

}

}

}

3.JDBC连接数据库

开启服务:$hive --service hiveserver2 & //&表示后台启动

登陆:$:hive --service beeline

连接:

4.java代码连接

依赖

<dependency>

<groupId>org.apache.hive</groupId>

<artifactId>hive-jdbc</artifactId>

<version>1.2.2</version>

</dependency>

 

package cn.hive;

 

import org.junit.Test;

 

import java.sql.Connection;

import java.sql.DriverManager;

import java.sql.ResultSet;

import java.sql.Statement;

 

public class HiveJdbc {

 

@Test

public void test1() throws Exception{

//1:注册驱动

Class.forName("org.apache.hive.jdbc.HiveDriver");

String url = "jdbc:hive2://hadoop31:10000/db01";

Connection con =

DriverManager.getConnection(url,"wangjian","888888");

Statement st =con.createStatement();

ResultSet rs = st.executeQuery("select * from stud01");

while(rs.next()){

String id = rs.getString("id");

String name = rs.getString("name");

int age = rs.getInt("age");

System.out.println(id+","+name+","+age);

}

rs.close();

st.close();

con.close();

 

}

@Test

public void test2() throws Exception{

//1:注册驱动

Class.forName("org.apache.hive.jdbc.HiveDriver");

String url = "jdbc:hive2://hadoop31:10000/db01";

Connection con =

DriverManager.getConnection(url,"wangjian","888888");

Statement st =con.createStatement();

ResultSet rs = st.executeQuery("select count(1) from stud01");

if(rs.next()){

Long size = rs.getLong(1);

System.out.println("行数:"+size);

}

rs.close();

st.close();

con.close();

 

}

}

5.关于用户名和密码

打开

其中1文件会把2文件覆盖

找到2文件下的如下图

选择复制到1文件中修改如下

package cn.cn.hive;

import org.apache.hive.service.auth.PasswdAuthenticationProvider;

import javax.security.sasl.AuthenticationException;

// Comparable to web-framework auth (e.g. Shiro's Realm): a pluggable
// username/password check invoked by HiveServer2 at connection time.
public class PasswordAuth implements PasswdAuthenticationProvider {

    /**
     * Validates the credentials supplied by the JDBC/beeline client.
     *
     * <p>BUG FIX: the original called {@code user.equals(...)}, which threw a
     * NullPointerException (not an AuthenticationException) when the client
     * sent no user name or password. Constant-first {@code equals} is
     * null-safe and rejects null credentials as a normal auth failure.
     *
     * @throws AuthenticationException if the user/password pair is wrong
     */
    @Override
    public void Authenticate(String user, String password) throws AuthenticationException {
        if (!"keys".equals(user) || !"123456".equals(password)) {
            System.out.println("用户名或是密码错误...");
            throw new AuthenticationException("用户名或密码错误");
        }
        System.out.println("登录成功");
    }
}

将此类打包放到lib文件下,注意命名(区分系统jar包和自己开发的jar包)

以后登录就可以指定用户名密码登录。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值