连接hive的工具类
package com.bfd.util;
import java.sql.*;
/**
* author:liming
* date:2022-01-12
* tittle:连接hive的jdbcDao
*/
public class HiveDao {
// 此Class 位于 hive-jdbc的jar包下
private static String driverName ="org.apache.hive.jdbc.HiveDriver";
//填写hive的IP,之前在配置文件中配置的IP
private static String Url="jdbc:hive2://192.168.157.136:10000/";
//建立连接
private static Connection conn=null;
public static Connection getConnnection() {
try{
//加载driver
Class.forName(driverName);
//获取连接
conn = DriverManager.getConnection(Url,"hive",""); //只是连接hive, 用户名可不传
}
catch(ClassNotFoundException e) {
e.printStackTrace();
System.exit(1);
}
catch (SQLException e) {
e.printStackTrace();
}
return conn;
}
//获取PreparedStatement
public static PreparedStatement prepare(Connection conn, String sql) {
PreparedStatement ps = null;
try {
ps = conn.prepareStatement(sql);
} catch (SQLException e) {
e.printStackTrace();
}
return ps;
}
public static void main(String[] args) {
System.out.println(HiveDao.getConnnection());
}
}
依赖
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>1.2.2</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-jdbc</artifactId>
<version>1.2.2</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.7.3</version>
</dependency>
关联码表o_ql_t_com_enterprise_cid ,根据统一社会信用代码、组织机构代码、企业名称来补全cid
package com.bfd;
import com.bfd.util.HiveDao;
import org.apache.hadoop.hive.ql.exec.UDF;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.logging.Logger;
/**
* author:liming
* date:2022-01-12
* tittle:连接hive进行关联码表的udf
*
* 调用说明:
* 1.定义udf的名字为find_cid
* 2.在sql中使用select find_cid(cid_in,//cid
* enterprise_code_1_in,//统一社会信用代码
* enterprise_code_2_in,//组织机构代码
* enterprise_name_in,//企业名称
* etl_date,//分区
* database//数据库) as cid from s.xxx
*
* 例子:
* 库 : src
* 表 : src_ent_basic_info
* 字段 : id、name
* sql : select find_cid(cid_in,//cid
* enterprise_code_1_in,//统一社会信用代码
* enterprise_code_2_in,//组织机构代码
* enterprise_name_in,//企业名称
* "2021-09-09",//分区
* "src"//数据库) as id from src.src_ent_basic_info
*
*/
//cid
// 统一社会信用代码:enterprise_code_1
// 组织机构代码:enterprise_code_2
// 企业名称:enterprise_name
public class CidRepair extends UDF {
static Logger logger = Logger.getLogger("com.bfd");
public static String evaluate(String cid_in,//cid
String enterprise_code_1_in,//统一社会信用代码
String enterprise_code_2_in,//组织机构代码
String enterprise_name_in,//企业名称
String etl_date,//分区
String database//数据库
) {
//判断cid是否为空或者空字符串,如果有值则不处理,返回原有的cid
if(cid_in == null || cid_in.trim().length()==0) {
logger.info("cid进行修复ing");
//查询cid管理表
String sql = "select cid,enterprise_code_1,enterprise_code_2,enterprise_name from " +
database + ".o_ql_t_com_enterprise_cid where etl_date='"+etl_date+"'";
logger.info(sql);
//建立hive连接
Connection conn = HiveDao.getConnnection();
//获取PreparedStatement
PreparedStatement prepare =
HiveDao.prepare(conn, sql);
//建立ResultSet
ResultSet rs = null;
try {
//rs执行sql
rs = prepare.executeQuery();
//循环获取下一条
while (rs.next()) {
//enterprise_code_1_in统一社会信用代码
// enterprise_code_2_in组织机构代码
// enterprise_name_in企业名称
String cid = rs.getString("cid");
String enterprise_code_1 = rs.getString("enterprise_code_1");
String enterprise_code_2 = rs.getString("enterprise_code_2");
String enterprise_name = rs.getString("enterprise_name");
logger.info(cid_in+"\t"+enterprise_code_1_in+"\t"+enterprise_code_2_in+"\t"+enterprise_name_in);
int flag1=0;
int flag2=0;
int flag3=0;
//进行匹配enterprise_code_1_in、enterprise_code_2_in、enterprise_name_in如果一样返回cid
if(enterprise_code_1_in==null) flag1=0;
else if(enterprise_code_1_in.equals(enterprise_code_1)) flag1=1;
if(enterprise_code_2_in==null) flag2=0;
else if(enterprise_code_2_in.equals(enterprise_code_2)) flag2=1;
if(enterprise_name_in==null) flag3=0;
else if(enterprise_name_in.equals(enterprise_name)) flag3=1;
int flag =flag1+flag2+flag3;
if(flag>0) {logger.info("匹配上"+String.valueOf(flag));return cid;}
else { logger.info("匹配上"+String.valueOf(flag)); return null;}
}
return "匹配不上" + conn.toString();
} catch (SQLException throwables) {
throwables.printStackTrace();
logger.info("语法错误");
return null;
} finally {
try {
//关闭连接
if (prepare != null) prepare.close();
if (conn != null) conn.close();
} catch (SQLException throwables) {
throwables.printStackTrace();
}
}
}
else if (cid_in!=null) return cid_in;
else return null;
}
public static void main(String[] args) {
System.out.println(evaluate(null,"tyshxydm:1","zzjgdm:2","bob" ,"2022-01-17","bigdata" ) ) ;
System.out.println(evaluate(null,"tyshxydm:3","zzjgdm:2","jake" ,"2022-01-17","bigdata" ) ) ;
System.out.println(evaluate(null,"tyshxydm:1",null,null ,"2022-01-17","bigdata" ) ) ;
System.out.println(evaluate(null,null,"zzjgdm:2",null ,"2022-01-17","bigdata" ) ) ;
System.out.println(evaluate(null,null,null,"bob" ,"2022-01-17","bigdata" ));
}
}