- 通过IDEA创建一个maven工程,pom.xml如下
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the TestUDF project. It compiles against Hadoop 2.6.0 and Hive 1.2.1 and, during the package phase, builds an additional jar-with-dependencies assembly. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<!-- Project coordinates. -->
<groupId>com.jd.hyxfjr</groupId>
<artifactId>TestUDF</artifactId>
<version>1.0-SNAPSHOT</version>
<name>TestUDF</name>
<!-- FIXME change it to the project's website -->
<url>http://www.example.com</url>
<properties>
<!-- Read source files as UTF-8 and compile for Java 1.7 (source and target level). -->
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>1.7</maven.compiler.source>
<maven.compiler.target>1.7</maven.compiler.target>
</properties>
<dependencies>
<!-- JUnit is needed for tests only (test scope), so it is not part of the packaged jar. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>
<!-- Hadoop libraries, all pinned to the same version (2.6.0). -->
<!-- NOTE(review): these and hive-exec use the default (compile) scope, so the jar-with-dependencies assembly will presumably bundle them; confirm whether "provided" scope is intended, since the cluster normally supplies these libraries. -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.6.0</version>
</dependency>
<!-- Hive library that supplies the org.apache.hadoop.hive.ql.exec.UDF base class extended by the UDF code. -->
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>1.2.1</version>
</dependency>
</dependencies>
<build>
<!-- Base file name of the built artifacts; the assembly jar is named after it with the jar-with-dependencies suffix. -->
<!-- NOTE(review): later steps of this guide refer to AesEncUdf-jar-with-dependencies.jar, which does not match this finalName; confirm which name is intended. -->
<finalName>AesDecUdf</finalName>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<version>2.3</version>
<configuration>
<!-- Use the predefined descriptor that packs the project classes together with all of its dependencies into one jar. -->
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
<!-- Main-Class entry written to the manifest so that the jar is executable. -->
<!-- NOTE(review): the package here (com.ab.hyxfjr) differs from the groupId above (com.jd.hyxfjr), and no class of this name is shown in this guide; confirm the fully qualified class name. -->
<archive>
<manifest>
<mainClass>com.ab.hyxfjr.AesDecUdf</mainClass>
</manifest>
</archive>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<!-- Bind the assembly "single" goal to the package phase so that packaging also produces the assembly jar. -->
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
- UDF开发:实现一个简单的脱敏UDF,保留前5位,其余部分全部替换成********(8个星号);长度不超过5位的值整体替换为********
import org.apache.hadoop.hive.ql.exec.UDF;
/**
 * Hive UDF that masks a string value: the first five characters are kept and
 * everything after them is replaced by a fixed run of eight asterisks.
 *
 * <p>Values of five characters or fewer are replaced entirely by the mask, so
 * short inputs are never partially revealed. A {@code null} input yields
 * {@code null}.
 *
 * <p>Created 2018/11/11 15:50. Contact: lixianwei@jd.com
 *
 * @author lixianwei
 */
public class ID_Number extends UDF {

    /** Fixed mask that replaces the hidden part of the value. */
    private static final String MASK = "********";

    /** Number of leading characters that stay visible. */
    private static final int VISIBLE_PREFIX_LENGTH = 5;

    /**
     * Masks the given value.
     *
     * @param var the value to mask; may be {@code null}
     * @return the first five characters of {@code var} followed by the mask
     *         when {@code var} is longer than five characters, the mask alone
     *         when it is shorter or exactly five characters long, or
     *         {@code null} when {@code var} is {@code null}
     */
    public String evaluate(String var) {
        // Check for null explicitly instead of relying on a caught
        // NullPointerException, which printed a stack trace for every null row.
        if (var == null) {
            return null;
        }
        if (var.length() > VISIBLE_PREFIX_LENGTH) {
            return var.substring(0, VISIBLE_PREFIX_LENGTH) + MASK;
        }
        return MASK;
    }

    /**
     * Manual smoke test: prints the masked form of a sample value.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        System.out.println(new ID_Number().evaluate("3830200044009"));
    }
}
- 在IDEA中点击install 打包成jar
- 在该工程下面将会生成一个target文件夹,里面有生成的jar文件
- 将生成的jar文件上传到hadoop客户端服务器(能访问hive的服务器上)
1)在hdfs上创建一个专门存放udf jar文件的文件夹
hdfs dfs -mkdir /user/hive/warehouse/udf
2)将 udf jar文件放入该hdfs目录(jar 文件名取决于 pom.xml 中的 finalName,请以 target 目录下实际生成的 *-jar-with-dependencies.jar 文件名为准)
hdfs dfs -put AesEncUdf-jar-with-dependencies.jar /user/hive/warehouse/udf/
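- 找CDH的nameservice(即 hdfs-site.xml 中 dfs.nameservices 的值;可在 Cloudera Manager 的 HDFS 配置中查看,也可以在客户端执行 hdfs getconf -confKey dfs.nameservices 获取)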
1)
2)
3)
4)
- 上面找到的nameservice(本例为nameservice1)用于下面CREATE FUNCTION语句中jar文件的HDFS路径
1)打开hive命令行,然后创建函数
hive>
>
>
> CREATE FUNCTION stage.aesenc AS 'com.ab.hyxfjr.AesEncUdf' using jar 'hdfs://nameservice1/user/hive/warehouse/udf/AesEncUdf-jar-with-dependencies.jar';
其中,stage为数据库名称、aesenc是自己命名的函数名称
- 测试
1)建测试数据
1|61234522222000654321|18613718137|abc@jd0.com|010381199909183217
2|51234522222000654322|18613718126|abc@jd1.com|020381199909183216
3|41234522222000654323|18613718125|abc@jd2.com|030381199909183215
4|31234522222000654324|+8613718124|abc@jd3.com|040381199909183214
5|21234522222000654325|+8613718123|abc@jd4.com|050381199909183213
2)建表语句
hive>
>
> create table IF NOT EXISTS user
> (
> id string,
> bankNum string,
> phoneNum string,
> email string,
> id_num string
> )
> ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
> stored as textfile;
3)load 数据到表
hive> load data local inpath './user.txt' into table user;
Loading data to table stage.user
Table stage.user stats: [numFiles=1, totalSize=330]
OK
Time taken: 0.373 seconds
hive>
hive> select * from user;
OK
1 61234522222000654321 18613718137 abc@jd0.com 010381199909183217
2 51234522222000654322 18613718126 abc@jd1.com 020381199909183216
3 41234522222000654323 18613718125 abc@jd2.com 030381199909183215
4 31234522222000654324 +8613718124 abc@jd3.com 040381199909183214
5 21234522222000654325 +8613718123 abc@jd4.com 050381199909183213
Time taken: 0.23 seconds, Fetched: 5 row(s)
- 测试函数
hive> select aesenc(id_num) from user;
Stage-Stage-1: Map: 1 Cumulative CPU: 6.01 sec HDFS Read: 4188 HDFS Write: 325 SUCCESS
Total MapReduce CPU Time Spent: 6 seconds 10 msec
OK
7ED58AF2C2B339AAC0C00E881DD1AC691C67C08113A49498E2A935A43FA13419
D7138307AFCE6BAC7C98D36228D7993F775DFFF896AF193ACD8AFEEC336FA27A
2EF782803E1347A9720A44E6B438EA0C994CE38E64248FFB3B03989ACD6304A1
AB7A3060E2B3D9024D8737382E253A697231A0000E35C290DB279C2F0B70B5C4
ABDDD97A620BD7D6F1CE38F19492186FA004F46F3D63CC8327C30399B45D880E
Time taken: 17.485 seconds, Fetched: 5 row(s)
hive>