Intellij IDEA 开发 UDF 过程
1. 安装 Maven
- 官网下载:https://maven.apache.org
- 安装说明:https://maven.apache.org/install.html
- Maven和JDK对应关系:https://maven.apache.org/docs/history.html
- Maven 与 JDK 版本
- 因环境需要 JDK 1.7,所安装的 Maven 版本必须与该 JDK 版本兼容(对应关系见上面的链接),安装完成后确认两者版本匹配
- Intellij IDEA 与 Maven 版本
- Intellij IDEA 版本:2020.1,选择 Maven Release Date 在 IDEA 版本之前的且符合 JDK 版本要求,此处选择:Maven 3.6.3
- 如果 Intellij IDEA 与 Maven 版本不匹配,IDEA Reimport将报错
报错日志:Unable to import maven project: See logs for details
- Intellij IDEA 版本:2020.1,选择 Maven Release Date 在 IDEA 版本之前的且符合 JDK 版本要求,此处选择:Maven 3.6.3
2023-12-13 13:43:28,453 [ 475380] ERROR - #org.jetbrains.idea.maven - java.lang.RuntimeException: org.codehaus.plexus.component.repository.exception.ComponentLookupException: com.google.inject.ProvisionException: Unable to provision, see the following errors:
1) Error injecting constructor, java.lang.NoSuchMethodError: org.apache.maven.model.validation.DefaultModelValidator: method <init>()V not found
at org.jetbrains.idea.maven.server.embedder.CustomModelValidator.<init>(Unknown Source)
while locating org.jetbrains.idea.maven.server.embedder.CustomModelValidator
at ClassRealm[maven.ext, parent: ClassRealm[plexus.core, parent: null]] (via modules: org.eclipse.sisu.wire.WireModule -> org.eclipse.sisu.plexus.PlexusBindingModule)
while locating org.apache.maven.model.validation.ModelValidator annotated with @com.google.inject.name.Named(value=ide)
1 error
role: org.apache.maven.model.validation.ModelValidator
roleHint: ide
java.lang.RuntimeException: java.lang.RuntimeException: org.codehaus.plexus.component.repository.exception.ComponentLookupException: com.google.inject.ProvisionException: Unable to provision, see the following errors:
1) Error injecting constructor, java.lang.NoSuchMethodError: org.apache.maven.model.validation.DefaultModelValidator: method <init>()V not found
at org.jetbrains.idea.maven.server.embedder.CustomModelValidator.<init>(Unknown Source)
while locating org.jetbrains.idea.maven.server.embedder.CustomModelValidator
at ClassRealm[maven.ext, parent: ClassRealm[plexus.core, parent: null]] (via modules: org.eclipse.sisu.wire.WireModule -> org.eclipse.sisu.plexus.PlexusBindingModule)
while locating org.apache.maven.model.validation.ModelValidator annotated with @com.google.inject.name.Named(value=ide)
1 error
role: org.apache.maven.model.validation.ModelValidator
roleHint: ide
at org.jetbrains.idea.maven.server.Maven3XServerEmbedder.getComponent(Maven3XServerEmbedder.java:489)
at org.jetbrains.idea.maven.server.Maven3XServerEmbedder.customizeComponents(Maven3XServerEmbedder.java:578)
at org.jetbrains.idea.maven.server.Maven3XServerEmbedder.customize(Maven3XServerEmbedder.java:546)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at sun.rmi.server.UnicastServerRef.dispatch(UnicastServerRef.java:322)
at sun.rmi.transport.Transport$2.run(Transport.java:202)
at sun.rmi.transport.Transport$2.run(Transport.java:199)
at java.security.AccessController.doPrivileged(Native Method)
at sun.rmi.transport.Transport.serviceCall(Transport.java:198)
at sun.rmi.transport.tcp.TCPTransport.handleMessages(TCPTransport.java:567)
at sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.run0(TCPTransport.java:828)
at sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.access$400(TCPTransport.java:619)
at sun.rmi.transport.tcp.TCPTransport$ConnectionHandler$1.run(TCPTransport.java:684)
at sun.rmi.transport.tcp.TCPTransport$ConnectionHandler$1.run(TCPTransport.java:681)
at java.security.AccessController.doPrivileged(Native Method)
at sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.run(TCPTransport.java:681)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
at java.rmi/sun.rmi.transport.StreamRemoteCall.exceptionReceivedFromServer(StreamRemoteCall.java:303)
at java.rmi/sun.rmi.transport.StreamRemoteCall.executeCall(StreamRemoteCall.java:279)
at java.rmi/sun.rmi.server.UnicastRef.invoke(UnicastRef.java:164)
at java.rmi/java.rmi.server.RemoteObjectInvocationHandler.invokeRemoteMethod(RemoteObjectInvocationHandler.java:217)
at java.rmi/java.rmi.server.RemoteObjectInvocationHandler.invoke(RemoteObjectInvocationHandler.java:162)
at com.sun.proxy.$Proxy155.customize(Unknown Source)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.base/java.lang.reflect.Method.invoke(Method.java:566)
at com.intellij.execution.rmi.RemoteUtil.invokeRemote(RemoteUtil.java:155)
at com.intellij.execution.rmi.RemoteUtil.access$400(RemoteUtil.java:25)
at com.intellij.execution.rmi.RemoteUtil$1.lambda$invoke$0(RemoteUtil.java:139)
at com.intellij.openapi.util.ClassLoaderUtil.computeWithClassLoader(ClassLoaderUtil.java:31)
at com.intellij.execution.rmi.RemoteUtil.executeWithClassLoader(RemoteUtil.java:207)
at com.intellij.execution.rmi.RemoteUtil$1.invoke(RemoteUtil.java:139)
at com.sun.proxy.$Proxy155.customize(Unknown Source)
at org.jetbrains.idea.maven.server.MavenEmbedderWrapper.doCustomize(MavenEmbedderWrapper.java:92)
at org.jetbrains.idea.maven.server.MavenEmbedderWrapper.lambda$customizeForResolve$1(MavenEmbedderWrapper.java:65)
at org.jetbrains.idea.maven.server.RemoteObjectWrapper.perform(RemoteObjectWrapper.java:76)
at org.jetbrains.idea.maven.server.MavenEmbedderWrapper.customizeForResolve(MavenEmbedderWrapper.java:64)
at org.jetbrains.idea.maven.project.MavenProjectResolver.resolve(MavenProjectResolver.java:77)
at org.jetbrains.idea.maven.project.MavenProjectsProcessorResolvingTask.perform(MavenProjectsProcessorResolvingTask.java:45)
at org.jetbrains.idea.maven.project.MavenProjectsProcessor.doProcessPendingTasks(MavenProjectsProcessor.java:141)
at org.jetbrains.idea.maven.project.MavenProjectsProcessor.access$000(MavenProjectsProcessor.java:35)
at org.jetbrains.idea.maven.project.MavenProjectsProcessor$1.run(MavenProjectsProcessor.java:110)
at org.jetbrains.idea.maven.utils.MavenUtil.lambda$runInBackground$5(MavenUtil.java:488)
at com.intellij.util.RunnableCallable.call(RunnableCallable.java:20)
at com.intellij.util.RunnableCallable.call(RunnableCallable.java:11)
at com.intellij.openapi.application.impl.ApplicationImpl$1.call(ApplicationImpl.java:255)
at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:834)
Caused by: java.lang.RuntimeException: org.codehaus.plexus.component.repository.exception.ComponentLookupException: com.google.inject.ProvisionException: Unable to provision, see the following errors:
1) Error injecting constructor, java.lang.NoSuchMethodError: org.apache.maven.model.validation.DefaultModelValidator: method <init>()V not found
at org.jetbrains.idea.maven.server.embedder.CustomModelValidator.<init>(Unknown Source)
while locating org.jetbrains.idea.maven.server.embedder.CustomModelValidator
at ClassRealm[maven.ext, parent: ClassRealm[plexus.core, parent: null]] (via modules: org.eclipse.sisu.wire.WireModule -> org.eclipse.sisu.plexus.PlexusBindingModule)
while locating org.apache.maven.model.validation.ModelValidator annotated with @com.google.inject.name.Named(value=ide)
2.创建 Maven 项目
- New -> Project
- 选择 Maven ,勾选 Create from archetype
- 配置项目
注意 Maven 配置和 本地仓库
3.依赖 dependencies
- 修改 Maven settings.xml(mirrors、profiles 属于 settings.xml 的配置,而不是项目的 pom.xml)
<mirrors>
<mirror>
<id>aliyunmaven</id>
<mirrorOf>m2</mirrorOf>
<name>spring-plugin</name>
<url>https://maven.aliyun.com/repository/spring-plugin</url>
</mirror>
<mirror>
<id>mirror</id>
<mirrorOf>*</mirrorOf>
<name>mirror</name>
<url>https://maven.aliyun.com/repository/public</url>
</mirror>
</mirrors>
<profiles>
<profile>
<id>aliyun</id>
<repositories>
<repository>
<id>public</id>
<url>https://maven.aliyun.com/repository/public</url>
<releases><enabled>true</enabled></releases>
<snapshots><enabled>true</enabled></snapshots>
</repository>
<repository>
<id>m1</id>
<url>https://maven.aliyun.com/repository/public</url>
<releases><enabled>true</enabled></releases>
<snapshots><enabled>true</enabled></snapshots>
</repository>
<repository>
<id>m2</id>
<url>https://maven.aliyun.com/repository/spring-plugin</url>
<releases><enabled>true</enabled></releases>
<snapshots><enabled>true</enabled></snapshots>
</repository>
</repositories>
</profile>
</profiles>
- 多镜像下载
- Could not find artifact org.pentaho:pentaho-aggdesigner-algorithm:pom:5.1.5-jhyde … 解决方式
pom导入相关依赖
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>1.2.1</version>
</dependency>
出现以下报错:
Could not find artifact org.pentaho:pentaho-aggdesigner-algorithm:pom:5.1.5-jhyde in ...
是因为这个包不在阿里云公共maven镜像仓库上,需要添加一个新的镜像仓库,修改maven的settings.xml
<mirrors>
<mirror>
<id>aliyunmaven</id>
<mirrorOf>m2</mirrorOf>
<name>spring-plugin</name>
<url>https://maven.aliyun.com/repository/spring-plugin</url>
</mirror>
<mirror>
<id>mirror</id>
<mirrorOf>*</mirrorOf>
<name>mirror</name>
<url>https://maven.aliyun.com/repository/public</url>
</mirror>
</mirrors>
之后重新导入依赖(Reimport),设置多镜像过程中如果因为顺序问题导致其他包无法下载,可以调整顺序重新打包(mvn package)
- 相关报错解决方法
- Could not find artifact XX:pom:XX …,删除本地依赖文件并触发Maven重新下载
- “IDEA开发工具,maven执行clean、package、install指令成功,但是执行run失败”, 使用 mvn idea:idea
- 依赖报错,删除 “除 src 和 pom.xml” 以外的所有文件,重新打开项目
4.Maven 命令
- clean:清除当前工程编译后生成的文件(即删除target整个目录);
- validate:对工程进行基础验证,如工程结构、pom、资源文件等是否正确;
- compile:对src/main/java目录下的源码进行编译(会生成target目录);
- test:编译并执行src/test/java/目录下的所有测试用例;
- package:将当前项目打包,普通项目打jar包,webapp项目打war包;
- verify:验证工程所有代码、配置是否正确,如类中代码的语法检测等;
- install:将当前工程打包,然后安装到本地仓库,别人可通过GAV导入;
- site:生成项目的概述、源码测试覆盖率、开发者列表等站点文档(需要额外配置);
- deploy:将当前工程对应的包,上传到远程仓库,提供给他人使用(私服会用);
- mvn dependency:tree -Dverbose 以树形结构列出项目的全部依赖(-Dverbose 会额外显示因冲突或重复而被忽略的依赖),用于排查依赖冲突或缺失;
- mvn idea:clean 清除idea项目工程文件;
- mvn idea:idea 大招:重新生成 IDEA 项目工程的所有文件。
5.增加 Log 日志
- pom增加依赖
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
<version>2.8.2</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>2.8.2</version>
</dependency>
- 在resources,新增log4j2.xml
内容如下:
<?xml version="1.0" encoding="UTF-8"?>
<Configuration status="WARN">
<Appenders>
<Console name="Console" target="SYSTEM_OUT">
<PatternLayout pattern="%d{HH:mm:ss.SSS} [%t] %-5level %logger{36} - %msg%n"/>
</Console>
</Appenders>
<Loggers>
<Root level="info">
<AppenderRef ref="Console"/>
</Root>
</Loggers>
</Configuration>
- 使用 Log
package com;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.ArrayList;
import java.util.List;
/**
 * Demo entry point: prints a greeting, emits an info log line and logs the
 * greatest of a few hard-coded partition strings.
 */
public class App
{
    /**
     * Runs the demo.
     *
     * @param args command-line arguments; not read
     */
    public static void main( String[] args )
    {
        System.out.println( "Hello World!" );

        Logger log = LogManager.getLogger(App.class);
        log.info("This is info message.");

        List<String> partitions = new ArrayList<String>();
        partitions.add("20231208");
        partitions.add("20231209");
        partitions.add("20221210");

        log.info(greatest(partitions));
    }

    /**
     * Returns the greatest element according to {@link String#compareTo(String)}.
     *
     * @param partitions candidate strings
     * @return the greatest string, or {@code null} when the list is empty
     */
    private static String greatest(List<String> partitions)
    {
        String best = null;
        for (int idx = 0; idx < partitions.size(); idx++) {
            String candidate = partitions.get(idx);
            // Keep the current best unless the candidate is strictly greater.
            if (best != null && candidate.compareTo(best) <= 0) {
                continue;
            }
            best = candidate;
        }
        return best;
    }
}
6.开发 UDF 函数
- UDF
package com;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.Text;
/**
 * Hive UDF that renders a boolean, int or double argument as text.
 *
 * <p>The overloads take boxed types on purpose: a primitive parameter can never
 * hold {@code null}, which would make the null branch unreachable. With boxed
 * parameters a {@code null} argument is mapped to the literal text
 * {@code "null"}.
 */
public final class ToChar extends UDF {

    /** Text returned for a {@code null} argument. */
    private static final String NULL_TEXT = "null";

    /**
     * Converts a boolean to text.
     *
     * @param bo the value to convert; may be {@code null}
     * @return {@code "true"} or {@code "false"}, or {@code "null"} when {@code bo} is {@code null}
     */
    public String evaluate(Boolean bo) {
        return bo == null ? NULL_TEXT : bo.toString();
    }

    /**
     * Converts an integer to its decimal text form.
     *
     * @param tmp the value to convert; may be {@code null}
     * @return the decimal representation, or {@code "null"} when {@code tmp} is {@code null}
     */
    public String evaluate(Integer tmp) {
        return tmp == null ? NULL_TEXT : tmp.toString();
    }

    /**
     * Converts a double to text using {@link Double#toString()}.
     *
     * @param d the value to convert; may be {@code null}
     * @return the text representation, or {@code "null"} when {@code d} is {@code null}
     */
    public String evaluate(Double d) {
        return d == null ? NULL_TEXT : d.toString();
    }
}
- UDTF
package com;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import java.util.Arrays;
import java.util.List;
@Description("_FUNC_(separator, col) - split column into several rows by separator")
public class GenericUDTFSplit extends GenericUDTF {
@Override
public StructObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
if (arguments.length != 2) {
throw new UDFArgumentException("Function GenericUDTFSplit requires 2 arguments, got " + arguments.length);
}
PrimitiveObjectInspector separatorOI = (PrimitiveObjectInspector) arguments[0];
PrimitiveObjectInspector valueOI = (PrimitiveObjectInspector) arguments[1];
if (separatorOI.getPrimitiveCategory() != PrimitiveObjectInspector.PrimitiveCategory.STRING
|| valueOI.getPrimitiveCategory() != PrimitiveObjectInspector.PrimitiveCategory.STRING) {
throw new UDFArgumentException("Both separator and value should be string.");
}
List<String> fieldNames = Arrays.asList("col1");
List<ObjectInspector> fieldOIs = Arrays.asList(
(ObjectInspector) PrimitiveObjectInspectorFactory.javaStringObjectInspector);
return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
}
@Override
public void process(Object[] args) throws HiveException {
String separator = (String) args[0];
String value = (String) args[1];
if (value != null) {
for (String substr : value.split(separator)) {
forward(substr);
}
}
}
@Override
public void close() throws HiveException {
}
}
- UDAF
package com;
import org.apache.hadoop.hive.ql.exec.UDAF;
import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;
/**
 * Hive UDAF that concatenates the non-null values of a group, each preceded by
 * a separator, and returns the result without the leading separator.
 */
public final class WMConcat extends UDAF {

    /**
     * Aggregation state, also used as the partial result passed to {@code merge}.
     *
     * <p>Invariant maintained by the evaluator: while {@code isInit} is false,
     * {@code resultBuilder} starts with {@code separator} (the first separator
     * that was appended).
     *
     * <p>NOTE(review): whether Hive transports the {@code StringBuilder} field
     * intact between aggregation stages is not established by this code — confirm.
     */
    public static class DtWMConcatState {
        private StringBuilder resultBuilder;
        private boolean isInit;
        private String separator;
    }

    /** Evaluator implementing the iterate / merge / terminate steps. */
    public static class DtWMConcatEvaluator implements UDAFEvaluator {
        DtWMConcatState state;

        /** Creates the evaluator with a fresh, empty state. */
        public DtWMConcatEvaluator() {
            super();
            state = new DtWMConcatState();
            init();
        }

        /** Resets the state to "no values seen". */
        public void init() {
            state.resultBuilder = new StringBuilder();
            state.isInit = true;
            state.separator = "";
        }

        /**
         * Adds one value to the aggregation.
         *
         * @param i the separator; {@code null} is treated as the empty string
         * @param s the value; {@code null} values are skipped
         * @return always {@code true}
         */
        public boolean iterate(String i, String s) {
            if (s != null) {
                String sep = (i == null) ? "" : i;
                if (state.isInit) {
                    // Remember the leading separator so terminate() can strip exactly it.
                    state.separator = sep;
                    state.isInit = false;
                }
                state.resultBuilder.append(sep).append(s);
            }
            return true;
        }

        /**
         * Returns the partial result.
         *
         * @return the state, or {@code null} when no value has been aggregated
         */
        public DtWMConcatState terminatePartial() {
            return state.isInit ? null : state;
        }

        /**
         * Appends a partial result to this evaluator's state.
         *
         * @param o a partial result; {@code null} (or one without a builder) is ignored
         * @return always {@code true}
         */
        public boolean merge(DtWMConcatState o) {
            if (o == null || o.resultBuilder == null) {
                return true;
            }
            if (state.isInit) {
                // The merged text becomes the start of our builder, so its leading
                // separator is the one terminate() has to strip.
                state.separator = (o.separator == null) ? "" : o.separator;
            }
            state.resultBuilder.append(o.resultBuilder);
            state.isInit = false;
            return true;
        }

        /**
         * Produces the final result.
         *
         * @return the concatenated values without the leading separator, or the
         *         empty string when no value has been aggregated
         */
        public String terminate() {
            if (state.isInit) {
                return "";
            }
            return state.resultBuilder.substring(state.separator.length());
        }
    }
}