Hive UDF 函数开发

maven配置

<!-- Dependencies needed to build the UDF. Versions should match the CDH cluster. -->
<!-- NOTE(review): hadoop-assemblies (2.5.1) and hive-exec (2.3.2) do not carry the
     "-cdh5.2.0" suffix used by the other artifacts below; confirm they match the cluster. -->
<dependency>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-assemblies</artifactId>
  <version>2.5.1</version>
</dependency>

<!-- JUnit, the Java unit-testing framework (test scope only) -->
<dependency>
  <groupId>junit</groupId>
  <artifactId>junit</artifactId>
  <version>4.10</version>
  <scope>test</scope>
</dependency>

<!-- Hadoop artifacts, CDH 5.2.0 builds -->
<dependency>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-common</artifactId>
  <version>2.5.0-cdh5.2.0</version>
</dependency>
<dependency>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-hdfs</artifactId>
  <version>2.5.0-cdh5.2.0</version>
</dependency>
<dependency>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-maven-plugins</artifactId>
  <version>2.5.0-cdh5.2.0</version>
</dependency>

<!-- HBase client, CDH 5.2.0 build -->
<dependency>
  <groupId>org.apache.hbase</groupId>
  <artifactId>hbase-client</artifactId>
  <version>0.98.6-cdh5.2.0</version>
</dependency>

<!-- hive-exec is the artifact this file adds for the Hive UDF code -->
<dependency>
  <groupId>org.apache.hive</groupId>
  <artifactId>hive-exec</artifactId>
  <version>2.3.2</version>
</dependency>

代码开发

/**
 * Hive UDF that counts how many times a regular expression matches in a string.
 *
 * <p>Extends {@code UDF}; the logic lives in {@code evaluate}.
 */
public class Regexp_Count extends UDF {
 /** Empty {@code Text} used to detect empty input. */
 private final Text srcText = new Text();

 // Most recently compiled pattern and its source string. evaluate() is invoked
 // once per row, usually with the same pattern argument, so caching avoids
 // recompiling the regex on every call.
 // NOTE(review): the cache is not synchronized; this assumes a UDF instance is
 // driven by a single thread. Confirm against the Hive execution model in use.
 private String cachedRegex;
 private Pattern cachedPattern;

 /**
  * Counts the successive matches of {@code pattern} found in {@code source_char}.
  *
  * @param source_char the text to search; may be {@code null}
  * @param pattern the regular expression; may be {@code null}
  * @return the number of matches found by repeated {@code Matcher.find()} calls,
  *         or 0 when {@code source_char} is {@code null} or empty or
  *         {@code pattern} is {@code null}
  * @throws java.util.regex.PatternSyntaxException if {@code pattern} is not a valid regex
  */
 public int evaluate(Text source_char, Text pattern) {
  if (source_char == null || source_char.equals(srcText) || pattern == null) {
   return 0;
  }

  String regex = pattern.toString();
  if (!regex.equals(cachedRegex)) {
   // Compile first so a syntax error leaves the previous cache entry intact.
   cachedPattern = Pattern.compile(regex);
   cachedRegex = regex;
  }

  Matcher m = cachedPattern.matcher(source_char.toString());
  int count = 0;
  while (m.find()) {
   count++;
  }
  return count;
 }
}

maven打包相关配置

cdh依赖下载

<!-- Repository from which the CDH dependencies are downloaded -->
<repositories>
  <repository>
    <id>cloudera</id>
    <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
    <!-- Release artifacts are enabled, snapshots are disabled -->
    <releases>
      <enabled>true</enabled>
    </releases>
    <snapshots>
      <enabled>false</enabled>
    </snapshots>
  </repository>
</repositories>

maven settings.xml 文件中

<!-- Add ",!cloudera" to mirrorOf so that the repository with id "cloudera" is not
     routed through this mirror; otherwise the CDH dependencies cannot be downloaded. -->

    <mirror>
      <id>nexus</id>
      <mirrorOf>*,!cloudera</mirrorOf>
      <name>aliyun MAVEN</name>
      <!-- Use https so artifacts are not fetched over a plaintext connection -->
      <url>https://maven.aliyun.com/nexus/content/groups/public</url>
    </mirror>

maven build只打项目源码和部分依赖

<!-- With this plugin only the project's own classes are packaged, and the json-lib
     dependency is copied into the lib directory under the project's classes output. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-dependency-plugin</artifactId>
        <!-- NOTE(review): this property is not defined in this snippet; it must be declared elsewhere in the POM -->
        <version>${maven-dependency-plugin-version}</version>
        <executions>
          <execution>
            <id>copy</id>
            <phase>test</phase>
            <goals>
              <goal>copy</goal>
            </goals>
            <configuration>
              <!-- The single artifact to copy: json-lib 2.4, jdk15 classifier -->
              <artifactItems>
                <artifactItem>
                  <groupId>net.sf.json-lib</groupId>
                  <artifactId>json-lib</artifactId>
                  <version>2.4</version>
                  <type>jar</type>
                  <classifier>jdk15</classifier>
                </artifactItem>
              </artifactItems>
              <!-- Target is inside the classes directory, so the copied jar ends up in the packaged output -->
              <outputDirectory>${project.build.directory}/classes/lib</outputDirectory>
              <excludeTransitive>false</excludeTransitive>
              <!-- Drop the version from the copied file name -->
              <stripVersion>true</stripVersion>
            </configuration>
          </execution>
        </executions>
      </plugin>

maven build将项目依赖打入项目源码中

<!-- Builds an uber-jar at the package phase: the project classes plus its dependencies. -->
<plugin>
  <groupId>org.apache.maven.plugins</groupId>
  <artifactId>maven-shade-plugin</artifactId>
  <version>3.2.2</version>
  <executions>
    <execution>
      <phase>package</phase>
      <goals>
        <goal>shade</goal>
      </goals>
      <configuration>
        <createDependencyReducedPom>true</createDependencyReducedPom>
        <!-- Automatically exclude every class that is not used, minimizing the uber-jar. -->
        <minimizeJar>true</minimizeJar>
        <!-- Optional: give the shaded jar a classifier suffix (disabled). -->
        <!--<shadedArtifactAttached>true</shadedArtifactAttached>
                <shadedClassifierName>shade</shadedClassifierName> -->
        <transformers>
          <!-- Writes the Main-Class entry into the manifest -->
          <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
            <mainClass>com.ntep.App</mainClass>
          </transformer>
          <!-- Appends same-named Spring descriptor files from several jars -->
          <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
            <resource>META-INF/spring.handlers</resource>
          </transformer>
          <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
            <resource>META-INF/spring.schemas</resource>
          </transformer>
        </transformers>
        <!-- Include/exclude whole dependency jars of this project. -->
        <artifactSet>
          <!--<excludes>
                        <exclude>junit:junit</exclude>
                    </excludes>-->
        </artifactSet>
        <!-- Include/exclude individual classes or resources inside a dependency jar. -->
        <filters>
          <!-- <filter>
                         <artifact>junit:junit</artifact>
                         <includes>
                             <include>junit/framework/**</include>
                             <include>org/junit/**</include>
                         </includes>
                         <excludes>
                             <exclude>org/junit/experimental/**</exclude>
                             <exclude>org/junit/runners/**</exclude>
                         </excludes>
                     </filter>-->
          <!-- Explicitly include everything from log4j and commons-logging -->
          <filter>
            <artifact>log4j:log4j</artifact>
            <includes>
              <include>**</include>
            </includes>
          </filter>
          <filter>
            <artifact>commons-logging:*</artifact>
            <includes>
              <include>**</include>
            </includes>
          </filter>
          <!-- Exclude the jar signature files of every artifact -->
          <filter>
            <artifact>*:*</artifact>
            <excludes>
              <exclude>META-INF/*.SF</exclude>
              <exclude>META-INF/*.DSA</exclude>
              <exclude>META-INF/*.RSA</exclude>
            </excludes>
          </filter>
        </filters>
      </configuration>
    </execution>
  </executions>
</plugin>

maven build将项目所有依赖单独copy到项目外的lib下

<!-- Copies all project dependencies into the lib directory under the build directory. -->
<plugin>
  <groupId>org.apache.maven.plugins</groupId>
  <artifactId>maven-dependency-plugin</artifactId>
  <executions>
    <execution>
      <id>copy-dependencies</id>
      <phase>prepare-package</phase>
      <goals>
        <goal>copy-dependencies</goal>
      </goals>
      <configuration>
        <outputDirectory>${project.build.directory}/lib</outputDirectory>
        <!-- Overwrite settings for files already present in the output directory -->
        <overWriteReleases>false</overWriteReleases>
        <overWriteSnapshots>false</overWriteSnapshots>
        <overWriteIfNewer>true</overWriteIfNewer>
      </configuration>
    </execution>
  </executions>
</plugin>
   <!-- Adds a Class-Path (entries prefixed with lib/) and a Main-Class to the jar manifest. -->
   <plugin>
     <groupId>org.apache.maven.plugins</groupId>
     <artifactId>maven-jar-plugin</artifactId>
     <configuration>
       <archive>
         <manifest>
           <addClasspath>true</addClasspath>
           <classpathPrefix>lib/</classpathPrefix>
           <!-- Placeholder: replace with the fully qualified name of the real main class -->
           <mainClass>theMainClass</mainClass>
         </manifest>
       </archive>
     </configuration>
   </plugin>

将配置文件打入项目

<!-- Package extra resource files into the jar. Intended for configuration files kept
     in a folder that is not on the build path (here: temp). -->
<resources>
        <!-- Declaring <resources> replaces Maven's default resource directory, so it is
             listed explicitly here to keep src/main/resources in the jar. -->
        <resource>
            <directory>src/main/resources</directory>
        </resource>
        <!-- Everything under temp, including subdirectories -->
        <resource>
            <directory>temp</directory>
            <includes>
                <include>**/**</include>
            </includes>
        </resource>
</resources>

hive自定义函数的依赖在集群中没有,加载方式

  根据函数在 hive 中执行时的报错信息,定位到缺失的依赖包

临时函数解决办法

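 通过 add jar 方式将缺失依赖加载进 hive,例如:add jar /path/to/missing-dependency.jar;
 创建临时函数时指定自己项目中继承 UDF 的类的全限定名,例如:create temporary function regexp_count as '<包名>.Regexp_Count';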
  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值