hive concat_ws源代码

 
  
其他相关源码可以到以下链接查看:
https://github.com/apache/hive/tree/master/ql/src/java/org/apache/hadoop/hive/ql/udf/generic
 
  

 

package org.apache.hadoop.hive.ql.udf.generic;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.io.Text;

/**
 * Generic UDF for string function
 * <code>CONCAT_WS(sep, [string | array(string)]+)</code>.
 * This mimics the function from
 * MySQL http://dev.mysql.com/doc/refman/5.0/en/string-functions.html#
 * function_concat-ws
 *
 * @see org.apache.hadoop.hive.ql.udf.generic.GenericUDF
 */
@Description(name = "concat_ws",
    value = "_FUNC_(separator, [string | array(string)]+) - "
    + "returns the concatenation of the strings separated by the separator.",
    extended = "Example:\n"
    + "  > SELECT _FUNC_('.', 'www', array('facebook', 'com')) FROM src LIMIT 1;\n"
    + "  'www.facebook.com'")
public class GenericUDFConcatWS extends GenericUDF {
  private transient ObjectInspector[] argumentOIs;

  @Override
  public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
    if (arguments.length < 2) {
      throw new UDFArgumentLengthException(
          "The function CONCAT_WS(separator,[string | array(string)]+) "
            + "needs at least two arguments.");
    }

    // check if argument is a string or an array of strings
    for (int i = 0; i < arguments.length; i++) {
      switch(arguments[i].getCategory()) {
        case LIST:
          if (isStringOrVoidType(
              ((ListObjectInspector) arguments[i]).getListElementObjectInspector())) {
            break;
          }
        case PRIMITIVE:
          if (isStringOrVoidType(arguments[i])) {
          break;
          }
        default:
          throw new UDFArgumentTypeException(i, "Argument " + (i + 1)
            + " of function CONCAT_WS must be \"" + serdeConstants.STRING_TYPE_NAME
            + " or " + serdeConstants.LIST_TYPE_NAME + "<" + serdeConstants.STRING_TYPE_NAME
            + ">\", but \"" + arguments[i].getTypeName() + "\" was found.");
      }
    }

    argumentOIs = arguments;
    return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
  }

  protected boolean isStringOrVoidType(ObjectInspector oi) {
    if (oi.getCategory() == Category.PRIMITIVE) {
      if (PrimitiveGrouping.STRING_GROUP
          == PrimitiveObjectInspectorUtils.getPrimitiveGrouping(
              ((PrimitiveObjectInspector) oi).getPrimitiveCategory())
          || ((PrimitiveObjectInspector) oi).getPrimitiveCategory() == PrimitiveCategory.VOID) {
        return true;
      }
    }
    return false;
  }

  private final Text resultText = new Text();

  @Override
  public Object evaluate(DeferredObject[] arguments) throws HiveException {
    if (arguments[0].get() == null) {
      return null;
    }
    String separator = PrimitiveObjectInspectorUtils.getString(
        arguments[0].get(), (PrimitiveObjectInspector)argumentOIs[0]);

    StringBuilder sb = new StringBuilder();
    boolean first = true;
    for (int i = 1; i < arguments.length; i++) {
      if (arguments[i].get() != null) {
        if (first) {
          first = false;
        } else {
          sb.append(separator);
        }
        if (argumentOIs[i].getCategory().equals(Category.LIST)) {
          Object strArray = arguments[i].get();
          ListObjectInspector strArrayOI = (ListObjectInspector) argumentOIs[i];
          boolean strArrayFirst = true;
          for (int j = 0; j < strArrayOI.getListLength(strArray); j++) {
            if (strArrayFirst) {
              strArrayFirst = false;
            } else {
              sb.append(separator);
            }
            sb.append(strArrayOI.getListElement(strArray, j));
          }
        } else {
          sb.append(PrimitiveObjectInspectorUtils.getString(
              arguments[i].get(), (PrimitiveObjectInspector)argumentOIs[i]));
        }
      }
    }

    resultText.set(sb.toString());
    return resultText;
  }

  @Override
  public String getDisplayString(String[] children) {
    assert (children.length >= 2);
    return getStandardDisplayString("concat_ws", children);
  }
}

 

转载于:https://www.cnblogs.com/dtmobile-ksw/p/11149225.html

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
Hive中的CONCAT_WS函数是一个特殊形式的CONCAT函数,用于将多个字符串连接在一起,并使用指定的分隔符分隔它们。CONCAT_WS的语法为CONCAT_WS(separator,str1,str2,...)。第一个参数是分隔符,后面的参数是要连接的字符串。如果分隔符为NULL,则结果为NULL。CONCAT_WS会忽略分隔符参数后的NULL值,但不会忽略空字符串。例如,使用SELECT CONCAT_WS('_',id,name) AS con_ws FROM info LIMIT 1;可以将id和name字段用下划线连接起来。另外,如果使用CONCAT_WS('|', array())这种模式,array中的null值不会被跳过。在Hive中,还可以通过CONCAT_WS函数将数组中的元素连接起来。例如,使用SELECT CONCAT_WS(',',c_array) FROM test_array WHERE dt='2016-09-26' AND size(c_array)=2 LIMIT 2;可以将数组c_array的元素用逗号分隔连接起来。[1][2][3]

引用:
- [1][3] hive:函数:concat_ws函数 — https://blog.csdn.net/weixin_38750084/article/details/97775323
- [2] hiveconcat_ws的秘密 — https://blog.csdn.net/u012861792/article/details/125532456
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值