Hive UDF for Batch Writes to Redis

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.*;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.IntWritable;
import redis.clients.jedis.HostAndPort;
import redis.clients.jedis.Jedis;
import redis.clients.jedis.Pipeline;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

@Description(name = "redis_batch_hset",
        value = "_FUNC_(host_and_port, keyField, array<map<string,string>>) - batch-write each map as a Redis hash via pipeline and return 1"
)
public class RedisBatchHSetUDF extends GenericUDF {

    private HostAndPort hostAndPort;
    private String keyField;
    private Object writableKeyField; // actually an org.apache.hadoop.io.Text instance
    private StandardListObjectInspector paramsListInspector;
    private StandardMapObjectInspector paramsElementInspector;

    @Override
    public Object evaluate(DeferredObject[] arg0) throws HiveException {

        try (Jedis jedis = new Jedis(hostAndPort.getHost(), hostAndPort.getPort(), 10000, 60000);
             Pipeline pipeline = jedis.pipelined()
        ) {
            for (int i = 0; i < paramsListInspector.getListLength(arg0[2].get()); i++) {
                Object row = paramsListInspector.getListElement(arg0[2].get(), i);
                Map<?, ?> map = paramsElementInspector.getMap(row);
//                Object obj = ObjectInspectorUtils.copyToStandardJavaObject(row, paramsElementInspector); // convert to a plain java.util.Map; otherwise keys/values remain Hadoop Writable objects
                if (map.containsKey(writableKeyField)) {
                    String did = map.get(writableKeyField).toString();
                    Map<String, String> data = new HashMap<>();
                    for (Map.Entry<?, ?> entry : map.entrySet()) {
                        if (!writableKeyField.equals(entry.getKey()) && entry.getValue() != null && !"".equals(entry.getValue().toString())) {
                            data.put(entry.getKey().toString(), entry.getValue().toString());
                        }
                    }
                    pipeline.hmset(did, data);
                }
            }
            pipeline.sync();
            return new IntWritable(1);
        } catch (IOException e) {
            e.printStackTrace();
            throw new HiveException(e);
        }

    }


    @Override
    public String getDisplayString(String[] arg0) {
        return "redis_batch_hset(redishost_and_port,keyField, array<map<string,string>>)";
    }


    @Override
    public ObjectInspector initialize(ObjectInspector[] arg0)
            throws UDFArgumentException {
        if (arg0.length != 3) {
            throw new UDFArgumentException("Expecting three arguments: <redis host:port>, <keyField>, <array<map<string,string>>>");
        }
        // validate the first argument: a constant string of the form "host:port"
        if (arg0[0].getCategory() == Category.PRIMITIVE
                && ((PrimitiveObjectInspector) arg0[0]).getPrimitiveCategory() == PrimitiveObjectInspector.PrimitiveCategory.STRING) {
            if (!(arg0[0] instanceof ConstantObjectInspector)) {
                throw new UDFArgumentException("redis host:port must be constant");
            }
            ConstantObjectInspector redishost_and_port = (ConstantObjectInspector) arg0[0];

            String[] host_and_port = redishost_and_port.getWritableConstantValue().toString().split(":");
            hostAndPort = new HostAndPort(host_and_port[0], Integer.parseInt(host_and_port[1]));
        } else {
            throw new UDFArgumentException("redis host:port must be a constant string");
        }

        // validate the second argument: a constant string naming the key field
        if (arg0[1].getCategory() == Category.PRIMITIVE
                && ((PrimitiveObjectInspector) arg0[1]).getPrimitiveCategory() == PrimitiveObjectInspector.PrimitiveCategory.STRING) {
            if (!(arg0[1] instanceof ConstantObjectInspector)) {
                throw new UDFArgumentException("redis hset key field must be constant");
            }
            ConstantObjectInspector keyFieldOI = (ConstantObjectInspector) arg0[1];

            keyField = keyFieldOI.getWritableConstantValue().toString();
            writableKeyField = keyFieldOI.getWritableConstantValue();
        } else {
            throw new UDFArgumentException("keyField must be a constant string");
        }

        // validate the third argument: must be an array<map<string,string>>
        if (arg0[2].getCategory() != Category.LIST) {
            throw new UDFArgumentException(" Expecting an array<map<string,string>> field as third argument ");
        }
        ListObjectInspector third = (ListObjectInspector) arg0[2];
        if (third.getListElementObjectInspector().getCategory() != Category.MAP) {
            throw new UDFArgumentException(" Expecting an array<map<string,string>> field as third argument ");
        }
        paramsListInspector = ObjectInspectorFactory.getStandardListObjectInspector(third.getListElementObjectInspector());
        paramsElementInspector = (StandardMapObjectInspector) third.getListElementObjectInspector();
        System.out.println(paramsElementInspector.getMapKeyObjectInspector().getCategory());
        System.out.println(paramsElementInspector.getMapValueObjectInspector().getCategory());

        return PrimitiveObjectInspectorFactory.writableIntObjectInspector;
    }

}


I won't go into how to register the function here; see my earlier MySQL UDF post.
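
For reference, registration looks roughly like this (the jar path below is a placeholder; if the class is declared inside a package, use its fully qualified name):

-- the jar path is hypothetical; point it at wherever you deployed the UDF jar
CREATE FUNCTION xydb.redis_batch_hset AS 'RedisBatchHSetUDF'
USING JAR 'hdfs:///path/to/redis-batch-hset-udf.jar';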

Usage is fairly strict:

SELECT g,xydb.redis_batch_hset('192.168.78.87:6379','did',collect_list(map('did',concat('xyzs.profile.',did),'sex',sex,'age',age))) AS result
FROM 
  (
	SELECT did,pmod(abs(hash(did)),1000) AS g,sex,age FROM data_mining.adl_xyzs_user_profile_day_new WHERE ds='2017-08-02' AND isnotnull(sex) AND isnotnull(age)
  ) a
GROUP BY g;


redis_batch_hset(redishost_and_port,keyField, array<map<string,string>>)

The first argument is the host:port of the Redis instance to connect to. Our Redis has no password and writes go to the default db0, so a single host:port string is enough; if your setup differs, adjust the code yourself.
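
If you do need a password or a non-default db, a minimal sketch of the change follows. It mirrors the connection setup in evaluate(); the password and dbIndex values are hypothetical and would have to be wired in (for example as extra constant arguments to the UDF):

import redis.clients.jedis.Jedis;
import redis.clients.jedis.Pipeline;

import java.util.Map;

// Sketch only, not the UDF itself: open the connection, authenticate, select the db,
// then pipeline the hash writes the same way the UDF does.
public class AuthedRedisBatchWriter {

    static void hmsetBatch(String host, int port, String password, int dbIndex,
                           Map<String, Map<String, String>> rows) {
        try (Jedis jedis = new Jedis(host, port, 10000, 60000)) {
            jedis.auth(password);   // skip this call if requirepass is not set
            jedis.select(dbIndex);  // 0 is the default db
            Pipeline pipeline = jedis.pipelined();
            for (Map.Entry<String, Map<String, String>> row : rows.entrySet()) {
                pipeline.hmset(row.getKey(), row.getValue()); // one HMSET per key
            }
            pipeline.sync();        // flush the whole batch in one round trip
        }
    }
}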

The third argument is the data to write: an array whose elements are map<string,string>.

The second argument names the field inside each map of the third argument whose value becomes the Redis hash key; in the example above that field is did. The named field must exist in the map, otherwise the row is ignored.


did is the unique user identifier. Because the volume is fairly large (over 300,000 dids to load per day), the rows can be grouped by did mod N; here I split them into 1,000 groups and write each group with its own pipeline, so every array holds only a few hundred rows and the pipelined batch commit keeps Redis write throughput high.


Finally, a simple test case:


import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StandardMapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.junit.Before;
import org.junit.Test;
public class RedisBatchHsetUDFTest {

    private RedisBatchHSetUDF udf;

    @Before
    public void before() {
        udf = new RedisBatchHSetUDF();
    }


    @Test
    public void test() throws HiveException {
        Text host = new Text("name87:6379");
        Text keyField = new Text("did");

        // constant object inspectors for the host:port and key-field arguments
        ObjectInspector hostAndPort = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, host);
        ObjectInspector key = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, keyField);
        TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString("map<string,string>");
        StandardMapObjectInspector elementOI = (StandardMapObjectInspector) TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo);
        StandardListObjectInspector listIO = ObjectInspectorFactory.getStandardListObjectInspector(elementOI);
        ObjectInspector resultoi = udf.initialize(new ObjectInspector[]{hostAndPort, key, listIO});
        // build a one-element list containing a single map row
        Object list = listIO.create(1);
        Object row = elementOI.create();

        elementOI.put(row, new Text("did"), new Text("xiaojuntest"));
        elementOI.put(row, new Text("sex"), new IntWritable(1));
        elementOI.put(row, new Text("age"), new Text("85"));
        listIO.set(list, 0, row);

        Object result = udf.evaluate(new GenericUDF.DeferredObject[]{new GenericUDF.DeferredJavaObject(host), new GenericUDF.DeferredJavaObject(keyField), new GenericUDF.DeferredJavaObject(list)});
        System.out.println(result);

    }

//    @Test
//    public void test2() {
//        TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString("array<struct<a:int,b:string>>");
//        StandardListObjectInspector objInsp = (StandardListObjectInspector) TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo);
//        System.out.println(objInsp);
//    }


}


