1,因业务需求,需要把hive的数据写入到redis的Set集合中,看网上都是写入到hash的实现,故自己写了一个写入Set的实现
2,需要的pom.xml文件
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
<exclusions>
<exclusion>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
</exclusions>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>${hive.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>redis.clients</groupId>
<artifactId>jedis</artifactId>
<version>2.9.0</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
<scope>test</scope>
</dependency>
</dependencies>
3,具体代码实现如下
import com.crgt.redis.JedisUtil;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.io.IntWritable;
import redis.clients.jedis.HostAndPort;
/**
* https://blog.csdn.net/xiao_jun_0820/article/details/76638198
* 入参校验,可以简单点
*/
@Description(name = "redis_batch_sadd", value = "_FUNC_(host_and_port,keyField, value(string)) - Return ret ")
public class RedisBatchSetUDF extends GenericUDF {
    /** Redis endpoint, parsed once in initialize() from the constant first argument. */
    private HostAndPort hostAndPort;
    /** Inspector for the second argument: the Redis set key. */
    private StringObjectInspector keyElementOI;
    /** Inspector for the third argument: the member value to SADD. */
    private StringObjectInspector valueElementOI;

    /**
     * Executes SADD key member against the shared Jedis connection.
     *
     * @param arg0 [0]=host:port (unused here, consumed in initialize), [1]=key, [2]=value
     * @return IntWritable(1) on success, IntWritable(-1) when the value is null/empty
     * @throws HiveException if the Redis call fails
     */
    @Override
    public Object evaluate(DeferredObject[] arg0) throws HiveException {
        try {
            String dataKey = keyElementOI.getPrimitiveJavaObject(arg0[1].get());
            String data = valueElementOI.getPrimitiveJavaObject(arg0[2].get());
            // Null check MUST come first: the original `data.isEmpty() || data == null`
            // threw NPE on null values before ever reaching the null test.
            if (dataKey == null || data == null || data.isEmpty()) {
                return new IntWritable(-1);
            }
            JedisUtil.getJedis(hostAndPort).sadd(dataKey, data);
            return new IntWritable(1);
        } catch (Exception e) {
            // Preserve the cause so the Hive task log shows the real failure.
            throw new HiveException(e);
        }
    }

    @Override
    public String getDisplayString(String[] arg0) {
        return "redis_batch_sadd(redishost_and_port,keyField, value(String))";
    }

    /**
     * Validates the three arguments and caches the Redis endpoint.
     * Arg 0 must be a CONSTANT string "host:port"; args 1 and 2 must be strings.
     *
     * @return the int object inspector for the 1/-1 status result
     * @throws UDFArgumentException on any argument-shape violation
     */
    @Override
    public ObjectInspector initialize(ObjectInspector[] arg0) throws UDFArgumentException {
        if (arg0.length != 3) {
            throw new UDFArgumentException(" Expecting three arguments: <redishost:port> <keyField> value<string> ");
        }
        // First argument: must be a constant string primitive. The original code
        // silently skipped non-string first arguments, leaving hostAndPort null
        // and causing an NPE later in evaluate(); now we fail fast instead.
        if (arg0[0].getCategory() != Category.PRIMITIVE
                || ((PrimitiveObjectInspector) arg0[0]).getPrimitiveCategory() != PrimitiveObjectInspector.PrimitiveCategory.STRING) {
            throw new UDFArgumentException("first argument must be a string of the form host:port");
        }
        if (!(arg0[0] instanceof ConstantObjectInspector)) {
            throw new UDFArgumentException("redis host:port must be constant");
        }
        ConstantObjectInspector redishost_and_port = (ConstantObjectInspector) arg0[0];
        String[] host_and_port = redishost_and_port.getWritableConstantValue().toString().split(":");
        if (host_and_port.length != 2) {
            throw new UDFArgumentException("first argument must be of the form host:port");
        }
        try {
            hostAndPort = new HostAndPort(host_and_port[0], Integer.parseInt(host_and_port[1]));
        } catch (NumberFormatException e) {
            throw new UDFArgumentException("invalid port in host:port: " + host_and_port[1]);
        }
        // Second and third arguments: plain (possibly non-constant) strings.
        ObjectInspector key = arg0[1];
        ObjectInspector value = arg0[2];
        if (!(key instanceof StringObjectInspector)) {
            throw new UDFArgumentException("second argument must be a string");
        }
        if (!(value instanceof StringObjectInspector)) {
            throw new UDFArgumentException("third argument must be a string");
        }
        this.keyElementOI = (StringObjectInspector) key;
        this.valueElementOI = (StringObjectInspector) value;
        return PrimitiveObjectInspectorFactory.writableIntObjectInspector;
    }
}
4,依赖的 JedisUtil如下,主要作用是单例,防止连接数过大
/**
 * Lazily creates and caches a single Jedis connection per JVM (i.e. per map
 * container), so every row handled by that container shares one connection
 * instead of opening a new one on each evaluate() call.
 */
public class JedisUtil {
    // Utility class: no instances.
    private JedisUtil() {
    }

    // volatile so the shutdown hook (a separate thread) sees the latest reference.
    private static volatile Jedis jedis = null;

    // SECURITY: the Redis password used to be hard-coded only. It is kept as a
    // backward-compatible fallback, but should be supplied via the
    // REDIS_PASSWORD environment variable and rotated.
    private static final String DEFAULT_PASSWORD = "iINEivdRA8Kpin13";

    /**
     * Returns the shared Jedis connection, creating and authenticating it on
     * first use. Synchronized so concurrent first calls create only one instance.
     *
     * @param hostAndPort Redis endpoint; only consulted on the first call
     * @return the shared, authenticated Jedis connection (DB index 2 selected)
     */
    public static synchronized Jedis getJedis(HostAndPort hostAndPort) {
        if (jedis == null) {
            // 10s connect timeout, 60s socket timeout.
            jedis = new Jedis(hostAndPort.getHost(), hostAndPort.getPort(), 10000, 60000);
            String password = System.getenv("REDIS_PASSWORD");
            jedis.auth(password != null && !password.isEmpty() ? password : DEFAULT_PASSWORD);
            jedis.select(2);
            // Close the connection when the JVM exits.
            Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
                @Override
                public void run() {
                    System.out.println(JedisUtil.class.getSimpleName() + " shutdown");
                    try {
                        if (jedis != null) {
                            jedis.close();
                        }
                    } catch (Exception e) {
                        e.printStackTrace();
                    } finally {
                        jedis = null;
                    }
                }
            }));
        }
        return jedis;
    }
}