自定义UDF函数实现:根据用户的生日,判断用户是什么星座.
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import java.util.Date;
/**
 * Hive UDF that maps a birthday to the Chinese name of its Western zodiac sign.
 *
 * <p>Three overloads are offered: a date string in {@code yyyy-MM-dd} form, a
 * {@link Date}, or a separate month and day. Every overload returns {@code null}
 * when its input is {@code null} or does not describe a valid calendar day, so a
 * bad row yields SQL NULL instead of failing the query.
 *
 * <p>The returned {@link Text} is a single instance owned by this UDF object and is
 * overwritten by the next successful call; callers that need to keep a value must
 * copy it.
 */
@Description(name = "zodiac_cn"
        , value = "_FUNC_(date) - from the input date string or separate month and day arguments, returns the sign of the Zodiac."
        , extended = "Example:\n > select _FUNC_(date_string) from src;\n > select _FUNC_(month, day) from src;")
public class UDFZodiacSignCn extends UDF {
    /** Input date strings must use the fixed pattern yyyy-MM-dd. */
    public final static DateTimeFormatter DEFAULT_DATE_FORMATTER = DateTimeFormat.forPattern("yyyy-MM-dd");

    /**
     * Same pattern pinned to UTC. A date-only string is resolved to midnight, and in
     * some time zones midnight does not exist on a daylight-saving transition day;
     * parsing in UTC avoids rejecting such valid dates.
     */
    private static final DateTimeFormatter UTC_DATE_FORMATTER = DEFAULT_DATE_FORMATTER.withZoneUTC();

    /** Sign names; index 0 is the sign that covers the start of January. */
    private static final String[] ZODIAC_NAMES = {"魔羯座", "水瓶座", "双鱼座", "白羊座", "金牛座", "双子座",
            "巨蟹座", "狮子座", "处女座", "天秤座", "天蝎座", "射手座"};

    /** For each month (January first), the last day that still belongs to the earlier sign. */
    private static final int[] SPLIT_DAYS = {19, 18, 20, 20, 20, 21, 22, 22, 22, 22, 21, 21};

    /** Largest day number accepted for each month; February allows 29 because no year is known. */
    private static final int[] MAX_DAYS = {31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};

    /** Reused output holder, overwritten on every successful evaluation. */
    private final Text result = new Text();

    public UDFZodiacSignCn() {
    }

    /**
     * Returns the zodiac sign for a birthday given as a {@code yyyy-MM-dd} string.
     *
     * @param birthday date string, may be {@code null}
     * @return the sign name, or {@code null} if the input is {@code null} or cannot be parsed
     */
    public Text evaluate(Text birthday) {
        if (birthday == null) {
            return null;
        }
        final DateTime parsed;
        try {
            parsed = UTC_DATE_FORMATTER.parseDateTime(birthday.toString());
        } catch (IllegalArgumentException e) {
            // Malformed or impossible date: report NULL rather than failing the query.
            return null;
        }
        // Read month/day straight from the parsed value; no round trip through java.util.Date.
        return evaluate(new IntWritable(parsed.getMonthOfYear()), new IntWritable(parsed.getDayOfMonth()));
    }

    /**
     * Returns the zodiac sign for a birthday given as a {@link Date}, interpreted in
     * the JVM default time zone.
     *
     * @param birthday the birthday, may be {@code null}
     * @return the sign name, or {@code null} if {@code birthday} is {@code null}
     */
    public Text evaluate(Date birthday) {
        if (birthday == null) {
            // Joda treats a null instant as "now"; a missing birthday must not become today's sign.
            return null;
        }
        DateTime dateTime = new DateTime(birthday);
        return evaluate(new IntWritable(dateTime.getMonthOfYear()), new IntWritable(dateTime.getDayOfMonth()));
    }

    /**
     * Returns the zodiac sign for a month and day.
     *
     * @param month month of year, 1-12, may be {@code null}
     * @param day   day of month, 1 up to the month's maximum, may be {@code null}
     * @return the sign name, or {@code null} if either argument is {@code null} or out of range
     */
    public Text evaluate(IntWritable month, IntWritable day) {
        if (month == null || day == null) {
            return null;
        }
        String zodiac = getZodiac(month.get(), day.get());
        if (zodiac == null) {
            return null;
        }
        result.set(zodiac);
        return result;
    }

    /**
     * Looks up the sign name for a month/day pair.
     *
     * @return the sign name, or {@code null} when the pair is not a valid calendar day
     */
    private String getZodiac(int month, int day) {
        if (month < 1 || month > 12) {
            return null;
        }
        if (day < 1 || day > MAX_DAYS[month - 1]) {
            return null;
        }
        // On or before the split day the date belongs to the earlier sign (index month - 1);
        // after it, to the next sign, wrapping from December back to index 0.
        int index = (day <= SPLIT_DAYS[month - 1]) ? month - 1 : month % 12;
        return ZODIAC_NAMES[index];
    }

    public static void main(String[] args) {
        UDFZodiacSignCn udfZodiacSignCn = new UDFZodiacSignCn();
        System.out.println("1990-11-02: "+udfZodiacSignCn.evaluate(new Text("1990-11-02")));
        // A date in the wrong format yields null.
        System.out.println(udfZodiacSignCn.evaluate(new Text("19901102")));
        System.out.println("2000-11-02: "+udfZodiacSignCn.evaluate(new Text("2000-11-02")));
        System.out.println("2000-01-02: "+udfZodiacSignCn.evaluate(new Text("2000-01-02")));
    }
}
测试结果显示:
1990-11-02: 天蝎座
null
2000-11-02: 天蝎座
2000-01-02: 魔羯座