根据手机号得到运营商和归属地的hive UDTF

博客指出Hive的UDTF只能单独使用的问题,即不能用select col1,udtf(col2) from xxx。通过多加一个原样返回的参数,实现了select udtf(col1,col2) from xxx,使col1字段可用于和其他表join,还附上了pom文件。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

package com.tq.udf.person;


import com.google.gson.JsonObject;
import com.google.gson.JsonParser;



import com.google.i18n.phonenumbers.PhoneNumberUtil;
import com.google.i18n.phonenumbers.Phonenumber;
import com.google.i18n.phonenumbers.geocoding.PhoneNumberOfflineGeocoder;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;


import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


/**
 * @author tq
 * @date 2019/5/5 16:10
 */
public class GetTelDetail extends GenericUDTF {
    public static void main(String[] args) throws HiveException {


//        String[] result = getInfo("15967193899asd");
//        System.out.println(result[0]);
//        System.out.println(result[1]);
//
//        Object[] objects = {"qwe",null};
//        new GetTelDetail().process(objects);
    }

    public static String[] getInfo(String tel) {

        //正则提取数字
        String regex = "[^0-9]";
        Pattern p = Pattern.compile(regex);
        Matcher m = p.matcher(tel);
        //判断是否为11位号码
        if(m.replaceAll("").length()==11) {
            tel = m.replaceAll("");
        }else {
            tel = "123";
        }
       // System.out.println(tel);

        //第三方接口,可以返回运营商,归属地,后者只精确到省份,所以只要运营商
        String url = "https://tcc.taobao.com/cc/json/mobile_tel_segment.htm?tel=".concat(tel);

        StringBuilder json = new StringBuilder();
        try {
            URL oracel = new URL(url);
            // System.out.println(oracel);
            URLConnection yc = oracel.openConnection();
            // System.out.println(yc);
            BufferedReader in = new BufferedReader(new InputStreamReader(
                    yc.getInputStream(), "GBK"
            ));
            String inputLine = null;
            while ((inputLine = in.readLine()) != null) {
                json.append(inputLine);
            }
            in.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
        // System.out.println(json);
        String b = json.toString().replace("__GetZoneResult_ = ", "").replace("'", "\"").replace(",", ",\"").replace("{", "{\"").replace(":", "\":").replace(" ", "");

        // System.out.println(b.contains("catName"));
        String[] result = new String[2];
        
        //谷歌的包,可以根据手机号得到归属地,精确到市
        PhoneNumberUtil phoneNumberUtil = PhoneNumberUtil.getInstance();
        PhoneNumberOfflineGeocoder phoneNumberOfflineGeocoder = PhoneNumberOfflineGeocoder.getInstance();

        String language = "CN";
        Phonenumber.PhoneNumber referencePhonenumber = null;


        try {
            referencePhonenumber = phoneNumberUtil.parse(tel, language);
        } catch (Exception e) {
            e.printStackTrace();
        }

        String city = phoneNumberOfflineGeocoder.getDescriptionForNumber(referencePhonenumber, Locale.CHINA);

        result[1] = city;
        
        //判断第三方接口返回的数据是否正确
        if (b.contains("catName")) {
            JsonParser jsonParser = new JsonParser();
            JsonObject jsonObject = jsonParser.parse(b).getAsJsonObject();

            // System.out.println(jsonObject.has("catName"));
            result[0] = jsonObject.get("catName").getAsString();
            return result;
        } else {
            result[0] = "未知";
            result[1] = "未知";
            return result;
        }
    }

    @Override
    public StructObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException {
        if (args.length != 2) {
            throw new UDFArgumentLengthException("ExplodeMap takes only two argument");
        }
        if (args[0].getCategory() != ObjectInspector.Category.PRIMITIVE || args[1].getCategory() != ObjectInspector.Category.PRIMITIVE ) {
            throw new UDFArgumentException("ExplodeMap takes string as a parameter");
        }

        ArrayList<String> fieldNames = new ArrayList<String>();
        ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
        fieldNames.add("col1");
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        fieldNames.add("col2");
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        fieldNames.add("col3");
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
    }

    @Override
    public void process(Object[] objects) throws HiveException {

        String tel;
        if (objects[1] == null || objects[1] == "") {
            tel = "123";
        } else {
            tel = objects[1].toString();
        }


        String[] arr = getInfo(tel);


        String[] result = {objects[0].toString(), arr[0], arr[1]};
//        System.out.println(result[0]);
//        System.out.println(result[1]);
//        System.out.println(result[2]);
        forward(result);
    }

    @Override
    public void close() throws HiveException {

    }
}

由于hive的udtf只能单独使用,不能select col1,udtf(col2) from xxx
所以我多加了个参数,这个参数原样返回,这样使用 select udtf(col1,col2) from xxx,col1字段可以用做和其它表join


pom文件附上

<dependency>
        <groupId>org.apache.hive</groupId>
        <artifactId>hive-exec</artifactId>
        <version>1.1.0-cdh5.16.0</version>
    </dependency>

    <!-- https://mvnrepository.com/artifact/org.apache.hive/hive-common -->
    <dependency>
        <groupId>org.apache.hive</groupId>
        <artifactId>hive-common</artifactId>
        <version>1.1.0-cdh5.16.1</version>
    </dependency>

    <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common -->
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>2.6.0-cdh5.16.0</version>
    </dependency>

        <dependency>
            <groupId>com.google.code.gson</groupId>
            <artifactId>gson</artifactId>
            <version>2.8.3</version>
        </dependency>

        <!-- https://mvnrepository.com/artifact/com.googlecode.libphonenumber/libphonenumber -->
        <dependency>
            <groupId>com.googlecode.libphonenumber</groupId>
            <artifactId>libphonenumber</artifactId>
            <version>8.9.9</version>
        </dependency>

        <!-- https://mvnrepository.com/artifact/com.googlecode.libphonenumber/geocoder -->
        <dependency>
            <groupId>com.googlecode.libphonenumber</groupId>
            <artifactId>geocoder</artifactId>
            <version>2.98</version>
        </dependency>

        <!-- https://mvnrepository.com/artifact/com.googlecode.libphonenumber/carrier -->
        <dependency>
            <groupId>com.googlecode.libphonenumber</groupId>
            <artifactId>carrier</artifactId>
            <version>1.88</version>
        </dependency>

        <!-- https://mvnrepository.com/artifact/com.googlecode.libphonenumber/prefixmapper -->
        <dependency>
            <groupId>com.googlecode.libphonenumber</groupId>
            <artifactId>prefixmapper</artifactId>
            <version>2.97</version>
        </dependency>
评论 5
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值