Java提取网络URL一级二级三级域名

package xxx;

import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class URLUtil {
	
	private static final Logger LOGGER = LoggerFactory.getLogger(URLUtil.class);

	private static final String RE_TOP_DOMAIN = "(com\\.cn|net\\.cn|gov\\.cn|org\\.nz|org\\.cn|com|net|org|gov)";

    // 一级域名提取
    private static final String RE_TOP_1 = "(\\w*\\.?){1}\\." + RE_TOP_DOMAIN;

    // 二级域名提取
    private static final String RE_TOP_2 = "(\\w*\\.?){2}\\." + RE_TOP_DOMAIN;

    // 三级域名提取
    private static final String RE_TOP_3 = "(\\w*\\.?){3}\\." + RE_TOP_DOMAIN;

    private static final Pattern PATTEN_IP = Pattern.compile("((http://)|(https://))?((\\d+\\.){3}(\\d+))");
    private static final Pattern PATTEN_TOP1 = Pattern.compile(RE_TOP_1);
    private static final Pattern PATTEN_TOP2 = Pattern.compile(RE_TOP_2);
    private static final Pattern PATTEN_TOP3 = Pattern.compile(RE_TOP_3);

    public static String getDomain(String url, int level) {
    	try {
    		URL u = new URL(url);
        	if ("localhost".equalsIgnoreCase(u.getHost())) {
        		return u.getHost();
        	}
		} catch (Exception e) {
			LOGGER.error(url, e);
		}
    	
        Matcher matcher = PATTEN_IP.matcher(url);
        if (matcher.find()){
            return matcher.group(4);
        }

        switch (level) {
            case 1:
                matcher = PATTEN_TOP1.matcher(url);
                break;
            case 2:
                matcher = PATTEN_TOP2.matcher(url);
                break;
            case 3:
                matcher = PATTEN_TOP3.matcher(url);
                break;
            default:
                return "";
        }
        
        if (matcher.find()) {
            return matcher.group(0);
        }
        
        return "";
    }
    
    /**
     * 获取参数对
     * @param url
     * @return
     */
    public static List<String> getParamPairs(String query){
    	List<String> params = new ArrayList<>();
    	try {
    		query = query.trim();
			if (StringUtils.isNotBlank(query)) {
				String arr[] = query.split("&");
				for (String e :arr) {
					params.add(e);
				}
			}
		} catch (Exception e) {
			LOGGER.error(query, e);
		}
    	
    	return params;
    }
    
    /**
     * 获取host前缀
     * @param url
     * @return
     */
    public static String getHostPrefix(String url) {
    	String search = ".";
    	try {
    		URL u = new URL(url);
    		String host = u.getHost();
    		if (!host.contains(search)) {
    			return host;
    		}
    		
    		try {
    			Long.parseLong(host.replace(search, ""));
    			return host;
			} catch (Exception e) {
			}

    		return host.split("\\.")[0];
		} catch (Exception e) {
			LOGGER.error(url, e);
		}
    	
    	return null;
    }
}

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值