【需求】
域名中包含一级、二级域名等信息,需要提取根域名。
例如:www.baidu.com 提取出 baidu.com
http://write.blog.csdn.net/ 提取出 csdn.net
【实现】
不多说,直接上代码,主要思路就是通过正则表达式解决。
import java.io.BufferedReader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Extracts the root domain (one label plus a known public suffix) from a
 * host name or URL using a regular expression.
 *
 * <p>Only the suffixes listed in {@link #RE_TOP} are recognised; the list is
 * intentionally small and must be extended for other top-level domains.
 */
public class TopDomainUtil {

    /**
     * Root-domain expression: one label, a dot, then a known suffix.
     *
     * <ul>
     *   <li>Multi-label suffixes come first so {@code com.cn} wins over {@code com}.</li>
     *   <li>Dots inside suffixes are escaped so they match a literal dot only.</li>
     *   <li>The trailing negative lookahead rejects a suffix that is followed by
     *       another label (optionally after a dot), so an inner label that merely
     *       looks like a suffix (the {@code co} in {@code www.co.example.com}) is
     *       not reported as the root.</li>
     * </ul>
     * The suffix list is incomplete; extend it as needed.
     */
    private static final String RE_TOP =
            "[\\w-]+\\.(?:com\\.cn|net\\.cn|gov\\.cn|org\\.nz|org\\.cn"
                    + "|com|net|org|gov|cc|biz|info|cn|co)(?!\\.?[\\w-])";

    /** Compiled once and shared by all instances. */
    private static final Pattern TOP_DOMAIN =
            Pattern.compile(RE_TOP, Pattern.CASE_INSENSITIVE);

    /** Creates an extractor; kept public and no-arg for existing callers. */
    public TopDomainUtil() {
    }

    /**
     * Returns the root domain found in {@code url}.
     *
     * @param url a host name or URL; may be {@code null}
     * @return the first root domain matched by the suffix list, or {@code url}
     *         itself (possibly {@code null}) when nothing matches
     */
    public String getTopDomain(String url) {
        if (url == null) {
            return null;
        }
        Matcher matcher = TOP_DOMAIN.matcher(url);
        // Check find() explicitly: calling group() without a successful match
        // throws IllegalStateException.
        return matcher.find() ? matcher.group() : url;
    }

    /** Small demonstration that prints {@code input ==> root domain} for sample inputs. */
    public static void main(String[] args) {
        TopDomainUtil obj = new TopDomainUtil();
        String[] samples = {
            "www.baidu.cc",
            "ac.asd.c.sina.com.cn",
            "whois.chinaz.com/reverse?ddlSearchMode=1",
            // No label directly precedes the suffix here (note the double dot),
            // so nothing matches and the input is returned unchanged.
            "http://write.blog..net/",
            "http://write.test.org.nz/",
        };
        for (String url : samples) {
            System.out.println(url + " ==> " + obj.getTopDomain(url));
        }
    }
}
【代码执行输出】
D:\>javac -encoding utf-8 TopDomainUtil.java
D:\>java TopDomainUtil
www.baidu.cc ==> baidu.cc
ac.asd.c.sina.com.cn ==> sina.com.cn
whois.chinaz.com/reverse?ddlSearchMode=1 ==> chinaz.com
http://write.blog..net/ ==> .net
http://write.test.org.nz/ ==> test.org.nz