package xxx;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Static helpers for extracting parts of URL strings: the domain at a given depth, the raw
 * query-parameter pairs, and the first label of the host.
 */
public class URLUtil {
    private static final Logger LOGGER = LoggerFactory.getLogger(URLUtil.class);

    /** Highest {@code level} accepted by {@link #getDomain(String, int)}. */
    private static final int MAX_LEVEL = 3;

    /**
     * Recognised domain suffixes. Two-part suffixes are listed first because regex alternation
     * is tried left to right, so "com.cn" is preferred over the shorter "com".
     */
    private static final String RE_TOP_DOMAIN =
            "(?:com\\.cn|net\\.cn|gov\\.cn|org\\.nz|org\\.cn|com|net|org|gov)";

    /** Finds a dotted-quad anywhere in a raw string; group 4 is the address itself. */
    private static final Pattern PATTERN_IP =
            Pattern.compile("((http://)|(https://))?((\\d+\\.){3}(\\d+))");

    /** Matches a host that is entirely a dotted-quad. */
    private static final Pattern PATTERN_IP_HOST = Pattern.compile("(\\d+\\.){3}\\d+");

    /** Matches a host made only of digits and dots (with at least one digit). */
    private static final Pattern PATTERN_NUMERIC_HOST = Pattern.compile("[\\d.]*\\d[\\d.]*");

    /**
     * Per-level patterns applied to a parsed host. The suffix must end the host (an optional
     * trailing root dot is tolerated), so "paypal.com.evil.org" resolves to "evil.org" and
     * "www.computer.org" is not mistaken for "www.com".
     */
    private static final Pattern[] HOST_DOMAIN_PATTERNS = buildDomainPatterns("(?=\\.?\\z)");

    /**
     * Per-level patterns applied to raw input that could not be parsed as a URL. The suffix
     * only has to end on a label boundary because a path or query may follow it.
     */
    private static final Pattern[] RAW_DOMAIN_PATTERNS = buildDomainPatterns("(?![\\w-])");

    /**
     * Builds one pattern per level: between one and {@code level} labels, then a known suffix,
     * then the given trailing assertion. Labels may contain hyphens.
     */
    private static Pattern[] buildDomainPatterns(String tail) {
        Pattern[] patterns = new Pattern[MAX_LEVEL];
        for (int level = 1; level <= MAX_LEVEL; level++) {
            patterns[level - 1] =
                    Pattern.compile("(?:[\\w-]+\\.){1," + level + "}" + RE_TOP_DOMAIN + tail);
        }
        return patterns;
    }

    /**
     * Returns the host of {@code url}, or {@code null} when the string cannot be parsed as a URL
     * or has no host component.
     */
    private static String parseHost(String url) {
        try {
            String host = new URL(url).getHost();
            return StringUtils.isEmpty(host) ? null : host;
        } catch (java.net.MalformedURLException e) {
            // Scheme-less input such as "www.example.com/path" is still handled by the caller's
            // raw-string fallback, so this is not an error condition.
            LOGGER.debug("Not a parseable URL, falling back to raw matching: {}", url, e);
            return null;
        }
    }

    /**
     * Extracts the domain of a URL down to the requested depth.
     *
     * <p>{@code level} is the maximum number of labels kept in front of the recognised suffix:
     * for {@code http://a.www.baidu.com/x}, level 1 gives {@code baidu.com}, level 2 gives
     * {@code www.baidu.com} and level 3 gives {@code a.www.baidu.com}. Fewer labels are returned
     * when the host has fewer.
     *
     * <p>When the input parses as a URL, only its host is examined, so dotted numbers or
     * domain-like text in the path or query are ignored. Input that does not parse (for example
     * a bare host without a scheme) is matched as a raw string.
     *
     * @param url   URL or bare host; may be {@code null}
     * @param level depth from 1 to 3
     * @return the host itself for {@code localhost} and IPv4 hosts (regardless of {@code level});
     *         otherwise the matched domain, or an empty string when the input is blank, the
     *         level is out of range, or no known suffix is found
     */
    public static String getDomain(String url, int level) {
        if (StringUtils.isBlank(url)) {
            return "";
        }

        String host = parseHost(url);
        if (host != null) {
            if ("localhost".equalsIgnoreCase(host) || PATTERN_IP_HOST.matcher(host).matches()) {
                return host;
            }
        } else {
            Matcher ipMatcher = PATTERN_IP.matcher(url);
            if (ipMatcher.find()) {
                return ipMatcher.group(4);
            }
        }

        if (level < 1 || level > MAX_LEVEL) {
            return "";
        }
        Matcher domainMatcher = host != null
                ? HOST_DOMAIN_PATTERNS[level - 1].matcher(host)
                : RAW_DOMAIN_PATTERNS[level - 1].matcher(url);
        return domainMatcher.find() ? domainMatcher.group() : "";
    }

    /**
     * Splits a query string into its raw parameter pairs.
     *
     * <p>The query is trimmed and split on {@code '&'}; the pieces are returned as-is, without
     * URL-decoding or separating keys from values.
     *
     * @param query query string such as {@code "a=1&b=2"}; may be {@code null}
     * @return a new mutable list of the pairs, empty when {@code query} is {@code null} or blank
     */
    public static List<String> getParamPairs(String query) {
        List<String> params = new ArrayList<>();
        if (StringUtils.isBlank(query)) {
            return params;
        }
        for (String pair : query.trim().split("&")) {
            params.add(pair);
        }
        return params;
    }

    /**
     * Returns the first label of the URL's host.
     *
     * <p>Hosts without a dot, and hosts consisting only of digits and dots (such as IPv4
     * addresses), are returned whole.
     *
     * @param url absolute URL including its scheme
     * @return the host prefix, or {@code null} when {@code url} cannot be parsed or the host
     *         contains no label at all
     */
    public static String getHostPrefix(String url) {
        final String host;
        try {
            host = new URL(url).getHost();
        } catch (java.net.MalformedURLException e) {
            LOGGER.error("Cannot parse URL: {}", url, e);
            return null;
        }

        if (host.indexOf('.') < 0 || PATTERN_NUMERIC_HOST.matcher(host).matches()) {
            return host;
        }
        // split drops trailing empty strings, so a host made only of dots yields no labels.
        String[] labels = host.split("\\.");
        return labels.length == 0 ? null : labels[0];
    }
}
Java提取网络URL一级二级三级域名
最新推荐文章于 2024-08-28 13:42:17 发布