import org.jsoup.Connection; //导入方法依赖的package包/类
/**
* 按照规则,加载解析html
* @param url:加载URL
* @param paramMap:请求参数
* @param cookieMap:请求cookie
* @param ifPost:是否使用post请求
* @param tagMap:解析规则[0-选择器方式、1-id方式、2-class方式、3-tag]
* @return
*/
public static Elements loadParse(String url, Map paramMap, Map cookieMap,
boolean ifPost, Map> tagMap) {
if (!isUrl(url)) {
return null;
}
try {
// 请求设置
Connection conn = Jsoup.connect(url);
conn.userAgent("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36");
if (paramMap != null && !paramMap.isEmpty()) {
conn.data(paramMap);
}
if (cookieMap != null && !cookieMap.isEmpty()) {
conn.cookies(cookieMap);
}
conn.timeout(5000);
// 发出请求
Document html = null;
if (ifPost) {
html = conn.post();
} else {
html = conn.get();
}
// 过滤元素
Elements resultAll = new Elements();
if (tagMap != null && !tagMap.isEmpty()) {
for (Entry> tag : tagMap.entrySet()) {
int tagType = tag.getKey();
Set tagNameList = tag.getValue();
if (tagNameList != null && tagNameList.size() > 0) {
for (String tagName : tagNameList) {
if (tagType == 0) {
Elements resultSelect = html.select(tagName);// 选择器方式
resultAll.addAll(resultSelect);
} else if (tagType == 1) {
Element resultId = html.getElementById(tagName);// 元素ID方式
resultAll.add(resultId);
} else if (tagType == 2) {
Elements resultClass = html.getElementsByClass(tagName);// ClassName方式
resultAll.addAll(resultClass);
} else if (tagType == 3) {
Elements resultTag = html.getElementsByTag(tagName);// html标签方式
resultAll.addAll(resultTag);
}
}
}
}
} else {
resultAll = html.getElementsByTag("body");
}
return resultAll;
} catch (IOException e) {
logger.error("", e);
}
return null;
}