第一种方式:通过正则表达式来获取
第二种方式:通过 jsoup 来抓取,需要在 pom.xml 中添加以下依赖
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.13.1</version>
</dependency>
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
/**
 * Demonstrates two ways of extracting the value of the hidden
 * {@code transaction_id} form field from an HTML document: a regular
 * expression ({@link #getContext(String)}) and the jsoup HTML parser
 * ({@link #getContext2(String)}).
 */
public class GrabUtil {
    /**
     * Matches the hidden {@code transaction_id} input tag and captures its
     * {@code value} attribute in group 1. The capture stops at the closing
     * double quote, so values of any length are extracted without running
     * into the markup that follows. The attribute order and spacing must
     * match the pattern literally.
     */
    private static final Pattern TRANSACTION_ID_PATTERN =
            Pattern.compile("<input name=\"transaction_id\" type=\"hidden\" value=\"([^\"]*)\"");

    /**
     * Runs the regular-expression extraction against a hard-coded sample page
     * and prints the first value found.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        String html = "<!DOCTYPE html>" +
                "<html ng-app=\"\">" +
                "<head>" +
                " <title>授权访问您的帐户 - Worktile</title>" +
                " <link rel=\"stylesheet\" type=\"text/css\"" +
                " href=\"/dist/open.min.css?v=7.8.3\">" +
                " <link rel=\"Shortcut Icon\" href=\"/image/favicon/favicon.ico\">" +
                "</head>" +
                " <form action=\"/oauth2/decision\" method=\"post\">" +
                " <input name=\"grant_type\" type=\"hidden\" value=\"code\">" +
                " <input name=\"team_id\" type=\"hidden\" value=\"5704def7c8b59ad142f6d33b\">" +
                " <input name=\"transaction_id\" type=\"hidden\" value=\"1UqdZnRk\">" +
                "" +
                " <div class=\"form-group open-oauth-btn\">" +
                " <input type=\"submit\" value=\"授权\" id=\"allow\" class=\"btn btn-primary open-oauth\">" +
                " <input type=\"submit\" value=\"取消\" name=\"cancel\" id=\"deny\" class=\"btn btn-primary cancel\">" +
                " </div>" +
                " </form>" +
                "</div>" +
                "" +
                "</body>" +
                "</html>";
        // Simple example; stands in for: String html = getHtml(String urlString);
        List<String> resultList = getContext(html);
        System.out.println("取得的信息为:" + resultList.get(0));
    }

    /**
     * Extracts content with a regular expression.
     *
     * @param html the HTML document text to scan
     * @return the captured {@code transaction_id} values, one per match, in
     *         the order the matches appear in the document; empty when
     *         nothing matches
     */
    public static List<String> getContext(String html) {
        List<String> resultList = new ArrayList<>();
        // The pattern is compiled once as a constant; this only binds it to the input.
        Matcher m = TRANSACTION_ID_PATTERN.matcher(html);
        while (m.find()) {
            resultList.add(m.group(1));
        }
        return resultList;
    }

    /**
     * Extracts content with jsoup.
     *
     * <p>Looks up elements whose {@code name} attribute is
     * {@code transaction_id} and reads the value of the first one only.
     *
     * @param html the HTML document text to parse
     * @return a list holding the value of the first matching element, or an
     *         empty list when no element matches
     */
    public static List<String> getContext2(String html) {
        List<String> resultList = new ArrayList<>();
        Document doc = Jsoup.parse(html);
        Elements items = doc.getElementsByAttributeValue("name", "transaction_id");
        // Guard the index access: get(0) on an empty result would throw.
        if (!items.isEmpty()) {
            resultList.add(items.get(0).val());
        }
        return resultList;
    }
}