最近刚好有个朋友想再次解析第三方网页的内容,用来查看交通违章信息。我无意中发现了一个比 htmlparser 更好用的 HTML 解析工具:jsoup。它最人性化的地方是支持类 jQuery 语法,对,你没看错,就是 jQuery 选择器语法。下载地址:
http://jsoup.org/
详细的使用文档可以参考这个地址,应该说得很清楚了: http://baike.baidu.com/view/4066913.htm
使用非常简单,示例如下(读取山西交通违章信息):
详细的使用文档可以参考这个地址,应该说得很清楚了: http://baike.baidu.com/view/4066913.htm
使用非常简单,示例如下(读取山西交通违章信息):
1
import java.util.HashMap;
2 import java.util.Map;
3
4 import org.jsoup.Jsoup;
5 import org.jsoup.nodes.Document;
6 import org.jsoup.select.Elements;
7 /**
8 *
9 * @author Rocky
10 *
11 */
12 public class spider {
13 private static final String POSTURL="http://59.49.18.116:8008/sxwwpt_wai/inquire/illegalAction!carInquire.action";
14 private void spiderData() throws Exception{
15 Map<String,String> req= new HashMap<String,String>();
16 req.put("authCode", "");
17 req.put("csjcKey","110000");
18 req.put("hpzl", "02");
19 req.put("vioViolation.hphm", "xxx"); // 您的车牌号
20 req.put("type","1");
21 req.put("pagination.currentPage", "1");
22 req.put("pagination.pageSize", "5");
23
24 Document doc=Jsoup.connect(POSTURL).data(req).get();
25 Elements newsHeadlines=doc.select(".if_tr td");
26 System.out.println(newsHeadlines.text());
27 }
28 /**
29 * @param args
30 * @throws Exception
31 */
32 public static void main(String[] args) throws Exception {
33
34 spider spider= new spider();
35 spider.spiderData();
36 }
37
38 }
2 import java.util.Map;
3
4 import org.jsoup.Jsoup;
5 import org.jsoup.nodes.Document;
6 import org.jsoup.select.Elements;
7 /**
8 *
9 * @author Rocky
10 *
11 */
12 public class spider {
13 private static final String POSTURL="http://59.49.18.116:8008/sxwwpt_wai/inquire/illegalAction!carInquire.action";
14 private void spiderData() throws Exception{
15 Map<String,String> req= new HashMap<String,String>();
16 req.put("authCode", "");
17 req.put("csjcKey","110000");
18 req.put("hpzl", "02");
19 req.put("vioViolation.hphm", "xxx"); // 您的车牌号
20 req.put("type","1");
21 req.put("pagination.currentPage", "1");
22 req.put("pagination.pageSize", "5");
23
24 Document doc=Jsoup.connect(POSTURL).data(req).get();
25 Elements newsHeadlines=doc.select(".if_tr td");
26 System.out.println(newsHeadlines.text());
27 }
28 /**
29 * @param args
30 * @throws Exception
31 */
32 public static void main(String[] args) throws Exception {
33
34 spider spider= new spider();
35 spider.spiderData();
36 }
37
38 }