下载 jsoup.jar 并加入工程:http://jsoup.org/download
分析彩票网站的 HTML 源码,开奖信息大致如下:
<tr class="bgcolor1">
<td class="td1">2013-10-15</td>
<td class="td2">
<a href="/lottery/draw/view/50?phase=2013121">2013121</a>
</td>
<td class="td3"><span class="result"> <span class="ball_1">04</span>
<span class="ball_1">05</span>
<span class="ball_1">06</span>
<span class="ball_1">07</span>
<span class="ball_1">25</span>
<span class="ball_1">27</span>
<span class="ball_2">07</span>
</span></td>
<td class="td4">337,100,116</td>
</tr>
<tr class="bgcolor2">
<td class="td1">2013-10-13</td>
<td class="td2">
<a href="/lottery/draw/view/50?phase=2013120">2013120</a>
</td>
<td class="td3"><span class="result"> <span class="ball_1">05</span>
<span class="ball_1">06</span>
<span class="ball_1">13</span>
<span class="ball_1">18</span>
<span class="ball_1">23</span>
<span class="ball_1">31</span>
<span class="ball_2">11</span>
</span></td>
<td class="td4">404,684,072</td>
</tr>
GET 请求的 URL 如下:
http://baidu.lecai.com/lottery/draw/list/50?lottery_type=50&page=1&ds=2013-09-03&de=2013-12-03
通过上述分析,数据抓取及简单解析过程如下:
/**
 * Fetches one page of the lottery draw list from baidu.lecai.com with jsoup
 * and prints one formatted line per result row to standard output.
 *
 * <p>Result rows are located by their CSS class ({@code bgcolor1} /
 * {@code bgcolor2}); within each row the cells {@code td1}..{@code td4}
 * supply the four values of the output line.
 */
public class Test {
    /** Output template; its four slots are filled from the td1..td4 cells of a row. */
    private static final String ROW_FORMAT = "日期:%s,期数:%s,号码:%s,销量:%s";

    /** CSS classes of the result rows, listed in the order they are printed. */
    private static final String[] ROW_CLASSES = {"bgcolor1", "bgcolor2"};

    /**
     * Requests the draw list and prints every matching row.
     *
     * <p>All {@code bgcolor1} rows are printed before any {@code bgcolor2}
     * row, so the output is grouped by row class rather than by the rows'
     * position in the page.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        try {
            // Query parameters: ds/de are presumably the start/end dates of the
            // requested range (TODO confirm against the site); page selects the
            // result page.
            Document doc = Jsoup.connect("http://baidu.lecai.com/lottery/draw/list/50")
                    .data("ds", "2013-01-01")
                    .data("de", "2013-12-03")
                    .data("lottery_type", "50")
                    .data("page", "1")
                    .get();

            for (String rowClass : ROW_CLASSES) {
                printRows(doc.getElementsByClass(rowClass));
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints one formatted line for each given row element.
     *
     * @param rows the elements to print; each is searched for descendants
     *             with classes {@code td1}, {@code td2}, {@code td3}, {@code td4}
     */
    private static void printRows(Elements rows) {
        for (Element row : rows) {
            System.out.println(String.format(ROW_FORMAT,
                    row.getElementsByClass("td1").text(),
                    row.getElementsByClass("td2").text(),
                    row.getElementsByClass("td3").text(),
                    row.getElementsByClass("td4").text()));
        }
    }
}
运行结果如下(程序先输出所有 bgcolor1 行,再输出所有 bgcolor2 行,因此日期并非整体按时间顺序排列):
日期:2013-12-01,期数:2013141,号码:03 04 05 25 30 31 04,销量:409,703,546
日期:2013-11-26,期数:2013139,号码:07 08 11 13 21 27 08,销量:372,082,440
日期:2013-11-21,期数:2013137,号码:04 17 19 23 24 27 10,销量:379,293,130
日期:2013-11-17,期数:2013135,号码:09 23 24 25 29 31 12,销量:406,720,240
日期:2013-11-12,期数:2013133,号码:04 07 12 19 22 25 01,销量:361,709,328
日期:2013-11-07,期数:2013131,号码:04 06 12 17 19 26 09,销量:388,319,260
日期:2013-11-03,期数:2013129,号码:05 06 10 14 27 31 14,销量:418,710,844
日期:2013-10-29,期数:2013127,号码:02 03 13 20 22 33 14,销量:376,726,836
日期:2013-10-24,期数:2013125,号码:04 06 08 18 25 28 16,销量:372,984,566
日期:2013-10-20,期数:2013123,号码:01 02 06 11 17 25 02,销量:385,822,278
日期:2013-10-15,期数:2013121,号码:04 05 06 07 25 27 07,销量:337,100,116
日期:2013-10-10,期数:2013119,号码:05 15 20 22 26 32 09,销量:357,071,040
日期:2013-10-06,期数:2013117,号码:09 12 13 24 27 33 16,销量:368,579,726
日期:2013-10-01,期数:2013115,号码:03 12 16 17 18 27 08,销量:319,974,332
日期:2013-09-26,期数:2013113,号码:04 07 11 17 24 33 09,销量:353,992,568
日期:2013-11-28,期数:2013140,号码:01 05 12 13 21 22 10,销量:359,556,480
日期:2013-11-24,期数:2013138,号码:04 15 16 24 27 28 03,销量:402,650,432
日期:2013-11-19,期数:2013136,号码:04 06 14 16 18 26 06,销量:369,164,734
日期:2013-11-14,期数:2013134,号码:01 17 18 19 25 29 10,销量:372,289,714
日期:2013-11-10,期数:2013132,号码:20 21 22 23 25 27 12,销量:420,848,208
日期:2013-11-05,期数:2013130,号码:01 03 15 16 31 33 08,销量:385,080,972
日期:2013-10-31,期数:2013128,号码:07 13 17 19 25 31 08,销量:381,844,688
日期:2013-10-27,期数:2013126,号码:04 10 19 27 31 33 16,销量:408,866,622
日期:2013-10-22,期数:2013124,号码:03 09 15 23 25 30 07,销量:347,249,642
日期:2013-10-17,期数:2013122,号码:07 10 13 15 26 27 11,销量:345,544,080
日期:2013-10-13,期数:2013120,号码:05 06 13 18 23 31 11,销量:404,684,072
日期:2013-10-08,期数:2013118,号码:02 03 17 22 32 33 16,销量:343,511,272
日期:2013-10-03,期数:2013116,号码:12 15 21 26 32 33 07,销量:312,633,700
日期:2013-09-29,期数:2013114,号码:04 06 17 21 23 33 07,销量:398,506,774
日期:2013-09-24,期数:2013112,号码:01 06 12 13 22 31 07,销量:344,881,702