java代码模拟ajax请求_模拟ajax实现网络爬虫——HtmlUnit

package com.lanyotech.www.wordbank;

import java.io.FileOutputStream;

import java.io.IOException;

import java.io.InputStream;

import java.io.OutputStream;

import java.net.MalformedURLException;

import java.util.List;

import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;

import com.gargoylesoftware.htmlunit.NicelyResynchronizingAjaxController;

import com.gargoylesoftware.htmlunit.ScriptResult;

import com.gargoylesoftware.htmlunit.WebClient;

import com.gargoylesoftware.htmlunit.html.HtmlOption;

import com.gargoylesoftware.htmlunit.html.HtmlPage;

import com.gargoylesoftware.htmlunit.html.HtmlSelect;

public class WorldBankCrawl {

private static String TARGET_URL = "http://databank.worldbank.org/ddp/home.do";

public static void main(String[] args) throws FailingHttpStatusCodeException, MalformedURLException, IOException {

//模拟一个浏览器

WebClient webClient = new WebClient();

//设置webClient的相关参数

webClient.setJavaScriptEnabled(true);

webClient.setCssEnabled(false);

webClient.setAjaxController(new NicelyResynchronizingAjaxController());

webClient.setTimeout(35000);

webClient.setThrowExceptionOnScriptError(false);

//模拟浏览器打开一个目标网址

HtmlPage rootPage= webClient.getPage(TARGET_URL);

//获取第一个数据库

HtmlSelect hs = (HtmlSelect) rootPage.getElementById("lstCubes");

//按要求选择第一个数据库

hs.getOption(0).setSelected(true);

//模拟点击Next按钮,跳转到第二个页面

System.out.println("正在跳转…");

//执行按钮出发的js事件

ScriptResult sr = rootPage.executeJavaScript("javascript:setCubeData(2,-1,4,'/ddp');");

//跳转到第二个页面,选择国家

HtmlPage countrySelect = (HtmlPage) sr.getNewPage();

//获得包含全部国家信息的选择框页面

HtmlPage framePage=(HtmlPage)countrySelect.getFrameByName("frmTree1″).getEnclosedPage();

//获得selectAll按钮,触发js事件

framePage.executeJavaScript("javascript:TransferListAll(‘countrylst','countrylstselected','no');SetSelectedCount(‘countrylstselected','tdcount');");

//获取Next按钮,触发js事件

ScriptResult electricityScriptResult = framePage.executeJavaScript("javascript:wrapperSetCube('/ddp')");

System.out.println("正在跳转…");

//跳转到下一个页面electricitySelect

HtmlPage electricitySelect = (HtmlPage) electricityScriptResult.getNewPage();

//获得electricity选择的iframe

HtmlPage electricityFrame = (HtmlPage) electricitySelect.getFrameByName("frmTree1″).getEnclosedPage();

//获得选择框

HtmlSelect seriesSelect = (HtmlSelect) electricityFrame.getElementById("countrylst");

//获得所有的选择框内容

List optionList = seriesSelect.getOptions();

//将指定的选项选中

optionList.get(1).setSelected(true);

//模拟点击select按钮 electricityFrame.executeJavaScript("javascript:TransferList('countrylst','countrylstselected','no');SetSelectedCount('countrylstselected','tdcount');");

//获取选中后,下面的选择框

HtmlSelect electricitySelected = (HtmlSelect) electricityFrame.getElementById("countrylstselected");

List list = electricitySelected.getOptions();

//模拟点击Next按钮,跳转到选择时间的页面

ScriptResult timeScriptResult = electricityFrame.executeJavaScript("javascript:wrapperSetCube('/ddp')");

System.out.println("正在跳转…");

HtmlPage timeSelectPage = (HtmlPage) timeScriptResult.getNewPage();

//获取选中时间的选择框

timeSelectPage = (HtmlPage) timeSelectPage.getFrameByName("frmTree1″).getEnclosedPage();

//选中所有的时间 timeSelectPage.executeJavaScript("javascript:TransferListAll('countrylst','countrylstselected','no');SetSelectedCount('countrylstselected','tdcount');");

//点击Next按钮

ScriptResult exportResult = timeSelectPage.executeJavaScript("javascript:wrapperSetCube('/ddp')");

System.out.println("正在跳转…");

//转到export页面

HtmlPage exportPage = (HtmlPage) exportResult.getNewPage();

//点击页面上的Export按钮,进入下载页面

ScriptResult downResult = exportPage.executeJavaScript("javascript:exportData('/ddp' ,'EXT_BULK' ,'WDI_Time=51||WDI_Series=1||WDI_Ctry=244||' );");

System.out.println("正在跳转…");

HtmlPage downLoadPage = (HtmlPage) downResult.getNewPage();

//点击Excel图标,开始下载

ScriptResult downLoadResult = downLoadPage.executeJavaScript("javascript:exportData('/ddp','BULKEXCEL');");

//下载Excel文件

InputStream is = downLoadResult.getNewPage().getWebResponse().getContentAsStream();

OutputStream fos = new FileOutputStream("d://test.xls");

byte[] buffer=new byte[1024*30];

int len=-1;

while((len=is.read(buffer))>0){

fos.write(buffer, 0, len);

}

fos.close();

fos.close();

System.out.println("Success!");

}

}

注释:下面是通过 getOptions() 配置 WebClient 的写法示例(与上文直接在 WebClient 上调用 setXxx 的写法相对应):

/**HtmlUnit请求web页面*/

WebClient wc = new WebClient();

wc.getOptions().setJavaScriptEnabled(true); //启用JS解释器,默认为true

wc.getOptions().setCssEnabled(false); //禁用css支持

wc.getOptions().setThrowExceptionOnScriptError(false); //js运行错误时,是否抛出异常

wc.getOptions().setTimeout(10000); //设置连接超时时间 ,这里是10S。如果为0,则无限期等待

HtmlPage page = wc.getPage("http://cq.qq.com/baoliao/detail.htm?294064");

String pageXml = page.asXml(); //以xml的形式获取响应文本

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值