Java 爬虫:HtmlUnit 获取不到某些页面的 form 表单,来讨论一下

下面的代码能获取到百度页面的 form 表单,但是获取不到另外一个网页(代码中的有赞登录页)的表单,此时该如何处理?

PS:把另外一个网页的源码保存为 html 文件后,用浏览器打开,能看到 form 表单。

package com.xttx.cn.fetchpro.fetchImp;

import java.io.IOException;
import java.net.URL;
import java.util.List;

import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.CollectingAlertHandler;
import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
import com.gargoylesoftware.htmlunit.HttpWebConnection;
import com.gargoylesoftware.htmlunit.NicelyResynchronizingAjaxController;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.WebRequest;
import com.gargoylesoftware.htmlunit.html.HtmlForm;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.xttx.cn.fetchpro.exception.PageNotFoundException;

/**
 * Small HtmlUnit experiment: loads two pages with a Firefox-emulating
 * {@link WebClient} and reports how many {@code <form>} elements each page exposes.
 *
 * <p>{@link HtmlPage#getForms()} only sees forms that are in the DOM at the moment
 * it is called. Forms that a page inserts from JavaScript after the initial load
 * are therefore missed unless background scripts are given time to finish, which is
 * why {@link #main} waits after every page load.
 *
 * <p>NOTE(review): whether the youzan login form is script-generated (or lives in an
 * iframe, which {@code getForms()} on the top-level page would not cover) cannot be
 * told from this file - confirm against the live page.
 */
public class SimulationFetch {
	/** Upper bound, in milliseconds, for letting background JavaScript settle after a page load. */
	private static final long BACKGROUND_JS_WAIT_MILLIS = 10 * 1000;

	/** Shared client; created by {@link #main} or injected through {@link #setWebClient}. */
	protected static WebClient webClient = null;

	/**
	 * Returns the shared client.
	 *
	 * @return the static {@link WebClient}, or {@code null} if none has been set yet
	 */
	public  WebClient getWebClient() {
		return webClient;
	}

	/**
	 * Replaces the shared client.
	 *
	 * @param webClient the client to store in the static field
	 */
	public  void setWebClient(WebClient webClient) {
		// The field is static, so qualify it with the class instead of "this".
		SimulationFetch.webClient = webClient;
	}

	/** Not used by the code in this class. */
	static WebRequest request = null;

	/**
	 * Configures the shared client, loads the youzan login page and the Baidu home
	 * page, and prints the number of forms found on each.
	 *
	 * @param args ignored
	 * @throws Exception if a page cannot be loaded (failing HTTP status codes are
	 *         configured to throw) or a URL is malformed
	 */
	public static void main(String[] args) throws Exception{
		// NOTE(review): these setters mutate the shared BrowserVersion.FIREFOX_24
		// constant, so every other user of that constant in the JVM sees the changes.
		BrowserVersion.FIREFOX_24.setBrowserLanguage("zh-CN");
		BrowserVersion.FIREFOX_24.setSystemLanguage("zh-CN");
		BrowserVersion.FIREFOX_24.setUserLanguage("zh-CN");
		BrowserVersion.FIREFOX_24.setUserAgent("Mozilla/5.0 (Windows NT 6.1; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0");
		BrowserVersion.FIREFOX_24.setBrowserVersion(46.0f);
		BrowserVersion.FIREFOX_24.setCpuClass("x64");
		webClient = new WebClient(BrowserVersion.FIREFOX_24);

		try {
			webClient.setWebConnection(new HttpWebConnection(webClient));
			webClient.getCache().clear();
			webClient.getCookieManager().clearCookies();
			webClient.getOptions().setJavaScriptEnabled(true);
			webClient.setJavaScriptTimeout(60*1000);
			webClient.getOptions().setCssEnabled(true);
			webClient.getOptions().setActiveXNative(true);
			webClient.getOptions().setPopupBlockerEnabled(true);
			webClient.getOptions().setRedirectEnabled(true);
			webClient.getOptions().setTimeout(10000);
			webClient.getOptions().setDoNotTrackEnabled(true);
			webClient.getCookieManager().setCookiesEnabled(true);
			webClient.getOptions().setThrowExceptionOnFailingStatusCode(true);
			// Script errors on third-party pages must not abort the page load.
			webClient.getOptions().setThrowExceptionOnScriptError(false);
			webClient.getOptions().setUseInsecureSSL(true);
			webClient.getOptions().setSSLInsecureProtocol("TLSv1.2");
			webClient.setAjaxController(new NicelyResynchronizingAjaxController());
			webClient.setAlertHandler(new CollectingAlertHandler());

			// Page whose form was reported as missing.
			HtmlPage youzanLoginPage = (HtmlPage) webClient.getPage(
					new WebRequest(new URL("https://login.youzan.com/sso/index?service=kdt&from_source=pzshouye")));
			// Give scripts that build the DOM after load a chance to finish before
			// the forms are read; without this, script-inserted forms are not seen.
			webClient.waitForBackgroundJavaScript(BACKGROUND_JS_WAIT_MILLIS);
			List<HtmlForm> youzanForms = youzanLoginPage.getForms();
			System.out.println("youzan forms: " + youzanForms.size());

			// Page whose form was reported as found.
			HtmlPage baiduPage = (HtmlPage) webClient.getPage(
					new WebRequest(new URL("https://www.baidu.com/")));
			webClient.waitForBackgroundJavaScript(BACKGROUND_JS_WAIT_MILLIS);
			List<HtmlForm> baiduForms = baiduPage.getForms();
			// Guard the first-element access: get(0) on an empty list would throw.
			HtmlForm firstBaiduForm = baiduForms.isEmpty() ? null : baiduForms.get(0);
			System.out.println("baidu forms: " + baiduForms.size()
					+ (firstBaiduForm == null ? "" : ", first: " + firstBaiduForm));

			System.out.println("---");
		} finally {
			// Release windows and JavaScript executor threads even when a load fails.
			webClient.closeAllWindows();
		}
	}
}
  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值