HtmlUnitUtils

maven

 <!-- HtmlUnit: used to simulate browser requests -->
        <dependency>
            <groupId>net.sourceforge.htmlunit</groupId>
            <artifactId>htmlunit</artifactId>
            <version>2.46.0</version>
        </dependency>

Ip.class

public class Ip {

    protected int hash;

    protected String supply;

    protected String ip;

    protected int port;

    protected long expire = -1;

    protected long errors = 0;

    public Ip() {
        super();
    }

    public Ip(String supply, String ip, int port, long expire, long errors) {
        super();
        this.supply = supply;
        this.ip = ip;
        this.port = port;
        this.expire = expire;
        this.errors = errors;
        this.hash = (this.ip + this.port).hashCode();
    }

    public synchronized void error() {
        this.errors += 1;
    }

    public String key() {
        return this.ip + ":" + this.port;
    }

    public String getSupply() {
        return supply;
    }

    public void setSupply(String supply) {
        this.supply = supply;
    }

    public String getIp() {
        return ip;
    }

    public void setIp(String ip) {
        this.ip = ip;
        this.hash = (this.ip + this.port).hashCode();
    }

    public int getPort() {
        return port;
    }

    public void setPort(int port) {
        this.port = port;
        this.hash = (this.ip + this.port).hashCode();
    }

    public long getExpire() {
        return expire;
    }

    public void setExpire(long expire) {
        this.expire = expire;
    }

    public long getErrors() {
        return errors;
    }

    public void setErrors(long errors) {
        this.errors = errors;
    }

    @Override
    public Ip clone() {
        return new Ip(supply, ip, port, expire, errors);
    }

    @Override
    public int hashCode() {
        return (this.ip + this.port).hashCode();
    }

    @Override
    public boolean equals(Object obj) {
        if (!(obj instanceof Ip)) {
            return false;
        }

        Ip _ip = (Ip) obj;
        return (this.ip + this.port).equals(_ip.ip + _ip.port);
    }

    @Override
    public String toString() {
        return this.ip + ":" + this.port;
    }

}

HtmlUnitUtils

import com.gargoylesoftware.htmlunit.*;
import com.gargoylesoftware.htmlunit.html.HtmlImage;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.util.NameValuePair;
import org.apache.commons.io.IOUtils;
import org.apache.logging.log4j.Logger;

import javax.imageio.ImageIO;
import javax.imageio.ImageReader;
import java.awt.Graphics2D;
import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

/**
 * 浏览器模拟操作工具
 */
public class HtmlUnitUtils {

    protected static final String User_Agent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko)Chrome/62.0.3202.89 Safari/537.36";

    /**
     * 获取 WebClient 连接实例
     *
     * @return
     */
    public static WebClient getConnection(Ip ip) {
        WebClient webClient = new WebClient();
        init(webClient); //加载配置
        if (ip != null) {
            setProxy(webClient, ip.getIp(), ip.getPort()); //设置代理IP
        }
        return webClient;
    }

    /**
     * 获取 WebClient 连接实例
     *
     * @return
     */
    public static WebClient getConnection(Ip ip,WebClient webClient) {
        if (webClient == null){
            webClient = new WebClient();
            init(webClient); //加载配置
        }

        if (ip != null) {
            setProxy(webClient, ip.getIp(), ip.getPort()); //设置代理IP
        }
        return webClient;
    }

    /**
     * Get请求
     *
     * @param url
     * @return HtmlPage
     * @throws Exception
     */
    public static HtmlPage sendGetRequest(WebClient webClient, String url) throws Exception {
        WebRequest webRequest = GetRequest(url);
        HtmlPage htmlPage = webClient.getPage(webRequest);
        return htmlPage;
    }

    /**
     * Get请求
     *
     * @param url
     * @return HtmlPage
     * @throws Exception
     */
    public static WebRequest GetRequest(String url) throws Exception {
        WebRequest webRequest = new WebRequest(new URL(url));
        webRequest.setAdditionalHeader("User-Agent", User_Agent);
        webRequest.setHttpMethod(HttpMethod.GET);
        return webRequest;
    }

    /**
     * Post 请求
     *
     * @param url
     * @param params
     * @return HtmlPage
     * @throws Exception
     */
    public static HtmlPage sendPostRequest(WebClient webClient, String url, Map<String, Object> params) throws Exception {
        WebRequest webRequest = PostRequest(url, params);
        HtmlPage htmlPage = webClient.getPage(webRequest);
        return htmlPage;
    }

    /**
     * Post 请求
     *
     * @param url
     * @param params
     * @return HtmlPage
     * @throws Exception
     */
    public static WebRequest PostRequest(String url, Map<String, Object> params) throws Exception {
        WebRequest webRequest = new WebRequest(new URL(url));
        webRequest.setHttpMethod(HttpMethod.POST);
        webRequest.setAdditionalHeader("User-Agent", User_Agent);
        if (params != null && params.size() > 0) {
            List<NameValuePair> lnv = new ArrayList<>();
            for (Map.Entry<String, Object> param : params.entrySet()) {
                String key = param.getKey();
                Object value = param.getValue();
                if (value instanceof List<?>) {
                    List<?> subInputs = (List<?>) value;
                    for (Object subInput : subInputs) {
                        NameValuePair nv = new NameValuePair(key, String.valueOf(subInput));
                        lnv.add(nv);
                    }
                }
                NameValuePair nv = new NameValuePair(key, String.valueOf(value));
                lnv.add(nv);
            }
            webRequest.setRequestParameters(lnv);
        }
        return webRequest;
    }

    /**
     * 将 HtmlPage 转化为 String
     *
     * @param page
     * @return
     * @throws IOException
     */
    public static String getPageToString(HtmlPage page) throws IOException {
        return new String(getPageToByte(page), "utf-8");
    }

    /**
     * 将 HtmlPage 转化为 byte
     *
     * @param page
     * @return
     * @throws IOException
     */
    public static byte[] getPageToByte(HtmlPage page) throws IOException {
        byte[] responseContent = null;
        WebResponse webResponse = null;
        try {
            webResponse = page.getWebResponse();
            int status = webResponse.getStatusCode();
            // 读取数据内容
            if (status == 200) {
                if (page.isHtmlPage()) {
                    // 等待JS执行完成
                    responseContent = page.asXml().getBytes();
                } else {
                    InputStream bodyStream = webResponse.getContentAsStream();
                    responseContent = IOUtils.toByteArray(bodyStream);
                    bodyStream.close();
                }
            }
        } catch (IOException e) {
            throw new IOException(e);
        } finally {
            if (webResponse != null) {
                // 关闭响应流
                webResponse.cleanUp();
            }
        }
        return responseContent;
    }

    /***
     * 加载配置
     */
    private static void init(WebClient webClient) {
        // 1 启动JS
        webClient.getOptions().setJavaScriptEnabled(false);
        // 2 禁用Css,可避免自动二次请求CSS进行渲染
        webClient.getOptions().setCssEnabled(false);
        // 3 启动客户端重定向
        webClient.getOptions().setRedirectEnabled(true);
        // 4 js运行错误时,是否抛出异常
        webClient.getOptions().setThrowExceptionOnScriptError(false);
        // 5 设置超时
        webClient.getOptions().setTimeout(60000);
        //6 设置忽略证书
        webClient.getOptions().setUseInsecureSSL(true);
        //7 设置Ajax
        webClient.setAjaxController(new NicelyResynchronizingAjaxController());
        //8设置cookie
        webClient.getCookieManager().setCookiesEnabled(true);
//        webClient.waitForBackgroundJavaScript(600*1000);
//        webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
//        webClient.getOptions().setActiveXNative(false);
//        webClient.getOptions().setMaxInMemory(1024*1024*1024);
//        webClient.getOptions().setHistoryPageCacheLimit(1024);
//        webClient.getOptions().setHistorySizeLimit(1000);

    }

    /**
     * 设置代理IP
     */
    private static void setProxy(WebClient webClient, String address, int port) {
        ProxyConfig proxyConfig = webClient.getOptions().getProxyConfig();
        proxyConfig.setProxyHost(address);
        proxyConfig.setProxyPort(port);

        DefaultCredentialsProvider credentialsProvider = (DefaultCredentialsProvider) webClient
                .getCredentialsProvider();
        credentialsProvider.addCredentials("YsProxy", "YsProxy@0023");
    }

    /**
     * 获取验证码
     *
     * @param ta
     * @param image
     * @return
     * @throws IOException
     */
    public static byte[] getCode(String ta, HtmlImage image, Logger logger) throws Exception {
        String code = "";
        ImageReader imageReader = image.getImageReader();
        BufferedImage bufferedImage = imageReader.read(0);
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        ImageIO.write(bufferedImage, "png", out);
        byte[] bytes = out.toByteArray();
        return bytes;
    }

    /**
     * 保存图片到本地
     *
     * @param img
     * @param name
     * @throws IOException
     */
    public void saveImage(HtmlImage img, String name) throws IOException {
        ImageReader imageReader = img.getImageReader();
        BufferedImage bufferedImage = imageReader.read(0);
        BufferedImage inputbig = new BufferedImage(160, 60, BufferedImage.TYPE_INT_BGR);
        inputbig.getGraphics().drawImage(bufferedImage, 0, 0, 160, 60, null); //画图
        File file2 = new File("C:\\users\\Desktop\\" + name + ".png");
        ImageIO.write(inputbig, "png", file2);
    }

}
  1. 如需使用代理,创建一个 Ip 对象并设置 ip 和 port;不需要代理时传入 null 即可
// Snippet: build an Ip only when a proxy is enabled; otherwise pass null
Ip ip = null;
if (proxyEnable){
    ip = new Ip();
    ip.setIp(proxyHostname);
    ip.setPort(proxyPort);
}
webClient = HtmlUnitUtils.getConnection(ip);
  2. 自定义 WebClient
// Snippet: configure a custom WebClient, pass it to HtmlUnitUtils, then load a page and click a link
 // Simulate a Chrome browser
        WebClient webClient = new WebClient(BrowserVersion.CHROME);
        // Configure the WebClient: use the silent CSS error handler
        webClient.setCssErrorHandler(new SilentCssErrorHandler());
        // AJAX controller
        webClient.setAjaxController(new NicelyResynchronizingAjaxController());
        // Enable JavaScript
        webClient.getOptions().setJavaScriptEnabled(true);
        // Disable CSS
        webClient.getOptions().setCssEnabled(false);
        // Accept any SSL certificate
        webClient.getOptions().setUseInsecureSSL(true);
        // Timeout
        webClient.getOptions().setTimeout(50000);
        // Do not throw on script errors
        webClient.getOptions().setThrowExceptionOnScriptError(false);
        // Follow redirects
        webClient.getOptions().setRedirectEnabled(true);
        // Enable cookies
        webClient.getCookieManager().setCookiesEnabled(true);
        // NOTE(review): this Ip has no ip/port set, but it is non-null, so getConnection
        // presumably still applies it as proxy settings — confirm, or pass null for no proxy
        Ip ip = new Ip();
        WebClient connection = HtmlUnitUtils.getConnection(ip, webClient);
        // Open the target URL with the simulated browser
//        HtmlPage page = connection.getPage("https://api.twitter.com/oauth/authorize?oauth_token=JrbVIAAAAAABASakAAABdsxFCaI");
        HtmlPage page = connection.getPage("https://botometer.osome.iu.edu/");
        // Wait for background JavaScript to finish (currently disabled)
//        connection.waitForBackgroundJavaScript(10000*3);
        System.out.println(page.asText());
        System.out.println(page.asXml());
        // Locate the link by absolute XPath
        // NOTE(review): presumably null when nothing matches, which would fail on the next line — TODO confirm
        HtmlAnchor a = page.getFirstByXPath("/html/body/nav/div/div[2]/ul/li[7]/ul/li/a");
        System.out.println(a.asXml());
        HtmlPage click = a.click();
        // Let background JavaScript run after the click (argument 2000, presumably milliseconds)
        webClient.waitForBackgroundJavaScript(2000);
        System.out.println("-------------------------------");
        System.out.println(click.asXml());
        System.out.println("-------------------------------");
        System.out.println(click.asText());
        System.out.println("-------------------------------");
        System.out.println(click.getUrl().toString());
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值