Java网络爬虫crawler4j学习笔记<13> AuthInfo类

源代码

package edu.uci.ics.crawler4j.crawler.authentication;

import javax.swing.text.html.FormSubmitEvent.MethodType;
import java.net.MalformedURLException;
import java.net.URL;

/**
 * Base class holding the authentication information needed to log in to a
 * user/password protected site.
 *
 * <p>This class is meant to be extended by specific authentication types such as
 * form authentication and basic authentication. It contains the data that is
 * common to all authentication types: the HTTP method, the components of the
 * login URL (protocol, host, port, path) and the credentials.
 *
 * <p>Created by Avi Hayun on 11/23/2014.
 */
public abstract class AuthInfo {

  /** The kinds of authentication listed by this class. */
  public enum AuthenticationType {
    BASIC_AUTHENTICATION, FORM_AUTHENTICATION
  }

  /** Authentication type. */
  protected AuthenticationType authenticationType;
  /** HTTP method used for the login request (MethodType.GET or MethodType.POST). */
  protected MethodType httpMethod;
  /** Protocol part of the login URL (e.g. http, https). */
  protected String protocol;
  /** Host name part of the login URL. */
  protected String host;
  /** File part of the login URL as returned by {@link URL#getFile()} (e.g. /login.php). */
  protected String loginTarget;
  /** Port of the login URL. */
  protected int port;
  /** Username used for authentication. */
  protected String username;
  /** Password used for authentication. */
  protected String password;

  /** Constructs a new AuthInfo with all fields left at their default values. */
  public AuthInfo() {
  }

  /**
   * Constructs an AuthInfo from a full login URL and credentials.
   * This constructor should only be used by extending classes.
   *
   * <p>The login URL is split into protocol, host, port and login target. If the
   * URL specifies an explicit port (e.g. {@code http://host:8080/login}) that port
   * is used; otherwise the default port of the protocol is used.
   *
   * @param authenticationType Pick the one which matches your authentication
   * @param httpMethod Choose POST / GET
   * @param loginUrl Full URL of the login page
   * @param username Username for Authentication
   * @param password Password for Authentication
   *
   * @throws MalformedURLException if {@code loginUrl} cannot be parsed as a URL
   */
  protected AuthInfo(AuthenticationType authenticationType, MethodType httpMethod, String loginUrl,
                     String username, String password) throws MalformedURLException {
    this.authenticationType = authenticationType;
    this.httpMethod = httpMethod;

    URL url = new URL(loginUrl);
    this.protocol = url.getProtocol();
    this.host = url.getHost();
    // URL.getPort() returns -1 when the URL carries no explicit port; only in that
    // case fall back to the protocol's default port. Using getDefaultPort()
    // unconditionally would silently discard an explicit port such as :8080.
    int explicitPort = url.getPort();
    this.port = (explicitPort != -1) ? explicitPort : url.getDefaultPort();
    this.loginTarget = url.getFile();

    this.username = username;
    this.password = password;
  }

  /**
   * @return Authentication type (BASIC, FORM)
   */
  public AuthenticationType getAuthenticationType() {
    return authenticationType;
  }

  /**
   * @param authenticationType Should be set only by extending classes (BASICAuthInfo, FORMAuthInfo)
   */
  public void setAuthenticationType(AuthenticationType authenticationType) {
    this.authenticationType = authenticationType;
  }

  /**
   * @return httpMethod (POST, GET)
   */
  public MethodType getHttpMethod() {
    return httpMethod;
  }

  /**
   * @param httpMethod Should be set by extending classes (POST, GET)
   */
  public void setHttpMethod(MethodType httpMethod) {
    this.httpMethod = httpMethod;
  }

  /**
   * @return protocol type (http, https)
   */
  public String getProtocol() {
    return protocol;
  }

  /**
   * @param protocol Don't set this one unless you know what you are doing (protocol: http, https)
   */
  public void setProtocol(String protocol) {
    this.protocol = protocol;
  }

  /**
   * @return host (www.sitename.com)
   */
  public String getHost() {
    return host;
  }

  /**
   * @param host Don't set this one unless you know what you are doing (sets the domain name)
   */
  public void setHost(String host) {
    this.host = host;
  }

  /**
   * @return file/path which is the rest of the url after the domain name (eg: /login.php)
   */
  public String getLoginTarget() {
    return loginTarget;
  }

  /**
   * @param loginTarget Don't set this one unless you know what you are doing (eg: /login.php)
   */
  public void setLoginTarget(String loginTarget) {
    this.loginTarget = loginTarget;
  }

  /**
   * @return port number (eg: 80, 443)
   */
  public int getPort() {
    return port;
  }

  /**
   * @param port Don't set this one unless you know what you are doing (eg: 80, 443)
   */
  public void setPort(int port) {
    this.port = port;
  }

  /**
   * @return username used for Authentication
   */
  public String getUsername() {
    return username;
  }

  /**
   * @param username username used for Authentication
   */
  public void setUsername(String username) {
    this.username = username;
  }

  /**
   * @return password used for Authentication
   */
  public String getPassword() {
    return password;
  }

  /**
   * @param password password used for Authentication
   */
  public void setPassword(String password) {
    this.password = password;
  }
}

分析

如果需要登录验证,可以继承AuthInfo类来实现自定义的爬虫登录功能。

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值