Java爬虫使用Apache HttpClient3.1库编写的Java爬虫代码,其中使用了http隧道代理来访问目标网址。具体代码如下:
import org.apache.commons.httpclient.Credentials;
import org.apache.commons.httpclient.HostConfiguration;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpMethod;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.UsernamePasswordCredentials;
import org.apache.commons.httpclient.auth.AuthScope;
import org.apache.commons.httpclient.methods.GetMethod;
import java.io.IOException;
public class Main {
# 代理服务器
private static final String PROXY_HOST = "ip.hahado.cn";
private static final int PROXY_PORT = 31111;
public static void main(String[] args) {
HttpClient client = new HttpClient();
HttpMethod method = new GetMethod("https://httpbin.org/ip");
HostConfiguration config = client.getHostConfiguration();