package test.jsoup;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.InetSocketAddress;
import java.net.MalformedURLException;
import java.net.Proxy;
import java.net.URL;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.Credentials;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.AuthCache;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.CredentialsProvider;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.protocol.HttpClientContext;
import org.apache.http.impl.auth.BasicScheme;
import org.apache.http.impl.client.BasicAuthCache;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
/**
 * <pre>
 * Business name: three ways to fetch page information
 * Description: downloads a page through an HTTP proxy and parses it into a jsoup
 *              Document, once with jsoup itself, once with java.net and once with
 *              Apache HttpClient.
 * Date written: 2016-02-02
 * Author: liuyx
 *
 * History
 * 1. Modified on:
 *    Modified by:
 *    Changes:
 * </pre>
 */
public class TestJsoup
{
    /**
     * Proxy server host name.
     */
    private static final String PROXY_HOST = "proxy.william.com";

    /**
     * Proxy server port.
     */
    private static final int PROXY_PORT = 8080;

    /**
     * Proxy account name. Only referenced by the optional authentication snippets
     * that are left as comments in the fetch methods.
     */
    private static final String PROXY_USERNAME = "";

    /**
     * Proxy account password. Only referenced by the optional authentication
     * snippets that are left as comments in the fetch methods.
     */
    private static final String PROXY_PASSWORD = "";

    /**
     * Charset used to decode a response whose Content-Type header names none.
     */
    private static final String DEFAULT_CHARSET = "UTF-8";

    /**
     * Fetches a fixed demo URL with the HttpClient variant and prints the parsed document.
     *
     * @param args unused
     * @throws Exception declared for convenience; the HttpClient variant reports I/O failures itself
     */
    public static void main(String[] args) throws Exception
    {
        String url = "http://www.baidu.com";
        Document doc = getHTMLByHttpClient(url);
        System.out.println(doc);
    }

    /**
     * Fetches and parses a page with jsoup (written against jsoup 1.8.3).
     *
     * <p>The proxy is configured through JVM-wide system properties, so calling this
     * method also changes the proxy settings seen by the rest of the process.
     *
     * @param url address of the page to fetch
     * @return the parsed document
     * @throws IOException if jsoup cannot fetch the page
     */
    private static Document getHTMLByJsoup(String url) throws IOException
    {
        System.setProperty("proxySet", "true");
        System.setProperty("http.proxyHost", PROXY_HOST);
        System.setProperty("http.proxyPort", String.valueOf(PROXY_PORT));
        // Optional proxy credentials (disabled in the original code):
        // System.setProperty("http.proxyUser", PROXY_USERNAME);
        // System.setProperty("http.proxyPassword", PROXY_PASSWORD);
        System.setProperty("http.nonProxyHosts", "localhost|127.0.0.1");
        return Jsoup.connect(url).get();
    }

    /**
     * Fetches a page with java.net through the configured proxy and parses it with jsoup.
     *
     * <p>This is a best-effort call: a failure is reported on standard error and whatever
     * was read up to that point (possibly nothing) is parsed and returned.
     *
     * @param url address of the page to fetch
     * @return the parsed document; an empty document shell if nothing could be read
     */
    private static Document getHTMLByNet(String url)
    {
        StringBuilder html = new StringBuilder();
        HttpURLConnection uc = null;
        try {
            Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(PROXY_HOST, PROXY_PORT));
            uc = (HttpURLConnection) new URL(url).openConnection(proxy);
            // Optional proxy authentication (disabled in the original code; Base64 is not imported here):
            // String encoded = new String(
            //         Base64.encodeBase64((PROXY_USERNAME + ":" + PROXY_PASSWORD).getBytes()));
            // uc.setRequestProperty("Proxy-Authorization", "Basic " + encoded);
            uc.connect();
            // Decode with the charset announced by the server rather than the platform default.
            String charset = charsetOf(uc.getContentType());
            try (BufferedReader in = new BufferedReader(
                    new InputStreamReader(uc.getInputStream(), charset))) {
                appendAll(in, html);
            }
        } catch (MalformedURLException e) {
            System.err.println("Invalid URL: " + url);
            e.printStackTrace();
        } catch (IOException e) {
            System.err.println("Failed to fetch " + url + " with java.net");
            e.printStackTrace();
        } finally {
            if (uc != null) {
                uc.disconnect();
            }
        }
        return Jsoup.parse(html.toString());
    }

    /**
     * Fetches a page with Apache HttpClient (written against HttpClient 4.4) and parses it
     * with jsoup.
     *
     * <p>This is a best-effort call: a failure is reported on standard error and whatever
     * was read up to that point (possibly nothing) is parsed and returned.
     *
     * @param url address of the page to fetch
     * @return the parsed document; an empty document shell if nothing could be read
     */
    private static Document getHTMLByHttpClient(String url)
    {
        StringBuilder html = new StringBuilder();
        // Closing the client also releases any connection still held by the response.
        try (CloseableHttpClient httpclient = HttpClients.createDefault()) {
            // NOTE(review): the proxy is passed as the request target together with an
            // absolute-URI GET instead of being configured as a route proxy; this is kept
            // exactly as the original did it - confirm that the proxy accepts such requests.
            HttpResponse response = httpclient.execute(new HttpHost(PROXY_HOST, PROXY_PORT),
                    new HttpGet(url));
            // With proxy credentials (disabled in the original code):
            // HttpResponse response = httpclient.execute(new HttpHost(PROXY_HOST, PROXY_PORT),
            //         new HttpGet(url), createBasicAuthContext(PROXY_USERNAME, PROXY_PASSWORD));
            HttpEntity entity = response.getEntity();
            if (entity != null) {
                String contentType =
                        entity.getContentType() == null ? null : entity.getContentType().getValue();
                try (BufferedReader br = new BufferedReader(
                        new InputStreamReader(entity.getContent(), charsetOf(contentType)))) {
                    appendAll(br, html);
                }
            }
        } catch (ClientProtocolException e) {
            System.err.println("HTTP protocol error while fetching " + url);
            e.printStackTrace();
        } catch (IOException e) {
            System.err.println("Failed to fetch " + url + " with HttpClient");
            e.printStackTrace();
        }
        return Jsoup.parse(html.toString());
    }

    /**
     * Builds an HttpClient context carrying basic-auth credentials for the proxy
     * (written against HttpClient 4.4).
     *
     * @param username account name to authenticate with
     * @param password password of that account
     * @return a context holding the credentials and an auth cache entry for the proxy host
     */
    private static HttpClientContext createBasicAuthContext(String username, String password)
    {
        CredentialsProvider credsProvider = new BasicCredentialsProvider();
        Credentials defaultCreds = new UsernamePasswordCredentials(username, password);
        credsProvider.setCredentials(new AuthScope(PROXY_HOST, PROXY_PORT), defaultCreds);

        // Register the BASIC scheme for the proxy host in the auth cache up front.
        AuthCache authCache = new BasicAuthCache();
        authCache.put(new HttpHost(PROXY_HOST, PROXY_PORT), new BasicScheme());

        HttpClientContext context = HttpClientContext.create();
        context.setCredentialsProvider(credsProvider);
        context.setAuthCache(authCache);
        return context;
    }

    /**
     * Extracts the charset name from a Content-Type header value such as
     * {@code text/html; charset=gbk}.
     *
     * @param contentType header value, may be {@code null}
     * @return the declared charset name, or {@link #DEFAULT_CHARSET} when none is declared
     */
    private static String charsetOf(String contentType)
    {
        if (contentType != null) {
            String key = "charset=";
            for (String part : contentType.split(";")) {
                String param = part.trim();
                if (param.regionMatches(true, 0, key, 0, key.length())) {
                    String name = param.substring(key.length()).trim();
                    // The parameter value may be a quoted string.
                    if (name.length() >= 2 && name.startsWith("\"") && name.endsWith("\"")) {
                        name = name.substring(1, name.length() - 1).trim();
                    }
                    if (!name.isEmpty()) {
                        return name;
                    }
                }
            }
        }
        return DEFAULT_CHARSET;
    }

    /**
     * Copies everything the reader yields into the sink, character for character, so line
     * breaks and indentation of the page survive.
     *
     * @param reader source of characters; not closed by this method
     * @param sink   buffer receiving the characters
     * @throws IOException if reading fails; characters read so far stay in the sink
     */
    private static void appendAll(BufferedReader reader, StringBuilder sink) throws IOException
    {
        char[] buffer = new char[8192];
        int count;
        while ((count = reader.read(buffer)) != -1) {
            sink.append(buffer, 0, count);
        }
    }
}
/*
 * pom.xml dependencies required by this class:
 *
 * <dependency>
 *     <!-- jsoup HTML parser library @ http://jsoup.org/ -->
 *     <groupId>org.jsoup</groupId>
 *     <artifactId>jsoup</artifactId>
 *     <version>1.8.3</version>
 * </dependency>
 * <dependency>
 *     <groupId>org.apache.httpcomponents</groupId>
 *     <artifactId>httpclient</artifactId>
 *     <version>4.4</version>
 * </dependency>
 */