1、简介
URLConnection 是 java.net 包中的一个抽象类,其主要用于实现应用程序与 URL 之间的通信。HttpURLConnection 继承自 URLConnection,也是抽象类。在网络爬虫中,可以使用 URLConnection 或 HttpURLConnection 请求 URL 获取流数据,通过对流数据的操作,获取具体的实体内容。
2、实例化
URLConnection 与 HttpURLConnection 都是抽象类,无法直接创建实例化对象,但可以通过 java.net 包 URL 类中的 openConnection() 方法创建 URLConnection 与 HttpURLConnection 实例。
// Address of the page to request.
String address = "http://www.********.com.cn/b.asp";
URL url = new URL(address);
// Obtain a generic URLConnection from the URL ...
URLConnection conn = url.openConnection();
// ... or call openConnection() again and cast the result to the HTTP-specific subclass.
HttpURLConnection httpConn = (HttpURLConnection) url.openConnection();
3、获取网页内容
要获取 URLConnection 请求到的实体内容,需通过数据流操作。在 openConnection()方法执行完毕后,通过 getInputStream()方法获取输入流,之后采用 BufferedReader 读取输入流信息。
/**
 * Requests http://www.baidu.com and prints the response body to standard output.
 *
 * <p>The body is read line by line and the lines are concatenated without
 * separators, so the printed text is a single line.
 *
 * @param args command-line arguments (unused)
 * @throws IOException if the connection cannot be opened or the response cannot be read
 */
public static void main(String[] args) throws IOException {
    URL url = new URL("http://www.baidu.com");
    // The alternative shown by the tutorial is a plain URLConnection:
    //   URLConnection conn = url.openConnection();
    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
    try {
        // StringBuilder avoids re-copying the whole page on every appended line.
        StringBuilder html = new StringBuilder();
        // try-with-resources closes the reader (and the wrapped input stream) even when
        // reading fails part-way. The charset is named explicitly so decoding does not
        // depend on the platform default.
        try (BufferedReader reader =
                new BufferedReader(new InputStreamReader(conn.getInputStream(), "UTF-8"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                html.append(line);
            }
        }
        System.out.println(html.toString());
    } finally {
        // Tell the connection that no further requests will be made through it.
        conn.disconnect();
    }
}
4、GET请求
针对实例化的 HttpURLConnection,可以使用 setRequestMethod(String method) 方法设置 HTTP 请求方法,其可设置的请求方法包括 GET、POST、HEAD、OPTIONS、PUT、DELETE 以及 TRACE。程序演示了设置 GET 的操作。setDoInput(true) 表示 URL 连接可用于输入,setRequestMethod("GET") 表示设置的请求方法为 GET。基于 getResponseCode() 方法可以获取响应状态码,如果该状态码为 200,则利用实例化的 StringBuffer 将响应内容读取出来。
/**
 * Demonstrates an HTTP GET request made with {@link HttpURLConnection}.
 */
public class GetDemo {
    /**
     * Sends a GET request and prints the response body.
     *
     * <p>The body is read (decoded with the "UTF8" charset) only when the status code is
     * 200; for any other status code the printed value is {@code null}. Lines are
     * concatenated without separators.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if connecting or reading the response fails
     */
    public static void main(String[] args) throws IOException {
        // Initialise the URL
        URL url = new URL("http://www.********.com.cn/b.asp");
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        try {
            // Allow input (reading the response)
            conn.setDoInput(true);
            conn.setRequestMethod("GET"); // set the request method
            conn.connect(); // open the connection
            int statusCode = conn.getResponseCode(); // response status code
            String responseBody = null;
            // Only read the body when the status code is 200
            if (HttpURLConnection.HTTP_OK == statusCode) {
                StringBuffer response = new StringBuffer();
                // try-with-resources guarantees the reader is closed even if readLine() throws.
                try (BufferedReader bufferedReader = new BufferedReader(
                        new InputStreamReader(conn.getInputStream(), "UTF8"))) {
                    String readLine;
                    while (null != (readLine = bufferedReader.readLine())) {
                        response.append(readLine);
                    }
                }
                responseBody = response.toString();
            }
            System.out.println(responseBody);
        } finally {
            // Tell the connection that no further requests will be made through it.
            conn.disconnect();
        }
    }
}
5、模拟提交表单(POST请求)
在使用 POST 提交参数时,必须将 setDoOutput(boolean dooutput)方法中的参数设置为 true。
/**
 * Demonstrates submitting form parameters with an HTTP POST request through
 * {@link HttpURLConnection}.
 */
public class PostDemo {
    /**
     * Posts the form fields {@code wen} and {@code action} and prints the response body.
     *
     * <p>The response is decoded as "utf-8" and its lines are concatenated without
     * separators.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if connecting, writing the request body, or reading the response fails
     */
    public static void main(String[] args) throws IOException {
        // Form parameters to submit
        String wen = "EH629625211CS";
        String action = "ajax";
        // Initialise the URL
        URL url = new URL("http://www.*****.com/ems.php");
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        try {
            // Allow output: required before a request body can be written
            conn.setDoOutput(true);
            conn.setRequestMethod("POST"); // submit the parameters with POST
            // Build the form body; values are URL-encoded so that characters such as
            // '&', '=' or non-ASCII text cannot corrupt the key=value pairs.
            String params = "wen=" + java.net.URLEncoder.encode(wen, "UTF-8")
                    + "&action=" + java.net.URLEncoder.encode(action, "UTF-8");
            // Encode with an explicit charset rather than the platform default.
            byte[] body = params.getBytes("UTF-8");
            // Write the body and close the stream so the request is complete.
            try (java.io.OutputStream out = conn.getOutputStream()) {
                out.write(body);
            }
            StringBuilder html = new StringBuilder();
            // try-with-resources closes the reader even if reading fails part-way.
            try (BufferedReader bufferedReader = new BufferedReader(
                    new InputStreamReader(conn.getInputStream(), "utf-8"))) {
                String line;
                while ((line = bufferedReader.readLine()) != null) {
                    html.append(line);
                }
            }
            System.out.println(html.toString());
        } finally {
            // Tell the connection that no further requests will be made through it.
            conn.disconnect();
        }
    }
}
6、设置头信息
针对初始化的 URLConnection 及 HttpURLConnection,可以使用 setRequestProperty(key, value) 方法设置具体的请求头信息。
/**
 * Demonstrates setting request headers on a {@link URLConnection} before connecting.
 */
public class HeaderDemo {
    /**
     * Sets several request headers, connects, and prints the response body.
     *
     * <p>The response is decoded as "gbk" and its lines are concatenated without
     * separators.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if connecting or reading the response fails
     */
    public static void main(String[] args) throws IOException {
        // Initialise the URL
        URL url = new URL("http://www.********.com.cn/b.asp");
        URLConnection conn = url.openConnection();
        // The HTTP-specific type can be used in the same way:
        //   HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        // Add the request headers
        conn.setRequestProperty("Accept", "text/html");
        conn.setRequestProperty("Accept-Language", "zh-CN,zh;q=0.9");
        // NOTE(review): some JDK HTTP implementations treat "Host" as a restricted header
        // and may ignore this call -- confirm on the target runtime.
        conn.setRequestProperty("Host", "www.********.com.cn");
        conn.setRequestProperty("Cache-Control", "max-age=0");
        // The original literal read "Chrome /63..." with a stray space, which is not a
        // well-formed product token; the space is removed here.
        conn.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36");
        conn.connect();
        StringBuilder html = new StringBuilder();
        // try-with-resources closes the reader even if reading fails part-way.
        try (BufferedReader bufferedReader = new BufferedReader(
                new InputStreamReader(conn.getInputStream(), "gbk"))) {
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                html.append(line);
            }
        }
        System.out.println(html.toString());
    }
}
7、连接超时设置
使用 URLConnection 与 HttpURLConnection 时,可以设置两种超时时间,分别是连接超时时间(ConnectTimeout)和读取超时时间(ReadTimeout)。
// Open a connection object for the target URL.
URL url = new URL("http://www.********.com.cn/b.asp");
URLConnection conn = url.openConnection();
// Both timeouts use the same value, expressed in milliseconds.
int timeoutMillis = 30000;
conn.setConnectTimeout(timeoutMillis); // connect timeout
conn.setReadTimeout(timeoutMillis); // read timeout
8、代理服务器的使用
针对 URLConnection 与 HttpURLConnection,可以使用 Proxy 设置代理。
// IP address and port of the proxy server
InetSocketAddress proxyAddress = new InetSocketAddress("171.97.67.160", 3128);
Proxy proxy = new Proxy(Proxy.Type.HTTP, proxyAddress);
URL url = new URL("http://www.********.com.cn/b.asp");
// Pass the proxy to openConnection() so the connection is made through it.
URLConnection conn = url.openConnection(proxy);
// Establish the connection.
conn.connect();
9、HTTPS请求认证
/**
 * Demonstrates requesting an HTTPS URL after installing a default socket factory whose
 * trust manager accepts every certificate.
 *
 * <p><strong>Security warning:</strong> the trust manager installed by
 * {@code initUnSecureTSL()} performs no certificate checks at all, and it is installed
 * as the process-wide default for {@link HttpsURLConnection}. Do not use it where the
 * identity of the server matters.
 */
public class URLConnectionSSL {
    /**
     * Installs the trust-all socket factory, requests the URL, and prints the response body.
     *
     * <p>The body is read (decoded as "utf-8") only when the status code is 200; for any
     * other status code the printed value is {@code null}. Lines are concatenated without
     * separators.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if connecting or reading the response fails
     */
    public static void main(String[] args) throws IOException {
        initUnSecureTSL();
        // Request the data through URLConnection
        URL url = new URL("https://cn.*******.com");
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        try {
            int statusCode = conn.getResponseCode(); // response status code
            String responseBody = null;
            // Only read the body when the status code is 200
            if (HttpURLConnection.HTTP_OK == statusCode) {
                StringBuilder response = new StringBuilder();
                // try-with-resources closes the reader even if reading fails part-way.
                try (BufferedReader bufferedReader = new BufferedReader(
                        new InputStreamReader(conn.getInputStream(), "utf-8"))) {
                    String readLine;
                    while (null != (readLine = bufferedReader.readLine())) {
                        response.append(readLine);
                    }
                }
                responseBody = response.toString();
            }
            System.out.println(responseBody);
        } finally {
            // Tell the connection that no further requests will be made through it.
            conn.disconnect();
        }
    }

    /**
     * Installs, as the default for {@link HttpsURLConnection}, a socket factory built from
     * a trust manager that accepts any client or server certificate.
     *
     * <p>Failures while creating or initialising the SSL context are reported on standard
     * error and otherwise ignored, leaving the previous default socket factory in place.
     */
    private static void initUnSecureTSL() {
        // Trust manager that does not validate certificates
        final TrustManager[] trustAllCerts = new TrustManager[] {
            new X509TrustManager() {
                // Client certificate check: accepts any client certificate
                @Override
                public void checkClientTrusted(final X509Certificate[] chain, final String authType) {
                    // intentionally empty
                }

                // Server certificate check: accepts any server certificate
                @Override
                public void checkServerTrusted(final X509Certificate[] chain, final String authType) {
                    // intentionally empty
                }

                // The X509TrustManager contract asks for a non-null (possibly empty) array,
                // so return an empty array rather than null.
                @Override
                public X509Certificate[] getAcceptedIssuers() {
                    return new X509Certificate[0];
                }
            }
        };
        try {
            // Create the SSLContext and initialise it with the trust manager above.
            // "TLS" is requested instead of the legacy "SSL" protocol name.
            SSLContext sslContext = SSLContext.getInstance("TLS");
            sslContext.init(null, trustAllCerts, new java.security.SecureRandom());
            // Build a socket factory from the context
            SSLSocketFactory sslSocketFactory = sslContext.getSocketFactory();
            // Make it the default socket factory for HttpsURLConnection
            HttpsURLConnection.setDefaultSSLSocketFactory(sslSocketFactory);
        } catch (java.security.GeneralSecurityException e) {
            // Best effort, as in the original demo: report the failure and carry on.
            e.printStackTrace();
        }
    }
}