1.介绍
URLConnection是Java原生API,无需引入第三方依赖(只需导入java.net包中的类)。URLConnection是java.net包中的一个抽象类,主要用于实现应用程序与URL之间的通信。HttpURLConnection继承自URLConnection,也是抽象类。在网络爬虫中,可以使用URLConnection或HttpURLConnection请求URL获取流数据,再通过对流数据的操作,获取具体的实体内容。
2.实例化
/**
 * Shows the two ways of obtaining a connection object for a URL: as the
 * general {@link URLConnection} type, and cast to {@link HttpURLConnection}
 * for HTTP-specific operations.
 *
 * @throws Exception if the URL cannot be parsed or a connection object
 *                   cannot be created
 */
@Test
void contextLoads() throws Exception {
    final URL target = new URL("https://**.jd.com/");
    // General-purpose handle, typed as the abstract base class.
    URLConnection conn = target.openConnection();
    // Second, independent handle cast to the HTTP-specific subclass.
    HttpURLConnection conn1 = (HttpURLConnection) target.openConnection();
}
3.获取网页内容
/**
 * Downloads the sample page and prints its text to standard output.
 *
 * <p>The response is read line by line and the lines are concatenated
 * without separators (line terminators are dropped by {@code readLine()}).
 *
 * @throws Exception if the URL is malformed or the request/read fails
 */
@Test
void contextLoads() throws Exception {
    String urls = "https://**.jd.com/";
    URL url = new URL(urls);
    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
    // StringBuilder avoids re-copying the whole page on every appended line.
    StringBuilder html = new StringBuilder();
    // try-with-resources closes the reader (and the underlying response stream)
    // even when reading fails part-way through.
    // NOTE(review): decodes as UTF-8 instead of the platform default charset;
    // adjust if the target server declares a different charset.
    try (BufferedReader reader = new BufferedReader(
            new InputStreamReader(conn.getInputStream(), java.nio.charset.StandardCharsets.UTF_8))) {
        String line;
        while ((line = reader.readLine()) != null) {
            html.append(line);
        }
    }
    System.out.println(html);
}
4.GET请求
针对实例化的HttpURLConnection,可以使用setRequestMethod(String method)方法设置HTTP请求方法,可设置的请求方法包括GET、POST、HEAD、OPTIONS、PUT、DELETE以及TRACE。下面的示例(程序4-46)演示了设置GET的操作:setDoInput(true)表示该URL连接可用于输入,setRequestMethod("GET")表示将请求方法设置为GET。通过getResponseCode()方法可以获取响应状态码,如果该状态码为200,则利用实例化的StringBuffer将响应内容读取出来。
/**
 * Issues a GET request to the sample URL and prints the response body.
 *
 * <p>The body is only read when the status code is 200; for any other
 * status {@code null} is printed. Lines are concatenated without separators.
 *
 * @throws Exception if the URL is malformed or the request/read fails
 */
@Test
void contextLoads() throws Exception {
    // Build the URL and open the connection object.
    URL url = new URL("https://**.jd.com/");
    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
    // Allow reading from the connection.
    conn.setDoInput(true);
    conn.setRequestMethod("GET"); // request method
    conn.connect(); // establish the connection
    int statusCode = conn.getResponseCode(); // response status code
    String responseBody = null;
    if (HttpURLConnection.HTTP_OK == statusCode) {
        StringBuffer response = new StringBuffer();
        // The charset is set explicitly here; try-with-resources guarantees
        // the reader is closed even if reading throws.
        try (BufferedReader bufferedReader = new BufferedReader(
                new InputStreamReader(conn.getInputStream(), java.nio.charset.StandardCharsets.UTF_8))) {
            String readLine;
            while (null != (readLine = bufferedReader.readLine())) {
                // Append the line just read. The previous code appended the
                // buffer to itself, which always produced an empty body.
                response.append(readLine);
            }
        }
        responseBody = response.toString();
    }
    System.out.println(responseBody);
}
5.模拟提交表单(POST请求)
在使用POST提交参数时,必须将setDoOutput(boolean dooutput)方法中的参数设置为true。
/**
 * Submits a one-field form ({@code wen=ajax}) to the sample URL via
 * {@code post}. The returned response text is discarded.
 *
 * @throws Exception if the POST request fails
 */
@Test
void contextLoads() throws Exception {
    // Assemble the form body piece by piece: key, separator, value.
    final StringBuffer formBody = new StringBuffer();
    formBody.append("wen");
    formBody.append("=");
    formBody.append("ajax");
    post("https://**.jd.com/", formBody);
}
/**
 * Sends {@code params} as the body of a POST request and returns the
 * response text.
 *
 * <p>The request body is encoded as UTF-8 and the response is decoded as
 * UTF-8. Response lines are concatenated without separators.
 *
 * @param path   the URL to post to
 * @param params the raw request body (for example {@code key=value} pairs)
 * @return the response body with line terminators removed
 * @throws Exception if the URL is malformed, the request fails, or the
 *                   response stream cannot be opened
 */
public String post(String path, StringBuffer params) throws Exception {
    // Build the URL and open the connection object.
    URL url = new URL(path);
    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
    // A request body can only be written when output is enabled.
    conn.setDoOutput(true);
    conn.setRequestMethod("POST"); // request method
    // Encode with an explicit charset so the bytes sent do not depend on the
    // platform default (the response below is decoded as UTF-8 as well).
    byte[] bytes = params.toString().getBytes(StandardCharsets.UTF_8);
    // Close the request stream once the body is written; it was previously leaked.
    try (java.io.OutputStream out = conn.getOutputStream()) {
        out.write(bytes);
    }
    // StringBuilder avoids re-copying the accumulated text on every line.
    StringBuilder html = new StringBuilder();
    try (BufferedReader bufferedReader = new BufferedReader(
            new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8))) {
        String line;
        while ((line = bufferedReader.readLine()) != null) {
            html.append(line);
        }
    }
    return html.toString();
}
6.设置请求头
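请求头通过setRequestProperty(String key, String value)方法设置,例如User-Agent、content-type等;token也放在请求头中(例如X-Access-Token),具体用法见第10节工具类中带token的get/post方法。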
7.连接超时设置
8.代理服务器的使用
/**
 * Opens a connection to the sample URL through an HTTP proxy and connects.
 *
 * @throws Exception if the URL is malformed or the connection attempt fails
 */
@Test
void contextLoads() throws Exception {
    // Address (IP and port) of the proxy server to route the request through.
    final InetSocketAddress proxyAddress = new InetSocketAddress("171.97.67.160", 32123);
    final Proxy httpProxy = new Proxy(Proxy.Type.HTTP, proxyAddress);
    // Passing the proxy to openConnection makes this connection use it.
    final URLConnection viaProxy = new URL("https://**.jd.com/").openConnection(httpProxy);
    viaProxy.connect();
}
9. HTTPS请求认证
使用URLConnection与HttpURLConnection直接访问某些以https://为前缀的URL时,可能会因证书校验失败而产生错误。为此,可以在使用URLConnection与HttpURLConnection之前创建信任管理器来忽略证书验证。注意:忽略证书验证会使连接失去对中间人攻击的防护,仅适用于测试或爬虫场景,不应在生产环境中使用。
import org.junit.jupiter.api.Test;
import org.springframework.boot.test.context.SpringBootTest;
import javax.net.ssl.*;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.Socket;
import java.net.URL;
import java.security.cert.X509Certificate;
/**
 * Demonstrates an HTTPS GET request with certificate validation disabled.
 *
 * <p><strong>Warning:</strong> {@link #initUnsecureTls()} replaces the
 * JVM-wide default socket factory of {@link HttpsURLConnection} with one that
 * accepts every certificate. This removes protection against
 * man-in-the-middle attacks and must not be used in production code.
 */
@SpringBootTest
class JsoupApplicationTests {

    /**
     * Disables certificate validation, issues a GET request to the sample URL
     * and prints the response body ({@code null} when the status is not 200).
     *
     * @throws Exception if the URL is malformed or the request/read fails
     */
    @Test
    void contextLoads() throws Exception {
        initUnsecureTls();
        URL url = new URL("https://**.jd.com/");
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        // Allow reading from the connection.
        conn.setDoInput(true);
        conn.setRequestMethod("GET"); // request method
        conn.connect(); // establish the connection
        int statusCode = conn.getResponseCode(); // response status code
        String responseBody = null;
        if (HttpURLConnection.HTTP_OK == statusCode) {
            StringBuilder response = new StringBuilder();
            // Explicit charset plus try-with-resources: the reader is closed
            // even if reading throws.
            try (BufferedReader bufferedReader = new BufferedReader(
                    new InputStreamReader(conn.getInputStream(), java.nio.charset.StandardCharsets.UTF_8))) {
                String readLine;
                while (null != (readLine = bufferedReader.readLine())) {
                    // Append the line just read. The previous code appended
                    // the buffer to itself, which always yielded an empty body.
                    response.append(readLine);
                }
            }
            responseBody = response.toString();
        }
        System.out.println(responseBody);
    }

    /**
     * Installs a trust-all socket factory as the default for
     * {@link HttpsURLConnection}.
     *
     * @throws IllegalStateException if the TLS context cannot be created or
     *                               initialised; failing here prevents the
     *                               caller from silently continuing with the
     *                               default (validating) configuration
     */
    private static void initUnsecureTls() {
        // Trust manager whose checks are all no-ops, i.e. every certificate
        // chain is accepted.
        final TrustManager[] trustManagers = {
            new X509ExtendedTrustManager() {
                @Override
                public void checkClientTrusted(X509Certificate[] x509Certificates, String s, Socket socket) {
                }

                @Override
                public void checkServerTrusted(X509Certificate[] x509Certificates, String s, Socket socket) {
                }

                @Override
                public void checkClientTrusted(X509Certificate[] x509Certificates, String s, SSLEngine sslEngine) {
                }

                @Override
                public void checkServerTrusted(X509Certificate[] x509Certificates, String s, SSLEngine sslEngine) {
                }

                @Override
                public void checkClientTrusted(X509Certificate[] x509Certificates, String s) {
                }

                @Override
                public void checkServerTrusted(X509Certificate[] x509Certificates, String s) {
                }

                @Override
                public X509Certificate[] getAcceptedIssuers() {
                    return new X509Certificate[0];
                }
            }
        };
        try {
            // Initialise a context with the trust-all manager.
            final SSLContext sslContext = SSLContext.getInstance("TLS");
            sslContext.init(null, trustManagers, new java.security.SecureRandom());
            // Socket factory backed by that context.
            final SSLSocketFactory sslSocketFactory = sslContext.getSocketFactory();
            // Make it the default for every HttpsURLConnection created afterwards.
            HttpsURLConnection.setDefaultSSLSocketFactory(sslSocketFactory);
        } catch (java.security.GeneralSecurityException e) {
            throw new IllegalStateException("Unable to install trust-all TLS socket factory", e);
        }
    }
}
10.工具类
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
/**
 * Small HTTP helper built on {@link HttpURLConnection}.
 *
 * <p>All requests use a 30-second connect and read timeout. Request bodies are
 * encoded as UTF-8 and responses are decoded as UTF-8. Response lines are
 * concatenated without separators (line terminators are dropped).
 */
public class HttpURLConnectionUtil {

    /** Connect and read timeout applied to every request, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 30000;

    /** Browser-like User-Agent sent with the token-carrying requests. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
            + "Chrome/104.0.0.0 Safari/537.36";

    /**
     * Performs a GET request. Query parameters must already be part of
     * {@code path}.
     *
     * @param path the full request URL
     * @return the response body, or {@code null} when the status code is not 200
     * @throws Exception if the URL is malformed or the request/read fails
     */
    public String get(String path) throws Exception {
        HttpURLConnection conn = open(path, "GET");
        conn.setDoInput(true);
        conn.connect();
        return readBodyIfOk(conn);
    }

    /**
     * Performs a GET request carrying an access token in the request headers.
     *
     * @param path  the full request URL
     * @param token value sent in the {@code X-Access-Token} header
     * @return the response body, or {@code null} when the status code is not 200
     * @throws Exception if the URL is malformed or the request/read fails
     */
    public String get(String path, String token) throws Exception {
        HttpURLConnection conn = open(path, "GET");
        conn.setDoInput(true);
        applyTokenHeaders(conn, token);
        conn.connect();
        return readBodyIfOk(conn);
    }

    /**
     * Performs a POST request with a body and no token.
     *
     * @param path   the request URL
     * @param params the raw request body
     * @return the response body
     * @throws Exception if the URL is malformed, the request fails, or the
     *                   response stream cannot be opened
     */
    public String post(String path, StringBuffer params) throws Exception {
        HttpURLConnection conn = open(path, "POST");
        // A request body can only be written when output is enabled.
        conn.setDoOutput(true);
        writeBody(conn, params);
        return readBody(conn);
    }

    /**
     * Performs a POST request with a body and an access token.
     *
     * @param path   the request URL
     * @param params the raw request body
     * @param token  value sent in the {@code X-Access-Token} header
     * @return the response body
     * @throws Exception if the URL is malformed, the request fails, or the
     *                   response stream cannot be opened
     */
    public String post(String path, StringBuffer params, String token) throws Exception {
        HttpURLConnection conn = open(path, "POST");
        conn.setDoOutput(true);
        applyTokenHeaders(conn, token);
        writeBody(conn, params);
        return readBody(conn);
    }

    /** Creates a connection object for {@code path} with the method and timeouts set. */
    private static HttpURLConnection open(String path, String method) throws IOException {
        HttpURLConnection conn = (HttpURLConnection) new URL(path).openConnection();
        conn.setRequestMethod(method);
        conn.setConnectTimeout(TIMEOUT_MILLIS);
        conn.setReadTimeout(TIMEOUT_MILLIS);
        return conn;
    }

    /** Sets the User-Agent, JSON content type and access-token request headers. */
    private static void applyTokenHeaders(HttpURLConnection conn, String token) {
        conn.setRequestProperty("User-Agent", USER_AGENT);
        conn.setRequestProperty("content-type", "application/json");
        conn.setRequestProperty("X-Access-Token", token);
    }

    /** Writes {@code params} as the UTF-8 request body and closes the request stream. */
    private static void writeBody(HttpURLConnection conn, StringBuffer params) throws IOException {
        byte[] bytes = params.toString().getBytes(StandardCharsets.UTF_8);
        try (OutputStream out = conn.getOutputStream()) {
            out.write(bytes);
        }
    }

    /** Returns the response body when the status is 200, otherwise {@code null}. */
    private static String readBodyIfOk(HttpURLConnection conn) throws IOException {
        if (HttpURLConnection.HTTP_OK != conn.getResponseCode()) {
            return null;
        }
        return readBody(conn);
    }

    /** Reads the whole response as UTF-8 text, joining lines without separators. */
    private static String readBody(HttpURLConnection conn) throws IOException {
        StringBuilder body = new StringBuilder();
        // try-with-resources closes the reader even if reading throws.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // Append the line just read. The previous GET code appended
                // the buffer to itself and therefore always returned "".
                body.append(line);
            }
        }
        return body.toString();
    }
}