根据 URL 获取 HTML(方式之一)
依赖
<!-- https://mvnrepository.com/artifact/com.liferay/org.apache.commons.httpclient -->
<dependency>
<groupId>com.liferay</groupId>
<artifactId>org.apache.commons.httpclient</artifactId>
<version>3.1.LIFERAY-PATCHED-1</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-lang3 -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.3.1</version>
</dependency>
代码
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpMethod;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.lang3.StringUtils;
import java.io.IOException;
public class HttpClientUtil {
public static HttpClient getClient() {
HttpClient client = new HttpClient();
return client;
}
public static String getHtml(String url) throws HttpException, IOException {
return getHtml(url, 80, null, null, 0, null);
}
public static String getHtml(String url, String cookie) throws HttpException, IOException {
return getHtml(url, 80, null, null, 0, cookie);
}
public static String getHtml(String url, int port, String cookie) throws HttpException, IOException {
return getHtml(url, port, null, null, 0, cookie);
}
public static String getHtml(String url, int port, String encoding, String proxyHost, int proxyPort, String cookie)
throws HttpException, IOException {
HttpClient httpClient = getClient();
String rest = null;
if (proxyHost != null && proxyPort != 0) httpClient.getHostConfiguration().setProxy(proxyHost, proxyPort);
HttpMethod method = new GetMethod(url);
if (!StringUtils.isBlank(cookie)) {
method.addRequestHeader("Cookie", cookie);
}
method.addRequestHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.90 Safari/537.36");
//Mozilla/5.0 (Windows NT 6.1; rv:7.0.1) Gecko/20100101 Firefox/7.0.1
httpClient.executeMethod(method);
//根据http头解析正确的字符集
String header = method.getResponseHeader("Content-Type").getValue();
if (header.contains("charset=")) {
encoding = header.substring(header.indexOf("charset=") + "charset=".length(), header.length());
}
if (encoding == null) encoding = "GBK";
rest = new String(method.getResponseBody(), encoding);
method.releaseConnection();
return rest;
}
public static void main(String[] args) throws HttpException, IOException {
String url = "https://www.baidu.com";
System.out.println(getHtml(url));
}
}