从 HTML 中提取图片（image）地址
- 工具类okhttp的依赖
<dependency>
<groupId>com.squareup.okhttp3</groupId>
<artifactId>okhttp</artifactId>
<version>4.4.0</version>
</dependency>
- 工具类okhttp
package com.hengqin.life.common.util;
import lombok.extern.slf4j.Slf4j;
import okhttp3.*;
import javax.net.ssl.*;
import java.io.IOException;
import java.io.InputStream;
import java.security.SecureRandom;
import java.security.cert.X509Certificate;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.concurrent.TimeUnit;
@Slf4j
public class OkHttpUtils {
public static final MediaType JSON = MediaType.parse("application/json; charset=utf-8");
public static final MediaType XML = MediaType.parse("application/xml");
private OkHttpUtils() {
throw new IllegalStateException("请不要对我实例化!");
}
public static String get(final String url) throws IOException {
return get(url, null, null);
}
public static String get(String url, Map<String, String> headerMap, Map<String, String> paramMap) throws IOException {
return Objects.requireNonNull(getToResponse(url, headerMap, paramMap).body()).string();
}
public static InputStream getToInputStream(final String url) throws IOException {
return getToInputStreamByMap(url, null, null);
}
public static InputStream getToInputStreamByMap(String url, Map<String, String> headerMap, Map<String, String> paramMap) throws IOException {
return Objects.requireNonNull(getToResponse(url, headerMap, paramMap).body()).byteStream();
}
public static Response getToResponse(String url, Map<String, String> headerMap, Map<String, String> paramMap) throws IOException {
if (paramMap != null && !paramMap.isEmpty()) {
url = url + "?" + map2String(paramMap);
}
Request request = new Request.Builder()
.url(url)
.build();
if (headerMap != null && !headerMap.isEmpty()) {
request = getNewRequest(request, headerMap);
}
OkHttpClient client = new OkHttpClient.Builder()
.retryOnConnectionFailure(false)
.build();
return client.newCall(request).execute();
}
public static String postJSON(final String url, final String requestMessage) throws IOException {
return postJSON(url, requestMessage, null);
}
public static String postJSON(final String url, String requestMessage, Map<String, String> headerMap) throws IOException {
return postJSON(url, requestMessage, headerMap,30);
}
public static String postJSON(final String url, String requestMessage, Map<String, String> headerMap,final int timeout) throws IOException {
return post(timeout, TimeUnit.SECONDS, JSON, url, requestMessage, headerMap);
}
public static String postXML(final String url, final String requestMessage) throws IOException {
return postXML(url, requestMessage, null);
}
public static String postXML(final String url, String requestMessage, Map<String, String> headerMap) throws IOException {
return post(30, TimeUnit.SECONDS, XML, url, requestMessage, headerMap);
}
public static Response postToResponseByJSON(final String url, final String requestMessage) throws IOException {
return postToResponse(30, TimeUnit.SECONDS, JSON, url, requestMessage, null);
}
public static String post(final int timeout, final TimeUnit timeUnit, final MediaType mediaType, final String url, final String requestMessage, Map<String, String> headerMap) throws IOException {
return Objects.requireNonNull(postToResponse(timeout, timeUnit, mediaType, url, requestMessage, headerMap).body()).string();
}
public static InputStream postToInputStreamByJSON(final String url, final String requestMessage) throws IOException {
return postToInputStreamByJSON(url, requestMessage, null);
}
public static InputStream postToInputStreamByJSON(final String url, String requestMessage, Map<String, String> headerMap) throws IOException {
return postToInputStream(30, TimeUnit.SECONDS, JSON, url, requestMessage, headerMap);
}
public static InputStream postToInputStream(final int timeout, final TimeUnit timeUnit, final MediaType mediaType, final String url, final String requestMessage, Map<String, String> headerMap) throws IOException {
return Objects.requireNonNull(postToResponse(timeout, timeUnit, mediaType, url, requestMessage, headerMap).body()).byteStream();
}
public static Response postToResponse(final int timeout, final TimeUnit timeUnit, final MediaType mediaType, final String url, final String requestMessage, Map<String, String> headerMap) throws IOException {
Request request = new Request.Builder()
.url(url)
.post(RequestBody.create(requestMessage, mediaType))
.build();
if (headerMap != null && !headerMap.isEmpty()) {
request = getNewRequest(request, headerMap);
}
OkHttpClient client = new OkHttpClient.Builder()
.retryOnConnectionFailure(false)
.connectTimeout(timeout, timeUnit)
.readTimeout(timeout, timeUnit)
.build();
return client.newCall(request).execute();
}
public static String postByMap(final String url, Map<String, String> mapParams) throws IOException {
return postByMap(url, mapParams, null);
}
public static String postByMap(final String url, Map<String, String> mapParams, Map<String, String> headerMap) throws IOException {
return postByMap(30, TimeUnit.SECONDS, url, mapParams, headerMap);
}
public static String postByMap(final int timeout, final TimeUnit timeUnit, final String url, Map<String, String> mapParams, Map<String, String> headerMap) throws IOException {
return Objects.requireNonNull(postToResponseByForm(timeout, timeUnit, url, mapParams, headerMap).body()).string();
}
public static InputStream postToInputStreamByMap(final String url, Map<String, String> mapParams) throws IOException {
return postToInputStreamByMap(url, mapParams, null);
}
public static InputStream postToInputStreamByMap(final String url, Map<String, String> mapParams, Map<String, String> headerMap) throws IOException {
return postToInputStreamByMap(30, TimeUnit.SECONDS, url, mapParams, headerMap);
}
public static InputStream postToInputStreamByMap(final int timeout, final TimeUnit timeUnit, final String url, Map<String, String> mapParams, Map<String, String> headerMap) throws IOException {
return Objects.requireNonNull(postToResponseByForm(timeout, timeUnit, url, mapParams, headerMap).body()).byteStream();
}
public static Response postToResponseByForm(final int timeout, final TimeUnit timeUnit, final String url, Map<String, String> mapParams, Map<String, String> headerMap) throws IOException {
FormBody.Builder builder = new FormBody.Builder();
if (mapParams != null && !mapParams.isEmpty()) {
builder = getNewFormBody(mapParams);
}
FormBody formBody = builder.build();
Request request = new Request.Builder()
.url(url)
.post(formBody)
.build();
if (headerMap != null && !headerMap.isEmpty()) {
request = getNewRequest(request, headerMap);
}
OkHttpClient client = new OkHttpClient.Builder()
.retryOnConnectionFailure(false)
.connectTimeout(timeout, timeUnit)
.readTimeout(timeout, timeUnit)
.build();
return client.newCall(request).execute();
}
private static FormBody.Builder getNewFormBody(Map<String, String> mapParams) {
FormBody.Builder builder = new FormBody.Builder();
for (Map.Entry<String, String> entry : mapParams.entrySet()) {
builder.add(entry.getKey(), entry.getValue());
}
return builder;
}
private static Request getNewRequest(Request request, Map<String, String> headerMap) {
Request.Builder requestBuilder = request.newBuilder();
for (Map.Entry<String, String> entry : headerMap.entrySet()) {
requestBuilder.header(entry.getKey(), entry.getValue());
}
return requestBuilder.build();
}
private static SSLSocketFactory createSSLSocketFactory() {
SSLSocketFactory sSLSocketFactory = null;
try {
SSLContext sc = SSLContext.getInstance("TLSv1.2");
sc.init(null, new TrustManager[]{new OkHttpUtils.TrustAllManager()},
new SecureRandom());
sSLSocketFactory = sc.getSocketFactory();
} catch (Exception e) {
log.info("createSSLSocketFactory()-异常={}", e);
}
return sSLSocketFactory;
}
private static class TrustAllManager implements X509TrustManager {
@Override
public void checkClientTrusted(X509Certificate[] chain, String authType) {
log.info("TrustAllManager.checkClientTrusted()-无信任证书!");
}
@Override
public void checkServerTrusted(X509Certificate[] chain, String authType) {
log.info("TrustAllManager.checkServerTrusted()-无信任证书!");
}
@Override
public X509Certificate[] getAcceptedIssuers() {
return new X509Certificate[]{};
}
}
private static class TrustAllHostnameVerifier implements HostnameVerifier {
@Override
public boolean verify(String requestedHost, SSLSession remoteServerSession) {
return requestedHost.equalsIgnoreCase(remoteServerSession.getPeerHost());
}
}
public static String map2String(final Map<String, String> messageMap) {
Optional<String> messageStr = messageMap.entrySet().stream()
.filter(m -> m.getValue() != null && !"".equals(m.getValue()) && !"null".equals(m.getValue()))
.map(map -> map.getKey() + "=" + map.getValue())
.reduce((a, b) -> a + "&" + b);
return messageStr.isPresent() ? messageStr.get() : "";
}
public static String cleanUrl(String url) {
return url.replaceAll("/+$", "");
}
}
- 图片处理工具类（ImageUtils）：从 HTML 字符串中提取所有 img 标签的 src 地址
/** Matches an {@code <img ...>} tag in any letter case, possibly spanning lines; group 1 is the attribute text. */
private static final Pattern IMG_TAG_PATTERN =
        Pattern.compile("<img\\b([^>]*)>", Pattern.CASE_INSENSITIVE);

/**
 * Matches a quoted {@code src} attribute; group 2 is the value.
 * The look-behind rejects attributes that merely end in "src" (e.g. {@code data-src}),
 * and the back-reference requires the closing quote to be the same character as the opening one.
 */
private static final Pattern SRC_ATTR_PATTERN =
        Pattern.compile("(?<![\\w-])src\\s*=\\s*([\"'])(.*?)\\1", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

/**
 * Extracts the {@code src} value of every {@code <img>} tag in the given HTML text.
 *
 * <p>Tags are reported in document order; a tag without a quoted {@code src} attribute
 * contributes nothing. Only the first {@code src} of each tag is taken.
 *
 * @param resultString the HTML text to scan; may be {@code null}
 * @return a mutable list of the {@code src} values found, empty when there are none
 */
public static List<String> getImgSrc(String resultString) {
    List<String> sources = new ArrayList<>();
    if (resultString == null || resultString.isEmpty()) {
        return sources;
    }
    Matcher tagMatcher = IMG_TAG_PATTERN.matcher(resultString);
    while (tagMatcher.find()) {
        Matcher srcMatcher = SRC_ATTR_PATTERN.matcher(tagMatcher.group(1));
        if (srcMatcher.find()) {
            sources.add(srcMatcher.group(2));
        }
    }
    return sources;
}
- 获取 HTML 中 body 内容的工具类
package com.hengqin.life.common.util;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
/**
 * Helper for pulling the {@code <body>} content out of a remote HTML page.
 */
public class HtmlUtils {

    /**
     * Downloads the page at {@code path} and returns the markup between its opening
     * {@code <body ...>} tag and its closing {@code </body>} tag.
     *
     * <p>The page is fetched with {@code OkHttpUtils.get}, which reads the whole response as text
     * and closes it. The page is processed line by line and the lines are concatenated without
     * line separators. Markup that shares a line with the opening or closing body tag is kept.
     *
     * @param path URL of the HTML page
     * @return the body markup, or an empty string when the page has no {@code <body} tag or the
     *         download fails (the failure is printed, not rethrown)
     */
    public static String getBody(String path) {
        try {
            return extractBody(OkHttpUtils.get(path));
        } catch (Exception e) {
            // Best-effort contract kept from the original: never throw, fall back to "".
            // NOTE(review): replace with the project's logger once one is available in this class.
            e.printStackTrace();
            return "";
        }
    }

    /** Returns the text between the opening body tag and {@code </body}, joined without line breaks. */
    private static String extractBody(String html) {
        StringBuilder body = new StringBuilder();
        boolean insideBody = false;
        // Same line boundaries as BufferedReader.readLine(): \r\n, \r or \n.
        for (String line : html.split("\\r\\n|\\r|\\n")) {
            String rest = line;
            if (!insideBody) {
                int open = indexOfIgnoreCase(rest, "<body");
                if (open < 0) {
                    continue;
                }
                insideBody = true;
                int tagEnd = rest.indexOf('>', open);
                if (tagEnd < 0) {
                    // Opening tag continues on the next line: nothing usable on this one.
                    continue;
                }
                rest = rest.substring(tagEnd + 1);
            }
            int close = indexOfIgnoreCase(rest, "</body");
            if (close >= 0) {
                body.append(rest, 0, close);
                break;
            }
            body.append(rest);
        }
        return body.toString();
    }

    /** Case-insensitive {@code indexOf} that never shifts indices (unlike lower-casing the text first). */
    private static int indexOfIgnoreCase(String text, String needle) {
        int last = text.length() - needle.length();
        for (int i = 0; i <= last; i++) {
            if (text.regionMatches(true, i, needle, 0, needle.length())) {
                return i;
            }
        }
        return -1;
    }
}
- 从在线文档的 HTML 中取出图片（image）地址
// Usage: download the HTML of the online document; the value of getFileHtml() is passed as the URL.
String resultImage = OkHttpUtils.get(filesExtendVO.getFileHtml());
// Collect the src values of the <img> tags found in that HTML.
List<String> imgFile = ImageUtils.getImgSrc(resultImage);
// Store the extracted image addresses on the VO.
filesExtendVO.setImgFile(imgFile);
- 从在线文档的 HTML 中取出 body 内容
// Usage: fetch the same document URL and keep only the markup inside its <body> element.
String body = HtmlUtils.getBody(filesExtendVO.getFileHtml());
// Store the extracted body markup on the VO.
filesExtendVO.setContent(body);