import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Base64;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import sun.misc.BASE64Encoder;
/**
 * Static helpers for lightweight HTML handling: stripping markup, collecting
 * {@code <img>} sources and downloading an image as Base64 text.
 *
 * <p>The HTML helpers are regex based and are not a substitute for a real HTML parser.
 */
public class HTMLUtils {

    /** Matches any opening or closing tag, e.g. {@code <p>} or {@code </div>}. */
    private static final Pattern ANY_TAG = Pattern.compile("</?[^>]+>");

    /**
     * Second clean-up pass applied after tag removal.
     *
     * <p>NOTE(review): because of alternation precedence this reads as
     * {@code "<a>\s*"} OR tab OR carriage return OR {@code "\n</a>"}. Once the tags have
     * been removed no {@code <a>} / {@code </a>} can remain, so in practice only tabs and
     * carriage returns are deleted and newlines survive. The expression is kept unchanged
     * so existing output does not change.
     */
    private static final Pattern LEFTOVER_WHITESPACE = Pattern.compile("<a>\\s*|\t|\r|\n</a>");

    /** An {@code <img ...>} tag in any letter case; group 1 is the attribute text. */
    private static final Pattern IMG_TAG =
            Pattern.compile("<img(.*?)/?>", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

    /**
     * A quoted {@code src} attribute in any letter case; group 2 is the value. The closing
     * quote must be the same character as the opening one.
     */
    private static final Pattern SRC_ATTRIBUTE =
            Pattern.compile("src=([\"'])(.*?)\\1", Pattern.CASE_INSENSITIVE);

    /** Connect timeout for image downloads, in milliseconds. */
    private static final int CONNECT_TIMEOUT_MILLIS = 5 * 1000;

    /** Maximum number of characters per line of Base64 output. */
    private static final int BASE64_LINE_LENGTH = 76;

    /**
     * Removes HTML tags from the given text, then deletes tabs and carriage returns.
     *
     * @param strHtml HTML fragment; may be {@code null}
     * @return the text without tags, or {@code ""} when the input is {@code null}, empty
     *         or whitespace only
     */
    public static String StripHT(String strHtml) {
        if (isBlank(strHtml)) {
            return "";
        }
        String withoutTags = ANY_TAG.matcher(strHtml).replaceAll("");
        return LEFTOVER_WHITESPACE.matcher(withoutTags).replaceAll("");
    }

    /**
     * Collects the {@code src} value of every {@code <img>} tag found in the content.
     *
     * <p>Tag and attribute names are matched case-insensitively and a tag may span several
     * lines. Tags without a quoted {@code src} attribute are skipped.
     *
     * @param content HTML fragment; may be {@code null}
     * @return the {@code src} values in document order; empty (never {@code null}) when
     *         the input is blank or contains no matching tag
     */
    public static List<String> extractImg(String content) {
        List<String> srcList = new ArrayList<>();
        if (isBlank(content)) {
            return srcList;
        }
        Matcher tag = IMG_TAG.matcher(content);
        while (tag.find()) {
            Matcher src = SRC_ATTRIBUTE.matcher(tag.group(1));
            if (src.find()) {
                srcList.add(src.group(2));
            }
        }
        return srcList;
    }

    /**
     * Downloads the resource at the given HTTP(S) URL and returns its bytes as Base64 text.
     *
     * <p>The output is wrapped into lines of at most 76 characters separated by the
     * platform line separator; no separator follows the last line.
     * NOTE(review): the wrapping is meant to mirror the previously used
     * {@code sun.misc.BASE64Encoder}; confirm no caller relies on a trailing separator.
     *
     * @param imgURL absolute URL of the image
     * @return the Base64 encoded content
     * @throws java.net.MalformedURLException if {@code imgURL} cannot be parsed as a URL
     * @throws Exception if connecting or reading fails; the underlying
     *         {@link IOException} is attached as the cause
     */
    public static String encodeImageToBase64(String imgURL) throws Exception {
        URL url = new URL(imgURL);
        HttpURLConnection conn = null;
        try {
            conn = (HttpURLConnection) url.openConnection();
            conn.setRequestMethod("GET");
            conn.setConnectTimeout(CONNECT_TIMEOUT_MILLIS);
            try (InputStream inStream = conn.getInputStream()) {
                return encodeBase64(readAll(inStream));
            }
        } catch (IOException e) {
            // Message (user facing): "Image upload failed, please contact customer service!"
            throw new Exception("图片上传失败,请联系客服!", e);
        } finally {
            // conn is still null when openConnection() itself failed.
            if (conn != null) {
                conn.disconnect();
            }
        }
    }

    /** Reads the stream until end-of-file and returns everything that was read. */
    private static byte[] readAll(InputStream in) throws IOException {
        ByteArrayOutputStream collected = new ByteArrayOutputStream();
        byte[] buffer = new byte[4096];
        int len;
        while ((len = in.read(buffer)) != -1) {
            collected.write(buffer, 0, len);
        }
        return collected.toByteArray();
    }

    /** Encodes bytes as line-wrapped Base64 using the platform line separator. */
    private static String encodeBase64(byte[] data) {
        byte[] separator = System.lineSeparator().getBytes(StandardCharsets.US_ASCII);
        return Base64.getMimeEncoder(BASE64_LINE_LENGTH, separator).encodeToString(data);
    }

    /** Returns true for {@code null}, empty, or whitespace-only text. */
    private static boolean isBlank(String text) {
        if (text == null) {
            return true;
        }
        for (int i = 0; i < text.length(); i++) {
            if (!Character.isWhitespace(text.charAt(i))) {
                return false;
            }
        }
        return true;
    }
}