/**
 * Matches one HTML tag. The expression comes from {@code HtmlUtil.RE_HTML_MARK}
 * (NOTE(review): presumably Hutool's {@code HtmlUtil} - confirm against the file's imports).
 * Compiled once because {@link Pattern} is immutable and thread-safe.
 */
private static final Pattern HTML_TAG_PATTERN = Pattern.compile(HtmlUtil.RE_HTML_MARK);

/**
 * Recognizes the tags that are turned into a line break: {@code </p>}, {@code </li>} and
 * {@code <br>}. Matching is case-insensitive and tolerates inner whitespace, attributes and
 * the self-closing form, so {@code <br/>}, {@code <br />}, {@code <BR>} and {@code </P>} count.
 * The lookahead after the tag name keeps longer names such as {@code </pre>} from matching.
 */
private static final Pattern LINE_BREAK_TAG_PATTERN =
        Pattern.compile("<\\s*(?:/\\s*(?:p|li)|br)(?=[\\s/>])[^>]*>", Pattern.CASE_INSENSITIVE);

/**
 * Strips HTML tags from rich text while preserving line breaks.
 *
 * <p>Every tag is removed. A closing paragraph tag, a closing list-item tag and any form of
 * {@code <br>} are replaced by the platform line separator; all other tags are replaced by
 * nothing. The result is trimmed and passed through {@code removeMultipleNewlines}.
 *
 * @param html the HTML fragment to clean; may be {@code null}
 * @return the text with tags removed, or an empty string when {@code html} is {@code null}
 *         or empty
 */
public static String removeHtmlTags(String html) {
    if (html == null || html.isEmpty()) {
        return "";
    }
    String lineSeparator = System.lineSeparator();
    StringBuilder text = new StringBuilder(html.length());
    Matcher tagMatcher = HTML_TAG_PATTERN.matcher(html);
    // Index in html up to which the input has already been copied or skipped.
    int copiedUpTo = 0;
    while (tagMatcher.find()) {
        // Copy the plain text that sits between the previous tag and this one.
        text.append(html, copiedUpTo, tagMatcher.start());
        if (LINE_BREAK_TAG_PATTERN.matcher(tagMatcher.group()).matches()) {
            text.append(lineSeparator);
        }
        copiedUpTo = tagMatcher.end();
    }
    // Copy whatever follows the last tag.
    text.append(html, copiedUpTo, html.length());
    // Trim surrounding whitespace, then collapse consecutive line breaks.
    return removeMultipleNewlines(text.toString().trim());
}
/**
 * Matches a run of two or more consecutive line terminators, each either LF or CRLF.
 * Compiled once because {@link Pattern} is immutable and thread-safe.
 */
private static final Pattern MULTIPLE_NEWLINES_PATTERN = Pattern.compile("(\\r?\\n){2,}");

/**
 * Collapses every run of two or more consecutive line breaks into a single one.
 *
 * <p>Each run is replaced by the platform line separator. A single line break is left
 * exactly as it was.
 *
 * @param content the text to normalize; may be {@code null}
 * @return the text with consecutive line breaks collapsed, or an empty string when
 *         {@code content} is {@code null} or empty
 */
public static String removeMultipleNewlines(String content) {
    if (content == null || content.isEmpty()) {
        return "";
    }
    // Quote the separator so it is always inserted literally, never parsed as a group reference.
    String replacement = Matcher.quoteReplacement(System.lineSeparator());
    return MULTIPLE_NEWLINES_PATTERN.matcher(content).replaceAll(replacement);
}
// Java: strip HTML tags from rich text while preserving line breaks
// First published 2024-07-17 15:15:50