目标是将数据库中存储的一个字符串加工处理,然后放到xml文件中
1、去除html标签
2、依旧会存在特殊字符影响xml解析,所以加入CDATA标签,避免转义
import java.util.regex.Pattern;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.commons.lang.StringUtils;
/**
 * Helper that turns a stored HTML fragment into text that can be embedded in an XML document:
 * markup tags are stripped and the remaining text is wrapped in a CDATA section.
 */
public class ContentUtil {

    /**
     * Regular expression for one markup tag: a {@code <}, then any run of characters other than
     * {@code <}, then a {@code >}. The quantifier is greedy, so a match extends to the last
     * {@code >} that precedes the next {@code <}.
     */
    public static final String replaceRegularExpression = "<[.[^<]]*>";

    /** Compiled once so the expression is not re-parsed on every call. */
    private static final Pattern TAG_PATTERN = Pattern.compile(replaceRegularExpression);

    /** Opening delimiter of an XML CDATA section. */
    private static final String CDATA_START = "<![CDATA[";

    /** Closing delimiter of an XML CDATA section. */
    private static final String CDATA_END = "]]>";

    /**
     * Strips markup tags and ASCII spaces from {@code source} and wraps the result in a CDATA
     * section.
     *
     * <p>Only the tags themselves are removed; text between an opening and a closing tag (for
     * example the body of a {@code <script>} element) is kept.
     *
     * @param source the raw text, possibly containing HTML tags; may be {@code null}
     * @return {@code source} unchanged when it is {@code null}, empty or whitespace-only;
     *     otherwise the cleaned text enclosed in {@code <![CDATA[ ... ]]>}
     */
    public static String replaceContent(String source) {
        if (isBlank(source)) {
            return source;
        }
        String text = TAG_PATTERN.matcher(source).replaceAll("");
        // NOTE(review): this removes every ASCII space, including spaces between words.
        // Confirm that is intended (the target may have been the "&nbsp;" entity instead).
        text = text.replace(" ", "");
        // A literal "]]>" inside the text would terminate the CDATA section early and yield
        // malformed XML, so split it across two adjacent CDATA sections.
        text = text.replace(CDATA_END, "]]" + CDATA_END + CDATA_START + ">");
        return CDATA_START + text + CDATA_END;
    }

    /**
     * Reports whether {@code value} is {@code null}, empty, or made up solely of characters for
     * which {@link Character#isWhitespace(char)} is true. Implemented locally so this check needs
     * nothing beyond the JDK.
     */
    private static boolean isBlank(String value) {
        if (value == null) {
            return true;
        }
        for (int i = 0; i < value.length(); i++) {
            if (!Character.isWhitespace(value.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Demo entry point: runs {@link #replaceContent(String)} on a sample rich-text fragment and
     * prints the result to standard output.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String o = "<p>数据脱敏数据脱敏213131231231dvcxvxx rdc 123<sup>12</sup><sup><span style=\"text-decoration: " +
                "underline;\">123</span></sup><span style=\"text-decoration: none;\"><em>22</em><script>aler" +
                "t(1)</script><strong>qwer</strong></span></p><p><span style=\"text-decoration: underline;\">" +
                "<strong>hjdsjjfsdjkfkjdskf. </strong></span></p><p><span style=\"text-decoration: " +
                "underline;\"><strong>qe</strong></span></p><p><span style=\"text-decoration: none;\">hhh" +
                "</span></p><p><span style=\"text-decoration: none;\"><img src=\"p2p/ueditor/image/get/8a078f" +
                "4d14c14454aefad0c83de808b8\" title=\"o-57faa441f0b1477fbe75ac6d1c461280..jpeg\" alt=\"42018af" +
                "ea30af9e30c4028f9d3653a33.jpeg\"/></span></p>";
        String s = replaceContent(o);
        System.out.println(s);
    }
}
运行结果
<![CDATA[数据脱敏数据脱敏213131231231dvcxvxx rdc 1231212322<script>alert(1)</script>qwerhjdsjjfsdjkfkjdskf.qehhh]]>
这个字符串就可以完美地放入xml中了,类似如下:
<books>
<book>
<author>李刚</author>
<title>疯狂XML讲义</title>
<publisher>电子工业出版社</publisher>
</book>
<book>
<author>Developer</author>
<title>XML格式化工具</title>
<publisher>
<![CDATA[数据脱敏数据脱敏213131231231dvcxvxx rdc 1231212322<script>alert(1)</script>qwerhjdsjjfsdjkfkjdskf.qehhh]]>
</publisher>
</book>
</books>