Java 过滤 html、script、style 代码得到纯字符串方法

最新推荐文章于 2024-09-15 09:47:04 发布

xh_wanghe

最新推荐文章于 2024-09-15 09:47:04 发布

阅读量113

点赞数

分类专栏： java 文章标签： java

本文链接：https://blog.csdn.net/xh_wanghe/article/details/84165938

版权

java 专栏收录该内容

1 篇文章 0 订阅

订阅专栏

/** Matches a whole script element, tolerating whitespace inside the tag delimiters. */
private static final java.util.regex.Pattern NO_HTML_SCRIPT_BLOCK = java.util.regex.Pattern.compile(
        "<[\\s]*?script[^>]*?>[\\s\\S]*?<[\\s]*?\\/[\\s]*?script[\\s]*?>",
        java.util.regex.Pattern.CASE_INSENSITIVE);

/** Matches a whole style element, tolerating whitespace inside the tag delimiters. */
private static final java.util.regex.Pattern NO_HTML_STYLE_BLOCK = java.util.regex.Pattern.compile(
        "<[\\s]*?style[^>]*?>[\\s\\S]*?<[\\s]*?\\/[\\s]*?style[\\s]*?>",
        java.util.regex.Pattern.CASE_INSENSITIVE);

/** Matches any remaining single tag of the form {@code <...>}. */
private static final java.util.regex.Pattern NO_HTML_ANY_TAG = java.util.regex.Pattern.compile(
        "<[^>]+>",
        java.util.regex.Pattern.CASE_INSENSITIVE);

/**
 * Removes script elements, style elements and every remaining HTML tag from
 * {@code content}, then returns at most {@code p} characters of the remaining text
 * followed by {@code "..."}.
 *
 * <p>The trailing {@code "..."} is appended unconditionally, i.e. also when the stripped
 * text is shorter than {@code p}. Script and style elements are removed together with
 * their bodies; for all other tags only the tag itself is removed. HTML entities are not
 * decoded.
 *
 * @param content the HTML text to clean; may be {@code null}
 * @param p       maximum number of {@code char}s of cleaned text to keep; expected to be
 *                greater than zero
 * @return the cleaned, possibly shortened text ending in {@code "..."}, or the empty
 *         string when {@code content} is {@code null} or {@code p} is not positive
 * @author Simon (Zhao Zhenming), created Aug 25, 2010
 */
public static String getNoHTMLString(String content, int p) {
    // A non-positive limit cannot keep any text; the former "== 0" check let negative
    // values reach substring() and throw StringIndexOutOfBoundsException.
    if (content == null || p <= 0) {
        return "";
    }

    // Order matters: script/style elements must go first so that their bodies are
    // dropped as well, before the generic tag pattern strips the bare tags.
    String text = NO_HTML_SCRIPT_BLOCK.matcher(content).replaceAll("");
    text = NO_HTML_STYLE_BLOCK.matcher(text).replaceAll("");
    text = NO_HTML_ANY_TAG.matcher(text).replaceAll("");

    if (text.length() > p) {
        int cut = p;
        // Do not cut between the two halves of a surrogate pair; that would leave a
        // dangling high surrogate (malformed UTF-16) right before the ellipsis.
        if (Character.isHighSurrogate(text.charAt(cut - 1))
                && Character.isLowSurrogate(text.charAt(cut))) {
            cut--;
        }
        text = text.substring(0, cut);
    }
    return text + "...";
}