package com.gwideal.jxwfkjlweb.util;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Removes HTML elements from a string.
 *
 * <p>Tags are stripped first and a small fixed set of character entities is
 * decoded afterwards, so markup that arrives entity-escaped in the input is
 * returned as literal text rather than being stripped.
 *
 * @author zhou_chaofei
 */
public class TxtWithoutHTMLElement {

    /** One tag-like run: {@code <}, any characters except angle brackets, then {@code >}. */
    private static final Pattern TAG = Pattern.compile("<[^<>]*>");

    /** Angle brackets enclosing nothing but whitespace; such runs are kept as plain text. */
    private static final Pattern BLANK_TAG = Pattern.compile("<\\s*>");

    /**
     * Supported entities as {name-with-semicolon, replacement} pairs. The leading
     * ampersand is deliberately not part of the stored name: the decoder matches it
     * separately, which keeps this table intact even if the source file itself is
     * ever passed through an HTML-unescaping step.
     */
    private static final String[][] ENTITIES = {
        {"amp;", "&"},
        {"lt;", "<"},
        {"gt;", ">"},
        {"quot;", "\""},
        {"nbsp;", ""},
    };

    /**
     * Strips HTML tags from the given text and decodes the supported entities.
     *
     * @param element the text to clean; may be {@code null}
     * @return {@code element} itself when it is {@code null} or contains only
     *         whitespace; otherwise the text with tags removed and the
     *         ampersand, less-than, greater-than and double-quote entities
     *         decoded, and the non-breaking-space entity dropped
     */
    public static String getTxtWithoutHTMLElement(String element) {
        if (null == element || "".equals(element.trim())) {
            return element;
        }

        Matcher matcher = TAG.matcher(element);
        StringBuilder stripped = new StringBuilder(element.length());
        int copiedUpTo = 0;
        while (matcher.find()) {
            stripped.append(element, copiedUpTo, matcher.start());
            String tag = matcher.group();
            // "<>" and "< >" carry no element name, so they are treated as text and kept.
            if (BLANK_TAG.matcher(tag).matches()) {
                stripped.append(tag);
            }
            copiedUpTo = matcher.end();
        }
        stripped.append(element, copiedUpTo, element.length());

        return decodeEntities(stripped.toString());
    }

    /**
     * Decodes the entities listed in {@link #ENTITIES} in a single left-to-right pass.
     *
     * <p>Text produced by a replacement is never scanned again, so an escaped
     * entity name is decoded exactly one level and the loop always terminates.
     * Ampersands that do not start a supported entity are copied unchanged.
     *
     * @param text the tag-free text; must not be {@code null}
     * @return the text with every supported entity replaced
     */
    private static String decodeEntities(String text) {
        int amp = text.indexOf('&');
        if (amp < 0) {
            return text;
        }

        StringBuilder out = new StringBuilder(text.length());
        int copiedUpTo = 0;
        while (amp >= 0) {
            String[] hit = null;
            for (String[] entity : ENTITIES) {
                if (text.startsWith(entity[0], amp + 1)) {
                    hit = entity;
                    break;
                }
            }
            if (hit == null) {
                // Bare ampersand or unsupported entity: leave it for the bulk copy.
                amp = text.indexOf('&', amp + 1);
                continue;
            }
            out.append(text, copiedUpTo, amp).append(hit[1]);
            copiedUpTo = amp + 1 + hit[0].length();
            amp = text.indexOf('&', copiedUpTo);
        }
        out.append(text, copiedUpTo, text.length());
        return out.toString();
    }

    /**
     * Manual smoke test: prints the cleaned form of a sample anchor element.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        System.out.println(getTxtWithoutHTMLElement("<a href='a/test'>test</a>"));
        System.out.println(getTxtWithoutHTMLElement("<a href='a/test'>test</a>"));
    }
}