package com.nfa;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Regex-based helpers for stripping tags, attributes and whitespace from HTML text.
 *
 * <p>These helpers do plain pattern matching, not HTML parsing: a literal '&lt;' or '&gt;'
 * inside text, comments or attribute values can confuse them, and nested elements of the
 * same name are not balanced.
 */
public class UtilHtml {

    /** Matches one tag-like span: '<', any run of characters other than '>', then '>'. */
    private static final Pattern ANY_TAG = Pattern.compile("<[^>]*>");

    /** Matches runs of whitespace, with the non-breaking space U+00A0 listed explicitly. */
    private static final Pattern BLANK_RUN = Pattern.compile("[\\s\\u00A0]+");

    /**
     * Negative lookahead placed right after a tag name so that a name such as "b" does not
     * also match longer names like "body", "b:x" or "b-x".
     */
    private static final String NAME_END = "(?![\\w:\\-])";

    /**
     * Deletes every HTML tag and returns the remaining plain text.
     *
     * @param html the markup to strip; may be {@code null}
     * @return the text with all {@code <...>} spans removed, or {@code ""} when
     *         {@code html} is {@code null}, empty or whitespace-only
     */
    public static String deleteAll(String html) {
        if (isBlank(html)) {
            return "";
        }
        return ANY_TAG.matcher(html).replaceAll("");
    }

    /**
     * Deletes every occurrence of the given single (unpaired) tag, e.g. {@code <img ...>}
     * or {@code <br/>}. Matching is case-insensitive; closing tags are not touched.
     *
     * @param tag  the tag name, taken literally (regex metacharacters are escaped)
     * @param html the markup to process; may be {@code null}
     * @return {@code html} without the matching tags; {@code html} itself is returned
     *         unchanged when either argument is {@code null} or blank
     */
    public static String deleteTagsOfOne(String tag, String html) {
        if (isBlank(html) || isBlank(tag)) {
            return html;
        }
        Pattern single = Pattern.compile(
                "<\\s*" + Pattern.quote(tag.trim()) + NAME_END + "[^>]*>",
                Pattern.CASE_INSENSITIVE);
        return single.matcher(html).replaceAll("");
    }

    /**
     * Deletes every occurrence of the given paired tag together with its content, e.g.
     * {@code <script ...> ... </script>}. Matching is case-insensitive.
     *
     * <p>The body is matched lazily up to the first closing tag of the same name, so
     * nested elements of the same name are not balanced.
     *
     * @param tag  the tag name, taken literally (regex metacharacters are escaped)
     * @param html the markup to process; may be {@code null}
     * @return {@code html} without the matching elements; {@code html} itself is returned
     *         unchanged when either argument is {@code null} or blank
     */
    public static String deleteTagsOfTwo(String tag, String html) {
        if (isBlank(html) || isBlank(tag)) {
            return html;
        }
        String name = Pattern.quote(tag.trim());
        Pattern paired = Pattern.compile(
                "<\\s*" + name + NAME_END + "[^>]*>"   // opening tag with optional attributes
                        + "[\\s\\S]*?"                     // content, including line breaks (lazy)
                        + "<\\s*/\\s*" + name + "\\s*>",   // matching closing tag
                Pattern.CASE_INSENSITIVE);
        return paired.matcher(html).replaceAll("");
    }

    /**
     * Deletes every {@code attr="value"} occurrence, including empty values
     * ({@code attr=""}). Only double-quoted values are recognised, and whitespace around
     * the removed attribute is left in place.
     *
     * @param attr the attribute name; it is inserted into the regular expression
     *             unescaped, so it may itself be a regex fragment
     * @param html the markup to process; may be {@code null}
     * @return {@code html} without the matching attributes; {@code html} itself is
     *         returned unchanged when it is {@code null} or blank
     */
    public static String deleteAttr(String attr, String html) {
        if (isBlank(html)) {
            return html;
        }
        // '*' rather than '+' so that attributes with an empty value are removed as well.
        Pattern attribute = Pattern.compile(attr + "=\"([^\"]*)\"");
        return attribute.matcher(html).replaceAll("");
    }

    /**
     * Deletes all whitespace characters, including the non-breaking space U+00A0.
     *
     * @param html the text to process; may be {@code null}
     * @return the text without whitespace; {@code null} or empty input is returned as is
     */
    public static String deleteAllBlank(String html) {
        // Only null/empty short-circuit here: a whitespace-only string must still be stripped.
        if (html == null || html.isEmpty()) {
            return html;
        }
        return BLANK_RUN.matcher(html).replaceAll("");
    }

    /** Returns {@code true} when {@code str} is {@code null}, empty or whitespace-only. */
    private static boolean isBlank(String str) {
        return str == null || str.trim().isEmpty();
    }
}