java 正则实现
- 去除 HTML 内容中的冗余部分（仅保留 <body></body> 之间的内容）
- 获取尖括号内的内容
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Regex-based helpers for cleaning up mail content: stripping the HTML
 * wrapper around a document body and extracting addresses written inside
 * angle brackets.
 */
public class HTMLSpirit {

    /**
     * Matches everything from the opening {@code <html ...>} tag through the
     * opening {@code <body ...>} tag. {@code [^>]*} (rather than {@code [^>]+})
     * is used so that an attribute-less {@code <html>} tag is matched as well.
     * CASE_INSENSITIVE makes the tag names match regardless of letter case.
     */
    private static final Pattern BEFORE_BODY =
            Pattern.compile("<html[^>]*>[\\s\\S]*?<body[^>]*?>", Pattern.CASE_INSENSITIVE);

    /** Matches everything from {@code </body>} through {@code </html>}. */
    private static final Pattern AFTER_BODY =
            Pattern.compile("</body>[\\s\\S]*?</html>", Pattern.CASE_INSENSITIVE);

    /** Captures (reluctantly) the text between a {@code <} and the next {@code >}. */
    private static final Pattern ANGLE_BRACKETS = Pattern.compile("<(.*?)>");

    /**
     * Keeps only the content between {@code <body>} and {@code </body>}.
     *
     * <p>The span from {@code <html ...>} to {@code <body ...>} and the span
     * from {@code </body>} to {@code </html>} are removed; anything outside
     * those spans (for example a DOCTYPE declaration) is left untouched.
     * Input that contains neither span is returned trimmed but otherwise
     * unchanged.
     *
     * @param htmlStr the HTML text to clean; may be {@code null}
     * @return the trimmed remaining text, or {@code null} if {@code htmlStr} is {@code null}
     */
    public static String delHTMLTag(String htmlStr) {
        if (htmlStr == null) {
            return null;
        }
        String withoutHead = BEFORE_BODY.matcher(htmlStr).replaceAll("");
        String withoutTail = AFTER_BODY.matcher(withoutHead).replaceAll("");
        return withoutTail.trim();
    }

    /**
     * Extracts the text found inside angle brackets, i.e. the {@code wanted}
     * part of {@code ...<wanted>...}.
     *
     * <p>Every bracketed section is collected, each followed by a {@code ';'}
     * (for example {@code "A <a@x.com>, B <b@y.com>"} yields
     * {@code "a@x.com;b@y.com;"}). When the input contains no bracketed
     * section it is returned unchanged.
     *
     * @param mailAddrs the raw address text; may be {@code null}
     * @return the bracketed contents joined with trailing {@code ';'}, or the
     *         input itself (including {@code null}) when nothing is bracketed
     */
    public static String getCleanAddress(String mailAddrs) {
        if (mailAddrs == null) {
            return null;
        }
        Matcher matcher = ANGLE_BRACKETS.matcher(mailAddrs);
        if (!matcher.find()) {
            return mailAddrs;
        }
        StringBuilder addr = new StringBuilder();
        // Loop over all matches so that multi-recipient input is not truncated
        // to its first address.
        do {
            addr.append(matcher.group(1)).append(';');
        } while (matcher.find());
        return addr.toString();
    }
}