package com.jrj.stock.common.util;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Regex-based helpers for extracting and stripping HTML tags.
 *
 * <p>This is a lightweight textual utility, not an HTML parser. Known limits:
 * <ul>
 *   <li>an attribute value containing {@code >} ends the tag early;</li>
 *   <li>nested elements of the same name are matched lazily, i.e. the first
 *       opening tag is paired with the first closing tag that follows it.</li>
 * </ul>
 * Tag names are always matched case-insensitively.
 *
 * @author TonsonMiao
 */
public class HtmlTagUtil {

    /** Matches any tag: {@code <}, everything up to the next {@code >}, then {@code >}. */
    private static final Pattern ANY_TAG = Pattern.compile("<([^>]*)>");

    /** Captures the name of an opening tag; digits and hyphens are allowed after the first letter. */
    private static final Pattern OPEN_TAG_NAME =
            Pattern.compile("<\\s*([a-zA-Z][a-zA-Z0-9-]*)");

    /** HTML void elements: they have no content and no closing tag. */
    private static final Pattern VOID_TAG_NAME = Pattern.compile(
            "area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr",
            Pattern.CASE_INSENSITIVE);

    /** Suffix that ends the tag name and swallows optional attributes up to {@code >}. */
    private static final String ATTRIBUTES_AND_CLOSE = "(?:[\\s/][^>]*)?>";

    /**
     * Returns the inner content of the first {@code <tag ...>...</tag>} element.
     *
     * @param target text to search; may be {@code null}
     * @param tag    tag name without angle brackets, matched case-insensitively
     * @return the text between the opening and closing tag (tags excluded), or
     *         {@code null} if {@code target} is {@code null} or holds no such element
     * @throws NullPointerException if {@code tag} is {@code null}
     */
    public String getTagContentFirst(String target, String tag) {
        Pattern element = elementPattern(tag);
        if (target == null) {
            return null;
        }
        Matcher m = element.matcher(target);
        // group(1) is the content only, consistent with getTagContentAll.
        return m.find() ? m.group(1) : null;
    }

    /**
     * Returns the inner content of every {@code <tag ...>...</tag>} element, in
     * order of appearance.
     *
     * @param target text to search; may be {@code null}
     * @param tag    tag name without angle brackets, matched case-insensitively
     * @return a new mutable list of contents; empty if nothing matched or
     *         {@code target} is {@code null}
     * @throws NullPointerException if {@code tag} is {@code null}
     */
    public List<String> getTagContentAll(String target, String tag) {
        Pattern element = elementPattern(tag);
        List<String> contents = new ArrayList<String>();
        if (target == null) {
            return contents;
        }
        Matcher m = element.matcher(target);
        while (m.find()) {
            contents.add(m.group(1));
        }
        return contents;
    }

    /**
     * Removes every element with the given tag name, including its content.
     * For a void element (for example {@code br} or {@code img}) the tag itself
     * is removed.
     *
     * @param target text to clean; may be {@code null}
     * @param tag    tag name without angle brackets, matched case-insensitively
     * @return {@code target} with the matching elements removed, or {@code null}
     *         if {@code target} is {@code null}
     * @throws NullPointerException if {@code tag} is {@code null}
     */
    public String clearTag(String target, String tag) {
        Pattern p = isVoidTag(tag) ? voidTagPattern(tag) : elementPattern(tag);
        if (target == null) {
            return null;
        }
        return p.matcher(target).replaceAll("");
    }

    /**
     * Removes the opening and closing tags with the given name but keeps the
     * content between them. Passes are repeated until no tag is left, so nested
     * elements of the same name are unwrapped as well. For a void element the
     * tag itself is removed.
     *
     * @param target text to clean; may be {@code null}
     * @param tag    tag name without angle brackets, matched case-insensitively
     * @return {@code target} without the matching tags, or {@code null} if
     *         {@code target} is {@code null}
     * @throws NullPointerException if {@code tag} is {@code null}
     */
    public String clearTagName(String target, String tag) {
        boolean voidTag = isVoidTag(tag);
        Pattern p = voidTag ? voidTagPattern(tag) : elementPattern(tag);
        if (target == null) {
            return null;
        }
        String current = target;
        Matcher m = p.matcher(current);
        // Every pass removes at least one tag, so the text strictly shrinks and
        // the loop terminates even when the result becomes empty.
        while (m.find()) {
            StringBuffer sb = new StringBuffer();
            do {
                m.appendReplacement(sb, "");
                if (!voidTag) {
                    // Append directly so '$' and '\' in the content stay literal.
                    sb.append(m.group(1));
                }
            } while (m.find());
            m.appendTail(sb);
            current = sb.toString();
            m = p.matcher(current);
        }
        return current;
    }

    /**
     * Removes every tag (anything between {@code <} and the next {@code >}),
     * keeping the text in between.
     *
     * @param target text to clean; may be {@code null}
     * @return {@code target} without tags, or {@code null} if {@code target} is
     *         {@code null}
     */
    public String clearTagNameAll(String target) {
        if (target == null) {
            return null;
        }
        return ANY_TAG.matcher(target).replaceAll("");
    }

    /**
     * Lists the distinct names of all opening tags found in the text.
     *
     * @param target text to scan; may be {@code null}
     * @return a new mutable set of upper-cased tag names (for example
     *         {@code "H1"}); empty if none were found or {@code target} is
     *         {@code null}
     */
    public Set<String> listTagNameAll(String target) {
        Set<String> names = new HashSet<String>();
        if (target == null) {
            return names;
        }
        Matcher m = OPEN_TAG_NAME.matcher(target);
        while (m.find()) {
            // Locale.ROOT keeps the result stable regardless of the default locale.
            names.add(m.group(1).toUpperCase(Locale.ROOT));
        }
        return names;
    }

    /** Tells whether the name denotes an HTML void element. */
    private static boolean isVoidTag(String tag) {
        return VOID_TAG_NAME.matcher(tag).matches();
    }

    /** Builds the pattern for a paired element; group 1 captures its content. */
    private static Pattern elementPattern(String tag) {
        String name = Pattern.quote(tag);
        return Pattern.compile(
                "<" + name + ATTRIBUTES_AND_CLOSE + "(.*?)</" + name + "\\s*>",
                Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
    }

    /** Builds the pattern for a single void tag such as {@code <br>} or {@code <img ... />}. */
    private static Pattern voidTagPattern(String tag) {
        return Pattern.compile(
                "<" + Pattern.quote(tag) + ATTRIBUTES_AND_CLOSE,
                Pattern.CASE_INSENSITIVE);
    }
}
java正则表达式处理HTML标签
最新推荐文章于 2021-06-02 21:11:31 发布