import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Regex-based helpers for simple HTML fragments: rewriting one attribute of a
 * tag, stripping markup, and counting {@code <img>} tags.
 *
 * <p>These helpers use regular expressions rather than an HTML parser. A tag is
 * taken to end at the first {@code >} after its opening {@code <}, so a
 * {@code >} inside an attribute value is not supported.
 */
public class Test {

    /**
     * One {@code img} tag, with or without attributes. The look-ahead requires
     * whitespace, {@code /} or {@code >} right after the name so that longer
     * names starting with "img" are not matched. Tag names are matched
     * case-insensitively.
     */
    private static final Pattern IMG_TAG =
            Pattern.compile("<\\s*img(?=[\\s/>])[^>]*>", Pattern.CASE_INSENSITIVE);

    /** A named character entity such as {@code &nbsp;}. */
    private static final Pattern HTML_ENTITY = Pattern.compile("\\&[a-zA-Z]{1,10};");

    /** Anything between {@code <} and the next {@code >}. */
    private static final Pattern HTML_TAG = Pattern.compile("<[^>]*>");

    /** Leftovers of broken markup: a dangling {@code />} or a lone angle bracket. */
    private static final Pattern TAG_REMNANT = Pattern.compile("/>|[<>]");

    /**
     * Demo entry point: prints the number of {@code img} tags in a sample fragment.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String htmlStr = "<p><span><img src=\'http://imagesv5.vivame.cn/pic/upload/phote/201511/07516786-7d60-4ca5-ad11-b6c8bb82d99d.jpg\' border=0></span></p><p><span>"
                + "<img src=\'http://imagesv5.vivame.cn/pic/upload/phote/201511/07516786-7d60-4ca5-ad11-b6c8bb82d99d.jpg\' border=0></span></p>"
                + "<p>hahahahha<span><img src=\'http://imagesv5.vivame.cn/pic/upload/phote/201511/07516786-7d60-4ca5-ad11-b6c8bb82d99d.jpg\' border=0></span></p> ";
        System.out.println(imgSum(htmlStr));
    }

    /**
     * Rewrites one attribute in every occurrence of a tag that has attributes.
     *
     * <p>In each matching tag, the first quoted occurrence of
     * {@code searchAttrib} is replaced by {@code searchAttrib="} followed by
     * {@code newStr} verbatim. Only the opening double quote is emitted here;
     * the caller must include the closing quote at the end of {@code newStr}.
     * Tags that do not carry the attribute are rebuilt with their attributes
     * unchanged. Every matched tag is re-emitted as {@code <searchTag attrs>}.
     *
     * <p>{@code searchTag} and {@code searchAttrib} are inserted into the
     * regular expressions unescaped, so they must not contain regex
     * metacharacters. Tag and attribute names are matched case-sensitively.
     *
     * @param htmlStr      the HTML text; {@code null} is treated as empty
     * @param searchTag    name of the tag to modify, e.g. {@code img}
     * @param searchAttrib name of the attribute inside that tag, e.g. {@code src}
     * @param newStr       text written after {@code searchAttrib="}, including
     *                     the closing quote
     * @return the rewritten HTML, or an empty string for {@code null} input
     */
    public static String updateHtmlTag(String htmlStr, String searchTag,
            String searchAttrib, String newStr) {
        if (htmlStr == null || htmlStr.isEmpty()) {
            return "";
        }
        Pattern tagPattern = Pattern.compile("<\\s*" + searchTag + "\\s+([^>]*)\\s*>");
        // Each quote style is matched as a pair, so a value may contain the
        // other quote character or a '|' without being cut short.
        Pattern attribPattern = Pattern.compile(
                searchAttrib + "\\s*=\\s*(?:\"[^\"]+\"|'[^']+')");
        // Quote the replacement: '$' and '\' in a URL must be taken literally,
        // not as group references or escapes.
        String attribReplacement = Matcher.quoteReplacement(searchAttrib + "=\"" + newStr);

        Matcher tagMatcher = tagPattern.matcher(htmlStr);
        // StringBuffer keeps this compatible with pre-Java 9 Matcher APIs.
        StringBuffer result = new StringBuffer();
        while (tagMatcher.find()) {
            StringBuffer rebuiltTag = new StringBuffer("<" + searchTag + " ");
            Matcher attribMatcher = attribPattern.matcher(tagMatcher.group(1));
            if (attribMatcher.find()) {
                attribMatcher.appendReplacement(rebuiltTag, attribReplacement);
            }
            // Copies the remaining attributes (or all of them when nothing matched).
            attribMatcher.appendTail(rebuiltTag);
            rebuiltTag.append('>');
            // The rebuilt tag contains original attribute text, so it must be
            // quoted as well before being used as a replacement.
            tagMatcher.appendReplacement(result,
                    Matcher.quoteReplacement(rebuiltTag.toString()));
        }
        tagMatcher.appendTail(result);
        return result.toString();
    }

    /**
     * Strips HTML formatting from a string.
     *
     * <p>Named entities such as {@code &nbsp;} are deleted (not decoded), then
     * every {@code <...>} tag, then any dangling {@code />} or lone angle
     * bracket left by broken markup. Ordinary text characters, including
     * parentheses and slashes, are preserved.
     *
     * @param input the text to clean; may be {@code null}
     * @return the text without markup, or an empty string when {@code input}
     *         is {@code null} or contains only whitespace
     */
    public static String splitAndFilterString(String input) {
        if (input == null || input.trim().equals("")) {
            return "";
        }
        String withoutEntities = HTML_ENTITY.matcher(input).replaceAll("");
        String withoutTags = HTML_TAG.matcher(withoutEntities).replaceAll("");
        return TAG_REMNANT.matcher(withoutTags).replaceAll("");
    }

    /**
     * Counts the {@code img} tags in an HTML string.
     *
     * @param htmlStr the HTML text; may be {@code null}
     * @return the number of {@code img} tags found, {@code 0} for {@code null}
     */
    public static int imgSum(String htmlStr) {
        if (htmlStr == null) {
            return 0;
        }
        Matcher imgMatcher = IMG_TAG.matcher(htmlStr);
        int count = 0;
        while (imgMatcher.find()) {
            count++;
        }
        return count;
    }
}
import java.util.regex.Pattern;
/**
 * Regex-based helpers for simple HTML fragments: rewriting one attribute of a
 * tag, stripping markup, and counting {@code <img>} tags.
 *
 * <p>These helpers use regular expressions rather than an HTML parser. A tag is
 * taken to end at the first {@code >} after its opening {@code <}, so a
 * {@code >} inside an attribute value is not supported.
 */
public class Test {

    /**
     * One {@code img} tag, with or without attributes. The look-ahead requires
     * whitespace, {@code /} or {@code >} right after the name so that longer
     * names starting with "img" are not matched. Tag names are matched
     * case-insensitively.
     */
    private static final Pattern IMG_TAG =
            Pattern.compile("<\\s*img(?=[\\s/>])[^>]*>", Pattern.CASE_INSENSITIVE);

    /** A named character entity such as {@code &nbsp;}. */
    private static final Pattern HTML_ENTITY = Pattern.compile("\\&[a-zA-Z]{1,10};");

    /** Anything between {@code <} and the next {@code >}. */
    private static final Pattern HTML_TAG = Pattern.compile("<[^>]*>");

    /** Leftovers of broken markup: a dangling {@code />} or a lone angle bracket. */
    private static final Pattern TAG_REMNANT = Pattern.compile("/>|[<>]");

    /**
     * Demo entry point: prints the number of {@code img} tags in a sample fragment.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String htmlStr = "<p><span><img src=\'http://imagesv5.vivame.cn/pic/upload/phote/201511/07516786-7d60-4ca5-ad11-b6c8bb82d99d.jpg\' border=0></span></p><p><span>"
                + "<img src=\'http://imagesv5.vivame.cn/pic/upload/phote/201511/07516786-7d60-4ca5-ad11-b6c8bb82d99d.jpg\' border=0></span></p>"
                + "<p>hahahahha<span><img src=\'http://imagesv5.vivame.cn/pic/upload/phote/201511/07516786-7d60-4ca5-ad11-b6c8bb82d99d.jpg\' border=0></span></p> ";
        System.out.println(imgSum(htmlStr));
    }

    /**
     * Rewrites one attribute in every occurrence of a tag that has attributes.
     *
     * <p>In each matching tag, the first quoted occurrence of
     * {@code searchAttrib} is replaced by {@code searchAttrib="} followed by
     * {@code newStr} verbatim. Only the opening double quote is emitted here;
     * the caller must include the closing quote at the end of {@code newStr}.
     * Tags that do not carry the attribute are rebuilt with their attributes
     * unchanged. Every matched tag is re-emitted as {@code <searchTag attrs>}.
     *
     * <p>{@code searchTag} and {@code searchAttrib} are inserted into the
     * regular expressions unescaped, so they must not contain regex
     * metacharacters. Tag and attribute names are matched case-sensitively.
     *
     * @param htmlStr      the HTML text; {@code null} is treated as empty
     * @param searchTag    name of the tag to modify, e.g. {@code img}
     * @param searchAttrib name of the attribute inside that tag, e.g. {@code src}
     * @param newStr       text written after {@code searchAttrib="}, including
     *                     the closing quote
     * @return the rewritten HTML, or an empty string for {@code null} input
     */
    public static String updateHtmlTag(String htmlStr, String searchTag,
            String searchAttrib, String newStr) {
        if (htmlStr == null || htmlStr.isEmpty()) {
            return "";
        }
        Pattern tagPattern = Pattern.compile("<\\s*" + searchTag + "\\s+([^>]*)\\s*>");
        // Each quote style is matched as a pair, so a value may contain the
        // other quote character or a '|' without being cut short.
        Pattern attribPattern = Pattern.compile(
                searchAttrib + "\\s*=\\s*(?:\"[^\"]+\"|'[^']+')");
        // Quote the replacement: '$' and '\' in a URL must be taken literally,
        // not as group references or escapes.
        String attribReplacement = Matcher.quoteReplacement(searchAttrib + "=\"" + newStr);

        Matcher tagMatcher = tagPattern.matcher(htmlStr);
        // StringBuffer keeps this compatible with pre-Java 9 Matcher APIs.
        StringBuffer result = new StringBuffer();
        while (tagMatcher.find()) {
            StringBuffer rebuiltTag = new StringBuffer("<" + searchTag + " ");
            Matcher attribMatcher = attribPattern.matcher(tagMatcher.group(1));
            if (attribMatcher.find()) {
                attribMatcher.appendReplacement(rebuiltTag, attribReplacement);
            }
            // Copies the remaining attributes (or all of them when nothing matched).
            attribMatcher.appendTail(rebuiltTag);
            rebuiltTag.append('>');
            // The rebuilt tag contains original attribute text, so it must be
            // quoted as well before being used as a replacement.
            tagMatcher.appendReplacement(result,
                    Matcher.quoteReplacement(rebuiltTag.toString()));
        }
        tagMatcher.appendTail(result);
        return result.toString();
    }

    /**
     * Strips HTML formatting from a string.
     *
     * <p>Named entities such as {@code &nbsp;} are deleted (not decoded), then
     * every {@code <...>} tag, then any dangling {@code />} or lone angle
     * bracket left by broken markup. Ordinary text characters, including
     * parentheses and slashes, are preserved.
     *
     * @param input the text to clean; may be {@code null}
     * @return the text without markup, or an empty string when {@code input}
     *         is {@code null} or contains only whitespace
     */
    public static String splitAndFilterString(String input) {
        if (input == null || input.trim().equals("")) {
            return "";
        }
        String withoutEntities = HTML_ENTITY.matcher(input).replaceAll("");
        String withoutTags = HTML_TAG.matcher(withoutEntities).replaceAll("");
        return TAG_REMNANT.matcher(withoutTags).replaceAll("");
    }

    /**
     * Counts the {@code img} tags in an HTML string.
     *
     * @param htmlStr the HTML text; may be {@code null}
     * @return the number of {@code img} tags found, {@code 0} for {@code null}
     */
    public static int imgSum(String htmlStr) {
        if (htmlStr == null) {
            return 0;
        }
        Matcher imgMatcher = IMG_TAG.matcher(htmlStr);
        int count = 0;
        while (imgMatcher.find()) {
            count++;
        }
        return count;
    }
}