Pattern p = Pattern.compile("href\\s*=\\s*(?:"([^"]*)"|'([^']*)'|([^"'>\s]+))");//这个不正确
/**
 * Returns the {@code src} attribute value of every {@code <img>} tag in an HTML string.
 *
 * <p>Tags are matched case-insensitively. Each tag is inspected on its own, so a value
 * is reported once per tag, in document order. Double-quoted, single-quoted and unquoted
 * values are supported and are returned without their quotes. Attributes whose name
 * merely ends in {@code src} (for example {@code data-src}) are ignored, and a tag
 * without a usable {@code src} contributes nothing.
 *
 * @param htmlStr the HTML text to scan; {@code null} is treated as empty input
 * @return a new mutable list of {@code src} values, empty when none are found
 */
public static List<String> getImgStr(String htmlStr) {
    List<String> pics = new ArrayList<String>();
    if (htmlStr == null) {
        return pics;
    }

    // One whole <img ...> tag; [^>]* keeps a match from running into the next tag.
    Pattern imgTag = Pattern.compile("<img\\b[^>]*>", Pattern.CASE_INSENSITIVE);
    // The lookbehind rejects names such as data-src; groups 1/2/3 hold the
    // double-quoted, single-quoted or unquoted value respectively.
    Pattern srcAttr = Pattern.compile(
            "(?<![\\w-])src\\s*=\\s*(?:\"([^\"]*)\"|'([^']*)'|([^\\s\"'>]+))",
            Pattern.CASE_INSENSITIVE);

    Matcher tagMatcher = imgTag.matcher(htmlStr);
    while (tagMatcher.find()) {
        // Search only the current tag: scanning an accumulated string of all tags
        // seen so far would report earlier images again on every iteration.
        Matcher srcMatcher = srcAttr.matcher(tagMatcher.group());
        if (srcMatcher.find()) {
            String value = srcMatcher.group(1);
            if (value == null) {
                value = srcMatcher.group(2);
            }
            if (value == null) {
                value = srcMatcher.group(3);
            }
            pics.add(value);
        }
    }
    return pics;
}
//重点在于正则表达式 <img[^>]*?>
// src=\"?(.*?)(\"|>|\\s+)
private final static String regxpForHtml = "<([^>]*)>"; // matches any tag that starts with '<' and ends with '>'
private final static String regxpForImgTag = "<\\s*img\\s+([^>]*)\\s*>"; // matches an IMG tag; group 1 is its attribute text
private final static String regxpForImaTagSrcAttrib = "src=\"([^\"]+)\""; // matches the SRC attribute of an IMG tag; group 1 is the value
String regxp = "<\\s*" + tag + "\\s+([^>]*)\\s*>"; // 红色的 tag 是动态的变量(指定标签)
1. public static String getImgStr(String htmlStr){
2. String img="",tmp="";
3. java.util.regex.Pattern p_image;
4. java.util.regex.Matcher m_image;
5.
6. String regEx_img = "http://[([a-z0-9]|.|/|\\-)]+.[(jpg)|(bmp)|(gif)|(png)]";//图片链接地址
7. p_image = java.util.regex.Pattern.compile(regEx_img,java.util.regex.Pattern.CASE_INSENSITIVE);
8. m_image = p_image.matcher(htmlStr);
9. while(m_image.find()){
10. img = img + "," + m_image.group();
11. }
12. if(img.indexOf(",")>=0)
13. return img.substring(1);
14. else
15. return img;
16. }
方法一:
http://www.cnblogs.com/jintan/archive/2009/10/31/1593639.html
package com.cn;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demonstrates extracting the {@code src} value of every {@code <img>} tag in an HTML
 * string with one precompiled regular expression.
 */
public class img_src {

    /**
     * Matches an {@code <img>} tag from its opening up to the end of the tag. Group 1
     * holds everything from the first character after {@code src=} to just before the
     * closing {@code >}, so it still carries the value's quotes and any later attributes.
     * The {@code src} name must directly follow whitespace, which keeps attributes such
     * as {@code data-src} from being mistaken for it.
     */
    public static final Pattern PATTERN = Pattern.compile(
            "<img\\s+(?:[^>]*?\\s)?src\\s*=\\s*([^>]+)", Pattern.CASE_INSENSITIVE);

    /**
     * Runs {@link #getImgSrc(String)} on a small sample document and prints the result.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // Sample covering double-quoted, single-quoted, multi-line and unquoted tags.
        // (One further sample, which the original author noted could not be handled,
        // is not reproduced here.)
        String html = "<html>\r\n"
                + "<head><title>test</title></head>\r\n"
                + "<body>"
                + "<img src=\"xxx.jpg\" />abcdefg"
                + "<img src='yyy.gif' >"
                + "<IMG \r\n"
                + " src=\"abc.jpg\" \r\n"
                + " weight=\"30\">abcdefg \r\n"
                + "<img src=zzz.png width=10>"
                + "</body></html>";
        System.out.println(getImgSrc(html));
    }

    /**
     * Returns the {@code src} value of each {@code <img>} tag, in document order.
     *
     * @param html the HTML text to scan; {@code null} is treated as empty input
     * @return a new mutable list of values with surrounding quotes removed; tags whose
     *         {@code src} has no value are skipped
     */
    public static List<String> getImgSrc(String html) {
        List<String> sources = new ArrayList<String>();
        if (html == null) {
            return sources;
        }
        Matcher matcher = PATTERN.matcher(html);
        while (matcher.find()) {
            String raw = matcher.group(1).trim();
            if (raw.length() == 0) {
                continue; // e.g. "<img src= >": nothing but whitespace after '='
            }
            sources.add(stripValue(raw));
        }
        return sources;
    }

    /**
     * Cuts the attribute value out of the non-empty, trimmed tail captured by
     * {@link #PATTERN}. Escape sequences inside a quoted value are not interpreted;
     * more elaborate handling may be needed for those.
     */
    private static String stripValue(String raw) {
        char first = raw.charAt(0);
        if (first == '\'' || first == '"') {
            int close = raw.indexOf(first, 1);
            // No closing quote (e.g. the value itself contained '>'): keep what is there
            // instead of failing on substring(1, -1).
            return close < 0 ? raw.substring(1) : raw.substring(1, close);
        }
        // Unquoted value ends at the first whitespace.
        return raw.split("\\s")[0];
    }
}
方法二:
package com.cn;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demo that prints the {@code src} value of each {@code <img>} tag in a sample string.
 */
public class test {

    /** A whole {@code <img ...>} tag; {@code [^>]*} stops the match at the tag's own end. */
    private static final Pattern IMG_TAG =
            Pattern.compile("<img\\b[^>]*>", Pattern.CASE_INSENSITIVE);

    /** A double-quoted {@code src} attribute; group 1 is the value without the quotes. */
    private static final Pattern SRC_ATTR =
            Pattern.compile("\\ssrc\\s*=\\s*\"([^\"]*)\"", Pattern.CASE_INSENSITIVE);

    /**
     * Prints one line per image tag found in the sample HTML: the tag's {@code src} value.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String s = "<IMG height=55 src=\"http://www.gobygo.com/TheGoByGo/images/book-channel.gif\" width=210 border=0 />";
        Matcher tags = IMG_TAG.matcher(s);
        while (tags.find()) {
            // Look for src inside the tag just matched, not in the whole input, so each
            // tag yields its own value exactly once.
            Matcher src = SRC_ATTR.matcher(tags.group());
            if (src.find()) {
                System.out.println(src.group(1));
            }
        }
    }
}