// Script section: read the markup of the "PathPanel" div, pick the LAST <a> element
// in it, and extract that element's href value into linkValue.

// One complete anchor element: "<a", its attributes, then everything up to the
// nearest "</a>". '.' does not match line breaks, so an anchor spanning several
// lines is not matched.
String httpTag= "<a\\b([^>]+)(.*?)</a>";
// An href attribute. Exactly one capture group is non-null per match:
// group 1 = double-quoted value, group 2 = single-quoted value, group 3 = unquoted value.
String linkHref= "HREF\\s*=\\s*(?:\"([^\"]*)\"|'([^']*)'|([^'\">\\s]+))";
// The text between a ">" and a trailing "</a>". Not referenced in this section.
String linkTitle ="(?:>)(.*)(?:</a>$)";

// Markup to scan, obtained through the browser handle `ie`
// (frame "main" -> div "PathPanel"); `ie` and `id` are defined outside this section.
pathHtml=ie.frame(id,"main").div(id,"PathPanel").html().toString();

// Visit every anchor; after the loop lastLinkHtml holds the last one found.
// Reset first so a value left over from an earlier run is never reused.
pattern = Pattern.compile(httpTag, Pattern.CASE_INSENSITIVE);
matcher = pattern.matcher(pathHtml);
lastLinkHtml = null;
while (matcher.find())
{
    lastLinkHtml = matcher.group();
}
println("Link content: "+lastLinkHtml);

// Extract the href from that anchor. If the anchor repeats the attribute, the last
// occurrence wins. linkValue stays null when there is no anchor or no href.
patternCon = Pattern.compile(linkHref, Pattern.CASE_INSENSITIVE);
linkValue = null;
if (lastLinkHtml != null)
{
    matcherCon = patternCon.matcher(lastLinkHtml);
    while (matcherCon.find())
    {
        // The value is in whichever of the three alternative groups took part in
        // the match; reading group 1 alone misses single-quoted and unquoted hrefs.
        for (int g = 1; g <= matcherCon.groupCount(); g++)
        {
            if (matcherCon.group(g) != null)
            {
                linkValue = matcherCon.group(g);
                break;
            }
        }
    }
}
println("link value is: "+linkValue);
import java.util.regex.*;
/**
 * Small demo of pulling the {@code href} value out of HTML anchor tags with a
 * regular expression.
 *
 * <p>This is a regex scan, not an HTML parser: it does not understand comments,
 * scripts, or attribute values that themselves contain {@code >} or the text
 * {@code href=}.
 */
public class RegExpParseHTML {

    /**
     * Matches an opening {@code <a ...>} tag up to and including its quoted
     * {@code href} value. The attribute may appear anywhere in the tag but must
     * be preceded by whitespace, so {@code data-href} is not mistaken for it.
     * Group 1 holds a double-quoted value, group 2 a single-quoted one.
     * Compiled once because {@link Pattern} is immutable and reusable.
     */
    private static final Pattern ANCHOR_HREF = Pattern.compile(
            "<a\\s[^>]*?(?<=\\s)href\\s*=\\s*(?:\"([^<\"]+)\"|'([^<']+)')",
            Pattern.CASE_INSENSITIVE);

    /**
     * Runs {@link #parseHref(String)} on a sample anchor and prints the result.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String html = "<a href=\"http://www.autohome.com.cn/780/\" style=\"text-decoration:none;\" target=\"_blank\" title=\"[D] 大众UP频道\" >[D] 大众UP频道</a>";
        String href = parseHref(html);
        System.out.println(href);
    }

    /**
     * Extracts the {@code href} values of all anchor tags in {@code html}.
     *
     * <p>Matching is case-insensitive and accepts single- or double-quoted,
     * non-empty values. When several anchors match, their values are
     * concatenated in document order with no separator.
     *
     * @param html markup to scan; {@code null} is treated as empty input
     * @return the concatenated href values, or an empty string if none is found
     */
    public static String parseHref(String html)
    {
        if (html == null) {
            return "";
        }
        Matcher m = ANCHOR_HREF.matcher(html);
        StringBuilder ret = new StringBuilder();
        while (m.find()) {
            // Exactly one of the two quoting alternatives took part in the match.
            String value = m.group(1) != null ? m.group(1) : m.group(2);
            ret.append(value);
        }
        return ret.toString();
    }
}
痴人硕梦-->Aerchi