如果你想确保你真的匹配一个url地址,而不仅仅是一个以’www’开头的单词.您可以使用DVK之前提到的表达式.我修改了一点,并写了一个小代码段作为您的起点:
import java.util.*;
import java.util.regex.*;
class FindUrls
{
public static List extractUrls(String input) {
List result = new ArrayList();
Pattern pattern = Pattern.compile(
"\\b(((ht|f)tp(s?)\\:\\/\\/|~\\/|\\/)|www.)" +
"(\\w+:\\w+@)?(([-\\w]+\\.)+(com|org|net|gov" +
"|mil|biz|info|mobi|name|aero|jobs|museum" +
"|travel|[a-z]{2}))(:[\\d]{1,5})?" +
"(((\\/([-\\w~!$+|.,=]|%[a-f\\d]{2})+)+|\\/)+|\\?|#)?" +
"((\\?([-\\w~!$+|.,*:]|%[a-f\\d{2}])+=?" +
"([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)" +
"(&(?:[-\\w~!$+|.,*:]|%[a-f\\d{2}])+=?" +
"([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)*)*" +
"(#([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)?\\b");
Matcher matcher = pattern.matcher(input);
while (matcher.find()) {
result.add(matcher.group());
}
return result;
}
}