import java.util.*;
import java.io.*;
import java.util.regex.*;
/**
 * Scans the text file {@code getUrl.txt} (decoded as GBK) for link markup and
 * prints every match to standard output, one match per line.
 *
 * <p>Two kinds of markup are recognised: HTML anchors ({@code <a ...>...</a>})
 * and bracketed url tags ({@code [url...]...[/url]}).
 */
public class AddressDectect {

    /** Name of the input file, resolved against the current working directory. */
    private static final String INPUT_FILE = "getUrl.txt";

    /** Charset name used to decode the input file. */
    private static final String ENCODING = "GBK";

    /**
     * Matches a complete {@code <a ...>...</a>} element or a complete
     * {@code [url...]...[/url]} tag pair. Whitespace is tolerated around the
     * tag name and the closing slash.
     *
     * <p>The {@code \b} after the tag name keeps tags that merely start with
     * the same letters (for example {@code <address>} or {@code <abbr>}) from
     * being treated as links.
     */
    private static final Pattern LINK_PATTERN = Pattern.compile(
            "<\\s*a\\b[^>]*>.*?<\\s*/\\s*a\\s*>"
            + "|\\[\\s*url\\b[^\\]]*\\].*?\\[\\s*/\\s*url\\s*\\]");

    /**
     * Reads the input file and prints every link found in it.
     *
     * @param args command-line arguments; not used
     * @throws IOException kept in the signature for compatibility; read
     *         failures are reported on standard output instead of propagating
     */
    public static void main(String[] args) throws IOException {
        CharSequence content = readContent(new File(INPUT_FILE));
        printLinks(content);
    }

    /**
     * Reads the whole file into memory, concatenating its lines.
     *
     * <p>Line terminators are dropped and nothing is inserted in their place,
     * so markup that spans several lines in the file can still be matched by
     * {@link #LINK_PATTERN}, whose {@code .} does not cross line breaks.
     *
     * @param file the file to read
     * @return the text read so far; empty when the file is missing, partial
     *         when reading fails midway
     */
    private static CharSequence readContent(File file) {
        StringBuilder content = new StringBuilder();
        try {
            // isFile() is false for a missing path, so no separate exists() check.
            if (!file.isFile()) {
                System.out.println("找不到指定的文件");
                return content;
            }
            // The raw stream is declared as its own resource so that it is closed
            // even when the reader cannot be built (e.g. unsupported encoding).
            try (InputStream in = new FileInputStream(file);
                 BufferedReader reader =
                         new BufferedReader(new InputStreamReader(in, ENCODING))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    content.append(line);
                }
            }
        } catch (IOException | SecurityException e) {
            // Best effort: report the problem and continue with whatever was read.
            System.out.println("读取文件内容出错");
            e.printStackTrace();
        }
        return content;
    }

    /**
     * Prints each link found in the given text on its own line.
     *
     * @param content the text to scan
     */
    private static void printLinks(CharSequence content) {
        Matcher matcher = LINK_PATTERN.matcher(content);
        while (matcher.find()) {
            System.out.println(matcher.group());
        }
    }
}
// Note: so far this only detects <a> tags and [url] tags.