package cn.zhengze;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Minimal "web crawler" style demo: scans a local text/HTML file line by line
 * and collects every substring that matches an e-mail-like regular expression.
 */
public class netbug {

    /**
     * Reads {@code mail.html} (resolved against the current working directory)
     * and prints each matched address on its own line, in order of appearance.
     *
     * @param args not used
     * @throws IOException if the file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        File file = new File("mail.html");
        // One or more word characters, '@', an alphanumeric host label, then
        // one to three dot-separated suffixes of two or three letters each.
        String regex = "\\w+@[a-zA-Z0-9]+(\\.[a-zA-Z]{2,3}){1,3}";
        List<String> mailList = getMails(file, regex);
        for (String mail : mailList) {
            System.out.println(mail);
        }
    }

    /**
     * Collects every match of {@code regex} found in {@code file}.
     * Matching is done per line, so a match never spans a line break.
     * Duplicates are kept.
     *
     * @param file  the file to scan
     * @param regex the regular expression to search for
     * @return all matches in order of appearance; empty if there are none
     * @throws IOException if the file cannot be opened or read
     */
    private static List<String> getMails(File file, String regex)
            throws IOException {
        // try-with-resources guarantees the reader (and the underlying file
        // handle) is closed on normal return and when reading fails.
        try (BufferedReader bufr = new BufferedReader(new FileReader(file))) {
            Pattern p = Pattern.compile(regex);
            List<String> list = new ArrayList<String>();
            String line;
            while ((line = bufr.readLine()) != null) {
                Matcher m = p.matcher(line);
                while (m.find()) {
                    list.add(m.group());
                }
            }
            return list;
        }
    }
}
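// 正则表达式，模拟网络爬虫小例子 (Regular expressions: a small example simulating a web crawler)
// 最新推荐文章于 2021-09-28 17:17:32 发布 (Latest recommended article published 2021-09-28 17:17:32)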