抓取网页中的email地址
index.html 为事先保存到本地的网页文件,需要自行保存,并把代码中的文件路径改为实际的保存位置
/**
 * Extracts e-mail addresses from a locally saved web page and prints each
 * match on its own line to standard output.
 *
 * <p>Usage: {@code java EmailSpider [path-to-saved-page]}. When no path is
 * given, the built-in default location is read.
 */
public class EmailSpider {

    /** Page that is scanned when no path is supplied on the command line. */
    private static final String DEFAULT_PAGE = "E:\\gongfang\\JavaDemo\\resources\\index.html";

    /**
     * Address pattern, compiled once instead of once per input line:
     * word characters, dots or hyphens, an {@code @}, more of the same,
     * then a literal dot followed by a run of word characters.
     */
    private static final Pattern EMAIL = Pattern.compile("[\\w[.-]]+@[\\w[.-]]+\\.[\\w]+");

    /**
     * Reads the page line by line and prints every e-mail address found.
     *
     * @param args optional; {@code args[0]} is the path of the saved page to
     *             scan. When absent, {@link #DEFAULT_PAGE} is used.
     */
    public static void main(String[] args) {
        String page = (args != null && args.length > 0) ? args[0] : DEFAULT_PAGE;

        // try-with-resources closes the reader even when reading fails part-way.
        try (BufferedReader br = new BufferedReader(new FileReader(page))) {
            String line;
            while ((line = br.readLine()) != null) {
                // Check whether this line contains any e-mail address we want.
                parse(line);
            }
        } catch (FileNotFoundException e) {
            System.err.println("Cannot open page '" + page + "': " + e.getMessage());
        } catch (IOException e) {
            System.err.println("Failed while reading page '" + page + "': " + e.getMessage());
        }
    }

    /**
     * Prints every e-mail address occurring in the given line, in order of
     * appearance, one per output line.
     *
     * @param lin a single line of page text
     */
    private static void parse(String lin) {
        Matcher m = EMAIL.matcher(lin);
        while (m.find()) {
            // Change this statement to store the address elsewhere (e.g. a database).
            System.out.println(m.group());
        }
    }
}