工作半年了,再回头看自己的博客的时候感慨良多。时至今日,完善这个抓取邮箱的程序,如果加上网络爬虫,完全可以靠这段程序去分析网上的邮箱,从而批量发送邮件。
package grabmail;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Command-line utility that downloads a web page and lists the e-mail
 * addresses found in its text.
 *
 * <p>{@link #main(String[])} reads a URL from standard input, fetches the page
 * with {@link #captrueUrl(String)} and scans it with {@link #getEmail(String)}.
 */
public class CaptrueEmailInPage {

    /**
     * Deliberately loose e-mail matcher: a run of word characters, an {@code @},
     * one or more dot-terminated domain labels, and a 2-3 letter lowercase suffix.
     *
     * <p>The dot is escaped so that it only matches a literal {@code '.'}. With an
     * unescaped dot the domain part could swallow spaces and following words, and
     * the nested quantifier could backtrack exponentially on long word runs.
     *
     * <p>A stricter, fully anchored validation pattern was tried previously and
     * rejected as too slow for harvesting addresses from whole pages.
     */
    private static final Pattern EMAIL_PATTERN =
            Pattern.compile("\\w+@(\\w+\\.)+[a-z]{2,3}");

    /**
     * Finds every substring of {@code str} that looks like an e-mail address.
     *
     * <p>Each match is also echoed to standard output prefixed with {@code ***}.
     *
     * @param str text to scan; {@code null} is treated as empty text
     * @return the matches in order of appearance (duplicates included); never {@code null}
     */
    public static List<String> getEmail(String str) {
        List<String> emails = new ArrayList<String>();
        if (str == null) {
            return emails;
        }
        Matcher matcher = EMAIL_PATTERN.matcher(str);
        while (matcher.find()) {
            String email = matcher.group();
            emails.add(email);
            System.out.println("***" + email);
        }
        return emails;
    }

    /**
     * Reads one line from standard input.
     *
     * <p>The scanner is intentionally not closed: closing it would also close
     * {@code System.in}.
     *
     * @return the line typed by the user, without its line terminator
     */
    public static String inputURL() {
        Scanner scanner = new Scanner(System.in);
        return scanner.nextLine();
    }

    /**
     * Downloads the resource at {@code url} and returns its body as text.
     *
     * <p>The body is decoded as UTF-8 and is also printed to standard output.
     * Lines are joined with {@code '\n'} so that tokens at the end of one line
     * cannot fuse with tokens at the start of the next.
     *
     * @param url address to fetch; the connection is cast to
     *            {@link HttpURLConnection}, so other protocols fail with a
     *            {@link ClassCastException}
     * @return the page content
     * @throws Exception if the URL is malformed or the page cannot be read
     */
    public static String captrueUrl(String url) throws Exception {
        HttpURLConnection conn = (HttpURLConnection) new URL(url).openConnection();
        StringBuilder page = new StringBuilder();
        try {
            BufferedReader reader =
                    new BufferedReader(new InputStreamReader(conn.getInputStream(), "utf-8"));
            try {
                String line;
                while ((line = reader.readLine()) != null) {
                    // readLine() strips the terminator; put a separator back.
                    page.append(line).append('\n');
                }
            } finally {
                reader.close();
            }
        } finally {
            // Release the connection even when opening or reading the stream fails.
            conn.disconnect();
        }
        System.out.println(page);
        return page.toString();
    }

    /**
     * Prompts for a URL, fetches it and prints every e-mail address found.
     *
     * @param args ignored
     */
    public static void main(String args[]) {
        System.out.println("请输入一个网址:(如:http://www.baidu.com)");
        String url = inputURL();
        System.out.println();
        System.out.println();
        try {
            String page = captrueUrl(url);
            List<String> emails = getEmail(page);
            System.out.println("抓取的邮箱的如下:");
            for (String email : emails) {
                System.out.println(email);
            }
        } catch (Exception e) {
            System.out.println("获取网址失败,原因:输入了非法网址!");
            e.printStackTrace();
        }
    }
}
转载于:https://blog.51cto.com/ywj852752270/1208590