/*
正则表达式-网页爬虫(蜘蛛),爬邮箱
*/
import java.io.*;
import java.util.regex.*;
import java.net.*;
/**
 * Regular-expression demo: a minimal "crawler" that extracts e-mail addresses
 * from a web page or from a local text file and prints each one to standard output.
 */
class RegexTest2
{
    /**
     * E-mail shape used by both extractors: one or more word characters, an
     * {@code @}, one or more word characters, then one or more
     * {@code .word} groups. Compiled once because {@link Pattern} is immutable
     * and reusable.
     */
    private static final Pattern MAIL_PATTERN = Pattern.compile("\\w+@\\w+(\\.\\w+)+");

    /**
     * Entry point; runs the web-page extractor.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the page cannot be opened or read
     */
    public static void main(String[] args) throws Exception
    {
        // Call getMails() here instead to scan the local file "mails.txt".
        getMails_1();
    }

    /**
     * Fetches a fixed web page and prints every e-mail address found in it,
     * one per line, in order of appearance.
     *
     * @throws Exception if the URL is malformed, the connection fails, or
     *                   reading the response fails
     */
    public static void getMails_1() throws Exception
    {
        URL url = new URL("http://168.245.124.92:8080/myweb/mail.html");
        URLConnection conn = url.openConnection();
        // try-with-resources closes the reader (and the underlying network
        // stream) even when reading fails part-way through.
        try (BufferedReader bufIn = new BufferedReader(new InputStreamReader(conn.getInputStream())))
        {
            printMails(bufIn);
        }
    }

    /**
     * Reads the file {@code mails.txt} from the current working directory and
     * prints every e-mail address found in it, one per line, in order of
     * appearance. Uses the find-style API of {@link Pattern}/{@link Matcher}.
     *
     * @throws Exception if the file does not exist or cannot be read
     */
    public static void getMails() throws Exception
    {
        // try-with-resources guarantees the file handle is released.
        try (BufferedReader bufr = new BufferedReader(new FileReader("mails.txt")))
        {
            printMails(bufr);
        }
    }

    /**
     * Scans the reader line by line and prints each match of
     * {@link #MAIL_PATTERN} to {@code System.out}. The caller owns the reader
     * and is responsible for closing it.
     *
     * @param reader source of text to scan
     * @throws IOException if reading a line fails
     */
    private static void printMails(BufferedReader reader) throws IOException
    {
        String line;
        while ((line = reader.readLine()) != null)
        {
            Matcher m = MAIL_PATTERN.matcher(line);
            // A single line may contain several addresses; report all of them.
            while (m.find())
            {
                System.out.println(m.group());
            }
        }
    }
}