// Web crawler
import java.net.*;
import java.io.*;
import java.util.regex.*;
/**
 * Minimal web crawler: downloads one fixed page over HTTP and prints every
 * e-mail address found in it to standard output, one address per line.
 */
class findMail
{
    /** Address of the page that is scanned for e-mail addresses. */
    private static final String PAGE_URL = "http://127.0.0.1:8080/myweb/mail.html";

    /**
     * Shape of an e-mail address: 2-13 word characters, '@', 2-5 word
     * characters, then one or more ".lowercase-letters" suffixes.
     * Compiled once and reused for every line of the page.
     */
    private static final Pattern MAIL_PATTERN =
            Pattern.compile("\\w{2,13}@\\w{2,5}(\\.[a-z]+)+");

    /**
     * Opens {@link #PAGE_URL}, reads the response line by line and prints each
     * e-mail address matched by {@link #MAIL_PATTERN}.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the URL is malformed, the connection cannot be
     *                   opened, or reading the response fails
     */
    public static void main(String[] args) throws Exception
    {
        // The page content is obtained from the input stream of the URL connection.
        URLConnection conn = new URL(PAGE_URL).openConnection();

        // try-with-resources closes the reader (and the underlying stream)
        // even when reading fails part-way through.
        try (BufferedReader bin = new BufferedReader(new InputStreamReader(conn.getInputStream())))
        {
            String line;
            while ((line = bin.readLine()) != null)
            {
                Matcher m = MAIL_PATTERN.matcher(line);
                // A single line may hold several addresses, so keep searching
                // until the matcher runs out of matches on this line.
                while (m.find())
                {
                    System.out.println(m.group());
                }
            }
        }
    }
}