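/*
Requirement:
Crawl e-mail addresses from the URLs listed in web.txt and save them to mail.txt.
Approach:
1. Read the URLs from web.txt, open a connection to each, and use SequenceInputStream to store all of the page sources in source.txt.
2. Call the getMails method to extract e-mail addresses; the addresses found are saved to mail.txt.
*/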
import java.net.*;
import java.io.*;
import java.util.*;
import java.util.regex.*;
public class Spider
{
/**
 * Program entry point: runs the two processing stages in order and prints
 * a progress message to standard output around each one.
 *
 * @param args command-line arguments; not used
 * @throws Exception if either stage throws
 */
public static void main(String[] args) throws Exception {
    System.out.println("please wait..");

    // Stage 1: collect the web page sources.
    getWebSource();
    System.out.println("get source success!");

    // Stage 2: extract the e-mail addresses.
    getMails();
    System.out.println("you have done.");
}
public static void getWebSource() throws Exception
{
Vector<InputStream> v = new Vector<InputStream>();
BufferedReader bufr =
new BufferedReader(new FileReader("web.txt"));
String line = null;
while((line=bufr.readLine()) != null)
{
URL url = new URL(line);
v.add(url.openStream());
}
Enumeration<InputStream> en = v.elements();
SequenceInp