import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Web crawler (spider) demo: extracts e-mail addresses with a regular
 * expression, either from a web page or from a local text file, and prints
 * every match to standard output.
 */
public class RegexTest2 {

    /**
     * Simplified e-mail pattern: a local part made of letters, digits or
     * underscores, an '@', a host label made of letters or digits, followed by
     * one to three dot-separated alphabetic suffixes (e.g. ".com", ".com.cn").
     * Compiled once because both extraction methods share it.
     */
    private static final Pattern MAIL_PATTERN =
            Pattern.compile("[a-zA-Z0-9_]+@[a-zA-Z0-9]+(\\.[a-zA-Z]+){1,3}");

    /**
     * Program entry point. Intentionally empty: call {@link #getmails1()} or
     * {@link #getmails()} from here to run one of the demos.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
    }

    /**
     * Extracts the e-mail addresses contained in a web page and prints each
     * one on its own line to standard output. As long as the server is up,
     * its data can be read through the URL connection.
     *
     * <p>The URL literal below is a placeholder and has to be replaced with a
     * real address (including the protocol) before this method can fetch
     * anything. A malformed URL or an I/O failure is reported by printing the
     * stack trace; no exception is propagated to the caller.
     */
    public static void getmails1() {
        try {
            URL url = new URL("指定的网站");
            URLConnection conn = url.openConnection();
            // try-with-resources closes the reader (and the underlying network
            // stream) even when reading fails part-way through.
            try (BufferedReader pageReader =
                    new BufferedReader(new InputStreamReader(conn.getInputStream()))) {
                printMails(pageReader);
            }
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Extracts the e-mail addresses contained in the text file
     * {@code d:\3.txt} and prints each one on its own line to standard output.
     * Uses the "find" capability of {@link Pattern}/{@link Matcher}.
     *
     * <p>A missing file or an I/O failure is reported by printing the stack
     * trace; no exception is propagated to the caller.
     */
    public static void getmails() {
        // try-with-resources releases the file handle on every exit path.
        try (BufferedReader fileReader = new BufferedReader(new FileReader("d:\\3.txt"))) {
            printMails(fileReader);
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads {@code reader} line by line and prints every substring that
     * matches {@link #MAIL_PATTERN}. The caller owns and closes the reader.
     *
     * @param reader source of the text to scan
     * @throws IOException if reading a line fails
     */
    private static void printMails(BufferedReader reader) throws IOException {
        String line;
        while ((line = reader.readLine()) != null) {
            Matcher m = MAIL_PATTERN.matcher(line);
            // find() advances through the line, so several addresses on the
            // same line are all reported.
            while (m.find()) {
                System.out.println(m.group());
            }
        }
    }
}