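/*
Web crawler (spider): extracts e-mail addresses from a local text file
or from the content of a web page, using regular expressions.
*/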
import java.io.*;
import java.util.regex.*;
import java.net.*;
/**
 * Small "spider" demo: reads text line by line and prints every substring that
 * looks like an e-mail address to standard output.
 *
 * <p>Two sources are supported: the local file {@code mail.txt}
 * ({@link #getMail()}) and a hard-coded web page ({@link #getTomcatMail()}).
 */
class RegexTest2
{
    /**
     * Address pattern shared by both entry points, compiled once.
     *
     * <p>Local part: one or more letters, digits or underscores. Domain: a first
     * label of lowercase letters or digits, followed by one or more
     * {@code .label} groups of lowercase letters. Digits are allowed in the
     * first domain label so that addresses such as {@code name@163.com} match.
     */
    private static final Pattern MAIL_PATTERN =
            Pattern.compile("[a-zA-Z0-9_]+@[a-z0-9]+(\\.[a-z]+)+");

    /**
     * Program entry point; currently scans the web page.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the page cannot be fetched or read
     */
    public static void main(String[] args) throws Exception
    {
        // Call getMail() here instead to scan the local mail.txt file.
        getTomcatMail();
    }

    /**
     * Prints every e-mail address found in the file {@code mail.txt}
     * (resolved against the current working directory), one per line.
     *
     * @throws Exception if the file cannot be opened or read
     */
    public static void getMail() throws Exception
    {
        // try-with-resources guarantees the file handle is released even if
        // reading fails part-way through.
        try (BufferedReader bufr = new BufferedReader(new FileReader("mail.txt")))
        {
            printMails(bufr);
        }
    }

    /**
     * Prints every e-mail address found in the content of a hard-coded web
     * page, one per line.
     *
     * <p>The method name refers to a page served by a local Tomcat instance
     * (see the commented-out URL); the active URL below points elsewhere.
     *
     * @throws Exception if the URL is malformed or the page cannot be fetched or read
     */
    public static void getTomcatMail() throws Exception
    {
        //URL url = new URL("http://localhost:8080/myweb/mail.html");
        URL url = new URL("http://zhidao.baidu.com/link?url=xF77TDRnMmNWlPTMJIdwcZn4CEHrKjhJBpCPXh23Dwxm-wGMKZXPkA8JkwmIXApGJBHX5xZJOhXa9jQRX9PkI_");
        URLConnection conn = url.openConnection();
        // Close both the raw stream and the reader when done, also on failure.
        try (InputStream in = conn.getInputStream();
             BufferedReader bufrIn = new BufferedReader(new InputStreamReader(in)))
        {
            printMails(bufrIn);
        }
    }

    /**
     * Reads {@code reader} to the end and prints each match of
     * {@link #MAIL_PATTERN} on its own line. The caller owns (and closes) the reader.
     *
     * @param reader source of text lines to scan
     * @throws IOException if reading from {@code reader} fails
     */
    private static void printMails(BufferedReader reader) throws IOException
    {
        String line;
        while ((line = reader.readLine()) != null)
        {
            Matcher m = MAIL_PATTERN.matcher(line);
            // A single line may contain several addresses; report all of them.
            while (m.find())
            {
                System.out.println(m.group());
            }
        }
    }
}
// day25/RegexTest2.java
// (Web page residue captured with the listing: "Latest recommended article, published 2023-01-12 00:21:53")