package Ray.regex;
import java.io.BufferedReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Small demo that downloads a single web page and extracts every substring
 * that looks like an e-mail address.
 *
 * <p>Each match is echoed to standard output and appended as its own line to
 * {@link #OUTPUT_FILE}.
 */
public class Crawler {

    /** Page that is scanned for e-mail addresses. */
    private static final String PAGE_URL = "http://tieba.baidu.com/p/1321631274";

    /** File the matched addresses are written to, one per line. */
    private static final String OUTPUT_FILE = "E:/A/hello.txt";

    /**
     * Simplified e-mail matcher: a local part and a domain label (each starting
     * with a letter or a non-zero digit, followed by letters, digits or
     * underscores), then one to three dot-separated alphabetic suffixes.
     * Compiled once because {@link Pattern} instances are immutable and reusable.
     */
    private static final Pattern EMAIL_PATTERN = Pattern.compile(
            "[1-9a-zA-Z][0-9a-zA-Z_]*@[1-9a-zA-Z][0-9a-zA-Z_]*(\\.[a-zA-Z]+){1,3}");

    /**
     * Fetches {@link #PAGE_URL}, scans it line by line for e-mail addresses and
     * writes every match to standard output and to {@link #OUTPUT_FILE}.
     *
     * <p>I/O failures are reported on standard error; they do not propagate out
     * of this method.
     *
     * @param args command-line arguments (ignored)
     */
    public static void main(String[] args) {
        try {
            URL url = new URL(PAGE_URL);
            // try-with-resources closes whatever was successfully opened, in
            // reverse order, even when opening the second resource or reading
            // fails. The page is opened before the output file so that a failed
            // download does not create or truncate the output file.
            try (BufferedReader reader = new BufferedReader(
                         new InputStreamReader(url.openStream(), StandardCharsets.UTF_8));
                 PrintWriter writer = new PrintWriter(new FileWriter(OUTPUT_FILE))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    Matcher matcher = EMAIL_PATTERN.matcher(line);
                    while (matcher.find()) {
                        String address = matcher.group();
                        writer.println(address);
                        System.out.println(address);
                    }
                }
                // PrintWriter never throws on write errors; checkError() flushes
                // and reports whether any write failed.
                if (writer.checkError()) {
                    System.err.println("Failed to write results to " + OUTPUT_FILE);
                }
            }
        } catch (MalformedURLException e) {
            System.err.println("Invalid page URL: " + PAGE_URL);
            e.printStackTrace();
        } catch (IOException e) {
            System.err.println("I/O error while crawling " + PAGE_URL);
            e.printStackTrace();
        }
    }
}
// Note: a simple demo written while learning regular expressions; it tests scraping e-mail addresses from a web page.