// Java获取网页上邮箱地址存放到.txt文件中
package com.test;
import java.io.*;
import java.net.*;
import java.util.*;
import java.util.regex.*;
public class GetEmailDemo {
/**
 * Entry point: scans one web page for e-mail addresses and reports the outcome.
 *
 * <p>The actual work is delegated to {@code getWebContent}, which returns an
 * empty string on success or the text of the exceptions it caught. That text
 * used to be discarded, so the "finished" message was printed even when the
 * page could not be fetched; it is now reported instead.
 *
 * @param args optional; when {@code args[0]} is present and non-empty it is used
 *             as the URL of the page to scan, otherwise the built-in demo URL is used
 */
public static void main(String[] args) {
    String pageUrl = "http://www.tradesns.com/forum_messages.php?mid=24877&ccid=5906";
    if (args != null && args.length > 0 && args[0].length() > 0) {
        pageUrl = args[0];
    }

    GetEmailDemo emailDemo = new GetEmailDemo();
    String error = emailDemo.getWebContent(pageUrl);
    if (error.length() > 0) {
        // Non-empty text means at least one exception was caught while scanning.
        System.err.println("邮件地址查找失败: " + error);
        return;
    }
    System.out.println("邮件地址查找完成...");
}
/**
 * Downloads the page at {@code path}, scans it line by line with {@code parse},
 * and writes the distinct e-mail addresses found, in sorted order and one per
 * line, to {@code e:/email/email.txt}.
 *
 * <p>Nothing is fetched or written when {@code path} is null or empty. The
 * output directory is created once the page has been opened; the output file
 * itself is only written when at least one address was found.
 *
 * @param path URL of the page to scan
 * @return an empty string on success; otherwise the {@code toString()} text of
 *         every exception that was caught while fetching, writing or closing
 */
private String getWebContent(String path) {
    StringBuilder errors = new StringBuilder();
    if (path == null || "".equals(path)) {
        return errors.toString();
    }

    BufferedReader bufferedReader = null;
    try {
        URL url = new URL(path);
        // The page is decoded with the platform default charset.
        bufferedReader = new BufferedReader(new InputStreamReader(url.openStream()));
        System.out.println("开始分析邮件地址...");

        String filePath = "e:/email/";
        File directory = new File(filePath);
        if (!directory.exists()) {
            // A failure here surfaces as an IOException when the file is opened below.
            directory.mkdirs();
        }

        // TreeSet removes duplicates and keeps the addresses sorted.
        Set<String> addresses = new TreeSet<String>();
        String line;
        while ((line = bufferedReader.readLine()) != null) {
            String found = parse(line);
            // parse returns space-separated addresses; "" means no match on this line.
            if (!found.equals("")) {
                for (String address : found.split(" ")) {
                    addresses.add(address);
                }
            }
        }

        if (!addresses.isEmpty()) {
            BufferedWriter writer = new BufferedWriter(new FileWriter(filePath + "email.txt"));
            try {
                for (String address : addresses) {
                    writer.write(address);
                    writer.write("\n");
                }
            } finally {
                // Always release the file handle, even if a write failed.
                writer.close();
            }
        }
    } catch (IOException e) {
        // Also covers MalformedURLException, which is a subclass of IOException.
        e.printStackTrace();
        errors.append(e.toString());
    } finally {
        // The reader is still null when the URL was malformed or could not be opened.
        if (bufferedReader != null) {
            try {
                bufferedReader.close();
            } catch (IOException e) {
                e.printStackTrace();
                errors.append(e.toString());
            }
        }
    }
    return errors.toString();
}
/**
 * E-mail matcher: one or more word characters, dots or hyphens, an {@code @},
 * the same character set again, a dot, and a final run of word characters.
 * Compiled once instead of once per scanned line.
 */
private static final Pattern EMAIL_PATTERN =
        Pattern.compile("[\\w[.-]]+@[\\w[.-]]+\\.[\\w]+");

/**
 * Extracts every e-mail address that appears in one line of text.
 *
 * @param line the text to scan; may be null or empty
 * @return the matched addresses in order of appearance, each followed by a
 *         single space; an empty string when {@code line} is null, empty or
 *         contains no address
 */
private String parse(String line) {
    if (line == null || line.length() == 0) {
        return "";
    }
    StringBuilder found = new StringBuilder();
    Matcher matcher = EMAIL_PATTERN.matcher(line);
    while (matcher.find()) {
        // A match always contains at least the '@', so it is never empty.
        found.append(matcher.group()).append(' ');
    }
    return found.toString();
}