import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Scrapes e-mail addresses from a web page and prints each one to standard output.
 *
 * @author lsh
 */
public class GetEmail {

    /** Page fetched when no URL is supplied on the command line. */
    private static final String DEFAULT_PAGE_URL =
            "https://book.douban.com/subject/24753651/discussion/58975313/";

    /**
     * Matches addresses of the form {@code <word chars>@<2-8 lowercase letters or digits>.com}.
     * Compiled once because a {@link Pattern} is immutable and reusable across lines.
     */
    private static final Pattern EMAIL_PATTERN = Pattern.compile("\\w+@[0-9a-z]{2,8}\\.com");

    /** Connect/read timeout in milliseconds, so a stalled server cannot hang the program forever. */
    private static final int TIMEOUT_MILLIS = 10000;

    /**
     * Downloads a page, scans it line by line, and prints every e-mail address found.
     *
     * @param args optional; {@code args[0]} is the URL to scan. When absent, the built-in
     *             default page is used.
     * @throws IOException if the URL is malformed, the connection fails or times out,
     *                     or the response cannot be read
     */
    public static void main(String[] args) throws IOException {
        String pageUrl = args != null && args.length > 0 ? args[0] : DEFAULT_PAGE_URL;

        HttpURLConnection conn = (HttpURLConnection) new URL(pageUrl).openConnection();
        conn.setConnectTimeout(TIMEOUT_MILLIS);
        conn.setReadTimeout(TIMEOUT_MILLIS);

        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8))) {
            // Each iteration handles exactly one line of the page.
            String line;
            while ((line = reader.readLine()) != null) {
                for (String email : extractEmails(line)) {
                    System.out.println("邮箱 : " + email);
                }
            }
        } finally {
            // Closing the reader releases the stream; disconnect releases the connection itself.
            conn.disconnect();
        }
    }

    /**
     * Returns every substring of {@code text} that matches the e-mail pattern, in order of
     * appearance. Duplicates are kept.
     *
     * @param text the text to scan; must not be {@code null}
     * @return the matched addresses, or an empty list when there are none
     */
    static List<String> extractEmails(String text) {
        List<String> emails = new ArrayList<String>();
        Matcher matcher = EMAIL_PATTERN.matcher(text);
        while (matcher.find()) {
            emails.add(matcher.group());
        }
        return emails;
    }
}