import java.io.BufferedReader;
import java.io.FileReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Downloads a web page, extracts every e-mail address found in its text and
 * inserts each address into the {@code email} table of a MySQL database.
 *
 * <p>The page URL and the database coordinates are fixed constants of this
 * class.
 */
public class Crawler {

    /** Page whose content is scanned for e-mail addresses. */
    private static final String PAGE_URL = "http://localhost:8080/hibernate/";

    /** JDBC driver class loaded before the database connection is opened. */
    private static final String JDBC_DRIVER = "com.mysql.jdbc.Driver";

    /** JDBC connection settings of the target database. */
    private static final String JDBC_URL = "jdbc:mysql://localhost:3306/test";
    private static final String JDBC_USER = "root";
    private static final String JDBC_PASSWORD = "root";

    /**
     * Insert statement for one address. The first column is always written as
     * SQL {@code NULL} (presumably an auto-generated key -- confirm against the
     * table definition); the second column receives the address.
     */
    private static final String INSERT_SQL = "insert into email values(null, ?)";

    /** E-mail matcher: word characters, {@code @}, then a dotted host name. */
    private static final Pattern MAIL_PATTERN = Pattern.compile("\\w+@\\w+(\\.\\w+)+");

    /**
     * Program entry point; runs a single crawl.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        getMail();
    }

    /**
     * Reads {@link #PAGE_URL} line by line, stores every e-mail address found
     * in the database and echoes it to standard output.
     *
     * <p>This method never throws: any failure (network, driver loading,
     * database) is reported via a stack trace on standard error and the crawl
     * stops. All opened resources are released in every case.
     */
    public static void getMail() {
        try {
            URLConnection conn = new URL(PAGE_URL).openConnection();
            // Explicit charset so decoding does not depend on the platform default.
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8))) {
                storeAddresses(reader);
            }
        } catch (Exception e) {
            // Top-level boundary: report the problem and return, keeping the
            // best-effort contract callers of getMail() rely on.
            e.printStackTrace();
        }
    }

    /**
     * Returns all e-mail addresses contained in {@code line}, in order of
     * appearance. Duplicates are kept.
     *
     * @param line text to scan; must not be {@code null}
     * @return the matched addresses, empty if there are none
     */
    static List<String> extractEmails(String line) {
        List<String> addresses = new ArrayList<>();
        Matcher matcher = MAIL_PATTERN.matcher(line);
        while (matcher.find()) {
            addresses.add(matcher.group());
        }
        return addresses;
    }

    /**
     * Inserts every address found in the lines of {@code reader} into the
     * database and prints it. The caller remains responsible for closing
     * {@code reader}.
     */
    private static void storeAddresses(BufferedReader reader) throws Exception {
        Class.forName(JDBC_DRIVER);
        try (Connection connection = DriverManager.getConnection(JDBC_URL, JDBC_USER, JDBC_PASSWORD);
             PreparedStatement insert = connection.prepareStatement(INSERT_SQL)) {
            String line;
            while ((line = reader.readLine()) != null) {
                for (String address : extractEmails(line)) {
                    // Bound parameter instead of string concatenation: the
                    // value comes from a remote page and must not be spliced
                    // into the SQL text.
                    insert.setString(1, address);
                    insert.executeUpdate();
                    System.out.println(address);
                }
            }
        }
    }
}
使用爬虫技术抓取文件或页面中的email并存入数据库
最新推荐文章于 2022-07-11 07:35:00 发布