本文是对博客“下雨天没带伞——JAVA实现网页爬虫及将数据写入数据库”(http://blog.csdn.net/sinat_38224744/article/details/70652767)中代码的理解和补充,并使用 Navicat for MySQL 软件实现对数据的可视化操作。代码分成两部分:Web.java 和 Jdbc.java。
一、Web.java
/**
* 原博客:http://blog.csdn.net/sinat_38224744/article/details/70652767
* (1)设置URL、URLConnection、BufferedReader
* (2)设置正则表达式,通过获取的数据流进行解析
* (3)将符合匹配要求的数据存放到 List 集合中,并写入数据库
* (4)List<String> list = new ArrayList<String>();
* -->http://blog.csdn.net/u010340178/article/details/53507964
* (5) 对 " \\w+@\\w+(\\.\\w+)+ "的理解:
* -->https://zhidao.baidu.com/question/875897286591066732.html
*/
package web_one;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Tiny crawler: downloads one web page, extracts every e-mail address found in
 * it with a regular expression, stores each address through
 * {@code Jdbc.insert} and echoes it to standard output.
 */
public class Web {

    /** Page that is scanned for e-mail addresses. */
    private static final String PAGE_URL = "http://bbs.tianya.cn/post-enterprise-758850-1.shtml";

    /**
     * E-mail matcher: word characters, an {@code @}, word characters, then one
     * or more {@code .suffix} groups. Compiled once instead of on every call.
     */
    private static final Pattern EMAIL_PATTERN = Pattern.compile("\\w+@\\w+(\\.\\w+)+");

    /**
     * Program entry point; runs a single crawl of {@link #PAGE_URL}.
     *
     * @param args unused
     * @throws IOException if the page cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        web();
    }

    /**
     * Reads the page line by line and, for every e-mail address matched on a
     * line, inserts it into the database and prints it.
     *
     * @throws IOException if the page cannot be opened or read
     */
    private static void web() throws IOException {
        URLConnection connection = new URL(PAGE_URL).openConnection();
        // NOTE(review): the stream is decoded with the platform default charset,
        // as in the original code; confirm the page encoding and pass it explicitly.
        // try-with-resources closes the reader (and the underlying HTTP stream)
        // even when reading fails; the original never closed it.
        try (BufferedReader reader =
                new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
            String line;
            while ((line = reader.readLine()) != null) {
                for (String email : extractEmails(line)) {
                    // Each address is handed over in its own single-element list,
                    // which is how the original code called Jdbc.insert.
                    List<String> single = new ArrayList<String>();
                    single.add(email);
                    Jdbc.insert(single);
                    System.out.println(email);
                }
            }
        }
    }

    /**
     * Returns every substring of {@code line} that matches
     * {@link #EMAIL_PATTERN}, in order of appearance (duplicates are kept).
     */
    private static List<String> extractEmails(String line) {
        List<String> emails = new ArrayList<String>();
        Matcher matcher = EMAIL_PATTERN.matcher(line);
        while (matcher.find()) {
            emails.add(matcher.group());
        }
        return emails;
    }
}
二、Jdbc.java
/**
* (1)mysql数据库的连接
* (2)数据库的操作:http://blog.csdn.net/sinat_38224744/article/details/70652612
* (3)PreparedStatement.executeUpdate() 的返回值 --> (a)对于SQL数据操作语言(DML)语句,返回受影响的行数;
* --> (b)对于不返回任何内容的SQL语句,返回0
* (4)insert into QQ (email) values(?) --> 往表QQ中插入一行记录,其email列的值由参数?指定
*/
package web_one;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.List;
/**
 * Minimal JDBC helper that stores e-mail addresses in the {@code QQ} table of a
 * MySQL database.
 */
public class Jdbc {

    /** Fully qualified name of the MySQL JDBC driver class. */
    private static final String DRIVER = "com.mysql.jdbc.Driver";

    // Adjust the following settings to your own environment.
    /** JDBC URL; the database is named "webone". */
    private static final String URL = "jdbc:mysql://localhost:3306/webone";
    private static final String USERNAME = "mysql的用户名";
    private static final String PASSWORD = "mysql的密码";

    /** Inserts one row whose {@code email} column is bound to the parameter. */
    private static final String INSERT_SQL = "insert into QQ (email) values(?)";

    /**
     * Loads the driver class and opens a new database connection.
     *
     * @return an open connection, never {@code null}
     * @throws SQLException if the driver class is missing or the connection
     *         cannot be established
     */
    private static Connection getConn() throws SQLException {
        try {
            Class.forName(DRIVER); // loads the driver class through the class loader
        } catch (ClassNotFoundException e) {
            // Surface the failure instead of returning null, which previously
            // led to a NullPointerException in insert().
            throw new SQLException("JDBC driver not found on the classpath: " + DRIVER, e);
        }
        return DriverManager.getConnection(URL, USERNAME, PASSWORD);
    }

    /**
     * Inserts every string of {@code list} as one row into table {@code QQ}.
     *
     * <p>A database failure is reported with a stack trace on standard error
     * and is not propagated; rows inserted before the failure are counted.
     *
     * @param list the e-mail addresses to store; {@code null} or empty means
     *        there is nothing to do and no connection is opened
     * @return the total number of rows inserted (the sum of the update counts)
     */
    static int insert(List<String> list) {
        if (list == null || list.isEmpty()) {
            return 0;
        }
        int inserted = 0;
        // Statement and connection are closed exactly once, after the loop and
        // also on failure. The original closed both inside the loop, so any
        // list with more than one element failed on the second iteration.
        try (Connection conn = getConn();
                PreparedStatement pstmt = conn.prepareStatement(INSERT_SQL)) {
            for (String email : list) {
                pstmt.setString(1, email);
                inserted += pstmt.executeUpdate();
            }
            // No explicit commit(): the connection is left in its default
            // auto-commit mode, where commit() is specified to throw
            // SQLException.
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return inserted;
    }
}
三、 Navicat for MySQL
运行截图:
参考资料:
(1)http://www.pan66.com/show/2373054.html
(2)http://blog.csdn.net/jgirl_333/article/details/54925219
(3)http://jingyan.baidu.com/article/9faa7231b031b8473c28cb34.html
(4)http://jingyan.baidu.com/article/a3a3f81188824c8da2eb8ae2.html
(5)http://blog.csdn.net/sinat_38224744/article/details/70652612