druid.properties
jdbc.driverClassName=com.mysql.jdbc.Driver
jdbc.url=jdbc:mysql://localhost/test?useSSL=false
jdbc.user=root
jdbc.password=asdf
pom.xml
<project
xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<parent>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-parent</artifactId>
<version>1.7.6</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<packaging>jar</packaging>
<name>SLF4J LOG4J-12 Binding</name>
<description>SLF4J LOG4J-12 Binding</description>
<url>http://www.slf4j.org</url>
<dependencies>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<configuration>
<archive>
<manifestEntries>
<Bundle-Version>${parsedVersion.osgiVersion}</Bundle-Version>
<Bundle-Description>${project.description}</Bundle-Description>
<Implementation-Version>${project.version}</Implementation-Version>
</manifestEntries>
<manifestFile>${project.build.outputDirectory}/META-INF/MANIFEST.MF</manifestFile>
</archive>
</configuration>
</plugin>
</plugins>
</build>
</project>
spring.xml
<bean id="dataSource" class="com.alibaba.druid.pool.DruidDataSource" init-method="init" destroy-method="close">
<!-- 基本属性 url、user、password -->
<property name="url" value="${jdbc.url}" />
<property name="username" value="${jdbc.user}" />
<property name="password" value="${jdbc.password}" />
<!-- 配置初始化大小、最小、最大 -->
<property name="initialSize" value="1" />
<property name="minIdle" value="1" />
<property name="maxActive" value="20" />
<!-- 配置获取连接等待超时的时间 -->
<property name="maxWait" value="60000" />
<!-- 配置间隔多久才进行一次检测,检测需要关闭的空闲连接,单位是毫秒 -->
<property name="timeBetweenEvictionRunsMillis" value="60000" />
<!-- 配置一个连接在池中最小生存的时间,单位是毫秒 -->
<property name="minEvictableIdleTimeMillis" value="300000" />
<property name="validationQuery" value="SELECT 'x'" />
<property name="testWhileIdle" value="true" />
<property name="testOnBorrow" value="false" />
<property name="testOnReturn" value="false" />
<!-- 打开PSCache,并且指定每个连接上PSCache的大小 -->
<property name="poolPreparedStatements" value="true" />
<property name="maxPoolPreparedStatementPerConnectionSize" value="20" />
<!-- 配置监控统计拦截的filters,去掉后监控界面sql无法统计 -->
<property name="filters" value="stat" />
</bean>
StoreMysql
package com.crawl;
import com.crawl.util.HttpClientUtil;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import static com.crawl.dao.dao.dbTablesInit;
import static com.crawl.dao.dao.insert;
public class StoreMysql {
/**
 * Crawls the novel index page at {@code url}: collects every chapter link
 * under the "#list dl dd a" selector, fetches each chapter page, extracts
 * the chapter title (first &lt;h1&gt;) and the body text ("#content"), and
 * stores each pair into MySQL via {@code insert}. The target table is
 * created first (if missing) by {@code dbTablesInit()}.
 *
 * @param url index page of a novel on www.biquge.com.tw
 * @throws IOException declared for callers; note that
 *         HttpClientUtil.getContent currently catches IOException
 *         internally, so this is unlikely to propagate in practice
 */
public static void operateMysql(String url) throws IOException {
String content = HttpClientUtil.getContent(url);
Document doc = Jsoup.parse(content); // parse the index page into a DOM document
dbTablesInit();
Elements hrefElements = doc.select("#list dl dd a");
for (Element e : hrefElements) {
// chapter links are site-relative, so prepend the site origin
String urlIndex = "http://www.biquge.com.tw" + e.attr("href");
String contentIndex = HttpClientUtil.getContent(urlIndex);
Document docIndex = Jsoup.parse(contentIndex); // parse the chapter page into a DOM document
// extract the chapter title
Elements chapterElements = docIndex.getElementsByTag("h1"); // query the DOM by tag name
Element chapterElement = chapterElements.get(0);
String chapter = chapterElement.text();
// collapse every whitespace run into a CRLF so the stored text keeps line breaks
String ct = docIndex.select("#content").text().replaceAll("\\s+","\r\n");
insert(chapter,ct);
}
}
}
HttpClientUtil
package com.crawl.util;
import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import java.io.IOException;
public class HttpClientUtil {
    // One shared client for the whole process. CloseableHttpClient is
    // thread-safe; the previous code built a new client on every call and
    // never closed it, leaking its connection manager each time. It also
    // left getResponse() throwing NPE when called before getContent().
    private static final CloseableHttpClient httpClient = HttpClients.createDefault();

    /**
     * Fetches the page at {@code url} and returns its body decoded as GBK
     * (the crawled site serves GBK-encoded pages).
     *
     * @param url absolute URL to fetch
     * @return the response body, or "" if the request failed (the error is
     *         printed, matching the original best-effort behavior)
     */
    public static String getContent(String url) {
        String content = "";
        // try-with-resources guarantees the response is released even when
        // EntityUtils.toString throws (the original skipped close() on error).
        try (CloseableHttpResponse response = getResponse(url)) {
            HttpEntity entity = response.getEntity();
            content = EntityUtils.toString(entity, "GBK");
        } catch (IOException e) {
            e.printStackTrace();
        }
        return content;
    }

    /**
     * Executes a GET request against {@code url} using the shared client.
     * Callers are responsible for closing the returned response.
     *
     * @param url absolute URL to fetch
     * @return the open HTTP response
     * @throws IOException if the request fails
     */
    public static CloseableHttpResponse getResponse(String url) throws IOException {
        HttpGet httpGet = new HttpGet(url);
        return httpClient.execute(httpGet);
    }
}
DruidTest
package com.crawl.druid;
import com.alibaba.druid.pool.DruidDataSource;
import java.io.IOException;
import java.sql.*;
import java.util.Properties;
public class DruidTest {
    private static DruidDataSource druidDataSource;

    /**
     * Reads /druid.properties from the classpath and initializes the Druid
     * pool. On failure the stack trace is printed and {@code druidDataSource}
     * remains null.
     */
    private static void init() {
        Properties prop = new Properties();
        // try-with-resources: the original never closed this InputStream,
        // and would NPE inside Properties.load if the resource was missing.
        try (java.io.InputStream in = DruidTest.class.getResourceAsStream("/druid.properties")) {
            if (in == null) {
                throw new IOException("/druid.properties not found on classpath");
            }
            prop.load(in);
            // configure the pool from the loaded properties
            druidDataSource = new DruidDataSource();
            druidDataSource.setDriverClassName(prop.getProperty("jdbc.driverClassName"));
            druidDataSource.setUrl(prop.getProperty("jdbc.url"));
            druidDataSource.setUsername(prop.getProperty("jdbc.user"));
            druidDataSource.setPassword(prop.getProperty("jdbc.password"));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns a pooled Connection, lazily initializing the pool on first use.
     *
     * @return a Connection, or null if initialization or borrowing failed
     */
    public synchronized static Connection getConnection() {
        if (druidDataSource == null) {
            init();
            if (druidDataSource == null) {
                // init failed; return null instead of the NPE the original threw here
                return null;
            }
        }
        try {
            return druidDataSource.getConnection();
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return null;
    }
}
Dao
package com.crawl.dao;
import com.crawl.druid.DruidTest;
import java.io.IOException;
import java.sql.*;
import java.util.Properties;
import static com.crawl.druid.DruidTest.getConnection;
//import static com.crawl.dbcp.DBCPTest.getConnection;
public class dao {
    /**
     * Ensures the `zhoulin` table exists, creating it when the database
     * metadata shows no table of that name. Errors are printed and swallowed,
     * matching the original best-effort behavior.
     */
    public static void dbTablesInit() {
        Connection cn = getConnection();
        if (cn == null) {
            // getConnection() prints its own error; the original NPE'd here
            return;
        }
        // try-with-resources closes rs, st and cn even on SQLException;
        // the original leaked all three whenever the query threw.
        try (Connection c = cn;
             Statement st = c.createStatement();
             ResultSet rs = c.getMetaData().getTables(null, null, "zhoulin", null)) {
            if (!rs.next()) {
                // table absent: create it
                String sql = "CREATE TABLE `zhoulin` (`chapter` varchar(255) NOT NULL,`content` text)ENGINE=MyISAM DEFAULT CHARSET=utf8";
                st.executeUpdate(sql);
            } else {
                System.out.println("zhoulin表已存在");
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    /**
     * Inserts one chapter row into `zhoulin` using a parameterized statement
     * (no SQL injection risk from the crawled text).
     *
     * @param chapter chapter title
     * @param content chapter body text
     */
    public static void insert(String chapter, String content) {
        String sql = " INSERT INTO zhoulin (chapter, content) VALUES (?,?)";
        Connection cn = getConnection();
        if (cn == null) {
            // getConnection() prints its own error; the original NPE'd here
            return;
        }
        // try-with-resources closes the statement and connection on all paths
        try (Connection c = cn;
             PreparedStatement preparedStatement = c.prepareStatement(sql)) {
            preparedStatement.setString(1, chapter);
            preparedStatement.setString(2, content);
            preparedStatement.executeUpdate();
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }
}
DBCPTest
package com.crawl.dbcp;
import org.apache.commons.dbcp2.BasicDataSource;
import javax.jws.Oneway;
import java.io.IOException;
import java.sql.*;
import java.util.Properties;
public class DBCPTest {
    // BasicDataSource is the data source implementation provided by DBCP.
    private static BasicDataSource basicDataSource;

    /**
     * Loads /dbconfig.properties from the classpath and initializes the DBCP
     * pool. On failure the stack trace is printed and {@code basicDataSource}
     * remains null.
     */
    private static void init() {
        Properties properties = new Properties();
        // try-with-resources: the original never closed this InputStream,
        // and would NPE inside Properties.load if the resource was missing.
        try (java.io.InputStream in = DBCPTest.class.getResourceAsStream("/dbconfig.properties")) {
            if (in == null) {
                throw new IOException("/dbconfig.properties not found on classpath");
            }
            properties.load(in);
            // configure the pool from the loaded properties
            basicDataSource = new BasicDataSource();
            basicDataSource.setUrl(properties.getProperty("db.url"));
            basicDataSource.setDriverClassName(properties.getProperty("db.driverClassName"));
            basicDataSource.setUsername(properties.getProperty("db.username"));
            basicDataSource.setPassword(properties.getProperty("db.password"));
            basicDataSource.setInitialSize(Integer.parseInt(properties.getProperty("dataSource.initialSize")));
            basicDataSource.setMaxIdle(Integer.parseInt(properties.getProperty("dataSource.maxIdle")));
            basicDataSource.setMinIdle(Integer.parseInt(properties.getProperty("dataSource.minIdle")));
            basicDataSource.setMaxWaitMillis(Long.parseLong(properties.getProperty("dataSource.maxWait")));
            basicDataSource.setMaxTotal(Integer.parseInt(properties.getProperty("dataSource.maxActive")));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns a pooled Connection, lazily initializing the pool on first use.
     *
     * @return a Connection, or null if initialization or borrowing failed
     */
    public synchronized static Connection getConnection() {
        if (basicDataSource == null) {
            init();
            if (basicDataSource == null) {
                // init failed; return null instead of the NPE the original threw here
                return null;
            }
        }
        try {
            return basicDataSource.getConnection();
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return null;
    }
}
问题:配置文件中 user 属性的值末尾多写了空格,需去掉。
问题:Druid 配置上手较难。解决:先从较简单的 DBCP 入手学习连接池用法,再过渡到 Druid。
至此本周基本实现爬取小说内容到mysql中。