首先创建一个maven工程
项目结构示意图:
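1. 在 pom.xml 中加入以下依赖:
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
    <groupId>org.apache.httpcomponents</groupId>
    <artifactId>httpclient</artifactId>
    <version>4.1.2</version>
</dependency>
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.7.3</version>
</dependency>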
2. 存储数据的实体类 Book:
package model;
/**
 * Mutable bean holding the attributes of a single book record.
 *
 * <p>Every attribute is stored as a {@code String} and starts out as
 * {@code null} until the matching setter is called. The class performs no
 * validation or conversion of the values it is given.
 *
 * <p>The spelling {@code bookPulish} is kept as-is because the accessor names
 * derived from it are part of this class's public interface.
 */
public class Book {

    private String bookID;
    private String bookName;
    private String bookPrice;
    private String bookAuthor;
    private String bookPulish;
    private String bookYear;

    // ---- identifier ----

    /** @return the stored book identifier, or {@code null} if never set */
    public String getBookID() {
        return this.bookID;
    }

    /** @param bookID the identifier to store */
    public void setBookID(final String bookID) {
        this.bookID = bookID;
    }

    // ---- name ----

    /** @return the stored book name, or {@code null} if never set */
    public String getBookName() {
        return this.bookName;
    }

    /** @param bookName the name to store */
    public void setBookName(final String bookName) {
        this.bookName = bookName;
    }

    // ---- price ----

    /** @return the stored price text, or {@code null} if never set */
    public String getBookPrice() {
        return this.bookPrice;
    }

    /** @param bookPrice the price text to store */
    public void setBookPrice(final String bookPrice) {
        this.bookPrice = bookPrice;
    }

    // ---- author ----

    /** @return the stored author, or {@code null} if never set */
    public String getBookAuthor() {
        return this.bookAuthor;
    }

    /** @param bookAuthor the author to store */
    public void setBookAuthor(final String bookAuthor) {
        this.bookAuthor = bookAuthor;
    }

    // ---- "pulish" (presumably the publisher; name kept for compatibility) ----

    /** @return the stored {@code bookPulish} value, or {@code null} if never set */
    public String getBookPulish() {
        return this.bookPulish;
    }

    /** @param bookPulish the value to store */
    public void setBookPulish(final String bookPulish) {
        this.bookPulish = bookPulish;
    }

    // ---- year ----

    /** @return the stored year text, or {@code null} if never set */
    public String getBookYear() {
        return this.bookYear;
    }

    /** @param bookYear the year text to store */
    public void setBookYear(final String bookYear) {
        this.bookYear = bookYear;
    }
}
httpUtil:
package util;
import java.io.IOException;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.HttpVersion;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.message.BasicHttpResponse;
public class httpUtil{
public static HttpResponse getHtml(HttpClient httpclient, String url) throws IOException
{
HttpGet getMethod = new HttpGet(url); //get方法
HttpResponse response = new BasicHttpResponse(HttpVersion.HTTP_1_1,HttpStatus.SC_OK,“ok”); //response初始化
response = httpclient.execute(getMethod); //执行get方法
return response;
}
}
URLEntity:
package util;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.util.EntityUtils;
import parse.bookPrase;//?
import model.Book;
public class URLEntity {
public static List URLParse(HttpClient httpclient,String url) throws IOException
{
List getbooks = new ArrayList();
HttpResponse response = httpUtil.getHtml(httpclient, url);
int statusCode = response.getStatusLine().getStatusCode(); //获取状态码
if(statusCode == 200) //200为正常
{
String entity = EntityUtils.toString(response.getEntity(),“utf-8”);
getbooks = bookPrase.getData(entity);
EntityUtils.consume(response.getEntity()); //消耗实体类,实体类最后需要消耗
}
else
EntityUtils.consume(response.getEntity());
return getbooks;
}
}
连接数据库:
Mysql_source:
package db;
import javax.sql.DataSource;
import org.apache.tomcat.dbcp.dbcp2.BasicDataSource;
public class mysql_source {
public static DataSource getDataSource(String connectURI)
{
BasicDataSource ds = new BasicDataSource();
ds.setDriverClassName(“com.mysql.jdbc.Driver”);
ds.setUsername(“root”);
//ds.setPassword("");
ds.setUrl(connectURI);
return ds;
}
}
参数的传递和sql语句的插入mysql_control:
package db;
import java.sql.SQLException;
import java.util.List;
import javax.sql.DataSource;
import org.apache.commons.dbutils.QueryRunner;
import model.Book;
public class mysql_control {
static DataSource ds = mysql_source.getDataSource(“jdbc:mysql://127.0.0.1:3306/book”);
static QueryRunner qr = new QueryRunner(ds);
public static void executeInsert(List<Book> bookdatas) throws SQLException
{
Object[][] params = new Object[bookdatas.size()][5];
for(int i=0; i<params.length; i++)
{
params[i][0] = bookdatas.get(i).getBookID();
params[i][1] = bookdatas.get(i).getBookName();
params[i][2] = bookdatas.get(i).getBookAuthor();
params[i][3] = bookdatas.get(i).getBookPulish();
params[i][4] = bookdatas.get(i).getBookYear();
}
qr.batch("insert into bok_book(book_no,book_name,book_author,book_house,book_memo)values(?,?,?,?,?)", params);
System.out.println("成功插入" + bookdatas.size() + "条");
}
}
测试类 bookMain:
package mian;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.client.HttpClient;
import org.apache.http.impl.client.DefaultHttpClient;
import util.URLEntity;
import db.mysql_control;
import model.Book;
/**
 * Entry point: fetches the seed page, logs every parsed book and stores the
 * result in the database.
 */
public class bookMain{
    /** Logger obtained through the commons-logging {@code LogFactory}. */
    static final Log logger = LogFactory.getLog(bookMain.class);

    /**
     * Runs one fetch-parse-store cycle for the seed URL.
     *
     * @param args command-line arguments; not used
     * @throws Exception if fetching, parsing or the database insert fails
     */
    public static void main(String[] args) throws Exception {
        HttpClient httpclient = new DefaultHttpClient();
        try {
            String url = "***"; // seed URL (placeholder)
            List<Book> books = URLEntity.URLParse(httpclient, url);
            for (Book book : books) {
                logger.info("bookId:" + book.getBookID() + "\t" + "bookName:" + book.getBookName() + "\t" + "bookPrice:"
                        + book.getBookPrice() + "\t"+"bookAuthor:" + book.getBookAuthor() + "\t" +"bookPulish:" + book.getBookPulish() + "\t");
            }
            mysql_control.executeInsert(books);
        } finally {
            // Release the client's pooled connections even when a step above
            // throws; otherwise they stay open until the JVM exits.
            httpclient.getConnectionManager().shutdown();
        }
    }
}
执行结果: