- 问题背景: 做一个爬虫,从网上爬书籍,爬下来以后插入数据库
- 问题设计: 用多个线程同时从网上爬书,爬下来的书由一个单独的线程负责插入数据库;这些线程共享同一个书籍队列,并且同时开始运行
书籍POJO
package thread;
/**
 * Simple mutable data holder for a single book: an id and a name.
 *
 * <p>Both properties are {@code null} until a setter is called.
 */
public class Book {

    /** Identifier of the book. */
    private Integer bookId;

    /** Name of the book. */
    private String bookName;

    /**
     * Stores the identifier of this book.
     *
     * @param bookId the id to store; may be {@code null}
     */
    public void setBookId(Integer bookId) {
        this.bookId = bookId;
    }

    /**
     * Stores the name of this book.
     *
     * @param bookName the name to store; may be {@code null}
     */
    public void setBookName(String bookName) {
        this.bookName = bookName;
    }

    /**
     * @return the id last passed to {@link #setBookId(Integer)}, or {@code null} if none
     */
    public Integer getBookId() {
        return this.bookId;
    }

    /**
     * @return the name last passed to {@link #setBookName(String)}, or {@code null} if none
     */
    public String getBookName() {
        return this.bookName;
    }
}
爬虫线程
package thread;
import java.util.List;
import java.util.Queue;
/**
 * Producer thread that simulates crawling books and publishes each result to a
 * queue shared with the inserting thread.
 *
 * <p>For every id in the supplied list a {@link Book} is built, a random delay
 * is applied to imitate network latency, and the book is then offered to the
 * shared queue. If the thread is interrupted during the delay it restores its
 * interrupt status and stops without publishing the unfinished book.
 */
public class CrawlerThread extends Thread {

    /** Exclusive upper bound of the simulated crawl delay per book, in milliseconds. */
    private static final long MAX_CRAWL_DELAY_MILLIS = 1000L * 1000L;

    /** Ids of the books still to be crawled. */
    private final List<Integer> bookIdList;

    /** Shared queue that receives every crawled book. */
    private final Queue<Book> bookQueue;

    /**
     * Creates a crawler for the given ids.
     *
     * @param bookIdList ids of the books to crawl; read by index while the thread runs
     * @param bookQueue  queue shared with the consumer; each crawled book is offered to it
     */
    public CrawlerThread(List<Integer> bookIdList, Queue<Book> bookQueue) {
        this.bookIdList = bookIdList;
        this.bookQueue = bookQueue;
    }

    /**
     * Crawls every id in order and offers the resulting books to the shared queue.
     * Returns early, with the interrupt flag set, if the thread is interrupted.
     */
    @Override
    public void run() {
        for (int i = 0; i < bookIdList.size(); i++) {
            // Building the Book stands in for the real crawling work.
            Book book = new Book();
            book.setBookId(bookIdList.get(i));
            book.setBookName("书名" + book.getBookId());
            try {
                // Random pause of 0 to 1000 seconds to mimic crawl latency.
                sleep((long) (Math.random() * MAX_CRAWL_DELAY_MILLIS));
            } catch (InterruptedException e) {
                // Interruption is a stop request: keep the flag visible to callers
                // and do not publish a book whose "crawl" never completed.
                Thread.currentThread().interrupt();
                return;
            }
            // Hand the finished book over to the consumer.
            bookQueue.offer(book);
        }
    }
}
插入线程
package thread;
import java.util.Queue;
/**
 * Consumer thread that drains the shared book queue and "inserts" each book
 * (currently by printing its name to standard output).
 *
 * <p>When the queue is empty the thread waits one minute and checks again. It
 * terminates after {@value #MAX_IDLE_MINUTES} consecutive empty checks, or as
 * soon as it is interrupted while waiting.
 */
public class InsertThread extends Thread {

    /** Number of consecutive one-minute empty checks after which the thread gives up. */
    private static final int MAX_IDLE_MINUTES = 30;

    /** How long to wait for the crawlers when the queue is empty, in milliseconds. */
    private static final long IDLE_WAIT_MILLIS = 60 * 1000L;

    /** Shared queue holding the books that have been crawled. */
    private final Queue<Book> bookQueue;

    /**
     * Creates an inserter that consumes from the given queue.
     *
     * @param bookQueue queue shared with the crawler threads
     */
    public InsertThread(Queue<Book> bookQueue) {
        this.bookQueue = bookQueue;
    }

    /**
     * Repeatedly drains the queue, resetting the idle counter whenever books were
     * available, and waits one minute per empty check until the idle limit is hit.
     */
    @Override
    public void run() {
        int idleMinutes = 0; // consecutive minutes during which the queue was empty
        while (idleMinutes < MAX_IDLE_MINUTES) {
            // isEmpty() rather than size(): size() may need to traverse the whole
            // queue on concurrent implementations such as ConcurrentLinkedQueue.
            if (!bookQueue.isEmpty()) {
                Book book;
                while ((book = bookQueue.poll()) != null) {
                    // Stand-in for the real database insert.
                    System.out.println(book.getBookName());
                }
                idleMinutes = 0; // work was found, restart the timeout
            } else {
                try {
                    sleep(IDLE_WAIT_MILLIS); // give the crawlers a minute
                } catch (InterruptedException e) {
                    // Treat interruption as a shutdown request rather than
                    // miscounting it as a full idle minute.
                    Thread.currentThread().interrupt();
                    return;
                }
                idleMinutes++;
            }
        }
    }
}
主程序
package thread;
import java.util.ArrayList;
import java.util.List;
import java.util.Queue;
import java.util.concurrent.ConcurrentLinkedQueue;
/**
 * Program entry point: wires one crawler thread and one inserter thread to a
 * shared, thread-safe book queue and starts both.
 */
public class Start {

    /**
     * Builds the list of book ids, creates the shared queue and starts the
     * crawler and inserter threads.
     *
     * <p>Declared {@code static} so that the JVM launcher can invoke it; an
     * instance {@code main} is rejected by launchers that require a static
     * entry point.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // A thread-safe queue is used because producer and consumer run concurrently.
        Queue<Book> bookQueue = new ConcurrentLinkedQueue<Book>();

        // Ids of the books to crawl.
        List<Integer> bookIdList = new ArrayList<Integer>();
        bookIdList.add(1);
        bookIdList.add(2);
        bookIdList.add(3);
        bookIdList.add(4);
        bookIdList.add(5);

        // The id list could be split into several parts and handed to several
        // crawler threads; a single crawler is used here for brevity.
        CrawlerThread ct = new CrawlerThread(bookIdList, bookQueue);
        // Thread that inserts crawled books into the database.
        InsertThread it = new InsertThread(bookQueue);

        ct.start();
        it.start();
    }
}