为Jsoup爬虫写了一个WebDriverPool浏览器驱动池,用来减少浏览器驱动频繁打开和关闭引起的资源损耗
简单采用了享元模式
享元模式参考链接
https://blog.csdn.net/oneby1314/article/details/108680069
log4j2配置参考链接
https://blog.csdn.net/qq_44762676/article/details/120179581
依赖
<dependency>
<groupId>org.seleniumhq.selenium</groupId>
<artifactId>selenium-java</artifactId>
<version>3.141.59</version>
</dependency>
<!--log4j-->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-log4j2</artifactId>
<version>2.1.4.RELEASE</version><!--$NO-MVN-MAN-VER$-->
</dependency>
1.IWebDriverPool :产品的抽象接口,定义了产品对象的内部和外部状态的规范,即前面所说的 FlyWeight
/**
 * Contract of a pool of reusable browser drivers.
 */
public interface IWebDriverPool {

    /**
     * Obtains a browser driver from the pool.
     *
     * @return a browser driver, or {@code null} when waiting for one exceeded the timeout
     */
    WebDriver getWebDriver();

    /**
     * Obtains the browser driver of the calling thread.
     *
     * @return the browser driver object
     */
    WebDriver getCurrentConnecton();

    /**
     * Releases the browser driver held by the calling thread.
     *
     * @param driver the browser driver object to release
     */
    void releaseWebDriver(WebDriver driver);

    /**
     * Destroys the pool and empties it.
     */
    void destroy();

    /**
     * Reports whether the pool is usable.
     *
     * @return {@code true} if the pool is available
     */
    boolean isActive();

    /**
     * Timer that checks the pool.
     */
    void checkPool();

    /**
     * Reports the number of active (checked-out) drivers in the pool.
     *
     * @return the number of active drivers
     */
    int getActiveNum();

    /**
     * Reports the number of idle drivers in the pool.
     *
     * @return the number of idle drivers
     */
    int getFreeNum();
}
2.WebDriverPool :具体的产品类,实现了 IWebDriverPool 接口及其具体的业务方法,即前面所说的 ConcreteFlyWeight
public class WebDriverPool implements IWebDriverPool {
private static final Logger logger = LogManager.getLogger(LogManager.ROOT_LOGGER_NAME);
private WebDriverBean webDriverBean = null;
/**
* 驱动池可用状态
*/
private Boolean isActive = true;
/**
* 空闲驱动池,由于读写操作较多,所以使用linklist
*/
private LinkedList<WebDriver> freeWebDriver = new LinkedList<>();
/**
* 活动驱动池,由于读写操作较多,所以使用linklist
*/
private LinkedList<WebDriver> activeWebDriver = new LinkedList<>();
/**
* 当前线程获得的连接
*/
private ThreadLocal<WebDriver> currentWebDriver = new ThreadLocal<>();
private WebDriverPool(){
super();
}
public static WebDriverPool createWebDriverPool(WebDriverBean webDriverBean){
WebDriverPool webDriverPool = new WebDriverPool();
webDriverPool.webDriverBean = webDriverBean;
for (int i = 0; i <webDriverPool.webDriverBean.getInitConnections() ; i++) {
try {
if (webDriverPool.webDriverBean.getDriverPath()!=null && webDriverPool.webDriverBean.getDriverPath().length()>1 ){
System.setProperty("webdriver.chrome.driver",webDriverPool.webDriverBean.getDriverPath());
}
ChromeOptions options = new ChromeOptions();
options.addArguments("--headless");
options.addArguments("--disable-gpu");
WebDriver driver = new ChromeDriver(options);
webDriverPool.freeWebDriver.add(driver);
}catch (Exception e){
logger.error("驱动池初始化失败"+e.getMessage());
return null;
}
}
webDriverPool.isActive = true;
return webDriverPool;
}
/**
* 检查驱动是否存活
* @param webDriver
* @return
*/
private Boolean isValidWebDriver(WebDriver webDriver) {
try {
if (webDriver==null){
return false;
}
}catch (Exception e){
e.printStackTrace();
}
return true;
}
private WebDriver newWebDriver(){
WebDriver webDriver = null;
try {
if (this.webDriverBean!= null){
if (this.webDriverBean.getDriverPath()!=null && this.webDriverBean.getDriverPath().length()>1 ){
System.setProperty("webdriver.chrome.driver",this.webDriverBean.getDriverPath());
}
ChromeOptions options = new ChromeOptions();
options.addArguments("--headless");
options.addArguments("--disable-gpu");
webDriver = new ChromeDriver(options);
}
}catch (Exception e){
logger.error("创建新的驱动失败");
}
return webDriver;
}
@Override
public synchronized WebDriver getWebDriver() {
WebDriver webDriver = null;
if (this.getActiveNum() < this.webDriverBean.getMaxConnections()){
if (this.getFreeNum()>0){
logger.info("空闲池中剩余驱动数为"+this.getFreeNum()+",直接获取驱动");
webDriver = this.freeWebDriver.pollFirst();
this.activeWebDriver.add(webDriver);
}else {
logger.info("空闲池中无驱动,创建新的驱动");
try {
webDriver = this.newWebDriver();
this.activeWebDriver.add(webDriver);
}catch (Exception e){
e.printStackTrace();
}
}
}else {
logger.info("当前已达最大驱动数");
long startTime = System.currentTimeMillis();
try {
this.wait(this.webDriverBean.getConninterval());
}catch (Exception e){
logger.error("线程等待被打断");
}
if (this.webDriverBean.getTimeout()!=0){
if (System.currentTimeMillis() - startTime > this.webDriverBean.getTimeout()){
return null;
}
}
webDriver = this.getWebDriver();
}
return webDriver;
}
@Override
public synchronized WebDriver getCurrentConnecton() {
WebDriver webDriver = this.currentWebDriver.get();
try {
if (!isValidWebDriver(webDriver)){
webDriver = this.getWebDriver();
}
}catch (Exception e){
logger.error("获取当前驱动失败"+e.getMessage());
}
return webDriver;
}
@Override
public synchronized void releaseWebDriver(WebDriver driver) {
logger.info(Thread.currentThread().getName()+"关闭连接:activeWebDriver.remove :"+driver);
this.activeWebDriver.remove(driver);
this.currentWebDriver.remove();
try {
if (isValidWebDriver(driver)){
freeWebDriver.add(driver);
}else {
freeWebDriver.add(this.newWebDriver());
}
}catch (Exception e){
logger.error("释放当前驱动失败"+e.getMessage());
}
this.notifyAll();
}
@Override
public synchronized void destroy() {
for (WebDriver webDriver:this.freeWebDriver){
try {
if (isValidWebDriver(webDriver)){
webDriver.quit();
}
}catch (Exception e){
logger.error(e.getMessage());
}finally {
webDriver.quit();
}
}
for (WebDriver webDriver:this.activeWebDriver){
try {
if (isValidWebDriver(webDriver)){
webDriver.quit();
}
}catch (Exception e){
logger.error(e.getMessage());
}finally {
webDriver.quit();
}
}
this.isActive = false;
this.freeWebDriver.clear();
this.activeWebDriver.clear();
logger.info("驱动池已经摧毁");
}
@Override
public boolean isActive() {
return this.isActive;
}
@Override
public void checkPool() {
ScheduledExecutorService ses = new ScheduledThreadPoolExecutor(2);
ses.scheduleAtFixedRate(new TimerTask() {
@Override
public void run() {
logger.info("空闲驱动数"+getFreeNum());
logger.info("活动驱动数"+getActiveNum());
}
},1,30,TimeUnit.SECONDS);
ses.scheduleAtFixedRate(new checkFreepools(this),1,5,TimeUnit.SECONDS);
}
@Override
public int getActiveNum() {
return this.activeWebDriver.size();
}
@Override
public int getFreeNum() {
return this.freeWebDriver.size();
}
/**
* 驱动池内部要保证指定最小数量的驱动数
*/
class checkFreepools extends TimerTask {
private WebDriverPool webDriverPool = null;
public checkFreepools(WebDriverPool wp) {
this.webDriverPool = wp;
}
@Override
public void run() {
if (this.webDriverPool != null && this.webDriverPool.isActive()) {
int poolstotalnum = webDriverPool.getFreeNum()
+ webDriverPool.getActiveNum();
int subnum = webDriverPool.webDriverBean.getMinConnections()
- poolstotalnum;
if (subnum > 0) {
logger.info("扫描并维持空闲池中的最小驱动数,需补充" + subnum + "个驱动");
for (int i = 0; i < subnum; i++) {
try {
webDriverPool.freeWebDriver
.add(webDriverPool.newWebDriver());
} catch (Exception e) {
logger.error("补充驱动失败"+e.getMessage());
}
}
}
}
}
}
}
3.WebDriverManager :构建产品池,即前面所说的 FlyWeightFactory
/**
 * Spring-managed entry point to the driver pool. The pool is created lazily on first
 * use; no hand-written singleton is needed because the Spring container manages this bean.
 */
@Component
public class WebDriverManager {
    private static final Logger log = LogManager.getLogger(LogManager.ROOT_LOGGER_NAME);

    @Autowired
    WebDriverBean webDriverBean;

    /** Lazily created pool; volatile so unsynchronized readers see the published instance. */
    private volatile WebDriverPool webDriverPool = null;

    /**
     * Creates the pool on first call and starts its background checks; later calls
     * return the same pool. Synchronized so concurrent first calls cannot start two pools.
     *
     * @return the pool, or {@code null} if the pool could not be initialised
     */
    public synchronized WebDriverPool createWebDriverPool() {
        if (webDriverPool == null) {
            WebDriverPool created = WebDriverPool.createWebDriverPool(webDriverBean);
            if (created == null) {
                // The factory returns null when an initial driver fails to start.
                log.error("驱动池创建失败,无法提供浏览器驱动");
                return null;
            }
            created.checkPool();
            webDriverPool = created;
        }
        return webDriverPool;
    }

    /**
     * Checks out a driver, creating the pool first if necessary.
     *
     * @return a driver, or {@code null} if the pool is unavailable or returned none
     */
    public WebDriver getWebDriver() {
        WebDriverPool pool = webDriverPool;
        if (pool == null) {
            pool = this.createWebDriverPool();
        }
        return pool == null ? null : pool.getWebDriver();
    }

    /**
     * Returns a driver to the pool; does nothing for {@code null} or when no pool exists.
     *
     * @param driver the driver to release
     */
    public void closeWebDriver(WebDriver driver) {
        WebDriverPool pool = webDriverPool;
        if (driver != null && pool != null) {
            pool.releaseWebDriver(driver);
        }
    }

    /**
     * Destroys the pool if one was created.
     */
    public void destroy() {
        WebDriverPool pool = webDriverPool;
        if (pool != null) {
            pool.destroy();
        }
    }
}
4.WebDriverBean :实体类
属性自动注入依赖
<!--configuration-->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-configuration-processor</artifactId>
<optional>true</optional>
</dependency>
启动类添加开启配置注解
@EnableConfigurationProperties
application.yml添加配置
由于我的chromedriver已经配置了环境变量,所以这里为null
# Settings bound to WebDriverBean through the "webdriver" prefix.
# The nested keys must be indented under "webdriver" to be bound with that prefix.
webdriver:
  # Path to the chromedriver executable; null here because it is resolved from the environment.
  driverPath: null
  # Maximum number of drivers in the pool.
  maxConnections: 6
  # Minimum number of drivers the pool keeps alive.
  minConnections: 5
  # Number of drivers started when the pool is created.
  initConnections: 5
  # Retry interval while waiting for a driver, in milliseconds.
  conninterval: 1000
  # Timeout for obtaining a driver, in milliseconds; 0 means never time out.
  timeout: 0
@Component
@ConfigurationProperties(prefix = "webdriver")
public class WebDriverBean {

    /** Path to the browser driver executable. */
    private String driverPath;

    /** Maximum number of connections in the pool. */
    private int maxConnections;

    /** Minimum number of connections in the pool. */
    private int minConnections;

    /** Number of connections created when the pool starts. */
    private int initConnections;

    /** Retry interval, in milliseconds. */
    private int conninterval;

    /** Timeout for obtaining a connection, in milliseconds; 0 means never time out. */
    private int timeout;

    /** Creates a bean with every property at its Java default value. */
    public WebDriverBean() {
    }

    /**
     * Creates a fully populated bean.
     *
     * @param driverPath      path to the browser driver executable
     * @param maxConnections  maximum number of connections
     * @param minConnections  minimum number of connections
     * @param initConnections initial number of connections
     * @param conninterval    retry interval in milliseconds
     * @param timeout         acquire timeout in milliseconds, 0 for none
     */
    public WebDriverBean(String driverPath, int maxConnections, int minConnections, int initConnections, int conninterval, int timeout) {
        this.timeout = timeout;
        this.conninterval = conninterval;
        this.initConnections = initConnections;
        this.minConnections = minConnections;
        this.maxConnections = maxConnections;
        this.driverPath = driverPath;
    }

    /** @return path to the browser driver executable */
    public String getDriverPath() {
        return this.driverPath;
    }

    /** @param driverPath path to the browser driver executable */
    public void setDriverPath(String driverPath) {
        this.driverPath = driverPath;
    }

    /** @return maximum number of connections */
    public int getMaxConnections() {
        return this.maxConnections;
    }

    /** @param maxConnections maximum number of connections */
    public void setMaxConnections(int maxConnections) {
        this.maxConnections = maxConnections;
    }

    /** @return minimum number of connections */
    public int getMinConnections() {
        return this.minConnections;
    }

    /** @param minConnections minimum number of connections */
    public void setMinConnections(int minConnections) {
        this.minConnections = minConnections;
    }

    /** @return initial number of connections */
    public int getInitConnections() {
        return this.initConnections;
    }

    /** @param initConnections initial number of connections */
    public void setInitConnections(int initConnections) {
        this.initConnections = initConnections;
    }

    /** @return retry interval in milliseconds */
    public int getConninterval() {
        return this.conninterval;
    }

    /** @param conninterval retry interval in milliseconds */
    public void setConninterval(int conninterval) {
        this.conninterval = conninterval;
    }

    /** @return acquire timeout in milliseconds, 0 for none */
    public int getTimeout() {
        return this.timeout;
    }

    /** @param timeout acquire timeout in milliseconds, 0 for none */
    public void setTimeout(int timeout) {
        this.timeout = timeout;
    }

    /** Renders every property as {@code WebDriverBean{name=value, ...}}. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("WebDriverBean{");
        text.append("driverPath='").append(driverPath).append('\'');
        text.append(", maxConnections=").append(maxConnections);
        text.append(", minConnections=").append(minConnections);
        text.append(", initConnections=").append(initConnections);
        text.append(", conninterval=").append(conninterval);
        text.append(", timeout=").append(timeout);
        text.append('}');
        return text.toString();
    }
}
5.Test:测试
/**
 * Exercises the pool with more worker threads (7) than the loop's drivers can serve at
 * once under the sample configuration, so at least one worker has to wait for a release.
 */
@SpringBootTest
class WebDriverPoolTest{
    @Autowired
    WebDriverManager webDriverManager;

    @Test
    void WebDriverTest() throws InterruptedException {
        webDriverManager.createWebDriverPool();
        List<Thread> threadlist = new ArrayList<Thread>();
        try {
            for (int i = 1; i <= 7; i++) {
                Thread subThread = new Thread(new Runnable() {
                    @Override
                    public void run() {
                        WebDriver webDriver = webDriverManager.getWebDriver();
                        try {
                            // Simulate work done with the driver.
                            Thread.sleep(3000);
                        } catch (InterruptedException e) {
                            Thread.currentThread().interrupt();
                        } finally {
                            // Always hand the driver back, even when interrupted.
                            webDriverManager.closeWebDriver(webDriver);
                        }
                    }
                }, "thread name" + i);
                subThread.start();
                threadlist.add(subThread);
            }
            // Wait for the workers themselves instead of sleeping for a guessed duration;
            // the bound keeps a stuck worker from hanging the test forever.
            for (Thread worker : threadlist) {
                worker.join(30000);
            }
        } finally {
            webDriverManager.destroy();
        }
    }
}
测试结果
Starting ChromeDriver 92.0.4515.43 (8c61b7e2989f2990d42f859cac71319137787cce-refs/branch-heads/4515@{#306}) on port 30529
Only local connections are allowed.
Please see https://chromedriver.chromium.org/security-considerations for suggestions on keeping ChromeDriver safe.
ChromeDriver was started successfully.
[1631082176.018][WARNING]: This version of ChromeDriver has not been tested with Chrome version 93.
14:22:56.046 [Forwarding newSession on session null to remote] INFO org.openqa.selenium.remote.ProtocolHandshake - Detected dialect: W3C
Starting ChromeDriver 92.0.4515.43 (8c61b7e2989f2990d42f859cac71319137787cce-refs/branch-heads/4515@{#306}) on port 46228
Only local connections are allowed.
Please see https://chromedriver.chromium.org/security-considerations for suggestions on keeping ChromeDriver safe.
ChromeDriver was started successfully.
[1631082177.057][WARNING]: This version of ChromeDriver has not been tested with Chrome version 93.
14:22:57.070 [Forwarding newSession on session null to remote] INFO org.openqa.selenium.remote.ProtocolHandshake - Detected dialect: W3C
Starting ChromeDriver 92.0.4515.43 (8c61b7e2989f2990d42f859cac71319137787cce-refs/branch-heads/4515@{#306}) on port 6189
Only local connections are allowed.
Please see https://chromedriver.chromium.org/security-considerations for suggestions on keeping ChromeDriver safe.
ChromeDriver was started successfully.
[1631082178.075][WARNING]: This version of ChromeDriver has not been tested with Chrome version 93.
14:22:58.090 [Forwarding newSession on session null to remote] INFO org.openqa.selenium.remote.ProtocolHandshake - Detected dialect: W3C
Starting ChromeDriver 92.0.4515.43 (8c61b7e2989f2990d42f859cac71319137787cce-refs/branch-heads/4515@{#306}) on port 7056
Only local connections are allowed.
Please see https://chromedriver.chromium.org/security-considerations for suggestions on keeping ChromeDriver safe.
ChromeDriver was started successfully.
[1631082179.087][WARNING]: This version of ChromeDriver has not been tested with Chrome version 93.
14:22:59.099 [Forwarding newSession on session null to remote] INFO org.openqa.selenium.remote.ProtocolHandshake - Detected dialect: W3C
Starting ChromeDriver 92.0.4515.43 (8c61b7e2989f2990d42f859cac71319137787cce-refs/branch-heads/4515@{#306}) on port 14195
Only local connections are allowed.
Please see https://chromedriver.chromium.org/security-considerations for suggestions on keeping ChromeDriver safe.
ChromeDriver was started successfully.
[1631082180.101][WARNING]: This version of ChromeDriver has not been tested with Chrome version 93.
14:23:00.118 [Forwarding newSession on session null to remote] INFO org.openqa.selenium.remote.ProtocolHandshake - Detected dialect: W3C
14:23:00.121 [thread name1] INFO - 空闲池中剩余驱动数为5,直接获取驱动
14:23:00.122 [thread name7] INFO - 空闲池中剩余驱动数为4,直接获取驱动
14:23:00.122 [thread name6] INFO - 空闲池中剩余驱动数为3,直接获取驱动
14:23:00.122 [thread name3] INFO - 空闲池中剩余驱动数为2,直接获取驱动
14:23:00.122 [thread name4] INFO - 空闲池中剩余驱动数为1,直接获取驱动
14:23:00.122 [thread name5] INFO - 空闲池中无驱动,创建新的驱动
Starting ChromeDriver 92.0.4515.43 (8c61b7e2989f2990d42f859cac71319137787cce-refs/branch-heads/4515@{#306}) on port 40871
Only local connections are allowed.
Please see https://chromedriver.chromium.org/security-considerations for suggestions on keeping ChromeDriver safe.
ChromeDriver was started successfully.
[1631082181.119][WARNING]: This version of ChromeDriver has not been tested with Chrome version 93.
14:23:01.132 [Forwarding newSession on session null to remote] INFO org.openqa.selenium.remote.ProtocolHandshake - Detected dialect: W3C
14:23:01.132 [pool-3-thread-1] INFO - 空闲驱动数0
14:23:01.132 [pool-3-thread-1] INFO - 活动驱动数5
14:23:01.133 [thread name2] INFO - 当前已达最大驱动数
14:23:02.147 [thread name2] INFO - 当前已达最大驱动数
14:23:03.127 [thread name6] INFO - thread name6关闭连接:activeWebDriver.remove :ChromeDriver: chrome on WINDOWS (ef15ab2bd4382aab52c8a66fd88f1a0d)
14:23:03.128 [thread name2] INFO - 空闲池中剩余驱动数为1,直接获取驱动
14:23:03.128 [thread name4] INFO - thread name4关闭连接:activeWebDriver.remove :ChromeDriver: chrome on WINDOWS (7fd4408957968142f487061c8481ba6e)
14:23:03.128 [thread name7] INFO - thread name7关闭连接:activeWebDriver.remove :ChromeDriver: chrome on WINDOWS (88144a4a649e5bf0b24a0cd83839e5e0)
14:23:03.129 [thread name3] INFO - thread name3关闭连接:activeWebDriver.remove :ChromeDriver: chrome on WINDOWS (1ae1a8dca553645ad6363450a5f80361)
14:23:03.129 [thread name1] INFO - thread name1关闭连接:activeWebDriver.remove :ChromeDriver: chrome on WINDOWS (9e8bb8350deba582929f191783b934f5)
14:23:04.141 [thread name5] INFO - thread name5关闭连接:activeWebDriver.remove :ChromeDriver: chrome on WINDOWS (4c3cc87728a7e1d0417a0228d072e55a)
14:23:06.139 [thread name2] INFO - thread name2关闭连接:activeWebDriver.remove :ChromeDriver: chrome on WINDOWS (ef15ab2bd4382aab52c8a66fd88f1a0d)
14:23:13.413 [main] INFO - 驱动池已经摧毁
14:23:13.431 [Thread-3] INFO org.springframework.scheduling.concurrent.ThreadPoolTaskScheduler - Shutting down ExecutorService 'taskScheduler'
Process finished with exit code 0