package com.bicon.panwen;
import com.bicon.config.RedisBloomFilter;
import com.bicon.dao.PanwenMapper;
import com.bicon.xpath.exception.XpathSyntaxErrorException;
import com.bicon.xpath.model.JXDocument;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.openqa.selenium.By;
import org.openqa.selenium.UnhandledAlertException;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.firefox.FirefoxBinary;
import org.openqa.selenium.firefox.FirefoxDriver;
import org.openqa.selenium.firefox.FirefoxOptions;
import org.openqa.selenium.interactions.Actions;
import org.openqa.selenium.support.ui.ExpectedCondition;
import org.openqa.selenium.support.ui.WebDriverWait;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import org.springframework.transaction.annotation.Transactional;
import javax.annotation.Resource;
import java.util.List;
import java.util.concurrent.*;
@Component
public class Crawl2222 {
// SLF4J logger for this class.
private static final Logger logger = LoggerFactory.getLogger(Crawl2222.class);
// Shared Firefox binary/options instances; init() adds the "--headless" flag to the
// binary and attaches the binary to the options before creating a driver.
private static FirefoxBinary firefoxBinary = new FirefoxBinary();
private static FirefoxOptions firefoxOptions = new FirefoxOptions();
// Base URL of the target site. NOTE(review): the name looks like a misspelling of
// "prefix"/"suffix"; presumably prepended to relative links - confirm against usages.
private static String SUFFERFIX = "http://wenshu.court.gov.cn";
// URL of the site's "waf_verify" page. NOTE(review): "yanzhengma" is pinyin for
// CAPTCHA, so this is presumably the verification page - confirm against usages.
private static String YANZHENGMAURL ="http://wenshu.court.gov.cn/waf_verify.htm";
// Queue for concurrent execution (holds String entries; presumably URLs, per the name).
private static LinkedBlockingQueue<String> urlQueue = new LinkedBlockingQueue<String>();
// Queue used when creating the thread pool (the original note also lists the pool
// settings: minimum size, maximum size, idle-thread keep-alive time).
static BlockingQueue<Runnable> queue = new LinkedBlockingDeque<Runnable>();
// Thread pool executor; not initialized at declaration.
static ThreadPoolExecutor executor;
// Countdown latch; not initialized at declaration.
// NOTE(review): presumably used to wait for all worker tasks to finish - confirm.
static CountDownLatch endGate;
// Mapper injected by the container via @Resource.
@Resource
private PanwenMapper panwenMapper;
//创建
public void init() throws InterruptedException, XpathSyntaxErrorException {
String html = "";
String driverPath = "E:\\geckodriver.exe";
System.setProperty("webdriver.gecko.driver", driverPath);
//不弹出浏览器
firefoxBinary.addCommandLineOptions("--headless");
firefoxOptions.setBinary(firefoxBinary);
FirefoxDriver driver0 = new FirefoxDriver(firefoxOptions);
// WebDriver driver = new FirefoxDriver();
//定位对象时给10s 的时间, 如果10s 内还定位不到则抛出异常
driver0.manage().timeouts(
记录一次定时任务开启线程池+selenium java爬取裁判文书网
最新推荐文章于 2024-06-10 11:21:35 发布