安装chrome浏览器:
下载并安装rpm包(yum可直接通过URL安装)
yum install https://dl.google.com/linux/direct/google-chrome-stable_current_x86_64.rpm
安装相关依赖
yum install mesa-libOSMesa-devel gnu-free-sans-fonts wqy-zenhei-fonts
查看安装版本
google-chrome-stable --version
安装chromedriver驱动:
下载chromedriver驱动(版本号需与上一步查看到的Chrome版本一致,下面以120.0.6099.109为例)
wget https://edgedl.me.gvt1.com/edgedl/chrome/chrome-for-testing/120.0.6099.109/linux64/chromedriver-linux64.zip
解压
unzip chromedriver-linux64.zip
移动
mv chromedriver-linux64 /usr/bin/
赋予驱动文件可执行权限
chmod +x /usr/bin/chromedriver-linux64/chromedriver
Java代码:
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import org.openqa.selenium.support.ui.ExpectedConditions;
import org.openqa.selenium.support.ui.WebDriverWait;
import org.springframework.stereotype.Component;
import java.util.List;
/**
 * Spring component that drives a headless Chrome browser through Selenium to run a
 * Baidu search and collect the text of the result entries.
 *
 * <p>The component keeps no per-search state: every call to {@link #baiduSearch(String)}
 * uses its own browser session and its own result buffer, so results from one call
 * never appear in another.
 */
@Component
public class ChromeUtils {

    /** Location of the chromedriver binary on the Linux host. */
    private static final String CHROMEDRIVER_FILE = "/usr/bin/chromedriver-linux64/chromedriver";

    /** Number of result pages scraped per search. */
    private static final int PAGES_TO_SCRAPE = 2;

    /** Fixed pause, in milliseconds, before the results of each page are read. */
    private static final long PAGE_LOAD_PAUSE_MILLIS = 5000L;

    /** Maximum time, in seconds, to wait for the "next page" link to become clickable. */
    private static final long NEXT_PAGE_TIMEOUT_SECONDS = 60L;

    /**
     * Searches Baidu for {@code text} and returns the concatenated text of the result
     * containers found on the first two result pages, with line breaks removed.
     *
     * <p>If the calling thread is interrupted while waiting for a page, the interrupt
     * flag is restored and whatever text was collected so far is returned. Selenium
     * runtime exceptions (for example a timeout while waiting for the next-page link)
     * are not caught here; the browser is still closed before they propagate.
     *
     * @param text the search keywords
     * @return the collected result text without {@code '\n'} characters; an empty
     *         string when {@code text} is {@code null} or blank
     */
    public String baiduSearch(String text) {
        // Nothing to search for: avoid launching a browser at all.
        if (text == null || text.trim().isEmpty()) {
            return "";
        }

        // Local buffer: a field on this shared component would leak results between calls.
        StringBuilder collected = new StringBuilder();
        WebDriver driver = null;
        try {
            // Chrome start-up flags
            ChromeOptions options = new ChromeOptions();
            options.addArguments("--no-sandbox"); // disable the sandbox
            options.addArguments("--disable-dev-shm-usage"); // do not use /dev/shm
            options.addArguments("--headless"); // run without a visible window

            // Tell Selenium where the chromedriver binary lives
            System.setProperty("webdriver.chrome.driver", CHROMEDRIVER_FILE);
            driver = new ChromeDriver(options);

            // Open Baidu, type the keywords into the search box and submit the form
            driver.get("https://www.baidu.com");
            WebElement searchBox = driver.findElement(By.name("wd"));
            searchBox.sendKeys(text);
            searchBox.submit();

            for (int page = 0; page < PAGES_TO_SCRAPE; page++) {
                // Give the result page time to render before reading it
                Thread.sleep(PAGE_LOAD_PAUSE_MILLIS);

                // Each search result sits in a <div class="c-container">
                List<WebElement> searchResults =
                        driver.findElements(By.xpath("//div[@class='c-container']"));
                for (WebElement result : searchResults) {
                    collected.append(result.getText());
                }

                // Move on to the next page unless this was the last one
                if (page < PAGES_TO_SCRAPE - 1) {
                    WebDriverWait wait = new WebDriverWait(driver, NEXT_PAGE_TIMEOUT_SECONDS);
                    WebElement nextPageButton = wait.until(ExpectedConditions.elementToBeClickable(
                            By.xpath("//a[@class='n' and contains(text(),'下一页')]")));
                    nextPageButton.click();
                }
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag so the caller can react; return partial results.
            Thread.currentThread().interrupt();
        } finally {
            if (driver != null) {
                // Always close the browser and end the driver session
                driver.quit();
            }
        }
        return collected.toString().replace("\n", "");
    }
}