写在前面的话
使用selenium+chrome的方式爬取网络文件,文件下载默认路径为:C:\Users\user\Downloads。
这不利于我们采集和分类,需要修改chrome的默认下载路径。
demo
// Demo: start Chrome through a ChromeDriverService with a custom download
// directory, then open a page.
// `downloadsPath`, `service`, `driver`, CHROME_PATH and CHROME_DRIVER_PATH are
// expected to be declared by the surrounding code (not shown in this snippet).

// Chrome user preferences handed to the browser through the "prefs" option.
HashMap<String, Object> chromePrefs = new HashMap<String, Object>();
// NOTE(review): 0 presumably suppresses the download popup - confirm.
chromePrefs.put("profile.default_content_settings.popups", 0);
// Directory Chrome should save downloaded files into.
chromePrefs.put("download.default_directory", downloadsPath);

ChromeOptions options = new ChromeOptions();
options.setExperimentalOption("prefs", chromePrefs);
options.addArguments("--test-type");

// Capabilities carry the ChromeOptions to the driver. The ChromeOptions
// capability is set exactly once; the original first set an empty map under
// the same key and then overwrote it.
DesiredCapabilities cap = DesiredCapabilities.chrome();
cap.setCapability(CapabilityType.ACCEPT_SSL_CERTS, true);
cap.setCapability(ChromeOptions.CAPABILITY, options);

// NOTE(review): this property is read from CHROME_PATH while the service below
// uses CHROME_DRIVER_PATH - confirm both point at the chromedriver executable.
System.setProperty(
        "webdriver.chrome.driver", PropertiesUtil.getValue(CHROME_PATH));

// Launch a chromedriver service on any free port.
service =
        new ChromeDriverService.Builder()
                .usingDriverExecutable(
                        new File(PropertiesUtil.getValue(CHROME_DRIVER_PATH)))
                .usingAnyFreePort()
                .build();
service.start();

// Keep the created session in `driver` and pass the capabilities built above;
// the original discarded the new instance and referenced an undefined
// `capabilities` variable.
driver = new RemoteWebDriver(service.getUrl(), cap);
driver.get("https://pc.weixin.qq.com/");
注意: 在 Windows 上,下载路径必须用反斜杠来表示,否则下载会失败。正确的写法如:D:\temp\files(在 Java 字符串字面量中需写作 "D:\\temp\\files")。