看很多大佬都在用Python爬取图片,其实Java也可以做到,这里就通过一个例子来演示一下。
话不多说,上代码
// System property key that Selenium reads to locate the ChromeDriver binary.
private final static String driver = "webdriver.chrome.driver";
// Absolute path to the local chromedriver.exe (machine-specific; adjust per install).
private final static String chromeDriver = "C:\\Users\\d\\AppData\\Local\\Google\\Chrome\\Application\\chromedriver.exe";
// Target site to scrape (a meme/sticker image site).
private final static String emojiWebAddress = "https://www.dbbqb.com/";
static {
// Register the ChromeDriver path before any ChromeDriver instance is created.
System.setProperty(driver, chromeDriver);
}
/**
 * Entry point: opens the meme site in Chrome and repeatedly refreshes the
 * page, downloading every image found on each pass. The site serves a
 * different random set of images per refresh, so each loop iteration yields
 * new files. Runs until the process is killed.
 */
public static void main(String[] args) {
    System.out.println("正在打开浏览器");
    // 1. Launch Chrome through the registered driver and maximize the window.
    WebDriver driver = new ChromeDriver();
    driver.manage().window().maximize();
    // 2. Implicit wait: element lookups retry for up to 3 seconds.
    //    (The original comment claimed 30s but the code always used 3s.)
    driver.manage().timeouts().implicitlyWait(Duration.ofSeconds(3));
    // 3. Navigate to the meme site.
    driver.get(emojiWebAddress);
    // One RNG for the whole run — hoisted out of the loop instead of
    // constructing a new Random per downloaded image.
    Random random = new Random();
    try {
        while (true) {
            driver.navigate().refresh();
            try {
                // 4. Container div that holds the whole image grid.
                WebElement bqbEle = driver.findElement(By.xpath("//*[@id=\"root\"]/div[4]"));
                // 5. Each "jss49" div is one row of images.
                List<WebElement> rowList = bqbEle.findElements(By.className("jss49"));
                for (WebElement rowEle : rowList) {
                    // 6. Each child div of a row wraps a single image cell.
                    for (WebElement cell : rowEle.findElements(By.xpath("div"))) {
                        String src = cell.findElement(By.className("lazyload-wrapper"))
                                .findElement(By.xpath("a"))
                                .findElement(By.xpath("img")).getAttribute("src");
                        // Timestamp + zero-padded 4-digit random suffix: padding keeps
                        // names unique-width and avoids collisions such as "…7" vs "…73".
                        String fileName = System.currentTimeMillis()
                                + String.format("%04d", random.nextInt(10000)) + ".jpg";
                        TuPianXiaZai.downloadPicture(src, "E:\\Download\\" + fileName);
                    }
                    System.out.println("下载了一排");
                }
            } catch (Exception e) {
                // Per-refresh failures (stale elements, layout changes) are logged
                // and the loop continues with the next refresh.
                e.printStackTrace();
                System.err.println("报错了!!!");
            }
        }
    } finally {
        // Don't leak the browser process if the loop ever terminates abnormally.
        driver.quit();
    }
}
/**
 * Downloads a single picture from a URL to a local file.
 *
 * <p>Streams the bytes directly from the connection to the file instead of
 * buffering the whole image in memory first, and uses try-with-resources so
 * both streams are closed even when the copy fails midway (the original
 * leaked them on exception).
 *
 * @param urlList the image URL to download
 * @param path    the destination file path on the local disk
 */
private static void downloadPicture(String urlList, String path) {
    try (DataInputStream dataInputStream = new DataInputStream(new URL(urlList).openStream());
         FileOutputStream fileOutputStream = new FileOutputStream(new File(path))) {
        byte[] buffer = new byte[1024];
        int length;
        // read() returns -1 at end of stream; copy chunk by chunk.
        while ((length = dataInputStream.read(buffer)) != -1) {
            fileOutputStream.write(buffer, 0, length);
        }
    } catch (IOException e) {
        // Covers MalformedURLException too (it is a subclass of IOException);
        // the original handled both identically.
        e.printStackTrace();
    }
}
我运行后发现这个网站有反爬机制,页面每次刷新展示的图片都不一样,所以这里用无限循环刷新页面的方式持续抓取新图。代码仅供学习,建议大家千万别去恶意爬取别人的资源,容易惹上麻烦。