在腾讯文档的 Excel 表格中上传的图片是以链接的形式保存的,如何批量下载并保存这些图片呢?
1: 将文档导出为 Excel 文件保存到本地,使用 WPS 打开,再另存为网页文件(html)
2: 打开其中的一个 html 文件,可以看到图片是以 <a> 链接的形式存在
3: 使用 jsoup 批量抓取图片链接并下载图片
(1) 引入jsoup依赖
<!-- jsoup HTML parser: provides the Jsoup, Document, Element and Elements classes imported by the code below. -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.8.2</version>
</dependency>
(2) 完整的代码
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.*;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
/**
 * Downloads every file linked from a locally saved spreadsheet HTML page.
 *
 * <p>The page is parsed with jsoup, each {@code <a href>} element is treated as a
 * download link, and the link text is used as the local file name under
 * {@link #OUTPUT_DIR}. Files that already exist locally are left untouched.
 */
public class JsoupTest {

    /** HTML page (saved from the spreadsheet) that contains the links. */
    private static final String SOURCE_HTML = "E:\\mytest\\sheet002.htm";

    /** Path prefix under which downloaded files are stored. */
    private static final String OUTPUT_DIR = "E:/mytest/";

    /** Size of the copy buffer, in bytes. */
    private static final int BUFFER_SIZE = 8192;

    /**
     * Parses {@link #SOURCE_HTML} and downloads the target of every link in it.
     *
     * <p>A failure on one link is reported and the remaining links are still
     * processed, so a single bad URL does not abort the whole batch.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Document document;
        try {
            document = Jsoup.parse(new File(SOURCE_HTML), "utf-8");
        } catch (IOException e) {
            // Without the page there is nothing to download.
            e.printStackTrace();
            return;
        }

        Elements links = document.select("a[href]");
        for (Element link : links) {
            String href = link.attr("href");
            String name = link.text();
            System.out.println("link : " + href);

            // An empty name would resolve to the output directory itself.
            if (name.isEmpty()) {
                System.err.println("skipped (link has no text to use as a file name): " + href);
                continue;
            }

            try {
                download(href, new File(OUTPUT_DIR + name));
            } catch (MalformedURLException e) {
                System.err.println("skipped (not a valid URL): " + href);
                e.printStackTrace();
            } catch (IOException e) {
                System.err.println("failed to download: " + href);
                e.printStackTrace();
            }
        }
    }

    /**
     * Copies the content behind {@code href} into {@code target}.
     *
     * <p>Does nothing when {@code target} already exists. Missing parent
     * directories are created. If the transfer fails part-way, the incomplete
     * file is removed so that a later run retries it instead of skipping it.
     *
     * @param href   URL to download
     * @param target local file to create
     * @throws MalformedURLException if {@code href} is not a valid URL
     * @throws IOException           if the directory cannot be created or the transfer fails
     */
    private static void download(String href, File target) throws IOException {
        // Check first so that no connection is opened for files we already have.
        if (target.exists()) {
            return;
        }

        File parent = target.getParentFile();
        if (parent != null && !parent.isDirectory() && !parent.mkdirs()) {
            throw new IOException("cannot create directory " + parent);
        }

        URLConnection connection = new URL(href).openConnection();
        boolean complete = false;
        // Both streams are closed (and the output flushed) for every file, not only the last one.
        try (InputStream in = new BufferedInputStream(connection.getInputStream());
             OutputStream out = new BufferedOutputStream(new FileOutputStream(target))) {
            byte[] buffer = new byte[BUFFER_SIZE];
            int length;
            while ((length = in.read(buffer)) != -1) {
                out.write(buffer, 0, length);
            }
            out.flush();
            complete = true;
        } finally {
            // Streams are already closed here, so the partial file can be deleted.
            if (!complete) {
                target.delete();
            }
        }
    }
}
(3) 查看执行结果,图片抓取成功