Jsoup是一个第三方Java类库,用于解析HTML,可以方便地获取和操作页面内容。
Jsoup的maven仓库坐标:
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.13.1</version>
</dependency>
流程:
先解析网页URL,获取页面的Document对象;在浏览器中按F12打开开发者工具查看页面元素,找到图片所在的父标签,再根据父标签获取其中的img标签对象。然后从img标签中取出图片路径URL,建立URL连接并获取输入流,最后创建输出流将图片写入本地文件。
主要代码
/**
 * Downloads the product images found on a JD.com search result page.
 *
 * <p>The search page is fetched and parsed with Jsoup, every {@code img} element below the
 * element with id {@code J_goodsList} is collected, and each image whose
 * {@code data-lazy-img} attribute is non-empty is saved as
 * {@code src/main/resources/imgs/<n>.jpg}, where {@code n} counts up from 1.
 */
public class Main {

    /** Directory the downloaded images are written to. */
    private static final String OUTPUT_DIR = "src/main/resources/imgs/";

    /** Size in bytes of the buffer used when copying an image to disk. */
    private static final int BUFFER_SIZE = 8192;

    /**
     * Fetches the search page, prints the discovered {@code img} elements and downloads
     * every lazily-loaded image to {@link #OUTPUT_DIR}.
     *
     * @param args unused
     * @throws IllegalStateException if the page has no element with id {@code J_goodsList}
     * @throws Exception if fetching the page, or downloading or writing an image, fails
     */
    public static void main(String[] args) throws Exception {
        // Non-ASCII text is not allowed verbatim in the URL, so the keyword is percent-encoded.
        String path = "https://search.jd.com/Search?keyword=" + URLEncoder.encode("联想笔记本", "UTF-8") + "&enc=utf-8&spm=2.1.0";
        URL url = new URL(path);

        // Fetch and parse the page; the second argument is the timeout in milliseconds.
        Document document = Jsoup.parse(url, 10000);

        // Parent element that contains the product images (found by inspecting the page with F12).
        Element goodsList = document.getElementById("J_goodsList");
        if (goodsList == null) {
            // getElementById returns null when no element matches; fail with a clear message
            // instead of an unexplained NullPointerException on the next line.
            throw new IllegalStateException("No element with id \"J_goodsList\" found at " + path);
        }

        Elements imgs = goodsList.getElementsByTag("img");
        System.out.println("------------------------------------------------------");
        System.out.println("获取的img标签内容如下\n" + imgs);
        System.out.println("------------------------------------------------------");

        // Make sure the target directory exists before the first file is opened.
        java.nio.file.Files.createDirectories(java.nio.file.Paths.get(OUTPUT_DIR));

        int num = 0;
        for (Element img : imgs) {
            // The image address is read from the data-lazy-img attribute.
            String attr = img.attr("data-lazy-img");
            if (attr.isEmpty()) {
                continue;
            }
            num++;
            // The attribute value is prefixed with the scheme to form the URL that is downloaded.
            attr = "https:" + attr;
            System.out.println("该图片路径如下:" + attr);
            download(new URL(attr), OUTPUT_DIR + num + ".jpg");
            System.out.println(num + ".jpg下载完毕!");
        }
    }

    /**
     * Copies the content served at {@code source} into the local file {@code target}.
     *
     * <p>Both streams are closed when the copy finishes or fails.
     *
     * @param source URL of the image to download
     * @param target path of the local file to (over)write
     * @throws java.io.IOException if the connection cannot be opened or the copy fails
     */
    private static void download(URL source, String target) throws java.io.IOException {
        URLConnection connection = source.openConnection();
        try (InputStream in = connection.getInputStream();
             OutputStream out = new FileOutputStream(target)) {
            byte[] buffer = new byte[BUFFER_SIZE];
            int read;
            // Copy in chunks rather than one byte (and one flush) at a time.
            while ((read = in.read(buffer)) != -1) {
                out.write(buffer, 0, read);
            }
        }
    }
}