Java爬取图片
第一步 创建一个Maven项目 (我是创建的一个springboot项目)
第二步 导入Jsoup包
<!-- Maven dependency declaration for jsoup (org.jsoup:jsoup, version 1.10.2), the library used below to fetch and parse the page. -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.10.2</version>
</dependency>
然后再创建一个包,并在包下新建一个带有 main 方法的类
分析网站内容
之后就是编写爬取图片的代码了
/**
 * Downloads every image found in the body of a blog article to a local directory.
 *
 * <p>The page is fetched and parsed with Jsoup, the element with id {@code content_views} is
 * looked up, and each {@code <img>} inside it is saved as {@code <n>.jpg}, where {@code n} is the
 * 1-based position of the image within that element.
 */
public class JsoupConfig {

    /** Timeout, in milliseconds, applied to the page fetch and to every image connection. */
    private static final int TIMEOUT_MILLIS = 10000;

    /** Local directory (with trailing separator) that downloaded images are written into. */
    private static final String SAVE_DIR = "E:\\Work\\照片\\";

    /**
     * Entry point: fetches the article page and downloads all images in its content element.
     *
     * <p>A failure on one image is reported and skipped so the remaining images are still
     * downloaded; a failure to fetch or parse the page itself ends the run.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // URL of the page to crawl
        String url = "https://blog.csdn.net/weixin_42302341/article/details/109778050?ops_request_misc=%257B%2522request%255Fid%2522%253A%2522161156794416780265412610%2522%252C%2522scm%2522%253A%252220140713.130102334..%2522%257D&request_id=161156794416780265412610&biz_id=0&utm_medium=distribute.pc_search_result.none-task-blog-2~all~sobaiduend~default-2-109778050.pc_search_result_before_js&utm_term=%E5%9B%BE%E7%89%87&spm=1018.2226.3001.4187";
        try {
            // Fetch and parse the page; the URL also becomes the document's base URI
            Document document = Jsoup.parse(new URL(url), TIMEOUT_MILLIS);

            // Locate the element that wraps the article content
            Element elementById = document.getElementById("content_views");
            if (elementById == null) {
                // getElementById returns null when the id is absent (e.g. page layout changed)
                System.err.println("No element with id \"content_views\" found at " + url);
                return;
            }

            Elements imgs = elementById.getElementsByTag("img");
            int id = 0;
            for (Element img : imgs) {
                id++;
                // absUrl resolves a relative src against the page URL and yields "" when the
                // attribute is missing or cannot be turned into a URL
                String src = img.absUrl("src");
                if (src.isEmpty()) {
                    System.err.println("Skipping image " + id + ": no usable src attribute");
                    continue;
                }
                try {
                    downloadImage(src, SAVE_DIR + id + ".jpg");
                    System.out.println(id + ".jpg下载完毕!!!");
                } catch (java.io.IOException e) {
                    // Keep going: one broken image must not abort the remaining downloads
                    System.err.println("Failed to download image " + id + " from " + src);
                    e.printStackTrace();
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Copies the resource at {@code src} into the file {@code destPath}.
     *
     * <p>Both streams are closed even when the copy fails part-way.
     *
     * @param src absolute URL of the image to download
     * @param destPath path of the local file to create or overwrite
     * @throws java.io.IOException if the URL is malformed, the connection fails or times out,
     *     or the destination file cannot be written
     */
    private static void downloadImage(String src, String destPath) throws java.io.IOException {
        URLConnection connection = new URL(src).openConnection();
        // Without explicit timeouts a stalled server would block the program indefinitely
        connection.setConnectTimeout(TIMEOUT_MILLIS);
        connection.setReadTimeout(TIMEOUT_MILLIS);

        try (InputStream in = connection.getInputStream();
             OutputStream out = new FileOutputStream(destPath)) {
            // Block copy instead of one read()/write() call per byte
            byte[] buffer = new byte[8192];
            int read;
            while ((read = in.read(buffer)) != -1) {
                out.write(buffer, 0, read);
            }
        }
    }
}
效果