爬取图片-工作常用小工具01

最新推荐文章于 2024-05-10 15:05:21 发布

一场梦@bhl

最新推荐文章于 2024-05-10 15:05:21 发布

阅读量494

点赞数

分类专栏：工具集合　文章标签：java

原文链接：https://blog.csdn.net/weixin_42485489/article/details/114093560

版权

工具集合专栏收录该内容

3 篇文章 0 订阅

订阅专栏

爬取指定网页图片

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
/**
 * Small utility that extracts every {@code <img>} inside the {@code js_content}
 * element of a locally saved web page and downloads each image to a local folder.
 *
 * <p>The input page, the output folder and the User-Agent are fixed constants below.
 */
public class DownLoadImage {

    /** Locally saved copy of the web page that is scanned for images. */
    private static final String SOURCE_HTML_PATH = "C:\\Users\\18404\\Desktop\\1.html";

    /** Base URI handed to Jsoup when parsing the saved page. */
    private static final String SOURCE_BASE_URI = "https://mp.weixin.qq.com/s/fUgISHL-ZrcI1efDiiZR2w";

    /** Id of the element whose {@code <img>} descendants are downloaded. */
    private static final String CONTENT_ELEMENT_ID = "js_content";

    /** Attribute on each {@code <img>} that holds the image URL. */
    private static final String IMAGE_URL_ATTRIBUTE = "data-src";

    /** Folder the downloaded images are written to. */
    private static final String OUTPUT_DIR = "C:\\Users\\18404\\Desktop\\1\\";

    /** The image host only answers requests that look like they come from a browser. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko";

    /** Timeouts so that a stalled server cannot hang the tool forever. */
    private static final int CONNECT_TIMEOUT_MS = 10000;
    private static final int READ_TIMEOUT_MS = 30000;

    /** Size of the copy buffer, i.e. the maximum number of bytes moved per read call. */
    private static final int BUFFER_SIZE = 8192;

    /** Upper bound on attempts to find an unused random file name. */
    private static final int MAX_NAME_ATTEMPTS = 1000;

    /**
     * Parses the saved page, prints the number of images found inside the content
     * element, then prints and downloads each image URL in document order.
     *
     * <p>Images without a usable {@code data-src} value are skipped instead of
     * aborting the whole run.
     *
     * @throws IOException if the page cannot be read, the content element is missing,
     *                     or any download or file write fails
     */
    public static void downLink() throws IOException {
        File input = new File(SOURCE_HTML_PATH);
        Document doc = Jsoup.parse(input, "UTF-8", SOURCE_BASE_URI);

        Element content = doc.getElementById(CONTENT_ELEMENT_ID);
        if (content == null) {
            // Fail with a clear message rather than a NullPointerException further down.
            throw new IOException(
                    "Element with id '" + CONTENT_ELEMENT_ID + "' not found in " + input);
        }

        Elements links = content.getElementsByTag("img");
        System.out.println(links.size());

        // One downloader is enough; it holds no per-image state.
        DownLoadImage downloader = new DownLoadImage();
        for (Element link : links) {
            String linkHref = link.attr(IMAGE_URL_ATTRIBUTE);
            if (linkHref == null || linkHref.trim().isEmpty()) {
                // No URL to fetch for this tag; building a URL from it would throw.
                continue;
            }
            System.out.println(linkHref);
            downloader.downImage(linkHref);
        }
    }

    /**
     * Downloads the image at {@code strUrl} and stores it under a random, unused
     * {@code <number>.jpg} name in the output folder. The chosen number is printed.
     *
     * <p>The image is fully read into memory before the file is created, so a failed
     * download does not leave a partially written image behind.
     *
     * @param strUrl absolute HTTP(S) URL of the image
     * @throws IOException if the URL is malformed, the download fails, or the file
     *                     cannot be written
     */
    public void downImage(String strUrl) throws IOException {
        URL url = new URL(strUrl);
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();

        byte[] imageBytes;
        try {
            conn.setRequestProperty("User-Agent", USER_AGENT);
            conn.setConnectTimeout(CONNECT_TIMEOUT_MS);
            conn.setReadTimeout(READ_TIMEOUT_MS);
            conn.connect();

            // try-with-resources closes the stream even when a read fails midway.
            try (InputStream inStream = conn.getInputStream()) {
                imageBytes = readFully(inStream);
            }
        } finally {
            conn.disconnect();
        }

        File file = reserveOutputFile();
        try (FileOutputStream op = new FileOutputStream(file)) {
            op.write(imageBytes);
        }
    }

    /** Reads {@code in} to end-of-stream and returns everything read as one byte array. */
    private static byte[] readFully(InputStream in) throws IOException {
        ByteArrayOutputStream outStream = new ByteArrayOutputStream();
        // The buffer is only a staging area; its size bounds each read, not the image size.
        byte[] buf = new byte[BUFFER_SIZE];
        int len;
        while ((len = in.read(buf)) != -1) {
            outStream.write(buf, 0, len);
        }
        return outStream.toByteArray();
    }

    /**
     * Creates the output folder if needed and atomically reserves a new, empty
     * {@code <random>.jpg} file in it, so an earlier download is never overwritten.
     */
    private static File reserveOutputFile() throws IOException {
        File dir = new File(OUTPUT_DIR);
        if (!dir.isDirectory() && !dir.mkdirs() && !dir.isDirectory()) {
            throw new IOException("Cannot create output directory " + dir);
        }

        int max = 100000, min = 1;
        for (int attempt = 0; attempt < MAX_NAME_ATTEMPTS; attempt++) {
            int ran2 = (int) (Math.random() * (max - min) + min);
            File candidate = new File(dir, ran2 + ".jpg");
            // createNewFile returns false when the name is already taken.
            if (candidate.createNewFile()) {
                System.out.println(ran2);
                return candidate;
            }
        }
        throw new IOException("Could not find an unused file name in " + dir);
    }

    /** Entry point: runs {@link #downLink()} with the built-in paths. */
    public static void main(String[] args) throws IOException {
        downLink();
    }

}

一场梦@bhl

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
爬取图片-工作常用小工具01

爬取指定网页图片import org.jsoup.Jsoup;import org.jsoup.nodes.Document;import org.jsoup.nodes.Element;import org.jsoup.select.Elements;import java.io.*;import java.net.HttpURLConnection;import java.net.URL;public class DownLoadImage { public static
复制链接

扫一扫