Java crawler: downloading high-resolution images

Code 1:

package com.xy;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;

import org.apache.commons.io.FileUtils;
import org.apache.http.HttpEntity;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Crawl the full-size image from each wallpaper detail page
 */
public class Test1 {

    public static void main(String[] args) throws ClientProtocolException, IOException {

        // Create the HttpClient instance
        CloseableHttpClient httpclient = HttpClients.createDefault();
        // Responses for the page request and the image request
        CloseableHttpResponse pictureResponse = null;
        CloseableHttpResponse response = null;
        for (int i = 10; i < 99; i++) {
            String ss = "https://pic.netbian.com/tupian/270" + i + ".html";
            System.out.println(ss);
            HttpGet httpget = new HttpGet(ss);


            // Execute the GET request
            response = httpclient.execute(httpget);
            HttpEntity entity = response.getEntity();
            // Read the response entity as a UTF-8 string
            String content = EntityUtils.toString(entity, "utf-8");

            // Parse the HTML into a Document object
            Document doc = Jsoup.parse(content);
            // Select the target <img /> element
            Elements elements = doc.select(".photo-pic #img img");
            try {
                Element element = elements.get(0);

                // The src attribute is a relative path, so prepend the site root
                String src = element.attr("src");
                String strpre = "https://pic.netbian.com";
                String url = strpre + src;
                System.out.println("Image " + (i - 9) + ": " + url);

                HttpGet picGet = new HttpGet(url);

                pictureResponse = httpclient.execute(picGet);
                HttpEntity pictureEntity = pictureResponse.getEntity();
                InputStream inputStream = pictureEntity.getContent();

                // Download the image to disk with Commons IO; file names must not repeat
                FileUtils.copyToFile(inputStream, new File("D://img//imsge//" + i + "" + 1 + ".jpg"));
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        pictureResponse.close(); // close the last image response
        response.close(); // close the last page response
        httpclient.close(); // close the HttpClient


    }

}
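Note that Code 1 reuses response and pictureResponse across loop iterations and only closes the pair from the last iteration after the loop ends, so earlier responses are never released. Below is a minimal sketch of the same per-image flow rewritten with try-with-resources; the selector, URL prefix and Commons IO call are taken from Code 1, while the method name downloadOne and its parameters are only illustrative.

    // Sketch: download the image behind one detail page, closing each response as soon as it is read.
    // Uses the same imports as Code 1 (HttpClient, Jsoup, Commons IO).
    static void downloadOne(CloseableHttpClient httpclient, String pageUrl, File saveTo) throws IOException {
        String html;
        try (CloseableHttpResponse pageResponse = httpclient.execute(new HttpGet(pageUrl))) {
            html = EntityUtils.toString(pageResponse.getEntity(), "utf-8");
        } // the page response is closed here, even if parsing below fails

        Document doc = Jsoup.parse(html);
        Element img = doc.selectFirst(".photo-pic #img img");
        if (img == null) {
            return; // page missing or layout changed
        }
        String imageUrl = "https://pic.netbian.com" + img.attr("src");

        try (CloseableHttpResponse imageResponse = httpclient.execute(new HttpGet(imageUrl));
             InputStream in = imageResponse.getEntity().getContent()) {
            FileUtils.copyToFile(in, saveTo);
        }
    }

Called from the loop in Code 1, for example downloadOne(httpclient, "https://pic.netbian.com/tupian/270" + i + ".html", new File("D://img//imsge//" + i + "1.jpg")), only the HttpClient itself stays open for the whole run.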

Code 2:

package com.xy;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;

import org.apache.commons.io.FileUtils;
import org.apache.http.HttpEntity;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Crawl preview thumbnails from the 彼岸图网 (pic.netbian.com) search result pages
 */
public class Test2 {

    public static void main(String[] args) throws ClientProtocolException, IOException {

        // Create the HttpClient instance
        CloseableHttpClient httpclient = HttpClients.createDefault();
        // Responses for the list-page request and the image request
        CloseableHttpResponse pictureResponse = null;
        CloseableHttpResponse response = null;
        for (int i = 0; i <= 10; i++) {
            // Search result pages look like: https://pic.netbian.com/e/search/result/index.php?page=2&searchid=2453
            String ss = "https://pic.netbian.com/e/search/result/index.php?page=" + i + "&searchid=2453";
            System.out.println(ss);
            HttpGet httpget = new HttpGet(ss);

            // Execute the GET request
            response = httpclient.execute(httpget);
            HttpEntity entity = response.getEntity();
            // Read the response entity as a UTF-8 string
            String content = EntityUtils.toString(entity, "utf-8");

            // Parse the HTML into a Document object
            Document doc = Jsoup.parse(content);
            // Select the thumbnail list items
            Elements elements = doc.select(".slist ul li");
            // At most 15 thumbnails per page, and never past the end of the list
            for (int j = 0; j < 15 && j < elements.size(); j++) {
                Element element = elements.get(j);
                Elements elements1 = element.select("a img");
                // The src attribute is a relative path such as /uploads/...jpg, so prepend the site root
                String url = elements1.attr("src");
                String urlStr = "https://pic.netbian.com";
                String string = urlStr + url;
                System.out.println(string);

                HttpGet picGet = new HttpGet(string);
                try {
                    pictureResponse = httpclient.execute(picGet);
                    HttpEntity pictureEntity = pictureResponse.getEntity();
                    InputStream inputStream = pictureEntity.getContent();

                    // Download the image to disk with Commons IO; file names must not repeat
                    FileUtils.copyToFile(inputStream, new File("D://img//" + i + "" + j + ".jpg"));
                } catch (Exception e) {
                    e.printStackTrace();
                }

            }
        }
        pictureResponse.close(); // close the last image response
        response.close(); // close the last page response
        httpclient.close(); // close the HttpClient
    }

}
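Code 2 completes the relative src path by concatenating it onto the site root by hand. Jsoup can do this resolution itself when the document is parsed with a base URI. A small sketch, assuming the search-result HTML is already in the content variable as in Code 2:

            // Sketch: let Jsoup resolve relative src attributes against a base URI
            Document doc = Jsoup.parse(content, "https://pic.netbian.com");
            for (Element img : doc.select(".slist ul li a img")) {
                // absUrl("src") joins the base URI and the relative path,
                // e.g. /uploads/...jpg becomes https://pic.netbian.com/uploads/...jpg
                String absolute = img.absUrl("src");
                System.out.println(absolute);
            }

This replaces the manual prefixing with a single call, and it keeps working even if the site ever starts returning absolute URLs.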

Required dependencies:

<dependencies>
    <!-- https://mvnrepository.com/artifact/org.jsoup/jsoup -->
    <dependency>
        <groupId>org.jsoup</groupId>
        <artifactId>jsoup</artifactId>
        <version>1.11.3</version>
    </dependency>

    <!-- File download (Commons IO) -->
    <dependency>
        <groupId>commons-io</groupId>
        <artifactId>commons-io</artifactId>
        <version>2.5</version>
    </dependency>

    <!-- https://mvnrepository.com/artifact/org.apache.httpcomponents/httpclient -->
    <dependency>
        <groupId>org.apache.httpcomponents</groupId>
        <artifactId>httpclient</artifactId>
        <version>4.5.5</version>
    </dependency>
</dependencies>
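Note that the FileUtils.copyToFile(InputStream, File) overload used in both examples was only added in commons-io 2.5, so the version listed above is the minimum that compiles; the jsoup and httpclient versions are simply the ones the code was written against.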