Java crawler: downloading high-resolution images

Code 1:

package com.xy;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;

import org.apache.commons.io.FileUtils;
import org.apache.http.HttpEntity;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Crawl the full-size image from each wallpaper detail page
 */
public class Test1 {

    public static void main(String[] args) throws ClientProtocolException, IOException {

        // Create the HttpClient instance
        CloseableHttpClient httpclient = HttpClients.createDefault();
        // Responses for the page request and the image request
        CloseableHttpResponse pictureResponse = null;
        CloseableHttpResponse response = null;
        for (int i = 10; i < 99; i++) {
            String ss = "https://pic.netbian.com/tupian/270" + i + ".html";
            System.out.println(ss);
            HttpGet httpget = new HttpGet(ss);


            // Execute the GET request
            response = httpclient.execute(httpget);
            HttpEntity entity = response.getEntity();
            // Read the response entity as a UTF-8 string
            String content = EntityUtils.toString(entity, "utf-8");

            // Parse the HTML into a Document object
            Document doc = Jsoup.parse(content);
            // Select the target <img /> element
            Elements elements = doc.select(".photo-pic #img img");
            try {
                Element element = elements.get(0);

                // The src attribute is a relative path, so prepend the site root
                String src = element.attr("src");
                String strpre = "https://pic.netbian.com";
                String url = strpre + src;
                System.out.println("Image " + (i - 9) + ": " + url);

                HttpGet picGet = new HttpGet(url);

                pictureResponse = httpclient.execute(picGet);
                HttpEntity pictureEntity = pictureResponse.getEntity();
                InputStream inputStream = pictureEntity.getContent();

                // Download the image to disk with Commons IO; file names must not repeat
                FileUtils.copyToFile(inputStream, new File("D://img//imsge//" + i + "" + 1 + ".jpg"));
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        pictureResponse.close(); // close the last image response
        response.close(); // close the last page response
        httpclient.close(); // close the HttpClient


    }

}
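Note that Code 1 reuses response and pictureResponse across loop iterations and only closes the pair from the last iteration after the loop ends, so earlier responses are never released. Below is a minimal sketch of the same per-image flow rewritten with try-with-resources; the selector, URL prefix and Commons IO call are taken from Code 1, while the method name downloadOne and its parameters are only illustrative.

    // Sketch: download the image behind one detail page, closing each response as soon as it is read.
    // Uses the same imports as Code 1 (HttpClient, Jsoup, Commons IO).
    static void downloadOne(CloseableHttpClient httpclient, String pageUrl, File saveTo) throws IOException {
        String html;
        try (CloseableHttpResponse pageResponse = httpclient.execute(new HttpGet(pageUrl))) {
            html = EntityUtils.toString(pageResponse.getEntity(), "utf-8");
        } // the page response is closed here, even if parsing below fails

        Document doc = Jsoup.parse(html);
        Element img = doc.selectFirst(".photo-pic #img img");
        if (img == null) {
            return; // page missing or layout changed
        }
        String imageUrl = "https://pic.netbian.com" + img.attr("src");

        try (CloseableHttpResponse imageResponse = httpclient.execute(new HttpGet(imageUrl));
             InputStream in = imageResponse.getEntity().getContent()) {
            FileUtils.copyToFile(in, saveTo);
        }
    }

Called from the loop in Code 1, for example downloadOne(httpclient, "https://pic.netbian.com/tupian/270" + i + ".html", new File("D://img//imsge//" + i + "1.jpg")), only the HttpClient itself stays open for the whole run.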

Code 2:

package com.xy;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;

import org.apache.commons.io.FileUtils;
import org.apache.http.HttpEntity;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Crawl preview thumbnails from the 彼岸图网 (pic.netbian.com) search result pages
 */
public class Test2 {

    public static void main(String[] args) throws ClientProtocolException, IOException {

        // Create the HttpClient instance
        CloseableHttpClient httpclient = HttpClients.createDefault();
        // Responses for the list-page request and the image request
        CloseableHttpResponse pictureResponse = null;
        CloseableHttpResponse response = null;
        for (int i = 0; i <= 10; i++) {
            // Search result pages look like: https://pic.netbian.com/e/search/result/index.php?page=2&searchid=2453
            String ss = "https://pic.netbian.com/e/search/result/index.php?page=" + i + "&searchid=2453";
            System.out.println(ss);
            HttpGet httpget = new HttpGet(ss);

            // Execute the GET request
            response = httpclient.execute(httpget);
            HttpEntity entity = response.getEntity();
            // Read the response entity as a UTF-8 string
            String content = EntityUtils.toString(entity, "utf-8");

            // Parse the HTML into a Document object
            Document doc = Jsoup.parse(content);
            // Select the thumbnail list items
            Elements elements = doc.select(".slist ul li");
            // At most 15 thumbnails per page, and never past the end of the list
            for (int j = 0; j < 15 && j < elements.size(); j++) {
                Element element = elements.get(j);
                Elements elements1 = element.select("a img");
                // The src attribute is a relative path such as /uploads/...jpg, so prepend the site root
                String url = elements1.attr("src");
                String urlStr = "https://pic.netbian.com";
                String string = urlStr + url;
                System.out.println(string);

                HttpGet picGet = new HttpGet(string);
                try {
                    pictureResponse = httpclient.execute(picGet);
                    HttpEntity pictureEntity = pictureResponse.getEntity();
                    InputStream inputStream = pictureEntity.getContent();

                    // Download the image to disk with Commons IO; file names must not repeat
                    FileUtils.copyToFile(inputStream, new File("D://img//" + i + "" + j + ".jpg"));
                } catch (Exception e) {
                    e.printStackTrace();
                }

            }
        }
        pictureResponse.close(); // close the last image response
        response.close(); // close the last page response
        httpclient.close(); // close the HttpClient
    }

}
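Code 2 completes the relative src path by concatenating it onto the site root by hand. Jsoup can do this resolution itself when the document is parsed with a base URI. A small sketch, assuming the search-result HTML is already in the content variable as in Code 2:

            // Sketch: let Jsoup resolve relative src attributes against a base URI
            Document doc = Jsoup.parse(content, "https://pic.netbian.com");
            for (Element img : doc.select(".slist ul li a img")) {
                // absUrl("src") joins the base URI and the relative path,
                // e.g. /uploads/...jpg becomes https://pic.netbian.com/uploads/...jpg
                String absolute = img.absUrl("src");
                System.out.println(absolute);
            }

This replaces the manual prefixing with a single call, and it keeps working even if the site ever starts returning absolute URLs.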

Required dependencies:

<dependencies>
    <!-- https://mvnrepository.com/artifact/org.jsoup/jsoup -->
    <dependency>
        <groupId>org.jsoup</groupId>
        <artifactId>jsoup</artifactId>
        <version>1.11.3</version>
    </dependency>

    <!-- File download (Commons IO) -->
    <dependency>
        <groupId>commons-io</groupId>
        <artifactId>commons-io</artifactId>
        <version>2.5</version>
    </dependency>

    <!-- https://mvnrepository.com/artifact/org.apache.httpcomponents/httpclient -->
    <dependency>
        <groupId>org.apache.httpcomponents</groupId>
        <artifactId>httpclient</artifactId>
        <version>4.5.5</version>
    </dependency>
</dependencies>
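Note that the FileUtils.copyToFile(InputStream, File) overload used in both examples was only added in commons-io 2.5, so the version listed above is the minimum that compiles; the jsoup and httpclient versions are simply the ones the code was written against.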