Java多线程+阻塞队列爬取网页图片

初学者,用java写了个爬虫玩,爬取蜂鸟网上的图片,但是只能获得一页数据,请教大神帮忙看看代码哪里出错了

长话短说,直接贴出代码

package picture;

import com.google.gson.Gson;
import jindong.utils.HttpClientUtils;
import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import java.io.BufferedOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

// 爬取网页图片的多线程+阻塞队列版
public class FengNiaoSpider02 {
//创建线程池
private static ExecutorService threadPool = Executors.newFixedThreadPool(41);
//创建阻塞队列
private static ArrayBlockingQueue queue = new ArrayBlockingQueue(200);

public static void main(String[] args) throws IOException, InterruptedException {
    threadPool.execute(new Runnable() {
        public void run() {
            while (true) {
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException e) {
                    e.printStackTrace();
                }
                int size = queue.size();//获取到队列的数量
                System.out.println("当前队列中还剩下" + size);
            }
        }
    });
    tooThread();
    getdetail();
}

//开启线程,线程内执行从阻塞队列中取出图片作品详情页的链接
public static void tooThread() {
    for (int i = 1; i <=40; i++) {
        threadPool.execute(new Runnable() {
            public void run() {
                try {
                    String detailpagehref = queue.take();
                    savepic(detailpagehref);
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        });

    }
}

//将图片作品详情页的链接放入阻塞队列中
public static void getdetail() throws IOException, InterruptedException {
    for (int i = 1; i <=5; i++) {
        //指定json链接
        String jsonurl = "https://photo.fengniao.com/ajaxPhoto.php?action=getPhotoLists&fid=16&sort=1&page=" + i;
        //解析json链接,获得图片详情页链接,放入阻塞队列中
        String json = HttpClientUtils.doGet(jsonurl);
        Gson gson = new Gson();
        Map map1 = gson.fromJson(json, Map.class);
        List<Map<String, String>> contentlist = (List<Map<String, String>>) map1.get("content");
        for (Map<String, String> map : contentlist) {
            String picUrl = map.get("picUrl");
            picUrl = "https://photo.fengniao.com" + picUrl;
            queue.put(picUrl);
        }
    }
}

//通过详情页的链接来进行图片的获取和保存
public static void savepic(String detailpagehref) throws IOException {
    //通过详情页的url获得其html
    String detail_html = HttpClientUtils.doGet(detailpagehref);
    //解析html
    Document detail_document = Jsoup.parse(detail_html);
    //获得图片名称
    String name = detail_document.select("[class=title overOneTxt]").text();
    //获得下载原图链接
    String downpic = detail_document.select(".downPic").attr("href");

    //根据原图链接下载图片
    CloseableHttpClient httpClient = HttpClients.createDefault();
    HttpGet httpGet = new HttpGet(downpic);
    CloseableHttpResponse response = httpClient.execute(httpGet);
    HttpEntity entity = response.getEntity();
    InputStream in = entity.getContent();

    BufferedOutputStream bo = new BufferedOutputStream(new FileOutputStream("D:/0fengniao2/" + name + ".jpg"));
    byte[] bytes = new byte[1024];
    int len = 0;
    System.out.println("开始下载" + name + downpic);
    while ((len = in.read(bytes, 0, bytes.length)) != -1) {
        bo.write(bytes, 0, len);
    }
    in.close();
    bo.close();
    System.err.println("下载完成");
}

}

我觉得应该是线程那里出了问题,但是,找了好久都找不到原因,链接那里我已经拿出来单独测试过了,能获得有效链接。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值