1、抓取方式
一般来说,应该用jsoup来抓取网页中的Image,但是,由于百度图片的加载不再是翻页形式的了,而是使用ajax的方式动态加载,所以使用jsoup的话,只能拿到第一页的内容,想要获取后面页码的内容就无能为力了。不过,既然是ajax的方式,我们自然也可以直接请求对应的ajax接口来获取图片内容。
2、验证
可以看到,响应的json中hoverURL字段就是对应的图片地址,那么,我们就可以直接从这个json串中获取图片地址进行下载了。
3、实现
3.1、具体请求
请求示例:
String url = "https://image.baidu.com/search/acjson?tn=resultjson_com&logid=10660754132115598609&ipn=rj&ct=201326592&is=&fp=result&" +
"queryWord="+keyword+"&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=&z=&ic=&hd=&latest=&copyright=&" +
"word="+keyword+"&s=&se=&tab=&width=&height=&face=&istype=&qc=&nc=&fr=&expermode=&force=&" +
"pn="+page+"&rn=30&gsm=5a&1606053649620=";
说明:queryWord和word就是我们要查询的关键字,pn是30的整数倍,因为一页有30张图片,故pn=n*30
3.2、代码实现
package xyz.xfcloud.test.demo.service;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import org.springframework.stereotype.Service;
import org.springframework.web.client.RestTemplate;
import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
@Service
public class MyService {

    private final RestTemplate restTemplate = new RestTemplate();

    /** Root directory for downloads; a sub-directory is created per keyword. */
    private final String path = "D:\\pictures";

    /**
     * Fetches one page (30 images) of Baidu image-search results for the given
     * keyword and downloads each result's {@code hoverURL} into
     * {@code path\keyword\}.
     *
     * @param keyword the search term (fills both the queryWord and word parameters)
     * @param page    the result offset; must be a multiple of 30 (pn = n * 30)
     */
    public void downImage(String keyword, int page) {
        // FIX: the original string contained "©right=" — an HTML-entity
        // mojibake of "&copyright=" ("&copy" was rendered as "©" when the
        // code was published), which corrupted the query string.
        String url = "https://image.baidu.com/search/acjson?tn=resultjson_com&logid=10660754132115598609&ipn=rj&ct=201326592&is=&fp=result&" +
                "queryWord=" + keyword + "&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=&z=&ic=&hd=&latest=&copyright=&" +
                "word=" + keyword + "&s=&se=&tab=&width=&height=&face=&istype=&qc=&nc=&fr=&expermode=&force=&" +
                "pn=" + page + "&rn=30&gsm=5a&1606053649620=";
        String result = restTemplate.getForObject(url, String.class);

        // Build the target directory {path}\{keyword}\ and create it up front,
        // so download() never has to choose between "create dir" and "download".
        String tempPath = path.endsWith("\\") ? path : path + "\\";
        tempPath = tempPath + keyword + "\\";
        File dir = new File(tempPath);
        if (!dir.exists() && !dir.mkdirs()) {
            sop("cannot create directory: " + tempPath);
            return;
        }

        JSONObject jsonObject = JSONObject.parseObject(result);
        JSONArray array = jsonObject.getJSONArray("data");
        if (array == null) {
            // Defensive: the payload may lack a "data" array (e.g. throttled
            // or error response); dump it for diagnosis instead of NPE-ing.
            sop(result);
            return;
        }
        for (int i = 0; i < array.size(); i++) {
            JSONObject o = array.getJSONObject(i);
            try {
                download(o.getString("hoverURL"), tempPath);
            } catch (Exception e) {
                e.printStackTrace();
            }
            System.out.println(o.getString("middleURL"));
        }
        System.out.println(result);
    }

    /** Shorthand for System.out.println, used throughout this class. */
    private void sop(Object obj) {
        System.out.println(obj);
    }

    /**
     * Downloads the image at {@code url} into directory {@code path}, using the
     * last path segment of the URL as the file name.
     *
     * Fixes two defects of the original implementation:
     * 1) directory creation and the actual download were mutually exclusive
     *    if/else branches, so the first call for a brand-new directory created
     *    the directory but downloaded nothing;
     * 2) streams were not reliably closed (the InputStream leaked, and
     *    fos.close() in finally could NPE) — both are now managed by
     *    try-with-resources. Non-http URLs no longer leave behind empty files.
     *
     * @param url  image URL; ignored (with a message) unless it starts with "http"
     * @param path target directory, expected to end with a path separator
     */
    private void download(String url, String path) {
        if (url == null || !url.startsWith("http")) {
            sop("找不到该网络图片....");
            return;
        }
        // Safety net in case the caller did not pre-create the directory.
        File dirFile = new File(path);
        if (!dirFile.exists() && path.length() > 0 && dirFile.mkdirs()) {
            sop("creat document file \"" + path.substring(0, path.length() - 1) + "\" success...\n");
        }
        String downloadName = url.substring(url.lastIndexOf("/") + 1);
        File file = new File(path + downloadName);
        try (InputStream in = new URL(url).openConnection().getInputStream();
             FileOutputStream fos = new FileOutputStream(file)) {
            byte[] buffer = new byte[1024];
            int num;
            while ((num = in.read(buffer)) != -1) {
                // Write the whole chunk at once instead of byte-by-byte.
                fos.write(buffer, 0, num);
            }
        } catch (FileNotFoundException notFoundE) {
            sop("找不到该网络图片....");
        } catch (IOException ioE) {
            sop("产生IO异常.....");
        }
    }
}
4、源码下载地址