Java爬虫

使用Java爬虫

Java使用jsoup爬取b站视频,根据模糊搜索,搜到批量视频自动下载。
首先获取b站的搜索链接:https://search.bilibili.com/all?keyword= ,其中 keyword 参数的值就是要搜索的关键字(这类关键字检索一般由 ELK 之类的搜索引擎索引提供)。
思路:

  • 首先是使用jsoup爬取到b站的动态网页下来
  •      String str="";
         Scanner scanner = new Scanner(System.in);
         System.out.println("请输入想要搜索的内容");
         str=scanner.next();
         Document connect = null;
         try {
             connect = Jsoup.connect("https://search.bilibili.com/all?keyword="+str).get();
         } catch (IOException e) {
             throw new RuntimeException(e);
         }
         Elements li = connect.getElementsByTag("li");
         for (Element element : li) {
             String url = element.getElementsByTag("a").attr("href");
             if (url.contains("www.bilibili.com/video/BV")){
                 list.add(url.substring(url.indexOf("BV")+2,url.indexOf("?")));
             }
         }
    
  • 获取到搜索到的视频网页,根据标签来获取他们的链接,获取到链接之后我们拿到链接里面的BV号,然后使用BV号去一个地址拿到这个视频的cid。
    -这个就是视频的链接里面的BV号
    拿到BV号之后,我们使用get请求访问 "https://api.bilibili.com/x/web-interface/view?bvid=" + bvid 这个地址,它会返回一个json,我们从这个json里面取出cid。在这里插入图片描述
    获取到这个cid之后,我们就是可以根据这个cid,去拿到这个视频的真实地址以及视频的真实信息
    使用cid和BV号去请求这个地址 "https://api.bilibili.com/x/player/playurl?cid=" + cid + "&fnver=0&qn=" + qn + "&otype=json&bvid=" + bvid + "&fnval=2&player=1" 会获取到一个json
    这里面的qn参数是对应视频的清晰度的一个参数 //qn : 视频质量 112 -> 高清 1080P+, 80 -> 高清 1080P, 64 -> 高清 720P, 32 -> 清晰 480P, 16 -> 流畅 360P // 最高支持 1080p, 1080P+是不支持的
    在这里插入图片描述
    url就是视频的真实路径,然后使用流下载下来到电脑的固定位置
    下面是完整代码
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.*;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.Map;
import java.util.Scanner;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

/**
 * Command-line tool that searches bilibili for a keyword and downloads every
 * video found on the first result page.
 *
 * <p>Flow: fetch the search page with jsoup, pull the BV ids out of the result
 * links, ask the {@code view} API for each video's cid and title, ask the
 * {@code playurl} API for the media URL, then stream that URL to a file under
 * {@link #DOWNLOAD_DIR}.
 */
public class AppUrlMovie {

    /** Search page; the keyword is sent as the {@code keyword} query parameter. */
    private static final String SEARCH_URL = "https://search.bilibili.com/all";

    /** Endpoint whose {@code data} object carries the video's cid and title. */
    private static final String VIEW_API = "https://api.bilibili.com/x/web-interface/view?bvid=";

    /** Marker identifying a video link; the id is whatever follows it. */
    private static final String VIDEO_LINK_MARKER = "www.bilibili.com/video/BV";

    /** Directory prefix the downloaded files are written under. */
    private static final String DOWNLOAD_DIR = "C:\\file\\img\\";

    /** Referer sent by {@link #downloadMovie(String, String)} when the caller does not know the video id. */
    private static final String DEFAULT_REFERER = "https://www.bilibili.com/video/BV14S4y127Gd";

    /**
     * User-Agent sent with the media request. The original code set the header
     * twice; this is the value that actually took effect (the later one).
     */
    private static final String USER_AGENT = "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)";

    /**
     * Requested quality ({@code qn}): 112 = 1080P+, 80 = 1080P, 64 = 720P,
     * 32 = 480P, 16 = 360P. 1080P is the highest supported here; 1080P+ is not.
     */
    private static final int QUALITY = 80;

    /** File name ({@code <title>.flv}) of the video most recently resolved by {@code getCid}. */
    static String  FileName="";

    /**
     * Prompts for a search phrase, collects the BV ids from the result page and
     * downloads them one after another.
     *
     * @param args unused
     * @throws RuntimeException if the search page cannot be fetched
     */
    public static void main(String[] args) throws MalformedURLException {
        Scanner scanner = new Scanner(System.in);
        System.out.println("请输入想要搜索的内容");
        // Read the whole line so that phrases containing spaces are searched as typed.
        String keyword = "";
        while (keyword.isEmpty() && scanner.hasNextLine()) {
            keyword = scanner.nextLine().trim();
        }
        if (keyword.isEmpty()) {
            return; // stdin was closed before anything was entered
        }

        Document page;
        try {
            // data() lets jsoup URL-encode the keyword instead of splicing raw text into the URL.
            page = Jsoup.connect(SEARCH_URL).data("keyword", keyword).get();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }

        ArrayList<String> list = new ArrayList<>();
        for (Element item : page.getElementsByTag("li")) {
            String id = extractVideoId(item.getElementsByTag("a").attr("href"));
            // Skip repeated links so the same video is not downloaded twice.
            if (id != null && !list.contains(id)) {
                list.add(id);
            }
        }
        DownloadVivw(list, 0);
    }

    /**
     * Extracts the id that follows {@code /video/BV} in a result link.
     *
     * <p>The id ends at the first non-alphanumeric character, so links with a
     * query string, a trailing slash, or neither are all handled.
     *
     * @param href link target taken from the search page
     * @return the id without its {@code BV} prefix, or {@code null} if
     *         {@code href} is not a video link
     */
    private static String extractVideoId(String href) {
        if (href == null) {
            return null;
        }
        int marker = href.indexOf(VIDEO_LINK_MARKER);
        if (marker < 0) {
            return null;
        }
        int start = marker + VIDEO_LINK_MARKER.length();
        int end = start;
        while (end < href.length() && Character.isLetterOrDigit(href.charAt(end))) {
            end++;
        }
        return end > start ? href.substring(start, end) : null;
    }

    /**
     * Downloads the videos in {@code list} sequentially, starting at index {@code i}.
     *
     * <p>A video that fails (network error, unexpected API response) is reported
     * on stderr and skipped; the remaining videos are still processed.
     *
     * @param list video ids as collected by {@code main}
     * @param i    index of the first entry to download; nothing happens when it
     *             is at or past the end of the list
     */
    public static void DownloadVivw(final ArrayList<String> list, int i){
        if (i >= list.size()) {
            return;
        }
        long batchStart = System.currentTimeMillis();
        // Starts at i so the progress message matches the index-based count when nothing fails.
        int completed = i;
        // Iterate instead of recursing so the stack does not grow with the list size.
        for (int index = i; index < list.size(); index++) {
            String bvid = list.get(index);
            try {
                // Resolve the cid first; this also records the title-based file name.
                String cid = getCid(bvid);
                String fileName = FileName;
                // Resolve the real media URL.
                String url = getVivw(cid, bvid);
                System.out.println("开始");
                long start = System.currentTimeMillis();
                download(url, fileName, refererFor(bvid));
                long end = System.currentTimeMillis();
                completed++;
                System.out.println("以下载"+completed+"份视频");
                System.err.println("总共耗时:" + (end - start) / 1000 + "s,线程名"+Thread.currentThread().getName());
            } catch (IOException | RuntimeException e) {
                // One bad video must not abort the rest of the batch.
                System.err.println("跳过视频 " + bvid + ",原因:" + e);
            }
        }
        System.err.println("总共耗时:" + (System.currentTimeMillis() - batchStart) / 1000 + "s");
    }

    /**
     * Builds the video page URL used as Referer for the media request.
     *
     * @param bvid video id, with or without the {@code BV} prefix
     * @return the page URL of that video
     */
    private static String refererFor(String bvid) {
        return "https://www.bilibili.com/video/" + (bvid.startsWith("BV") ? bvid : "BV" + bvid);
    }

    /**
     * Asks the playurl API for the media URL of a video.
     *
     * @param cidJson cid of the video, as returned by {@code getCid}
     * @param bvid    id of the video
     * @return the {@code url} of the first {@code durl} entry
     * @throws IOException if the request fails or the response has no {@code durl} entry
     */
    private static String getVivw(String cidJson,String bvid) throws IOException {
        String paraUrl = "https://api.bilibili.com/x/player/playurl?cid=" + cidJson + "&fnver=0&qn=" + QUALITY + "&otype=json&bvid=" + bvid + "&fnval=2&player=1";
        JSONArray durl = fetchData(paraUrl).getJSONArray("durl");
        if (durl == null || durl.isEmpty()) {
            throw new IOException("响应中没有 durl: " + paraUrl);
        }
        return durl.getJSONObject(0).getString("url");
    }

    /**
     * Asks the view API for a video's cid and stores {@code <title>.flv} in {@link #FileName}.
     *
     * @param avid id of the video, passed as the {@code bvid} query parameter
     * @return the cid as a string
     * @throws IOException if the request fails or the response has no {@code data} object
     */
    private static String getCid(String avid) throws IOException {
        JSONObject data = fetchData(VIEW_API + avid);
        FileName = data.get("title") + ".flv";
        return data.get("cid") + "";
    }

    /**
     * Performs a GET request and returns the {@code data} object of the JSON response.
     *
     * @param address URL to request
     * @return the {@code data} object, never {@code null}
     * @throws IOException if the request fails or the response carries no {@code data} object
     */
    private static JSONObject fetchData(String address) throws IOException {
        StringBuilder body = new StringBuilder();
        URLConnection connection = new URL(address).openConnection();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(connection.getInputStream(), "utf-8"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                body.append(line);
            }
        }
        JSONObject root = JSON.parseObject(body.toString());
        JSONObject data = root == null ? null : root.getJSONObject("data");
        if (data == null) {
            // Error responses come back without a data object; fail here instead of with an NPE later.
            throw new IOException("响应中没有 data: " + address);
        }
        return data;
    }

    /**
     * Downloads {@code BLUrl} to {@code C:\file\img\<fileName>}.
     *
     * <p>Failures are printed to stderr and not propagated.
     *
     * @param BLUrl    media URL to download
     * @param fileName name of the target file; characters that are not allowed
     *                 in Windows file names are replaced with {@code _}
     */
    public static void downloadMovie(String BLUrl, String fileName) {
        try {
            download(BLUrl, fileName, DEFAULT_REFERER);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Streams {@code mediaUrl} into the download directory.
     *
     * @param mediaUrl media URL to download
     * @param fileName desired file name (sanitized before use)
     * @param referer  value of the {@code Referer} request header
     * @throws IOException if the connection, the directory creation or the copy fails
     */
    private static void download(String mediaUrl, String fileName, String referer) throws IOException {
        URLConnection connection = new URL(mediaUrl).openConnection();
        connection.setRequestProperty("Referer", referer);
        connection.setRequestProperty("Sec-Fetch-Mode", "no-cors");
        connection.setRequestProperty("User-Agent", USER_AGENT);
        connection.setConnectTimeout(10 * 1000);

        File target = new File(DOWNLOAD_DIR + sanitizeFileName(fileName));
        File parent = target.getParentFile();
        if (parent != null && !parent.mkdirs() && !parent.isDirectory()) {
            throw new IOException("无法创建下载目录: " + parent);
        }

        // The input is opened first, so a failed connection does not leave an empty file behind.
        try (InputStream in = new BufferedInputStream(connection.getInputStream());
             OutputStream out = new BufferedOutputStream(new FileOutputStream(target))) {
            byte[] buffer = new byte[8192];
            int read;
            while ((read = in.read(buffer)) != -1) {
                out.write(buffer, 0, read);
            }
        }
    }

    /**
     * Replaces characters that Windows does not allow in file names
     * ({@code \ / : * ? " < > |} and control characters) with {@code _}.
     *
     * @param name raw file name, typically derived from a video title
     * @return a name that is safe to append to the download directory
     */
    private static String sanitizeFileName(String name) {
        return String.valueOf(name).replaceAll("[\\\\/:*?\"<>|\\p{Cntrl}]", "_");
    }
}
  • 4
    点赞
  • 10
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值