Java爬虫(jsoup+fastjson)爬取酷狗热歌前500首

Java爬虫(jsoup+fastjson)爬取酷狗热歌前500首


import com.alibaba.fastjson.JSONObject;
import org.jsoup.Jsoup;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.List;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Command-line crawler that downloads the songs listed on the KuGou hot-song ranking pages.
 *
 * <p>The user enters a start page and a start song number. Each ranking page is fetched with
 * jsoup, the JavaScript array assigned to {@code global.features} is cut out of the HTML and
 * parsed with fastjson, and for every entry the play-data endpoint is queried for a
 * {@code play_url}. The MP3 behind that URL is saved as
 * {@code <songNumber>-<FileName>.mp3} in {@link #DOWNLOAD_DIR}.
 *
 * <p>Downloads are best-effort: a song that cannot be fetched or written is reported on
 * {@code System.err} and skipped, and the crawler moves on to the next one.
 */
public class KuGouMusicTOP500Crawler {

    /**
     * Size of the chart as advertised in the prompt text ("前500首"). Used as an upper bound on
     * the running song number so that paging always terminates.
     */
    private static final int CHART_SIZE = 500;

    /** Text that precedes the embedded song array in the ranking page's inline script. */
    private static final String FEATURES_MARKER = "global.features = ";

    /** Directory the MP3 files are written to. */
    private static final File DOWNLOAD_DIR = new File("D:/酷狗前500首热歌/");

    /** Extracts the (still JSON-escaped) value of {@code "play_url"} from the play-data response. */
    private static final Pattern PLAY_URL_PATTERN = Pattern.compile("\"play_url\":\"(.*?)\",");

    /** Matches one {@code \\uXXXX} escape; group 1 is the four hex digits. */
    private static final Pattern UNICODE_ESCAPE_PATTERN = Pattern.compile("\\\\u(\\p{XDigit}{4})");

    /** Characters that are not allowed in Windows file names. */
    private static final Pattern ILLEGAL_FILE_NAME_CHARS = Pattern.compile("[\\\\/:*?\"<>|]");

    /**
     * Prompts for the start page and start song number on standard input and starts downloading.
     *
     * @param args unused
     * @throws IOException if a ranking page cannot be fetched or the download directory cannot
     *     be created
     * @throws InterruptedException if the thread is interrupted while pausing between downloads
     */
    public static void main(String[] args) throws IOException, InterruptedException {
        Scanner scanner = new Scanner(System.in);
        System.out.println("酷狗前500首热歌下载\n从第几页,第几首歌开始下载?(以空格分开)");
        int pageIndex = scanner.nextInt();
        int i = scanner.nextInt();

        downLoadMusic(pageIndex, i);
    }

    /**
     * Downloads every song listed on ranking page {@code pageIndex} and on the pages after it.
     *
     * <p>{@code i} is only the number used to label the first file; it does not skip entries on
     * the page. The number advances by one for every entry on a page, whether or not its
     * download succeeded, so file numbers stay aligned with the position in the listing.
     *
     * <p>Paging stops when a page contains no song list or when the running number exceeds
     * {@link #CHART_SIZE}.
     *
     * @param pageIndex ranking page to start from
     * @param i number assigned to the first song of that page
     * @throws IOException if a ranking page cannot be fetched or the download directory cannot
     *     be created
     * @throws InterruptedException if the thread is interrupted while pausing between downloads
     */
    public static void downLoadMusic(int pageIndex, int i) throws IOException, InterruptedException {
        // Create the target directory once up front instead of re-checking it for every song.
        if (!DOWNLOAD_DIR.isDirectory() && !DOWNLOAD_DIR.mkdirs()) {
            throw new IOException("无法创建下载目录: " + DOWNLOAD_DIR);
        }

        int page = pageIndex;
        int songNumber = i;
        // Iterative paging: every pass either advances songNumber or stops, so this terminates.
        while (songNumber <= CHART_SIZE) {
            List<JSONObject> songs = fetchSongList(page);
            if (songs.isEmpty()) {
                break; // no song list on this page: we are past the end of the chart
            }
            for (JSONObject song : songs) {
                if (downloadSong(song, songNumber)) {
                    // Pause between successful downloads to avoid hammering the server.
                    Thread.sleep(1000);
                }
                songNumber++;
            }
            page++;
        }
    }

    /**
     * Fetches one ranking page and returns the entries of its embedded {@code global.features}
     * array.
     *
     * @param pageIndex ranking page number
     * @return the parsed entries, or an empty list when the page has no such array
     * @throws IOException if the page cannot be fetched
     */
    private static List<JSONObject> fetchSongList(int pageIndex) throws IOException {
        String songListUrl = "https://www.kugou.com/yy/rank/home/" + pageIndex + "-8888.html?from=rank";
        String body = Jsoup.connect(songListUrl).ignoreContentType(true).execute().body();

        int beginIdx = body.indexOf(FEATURES_MARKER);
        if (beginIdx < 0) {
            return java.util.Collections.<JSONObject>emptyList();
        }
        int endIdx = body.indexOf("];", beginIdx);
        if (endIdx < 0) {
            return java.util.Collections.<JSONObject>emptyList();
        }

        // The raw text is handed to the JSON parser, which decodes \\uXXXX escapes itself.
        // Decoding them beforehand could turn an escaped quote into a bare one and break parsing.
        String features = body.substring(beginIdx + FEATURES_MARKER.length(), endIdx + 1);
        List<JSONObject> songs = JSONObject.parseArray(features, JSONObject.class);
        return songs == null ? java.util.Collections.<JSONObject>emptyList() : songs;
    }

    /**
     * Resolves the play URL of one ranking entry and saves the MP3 to {@link #DOWNLOAD_DIR}.
     *
     * @param song ranking entry; its {@code Hash}, {@code album_id} and {@code FileName} values
     *     are read
     * @param songNumber number used as the file-name prefix
     * @return {@code true} if the file was written, {@code false} if the song was skipped
     */
    private static boolean downloadSong(JSONObject song, int songNumber) {
        // getString converts whatever JSON type the value has, so a numeric or textual
        // album_id both work (a hard cast to Integer would throw on anything else).
        String hash = song.getString("Hash");
        String albumId = song.getString("album_id");
        String fileName = song.getString("FileName");
        String url = "https://wwwapi.kugou.com/yy/index.php?r=play/getdata&hash=" + hash
                + "&dfid=3duCPE2qUax83tS5dX4JWC5o&appid=1014&mid=29d4360b5f124d4a48d5eafb5d193274&platid=4&album_id="
                + albumId;
        try {
            String playData = unicodeToString(Jsoup.connect(url).execute().body());
            //"play_url":"https:\/\/webfs.ali.kugou.com\/202109031404\/53e9134294b3bcfbf7109b04d8d9c521\/KGTX\/CLTX001\/f9c76358f5169402f1e18260f083c5e0.mp3",
            Matcher matcher = PLAY_URL_PATTERN.matcher(playData);
            if (!matcher.find()) {
                return false;
            }
            // Strip the JSON "\/" escaping from the URL.
            String srcMP3 = matcher.group(1).replace("\\", "");
            if (srcMP3.isEmpty()) {
                System.err.println("没有可用的播放地址,已跳过: " + songNumber + "-" + fileName);
                return false;
            }

            System.out.println("正在下载。。。。。" + songNumber + "-" + fileName + ".mp3\n位置D盘/酷狗前500首热歌/");
            byte[] bytes = Jsoup.connect(srcMP3)
                    .ignoreContentType(true)
                    .maxBodySize(600000000)
                    .timeout(100000)
                    .execute()
                    .bodyAsBytes();

            File target = new File(DOWNLOAD_DIR, songNumber + "-" + sanitizeFileName(fileName) + ".mp3");
            // try-with-resources guarantees the stream is closed even if the write fails.
            try (FileOutputStream out = new FileOutputStream(target)) {
                out.write(bytes);
            }
            return true;
        } catch (IOException | IllegalArgumentException e) {
            // Best-effort: report the failure and let the caller continue with the next song.
            System.err.println("下载失败,已跳过: " + songNumber + "-" + fileName + " (" + e + ")");
            return false;
        }
    }

    /**
     * Replaces characters that are illegal in Windows file names with {@code _}.
     *
     * @param name raw title, may be {@code null}
     * @return a name that is safe to use as a single path segment
     */
    private static String sanitizeFileName(String name) {
        return ILLEGAL_FILE_NAME_CHARS.matcher(String.valueOf(name)).replaceAll("_");
    }

    /**
     * Decodes every {@code \\uXXXX} escape sequence in {@code str} into the character it denotes.
     *
     * <p>The input is scanned once from left to right; text produced by decoding is never
     * scanned again, and all other characters are copied unchanged.
     *
     * @param str text that may contain {@code \\uXXXX} escapes; must not be {@code null}
     * @return the decoded text
     */
    public static String unicodeToString(String str) {
        Matcher matcher = UNICODE_ESCAPE_PATTERN.matcher(str);
        StringBuilder decoded = new StringBuilder(str.length());
        int copiedUpTo = 0;
        while (matcher.find()) {
            decoded.append(str, copiedUpTo, matcher.start());
            decoded.append((char) Integer.parseInt(matcher.group(1), 16));
            copiedUpTo = matcher.end();
        }
        decoded.append(str, copiedUpTo, str.length());
        return decoded.toString();
    }
}

在这里插入图片描述
在这里插入图片描述

  • 0
    点赞
  • 6
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值