// Java爬虫(jsoup+fastjson)爬取酷狗热歌前500首
import com.alibaba.fastjson.JSONObject;
import org.jsoup.Jsoup;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.List;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Console crawler that walks the KuGou hot-song chart pages and saves each song as an MP3 file.
 *
 * <p>For every chart page it extracts the JSON array assigned to {@code global.features},
 * asks the {@code play/getdata} endpoint for each song's {@code play_url}, and writes the
 * downloaded bytes to {@code D:/酷狗前500首热歌/}.
 */
public class KuGouMusicTOP500Crawler {

    /** Text that precedes the JSON song array embedded in a chart page. */
    private static final String FEATURES_MARKER = "global.features = ";

    /** Captures the (still backslash-escaped) value of {@code play_url} in the getdata response. */
    private static final Pattern PLAY_URL_PATTERN = Pattern.compile("\"play_url\":\"(.*?)\",");

    /** Matches a literal {@code \}{@code uXXXX} escape; group 1 is the four hex digits. */
    private static final Pattern UNICODE_ESCAPE_PATTERN = Pattern.compile("\\\\u(\\p{XDigit}{4})");

    /** Characters that cannot appear in a Windows file name. */
    private static final Pattern ILLEGAL_FILE_NAME_CHARS = Pattern.compile("[\\\\/:*?\"<>|]");

    /** Directory that receives the downloaded files. */
    private static final File DOWNLOAD_DIR = new File("D:/酷狗前500首热歌/");

    /**
     * Reads the starting page and the starting song number from standard input and starts
     * the download.
     *
     * @param args unused
     * @throws IOException if a chart page or a song-info request fails
     * @throws InterruptedException if the thread is interrupted while pausing between downloads
     */
    public static void main(String[] args) throws IOException, InterruptedException {
        Scanner scanner = new Scanner(System.in);
        System.out.println("酷狗前500首热歌下载\n从第几页,第几首歌开始下载?(以空格分开)");
        int pageIndex = scanner.nextInt();
        int i = scanner.nextInt();
        downLoadMusic(pageIndex, i);
    }

    /**
     * Downloads every song listed on chart page {@code pageIndex} and on all following pages.
     *
     * <p>Pages are visited in a loop (not by recursion) and the method returns normally as
     * soon as a page carries no song list. A song that cannot be downloaded is reported on
     * standard error and skipped; the song number still advances so that the numbers in the
     * file names stay aligned with the order of the songs on the chart.
     *
     * @param pageIndex number of the first chart page to process
     * @param i number used in the file name of the first song on that page; incremented by one
     *          for every subsequent song
     * @throws IOException if a chart page or a song-info request fails, or the download
     *         directory cannot be created
     * @throws InterruptedException if the thread is interrupted while pausing between downloads
     */
    public static void downLoadMusic(int pageIndex, int i) throws IOException, InterruptedException {
        if (!DOWNLOAD_DIR.exists() && !DOWNLOAD_DIR.mkdirs()) {
            throw new IOException("Cannot create download directory: " + DOWNLOAD_DIR);
        }
        int songNumber = i;
        for (int page = pageIndex; ; page++) {
            List<JSONObject> songs = fetchSongList(page);
            if (songs == null || songs.isEmpty()) {
                // No song list on this page: we have run past the end of the chart.
                return;
            }
            for (JSONObject song : songs) {
                downloadSong(song, songNumber);
                songNumber++;
            }
        }
    }

    /**
     * Fetches one chart page and parses the song array embedded after {@code global.features = }.
     *
     * @return the parsed songs, or an empty list when the page does not contain the array
     */
    private static List<JSONObject> fetchSongList(int pageIndex) throws IOException {
        String songListUrl = "https://www.kugou.com/yy/rank/home/" + pageIndex + "-8888.html?from=rank";
        String body = unicodeToString(Jsoup.connect(songListUrl).ignoreContentType(true).execute().body());

        int beginIdx = body.indexOf(FEATURES_MARKER);
        if (beginIdx < 0) {
            return java.util.Collections.emptyList();
        }
        int endIdx = body.indexOf("];", beginIdx);
        if (endIdx < 0) {
            return java.util.Collections.emptyList();
        }
        // Keep the closing ']' but drop the ';' that terminates the JavaScript statement.
        String features = body.substring(beginIdx + FEATURES_MARKER.length(), endIdx + 1);
        return JSONObject.parseArray(features, JSONObject.class);
    }

    /**
     * Resolves the play URL of one song and writes the MP3 into {@link #DOWNLOAD_DIR}.
     * Pauses one second after a successful download.
     */
    private static void downloadSong(JSONObject song, int songNumber) throws IOException, InterruptedException {
        // getString converts whatever JSON type is stored, so a numeric or string album_id both work.
        String hash = song.getString("Hash");
        String albumId = song.getString("album_id");
        String fileName = song.getString("FileName");

        // The dfid/appid/mid/platid values are fixed query parameters sent with every request.
        String url = "https://wwwapi.kugou.com/yy/index.php?r=play/getdata&hash=" + hash
                + "&dfid=3duCPE2qUax83tS5dX4JWC5o&appid=1014&mid=29d4360b5f124d4a48d5eafb5d193274&platid=4&album_id="
                + albumId + "";
        String songInfo = unicodeToString(Jsoup.connect(url).execute().body());

        // The response carries the URL with escaped slashes, e.g. "play_url":"https:\/\/host\/path\/file.mp3",
        Matcher matcher = PLAY_URL_PATTERN.matcher(songInfo);
        if (!matcher.find()) {
            return;
        }
        String srcMP3 = matcher.group(1).replace("\\", "");
        if (srcMP3.isEmpty()) {
            System.err.println("Skipped " + songNumber + "-" + fileName + ": empty play_url");
            return;
        }

        try {
            System.out.println("正在下载。。。。。" + songNumber + "-" + fileName + ".mp3\n位置D盘/酷狗前500首热歌/");
            byte[] bytes = Jsoup.connect(srcMP3)
                    .ignoreContentType(true)
                    .maxBodySize(600000000)
                    .timeout(100000)
                    .execute()
                    .bodyAsBytes();
            String safeName = ILLEGAL_FILE_NAME_CHARS.matcher(String.valueOf(fileName)).replaceAll("_");
            File target = new File(DOWNLOAD_DIR, songNumber + "-" + safeName + ".mp3");
            try (FileOutputStream out = new FileOutputStream(target)) {
                out.write(bytes);
            }
        } catch (IOException | IllegalArgumentException e) {
            // Best effort: one broken song must not abort the whole run, but say why it was skipped.
            System.err.println("Skipped " + songNumber + "-" + fileName + ": " + e);
            return;
        }
        // Pause between downloads; an interrupt is propagated to the caller instead of being swallowed.
        Thread.sleep(1000);
    }

    /**
     * Replaces every literal {@code \}{@code uXXXX} escape (four hex digits) in {@code str}
     * with the character it denotes.
     *
     * <p>The input is scanned once from left to right, so text produced by decoding one escape
     * is never decoded a second time.
     *
     * @param str text that may contain escapes; may be {@code null}
     * @return the decoded text, or {@code str} itself when it is {@code null} or contains no escape
     */
    public static String unicodeToString(String str) {
        if (str == null) {
            return null;
        }
        Matcher matcher = UNICODE_ESCAPE_PATTERN.matcher(str);
        if (!matcher.find()) {
            return str;
        }
        StringBuilder decoded = new StringBuilder(str.length());
        int copiedUpTo = 0;
        do {
            decoded.append(str, copiedUpTo, matcher.start());
            decoded.append((char) Integer.parseInt(matcher.group(1), 16));
            copiedUpTo = matcher.end();
        } while (matcher.find());
        decoded.append(str, copiedUpTo, str.length());
        return decoded.toString();
    }
}