/*
 * Maven dependency required by this example:
 *
 * <dependency>
 *     <groupId>org.anyline</groupId>
 *     <artifactId>anyline-net</artifactId>
 * </dependency>
 */
package org.anyline.simple.spider;
import org.anyline.entity.DataRow;
import org.anyline.entity.DataSet;
import org.anyline.net.HttpUtil;
import org.anyline.util.BasicUtil;
import org.anyline.util.FileUtil;
import org.anyline.util.regular.RegularUtil;
import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Example spider for ke.qq.com course videos.
 *
 * <p>{@link #download(String)} reads the course catalogue embedded in a playback page,
 * asks the {@code describe_rec_video} endpoint for the HLS playlist of every chapter,
 * downloads the {@code .ts} segments and merges them into one {@code .mp4} per chapter.
 * {@link #merge()} only re-merges segments that are already on disk.
 *
 * <p>Directory layout: {@code root/<course name>/<chapter title>/<start offset>.ts}.
 */
public class QQSpider {
    /** Root directory that receives the downloaded segments and the merged files. */
    private static final File root = new File("D:\\qk");

    /**
     * Entry point.
     *
     * <p>Every command-line argument is treated as a playback page URL and passed to
     * {@link #download(String)}. Without arguments the already downloaded segments are merged,
     * which is what this method did before it accepted arguments.
     *
     * <p>To obtain a URL, open a playback page and copy the browser address, for example
     * (address format when not logged in):
     * <pre>
     * https://ke.qq.com/course/2737483/9558853446911307#term_id=102844777
     * https://ke.qq.com/course/2769083/10244575040520379#term_id=102877352
     * </pre>
     *
     * @param args optional playback page URLs
     */
    public static void main(String[] args) {
        if (null != args && args.length > 0) {
            for (String url : args) {
                download(url);
            }
            return;
        }
        merge(); // merge the ts segments that were downloaded earlier
    }

    /**
     * Merges the {@code .ts} segments found below every {@code root/<course>/<chapter>} entry
     * into {@code root/<course>/_<chapter>.mp4}. Entries without segments are skipped.
     */
    public static void merge() {
        File[] dirs = root.listFiles();
        if (null == dirs) {
            // listFiles() returns null when root does not exist or is not a directory
            return;
        }
        for (File dir : dirs) {
            File[] sub_dirs = dir.listFiles();
            if (null == sub_dirs) {
                continue;
            }
            for (File sub_dir : sub_dirs) {
                List<File> list = sortedSegments(sub_dir);
                if (list.isEmpty()) {
                    continue;
                }
                File file = new File(sub_dir.getParent(), "_" + sub_dir.getName() + ".mp4");
                FileUtil.merge(file, list);
            }
        }
    }

    /**
     * Downloads every chapter listed in the catalogue of a playback page and merges the
     * segments of each chapter into {@code root/<course name>/<chapter title>.mp4}.
     *
     * <p>Only the first rendition returned by {@code describe_rec_video} is downloaded.
     * Segment files are named after their {@code start} offset only, so downloading several
     * renditions into the same directory would mix segments of different resolutions.
     *
     * @param url playback page URL as shown in the browser; it must contain the course id
     *            ({@code /course/<id>/}) and {@code term_id=}
     * @throws IllegalArgumentException if {@code url} has no {@code term_id=} part
     * @throws IllegalStateException if the course catalogue cannot be read from the page
     */
    public static void download(String url) {
        String course = RegularUtil.cut(url, "course", "/", "/");
        String term = RegularUtil.cut(url, "term_id=", RegularUtil.TAG_END);
        if (null == term) {
            throw new IllegalArgumentException("url does not contain term_id=: " + url);
        }
        // Read the page source
        String txt = HttpUtil.get(url).getText();
        // The course list (the catalogue on the right side of the page; one course usually
        // holds several videos) is embedded as JSON, see "JSON structure taken from the course page"
        txt = RegularUtil.cut(txt, "__NEXT_DATA__", ">", "</script>");
        DataRow json = DataRow.parseJson(txt);
        if (null == json) {
            throw new IllegalStateException("no __NEXT_DATA__ json found in page: " + url);
        }
        DataRow catalogMap = (DataRow) json.recursion("props", "pageProps", "courseInfo", "catalogMap");
        if (null == catalogMap) {
            throw new IllegalStateException("no catalogMap found in page: " + url);
        }
        DataSet items = catalogMap.getSet(term);
        if (null == items) {
            throw new IllegalStateException("no catalogue for term_id=" + term + " in page: " + url);
        }
        for (DataRow item : items) {
            // Video list
            DataSet subs = item.getSet("sub_info");
            if (null == subs) {
                continue;
            }
            for (DataRow sub : subs) {
                String term_name = safeName(sub.getString("name")); // course name
                File dir = new File(root, term_name);
                DataSet tasks = sub.getSet("task_info");
                if (null == tasks) {
                    continue;
                }
                // The outer loops are expected to see a single element each
                for (DataRow task : tasks) {
                    String sub_name = safeName(task.getString("name")); // chapter title, used as directory name
                    File sub_dir = new File(dir, sub_name);
                    String resid_list = task.getString("resid_list");
                    // See "JSON structure returned by describe_rec_video"
                    DataRow info = firstRow(requestVideoInfos(url, course, term, resid_list));
                    if (null == info) {
                        System.err.println("no video info for chapter, skipped: " + sub_name);
                        continue;
                    }
                    // The info holds the m3u8 address; the m3u8 lists the segment addresses
                    downloadSegments(info.getString("url"), sub_dir);
                    // Merge once the download is done; a partial download can be merged as well
                    List<File> files = sortedSegments(sub_dir); // all segments of one chapter
                    if (files.isEmpty()) {
                        continue;
                    }
                    FileUtil.merge(new File(dir, sub_name + ".mp4"), files);
                }
            }
        }
    }

    /**
     * Calls {@code describe_rec_video} for one video and returns the {@code infos} list of the
     * response (one entry per rendition in the sample response below), or null when the
     * response cannot be parsed.
     */
    private static DataSet requestVideoInfos(String url, String course, String term, String resid_list) {
        // The header query parameter carries identity information. Its structure was not
        // analysed in detail: copy one from a real browser request and adjust the parameters.
        // "登录帐号" is a placeholder for the login account.
        String video_url = "https://ke.qq.com/cgi-proxy/rec_video/describe_rec_video?course_id="+course+"&file_id="+resid_list+"&header=%7B%22uin%22%3A%22登录帐号%22%2C%22srv_appid%22%3A201%2C%22cli_appid%22%3A%22ke%22%2C%22cli_info%22%3A%7B%22cli_platform%22%3A3%7D%7D&term_id="+term+"&vod_type=0&bkn=721629886&r=0.4348";
        Map<String, String> header = new HashMap<>();
        header.put("accept","*/*");
        header.put("accept-language","zh-CN,zh;q=0.9");
        header.put("priority","u=1, i");
        header.put("referer",url);
        header.put("sec-ch-ua","Windows");
        // Login information is required here: copy the cookie of any logged-in request.
        // NOTE(review): this is a session credential; replace it with your own and avoid
        // committing real values to version control.
        String cookie = "iip=0; RK=2n+st9fZHV; ptcz=7ff6503bef5e05361614b33ada83047eaeb86c803dbed502d7b115d55d4d3278; pgv_pvid=4061326676; pac_uid=1_登录帐号; o_cookie=登录帐号; logTrackKey=38c228b9cb214630b31f790247b8f3c0; qq_domain_video_guid_verify=27d9ca0241f55a71; _qimei_uuid42=17c06143828100f7d7e4fa4fa6e1328b2705804f6a; _qimei_fingerprint=8e779cad0569606a4bb9280f2283868b; _qimei_q36=; _qimei_h38=ad2047bdd7e4fa4fa6e1328b02000002717c06; _clck=pvf95v|1|fmd|0; tdw_data_testid=; tdw_data_flowid=; miniapp_qrcode_id=95fcb9ceb0a9473da0119564c641f9f4; _qpsvr_localtk=0.14905287550347412; sessionPath=17203675643166189775506; auth_version=2.0; mix_login_mode=true; uid_type=0; uin=登录帐号; p_uin=登录帐号; p_luin=登录帐号; uid_uin=登录帐号; uid_a2=644d6dfd1065fac6b8bb2338345eb7b5fad7a5ec04aaf7ecddfea3b609de999039f079be06303da0a907a73ebf69d208687a58551488d7be1c3c25108341751190ca4d209564c074; uid_origin_uid_type=0; uid_origin_auth_type=1003; tdw_data_new_2={\"auin\":\"-\",\"sourcetype\":\"\",\"sourcefrom\":\"\",\"ver9\":\"登录帐号\",\"uin\":\"登录帐号\",\"visitor_id\":\"007188006273237235\",\"ver10\":\"login\",\"url_page\":\"course\",\"url_module\":\"\",\"url_position\":\"\",\"sessionPath\":\"17203675643166189775506\"}";
        header.put("cookie", cookie);
        header.put("sec-ch-ua-mobile","?0");
        header.put("sec-fetch-dest","empty");
        header.put("sec-fetch-mode","cors");
        header.put("sec-fetch-site","same-origin");
        header.put("user-agent","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36");
        header.put("x-request-id","1292ba6d-8d58-a055-8451-ab48431d7c36");
        String txt = HttpUtil.get(header, video_url).getText();
        DataRow json = DataRow.parseJson(txt);
        if (null == json) {
            return null;
        }
        return (DataSet) json.recursion("result", "rec_video_info", "infos");
    }

    /**
     * Downloads every segment listed in an m3u8 playlist into {@code sub_dir}. Files are named
     * after the zero-padded {@code start} offset of the segment so that they sort in playback
     * order; when a line has no {@code start=} value its position in the playlist is used.
     */
    private static void downloadSegments(String m3u8_url, File sub_dir) {
        if (null == m3u8_url) {
            return;
        }
        // Sample playlist address:
        // https://1258712167.vod2.myqcloud.com/25121a6avodtransbj1258712167/d5fe089c5285890803997328557/drm/v.f30742.m3u8?t=66954743&exper=0&us=5091421974650498166&sign=1c2bf02590b18560013aef6586e08241
        // Base directory = address without query string, cut at the last '/'. Plain string
        // operations are used because split("/v.") is a regex whose '.' matches any character.
        int queryAt = m3u8_url.indexOf('?');
        String path = queryAt < 0 ? m3u8_url : m3u8_url.substring(0, queryAt);
        String m3u8_url_dir = path.substring(0, path.lastIndexOf('/'));
        String txt = HttpUtil.get(m3u8_url).getText();
        if (null == txt) {
            return;
        }
        int index = 0;
        for (String raw : txt.split("\n")) {
            String line = raw.trim(); // drops the '\r' of CRLF playlists
            // Keep only the lines that are not comments/tags
            if (line.isEmpty() || line.startsWith("#")) {
                continue;
            }
            // Build the segment address from the playlist directory unless it is already absolute
            String ts_url = line.startsWith("http") ? line : m3u8_url_dir + "/" + line;
            // Use the start offset as file name
            String start = RegularUtil.cut(line, "start=", "&");
            if (null == start) {
                start = String.valueOf(index);
            }
            // Pad to a fixed width so that the names sort correctly
            start = BasicUtil.fillChar(start, "0", 12);
            File file = new File(sub_dir, start + ".ts");
            HttpUtil.download(ts_url, file);
            index++;
        }
        /*
        Sample playlist content:
        v.f30742.ts?start=0&end=309839&type=mpegts&exper=0&sign=022fc826d869da96dd0685410e3d3afd&t=669546E8&us=8998661307906711326
        #EXT-X-KEY:METHOD=AES-128,URI="https://ke.qq.com/cgi-bin/qcloud/get_dk?edk=CiCZPu8ic5tMt%2FbDcsADud6FkuIKsucShsZsJKO%2BNndifRCO08TAChiaoOvUBCokOTMyNDg4YmItOWZjYS00MzFiLWJiYjItNjFmMDhjYjNlYmM3&fileId=5285890803997328557&keySource=VodBuildInKMS",IV=0x00000000000000000000000000000000
        #EXTINF:9.000000,
        v.f30742.ts?start=309840&end=722703&type=mpegts&exper=0&sign=022fc826d869da96dd0685410e3d3afd&t=669546E8&us=8998661307906711326
        #EXT-X-KEY:METHOD=AES-128,URI="https://ke.qq.com/cgi-bin/qcloud/get_dk?edk=CiCZPu8ic5tMt%2FbDcsADud6FkuIKsucShsZsJKO%2BNndifRCO08TAChiaoOvUBCokOTMyNDg4YmItOWZjYS00MzFiLWJiYjItNjFmMDhjYjNlYmM3&fileId=5285890803997328557&keySource=VodBuildInKMS",IV=0x00000000000000000000000000000000
        */
    }

    /**
     * Returns the {@code .ts} files below {@code dir} sorted by path, never null.
     * {@link File#listFiles()} guarantees no order, so the order is established here
     * instead of relying on the file system.
     */
    private static List<File> sortedSegments(File dir) {
        List<File> found = FileUtil.getAllChildrenFile(dir, "ts");
        List<File> ordered = new ArrayList<>();
        if (null != found) {
            ordered.addAll(found);
        }
        Collections.sort(ordered);
        return ordered;
    }

    /** Returns the first row of {@code set}, or null when the set is null or empty. */
    private static DataRow firstRow(DataSet set) {
        if (null != set) {
            for (DataRow row : set) {
                return row;
            }
        }
        return null;
    }

    /**
     * Makes a title usable as a directory/file name: removes spaces and the characters that
     * are not allowed in Windows file names. A null title becomes an empty string.
     */
    private static String safeName(String name) {
        if (null == name) {
            return "";
        }
        return name.replaceAll("[\\\\/:*?\"<>| ]", "");
    }

    /*
    JSON structure taken from the course page (truncated)
    {
      "props": {
        "pageProps": {
          "courseInfo": {
            "status": 2,
            "err": null,
            "data": Object{...},
            "curTermId": -1,
            "curTerm": Object{...},
            "catalogStatus": 2,
            "catalogErr": null,
            "catalogMap": {
              "102844777": [
                {
                  "ch_id": 2967813,
                  "introduce": "",
                  "ch_no": 0,
                  "name": "",
                  "sub_info": [
                    {
                      "csid": 16252645,
                      "sub_id": 0,
                      "introduce": "",
                      "name": "Netty 8小时 快速入门",
                      "endtime": 0,
                      "term_id": 102844777,
                      "task_info": [
                        {
                          "restrict_flag": 0,
                          "create_time": 1592274127,
                          "csid": 16252645,
                          "introduce": "",
                          "special_flag": 0,
                          "endtime": 0,
                          "resid_ext": "{\u0026quot;times\u0026quot;:1237,\u0026quot;txcloud\u0026quot;:1,\u0026quot;vid\u0026quot;:\u0026quot;\u0026quot;}",
                          "term_id": 102844777,
                          "type": 2,
                          "bgtime": 0,
                          "expr_flag": 0,
                          "te_list": [
                            3253161180
                          ],
                          "name": "BIO/NIO/AIO三种IO模式概述",
                          "task_bit_flag": 0,
                          "resid_list": "5285890803997328557",
                          "tu_list": [
                            3253161180
                          ],
                          "expr_range": 0,
                          "append_flag": 0,
                          "aid": 121675,
                          "taid": "9558853446911307",
                          "cid": 2737483
                        },
                        Object{...},
                        Object{...},
    */

    /*
    JSON structure returned by describe_rec_video
    {
      "result": {
        "header": {
          "code": 0,
          "msg": "success",
          "ext_msg": ""
        },
        "rec_video_info": {
          "file_id": "5285890803997328557",
          "dk": "",
          "infos": [
            {
              "url": "https://1258712167.vod2.myqcloud.com/25121a6avodtransbj1258712167/d5fe089c5285890803997328557/drm/v.f30742.m3u8?t=66954743&exper=0&us=5091421974650498166&sign=1c2bf02590b18560013aef6586e08241",
              "duration": 1238,
              "expire": 691200,
              "width": 1920,
              "height": 1080,
              "audio_codec": "aac;",
              "video_codec": "h264;",
              "template_id": 30742,
              "is_speed_hd": 0,
              "size": 66768275,
              "audio_bitrate": 43066,
              "video_bitrate": 388352,
              "size_byte": "66768275",
              "ts_decode_iv": "",
              "ts_list": [
                {
                  "url": "https://1258712167.vod2.myqcloud.com/25121a6avodtransbj1258712167/d5fe089c5285890803997328557/drm/v.f30742.ts?start=0&end=309839&type=mpegts&exper=0&sign=1c2bf02590b18560013aef6586e08241&t=66954743&us=5091421974650498166",
                  "duration": 10
                },
                {
                  "url": "https://1258712167.vod2.myqcloud.com/25121a6avodtransbj1258712167/d5fe089c5285890803997328557/drm/v.f30742.ts?start=309840&end=722703&type=mpegts&exper=0&sign=1c2bf02590b18560013aef6586e08241&t=66954743&us=5091421974650498166",
                  "duration": 9
                },
                {
                  "url": "https://1258712167.vod2.myqcloud.com/25121a6avodtransbj1258712167/d5fe089c5285890803997328557/drm/v.f30742.ts?start=722704&end=1309839&type=mpegts&exper=0&sign=1c2bf02590b18560013aef6586e08241&t=66954743&us=5091421974650498166",
                  "duration": 10
                }
              ]
            },
            {
              "url": "https://1258712167.vod2.myqcloud.com/25121a6avodtransbj1258712167/d5fe089c5285890803997328557/drm/v.f30741.m3u8?t=66954743&exper=0&us=2525804297650852200&sign=afc734435a2453807e2a3741c5ce7154",
              "duration": 1238,
              "expire": 691200,
              "width": 1280,
              "height": 720,
              "audio_codec": "aac;",
              "video_codec": "h264;",
              "template_id": 30741,
              "is_speed_hd": 0,
              "size": 42613287,
              "audio_bitrate": 43066,
              "video_bitrate": 232169,
              "size_byte": "42613287",
              "ts_decode_iv": "",
              "ts_list": [
                {
                  "url": "https://1258712167.vod2.myqcloud.com/25121a6avodtransbj1258712167/d5fe089c5285890803997328557/drm/v.f30741.ts?start=0&end=230879&type=mpegts&exper=0&sign=afc734435a2453807e2a3741c5ce7154&t=66954743&us=2525804297650852200",
                  "duration": 10
                },
                {
                  "url": "https://1258712167.vod2.myqcloud.com/25121a6avodtransbj1258712167/d5fe089c5285890803997328557/drm/v.f30741.ts?start=230880&end=594479&type=mpegts&exper=0&sign=afc734435a2453807e2a3741c5ce7154&t=66954743&us=2525804297650852200",
                  "duration": 10
                },
                {
                  "url": "https://1258712167.vod2.myqcloud.com/25121a6avodtransbj1258712167/d5fe089c5285890803997328557/drm/v.f30741.ts?start=594480&end=913519&type=mpegts&exper=0&sign=afc734435a2453807e2a3741c5ce7154&t=66954743&us=2525804297650852200",
                  "duration": 10
                }
              ]
            },
            {
              "url": "https://1258712167.vod2.myqcloud.com/25121a6avodtransbj1258712167/d5fe089c5285890803997328557/drm/v.f30740.m3u8?t=66954743&exper=0&us=6106913704070875721&sign=7eaceda19651263ea918d02194672a75",
              "duration": 1238,
              "expire": 691200,
              "width": 852,
              "height": 480,
              "audio_codec": "aac;",
              "video_codec": "h264;",
              "template_id": 30740,
              "is_speed_hd": 0,
              "size": 29092723,
              "audio_bitrate": 43066,
              "video_bitrate": 144747,
              "size_byte": "29092723",
              "ts_decode_iv": "",
              "ts_list": [
                {
                  "url": "https://1258712167.vod2.myqcloud.com/25121a6avodtransbj1258712167/d5fe089c5285890803997328557/drm/v.f30740.ts?start=0&end=166575&type=mpegts&exper=0&sign=7eaceda19651263ea918d02194672a75&t=66954743&us=6106913704070875721",
                  "duration": 10
                },
                {
                  "url": "https://1258712167.vod2.myqcloud.com/25121a6avodtransbj1258712167/d5fe089c5285890803997328557/drm/v.f30740.ts?start=166576&end=398207&type=mpegts&exper=0&sign=7eaceda19651263ea918d02194672a75&t=66954743&us=6106913704070875721",
                  "duration": 10
                },
                {
                  "url": "https://1258712167.vod2.myqcloud.com/25121a6avodtransbj1258712167/d5fe089c5285890803997328557/drm/v.f30740.ts?start=398208&end=608783&type=mpegts&exper=0&sign=7eaceda19651263ea918d02194672a75&t=66954743&us=6106913704070875721",
                  "duration": 10
                }
              ]
            }
          ],
          "master_play_list": "https://1258712167.vod2.myqcloud.com/25121a6avodtransbj1258712167/d5fe089c5285890803997328557/drm/master_playlist.m3u8?t=66954743&exper=0&us=8747950058921443685&sign=3540ef669288e542073d1d006780f79d",
          "p_sign": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhcHBJZCI6MTI1ODcxMjE2NywiY3VycmVudFRpbWVTdGFtcCI6MTcyMDM2NzkzOSwiZXhwaXJlVGltZVN0YW1wIjoxNzIwNDU0MzM5LCJmaWxlSWQiOiI1Mjg1ODkwODAzOTk3MzI4NTU3IiwidXJsQWNjZXNzSW5mbyI6eyJ0IjoiNjY4YzBjYzMifX0.8QH9Q7EBWUcHiIbLpYvAYe2nGRTRpz1CC6mql7k109o",
          "subtitles": [
            {
              "url": "https://ke-subtitle.myoed.com/prod/vtt/6vZzhppN5285890803997328557.vtt?sign=27ca4c1379e3870ea5c2c826edadda73&t=66954743",
              "type": "vtt"
            },
            {
              "url": "https://ke-subtitle.myoed.com/prod/srt/6vZzhppN5285890803997328557.srt?sign=cc507903830af7eac9f7b81dd992b790&t=66954743",
              "type": "srt"
            }
          ],
          "d_sign": "b9e3ceb594b941e0b441c3b86db8627c72158c289a7c49e2442d4e8b657053ec7f497e380c56804900134b251259563cf818e8124be3445a",
          "drm_infos": [
          ]
        }
      },
      "retcode": 0
    }
    */
}