Java实现HLS:Java版本对某网站HLS视频的爬取与聚合

该博客介绍了如何使用Java对指定网站的HLS视频进行爬取和聚合。作者首先定义了目标URL、存储路径等相关常量,然后通过HTTP请求获取m3u8索引文件,并解析出其中的ts片段链接。接着,程序使用多线程下载ts文件,并调用ffmpeg将这些片段合并为完整视频。此外,程序还会记录下载日志和错误日志。
摘要由CSDN通过智能技术生成

import java.io.*;

import java.net.HttpURLConnection;

import java.net.URL;

import java.text.SimpleDateFormat;

import java.util.*;

import java.util.regex.Matcher;

import java.util.regex.Pattern;

/*

* @auth:hjx

* 2020-05-29

*/

/**
 * Crawls the video listing pages of {@link #TARGET_URL}, downloads the HLS
 * stream (m3u8 index plus ts segments) of every video that is not yet recorded
 * in the down log, and merges each stream into an mp4 file by invoking ffmpeg.
 *
 * <p>The work is split over {@link #THREAD_NUM} threads. {@link #hlog} and
 * {@link #writeDown} are {@code synchronized} because all workers append to
 * the same log files.
 */
public class JavaSpiderQBL {

    private static final String TARGET_URL = "https://www.hjxstbserver.xyz";

    private static final String DISK_PATH = "e:";

    private static final String WORK_PATH = DISK_PATH + "/javaqbl";

    private static final String MERGER_PATH = WORK_PATH + "/MV";

    private static final String LOG_NAME = WORK_PATH + "/spider.log";

    private static final String DOWNED_NAME = WORK_PATH + "/down.log";

    private static final String PREFIX = "hjx->";

    private static final String PREFIX_INFO = PREFIX + "[INFO]:";

    private static final String PREFIX_WARNING = PREFIX + "[WARNING]:";

    private static final String PREFIX_ERROR = PREFIX + "[ERROR]:";

    /** Selects which listing category is crawled (see {@link #getTargetURL}). */
    private static final boolean ISSUREN = true;

    /** Number of listing pages to crawl. */
    private static final int PAGE = 1;

    /** Number of download worker threads. */
    private static final int THREAD_NUM = 4;

    /** How many times one ts segment is requested before it is given up. */
    private static final int MAX_DOWNLOAD_ATTEMPTS = 5;

    /** A stream is merged only when fewer than this many segments were lost. */
    private static final int MAX_FAILED_SEGMENTS = 5;

    private static final int CONNECT_TIMEOUT_MS = 15000;

    private static final int READ_TIMEOUT_MS = 60000;

    /** Captures the value of every {@code title="..."} attribute. */
    private static final Pattern TITLE_PATTERN = Pattern.compile("title=\"([^\"]*)\"");

    /** Captures the path of every {@code href="/video/<digits>/"} attribute. */
    private static final Pattern LINK_PATTERN = Pattern.compile("href=\"(/video/\\d*/)\"");

    /*
     * NOTE(review): the original pattern literal was lost (the source text is
     * cut off right after the opening quote). This reconstruction matches any
     * double-quoted protocol-relative URL containing ".m3u8", which is
     * consistent with the surviving code that took the first quoted value of
     * the match and prefixed it with "http:". Confirm against the real page.
     */
    private static final Pattern M3U8_PATTERN = Pattern.compile("\"(//[^\"]*\\.m3u8[^\"]*)\"");

    /**
     * Downloads one HLS stream and merges it into an mp4 file.
     *
     * <p>Steps: fetch the m3u8 index, write a local copy that references the
     * segments by bare file name, download every segment into
     * {@code workPath/<title>}, then run ffmpeg on the local index. The title
     * is appended to the down log only after ffmpeg exits with code 0.
     *
     * @param targetMap  entry with the keys {@code "name"} (video title) and
     *                   {@code "link"} (URL of the m3u8 index)
     * @param workPath   directory under which the per-video folder is created
     * @param logName    path of the log file
     * @param mergerPath directory that receives the merged mp4
     * @param downName   path of the file listing finished titles
     * @return {@code true} when the stream was merged successfully;
     *         {@code false} when the index could not be fetched or written, it
     *         listed no segments, too many segments were lost, or ffmpeg failed
     */
    public static boolean downTs(Map<String, String> targetMap, String workPath, String logName,
            String mergerPath, String downName) {
        String title = targetMap.get("name");
        String link = targetMap.get("link");
        String threadNum = Thread.currentThread().getName();
        if (title == null || title.isEmpty() || link == null || link.isEmpty()) {
            hlog(logName, PREFIX_ERROR, threadNum, "target entry without name or link, skipped");
            return false;
        }

        // The title comes from scraped HTML, so it must not be trusted as a path component.
        String safeTitle = safeFileName(title);
        String fileFolder = workPath + "/" + safeTitle;
        createDir(fileFolder, logName);

        String indexContent = getURL(link);
        if (indexContent.isEmpty()) {
            return false;
        }

        String indexName = fileFolder + "/index.m3u8";
        List<String> tsList = new ArrayList<>();
        hlog(logName, PREFIX_INFO, threadNum, "Getting index", title, "file and downtslist");
        try (FileWriter writer = new FileWriter(indexName, false)) {
            for (String rawLine : indexContent.split("\n")) {
                String line = rawLine.trim();
                if (line.isEmpty()) {
                    continue;
                }
                if (line.startsWith("#")) {
                    // Presumably the duration tag of an injected segment -- TODO confirm.
                    if (!line.contains("#EXTINF:10.041667,")) {
                        writer.write(line + "\n");
                    }
                } else {
                    String tsName = lastPathSegment(line);
                    // Segments ending in "aaa0.ts" are dropped; presumably injected clips -- TODO confirm.
                    if (!tsName.isEmpty() && !tsName.endsWith("aaa0.ts")) {
                        // The local index refers to segments by file name only.
                        writer.write(tsName + "\n");
                        tsList.add(line);
                    }
                }
            }
        } catch (IOException e) {
            hlog(logName, PREFIX_ERROR, threadNum, "write index of", title, "failure:", String.valueOf(e));
            return false;
        }

        if (tsList.isEmpty()) {
            hlog(logName, PREFIX_ERROR, threadNum, "the", title, "no ts file need down");
            return false;
        }

        hlog(logName, PREFIX_INFO, threadNum, "Start downloading ", title, "ts file");
        int downFailur = 0;
        for (String tsRef : tsList) {
            String tsName = lastPathSegment(tsRef);
            File target = new File(fileFolder + "/" + tsName);
            if (!downloadSegment(link, tsRef, target, logName, threadNum, title, tsName)) {
                downFailur++;
            }
        }

        if (downFailur >= MAX_FAILED_SEGMENTS) {
            hlog(logName, PREFIX_ERROR, threadNum, "the", title, "lost",
                    String.valueOf(downFailur), "ts files, merger skipped");
            return false;
        }

        String outputFile = mergerPath + "/" + safeTitle.replace(" ", "") + ".mp4";
        return mergeWithFfmpeg(fileFolder, outputFile, title, logName, downName);
    }

    /**
     * Downloads a single ts segment, retrying up to
     * {@link #MAX_DOWNLOAD_ATTEMPTS} times.
     *
     * @param indexLink URL of the m3u8 index; relative segment references are
     *                  resolved against it, absolute ones are used unchanged
     * @return {@code true} once the segment was written completely
     */
    private static boolean downloadSegment(String indexLink, String tsRef, File target,
            String logName, String threadNum, String title, String tsName) {
        Exception lastError = null;
        for (int attempt = 1; attempt <= MAX_DOWNLOAD_ATTEMPTS; attempt++) {
            HttpURLConnection conn = null;
            try {
                URL url = new URL(new URL(indexLink), tsRef);
                conn = (HttpURLConnection) url.openConnection();
                conn.setConnectTimeout(CONNECT_TIMEOUT_MS);
                conn.setReadTimeout(READ_TIMEOUT_MS);
                conn.setDoInput(true);
                conn.connect();
                try (InputStream in = conn.getInputStream();
                        OutputStream out = new FileOutputStream(target)) {
                    byte[] buffer = new byte[4 * 1024];
                    int byteRead;
                    while ((byteRead = in.read(buffer)) != -1) {
                        out.write(buffer, 0, byteRead);
                    }
                }
                hlog(logName, PREFIX_INFO, threadNum, "down", title, tsName, "success times:",
                        String.valueOf(attempt));
                return true;
            } catch (IOException | RuntimeException e) {
                // Remember the cause and retry; a later attempt overwrites any partial file.
                lastError = e;
            } finally {
                if (conn != null) {
                    conn.disconnect();
                }
            }
        }
        hlog(logName, PREFIX_ERROR, threadNum, "down", title, tsName, "failure times:",
                String.valueOf(MAX_DOWNLOAD_ATTEMPTS), "lost the ts", String.valueOf(lastError));
        return false;
    }

    /**
     * Runs ffmpeg on the local index and waits for it to finish. The title is
     * recorded in the down log only when ffmpeg exits with code 0.
     */
    private static boolean mergeWithFfmpeg(String fileFolder, String outputFile, String title,
            String logName, String downName) {
        List<String> commands = new ArrayList<>();
        commands.add("cmd.exe");
        commands.add("/c");
        commands.add("ffmpeg");
        // Overwrite without asking: nobody answers a prompt, so ffmpeg would otherwise hang.
        commands.add("-y");
        commands.add("-i");
        commands.add(fileFolder + "/index.m3u8");
        commands.add("-vcodec");
        commands.add("copy");
        commands.add("-acodec");
        commands.add("copy");
        commands.add("-absf");
        commands.add("aac_adtstoasc");
        commands.add(outputFile);
        try {
            ProcessBuilder builder = new ProcessBuilder(commands);
            // Send all ffmpeg output to a file so the child never blocks on a full pipe.
            builder.redirectErrorStream(true);
            builder.redirectOutput(new File(fileFolder + "/ffmpeg.log"));
            Process process = builder.start();
            process.getOutputStream().close();
            int exitCode = process.waitFor();
            if (exitCode != 0) {
                hlog(logName, PREFIX_ERROR, "merger the", title, "to ", outputFile, "failure",
                        "exit code:", String.valueOf(exitCode));
                return false;
            }
            hlog(logName, PREFIX_INFO, "merger the", title, "to ", outputFile, "success");
            writeDown(downName, logName, title);
            return true;
        } catch (IOException e) {
            hlog(logName, PREFIX_ERROR, "merger the", title, "to ", outputFile, "failure",
                    String.valueOf(e));
            return false;
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            hlog(logName, PREFIX_ERROR, "merger the", title, "to ", outputFile, "failure",
                    "interrupted");
            return false;
        }
    }

    /**
     * Appends a finished title as one line to the down log.
     *
     * @param downFile path of the down log
     * @param logName  path of the log file
     * @param title    title to record
     */
    public synchronized static void writeDown(String downFile, String logName, String title) {
        ensureParentDir(downFile);
        try (FileWriter writer = new FileWriter(downFile, true)) {
            writer.write(title + "\n");
        } catch (IOException e) {
            hlog(logName, PREFIX_ERROR, "Write down", title, "failure", String.valueOf(e));
            return;
        }
        hlog(logName, PREFIX_INFO, "Write down", title, "success");
    }

    /**
     * Splits {@code source} into {@code n} consecutive slices whose sizes differ
     * by at most one; the first {@code source.size() % n} slices get the extra
     * element. The slices are {@link List#subList} views of {@code source}.
     *
     * @param source list to split
     * @param n      number of slices, must be positive
     * @return exactly {@code n} slices (empty ones when {@code source} is short)
     * @throws IllegalArgumentException if {@code n} is not positive
     */
    public static <T> List<List<T>> averageAssign(List<T> source, int n) {
        if (n <= 0) {
            throw new IllegalArgumentException("n must be positive: " + n);
        }
        List<List<T>> result = new ArrayList<>(n);
        int remainder = source.size() % n;
        int number = source.size() / n;
        int from = 0;
        for (int i = 0; i < n; i++) {
            int to = from + number + (i < remainder ? 1 : 0);
            result.add(source.subList(from, to));
            from = to;
        }
        return result;
    }

    /**
     * Collects the videos to download from the first {@code page} listing pages.
     *
     * <p>For every listing page the titles and the {@code /video/<id>/} links
     * are extracted; a page is used only when both lists have the same,
     * non-zero length. Titles already present in the down log are skipped. For
     * the remaining ones the video page is fetched and searched for the m3u8
     * URL.
     *
     * @param targetURL base URL of the site
     * @param page      number of listing pages to visit, starting at page 1
     * @param logName   path of the log file
     * @return one map per video with the keys {@code "name"} and {@code "link"}
     */
    public static List<Map<String, String>> getTargetURL(String targetURL, int page, String logName) {
        List<Map<String, String>> targerList = new ArrayList<>();
        String targetURLPrefix = ISSUREN
                ? targetURL + "/videos/amateur?page="
                : targetURL + "/videos/japanese?page=";

        // Nothing appends to the down log while this method runs, so one read is enough.
        Set<String> downloaded = readDownloadedTitles(DOWNED_NAME, logName);

        for (int pageNo = 1; pageNo <= page; pageNo++) {
            String urlContent = getURL(targetURLPrefix + pageNo);
            if (urlContent.isEmpty()) {
                continue;
            }

            List<String> title = new ArrayList<>();
            Matcher matcher = TITLE_PATTERN.matcher(urlContent);
            while (matcher.find()) {
                String name = matcher.group(1);
                // Titles carrying these strings are left out.
                if (name.contains("精英娱乐城") || name.contains("东亚娱乐城")) {
                    continue;
                }
                title.add(name);
            }

            List<String> links = new ArrayList<>();
            Matcher matcherLinks = LINK_PATTERN.matcher(urlContent);
            while (matcherLinks.find()) {
                links.add(targetURL + matcherLinks.group(1));
            }

            // Titles and links are paired by position, which only works when the counts agree.
            if (title.isEmpty() || title.size() != links.size()) {
                hlog(logName, PREFIX_WARNING, "page", String.valueOf(pageNo), "skipped, titles:",
                        String.valueOf(title.size()), "links:", String.valueOf(links.size()));
                continue;
            }

            for (int j = 0; j < title.size(); j++) {
                String name = title.get(j);
                if (name.isEmpty()) {
                    continue;
                }
                if (downloaded.contains(name)) {
                    hlog(logName, PREFIX_WARNING, name, "Has been downloaded");
                    continue;
                }
                Matcher matcherm3u8 = M3U8_PATTERN.matcher(getURL(links.get(j)));
                if (matcherm3u8.find()) {
                    Map<String, String> map = new HashMap<>();
                    map.put("name", name);
                    map.put("link", "http:" + matcherm3u8.group(1));
                    targerList.add(map);
                } else {
                    hlog(logName, PREFIX_WARNING, "Get indexM3u8 URL have a error");
                }
            }
        }
        return targerList;
    }

    /** Reads the non-empty lines of the down log; a missing file yields an empty set. */
    private static Set<String> readDownloadedTitles(String downFile, String logName) {
        Set<String> titles = new HashSet<>();
        File file = new File(downFile);
        if (!file.exists()) {
            return titles;
        }
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (!line.isEmpty()) {
                    titles.add(line);
                }
            }
        } catch (IOException e) {
            hlog(logName, PREFIX_ERROR, "Read down log", downFile, "failure", String.valueOf(e));
        }
        return titles;
    }

    /**
     * Fetches a URL with HTTP GET and returns the body decoded as UTF-8, every
     * line terminated by {@code '\n'}.
     *
     * @param uri URL to fetch
     * @return the body, or the empty string when the request fails for any reason
     */
    public static String getURL(String uri) {
        HttpURLConnection urlcon = null;
        try {
            URL url = new URL(uri);
            urlcon = (HttpURLConnection) url.openConnection();
            urlcon.setRequestMethod("GET");
            urlcon.setConnectTimeout(CONNECT_TIMEOUT_MS);
            urlcon.setReadTimeout(READ_TIMEOUT_MS);
            urlcon.connect();
            try (BufferedReader buffer = new BufferedReader(
                    new InputStreamReader(urlcon.getInputStream(), "utf-8"))) {
                StringBuilder bs = new StringBuilder();
                String line;
                while ((line = buffer.readLine()) != null) {
                    bs.append(line).append('\n');
                }
                return bs.toString();
            }
        } catch (IOException | RuntimeException e) {
            // Callers treat "" as "could not fetch".
            return "";
        } finally {
            if (urlcon != null) {
                urlcon.disconnect();
            }
        }
    }

    /**
     * Creates a directory (including missing parents) and logs the outcome.
     *
     * @param fileName path of the directory
     * @param logName  path of the log file
     */
    public static void createDir(String fileName, String logName) {
        File fileDir = new File(fileName);
        if (fileDir.exists()) {
            hlog(logName, PREFIX_INFO, fileName, "Folder already exists");
        } else if (fileDir.mkdirs() || fileDir.isDirectory()) {
            // isDirectory() covers another thread creating the folder in the meantime.
            hlog(logName, PREFIX_INFO, fileName, "Successfully created folder");
        } else {
            hlog(logName, PREFIX_ERROR, fileName, "Failed to create folder");
        }
    }

    /**
     * Writes one log line, {@code prefixType + timestamp + " " + parts...}, to
     * standard output and appends it to the log file.
     *
     * @param logName       path of the log file; its parent directory is
     *                      created when missing
     * @param prefixType    text placed before the timestamp
     * @param logStringList message parts, joined with single spaces
     */
    public synchronized static void hlog(String logName, String prefixType, String... logStringList) {
        String time = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date());
        StringBuilder line = new StringBuilder().append(prefixType).append(time);
        for (String arg : logStringList) {
            line.append(' ').append(arg);
        }
        String logString = line.toString();

        // Print first so the message is visible even when the file cannot be written.
        System.out.println(logString);
        ensureParentDir(logName);
        try (FileWriter writer = new FileWriter(logName, true)) {
            writer.write(logString + "\n");
        } catch (IOException e) {
            System.err.println("cannot write log file " + logName + ": " + e);
        }
    }

    /** Creates the parent directory of {@code path} when it does not exist yet. */
    private static void ensureParentDir(String path) {
        File parent = new File(path).getAbsoluteFile().getParentFile();
        if (parent != null && !parent.exists()) {
            parent.mkdirs();
        }
    }

    /** Returns the text after the last {@code '/'}, or {@code ""} when there is none. */
    private static String lastPathSegment(String ref) {
        String[] parts = ref.split("/");
        return parts.length == 0 ? "" : parts[parts.length - 1];
    }

    /**
     * Replaces characters that are path separators or are not allowed in
     * Windows file names with {@code '_'}; blank and dot-only names become
     * {@code "_"}. Names without such characters are returned unchanged.
     */
    private static String safeFileName(String name) {
        String cleaned = name.replaceAll("[\\\\/:*?\"<>|]", "_");
        if (cleaned.trim().isEmpty() || cleaned.matches("\\.+")) {
            return "_";
        }
        return cleaned;
    }

    /**
     * Entry point: collects the targets, splits them over {@link #THREAD_NUM}
     * worker threads and waits for all of them.
     *
     * @param args unused
     */
    public static void main(String args[]) {
        hlog(LOG_NAME, PREFIX_INFO, "Buddha bless hjxSpider program no bugs,Start now");
        createDir(MERGER_PATH, LOG_NAME);

        List<Map<String, String>> targerList = getTargetURL(TARGET_URL, PAGE, LOG_NAME);
        List<List<Map<String, String>>> threadURLList = averageAssign(targerList, THREAD_NUM);

        List<Thread> listThred = new ArrayList<>();
        for (List<Map<String, String>> threadURL : threadURLList) {
            if (threadURL.isEmpty()) {
                continue;
            }
            Thread downThread = new Thread(() -> {
                for (Map<String, String> map : threadURL) {
                    downTs(map, WORK_PATH, LOG_NAME, MERGER_PATH, DOWNED_NAME);
                }
            });
            downThread.start();
            listThred.add(downThread);
        }

        try {
            for (Thread runThread : listThred) {
                runThread.join();
            }
        } catch (InterruptedException e) {
            // Keep the interrupt status for whoever runs this thread.
            Thread.currentThread().interrupt();
            hlog(LOG_NAME, PREFIX_WARNING, "interrupted while waiting for download threads");
        }
        hlog(LOG_NAME, PREFIX_INFO, "Thanks for Buddha blessed hjxSpider program no bugs,over now");
    }

}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值