import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/*
 * Author: hjx
 * Date: 2020-05-29
 */
/**
 * Small HLS video spider.
 *
 * <p>{@link #main} scrapes the listing pages below {@link #TARGET_URL} for video titles and
 * detail-page links, looks up the m3u8 index URL on every detail page, splits the resulting work
 * list over {@link #THREAD_NUM} threads, and each thread downloads the {@code .ts} segments of its
 * videos and asks {@code ffmpeg} to merge them into one {@code .mp4} per video. Titles that were
 * merged successfully are appended to {@link #DOWNED_NAME} so later runs skip them.
 */
public class JavaSpiderQBL {
    private static final String TARGET_URL = "https://www.hjxstbserver.xyz";
    private static final String DISK_PATH = "e:";
    private static final String WORK_PATH = DISK_PATH + "/javaqbl";
    private static final String MERGER_PATH = WORK_PATH + "/MV";
    private static final String LOG_NAME = WORK_PATH + "/spider.log";
    private static final String DOWNED_NAME = WORK_PATH + "/down.log";
    private static final String PREFIX = "hjx->";
    private static final String PREFIX_INFO = PREFIX + "[INFO]:";
    private static final String PREFIX_WARNING = PREFIX + "[WARNING]:";
    private static final String PREFIX_ERROR = PREFIX + "[ERROR]:";
    /** Selects which listing category is scraped (see {@link #getTargetURL}). */
    private static final boolean ISSUREN = true;
    /** Number of listing pages to scrape. */
    private static final int PAGE = 1;
    /** Number of download threads started by {@link #main}. */
    private static final int THREAD_NUM = 4;
    /** How often one segment download is attempted before the segment is given up. */
    private static final int MAX_DOWNLOAD_ATTEMPTS = 5;
    /** The merge step only runs while fewer than this many segments were lost. */
    private static final int MAX_FAILED_SEGMENTS = 5;

    private static final Pattern TITLE_PATTERN = Pattern.compile("title=\"([^\"]*)\"");
    private static final Pattern LINK_PATTERN = Pattern.compile("href=\"(/video/\\d*/)\"");
    // NOTE(review): the original expression on this line was lost (the source line was truncated
    // right after `Pattern.compile("`). This is a reconstruction derived from how the match is
    // used: the first double-quoted, protocol-relative ("//...") value that mentions ".m3u8",
    // which is then prefixed with "http:". Confirm against the real detail-page markup.
    private static final Pattern M3U8_PATTERN = Pattern.compile("\"(//[^\"]*\\.m3u8[^\"]*)\"");
    /** Characters that must not end up in a file or directory name built from scraped text. */
    private static final Pattern UNSAFE_FILE_CHARS = Pattern.compile("[\\\\/:*?\"<>|\\p{Cntrl}]");

    /**
     * Downloads every segment of one video and merges them with ffmpeg.
     *
     * @param targetMap  must contain {@code "name"} (video title) and {@code "link"} (m3u8 URL)
     * @param workPath   directory below which a per-video folder is created
     * @param logName    log file passed on to {@link #hlog}
     * @param mergerPath directory that receives the merged {@code .mp4}
     * @param downName   file that records successfully merged titles
     * @return {@code false} when the map is incomplete, the index cannot be fetched or stored, or
     *         it lists no segment to download; {@code true} otherwise (also when the merge step
     *         was skipped or failed, which is only logged)
     */
    public static boolean downTs(Map<String, String> targetMap, String workPath, String logName,
            String mergerPath, String downName) {
        String threadNum = Thread.currentThread().getName();
        String title = targetMap.get("name");
        String link = targetMap.get("link");
        if (title == null || link == null) {
            hlog(logName, PREFIX_ERROR, threadNum, "target needs both a name and a link");
            return false;
        }
        // The title is scraped from a web page: never use it verbatim as a path component.
        String safeTitle = toSafeFileName(title);
        String fileFolder = workPath + "/" + safeTitle;
        createDir(fileFolder, logName);

        String indexContent = getURL(link);
        if (indexContent.isEmpty()) {
            return false;
        }
        String indexName = fileFolder + "/index.m3u8";
        hlog(logName, PREFIX_INFO, threadNum, "Getting index", title, "file and downtslist");
        List<String> tsList;
        try {
            tsList = writeLocalIndex(indexContent, indexName);
        } catch (IOException e) {
            // Without a complete local index ffmpeg cannot merge, so stop here.
            hlog(logName, PREFIX_ERROR, threadNum, "write index of", title, "failure", String.valueOf(e));
            return false;
        }
        if (tsList.isEmpty()) {
            hlog(logName, PREFIX_ERROR, threadNum, "the", title, "no ts file need down");
            return false;
        }

        hlog(logName, PREFIX_INFO, threadNum, "Start downloading ", title, "ts file");
        int downFailur = 0;
        for (String tsURL : tsList) {
            if (!downloadSegment(link, tsURL, fileFolder, title, logName, threadNum)) {
                downFailur++;
            }
        }

        String outputFile = mergerPath + "/" + safeTitle.replace(" ", "") + ".mp4";
        if (downFailur >= MAX_FAILED_SEGMENTS) {
            hlog(logName, PREFIX_ERROR, threadNum, "skip merger of", title, "lost ts files:",
                    String.valueOf(downFailur));
            return true;
        }
        boolean merged = false;
        try {
            merged = mergeSegments(fileFolder, outputFile);
        } catch (IOException e) {
            hlog(logName, PREFIX_ERROR, threadNum, "cannot run ffmpeg", String.valueOf(e));
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
        if (merged) {
            hlog(logName, PREFIX_INFO, "merger the", title, "to ", outputFile, "success");
            // The raw title is recorded because getTargetURL compares against scraped titles.
            writeDown(downName, logName, title);
        } else {
            hlog(logName, PREFIX_ERROR, "merger the", title, "to ", outputFile, "failure");
        }
        return true;
    }

    /**
     * Writes a local copy of the playlist in which every segment URL is replaced by its bare file
     * name, and returns the segment URLs that have to be downloaded.
     */
    private static List<String> writeLocalIndex(String indexContent, String indexName) throws IOException {
        List<String> tsList = new ArrayList<>();
        try (FileWriter writer = new FileWriter(indexName, false)) {
            for (String line : indexContent.split("\n")) {
                if (line.trim().isEmpty()) {
                    continue; // a blank line is neither a tag nor a segment
                }
                if (line.startsWith("#")) {
                    // This exact EXTINF entry and the "aaa0.ts" segment below are dropped
                    // together; presumably an injected clip -- TODO confirm.
                    if (!line.contains("#EXTINF:10.041667,")) {
                        writer.write(line + "\n");
                    }
                } else {
                    String tsName = segmentFileName(line);
                    if (!tsName.endsWith("aaa0.ts")) {
                        writer.write(tsName + "\n");
                        tsList.add(line);
                    }
                }
            }
        }
        return tsList;
    }

    /**
     * Downloads one segment into {@code fileFolder}, trying up to {@link #MAX_DOWNLOAD_ATTEMPTS}
     * times. Returns whether the segment was stored.
     */
    private static boolean downloadSegment(String indexLink, String tsURL, String fileFolder,
            String title, String logName, String threadNum) {
        String tsName = segmentFileName(tsURL);
        File file = new File(fileFolder + "/" + tsName);
        Exception lastError = null;
        for (int attempt = 1; attempt <= MAX_DOWNLOAD_ATTEMPTS; attempt++) {
            HttpURLConnection conn = null;
            try {
                // Resolving against the index URL leaves absolute URLs untouched and makes
                // relative playlist entries work as well.
                URL url = new URL(new URL(indexLink), tsURL);
                conn = (HttpURLConnection) url.openConnection();
                conn.setDoInput(true);
                conn.connect();
                try (InputStream iputstream = conn.getInputStream();
                        OutputStream oputstream = new FileOutputStream(file)) {
                    byte[] buffer = new byte[4 * 1024];
                    int byteRead;
                    while ((byteRead = iputstream.read(buffer)) != -1) {
                        oputstream.write(buffer, 0, byteRead);
                    }
                }
                hlog(logName, PREFIX_INFO, threadNum, "down", title, tsName, "success times:",
                        String.valueOf(attempt));
                return true;
            } catch (IOException | RuntimeException e) {
                lastError = e; // retried by the loop; reported once all attempts are used up
            } finally {
                if (conn != null) {
                    conn.disconnect();
                }
            }
        }
        hlog(logName, PREFIX_ERROR, threadNum, "down", title, tsName, "failure times:",
                String.valueOf(MAX_DOWNLOAD_ATTEMPTS), "lost the ts", String.valueOf(lastError));
        return false;
    }

    /**
     * Runs ffmpeg on the local index and waits for it to finish.
     *
     * @return {@code true} when ffmpeg exited with status 0
     */
    private static boolean mergeSegments(String fileFolder, String outputFile)
            throws IOException, InterruptedException {
        // ffmpeg is started directly rather than through "cmd.exe /c" so that characters in the
        // scraped title can never be interpreted by a shell.
        List<String> commands = new ArrayList<>();
        commands.add("ffmpeg");
        commands.add("-i");
        commands.add(fileFolder + "/index.m3u8");
        commands.add("-vcodec");
        commands.add("copy");
        commands.add("-acodec");
        commands.add("copy");
        commands.add("-absf");
        commands.add("aac_adtstoasc");
        commands.add(outputFile);
        ProcessBuilder builder = new ProcessBuilder(commands);
        // Send all ffmpeg output to a file: an undrained pipe would block the process.
        builder.redirectErrorStream(true);
        builder.redirectOutput(new File(fileFolder, "ffmpeg.log"));
        Process process = builder.start();
        // Closing stdin makes any interactive prompt of ffmpeg see end-of-input at once.
        process.getOutputStream().close();
        return process.waitFor() == 0;
    }

    /** Returns the last path element of a segment URL, made safe for use as a file name. */
    private static String segmentFileName(String tsURL) {
        String[] tsSplit = tsURL.split("/");
        String last = tsSplit.length == 0 ? "" : tsSplit[tsSplit.length - 1];
        return toSafeFileName(last);
    }

    /** Replaces characters that are unsafe in a file name; never returns a blank or dot-only name. */
    private static String toSafeFileName(String name) {
        String safe = UNSAFE_FILE_CHARS.matcher(name).replaceAll("_");
        if (safe.trim().isEmpty() || safe.equals(".") || safe.equals("..")) {
            return "untitled";
        }
        return safe;
    }

    /**
     * Appends {@code title} as one line to {@code downFile} and logs the outcome.
     *
     * @param downFile file that records finished titles
     * @param logName  log file passed on to {@link #hlog}
     * @param title    title to record
     */
    public static synchronized void writeDown(String downFile, String logName, String title) {
        try (FileWriter writer = new FileWriter(downFile, true)) {
            writer.write(title + "\n");
        } catch (IOException e) {
            hlog(logName, PREFIX_ERROR, "Write down", title, "failure", String.valueOf(e));
            return;
        }
        hlog(logName, PREFIX_INFO, "Write down", title, "success");
    }

    /**
     * Splits {@code source} into exactly {@code n} consecutive parts whose sizes differ by at most
     * one; the larger parts come first. Parts may be empty when {@code source} has fewer than
     * {@code n} elements.
     *
     * @param source list to split; the returned parts are {@link List#subList} views of it
     * @param n      number of parts, must be positive
     * @return list of {@code n} sub-lists in source order
     * @throws IllegalArgumentException if {@code n} is not positive
     */
    public static <T> List<List<T>> averageAssign(List<T> source, int n) {
        if (n <= 0) {
            throw new IllegalArgumentException("n must be positive but was " + n);
        }
        List<List<T>> result = new ArrayList<>(n);
        int number = source.size() / n;
        int remaider = source.size() % n;
        int start = 0;
        for (int i = 0; i < n; i++) {
            // The first `remaider` parts take one extra element each.
            int end = start + number + (i < remaider ? 1 : 0);
            result.add(source.subList(start, end));
            start = end;
        }
        return result;
    }

    /**
     * Scrapes the first {@code page} listing pages and returns one map per video that still has to
     * be downloaded.
     *
     * @param targetURL site root, used both for the listing pages and to complete detail links
     * @param page      number of listing pages to read, starting at page 1
     * @param logName   log file passed on to {@link #hlog}
     * @return maps with the keys {@code "name"} (title) and {@code "link"} (m3u8 URL); empty when
     *         nothing was found or the record of finished titles could not be read
     */
    public static List<Map<String, String>> getTargetURL(String targetURL, int page, String logName) {
        List<Map<String, String>> targerList = new ArrayList<>();
        String targetURLPrefix = targetURL
                + (ISSUREN ? "/videos/amateur?page=" : "/videos/japanese?page=");

        // Nothing is appended to the record while this method runs, so reading it once is enough.
        List<String> downloaded;
        try {
            downloaded = readDownloadedTitles();
        } catch (IOException e) {
            hlog(logName, PREFIX_ERROR, "Cannot read", DOWNED_NAME, String.valueOf(e));
            return targerList;
        }

        for (int i = 0; i < page; i++) {
            String pageNo = String.valueOf(i + 1);
            String urlContent = getURL(targetURLPrefix + pageNo);
            if (urlContent.isEmpty()) {
                continue;
            }
            //get title
            List<String> title = new ArrayList<>();
            Matcher matcher = TITLE_PATTERN.matcher(urlContent);
            while (matcher.find()) {
                String name = matcher.group(1);
                if (name.contains("精英娱乐城") || name.contains("东亚娱乐城")) {
                    continue;
                }
                title.add(name);
            }
            //get links
            List<String> links = new ArrayList<>();
            Matcher matcherLinks = LINK_PATTERN.matcher(urlContent);
            while (matcherLinks.find()) {
                links.add(targetURL + matcherLinks.group(1));
            }
            // Titles and links are paired by position, so the counts have to agree.
            if (title.size() != links.size()) {
                hlog(logName, PREFIX_WARNING, "Get targerURL have a error page is", pageNo,
                        "titles:", String.valueOf(title.size()), "links:", String.valueOf(links.size()));
                continue;
            }
            for (int j = 0; j < title.size(); j++) {
                String name = title.get(j);
                if (name.isEmpty()) {
                    continue; // an empty title cannot name a folder or be matched in the record
                }
                if (isDownloaded(downloaded, name)) {
                    hlog(logName, PREFIX_WARNING, name, "Has been downloaded");
                    continue;
                }
                //get m3u8URL
                Matcher matcherm3u8 = M3U8_PATTERN.matcher(getURL(links.get(j)));
                if (matcherm3u8.find()) {
                    Map<String, String> map = new HashMap<>();
                    map.put("name", name);
                    map.put("link", "http:" + matcherm3u8.group(1));
                    targerList.add(map);
                } else {
                    hlog(logName, PREFIX_WARNING, "Get indexM3u8 URL have a error");
                }
            }
        }
        return targerList;
    }

    /** Reads all lines of {@link #DOWNED_NAME}; a missing file yields an empty list. */
    private static List<String> readDownloadedTitles() throws IOException {
        List<String> lines = new ArrayList<>();
        File downFile = new File(DOWNED_NAME);
        if (!downFile.exists()) {
            return lines;
        }
        try (BufferedReader in = new BufferedReader(new FileReader(downFile))) {
            String str;
            while ((str = in.readLine()) != null) {
                lines.add(str);
            }
        }
        return lines;
    }

    /** Returns whether any recorded line contains {@code title}. */
    private static boolean isDownloaded(List<String> downloaded, String title) {
        for (String line : downloaded) {
            if (line.contains(title)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Fetches {@code uri} with HTTP GET and returns the body decoded as UTF-8, with every line
     * terminated by {@code "\n"}.
     *
     * @param uri address to fetch
     * @return the body, or the empty string when the request fails for any reason
     */
    public static String getURL(String uri) {
        HttpURLConnection urlcon = null;
        try {
            urlcon = (HttpURLConnection) new URL(uri).openConnection();
            urlcon.setRequestMethod("GET");
            urlcon.connect();
            try (BufferedReader buffer =
                    new BufferedReader(new InputStreamReader(urlcon.getInputStream(), "utf-8"))) {
                StringBuilder bs = new StringBuilder();
                String line;
                while ((line = buffer.readLine()) != null) {
                    bs.append(line).append('\n');
                }
                return bs.toString();
            }
        } catch (IOException | RuntimeException e) {
            // Callers treat "" as "not available"; still leave a trace of the reason.
            System.err.println(PREFIX_ERROR + "GET " + uri + " failed: " + e);
            return "";
        } finally {
            if (urlcon != null) {
                urlcon.disconnect();
            }
        }
    }

    /**
     * Makes sure the directory {@code fileName} exists, creating missing parents, and logs what
     * happened.
     *
     * @param fileName directory path
     * @param logName  log file passed on to {@link #hlog}
     */
    public static void createDir(String fileName, String logName) {
        File fileDir = new File(fileName);
        if (fileDir.isDirectory()) {
            hlog(logName, PREFIX_INFO, fileName, "Folder already exists");
        } else if (fileDir.mkdirs() || fileDir.isDirectory()) {
            hlog(logName, PREFIX_INFO, fileName, "Successfully created folder");
        } else {
            hlog(logName, PREFIX_ERROR, fileName, "Failed to create folder");
        }
    }

    /**
     * Prints one log line to standard output and appends it to {@code logName}. The line is
     * {@code prefixType}, a {@code yyyy-MM-dd HH:mm:ss} time stamp and then every element of
     * {@code logStringList}, each preceded by a space.
     *
     * @param logName       log file to append to; its parent directory is created when missing
     * @param prefixType    text put in front of the time stamp
     * @param logStringList message parts
     */
    public static synchronized void hlog(String logName, String prefixType, String... logStringList) {
        String time = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date());
        StringBuilder logString = new StringBuilder().append(prefixType).append(time);
        for (String arg : logStringList) {
            logString.append(' ').append(arg);
        }
        System.out.println(logString);

        // The very first message is logged before any directory has been created.
        File parent = new File(logName).getParentFile();
        if (parent != null) {
            parent.mkdirs();
        }
        try (FileWriter writer = new FileWriter(logName, true)) {
            writer.write(logString + "\n");
        } catch (IOException e) {
            System.err.println(PREFIX_ERROR + "cannot write log file " + logName + ": " + e);
        }
    }

    /**
     * Entry point: collects the download targets, spreads them over {@link #THREAD_NUM} threads
     * and waits until all of them have finished.
     */
    public static void main(String[] args) {
        hlog(LOG_NAME, PREFIX_INFO, "Buddha bless hjxSpider program no bugs,Start now");
        createDir(MERGER_PATH, LOG_NAME);
        List<Map<String, String>> targerList = getTargetURL(TARGET_URL, PAGE, LOG_NAME);
        List<List<Map<String, String>>> threadURLList = averageAssign(targerList, THREAD_NUM);
        List<Thread> listThred = new ArrayList<>();
        for (List<Map<String, String>> threadURL : threadURLList) {
            Thread downThread = new Thread(() -> {
                for (Map<String, String> map : threadURL) {
                    try {
                        downTs(map, WORK_PATH, LOG_NAME, MERGER_PATH, DOWNED_NAME);
                    } catch (RuntimeException e) {
                        // One broken video must not cancel the rest of this thread's share.
                        hlog(LOG_NAME, PREFIX_ERROR, Thread.currentThread().getName(),
                                "down", String.valueOf(map.get("name")), "failure", String.valueOf(e));
                    }
                }
            });
            downThread.start();
            listThred.add(downThread);
        }
        try {
            for (Thread runThread : listThred) {
                runThread.join();
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            hlog(LOG_NAME, PREFIX_WARNING, "Interrupted while waiting for download threads");
        }
        hlog(LOG_NAME, PREFIX_INFO, "Thanks for Buddha blessed hjxSpider program no bugs,over now");
    }
}