在 Chrome 浏览器上下载 TXT 小说的 JavaScript 代码
前段时间在追一部网络小说,想把小说下载到本地离线阅读,于是利用 Chrome 浏览器的开发者工具,编写了一段 JavaScript 代码,用于下载小说目录页面中所有章节的内容,并合并为一个文本文件。
// ID of the element that contains the chapter list (table of contents)
var txtListID = "list";
// ID of the element that contains the body text of a chapter
var txtContentID = "content";
/**
 * Returns a promise that is fulfilled (with no value) once `time`
 * milliseconds have elapsed. Despite the name it does not block; callers
 * have to wait on the returned promise.
 *
 * @param {number} time - Delay in milliseconds, passed straight to setTimeout.
 * @returns {Promise<undefined>} Promise settled by the timer callback.
 */
function syncSleep(time) {
    return new Promise(function (wake) {
        setTimeout(wake, time);
    });
}
// Collected chapter records, one entry per downloaded chapter page
var docLst = [];
// XMLHttpRequest helper: creates request objects and sends asynchronous requests.
var XMLHttp = {
    /**
     * Returns a new request object; every call creates a fresh one.
     */
    _getInstance: function() {
        return this._createObj();
    },
    /**
     * Creates the underlying transport object.
     *
     * @returns {object} A native XMLHttpRequest, or the legacy ActiveX object.
     * @throws {Error} When the environment offers neither implementation.
     */
    _createObj: function() {
        if (window.XMLHttpRequest) {
            // IE7+, Firefox, Opera, Chrome, etc.
            return new XMLHttpRequest();
        }
        if (window.ActiveXObject) {
            // IE6, IE5
            return new ActiveXObject("Microsoft.XMLHTTP");
        }
        throw new Error("XMLHttp: no XMLHttpRequest implementation available");
    },
    /**
     * Sends an asynchronous request.
     *
     * @param {string} method - HTTP method, e.g. "GET" or "POST".
     * @param {string} url - Target address; a random `randnum` query parameter
     *     is appended to defeat caching.
     * @param {*} data - Request body handed to `send()` (null for GET).
     * @param {function} callback - Called with the request object once the
     *     request completes with status 200 or 304.
     * @param {function} [onError] - Optional. Called with the request object
     *     when the request completes with any other status, or with the thrown
     *     error when preparing/sending the request fails. When omitted, a bad
     *     status is logged with console.warn and a thrown error is shown with
     *     alert().
     */
    sendReq: function(method, url, data, callback, onError) {
        var xhr = this._getInstance();
        try {
            // Pick the separator with ">= 0" so a URL that starts with "?" still
            // gets "&" instead of a second "?".
            var separator = url.indexOf("?") >= 0 ? "&" : "?";
            var requestUrl = url + separator + "randnum=" + Math.random();
            xhr.open(method, requestUrl, true);
            // Declare the request body encoding
            xhr.setRequestHeader('Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8');
            // Register the handler before send() so no state change can be missed.
            xhr.onreadystatechange = function() {
                if (xhr.readyState !== 4) {
                    return;
                }
                if (xhr.status === 200 || xhr.status === 304) {
                    callback(xhr);
                } else if (typeof onError === "function") {
                    onError(xhr);
                } else {
                    console.warn("XMLHttp.sendReq: " + method + " " + requestUrl + " finished with status " + xhr.status);
                }
            };
            xhr.send(data);
        } catch (e) {
            if (typeof onError === "function") {
                onError(e);
            } else {
                alert(e);
            }
        }
    }
};
// XML/HTML document parser factory
/**
 * Returns an object for parsing markup.
 *
 * Uses the standard DOMParser when the window exposes one. Otherwise it
 * creates the legacy Internet Explorer "Microsoft.XMLDOM" ActiveX document
 * and sets its `async` property before returning it.
 *
 * NOTE(review): the fallback returns an ActiveX document rather than a
 * DOMParser; presumably it does not offer parseFromString(), so callers that
 * rely on that method effectively need DOMParser — confirm before relying on
 * the fallback.
 *
 * @returns {object} A DOMParser instance, or the ActiveX XML document.
 */
function Parser() {
    if (window.DOMParser) {
        return new DOMParser();
    }
    // Internet Explorer: keep the document in a local variable instead of
    // leaking an implicit global (which throws in strict mode).
    var xmlDoc = new ActiveXObject("Microsoft.XMLDOM");
    xmlDoc.async = "false";
    return xmlDoc;
}
// One promise per chapter link; each is resolved after its chapter has been recorded.
var promises = [];
// The next line is the key step: it selects the node whose children are the
// chapter entries. Adjust the ID/class lookup to the layout of the chapter
// index page of the site in use.
var srcDoc = document.getElementById(txtListID).children[0];
var ad = Array.prototype.slice.call(srcDoc.children);
// Not referenced by the code in this section.
var txtListCount = 0;
ad.forEach(function(item, idx) {
    var tmpd = item.children[0];
    // Only entries whose first child is a link are chapters.
    if (tmpd == undefined || tmpd.nodeName !== "A") {
        return;
    }
    promises.push(new Promise(function(resolve) {
        XMLHttp.sendReq("GET", tmpd.href, null, function(xhttp) {
            var ct = "";
            try {
                var doc = Parser().parseFromString(xhttp.responseText, "text/html");
                var contentNode = doc.getElementById(txtContentID);
                if (contentNode) {
                    ct = contentNode.innerHTML;
                } else {
                    console.warn("No #" + txtContentID + " element found in", tmpd.href);
                }
            } catch (err) {
                // A page that cannot be parsed must not keep this promise
                // pending forever, otherwise Promise.all(promises) never
                // completes and nothing gets saved.
                console.error("Failed to parse", tmpd.href, err);
            }
            docLst.push({
                "idx": idx,
                "url": tmpd.href,
                "section": tmpd.innerText,
                "txt": ct
            });
            console.log(idx, tmpd.innerText, tmpd.href);
            resolve();
        });
    }));
});
/**
 * Offers `data` to the user as a file download by clicking a temporary
 * anchor element that points at a Blob URL.
 *
 * @param {*} data - Content to save. Objects are serialized with
 *     JSON.stringify (4-space indent); falsy values are rejected with a
 *     console error and nothing is downloaded.
 * @param {string} [filename] - Suggested file name; defaults to 'console.json'.
 */
function SaveToFile(data, filename) {
    if (!data) {
        console.error('Console.save: No data')
        return;
    }
    if (!filename) {
        filename = 'console.json';
    }
    if (typeof data === "object") {
        data = JSON.stringify(data, undefined, 4)
    }
    var blob = new Blob([data],{
        type: 'text/json'
    });
    // Local variables: the event and the anchor must not leak as implicit
    // globals (assigning to undeclared names also throws in strict mode).
    var clickEvent = document.createEvent("MouseEvents");
    var anchor = document.createElement("a");
    anchor.download = filename;
    anchor.href = window.URL.createObjectURL(blob);
    anchor.dataset.downloadurl = ["text/json", anchor.download, anchor.href].join(':');
    clickEvent.initMouseEvent("click", true, false, window, 0, 0, 0, 0, 0, false, false, false, false, 0, null);
    anchor.dispatchEvent(clickEvent);
    // Release the Blob URL once the click has been dispatched.
    window.URL.revokeObjectURL(anchor.href);
}
// Book title: text of the element with id "info"; falls back to the page
// title when the page has no such element, so the script still reaches the
// save step.
var txtInfo = document.getElementById("info");
var txtTitle = txtInfo ? txtInfo.innerText : document.title;
var txtUrl = document.URL;
// Header of the output file: title and source address.
var docStr = "\n" + txtTitle + "\n" + txtUrl + "\n";
Promise.all(promises).then(function() {
    // Requests complete in arbitrary order; restore the order of the chapter list.
    docLst.sort(function(a, b) {
        return a.idx - b.idx;
    });
    docStr += docLst.map(function(item) {
        return "\n第" + item.section + "\n" + item.txt + "\n";
    }).join("");
    // Text clean-up rules, applied in order. Each `in` is replaced literally
    // (split/join), so characters that are special in regular expressions
    // cannot change the meaning of a rule.
    [{
        in: "<br>",
        to: " "
    }, {
        // innerHTML serializes non-breaking spaces as this entity.
        in: "&nbsp;",
        to: " "
    }, {
        // NOTE(review): as written this rule replaces a space with a space;
        // presumably the entity rule above is what was intended. Kept unchanged.
        in: " ",
        to: " "
    }, {
        in: "XX小说网",
        to: "\n"
    }].forEach(function(rule) {
        docStr = docStr.split(rule.in).join(rule.to);
    });
    //console.log(docStr);
    SaveToFile(docStr, "docStr.txt");
}).catch(function(err) {
    // Surface failures from merging or saving instead of leaving an
    // unhandled rejection.
    console.error("Failed to build or save the novel text:", err);
});
上述代码需要在 Chrome 开发者工具中新建 Snippet(代码片段),将代码复制粘贴到其中运行。由于各小说网站的页面布局存在差异,需要有针对性地分析页面布局,才能生成正确的章节网址列表。由于个人水平有限,代码中难免有不足或错漏之处,还请方家指正。