var request = require("request")
var cheerio = require('cheerio');
var fs = require("fs")
// Registry of the links that have already been crawled.
const urlObj = {};

// Root of the site to mirror. It must NOT end with a trailing '/'.
const urls = "https://im.qq.com";

// Start crawling from the site root.
link(urls);
/**
 * Crawl one page: save its HTML through init(), mirror its static assets
 * through getStatic() and recurse into every same-site link it contains.
 *
 * The URL is recorded in `urlObj` BEFORE the request is sent and under the
 * exact (encoded) string that is checked on entry, so a page linked from
 * several places is requested only once.
 *
 * @param {string} url - Absolute URL of the page to fetch.
 * @param {number} [retriesLeft=3] - How many more times a failed request may
 *   be retried before the URL is given up on.
 */
function link(url, retriesLeft = 3) {
    if (urlObj[url]) {
        return; // already crawled, or a request for it is in flight
    }
    urlObj[url] = url;

    request(url, {}, (err, response, body) => {
        if (err) {
            console.error("request error : " + url + " : " + err);
            if (retriesLeft > 0) {
                // Clear the mark first, otherwise the retry would be rejected
                // by the visited check at the top of this function.
                delete urlObj[url];
                link(url, retriesLeft - 1);
            }
            return;
        }
        if (response.statusCode !== 200) {
            return;
        }

        // Kind 4 makes init() store the page as ".../index.html"; kind 5
        // stores it under the file name found in the URL.
        init(url, url.lastIndexOf('html') < 0 ? 4 : 5, body);
        getStatic(url); // mirror the static resources of this page

        const $ = cheerio.load(body);
        const anchors = $("a"); // query the document once, not per access
        // Any href containing one of these is not a plain same-site path.
        const skipMarkers = ['http', '#', 'javascript', 'mailto', 'tel:'];

        for (let i = 0; i < anchors.length; i++) {
            let href = anchors[i].attribs.href;
            if (!href || skipMarkers.some((marker) => href.includes(marker))) {
                continue;
            }
            if (href[0] !== '/') {
                href = '/' + href;
            }
            link(encodeURI(urls + href)); // escape the URL before requesting it
        }
    });
}
/**
 * Fetch the page at `url` and hand every static resource it references to
 * init(): script `src` as kind 1, `<link>` `href` as kind 2 and image `src`
 * as kind 3.
 *
 * Elements without the attribute and inline `data:` URIs are skipped, since
 * there is nothing to download for them.
 *
 * @param {string} url - Absolute URL of the page to scan.
 * @param {number} [retriesLeft=3] - How many more times a failed request may
 *   be retried.
 */
function getStatic(url, retriesLeft = 3) {
    request(url, {}, (err, res, body) => {
        if (err) {
            console.error("static err url :" + url + '---' + err);
            if (retriesLeft > 0) {
                getStatic(url, retriesLeft - 1);
            }
            return;
        }
        if (res.statusCode !== 200) {
            return;
        }

        const $ = cheerio.load(body);
        const inlineData = /^\s*data:/i;

        // Pass the given attribute of every matching element to init().
        const queue = (selector, attribute, kind) => {
            const nodes = $(selector);
            for (let i = 0; i < nodes.length; i++) {
                const reference = nodes[i].attribs[attribute];
                if (reference && !inlineData.test(reference)) {
                    init(reference, kind);
                }
            }
        };

        queue("script", "src", 1); // JS files
        queue("link", "href", 2);  // CSS and other <link> targets
        queue("img", "src", 3);    // images
    });
}
/**
 * Store one crawled item below __dirname, mirroring its path on the site,
 * creating the folders on the way. Items whose local file already exists are
 * left untouched.
 *
 * @param {string} str - Page URL or resource reference. The `urls` prefix is
 *   stripped; references without a leading '/' are resolved against the site
 *   root. References to other hosts (scheme or protocol-relative), references
 *   containing ".." and empty/undefined values are ignored.
 * @param {number} num - Kind of item: 1 script, 2 <link> target, 3 image
 *   (all downloaded through writeFile()); 4 page saved as "<path>/index.html";
 *   5 page saved under the file name found in its URL.
 * @param {string} [body] - Page HTML, written as-is when num is 4 or 5.
 */
function init(str, num, body) {
    if (typeof str !== 'string' || str === '') {
        return; // e.g. an element that has no href/src at all
    }
    const kind = Number(num);
    const isPage = kind === 4 || kind === 5;

    const reference = str.replace(urls, '');
    if (/^(?:[a-z][a-z0-9+.-]*:|\/\/)/i.test(reference)) {
        return; // still carries a scheme or host: not part of the mirrored site
    }

    // Separate the query first so that a '/' inside it cannot be mistaken
    // for a path separator.
    const queryStart = reference.indexOf('?');
    const query = queryStart < 0 ? undefined : reference.substring(queryStart + 1);
    let sitePath = queryStart < 0 ? reference : reference.substring(0, queryStart);
    if (sitePath[0] !== '/') {
        sitePath = '/' + sitePath;
    }
    if (sitePath.endsWith('/')) {
        if (!isPage) {
            return; // a resource reference without a file name
        }
        sitePath += 'index.html';
    } else if (kind === 4) {
        sitePath += '/index.html';
    }

    const segments = sitePath.split('/').filter(Boolean);
    if (segments.includes('..')) {
        // The reference comes from downloaded HTML; never leave __dirname.
        console.error("skip unsafe path " + sitePath);
        return;
    }

    const fileName = segments.pop();
    let folder = '';
    for (const name of segments) {
        folder += '/' + name;
        if (!fs.existsSync(__dirname + folder)) {
            mkdir(folder);
        }
    }

    const localPath = folder + '/' + fileName;
    const target = __dirname + localPath;

    fs.access(target, (missing) => {
        if (!missing) {
            return; // already saved
        }
        if (!isPage) {
            console.log("add file2" + localPath);
            // The query travels separately so it is not sent twice.
            writeFile(target, urls + sitePath, num, query);
            return;
        }
        console.log("add file1" + localPath);
        fs.writeFile(target, body, (err) => {
            if (!err) {
                return;
            }
            console.error(localPath, err);
            // One more attempt in case the failure was transient
            // (for example the folder was not there yet).
            fs.writeFile(target, body, (retryErr) => {
                if (retryErr) {
                    console.error(localPath, retryErr);
                }
            });
        });
    });
}
/**
 * Create the folder `path` below __dirname.
 *
 * The folder is created synchronously, so it exists by the time this function
 * returns and callers can create sub-folders or files in it right away.
 * With `recursive: true` (Node.js 10.12+) missing parents are created too and
 * an already existing folder is not an error. Failures are logged, not thrown.
 *
 * @param {string} path - Folder path relative to __dirname, starting with '/'.
 */
function mkdir(path) {
    console.log("add font" + path);
    try {
        fs.mkdirSync(__dirname + path, { recursive: true });
    } catch (err) {
        console.error(err);
    }
}
/**
 * Download a resource and save it to `file`.
 *
 * Kind 3 (images) is streamed straight to disk; every other kind is fetched
 * through the callback API and written only when the response status is 200.
 *
 * @param {string} file - Absolute path of the local file to create.
 * @param {string} url - Absolute URL of the resource.
 * @param {number} num - Resource kind; 3 selects the streaming path.
 * @param {string} [canshu] - Query string without the leading '?'. It is
 *   appended only when present and when `url` has no query of its own.
 * @param {number} [retriesLeft=3] - How many more times the download is
 *   repeated after a failed disk write.
 */
function writeFile(file, url, num, canshu, retriesLeft = 3) {
    console.log("addfile--" + url);

    // Avoid "...?undefined" and "...?a=1?a=1".
    const target = canshu && url.indexOf('?') < 0 ? url + '?' + canshu : url;
    const reportFailure = (err) => {
        console.error("save failed : " + target + " -> " + file + " : " + err);
    };

    if (Number(num) === 3) {
        // Save the image. An 'error' event without a listener would
        // terminate the process, so both streams get one.
        const output = fs.createWriteStream(file);
        output.on('error', reportFailure);
        request(target).on('error', reportFailure).pipe(output);
        return;
    }

    request(target, {}, (err, res, body) => {
        if (err) {
            reportFailure(err);
            return;
        }
        if (res.statusCode !== 200) {
            return;
        }
        fs.writeFile(file, body, (writeErr) => {
            if (!writeErr) {
                return;
            }
            console.log(writeErr);
            if (retriesLeft > 0) {
                writeFile(file, url, num, canshu, retriesLeft - 1);
            }
        });
    });
}