const fs = require('fs');
const path = require('path');
const { pipeline } = require('stream/promises');

const axios = require('axios');
const cheerio = require('cheerio');

const { str } = require('./dataStr');
/**
 * Collects downloadable image URLs from the HTML held in `str`.
 *
 * Every `<img>` element's `src` attribute is inspected:
 *   - absolute `http://` / `https://` URLs are kept as they are;
 *   - protocol-relative URLs (`//host/path`) are prefixed with `https:`;
 *   - root-relative paths (`/path`) are resolved against `baseUrl` when one
 *     is given, and skipped otherwise, because prefixing them with `https:`
 *     would yield a malformed URL such as `https:/path`.
 * Anything else (missing `src`, `data:` URIs, document-relative paths) is
 * ignored.
 *
 * @param {string} [baseUrl] - Origin used to resolve root-relative paths,
 *   e.g. `https://example.com`. Optional.
 * @returns {Promise<string[]>} Image URLs in document order.
 * @throws {TypeError} If `baseUrl` is given but is not a valid URL and a
 *   root-relative path has to be resolved against it.
 */
async function getImageUrls(baseUrl) {
  const $ = cheerio.load(str);
  const imageUrls = [];

  $('img').each((index, element) => {
    const imgSrc = $(element).attr('src'); // source address of this image
    if (typeof imgSrc !== 'string') {
      return; // <img> without a src attribute
    }

    const src = imgSrc.trim();
    if (/^https?:\/\//i.test(src)) {
      // Absolute external image.
      imageUrls.push(src);
    } else if (src.startsWith('//')) {
      // Protocol-relative URL: only the scheme is missing.
      imageUrls.push(`https:${src}`);
    } else if (src.startsWith('/') && baseUrl) {
      // Root-relative path: needs a host, which only the caller knows.
      imageUrls.push(new URL(src, baseUrl).href);
    }
  });

  return imageUrls;
}
/**
 * Pauses an async flow for a fixed amount of time.
 *
 * @param {number} time - Delay in milliseconds, handed to `setTimeout`.
 * @returns {Promise<void>} Promise that fulfills (with no value) once the
 *   timer fires; it never rejects.
 */
const sleep = (time) => new Promise((done) => {
  setTimeout(done, time);
});
/**
 * Downloads every image returned by `getImageUrls()` into `./images2`.
 *
 * Images are fetched one at a time with a 1.5 s pause before each request.
 * Each file is saved as `image_<index>.gif`, whatever the real image type is.
 * A failed download is logged and does not stop the remaining downloads.
 *
 * @returns {Promise<void>} Fulfills when all downloads have been attempted;
 *   errors are logged rather than propagated.
 */
async function test() {
  const outputDir = './images2';

  try {
    const urls = await getImageUrls();
    console.log('urls', urls);
    if (urls == null || urls.length === 0) {
      return;
    }

    // createWriteStream does not create missing directories, so make sure
    // the target directory exists before the first download.
    await fs.promises.mkdir(outputDir, { recursive: true });

    for (const [i, url] of urls.entries()) {
      const fileName = `image_${i}.gif`; // custom name and extension of the saved image
      const filePath = path.join(outputDir, fileName); // local destination path
      await sleep(1500);
      try {
        const response = await axios({
          method: 'get',
          url,
          responseType: 'stream', // stream the image data instead of buffering it
        });
        // Wait until the file is fully written; pipeline also routes stream
        // errors (network or disk) into the catch below instead of leaving
        // them as unhandled 'error' events.
        await pipeline(response.data, fs.createWriteStream(filePath));
        console.log(`Successfully downloaded ${fileName}`);
      } catch (error) {
        console.log(`Failed to download ${fileName}: ${error}`);
      }
    }
  } catch (error) {
    console.log(error);
  }
}
// Script entry point: start the download run.
test();
// Writing a web crawler with Node.js
// First published 2024-02-23 16:08:24
// ![](https://img-home.csdnimg.cn/images/20240711042549.png)