第一步:打开 cmd,进入命令行界面
第二步:输入D: 进入D盘
第三步:创建项目文件:mkdir nodetaobao(项目名)
第四步:进入创建的项目:cd nodetaobao
第五步:初始化项目:npm init
然后安装依赖模块(npm install request cheerio images),新建一个 images 文件夹用来存放爬虫下载的图片,并创建 start.js 文件
复制以下代码到 start.js,然后运行 node start.js 就可以下载图片了
var https = require('https')
var request = require('request')
var cheerio = require('cheerio')
var fs = require('fs')
var images = require('images')
// Product detail page URL (replace only this address for each run). Run the script with: node start.js
var url = 'https://item.taobao.com/item.htm?id=592293023396&scm=1007.12144.81309.359571_0_0&pvid=56bf99e6-c060-4ff0-8354-998718dde11f&utparam=%7B%22x_hestia_source%22%3A%22359571%22%2C%22x_object_type%22%3A%22item%22%2C%22x_mt%22%3A8%2C%22x_src%22%3A%22359571%22%2C%22x_pos%22%3A9%2C%22x_pvid%22%3A%2256bf99e6-c060-4ff0-8354-998718dde11f%22%2C%22x_object_id%22%3A592293023396%7D'
/**
 * Fetch an item detail page, schedule the detail-image download for every
 * `apiImgInfo` URL found in its inline scripts, and start the main-image
 * download.
 *
 * Flow: GET the page over HTTPS, buffer the body as UTF-8, parse it with
 * cheerio, scan every <script> for `apiImgInfo : '<url>'`, then call
 * `sendpage(pageUrl, sid)` after a delay and `getMainImages($)` immediately.
 *
 * @param {string} x - URL of the item detail page to crawl.
 */
function startPage(x) {
  https.get(x, function (res) {
    let html = ''; // accumulates the whole HTML document
    res.setEncoding('utf-8'); // decode as UTF-8 so Chinese text is not garbled
    res.on('data', function (chunk) {
      html += chunk;
    });
    res.on('end', function () {
      const $ = cheerio.load(html);
      $('html').find('script').each(function () {
        const script = $(this).html();
        if (!script) {
          return;
        }
        // A fresh regex per script keeps `lastIndex` from leaking between scripts.
        // Capturing the quoted value directly avoids truncating URLs that
        // contain ',' or ':' (the previous split-based extraction did).
        const pattern = /apiImgInfo\s*:\s*(['"])(.*?)\1/g;
        let match = pattern.exec(script);
        while (match !== null) {
          scheduleDetailImages(match[2]);
          match = pattern.exec(script);
        }
      });
      getMainImages($);
    });
  }).on('error', function (err) {
    // Without this listener a DNS/connection failure is thrown as an
    // unhandled 'error' event and kills the process.
    console.log(err);
  });

  // Normalise one raw apiImgInfo value and schedule the detail-image download for it.
  function scheduleDetailImages(rawUrl) {
    // Drop escaping backslashes and leading whitespace.
    const cleaned = rawUrl.replace(/\\/g, '').replace(/^\s*/, '');
    // The value is normally protocol-relative ("//host/..."); only add a
    // scheme when one is not already present.
    const pageUrl = /^https?:/i.test(cleaned) ? cleaned : 'http:' + cleaned;

    // Select the `sid` parameter by exact name. The previous substring test
    // (indexOf('id') > 0) only picked `sid` by accident and also matched any
    // other fragment that happened to contain "id".
    const query = pageUrl.split('?')[1] || '';
    const sidPair = query.split('&').find(function (pair) {
      return pair.split('=')[0] === 'sid';
    });
    if (sidPair === undefined) {
      console.log('apiImgInfo 地址中没有 sid 参数: ' + pageUrl);
      return;
    }
    const idVaule = sidPair.split('=')[1];

    // Block-scoped constants give each timer its own values.
    // NOTE(review): the 2200 ms delay is kept from the original; its purpose
    // is not visible here (presumably request throttling) — confirm.
    setTimeout(function () {
      sendpage(pageUrl, idVaule);
    }, 2200);
  }
}
/**
 * Download the detail (description) images of an item.
 *
 * Requests the image-list endpoint `y`, splits the response body on ':' and,
 * for every piece that contains `idVaule` (not at position 0), takes the text
 * after the first ',' as the image file name. Each image is fetched from
 * `https://img.alicdn.com/imgextra/i2/<idVaule>/<name>`, written to
 * `./images/详情图<n>.jpg` (n = index of the piece) and then re-saved through
 * `images(...).size(750)`.
 *
 * @param {string} y - URL of the image-list endpoint.
 * @param {string} idVaule - id used both to recognise image entries in the
 *   response and as the directory segment of the image URL.
 */
function sendpage(y, idVaule) {
  request({ url: y }, function (error, response, body) {
    if (error || response.statusCode !== 200) {
      // Previously a failed list request was ignored silently.
      console.log('详情图列表请求失败', error || response.statusCode);
      return;
    }

    body.split(':').forEach(function (piece, key) {
      if (!(piece.indexOf(idVaule) > 0)) {
        return;
      }
      // NOTE(review): when the piece has no ',', indexOf returns -1 and the
      // whole piece is used as the file name — kept as in the original, since
      // the response format is not visible here.
      const text = piece.slice(piece.indexOf(',') + 1).replace(/"/g, '');
      const img_src = 'https://img.alicdn.com/imgextra/i2/' + idVaule + '/' + text; // image URL
      const filePath = './images/' + '详情图' + key + '.jpg';

      // A single GET is enough; the former extra HEAD request only doubled
      // the traffic and its result was never used.
      const writeStream = fs.createWriteStream(filePath);
      const readStream = request(img_src);

      // Unhandled stream 'error' events would crash the process (e.g. the
      // ./images directory is missing or the connection drops).
      readStream.on('error', function (err) {
        console.log(err);
        writeStream.destroy();
      });
      writeStream.on('error', function (err) {
        console.log(err);
      });

      readStream.on('end', function () {
        console.log('详情图' + key + '下载成功');
      });
      writeStream.on('finish', function () {
        // NOTE(review): the 2 s delay before resizing is kept from the
        // original; its purpose is not visible here.
        setTimeout(function () {
          try {
            images(filePath).size(750).save(filePath);
          } catch (err) {
            // A non-image payload (e.g. an error page) must not abort the
            // remaining downloads.
            console.log(err);
          }
        }, 2000);
      });

      // pipe() ends the write stream itself when the read side finishes.
      readStream.pipe(writeStream);
    });
  });
}
/**
 * Download the main (gallery) images of an item page.
 *
 * For every `.tb-s50 a img` thumbnail, the `data-src` attribute is turned
 * into a larger variant by replacing '50x50' with '800x800' (prefixing
 * 'https:' when the value does not contain 'htt'). The image is written to
 * `./images/主图<index>.jpg` and then re-saved through `images(...).size(750)`.
 *
 * @param {Function} $ - cheerio instance loaded with the item page HTML.
 */
function getMainImages($) {
  $('.tb-s50 a img').each(function (index) {
    const dataSrc = $(this).attr('data-src');
    // A thumbnail without data-src used to throw a TypeError and abort the
    // whole loop; skip it instead.
    if (!dataSrc) {
      return;
    }

    const enlarged = dataSrc.replace('50x50', '800x800');
    const srcImg = dataSrc.indexOf('htt') > -1 ? enlarged : 'https:' + enlarged;
    const filePath = './images/' + '主图' + index + '.jpg';

    // A single GET is enough; the former extra HEAD request only doubled the
    // traffic and its result was never used.
    const writeStream = fs.createWriteStream(filePath);
    const readStream = request(srcImg);

    // Unhandled stream 'error' events would crash the process (e.g. the
    // ./images directory is missing or the connection drops).
    readStream.on('error', function (err) {
      console.log(err);
      writeStream.destroy();
    });
    writeStream.on('error', function (err) {
      console.log(err);
    });

    readStream.on('end', function () {
      console.log('主图' + index + '下载成功');
    });
    writeStream.on('finish', function () {
      try {
        images(filePath).size(750).save(filePath);
      } catch (err) {
        // A non-image payload (e.g. an error page) must not abort the
        // remaining downloads.
        console.log(err);
      }
    });

    // pipe() ends the write stream itself when the read side finishes.
    readStream.pipe(writeStream);
  });
}
// Entry point: start crawling the product page configured in `url`.
startPage(url)
注意:必须先安装 Node.js 才能运行
运行成功下载的图片