1. 抓取标签内容
- 引入模块:新模块 `jsdom` 中的 `JSDOM`
- 创建对象:
let DOM = new JSDOM(html); let document = DOM.window.document;
- DOM 操作:`document.querySelector('.tm-count').innerHTML`
完整代码
/*
 * Demo script: download a product page, convert the raw bytes to a UTF-8
 * string with the `gbk` package, parse the markup with jsdom and print the
 * inner HTML of the first `.tm-count` element.
 */

// Counts every GetUrl call; only used to number the redirect log lines.
let index = 0;
const fs = require('fs');
const url = require('url');
const gbk = require('gbk');
const JSDOM = require('jsdom').JSDOM;

GetUrl(
  'https://detail.tmall.com/item.htm?id=548466958386&ali_refid=a3_430583_1006:1103419234:N:%E5%8D%8E%E4%B8%BA:bb84ee4c8f67c7b202d725187b7ad429&ali_trackid=1_bb84ee4c8f67c7b202d725187b7ad429&spm=a230r.1.14.1&sku_properties=5919063:6536025;12304035:116177',
  (data) => {
    // NOTE(review): assumes the page bytes are GBK-encoded — confirm against
    // the response's charset.
    const html = gbk.toString('utf-8', data);
    const DOM = new JSDOM(html);
    const document = DOM.window.document;

    // querySelector returns null when nothing matches; reading .innerHTML
    // from null would throw, so report the miss instead.
    const countNode = document.querySelector('.tm-count');
    if (countNode === null) {
      console.log('未找到 .tm-count 元素');
      return;
    }
    console.log(countNode.innerHTML);
  }
);

/**
 * Request a URL over http or https and pass the complete response body to a
 * callback. 301/302 responses are followed, up to `maxRedirects` hops.
 *
 * @param {string} sUrl - Absolute URL to request.
 * @param {(body: Buffer) => void} [success] - Called with the concatenated
 *   response body once a 200 response has ended.
 * @param {number} [maxRedirects=10] - How many more redirects may be followed
 *   before giving up; guards against redirect loops.
 * @returns {void}
 */
function GetUrl(sUrl, success, maxRedirects = 10) {
  index++;

  const urlObj = url.parse(sUrl);
  // Anything that is not plain http is sent through the https module.
  const http = urlObj.protocol === 'http:' ? require('http') : require('https');

  const req = http.request(
    {
      hostname: urlObj.hostname,
      // A null port lets the http/https module fall back to its default.
      port: urlObj.port,
      path: urlObj.path,
    },
    (res) => {
      const { statusCode } = res;

      if (statusCode === 200) {
        const chunks = [];
        res.on('data', (buffer) => {
          chunks.push(buffer);
        });
        res.on('end', () => {
          if (typeof success === 'function') {
            success(Buffer.concat(chunks));
          }
        });
        return;
      }

      // The body is not needed on any other status; drain it so the
      // underlying socket is released.
      res.resume();

      if (statusCode === 301 || statusCode === 302) {
        const location = res.headers.location;
        if (!location) {
          console.error(`重定向响应缺少 Location 头: ${sUrl}`);
          return;
        }
        if (maxRedirects <= 0) {
          console.error(`重定向次数过多: ${sUrl}`);
          return;
        }
        console.log(`第${index}次重定向`, location);
        // Location may be relative; resolve it against the current URL.
        GetUrl(url.resolve(sUrl, location), success, maxRedirects - 1);
        return;
      }

      console.error(`请求失败,状态码 ${statusCode}: ${sUrl}`);
    }
  );

  // Attach the handler before sending so no error can go unobserved.
  req.on('error', (err) => {
    console.error(`请求失败: ${sUrl}`, err.message);
  });
  req.end();
}