node.js 爬取网页数据

最新推荐文章于 2024-03-01 21:12:37 发布

可乐与远方

最新推荐文章于 2024-03-01 21:12:37 发布

阅读量466

点赞数 1

文章标签： nodejs js

本文链接：https://blog.csdn.net/kelezx/article/details/115120003

版权

准备工作：先安装 Node.js 环境和 npm，然后将 npm 源切换为淘宝镜像，并在项目目录下执行 npm install axios cheerio 安装依赖。
代码如下：

const axios = require('axios')
const cheerio = require('cheerio')
const fs = require('fs')
// Modules required above: axios (HTTP requests), cheerio (HTML parsing), fs (file output)

// Entry page of the site; getHome() requests it to read the navigation links.
const urlmove = 'http://www.bbsnet.com/gif'
/**
 * Fetch a URL with axios.
 *
 * @param {string} url - Address to request.
 * @returns {Promise<object|false>} The axios response on success, or `false`
 *   when the request fails, so callers can simply test the result for truthiness.
 */
async function req (url) {
  try {
    const response = await axios.get(url)
    return response
  } catch (err) {
    // Every request failure is collapsed into `false`; callers rely on this
    // sentinel rather than on a rejected promise.
    return false
  }
}
/**
 * Load the home page and extract its navigation entries.
 *
 * Every `#menu-nav a` link is turned into a `{ name, path }` record, where
 * `name` is the link text and `path` is its `href` attribute. The first and
 * last links of the menu are discarded.
 *
 * @returns {Promise<Array<{name: string, path: (string|undefined)}>>}
 *   Navigation records without the first and last menu entries.
 * @throws {Error} When the home page request fails (req() returned `false`).
 */
async function getHome () {
  const response = await req(urlmove)
  if (!response) {
    // req() signals failure with `false`; fail here with a clear message
    // instead of handing `undefined` to cheerio.
    throw new Error(`Failed to load home page: ${urlmove}`)
  }
  const $ = cheerio.load(response.data)
  const links = $('#menu-nav a').toArray().map((el) => ({
    name: $(el).text(),
    path: $(el).attr('href')
  }))
  // Drop the first and last menu entries; slice() also copes with menus that
  // have fewer than two links.
  return links.slice(1, -1)
}
/**
 * Collect the image records from every listing page of one navigation entry.
 *
 * Pages are requested as `<basePath>/page/1`, `<basePath>/page/2`, ... and the
 * loop stops at the first request for which req() returns `false`.
 * NOTE(review): termination relies on the server failing requests for pages
 * past the last one - confirm against the target site.
 *
 * @param {string} basePath - URL of the navigation entry.
 * @returns {Promise<Array<{title: (string|undefined), path: (string|undefined)}>>}
 *   One record per `#post_container img` element (its `alt` and `src`).
 */
async function collectCategoryImages (basePath) {
  const images = []
  for (let page = 1; ; page++) {
    // Each page is fetched once and the same response is used both as the
    // "does this page exist" check and as the HTML to parse.
    const response = await req(`${basePath}/page/${page}`)
    if (!response) break
    const $ = cheerio.load(response.data)
    $('#post_container img').each((index, el) => {
      images.push({
        title: $(el).attr('alt'),
        path: $(el).attr('src')
      })
    })
  }
  return images
}

/**
 * Scrape every navigation entry and write one JSON file per entry.
 *
 * Creates the `分类` folder, then processes all entries returned by getHome()
 * concurrently. For each entry the collected image records are written to
 * `./分类/<name>.txt` as JSON. A failure in one entry is logged and does not
 * stop the others.
 *
 * @returns {Promise<void>} Resolves once every entry has been processed.
 */
async function main () {
  const navItems = await getHome()
  // `recursive: true` makes this a no-op when the folder already exists, so
  // the script can be run repeatedly.
  fs.mkdirSync('分类', { recursive: true })
  // map + Promise.all (instead of forEach) so main() only resolves after all
  // files have been written.
  await Promise.all(navItems.map(async (item) => {
    try {
      const images = await collectCategoryImages(item.path)
      await fs.promises.writeFile(`./分类/${item.name}.txt`, JSON.stringify(images))
    } catch (err) {
      console.error(`Failed to scrape category "${item.name}":`, err)
    }
  }))
}
// Run the scraper; report a failure instead of leaving the rejection unhandled.
main().catch((err) => {
  console.error(err)
  process.exitCode = 1
})

在这里插入图片描述
运行后就会爬取到数据，每个分类的结果会以 JSON 形式写入「分类」目录下对应的 .txt 文件。

可乐与远方

关注

1
点赞
踩
0

收藏

觉得还不错? 一键收藏
2
评论
node.js 爬取网页数据

准备工作安装node 环境然后下载npm 切换淘宝镜像代码如下const axios = require('axios')const cheerio = require('cheerio')const fs = require('fs')// 需要引入的模块const urlmove = 'http://www.bbsnet.com/gif'// 请求回来的promise对象数据function req (url) { return axios.get(url).then(func
复制链接

扫一扫