node + mysql 爬取网页数据并写入数据库(Promise 优化,发邮件优化)

15 篇文章 0 订阅
6 篇文章 0 订阅
// 连接使用数据库
import {connection} from './../../common/dbConnect.js'
import {senderMsg} from './../../common/mailer.js'
// 引入爬虫需要的网络请求模块
const https = require('https')
const fs = require('fs')

// Addresses of the site being crawled
const url = 'https://douban.fm/j/v2/songlist/explore?type=hot&genre=0&limit=20&sample_cnt=5' // JSON API endpoint the data is fetched from
const urlDir = 'https://douban.fm/explore/songlists' // Page URL as visited in a browser (not referenced by the code visible here)

// Rows whose INSERT failed / succeeded; filled by fillbd and counted by endConnection
let errorInfoGroup = []
let successInfoGroup = []
// Handle of the polling timer started after the inserts are issued; cleared by endConnection
let testInterval

// File that writeLog appends to and clearLog truncates
const logPath = '../../../../log.js'

// Mail options object handed to senderMsg.sendMail; postMail overwrites `text` before each send
let receiverMsg = {
  from: '。。。@qq.com',
  to: '。。。@163.com',
  subject: '测试发邮件',
  text: '你能不能收到我发的邮件呢',
  html: '',
  attachments: []
}

/**
 * Fetch the data from the site being crawled.
 *
 * Issues an HTTPS GET to the module-level `url`, buffers the whole response
 * body, parses it as JSON and resolves with the parsed array in reversed order.
 *
 * @returns {Promise<Array>} Resolves with the reversed list of records.
 *   Rejects when the request or the response stream emits an error, when the
 *   status code is not 2xx, or when the body is not a JSON array.
 */
let getWebData = function () {
  console.log('程序开始执行......')
  let httpPromise = new Promise((resolve, reject) => {
    const request = https.get(url, (res) => {
      if (res.statusCode < 200 || res.statusCode >= 300) {
        // Drain the body so the socket is released, then fail with a clear reason.
        res.resume()
        reject(new Error(`Request failed with status code ${res.statusCode}`))
        return
      }
      const chunks = []
      res.on('data', (chunk) => {
        chunks.push(chunk)
      })
      res.on('error', reject)
      res.on('end', () => {
        // Parsing happens inside an event callback, so a throw here would be an
        // uncaught exception; convert it into a rejection instead.
        try {
          const list = JSON.parse(Buffer.concat(chunks).toString())
          if (!Array.isArray(list)) {
            throw new TypeError('Expected the response body to be a JSON array')
          }
          resolve(list.reverse())
        } catch (err) {
          reject(err)
        }
      })
    })
    // Connection-level failures (DNS, refused, TLS) are reported on the request object.
    request.on('error', reject)
  })
  return httpPromise
}

/**
 * Build the CREATE TABLE statement from the field names of the first record.
 *
 * Every field except `id` becomes a nullable VARCHAR(255) column; `id` becomes
 * a NOT NULL INT column and the primary key.
 *
 * @param {Array<Object>} html - Records fetched from the remote endpoint.
 * @returns {Promise<Array>} Resolves with `[createString, html]`. A promise
 *   resolves with a single value, so both are passed along in one array.
 *   Rejects when `html` has no first record to derive the columns from.
 */
let getTableSkelon = function (html) {
  let tablePromise = new Promise((resolve, reject) => {
    const sample = Array.isArray(html) ? html[0] : undefined
    if (sample === null || typeof sample !== 'object') {
      // Without a record the statement would have no columns and be invalid SQL.
      reject(new Error('Cannot derive a table structure: no records were fetched'))
      return
    }
    // Field names come from remote JSON, so quote them as MySQL identifiers
    // (backticks, with embedded backticks doubled) rather than splicing them raw.
    const quoteIdentifier = (name) => '`' + String(name).replace(/`/g, '``') + '`'
    const columns = Object.keys(sample).map((field) => {
      if (field === 'id') {
        return 'id INT (10) NOT NULL'
      }
      return `${quoteIdentifier(field)} VARCHAR (255) DEFAULT NULL`
    })
    columns.push('PRIMARY KEY (id)')
    const createString = `CREATE TABLE IF NOT EXISTS doubanAlbum (${columns.join(', ')})`
    resolve([createString, html])
  })
  return tablePromise
}


/**
 * Create the data table.
 *
 * @param {Array} createString - Pair `[sql, records]`: the CREATE TABLE
 *   statement to run and the records to hand on to the next step.
 * @returns {Promise<Array>} Resolves with `records` once the statement has
 *   run; rejects with the driver error when the query fails.
 */
let createTable = function (createString) {
  let dbPromise = new Promise((resolve, reject) => {
    const sql = createString[0]
    const records = createString[1]
    connection.query(sql, (err) => {
      if (err) {
        // Reject instead of throwing: a throw inside this asynchronous callback
        // cannot be caught by the promise chain and would crash the process.
        reject(err)
        return
      }
      resolve(records)
    })
  })
  return dbPromise
}

/**
 * Normalize one record in place so every field is a scalar, then insert it.
 *
 * - `sample_songs` (array) becomes a comma-separated list of each item's `sid`.
 * - Any other array becomes its JSON text.
 * - A nested object becomes its own `id`, or null when it has no `id`.
 * - null and primitive values are left untouched.
 *
 * The insert is started through `fillbd` whether or not normalization succeeded.
 *
 * @param {Object} newInfo - Record to normalize; it is mutated.
 * @returns {Promise<Object>} Resolves with the normalized record; rejects if
 *   normalization throws.
 */
let fillTable = function (newInfo) {
  let fillPromise = new Promise((resolve, reject) => {
    for (const key in newInfo) {
      const value = newInfo[key]
      if (value === null) {
        continue
      }
      if (Array.isArray(value)) {
        newInfo[key] = key === 'sample_songs'
          ? value.map((song) => song.sid).join(',')
          : JSON.stringify(value)
      } else if (typeof value === 'object') {
        // Call through the prototype so objects without their own
        // hasOwnProperty method are handled as well.
        newInfo[key] = Object.prototype.hasOwnProperty.call(value, 'id') ? value.id : null
      }
    }
    // Settle the promise so callers that wait on it do not hang.
    resolve(newInfo)
  })
  fillbd(newInfo)
  return fillPromise
}

/**
 * Insert one normalized record into the doubanAlbum table.
 *
 * The record is appended to `errorInfoGroup` when the query reports an error
 * and to `successInfoGroup` otherwise.
 *
 * @param {Object} newInfo - Record whose keys are used as column names.
 */
let fillbd = function (newInfo) {
  const insertSql = 'INSERT INTO doubanAlbum SET ?'
  const onInserted = (err) => {
    const bucket = err ? errorInfoGroup : successInfoGroup
    bucket.push(newInfo)
  }
  connection.query(insertSql, newInfo, onInserted)
}


/**
 * Send a mail whose plain-text body is `text`.
 *
 * Overwrites `receiverMsg.text` and passes the shared `receiverMsg` object to
 * `senderMsg.sendMail`. The outcome is only written to the console.
 *
 * @param {string} text - Plain-text body of the mail.
 */
let postMail = function (text) {
  console.log('邮件发送中')
  receiverMsg.text = text
  const onSent = (error, info) => {
    if (error) {
      console.log(error)
      return
    }
    console.log(`Message: ${info.messageId}`)
    console.log(`sent: ${info.response}`)
  }
  senderMsg.sendMail(receiverMsg, onSent)
}

/**
 * Append error information to the log file at `logPath`.
 *
 * Uses `fs.appendFile`, which opens the file in append mode (creating it if
 * needed), writes the whole string and closes the descriptor even when the
 * write fails. Failures are logged to the console, as in `clearLog`, because
 * an exception thrown from the asynchronous callback could not be caught by
 * any caller.
 *
 * @param {string} string - Text to append to the log file.
 */
let writeLog = function (string) {
  fs.appendFile(logPath, string, 'utf8', (err) => {
    if (err) {
      console.log(err)
    } else {
      console.log('已成功写入log')
    }
  })
}

/**
 * Empty the log file at `logPath` by overwriting it with an empty string.
 * The outcome (error or success message) is only written to the console.
 */
let clearLog = function () {
  const onCleared = (err) => {
    console.log(err ? err : '清空成功')
  }
  fs.writeFile(logPath, '', 'utf8', onCleared)
}


/**
 * Polling step: once every insert has settled, close the database connection,
 * stop the polling timer and report the rows that failed.
 *
 * An insert counts as settled when its row is in `successInfoGroup` or
 * `errorInfoGroup`. While inserts are still pending nothing is done, so the
 * error log and the notification mail are produced once, at the end, rather
 * than on every tick.
 *
 * @param {Array} html - All records that were submitted for insertion.
 * @returns {Promise<boolean>} Resolves with true when the run was finalized,
 *   false when inserts are still pending.
 */
let endConnection = function (html) {
  let shutPromise = new Promise((resolve, reject) => {
    const settledCount = successInfoGroup.length + errorInfoGroup.length
    if (settledCount < html.length) {
      resolve(false)
      return
    }
    connection.end()
    clearInterval(testInterval)
    if (errorInfoGroup.length > 0) {
      console.log('错误数据')
      console.log(errorInfoGroup)
      // NOTE(review): clearLog and writeLog are both asynchronous and expose no
      // completion callback, so their relative order on disk is not guaranteed.
      clearLog()
      writeLog(JSON.stringify(errorInfoGroup))
      postMail(JSON.stringify(errorInfoGroup))
    }
    resolve(true)
  })
  return shutPromise
}


// Entry point: fetch the data, derive and create the table, start one insert
// per record, then poll once a second until every insert has settled.
getWebData()
  .then(res => getTableSkelon(res))
  .then(res => createTable(res))
  .then(res => {
    res.forEach((item) => {
      // A record that cannot be normalized must not surface as an unhandled rejection.
      fillTable(item).catch(err => console.log(err))
    })
    testInterval = setInterval(endConnection, 1000, res)
  })
  .catch(err => {
    // Any failure before the polling timer starts ends the run: report it and
    // release the database connection so the process can exit.
    console.log(err)
    connection.end()
  })

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值