express 爬虫部分

mysql-node

连接数据库

 let mysql=require("mysql")

// Open a connection to the local MySQL server
let con=mysql.createConnection({
    host:'localhost',
    user:"root",
    password:"root",
    // name of the database to use
    database:"book"
})

// Report the outcome of the connection attempt: the error if there is one, a success message otherwise
con.connect(function(err){
    console.log(err ? err : "连接成功")
})

常用的crud

// Example SQL statements. Every literal uses plain ASCII quotes: typographic
// quotes are a syntax error in JavaScript and are not string delimiters in SQL.

// Select
// Select every row
let str="select * from student"

// Select with a condition
let str1="select * from student where age>18"

// Insert one row
let str2="insert into student (name,id,age) values ('李白',3,20)"

// Delete: removes every row that matches the condition
let str3="delete from student where id=3;"

// Update
let str4="update book set author='自己改的名字',bookname='自己改的书名' where id=1"

// Paginated select with LIMIT; the two ? placeholders are filled from arr
let str5="select * from book limit ?,?"
let page=1 // page number
let pageNumber=20 // rows per page
let arr=[(page-1)*pageNumber,pageNumber]

// Views: str6 creates the view, str7 reads from that same view
let str6="create view booksAll1 as (select * from book inner join book_book1 on book.id=book_book1.bookid)"
let str7="select * from booksAll1"

// Fuzzy match with LIKE
let str8="select * from book where author like '%刀%'"

// Transaction
// NOTE(review): this is several statements in one string and it rolls back before
// committing; presumably the connection must allow multiple statements - confirm.
let str9="begin;insert into book (bookname,author,id,cataory) values ('静夜诗','李白',1002,'古诗');rollback;commit"

// Two-table join with inner join ... on
let str10="select * from book inner join book_book1 on book.id=book_book1.bookid where book.id=1"

// Three-table join with inner join ... on
let str11="select book.bookname from book inner join book_book1 on book.id=book_book1.bookid inner join book1 on book_book1.id=book1.id where author='朱冰' "

// Run the transaction example and print either the error or the result
con.query(str9,(err,res,fil)=>{
    if(err){
        // surface the failure instead of printing an undefined result
        console.log(err)
        return
    }
    console.log(res)
})

// Parameterised example: arr supplies the two LIMIT placeholders of str5
// con.query(str5,arr,(err,res,fil)=>{
// console.log(res)
// })

封装mysql
mysql.js

// mysql.js - creates the shared connection used by getMysql.
// String literals use plain ASCII quotes; typographic quotes do not parse.
let mysql=require("mysql")

let con=mysql.createConnection({
    host:'localhost',
    user:"root",
    password:"root",
    // database name
    database:"book"
})

// Log the connection error, or a success message when there is none
con.connect((err)=>{
    if(err){
        console.log(err)
    }else{
        console.log("连接成功")
    }
})

/**
 * Run a SQL statement on the shared connection and expose the outcome as a Promise.
 * @param {string} sql - SQL text handed to con.query.
 * @param {Array} arr - Values handed to con.query together with the SQL.
 * @returns {Promise<*>} Resolves with the rows argument of the query callback;
 *   rejects with the callback's error when one is reported.
 */
function getMysql(sql,arr){
    return new Promise((res,rej)=>{
        con.query(sql,arr,function(err, rows, fields){
            if (err) {
                // stop here so the failure path never falls through to res()
                rej(err)
                return
            }
            res(rows)
        })
    })
}

// Public surface of mysql.js: the promise-returning query helper and the raw connection
module.exports={getMysql:getMysql,con:con}

爬虫部分
// Crawler setup: HTTP client, HTML parser and the MySQL connection.
// String literals use plain ASCII quotes; typographic quotes do not parse.
let axios=require("axios")
let cheerio=require("cheerio")
let mysql=require("mysql")

let con=mysql.createConnection({
    host:'localhost',
    user:"root",
    password:"root",
    // database name
    database:"book"
})
// Log the connection error, or a success message when there is none
con.connect((err)=>{
    if(err){
        console.log(err)
    }else{
        console.log("连接成功")
    }
})

let page=1
let count=1

/**
 * Fetch one listing page and start a scrape for every book link found on it.
 * @param {number} count - Page number substituted into the listing URL.
 * @returns {Promise<void>} Settles once the listing page has been fetched and
 *   every book scrape has been started (the scrapes themselves are not awaited).
 */
async function getPage(count){
    let res=await axios.get(`https://www.bookben.net/xuanhuan/${count}.html`)
    // console.log(res.data)
    let $=cheerio.load(res.data)
    $("#wrap > div.xiaotian > div.left > div.nylr > div.lblr > dl > dt a").each((index,value)=>{
        // links on the page are site-relative, so prefix the origin
        let bookurl=$(value).attr("href")
        bookurl="https://www.bookben.net"+bookurl
        // getbook is not awaited; catch here so a failed book cannot raise an unhandled rejection
        getbook(bookurl).catch((err)=>{
            console.log(err)
        })
    })
}

/**
 * Fetch one book page, extract author, title and category, and insert them into the book table.
 * @param {string} url - Absolute URL of the book page.
 * @returns {Promise<void>} Settles once the insert has been handed to con.query.
 */
async function getbook(url){
    let res=await axios.get(url)
    let $=cheerio.load(res.data)

    // element text is read with text(); attribute values are read with attr()
    let author=$("#wrap > div.xiaotian > div.left > div:nth-child(2) > dl.yxjj > dd:nth-child(3) > a").text()
    let bookname=$("#wrap > div.xiaotian > div.left > div:nth-child(2) > dl.yxjj > h2 > dt > a").attr("title")
    let cataory=$("#wrap > div.xiaotian > div.left > div:nth-child(2) > dl.yxjj > dd:nth-child(2) > a").text()
    let arr=[author,bookname,cataory]

    // values are bound through ? placeholders rather than concatenated into the SQL
    let str="insert into book (author,bookname,cataory) values (?,?,?)"
    con.query(str,arr,(err)=>{
        if(err){
            console.log(err)
            return
        }
        // log a literal marker; the bare identifier ok was never defined
        console.log("ok")
    })
}

// getPage(count)

// Crawl listing pages 1..499, starting one page per second.
// The delay scales with i; a constant delay would start every page at the same moment.
for(let i=1;i<500;i++){
    setTimeout(()=>{
        getPage(i).catch((err)=>{
            // a failed page is logged and the remaining pages still run
            console.log(err)
        })
    },i*1000)
}

mongodb-node

let axios=require("axios")
let cheerio=require("cheerio")
let mongoose = require("mongoose")
// Connect to the database and log the outcome
mongoose.connect("mongodb://localhost/playground", { useUnifiedTopology: true })
.then(() => {
    console.log("连接数据库成功")
})
.catch(err => {
    // print the underlying error too, so a failed connection can be diagnosed
    console.log("连接数据库失败", err)
})

    // Schema for one scraped book: three string fields
    const courseSchema = new mongoose.Schema({ author: String, bookname: String, cataory: String })
    // Model built from the schema; the rest of the script inserts documents through it
    const Course = mongoose.model("Course", courseSchema)

// Page counters declared for the crawl
let page=1
let count=1

/**
 * Fetch one listing page and start a scrape for every book link found on it.
 * @param {number} count - Page number substituted into the listing URL.
 * @returns {Promise<void>} Settles once the listing page has been fetched and
 *   every book scrape has been started (the scrapes themselves are not awaited).
 */
async function getPage(count){
    let res=await axios.get(`https://www.bookben.net/xuanhuan/${count}.html`)
    // console.log(res.data)
    let $=cheerio.load(res.data)
    $("#wrap > div.xiaotian > div.left > div.nylr > div.lblr > dl > dt a").each((index,value)=>{
        // links on the page are site-relative, so prefix the origin
        let bookurl=$(value).attr("href")
        bookurl="https://www.bookben.net"+bookurl
        // getbook is not awaited; catch here so a failed book cannot raise an unhandled rejection
        getbook(bookurl).catch((err)=>{
            console.log(err)
        })
    })
}

/**
 * Fetch one book page, extract author, title and category, and store them through the Course model.
 * @param {string} url - Absolute URL of the book page.
 * @returns {Promise<void>} Settles after the insert has succeeded or its error has been logged.
 */
async function getbook(url){
    let res=await axios.get(url)
    let $=cheerio.load(res.data)

    // element text is read with text(); attribute values are read with attr()
    let author=$("#wrap > div.xiaotian > div.left > div:nth-child(2) > dl.yxjj > dd:nth-child(3) > a").text()
    let bookname=$("#wrap > div.xiaotian > div.left > div:nth-child(2) > dl.yxjj > h2 > dt > a").attr("title")
    let cataory=$("#wrap > div.xiaotian > div.left > div:nth-child(2) > dl.yxjj > dd:nth-child(2) > a").text()

    // Course.create performs the insert itself. The former trailing course.save()
    // referenced an undefined variable and threw on every call, so it is removed.
    try{
        await Course.create({
            author,
            bookname,
            cataory
        })
        console.log("添加数据成功")
    }catch(err){
        console.log(err)
    }
}

// Crawl listing page 1; log a failure instead of leaving the rejection unhandled
getPage(1).catch((err)=>{
    console.log(err)
})

/**
 * Promise-based delay; put await in front of the call inside an async function.
 * @param {number} [milliseconds=1000] - How long to wait before the promise resolves.
 * @returns {Promise<void>} Resolves with no value once the timer fires.
 */
function sleep(milliseconds = 1000) {
    return new Promise((done) => setTimeout(done, milliseconds));
}

// Start a crawl of listing pages 0..4, one page every three seconds.
// NOTE(review): the loop starts at page 0 while other calls in this script start at 1 - confirm the intended range.
(async function begSp(){
    for(let j=0;j<5;j++){
        await sleep(3000)
        // not awaited, so the pacing is driven by sleep alone; failures are logged rather than left unhandled
        getPage(j).catch((err)=>{
            console.log(err)
        })
        console.log("hello")
    }
})()

uncloud json转换

let axios=require("axios")
let cheerio=require("cheerio")
let mongoose = require("mongoose")
// Connect to the database and log the outcome
mongoose.connect("mongodb://localhost/playground", { useUnifiedTopology: true })
.then(() => {
    console.log("连接数据库成功")
})
.catch(err => {
    // print the underlying error too, so a failed connection can be diagnosed
    console.log("连接数据库失败", err)
})

    // Schema for one scraped book: three string fields
    const courseSchema = new mongoose.Schema({ author: String, bookname: String, cataory: String })
    // Model built from the schema; the rest of the script inserts documents through it
    const Course = mongoose.model("Course", courseSchema)

// Page counters declared for the crawl
let page=1
let count=1

/**
 * Fetch one listing page and start a scrape for every book link found on it.
 * @param {number} count - Page number substituted into the listing URL.
 * @returns {Promise<void>} Settles once the listing page has been fetched and
 *   every book scrape has been started (the scrapes themselves are not awaited).
 */
async function getPage(count){
    let res=await axios.get(`https://www.bookben.net/xuanhuan/${count}.html`)
    // console.log(res.data)
    let $=cheerio.load(res.data)
    $("#wrap > div.xiaotian > div.left > div.nylr > div.lblr > dl > dt a").each((index,value)=>{
        // links on the page are site-relative, so prefix the origin
        let bookurl=$(value).attr("href")
        bookurl="https://www.bookben.net"+bookurl
        // getbook is not awaited; catch here so a failed book cannot raise an unhandled rejection
        getbook(bookurl).catch((err)=>{
            console.log(err)
        })
    })
}

/**
 * Fetch one book page, extract author, title and category, and store them through the Course model.
 * @param {string} url - Absolute URL of the book page.
 * @returns {Promise<void>} Settles after the insert has succeeded or its error has been logged.
 */
async function getbook(url){
    let res=await axios.get(url)
    let $=cheerio.load(res.data)

    // element text is read with text(); attribute values are read with attr()
    let author=$("#wrap > div.xiaotian > div.left > div:nth-child(2) > dl.yxjj > dd:nth-child(3) > a").text()
    let bookname=$("#wrap > div.xiaotian > div.left > div:nth-child(2) > dl.yxjj > h2 > dt > a").attr("title")
    let cataory=$("#wrap > div.xiaotian > div.left > div:nth-child(2) > dl.yxjj > dd:nth-child(2) > a").text()

    // Course.create performs the insert itself. The former trailing course.save()
    // referenced an undefined variable and threw on every call, so it is removed.
    try{
        await Course.create({
            author,
            bookname,
            cataory
        })
        console.log("添加数据成功")
    }catch(err){
        console.log(err)
    }
}

// Crawl listing page 1; log a failure instead of leaving the rejection unhandled
getPage(1).catch((err)=>{
    console.log(err)
})

/**
 * Promise-based delay; put await in front of the call inside an async function.
 * @param {number} [milliseconds=1000] - How long to wait before the promise resolves.
 * @returns {Promise<void>} Resolves with no value once the timer fires.
 */
function sleep(milliseconds = 1000) {
    return new Promise((done) => setTimeout(done, milliseconds));
}

// Start a crawl of listing pages 0..4, one page every three seconds.
// NOTE(review): the loop starts at page 0 while other calls in this script start at 1 - confirm the intended range.
(async function begSp(){
    for(let j=0;j<5;j++){
        await sleep(3000)
        // not awaited, so the pacing is driven by sleep alone; failures are logged rather than left unhandled
        getPage(j).catch((err)=>{
            console.log(err)
        })
        console.log("hello")
    }
})()
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值