http爬虫
http模块的数据请求: get,post,request
爬虫: 爬取数据,但并不是所有网站都能爬取的,有些网站有反爬虫机制
流程: 后端数据请求 —> 数据分析 —> 数据清洗 —> 数据前台发送
const http = require( 'http' )// Node core HTTP client
const zlib = require( 'zlib' )// Node core compression module, used to decode gzip/deflate response bodies
const cheerio = require( 'cheerio' )// cheerio, used to parse the HTML and extract data
// Request configuration handed to http.get(): target host, port, path,
// method and the headers sent with the request.
const options = {
  hostname: 'jx.1000phone.net', // host name
  port: 80, // port
  path: '/teacher.php/Class/classDetail/param/rqiWlsefmajGmqJhXXWhl3ZiY2dn', // request path
  method: 'GET', // HTTP method
  headers: { // request headers
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
    // Ask for an uncompressed body: Node's http client does not decode
    // gzip/deflate by itself, so advertising them risks an unreadable response.
    'Accept-Encoding': 'identity',
    'Accept-Language': 'zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    // NOTE(review): this is a session credential committed to source; it will
    // expire and should not be shared. Consider loading it from the environment.
    'Cookie': 'PHPSESSID=ST-91625-drj9QJxH287RYSrtXEIOz7ZePTo-izm5ejd5j1npj2pjc7i3v4z',
    'Host': 'jx.1000phone.net',
    'Pragma': 'no-cache',
    'Referer': 'http://jx.1000phone.net/teacher.php/Class/index',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1',
    // Content-Type and Content-Length are intentionally absent: a GET request
    // has no body, and an empty Content-Length value is not a valid header.
  },
};
// Fetch the page described by `options`, then print the text of every
// `td.student a` element found in the returned HTML.
//
// http.get() ends the request automatically, so no explicit req.end() follows.
const req = http.get(options, (res) => { // res: the server's response
  const reportError = (e) => {
    console.error(`Got error: ${e.message}`);
  };

  const { statusCode } = res;
  if (statusCode < 200 || statusCode >= 300) {
    // A redirect or error page does not contain the expected markup; report
    // it instead of silently printing nothing.
    console.error(`Request failed. Status code: ${statusCode}`);
    res.resume(); // drain the body so the socket can be released
    return;
  }

  // Node's http client does not decompress bodies on its own, so honour the
  // Content-Encoding the server reports before decoding the bytes as text.
  const encoding = String(res.headers['content-encoding'] || '').toLowerCase();
  let body = res;
  if (encoding === 'gzip' || encoding === 'x-gzip') {
    body = res.pipe(zlib.createGunzip());
  } else if (encoding === 'deflate') {
    body = res.pipe(zlib.createInflate());
  }

  res.on('error', reportError);
  if (body !== res) {
    // pipe() does not forward errors, so the decompressor needs its own handler.
    body.on('error', reportError);
  }

  body.setEncoding('utf8'); // decode chunks as UTF-8 strings
  let rawData = '';
  body.on('data', (chunk) => { rawData += chunk; });
  body.on('end', () => {
    try {
      // rawData now holds the complete HTML string.
      const $ = cheerio.load(rawData);
      $('td.student a').each(function () {
        console.log($(this).text());
      });
    } catch (e) {
      console.error(e.message);
    }
  });
}).on('error', reportError => {
  console.error(`Got error: ${reportError.message}`);
});