1. 安装 node 环境
2. 使用 node (npm) 安装 puppeteer 插件
npm i --save puppeteer --ignore-scripts
3.截图需要js
//导入包
const puppeteer = require('puppeteer');
/**
 * Viewport screenshot script.
 *
 * Opens `address` in a locally installed Chrome, waits a fixed time for the
 * page's scripts to run, injects a white body background, prints the rendered
 * HTML to stdout and writes a screenshot to `path`.
 */
(async () => {
  const chromePath = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome";
  const address = "https://news.qq.com/";
  const path = "/Users/admin/Desktop/test.png";
  // Fixed delay (ms) given to the page after DOMContentLoaded.
  const time = 15000;

  // Launch a new browser instance.
  const browser = await puppeteer.launch({
    ignoreHTTPSErrors: true,
    headless: false,
    executablePath: chromePath,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    // Open a new tab.
    const page = await browser.newPage();

    // Navigate. The original chained an argument-less `.catch()`, which is a
    // no-op, so a navigation failure has always propagated; it still does,
    // but the `finally` below now closes the browser first.
    await page.goto(address, {waitUntil: 'domcontentloaded'});

    // Plain timer instead of `page.waitFor(ms)`, which is deprecated and
    // absent from newer Puppeteer releases.
    await new Promise((resolve) => setTimeout(resolve, time));

    // Inject a stylesheet forcing a white background. This must be awaited,
    // otherwise content()/screenshot() can run before the style is applied.
    await page.evaluate(() => {
      const style = document.createElement('style');
      const text = document.createTextNode('body { background: #fff }');
      style.setAttribute('type', 'text/css');
      style.appendChild(text);
      document.head.insertBefore(style, document.head.firstChild);
    });

    // The rendered HTML goes to stdout.
    const content = await page.content();
    console.log(content);

    await page.screenshot({path: path});
  } finally {
    // Always release the Chrome process, even when a step above failed.
    await browser.close();
  }
})().catch((err) => {
  // Report on stderr so stdout carries only the page HTML.
  console.error(err);
  process.exitCode = 1;
});
4.滚动截屏js
//导入包
const puppeteer = require('puppeteer');
/**
 * Scrolling full-page screenshot script.
 *
 * Opens `address`, scrolls down one viewport at a time (with a random pause
 * between steps) so lazily loaded content gets a chance to render, then
 * injects a white body background and writes a full-page screenshot to `path`.
 * Relies on the `randomNum` and `sleep` helpers defined in this file.
 */
(async () => {
  // Chrome executable path (could be taken from process.argv[2] instead).
  const chromePath = '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome';
  // URL to capture (could be taken from process.argv[3] instead).
  const address = 'https://news.qq.com/';
  const path = '/Users/admin/Desktop/test.png';
  // Fixed delay (ms) given to the page after DOMContentLoaded.
  const time = 15000;
  const system_warn = 1002; // system notice / warning code
  const viewport = {width: 1920, height: 1080};
  // Page height (px) above which scrolling stops once more than 40 scrolls happened.
  const max_height_px = 60000;
  // Page height (px) above which `height_limit` is flagged.
  const page_screentshot_height_limit = 60000;

  // Launch a new browser instance.
  const browser = await puppeteer.launch({
    ignoreHTTPSErrors: true,
    headless: false,
    executablePath: chromePath,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  // Collected on the Node side. Arguments to page.evaluate are serialized,
  // so a Map mutated inside the page never reaches this process.
  const result_map = new Map();
  let scroll_times = 0;
  let mValues = {scrollEnable: true, height_limit: false, times: 2};

  try {
    // Open a new tab.
    const page = await browser.newPage();

    // Set the viewport before navigating so the page lays out once at the
    // final size instead of being resized after load.
    await page.setViewport(viewport);

    // Navigate. The original's argument-less `.catch()` was a no-op; a
    // failure here is now handled by the catch/finally below.
    await page.goto(address, {waitUntil: 'domcontentloaded'});

    // `sleep` replaces `page.waitFor(ms)`, which is deprecated and absent
    // from newer Puppeteer releases. Both original waits are kept.
    await sleep(time);
    await sleep(5000);

    while (mValues.scrollEnable) {
      mValues = await page.evaluate((maxHeight, heightLimit, viewportHeight, scrollsSoFar) => {
        const state = {
          scrollEnable: true,
          height_limit: false,
          times: 2,
          title: document.title,
          warning: null,
        };
        if (document.body == null) {
          state.scrollEnable = false;
          return state;
        }
        window.scrollBy(0, window.innerHeight);
        const pageHeight = document.body.clientHeight;
        // Number of viewport-sized scrolls needed to reach the bottom.
        state.times = Math.floor(pageHeight / viewportHeight) + 1;
        // Taller than the screenshot height limit.
        state.height_limit = pageHeight > heightLimit;
        // Taller than the page height limit after many scrolls: stop scrolling.
        if (pageHeight > maxHeight && scrollsSoFar > 40) {
          state.warning = '网页加载高度过长, 易造成数据获取失败。';
          state.scrollEnable = false;
        }
        return state;
      }, max_height_px, page_screentshot_height_limit, viewport.height, scroll_times);

      if (mValues.warning) {
        result_map.set('resultCode', system_warn);
        result_map.set('warning', mValues.warning);
        console.warn(`${address} [${system_warn}] ${mValues.warning}`);
      }

      // Pause for a random interval between scrolls.
      const randomMillSecond = randomNum(600, 2000);
      await sleep(randomMillSecond);
      scroll_times++;
      console.log(`${address} 需要滚动 : ${mValues.times}次 , 滚动第[${scroll_times}]次`);
      if (scroll_times > mValues.times) {
        console.log(`${address} 结束`);
        mValues.scrollEnable = false;
      }
    }

    // Inject a stylesheet forcing a white background. This must be awaited,
    // otherwise the screenshot can be taken before the style is applied.
    await page.evaluate(() => {
      const style = document.createElement('style');
      const text = document.createTextNode('body { background: #fff }');
      style.setAttribute('type', 'text/css');
      style.appendChild(text);
      document.head.insertBefore(style, document.head.firstChild);
    });

    await page.screenshot({path: path, fullPage: true});
  } catch (e) {
    console.log(e);
    console.log('执行异常');
  } finally {
    // Always release the Chrome process.
    await browser.close();
  }
})().catch((err) => {
  // Failures outside the try block (launch, close) end up here.
  console.error(err);
  process.exitCode = 1;
});
/**
 * Random integer helper.
 *
 * - `randomNum(max)`      returns an integer in [1, max].
 * - `randomNum(min, max)` returns an integer in [min, max].
 * - any other number of arguments returns 0.
 *
 * Uses `Math.floor` rather than `parseInt`: `parseInt` stringifies its
 * argument first, so a tiny value such as 8e-7 parses as 8, and it truncates
 * toward zero, which skews ranges that include negative numbers.
 *
 * @param {number} [minNum] - Lower bound, or the upper bound when it is the only argument.
 * @param {number} [maxNum] - Upper bound (inclusive).
 * @returns {number} The random integer, or 0 for an unsupported argument count.
 */
function randomNum (...bounds) {
  const [minNum, maxNum] = bounds;
  switch (bounds.length) {
    case 1:
      return Math.floor(Math.random() * minNum + 1);
    case 2:
      return Math.floor(Math.random() * (maxNum - minNum + 1) + minNum);
    default:
      return 0;
  }
}
/**
 * Delay helper.
 *
 * @param {number} delay - Time to wait, in milliseconds.
 * @returns {Promise<number>} Promise that fulfils with 1 once the timer fires.
 */
function sleep (delay) {
  // setTimeout forwards its extra argument to the callback, so the promise
  // fulfils with the value 1.
  return new Promise((done) => setTimeout(done, delay, 1));
}
5.java执行cmd命令截图
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import java.io.InputStream;
/**
 * Runs the Node.js screenshot script as a child process, passing the Chrome
 * path and the target URL as arguments, and parses the script's standard
 * output as an HTML document with Jsoup.
 *
 * @param args unused
 */
public static void main(String[] args) {
    String url = "https://news.qq.com/";
    Document document = null;
    // Path to the Chrome executable.
    String chromePath = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome";
    // Node.js executable and the screenshot script.
    // NOTE(review): the original comment said the two must live in the same
    // directory; the paths below do not, so confirm what was meant.
    String nodePath = "/usr/local/opt/node@10/bin/node";
    String scriptPath = "/Users/admin/Desktop/printscrnnt.js";
    // One array element per argument. Runtime.exec(String) splits the command
    // on whitespace, which would break the Chrome path ("Google Chrome.app")
    // into two separate arguments.
    String[] command = {nodePath, scriptPath, chromePath, url};
    Process process = null;
    try {
        ProcessBuilder builder = new ProcessBuilder(command);
        // Forward the child's stderr to ours so an unread pipe cannot fill up
        // and stall the script.
        builder.redirectError(ProcessBuilder.Redirect.INHERIT);
        // Run the script.
        process = builder.start();
        System.err.println("ecec =======> " + String.join(" ", command));
        // Jsoup reads the stream to EOF; close it afterwards.
        try (InputStream is = process.getInputStream()) {
            document = Jsoup.parse(is, "UTF-8", url);
        }
        process.waitFor();
    } catch (InterruptedException e) {
        // Keep the interrupt flag set for callers further up the stack.
        Thread.currentThread().interrupt();
        e.printStackTrace();
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        if (process != null) {
            process.destroy();
        }
    }
    if (document == null) {
        // Starting the script or parsing its output failed (already reported above).
        System.err.println("No document could be parsed from the script output.");
        return;
    }
    // Parsed page body; the result is not used further here.
    document.body();
}