java chrome puppeteer 截图

liu---shui

已于 2022-03-02 09:45:25 修改

阅读量1.3k

点赞数

分类专栏： java技术　文章标签： java chrome

于 2022-02-23 10:02:07 首次发布

本文链接：https://blog.csdn.net/liushuiziyouliu/article/details/123082882

版权

java技术专栏收录该内容

14 篇文章 0 订阅

订阅专栏

1.安装node环境
2.node安装puppeteer插件

npm i --save puppeteer --ignore-scripts

3.截图需要js

//导入包
const puppeteer = require('puppeteer');
/**
 * Opens a page in a locally installed Chrome, waits a fixed time for it to
 * settle, prints the resulting HTML and saves a viewport screenshot.
 *
 * All inputs (Chrome path, URL, output file, wait time) are the constants
 * declared at the top of the function.
 */
(async () => {
  const chromePath = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
  const address = "https://news.qq.com/"
  const path = "/Users/admin/Desktop/test.png"
  // Fixed delay (ms) given to the page after DOMContentLoaded before capturing.
  const time = 15000

  // Launch a new browser instance.
  const browser = await puppeteer.launch({
    ignoreHTTPSErrors: true,
    headless: false,
    executablePath: chromePath,
    args: ['--no-sandbox', '--disable-setuid-sandbox']
  })

  try {
    // Open a new tab and navigate. A bare `.catch()` without a handler does
    // not swallow anything, so it was dropped; failures propagate to the
    // handler at the bottom while `finally` still closes the browser.
    const page = await browser.newPage()
    await page.goto(address, {waitUntil: 'domcontentloaded'})

    // Plain timer instead of page.waitFor(), which newer Puppeteer releases
    // no longer provide.
    await new Promise((resolve) => setTimeout(resolve, time))

    // Prepend a stylesheet that gives <body> a white background. It is placed
    // first in <head>, so equally specific rules from the page itself come
    // later in the cascade. Awaited so the screenshot cannot race the injection.
    await page.evaluate(function () {
      const style = document.createElement('style')
      const text = document.createTextNode('body { background: #fff }')
      style.setAttribute('type', 'text/css')
      style.appendChild(text)
      document.head.insertBefore(style, document.head.firstChild)
    })

    const content = await page.content()
    console.log(content)
    await page.screenshot({path: path})
  } finally {
    // Always release the browser, even when navigation or capture failed.
    await browser.close()
  }
})().catch((err) => {
  // Report the failure and signal it through the exit code.
  console.error(err)
  process.exitCode = 1
})

4.滚动截屏js

//导入包
const puppeteer = require('puppeteer');
/**
 * Opens a page in a locally installed Chrome, scrolls it one viewport at a
 * time (with a random pause between steps) so lazily loaded content has a
 * chance to appear, then saves a full-page screenshot.
 *
 * Relies on the `randomNum` and `sleep` helpers defined in this script.
 */
(async () => {
  // Path to the Chrome executable. It could instead be read from the command
  // line via process.argv[2].
  const chromePath = '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome'
  // URL of the page to capture. It could instead be read from process.argv[3].
  const address = 'https://news.qq.com/'
  const path = '/Users/admin/Desktop/test.png'
  // Fixed delay (ms) given to the page after DOMContentLoaded.
  const time = 15000

  const system_warn = 1002 // result code: system notice / warning

  // The viewport height is also used in the page to estimate how many scroll
  // steps are needed.
  const viewport = {width: 1920, height: 1080}
  // Page heights (px) above which the page is flagged as too tall for an
  // image / above which scrolling is abandoned after 40 steps.
  const page_screenshot_height_limit = 60000
  const max_height_px = 60000

  // Launch a new browser instance.
  const browser = await puppeteer.launch({
    ignoreHTTPSErrors: true,
    headless: false,
    executablePath: chromePath,
    args: ['--no-sandbox', '--disable-setuid-sandbox']
  })

  // Collects the warning raised when the page is too tall to keep scrolling.
  const result_map = new Map()

  try {
    // Page creation and navigation live inside the try block so the browser
    // is closed in `finally` even when they fail.
    const page = await browser.newPage()
    // Size the viewport before navigating so the page is laid out once.
    await page.setViewport(viewport)
    await page.goto(address, {waitUntil: 'domcontentloaded'})

    // Same total settle time as before (time + 5000 ms), using the local
    // sleep helper instead of page.waitFor().
    await sleep(time)
    await sleep(5000)

    let scroll_times = 0
    let mValues = {'scrollEnable': true, 'height_limit': false, 'times': 2}

    while (mValues.scrollEnable) {
      // Arguments to page.evaluate are serialized, so objects mutated inside
      // the page never change on the Node side. Everything the page wants to
      // report is therefore returned explicitly.
      mValues = await page.evaluate((max_height_px,
        page_screenshot_height_limit,
        height_limit,
        system_warn,
        scroll_times,
        viewport_height
      ) => {
        let times = 1
        let scrollEnable = true
        let resultCode = null
        let warning = null

        if (document.body != null) {
          window.scrollBy(0, window.innerHeight)
          times = Math.floor(document.body.clientHeight / viewport_height)

          // Taller than the image height limit (the original note says a PDF
          // should be generated in this case; nothing here does so).
          if (document.body.clientHeight > page_screenshot_height_limit) {
            height_limit = true
          }
          // Taller than the page height limit after many steps: stop scrolling.
          if (document.body.clientHeight > max_height_px && scroll_times > 40) {
            resultCode = system_warn
            warning = '网页加载高度过长, 易造成数据获取失败。'
            scrollEnable = false
          }
        } else {
          scrollEnable = false
        }

        times = times + 1
        return {
          'scrollEnable': scrollEnable,
          'height_limit': height_limit,
          'times': times,
          'title': document.title,
          'resultCode': resultCode,
          'warning': warning
        }
      }, max_height_px, page_screenshot_height_limit, mValues.height_limit, system_warn, scroll_times, viewport.height)

      if (mValues.resultCode != null) {
        result_map.set('resultCode', mValues.resultCode)
        result_map.set('warning', mValues.warning)
        console.warn(mValues.warning)
      }

      // Pause for a random interval between scroll steps.
      const randomMillSecond = randomNum(600, 2000)
      await sleep(randomMillSecond)
      scroll_times++
      console.log(address + ' 需要滚动 : ' + mValues.times + '次 , 滚动第[' + scroll_times + ']次')
      if (scroll_times > mValues.times) {
        console.log(address + ' 结束')
        mValues.scrollEnable = false
      }
    }

    // Prepend a stylesheet that gives <body> a white background. Awaited so
    // the screenshot cannot be taken before the style is in place.
    await page.evaluate(function () {
      const style = document.createElement('style')
      const text = document.createTextNode('body { background: #fff }')
      style.setAttribute('type', 'text/css')
      style.appendChild(text)
      document.head.insertBefore(style, document.head.firstChild)
    })

    await page.screenshot({path: path, fullPage: true})
  } catch (e) {
    console.log(e)
    console.log('执行异常')
  } finally {
    await browser.close()
  }
})().catch((err) => {
  // Reached only when launching or closing the browser fails.
  console.error(err)
  process.exitCode = 1
})

/**
 * Returns a random integer.
 *
 * - One argument:  an integer in [1, minNum].
 * - Two arguments: an integer in [minNum, maxNum] (both inclusive).
 * - Any other number of arguments: 0.
 *
 * Uses Math.floor rather than parseInt: parseInt converts its argument to a
 * string first, so a very small value such as 4e-9 would be read as 4, and
 * truncation toward zero would skew ranges with a negative lower bound.
 *
 * @param {number} minNum - upper bound (one-argument form) or lower bound.
 * @param {number} [maxNum] - inclusive upper bound.
 * @returns {number} the random integer, or 0 for an unsupported arity.
 */
function randomNum (minNum, maxNum) {
  switch (arguments.length) {
    case 1:
      return Math.floor(Math.random() * minNum + 1)
    case 2:
      return Math.floor(Math.random() * (maxNum - minNum + 1) + minNum)
    default:
      return 0
  }
}

/**
 * Returns a promise that resolves with 1 after `delay` milliseconds.
 *
 * The promise never rejects: resolving cannot throw, so the former
 * try/catch/reject path was unreachable and has been removed.
 *
 * @param {number} delay - time to wait, in milliseconds.
 * @returns {Promise<number>} resolves with 1 once the timer fires.
 */
function sleep (delay) {
  return new Promise((resolve) => {
    setTimeout(() => resolve(1), delay)
  })
}

5.java执行cmd命令截图

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import java.io.InputStream;



    /**
     * Runs the Node.js screenshot script as a child process and parses
     * whatever the script prints on standard output as HTML.
     *
     * The Chrome path and the target URL are handed to the script as its
     * first and second command-line arguments.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String url = "https://news.qq.com/";
        Document document = null;
        // Path to the Chrome executable.
        String chromePath = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome";

        // Node.js binary and the screenshot script it runs. The original note
        // says the two need to be in the same directory (not verified here).
        String nodePath = "/usr/local/opt/node@10/bin/node";
        String scriptPath = "/Users/admin/Desktop/printscrnnt.js";

        // Pass every argument separately. Runtime.exec(String) splits the
        // command line on whitespace, which would break the Chrome path
        // because it contains a space.
        ProcessBuilder builder = new ProcessBuilder(nodePath, scriptPath, chromePath, url);
        // Forward the child's stderr to this process so it is visible and is
        // never left undrained.
        builder.redirectError(ProcessBuilder.Redirect.INHERIT);

        Process process = null;
        try {
            System.err.println("ecec =======> " + builder.command());

            // Run the script.
            process = builder.start();

            // Reading to end-of-stream also drains the child's stdout; the
            // stream is closed automatically afterwards.
            try (InputStream is = process.getInputStream()) {
                document = Jsoup.parse(is, "UTF-8", url);
            }

            process.waitFor();
        } catch (InterruptedException e) {
            // Preserve the interrupt status for callers further up.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (process != null) {
                process.destroy();
            }
        }

        // document stays null when the script could not be started or its
        // output could not be parsed.
        if (document != null) {
            document.body();
        }
    }

liu---shui

关注

0
点赞
踩
3

收藏

觉得还不错? 一键收藏
0
评论
java chrome puppeteer 截图

安装node环境 node安装puppeteer插件npm i --save puppeteer --ignore-scripts3.截图需要js//导入包const puppeteer = require('puppeteer');(async () => { const chromePath = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" const address =...
复制链接

扫一扫