playwright-har 是 puppeteer-har 的 Playwright 移植版,使用方法类似。以下是一个简单的试用示例。

环境准备

  • docker-compose
    使用了browserless
# Docker Compose file that runs one container from the browserless Chromium image.
version: "3"
services:
  browser:
    image: ghcr.io/browserless/chromium:latest
    environment:
      # NOTE(review): presumably browserless limits on concurrent and queued
      # sessions, and its CORS settings — confirm against the browserless docs.
      - CONCURRENT=40
      - QUEUED=20
      - CORS=true
      - CORS_MAX_AGE=300
      # Same container path as the volume mount target below.
      - DATA_DIR=/tmp/my-profile
      # The client script in this document passes this same value as `?token=`.
      - TOKEN=6R0W53R135510
    volumes:
      # Host directory ./my-profile mounted at /tmp/my-profile in the container.
      - ./my-profile:/tmp/my-profile
    ports:
      # Container port 3000 published on host port 3000.
      - "3000:3000"
  • 1.
  • 2.
  • 3.
  • 4.
  • 5.
  • 6.
  • 7.
  • 8.
  • 9.
  • 10.
  • 11.
  • 12.
  • 13.
  • 14.
  • 15.
services:
  • 1.

使用

安装依赖yarn add playwright playwright-har

  • dalongv3.js
const { chromium } = require("playwright");
const { PlaywrightHar } = require('playwright-har');

// Page that is loaded and recorded; also reported as `address` in every output row.
const TARGET_URL = "https://news.baidu.com/";

/**
 * Connects to the remote browser over CDP, loads TARGET_URL while recording
 * network traffic with playwright-har, prints the inner HTML of every
 * `.column` element as JSON, and writes the recording to ./baidu.har.
 */
(async () => {
  const browser = await chromium.connectOverCDP(
    "ws://localhost:3000?token=6R0W53R135510"
  );

  try {
    const context = await browser.newContext({
      userAgent:
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
    });
    const page = await context.newPage();

    // Start recording before navigating so the initial requests are captured.
    const playwrightHar = new PlaywrightHar(page);
    await playwrightHar.start();

    await page.goto(TARGET_URL, {
      waitUntil: "networkidle",
    });

    const rows = [];
    // locator.all() already resolves to an array, so no second await is needed.
    const columns = await page.locator(".column").all();
    for (const column of columns) {
      rows.push({
        address: TARGET_URL,
        content: await column.innerHTML(),
        title: "dalong.js",
      });
    }
    console.log(JSON.stringify(rows));

    await playwrightHar.stop('./baidu.har');
  } finally {
    // Always release the remote browser session, even when a step above fails.
    await browser.close();
  }
})().catch((err) => {
  // Report the failure instead of leaving an unhandled promise rejection.
  console.error(err);
  process.exitCode = 1;
});
  • 1.
  • 2.
  • 3.
  • 4.
  • 5.
  • 6.
  • 7.
  • 8.
  • 9.
  • 10.
  • 11.
  • 12.
  • 13.
  • 14.
  • 15.
  • 16.
  • 17.
  • 18.
  • 19.
  • 20.
  • 21.
  • 22.
  • 23.
  • 24.
  • 25.
  • 26.
  • 27.
  • 28.
  • 29.
const {PlaywrightHar} = require('playwright-har');
  • 1.
  • har 文件效果

playwright-har 试用_Network

 

内部实现

内部实现上使用了chrome-har 这个npm 包

  • 参考实现
import { harFromMessages } from 'chrome-har';
import { writeFileSync } from 'fs';
import { CDPSession, Page } from 'playwright-chromium';
import { PlaywrightHarConfig } from './playwright-har-config';
 
/**
 * Records Chrome DevTools Protocol (CDP) page and network events for a
 * Playwright page and converts them into a HAR object with chrome-har.
 *
 * Call `start()` before the traffic of interest and `stop()` afterwards.
 */
export class PlaywrightHar {

    private page: Page;
    private client: CDPSession;
    // Pending response-body lookups; stop() waits for all of them to settle.
    private addResponseBodyPromises: Promise<void>[] = [];
    // Recorded events in arrival order, in the { method, params } shape handed to harFromMessages.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- params differ per CDP event
    private events: { method: string; params: any }[] = [];
    private config: PlaywrightHarConfig;

    /**
     * @param page   Page whose traffic is recorded.
     * @param config Optional settings; a default PlaywrightHarConfig is created when omitted or null.
     */
    constructor(page: Page, config: PlaywrightHarConfig = null) {
        this.page = page;
        // Keep a caller-supplied config. Previously it was discarded, which left
        // this.config undefined and made later reads of recordResponses throw.
        this.config = config != null ? config : new PlaywrightHarConfig();
    }

    /**
     * Opens a CDP session for the page, enables the Page and Network domains
     * and starts collecting the events listed below. For responses that are
     * expected to have a retrievable body, the body is requested and attached
     * to the recorded event.
     */
    async start() {
        //@ts-ignore
        // newCDPSession is only available for ChromiumBrowserContext
        // Create a new CDP session from the page; events and their response bodies are collected through it.
        this.client = await this.page.context().newCDPSession(this.page);
        await this.client.send('Page.enable');
        await this.client.send('Network.enable');
        const observe = [
            'Page.loadEventFired',
            'Page.domContentEventFired',
            'Page.frameStartedLoading',
            'Page.frameAttached',
            'Page.frameScheduledNavigation',
            'Network.requestWillBeSent',
            'Network.requestServedFromCache',
            'Network.dataReceived',
            'Network.responseReceived',
            'Network.resourceChangedPriority',
            'Network.loadingFinished',
            'Network.loadingFailed',
            // NOTE(review): this name is also sent as a command below; it looks like
            // a command rather than an event, so this listener may never fire — confirm.
            'Network.getResponseBody'
        ];
        observe.forEach(method => {
            //@ts-ignore
            // Doesn't work when array contains symbols instead of strings
            this.client.on(method, params => {
                const harEvent = { method, params };
                this.events.push(harEvent);
                if (method === 'Network.responseReceived') {
                    if (this.config.recordResponses === false) {
                        return;
                    }

                    const response = harEvent.params.response;
                    const requestId = harEvent.params.requestId;
                    // Response body is unavailable for redirects, no-content, image, audio and video responses
                    if (
                        response.status !== 204 &&
                        response.headers.location == null &&
                        !response.mimeType.includes('image') &&
                        !response.mimeType.includes('audio') &&
                        !response.mimeType.includes('video')
                    ) {
                        const addResponseBodyPromise = this.client.send('Network.getResponseBody', { requestId }).then(
                            responseBody => {
                                // Set the response so chrome-har can add it to the HAR file
                                harEvent.params.response = {
                                    ...response,
                                    body: Buffer.from(responseBody.body, responseBody.base64Encoded ? 'base64' : undefined).toString()
                                };
                            },
                            // Best effort: a failed body lookup leaves the event without a body
                            // and must not reject the Promise.all in stop().
                            reason => { }
                        );
                        this.addResponseBodyPromises.push(addResponseBodyPromise);
                    }
                }
            });
        });
    }

    /**
     * Waits for pending response-body lookups, builds the HAR object from the
     * recorded events with harFromMessages, and clears the recorded state.
     *
     * @param path When given, the HAR is written to this file as JSON and nothing is returned.
     * @returns The HAR object when no path is given.
     */
    async stop(path?: string) {
        await Promise.all(this.addResponseBodyPromises);
        const harObject = harFromMessages(this.events, { includeTextFromResponseBody: this.config.recordResponses !== false });
        this.events = [];
        this.addResponseBodyPromises = [];
        if (path) {
            writeFileSync(path, JSON.stringify(harObject));
        }
        else {
            return harObject
        }

    }
}
  • 1.
  • 2.
  • 3.
  • 4.
  • 5.
  • 6.
  • 7.
  • 8.
  • 9.
  • 10.
  • 11.
  • 12.
  • 13.
  • 14.
  • 15.
  • 16.
  • 17.
  • 18.
  • 19.
  • 20.
  • 21.
  • 22.
  • 23.
  • 24.
  • 25.
  • 26.
  • 27.
  • 28.
  • 29.
  • 30.
  • 31.
  • 32.
  • 33.
  • 34.
  • 35.
  • 36.
  • 37.
  • 38.
  • 39.
  • 40.
  • 41.
  • 42.
  • 43.
  • 44.
  • 45.
  • 46.
  • 47.
  • 48.
  • 49.
  • 50.
  • 51.
  • 52.
  • 53.
  • 54.
  • 55.
  • 56.
  • 57.
  • 58.
  • 59.
  • 60.
  • 61.
  • 62.
  • 63.
  • 64.
  • 65.
  • 66.
  • 67.
  • 68.
  • 69.
  • 70.
  • 71.
  • 72.
  • 73.
  • 74.
  • 75.
  • 76.
  • 77.
  • 78.
  • 79.
  • 80.
  • 81.
  • 82.
  • 83.
  • 84.
  • 85.
  • 86.
  • 87.
  • 88.
  • 89.
  • 90.
  • 91.
  • 92.
  • 93.
  • 94.
  • 95.
import { writeFileSync } from 'fs';
  • 1.

说明

HAR 在很多时候是分析问题的一个很不错的选择,playwright-har 值得试用一下。

参考资料

 https://github.com/janzaremski/playwright-har
 https://github.com/Everettss/puppeteer-har