Chrome无头模式获取直播间弹幕

Fly_Camel_Yu

已于 2022-06-02 18:08:23 修改

阅读量2.3k

点赞数 14

分类专栏： python之rpa工具　文章标签： python chrome devtools

于 2022-06-02 17:53:05 首次发布

本文链接：https://blog.csdn.net/qq_39974348/article/details/125104191

版权

Selenium protobuf 弹幕抓取无头浏览器 DY直播

关键词由CSDN通过智能技术生成

python之rpa工具专栏收录该内容

1 篇文章 0 订阅

订阅专栏

场景：

获取DY直播间弹幕，并分类保存

安装第三方包:

经过调研，此处主要使用的是webdriver相关的包，也就是selenium，以及protobuf解析相关的包，如下：

//安装google的protobuf

pip install -i http://pypi.douban.com/simple --trusted-host pypi.douban.com protobuf

//安装webdriver相关的selenium

pip install -i http://pypi.douban.com/simple --trusted-host pypi.douban.com selenium

在本地下载和自己chrome浏览器版本号对应的chromedriver的可执行文件（.exe），各个版本的webdriver下载链接如下：http://chromedriver.storage.googleapis.com/index.html

代码实现：

下面是主类的实现过程，注意下面的chromedriver的可执行文件存放的路径要修改成本地下载的chromedriver文件在自己本地电脑上存放的对应位置，如下图：

#!/usr/bin/env python

from selenium import webdriver
import time
import json
from queue import Queue
import base64
import datetime
from selenium.webdriver.chrome.options import Options
from proto import message_pb2


class GiftTks():
    """Collect gift messages from a Douyin live room via headless Chrome.

    The browser's performance log is polled for responses of the live-room
    message endpoint; each response body is decoded as a protobuf
    ``Response`` and every ``WebcastGiftMessage`` it contains is printed and
    pushed onto a queue as a timestamped line.
    """

    # URL prefix of the message-polling endpoint whose responses are decoded.
    _IM_FETCH_PREFIX = r'https://live.douyin.com/webcast/im/fetch/?aid='
    # Timestamp layout used for every emitted line.
    _TIME_FORMAT = "%Y-%m-%d %H:%M:%S"
    # Script injected before every document loads so that
    # ``navigator.webdriver`` reads as undefined.
    _HIDE_WEBDRIVER_JS = """
                    Object.defineProperty(navigator, 'webdriver', {
                      get: () => undefined
                    })
                  """

    def __init__(self):
        super(GiftTks, self).__init__()

    @classmethod
    def run(cls, url, gift_queue,
            executable_path="D:\\tools\\python3.8\\chromedriver.exe",
            poll_interval=1):
        """Open *url* in headless Chrome and forward gift messages forever.

        Args:
            url: Address of the live room to open.
            gift_queue: Object with a ``put`` method; receives one
                ``"<timestamp>  <description>"`` string per gift message.
            executable_path: Location of the chromedriver binary. Defaults
                to the path used originally; change it to the local install.
            poll_interval: Seconds to sleep between two reads of the
                performance log.

        This method only returns by raising (for example on
        ``KeyboardInterrupt`` or a driver error); the browser is shut down
        in every case.
        """
        driver = cls._build_driver(executable_path)
        try:
            driver.execute_cdp_cmd(
                "Page.addScriptToEvaluateOnNewDocument",
                {"source": cls._HIDE_WEBDRIVER_JS})
            driver.get(url)
            while True:
                for entry in driver.get_log('performance'):
                    cls._handle_log_entry(driver, entry, gift_queue)
                time.sleep(poll_interval)
        finally:
            # Without this the Chrome and chromedriver processes outlive the
            # script whenever the loop is interrupted.
            driver.quit()

    @classmethod
    def _build_driver(cls, executable_path):
        """Create the headless Chrome driver with network logging enabled."""
        chrome_options = Options()
        # Run Chrome without a visible window.
        chrome_options.add_argument('--headless')
        chrome_options.add_argument('--disable-gpu')
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument("--start-maximized")
        # Incognito mode.
        chrome_options.add_argument('--incognito')
        # Disable the cache.
        chrome_options.add_argument("disable-cache")
        # Ignore certificate errors.
        chrome_options.add_argument('--ignore-certificate-errors')
        chrome_options.add_experimental_option(
            "excludeSwitches", ['enable-automation', 'enable-logging'])
        # NOTE(review): presumably needed so the legacy 'loggingPrefs'
        # capability below is honoured - confirm against the driver version.
        chrome_options.add_experimental_option('w3c', False)

        caps = {
            'browserName': 'chrome',
            'loggingPrefs': {
                'browser': 'ALL',
                'driver': 'ALL',
                'performance': 'ALL',
            },
            'goog:chromeOptions': {
                'perfLoggingPrefs': {
                    'enableNetwork': True,
                },
                'w3c': False,
            },
        }

        # NOTE(review): executable_path / desired_capabilities are the
        # older Selenium keyword arguments; confirm they are supported by
        # the installed Selenium release.
        return webdriver.Chrome(
            executable_path=executable_path,
            desired_capabilities=caps,
            options=chrome_options)

    @classmethod
    def _handle_log_entry(cls, driver, entry, gift_queue):
        """Process one performance-log entry, emitting any gift messages."""
        log = json.loads(entry['message'])['message']
        if log['method'] != 'Network.responseReceived' or 'params' not in log:
            return

        params = log['params']
        response_url = params['response']['url']
        request_id = params['requestId']
        if not str(response_url).startswith(cls._IM_FETCH_PREFIX):
            return

        try:
            content = driver.execute_cdp_cmd(
                'Network.getResponseBody', {'requestId': request_id})
        except Exception:
            # Best effort: report the request whose body could not be
            # fetched and move on to the next log entry.
            print(request_id)
            return

        # Only base64-encoded bodies are decoded; anything else is skipped.
        if content.get('base64Encoded') is not True:
            return

        response = message_pb2.Response()
        response.ParseFromString(base64.b64decode(content['body']))
        for msg in response.messages:
            if msg.method != 'WebcastGiftMessage':
                continue
            gift = message_pb2.GiftMessage()
            gift.ParseFromString(msg.payload)
            # Build the line once so the printed and queued text are
            # guaranteed to carry the same timestamp.
            line = '{}  {}'.format(
                datetime.datetime.now().strftime(cls._TIME_FORMAT),
                gift.common.describe)
            print(line)
            gift_queue.put(line)


if __name__ == '__main__':
    # Live-room URL (placeholder - replace with the real room address).
    url = "https:xxxxx"
    # Receives one timestamped line per gift message.
    gift_queue = Queue()
    GiftTks.run(url, gift_queue)