chromedp抓取数据,从0开始踩坑记录

前言

本文是chromedp新手练习记录,如有不当之处欢迎指正。

笔者曾使用selenium做项目自动化测试,selenium的API丰富程度和社区活跃度相较于chromedp都更高一些,所以chromedp学习过程中ctrl+c+v神器不怎么好用,但是如果可以用go写项目的话那必然是极好的。


一、准备工作

笔者的环境如下:

Google Chrome版本 126.0.6478.127(正式版本)

Goland2024

go1.19.13

引入chromedp包和goquery包(国内使用代理GOPROXY=https://goproxy.io/)

go get github.com/chromedp/chromedp
go get github.com/PuerkitoBio/goquery

注意:

goquery当前版本1.9.2要求goSDK版本最低1.18,否则引入失败

Goland2022之前的版本不能对goSDK1.19进行debug,务必使用较高版本的Goland编辑器

二、开发步骤

1.配置chromedp,并创建会话

代码:

// main starts a visible Chrome instance through chromedp, opens the target
// site, clicks the "Shorts" entry and then keeps the browser open so the page
// can be inspected by hand.
//
// Every step runs under the 30-second root context created below, so the
// final one-hour sleep is cut short when that deadline expires.
func main() {
	// Crawler proxy configuration (currently disabled).
	//proxyURL, _ := url.Parse("http://xxx:xxxx@41.235.154.92")

	// Root context of the Chrome session, bounded by a timeout.
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	//var audioURL string

	// Allocator options for the new Chrome session.
	opts := append(chromedp.DefaultExecAllocatorOptions[:],
		// Disable headless mode to make debugging easier.
		chromedp.Flag("headless", false),
		// Avoid webdriver detection.
		chromedp.Flag("enable-automation", false),
		// Disable the AutomationControlled blink feature.
		chromedp.Flag("disable-blink-features", "AutomationControlled"),
		// Ignore the browser's certificate warnings.
		chromedp.Flag("ignore-certificate-errors", true),
		// Mute the browser. The flag must be true to take effect; passing
		// false drops it from the command line, which is why it appeared
		// to do nothing before.
		chromedp.Flag("mute-audio", true),
		// Browser window size.
		chromedp.WindowSize(1150, 1000),
	)
	allocCtx, allocCancel := chromedp.NewExecAllocator(ctx, opts...)
	defer allocCancel()
	taskCtx, taskCancel := chromedp.NewContext(allocCtx, chromedp.WithLogf(log.Printf))
	defer taskCancel()

	// Start the browser and navigate.
	err := chromedp.Run(taskCtx,
		// Open the site.
		chromedp.Navigate("https://xxxxxxxxx"),
		// Wait until the button has been rendered.
		chromedp.WaitVisible("a[title='Shorts']", chromedp.ByQuery),
		// Click the Shorts button.
		chromedp.Click("a[title='Shorts']", chromedp.ByQuery),
	)
	// Check the navigation result before err is reused below; previously a
	// failure here was silently overwritten.
	if err != nil {
		log.Fatalf("Error: %v", err)
	}

	err = chromedp.Run(taskCtx, chromedp.Sleep(time.Hour*1))
	if err != nil {
		log.Fatalf("Error: %v", err)
	}
}

2.获取up主信息

踩坑记录:

问题:返回前页时,尝试了 NavigateBack() 和 Evaluate(`window.history.back();`, &res) 两种方式,都会导致浏览器缓存的前页数据丢失。目前改用"重新加载事先记录的地址"的方式返回,但这种方式效果很差,还需要继续研究。

解决方案:暂无。目前的临时做法是用 chromedp.Navigate(t.backUrl) 重新加载之前记录的地址。

代码:

// upInfo collects what is scraped from an uploader's channel "about" dialog.
type upInfo struct {
	// Name is the uploader's name; Description is the channel description text.
	Name, Description string
	// Links holds one entry per external link; Info holds the cleaned text
	// of the scraped stat rows.
	Links, Info []string
}

// getUp opens the channel page of the current video's uploader, expands the
// "more" dialog, scrapes the description, external links and stat rows from
// it, navigates back to t.backUrl and prints what was collected.
//
// Failures are logged rather than returned; when the page cannot be read the
// printed fields are simply empty.
func (t *task) getUp() {
	// Selector of the "more" button that opens the about dialog.
	const moreButton = "#page-header > yt-page-header-renderer > yt-page-header-view-model > div > div.page-header-view-model-wiz__page-header-headline > div > yt-description-preview-view-model > truncated-text > button > span > span"

	info := new(upInfo)
	var aboutHTML string
	runErr := chromedp.Run(t.ctx,
		chromedp.WaitVisible("#channel-info > a"),
		// Click the uploader's name.
		chromedp.Click("#channel-info > a"),
		// Click the "more" button.
		chromedp.Click(moreButton),
		// Grab the HTML of the dialog that pops up.
		chromedp.OuterHTML("#about-container", &aboutHTML),
		chromedp.Sleep(time.Second*2),
	)
	// Keep the browser error and the parse error apart: previously the
	// former was overwritten by the latter and never reported.
	if runErr != nil {
		log.Printf("open channel about dialog: %v", runErr)
	} else if doc, err := goquery.NewDocumentFromReader(strings.NewReader(aboutHTML)); err != nil {
		log.Printf("parse about dialog html: %v", err)
	} else {
		// Channel description.
		info.Description = doc.Find("#description-container > span").Text()
		// Every external link: the texts of its <span> children joined by "----".
		doc.Find("yt-channel-external-link-view-model").Each(func(_ int, link *goquery.Selection) {
			parts := link.Find("span").Map(func(_ int, span *goquery.Selection) string {
				return span.Text()
			})
			info.Links = append(info.Links, strings.Join(parts, "----"))
		})
		// Stat rows: rows 0-2 are skipped and only the second cell of each
		// remaining row is kept, with newlines stripped.
		doc.Find("table").Find("tr").Each(func(row int, tr *goquery.Selection) {
			if row <= 2 {
				return
			}
			tr.Find("td").Each(func(col int, td *goquery.Selection) {
				if col == 1 {
					info.Info = append(info.Info, strings.ReplaceAll(td.Text(), "\n", ""))
				}
			})
		})
	}

	//todo go back through the browser history instead of reloading
	//chromedp.Evaluate(`window.history.back();`, &res),
	//chromedp.NavigateBack(),
	if err := chromedp.Run(t.ctx, chromedp.Navigate(t.backUrl)); err != nil {
		log.Printf("navigate back to %q: %v", t.backUrl, err)
	}

	fmt.Println("简介:\n", info.Description)
	fmt.Println("链接:\n", strings.Join(info.Links, "\n"))
	fmt.Println("资料:\n", strings.Join(info.Info, "\n"))
}

3.获取评论

踩坑记录:

问题:network.EventResponseReceived 触发时并不代表通信已经完成,响应体可能还没有接收完,此时用 network.GetResponseBody() 获取的结果中并没有 body。

解决方案:在 EventRequestWillBeSent 中记录目标请求的 RequestID,等收到同一 RequestID 的 EventLoadingFinished(表示该请求的数据已全部加载完成)之后,再调用 network.GetResponseBody() 获取响应体。

思路:

循环评论节点的下拉动作,获取评论数据然后格式化。调试发现获取评论的接口返回是明文数据,这就很方便了,直接监听请求。

代码:

// getComments opens the comment panel of the current video and returns the
// comments found in the first comment-API response that carries any.
//
// It listens for network events on the target, remembers the request IDs whose
// URL path matches the comment endpoint, and reads the response body once the
// matching EventLoadingFinished arrives. The call blocks until a non-empty
// batch is received or t.ctx is done; in the latter case it returns nil.
func (t *task) getComments() (comments []*CommentData) {
	// The listener lives only for the duration of this call: cancelling lctx
	// detaches it, so repeated calls do not pile up listeners on t.ctx.
	lctx, cancel := context.WithCancel(t.ctx)
	defer cancel()

	// batches hands parsed comments from the fetch goroutines to this
	// function, replacing the previously unsynchronised shared variables.
	batches := make(chan []*CommentData)

	// pending holds the IDs of comment requests that are still loading. It is
	// only touched inside the listener callback, which chromedp documents as
	// being called synchronously.
	pending := make(map[network.RequestID]struct{})

	// Watch the comment loading endpoint.
	chromedp.ListenTarget(lctx, func(ev interface{}) {
		switch ev := ev.(type) {
		case *network.EventRequestWillBeSent:
			uri, err := url.Parse(ev.Request.URL)
			if err != nil || uri.Path != "xxxxxxxxx" {
				return
			}
			pending[ev.RequestID] = struct{}{}
		case *network.EventLoadingFinished:
			if _, ok := pending[ev.RequestID]; !ok {
				return
			}
			delete(pending, ev.RequestID)

			fc := chromedp.FromContext(lctx)
			execCtx := cdp.WithExecutor(lctx, fc.Target)
			// The body is fetched in a separate goroutine, as in the
			// original; issuing a command from inside the synchronous
			// listener would block event handling.
			go func(id network.RequestID) {
				body, err := network.GetResponseBody(id).Do(execCtx)
				if err != nil {
					// A cancelled lctx just means the caller has returned.
					if lctx.Err() == nil {
						log.Printf("get comment response body: %v", err)
					}
					return
				}
				if len(body) == 0 {
					return
				}
				var payload commJson
				if err := json2.Unmarshal(body, &payload); err != nil {
					log.Printf("decode comment response: %v", err)
					return
				}
				var batch []*CommentData
				mutations := payload.FrameworkUpdates.EntityBatchUpdate.Mutations
				for i := range mutations {
					entity := &mutations[i].Payload.CommentEntityPayload
					// Mutations without an author name are not comments.
					if entity.Author.DisplayName == "" {
						continue
					}
					batch = append(batch, &CommentData{
						NickName: entity.Author.DisplayName,
						Avatar:   entity.Author.AvatarThumbnailUrl,
						Content:  entity.Properties.Content.Content,
					})
				}
				if len(batch) == 0 {
					return
				}
				select {
				case batches <- batch:
				case <-lctx.Done():
				}
			}(ev.RequestID)
		}
	})

	// Click the comment button.
	if err := chromedp.Run(t.ctx, chromedp.Click("#comments-button")); err != nil {
		log.Printf("open comment panel: %v", err)
		return nil
	}

	// Wait for the first batch, but give up once the context is done instead
	// of spinning forever.
	select {
	case comments = <-batches:
	case <-lctx.Done():
		return nil
	}

	// NOTE(review): this only looks the button up and does not click it;
	// presumably the intent was to close the comment panel — confirm.
	var res struct{}
	if err := chromedp.Run(t.ctx,
		chromedp.Evaluate(`document.querySelector("#visibility-button > ytd-button-renderer > yt-button-shape > button")`, &res),
	); err != nil {
		log.Printf("evaluate close button: %v", err)
	}
	return comments
}

三、完整代码

package main

import (
	"context"
	json2 "encoding/json"
	"fmt"
	"github.com/PuerkitoBio/goquery"
	"github.com/chromedp/cdproto/cdp"
	"github.com/chromedp/cdproto/network"
	"github.com/chromedp/chromedp"
	"log"
	"net/url"
	"strings"
	"time"
)

// main starts a visible Chrome instance through chromedp, opens the target
// site, clicks the "Shorts" entry, runs one scraping task and then keeps the
// browser open so the page can be inspected by hand.
//
// Every step runs under the 30-second root context created below, so the
// scraping task and the final one-hour sleep are cut short when that deadline
// expires.
func main() {
	// Crawler proxy configuration (currently disabled).
	//proxyURL, _ := url.Parse("http://xxx:xxxx@41.235.154.92")

	// Root context of the Chrome session, bounded by a timeout.
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	//var audioURL string

	// Allocator options for the new Chrome session.
	opts := append(chromedp.DefaultExecAllocatorOptions[:],
		// Disable headless mode to make debugging easier.
		chromedp.Flag("headless", false),
		// Avoid webdriver detection.
		chromedp.Flag("enable-automation", false),
		// Disable the AutomationControlled blink feature.
		chromedp.Flag("disable-blink-features", "AutomationControlled"),
		// Ignore the browser's certificate warnings.
		chromedp.Flag("ignore-certificate-errors", true),
		// Mute the browser. The flag must be true to take effect; passing
		// false drops it from the command line, which is why it appeared
		// to do nothing before.
		chromedp.Flag("mute-audio", true),
		// Browser window size.
		chromedp.WindowSize(1150, 1000),
	)
	allocCtx, allocCancel := chromedp.NewExecAllocator(ctx, opts...)
	defer allocCancel()
	taskCtx, taskCancel := chromedp.NewContext(allocCtx, chromedp.WithLogf(log.Printf))
	defer taskCancel()

	// Start the browser and navigate.
	err := chromedp.Run(taskCtx,
		// Open the site (the scheme needs two slashes to be a valid URL).
		chromedp.Navigate("https://xxxxxxxxxxx/"),
		// Wait until the button has been rendered.
		chromedp.WaitVisible("a[title='Shorts']", chromedp.ByQuery),
		// Click the Shorts button.
		chromedp.Click("a[title='Shorts']", chromedp.ByQuery),
	)
	// Check the navigation result before starting the task; previously a
	// failure here was silently overwritten.
	if err != nil {
		log.Fatalf("Error: %v", err)
	}

	task := newTask(taskCtx, 1)
	task.runUnitTask()
	err = chromedp.Run(taskCtx, chromedp.Sleep(time.Hour*1))
	if err != nil {
		log.Fatalf("Error: %v", err)
	}
}

// task holds the state of one scraping run over the Shorts feed.
type task struct {
	// ctx is the chromedp context every browser action of the task runs on.
	ctx context.Context
	// round counts the videos processed so far; roundLimit is the cap on
	// round (a value <= 0 disables the cap).
	round, roundLimit int32
	// status is "run" while the loop should keep going and "stop" once the
	// limit is reached; backUrl is the video URL to return to after visiting
	// a channel page.
	status, backUrl string
}

// newTask returns a task bound to ctx that is ready to run: its round counter
// starts at zero, its status is "run" and it is capped at roundLimit rounds.
func newTask(ctx context.Context, roundLimit int32) *task {
	tk := new(task)
	tk.ctx = ctx
	tk.round = 0
	tk.roundLimit = roundLimit
	tk.status = "run"
	return tk
}
// addRound counts one more processed round and, when a positive roundLimit
// has been reached, marks the task as stopped and prints a closing banner.
func (t *task) addRound() {
	t.round++
	// No cap configured, or cap not reached yet: nothing else to do.
	if t.roundLimit <= 0 || t.round < t.roundLimit {
		return
	}
	t.status = "stop"
	println("--------任务结束--------")
}
// runUnitTask walks through the Shorts feed while t.status is "run". For each
// active video it records the current URL in t.backUrl, skips the video if its
// overlay contains an ad slot, otherwise counts a round and prints the
// uploader info and comments, then clicks the "down" button to move on.
//
// The loop ends early when a browser action fails: chromedp query actions
// wait until their context is done, so an error here generally means the
// session can no longer be used.
func (t *task) runUnitTask() {
	const (
		activeReel  = "ytd-reel-video-renderer[is-active]"
		reelOverlay = "ytd-reel-video-renderer[is-active]  > div.overlay.style-scope.ytd-reel-video-renderer"
		nextButton  = "#navigation-button-down > ytd-button-renderer > yt-button-shape > button"
	)

	for t.status == "run" {
		var overlayHTML string
		err := chromedp.Run(t.ctx,
			chromedp.WaitVisible(activeReel),
			chromedp.OuterHTML(reelOverlay, &overlayHTML),
			chromedp.Location(&t.backUrl),
		)
		// Previously this error was overwritten by the parse step below.
		if err != nil {
			log.Printf("read active video: %v", err)
			return
		}

		doc, err := goquery.NewDocumentFromReader(strings.NewReader(overlayHTML))
		if err != nil {
			// doc is nil on failure, so it must not be queried.
			log.Printf("parse overlay html: %v", err)
			return
		}

		// An ad slot inside the overlay marks the video as an advertisement.
		if doc.Find("ytd-ad-slot-renderer").Length() > 0 {
			log.Printf("--------跳过广告---------")
		} else {
			log.Printf("跳转---------")
			t.addRound()
			resInfo := t.getUp()
			resComms := t.getComments()
			fmt.Println(resInfo)
			fmt.Println("评论:", resComms)
		}

		err = chromedp.Run(t.ctx,
			chromedp.Click(nextButton),
			chromedp.Sleep(time.Second*5),
		)
		if err != nil {
			log.Printf("move to next video: %v", err)
			return
		}
	}
}

// upInfo collects what is scraped from an uploader's channel "about" dialog.
type upInfo struct {
	// Name is the uploader's name; Description is the channel description text.
	Name, Description string
	// Links holds one entry per external link; Info holds the cleaned text
	// of the scraped stat rows.
	Links, Info []string
}

// getUp opens the channel page of the current video's uploader, expands the
// "more" dialog, scrapes the description, external links and stat rows from
// it, and navigates back to t.backUrl.
//
// It always returns a non-nil *upInfo. Failures are logged rather than
// returned, and the result is left empty when the page cannot be read.
func (t *task) getUp() *upInfo {
	// Selector of the "more" button that opens the about dialog.
	const moreButton = "#page-header > yt-page-header-renderer > yt-page-header-view-model > div > div.page-header-view-model-wiz__page-header-headline > div > yt-description-preview-view-model > truncated-text > button > span > span"

	info := new(upInfo)
	var aboutHTML string
	runErr := chromedp.Run(t.ctx,
		chromedp.WaitVisible("#channel-info > a"),
		// Click the uploader's name.
		chromedp.Click("#channel-info > a"),
		// Click the "more" button.
		chromedp.Click(moreButton),
		// Grab the HTML of the dialog that pops up.
		chromedp.OuterHTML("#about-container", &aboutHTML),
		chromedp.Sleep(time.Second*2),
	)
	// Keep the browser error and the parse error apart: previously the
	// former was overwritten by the latter and never reported.
	if runErr != nil {
		log.Printf("open channel about dialog: %v", runErr)
	} else if doc, err := goquery.NewDocumentFromReader(strings.NewReader(aboutHTML)); err != nil {
		log.Printf("parse about dialog html: %v", err)
	} else {
		// Channel description.
		info.Description = doc.Find("#description-container > span").Text()
		// Every external link: the texts of its <span> children joined by "----".
		doc.Find("yt-channel-external-link-view-model").Each(func(_ int, link *goquery.Selection) {
			parts := link.Find("span").Map(func(_ int, span *goquery.Selection) string {
				return span.Text()
			})
			info.Links = append(info.Links, strings.Join(parts, "----"))
		})
		// Stat rows: rows 0-2 are skipped and only the second cell of each
		// remaining row is kept, with newlines stripped.
		doc.Find("table").Find("tr").Each(func(row int, tr *goquery.Selection) {
			if row <= 2 {
				return
			}
			tr.Find("td").Each(func(col int, td *goquery.Selection) {
				if col == 1 {
					info.Info = append(info.Info, strings.ReplaceAll(td.Text(), "\n", ""))
				}
			})
		})
	}

	//todo go back through the browser history instead of reloading
	//chromedp.Evaluate(`window.history.back();`, &res),
	//chromedp.NavigateBack(),
	if err := chromedp.Run(t.ctx, chromedp.Navigate(t.backUrl)); err != nil {
		log.Printf("navigate back to %q: %v", t.backUrl, err)
	}
	//fmt.Println("简介:\n", info.Description)
	//fmt.Println("链接:\n", strings.Join(info.Links, "\n"))
	//fmt.Println("资料:\n", strings.Join(info.Info, "\n"))
	return info
}

// CommentData is the flattened form of one comment: the author's display
// name, the author's avatar thumbnail URL and the comment text.
type CommentData struct {
	NickName, Avatar, Content string
}
type commJson struct {
	FrameworkUpdates struct {
		EntityBatchUpdate struct {
			Mutations []struct {
				EntityKey string `json:"entityKey"`
				Type      string `json:"type"`
				Payload   struct {
					CommentSharedEntityPayload struct {
						Key   string `json:"key"`
						Theme struct {
							Colors struct {
								BrandBackgroundSolid         int64 `json:"brandBackgroundSolid"`
								BrandBackgroundPrimary       int64 `json:"brandBackgroundPrimary"`
								BrandBackgroundSecondary     int64 `json:"brandBackgroundSecondary"`
								BackgroundA                  int64 `json:"backgroundA"`
								IconInactive                 int64 `json:"iconInactive"`
								IconDisabled                 int64 `json:"iconDisabled"`
								BadgeChipBackground          int   `json:"badgeChipBackground"`
								ButtonChipBackgroundHover    int   `json:"buttonChipBackgroundHover"`
								TouchResponse                int64 `json:"touchResponse"`
								BrandIconActive              int64 `json:"brandIconActive"`
								BrandIconInactive            int64 `json:"brandIconInactive"`
								RedIndicator                 int64 `json:"redIndicator"`
								BrandLinkText                int64 `json:"brandLinkText"`
								TenPercentLayer              int   `json:"tenPercentLayer"`
								SnackbarBackground           int64 `json:"snackbarBackground"`
								ThemedBlue                   int64 `json:"themedBlue"`
								ThemedGreen                  int64 `json:"themedGreen"`
								StaticBrandRed               int64 `json:"staticBrandRed"`
								StaticBrandWhite             int64 `json:"staticBrandWhite"`
								StaticBrandBlack             int64 `json:"staticBrandBlack"`
								StaticClearColor             int   `json:"staticClearColor"`
								StaticAdYellow               int64 `json:"staticAdYellow"`
								StaticGrey                   int64 `json:"staticGrey"`
								OverlayTextPrimary           int64 `json:"overlayTextPrimary"`
								OverlayTextSecondary         int64 `json:"overlayTextSecondary"`
								Separator                    int64 `json:"separator"`
								ThumbnailOverlayIcon         int64 `json:"thumbnailOverlayIcon"`
								Selected                     int64 `json:"selected"`
								Highlighted                  int   `json:"highlighted"`
								BorderGrey                   int64 `json:"borderGrey"`
								BlackDim                     int64 `json:"blackDim"`
								VideoProgressBarBackground   int64 `json:"videoProgressBarBackground"`
								LegacyBlue                   int64 `json:"legacyBlue"`
								IconActiveOther              int64 `json:"iconActiveOther"`
								ErrorBackground              int64 `json:"errorBackground"`
								SuggestedAction              int64 `json:"suggestedAction"`
								OverlayButtonPrimary         int   `json:"overlayButtonPrimary"`
								OverlayButtonSecondary       int   `json:"overlayButtonSecondary"`
								CallToAction                 int64 `json:"callToAction"`
								OverlayBackgroundBrand       int64 `json:"overlayBackgroundBrand"`
								OverlayBackgroundMediumLight int   `json:"overlayBackgroundMediumLight"`
								VerifiedBadgeBackground      int   `json:"verifiedBadgeBackground"`
								StaticClearBlack             int   `json:"staticClearBlack"`
								OverlayBackgroundSolid       int64 `json:"overlayBackgroundSolid"`
								OverlayBackgroundHeavy       int64 `json:"overlayBackgroundHeavy"`
								AdIndicator                  int64 `json:"adIndicator"`
								TextDisabled                 int64 `json:"textDisabled"`
								TextPrimaryInverse           int64 `json:"textPrimaryInverse"`
								OverlayCallToAction          int64 `json:"overlayCallToAction"`
								OverlayBackgroundMedium      int64 `json:"overlayBackgroundMedium"`
								ThemedOverlayBackground      int64 `json:"themedOverlayBackground"`
								WordmarkText                 int64 `json:"wordmarkText"`
								BrandBackgroundSolidUpdated  int64 `json:"brandBackgroundSolidUpdated"`
								BackgroundAUpdated           int64 `json:"backgroundAUpdated"`
								OverlayBackgroundLight       int   `json:"overlayBackgroundLight"`
								OverlayTextDisabled          int   `json:"overlayTextDisabled"`
								GeneralBackgroundC           int64 `json:"generalBackgroundC"`
								GeneralBackgroundB           int64 `json:"generalBackgroundB"`
								GeneralBackgroundA           int64 `json:"generalBackgroundA"`
								TextSecondary                int64 `json:"textSecondary"`
								TextPrimary                  int64 `json:"textPrimary"`
								ErrorIndicator               int64 `json:"errorIndicator"`
								BaseBackground               int64 `json:"baseBackground"`
								RaisedBackground             int64 `json:"raisedBackground"`
								MenuBackground               int64 `json:"menuBackground"`
								InvertedBackground           int64 `json:"invertedBackground"`
								Outline                      int   `json:"outline"`
								AdditiveBackground           int   `json:"additiveBackground"`
								InvertedAdditiveBackground   int   `json:"invertedAdditiveBackground"`
								OutlineInverse               int   `json:"outlineInverse"`
								StaticMagenta                int64 `json:"staticMagenta"`
								StaticBlack                  int64 `json:"staticBlack"`
								WhiteBackground              int64 `json:"whiteBackground"`
								OverlayBackgroundMediumHeavy int   `json:"overlayBackgroundMediumHeavy"`
								StaticMediumMagenta          int64 `json:"staticMediumMagenta"`
								CallToActionInverse          int64 `json:"callToActionInverse"`
								ErrorBackgroundRed           int   `json:"errorBackgroundRed"`
							} `json:"colors"`
							Fonts struct {
							} `json:"fonts"`
							Icons struct {
							} `json:"icons"`
							Layout struct {
								Spacing struct {
									Space1  int `json:"space1"`
									Space2  int `json:"space2"`
									Space3  int `json:"space3"`
									Space4  int `json:"space4"`
									Space5  int `json:"space5"`
									Space6  int `json:"space6"`
									Space7  int `json:"space7"`
									Space8  int `json:"space8"`
									Space9  int `json:"space9"`
									Space10 int `json:"space10"`
									Space0  int `json:"space0"`
								} `json:"spacing"`
								FixedGrid struct {
									Margin int `json:"margin"`
									Gutter int `json:"gutter"`
								} `json:"fixedGrid"`
								Icon struct {
									Width  int `json:"width"`
									Height int `json:"height"`
								} `json:"icon"`
								Button struct {
									MinWidthTextButton int `json:"minWidthTextButton"`
									MinWidthIconButton int `json:"minWidthIconButton"`
									PaddingX           int `json:"paddingX"`
									CornerRadius       int `json:"cornerRadius"`
									StrokeThickness    int `json:"strokeThickness"`
								} `json:"button"`
							} `json:"layout"`
							ThemeType string `json:"themeType"`
						} `json:"theme"`
						Strings struct {
							ViewAllRepliesButtonA11Y string `json:"viewAllRepliesButtonA11y"`
							ExpandText               string `json:"expandText"`
							TranslatingText          string `json:"translatingText"`
							SeeOriginalText          string `json:"seeOriginalText"`
							DiscardDialogTitle       string `json:"discardDialogTitle"`
							DiscardDialogAction      string `json:"discardDialogAction"`
							DiscardDialogCancel      string `json:"discardDialogCancel"`
							ShortReplyThumbnailA11Y  string `json:"shortReplyThumbnailA11y"`
							CollapseText             string `json:"collapseText"`
							ReplyButtonText          string `json:"replyButtonText"`
							SmartRepliesAiDisclaimer string `json:"smartRepliesAiDisclaimer"`
						} `json:"strings"`
						ToolbarShared struct {
							UnlikeButtonA11Y      string `json:"unlikeButtonA11y"`
							UndislikeButtonA11Y   string `json:"undislikeButtonA11y"`
							HeartButtonA11Y       string `json:"heartButtonA11y"`
							UnheartButtonA11Y     string `json:"unheartButtonA11y"`
							HeartedTooltipA11Y    string `json:"heartedTooltipA11y"`
							ReplyButtonA11Y       string `json:"replyButtonA11y"`
							MenuButtonA11Y        string `json:"menuButtonA11y"`
							ApproveButtonA11Y     string `json:"approveButtonA11y"`
							RemoveButtonA11Y      string `json:"removeButtonA11y"`
							BlockButtonA11Y       string `json:"blockButtonA11y"`
							SpamButtonA11Y        string `json:"spamButtonA11y"`
							HeartedTooltipCommand struct {
								TooltipCommand struct {
									HintRenderer struct {
										Content struct {
											BubbleHintRenderer struct {
												TrackingParams string `json:"trackingParams"`
												DetailsText    struct {
													Runs []struct {
														Text      string `json:"text"`
														TextColor int64  `json:"textColor"`
													} `json:"runs"`
												} `json:"detailsText"`
												IsVisible bool `json:"isVisible"`
											} `json:"bubbleHintRenderer"`
										} `json:"content"`
										DwellTimeMs       string `json:"dwellTimeMs"`
										SuggestedPosition struct {
											Type string `json:"type"`
										} `json:"suggestedPosition"`
										TrackingParams string `json:"trackingParams"`
									} `json:"hintRenderer"`
								} `json:"tooltipCommand"`
							} `json:"heartedTooltipCommand"`
							DislikeCommentButtonA11Y string `json:"dislikeCommentButtonA11y"`
							DislikeReplyButtonA11Y   string `json:"dislikeReplyButtonA11y"`
							CancelButtonLabel        string `json:"cancelButtonLabel"`
							AllowVoiceControl        bool   `json:"allowVoiceControl"`
						} `json:"toolbarShared"`
						Experiments struct {
							EnableElementsRowReverse                   bool   `json:"enableElementsRowReverse"`
							ElementsUseAndroidLegacyTouchTarget        bool   `json:"elementsUseAndroidLegacyTouchTarget"`
							EnableElementsReadMoreButtonLogging        bool   `json:"enableElementsReadMoreButtonLogging"`
							TranslateButtonStyle                       string `json:"translateButtonStyle"`
							EnablePersistentCommentComposer            bool   `json:"enablePersistentCommentComposer"`
							EnableTecToTsCommandMigration              bool   `json:"enableTecToTsCommandMigration"`
							EnableElementsLoggingContract              bool   `json:"enableElementsLoggingContract"`
							EnableCompactifyV0                         bool   `json:"enableCompactifyV0"`
							EnableCompactifyV0AvatarSize               bool   `json:"enableCompactifyV0AvatarSize"`
							EnableCompactifyV0OverflowMenu             bool   `json:"enableCompactifyV0OverflowMenu"`
							ShortsCommentsOnElements                   bool   `json:"shortsCommentsOnElements"`
							EnableCompactifyV1CommentCountRemoval      bool   `json:"enableCompactifyV1CommentCountRemoval"`
							EnableNewUiForElementizedComposer          bool   `json:"enableNewUiForElementizedComposer"`
							EnableCompactifyV2CommentDividerRemoval    bool   `json:"enableCompactifyV2CommentDividerRemoval"`
							EnableInlineMobileSuperThanksChip          bool   `json:"enableInlineMobileSuperThanksChip"`
							CompactifyV1MaxCommentLines                int    `json:"compactifyV1MaxCommentLines"`
							EnableDoubleTapLikesComment                bool   `json:"enableDoubleTapLikesComment"`
							EnableElementsCoinboxRepliesPadding        bool   `json:"enableElementsCoinboxRepliesPadding"`
							EnableShowPreviouslyPinnedComments         bool   `json:"enableShowPreviouslyPinnedComments"`
							EnableLikeButtonPromo                      bool   `json:"enableLikeButtonPromo"`
							EnableHandlesTooltip                       bool   `json:"enableHandlesTooltip"`
							EnableModernizedCommunityChannelGuidelines bool   `json:"enableModernizedCommunityChannelGuidelines"`
							EnableCommunityGuidelinesOncePerRun        bool   `json:"enableCommunityGuidelinesOncePerRun"`
							Enable18PtToolbarIcons                     bool   `json:"enable18ptToolbarIcons"`
							EnableHideCommunityGuidelines              bool   `json:"enableHideCommunityGuidelines"`
							IgnoreTapsBetweenToolbarButtons            bool   `json:"ignoreTapsBetweenToolbarButtons"`
							EnableModernizedFontsWithHeightChanges     bool   `json:"enableModernizedFontsWithHeightChanges"`
							EnableModernizedFontsWithoutHeightChanges  bool   `json:"enableModernizedFontsWithoutHeightChanges"`
							LogDoubleTapGestures                       bool   `json:"logDoubleTapGestures"`
							UseRoundedCornersForSmartReplies           bool   `json:"useRoundedCornersForSmartReplies"`
							EnableVoiceReplyCreation                   bool   `json:"enableVoiceReplyCreation"`
							DisableCommentTextTap                      bool   `json:"disableCommentTextTap"`
						} `json:"experiments"`
						Environment struct {
							PlatformName string `json:"platformName"`
							FormFactor   string `json:"formFactor"`
						} `json:"environment"`
						Capabilities struct {
							SupportsIndicatorTouchExpansion bool `json:"supportsIndicatorTouchExpansion"`
							SupportsIndicatorPadding        bool `json:"supportsIndicatorPadding"`
							SupportsTooltipCommand          bool `json:"supportsTooltipCommand"`
							SupportsSubstitute              bool `json:"supportsSubstitute"`
							SupportsUpdate                  bool `json:"supportsUpdate"`
							SupportsLogGestureCommand       bool `json:"supportsLogGestureCommand"`
							SupportsClickableSpanIncrease   bool `json:"supportsClickableSpanIncrease"`
						} `json:"capabilities"`
						CommentEnvironment struct {
							PlatformName string `json:"platformName"`
						} `json:"commentEnvironment"`
						ClientName string `json:"clientName"`
					} `json:"commentSharedEntityPayload,omitempty"`
					CommentEntityPayload struct {
						Key        string `json:"key"`
						Properties struct {
							CommentId string `json:"commentId"`
							Content   struct {
								Content        string `json:"content"`
								AttachmentRuns []struct {
									StartIndex int `json:"startIndex"`
									Length     int `json:"length"`
									Element    struct {
										Type struct {
											ImageType struct {
												Image struct {
													Sources []struct {
														Url    string `json:"url"`
														Width  int    `json:"width"`
														Height int    `json:"height"`
													} `json:"sources"`
												} `json:"image"`
												PlaybackState string `json:"playbackState"`
											} `json:"imageType"`
										} `json:"type"`
										Properties struct {
											LayoutProperties struct {
												Height struct {
													Value int    `json:"value"`
													Unit  string `json:"unit"`
												} `json:"height"`
												Width struct {
													Value int    `json:"value"`
													Unit  string `json:"unit"`
												} `json:"width"`
												Margin struct {
													Left struct {
														Value int    `json:"value"`
														Unit  string `json:"unit"`
													} `json:"left"`
													Right struct {
														Value int    `json:"value"`
														Unit  string `json:"unit"`
													} `json:"right"`
												} `json:"margin"`
											} `json:"layoutProperties"`
										} `json:"properties"`
									} `json:"element"`
									Alignment string `json:"alignment"`
								} `json:"attachmentRuns,omitempty"`
							} `json:"content"`
							PublishedTime            string `json:"publishedTime"`
							ReplyLevel               int    `json:"replyLevel"`
							AuthorButtonA11Y         string `json:"authorButtonA11y"`
							ToolbarStateKey          string `json:"toolbarStateKey"`
							TranslateButtonEntityKey string `json:"translateButtonEntityKey"`
						} `json:"properties"`
						Author struct {
							ChannelId          string `json:"channelId"`
							DisplayName        string `json:"displayName"`
							AvatarThumbnailUrl string `json:"avatarThumbnailUrl"`
							IsVerified         bool   `json:"isVerified"`
							IsCurrentUser      bool   `json:"isCurrentUser"`
							IsCreator          bool   `json:"isCreator"`
							ChannelCommand     struct {
								InnertubeCommand struct {
									ClickTrackingParams string `json:"clickTrackingParams"`
									CommandMetadata     struct {
										WebCommandMetadata struct {
											Url         string `json:"url"`
											WebPageType string `json:"webPageType"`
											RootVe      int    `json:"rootVe"`
											ApiUrl      string `json:"apiUrl"`
										} `json:"webCommandMetadata"`
									} `json:"commandMetadata"`
									BrowseEndpoint struct {
										BrowseId         string `json:"browseId"`
										CanonicalBaseUrl string `json:"canonicalBaseUrl"`
									} `json:"browseEndpoint"`
								} `json:"innertubeCommand"`
							} `json:"channelCommand"`
							IsArtist bool `json:"isArtist"`
						} `json:"author"`
						Toolbar struct {
							LikeCountLiked         string `json:"likeCountLiked"`
							LikeCountNotliked      string `json:"likeCountNotliked"`
							ReplyCount             string `json:"replyCount"`
							CreatorThumbnailUrl    string `json:"creatorThumbnailUrl"`
							LikeButtonA11Y         string `json:"likeButtonA11y"`
							EngagementToolbarStyle struct {
								Value string `json:"value"`
							} `json:"engagementToolbarStyle"`
							LikeCountA11Y          string `json:"likeCountA11y"`
							ReplyCountA11Y         string `json:"replyCountA11y"`
							LikeInactiveTooltip    string `json:"likeInactiveTooltip"`
							LikeActiveTooltip      string `json:"likeActiveTooltip"`
							DislikeInactiveTooltip string `json:"dislikeInactiveTooltip"`
							DislikeActiveTooltip   string `json:"dislikeActiveTooltip"`
							HeartActiveTooltip     string `json:"heartActiveTooltip"`
						} `json:"toolbar"`
						Avatar struct {
							Image struct {
								Sources []struct {
									Url    string `json:"url"`
									Width  int    `json:"width"`
									Height int    `json:"height"`
								} `json:"sources"`
								Processor struct {
									BorderImageProcessor struct {
										Circular bool `json:"circular"`
									} `json:"borderImageProcessor"`
								} `json:"processor"`
							} `json:"image"`
							AccessibilityText string `json:"accessibilityText"`
							AvatarImageSize   string `json:"avatarImageSize"`
							Endpoint          struct {
								InnertubeCommand struct {
									ClickTrackingParams string `json:"clickTrackingParams"`
									CommandMetadata     struct {
										WebCommandMetadata struct {
											Url         string `json:"url"`
											WebPageType string `json:"webPageType"`
											RootVe      int    `json:"rootVe"`
											ApiUrl      string `json:"apiUrl"`
										} `json:"webCommandMetadata"`
									} `json:"commandMetadata"`
									BrowseEndpoint struct {
										BrowseId         string `json:"browseId"`
										CanonicalBaseUrl string `json:"canonicalBaseUrl"`
									} `json:"browseEndpoint"`
								} `json:"innertubeCommand"`
							} `json:"endpoint"`
						} `json:"avatar"`
					} `json:"commentEntityPayload,omitempty"`
					CommentSurfaceEntityPayload struct {
						Key                 string `json:"key"`
						CommentClickCommand struct {
							InnertubeCommand struct {
								ClickTrackingParams string `json:"clickTrackingParams"`
								CommandMetadata     struct {
									WebCommandMetadata struct {
										Url         string `json:"url"`
										WebPageType string `json:"webPageType"`
										RootVe      int    `json:"rootVe"`
									} `json:"webCommandMetadata"`
								} `json:"commandMetadata"`
								SignInEndpoint struct {
									NextEndpoint struct {
										ClickTrackingParams string `json:"clickTrackingParams"`
										CommandMetadata     struct {
											WebCommandMetadata struct {
												Url         string `json:"url"`
												WebPageType string `json:"webPageType"`
												RootVe      int    `json:"rootVe"`
											} `json:"webCommandMetadata"`
										} `json:"commandMetadata"`
										WatchEndpoint struct {
											VideoId string `json:"videoId"`
										} `json:"watchEndpoint"`
									} `json:"nextEndpoint"`
								} `json:"signInEndpoint"`
							} `json:"innertubeCommand"`
						} `json:"commentClickCommand"`
						InlineReadMoreButton struct {
							IsSupported bool `json:"isSupported"`
							IsExpanded  bool `json:"isExpanded"`
						} `json:"inlineReadMoreButton"`
						CommentClickOpensComposer bool   `json:"commentClickOpensComposer"`
						ComposerDraftEntityKey    string `json:"composerDraftEntityKey"`
						ViewRepliesTooltipData    struct {
							TooltipCommand struct {
								TooltipCommand struct {
									HintRenderer struct {
										Content struct {
											BubbleHintRenderer struct {
												Text struct {
													SimpleText string `json:"simpleText"`
												} `json:"text"`
												TrackingParams string `json:"trackingParams"`
												IsVisible      bool   `json:"isVisible"`
											} `json:"bubbleHintRenderer"`
										} `json:"content"`
										DismissStrategy struct {
											Type string `json:"type"`
										} `json:"dismissStrategy"`
										HintCap struct {
											ImpressionCap string `json:"impressionCap"`
										} `json:"hintCap"`
										SuggestedPosition struct {
											Type string `json:"type"`
										} `json:"suggestedPosition"`
										TrackingParams string `json:"trackingParams"`
									} `json:"hintRenderer"`
								} `json:"tooltipCommand"`
							} `json:"tooltipCommand"`
							ViewRepliesTooltipIdentifier string `json:"viewRepliesTooltipIdentifier"`
						} `json:"viewRepliesTooltipData,omitempty"`
						PublishedTimeCommand struct {
							InnertubeCommand struct {
								ClickTrackingParams string `json:"clickTrackingParams"`
								CommandMetadata     struct {
									WebCommandMetadata struct {
										Url         string `json:"url"`
										WebPageType string `json:"webPageType"`
										RootVe      int    `json:"rootVe"`
									} `json:"webCommandMetadata"`
								} `json:"commandMetadata"`
								WatchEndpoint struct {
									VideoId string `json:"videoId"`
									Params  string `json:"params"`
								} `json:"watchEndpoint"`
							} `json:"innertubeCommand"`
						} `json:"publishedTimeCommand"`
					} `json:"commentSurfaceEntityPayload,omitempty"`
					TriStateButtonStateEntityPayload struct {
						Key             string `json:"key"`
						StateIdentifier string `json:"stateIdentifier"`
					} `json:"triStateButtonStateEntityPayload,omitempty"`
					EngagementToolbarSurfaceEntityPayload struct {
						Key                   string `json:"key"`
						PrepareAccountCommand struct {
							InnertubeCommand struct {
								ClickTrackingParams string `json:"clickTrackingParams"`
								CommandMetadata     struct {
									WebCommandMetadata struct {
										Url         string `json:"url"`
										WebPageType string `json:"webPageType"`
										RootVe      int    `json:"rootVe"`
									} `json:"webCommandMetadata"`
								} `json:"commandMetadata"`
								SignInEndpoint struct {
									NextEndpoint struct {
										ClickTrackingParams string `json:"clickTrackingParams"`
										CommandMetadata     struct {
											WebCommandMetadata struct {
												Url         string `json:"url"`
												WebPageType string `json:"webPageType"`
												RootVe      int    `json:"rootVe"`
											} `json:"webCommandMetadata"`
										} `json:"commandMetadata"`
										WatchEndpoint struct {
											VideoId string `json:"videoId"`
										} `json:"watchEndpoint"`
									} `json:"nextEndpoint"`
								} `json:"signInEndpoint"`
							} `json:"innertubeCommand"`
						} `json:"prepareAccountCommand"`
						LikeCommand struct {
							InnertubeCommand struct {
								ClickTrackingParams string `json:"clickTrackingParams"`
							} `json:"innertubeCommand"`
						} `json:"likeCommand"`
						UnlikeCommand struct {
							InnertubeCommand struct {
								ClickTrackingParams string `json:"clickTrackingParams"`
							} `json:"innertubeCommand"`
						} `json:"unlikeCommand"`
						DislikeCommand struct {
							InnertubeCommand struct {
								ClickTrackingParams string `json:"clickTrackingParams"`
							} `json:"innertubeCommand"`
						} `json:"dislikeCommand"`
						UndislikeCommand struct {
							InnertubeCommand struct {
								ClickTrackingParams string `json:"clickTrackingParams"`
							} `json:"innertubeCommand"`
						} `json:"undislikeCommand"`
						ReplyCommand struct {
							InnertubeCommand struct {
								ClickTrackingParams string `json:"clickTrackingParams"`
								CommandMetadata     struct {
									WebCommandMetadata struct {
										Url         string `json:"url"`
										WebPageType string `json:"webPageType"`
										RootVe      int    `json:"rootVe"`
									} `json:"webCommandMetadata"`
								} `json:"commandMetadata"`
								SignInEndpoint struct {
									NextEndpoint struct {
										ClickTrackingParams string `json:"clickTrackingParams"`
										CommandMetadata     struct {
											WebCommandMetadata struct {
												Url         string `json:"url"`
												WebPageType string `json:"webPageType"`
												RootVe      int    `json:"rootVe"`
											} `json:"webCommandMetadata"`
										} `json:"commandMetadata"`
										WatchEndpoint struct {
											VideoId string `json:"videoId"`
										} `json:"watchEndpoint"`
									} `json:"nextEndpoint"`
								} `json:"signInEndpoint"`
							} `json:"innertubeCommand"`
						} `json:"replyCommand"`
						IsEngagementToolbar bool `json:"isEngagementToolbar"`
					} `json:"engagementToolbarSurfaceEntityPayload,omitempty"`
					EngagementToolbarStateEntityPayload struct {
						Key        string `json:"key"`
						LikeState  string `json:"likeState"`
						HeartState string `json:"heartState"`
					} `json:"engagementToolbarStateEntityPayload,omitempty"`
				} `json:"payload"`
			} `json:"mutations"`
			Timestamp struct {
				Seconds string `json:"seconds"`
				Nanos   int    `json:"nanos"`
			} `json:"timestamp"`
		} `json:"entityBatchUpdate"`
	}
}

// getComments opens the comment panel of the current page and returns the
// comments extracted from the comment-loading API responses.
//
// It listens for requests whose URL path is "/xxxxxx", reads the body of each
// such request once loading has finished, and converts every mutation that
// carries a non-empty author display name into a CommentData. The method
// returns as soon as at least one response yielded comments (plus any further
// batches that are already available at that moment).
//
// The method has no error return: if the click fails, t.ctx is cancelled, or
// no comments arrive within the wait timeout, it returns whatever has been
// collected so far (possibly nil).
func (t *task) getComments() (comments []*CommentData) {
	// Upper bound on waiting for the first comments. Without it a page whose
	// comment request never yields a comment would block this call forever.
	const waitTimeout = 30 * time.Second

	// Scope the listener (and the goroutines it spawns) to this call: chromedp
	// drops a listener once the context it was registered with is done, so
	// repeated calls do not pile up listeners on t.ctx.
	listenCtx, stopListening := context.WithCancel(t.ctx)
	defer stopListening()

	// batches hands parsed comments from the response-reading goroutines to
	// the waiting action, so no variable is shared between goroutines.
	batches := make(chan []*CommentData)

	// pending holds the IDs of comment-API requests still in flight. It is
	// only touched inside the listener callback.
	var pending []network.RequestID

	// Watch the comment-loading endpoint.
	chromedp.ListenTarget(listenCtx, func(ev interface{}) {
		switch ev := ev.(type) {
		case *network.EventRequestWillBeSent:
			uri, err := url.Parse(ev.Request.URL)
			if err != nil || uri.Path != "/xxxxxx" {
				break
			}
			pending = append(pending, ev.RequestID)
		case *network.EventLoadingFinished:
			// Remove the finished request from pending, noting whether it
			// was one of the tracked ones.
			tracked := false
			remaining := pending[:0]
			for _, id := range pending {
				if id == ev.RequestID {
					tracked = true
					continue
				}
				remaining = append(remaining, id)
			}
			pending = remaining
			if !tracked {
				break
			}

			fc := chromedp.FromContext(listenCtx)
			execCtx := cdp.WithExecutor(listenCtx, fc.Target)
			requestID := ev.RequestID
			// The body is fetched in a separate goroutine so the listener
			// callback returns promptly instead of issuing a browser command
			// from inside the event handler.
			go func() {
				body, err := network.GetResponseBody(requestID).Do(execCtx)
				if err != nil || len(body) == 0 {
					// One unreadable response must not bring the whole
					// process down; skip it and wait for the next one.
					return
				}

				var parsed commJson
				// Best effort on purpose: commJson only models part of the
				// response, so a decode error is not fatal and any fields
				// that were populated are still used below.
				_ = json2.Unmarshal(body, &parsed)

				var batch []*CommentData
				mutations := parsed.FrameworkUpdates.EntityBatchUpdate.Mutations
				for i := range mutations {
					entity := mutations[i].Payload.CommentEntityPayload
					if entity.Author.DisplayName == "" {
						// Not a comment mutation.
						continue
					}
					batch = append(batch, &CommentData{
						NickName: entity.Author.DisplayName,
						Avatar:   entity.Author.AvatarThumbnailUrl,
						Content:  entity.Properties.Content.Content,
					})
				}
				if len(batch) == 0 {
					return
				}

				select {
				case batches <- batch:
				case <-listenCtx.Done():
					// getComments has already returned; drop the batch.
				}
			}()
		}
	})

	// The run error is intentionally not propagated: the signature has no
	// error return, and the comments collected before a failure are still
	// the best available result.
	_ = chromedp.Run(t.ctx,
		// Click the comments button.
		chromedp.Click("#comments-button"),
		chromedp.ActionFunc(func(ctx context.Context) error {
			waitCtx, cancelWait := context.WithTimeout(ctx, waitTimeout)
			defer cancelWait()

			// Block until the first batch of comments arrives, the context
			// is cancelled, or the timeout expires.
			select {
			case first := <-batches:
				comments = append(comments, first...)
			case <-waitCtx.Done():
				return waitCtx.Err()
			}

			// Pick up any further batches that are already waiting, without
			// blocking for new ones.
		drain:
			for {
				select {
				case more := <-batches:
					comments = append(comments, more...)
				default:
					break drain
				}
			}

			// NOTE(review): this expression only selects the button; it does
			// not click it. If the intent is to close the comment panel,
			// confirm and append ".click()".
			var res struct{}
			return chromedp.Evaluate(`document.querySelector("#visibility-button > ytd-button-renderer > yt-button-shape > button")`, &res).Do(ctx)
		}),
	)
	return comments
}

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值