dy弹幕数据抓取
如有侵权或涉及相关利益,请通过 QQ 或邮箱联系作者。
环境:
MacOS
golang 1.21+
protoc 版本:libprotoc 25.1
抓取样例:
[进场][435837529504884][男] 肖靖远 进入直播间
[进场][64624951415][男] 建华.@.@ 进入直播间
[点赞][1327816996293822][女] 舍;得! 点赞 * 13
[点赞][131584125580622][女] 🌺万里晴空🌸 点赞 * 25
[关注][2379758914007816][女] abdisj 关注了主播
[进场][3526844532921287][男] 1314 进入直播间
[进场][4459243776260376][女] 用户7952123796342 进入直播间
[关注][2379758914007816][女] abdisj 关注了主播
[进场][83547928564][女] 小何 进入直播间
[进场][1626894999224967][男] 苦尽甘来 进入直播间
[进场][60771789788][男] Huang yong 进入直播间
[点赞][131584125580622][女] 🌺万里晴空🌸 点赞 * 19
[进场][1275022271335976][女] Judy 进入直播间
[进场][97136425247][男] 简单人生 进入直播间
[进场][109513351102][女] 五月 进入直播间
[关注][1538943795083519][男] 若 生 关注了主播
详细代码:
package main
import (
"bytes"
"compress/gzip"
"io"
"log"
"net/http"
"regexp"
"strings"
"sync"
"time"
"github.com/golang/protobuf/proto"
"github.com/gorilla/websocket"
douyin "github.com/wgx0307/DouyinLiveComments/protobuf"
)
// Live carries the state of a single Douyin live-room session.
type Live struct {
	// Url is the live-room page address handed to run; Ttwid is the value of
	// the "ttwid" cookie that the page response carried.
	Url, Ttwid string
	// RoomId is the numeric room id scraped from the page by run. RoomStore
	// and RoomTitle are not filled in by any code visible in this file.
	RoomStore, RoomId, RoomTitle string
	// wsConn is the push-channel websocket connection; Connect assigns it.
	wsConn *websocket.Conn
}
// main scrapes the hard-coded live room, opens the push websocket and then
// parks forever while the background goroutines log incoming events.
func main() {
	live, err := run("https://live.douyin.com/592737552981")
	if err != nil {
		panic(err)
	}
	if live == nil {
		// Guard against a nil result so the failure is reported here rather
		// than as a nil-pointer dereference inside Connect.
		panic("run returned no live room")
	}
	// Connect reports dial failures through its return value. Dropping it
	// would leave no goroutine running, and the wait below would then abort
	// with the runtime's generic "all goroutines are asleep" error instead of
	// the real cause.
	if err := live.Connect(); err != nil {
		panic(err)
	}
	// Nothing ever calls Done: the wait only keeps the process alive. The
	// program ends when one of the connection goroutines panics.
	var wg sync.WaitGroup
	wg.Add(1)
	wg.Wait()
}
// roomIDPattern captures the numeric room id from the escaped JSON embedded
// in the live page; it matches the literal text roomId\":\"<digits>\".
var roomIDPattern = regexp.MustCompile(`roomId\\":\\"(\d+)\\"`)

// pageError reports that a live page could not be turned into a Live.
type pageError struct {
	url    string
	reason string
}

// Error implements the error interface.
func (e *pageError) Error() string {
	return "live page " + e.url + ": " + e.reason
}

// run downloads the live-room page at u and builds a Live from it.
//
// The returned Live carries u, the value of the "ttwid" response cookie
// (empty when the server did not set one) and the room id found in the page
// body. A non-nil error is returned when the request cannot be built or sent,
// when the body cannot be read, or when no room id is present in the page; in
// every error case the returned *Live is nil.
func run(u string) (*Live, error) {
	req, err := http.NewRequest(http.MethodGet, u, nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36")
	req.Header.Set("cookie", "__ac_nonce=0638733a400869171be51")

	// A bounded client keeps a stalled server from hanging start-up forever.
	client := &http.Client{Timeout: 15 * time.Second}
	res, err := client.Do(req)
	if err != nil {
		return nil, err
	}
	defer res.Body.Close()

	var ttwid string
	for _, c := range res.Cookies() {
		if c.Name == "ttwid" {
			ttwid = c.Value
			break
		}
	}

	body, err := io.ReadAll(res.Body)
	if err != nil {
		return nil, err
	}

	match := roomIDPattern.FindSubmatch(body)
	if len(match) < 2 {
		// Return a real error here: a (nil, nil) result would make the caller
		// dereference a nil *Live.
		return nil, &pageError{url: u, reason: "room id not found (HTTP status " + res.Status + ")"}
	}
	return &Live{
		Url:    u,
		Ttwid:  ttwid,
		RoomId: string(match[1]),
	}, nil
}
// Connect dials the push websocket for live.RoomId, stores the connection in
// live.wsConn and starts the reader (onMessage) and heartbeat (onHeartbeat)
// goroutines. It returns the dial error, if any, without starting either
// goroutine.
func (live *Live) Connect() error {
	// Every "%s" placeholder in the template receives the room id. Note that
	// the template also contains literal percent-escapes (for example %20),
	// which a printf-style formatter would misread.
	const endpoint = "wss://webcast3-ws-web-lq.douyin.com/webcast/im/push/v2/?" +
		"app_name=douyin_web&version_code=180800&webcast_sdk_version=1.3.0&update_version_code=1.3.0&compress=gzip&" +
		"internal_ext=internal_src:dim|wss_push_room_id:%s|wss_push_did:%s|dim_log_id:202302171547011A160A7BAA76660E13ED|fetch_time:1676620021641|seq:1|wss_info:0-1676620021641-0-0|wrds_kvs:WebcastRoomStatsMessage-1676620020691146024_WebcastRoomRankMessage-1676619972726895075_AudienceGiftSyncData-1676619980834317696_HighlightContainerSyncData-2&cursor=t-1676620021641_r-1_d-1_u-1_h-1&host=https://live.douyin.com&aid=6383&live_id=1&did_rule=3&debug=false&endpoint=live_pc&support_wrds=1&im_path=/webcast/im/fetch/&user_unique_id=%s&device_platform=web&cookie_enabled=true&screen_width=1440&screen_height=900&browser_language=zh&browser_platform=MacIntel&browser_name=Mozilla&browser_version=5.0%20(Macintosh;%20Intel%20Mac%20OS%20X%2010_15_7)%20AppleWebKit/537.36%20(KHTML,%20like%20Gecko)%20Chrome/110.0.0.0%20Safari/537.36&browser_online=true&tz_name=Asia/Shanghai&identity=audience&room_id=%s&heartbeatDuration=0&signature=00000000"
	target := strings.ReplaceAll(endpoint, "%s", live.RoomId)

	headers := make(http.Header, 2)
	headers.Set("cookie", "ttwid="+live.Ttwid)
	headers.Set("user-agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36")

	conn, _, err := websocket.DefaultDialer.Dial(target, headers)
	if err != nil {
		return err
	}
	live.wsConn = conn

	go live.onMessage()
	go live.onHeartbeat()
	return nil
}
// onMessage reads push frames from the websocket in a loop, acknowledges
// responses that request it and hands each contained message to the parser
// that matches its method name. Messages with any other method are ignored.
//
// A read error panics, which terminates the program. A frame that cannot be
// decoded or decompressed is logged and skipped.
func (live *Live) onMessage() {
	for {
		_, data, err := live.wsConn.ReadMessage()
		if err != nil {
			panic(err.Error())
		}

		var frame douyin.PushFrame
		if err := proto.Unmarshal(data, &frame); err != nil {
			log.Printf("decode push frame: %v", err)
			continue
		}
		if len(frame.Payload) == 0 {
			// Nothing to decompress, hence no messages and no ack request.
			continue
		}
		body, err := degzip(frame.Payload)
		if err != nil {
			log.Printf("decompress push frame payload: %v", err)
			continue
		}

		var resp douyin.Response
		if err := proto.Unmarshal(body, &resp); err != nil {
			log.Printf("decode push response: %v", err)
			continue
		}
		if resp.NeedAck {
			live.sendAck(frame.LogId, resp.InternalExt)
		}

		for _, msg := range resp.MessagesList {
			switch msg.Method {
			case "WebcastChatMessage":
				parseChatMsg(msg.Payload)
			case "WebcastGiftMessage":
				parseGiftMsg(msg.Payload)
			case "WebcastLikeMessage":
				parseLikeMsg(msg.Payload)
			case "WebcastMemberMessage":
				parseEnterMsg(msg.Payload)
			case "WebcastSocialMessage":
				parseSocialMsg(msg.Payload)
			case "WebcastRoomUserSeqMessage":
				// Method names follow the "Webcast<Type>" pattern of the
				// cases above; the previous label was a function name and
				// could never match.
				parseRoomUserSMsg(msg.Payload)
			}
		}
	}
}
// wsWriteMu serializes websocket writes. The heartbeat goroutine and the
// reader goroutine (through sendAck) both write to the connection, and
// gorilla/websocket supports at most one concurrent writer per connection.
// Being package-level, it also serializes writes across Live values.
var wsWriteMu sync.Mutex

// writeBinary sends data as one binary websocket message while holding
// wsWriteMu.
func (live *Live) writeBinary(data []byte) error {
	wsWriteMu.Lock()
	defer wsWriteMu.Unlock()
	return live.wsConn.WriteMessage(websocket.BinaryMessage, data)
}

// onHeartbeat sends a heartbeat frame immediately and then again every ten
// seconds. It never returns; a failed write panics, which terminates the
// program.
func (live *Live) onHeartbeat() {
	// The frame never changes, so it is encoded once up front.
	// NOTE(review): "bh" may be a transposition of "hb"; confirm against the
	// push protocol before changing it.
	data, err := proto.Marshal(&douyin.PushFrame{PayloadType: "bh"})
	if err != nil {
		panic(err.Error())
	}

	ticker := time.NewTicker(10 * time.Second)
	defer ticker.Stop()
	for {
		if err := live.writeBinary(data); err != nil {
			panic(err.Error())
		}
		<-ticker.C
	}
}

// sendAck acknowledges the frame identified by logId, sending iExt in the
// frame's PayloadType field. An encoding failure is logged and the ack is
// skipped; a failed write panics, which terminates the program.
func (live *Live) sendAck(logId uint64, iExt string) {
	// NOTE(review): the internal-ext string is sent as PayloadType unchanged;
	// confirm whether the protocol expects a fixed payload type with the ext
	// carried in Payload instead.
	data, err := proto.Marshal(&douyin.PushFrame{
		LogId:       logId,
		PayloadType: iExt,
	})
	if err != nil {
		log.Printf("encode ack frame: %v", err)
		return
	}
	if err := live.writeBinary(data); err != nil {
		panic(err.Error())
	}
}
// degzip inflates a gzip-compressed byte slice and returns the decompressed
// bytes. It returns a nil slice and the error when data does not start with a
// valid gzip header or when reading the stream fails.
func degzip(data []byte) ([]byte, error) {
	zr, err := gzip.NewReader(bytes.NewReader(data))
	if err != nil {
		return nil, err
	}

	plain := new(bytes.Buffer)
	if _, err = io.Copy(plain, zr); err != nil {
		return nil, err
	}
	return plain.Bytes(), nil
}
// parseChatMsg decodes msg as a douyin.ChatMessage and logs the sender's id,
// gender and nickname together with the chat text. A payload that cannot be
// decoded is logged and skipped.
func parseChatMsg(msg []byte) {
	var chat douyin.ChatMessage
	if err := proto.Unmarshal(msg, &chat); err != nil {
		log.Printf("decode chat message: %v", err)
		return
	}
	// The generated getters tolerate a missing User, whereas direct field
	// access would panic on a nil pointer.
	user := chat.GetUser()
	log.Printf("[弹幕][%d][%s] %s : %s\n", user.GetId(), getGender(user.GetGender()), user.GetNickName(), chat.Content)
}
// parseGiftMsg decodes msg as a douyin.GiftMessage and logs the sender's id,
// gender and nickname together with the gift name and combo count. A payload
// that cannot be decoded is logged and skipped.
func parseGiftMsg(msg []byte) {
	var gift douyin.GiftMessage
	if err := proto.Unmarshal(msg, &gift); err != nil {
		log.Printf("decode gift message: %v", err)
		return
	}
	// The generated getters tolerate a missing User or Gift, whereas direct
	// field access would panic on a nil pointer.
	user := gift.GetUser()
	log.Printf("[礼物][%d][%s] %s : %s * %d \n", user.GetId(), getGender(user.GetGender()), user.GetNickName(), gift.GetGift().GetName(), gift.ComboCount)
}
// parseLikeMsg decodes msg as a douyin.LikeMessage and logs the sender's id,
// gender and nickname together with the like count. A payload that cannot be
// decoded is logged and skipped.
func parseLikeMsg(msg []byte) {
	var like douyin.LikeMessage
	if err := proto.Unmarshal(msg, &like); err != nil {
		log.Printf("decode like message: %v", err)
		return
	}
	// The generated getters tolerate a missing User, whereas direct field
	// access would panic on a nil pointer.
	user := like.GetUser()
	log.Printf("[点赞][%d][%s] %s 点赞 * %d \n", user.GetId(), getGender(user.GetGender()), user.GetNickName(), like.Count)
}
// parseEnterMsg decodes msg as a douyin.MemberMessage and logs the id, gender
// and nickname of the user entering the room. A payload that cannot be
// decoded is logged and skipped.
func parseEnterMsg(msg []byte) {
	var member douyin.MemberMessage
	if err := proto.Unmarshal(msg, &member); err != nil {
		log.Printf("decode member message: %v", err)
		return
	}
	// The generated getters tolerate a missing User, whereas direct field
	// access would panic on a nil pointer.
	user := member.GetUser()
	log.Printf("[进场][%d][%s] %s 进入直播间\n", user.GetId(), getGender(user.GetGender()), user.GetNickName())
}
// parseSocialMsg decodes msg as a douyin.SocialMessage and logs the id,
// gender and nickname of the user who followed the host. A payload that
// cannot be decoded is logged and skipped.
func parseSocialMsg(msg []byte) {
	var social douyin.SocialMessage
	if err := proto.Unmarshal(msg, &social); err != nil {
		log.Printf("decode social message: %v", err)
		return
	}
	// The generated getters tolerate a missing User, whereas direct field
	// access would panic on a nil pointer.
	user := social.GetUser()
	log.Printf("[关注][%d][%s] %s 关注了主播\n", user.GetId(), getGender(user.GetGender()), user.GetNickName())
}
// parseRoomUserSMsg decodes msg as a douyin.RoomUserSeqMessage and logs its
// Total and TotalPvForAnchor counters as the current and cumulative viewer
// numbers. A payload that cannot be decoded is logged and skipped instead of
// printing zeroed statistics.
func parseRoomUserSMsg(msg []byte) {
	var stats douyin.RoomUserSeqMessage
	if err := proto.Unmarshal(msg, &stats); err != nil {
		log.Printf("decode room user seq message: %v", err)
		return
	}
	log.Printf("[统计] 当前观看人数: %d, 累计观看人数: %d \n", stats.Total, stats.TotalPvForAnchor)
}
// getGender maps a numeric gender code to its display label: 1 yields "男"
// and every other value, including 0, yields "女".
func getGender(gender uint32) string {
	if gender != 1 {
		return "女"
	}
	return "男"
}