Python实时抓取斗鱼弹幕源码

# -*- coding: utf-8 -*-
"""
Created on Fri Jul  5 10:30:06 2019

@author: 86138
"""

#from __future__ import unicode_literals
import multiprocessing
import socket
import time
import re
import requests
from bs4 import BeautifulSoup
import json
import AutoChat

# Configure the socket's IP address and port.
# NOTE: this runs at import time, so importing the module resolves the host
# name and opens the TCP connection immediately. The functions below use
# the module-level `client` socket.
client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
host = socket.gethostbyname("openbarrage.douyutv.com")
port = 8601
client.connect((host, port))

# Regular expression that pulls chat messages out of the raw byte stream.
# It captures three groups from each "chatmsg" record: the nn@= field, the
# txt@= field and the cst@= field (start() decodes them as nickname, danmu
# text and danmu time respectively).
danmu = re.compile(b'type@=chatmsg.*?/nn@=(.*?)/txt@=(.*?)/cid@=.*?/cst@=(.*?)/bnn@')


def sendmsg(msgstr):
    """Send one protocol message to the server over the module-level socket.

    The message text is UTF-8 encoded and prefixed with a 12-byte header:
    the length field (payload length + 8) written twice as 4-byte
    little-endian integers, followed by a 4-byte little-endian field holding
    the code 689 (per the original notes: message type, encryption field and
    reserved field).

    Header and payload are written with a single ``sendall`` call so that the
    whole frame is guaranteed to be transmitted (plain ``send`` may write only
    part of the buffer) and so the header cannot be separated from its body
    by another sender using the same socket.

    Args:
        msgstr: The message text, including any trailing ``\\0`` terminator.

    Raises:
        OSError: If the underlying socket fails while sending.
    """
    msg = msgstr.encode('utf-8')
    data_length = len(msg) + 8
    code = 689
    length_field = data_length.to_bytes(4, 'little')
    msgHead = length_field + length_field + code.to_bytes(4, 'little')
    client.sendall(msgHead + msg)


def _login(roomid):
    """Send the login request and then the join-group request for ``roomid``."""
    # Both the login and the join-group message must end with a NUL byte.
    sendmsg('type@=loginreq/roomid@={}/\0'.format(roomid))
    sendmsg('type@=joingroup/rid@={}/gid@=-9999/\0'.format(roomid))


def _reconnect(roomid):
    """Replace the module-level socket with a fresh connection and log in again."""
    global client
    try:
        client.close()
    except OSError:
        # The old socket is being thrown away; a failed close is harmless.
        pass
    # A socket that was reset or closed cannot be connected again, so a new
    # one is always created.
    client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    client.connect((host, port))
    # Without a new login/join-group the server sends no danmu on the new
    # connection.
    _login(roomid)


def start(roomid):
    """Log in to a room and process incoming danmu (bullet comments) forever.

    After sending the login and join-group requests, the function loops:
    it reads from the socket, extracts chat messages with the module-level
    ``danmu`` pattern, prints each one, forwards its text to
    ``AutoChat.main_func`` together with a running counter (starting after
    300), and appends it as one JSON line to ``bullet_curtain.jl``.

    Received bytes are accumulated in a buffer so that a chat message split
    across two ``recv`` calls is still matched once its remainder arrives.
    When the connection is reset, errors out, or is closed by the peer, a
    new socket is opened and the login sequence is repeated.

    Args:
        roomid: Room identifier inserted into the login/join-group messages.

    This function never returns.
    """
    max_pending = 4096  # upper bound on unmatched bytes kept between reads
    _login(roomid)
    index = 300
    pending = b''
    while True:
        time.sleep(2)
        try:
            data = client.recv(1024)
        except OSError:
            # Covers ConnectionResetError and other socket failures.
            print("断线重连")
            data = b''
        else:
            if not data:
                # An empty read means the peer closed the connection.
                print('没有数据:等待5s')

        if not data:
            pending = b''
            try:
                _reconnect(roomid)
            except OSError as e:
                # Reconnecting failed; the next loop iteration tries again.
                print("exception:", e)
            continue

        pending += data
        matches = list(danmu.finditer(pending))
        print('条数:' + str(len(matches)))
        if matches:
            # Keep only the bytes after the last complete chat message.
            pending = pending[matches[-1].end():]
        pending = pending[-max_pending:]
        if not matches:
            continue

        # UTF-8 is set explicitly because the JSON lines contain non-ASCII
        # text (ensure_ascii=False) and the platform default may not encode it.
        with open('bullet_curtain.jl', 'a', encoding='utf-8') as f:
            for match in matches:
                # Errors are contained per message so that one bad record
                # does not discard the rest of the batch. AutoChat is opaque
                # here, hence the broad catch at this boundary.
                try:
                    print('............................................................')
                    nickname, text, sent_at = (
                        field.decode(encoding='utf-8', errors='ignore')
                        for field in match.groups()
                    )
                    dmDict = {
                        '昵称': nickname,
                        '弹幕内容': text,
                        '弹幕时间': sent_at,
                    }
                    # Only the first 10 digits are used as epoch seconds
                    # (presumably cst is a millisecond timestamp -- confirm).
                    dmTime = time.strftime(
                        "%Y-%m-%d %H:%M:%S", time.localtime(int(sent_at[0:10]))
                    )
                    dmJsonStr = json.dumps(dmDict, ensure_ascii=False) + '\n'
                    print(dmTime + ":" + nickname + ':' + text)
                    index = index + 1
                    AutoChat.main_func(text, index)
                    f.write(dmJsonStr)
                    time.sleep(2)
                except Exception as e:
                    print("exception:", e)

def keeplive():
    """Send a heartbeat message every 5 seconds to keep the TCP connection alive.

    Each heartbeat carries the current Unix time as its ``tick`` value and
    ends with a NUL byte. A socket error while sending is reported and the
    loop carries on, so the heartbeat resumes by itself if the module-level
    socket is later replaced by a working one.

    This function never returns.
    """
    while True:
        msg = 'type@=keeplive/tick@=' + str(int(time.time())) + '/\0'
        try:
            sendmsg(msg)
        except OSError as e:
            # Do not let one failed send end the heartbeat permanently.
            print("exception:", e)
        time.sleep(5)


def get_name(roomid):
    """Fetch the room's web page and return the text of its ``zb-name`` link.

    Args:
        roomid: Room identifier appended to the site URL (converted to str).

    Returns:
        The string content of the first ``<a class="zb-name">`` element, or
        ``None`` if the page contains no such element (or the element has no
        single string child).

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    # A timeout keeps a stalled server from blocking the caller forever.
    r = requests.get("http://www.douyu.com/" + str(roomid), timeout=10)
    soup = BeautifulSoup(r.text, 'lxml')
    # The attribute filter must be a dict; the original passed a set literal.
    anchor = soup.find('a', {'class': 'zb-name'})
    if anchor is None:
        return None
    return anchor.string

# Program entry point
if __name__ == '__main__':
    import threading

    # start() loops forever, so the heartbeat has to be launched before it.
    # A thread (not a separate process) is used so the heartbeat shares the
    # module-level socket, including any replacement made on reconnect.
    # The 5-second delay lets the login sequence finish before the first
    # heartbeat is written to the same socket.
    heartbeat = threading.Timer(5, keeplive)
    heartbeat.daemon = True  # do not keep the interpreter alive on exit
    heartbeat.start()
    start('96291')
    
    
    
    

 

  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 3
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值