1. 概述
最近换了新工作,由于工作需要,需要维护之前gb模块的代码,接到的第一个任务就是分析花屏问题。分析花屏的问题最关键的是需要分析码流有无异常,有没有被修改,确定gb模块是否解析视频数据异常。这里我记录了对接收的视频流的分析,我用python分析的目的主要是为了以后分析类似问题的时候能够轻松点,而不是对着抓的包一点一点看十六进制的数据差异,看上去真的很傻,而且很费劲。
2. 码流封装的协议
抓到的包,最外层是tcp,tcp payload部分是rtp包,rtp payload部分对应ps流,ps流中封装了音视频数据。
tcp payload部分不只是rtp包,还包括每个rtp包的长度信息,只有这样我们才能从中解析出rtp包,对应协议是 rfc 4571:https://www.rfc-editor.org/rfc/rfc4571 。
rtp部分对应的协议是rfc 3550,我们一个视频帧通常都很大,会拆分为多个rtp包,当然这里拆分的是ps流。
ps流部分封装是参考gb28181,当然比较遗憾的是,我这边没有找到对应的文档,网上没找到,部门内也没有,网上能找到的就是其他人的记录,并没有找到标准文档。如果有人找到了可以把链接放在评论区。相信大家了解以上内容,就能慢慢解析出对应的视频流了。
3. 解析过程
import scapy
from scapy.all import *
from scapy.utils import PcapReader
import socket
import time
import os
import numpy as np
import pandas as pd
packetspath = "D:\\r.pcap"
packetssrc = rdpcap(packetspath)
"""为了方便,这边我将包用wireshark过滤过的"""
# Index every TCP segment that carries a payload by its sequence number.
# A sequence number seen more than once is a retransmission: only the
# first copy is kept (setdefault never overwrites an existing entry).
seq_pkt = {}
for packet in packetssrc:
    if "TCP" not in packet:
        continue
    tcp_layer = packet['TCP']
    if not hasattr(tcp_layer, 'load'):
        continue
    seq_pkt.setdefault(tcp_layer.seq, tcp_layer)
# Sequence numbers in ascending order give the segments in stream order.
key_inorder = sorted(seq_pkt)
"""检查tcp包有没有丢,一般不需要这个,但实际上你可以加上试试,也许有惊喜,rdpcap模块可能存在bug会导致tcp的load部分丢失部分数据,当然一般是不会遇到的"""
# Walk the segments in sequence order; for each one the successor's seq
# must equal seq + payload length, otherwise a segment is missing.
# (The very last segment always triggers the message — it has no successor.)
for key in key_inorder:
    segment = seq_pkt[key]
    nextseq = segment.seq + len(segment.load)
    if nextseq not in seq_pkt:
        print("may loss pkt, cur tcp pkt seq:", segment.seq, " next seq:", nextseq)
"""tcp payload"""
# Concatenate all TCP payloads into one contiguous byte buffer.
pkt_buffer = bytearray()
for key in key_inorder:
    pkt_buffer.extend(seq_pkt[key].load)
"""parsing tcp payload for rtp packet according to rfc 4571:https://www.rfc-editor.org/rfc/rfc4571"""
# RFC 4571 framing: each RTP packet in the stream is preceded by a 16-bit
# big-endian LENGTH field.  A packet's bytes may straddle TCP segment
# boundaries, so pre_segment_len tracks how many bytes of the packet
# currently being assembled are still outstanding.
# Fix vs. original: the length-prefix read was duplicated in both branches
# (now done once per iteration), and a truncated (<2 byte) tail no longer
# raises struct.error — it is reported and skipped instead.
pre_segment_len = 0
buffer_idx = 0
rtp_buffer = []  # one bytearray per reassembled RTP packet
while buffer_idx < len(pkt_buffer):
    if pre_segment_len:
        # Continue filling the current (partial) RTP packet.
        take = min(pre_segment_len, len(pkt_buffer) - buffer_idx)
        rtp_buffer[-1] += pkt_buffer[buffer_idx:buffer_idx + take]
        buffer_idx += take
        pre_segment_len -= take
        if buffer_idx >= len(pkt_buffer):
            break
    # Start of a new framed packet: read the 16-bit length prefix.
    if len(pkt_buffer) - buffer_idx < 2:
        print("truncated length prefix at tail, ignoring", len(pkt_buffer) - buffer_idx, "byte(s)")
        break
    pre_segment_len = struct.unpack('>H', pkt_buffer[buffer_idx:buffer_idx + 2])[0]
    buffer_idx += 2
    rtp_buffer.append(bytearray())
"""解析rtp包"""
class analysis_rtp_in_buffer(object):
    """Parse one RTP packet (RFC 3550) held in a bytes-like buffer.

    Construct with the raw packet bytes, then call ``parse()`` once; the
    header fields are afterwards available as attributes (``ssrc``, ``pt``,
    ``timestamp``, ``sn``, ``mark``, ...) and the payload as ``self.payload``.
    """

    def __init__(self, packet):
        # Raw packet bytes; nothing is decoded until parse() runs.
        self.packetinst = packet

    def __str__(self):
        return "ssrc:%d pt:%d timestamp:%d sn:%d mark:%d" % (
            self.ssrc, self.pt, self.timestamp, self.sn, self.mark)

    def parse(self):
        """Decode the fixed header, CSRC list, optional extension and payload."""
        raw = self.packetinst
        first = raw[0]
        second = raw[1]
        # Byte 0: V(2) P(1) X(1) CC(4); byte 1: M(1) PT(7).
        self.rtpversion = first >> 6
        self.padding = (first >> 5) & 0x1
        self.has_extension = (first >> 4) & 0x1
        self.csrc_count = first & 0xf
        self.mark = (second & 0x80) >> 7
        self.pt = second & 0x7f
        self.sn = struct.unpack_from('>H', raw, 2)[0]
        self.timestamp = struct.unpack_from('>I', raw, 4)[0]
        self.ssrc = struct.unpack_from('>I', raw, 8)[0]
        # Skip the fixed 12-byte header plus one 32-bit word per CSRC entry.
        pos = 12 + 4 * self.csrc_count
        if self.has_extension:
            self.extension_len = 0
            self.extension_list = []
            defined_profile = struct.unpack_from('>H', raw, pos)[0]
            pos += 2
            # extension_size counts 32-bit words of extension data.
            self.extension_size = struct.unpack_from('>H', raw, pos)[0]
            pos += 2
            consumed = 0
            if defined_profile == 0xbede:
                # One-byte extension headers (RFC 5285 "bede" profile).
                while (consumed + 1) // 2 * 2 < self.extension_size * 4:
                    ext = one_byte_extension(raw[pos:])
                    pos += ext.length + 1
                    consumed += ext.length + 1
                    self.extension_list.append(ext)
            elif (defined_profile >> 4) == 0x100:
                # Two-byte extension headers (profile 0x100X).
                while (consumed + 1) // 2 * 2 < self.extension_size * 4:
                    ext = two_byte_extension(raw[pos:])
                    step = (ext.length + 1) // 2 * 2 + 2
                    pos += step
                    consumed += step
                    self.extension_list.append(ext)
            # Round the offset up to an even boundary after the extension data.
            pos = (pos + 1) // 2 * 2
        self.payload = raw[pos:]
        self.payload_offset = pos
"""解析rtp包构造dataframe方便处理"""
# Parse every reassembled RTP packet once, then project the header fields
# into per-column lists for the DataFrame.
parsed_pkts = []
for raw_rtp in rtp_buffer:
    obj = analysis_rtp_in_buffer(raw_rtp)
    obj.parse()
    parsed_pkts.append(obj)
ssrc = [p.ssrc for p in parsed_pkts]
pt = [p.pt for p in parsed_pkts]
timestamp = [p.timestamp for p in parsed_pkts]
sn = [p.sn for p in parsed_pkts]
mark = [p.mark for p in parsed_pkts]
payload = [p.payload for p in parsed_pkts]
data = {'ssrc': ssrc, 'pt': pt, 'timestamp': timestamp, 'sn': sn, 'mark': mark, 'payload': payload}
dataframe = pd.DataFrame(data)
"""check if lost rtp packet"""
# If no RTP packet was lost, the sequence numbers form one contiguous run,
# so (last - first + 1) equals the packet count.
packet_num = len(dataframe)
first_sn = dataframe['sn'].iloc[0]
last_sn = dataframe['sn'].iloc[-1]
print("rtp packet is continuous:", last_sn - first_sn + 1 == packet_num)
# RTP video timestamps tick at 90 kHz, so the span divided by 90000 is seconds.
ts_span = dataframe['timestamp'].iloc[-1] - dataframe['timestamp'].iloc[0]
print("timestamp 换算时长:%f s" % (ts_span / 90000))
"""保存ps流,这边ps流是可以直接用ffplay播放的"""
# Dump the concatenated RTP payloads (the raw PS stream) to disk, then
# load the file back as one mutable byte buffer for parsing.
with open("D:\\r.ps", 'wb') as out_file:
    for chunk in dataframe['payload']:
        out_file.write(chunk)
with open("D:\\r.ps", 'rb') as in_file:
    ps_stream = in_file.read()
ps_stream_array = bytearray(ps_stream)
"""解析ps流,获取媒体帧列表"""
# Demux the MPEG-2 Program Stream: walk the 4-byte start codes, skip the
# pack/system/map/audio sections, and split the video PES payloads on the
# Annex-B start code 00 00 00 01 into individual frames.
start_code_str = bytearray([0,0,0,1]).decode('ISO-8859-1')
frame_data_list = [] # video frames parsed from the PS stream, split on start code 0 0 0 1; this capture carries H.265
cur_idx = 0
while cur_idx < len(ps_stream_array):
    header_type = struct.unpack('>I',ps_stream_array[cur_idx:cur_idx+4])[0] #ps header type
    cur_idx += 4
    if header_type == 0x1ba:
        """ps header"""
        # Skip the pack header body, then stuffing bytes (count in low 3 bits).
        # NOTE(review): the pack_stuffing_length byte is the last byte of the
        # 14-byte pack header (offset 13 from the start code); after
        # `cur_idx += 10` the index is one byte PAST it, so this reads the byte
        # after the header — looks like an off-by-one that only behaves when
        # stuffing length is 0 (next byte is the 0x00 of the following start
        # code). TODO confirm against a capture with non-zero stuffing.
        cur_idx += 10
        cur_idx += ps_stream_array[cur_idx]&0x7
        continue
    elif header_type == 0x1bb:
        """ps system header"""
        # 16-bit header_length immediately follows; skip the whole body.
        cur_idx += struct.unpack('>H',ps_stream_array[cur_idx:cur_idx+2])[0]
        cur_idx += 2
        continue
    elif header_type == 0x1bc:
        """ps system map"""
        # Program stream map: skip by its 16-bit length field.
        cur_idx += struct.unpack('>H',ps_stream_array[cur_idx:cur_idx+2])[0]
        cur_idx += 2
        continue
    elif header_type == 0x1e0:
        """视频"""
        """check video data"""
        # Video PES packet: PES_packet_length counts everything after itself.
        video_data_len = struct.unpack('>H',ps_stream_array[cur_idx:cur_idx+2])[0]
        #print(cur_idx,video_data_len)
        check_idx = cur_idx + video_data_len + 2 # expected index right after this PES packet
        cur_idx += 2 # the 16-bit length field itself
        cur_idx += 2 # PES flag bytes
        video_data_len -= 2
        ext_len = ps_stream_array[cur_idx] # PES_header_data_length: size of the optional fields
        cur_idx+=1
        video_data_len-=1
        cur_idx+=ext_len
        video_data_len-=ext_len
        video_data = ps_stream_array[cur_idx:cur_idx+video_data_len]
        if(cur_idx+video_data_len>len(ps_stream_array)):
            print("lost some date in the tail")
        # Split the elementary-stream bytes on the start code via a latin-1
        # string view (lossless byte<->str round trip).
        nalu_str_list = video_data.decode('ISO-8859-1').split(start_code_str)
        if len(nalu_str_list[0]): # bytes before the first start code continue the previous frame
            frame_data_list[-1] += bytearray(nalu_str_list[0].encode('ISO-8859-1'))
            video_data_len-=len(nalu_str_list[0])
            cur_idx+=len(nalu_str_list[0])
        for frame_data in nalu_str_list[1:]: # each remaining piece starts a new frame (split removed its start code)
            video_data_len-=4 # account for the 4-byte start code consumed by split
            cur_idx+=4
            frame_data_list.append(bytearray(frame_data.encode('ISO-8859-1')))
            video_data_len-=len(frame_data)
            cur_idx+=len(frame_data)
            #print("\t",frame_data_list[-1][:2].hex(" ",1)) # frame type: first two bytes of an H.265 NALU
        # Sanity check: we must land exactly at the end of the PES packet.
        if cur_idx != check_idx:
            print("error video data len:",cur_idx,check_idx)
            break
        continue
    elif header_type == 0x1c0:
        """音频"""
        """check audio data"""
        # Audio PES packet: not needed here, skip by its 16-bit length field.
        cur_idx += struct.unpack('>H',ps_stream_array[cur_idx:cur_idx+2])[0]
        cur_idx += 2
        continue
"""这里是从gb模块转发出去的视频流中提取的,视频数据按照flv协议被封装里面"""
with open("D:\\s.flv", 'rb') as f:
    flv_stream = f.read()
# latin-1 view of the FLV bytes so str.find performs an exact byte search.
flv_stream_str = flv_stream.decode('ISO-8859-1')
"""检查转发的视频帧数据是否有异常"""
# For each extracted frame, record where (if anywhere) its exact bytes occur
# inside the forwarded FLV stream, plus its length and leading two bytes.
state_map = dict()
for frame_no, frame in enumerate(frame_data_list):
    found_at = flv_stream_str.find(frame.decode('ISO-8859-1'))
    state_map[frame_no] = (found_at, len(frame), frame[:2].hex(" ", 1))
"""到这里我们已经知道哪些帧数据存在异常了,state_map 中元组中idx为-1的那些就是"""