1. 概述
最近换了新工作,由于工作需要,需要维护之前gb模块的代码,接到的第一个任务就是分析花屏问题。分析花屏的问题最关键的是需要分析码流有无异常,有没有被修改,确定gb模块是否解析视频数据异常。这里我记录了对接收的视频流的分析,我用python分析的目的主要是为了以后分析类似问题的时候能够轻松点,而不是对着抓的包一点一点看十六进制的数据差异,看上去真的很傻,而且很费劲。
2. 码流封装的协议
抓到的包,最外层是tcp,tcp payload部分是rtp包,rtp payload部分对应ps流,ps流中封装了音视频数据。
tcp payload部分不只是rtp包,还包括每个rtp包的长度信息,只有这样我们才能从中解析出rtp包,对应协议是 rfc 4571:https://www.rfc-editor.org/rfc/rfc4571 。
rtp部分对应的协议是rfc 3550,我们一个视频帧通常都很大,会拆分为多个rtp包,当然这里拆分的是ps流。
ps流部分封装是参考gb28181,当然比较遗憾的是,我这边没有找到对应的文档,网上没找到,部门内也没有,网上能找到的就是其他人的记录,并没有找到标准文档。如果有人找到了可以把链接放在评论区。相信大家了解以上内容,就能慢慢解析出对应的视频流了。
3. 解析过程
import scapy
from scapy.all import *
from scapy.utils import PcapReader
import socket
import time
import os
import numpy as np
import pandas as pd
packetspath = "D:\\r.pcap"
packetssrc = rdpcap(packetspath)
"""为了方便,这边我将包用wireshark过滤过的"""
# Index every TCP segment that carries a payload by its sequence number.
# A sequence number seen more than once is a retransmission: only the
# first copy is kept (setdefault never overwrites an existing entry).
seq_pkt = {}
for packet in packetssrc:
    if "TCP" not in packet:
        continue
    tcp_layer = packet['TCP']
    if not hasattr(tcp_layer, 'load'):
        continue
    seq_pkt.setdefault(tcp_layer.seq, tcp_layer)
# Sequence numbers in ascending order give the segments in stream order.
key_inorder = sorted(seq_pkt)
"""检查tcp包有没有丢,一般不需要这个,但实际上你可以加上试试,也许有惊喜,rdpcap模块可能存在bug会导致tcp的load部分丢失部分数据,当然一般是不会遇到的"""
# Walk the segments in sequence order; for each one the successor's seq
# must equal seq + payload length, otherwise a segment is missing.
# (The very last segment always triggers the message — it has no successor.)
for key in key_inorder:
    segment = seq_pkt[key]
    nextseq = segment.seq + len(segment.load)
    if nextseq not in seq_pkt:
        print("may loss pkt, cur tcp pkt seq:", segment.seq, " next seq:", nextseq)
"""tcp payload"""
# Concatenate all TCP payloads into one contiguous byte buffer.
pkt_buffer = bytearray()
for key in key_inorder:
    pkt_buffer.extend(seq_pkt[key].load)
"""parsing tcp payload for rtp packet according to rfc 4571:https://www.rfc-editor.org/rfc/rfc4571"""
# RFC 4571 framing: each RTP packet in the stream is preceded by a 16-bit
# big-endian LENGTH field.  A packet's bytes may straddle TCP segment
# boundaries, so pre_segment_len tracks how many bytes of the packet
# currently being assembled are still outstanding.
# Fix vs. original: the length-prefix read was duplicated in both branches
# (now done once per iteration), and a truncated (<2 byte) tail no longer
# raises struct.error — it is reported and skipped instead.
pre_segment_len = 0
buffer_idx = 0
rtp_buffer = []  # one bytearray per reassembled RTP packet
while buffer_idx < len(pkt_buffer):
    if pre_segment_len:
        # Continue filling the current (partial) RTP packet.
        take = min(pre_segment_len, len(pkt_buffer) - buffer_idx)
        rtp_buffer[-1] += pkt_buffer[buffer_idx:buffer_idx + take]
        buffer_idx += take
        pre_segment_len -= take
        if buffer_idx >= len(pkt_buffer):
            break
    # Start of a new framed packet: read the 16-bit length prefix.
    if len(pkt_buffer) - buffer_idx < 2:
        print("truncated length prefix at tail, ignoring", len(pkt_buffer) - buffer_idx, "byte(s)")
        break
    pre_segment_len = struct.unpack('>H', pkt_buffer[buffer_idx:buffer_idx + 2])[0]
    buffer_idx += 2
    rtp_buffer.append(bytearray())
"""解析rtp包"""
class analysis_rtp_in_buffer(object):
    """Parse one RTP packet (RFC 3550) held in a bytes-like buffer.

    Construct with the raw packet bytes, then call ``parse()`` once; the
    header fields are afterwards available as attributes (``ssrc``, ``pt``,
    ``timestamp``, ``sn``, ``mark``, ...) and the payload as ``self.payload``.
    """

    def __init__(self, packet):
        # Raw packet bytes; nothing is decoded until parse() runs.
        self.packetinst = packet

    def __str__(self):
        return "ssrc:%d pt:%d timestamp:%d sn:%d mark:%d" % (
            self.ssrc, self.pt, self.timestamp, self.sn, self.mark)

    def parse(self):
        """Decode the fixed header, CSRC list, optional extension and payload."""
        raw = self.packetinst
        first = raw[0]
        second = raw[1]
        # Byte 0: V(2) P(1) X(1) CC(4); byte 1: M(1) PT(7).
        self.rtpversion = first >> 6
        self.padding = (first >> 5) & 0x1
        self.has_extension = (first >> 4) & 0x1
        self.csrc_count = first & 0xf
        self.mark = (second & 0x80) >> 7
        self.pt = second & 0x7f
        self.sn = struct.unpack_from('>H', raw, 2)[0]
        self.timestamp = struct.unpack_from('>I', raw, 4)[0]
        self.ssrc = struct.unpack_from('>I', raw, 8)[0]
        # Skip the fixed 12-byte header plus one 32-bit word per CSRC entry.
        pos = 12 + 4 * self.csrc_count
        if self.has_extension:
            self.extension_len = 0
            self.extension_list = []
            defined_profile = struct.unpack_from('>H', raw, pos)[0]
            pos += 2
            # extension_size counts 32-bit words of extension data.
            self.extension_size = struct.unpack_from('>H', raw, pos)[0]
            pos += 2
            consumed = 0
            if defined_profile == 0xbede:
                # One-byte extension headers (RFC 5285 "bede" profile).
                while (consumed + 1) // 2 * 2 < self.extension_size * 4:
                    ext = one_byte_extension(raw[pos:])
                    pos += ext.length + 1
                    consumed += ext.length + 1
                    self.extension_list.append(ext)
            elif (defined_profile >> 4) == 0x100:
                # Two-byte extension headers (profile 0x100X).
                while (consumed + 1) // 2 * 2 < self.extension_size * 4:
                    ext = two_byte_extension(raw[pos:])
                    step = (ext.length + 1) // 2 * 2 + 2
                    pos += step
                    consumed += step
                    self.extension_list.append(ext)
            # Round the offset up to an even boundary after the extension data.
            pos = (pos + 1) // 2 * 2
        self.payload = raw[pos:]
        self.payload_offset = pos
"""解析rtp包构造dataframe方便处理"""
# Parse every reassembled RTP packet once, then project the header fields
# into per-column lists for the DataFrame.
parsed_pkts = []
for raw_rtp in rtp_buffer:
    obj = analysis_rtp_in_buffer(raw_rtp)
    obj.parse()
    parsed_pkts.append(obj)
ssrc = [p.ssrc for p in parsed_pkts]
pt = [p.pt for p in parsed_pkts]
timestamp = [p.timestamp for p in parsed_pkts]
sn = [p.sn for p in parsed_pkts]
mark = [p.mark for p in parsed_pkts]
payload = [p.payload for p in parsed_pkts]
data = {'ssrc': ssrc, 'pt': pt, 'timestamp': timestamp, 'sn': sn, 'mark': mark, 'payload': payload}
dataframe = pd.DataFrame(data)
"""check if lost rtp packet"""
# If no RTP packet was lost, the sequence numbers form one contiguous run,
# so (last - first + 1) equals the packet count.
packet_num = len(dataframe)
first_sn = dataframe['sn'].iloc[0]
last_sn = dataframe['sn'].iloc[-1]
print("rtp packet is continuous:", last_sn - first_sn + 1 == packet_num)
# RTP video timestamps tick at 90 kHz, so the span divided by 90000 is seconds.
ts_span = dataframe['timestamp'].iloc[-1] - dataframe['timestamp'].iloc[0]
print("timestamp 换算时长:%f s" % (ts_span / 90000))
"""保存ps流,这边ps流是可以直接用ffplay播放的"""
# Dump the concatenated RTP payloads (the raw PS stream) to disk, then
# load the file back as one mutable byte buffer for parsing.
with open("D:\\r.ps", 'wb') as out_file:
    for chunk in dataframe['payload']:
        out_file.write(chunk)
with open("D:\\r.ps", 'rb') as in_file:
    ps_stream = in_file.read()
ps_stream_array = bytearray(ps_stream)
"""解析ps流,获取媒体帧列表"""
# Demux the MPEG-2 Program Stream: walk the 4-byte start codes, skip the
# pack/system/map/audio sections, and split the video PES payloads on the
# Annex-B start code 00 00 00 01 into individual frames.
start_code_str = bytearray([0,0,0,1]).decode('ISO-8859-1')
frame_data_list = [] # video frames parsed from the PS stream, split on start code 0 0 0 1; this capture carries H.265
cur_idx = 0
while cur_idx < len(ps_stream_array):
    header_type = struct.unpack('>I',ps_stream_array[cur_idx:cur_idx+4])[0] #ps header type
    cur_idx += 4
    if header_type == 0x1ba:
        """ps header"""
        # Skip the pack header body, then stuffing bytes (count in low 3 bits).
        # NOTE(review): the pack_stuffing_length byte is the last byte of the
        # 14-byte pack header (offset 13 from the start code); after
        # `cur_idx += 10` the index is one byte PAST it, so this reads the byte
        # after the header — looks like an off-by-one that only behaves when
        # stuffing length is 0 (next byte is the 0x00 of the following start
        # code). TODO confirm against a capture with non-zero stuffing.
        cur_idx += 10
        cur_idx += ps_stream_array[cur_idx]&0x7
        continue
    elif header_type == 0x1bb:
        """ps system header"""
        # 16-bit header_length immediately follows; skip the whole body.
        cur_idx += struct.unpack('>H',ps_stream_array[cur_idx:cur_idx+2])[0]
        cur_idx += 2
        continue
    elif header_type == 0x1bc:
        """ps system map"""
        # Program stream map: skip by its 16-bit length field.
        cur_idx += struct.unpack('>H',ps_stream_array[cur_idx:cur_idx+2])[0]
        cur_idx += 2
        continue
    elif header_type == 0x1e0:
        """视频"""
        """check video data"""
        # Video PES packet: PES_packet_length counts everything after itself.
        video_data_len = struct.unpack('>H',ps_stream_array[cur_idx:cur_idx+2])[0]
        #print(cur_idx,video_data_len)
        check_idx = cur_idx + video_data_len + 2 # expected index right after this PES packet
        cur_idx += 2 # the 16-bit length field itself
        cur_idx += 2 # PES flag bytes
        video_data_len -= 2
        ext_len = ps_stream_array[cur_idx] # PES_header_data_length: size of the optional fields
        cur_idx+=1
        video_data_len-=1
        cur_idx+=ext_len
        video_data_len-=ext_len
        video_data = ps_stream_array[cur_idx:cur_idx+video_data_len]
        if(cur_idx+video_data_len>len(ps_stream_array)):
            print("lost some date in the tail")
        # Split the elementary-stream bytes on the start code via a latin-1
        # string view (lossless byte<->str round trip).
        nalu_str_list = video_data.decode('ISO-8859-1').split(start_code_str)
        if len(nalu_str_list[0]): # bytes before the first start code continue the previous frame
            frame_data_list[-1] += bytearray(nalu_str_list[0].encode('ISO-8859-1'))
            video_data_len-=len(nalu_str_list[0])
            cur_idx+=len(nalu_str_list[0])
        for frame_data in nalu_str_list[1:]: # each remaining piece starts a new frame (split removed its start code)
            video_data_len-=4 # account for the 4-byte start code consumed by split
            cur_idx+=4
            frame_data_list.append(bytearray(frame_data.encode('ISO-8859-1')))
            video_data_len-=len(frame_data)
            cur_idx+=len(frame_data)
            #print("\t",frame_data_list[-1][:2].hex(" ",1)) # frame type: first two bytes of an H.265 NALU
        # Sanity check: we must land exactly at the end of the PES packet.
        if cur_idx != check_idx:
            print("error video data len:",cur_idx,check_idx)
            break
        continue
    elif header_type == 0x1c0:
        """音频"""
        """check audio data"""
        # Audio PES packet: not needed here, skip by its 16-bit length field.
        cur_idx += struct.unpack('>H',ps_stream_array[cur_idx:cur_idx+2])[0]
        cur_idx += 2
        continue
"""这里是从gb模块转发出去的视频流中提取的,视频数据按照flv协议被封装里面"""
with open("D:\\s.flv", 'rb') as f:
    flv_stream = f.read()
# latin-1 view of the FLV bytes so str.find performs an exact byte search.
flv_stream_str = flv_stream.decode('ISO-8859-1')
"""检查转发的视频帧数据是否有异常"""
# For each extracted frame, record where (if anywhere) its exact bytes occur
# inside the forwarded FLV stream, plus its length and leading two bytes.
state_map = dict()
for frame_no, frame in enumerate(frame_data_list):
    found_at = flv_stream_str.find(frame.decode('ISO-8859-1'))
    state_map[frame_no] = (found_at, len(frame), frame[:2].hex(" ", 1))
"""到这里我们已经知道哪些帧数据存在异常了,state_map 中元组中idx为-1的那些就是"""