目前我在从事远程rtsp视频采集+本地录音整合到一个mp4文件的工作,遇到的问题:由于视频的编解码以及网络耗时导致音视频无法同步,播放出来的画面延迟很严重。
为了解决这个问题,研究了一下音视频pts和dts。
录像音视频的原理:
(1) 获取网络码流-->抛弃异常包--->等待关键帧(为了图像不花图)----->开始转封装视频数据------>更改pts和dts------>av_write_frame
(2)设置音频回调函数(音频采集回调的条件为满足采样一个音频帧则返回)本地音频采集回调------>回调的数据----->编码音频 ---->更改Pts和dts---->av_write_frame
备注1: 音频帧:规定1024个采样点为一帧 如果采样位数为16bit 左右声道均采集 则采样一个音频回调需要1024*16/8*2=4096bytes
备注2:抛弃异常包:由于刚开始获取到的几个视频包的pts和dts是异常跳变而非递增的,所以需要等到视频包连续递增跳变的时候才能使用
并且为了保存的录像解码不花图,则需要第一帧写入的包为key包
/**
 * Reads the next packet from the RTSP input and decides whether it may be
 * passed on for writing.
 *
 * The pts/dts of the very first packet read are remembered in m_nFirstPts /
 * m_nFirstDts. While m_bFirstKeyFrm is set (the original comment lists: first
 * connect, reconnect, new output file), a packet is accepted only when all of
 * the following hold:
 *   - at least MAX_VIDEO_FIRST_WAIT_FRM_CNT packets were already seen,
 *   - its pts is greater than the pts of the very first packet,
 *   - it carries the key-frame flag.
 * The pts/dts of that key frame are stored in m_nFirstKeyPts / m_nFirstKeyDts
 * and m_bFirstKeyFrm is cleared. Afterwards every successfully read packet is
 * accepted.
 *
 * @param pkt  Destination packet, filled by av_read_frame().
 * @return TRUE when pkt holds a packet that should be written, FALSE when no
 *         packet was read or the packet has to be skipped.
 *
 * NOTE(review): when a packet was read but rejected, pkt still owns its data
 * on return; confirm the caller releases it in the FALSE case as well.
 */
BOOL CRecordRtspAndMicrophone::GetStreamPacket( AVPacket *pkt)
{
    // The RTSP context is released when recording is paused etc., so the
    // pointer has to be guarded.
    if (NULL == m_pRtspFmt || NULL == pkt)
    {
        return FALSE;
    }
    if (av_read_frame(m_pRtspFmt, pkt) != 0)
    {
        return FALSE;
    }

    if (m_bFirstFrm)
    {
        m_bFirstFrm = FALSE;
        m_nFirstPts = pkt->pts;
        m_nFirstDts = pkt->dts;
    }

    // Normal case: any packet that was read successfully is accepted.
    if (!m_bFirstKeyFrm)
    {
        return TRUE;
    }

    // Start of a video segment: wait for a usable key frame. The counter is
    // advanced on every packet read while waiting.
    if (MAX_VIDEO_FIRST_WAIT_FRM_CNT > m_nWaitFrmCnt++)
    {
        return FALSE;
    }
    if (pkt->pts <= m_nFirstPts)
    {
        return FALSE;
    }
    // flags is a bit mask: test the key bit instead of comparing for equality,
    // otherwise a key frame that carries any additional flag is never accepted.
    if (0 == (pkt->flags & AV_PKT_FLAG_KEY))
    {
        return FALSE;
    }

    m_bFirstKeyFrm = FALSE;
    m_nFirstKeyPts = pkt->pts;
    m_nFirstKeyDts = pkt->dts;
    return TRUE;
}
pts:显示时间戳
dts:解码时间戳
一、视频时间戳转换
av_rescale_q_rnd函数 将时间戳从一个时间基(time_base)转换到另外一个时间基
系统默认的H264的time_base= num=1/den=90000 mp4文件的time_base= num=1/den=12800 flv文件的time_base=num=1/den=1000
可以看出来 转换后的时间戳为 m' = m*12800/90000 不能直接用整数乘除来计算:m*12800 可能溢出变为负数(而先算整数除法 12800/90000 又会得到0) 所以要用av_rescale_q_rnd
m_pInVst----输入的视频stream 类型为AVStream* =【AVFormatContext-> AVStream **streams[视频index]】
//分析读入的流 读取其音频和视频下标
for (int i = 0; i<m_pRtspFmt->nb_streams; i++)
{
if (m_pRtspFmt->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO)
{
m_pInVst = m_pRtspFmt->streams[i];
m_nInViStreamIdx = i;
LOG(LOG_RTM, "[ERROR]find video stream index=%d",i);
}
else if (m_pRtspFmt->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO)
{
m_pInAst = m_pRtspFmt->streams[i];
m_nInAuStreamIdx = i;
LOG(LOG_RTM, "[ERROR]find audio stream index=%d", i);
}
}
m_pOutVst---输出的视频stream
/**
 * Creates the video stream of the output (mp4) context as a copy of the input
 * video stream, so that video packets can be re-muxed without re-encoding.
 *
 * Side effects: sets m_pOutVst, m_nOutViStreamIdx, m_nImgW and m_nImgH.
 *
 * @return TRUE on success; FALSE when the output context or the input video
 *         stream is missing, the stream cannot be created, or the codec
 *         parameters cannot be copied.
 */
BOOL CRecordRtspAndMicrophone::AddVideoOutput()
{
    if (NULL == m_pMp4Fmt || NULL == m_pInVst)
    {
        LOG(LOG_ERROR, "[ERROR]AddVideoOutput: output context or input video stream is NULL");
        return FALSE;
    }

    // avformat_new_stream() returns NULL on failure; it must not be dereferenced.
    m_pOutVst = avformat_new_stream(m_pMp4Fmt, m_pInVst->codec->codec);
    if (NULL == m_pOutVst)
    {
        LOG(LOG_ERROR, "[ERROR]AddVideoOutput: avformat_new_stream failed");
        return FALSE;
    }
    // Use the index the muxer actually assigned instead of assuming 0.
    m_nOutViStreamIdx = m_pOutVst->index;

    // avcodec_copy_context() reports failure with a negative error code.
    if (avcodec_copy_context(m_pOutVst->codec, m_pInVst->codec) < 0)
    {
        LOG(LOG_ERROR, "[ERROR]AddVideoOutput: avcodec_copy_context failed");
        return FALSE;
    }

    m_nImgW = m_pInVst->codec->width;
    m_nImgH = m_pInVst->codec->height;
    copy_video_stream_info(m_pOutVst, m_pInVst, m_pMp4Fmt);

    // Containers that store codec headers globally need the matching codec flag.
    if (m_pMp4Fmt->oformat->flags & AVFMT_GLOBALHEADER)
    {
        m_pOutVst->codec->flags |= CODEC_FLAG_GLOBAL_HEADER;
    }

    LOG(LOG_RTM, "AddVideoOutput");
    return TRUE;
}
这个函数是收到一个视频数据包 并将原来的时间戳转换为输出mp4文件的时间戳
/**
 * Re-stamps one received video packet for the output file and writes it.
 *
 * The pts/dts are made relative to the first accepted key frame
 * (m_nFirstKeyPts / m_nFirstKeyDts), converted from the input stream time base
 * to the output stream time base, and the packet is written with
 * av_write_frame(). The packet is also handed to m_player.InputStream() before
 * writing. m_nVideoPts is updated with the converted pts.
 *
 * @param pPkt  Video packet read from the input; modified in place.
 * @return TRUE when the packet was written, FALSE on invalid arguments or when
 *         av_write_frame() reported an error.
 *
 * NOTE(review): pts and dts are shifted by two different offsets; for streams
 * where pts != dts this changes their distance. Confirm this is intended.
 */
BOOL CRecordRtspAndMicrophone::WriteVideoFrame( AVPacket *pPkt )
{
    if (NULL == pPkt || NULL == m_pMp4Fmt || NULL == m_pInVst || NULL == m_pOutVst)
    {
        return FALSE;
    }

    const AVRounding eRound = static_cast<AVRounding>(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX);

    pPkt->stream_index = m_nOutViStreamIdx;

    // AV_NOPTS_VALUE must stay untouched: subtracting from it overflows and
    // AV_ROUND_PASS_MINMAX could no longer pass it through the rescale.
    if (pPkt->pts != AV_NOPTS_VALUE)
    {
        pPkt->pts -= m_nFirstKeyPts;
    }
    if (pPkt->dts != AV_NOPTS_VALUE)
    {
        pPkt->dts -= m_nFirstKeyDts;
    }

    pPkt->pts = av_rescale_q_rnd(pPkt->pts, m_pInVst->time_base, m_pOutVst->time_base, eRound);
    pPkt->dts = av_rescale_q_rnd(pPkt->dts, m_pInVst->time_base, m_pOutVst->time_base, eRound);
    pPkt->duration = av_rescale_q(pPkt->duration, m_pInVst->time_base, m_pOutVst->time_base);
    pPkt->pos = -1;

    m_player.InputStream(pPkt, VIDEO_STREAM);

    BOOL bWritten = FALSE;
    // av_write_frame() signals errors with negative values only.
    if (av_write_frame(m_pMp4Fmt, pPkt) < 0)
    {
        LOG(LOG_ERROR,"[ERROR] writing video frame,fmt=0x%x,audiostream=0x%x,videostream=0x%x,pktsize=%d", m_pMp4Fmt,m_pOutAst,m_pOutVst, pPkt->size);
    }
    else
    {
        LOG(LOG_DEBUG, "[Record] videoout packet type=%d\tlen=%d,pts=%d,dts=%d,pb=%d",
            pPkt->stream_index, pPkt->size, (int)pPkt->pts, (int)pPkt->dts, (int)m_pMp4Fmt->pb->pos);
        bWritten = TRUE;
    }

    m_nVideoPts = pPkt->pts;
    return bWritten;
}
二、音频时间戳的处理(时间延迟和时间戳构造)
1、用本地获取到的pcm数据进行编码
AVPacket pkt;
av_new_packet(&pkt, m_nAudioFrmSize);
memcpy(m_pAFrame->data[0], m_AudioCircleBuf.ele[nPos].data, m_nAudioFrmSize);
m_nAudioTotalSize += m_nAudioFrmSize;
if (m_AudioCircleBuf.ele[nPos].nMaxLen < m_nAudioFrmSize)
{
LOG(LOG_ERROR, "[ERROR]音频编码所需内存异常!");
}
if (avcodec_encode_audio2(m_pOutAst->codec, &pkt, m_pAFrame, &got_frame) != 0)
{
LOG(LOG_ERROR, "[ERROR]音频编码失败!");
}
音频采样bufsize的设置为:通道数*采样bit/8*1024
// Size in bytes of one audio frame: pCodecCtx->frame_size samples for
// pCodecCtx->channels channels in the sample format pCodecCtx->sample_fmt.
// The NULL first argument means the computed linesize is not requested; the
// trailing 1 is the buffer alignment argument.
// NOTE(review): av_samples_get_buffer_size() reports failure with a negative
// value, which is not checked here.
m_nAudioFrmSize = av_samples_get_buffer_size(NULL, pCodecCtx->channels, pCodecCtx->frame_size, pCodecCtx->sample_fmt, 1);
2、对编码后的时间戳进行构造
(1)我设置的每次音频缓冲区满了就回调--而我的缓冲区为4096字节(16bit*双通道*1024/帧) 所以我每次收到的音频pcm数据实际上为一帧!
则我的
pts可以设置为---------------1024*帧数
dts=pts
反推一下 pts可以泛化:pts=dts=累计音频size/通道数/(bit/8),即累计采样点数(=1024*帧数)
(2)音频延迟
如果要延迟m_nAudioTimeDelayMs=500毫秒保持与网络同步 则可以设置pts=dts+延迟时间dms
如果采样率为AUDIO_SAMP_RATE=22050 双通道 16bit 则dms=22050*500/1000
/**
 * Takes one captured PCM frame from the audio ring buffer, encodes it and
 * writes the resulting packet to the output file.
 *
 * Time stamps are counted in samples: dts = frame index * 1024, and
 * pts = dts + the configured delay (m_nAudioTimeDelayMs) converted from
 * milliseconds to samples at AUDIO_SAMP_RATE. The ring-buffer slot is consumed
 * whether or not encoding/writing succeeds. The whole operation runs between
 * Lock() and UnLock().
 *
 * NOTE(review): assumes m_nAudioTimeDelayMs >= 0; a negative delay would give
 * pts < dts.
 *
 * @return TRUE when a packet was written to the file, FALSE otherwise (no
 *         output audio stream, empty ring buffer, invalid slot, encode error,
 *         no packet produced by the encoder, or write error).
 */
BOOL CRecordRtspAndMicrophone::WriteDeviceAudio()
{
    if (NULL == m_pOutAst)
    {
        return FALSE;
    }

    const int kSamplesPerFrame = 1024;
    BOOL bRet = FALSE;

    Lock();
    if (m_AudioCircleBuf.nItemCnt)
    {
        const UINT nPos = (m_AudioCircleBuf.nRead + 1) % CIRCLE_BUF_LEN;
        LOG(LOG_DEBUG, "[GetAudio] buf itemcnt=%d,read=%d,write=%d,time=%u",
            m_AudioCircleBuf.nItemCnt, m_AudioCircleBuf.nRead,
            m_AudioCircleBuf.nWrite, m_AudioCircleBuf.ele[nPos].dwTimeStamp);

        AVPacket pkt;
        // Validate the slot before copying out of it; checking after the
        // memcpy cannot prevent the out-of-range read.
        // NOTE(review): assumes nMaxLen is the capacity of ele[nPos].data - confirm.
        if (m_AudioCircleBuf.ele[nPos].nMaxLen < m_nAudioFrmSize)
        {
            LOG(LOG_ERROR, "[ERROR]音频编码所需内存异常!");
        }
        else if (av_new_packet(&pkt, m_nAudioFrmSize) != 0)
        {
            // pkt was not initialised, so it must not be passed to av_free_packet().
            LOG(LOG_ERROR, "[ERROR]av_new_packet failed,size=%d", (int)m_nAudioFrmSize);
        }
        else
        {
            int got_frame = 0;
            memcpy(m_pAFrame->data[0], m_AudioCircleBuf.ele[nPos].data, m_nAudioFrmSize);
            m_nAudioTotalSize += m_nAudioFrmSize;

            if (avcodec_encode_audio2(m_pOutAst->codec, &pkt, m_pAFrame, &got_frame) != 0)
            {
                LOG(LOG_ERROR, "[ERROR]音频编码失败!");
            }
            else
            {
                // Start of this frame in samples. dts is taken from the same
                // (pre-increment) count as pts, so pts >= dts holds for any
                // non-negative delay.
                const int64_t nFrameStart = static_cast<int64_t>(audio_frame_cnt) * kSamplesPerFrame;
                audio_frame_cnt++;

                // The encoder may consume input without producing a packet.
                if (got_frame)
                {
                    pkt.dts = nFrameStart;
                    // Delay in ms -> samples: ms * rate / 1000 (not / 1024).
                    pkt.pts = nFrameStart + static_cast<int64_t>(m_nAudioTimeDelayMs) * AUDIO_SAMP_RATE / 1000;
                    pkt.flags |= AV_PKT_FLAG_KEY;
                    pkt.stream_index = m_nOutAuStreamIdx;
                    m_nAudioPts = pkt.pts;
                    m_Fms.InputStream(&pkt, AUDIO_STREAM);

                    /* write the compressed frame in the media file */
                    const int nWriteRet = av_write_frame(m_pMp4Fmt, &pkt);
                    if (nWriteRet < 0)
                    {
                        LOG(LOG_ERROR, "[ERROR]Error while writing audio frame,fmt=0x%x,audiostream=0x%x,videostream=0x%x,pktsize=%d", m_pMp4Fmt, m_pOutAst, m_pOutVst, pkt.size);
                    }
                    else
                    {
                        LOG(LOG_DEBUG, "[Record] packet type=%d\tlen=%d,pts=%d,dts=%d,pb=%d",
                            pkt.stream_index, pkt.size, (int)pkt.pts, (int)pkt.dts, (int)m_pMp4Fmt->pb->pos);
                        bRet = TRUE;
                    }
                }
            }
            av_free_packet(&pkt);
        }

        // Consume the slot in every case so a bad entry cannot block the queue.
        m_AudioCircleBuf.nRead = nPos;
        m_AudioCircleBuf.nItemCnt--;
    }
    UnLock();
    return bRet;
}
三、写入的pts和dts分析
1、视频时间戳
Line 11: 2014-08-12 10:52:46:011 [Record] videoout packet type=0 len=12911,pts=1535,dts=1535,pb=37327
Line 16: 2014-08-12 10:52:46:037 [Record] videoout packet type=0 len=12504,pts=2048,dts=2048,pb=50343
Line 21: 2014-08-12 10:52:46:079 [Record] videoout packet type=0 len=13178,pts=2559,dts=2559,pb=64033
Line 26: 2014-08-12 10:52:46:121 [Record] videoout packet type=0 len=12976,pts=3072,dts=3072,pb=77521
Line 28: 2014-08-12 10:52:46:162 [Record] videoout packet type=0 len=14037,pts=3583,dts=3583,pb=91558
Line 33: 2014-08-12 10:52:46:208 [Record] videoout packet type=0 len=13122,pts=4095,dts=4095,pb=105192
Line 38: 2014-08-12 10:52:46:252 [Record] videoout packet type=0 len=12531,pts=4609,dts=4609,pb=118235
Line 43: 2014-08-12 10:52:46:296 [Record] videoout packet type=0 len=11679,pts=5119,dts=5119,pb=130426
Line 48: 2014-08-12 10:52:46:319 [Record] videoout packet type=0 len=11535,pts=5632,dts=5632,pb=142473
Line 53: 2014-08-12 10:52:46:362 [Record] videoout packet type=0 len=11031,pts=6144,dts=6144,pb=154016
Line 55: 2014-08-12 10:52:46:404 [Record] videoout packet type=0 len=11451,pts=6655,dts=6655,pb=165467
Line 60: 2014-08-12 10:52:46:449 [Record] videoout packet type=0 len=10687,pts=7167,dts=7167,pb=176666
Line 65: 2014-08-12 10:52:46:491 [Record] videoout packet type=0 len=9677,pts=7680,dts=7680,pb=186855
Line 70: 2014-08-12 10:52:46:535 [Record] videoout packet type=0 len=8871,pts=8191,dts=8191,pb=196238
Line 75: 2014-08-12 10:52:46:558 [Record] videoout packet type=0 len=9140,pts=8703,dts=8703,pb=205890
Line 80: 2014-08-12 10:52:46:600 [Record] videoout packet type=0 len=10013,pts=9216,dts=9216,pb=216415
Line 82: 2014-08-12 10:52:46:642 [Record] videoout packet type=0 len=9973,pts=9727,dts=9727,pb=226388
Line 87: 2014-08-12 10:52:46:686 [Record] videoout packet type=0 len=9924,pts=10239,dts=10239,pb=236824
Line 92: 2014-08-12 10:52:46:730 [Record] videoout packet type=0 len=10096,pts=10751,dts=10751,pb=247432
Line 97: 2014-08-12 10:52:46:773 [Record] videoout packet type=0 len=11293,pts=11263,dts=11263,pb=259237
Line 102: 2014-08-12 10:52:46:797 [Record] videoout packet type=0 len=11598,pts=11775,dts=11775,pb=271347
Line 107: 2014-08-12 10:52:46:840 [Record] videoout packet type=0 len=12024,pts=12287,dts=12287,pb=283883
Line 109: 2014-08-12 10:52:46:882 [Record] videoout packet type=0 len=11264,pts=12799,dts=12799,pb=295147
Line 114: 2014-08-12 10:52:46:927 [Record] videoout packet type=0 len=11842,pts=13311,dts=13311,pb=307501
Line 119: 2014-08-12 10:52:46:971 [Record] videoout packet type=0 len=11135,pts=13823,dts=13823,pb=319148
共写了25条数据--与海康每个NALU一帧的策略匹配--帧率为25。pts从1535递增到13823,差值为12288(24个帧间隔×512),再加上最后一帧的间隔512即约为12800。可以看出来视频时间戳无问题:长时间统计后每秒的pts和dts增量都是12800,这样我就可以确定我的pts设置没有任何问题
2、音频时间戳
Line 15: 2014-08-12 10:52:46:036 [Record] packet type=1 len=512,pts=10766,dts=1024,pb=37839
Line 20: 2014-08-12 10:52:46:078 [Record] packet type=1 len=512,pts=11790,dts=2048,pb=50855
Line 25: 2014-08-12 10:52:46:120 [Record] packet type=1 len=512,pts=12814,dts=3072,pb=64545
Line 32: 2014-08-12 10:52:46:208 [Record] packet type=1 len=512,pts=13838,dts=4096,pb=92070
Line 37: 2014-08-12 10:52:46:252 [Record] packet type=1 len=512,pts=14862,dts=5120,pb=105704
Line 42: 2014-08-12 10:52:46:296 [Record] packet type=1 len=512,pts=15886,dts=6144,pb=118747
Line 47: 2014-08-12 10:52:46:319 [Record] packet type=1 len=512,pts=16910,dts=7168,pb=130938
Line 52: 2014-08-12 10:52:46:361 [Record] packet type=1 len=512,pts=17934,dts=8192,pb=142985
Line 59: 2014-08-12 10:52:46:449 [Record] packet type=1 len=512,pts=18958,dts=9216,pb=165979
Line 64: 2014-08-12 10:52:46:491 [Record] packet type=1 len=512,pts=19982,dts=10240,pb=177178
Line 69: 2014-08-12 10:52:46:534 [Record] packet type=1 len=512,pts=21006,dts=11264,pb=187367
Line 74: 2014-08-12 10:52:46:558 [Record] packet type=1 len=512,pts=22030,dts=12288,pb=196750
Line 79: 2014-08-12 10:52:46:600 [Record] packet type=1 len=512,pts=23054,dts=13312,pb=206402
Line 86: 2014-08-12 10:52:46:685 [Record] packet type=1 len=512,pts=24078,dts=14336,pb=226900
Line 91: 2014-08-12 10:52:46:729 [Record] packet type=1 len=512,pts=25102,dts=15360,pb=237336
Line 96: 2014-08-12 10:52:46:773 [Record] packet type=1 len=512,pts=26126,dts=16384,pb=247944
Line 101: 2014-08-12 10:52:46:796 [Record] packet type=1 len=512,pts=27150,dts=17408,pb=259749
Line 106: 2014-08-12 10:52:46:839 [Record] packet type=1 len=512,pts=28174,dts=18432,pb=271859
Line 113: 2014-08-12 10:52:46:927 [Record] packet type=1 len=512,pts=29198,dts=19456,pb=295659
Line 118: 2014-08-12 10:52:46:970 [Record] packet type=1 len=512,pts=30222,dts=20480,pb=308013
上面共列出20条音频数据(时间跨度约0.93秒)--与我设置的音频采样率22050基本一致
音频每秒的采样帧数为22050/1024(点/帧)≈21.53
pts从10766递增到30222,差值为19456(19个帧间隔×1024),按每秒约21.53帧折算约等于22050,则表明我的pts设置没有问题;长期统计下来每秒的差值应为22050
四、总结
音视频的pts和dts设置需要与各自流的time_base对应:每秒pts和dts的增量应等于time_base的分母(音频即每秒采样点数22050,mp4视频为12800)
音频一帧为1024个点
如果要快进或者慢放 则相应地缩小(快进)或者放大(慢放)相邻帧之间pts/dts的间隔