将 H264 + PCMA 转为MP4格式(RTSP协议)

 

问题:

通过rtsp交互,解析RTP流(h264视频+alaw(pcma)音频), 将其保存为MP4格式。

 

解决方案:

使用MP4V2 ,版本2.0.0, VC2010可编译。(还有一种gpac库,听说支持h265,未实验)

Mp4V2下载地址: https://code.google.com/archive/p/mp4v2/downloads  ,  文件名mp4v2-2.0.0.tar.bz2

参考文章: 使用mp4v2将H264+AAC合成mp4文件

参考文章: 将RTSP流录制为mp4文件

参考文章:  linux下利用mp4v2库将h264和aac文件封装成MP4

参考文章: Android音视频系列:视频容器操作篇 -- mp4容器打包实现

参考文章: Mp4V2调试经验记录   (视频或者音画同步的主要参数是duration)

参考文章:mp4v2 接口函数  : 其中有一些注意的地方,我似乎没有理会。

 

PS: 考虑过FFmpeg,但是太大,Windows下编译麻烦,不会裁剪。Mp4V2比较轻量级,适合做一个SDK。

 

前期:

建议先对MP4格式有一点基本了解, 看看一些MP4格式的文档,有一个基本概念。


参考文章: mp4文件格式解析 , 介绍详细,其中推荐mp4v2 和 gpac,和一个在线MP4格式解析器,

参考文章(代码): 逐个Box解析MP4文件的代码,挺好的。 MP4 file analyzer  (This is a console application, written by MSVC 2008.It will display mp4 file content in human readable format.)

 

 

套路:


// Create the MP4 file
MP4FileHandle file = MP4CreateEx("d:\\test.mp4", MP4_CREATE_64BIT_DATA | MP4_CREATE_64BIT_TIME);


MP4SetTimeScale(file, 90000);

MP4TrackId video = MP4AddH264VideoTrack(file, 90000, 90000/_fps, _width, _height,
                                                0x64, //sps[1] AVCProfileIndication
                                                0x00, //sps[2] profile_compat
                                                0x1f, //sps[3] AVCLevelIndication
                                                3); // 4 bytes length before each NAL unit
MP4SetVideoProfileLevel(file, 0x7F);

// mp4v2 was rebuilt from source with a newly added function, MP4AddALawAudioTrack2
MP4TrackId audio = MP4AddALawAudioTrack2(file,
                                         8000,   //timescale
                                         8000*40/1000);   //sampleDuration.  40ms

MP4SetTrackIntegerProperty(file,audio, "mdia.minf.stbl.stsd.alaw.channels",1);
MP4SetAudioProfileLevel(file, 0x2);




while(1)
{
    // Update the SPS (pointer skips the first 4 bytes of the buffer)
    // NOTE(review): len is passed unchanged although the pointer is advanced by 4 -- presumably should be len-4; confirm
   MP4AddH264SequenceParameterSet(file, video, (const uint8_t*)(pH264+4), len);

    // Update the PPS (pointer skips the first 4 bytes of the buffer)
    // NOTE(review): same len vs. len-4 question as for the SPS above -- confirm
   MP4AddH264PictureParameterSet(file, video, (const uint8_t*)(pH264+4), len);

    // Write the H.264 sample (the first four bytes need special handling)
    // The last argument is 1 for a key frame and 0 otherwise.
   if(key_frame)
   {
     MP4WriteSample(file, video, (const uint8_t* )pH264, len, MP4_INVALID_DURATION, 0, 
          1);
   }else{
     MP4WriteSample(file, video, (const uint8_t* )pH264, len, MP4_INVALID_DURATION, 0, 
          0);
    }

    // Write the PCMA (A-law) audio sample
    // NOTE(review): reuses the same `len` variable as the video data -- presumably a separate audio length is meant; confirm
   MP4WriteSample(file, audio, (const uint8_t* )pCMA, len , MP4_INVALID_DURATION, 0, 1);

}

MP4Close(file);

 

PS:      注意Write H264 Sample时,h264流中的NAL,头四个字节是0x00000001,  而mp4中的h264track,头四个字节要求是NAL的长度,并且是大端顺序,所以,写入前,需要进行如下更改:

// Overwrite the first 4 bytes of the buffer with (len - 4) converted by htonl(), i.e. in network (big-endian) byte order.
// NOTE(review): the uint32_t store assumes pH264 is suitably aligned -- confirm, or write the four bytes individually.
uint32_t* pSize = (uint32_t*)pH264 ;
*pSize = htonl(len - 4);

 

PS:   源码中函数 AddALawAudioTrack里面计算 

uint32_t fixedSampleDuration = (timeScale * 20)/1000; // 20mSec/Sample

然而实际的音频是每秒25帧, 每帧间隔40ms, 而不是20ms。故而需要手动添加一个新的接口(即上文的 MP4AddALawAudioTrack2)。

 

PS:   音频和视频同步没有特别处理,没考虑。

PS:   不确定 SPS、PPS 每次变更时是否都需要调用 MP4AddH264SequenceParameterSet、MP4AddH264PictureParameterSet, 也不确定这个操作的影响。

 

 

简单实验结果: 

测试步骤1  :  VLC可播放, 有声音。

测试步骤2: 手机端微信可播放。

测试步骤3: 桌面端微信不可播放。 (猜测可能是音频格式需要AAC格式)

测试步骤4: Html5 可播放,有声音。

测试步骤5: Windows Media Player可播放, 但是没声音。

 

困扰: 发现MP4文件中的mdat 数据特别长, 不知道是否有影响, 这份生成的MP4文件不支持格式工厂转换。

 

其他:

感觉MP4似乎主要用来打包H264和AAC的,而我用的音频是PCMA,不知道有没有兼容性问题。

mp4格式好复杂,结构可变,又多,  里面有好多Box结构体,我曾经企图一个字节一个字节的研究,后来放弃了,复杂。

 

附图(图片来自网络):

 

 

 

 

参考源码:  (来源: https://github.com/Thinkerfans/lib-mp4v2/tree/master/mp4v2 )

 

lib-mp4v2/mp4v2/mp4record.h

//
//  mp4record.h
//  RTSP_Player
//
//  Created by apple on 15/4/7.
//  Copyright (c) 2015年 thinker. All rights reserved.
//

#ifndef __RTSP_Player__mp4record__
#define __RTSP_Player__mp4record__

#include "mp4v2.h"

/* NAL unit categories that mp4VEncode() sorts its input into. */
#define  _NALU_SPS_  0   /* sequence parameter set */
#define  _NALU_PPS_  1   /* picture parameter set */
#define  _NALU_I_    2   /* I-frame, written as a sync sample */
#define  _NALU_P_    3   /* P-frame */


/*
 * Start a recording session: allocates the global recorder context and
 * creates the MP4 file `filename`. `width` and `height` are stored and used
 * later when the video track is created.
 * Returns 0 on success, -1 on failure.
 */
int initMp4Encoder(const char * filename,int width,int height);
/*
 * Feed one H.264 NAL unit of `len` bytes. `data` is expected to begin with
 * the 4-byte start code 00 00 00 01; the buffer may be modified in place.
 * The video track is created when the first SPS arrives; NAL units received
 * before that are rejected.
 * Returns 0 when the unit was handled or ignored, -1 on error.
 */
int mp4VEncode(uint8_t * data ,int len);
/*
 * Feed one audio sample of `len` bytes.
 * Returns -1 while no video track exists yet, 0 otherwise on success.
 */
int mp4AEncode(uint8_t * data ,int len);
/* Close the MP4 file (if one is open) and release the recorder context. */
void closeMp4Encoder();


#endif /* defined(__RTSP_Player__mp4record__) */

 

lib-mp4v2/mp4v2/mp4record.c

//
//  mp4record.c
//  RTSP_Player
//
//  Created by apple on 15/4/7.
//  Copyright (c) 2015年 thinker. All rights reserved.
//


#include "mp4record.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>


/* State of the single recording session managed by this file. */
typedef struct MP4V2_CONTEXT{
    
    /* Video width, height, frame rate and time scale; used when the video track is created. */
    int m_vWidth,m_vHeight,m_vFrateR,m_vTimeScale;
    /* Handle returned by MP4Create(). */
    MP4FileHandle m_mp4FHandle;
    /* Video and audio track ids. */
    MP4TrackId m_vTrackId,m_aTrackId;
    /* Set to 300 at init and advanced by 1024 in mp4AEncode(); only read by commented-out code in this file. */
    double m_vFrameDur;
    
} MP4V2_CONTEXT;

/* Global recorder context: allocated by initMp4Encoder(), freed by closeMp4Encoder(). */
struct MP4V2_CONTEXT * recordCtx = NULL;


int initMp4Encoder(const char * filename,int width,int height){
    
    int ret = -1;
    recordCtx = malloc(sizeof(struct MP4V2_CONTEXT));
    if (!recordCtx) {
        printf("error : malloc context \n");
        return ret;
    }
    
    recordCtx->m_vWidth = width;
    recordCtx->m_vHeight = height;
    recordCtx->m_vFrateR = 25;
    recordCtx->m_vTimeScale = 90000;
    recordCtx->m_vFrameDur = 300;
    recordCtx->m_vTrackId = 0;
    recordCtx->m_aTrackId = 0;
    
    recordCtx->m_mp4FHandle = MP4Create(filename,0);
    if (recordCtx->m_mp4FHandle == MP4_INVALID_FILE_HANDLE) {
        printf("error : MP4Create  \n");
        return ret;
    }
     MP4SetTimeScale(recordCtx->m_mp4FHandle, recordCtx->m_vTimeScale);
    //------------------------------------------------------------------------------------- audio track
//    recordCtx->m_aTrackId = MP4AddAudioTrack(recordCtx->m_mp4FHandle, 44100, 1024, MP4_MPEG4_AUDIO_TYPE);
//    if (recordCtx->m_aTrackId == MP4_INVALID_TRACK_ID){
//        printf("error : MP4AddAudioTrack  \n");
//        return ret;
//    }
//
//    MP4SetAudioProfileLevel(recordCtx->m_mp4FHandle, 0x2);
//    uint8_t aacConfig[2] = {18,16};
//    MP4SetTrackESConfiguration(recordCtx->m_mp4FHandle,recordCtx->m_aTrackId,aacConfig,2);
//    printf("ok  : initMp4Encoder file=%s  \n",filename);

    return 0;
}
/* Store v at dst[0..3] in big-endian order (layout of the MP4 NAL length prefix). */
static void mp4PutBe32(uint8_t *dst, uint32_t v){
    dst[0] = (uint8_t)(v >> 24);
    dst[1] = (uint8_t)(v >> 16);
    dst[2] = (uint8_t)(v >> 8);
    dst[3] = (uint8_t)(v & 0xff);
}

/*
 * Write one H.264 NAL unit to the video track.
 *
 * _naluData  NAL unit preceded by the 4-byte start code 00 00 00 01.
 *            For P-frames the start code is overwritten in place with the
 *            payload length; for I-frames the buffer is left untouched.
 * _naluSize  total size in bytes, including the start code.
 *
 * The NAL category is taken from nal_unit_type (low 5 bits of the header
 * byte): 7 = SPS, 8 = PPS, 5 = I-frame (IDR), 1 = P-frame. The video track
 * is created when the first SPS arrives; anything received before that is
 * rejected. Other NAL types, and input without a start code, are ignored.
 *
 * Returns 0 when the unit was handled or ignored, -1 if the recorder is not
 * initialised, the input is too short, no video track exists yet, or track
 * creation, allocation or the sample write fails.
 */
int mp4VEncode(uint8_t * _naluData ,int _naluSize){

    int index = -1;

    if (recordCtx == NULL || _naluData == NULL) {
        return -1;
    }
    /* Need the 4-byte start code plus at least the NAL header byte. */
    if (_naluSize < 5) {
        return -1;
    }

    if(_naluData[0]==0 && _naluData[1]==0 && _naluData[2]==0 && _naluData[3]==1){
        /* Mask off forbidden_zero_bit and nal_ref_idc so that headers such
         * as 0x27/0x67 or 0x21/0x41/0x61 are all recognised. */
        switch(_naluData[4] & 0x1f){
            case 7: index = _NALU_SPS_; break;
            case 8: index = _NALU_PPS_; break;
            case 5: index = _NALU_I_;   break;
            case 1: index = _NALU_P_;   break;
            default: break;
        }
    }

    /* Nothing can be written until an SPS has created the video track. */
    if(index!=_NALU_SPS_ && recordCtx->m_vTrackId == MP4_INVALID_TRACK_ID){
        return -1;
    }

    switch(index){
        case _NALU_SPS_:
            if(recordCtx->m_vTrackId == MP4_INVALID_TRACK_ID){
                /* Profile/compat/level are read from SPS bytes 1..3. */
                if (_naluSize < 8) {
                    return -1;
                }
                recordCtx->m_vTrackId = MP4AddH264VideoTrack
                (recordCtx->m_mp4FHandle,
                 recordCtx->m_vTimeScale,
                 recordCtx->m_vTimeScale / recordCtx->m_vFrateR,
                 recordCtx->m_vWidth,     // width
                 recordCtx->m_vHeight,    // height
                 _naluData[5], // sps[1] AVCProfileIndication
                 _naluData[6], // sps[2] profile_compat
                 _naluData[7], // sps[3] AVCLevelIndication
                 3);           // 4 bytes length before each NAL unit
                if (recordCtx->m_vTrackId == MP4_INVALID_TRACK_ID)  {
                    return -1;
                }
                MP4SetVideoProfileLevel(recordCtx->m_mp4FHandle, 0x7F);
            }
            MP4AddH264SequenceParameterSet(recordCtx->m_mp4FHandle,recordCtx->m_vTrackId,_naluData+4,_naluSize-4);
            break;
        case _NALU_PPS_:
            MP4AddH264PictureParameterSet(recordCtx->m_mp4FHandle,recordCtx->m_vTrackId,_naluData+4,_naluSize-4);
            break;
        case _NALU_I_:
        {
            /* Build "4-byte big-endian length + NAL payload" in a scratch
             * buffer. The payload starts after the whole 4-byte start code,
             * so the sample is exactly _naluSize bytes long. */
            int written;
            uint8_t * IFrameData = malloc((size_t)_naluSize);
            if (IFrameData == NULL) {
                return -1;
            }
            mp4PutBe32(IFrameData, (uint32_t)(_naluSize-4));
            memcpy(IFrameData+4,_naluData+4,(size_t)(_naluSize-4));

            written = MP4WriteSample(recordCtx->m_mp4FHandle, recordCtx->m_vTrackId, IFrameData, _naluSize, MP4_INVALID_DURATION, 0, 1) ? 1 : 0;
            /* Release the scratch buffer on both success and failure. */
            free(IFrameData);
            if(!written){
                return -1;
            }
            break;
        }
        case _NALU_P_:
        {
            /* Replace the start code in place with the payload length. */
            mp4PutBe32(_naluData, (uint32_t)(_naluSize-4));

            /* Last argument 0: a P-frame is not a sync (key) sample. */
            if(!MP4WriteSample(recordCtx->m_mp4FHandle, recordCtx->m_vTrackId, _naluData, _naluSize, MP4_INVALID_DURATION, 0, 0)){
                return -1;
            }
            break;
        }
        default:
            /* Unrecognised NAL unit: ignored. */
            break;
    }
    return 0;
}


/*
 * Write one audio sample to the audio track.
 *
 * data  sample bytes
 * len   number of bytes in data
 *
 * Audio is only accepted once the video track exists. Returns 0 when the
 * sample was written, -1 if the recorder is not initialised, the arguments
 * are invalid, either track is missing, or the write fails.
 */
int mp4AEncode(uint8_t * data ,int len){
    if(recordCtx == NULL || data == NULL || len <= 0){
        return -1;
    }
    if(recordCtx->m_vTrackId == MP4_INVALID_TRACK_ID){
        return -1;
    }
    /* Without an audio track there is nothing to write the sample to. */
    if(recordCtx->m_aTrackId == MP4_INVALID_TRACK_ID){
        return -1;
    }
    if(!MP4WriteSample(recordCtx->m_mp4FHandle, recordCtx->m_aTrackId, data, len , MP4_INVALID_DURATION, 0, 1)){
        return -1;
    }
    recordCtx->m_vFrameDur += 1024;
    return 0;
}

void closeMp4Encoder(){
    if(recordCtx){
        if (recordCtx->m_mp4FHandle != MP4_INVALID_FILE_HANDLE) {
            MP4Close(recordCtx->m_mp4FHandle,0);
            recordCtx->m_mp4FHandle = NULL;
        }
        
        free(recordCtx);
        recordCtx = NULL;
    }
    
    printf("ok  : closeMp4Encoder  \n");

}

 

 

 

 

 

 

  • 4
    点赞
  • 8
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值