音频采集 via Media Foundation

Media Foundation 简介

Media Foundation(简称 MF)是微软在 Windows Vista 上推出的新一代多媒体应用库,目的是为 Windows 平台提供一个统一的多媒体影音解决方案。开发者可以通过 MF 播放视频或声音文件、进行多媒体文件格式转码,或者将一连串图片编码为视频等等。
MF 是 DirectShow 为主的旧式多媒体应用程序接口的替代者与继承者,在微软的计划下将逐步汰换 DirectShow 技术。MF 要求 Windows Vista 或更高版本,不支持较早期的 Windows 版本,特别是 Windows XP。
MF 长于高质量的音频和视频播放、高清内容(如 HDTV,高清电视)以及数字版权管理(DRM)访问控制。MF 在不同的 Windows 版本上能力不同,例如 Windows 7 上添加了 H.264 编码支持,Windows 8 上则提供了数种更高质量的设置。

Media Foundation 采集音频

采集流程图

MF cap audio

采集代码

以下是整个 MF 采集过程的概要代码,略去设备枚举和 CMFCapture 类的实现。

// Outline of a complete capture session. Error handling is omitted here:
// each call overwrites hr without checking it, so real code must test hr
// after every step before continuing.

// Initialize COM on this thread, then start Media Foundation.
hr = CoInitializeEx(NULL, COINIT_APARTMENTTHREADED | COINIT_DISABLE_OLE1DDE);
hr = MFStartup(MF_VERSION);

// Enumerate the capture devices.
hr = UpdateDeviceList(hDlg, true);

// Encoding parameters for the audio stream: target subtype and bit rate.
EncodingParameters audEncParam;
audEncParam.subType = _getSubType(hDlg, true);
audEncParam.bitRate = TARGET_AUD_BIT_RATE;

// Activation object of the capture device (presumably the one currently
// selected in the dialog hDlg - confirm against GetSelectedDevice).
CComPtr<IMFActivate> pAudActivate = NULL;
hr = GetSelectedDevice(hDlg, &pAudActivate, true);

// Create the capture object and start capturing into pszFile.
hr = CMFCapture::CreateInstance(hDlg, &g_pCapture);
hr = g_pCapture->startCapture(pAudActivate, &audEncParam, pszFile);

// Capturing ...  

// Stop capturing, release the capture object, then shut down Media
// Foundation and COM in the reverse order of their initialization.
hr = g_pCapture->stopCapture();

g_pCapture->Release();
MFShutdown();
CoUninitialize();

MMDeviceHelper::enumAudCapDevices 函数

此处通过向 _enumMFDevices 传入设备类型 GUID 的方式获取音频设备,因为该函数同样可以用来枚举视频设备。

// Enumerates the audio capture devices.
//
// Forwards to _enumMFDevices with the audio-capture source type GUID; the
// device category is a parameter of that helper, so the same helper can be
// reused for other categories.
//
// Returns the HRESULT produced by _enumMFDevices.
HRESULT MMDeviceHelper::enumAudCapDevices()
{
    return _enumMFDevices(MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE_AUDCAP_GUID);
}

// Enumerates the Media Foundation capture devices of the category identified
// by devType and stores the result in m_ppDevices / m_cDevices.
//
// clear() runs first (presumably to drop the result of a previous
// enumeration - confirm against its implementation).
//
// Returns the first failing HRESULT, or the result of MFEnumDeviceSources.
HRESULT MMDeviceHelper::_enumMFDevices(const GUID& devType)
{
    clear();

    // The attribute store carries the enumeration parameters; a single
    // entry (the device category) is stored in it.
    CComPtr<IMFAttributes> spEnumAttrs;
    HRESULT hr = MFCreateAttributes(&spEnumAttrs, 1);
    RETURN_IF_FAILED(hr);

    // Restrict the enumeration to the requested device category.
    hr = spEnumAttrs->SetGUID(MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE, devType);
    RETURN_IF_FAILED(hr);

    hr = MFEnumDeviceSources(spEnumAttrs, &m_ppDevices, &m_cDevices);
    RETURN_IF_FAILED(hr);

    return hr;
}

CMFCapture::startCapture 函数

首先创建一个 Sink Writer,接着配置音频输入(source reader 与编码器),然后让 Sink Writer 开始写入,最后请求读取第一个 sample。

// Starts capturing audio from the given device into the given file.
//
// Steps: create the sink writer for the output file, configure the audio
// source reader and the encoder stream, begin writing, then request the
// first sample. Subsequent samples are requested from OnReadSample.
//
// pAudioAct    - activation object of the audio capture device.
// pAudEncParam - encoding parameters, forwarded to _configAudioCapture.
// pszFileName  - output URL passed to MFCreateSinkWriterFromURL.
//
// Returns the HRESULT of the first failing step, or the result of the first
// ReadSample request on success. On failure after the sink writer has been
// created, the reader and the writer are released.
HRESULT CMFCapture::startCapture(IMFActivate *pAudioAct, EncodingParameters* pAudEncParam, LPCTSTR pszFileName)
{
    HRESULT hr = S_OK;
    SyncUtil::AutoLock lock(m_critsec);
    
    hr = MFCreateSinkWriterFromURL(pszFileName, NULL, NULL, &m_pWriter);
    RETURN_IF_MF_FAILED(hr);
    
    // The first delivered sample defines the time base of the output.
    m_bFirstSample = TRUE;
    m_llBaseTime = 0;
    
    hr = _configAudioCapture(pAudioAct, pAudEncParam);
    GOTO_LABEL_IF_FAILED(hr, OnErr);
    
    hr = m_pWriter->BeginWriting();
    GOTO_LABEL_IF_FAILED(hr, OnErr);
    
    // Raise the capturing flag BEFORE issuing the first asynchronous read.
    // OnReadSample tests isCapturing() before it takes the lock and returns
    // without requesting another sample when the flag is clear; if the first
    // callback ran before the flag was set, the read chain would stop.
    m_isCapturing = true;
    
    hr = m_pAudioReader->ReadSample((DWORD)MF_SOURCE_READER_FIRST_AUDIO_STREAM, 0, NULL, NULL, NULL, NULL);
    GOTO_LABEL_IF_FAILED(hr, OnErr);
    
    return hr;
OnErr:
    // Nothing is being captured once the reader and writer are gone.
    m_isCapturing = false;
    SAFE_RELEASE(m_pAudioReader);
    SAFE_RELEASE(m_pWriter);
    return hr;
}
CMFCapture::_configAudioCapture 函数

创建并配置音频 source reader 及 encoder。

// Creates and configures the audio source reader and the encoder stream.
//
// Activates the media source of the capture device, wraps it in a source
// reader that calls back into this object, selects the reader's output type,
// adds an encoded audio stream to m_pWriter and finally tells the writer
// which input type it will receive on that stream.
//
// pActivate - activation object of the audio capture device.
// pEncParam - encoding parameters, forwarded to configAudioEncoder.
//
// Returns S_OK on success, otherwise the first failing HRESULT. On failure
// m_pAudioReader may already have been created; startCapture releases it.
HRESULT CMFCapture::_configAudioCapture( IMFActivate *pActivate, EncodingParameters* pEncParam )
{
    HRESULT hr = E_FAIL;
    CComPtr<IMFMediaSource> pSource = NULL;
    CComPtr<IMFMediaType> pType = NULL;
    DWORD sinkStream = 0;
    SyncUtil::AutoLock lock(m_critsec);
    
    hr = pActivate->ActivateObject(IID_PPV_ARGS(&pSource));
    RETURN_IF_FAILED(hr);
    
    // NOTE(review): createSrcReader sets
    // MF_SOURCE_READER_DISCONNECT_MEDIASOURCE_ON_SHUTDOWN, so the reader will
    // not shut the media source down, and nothing visible here calls
    // IMFMediaSource::Shutdown on pSource - confirm it is shut down elsewhere.
    hr = createSrcReader(pSource, m_pAudioReader, this);
    RETURN_IF_FAILED(hr);
    
    // configSrcReader takes only the reader.
    hr = configSrcReader(m_pAudioReader);
    RETURN_IF_FAILED(hr);
    
    // The type the reader will actually deliver is the encoder's input type.
    hr = m_pAudioReader->GetCurrentMediaType((DWORD)MF_SOURCE_READER_FIRST_AUDIO_STREAM, &pType);
    RETURN_IF_FAILED(hr);
    
    // configAudioEncoder takes (source type, parameters, writer, out index).
    hr = configAudioEncoder(pType, pEncParam, m_pWriter, &sinkStream);
    RETURN_IF_FAILED(hr);    
    m_audioStreamIdx = sinkStream;
    
    hr = m_pWriter->SetInputMediaType(sinkStream, pType, NULL);
    RETURN_IF_MF_FAILED(hr);
    
    return S_OK;
}
createSrcReader 函数

创建 source reader 并指定回调接口。

// Creates a source reader on top of pSource and registers pCallback as its
// asynchronous callback.
//
// pSource   - media source the reader reads from.
// pReader   - receives the newly created source reader.
// pCallback - object stored in the MF_SOURCE_READER_ASYNC_CALLBACK attribute.
//
// Returns the first failing HRESULT, or the result of
// MFCreateSourceReaderFromMediaSource.
HRESULT createSrcReader(IMFMediaSource *pSource, IMFSourceReader*& pReader, IUnknown* pCallback)
{
    // Two attributes are stored below.
    CComPtr<IMFAttributes> spReaderAttrs;
    HRESULT hr = MFCreateAttributes(&spReaderAttrs, 2);
    RETURN_IF_FAILED(hr);

    hr = spReaderAttrs->SetUnknown(MF_SOURCE_READER_ASYNC_CALLBACK, pCallback);
    RETURN_IF_FAILED(hr);

    // Releasing a source reader normally makes it call
    // IMFMediaSource::Shutdown on its media source, after which the
    // application can no longer use that source. With
    // MF_SOURCE_READER_DISCONNECT_MEDIASOURCE_ON_SHUTDOWN set to TRUE the
    // reader leaves the source alone, so the application can keep using the
    // media source after it has released the reader.
    hr = spReaderAttrs->SetUINT32(MF_SOURCE_READER_DISCONNECT_MEDIASOURCE_ON_SHUTDOWN, TRUE);
    RETURN_IF_FAILED(hr);

    hr = MFCreateSourceReaderFromMediaSource(pSource, spReaderAttrs, &pReader);
    RETURN_IF_MF_FAILED(hr);

    return hr;
}
configSrcReader 函数

设置 source reader 的输出媒体类型。

// Selects the output media type of the reader's first audio stream.
//
// Accepted subtypes, in order of preference: MFAudioFormat_Float, then
// MFAudioFormat_PCM.
//
// Pass 1 walks the native types of the stream and keeps the first one whose
// subtype is accepted and that SetCurrentMediaType agrees to.
// Pass 2 runs only when pass 1 selected nothing: the source may only offer a
// compressed type, so the subtype of a native type is rewritten to each
// accepted subtype in turn, which lets the reader insert a decoder.
//
// The source might support several output formats; letting the user pick
// one is deliberately out of scope here.
//
// Returns the HRESULT of the last call made.
HRESULT configSrcReader(IMFSourceReader *pReader)
{
    std::vector<GUID> acceptedSubTypes;
    acceptedSubTypes.push_back(MFAudioFormat_Float);
    acceptedSubTypes.push_back(MFAudioFormat_PCM);

    HRESULT hr = S_OK;
    bool nativeTypeChosen = false;
    GUID nativeSubType = { 0 };
    CComPtr<IMFMediaType> spType;

    // Pass 1: prefer a native format that is already acceptable.
    DWORD typeIndex = 0;
    while (!nativeTypeChosen) {
        hr = pReader->GetNativeMediaType(MF_SOURCE_READER_FIRST_AUDIO_STREAM, typeIndex, &spType);
        if (FAILED(hr))
            break;  // usually: no more native types

        hr = spType->GetGUID(MF_MT_SUBTYPE, &nativeSubType);
        DL_MEDIA_TYPE(nativeSubType);
        if (FAILED(hr))
            break;

        for (size_t k = 0; k < acceptedSubTypes.size() && !nativeTypeChosen; ++k) {
            if (nativeSubType == acceptedSubTypes[k]) {
                hr = pReader->SetCurrentMediaType(MF_SOURCE_READER_FIRST_AUDIO_STREAM, NULL, spType);
                nativeTypeChosen = SUCCEEDED(hr);
            }
        }

        if (!nativeTypeChosen) {
            // Drop this candidate before fetching the next native type.
            spType.Release();
            ++typeIndex;
        }
    }

    if (nativeTypeChosen)
        return hr;

    // Pass 2: none of the native types worked; try adding a decoder.
    if (!spType) {
        hr = pReader->GetNativeMediaType(MF_SOURCE_READER_FIRST_AUDIO_STREAM, 0, &spType);
        RETURN_IF_FAILED(hr);
    }

    for (size_t k = 0; k < acceptedSubTypes.size(); ++k) {
        hr = spType->SetGUID(MF_MT_SUBTYPE, acceptedSubTypes[k]);
        RETURN_IF_FAILED(hr);

        hr = pReader->SetCurrentMediaType(MF_SOURCE_READER_FIRST_AUDIO_STREAM, NULL, spType);
        if (SUCCEEDED(hr))
            break;
    }

    return hr;
}
configAudioEncoder 函数

设置输出音频媒体格式,MF 会自动选择合适的编码器。

// Adds an encoded audio stream to the sink writer.
//
// The target (encoded) media type is derived from the source type and the
// requested subtype by makeTargetAudioType, the requested average bit rate
// is stored on it, and the type is handed to IMFSinkWriter::AddStream.
//
// pSrcType   - media type of the audio that will be fed to the writer.
// params     - supplies the target subtype and the bit rate.
// pWriter    - sink writer that receives the new stream.
// pStreamIdx - receives the index of the stream added to the writer.
//
// Returns the first failing HRESULT, or the result of AddStream.
HRESULT configAudioEncoder(IMFMediaType* pSrcType, EncodingParameters* params, IMFSinkWriter *pWriter, DWORD *pStreamIdx)
{
    RETURN_IF_NULL(pSrcType);
    RETURN_IF_NULL(params);
    RETURN_IF_NULL(pWriter);
    RETURN_IF_NULL(pStreamIdx);

    CComPtr<IMFMediaType> spEncodedType;
    HRESULT hr = makeTargetAudioType(pSrcType, params->subType, &spEncodedType);
    RETURN_IF_FAILED(hr);

    // An encoder may reject a bit rate it considers invalid.
    hr = spEncodedType->SetUINT32(MF_MT_AVG_BITRATE, params->bitRate);
    RETURN_IF_FAILED(hr);

    hr = pWriter->AddStream(spEncodedType, pStreamIdx);
    RETURN_IF_FAILED(hr);

    return hr;
}
makeTargetAudioType 函数

选择一个和输入媒体类型(未编码)最接近(声道、采样率、位深度等等)的输出媒体类型(编码后)。

// Picks the encoded output type that best matches an uncompressed input type.
//
// Asks MFTranscodeGetAudioOutputAvailableTypes for every output type that is
// available for targetSubType and keeps the best candidate according to
// isBetterAudioTypeMatch (presumably true when its second argument matches
// the input better than its third - confirm against its implementation).
//
// pInputType    - uncompressed input media type to match against.
// targetSubType - subtype of the desired encoded format.
// ppTargetType  - receives the chosen type; the caller owns the reference.
//                 Set to NULL when the function fails.
//
// Returns S_OK when a type was selected; a failing HRESULT when the
// available types cannot be queried; whatever RETURN_IF_NULL returns when an
// argument is NULL or no usable candidate exists.
HRESULT makeTargetAudioType( IMFMediaType* pInputType, GUID& targetSubType, IMFMediaType** ppTargetType )
{
    RETURN_IF_NULL(pInputType);
    RETURN_IF_NULL(ppTargetType);
    *ppTargetType = NULL;

    HRESULT hr = E_FAIL; 
    CComPtr<IMFMediaType> spBestMatchType;

    CComPtr<IMFCollection> spTypeCollection;
    hr = ::MFTranscodeGetAudioOutputAvailableTypes( targetSubType, MFT_ENUM_FLAG_ALL, NULL, &spTypeCollection );
    RETURN_IF_FAILED(hr);

    DWORD cTypes = 0;
    hr = spTypeCollection->GetElementCount(&cTypes);
    RETURN_IF_FAILED(hr);

    for (DWORD i = 0; i < cTypes; i++) {
        CComPtr<IUnknown> spTypeUnk;
        CComPtr<IMFMediaType> spType;

        // Skip an element that cannot be fetched instead of dereferencing a
        // null pointer; the remaining candidates are still worth examining.
        hr = spTypeCollection->GetElement(i, &spTypeUnk);
        if (FAILED(hr) || !spTypeUnk)
            continue;

        // Likewise skip an element that is not a media type, so that a null
        // candidate can never become the best match.
        hr = spTypeUnk->QueryInterface( IID_PPV_ARGS(&spType) );
        if (FAILED(hr) || !spType)
            continue;

        if (NULL == spBestMatchType || isBetterAudioTypeMatch(pInputType, spType, spBestMatchType)) {
            spBestMatchType = spType;
        }
    }
    RETURN_IF_NULL(spBestMatchType);

    // Hand the reference over to the caller.
    *ppTargetType = spBestMatchType.Detach();
    return S_OK;
}

IMFSourceReaderCallback::OnReadSample 回调函数

Source reader 读取到一个 sample 后回调此函数,设置完时间戳后即交给 Sink Writer 编码并写入文件。

// Source reader callback, invoked after a ReadSample request has completed.
//
// Rebases the sample time so that the first delivered sample starts at 0,
// hands the sample to the sink writer and requests the next sample.
//
// hrStatus    - status of the read; a failure is returned as-is and no
//               further sample is requested.
// llTimeStamp - time stamp reported for the sample.
// pSample     - the sample, or NULL when the reader delivered none.
//
// Returns S_OK when not capturing, when the writer is gone, or when the
// sample was processed; otherwise the first failing HRESULT.
HRESULT CMFCapture::OnReadSample(HRESULT hrStatus, DWORD, DWORD, LONGLONG llTimeStamp, IMFSample *pSample)
{
    // Checked without the lock, so a callback arriving after stopCapture
    // cleared the flag returns without waiting for m_critsec.
    if (!isCapturing())
        return S_OK;

    SyncUtil::AutoLock lock(m_critsec);
    if (NULL == m_pWriter)
        return S_OK;

    RETURN_IF_FAILED(hrStatus);

    HRESULT hr = S_OK;
    if (pSample != NULL) {
        // The first sample defines the origin of the output time line.
        if (m_bFirstSample) {
            m_bFirstSample = FALSE;
            m_llBaseTime = llTimeStamp;
        }

        const LONGLONG llRelativeTime = llTimeStamp - m_llBaseTime;
        hr = pSample->SetSampleTime(llRelativeTime);
        RETURN_IF_FAILED(hr);

        hr = m_pWriter->WriteSample(m_audioStreamIdx, pSample);
        RETURN_IF_FAILED(hr);
    }

    // Keep the read chain going: ask for the next sample.
    hr = m_pAudioReader->ReadSample((DWORD)MF_SOURCE_READER_FIRST_AUDIO_STREAM, 0, NULL, NULL, NULL, NULL);
    RETURN_IF_FAILED(hr);

    return S_OK;
}

CMFCapture::stopCapture 函数

在 Flush Source Reader 的时候,如果不等待 IMFSourceReaderCallback::OnFlush 回调完成,可能会造成死锁。

// Stops capturing: finalizes the output file and releases the writer and
// the reader.
//
// The capturing flag is cleared before the lock is taken so that callbacks
// arriving from now on return early in OnReadSample.
//
// Returns the HRESULT of IMFSinkWriter::Finalize, or S_OK when there was no
// writer.
HRESULT CMFCapture::stopCapture()
{ 
    HRESULT hr = S_OK;
    m_isCapturing = false;
    SyncUtil::AutoLock lock(m_critsec);
    
    if (NULL != m_pWriter) {
        // NOTE(review): per the IMFSinkWriter::Flush documentation a flush
        // drops pending samples; confirm that discarding them before
        // Finalize is intended.
        m_pWriter->Flush(m_audioStreamIdx);
        
        hr = m_pWriter->Finalize();
        SAFE_RELEASE(m_pWriter);
    }
    
    if (NULL != m_pAudioReader) {
        // Wait for OnFlush (which signals m_hFlushedEvent) before releasing
        // the reader, but only when the flush request was accepted: a failed
        // request produces no OnFlush callback, so waiting would merely
        // stall for the whole timeout.
        const HRESULT hrFlush = m_pAudioReader->Flush(MF_SOURCE_READER_FIRST_AUDIO_STREAM);
        if (SUCCEEDED(hrFlush)) {
            // Bounded wait: m_critsec is held here, so a callback blocked on
            // the lock must not be able to stall this call forever.
            WaitForSingleObject(m_hFlushedEvent, 3000);
        }
        SAFE_RELEASE(m_pAudioReader);
    }
    
    return hr;
}

TDMETHODIMP OnFlush(DWORD dwStreamIndex)
{
    SetEvent(m_hFlushedEvent);
    return S_OK;
}

Tools

TopoEdit

TopoEdit 是用于构建和测试 MF Topology 的可视化工具,包含在 Windows SDK 7.x 中(Bin/TopoEdit.exe)。使用 TopoEdit,可以:

  • 通过添加各种 Topology Node(如 source, transform 和 output node)来构建 Topology
  • 连接并决定 topology 结构
  • 通过回放测试 topology 的功能
    TopoEdit

MFTrace

MFTrace 工具是 Windows 7 SDK 的一部分(Bin/MFTrace.exe)。 MFTrace 可以 hook 正在运行的 MF 应用程序并从中接收详细的跟踪信息。 从 traces 中可以看到 topology 的完整连接,以及找出程序执行过程中出错的地方。
MFTrace

TextAnalysisTool

TextAnalysisTool.NET 工具可以加载日志文件,允许输入字符串模式以过滤掉或突出显示 log 信息。
TextAnalysisTool.net

其他框架下的采集

请参考对应的文章。

Blueware
EOF

  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值