音频特效滤镜 via Media Foundation Transform
- 音频特效定义
- Media Foundation Transform
- 结语
- 其他框架的滤镜
音频特效定义
音效或声效(Sound effects 或 Audio effects)是人工制造或加强的声音,用来增强对电影、电子游戏、音乐或其他媒体的艺术或其他内容的声音处理。
常见的音效技术有:回声、合唱、均衡(EQ)、过滤、变调、移相、压缩 / 拉伸、3D、调制和共鸣等等。
Media Foundation Transform
MF 中插件是以 MFT 的形式创建的,需要继承 IMFTransform 接口,接口函数如下:
// IMFTransform — the interface every MFT must implement.
// Methods That Handle Format Negotiation
// Minimum/maximum number of input and output streams this MFT supports.
STDMETHODIMP GetStreamLimits(DWORD *pInputMinimum, DWORD *pInputMaximum, DWORD *pOutputMinimum, DWORD *pOutputMaximum);
// Current number of input and output streams.
STDMETHODIMP GetStreamCount(DWORD *pcInputStreams, DWORD *pcOutputStreams);
// Stream identifiers (needed only when IDs are not consecutive from zero).
STDMETHODIMP GetStreamIDs(DWORD dwInputIDArraySize, DWORD *pdwInputIDs, DWORD dwOutputIDArraySize, DWORD *pdwOutputIDs);
// Buffer requirements and behavior flags for one input / output stream.
STDMETHODIMP GetInputStreamInfo(DWORD dwInputStreamID, MFT_INPUT_STREAM_INFO *pStreamInfo);
STDMETHODIMP GetOutputStreamInfo(DWORD dwOutputStreamID, MFT_OUTPUT_STREAM_INFO *pStreamInfo);
// Attribute stores for the MFT itself and for individual streams.
STDMETHODIMP GetAttributes(IMFAttributes **ppAttributes);
STDMETHODIMP GetInputStreamAttributes(DWORD dwInputStreamID, IMFAttributes **ppAttributes);
STDMETHODIMP GetOutputStreamAttributes(DWORD dwOutputStreamID, IMFAttributes **ppAttributes);
// Enumerate the preferred media types for a stream, by index.
STDMETHODIMP GetInputAvailableType(DWORD dwInputStreamID, DWORD dwTypeIndex, IMFMediaType **ppType);
STDMETHODIMP GetOutputAvailableType(DWORD dwOutputStreamID, DWORD dwTypeIndex, IMFMediaType **ppType);
// Set (or, with MFT_SET_TYPE_TEST_ONLY, merely test) the media type for a stream.
STDMETHODIMP SetInputType(DWORD dwInputStreamID, IMFMediaType *pType, DWORD dwFlags);
STDMETHODIMP SetOutputType(DWORD dwOutputStreamID, IMFMediaType *pType, DWORD dwFlags);
// Methods That Specify or Retrieve State Information
// Currently negotiated media type for a stream.
STDMETHODIMP GetInputCurrentType(DWORD dwInputStreamID, IMFMediaType **ppType);
STDMETHODIMP GetOutputCurrentType(DWORD dwOutputStreamID, IMFMediaType **ppType);
// Dynamic stream management (only for MFTs with a variable number of streams).
STDMETHODIMP DeleteInputStream(DWORD dwStreamID);
STDMETHODIMP AddInputStreams(DWORD cStreams, DWORD *adwStreamIDs);
// Whether the MFT can accept input / has output ready right now.
STDMETHODIMP GetInputStatus(DWORD dwInputStreamID, DWORD *pdwFlags);
STDMETHODIMP GetOutputStatus(DWORD *pdwFlags);
// Range of timestamps the client actually needs (optional optimization hint).
STDMETHODIMP SetOutputBounds(LONGLONG hnsLowerBound, LONGLONG hnsUpperBound);
// Methods That Handle Buffering and Processing Data
// In-band stream events and control messages (flush / drain / streaming notifications).
STDMETHODIMP ProcessEvent(DWORD dwInputStreamID, IMFMediaEvent *pEvent);
STDMETHODIMP ProcessMessage(MFT_MESSAGE_TYPE eMessage, ULONG_PTR ulParam);
// Deliver one input sample / request one unit of output.
STDMETHODIMP ProcessInput(DWORD dwInputStreamID, IMFSample *pSample, DWORD dwFlags);
STDMETHODIMP ProcessOutput(DWORD dwFlags, DWORD cOutputBufferCount, MFT_OUTPUT_DATA_BUFFER *pOutputSamples, DWORD *pStatus);
如果要实现一个 异步的 MFT,则还需要继承 IMFMediaEventGenerator 和 IMFShutdown 接口。
// IMFMediaEventGenerator — lets an asynchronous MFT queue and deliver events.
// Asynchronous event retrieval (Begin/End pair with a callback).
HRESULT BeginGetEvent(IMFAsyncCallback *pCallback, IUnknown *punkState);
HRESULT EndGetEvent(IMFAsyncResult *pResult, IMFMediaEvent **ppEvent);
// Synchronous (possibly blocking) event retrieval.
HRESULT GetEvent(DWORD dwFlags, IMFMediaEvent **ppEvent);
// Put a new event into the queue for delivery to the caller.
HRESULT QueueEvent(MediaEventType met, REFGUID guidExtendedType, HRESULT hrStatus, const PROPVARIANT *pvValue);
// IMFShutdown — orderly release of resources when the async MFT is shut down.
HRESULT GetShutdownStatus(MFSHUTDOWN_STATUS *pStatus);
HRESULT Shutdown();
下面以一个音频延迟(delay)插件为例,介绍一下 IMFTransform 的几个比较重要的接口函数。
代码包含在 Windows SDK 7.x 的 samples\multimedia\mediafoundation\mft_audiodelay\ 目录下。
IMFTransform::GetInputStreamInfo 函数
对输入流的要求,比如音频 sample 必须完整且大小固定,可以 in place 直接处理,无需再分配内存,等等。
// Describes the input stream's buffer requirements: whole fixed-size audio
// frames, in-place processing allowed. Returns MF_E_INVALIDSTREAMNUMBER for
// an unknown stream ID.
HRESULT CDelayMFT::GetInputStreamInfo(DWORD dwInputStreamID, MFT_INPUT_STREAM_INFO *pStreamInfo)
{
    AutoLock lock(m_critSec);
    // FIX: guard against a null out-parameter before writing through it.
    if (pStreamInfo == NULL)
        return E_POINTER;
    if (!IsValidInputStream(dwInputStreamID))
        return MF_E_INVALIDSTREAMNUMBER;
    pStreamInfo->hnsMaxLatency = 0; // Maximum latency introduced by this MFT: none.
    pStreamInfo->dwFlags =
        MFT_INPUT_STREAM_WHOLE_SAMPLES |      // The MFT must get complete audio frames.
        MFT_INPUT_STREAM_PROCESSES_IN_PLACE | // The MFT can do in-place processing.
        MFT_INPUT_STREAM_FIXED_SAMPLE_SIZE;   // Samples (i.e., audio frames) are fixed size.
    pStreamInfo->cbSize = 0;         // If no media type is set, use zero.
    pStreamInfo->cbMaxLookahead = 0; // The MFT does not hold back data to look ahead.
    pStreamInfo->cbAlignment = 0;    // No special memory alignment required.
    // When the media type is set, the minimum buffer size is one audio frame.
    if (IsInputTypeSet())
        pStreamInfo->cbSize = BlockAlign();
    return S_OK;
}
IMFTransform::GetOutputStreamInfo 函数
输出流的信息,比如音频 sample 是完整且固定大小的,可使用 sample 原来分配的 buffer 或按需自己分配,等等。
// Describes the output stream: whole fixed-size audio frames; the MFT can
// allocate output buffers itself or use caller-provided ones.
HRESULT CDelayMFT::GetOutputStreamInfo(DWORD dwOutputStreamID, MFT_OUTPUT_STREAM_INFO *pStreamInfo)
{
    AutoLock lock(m_critSec);
    // FIX: guard against a null out-parameter before writing through it.
    if (pStreamInfo == NULL)
        return E_POINTER;
    if (!IsValidOutputStream(dwOutputStreamID))
        return MF_E_INVALIDSTREAMNUMBER;
    pStreamInfo->dwFlags =
        MFT_OUTPUT_STREAM_WHOLE_SAMPLES |       // Output buffers contain complete audio frames.
        MFT_OUTPUT_STREAM_CAN_PROVIDE_SAMPLES | // The MFT can allocate output buffers,
                                                // or use caller-allocated buffers.
        MFT_OUTPUT_STREAM_FIXED_SAMPLE_SIZE;    // Samples (i.e., audio frames) are fixed size.
    pStreamInfo->cbSize = 0;      // If no media type is set, use zero.
    pStreamInfo->cbAlignment = 0; // No special memory alignment required.
    // When the media type is set, the minimum buffer size is one audio frame.
    // Use the IsOutputTypeSet() accessor for consistency with GetInputStreamInfo,
    // which queries IsInputTypeSet() rather than the raw member flag.
    if (IsOutputTypeSet())
        pStreamInfo->cbSize = BlockAlign();
    return S_OK;
}
IMFTransform::GetInputAvailableType 函数
获取支持的输入媒体类型,如果输出类型已设置,则要求输入和输出类型一致,否则提供一组可接受的类型。
// Enumerates the preferred input types. Once an output type is negotiated the
// only acceptable input type is the matching one; otherwise a list of partial
// proposals is offered via GetProposedType.
HRESULT CDelayMFT::GetInputAvailableType(
    DWORD dwInputStreamID, // Input stream ID.
    DWORD dwTypeIndex,     // 0-based index into the list of preferred types.
    IMFMediaType **ppType  // Receives a pointer to the media type.
    )
{
    AutoLock lock(m_critSec);
    if (ppType == NULL)
        return E_INVALIDARG;
    if (!IsValidInputStream(dwInputStreamID))
        return MF_E_INVALIDSTREAMNUMBER;
    // No output type negotiated yet: offer a (possibly partial) proposal.
    if (!IsOutputTypeSet())
        return GetProposedType(dwTypeIndex, ppType);
    // Output type is fixed, so exactly one preferred input type exists.
    if (dwTypeIndex > 0)
        return MF_E_NO_MORE_TYPES;
    *ppType = m_pMediaType;
    (*ppType)->AddRef();
    return S_OK;
}
CDelayMFT::GetProposedType 函数
可接受的媒体类型,此处只提供了两种(理论上没有限制):
- 只要是 PCM 类型的音频即可
- 固定了各种音频属性,如采样率、位深、声道等
// Builds one of the two media types this MFT proposes:
//   index 0 — partial type: any PCM audio;
//   index 1 — full type: 48 kHz, 16-bit, 2-channel PCM.
// On success *ppmt receives one reference owned by the caller.
HRESULT CDelayMFT::GetProposedType(DWORD dwTypeIndex, IMFMediaType **ppmt)
{
    if (dwTypeIndex > 1)
        return MF_E_NO_MORE_TYPES;
    IMFMediaType *pType = NULL;
    HRESULT hr = S_OK;
    CHECK_HR(hr = MFCreateMediaType(&pType));
    CHECK_HR(hr = pType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio));
    switch (dwTypeIndex) {
    case 0: // Partial type: PCM audio, all other attributes left open.
        CHECK_HR(hr = pType->SetGUID(MF_MT_SUBTYPE, MFAudioFormat_PCM));
        break;
    case 1: { // Full type: propose 48 kHz, 16-bit, 2-channel PCM.
        // FIX: the braces are required. Without them the "case 0" jump would
        // cross the initialization of these locals, which is ill-formed C++
        // ("jump to case label crosses initialization") and fails to compile.
        const UINT32 SamplesPerSec = 48000, BitsPerSample = 16, NumChannels = 2;
        const UINT32 BlockAlign = NumChannels * BitsPerSample / 8; // Bytes per audio frame.
        CHECK_HR(hr = pType->SetGUID(MF_MT_SUBTYPE, MFAudioFormat_PCM));
        CHECK_HR(hr = pType->SetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, SamplesPerSec));
        CHECK_HR(hr = pType->SetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, BitsPerSample));
        CHECK_HR(hr = pType->SetUINT32(MF_MT_AUDIO_NUM_CHANNELS, NumChannels));
        CHECK_HR(hr = pType->SetUINT32(MF_MT_AUDIO_BLOCK_ALIGNMENT, BlockAlign));
        CHECK_HR(hr = pType->SetUINT32(MF_MT_AUDIO_AVG_BYTES_PER_SECOND, BlockAlign * SamplesPerSec));
        CHECK_HR(hr = pType->SetUINT32(MF_MT_ALL_SAMPLES_INDEPENDENT, TRUE));
        break;
    }
    }
    // Hand one reference to the caller; SAFE_RELEASE below drops our own.
    *ppmt = pType;
    (*ppmt)->AddRef();
done:
    SAFE_RELEASE(pType);
    return hr;
}
IMFTransform::SetInputType 函数
MF session 尝试设置某个输入媒体类型到该 MFT,可以是 Test Only 的,如果接受该类型,则返回成功,否则返回失败。
// Sets, tests (MFT_SET_TYPE_TEST_ONLY), or clears (pType == NULL) the input
// media type. Rejected while there is pending output.
HRESULT CDelayMFT::SetInputType( DWORD dwInputStreamID,
    IMFMediaType *pType, // Can be NULL to clear the input type.
    DWORD dwFlags)
{
    AutoLock lock(m_critSec);
    if (!IsValidInputStream(dwInputStreamID))
        return MF_E_INVALIDSTREAMNUMBER;
    // MFT_SET_TYPE_TEST_ONLY is the only flag defined for this call.
    if (dwFlags & ~MFT_SET_TYPE_TEST_ONLY)
        return E_INVALIDARG;
    // Test-only with nothing to test is a caller error.
    if ((dwFlags & MFT_SET_TYPE_TEST_ONLY) && pType == NULL)
        return E_INVALIDARG;
    // While output is pending, the media type is locked.
    if (HasPendingOutput())
        return MF_E_TRANSFORM_CANNOT_CHANGE_MEDIATYPE_WHILE_PROCESSING;
    // Validate the proposed type; NULL means "clear", which always validates.
    HRESULT hr = (pType != NULL) ? OnCheckInputType(pType) : S_OK;
    // Commit (or clear) the type unless the caller only wanted a test.
    if (SUCCEEDED(hr) && !(dwFlags & MFT_SET_TYPE_TEST_ONLY))
        hr = OnSetMediaType(pType, InputStream);
    return hr;
}
CDelayMFT::OnCheckInputType 函数
检查输入类型,如果已设置输出类型,则要求一致,否则做音频属性检查(ValidatePCMAudioType)。
// Decides whether pmt is acceptable as the input type: it must match the
// negotiated output type exactly, or — if none is set yet — pass the PCM
// attribute validation.
HRESULT CDelayMFT::OnCheckInputType(IMFMediaType *pmt)
{
    assert(pmt != NULL);
    // No output type yet: accept any PCM format this MFT supports.
    if (!IsOutputTypeSet())
        return ValidatePCMAudioType(pmt);
    // Output type already negotiated: require an exact match.
    // IsEqual returns S_FALSE for a partial match, so only S_OK counts.
    DWORD dwCompareFlags = 0;
    return (pmt->IsEqual(m_pMediaType, &dwCompareFlags) == S_OK)
        ? S_OK
        : MF_E_INVALIDMEDIATYPE;
}
CDelayMFT::ValidatePCMAudioType 函数
检测音频参数(声道、采样率、位深等等)是否符合该 MFT 的要求。
// Validates that pmt describes PCM audio this MFT can process:
// mono or stereo, 8- or 16-bit, with consistent block alignment and
// average byte rate. Returns MF_E_INVALIDMEDIATYPE otherwise.
HRESULT ValidatePCMAudioType(IMFMediaType *pmt)
{
    HRESULT hr = S_OK;
    GUID majorType = GUID_NULL;
    CHECK_HR(hr = pmt->GetGUID(MF_MT_MAJOR_TYPE, &majorType));
    GUID subtype = GUID_NULL;
    CHECK_HR(hr = pmt->GetGUID(MF_MT_SUBTYPE, &subtype));
    UINT32 nChannels = 0;
    CHECK_HR(hr = pmt->GetUINT32(MF_MT_AUDIO_NUM_CHANNELS, &nChannels));
    UINT32 nSamplesPerSec = 0;
    CHECK_HR(hr = pmt->GetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, &nSamplesPerSec));
    UINT32 nAvgBytesPerSec = 0;
    CHECK_HR(hr = pmt->GetUINT32(MF_MT_AUDIO_AVG_BYTES_PER_SECOND, &nAvgBytesPerSec));
    UINT32 nBlockAlign = 0;
    CHECK_HR(hr = pmt->GetUINT32(MF_MT_AUDIO_BLOCK_ALIGNMENT, &nBlockAlign));
    UINT32 wBitsPerSample = 0;
    CHECK_HR(hr = pmt->GetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, &wBitsPerSample));
    // Validate the values.
    // FIX: the original fetched the major type and subtype but never checked
    // them, so a non-audio or non-PCM type would pass validation.
    if (majorType != MFMediaType_Audio)
        CHECK_HR(hr = MF_E_INVALIDMEDIATYPE);
    if (subtype != MFAudioFormat_PCM)
        CHECK_HR(hr = MF_E_INVALIDMEDIATYPE);
    if (nChannels != 1 && nChannels != 2) // Mono or stereo only.
        CHECK_HR(hr = MF_E_INVALIDMEDIATYPE);
    if (wBitsPerSample != 8 && wBitsPerSample != 16) // 8-bit or 16-bit PCM only.
        CHECK_HR(hr = MF_E_INVALIDMEDIATYPE);
    if (nBlockAlign != nChannels * (wBitsPerSample / 8)) // Make sure block alignment was calculated correctly.
        CHECK_HR(hr = MF_E_INVALIDMEDIATYPE);
    if (nSamplesPerSec > (DWORD)(MAXDWORD / nBlockAlign)) // Check possible overflow (nBlockAlign is non-zero here).
        CHECK_HR(hr = MF_E_INVALIDMEDIATYPE);
    if (nAvgBytesPerSec != nSamplesPerSec * nBlockAlign) // Make sure average bytes per second was correct.
        CHECK_HR(hr = MF_E_INVALIDMEDIATYPE);
done:
    return hr;
}
CDelayMFT::OnSetMediaType 函数
设置或清除当前媒体类型。
// Sets (pType != NULL) or clears (pType == NULL) the media type for one
// direction. A single m_pMediaType is shared by both directions because this
// MFT requires the input type to match the output type.
HRESULT CDelayMFT::OnSetMediaType(IMFMediaType *pType, StreamDirection dir)
{
    BOOL bInputType = (dir == InputStream);
    if (pType) { // Set (replace) the shared media type and mark this direction.
        SAFE_RELEASE(m_pMediaType);
        m_pMediaType = pType;
        m_pMediaType->AddRef();
        if (bInputType)
            m_bInputTypeSet = TRUE;
        else
            m_bOutputTypeSet = TRUE;
    }
    else { // Clear the media type for this direction.
        if (bInputType)
            m_bInputTypeSet = FALSE;
        else
            m_bOutputTypeSet = FALSE;
        // Release the shared pointer only when NEITHER direction uses it.
        // FIX: the original tested m_bOutputTypeSet alone, so clearing the
        // output type released m_pMediaType even while the input type was
        // still marked as set.
        if (!m_bInputTypeSet && !m_bOutputTypeSet)
            SAFE_RELEASE(m_pMediaType);
    }
    return S_OK;
}
IMFTransform::SetOutputType 函数
MF session 尝试设置某个输出媒体类型到该 MFT,可以是 Test Only 的,如果接受该类型,则返回成功,否则返回失败。检查过程类似 SetInputType,故此处不展开。
// Sets, tests (MFT_SET_TYPE_TEST_ONLY), or clears (pType == NULL) the output
// media type. Rejected while there is pending output.
HRESULT CDelayMFT::SetOutputType( DWORD dwOutputStreamID,
    IMFMediaType *pType, // Can be NULL to clear the output type.
    DWORD dwFlags )
{
    AutoLock lock(m_critSec);
    if (!IsValidOutputStream(dwOutputStreamID))
        return MF_E_INVALIDSTREAMNUMBER;
    // MFT_SET_TYPE_TEST_ONLY is the only flag defined for this call.
    if (dwFlags & ~MFT_SET_TYPE_TEST_ONLY)
        return E_INVALIDARG;
    // Test-only with nothing to test is a caller error.
    if ((dwFlags & MFT_SET_TYPE_TEST_ONLY) && pType == NULL)
        return E_INVALIDARG;
    // While output is pending, the media type is locked.
    if (HasPendingOutput())
        return MF_E_TRANSFORM_CANNOT_CHANGE_MEDIATYPE_WHILE_PROCESSING;
    // Validate the proposed type; NULL means "clear", which always validates.
    HRESULT hr = (pType != NULL) ? OnCheckOutputType(pType) : S_OK;
    // Commit (or clear) the type unless the caller only wanted a test.
    if (SUCCEEDED(hr) && !(dwFlags & MFT_SET_TYPE_TEST_ONLY))
        hr = OnSetMediaType(pType, OutputStream);
    return hr;
}
IMFTransform::ProcessMessage 函数
处理发送到该 MFT 的消息。
// Handles control messages sent to this MFT. Messages that need no action
// (including start/end-of-stream notifications) are acknowledged with S_OK.
HRESULT CDelayMFT::ProcessMessage(MFT_MESSAGE_TYPE eMessage, ULONG_PTR ulParam)
{
    AutoLock lock(m_critSec);
    HRESULT hr = S_OK;
    if (eMessage == MFT_MESSAGE_COMMAND_FLUSH) {
        // Flush: discard buffered data (TRUE = flushing).
        FreeStreamingResources(TRUE);
    }
    else if (eMessage == MFT_MESSAGE_NOTIFY_BEGIN_STREAMING) {
        // Streaming is about to start: set up whatever processing needs.
        hr = AllocateStreamingResources();
    }
    else if (eMessage == MFT_MESSAGE_NOTIFY_END_STREAMING) {
        // Streaming ended: tear down (FALSE = not a flush).
        FreeStreamingResources(FALSE);
    }
    else if (eMessage == MFT_MESSAGE_COMMAND_DRAIN) {
        // Drain: stop accepting input until all pending output has been produced.
        hr = OnDrain();
    }
    // MFT_MESSAGE_NOTIFY_START_OF_STREAM / END_OF_STREAM and any other
    // message require no action here.
    return hr;
}
IMFTransform::ProcessInput 函数
处理一个输入的 sample,这里主要是把该 sample 缓存起来并得到 IMFMediaBuffer 接口。
// Accepts one input sample: converts it to a single contiguous buffer and
// holds sample + buffer until ProcessOutput consumes them. Also captures the
// sample's timestamp (if any) for stamping the output later.
HRESULT CDelayMFT::ProcessInput( DWORD dwInputStreamID, IMFSample *pSample, DWORD dwFlags )
{
    AutoLock lock(m_critSec);
    if (pSample == NULL)
        return E_POINTER;
    // FIX: per the IMFTransform::ProcessInput contract, dwFlags must be zero.
    if (dwFlags != 0)
        return E_INVALIDARG;
    if (!IsValidInputStream(dwInputStreamID))
        return MF_E_INVALIDSTREAMNUMBER;
    // Both media types must be negotiated before data can be accepted.
    if (!IsInputTypeSet() || !IsOutputTypeSet())
        return MF_E_NOTACCEPTING;
    if (HasPendingOutput()) // Not accepting input because there is still data to process.
        return MF_E_NOTACCEPTING;
    HRESULT hr = S_OK;
    DWORD dwBufferCount = 0;
    CHECK_HR(hr = pSample->GetBufferCount(&dwBufferCount));
    // FIX: the original fetched the buffer count but never used it. A sample
    // with no buffers carries no audio data, so reject it.
    if (dwBufferCount == 0)
        CHECK_HR(hr = E_FAIL);
    // Merge the sample's buffers into one contiguous buffer and keep it;
    // ProcessOutput consumes it, possibly across several calls.
    CHECK_HR(hr = pSample->ConvertToContiguousBuffer(&m_pBuffer));
    m_pSample = pSample;
    m_pSample->AddRef();
    // Remember the input timestamp (if present) for stamping the output.
    LONGLONG hnsTime = 0;
    if (SUCCEEDED(pSample->GetSampleTime(&hnsTime))) {
        m_bValidTime = TRUE;
        m_rtTimestamp = hnsTime;
    }
    else
        m_bValidTime = FALSE;
done:
    return hr;
}
IMFTransform::ProcessOutput 函数
处理一个输出 sample 的请求,如果尚未有输入,则请求输入(返回 MF_E_TRANSFORM_NEED_MORE_INPUT),否则进行处理(特效算法)。
// Produces one unit of output: either transforms the buffered input sample or
// emits the remaining effect tail. Asks for more input when neither exists.
HRESULT CDelayMFT::ProcessOutput( DWORD dwFlags, DWORD cOutputBufferCount,
    MFT_OUTPUT_DATA_BUFFER *pOutputSamples, DWORD *pdwStatus )
{
    AutoLock lock(m_critSec);
    // Reject bad out-parameters up front.
    if (pOutputSamples == NULL || pdwStatus == NULL)
        return E_POINTER;
    // This MFT has exactly one output stream, so exactly one buffer is expected.
    if (cOutputBufferCount != 1)
        return E_INVALIDARG;
    // Nothing buffered and no tail to drain: ask the client for more input.
    if (!HasPendingOutput())
        return MF_E_TRANSFORM_NEED_MORE_INPUT;
    // At this point at least one of the following holds:
    //   1. an input buffer is waiting to be processed;
    //   2. we are draining the effect tail.
    // When both hold, the input buffer is consumed before the tail.
    assert((m_pBuffer && m_pSample) || (m_bDraining && (m_cbTailSamples > 0)));
    MFT_OUTPUT_DATA_BUFFER& outSample = pOutputSamples[0];
    return (m_pBuffer != NULL)
        ? InternalProcessOutput(outSample, pdwStatus) // Transform buffered input.
        : ProcessEffectTail(outSample, pdwStatus);    // Emit the remaining tail.
}
CDelayMFT::InternalProcessOutput 函数
得到输入输出 sample 的 buffer 指针,然后进行处理(ProcessAudio 函数,此处略过),最后设置 timestamp 和其他需要的信息到输出 sample。
// Transforms as much of the held input buffer as fits into the output buffer.
// If the client provided no output sample, the data is transformed in place
// and the input sample is returned as the output sample. One input buffer may
// take several calls; MFT_OUTPUT_DATA_BUFFER_INCOMPLETE signals more to come.
HRESULT CDelayMFT::InternalProcessOutput(MFT_OUTPUT_DATA_BUFFER& OutputSample, DWORD *pdwStatus)
{
    IMFMediaBuffer *pOutputBuffer = NULL;
    HRESULT hr = S_OK;
    BYTE *pbOutputData = NULL;  // Pointer to the memory in the output buffer.
    DWORD cbOutputLength = 0;   // Size of the output buffer.
    DWORD cbBytesProcessed = 0; // How much data we processed.
    BOOL bComplete = FALSE;     // Are we done with the input buffer?
    // Lock the input buffer on first use; the cursor (m_pbInputData /
    // m_cbInputLength) persists across calls while the buffer is consumed.
    if (m_pbInputData == NULL)
        CHECK_HR(hr = m_pBuffer->Lock(&m_pbInputData, NULL, &m_cbInputLength));
    if (OutputSample.pSample != NULL) { // If the client provided an output sample, get the output buffer.
        CHECK_HR(hr = OutputSample.pSample->GetBufferByIndex(0, &pOutputBuffer));
        CHECK_HR(hr = pOutputBuffer->Lock(&pbOutputData, &cbOutputLength, NULL));
    }
    else { // Client did not provide an output sample: transform in place.
        pbOutputData = m_pbInputData;
        cbOutputLength = m_cbInputLength;
        OutputSample.pSample = m_pSample; // Return the input sample as the output sample.
        OutputSample.pSample->AddRef();
    }
    // Process whatever fits into the output buffer...
    if (m_cbInputLength > cbOutputLength)
        cbBytesProcessed = cbOutputLength;
    else {
        cbBytesProcessed = m_cbInputLength;
        bComplete = TRUE; // The entire remaining input is consumed this call.
    }
    // ...rounded down to whole audio frames.
    cbBytesProcessed -= (cbBytesProcessed % BlockAlign());
    // FIX: the original divided by the undeclared identifier nBlockAlign
    // (a compile error); the frame count is bytes / BlockAlign().
    CHECK_HR(hr = ProcessAudio(pbOutputData, m_pbInputData, cbBytesProcessed / BlockAlign()));
    if (pOutputBuffer) { // Update the output buffer/sample (if provided).
        CHECK_HR(hr = pOutputBuffer->SetCurrentLength(cbBytesProcessed));
        if (m_bValidTime) { // Set the time stamp, if we have a valid time from the input sample.
            // Duration in 100-ns units (UNITS per second).
            // FIX: the original computed (bytes / AvgBytesPerSec()) * UNITS,
            // which truncates to whole seconds — 0 for any sub-second buffer.
            // Multiply first, in 64 bits, then divide.
            LONGLONG hnsDuration = ((LONGLONG)cbBytesProcessed * UNITS) / AvgBytesPerSec();
            CHECK_HR(hr = OutputSample.pSample->SetSampleTime(m_rtTimestamp));
            CHECK_HR(hr = OutputSample.pSample->SetSampleDuration(hnsDuration));
            m_rtTimestamp += hnsDuration; // The next output continues where this one ends.
        }
    }
    OutputSample.dwStatus = 0;
    *pdwStatus = 0;
    if (bComplete) { // We are done with this input buffer. Release it.
        CHECK_HR(hr = m_pBuffer->Unlock());
        SAFE_RELEASE(m_pBuffer);
        SAFE_RELEASE(m_pSample);
        m_pbInputData = 0;
        m_cbInputLength = 0;
    }
    else { // There is still data in the input buffer: advance the cursor.
        m_cbInputLength -= cbBytesProcessed; // Update the running count of bytes processed.
        m_pbInputData += cbBytesProcessed;
        OutputSample.dwStatus |= MFT_OUTPUT_DATA_BUFFER_INCOMPLETE; // More output to produce for this input.
    }
done:
    if (pOutputBuffer && pbOutputData)
        pOutputBuffer->Unlock();
    SAFE_RELEASE(pOutputBuffer);
    return hr;
}
结语
以上是同步 MFT 的一般实现方式;如果是异步 MFT,则需要实现 IMFMediaEventGenerator 接口,通过监听和发送 event 来处理输入输出。虽然不如同步 MFT 来得简单直观,但当该 MFT 需要长时间处理时,使用异步 MFT 就能有效避免帧率过低的问题。
其他框架的滤镜
– EOF –