Android多声道录音研究

原生Android只支持2 channel的录音。可是偏偏会有多mic的需求,比如说语音识别。目前已知TDM协议可以将多mic数据从kernel送到hal,从内核空间搬运到用户空间中。可是原生AudioRecord接口是完全不支持多channel录音数据的采集的,怎么修改,才能让原生进行支持呢?

我们就从AudioRecord的构造函数开始往下研究。无论行不行,都要研究出个所以然来!如果写个录音app,一般这么使用AudioRecord:

int sampleRateInHz = 8000;
int audioEncodingBits = AudioFormat.ENCODING_PCM_16BIT;
int recordBufferSize = AudioRecord.getMinBufferSize(sampleRateInHz,     channelConfiguration, audioEncodingBits);
mAudioRecord = new AudioRecord(MediaRecorder.AudioSource.MIC, 
    sampleRateInHz, channelConfiguration, audioEncodingBits,
                    recordBufferSize);

先说AudioRecord构造函数最后一个参数recordBufferSize。来自:

getMinBufferSize

//AudioRecord.java
static public int getMinBufferSize(int sampleRateInHz, int channelConfig, int audioFormat) {
        int channelCount = 0;
        ...
        //根据channelMask得出channelCount
        //这里竟然有个6声道的,估计可以参考下
        case AudioFormat.CHANNEL_IN_5POINT1:
            channelCount = 6;
        ...
        int size = native_get_min_buff_size(sampleRateInHz, channelCount, audioFormat);
        ...
}

native_get_min_buff_size对应android_media_AudioRecord_get_min_buff_size:

//android_media_AudioRecord.cpp
static jint android_media_AudioRecord_get_min_buff_size(JNIEnv *env,  jobject thiz,jint sampleRateInHertz, jint channelCount, jint audioFormat) {
    size_t frameCount = 0;
    audio_format_t format = audioFormatToNative(audioFormat);
    status_t result = AudioRecord::getMinFrameCount(&frameCount,
            sampleRateInHertz,
            format,
            audio_channel_in_mask_from_count(channelCount));
    return frameCount * channelCount *      audio_bytes_per_sample(format);
}

这里传入的format是AudioFormat.ENCODING_PCM_16BIT,根据audio_bytes_per_sample:

//audio.h
static inline size_t audio_bytes_per_sample(audio_format_t format)
{
    ...
    case AUDIO_FORMAT_PCM_16_BIT:
      case AUDIO_FORMAT_IEC61937:
          size = sizeof(int16_t);
    ...
}

audio_bytes_per_sample返回的是sizeof(int16_t) = 2。

status_t AudioRecord::getMinFrameCount(   
        size_t* frameCount,
        uint32_t sampleRate,
        audio_format_t format,
        audio_channel_mask_t channelMask)
{
    status_t status = AudioSystem::getInputBufferSize(sampleRate, format, channelMask, &size);
    ...
     //这里需要double一下
    // We double the size of input buffer for ping pong use of record buffer.
    // Assumes audio_is_linear_pcm(format)
    if ((*frameCount = (size * 2) / (audio_channel_count_from_in_mask(channelMask) *
            audio_bytes_per_sample(format))) == 0) {
        ALOGE("Unsupported configuration: sampleRate %u, format %#x, channelMask %#x",
            sampleRate, format, channelMask);
        return BAD_VALUE;
    }
}

getInputBufferSize直接看hal层:

//audio_hw.c
static size_t get_input_buffer_size(uint32_t sample_rate,
                                    audio_format_t format,
                                    int channel_count,
                                    bool is_low_latency)
{
    ...
        //这里是(8000*20)/1000
        size = (sample_rate * AUDIO_CAPTURE_PERIOD_DURATION_MSEC) / 1000;
        size *= sizeof(short) * channel_count;
    ...
}

size = (8000*20)/1000 * 2 * 2 = 640,get_input_buffer_size返回640.

目前这种场景getMinFrameCount取得frameCount = (640 *2) / (2 * 2) = 320

getMinBufferSize将返回320 * 2 * 2 = 1280,调用完构造函数之后,AudioRecord将通过audioBuffSizeCheck将这个值设置生效(函数名字是check,我觉得这个地方不太合理。)

private void audioBuffSizeCheck(int audioBufferSize) throws IllegalArgumentException {                                                                                   
        // NB: this section is only valid with PCM data.
        // To update when supporting compressed formats
        //只支持无压缩的pcm
        int frameSizeInBytes = mChannelCount
            * (AudioFormat.getBytesPerSample(mAudioFormat));
        //检查用户设置的这个值是不是frameSizeInBytes的整数倍
        if ((audioBufferSize % frameSizeInBytes != 0) || (audioBufferSize < 1)) {
            throw new IllegalArgumentException("Invalid audio buffer size " + audioBufferSize
                    + " (frame size " + frameSizeInBytes + ")");
        }
        //存到这里。作为录音数据的buffer
        mNativeBufferSizeInBytes = audioBufferSize;
    }

然后,通过调用native_setup将值传入native层。

//android_media_AudioRecord.cpp
static jint
android_media_AudioRecord_setup
{
    ...
    size_t frameSize = channelCount * bytesPerSample;
    //这里还是上文说的320
    size_t frameCount = buffSizeInBytes / frameSize;
    ...
    const status_t status = lpRecorder->set(
            ...
            frameCount
            ...
            );
}

然后这个函数会调用AudioRecord set接口.

//AudioRecord.cpp
status_t AudioRecord::set(//参数省略)
{
    ...
    //上层请求的frameCount
    // mFrameCount is initialized in openRecord_l
    mReqFrameCount = frameCount;
    ...
    size_t frameCount = mReqFrameCount;
    ...
    //temp有可能会被openRecord修订
    size_t temp = frameCount;
    ...
    sp<IAudioRecord> record = audioFlinger->openRecord(
                                            ...
                                            &temp,
                                            ...
    );
}

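然后设置到AudioFlinger端(注意:下面贴出的是播放侧PlaybackThread::Track的构造函数;录音侧对应的应该是RecordThread::RecordTrack,这里仅用来示意frameCount是如何传给server端proxy的):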

//services/audioflinger/Tracks.cpp
AudioFlinger::PlaybackThread::Track::Track(/*省略参数*/)
{
    ...
        if (sharedBuffer == 0) {
        mAudioTrackServerProxy = new AudioTrackServerProxy(mCblk, mBuffer, frameCount,mFrameSize, !isExternalTrack(), sampleRate);
    } else {
        mAudioTrackServerProxy = new StaticAudioTrackServerProxy(mCblk, mBuffer, frameCount,mFrameSize);
    }
    ...
}

本文的主题是研究多声道录音,所以先就此打住。

回到前文,如果需要支持多声道,需要看看AudioRecord构造函数的第三个参数

channelConfiguration

取值范围只有这些:

public static final int CHANNEL_IN_DEFAULT = 1;
    // These directly match native
    public static final int CHANNEL_IN_LEFT = 0x4;
    public static final int CHANNEL_IN_RIGHT = 0x8;
    public static final int CHANNEL_IN_FRONT = 0x10;
    public static final int CHANNEL_IN_BACK = 0x20;
    public static final int CHANNEL_IN_LEFT_PROCESSED = 0x40;
    public static final int CHANNEL_IN_RIGHT_PROCESSED = 0x80;
    public static final int CHANNEL_IN_FRONT_PROCESSED = 0x100;
    public static final int CHANNEL_IN_BACK_PROCESSED = 0x200;
    public static final int CHANNEL_IN_PRESSURE = 0x400;
    public static final int CHANNEL_IN_X_AXIS = 0x800;
    public static final int CHANNEL_IN_Y_AXIS = 0x1000;
    public static final int CHANNEL_IN_Z_AXIS = 0x2000;
    public static final int CHANNEL_IN_VOICE_UPLINK = 0x4000;
    public static final int CHANNEL_IN_VOICE_DNLINK = 0x8000;
    public static final int CHANNEL_IN_MONO = CHANNEL_IN_FRONT;
    public static final int CHANNEL_IN_STEREO = (CHANNEL_IN_LEFT | CHANNEL_IN_RIGHT);
    /** @hide */
    public static final int CHANNEL_IN_FRONT_BACK = CHANNEL_IN_FRONT | CHANNEL_IN_BACK;
    // CHANNEL_IN_ALL is not yet defined; if added then it should match AUDIO_CHANNEL_IN_ALL

刚开始没看明白为什么这么定义。直到看到了...往下看,后面会说

//AudioRecord.java
public AudioRecord(int audioSource, int sampleRateInHz, int channelConfig, int audioFormat,                                                                              
            int bufferSizeInBytes)
    throws IllegalArgumentException {
        //调用另外一个重载的构造函数
        this((new AudioAttributes.Builder())
                    .setInternalCapturePreset(audioSource)
                    .build(),
                (new AudioFormat.Builder())                 .setChannelMask(getChannelMaskFromLegacyConfig(channelConfig,
                                        true/*allow legacy configurations*/))
                    .setEncoding(audioFormat)
                    .setSampleRate(sampleRateInHz)
                    .build(),
                bufferSizeInBytes,
                AudioManager.AUDIO_SESSION_ID_GENERATE);
    }

注意看这一行:

.setChannelMask(getChannelMaskFromLegacyConfig(channelConfig,
                                        true/*allow legacy configurations*/))

做一个兼容性转换。最终结果还是前面那些。关键是这里是Mask(中文叫掩码)。Android中有很多这种用法

private static int getChannelMaskFromLegacyConfig(int inChannelConfig,
            boolean allowLegacyConfig) {
        int mask;
        switch (inChannelConfig) {
        case AudioFormat.CHANNEL_IN_DEFAULT: // AudioFormat.CHANNEL_CONFIGURATION_DEFAULT
        case AudioFormat.CHANNEL_IN_MONO:
        case AudioFormat.CHANNEL_CONFIGURATION_MONO:
            mask = AudioFormat.CHANNEL_IN_MONO;
            break;
        case AudioFormat.CHANNEL_IN_STEREO:
        case AudioFormat.CHANNEL_CONFIGURATION_STEREO:
            mask = AudioFormat.CHANNEL_IN_STEREO;
            break;
        case (AudioFormat.CHANNEL_IN_FRONT | AudioFormat.CHANNEL_IN_BACK):
            mask = inChannelConfig;
            break;
        default:
            throw new IllegalArgumentException("Unsupported channel configuration.");
        }
​
        if (!allowLegacyConfig && ((inChannelConfig == AudioFormat.CHANNEL_CONFIGURATION_MONO)
                || (inChannelConfig == AudioFormat.CHANNEL_CONFIGURATION_STEREO))) {
            // only happens with the constructor that uses AudioAttributes and AudioFormat
            throw new IllegalArgumentException("Unsupported deprecated configuration.");
        }
​
        return mask;
    }

getChannelMaskFromLegacyConfig根本没对超过2个的声道进行处理。包括AudioFormat里的hide参数:

/** @hide */
    public static final int CHANNEL_IN_5POINT1 = (CHANNEL_IN_LEFT |
            CHANNEL_IN_RIGHT | CHANNEL_IN_FRONT | CHANNEL_IN_BACK |
            CHANNEL_IN_LEFT_PROCESSED | CHANNEL_IN_RIGHT_PROCESSED);

看来是打算先占个位置,将来会支持这种5.1声道的方式。那我们岂不是可以加上个同样的定义,比如说7.1声道:

/** @hide */
    public static final int CHANNEL_IN_7POINT1 = (CHANNEL_IN_LEFT |
            CHANNEL_IN_RIGHT | CHANNEL_IN_FRONT | CHANNEL_IN_BACK |
            CHANNEL_IN_LEFT_PROCESSED | CHANNEL_IN_RIGHT_PROCESSED|
            CHANNEL_IN_FRONT_PROCESSED | CHANNEL_IN_BACK_PROCESSED);

虽然感觉不太对。对应的,getChannelMaskFromLegacyConfig就需要做添加,不然直接抛出IllegalArgumentException:

//AudioRecord.java-getChannelMaskFromLegacyConfig
case AudioFormat.CHANNEL_IN_7POINT1:
            mask = AudioFormat.CHANNEL_IN_7POINT1;
            break;

然后,计算bufferSize的地方也要修改:

case AudioFormat.CHANNEL_IN_7POINT1:
            channelCount = 8;

再往下,貌似是不会被参数检查所拦截了。

只有channelCount够了,frameCount才能对,bufferSize才能对应。不然数据就错乱了。因为声道数,格式等决定了每一帧所需要的缓存空间!

另外一个设置channel_count的地方:

//hal/audio_hw.c
struct pcm_config pcm_config_audio_capture = {
    .channels = 2,
    .period_count = AUDIO_CAPTURE_PERIOD_COUNT,
    .format = PCM_FORMAT_S16_LE,
};

普通录音场景给了个默认的2 channel.然后

static int adev_open_input_stream(struct audio_hw_device *dev,
                                  audio_io_handle_t handle,
                                  audio_devices_t devices,
                                  //注意这个:
                                  struct audio_config *config,
                                  struct audio_stream_in **stream_in,
                                  audio_input_flags_t flags __unused,
                                  const char *address __unused,
                                  audio_source_t source)
{
    ...
    in->config = pcm_config_audio_capture;//此时是默认值2
    ...
    //这里会取出应用层设置下来的channel_count
    int channel_count = audio_channel_count_from_in_mask(config->channel_mask);
    //如果应用设置的不是2.修改之
    in->config.channels = channel_count;
    ...
}

这些都改完之后,全编译一把,然后:

int channelConfiguration = AudioFormat.CHANNEL_IN_7POINT1;
int audioEncodingBits = AudioFormat.ENCODING_PCM_16BIT;
int sampleRateInHz = 8000;
int recordBufferSize = AudioRecord.getMinBufferSize(sampleRateInHz, channelConfiguration, audioEncodingBits);
LogD("recordBufferSize = " + String.valueOf(recordBufferSize));

recordBufferSize = 5120(之前立体声1280的四倍)

表明,修改成功!Oh yeah!当然,这么修改之后,frameCount还是320(5120/(声道数×每个采样的字节数)).知道为啥定义帧的概念了吧。

稍等,好坑!

AudioRecord: set(): inputSource 6, sampleRate 8000, format 0x1, channelMask 0x3fc
...
audio_hw_primary: adev_open_input_stream: enter: sample_rate(8000) channel_mask(0xc)

看这log,在AudioRecord::set之后、adev_open_input_stream之前,channelMask被改掉了(0x3fc变成了0xc)。跟了下代码,发现是这里在搞怪:

audio_io_handle_t AudioPolicyManager::getInputForDevice(
    ...
    audio_channel_mask_t channelMask,
    ...)
{
    ...
    audio_channel_mask_t profileChannelMask = channelMask;
    for (;;) {
        //就是这里
        profile = getInputProfile(device,   address,profileSamplingRate, profileFormat,     profileChannelMask,profileFlags);
        if (profile != 0) {
            break; // success
        } else if (profileFlags & AUDIO_INPUT_FLAG_RAW) {
            profileFlags = (audio_input_flags_t) (profileFlags & ~AUDIO_INPUT_FLAG_RAW); // retry
        } else if (profileFlags != AUDIO_INPUT_FLAG_NONE) {
            profileFlags = AUDIO_INPUT_FLAG_NONE; // retry
        } else { // fail
            return input;
        }
    }
    ...
}

我们来看看getInputProfile

sp<IOProfile> AudioPolicyManager::getInputProfile(audio_devices_t device,const String8& address,uint32_t& samplingRate,audio_format_t& format,audio_channel_mask_t& channelMask,audio_input_flags_t flags)
{
    // Choose an input profile based on the requested capture parameters: select the first available
    // profile supporting all requested parameters.
    for (size_t i = 0; i < mHwModules.size(); i++)
    {
        if (mHwModules[i]->mHandle == 0) {
            continue;
        }   
        for (size_t j = 0; j < mHwModules[i]->mInputProfiles.size(); j++)
        {
            sp<IOProfile> profile = mHwModules[i]->mInputProfiles[j];
            // profile->log();
            if (profile->isCompatibleProfile(/*一堆参数*/) {
​
                return profile;
            }
        }
        //恕老夫眼拙,没看出来和上面的for循环有什么区别?????
        for (size_t j = 0; j < mHwModules[i]->mInputProfiles.size(); j++)
        {
            sp<IOProfile> profile = mHwModules[i]->mInputProfiles[j];
            // profile->log();
            if (profile->isCompatibleProfile(/*一堆参数同上*/) {
                                              
                return profile;
            }   
        }   
    }   
    return NULL;
}

基于请求的capture参数,选择一个input profile,选中第一个可用的。

看了会儿相关代码,都要跟吐了。不过,我感觉基本就是改

audio_policy_configuration.xml(Android O新加入的)或者audio_policy.conf了,加入8声道(7.1)的支持。比如这样:

<mixPort name="primary input" role="sink">
                    <profile name="" format="AUDIO_FORMAT_PCM_16_BIT"
                             samplingRates="8000,11025,12000,16000,22050,24000,32000,44100,48000"
                             channelMasks="AUDIO_CHANNEL_IN_MONO,AUDIO_CHANNEL_IN_STEREO,AUDIO_CHANNEL_IN_FRONT_BACK,AUDIO_CHANNEL_IN_8"/>

相应的,audio-base.h也要改一下。

//audio-base.h
//这个值应该要和java定义的对应0x3fc
AUDIO_CHANNEL_IN_8 = 1020u

再后来发现,解析配置文件、把字符串转换成枚举值的时候,还需要在这里加一项,不然AUDIO_CHANNEL_IN_8没法被识别:

//libmedia/TypeConverter.cpp
template <>
const InputChannelConverter::Table InputChannelConverter::mTable[] = {
    MAKE_STRING_FROM_ENUM(AUDIO_CHANNEL_IN_MONO),
    MAKE_STRING_FROM_ENUM(AUDIO_CHANNEL_IN_STEREO),
    MAKE_STRING_FROM_ENUM(AUDIO_CHANNEL_IN_FRONT_BACK),
    MAKE_STRING_FROM_ENUM(AUDIO_CHANNEL_IN_6),
    MAKE_STRING_FROM_ENUM(AUDIO_CHANNEL_IN_VOICE_UPLINK_MONO),
    MAKE_STRING_FROM_ENUM(AUDIO_CHANNEL_IN_VOICE_DNLINK_MONO),
    MAKE_STRING_FROM_ENUM(AUDIO_CHANNEL_IN_VOICE_CALL_MONO),
    MAKE_STRING_FROM_ENUM(AUDIO_CHANNEL_IN_8),
    TERMINATOR
};

因为:

//Serializer.cpp
status_t AudioProfileTraits::deserialize(_xmlDoc */*doc*/, const _xmlNode *root, PtrElement &profile,
                                         PtrSerializingCtx /*serializingContext*/)
{
    string samplingRates = getXmlAttribute(root, Attributes::samplingRates);
    string format = getXmlAttribute(root, Attributes::format);
    string channels = getXmlAttribute(root, Attributes::channelMasks);
    profile = new Element(formatFromString(format, gDynamicFormat),
                          //这里
                          channelMasksFromString(channels, ","),
                          samplingRatesFromString(samplingRates, ","));
    
    profile->setDynamicFormat(profile->getFormat() == gDynamicFormat);
    profile->setDynamicChannels(profile->getChannels().isEmpty());
    profile->setDynamicRate(profile->getSampleRates().isEmpty());
    
    return NO_ERROR;
}

这样改完之后,就支持8 channel录音了。当然了……如果tinyalsa的实现不支持,pcm_open的时候恐怕是要报错的。那就是另外一个话题了。

发布了56 篇原创文章 · 获赞 42 · 访问量 26万+
展开阅读全文

没有更多推荐了,返回首页

©️2019 CSDN 皮肤主题: 技术黑板 设计师: CSDN官方博客

分享到微信朋友圈

×

扫一扫,手机浏览