FFmpeg抓取麦克风音频并转为G711格式(C/C++,示例代码使用了 std::string 和 COM 接口,需按 C++ 编译)

一、获取麦克风设备

#include <stdio.h>
#include <dshow.h>
#include <string>
#include <Windows.h>
#include <comutil.h>

#pragma comment(lib, "Strmiids.lib")
#pragma comment(lib, "comsuppw.lib")

#define MAX_FRIENDLY_NAME_LENGTH    128

int GetAudioInputDevice(char* device)
{
    HRESULT hr;
    // 初始化COM
    hr = CoInitializeEx(NULL, COINIT_APARTMENTTHREADED);
    if (FAILED(hr))
    {
        return -1;
    }

    // 创建系统设备枚举器实例
    ICreateDevEnum *pSysDevEnum = NULL;
    hr = CoCreateInstance(CLSID_SystemDeviceEnum, NULL, CLSCTX_INPROC_SERVER, IID_ICreateDevEnum, (void **)&pSysDevEnum);
    if (FAILED(hr))
    {
        CoUninitialize();
        return -1;
    }

    // 获取设备类枚举器
    IEnumMoniker *pEnumCat = NULL;
    hr = pSysDevEnum->CreateClassEnumerator(CLSID_AudioInputDeviceCategory, &pEnumCat, 0);
    if (hr == S_OK)
    {
        // 枚举设备名称
        IMoniker *pMoniker = NULL;
        ULONG cFetched;
        int i = 0;
        while (pEnumCat->Next(1, &pMoniker, &cFetched) == S_OK)
        {
            IPropertyBag *pPropBag;
            hr = pMoniker->BindToStorage(NULL, NULL, IID_IPropertyBag, (void **)&pPropBag);
            if (SUCCEEDED(hr))
            {
                // 获取设备友好名
                VARIANT varName;
                VariantInit(&varName);
                hr = pPropBag->Read(L"FriendlyName", &varName, NULL);
                if (SUCCEEDED(hr))
                {
                    StringCchCopy(device, MAX_FRIENDLY_NAME_LENGTH, _com_util::ConvertBSTRToString(varName.bstrVal));
                }
                VariantClear(&varName);
                pPropBag->Release();

                break;
            }
            pMoniker->Release();
        } // End for While
        pEnumCat->Release();
    }
    pSysDevEnum->Release();
    CoUninitialize();

    return 0;
}

// ANSI转UTF-8,后面初始化解码时使用

/**
 * Convert a string from the system ANSI code page (CP_ACP) to UTF-8.
 *
 * The conversion goes through UTF-16 using the Win32 conversion APIs. Input
 * is read up to its first NUL (via c_str()). If any conversion step fails the
 * result is an empty string.
 *
 * @param strIn   ANSI-encoded input. May be the same object as strOut.
 * @param strOut  Receives the UTF-8 result.
 * @return A copy of the UTF-8 result (same content as strOut).
 */
std::string AnsiToUTF8( const std::string& strIn, std::string& strOut )
{
    // Build the result in a local so strIn is fully consumed before strOut
    // is modified (the two may alias).
    std::string utf8;

    // ANSI -> UTF-16. Passing -1 makes the returned length include the NUL.
    const int wideLen = MultiByteToWideChar(CP_ACP, 0, strIn.c_str(), -1, NULL, 0);
    if (wideLen > 0)
    {
        std::wstring wide((size_t)wideLen, L'\0');
        if (MultiByteToWideChar(CP_ACP, 0, strIn.c_str(), -1, &wide[0], wideLen) > 0)
        {
            // UTF-16 -> UTF-8.
            const int utf8Len = WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, NULL, 0, NULL, NULL);
            if (utf8Len > 0)
            {
                std::string buffer((size_t)utf8Len, '\0');
                if (WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, &buffer[0], utf8Len, NULL, NULL) > 0)
                {
                    // c_str() trims the terminator the API wrote into the buffer.
                    utf8.assign(buffer.c_str());
                }
            }
        }
    }

    strOut = utf8;
    return strOut;
}

二、抓取音频并转为G711

1、结构体定义   

/*
 * Shared state of the capture -> resample -> G.711 encode pipeline.
 * The fields are filled in by init_enc, init_decode and init_swr and consumed
 * by thread_cap.
 */
typedef struct  
{
    /* Encoder side (set up in init_enc): codec context, a frame sized to
     * frameSize samples, and the sample buffer attached to that frame. */
    AVCodecContext        *pEncCodecCtx;
    AVFrame                *pEncFrame;
    uint8_t                *pEncFrameBuf;
    
    /* Index of the audio stream inside pDecFmtCtx (set in init_decode). */
    int                    iAudioIndex;

    /* Capture side (set up in init_decode): input context opened with the
     * dshow input format, decoder context for the audio stream, and the
     * packet/frame used by thread_cap for reading and decoding.
     * NOTE(review): the allocation of pDecPacket is not visible in this
     * section - confirm it is allocated before thread_cap starts. */
    AVFormatContext        *pDecFmtCtx;
    AVCodecContext        *pDecCodecCtx;
    AVPacket            *pDecPacket;
    AVFrame                *pDecFrame;

    /* Not referenced by the functions in this section; presumably the
     * source/destination channel layouts and sample rates - TODO confirm. */
    int                    src_chl_layout;
    int                    dst_chl_layout;
    int                    src_sample_rate;
    int                    dst_sample_rate;

    /* Samples per encoded frame: used as the encoder frame_size in init_enc
     * and as the FIFO drain threshold in thread_cap. */
    int                    frameSize;
    /* FIFO of S16 samples allocated in init_enc; thread_cap writes resampled
     * audio into it and reads frameSize-sample chunks out for encoding. */
    AVAudioFifo*        audiofifo;

    /* Resampler state (set up in init_swr): context, assumed input sample
     * count per conversion (22050), output sample capacity, output channel
     * count, and the output sample buffer with its line size. */
    struct SwrContext    *swr_ctx;
    int                    src_nb_samples;
    int                    dst_nb_samples;
    int                    max_dst_nb_samples;
    int                    dst_nb_channels;
    int                    dst_linesize;
    uint8_t                **dst_data;

    /* Callback invoked by thread_cap for every encoded packet as
     * cb(data, size, ts, usr); usr is passed through unchanged. */
    audio_data_call_back    cb;
    void*                    usr;
}ST_FFAUDIO;

/* Single pipeline instance shared by all functions in this file. */
static ST_FFAUDIO s_st_ff;

static int s_ichannel = 1;        // channel count
static int s_sample = 8000;    // output (encoder) sample rate in Hz
static int s_type = 0;               // G.711 variant: 1 selects mu-law, any other value A-law (see init_enc)

2、ffmpeg初始化 

// One-time FFmpeg setup, to be run before any of the init_* functions below.
// Global network initialisation of libavformat.
avformat_network_init();
// Register the libavdevice devices so that av_find_input_format("dshow")
// in init_decode can find the DirectShow input.
avdevice_register_all();

3、初始化编码器

static int init_enc(int ichannel)
{
    AVCodec *pCodec;
    int size=0;

    AVCodecID codec_id = AV_CODEC_ID_PCM_ALAW;   //G711A

    if (s_type == 1)
    {
        codec_id = AV_CODEC_ID_PCM_MULAW;  //G711U
    }

    pCodec = avcodec_find_encoder(codec_id);
    if (!pCodec) {
        return -1;
    }
    s_st_ff.pEncCodecCtx = avcodec_alloc_context3(pCodec);
    if (!s_st_ff.pEncCodecCtx) {
        return -1;
    }

    s_st_ff.pEncCodecCtx->codec_id = codec_id;
    s_st_ff.pEncCodecCtx->codec_type = AVMEDIA_TYPE_AUDIO;
    s_st_ff.pEncCodecCtx->sample_fmt = AV_SAMPLE_FMT_S16;
    s_st_ff.pEncCodecCtx->sample_rate= s_sample;//44100;
    s_st_ff.pEncCodecCtx->channel_layout= ichannel==1 ? AV_CH_LAYOUT_MONO : AV_CH_LAYOUT_STEREO;
    s_st_ff.pEncCodecCtx->channels = av_get_channel_layout_nb_channels(s_st_ff.pEncCodecCtx->channel_layout);
    s_st_ff.pEncCodecCtx->bit_rate = 64000;  

    if (avcodec_open2(s_st_ff.pEncCodecCtx, pCodec, NULL) < 0) {
        return -1;
    }

    s_st_ff.pEncCodecCtx->frame_size = s_st_ff.frameSize;    //s_st_ff.pEncCodecCtx->sample_rate / 2;

    s_st_ff.pEncFrame = av_frame_alloc();
    s_st_ff.pEncFrame->nb_samples= s_st_ff.pEncCodecCtx->frame_size;
    s_st_ff.pEncFrame->format= s_st_ff.pEncCodecCtx->sample_fmt;
    size = av_samples_get_buffer_size(NULL, s_st_ff.pEncCodecCtx->channels,s_st_ff.pEncCodecCtx->frame_size,s_st_ff.pEncCodecCtx->sample_fmt, 1);
    s_st_ff.pEncFrameBuf = (uint8_t *)av_malloc(size);
    avcodec_fill_audio_frame(s_st_ff.pEncFrame, s_st_ff.pEncCodecCtx->channels, s_st_ff.pEncCodecCtx->sample_fmt,(const uint8_t*)s_st_ff.pEncFrameBuf, size, 1);
    

    s_st_ff.audiofifo = av_audio_fifo_alloc(AV_SAMPLE_FMT_S16, s_ichannel, 1);

    return 0;
}

4、初始化解码器

static int init_decode(const char* dev, int ichannel)
{
    s_st_ff.pDecFmtCtx = avformat_alloc_context();
    AVDictionary* options = NULL;
    av_dict_set(&options, "list_devices", "true", 0);
    AVInputFormat *iformat = av_find_input_format("dshow");

    char _dev[128] = {0};   
    GetAudioInputDevice(_dev);
  
    log_msg_ex("dev : %s", _dev);
    std::string device_name = "audio=";
    device_name += std::string(_dev);
    std::string strOut;
    const char* utf8 = AnsiToUTF8(device_name, strOut).c_str();

    AVDictionary* tmp = NULL;
    av_dict_set_int(&tmp, "sample_rate", (long)44100, 0);     //我的设备不支持8000,先采44100在重采样成8000
    av_dict_set_int(&tmp, "sample_size", (long)16, 0);
    av_dict_set_int(&tmp, "channels", (long)ichannel, 0);
    //av_dict_set_int(&tmp, "audio_buffer_size", (long)40, 0);  //buffer大小是以采样率44100计算的,设置了采样率也无效

    int ret2 = avformat_open_input(&s_st_ff.pDecFmtCtx, strOut.c_str(), iformat, &tmp);
    if (ret2 != 0)
    {
        return -1;
    }

    if(avformat_find_stream_info(s_st_ff.pDecFmtCtx,NULL)<0)  
    { 
        return -1;  
    }  

    int ret;
    AVCodec         *pCodec;
    for(int i=0; i<s_st_ff.pDecFmtCtx->nb_streams; i++)   
    {
        if(s_st_ff.pDecFmtCtx->streams[i]->codecpar->codec_type==AVMEDIA_TYPE_AUDIO)  
        {  
            pCodec = avcodec_find_decoder(s_st_ff.pDecFmtCtx->streams[i]->codecpar->codec_id);

            if (pCodec == nullptr)
            {
                printf("Codec not found.(没有找到解码器)\n");
                return -1;
            }
            //从视频流中拷贝参数到codecCtx
            s_st_ff.pDecCodecCtx = avcodec_alloc_context3(pCodec);
            if ((ret = avcodec_parameters_to_context(s_st_ff.pDecCodecCtx, s_st_ff.pDecFmtCtx->streams[i]->codecpar)) < 0)
            {
                return -1;
            }
            s_st_ff.iAudioIndex=i;  
            break;  
        }  
        if(s_st_ff.iAudioIndex==-1)  
        { 
            return -1;  
        }
    }

    if(avcodec_open2(s_st_ff.pDecCodecCtx, pCodec,NULL)<0)  
    { 
        return -1;  
    } 

    s_st_ff.pDecFrame=av_frame_alloc();  
    if(NULL==s_st_ff.pDecFrame)  
    { 
        return -1;  
    }  
    
    return 0;
}

5、初始化重采样(44100Hz → 目标采样率)

int init_swr(int ichannel)
{
    int ret = 0;
    s_st_ff.swr_ctx = swr_alloc();
    if (!s_st_ff.swr_ctx) {
        fprintf(stderr, "Could not allocate resampler context\n");
        return -1;
    }

    /* set options */
    av_opt_set_int(s_st_ff.swr_ctx, "in_channel_layout",     ichannel==1 ? AV_CH_LAYOUT_MONO : AV_CH_LAYOUT_STEREO, 0);
    av_opt_set_int(s_st_ff.swr_ctx, "in_sample_rate",       44100, 0);
    av_opt_set_sample_fmt(s_st_ff.swr_ctx, "in_sample_fmt", AV_SAMPLE_FMT_S16, 0);

    av_opt_set_int(s_st_ff.swr_ctx, "out_channel_layout",    ichannel==1 ? AV_CH_LAYOUT_MONO : AV_CH_LAYOUT_STEREO, 0);
    av_opt_set_int(s_st_ff.swr_ctx, "out_sample_rate",       s_sample, 0);
    av_opt_set_sample_fmt(s_st_ff.swr_ctx, "out_sample_fmt", AV_SAMPLE_FMT_S16, 0);

    /* initialize the resampling context */
    if ((ret = swr_init(s_st_ff.swr_ctx)) < 0) {
        fprintf(stderr, "Failed to initialize the resampling context\n");
        return -1;
    }

    s_st_ff.src_nb_samples = 22050;

    s_st_ff.max_dst_nb_samples = s_st_ff.dst_nb_samples =
        av_rescale_rnd(s_st_ff.src_nb_samples, s_sample, 44100, AV_ROUND_UP);
    s_st_ff.dst_nb_channels = av_get_channel_layout_nb_channels(ichannel==1 ? AV_CH_LAYOUT_MONO : AV_CH_LAYOUT_STEREO);
    ret = av_samples_alloc_array_and_samples(&s_st_ff.dst_data, &s_st_ff.dst_linesize, s_st_ff.dst_nb_channels,
                                             s_st_ff.dst_nb_samples, AV_SAMPLE_FMT_S16, 0);
    if (ret < 0) {
        fprintf(stderr, "Could not allocate destination samples\n");
        return -1;
    }

    return 0;
}

6、抓取线程

FILE *fp_pcm;
FILE *fp_711;
FILE *fp_swr;
void* __stdcall thread_cap(void* arg)
{
    int frameCnt=100;
    AVPacket pkt;
    int i = 0;
    int ret;
    
    uint64_t tss = 0;

    uint ts = 0;
    while(s_istart)
    {  
        //log_msg_ex("run  %d", s_istart);
        MSG   msg;
        while( PeekMessage( &msg, NULL, 0, 0, PM_REMOVE ) )
        {
            TranslateMessage( &msg );
            DispatchMessage( &msg );
        }

        uint64_t ts_start = gettime();
        if(av_read_frame(s_st_ff.pDecFmtCtx, s_st_ff.pDecPacket)>=0)
        {  
            if(s_st_ff.pDecPacket->stream_index==s_st_ff.iAudioIndex)
            {
                ret = avcodec_send_packet(s_st_ff.pDecCodecCtx, s_st_ff.pDecPacket);
                if(ret < 0){  
                    printf("avcodec_send_packet Error.\n");  
                    return NULL;  
                }  
                ret = avcodec_receive_frame(s_st_ff.pDecCodecCtx, s_st_ff.pDecFrame);
                if(ret < 0){  
                    printf("avcodec_receive_frame Error.\n");  
                    return NULL;  
                }  

                uint8_t * pcmBuffer = s_st_ff.pDecFrame->data[0];

                int pcmSize = av_samples_get_buffer_size(NULL, s_st_ff.pDecCodecCtx->channels, s_st_ff.pDecFrame->nb_samples, s_st_ff.pDecCodecCtx->sample_fmt, 1);
                if (pcmSize <= 0)
                {
                    printf("pcmSize <= 0");
                    continue;
                }
#if OUTPUT_PCM    
                fwrite(pcmBuffer,1,pcmSize,fp_pcm);    //Y                   
#endif                       
                //重采样              
                int swr_len = 0;
                int fifo_size = swr_get_out_samples(s_st_ff.swr_ctx, 0);

                swr_len = swr_convert(s_st_ff.swr_ctx, s_st_ff.dst_data, s_st_ff.dst_nb_samples, (const uint8_t **)s_st_ff.pDecFrame->data, s_st_ff.src_nb_samples);
                if (swr_len < 0) {
                    printf("Error while converting\n");
                    break;
                }

                int dst_bufsize = av_samples_get_buffer_size(&s_st_ff.dst_linesize, s_st_ff.dst_nb_channels,
                    swr_len, AV_SAMPLE_FMT_S16, 1);
                if (dst_bufsize < 0) {
                    log_msg_ex("Could not get sample buffer size\n");
                    break;
                }
#if OUTPUT_PCM    
                fwrite(s_st_ff.dst_data[0], 1, dst_bufsize, fp_swr);
#endif               
                av_audio_fifo_realloc(s_st_ff.audiofifo, av_audio_fifo_size(s_st_ff.audiofifo) + s_st_ff.dst_nb_samples);
                av_audio_fifo_write(s_st_ff.audiofifo, (void **)s_st_ff.dst_data, s_st_ff.dst_nb_samples);

                while(av_audio_fifo_size(s_st_ff.audiofifo) >= s_st_ff.frameSize){
                    //encode g711a

                    int frame_size = FFMIN(av_audio_fifo_size(s_st_ff.audiofifo), s_st_ff.frameSize);
                    tss+=frame_size;
                    AVFrame* output_frame = NULL;
                    output_frame = av_frame_alloc();
                    output_frame->nb_samples = frame_size;
                    output_frame->channel_layout = AV_CH_LAYOUT_MONO;
                    output_frame->format = AV_SAMPLE_FMT_S16;
                    output_frame->sample_rate = s_sample;
                    output_frame->pts = tss;

                    av_frame_get_buffer(output_frame, 0);
                    av_audio_fifo_read(s_st_ff.audiofifo, (void **)output_frame->data, frame_size);

                    av_init_packet(&pkt);
                    pkt.data = NULL;    // packet data will be allocated by the encoder
                    pkt.size = 0;

                    //s_st_ff.pEncFrame->pts = i++;
                    ret = avcodec_send_frame(s_st_ff.pEncCodecCtx, output_frame);    //s_st_ff.pEncFrame
                    ret = avcodec_receive_packet(s_st_ff.pEncCodecCtx, &pkt);
                    if (ret < 0) {
                        printf("Error encoding frame\n");
                        return NULL;
                    }
    #if OUTPUT_G711
                    fwrite(pkt.data, 1, pkt.size, fp_711);
    #endif
                    ts += s_st_ff.frameSize;

                    //printf("Succeed to encode size:%5d   pts:%I64d   ts:%u\n", pkt.size, pkt.pts, ts);

                    if (s_st_ff.cb)
                    {
                        s_st_ff.cb(pkt.data, pkt.size, ts, s_st_ff.usr);
                    }
                    av_packet_unref(&pkt);

                    if (!s_istart)
                    {
                        break;
                    }
                }
            }  
            av_packet_unref(s_st_ff.pDecPacket);  
        }  
    }  
    printf("thread quit");
    
    return NULL;
}

参考链接:

https://blog.csdn.net/wer85121430/article/details/79672813

https://blog.csdn.net/leixiaohua1020

  • 0
    点赞
  • 9
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
敬告:该系列的课程在抓紧录制更新中,敬请大家关注。敬告: 该系列的课程涉及:FFmpeg,WebRTC,SRS,Nginx,Darwin,Live555,等。包括:音视频、流媒体、直播、Android、视频监控28181、等。  我将带领大家一起来学习:采集麦克风、PCM重采样、AAC或MP3音频编码存储、并直播。具体内容包括:1.FFmpeg采集麦克风并存储为PCM。2.PCM重采样原理及实战。3.采集麦克风并实时转码AAC或MP3并保存文件。4.采集麦克风并实时转码AAC并可以直播。 音视频与流媒体是一门很复杂的技术,涉及的概念、原理、理论非常多,很多初学者不学 基础理论,而是直接做项目,往往会看到c/c++的代码时一头雾水,不知道代码到底是什么意思,这是为什么呢? 因为没有学习音视频和流媒体的基础理论,就比如学习英语,不学习基本单词,而是天天听英语新闻,总也听不懂。所以呢,一定要认真学习基础理论,然后再学习播放器、转码器、非编、流媒体直播、视频监控、等等。 梅老师从事音视频与流媒体行业18年;曾在永新视博、中科大洋、百度、美国Harris广播事业部等公司就职,经验丰富;曾亲手主导广电直播全套项目,精通h.264/h.265/aac,曾亲自参与百度app上的网页播放器等实战产品。 目前全身心自主创业,主要聚焦音视频+流媒体行业,精通音视频加密、流媒体在线转码快编等热门产品。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值