语音对讲的实现(码流客户端与服务器)

简述

使用ffmpeg拉流和推流实现对音频流的获取和发送,使用directsound实现对音频的采集和播放功能,视频播放显示可以使用D3D实现。本文提供客户端和服务器该功能的核心代码,工程中涉及的其他功能均已删除。

核心功能
1.码流播放处理 (码流客户端)
int ClientPlayer::PlayStream()
{
    m_bStop = false;
    m_nVideoIndex = -1;
    m_nAudioIndex = -1;
    int i = 0, res = 0, speak_ret = 0, nValue = 0;
    char buf[64] = { 0 };
    m_bPlaying = false;
    AVStream* in_stream;
    AVStream* out_stream;
    AVCodecContext* AudioCodecCtx = NULL;
    char sUrl[64] = {0};
    char sSpeakUrl[64] = { 0 };

    _snprintf_s(sUrl, sizeof(sUrl), sizeof(sUrl) - 1, "%s", "rtsp://admin:123456@192.168.1.100:554/main");
    _snprintf_s(sSpeakUrl, sizeof(sSpeakUrl), sizeof(sSpeakUrl) - 1, "%s", "rtsp://admin:123456@192.168.1.100:554/speak");


    //初始化网络
    avformat_network_init();

    //设置超时时间/连接协议/缓存区
    AVDictionary* optionsDict = NULL;
    av_dict_set(&optionsDict, "rtsp_transport", "udp", 0);
    av_dict_set(&optionsDict, "stimeout", "5000000", 0);
    av_dict_set(&optionsDict, "buffer_size", "8192000", 0);
    av_dict_set(&optionsDict, "recv_buffer_size", "8192000", 0);

    //中断回调处理
    m_lastReadPacktTime = av_gettime();
    m_pAVFmtCxt = avformat_alloc_context();
    m_pAVFmtCxt->interrupt_callback.opaque = this;
    m_pAVFmtCxt->interrupt_callback.callback = [](void* ctx)
    {
        ClientPlayer* p_this = (ClientPlayer*)ctx;
        int timeout = 3;
        if (av_gettime() - p_this->m_lastReadPacktTime > timeout * 1000 * 1000)
        {
            return -1;
        }
        return 0;
    };

    //打开码流文件
    res = avformat_open_input(&m_pAVFmtCxt, sUrl, NULL, &optionsDict);
    if (res < 0)
    {
        printf("avformat_open_input fail: %d", res);
        return -1;
    }

    //流探测时间配置
    m_pAVFmtCxt->probesize = 100 * 1024;    
    m_pAVFmtCxt->max_analyze_duration = 1 * AV_TIME_BASE;

    res = avformat_find_stream_info(m_pAVFmtCxt, NULL);
    if (res < 0)
    {
        printf("error %x in avformat_find_stream_info\n", res);
        return -1;
    }

    //根据输出url创建音频输出流上下文参数 
    if (m_bSupportMike)
    {
        int rt = avformat_alloc_output_context2(&m_pOutAVFmtCxt, NULL, "rtsp", sSpeakUrl);
        if (!m_pOutAVFmtCxt)
        {
            m_bSupportMike = false;
            printf("Could not create output context,rt=%d\n",rt);
        }
    }

    //查找视音频流
    av_dump_format(m_pAVFmtCxt, 0, sUrl, 0);
    printf("m_pAVFmtCxt->nb_streams:%d \n", m_pAVFmtCxt->nb_streams);
    for (i = 0; i < m_pAVFmtCxt->nb_streams; i++)
    {
        if (m_pAVFmtCxt->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO)
        {
            m_nVideoIndex = i;
        }
        else if (m_pAVFmtCxt->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
            m_nAudioIndex = i;
        }

        printf("m_pAVFmtCxt->streams[i]->codec->codec_type:%d", m_pAVFmtCxt->streams[i]->codecpar->codec_type);
        if (m_bSupportAudio && m_pAVFmtCxt->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO)
        {
            //根据输入流配置创建输出流,指定音频输出编码为pcmu
            if (m_bSupportMike)
            {
   
                in_stream = m_pAVFmtCxt->streams[i];

                auto pAudioCodec = avcodec_find_encoder(AV_CODEC_ID_PCM_MULAW);
                if (pAudioCodec == NULL) {
                    printf("Cannot find any endcoder");
                    continue;
                }
                AudioCodecCtx = avcodec_alloc_context3(pAudioCodec);
                if (AudioCodecCtx == NULL) {
                    printf("Cannot alloc context");
                    continue;
                }
                out_stream = avformat_new_stream(m_pOutAVFmtCxt, pAudioCodec);
                if (!out_stream)
                {
                    printf("Failed allocating output stream\n");
                    m_bSupportMike = false;
                    continue;
                }

                //设置解码器上下文参数
                AudioCodecCtx->sample_fmt = AV_SAMPLE_FMT_S16;
                AudioCodecCtx->sample_rate = m_nSampleRate;
                AudioCodecCtx->channels = 1;
                AudioCodecCtx->codec_tag = 0;
                if (m_pOutAVFmtCxt->oformat->flags & AVFMT_GLOBALHEADER)
                    AudioCodecCtx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
                avcodec_parameters_from_context(out_stream->codecpar, AudioCodecCtx);
                avcodec_free_context(&AudioCodecCtx);
            }
        }
    }

    //查找视频解码器
    m_pCodec = avcodec_find_decoder(m_pAVFmtCxt->streams[m_nVideoIndex]->codecpar->codec_id);
    if (!m_pCodec)
    {
        printf("video decoder not found\n");
        return -1;
    }
    //查找音频解码器
    if (m_bSupportAudio && m_nAudioIndex != -1)
    {
        if (!m_pAudioCodecCxt)
            m_pAudioCodecCxt = avcodec_alloc_context3(NULL);
        auto pAudioCodecpar = m_pAVFmtCxt->streams[m_nAudioIndex]->codecpar;
        avcodec_parameters_to_context(m_pAudioCodecCxt, pAudioCodecpar);
        m_pAudioCodec = avcodec_find_decoder(pAudioCodecpar->codec_id);

        if (m_pAudioCodec == NULL || m_pAudioCodecCxt == NULL)
        {
            printf("audio decoder not found\n");
            return -1;
        }

        //重采样配置
        m_pSwrContext = swr_alloc_set_opts(0,         // 输入为空,则会分配
            av_get_default_channel_layout(m_channels_play),
            AV_SAMPLE_FMT_S16,                         // 输出的格式
            m_nSampleRate,                     // 输出的采样频率
            av_get_default_channel_layout(m_pAudioCodecCxt->channels),
            m_pAudioCodecCxt->sample_fmt,       // 输入的格式
            m_pAudioCodecCxt->sample_rate,      // 输入的采样率
            0,
            0);
        auto ret = swr_init(m_pSwrContext);
        if (ret < 0)
        {
            printf("Failed to swr_init(pSwrContext);");
            return -1;
        }

        //初始化directsound
        if (InitDirectSound() == FALSE)
            m_bSupportAudio = false;
    }

    //视频处理
    m_CodecId = m_pCodec->id;
    if(!m_pCodecCxt)
        m_pCodecCxt = avcodec_alloc_context3(NULL);
    avcodec_parameters_to_context(m_pCodecCxt, m_pAVFmtCxt->streams[m_nVideoIndex]->codecpar);
    if (m_pCodecCxt)
    {
        if (m_pCodecCxt->width == 0 || m_pCodecCxt->height == 0)
        {
            printf("m_pCodecCxt->width:%d, m_pCodecCxt->height:%d", m_pCodecCxt->width, m_pCodecCxt->height);
            return -1;
        }
    }
    else
        return -1;

    AVCodecContext* temp_codecctx = m_pCodecCxt;
    memcpy(temp_codecctx, m_pCodecCxt, sizeof(m_pCodecCxt));
    if (m_pCodecCxt->codec_type == AVMEDIA_TYPE_VIDEO)
    {
        printf("Soft Solution");
        avcodec_close(m_pCodecCxt);
        m_pCodecCxt = temp_codecctx;
        m_pCodecCxt->thread_count = 4;

        if (m_nD3DRenderWay == D3D_SURFACE) //D3D surface
        {
            if (m_Dxva2D3DRender.InitD3DRender(m_hWnd, m_pCodecCxt->width, m_pCodecCxt->height) == false)
            {
                printf("InitD3DRender fail");
            }

            m_pOutBuffer = (uint8_t*)av_malloc(av_image_get_buffer_size(AV_PIX_FMT_YUV420P, m_pCodecCxt->width, m_pCodecCxt->height, 1));
            if (m_pOutBuffer == NULL)
                return -1;

            av_image_fill_arrays(m_pFrameBGR->data, m_pFrameBGR->linesize, m_pOutBuffer, AV_PIX_FMT_YUV420P, m_pCodecCxt->width, m_pCodecCxt->height, 1); //填充AVFrame数据缓冲
            m_pImgConvertCtx = sws_getContext(m_pCodecCxt->width, m_pCodecCxt->height, m_pCodecCxt->pix_fmt, m_pCodecCxt->width, m_pCodecCxt->height, AV_PIX_FMT_YUV420P, SWS_FAST_BILINEAR, NULL, NULL, NULL);
            if (m_pImgConvertCtx == NULL)
                return -1;
        }
    
        m_nActualWidth = m_pCodecCxt->width;
        m_nActualHeight = m_pCodecCxt->height;

        res = avcodec_open2(m_pCodecCxt, m_pCodec, NULL);
        if (res < 0)
        {
            printf("avcodec_open2 video fail  error:%x", res);
            return -1;
        }
    }

    // 输入音频的处理
    if (m_bSupportAudio && m_pAudioCodecCxt)
    {
        if (m_pAudioCodecCxt->codec_type == AVMEDIA_TYPE_AUDIO)
        {
            res = avcodec_open2(m_pAudioCodecCxt, m_pAudioCodec, NULL);
            if (res < 0)
            {
                printf("avcodec_open2 audio fail  error:%x", res);
                avcodec_close(m_pAudioCodecCxt);
                return -1;
            }
        }
    }

    // 输出音频的处理
    if (m_bSupportMike)
    {
        if (m_CaptureAudio.Open()) //打开麦克风
        {
            m_bresampleinitsuc = audio_resample_init();
            if (!m_bresampleinitsuc)
            {
                m_bSupportMike = false;
            }
            m_CaptureAudio.SetGrabAudioFrames(TRUE, this); //采集音频
        }
        else
        {
            m_bSupportMike = false;
        }
    }

    printf("Start ThreadDecode...");

    //创建解码线程
    m_pDecodeThread = (my_thread_t*)malloc(sizeof(my_thread_t));
    if (m_pDecodeThread != NULL)
    {
        m_bDecodeThreadRun = true;
        res = my_thread_create(m_pDecodeThread, ThreadDecode, this);
        if (res == -1)
        {
            printf("my_thread_create ThreadDecode failed  res:%x", res);
            return -1;
        }
    }

    return 0;
}

2.码流解码处理 (码流客户端)
void ClientPlayer::DecodeStream()
{
    printf("DecodeAndShow");
    AVPacket pkt = { 0 };
    m_bPlaying = true;
    uint8_t* pBuffer;
    bool bEnoughSpace = true;
    int nTimeCnt = 0;
    int res = 0;
    int nRectDrawWait = 0;
    bool bRecordLastIFrame = false;
    int num_av_read_frame_err = 0;
    int num_stream_index_err = 0;
    uint8_t * outData[2] = {0};
    outData[0] = (uint8_t*)av_malloc(1152 * 8);
    outData[1] = (uint8_t*)av_malloc(1152 * 8);

    uint8_t* pktdata;
    int pktsize;
    int len = 0;
    bool bPushAudioToQueue = false;
    m_bStopAudio = false;
    CRect ShowRect;

    AVFrame* pAvFrame = av_frame_alloc();
    if (pAvFrame == NULL)
        return;


    //创建音频播放线程
    if (m_bSupportAudio && m_pAudioPlayThread)
    {
        my_thread_create(m_pAudioPlayThread, ThreadAudioPlay, this);
    }

    //创建音频对讲线程
    if (m_bSupportMike && m_pAudioTalkThread)
    {
        my_thread_create(m_pAudioTalkThread, ThreadAudioSend, this);
    }

    while (m_bDecodeThreadRun && !m_bQuit) 
    {
        m_lastReadPacktTime = av_gettime();
        if (av_read_frame(m_pAVFmtCxt, &pkt) >= 0) // 读取每一帧数据
        {
            num_av_read_frame_err = 0;
            m_nFrameStartTime = time(NULL);
            if (pkt.stream_index == m_nVideoIndex) //视频流
            {
                num_stream_index_err = 0;
                nTimeCnt = 0;

                if (pkt.flags == 1)//先解视频关键帧,后处理音频
                    bPushAudioToQueue = true;
                if (m_pCodecCxt == NULL || pAvFrame == NULL) {
                    printf("m_pCodecCxt == NULL || pAvFrame == NULL");
                    break;
                }

                int gotvframe = 0;
                auto sd_ret = avcodec_send_packet(m_pCodecCxt, &pkt);
                if (sd_ret != 0 && sd_ret != AVERROR(EAGAIN)) {
                    printf("avcodec_send_packet err, rt=%d", sd_ret);
                    enableReConnect();
                }
                else {
                    while (gotvframe == 0 && !m_bQuit) {
                        gotvframe = avcodec_receive_frame(m_pCodecCxt, pAvFrame);
                        if (gotvframe == 0)
                        {
                            
                            m_nCurPKSize = pkt.size;

                            if (m_pImgConvertCtx && m_pFrameBGR && m_pOutBuffer && pAvFrame)
                            {
                                sws_scale(m_pImgConvertCtx, (const uint8_t* const*)pAvFrame->data, pAvFrame->linesize, 0,
                                    m_pCodecCxt->height, m_pFrameBGR->data, m_pFrameBGR->linesize);

                                if (m_nD3DRenderWay == D3D_SURFACE) //surface
                                {
                                    //D3D surface视频渲染处理,此处暂不提供
                                }
                            }
                        }
                    }
                }
            }
            else if (pkt.stream_index == m_nAudioIndex) //音频流
            {
                num_stream_index_err = 0;
                if (m_bSupportAudio) {
                    pktdata = pkt.data;
                    pktsize = pkt.size;

                    if (pktsize > 0)
                    {
                        int gotframe = 0;
                        if (m_pAudioCodecCxt == NULL || pAvFrame == NULL) {
                            printf("m_pAudioCodecCxt == NULL || pAvFrame == NULL");
                            break;
                        }
                        len = avcodec_send_packet(m_pAudioCodecCxt, &pkt);
                        if (len != 0 && len != AVERROR(EAGAIN))
                        {
                            pktsize = 0;
                            printf("avcodec_send_packet len < 0");
                            break;
                        }
                        auto data_size = av_get_bytes_per_sample(m_pAudioCodecCxt->sample_fmt); 
                        if (data_size < 0) {
                            printf("Failed to calculate data size\n");
                            break;
                        }
                        while (gotframe == 0 && !m_bQuit) {
                            gotframe = avcodec_receive_frame(m_pAudioCodecCxt, pAvFrame);
                            if (!gotframe)
                            {

                                if (bPushAudioToQueue == true && m_bEnableAudio && !m_bStopAudio)
                                {
                                    audio_frame_t audioFrame;

                                    // 获取每一个采样点的字节大小
                                    numBytes = av_get_bytes_per_sample(AV_SAMPLE_FMT_S16);
                                    //uint8_t outData[2][192000] = { 0 };
                                    // 修改采样率参数后,须要从新获取采样点的样本个数
                                    auto dstNbSamples = av_rescale_rnd(pAvFrame->nb_samples,
                                        m_src_sample_rate,
                                        pAvFrame->sample_rate,
                                        AV_ROUND_ZERO);
                                      
                                    // 重采样处理
                                    int data_size = 0;
                                    try
                                    {
                                        auto nb = swr_convert(m_pSwrContext,
                                            (uint8_t**)outData,
                                            dstNbSamples,
                                            (const uint8_t**)pAvFrame->data,
                                            pAvFrame->nb_samples);
                                        data_size = av_samples_get_buffer_size(nullptr, m_channels_play, nb, AV_SAMPLE_FMT_S16, 1);
                                    }
                                    catch (const std::exception&)
                                    {
                                        m_bSupportAudio = false;
                                        printf("swr_convert throw err, set m_bSupportAudio false");
                                        continue;
                                    }

                                    int copy_size = 0; 
                                    int copy_ptr = 0;                              
                                    for (int isub = data_size; isub > 0; isub -= copy_size) {
                                        if (isub > m_audio_buffer_notify_size) {
                                            copy_size = m_audio_buffer_notify_size;
                                            copy_ptr = data_size - isub;
                                        }
                                        else
                                            copy_size = isub;
                                        audioFrame.data_size = copy_size;
                                        memcpy(audioFrame.data, outData[0] + copy_ptr, copy_size);

                                        //线程锁,音频播放缓冲队列存储音频帧数据
                                        EnterCriticalSection(&m_lock);
                                        m_AudioPlayQue.push(audioFrame);
                                        LeaveCriticalSection(&m_lock);
                                    }
                                }

                            }
                        }
                    }
                }
            }
            else if (++num_stream_index_err > 20) {
                printf("pkt.stream_index unfind, %d",pkt.stream_index);
                enableReConnect();
            }

            av_packet_unref(&pkt);
        }
        else {
            if (++num_av_read_frame_err > 10) {
                printf("num_av_read_frame_err is more than 10");
                enableReConnect();
            }
        }
    }

    if (pAvFrame)
        av_free(pAvFrame);

    if (pBuffer)
        av_free(pBuffer);

    if (outData[0] && outData[1])
    {
        av_free(outData[0]);
        av_free(outData[1]);
        outData[0] = 0;
        outData[1] = 0;
    }
}

3.音频播放线程 (码流客户端)
void ClientPlayer::AudioPlayThread()
{
    bool bPlay = true;
    LPVOID buf = NULL;
    DWORD  buf_len = 0;
    DWORD res = WAIT_OBJECT_0;

    DWORD offset = m_audio_buffer_notify_size;

    if (!m_pDSBuffer8)
        return;
    m_pDSBuffer8->SetCurrentPosition(0);
    m_pDSBuffer8->Play(0, 0, DSBPLAY_LOOPING);
    while (m_bPlaying && m_bSupportAudio)
    {
        if (!m_bEnableAudio || m_bStopAudio || m_bPause)
        {
            while (!m_AudioPlayQue.empty())
            {
                m_AudioPlayQue.pop();
            }
            if (m_pDSBuffer8 && bPlay)
            {
                m_pDSBuffer8->Stop();
                m_pDSBuffer8->Restore();
                m_pDSBuffer8->SetCurrentPosition(0);
            }
            bPlay = false;
            continue;
        }
        else
        {
            if (bPlay == false) {
                m_pDSBuffer8->SetCurrentPosition(0);
                m_pDSBuffer8->Play(0, 0, DSBPLAY_LOOPING);
                bPlay = true;
            }
            if ((res >= WAIT_OBJECT_0) && (res <= WAIT_OBJECT_0 + 3))
            {
                m_pDSBuffer8->Lock(offset, m_audio_buffer_notify_size, &buf, &buf_len, NULL, NULL, 0);
                if (buf_len > 0) {
                    if (m_AudioPlayQue.size() > 0) {

                        audio_frame_t& audioFrame = m_AudioPlayQue.front();

                        int cpy_num = buf_len < audioFrame.data_size ? buf_len : audioFrame.data_size;
                        memcpy(buf, audioFrame.data, cpy_num);

                        if (audioFrame.data_size <= buf_len) {
                            EnterCriticalSection(&m_lock);
                            printf("audioFrame.data_size:%d", audioFrame.data_size);
                            m_AudioPlayQue.pop();
                            LeaveCriticalSection(&m_lock);
                        }
                        else {
                            audioFrame.data_size = audioFrame.data_size - buf_len;
                            uint8_t data[2048];
                            memcpy(data, audioFrame.data + cpy_num, audioFrame.data_size);
                            memcpy(audioFrame.data, data, audioFrame.data_size);
                        }

                        offset += cpy_num;
                        offset %= (m_audio_buffer_notify_size * MAX_AUDIO_BUF);

                        int buf_short_len = cpy_num * sizeof(char) / sizeof(short);
                        const int out_len = 80;

                    }
                    else {
                        memset(buf, 0, buf_len);
                        offset += buf_len;
                        offset %= (m_audio_buffer_notify_size * MAX_AUDIO_BUF);
                    }
                }
                m_pDSBuffer8->Unlock(buf, buf_len, NULL, 0);
            }
        }
        res = WaitForMultipleObjects(MAX_AUDIO_BUF, m_event, FALSE, 1000/*INFINITE*/);
    }

    printf("PlayAudioThread thread stop...");
}
3.音频发送线程 (码流客户端)
void ClientPlayer::AudioSendThread()
{
    int ret = 0;
    int index = 0;

    if (!(m_pOutAVFmtCxt->oformat->flags & AVFMT_NOFILE))
    {
        ret = avio_open(&m_pOutAVFmtCxt->pb, m_url_speak.c_str(), AVIO_FLAG_WRITE);
        if (ret < 0)
        {
            printf("avio_open fail ret:%d", ret);
            return;
        }
    }
    AVDictionary* dict = NULL;
    av_dict_set(&dict, "rtsp_transport", "udp", 0);
    av_dict_set(&dict, "muxdelay", "0.1", 0);
    m_pOutAVFmtCxt->audio_codec_id = m_pOutAVFmtCxt->oformat->audio_codec;
    ret = avformat_write_header(m_pOutAVFmtCxt, NULL);
    if (ret < 0)
    {
        printf("avformat_write_header fail ret:%d\n", ret);
        return;
    }

    while (m_bPlaying && m_bSupportMike)
    {
        if (!m_bEnableMike)
        {
            Sleep(200);
            while (!m_AudioTalkQue.empty())
            {
                EnterCriticalSection(&m_talklock);
                m_AudioTalkQue.pop();
                LeaveCriticalSection(&m_talklock);
            }
            continue;
        }

        if (m_AudioTalkQue.size() > 0)
        {
            talk_frame_t talkFrame = m_AudioTalkQue.front();
            char g711Buffer[2048];
            long g711Size = talkFrame.data_size / 2;
            Pcm2G711(talkFrame.data, talkFrame.data_size, g711Buffer, g711Size, PCM_U);

            auto pPkt = av_packet_alloc();
            AVPacket& pkt = *pPkt;
            av_init_packet(&pkt);
            pkt.data = (uint8_t*)g711Buffer;
            pkt.size = g711Size;
            pkt.stream_index = 0;

            pkt.pts = pkt.dts = index;
            index++;
            pkt.duration = 1;
            int ret = av_interleaved_write_frame(m_pOutAVFmtCxt, &pkt);
            if (ret < 0)
            {
                printf("av_interleaved_write_frame fail ret:%d", ret);
            }
            if (!m_AudioTalkQue.empty())
            {
                EnterCriticalSection(&m_talklock);
                m_AudioTalkQue.pop();
                LeaveCriticalSection(&m_talklock);
            }

            //av_free_packet(&pkt);
            av_packet_free(&pPkt);
        }
        else {
            Sleep(20);
        }
    }

    av_write_trailer(m_pOutAVFmtCxt); 

    if (m_pOutAVFmtCxt && !(m_pOutAVFmtCxt->oformat->flags & AVFMT_NOFILE))
        avio_close(m_pOutAVFmtCxt->pb);

    avformat_free_context(m_pOutAVFmtCxt);
}

4. 初始化Directsound(码流客户端)
//directsound 初始化
//directsound 初始化
BOOL ClientPlayer::InitDirectSound()
{
    // Creates the DirectSound device and a looping secondary buffer of
    // MAX_AUDIO_BUF * notify-size bytes, plus one auto-reset event per
    // notification position for the playback thread. Returns TRUE on success.
    int channels = m_channels_play;
    int bits_per_sample = m_bits_per_sample_play;

    if (m_bSupportAudio)
    {
        // Larger notify chunks for the XS7300 platform and AAC streams.
        if (m_nPlatForm == PLATFORM_TYPE_XS7300 || m_pAVFmtCxt->streams[m_nAudioIndex]->codecpar->codec_id == AV_CODEC_ID_AAC)
        {
            m_audio_buffer_notify_size = BUFFERNOTIFYSIZE_XS7300;
        }
        else
        {
            m_audio_buffer_notify_size = BUFFERNOTIFYSIZE_HISI;
        }

        if (FAILED(DirectSoundCreate8(NULL, &m_pDS, NULL)))
        {
            return FALSE;
        }

        // SetCooperativeLevel needs some window handle; fall back to desktop.
        HWND hDlg = GetForegroundWindow()->GetSafeHwnd();
        if (!hDlg) {
            hDlg = GetDesktopWindow()->GetSafeHwnd();
        }

        if (FAILED(m_pDS->SetCooperativeLevel(hDlg, DSSCL_NORMAL)))
        {
            printf("[SetCooperativeLevel Faile]\n");
            return FALSE;
        }
    }

    // FIX: the WAVEFORMATEX was malloc'd (unchecked) and never freed.
    // DirectSound copies the format during CreateSoundBuffer, so a stack
    // instance is sufficient. Field comments were also realigned.
    WAVEFORMATEX wfx;
    memset(&wfx, 0, sizeof(wfx));
    wfx.wFormatTag = WAVE_FORMAT_PCM;                                    // format type
    wfx.nChannels = channels;                                            // number of channels (mono, stereo...)
    wfx.nSamplesPerSec = m_nSampleRate;                                  // sample rate
    wfx.nAvgBytesPerSec = m_nSampleRate * (bits_per_sample / 8) * channels; // bytes per second
    wfx.nBlockAlign = (bits_per_sample / 8) * channels;                  // bytes per sample frame
    wfx.wBitsPerSample = bits_per_sample;                                // bits per sample
    wfx.cbSize = 0;                                                      // no extra format data

    DSBUFFERDESC dsbd;
    memset(&dsbd, 0, sizeof(dsbd));
    dsbd.dwSize = sizeof(dsbd);
    dsbd.dwFlags = DSBCAPS_GLOBALFOCUS | DSBCAPS_CTRLPOSITIONNOTIFY | DSBCAPS_GETCURRENTPOSITION2;
    dsbd.dwBufferBytes = MAX_AUDIO_BUF * m_audio_buffer_notify_size;
    dsbd.lpwfxFormat = &wfx;

    if (FAILED(m_pDS->CreateSoundBuffer(&dsbd, &m_pDSBuffer, NULL)))
    {
        printf("create m_pDSBuffer failed \n");
        return FALSE;
    }

    if (FAILED(m_pDSBuffer->QueryInterface(IID_IDirectSoundBuffer8, (LPVOID*)&m_pDSBuffer8)))
    {
        printf("create m_pDSBuffer8 failed \n");
        return FALSE;
    }
    if (FAILED(m_pDSBuffer8->QueryInterface(IID_IDirectSoundNotify, (LPVOID*)&m_pDSNotify)))
    {
        printf("create m_pDSNotify failed \n");
        return FALSE;
    }

    // Start from an empty queue and a stopped buffer.
    while (!m_AudioPlayQue.empty())
    {
        m_AudioPlayQue.pop();
    }
    if (m_pDSBuffer8)
    {
        m_pDSBuffer8->Stop();
        m_pDSBuffer8->Restore();
    }

    // One auto-reset event per notification position.
    for (int i = 0; i < MAX_AUDIO_BUF; i++)
    {
        m_pDSPosNotify[i].dwOffset = i * m_audio_buffer_notify_size;
        m_event[i] = ::CreateEvent(NULL, false, false, NULL);
        m_pDSPosNotify[i].hEventNotify = m_event[i];
    }

    // FIX: result was previously ignored.
    if (FAILED(m_pDSNotify->SetNotificationPositions(MAX_AUDIO_BUF, m_pDSPosNotify)))
    {
        printf("SetNotificationPositions failed \n");
    }
    m_pDSNotify->Release();

    return TRUE;
}

5. 音频采集封装类 (码流客户端)

mycaptureaudio.h

#include <mmsystem.h>
#include <dsound.h>
#include "wavefile.h"
#include "G711Codec.h"

#define NUM_REC_NOTIFICATIONS  16
// Callback interface implemented by consumers of captured microphone audio
// (e.g. ClientPlayer), invoked by MyCaptureAudio for each captured buffer.
class CAudioFrameHandler 
{

public:
    // Called once per captured chunk; pBuffer points to raw audio bytes,
    // lBufferSize is the chunk length in bytes.
    virtual void CaptureAudioFrameData(uint8_t* pBuffer, long lBufferSize) = 0; 

};
// Wraps DirectSound capture: enumerates devices, opens a capture buffer with
// position notifications, and delivers captured PCM chunks to an outside
// CAudioFrameHandler via a notification thread.
class MyCaptureAudio
{

public:
    BOOL m_bGrabAudioFrame;                 //recording now ? also used by event recv thread

    protected:
    LPDIRECTSOUNDCAPTURE8 m_pCapDev;        // capture device object pointer
    LPDIRECTSOUNDCAPTUREBUFFER m_pCapBuf;   // capture buffer object pointer
    LPDIRECTSOUNDNOTIFY8 m_pNotify;         // interface used to register position notifications
    GUID m_guidCapDevId;                    // capture device id
    WAVEFORMATEX m_wfxInput;                // input (capture) audio format
    CWaveFile *m_pWaveFile;                 // wave file pointer
    bool m_bCanWav;                         // whether the wave file is writable
    DSBPOSITIONNOTIFY m_aPosNotify[NUM_REC_NOTIFICATIONS + 1]; // notification position array
    HANDLE m_hNotifyEvent;                  // notification event handle
    BOOL m_abInputFmtSupported[20];
    DWORD m_dwCapBufSize;                   // capture buffer size in bytes
    DWORD m_dwNextCapOffset;                // next read offset within the capture buffer
    DWORD m_dwNotifySize;                   // bytes between notification positions
    CAudioFrameHandler* m_frame_handler;    // outer frame data dealer ptr 

public:                                 
    static BOOL CALLBACK CbEnumDevProc(LPGUID lpGUID, LPCTSTR lpszDesc, 
    LPCTSTR lpszDrvName, LPVOID lpContext); // callback func to add enum devices string name 
    static UINT NotifyCaptureThread(LPVOID data);

    protected:
    HRESULT InitDirectSoundCapture(); 
    HRESULT DeInitDirectSoundCapture(); 
    HRESULT InitNotifications(); 
    HRESULT CreateCaptureBuffer(WAVEFORMATEX * wfx); 
    HRESULT SetStartCaptureData(BOOL bStartRec);
    HRESULT DoCapturedData(); 
    void SetWavFormat(WAVEFORMATEX * wfx);

public:
    MyCaptureAudio(void);
    ~MyCaptureAudio(void);
    BOOL EnumDevices(HWND hList);
    BOOL Open(void); 
    BOOL Close(); 
    void SetGrabAudioFrames(BOOL bGrabAudioFrames, CAudioFrameHandler* frame_handler); 

}; 

mycaptureaudio.cpp

#include "stdafx.h"
#include "mycaptureaudio.h"
#include <mmsystem.h>
#include <dsound.h>

#ifndef SAFE_DELETE
// Delete a heap object and null the pointer (guards against double delete).
#define SAFE_DELETE(p)  {if(p) {delete (p);(p)=NULL;}}
#endif

#ifndef SAFE_RELEASE
// Release a COM interface and null the pointer.
#define SAFE_RELEASE(p) {if(p) {(p)->Release(); (p)=NULL;}}
#endif

#ifndef MAX
// Maximum of two values. NOTE: evaluates each argument twice — avoid side effects.
#define MAX(a,b)        ((a) > (b) ? (a) : (b))
#endif

MyCaptureAudio::MyCaptureAudio(void)
{
    // Give every member a defined value FIRST, so the object is in a
    // consistent state even if COM initialization below fails and we return
    // early.  (The original returned before initializing any member on the
    // failure path, and never initialized m_pWaveFile, m_frame_handler,
    // m_dwNextCapOffset, m_dwCapBufSize or m_dwNotifySize at all.)
    m_pCapDev = NULL;
    m_pCapBuf = NULL;
    m_pNotify = NULL;
    m_pWaveFile = NULL;
    m_frame_handler = NULL;
    m_hNotifyEvent = NULL;
    m_bGrabAudioFrame = FALSE;
    m_bCanWav = false;
    m_guidCapDevId = GUID_NULL;          // capture device id (GUID_NULL = default device)
    m_dwCapBufSize = 0;
    m_dwNotifySize = 0;
    m_dwNextCapOffset = 0;
    ZeroMemory(&m_aPosNotify, sizeof(m_aPosNotify));
    ZeroMemory(&m_abInputFmtSupported, sizeof(m_abInputFmtSupported));

    // Default input format tag: PCM.  The full parameters (rate, bits,
    // channels) are filled in later by SetWavFormat().
    ZeroMemory(&m_wfxInput, sizeof(m_wfxInput));
    m_wfxInput.wFormatTag = WAVE_FORMAT_PCM;

    // 1. Initialize COM for this thread; DirectSound is COM based.
    if(FAILED(CoInitialize(NULL))) 
    {
        printf("MyCaptureAudio CoInitialize Failed!\r\n"); 
        return;
    }
}

// Balances the CoInitialize() done in the constructor.
// NOTE(review): the destructor does not call Close(); callers are expected to
// Close() before destruction, otherwise the DirectSound interfaces and the
// notification event handle leak — confirm this is the intended contract.
MyCaptureAudio::~MyCaptureAudio(void)
{
    CoUninitialize() ; 
}

// Device-enumeration callback: adds each capture device's description to the
// combo box passed in lpContext and attaches a heap-allocated copy of its
// GUID as the item data.  Returns TRUE to continue enumeration, FALSE to stop.
//
// Ownership: the GUID copy belongs to the list item; whoever clears the list
// must free() it.  (The original free()d the GUID immediately after storing
// it, leaving a dangling pointer in the item data, and it stored the data at
// hard-coded index 0 with the listbox message LB_SETITEMDATA instead of the
// index returned by CB_ADDSTRING.)
BOOL CALLBACK MyCaptureAudio::CbEnumDevProc(LPGUID lpGUID, LPCTSTR lpszDesc, 
             LPCTSTR lpszDrvName, LPVOID lpContext) 
{
    (void)lpszDrvName;                      // driver module name is not used here
    HWND hList = (HWND)lpContext;
    if(!hList) 
        return FALSE;                       // no target control: stop enumerating

    LPGUID lpTemp = NULL;
    if(lpGUID != NULL) 
    {
        // lpGUID is NULL only for the "Primary Sound Driver" entry.
        if ((lpTemp = (LPGUID)malloc(sizeof(GUID))) == NULL) 
            return(TRUE);                   // skip this device, keep enumerating
        memcpy(lpTemp, lpGUID, sizeof(GUID));
    }

    // Add the description and attach the GUID copy to the SAME item.
    LRESULT nIndex = ::SendMessage(hList, CB_ADDSTRING, 0, (LPARAM)lpszDesc);
    if (nIndex == CB_ERR || nIndex == CB_ERRSPACE)
    {
        free(lpTemp);                       // item was not added: we still own the copy
        return(TRUE);
    }
    ::SendMessage(hList, CB_SETITEMDATA, (WPARAM)nIndex, (LPARAM)lpTemp);
    return(TRUE);
}

// Capture worker thread.  Waits on the DirectSound notification event and
// drains the capture buffer each time a notification slice completes.  Runs
// until m_bGrabAudioFrame is cleared (by SetGrabAudioFrames) or WM_QUIT is
// received.
// NOTE(review): m_bGrabAudioFrame is a plain BOOL shared across threads with
// no synchronization — appears to work here, but it is not formally safe;
// consider an atomic/interlocked flag.
UINT MyCaptureAudio::NotifyCaptureThread(LPVOID data)
{
    MyCaptureAudio * pado = static_cast<MyCaptureAudio *>(data); 
    MSG   msg;
    HRESULT hr; 
    DWORD dwResult; 
    while(pado->m_bGrabAudioFrame) 
    {
        // 6. Wait for the capture-notification event OR any windows message.
        dwResult = MsgWaitForMultipleObjects(1, &(pado->m_hNotifyEvent), FALSE, INFINITE, QS_ALLEVENTS );
        switch( dwResult ) 
        {
            case WAIT_OBJECT_0 + 0:
                // Notification event signaled: DirectSound just finished
                // capturing another slice of the circular buffer, so drain
                // the newly available audio now.
                if(FAILED(hr = pado->DoCapturedData())) 
                {
                    printf("Error handling DirectSound notifications."); 
                    pado->m_bGrabAudioFrame = FALSE ; 
                }
            break;
            case WAIT_OBJECT_0 + 1:
                // Windows messages are available: pump them so the thread
                // stays responsive and honors WM_QUIT.
                while( PeekMessage(&msg, NULL, 0, 0, PM_REMOVE)) 
                { 
                    TranslateMessage(&msg); 
                    DispatchMessage(&msg); 
                    if(msg.message == WM_QUIT) 
                        pado->m_bGrabAudioFrame = FALSE; 
                }
            break;
        }
    }
    // MFC thread exit; TRUE asks MFC to delete the CWinThread object.
    AfxEndThread(0, TRUE); 
    return 0; 
}

// Enumerate the audio capture devices into the given list/combo control.
// Returns TRUE on success, FALSE if enumeration could not start.
BOOL MyCaptureAudio::EnumDevices(HWND hList) 
{
    // BUG FIX: the original passed &hList (an HWND*), but CbEnumDevProc casts
    // lpContext straight to HWND — the callback therefore never received a
    // valid window handle.  Pass the handle itself.
    if(FAILED(DirectSoundCaptureEnumerate(
    (LPDSENUMCALLBACK)(MyCaptureAudio::CbEnumDevProc),
    (VOID*)hList)))
    {
        return(FALSE);
    }
    return (TRUE); 
}
// Create the capture device unless a grab is already running.
// Returns TRUE on success (or when already grabbing), FALSE on failure.
BOOL MyCaptureAudio::Open(void)
{
    printf("MyCaptureAudio::Open 000");
    // BUG FIX: hr must start with a defined value — the original read an
    // uninitialized hr (undefined behavior) whenever m_bGrabAudioFrame was
    // already TRUE and the branch below was skipped.
    HRESULT hr = S_OK;
    if(!m_bGrabAudioFrame) 
    {
        hr = InitDirectSoundCapture(); 
    }
    return (FAILED(hr)) ? FALSE : TRUE; 
}

// Tear down capture: release the DirectSound objects first, then the
// notification event handle.  Returns TRUE on success.
BOOL MyCaptureAudio::Close() 
{
    HRESULT hr = DeInitDirectSoundCapture();

    if (m_hNotifyEvent != NULL)
    {
        CloseHandle(m_hNotifyEvent);
        m_hNotifyEvent = NULL;              // avoid a dangling handle on re-open
    }

    return FAILED(hr) ? FALSE : TRUE;
}

// Create the DirectSound capture device (default device) and set up the
// default input format.  Returns S_OK on success, E_FAIL on failure.
HRESULT MyCaptureAudio::InitDirectSoundCapture()
{
    printf("InitDirectSoundCapture");
    HRESULT hr; 
    ZeroMemory(&m_aPosNotify, sizeof(DSBPOSITIONNOTIFY) * (NUM_REC_NOTIFICATIONS + 1));
    m_dwCapBufSize = 0;
    m_dwNotifySize = 0;

    // 2. Create IDirectSoundCapture using the default capture device.
    hr = DirectSoundCaptureCreate(&DSDEVID_DefaultCapture, &m_pCapDev, NULL);
    if (FAILED(hr))
    {
        // BUG FIX: the original continued (and opened the wav dump) even when
        // device creation failed; bail out early instead.
        return E_FAIL;
    }

    // Fill m_wfxInput with the default PCM capture format.
    SetWavFormat(&m_wfxInput);

    #ifdef WAV_TEST_PCM
    // Optional debug dump of the raw captured PCM to a wav file.
    m_pWaveFile = new CWaveFile();
    if(m_pWaveFile->OpenFile("pcmtest2.wav", m_wfxInput) == true)
    {
        // BUG FIX: this is the SUCCESS path (m_bCanWav is set), but the
        // original logged "Open test.wav fail" here.
        printf("Open pcmtest2.wav ok");
        m_bCanWav = true;
    }
    #endif

    return S_OK;
}
// Release every DirectSound interface (reverse order of acquisition) and, in
// WAV_TEST_PCM builds, close and free the debug wav dump.  Always succeeds.
HRESULT MyCaptureAudio::DeInitDirectSoundCapture()
{
    SAFE_RELEASE(m_pNotify);
    SAFE_RELEASE(m_pCapBuf);
    SAFE_RELEASE(m_pCapDev);

    #ifdef WAV_TEST_PCM
    m_bCanWav = false;                      // stop any further writes first
    if (m_pWaveFile != NULL)
    {
        m_pWaveFile->CloseFile();
    }
    SAFE_DELETE(m_pWaveFile);
    #endif

    return S_OK;
}
// (Re)create the circular capture buffer for the given format and register
// the position notifications on it.  Returns S_OK on success or a FAILED
// HRESULT on error.
HRESULT MyCaptureAudio::CreateCaptureBuffer(WAVEFORMATEX * wfx) 
{
    HRESULT hr;
    DSCBUFFERDESC dscbd;

    if(NULL == m_pCapDev || NULL == wfx)
        return E_FAIL;

    // Drop any previous buffer/notify interface before creating a new one.
    // (The original released these twice in a row; once is enough.)
    SAFE_RELEASE(m_pNotify);
    SAFE_RELEASE(m_pCapBuf);

    // Notification granularity: about 1/8 second of audio (at least 1024
    // bytes), rounded down to a whole block so every notification lands on a
    // sample-frame boundary.  44100Hz 16-bit mono -> 11025 bytes.
    m_dwNotifySize = MAX(1024, wfx->nAvgBytesPerSec / 8);
    m_dwNotifySize -= m_dwNotifySize % wfx->nBlockAlign;

    // The whole circular buffer holds NUM_REC_NOTIFICATIONS slices.
    m_dwCapBufSize = m_dwNotifySize * NUM_REC_NOTIFICATIONS;

    // 3. Create the capture buffer; the format is fixed at creation time.
    ZeroMemory(&dscbd, sizeof(dscbd));
    dscbd.dwSize        = sizeof(dscbd);
    dscbd.dwBufferBytes = m_dwCapBufSize;
    dscbd.lpwfxFormat   = wfx;

    if(FAILED(hr = m_pCapDev->CreateCaptureBuffer(&dscbd, &m_pCapBuf, NULL)))
        return hr;      // BUG FIX: was S_FALSE — a SUCCESS code, so FAILED() callers never saw the error

    m_dwNextCapOffset = 0;
    if(FAILED(hr = InitNotifications()))
        return hr;      // BUG FIX: was S_FALSE, same problem as above

    return S_OK;
}
// Register NUM_REC_NOTIFICATIONS position notifications on the capture
// buffer, one at the end of each m_dwNotifySize slice, all signalling the
// same auto-reset event.  Returns S_OK on success or a FAILED HRESULT.
HRESULT MyCaptureAudio::InitNotifications() 
{
    HRESULT hr; 
    int i; 
    if(NULL == m_pCapBuf)
        return E_FAIL;  // BUG FIX: was S_FALSE (a success code) — invisible to FAILED() callers

    // 4.1 Create the auto-reset notification event.  Re-use an existing one
    // to avoid leaking a handle when the buffer is re-created; check the
    // result (the original never did).
    if (NULL == m_hNotifyEvent)
    {
        m_hNotifyEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
        if (NULL == m_hNotifyEvent)
            return E_FAIL;
    }

    // 4.2 Query the notification interface from the capture buffer.
    if(FAILED(hr = m_pCapBuf->QueryInterface(IID_IDirectSoundNotify,(VOID**)&m_pNotify)))
        return hr;      // BUG FIX: was S_FALSE

    // Place one notification at the END of each notify-size slice.
    for(i = 0; i < NUM_REC_NOTIFICATIONS; i++) 
    {
        m_aPosNotify[i].dwOffset = (m_dwNotifySize * i) + m_dwNotifySize - 1;
        m_aPosNotify[i].hEventNotify = m_hNotifyEvent;             
    }

    // 4.3 Register the positions; the event is waited on in
    // NotifyCaptureThread, which drains the buffer on each signal.
    if(FAILED(hr = m_pNotify->SetNotificationPositions(NUM_REC_NOTIFICATIONS, m_aPosNotify)))
        return hr;      // BUG FIX: was S_FALSE
    return S_OK;
}
// Start (bStartRec TRUE) or stop (FALSE) capturing.
// Start: create the buffer, begin looping capture, spawn the notify thread.
// Stop: wait for the notify thread to exit, stop the buffer, drain leftovers.
// Returns S_OK on success or a FAILED HRESULT on error.
HRESULT MyCaptureAudio::SetStartCaptureData(BOOL bStartRec)
{
    HRESULT hr;
    if(bStartRec) 
    {
        printf("SetStartCaptureData start");
        // 3. (Re)create the capture buffer for the current input format.
        if(FAILED(hr = CreateCaptureBuffer(&m_wfxInput)))
            return hr;  // BUG FIX: was S_FALSE — a SUCCESS code, invisible to FAILED() callers

        // 5. Start looping capture into the circular buffer.
        if(FAILED(hr = m_pCapBuf->Start(DSCBSTART_LOOPING)))
            return hr;  // BUG FIX: was S_FALSE

        // Spawn the thread that waits for notification events and drains the
        // buffer.  AfxBeginThread wraps CreateThread and auto-releases the
        // CWinThread object.  NOTE: the caller must set m_bGrabAudioFrame =
        // TRUE before calling us, otherwise the thread exits immediately.
        AfxBeginThread(MyCaptureAudio::NotifyCaptureThread, (LPVOID)(this));
    } 
    else 
    { 
        printf("SetStartCaptureData stop");
        if(NULL == m_pCapBuf)
            return S_OK;    // nothing was ever started

        // Give the notify thread time to observe m_bGrabAudioFrame == FALSE
        // and exit before we stop the buffer underneath it.
        Sleep(500);

        // Stop capturing, then drain any data that did not trigger a
        // notification.  A failed Stop() is deliberately swallowed
        // (best-effort shutdown), matching the original behavior.
        if(FAILED(hr = m_pCapBuf->Stop()))
            return S_OK;
        if(FAILED(hr = DoCapturedData()))
            return hr;  // BUG FIX: was S_FALSE
    }
    return S_OK;
}
// Drain whole notification-slices of captured audio from the circular buffer
// and forward them to the outer frame handler (and the debug wav dump).
// Returns S_OK when data was consumed, S_FALSE when there is nothing to do
// or the buffer could not be accessed.
HRESULT MyCaptureAudio::DoCapturedData() 
{
     HRESULT hr;
     VOID*   pbCaptureData    = NULL;
     DWORD   dwCaptureLength  = 0;
     VOID*   pbCaptureData2   = NULL;
     DWORD   dwCaptureLength2 = 0;
     DWORD   dwReadPos;
     DWORD   dwCapturePos;
     LONG lLockSize;
     if(NULL == m_pCapBuf)
        return S_FALSE; 
     
     if(FAILED(hr = m_pCapBuf->GetCurrentPosition(&dwCapturePos, &dwReadPos)))
        return S_FALSE;

     // Bytes available between our last drain point and the safe read cursor.
     lLockSize = dwReadPos - m_dwNextCapOffset;
     if(lLockSize < 0)
        lLockSize += m_dwCapBufSize;    // the read cursor wrapped around the ring

     // 1. Lock: round down to a whole notification slice so we always consume
     // aligned chunks; nothing to do if less than one slice is ready.
     lLockSize -= (lLockSize % m_dwNotifySize);
     if(lLockSize == 0)
        return S_FALSE;
        
     // The locked region may come back in TWO pieces because the buffer is
     // circular (pbCaptureData2 is the wrapped part, or NULL).
     if(FAILED(hr = m_pCapBuf->Lock( m_dwNextCapOffset, lLockSize,
               &pbCaptureData, &dwCaptureLength, 
               &pbCaptureData2, &dwCaptureLength2, 0L)))
        return S_FALSE;

     // 2. Optional raw PCM dump for debugging.
     #ifdef WAV_TEST_PCM
     if(m_bCanWav && m_pWaveFile)
     {
        m_pWaveFile->WriteDataToFile((LPSTR)pbCaptureData, dwCaptureLength);
     }
     #endif

     // 3. Hand the first region to the outer frame handler.
     if(m_frame_handler)
     {
        m_frame_handler->CaptureAudioFrameData((uint8_t*)pbCaptureData, dwCaptureLength); 
     }
     
     m_dwNextCapOffset += dwCaptureLength; 
     m_dwNextCapOffset %= m_dwCapBufSize; // circular buffer
     if(pbCaptureData2 != NULL) 
     {
        // BUG FIX: the original forwarded the FIRST region (pbCaptureData /
        // dwCaptureLength) here again, duplicating it and dropping the
        // wrapped part of the audio entirely.
        if(m_frame_handler) 
        {
            m_frame_handler->CaptureAudioFrameData((uint8_t*)pbCaptureData2, dwCaptureLength2); 
        }
        #ifdef WAV_TEST_PCM
        if(m_bCanWav && m_pWaveFile)
        {
            m_pWaveFile->WriteDataToFile((LPSTR)pbCaptureData2, dwCaptureLength2);
        }
        #endif
        m_dwNextCapOffset += dwCaptureLength2; 
        m_dwNextCapOffset %= m_dwCapBufSize; // circular buffer
     }

     // 4. Unlock exactly what we locked.
     m_pCapBuf->Unlock( pbCaptureData, dwCaptureLength, pbCaptureData2, dwCaptureLength2);
     return S_OK;
}
// Fill wfx with the default capture format: 44.1 kHz, 16-bit PCM, mono.
void MyCaptureAudio::SetWavFormat(WAVEFORMATEX * wfx)
{
    ZeroMemory(wfx, sizeof(WAVEFORMATEX));

    const WORD  kChannels      = 1;
    const WORD  kBitsPerSample = 16;
    const DWORD kSampleRate    = 44100;    // CD quality (48000 would be DVD)

    wfx->wFormatTag     = WAVE_FORMAT_PCM;
    wfx->nChannels      = kChannels;
    wfx->wBitsPerSample = kBitsPerSample;
    wfx->nSamplesPerSec = kSampleRate;

    // Derived fields: bytes per sample frame, then bytes per second.
    wfx->nBlockAlign     = (WORD)(kChannels * (kBitsPerSample / 8));
    wfx->nAvgBytesPerSec = wfx->nBlockAlign * kSampleRate;
}
// Public start/stop switch for audio capture.
// TRUE: store the handler, raise the run flag, then create the buffer, start
// capture and spawn the notify thread.  FALSE: drop the flag (so the notify
// thread sees it and exits), then stop and drain the buffer.
// NOTE: statement order matters — m_bGrabAudioFrame must be set BEFORE
// SetStartCaptureData() spawns the thread that loops on it.
void MyCaptureAudio::SetGrabAudioFrames(BOOL bGrabAudioFrames, CAudioFrameHandler* frame_handler) 
{
    m_frame_handler = frame_handler; 
    m_bGrabAudioFrame = bGrabAudioFrames; 
    SetStartCaptureData(bGrabAudioFrames); 
}

6. 码流服务器处理

在RTSP Record中获取到对讲流会话,寻找带speak的会话,对speak会话进行接收处理,接收到rtp数据后,解rtp包获取到音频数据,将pcmu转为pcm,发送到播放设备中。

// RTSP RECORD handler (excerpt — elided parts marked "..." in the article).
// Locates the talkback ("speak") sub-session referenced by the request path
// and starts receiving its RTP audio.
// Returns 0 on success, -1 after replying with an RTSP error / on failure.
int rtsp_handle_record(rtsp_request_t* req, const char* uri)
{
    ...

    // Find the "speak" sub-session in the request path.
    // NOTE(review): media_session[3] hard-codes the slot used for the speak
    // stream — confirm this index against the SETUP handler's layout.
    if(strstr(path, "speak") != NULL)
    {
        ts_speak = (media_session_t *)req->media_session[3];
        if(!ts_speak)
        {
            rtsp_request_reply(req, RTSP_RESP_SESSION_NOT_FOUND, NULL);
            return -1;
        }
    }

    ...

    req->state = REQ_STATE_START;
    // Begin receiving RTP audio on the speak session.
    if(ts_speak) 
    {
        if(session_recv_start(ts_speak) == -1)
        {
            req->state = REQ_STATE_STOP;
            return -1;
        }
    }

    return 0;
}

/*
 * Arm a read event on the session's RTP socket so incoming talkback audio is
 * delivered to on_session_recv.  Idempotent: if the event already exists,
 * nothing is done.  Returns 0 on success, -1 if the event cannot be added.
 */
int session_recv_start(media_session_t *ts)
{
    if (ts->ev_speek)
        return 0;                           /* already armed */

    /* the event loop requires non-blocking I/O on the fd */
    sock_set_noblk(ts->sock->rtp_fd);

    ts->ev_speek = event_create(ts->sock->rtp_fd, 0, on_session_recv,
                                NULL, on_session_error, ts);
    if (event_add(ts->evbase, ts->ev_speek) == -1)
    {
        event_destroy(ts->ev_speek);
        ts->ev_speek = NULL;
        return -1;
    }

    return 0;
}

/*
 * Event-loop callback: read one datagram of talkback RTP audio from the
 * session's RTP socket and push it toward the playback path.
 */
static void on_session_recv(int fd, void *arg)
{
    media_session_t *ts = (media_session_t *)arg;
    uint8_t buf[2048] = {0};

    /* BUG FIX: the original declared `rlen` but then used an undeclared
     * variable `len` — the snippet did not even compile as published. */
    int rlen = sock_recv2(ts->sock->rtp_fd, buf, sizeof(buf) - 1, 0);
    if (rlen > 0)
    {
        push_audio(buf, rlen);
    }

    (void)fd;   /* the socket is read via ts->sock->rtp_fd, not the fd param */
}

// Take one raw RTP packet of talkback audio, strip the RTP header, decode the
// PCMU (G.711 mu-law) payload to 16-bit linear PCM via the _mu2lin lookup
// table, and queue the result for the playback device (elided "...").
void push_audio(void* packet, int bytes)
{
    // A valid packet must be larger than the fixed 12-byte RTP header and
    // small enough for our staging buffer.
    if(packet == NULL || bytes <= 12 || bytes > AUDIO_BUF_SIZE) // the fixed RTP header is 12 bytes
    {
        return;
    }

    // 1. Parse the RTP packet; pkt.payload then points past the header.
    rtp_packet_t pkt = {0};
    if (0 != rtp_packet_deserialize(&pkt, packet, bytes))
    {
        return;
    }

    // 2. Decode mu-law payload bytes to linear PCM samples.
    // NOTE(review): if stMsg.buf elements are 16-bit (the (short) cast below
    // suggests so), sizeof(stMsg.buf) is a BYTE count while the loop indexes
    // ELEMENTS — verify this clamp cannot overrun buf by a factor of two.
    int i = 0;
    int msgid = stream_audio_get_msgid();
    audio_msg_t stMsg = {0};
    stMsg.mtype = RTP_AUDIO_MSGTYPE;
    stMsg.lenth = min(pkt.payloadlen, sizeof(stMsg.buf)); // payload length only (RTP header already skipped)
    for(i = 0; i < stMsg.lenth; i++)
    {
        stMsg.buf[i] = (short)_mu2lin[*(((uint8_t*)pkt.payload)+i)];
    }

    // 3. Send the decoded audio to the playback device (elided).
    ...
}


  • 1
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

浅笑一斤

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值