一 代码
ffmpeg版本5.1.2,dll是:ffmpeg-5.1.2-full_build-shared。x64的。
代码是windows端,用VS编译。
怎么使用这个代码?新建win32工程,复制这三个文件。设置ffmpeg库的include和lib目录。
代码是在系列 05上加的,这个demo可以作为直播的demo。
类似淘宝卖货直播、抖音直播。(我不确定它们是不是使用的rtmp,抓包看走的tcp。可能推流走rtmp,网页拉流走的http-flv。音视频数据有加密。)
/*
author: ashleycoder
CSDN blog: https://blog.csdn.net/chenquangobeijing
*/
#pragma once
extern "C"
{
#include "libswresample/swresample.h"
#include "libavformat/avformat.h"
#include "libavcodec/avcodec.h"
#include "libswscale/swscale.h"
#include "libavdevice/avdevice.h"
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
#include "libavutil/time.h"
#include "libavutil/timestamp.h"
#include "libavutil/pixfmt.h"
#include "libavutil/imgutils.h"
#include "libavutil/audio_fifo.h"
#include "libavutil/fifo.h"
};
#include <thread>
#include <mutex>
#pragma comment(lib, "avformat.lib")
#pragma comment(lib, "avutil.lib")
#pragma comment(lib, "avcodec.lib")
#pragma comment(lib, "swscale.lib")
#pragma comment(lib, "avdevice.lib")
#pragma comment(lib, "swresample.lib")
// Live A/V pipeline: captures camera + microphone via dshow, decodes
// (the camera delivers MJPEG), converts (sws YUVJ422P -> YUV420P,
// swr s16 -> fltp), re-encodes to H.264 + AAC and muxes to an FLV
// file or an RTMP URL.
class CVideoAudioLive
{
public:
    CVideoAudioLive(void);
    ~CVideoAudioLive(void);
public:
    void Start();
    void Ffmpeg_Init();
    int RTMP_Init(const char* pOutFileName, const char* pFormat);
    int Write_Header();
    //audio
    int SoundCard_Open();
    int Audio_Decode_Encode_Init();
    int Audio_Swr_Init();
    int Audio_Frame_Init();
    void AudioCapture_Thread_Fun();
    //video
    int Camera_Open();
    int Video_Init();
    void VideoCapture_Thread_Fun();
    //video audio
    void Encode_Write_Mutex_Thread();
    void Close();
    std::string dup_wchar_to_utf8(const wchar_t* wstr);
private:
    // One muxed output, two capture inputs (sound card + camera).
    AVFormatContext* m_pOutputFormatCtx = nullptr;
    AVFormatContext* m_pInputAudioFormatCtx = nullptr;
    AVFormatContext* m_pInputVideoFormatCtx = nullptr;
    // Audio decode (reframes the dshow PCM stream).
    const AVCodec* m_pAudioDecodeCodec = nullptr;
    AVCodecContext* m_pAudioDecodeCodecCtx = nullptr;
    // Audio encode (AAC).
    const AVCodec* m_pAudioEncodeCodec = nullptr;
    AVCodecContext* m_pAudioEncodeCodecCtx = nullptr;
    AVStream* m_pAudioStream = nullptr;
    int m_nAudioStream = -1;
    AVCodecID m_AudioCodecID = AV_CODEC_ID_AAC;
    SwrContext* m_pSwr = nullptr;
    AVPacket* m_pAudioDecodePacket = nullptr;
    // Video decode (MJPEG) and encode (H.264).
    const AVCodec* m_pVideoDecodeCodec = nullptr;
    AVCodecContext* m_pVideoDecodeCodecCtx = nullptr;
    const AVCodec* m_pVideoEncodeCodec = nullptr;
    AVCodecContext* m_pVideoEncodeCodecCtx = nullptr;
    AVStream* m_pVideoStream = nullptr;
    // Pixel-format conversion context (decoder fmt -> YUV420P).
    SwsContext* m_pSwsContext = nullptr;
    AVCodecID m_VideoCodecID = AV_CODEC_ID_H264;
    //audio
private:
    // Receives decoded PCM from the audio decoder.
    AVFrame* m_pDecodeAudioFrame = nullptr;
    // Staging frame for swr_convert (s16 packed -> fltp planar).
    AVFrame* m_pAudioConvertFrame = nullptr;
    // Planar fltp output buffers, one per channel.
    // BUGFIX: were uninitialized and later passed to free() in Close().
    uint8_t* m_pOutAudioBuffer[2] = { nullptr, nullptr };
    // Frame handed to the AAC encoder.
    AVFrame* m_pAudioEncodeFrame = nullptr;
    // PCM sample queue between the capture and encode threads.
    AVAudioFifo* m_pAudioFifo = nullptr;
    std::mutex audio_mutex;
    int m_nAudioFramesize = 0;
    //video
private:
    // Target of avcodec_receive_frame / source of sws_scale.
    AVFrame* m_pFrameScale = nullptr;
    // YUV420P frame fed to the H.264 encoder.
    AVFrame* m_pEncodeYUVFrame = nullptr;
    // Used by av_read_frame on the camera input.
    AVPacket* m_pVideoDecodePacket = nullptr;
    // Raw YUV byte queue between the capture and encode threads.
    AVFifo* m_pVideoFifo = nullptr;
    // BUGFIX: these were uninitialized until Video_Init() ran.
    int m_nFrameHeight = 0;
    int m_nFrameWidth = 0;
    int m_nYUVFrameSize = 0;
    int m_nVideoStream = -1;
    std::mutex video_mutex;
    std::thread m_VideoCaptureThread;
private:
    AVSampleFormat audio_input_format = AV_SAMPLE_FMT_S16;
    AVSampleFormat audio_encode_format = AV_SAMPLE_FMT_FLTP;
    const int AUDIO_SAMPLING_FREQUENCY = 44100;
    const uint64_t audio_channel_type = AV_CH_LAYOUT_STEREO;
    const int audio_channel_num = 2;
    const int frame_size = 1024; // AAC-LC requires 1024 samples per frame
    std::thread m_AudioCaptureThread;
    std::thread m_AudioWriteThread;
private:
    // Debug dump files (opened in the constructor; most writes are
    // commented out in the .cpp).
    FILE* m_pPCM = nullptr;
    FILE* m_pPCM2 = nullptr;
    int m_naudio_count = 0;
    FILE* m_pOutYUV = nullptr;
    FILE* m_pH264File = nullptr;
private:
    // Scratch buffer for av_err2str2 (the stock av_err2str macro uses a
    // compound literal that MSVC C++ rejects).
    char av_error[AV_ERROR_MAX_STRING_SIZE] = { 0 };
#define av_err2str2(errnum) av_make_error_string(av_error, AV_ERROR_MAX_STRING_SIZE, errnum)
};
/*
author: ashleycoder
CSDN blog: https://blog.csdn.net/chenquangobeijing
*/
#include "VideoAudioLive.h"
#include <functional>
#include <codecvt>
#include <locale>
#include <string>
#include <Windows.h>
CVideoAudioLive::CVideoAudioLive(void)
{
    // Debug dump files: raw captured PCM, converted float PCM, encoded
    // H.264 elementary stream, and the scaled YUV frames.
    struct DumpFile { FILE** fp; const char* name; };
    const DumpFile dumps[] = {
        { &m_pPCM,      "audio_read.pcm"  },
        { &m_pPCM2,     "audio_f32le.pcm" },
        { &m_pH264File, "output.264"      },
        { &m_pOutYUV,   "output.yuv"      },
    };
    for (const auto& d : dumps)
        fopen_s(d.fp, d.name, "wb");
}
// Tears down all FFmpeg state (contexts, frames, packets, buffers) via Close().
CVideoAudioLive::~CVideoAudioLive(void)
{
    Close();
}
// Runs the whole session: init everything, then spawn the two capture
// threads and the encode/mux thread. Blocks on the mux thread, which
// loops forever, so Start() never returns during a normal session.
void CVideoAudioLive::Start()
{
    Ffmpeg_Init();
    // Write to a local FLV by default; switch to the rtmp:// URL (with
    // pFormat = "flv") to push to a streaming server instead.
    const char* pOutFileName = "output.flv";
    const char* pFormat = nullptr;
    //const char* pOutFileName = "rtmp://127.0.0.1/live/now";
    //const char* pFormat = "flv";
    // BUGFIX: every init step's return code was ignored; a failed open
    // used to fall through and start the threads anyway.
    if (RTMP_Init(pOutFileName, pFormat) < 0)
        return;
    // Open the camera first, then the sound card: the camera takes
    // noticeably longer to open.
    if (Camera_Open() < 0)
        return;
    if (SoundCard_Open() < 0)
        return;
    if (Video_Init() < 0)
        return;
    if (Audio_Decode_Encode_Init() < 0)
        return;
    if (Audio_Swr_Init() < 0)
        return;
    if (Audio_Frame_Init() < 0)
        return;
    if (Write_Header() < 0)
        return;
    m_VideoCaptureThread = std::thread(&CVideoAudioLive::VideoCapture_Thread_Fun, this);
    m_AudioCaptureThread = std::thread(&CVideoAudioLive::AudioCapture_Thread_Fun, this);
    m_AudioWriteThread = std::thread(&CVideoAudioLive::Encode_Write_Mutex_Thread, this);
    m_AudioWriteThread.join();
}
// Converts a wide string (UTF-16 on Windows) to UTF-8.
// Needed because dshow device names must be handed to FFmpeg as UTF-8.
// Rewritten without std::wstring_convert/std::codecvt_utf8, which are
// deprecated since C++17.
std::string CVideoAudioLive::dup_wchar_to_utf8(const wchar_t* wstr)
{
    std::string utf8;
    if (wstr == nullptr)
        return utf8;
    for (size_t i = 0; wstr[i] != L'\0'; ++i)
    {
        unsigned int cp = static_cast<unsigned int>(wstr[i]);
        // Combine a UTF-16 surrogate pair (wchar_t is 16-bit on Windows).
        // Reading wstr[i + 1] is safe: at worst it is the terminator,
        // which is not a low surrogate.
        if (cp >= 0xD800u && cp <= 0xDBFFu)
        {
            const unsigned int lo = static_cast<unsigned int>(wstr[i + 1]);
            if (lo >= 0xDC00u && lo <= 0xDFFFu)
            {
                cp = 0x10000u + ((cp - 0xD800u) << 10) + (lo - 0xDC00u);
                ++i;
            }
        }
        if (cp < 0x80u)
        {
            utf8 += static_cast<char>(cp);
        }
        else if (cp < 0x800u)
        {
            utf8 += static_cast<char>(0xC0u | (cp >> 6));
            utf8 += static_cast<char>(0x80u | (cp & 0x3Fu));
        }
        else if (cp < 0x10000u)
        {
            utf8 += static_cast<char>(0xE0u | (cp >> 12));
            utf8 += static_cast<char>(0x80u | ((cp >> 6) & 0x3Fu));
            utf8 += static_cast<char>(0x80u | (cp & 0x3Fu));
        }
        else
        {
            utf8 += static_cast<char>(0xF0u | (cp >> 18));
            utf8 += static_cast<char>(0x80u | ((cp >> 12) & 0x3Fu));
            utf8 += static_cast<char>(0x80u | ((cp >> 6) & 0x3Fu));
            utf8 += static_cast<char>(0x80u | (cp & 0x3Fu));
        }
    }
    return utf8;
}
int CVideoAudioLive::SoundCard_Open()
{
const AVInputFormat* pInputFormat = av_find_input_format("dshow");
std::string strAudioName = dup_wchar_to_utf8(L"audio=麦克风阵列 (英特尔® 智音技术)");
//采样频率设置成48000,返回-5
AVDictionary* options = NULL;
av_dict_set(&options, "sample_rate", "44100", 0);
av_dict_set(&options, "sample_fmt", "s16", 0);
av_dict_set(&options, "ac", "2", 0);
int nRet = avformat_open_input(&m_pInputAudioFormatCtx, strAudioName.c_str(), pInputFormat, &options);
//返回-5,I/O error
if (nRet != 0)
{
char* err_str = av_err2str2(nRet);
printf("Error: %s\n", err_str);
return -1;
}
return 0;
}
int CVideoAudioLive::Audio_Decode_Encode_Init()
{
int nRet = -1;
if (avformat_find_stream_info(m_pInputAudioFormatCtx, nullptr) < 0)
{
return -1;
}
//这两个函数等价
m_nAudioStream = av_find_best_stream(m_pInputAudioFormatCtx, AVMEDIA_TYPE_AUDIO, -1, -1, nullptr, 0);
printf("m_nAudioStream=%d\n", m_nAudioStream);
/*int i = 0;
for (; i < m_pInputAudioFormatCtx->nb_streams; ++i)
{
if (AVMEDIA_TYPE_AUDIO == m_pInputAudioFormatCtx->streams[i]->codec->codec_type)
{
m_nAudioStream = i;
break;
}
}
if (m_nAudioStream == -1)
{
return FALSE;
}*/
m_pAudioDecodeCodecCtx = avcodec_alloc_context3(nullptr);
nRet = avcodec_parameters_to_context(m_pAudioDecodeCodecCtx, m_pInputAudioFormatCtx->streams[m_nAudioStream]->codecpar);
m_pAudioDecodeCodecCtx->frame_size = frame_size;
//m_pAudioDecodeCodecCtx->codec_id=AV_CODEC_ID_FIRST_AUDIO
m_pAudioDecodeCodec = avcodec_find_decoder(m_pAudioDecodeCodecCtx->codec_id);
if (m_pAudioDecodeCodec == nullptr)
{
printf("Can not find audio decoder! \n");
return -1;
}
nRet = avcodec_open2(m_pAudioDecodeCodecCtx, m_pAudioDecodeCodec, nullptr);
if (nRet < 0)
{
printf("Can not open audio decoder! \n");
return -1;
}
//官方编译的dll,不支持libfdk_aac
//libfdk_aac支持编码s16
//m_pAudioEncodeCodec = avcodec_find_encoder_by_name("libfdk_aac");
m_pAudioEncodeCodec = avcodec_find_encoder(m_AudioCodecID);
if (!m_pAudioEncodeCodec)
{
printf("Can not find audio encoder! \n");
return -1;
}
AVChannelLayout layout = { 0 };
layout.order = AV_CHANNEL_ORDER_NATIVE;
layout.nb_channels = audio_channel_num;
layout.u.mask = (1 << AV_CHAN_FRONT_LEFT) | (1 << AV_CHAN_FRONT_RIGHT);
m_pAudioEncodeCodecCtx = avcodec_alloc_context3(m_pAudioEncodeCodec);
m_pAudioEncodeCodecCtx->codec_type = AVMEDIA_TYPE_AUDIO;
m_pAudioEncodeCodecCtx->sample_fmt = audio_encode_format;
m_pAudioEncodeCodecCtx->sample_rate = AUDIO_SAMPLING_FREQUENCY;
m_pAudioEncodeCodecCtx->ch_layout = layout;
m_pAudioEncodeCodecCtx->ch_layout.nb_channels = audio_channel_num;
m_pAudioEncodeCodecCtx->bit_rate = 128000;
m_pAudioEncodeCodecCtx->profile = FF_PROFILE_AAC_LOW;
nRet = avcodec_open2(m_pAudioEncodeCodecCtx, m_pAudioEncodeCodec, nullptr);
if(nRet<0)
{
printf("open audio encoder fail ! \n");
return -1;
}
m_pAudioStream = avformat_new_stream(m_pOutputFormatCtx, m_pAudioEncodeCodec);
nRet = avcodec_parameters_from_context(m_pAudioStream->codecpar, m_pAudioEncodeCodecCtx);
m_pAudioFifo = av_audio_fifo_alloc(m_pAudioDecodeCodecCtx->sample_fmt, audio_channel_num, 1000 * m_pAudioDecodeCodecCtx->frame_size);
return 0;
}
int CVideoAudioLive::Audio_Swr_Init()
{
//声道、采样频率不变, 格式变了
m_pSwr = swr_alloc();
av_opt_set_int(m_pSwr, "in_channel_layout", audio_channel_type, 0);
av_opt_set_int(m_pSwr, "out_channel_layout", audio_channel_type, 0);
av_opt_set_int(m_pSwr, "in_sample_rate", AUDIO_SAMPLING_FREQUENCY, 0);
av_opt_set_int(m_pSwr, "out_sample_rate", AUDIO_SAMPLING_FREQUENCY, 0);
av_opt_set_sample_fmt(m_pSwr, "in_sample_fmt", audio_input_format, 0);
av_opt_set_sample_fmt(m_pSwr, "out_sample_fmt", audio_encode_format, 0);
swr_init(m_pSwr);
return 0;
}
// Allocates the two reusable audio frames (s16 staging, fltp encode) and
// the planar output buffers for swr_convert.
// Returns 1 on success, -1 on failure (matches Write_Header's convention).
int CVideoAudioLive::Audio_Frame_Init()
{
    int nRet = -1;
    // All four fields (format, rate, samples, channels) must be set
    // before av_frame_get_buffer().
    m_pAudioConvertFrame = av_frame_alloc();
    m_pAudioConvertFrame->format = audio_input_format; // s16, straight from the FIFO
    m_pAudioConvertFrame->sample_rate = AUDIO_SAMPLING_FREQUENCY;
    m_pAudioConvertFrame->nb_samples = m_pAudioEncodeCodecCtx->frame_size;
    // BUGFIX: was audio_channel_type (the AV_CH_LAYOUT_STEREO bit mask,
    // value 3); nb_channels is a channel COUNT, which is 2.
    m_pAudioConvertFrame->ch_layout.nb_channels = audio_channel_num;
    nRet = av_frame_get_buffer(m_pAudioConvertFrame, 0);
    if (nRet < 0)
    {
        printf("av_frame_get_buffer fail: %s\n", av_err2str2(nRet));
        return -1;
    }
    m_pAudioEncodeFrame = av_frame_alloc();
    m_pAudioEncodeFrame->sample_rate = AUDIO_SAMPLING_FREQUENCY;
    m_pAudioEncodeFrame->nb_samples = m_pAudioEncodeCodecCtx->frame_size;
    m_pAudioEncodeFrame->format = m_pAudioEncodeCodecCtx->sample_fmt; // fltp
    m_pAudioEncodeFrame->ch_layout.nb_channels = audio_channel_num;   // BUGFIX: same mask/count mix-up
    nRet = av_frame_get_buffer(m_pAudioEncodeFrame, 0);
    // BUGFIX: the failure message was printed unconditionally, even on success.
    if (nRet < 0)
    {
        printf("av_frame_get_buffer fail: %s\n", av_err2str2(nRet));
        return -1;
    }
    // One AAC frame of fltp stereo: 1024 samples * 4 bytes * 2 channels.
    m_nAudioFramesize = av_samples_get_buffer_size(nullptr, audio_channel_num, m_pAudioEncodeCodecCtx->frame_size,
        m_pAudioEncodeCodecCtx->sample_fmt, 0);
    printf("frame_size=%d, size=%d\n", m_pAudioEncodeCodecCtx->frame_size, m_nAudioFramesize);
    // swr_convert output is planar: one buffer per channel.
    m_pOutAudioBuffer[0] = (uint8_t*)malloc(m_nAudioFramesize);
    m_pOutAudioBuffer[1] = (uint8_t*)malloc(m_nAudioFramesize);
    if (m_pOutAudioBuffer[0] == nullptr || m_pOutAudioBuffer[1] == nullptr)
    {
        return -1;
    }
    return 1;
}
// Emits the container header (FLV metadata etc.) before any packets.
// Returns 1 on success, -1 on failure.
int CVideoAudioLive::Write_Header()
{
    const int nRet = avformat_write_header(m_pOutputFormatCtx, nullptr);
    if (nRet >= 0)
        return 1;
    printf("avformat_write_header fail: %s\n", av_err2str2(nRet));
    return -1;
}
// One-time global FFmpeg setup: register the dshow capture devices,
// initialize networking (needed for rtmp:// output), and silence the
// log except for fatal errors.
void CVideoAudioLive::Ffmpeg_Init()
{
    avdevice_register_all();
    avformat_network_init();
    av_log_set_level(AV_LOG_FATAL);
}
//摄像头采集出来是mjpg,需要解码,再编码
int CVideoAudioLive::Camera_Open()
{
const AVInputFormat* pInputFormat = av_find_input_format("dshow");
AVDictionary* options = nullptr;
//av_dict_set(&options, "video_size", "640x480", 0);
//av_dict_set(&options, "pixel_format", av_get_pix_fmt_name(AV_PIX_FMT_YUV422P), 0);
m_pInputVideoFormatCtx = avformat_alloc_context();
int nRet = avformat_open_input(&m_pInputVideoFormatCtx, "video=Integrated Webcam", pInputFormat, &options);
if (nRet != 0)
{
char* err_str = av_err2str2(nRet);
printf("Error: %s\n", err_str);
return -1;
}
return 0;
}
int CVideoAudioLive::Video_Init()
{
if(avformat_find_stream_info(m_pInputVideoFormatCtx, nullptr) < 0)
{
return -1;
}
for(int i=0; i< m_pInputVideoFormatCtx->nb_streams; ++i)
{
if(AVMEDIA_TYPE_VIDEO == m_pInputVideoFormatCtx->streams[i]->codecpar->codec_type)
{
m_nVideoStream = i;
break;
}
}
if(m_nVideoStream == -1)
{
return -1;
}
printf("m_nVideoStream =%d\n", m_nVideoStream);
m_pVideoDecodeCodecCtx = avcodec_alloc_context3(nullptr);
avcodec_parameters_to_context(m_pVideoDecodeCodecCtx, m_pInputVideoFormatCtx->streams[m_nVideoStream]->codecpar);
m_pVideoDecodeCodec = avcodec_find_decoder(m_pVideoDecodeCodecCtx->codec_id);
if(m_pVideoDecodeCodec == nullptr)
{
printf("video find decoder fail\n");
return -1;
}
int nRet = -1;
nRet = avcodec_open2(m_pVideoDecodeCodecCtx, m_pVideoDecodeCodec, nullptr);
if(nRet < 0)
{
printf("video open decoder fail: %d\n", nRet);
return -1;
}
m_nFrameWidth = m_pVideoDecodeCodecCtx->width;
m_nFrameHeight = m_pVideoDecodeCodecCtx->height;
//m_pVideoDecodeCodecCtx->pix_fmt = AV_PIX_FMT_YUVJ422P
printf("w=%d h=%d\n", m_pVideoDecodeCodecCtx->width, m_pVideoDecodeCodecCtx->height);
printf("pix_fmt=%d\n", m_pVideoDecodeCodecCtx->pix_fmt);
//m_pVideoDecodeCodecCtx->pix_fmt=J422P
m_pSwsContext = sws_getContext(m_nFrameWidth, m_nFrameHeight,
m_pVideoDecodeCodecCtx->pix_fmt, m_nFrameWidth, m_nFrameHeight,
AV_PIX_FMT_YUV420P, SWS_FAST_BILINEAR, nullptr, nullptr, nullptr);
//编码
m_pVideoEncodeCodec = avcodec_find_encoder(m_VideoCodecID);
if (!m_pVideoEncodeCodec)
{
printf("Can not find encoder! \n");
return -1;
}
m_pVideoEncodeCodecCtx = avcodec_alloc_context3(m_pVideoEncodeCodec);
m_pVideoEncodeCodecCtx->pix_fmt = AV_PIX_FMT_YUV420P; //编码前的格式
m_pVideoEncodeCodecCtx->width = m_pVideoDecodeCodecCtx->width;
m_pVideoEncodeCodecCtx->height =m_pVideoDecodeCodecCtx->height;
m_pVideoEncodeCodecCtx->time_base.num = 1;
m_pVideoEncodeCodecCtx->time_base.den = 25;
m_pVideoEncodeCodecCtx->bit_rate = 4000000;
m_pVideoEncodeCodecCtx->gop_size = 25;
m_pVideoEncodeCodecCtx->qmin = 10;
m_pVideoEncodeCodecCtx->qmax = 51;
m_pVideoEncodeCodecCtx->max_b_frames = 0;
if (m_pOutputFormatCtx->oformat->flags & AVFMT_GLOBALHEADER)
{
m_pVideoEncodeCodecCtx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}
AVDictionary *param = 0;
av_dict_set(¶m, "preset", "fast", 0);
av_dict_set(¶m, "tune", "zerolatency", 0);
//av_dict_set(¶m, "profile", "high", 0);
if (avcodec_open2(m_pVideoEncodeCodecCtx, m_pVideoEncodeCodec, ¶m) < 0)
{
printf("avcodec_open2 fail \n");
return -1;
}
m_pVideoStream = avformat_new_stream(m_pOutputFormatCtx, m_pVideoEncodeCodec);
if (m_pVideoStream == nullptr)
{
return -1;
}
m_pVideoStream->time_base = m_pVideoEncodeCodecCtx->time_base;
m_pVideoStream->id = m_pOutputFormatCtx->nb_streams - 1;
m_pVideoStream->codecpar->format = AV_PIX_FMT_YUV420P;
nRet = avcodec_parameters_from_context(m_pVideoStream->codecpar, m_pVideoEncodeCodecCtx);
m_pFrameScale = av_frame_alloc();
m_pEncodeYUVFrame = av_frame_alloc();
m_pEncodeYUVFrame->format = m_pVideoEncodeCodecCtx->pix_fmt;
m_pEncodeYUVFrame->width = m_pVideoDecodeCodecCtx->width;
m_pEncodeYUVFrame->height = m_pVideoDecodeCodecCtx->height;
printf("m_pEncodeYUVFrame pix_fmt=%d\n", m_pVideoDecodeCodecCtx->pix_fmt);
av_frame_get_buffer(m_pEncodeYUVFrame, 32);
m_nYUVFrameSize = av_image_get_buffer_size(AV_PIX_FMT_YUV420P, m_nFrameWidth, m_nFrameHeight, 1);
m_pVideoFifo = av_fifo_alloc2(30*m_nYUVFrameSize, 1, AV_FIFO_FLAG_AUTO_GROW);
return 0;
}
// Creates the output (muxer) context and, for file-backed formats,
// opens the output I/O.
// pFormat: nullptr lets FFmpeg guess from the file name; pass "flv"
// explicitly for rtmp:// URLs, which have no extension to guess from.
// Returns 0 on success, -1 on failure.
int CVideoAudioLive::RTMP_Init(const char* pOutFileName, const char* pFormat)
{
    int nRet = avformat_alloc_output_context2(&m_pOutputFormatCtx, nullptr, pFormat, pOutFileName);
    // BUGFIX: the context was dereferenced without checking it was created.
    if (nRet < 0 || m_pOutputFormatCtx == nullptr)
    {
        printf("avformat_alloc_output_context2 fail: %s\n", av_err2str2(nRet));
        return -1;
    }
    if (!(m_pOutputFormatCtx->oformat->flags & AVFMT_NOFILE))
    {
        nRet = avio_open(&m_pOutputFormatCtx->pb, pOutFileName, AVIO_FLAG_WRITE);
        if (nRet < 0)
        {
            // BUGFIX: the error string was computed but never printed.
            printf("avio_open fail: %s\n", av_err2str2(nRet));
            return -1;
        }
    }
    return 0;
}
// Camera capture thread: reads MJPEG packets, decodes them, rescales to
// YUV420P and pushes raw planes into the byte FIFO for the encode thread.
void CVideoAudioLive::VideoCapture_Thread_Fun()
{
    int nRet = -1;
    int nvideo_count = 0;
    m_pVideoDecodePacket = av_packet_alloc();
    const int y_size = m_nFrameHeight * m_nFrameWidth;
    while (true)
    {
        // Each packet carries one compressed MJPEG image (with JPEG headers).
        nRet = av_read_frame(m_pInputVideoFormatCtx, m_pVideoDecodePacket);
        if (nRet == AVERROR(EAGAIN))
        {
            // BUGFIX: EAGAIN on a live device only means "no frame ready
            // yet"; the original broke out and killed the capture thread.
            av_packet_unref(m_pVideoDecodePacket);
            continue;
        }
        if (nRet < 0)
        {
            // EOF or a real error: stop capturing. (The original fell
            // through and used the stale packet on non-EAGAIN errors.)
            av_packet_unref(m_pVideoDecodePacket);
            printf("read_frame break");
            break;
        }
        if (m_pVideoDecodePacket->stream_index == m_nVideoStream)
        {
            ++nvideo_count;
            avcodec_send_packet(m_pVideoDecodeCodecCtx, m_pVideoDecodePacket);
            int video_receive_frame_ret = avcodec_receive_frame(m_pVideoDecodeCodecCtx, m_pFrameScale);
            if (video_receive_frame_ret == 0)
            {
                // NOTE(review): sws_scale writes into m_pEncodeYUVFrame,
                // which the encode thread also touches outside video_mutex
                // -- potential data race; consider a thread-local frame.
                sws_scale(m_pSwsContext, m_pFrameScale->data,
                    m_pFrameScale->linesize, 0, m_nFrameHeight,
                    m_pEncodeYUVFrame->data, m_pEncodeYUVFrame->linesize);
                // av_fifo_can_write() returns free space in bytes (element size 1).
                if (av_fifo_can_write(m_pVideoFifo) >= (size_t)m_nYUVFrameSize)
                {
                    video_mutex.lock();
                    // Y plane, then the two quarter-size chroma planes.
                    av_fifo_write(m_pVideoFifo, m_pEncodeYUVFrame->data[0], y_size);
                    av_fifo_write(m_pVideoFifo, m_pEncodeYUVFrame->data[1], y_size / 4);
                    av_fifo_write(m_pVideoFifo, m_pEncodeYUVFrame->data[2], y_size / 4);
                    video_mutex.unlock();
                }
            }
        }
        av_packet_unref(m_pVideoDecodePacket);
    }
}
void CVideoAudioLive::AudioCapture_Thread_Fun()
{
int nRet = -1;
int i = 0, count = 0;
m_pAudioDecodePacket = av_packet_alloc();
while (true)
{
nRet = av_read_frame(m_pInputAudioFormatCtx, m_pAudioDecodePacket);
if (nRet == AVERROR(EAGAIN) || nRet == AVERROR_EOF) {
av_packet_unref(m_pAudioDecodePacket);
printf("read_frame break");
break;
}
//m_pAudioDecodePacket->size=88200=44100*2=1秒的音频数据量
if (m_pAudioDecodePacket->stream_index == m_nAudioStream)
{
++m_naudio_count;
//printf("enter push audio count=%d\r\n", m_naudio_count);
//fwrite(m_pAudioDecodePacket->data, 1, m_pAudioDecodePacket->size, m_pPCM);
m_pDecodeAudioFrame = av_frame_alloc();
//pFrameTemp->format = audio_input_format;
//pFrameTemp->sample_rate = AUDIO_SAMPLING_FREQUENCY;
//pFrameTemp->channel_layout = audio_channel_type;// AV_CH_LAYOUT_MONO;
//pFrameTemp->ch_layout.nb_channels = audio_channel_num;
//解码,
//m_pDecodeAudioFrame
int audio_send_pkt_ret = avcodec_send_packet(m_pAudioDecodeCodecCtx, m_pAudioDecodePacket);
int audio_receive_frame_ret = avcodec_receive_frame(m_pAudioDecodeCodecCtx, m_pDecodeAudioFrame);
if (audio_receive_frame_ret < 0)
continue;
//m_pDecodeAudioFrame->data[0]地址有值,data[1]==0x0
//m_pDecodeAudioFrame->nb_samples = 22050, time_base=0/1
//解码后的数据,也可以用系统音频库函数替代
int space = av_audio_fifo_space(m_pAudioFifo);
//printf("space=%d, nb_samples=%d\r\n", space, pFrameTemp->nb_samples);
if (av_audio_fifo_space(m_pAudioFifo) > m_pDecodeAudioFrame->nb_samples * 2)
{
audio_mutex.lock();
int write_len = av_audio_fifo_write(m_pAudioFifo, (void**)&(m_pDecodeAudioFrame->data[0]), m_pDecodeAudioFrame->nb_samples);//?
audio_mutex.unlock();
}
av_frame_free(&m_pDecodeAudioFrame);
}
av_packet_unref(m_pAudioDecodePacket);
}
}
//比较时间戳,音视频同步,推流端的音视频同步
//推流端的音视频同步:1 声卡、摄像头线程采集数据的时间差,
//2 音频、视频编码速度,视频编码耗时些
void CVideoAudioLive::Encode_Write_Mutex_Thread()
{
uint8_t* pdata = new uint8_t[m_nYUVFrameSize];
int i = 0;
int64_t current_pts_video = 0, current_pts_audio = 0;
//int video_framesize = av_image_get_buffer_size(AV_PIX_FMT_YUV420P, m_nFrameWidth, m_nFrameHeight, 0);
int64_t pts = 0;
int nAudioFrameCount = 0;
while (true)
{
//=-1, video before audio,
//注意:current_pts_video取帧的pts,而不是编码后Packet的pts
int nRet = av_compare_ts(current_pts_video, m_pVideoEncodeCodecCtx->time_base,
current_pts_audio, m_pAudioEncodeCodecCtx->time_base);
if (nRet == -1)
{
int ttt = av_fifo_can_read(m_pVideoFifo);
//printf("enter video av_fifo_can_read:%d\r\n", ttt);
if (av_fifo_can_read(m_pVideoFifo) >= m_nYUVFrameSize)
{
video_mutex.lock();
av_fifo_read(m_pVideoFifo, pdata, m_nYUVFrameSize);
video_mutex.unlock();
//把pdata数据拷贝到m_pEncodeYUVFrame->data中
av_image_fill_arrays(m_pEncodeYUVFrame->data, m_pEncodeYUVFrame->linesize, pdata,
AV_PIX_FMT_YUV420P, m_nFrameWidth, m_nFrameHeight, 1);
//fwrite(m_pEncodeYUVFrame->data[0], 1, m_nFrameHeight*m_nFrameWidth, m_pOutYUV);
//fwrite(m_pEncodeYUVFrame->data[1], 1, m_nFrameHeight*m_nFrameWidth/4, m_pOutYUV);
//fwrite(m_pEncodeYUVFrame->data[2], 1, m_nFrameHeight*m_nFrameWidth/4, m_pOutYUV);
AVPacket* video_pkt = av_packet_alloc();
m_pEncodeYUVFrame->pts = pts++;
current_pts_video = m_pEncodeYUVFrame->pts;//new
int video_send_frame_ret = avcodec_send_frame(m_pVideoEncodeCodecCtx, m_pEncodeYUVFrame);
//printf("encode video send_frame %d\n", video_send_frame_ret);
if (video_send_frame_ret >= 0) {
int video_receive_packet_ret = avcodec_receive_packet(m_pVideoEncodeCodecCtx, video_pkt);
//char* err_str = av_err2str2(video_receive_packet_ret);
if (video_receive_packet_ret == AVERROR(EAGAIN) || video_receive_packet_ret == AVERROR_EOF) {
//break;
}
else if (video_receive_packet_ret < 0) {
//break;
}
if (video_pkt->size > 0)
{
++i;
video_pkt->stream_index = m_pVideoStream->index;
av_packet_rescale_ts(video_pkt, m_pVideoEncodeCodecCtx->time_base, m_pVideoStream->time_base);
printf("video_pkt->stream_index:%d, pts=%d, count=%d\r\n", video_pkt->stream_index, video_pkt->pts, pts);
//fwrite(video_pkt->data, 1, video_pkt->size, m_pH264File);
int video_write_ret = av_interleaved_write_frame(m_pOutputFormatCtx, video_pkt);
//char* err_str = av_err2str2(video_write_ret);
//printf("video write_ret:%d\r\n", video_write_ret);
av_packet_unref(video_pkt);
}
}
}
}
else
{
int yyy = av_audio_fifo_size(m_pAudioFifo);
if (av_audio_fifo_size(m_pAudioFifo) >= m_pAudioStream->codecpar->frame_size)
{
audio_mutex.lock();
int read_size = av_audio_fifo_read(m_pAudioFifo, (void**)m_pAudioConvertFrame->data, m_pAudioConvertFrame->nb_samples);
audio_mutex.unlock();
int count = swr_convert(m_pSwr, m_pOutAudioBuffer, m_pAudioConvertFrame->nb_samples,
(const uint8_t**)&(m_pAudioConvertFrame->data), m_pAudioConvertFrame->nb_samples);
m_pAudioEncodeFrame->data[0] = m_pOutAudioBuffer[0];
m_pAudioEncodeFrame->data[1] = m_pOutAudioBuffer[1];
//pts递增nb_samples
m_pAudioEncodeFrame->pts = nAudioFrameCount * m_pAudioEncodeFrame->nb_samples;
++nAudioFrameCount;
current_pts_audio = m_pAudioEncodeFrame->pts;
AVPacket* audio_pkt = av_packet_alloc();
int audio_send_frame_ret = avcodec_send_frame(m_pAudioEncodeCodecCtx, m_pAudioEncodeFrame);
if (audio_send_frame_ret == 0) {
int audio_receive_pkt_ret = avcodec_receive_packet(m_pAudioEncodeCodecCtx, audio_pkt);
char* err_str = av_err2str2(audio_receive_pkt_ret);
//printf("audio receive_packet_ret:%d, i=%d, pkt.size=%d\r\n", audio_receive_pkt_ret, i++, audio_pkt->size);
if (audio_pkt->size > 0) {
++i;
audio_pkt->stream_index = m_pAudioStream->index;
av_packet_rescale_ts(audio_pkt, m_pAudioEncodeCodecCtx->time_base, m_pAudioStream->time_base);
printf("audio_pkt->stream_index:%d, pts=%d, count=%d\r\n", audio_pkt->stream_index, audio_pkt->pts, nAudioFrameCount);
int write_ret = av_interleaved_write_frame(m_pOutputFormatCtx, audio_pkt);
}
av_packet_unref(audio_pkt);
}
}
}
}
delete [] pdata;
av_write_trailer(m_pOutputFormatCtx);
}
void CVideoAudioLive::Close()
{
av_write_trailer(m_pOutputFormatCtx);
avformat_free_context(m_pInputAudioFormatCtx);
avformat_free_context(m_pInputVideoFormatCtx);
avformat_free_context(m_pOutputFormatCtx);
avcodec_close(m_pAudioDecodeCodecCtx);
avcodec_close(m_pAudioEncodeCodecCtx);
avcodec_close(m_pVideoDecodeCodecCtx);
avcodec_close(m_pVideoEncodeCodecCtx);
av_frame_free(&m_pDecodeAudioFrame);
av_frame_free(&m_pAudioEncodeFrame);
av_frame_free(&m_pAudioConvertFrame);
av_frame_free(&m_pFrameScale);
av_frame_free(&m_pEncodeYUVFrame);
av_packet_free(&m_pAudioDecodePacket);
av_packet_free(&m_pVideoDecodePacket);
free(m_pOutAudioBuffer[0]);
free(m_pOutAudioBuffer[1]);
}
#include <iostream>
#include <Windows.h>
#include "4__VideoAudioLive/VideoAudioLive.h"
int main()
{
CVideoAudioLive* m_pVideoAudioLive = new CVideoAudioLive();
m_pVideoAudioLive->Start();
return 0;
}
二 注意的问题
1 摄像头为什么要解码?
答:摄像头直接输出的是mjpg(Motion JPEG)压缩格式;若改为采集同分辨率的原始YUV422P,受USB带宽限制帧率会明显偏低。mjpg数据带JPEG头、是压缩格式,因此必须先解码成YUV再重新编码为H.264。
2 YUV缓存队列,AVFifo的使用
AVFifo* m_pVideoFifo = nullptr;
3 推流端的音视频同步:
正确的音视频同步大概是:1个视频帧对应约2个音频帧。因为一般视频是25帧/秒,而音频约有43帧/秒(44100/1024≈43)。
如果视频是1秒25帧, 帧间隔是40ms。
音频采样频率是44100,44100/1024==43帧, 1000/43=23.256ms。
//错误的写法
//结果:十几帧视频,才1帧音频
int nRet = av_compare_ts(current_pts_video, m_pVideoEncodeCodecCtx->time_base,
current_pts_audio, m_pAudioEncodeCodecCtx->time_base);
//printf("nRet:%d\r\n", nRet);
if (nRet == -1)
{
avcodec_receive_packet(m_pVideoEncodeCodecCtx, video_pkt);
current_pts_video = video_pkt->pts;
}
else
{
avcodec_receive_packet(m_pAudioEncodeCodecCtx, audio_pkt);
current_pts_audio = audio_pkt->pts;
}
4 Frame的pts:调用av_packet_rescale_ts,ffmpeg已内部计算好。
雷神博客中,自己通过time_base计算,不需要这样做。可能是他使用的ffmpeg版本太低了。
5 我曾经犯过的错误
我自己计算pts和dts。音频:1024/48000=21.33333ms。结果我取了21.3。
导致的问题:音视频一开始是同步的,过了十几分钟后,音频落后了。
因为:0.03ms*100000帧=3秒。