[ffmpeg系列 06] 摄像头直播

一  代码

ffmpeg版本5.1.2,dll是:ffmpeg-5.1.2-full_build-shared。x64的。

代码是windows端,用VS编译。

怎么使用这个代码?新建win32工程,复制这三个文件。设置ffmpeg库的include和lib目录。

代码是在系列 05上加的,这个demo可以作为直播的demo。

类似淘宝卖货直播、抖音直播。(我不确定它们是不是使用的rtmp,抓包看走的tcp。可能推流走rtmp,网页拉流走的http-flv。音视频数据有加密。)


/*
author: ashleycoder
CSDN blog: https://blog.csdn.net/chenquangobeijing
*/

#pragma once

extern  "C"
{
#include "libswresample/swresample.h"
#include "libavformat/avformat.h"
#include "libavcodec/avcodec.h"
#include "libswscale/swscale.h"
#include "libavdevice/avdevice.h"
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
#include "libavutil/time.h"
#include "libavutil/timestamp.h"
#include "libavutil/pixfmt.h"
#include "libavutil/imgutils.h"
#include "libavutil/audio_fifo.h"
#include "libavutil/fifo.h"	
};

#include <cstdio>
#include <mutex>
#include <string>
#include <thread>

#pragma  comment(lib, "avformat.lib")
#pragma  comment(lib, "avutil.lib")
#pragma  comment(lib, "avcodec.lib")
#pragma  comment(lib, "swscale.lib")
#pragma  comment(lib, "avdevice.lib")
#pragma  comment(lib, "swresample.lib")



class CVideoAudioLive
{
	// Captures camera (dshow) video and microphone audio, encodes them to
	// H.264 + AAC and muxes the result into a local flv file or an rtmp url.
	// Threads: one video-capture producer, one audio-capture producer and
	// one encode/mux consumer (see Start()).
public:
	CVideoAudioLive(void);
	~CVideoAudioLive(void);

public:
	void   Start();
	void   Ffmpeg_Init();
	int    RTMP_Init(const char* pOutFileName, const char* pFormat);
	int    Write_Header();

	//audio
	int    SoundCard_Open();
	int    Audio_Decode_Encode_Init();
	int    Audio_Swr_Init();
	int    Audio_Frame_Init();
	void   AudioCapture_Thread_Fun();

	//video
	int   Camera_Open();
	int   Video_Init();
	void  VideoCapture_Thread_Fun();

	//video audio
	void  Encode_Write_Mutex_Thread();

	void   Close();
	std::string  dup_wchar_to_utf8(const wchar_t* wstr);
private:
	// one output context, two input (capture device) contexts
	AVFormatContext*   m_pOutputFormatCtx = nullptr;
	AVFormatContext*   m_pInputAudioFormatCtx = nullptr;
	AVFormatContext*   m_pInputVideoFormatCtx = nullptr;

	// audio decoding (dshow microphone stream)
	const AVCodec*     m_pAudioDecodeCodec = nullptr;
	AVCodecContext*    m_pAudioDecodeCodecCtx = nullptr;

	// audio encoding (AAC)
	const AVCodec*      m_pAudioEncodeCodec = nullptr;
	AVCodecContext*     m_pAudioEncodeCodecCtx = nullptr;
	AVStream*           m_pAudioStream = nullptr;

	int                 m_nAudioStream = -1;
	AVCodecID           m_AudioCodecID = AV_CODEC_ID_AAC;


	SwrContext*         m_pSwr = nullptr;
	AVPacket*           m_pAudioDecodePacket = nullptr;


	// video
	const AVCodec*    m_pVideoDecodeCodec = nullptr;
	AVCodecContext*   m_pVideoDecodeCodecCtx = nullptr;

	const AVCodec*    m_pVideoEncodeCodec = nullptr;
	AVCodecContext*   m_pVideoEncodeCodecCtx = nullptr;
	AVStream*         m_pVideoStream = nullptr;

	// pixel-format conversion (camera format -> YUV420P)
	SwsContext*       m_pSwsContext = nullptr;
	AVCodecID         m_VideoCodecID = AV_CODEC_ID_H264;

//audio
private:	
	// holds one decoded audio frame
	AVFrame* m_pDecodeAudioFrame = nullptr;

	// S16 frame used as swr_convert input (conversion to fltp)
	AVFrame* m_pAudioConvertFrame = nullptr;
	// FIX: was uninitialized -- Close() calls free() on both slots
	// unconditionally, which was undefined behavior when Audio_Frame_Init
	// had never run
	uint8_t* m_pOutAudioBuffer[2] = { nullptr, nullptr };

	// FLTP frame handed to the AAC encoder
	AVFrame* m_pAudioEncodeFrame = nullptr;

	// sample FIFO between the audio-capture and encode threads
	AVAudioFifo*      m_pAudioFifo = nullptr;
	std::mutex        audio_mutex;
	int               m_nAudioFramesize = 0;

//video
private:
	// receives decoded camera frames (sws_scale input)
	AVFrame*          m_pFrameScale = nullptr;

	// YUV420P frame fed to the H.264 encoder
	AVFrame*          m_pEncodeYUVFrame = nullptr;

	// packet reused by av_read_frame in the video-capture thread
	AVPacket*         m_pVideoDecodePacket = nullptr;

	// byte FIFO of raw YUV frames between capture and encode threads
	AVFifo*           m_pVideoFifo = nullptr;

	// FIX: zero-initialized -- the threads read these even when Video_Init
	// failed, because the original Start() ignored all init return codes
	int                m_nFrameHeight = 0;
	int                m_nFrameWidth = 0;	
	int                m_nYUVFrameSize = 0;
	int                m_nVideoStream = -1;

	std::mutex         video_mutex;
	std::thread        m_VideoCaptureThread;


private:
	AVSampleFormat    audio_input_format = AV_SAMPLE_FMT_S16;
	AVSampleFormat    audio_encode_format = AV_SAMPLE_FMT_FLTP;
	const  int        AUDIO_SAMPLING_FREQUENCY = 44100;
	const  uint64_t   audio_channel_type = AV_CH_LAYOUT_STEREO;
	const  int        audio_channel_num = 2;
	const  int        frame_size = 1024; // AAC-LC requires 1024 samples per frame

	std::thread       m_AudioCaptureThread;
	std::thread       m_AudioWriteThread;
private:
	FILE*             m_pPCM = nullptr; // debug dump files only
	FILE*             m_pPCM2 = nullptr;
	int               m_naudio_count = 0;

	FILE*             m_pOutYUV = nullptr;
	FILE*             m_pH264File = nullptr;


private:	
	// scratch buffer reused by the av_err2str2 macro below
	char av_error[AV_ERROR_MAX_STRING_SIZE] = { 0 };
#define av_err2str2(errnum) av_make_error_string(av_error, AV_ERROR_MAX_STRING_SIZE, errnum)
};

/*
author: ashleycoder
CSDN blog: https://blog.csdn.net/chenquangobeijing
*/

#include "VideoAudioLive.h"
#include <functional>
#include <codecvt>
#include <locale>
#include <string>
#include <Windows.h>


CVideoAudioLive::CVideoAudioLive(void)
{
	// Open the four debug dump files (raw/converted PCM, encoded H.264,
	// raw YUV). They are written only by the commented-out fwrite calls
	// in the capture/encode paths.
	auto open_dump_file = [](FILE** ppFile, const char* pszPath) {
		fopen_s(ppFile, pszPath, "wb");
	};

	open_dump_file(&m_pPCM, "audio_read.pcm");
	open_dump_file(&m_pPCM2, "audio_f32le.pcm");
	open_dump_file(&m_pH264File, "output.264");
	open_dump_file(&m_pOutYUV, "output.yuv");
}

CVideoAudioLive::~CVideoAudioLive(void)
{
	// Release every FFmpeg/file resource acquired since Start().
	Close();
}


void CVideoAudioLive::Start()
{
	// Brings up the whole pipeline, then blocks forever joining the
	// encode/write thread. Every init step reports failure with a negative
	// return value; abort instead of running on half-initialized state
	// (the original ignored all of these return codes).
	Ffmpeg_Init();

	const char* pOutFileName = "output.flv";
	const char* pFormat = nullptr;

	//const char* pOutFileName = "rtmp://127.0.0.1/live/now";
	//const char* pFormat = "flv";	
	if (RTMP_Init(pOutFileName, pFormat) < 0)
	{
		printf("RTMP_Init failed\n");
		return;
	}

	// Open the camera first, then the sound card: camera startup is slower.
	if (Camera_Open() < 0)
	{
		printf("Camera_Open failed\n");
		return;
	}
	if (SoundCard_Open() < 0)
	{
		printf("SoundCard_Open failed\n");
		return;
	}

	if (Video_Init() < 0)
	{
		printf("Video_Init failed\n");
		return;
	}
	if (Audio_Decode_Encode_Init() < 0 || Audio_Swr_Init() < 0 || Audio_Frame_Init() < 0)
	{
		printf("audio init failed\n");
		return;
	}

	if (Write_Header() < 0)
	{
		return;
	}

	// Two producer threads (camera / microphone) and one consumer thread
	// that encodes and interleaves both streams into the muxer.
	m_VideoCaptureThread = std::thread(std::bind(&CVideoAudioLive::VideoCapture_Thread_Fun, this));

	m_AudioCaptureThread = std::thread(std::bind(&CVideoAudioLive::AudioCapture_Thread_Fun, this));

	// The write thread loops forever, so this join blocks indefinitely.
	m_AudioWriteThread = std::thread(std::bind(&CVideoAudioLive::Encode_Write_Mutex_Thread, this));
	m_AudioWriteThread.join();

}


std::string CVideoAudioLive::dup_wchar_to_utf8(const wchar_t* wstr)
{
	// Converts a wide string to UTF-8 (dshow device names must be UTF-8).
	// Hand-rolled because std::wstring_convert / std::codecvt_utf8 are
	// deprecated since C++17, and codecvt_utf8<wchar_t> does not combine
	// UTF-16 surrogate pairs on Windows anyway.
	std::string utf8;
	if (wstr == nullptr)
	{
		return utf8;
	}

	auto append_code_point = [&utf8](unsigned long cp) {
		if (cp < 0x80) {
			utf8.push_back(static_cast<char>(cp));
		}
		else if (cp < 0x800) {
			utf8.push_back(static_cast<char>(0xC0 | (cp >> 6)));
			utf8.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
		}
		else if (cp < 0x10000) {
			utf8.push_back(static_cast<char>(0xE0 | (cp >> 12)));
			utf8.push_back(static_cast<char>(0x80 | ((cp >> 6) & 0x3F)));
			utf8.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
		}
		else {
			utf8.push_back(static_cast<char>(0xF0 | (cp >> 18)));
			utf8.push_back(static_cast<char>(0x80 | ((cp >> 12) & 0x3F)));
			utf8.push_back(static_cast<char>(0x80 | ((cp >> 6) & 0x3F)));
			utf8.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
		}
	};

	for (const wchar_t* p = wstr; *p != L'\0'; ++p)
	{
		unsigned long cp = static_cast<unsigned long>(*p);
		// On Windows wchar_t is UTF-16: combine high/low surrogate pairs
		// into one code point. (Reading p[1] is safe: worst case it is the
		// terminating L'\0'.)
		if (sizeof(wchar_t) == 2 && cp >= 0xD800 && cp <= 0xDBFF
			&& p[1] >= 0xDC00 && p[1] <= 0xDFFF)
		{
			cp = 0x10000 + ((cp - 0xD800) << 10)
				+ (static_cast<unsigned long>(p[1]) - 0xDC00);
			++p;
		}
		append_code_point(cp);
	}

	return utf8;
}


int   CVideoAudioLive::SoundCard_Open()
{
	const AVInputFormat* pInputFormat = av_find_input_format("dshow");
	std::string strAudioName = dup_wchar_to_utf8(L"audio=麦克风阵列 (英特尔® 智音技术)");



	//采样频率设置成48000,返回-5
	AVDictionary* options = NULL;
	av_dict_set(&options, "sample_rate", "44100", 0);
	av_dict_set(&options, "sample_fmt", "s16", 0);
	av_dict_set(&options, "ac", "2", 0);
	int  nRet = avformat_open_input(&m_pInputAudioFormatCtx, strAudioName.c_str(), pInputFormat, &options);
	//返回-5,I/O error
	if (nRet != 0)
	{
		char* err_str = av_err2str2(nRet);
		printf("Error: %s\n", err_str);
		return  -1;
	}

	return 0;
}

int   CVideoAudioLive::Audio_Decode_Encode_Init()
{
	// Sets up the whole audio path:
	//  1. locate the audio stream delivered by the dshow device,
	//  2. open a decoder for the captured stream,
	//  3. open the AAC encoder and create the output audio stream,
	//  4. allocate the sample FIFO shared with the encode/write thread.
	// Returns 0 on success, -1 on failure.
	int nRet = -1;
	if (avformat_find_stream_info(m_pInputAudioFormatCtx, nullptr) < 0)
	{
		return  -1;
	}

	// av_find_best_stream is equivalent to the manual loop kept below for reference
	m_nAudioStream = av_find_best_stream(m_pInputAudioFormatCtx, AVMEDIA_TYPE_AUDIO, -1, -1, nullptr, 0);
	printf("m_nAudioStream=%d\n", m_nAudioStream);
	/*int i = 0;
	for (; i < m_pInputAudioFormatCtx->nb_streams; ++i)
	{
		if (AVMEDIA_TYPE_AUDIO == m_pInputAudioFormatCtx->streams[i]->codec->codec_type)
		{
			m_nAudioStream = i;
			break;
		}
	}
	if (m_nAudioStream == -1)
	{
		return  FALSE;
	}*/

	// NOTE(review): m_nAudioStream is not checked for < 0 before being used
	// as an index below -- av_find_best_stream can fail.
	m_pAudioDecodeCodecCtx = avcodec_alloc_context3(nullptr);
	nRet = avcodec_parameters_to_context(m_pAudioDecodeCodecCtx, m_pInputAudioFormatCtx->streams[m_nAudioStream]->codecpar);
	// frame_size is reused below when sizing the audio FIFO
	m_pAudioDecodeCodecCtx->frame_size = frame_size;

	//m_pAudioDecodeCodecCtx->codec_id=AV_CODEC_ID_FIRST_AUDIO 
	m_pAudioDecodeCodec = avcodec_find_decoder(m_pAudioDecodeCodecCtx->codec_id);
	if (m_pAudioDecodeCodec == nullptr)
	{
		printf("Can not find audio decoder! \n");
		return  -1;
	}

	nRet = avcodec_open2(m_pAudioDecodeCodecCtx, m_pAudioDecodeCodec, nullptr);
	if (nRet < 0)
	{
		printf("Can not open audio decoder! \n");
		return  -1;
	}	  
	
	// The official prebuilt FFmpeg DLLs do not ship libfdk_aac
	// (libfdk_aac could encode s16 directly; the built-in AAC needs fltp)
	//m_pAudioEncodeCodec = avcodec_find_encoder_by_name("libfdk_aac");
	m_pAudioEncodeCodec = avcodec_find_encoder(m_AudioCodecID);
	if (!m_pAudioEncodeCodec)
	{
		printf("Can not find audio encoder! \n");
		return -1;
	}
	
	

	// Build a native-order stereo layout by hand:
	// (1 << AV_CHAN_FRONT_LEFT) | (1 << AV_CHAN_FRONT_RIGHT) is the
	// front-left + front-right channel mask.
	AVChannelLayout layout = { 0 };
	layout.order = AV_CHANNEL_ORDER_NATIVE;
	layout.nb_channels = audio_channel_num;
	layout.u.mask = (1 << AV_CHAN_FRONT_LEFT) | (1 << AV_CHAN_FRONT_RIGHT);

	
	// AAC-LC encoder: 44100 Hz, stereo, fltp, 128 kbit/s
	m_pAudioEncodeCodecCtx = avcodec_alloc_context3(m_pAudioEncodeCodec);
	m_pAudioEncodeCodecCtx->codec_type = AVMEDIA_TYPE_AUDIO;
	m_pAudioEncodeCodecCtx->sample_fmt = audio_encode_format;
	m_pAudioEncodeCodecCtx->sample_rate = AUDIO_SAMPLING_FREQUENCY;
	m_pAudioEncodeCodecCtx->ch_layout = layout;
	m_pAudioEncodeCodecCtx->ch_layout.nb_channels = audio_channel_num;
	m_pAudioEncodeCodecCtx->bit_rate = 128000;
	m_pAudioEncodeCodecCtx->profile = FF_PROFILE_AAC_LOW;

    
    
	nRet = avcodec_open2(m_pAudioEncodeCodecCtx, m_pAudioEncodeCodec, nullptr);
	if(nRet<0)
	{
        printf("open audio encoder fail ! \n");  
        return  -1;
	}

	// Output stream inherits the encoder parameters.
	// NOTE(review): neither avformat_new_stream nor
	// avcodec_parameters_from_context is checked for failure here.
	m_pAudioStream = avformat_new_stream(m_pOutputFormatCtx, m_pAudioEncodeCodec);
	nRet = avcodec_parameters_from_context(m_pAudioStream->codecpar, m_pAudioEncodeCodecCtx);
	

	// FIFO in the DECODED format (s16); sized generously at 1000 frames
	m_pAudioFifo = av_audio_fifo_alloc(m_pAudioDecodeCodecCtx->sample_fmt, audio_channel_num, 1000 * m_pAudioDecodeCodecCtx->frame_size);
	 return   0;
}

int   CVideoAudioLive::Audio_Swr_Init()
{
	//声道、采样频率不变, 格式变了 
	m_pSwr = swr_alloc();
	av_opt_set_int(m_pSwr, "in_channel_layout", audio_channel_type, 0);
	av_opt_set_int(m_pSwr, "out_channel_layout", audio_channel_type, 0);
	av_opt_set_int(m_pSwr, "in_sample_rate", AUDIO_SAMPLING_FREQUENCY, 0);
	av_opt_set_int(m_pSwr, "out_sample_rate", AUDIO_SAMPLING_FREQUENCY, 0);
	av_opt_set_sample_fmt(m_pSwr, "in_sample_fmt", audio_input_format, 0);
	av_opt_set_sample_fmt(m_pSwr, "out_sample_fmt", audio_encode_format, 0);
	swr_init(m_pSwr);

	return 0;
}

int   CVideoAudioLive::Audio_Frame_Init()
{
	int nRet = -1;
	//4个属性都需要设置
	m_pAudioConvertFrame = av_frame_alloc();
	m_pAudioConvertFrame->format = audio_input_format; //s16
	m_pAudioConvertFrame->sample_rate = AUDIO_SAMPLING_FREQUENCY;
	m_pAudioConvertFrame->nb_samples = m_pAudioEncodeCodecCtx->frame_size;
	m_pAudioConvertFrame->ch_layout.nb_channels = audio_channel_type;
	nRet = av_frame_get_buffer(m_pAudioConvertFrame, 0); //分配内存


	m_pAudioEncodeFrame = av_frame_alloc();
	m_pAudioEncodeFrame->sample_rate = AUDIO_SAMPLING_FREQUENCY;
	m_pAudioEncodeFrame->nb_samples = m_pAudioEncodeCodecCtx->frame_size;
	m_pAudioEncodeFrame->format = m_pAudioEncodeCodecCtx->sample_fmt;//fltp
	m_pAudioEncodeFrame->ch_layout.nb_channels = audio_channel_type;
	nRet = av_frame_get_buffer(m_pAudioEncodeFrame, 0);
	char* err_str = av_err2str2(nRet);
	printf("av_frame_get_buffer fail: %s\n", err_str);

	//1帧音频大小,2声道的:1024*4*2
	m_nAudioFramesize = av_samples_get_buffer_size(nullptr, audio_channel_num, m_pAudioEncodeCodecCtx->frame_size,
		m_pAudioEncodeCodecCtx->sample_fmt, 0);
	printf("frame_size=%d, size=%d\n", m_pAudioEncodeCodecCtx->frame_size, m_nAudioFramesize);
	m_pOutAudioBuffer[0] = (uint8_t*)malloc(m_nAudioFramesize);
	m_pOutAudioBuffer[1] = (uint8_t*)malloc(m_nAudioFramesize);


	return  1;
}


int   CVideoAudioLive::Write_Header()
{
	// Writes the container header of the output (must run after every
	// output stream has been created). Returns 1 on success, -1 on failure.
	const int nWriteRet = avformat_write_header(m_pOutputFormatCtx, nullptr);
	if (nWriteRet >= 0)
	{
		return  1;
	}

	printf("avformat_write_header fail: %s\n", av_err2str2(nWriteRet));
	return  -1;
}


void   CVideoAudioLive::Ffmpeg_Init()
{
	// Process-wide FFmpeg setup: register capture devices (dshow) and
	// initialize networking (used by the rtmp output variant).
	avdevice_register_all();
	avformat_network_init();

	// keep the console quiet; raise the log level when debugging
	av_log_set_level(AV_LOG_FATAL);
}

//摄像头采集出来是mjpg,需要解码,再编码
int   CVideoAudioLive::Camera_Open()
{	
	const AVInputFormat* pInputFormat = av_find_input_format("dshow");
	AVDictionary* options = nullptr;
	//av_dict_set(&options, "video_size", "640x480", 0);
	//av_dict_set(&options, "pixel_format", av_get_pix_fmt_name(AV_PIX_FMT_YUV422P), 0);		
	m_pInputVideoFormatCtx = avformat_alloc_context();
	int  nRet = avformat_open_input(&m_pInputVideoFormatCtx, "video=Integrated Webcam", pInputFormat, &options);
	if (nRet != 0)
	{
		char* err_str = av_err2str2(nRet);
		printf("Error: %s\n", err_str);
		return  -1;
	}

	return 0;
}


int   CVideoAudioLive::Video_Init()
{
	// Sets up the whole video path: find the camera stream, open the MJPG
	// decoder, create the pixel-format scaler, open the H.264 encoder,
	// create the output video stream and allocate the YUV frame + FIFO.
	// Returns 0 on success, -1 on failure.
	if(avformat_find_stream_info(m_pInputVideoFormatCtx, nullptr) < 0)
	{
		return  -1;
	}


	for(int i=0; i< m_pInputVideoFormatCtx->nb_streams; ++i)
	{
		if(AVMEDIA_TYPE_VIDEO == m_pInputVideoFormatCtx->streams[i]->codecpar->codec_type)
		{
			m_nVideoStream = i;
			break;
		}
	}
	if(m_nVideoStream == -1)
	{
		return  -1;
	}
	printf("m_nVideoStream =%d\n", m_nVideoStream);


	// decoder context takes its parameters from the capture stream
	m_pVideoDecodeCodecCtx = avcodec_alloc_context3(nullptr);
	avcodec_parameters_to_context(m_pVideoDecodeCodecCtx, m_pInputVideoFormatCtx->streams[m_nVideoStream]->codecpar);


	m_pVideoDecodeCodec = avcodec_find_decoder(m_pVideoDecodeCodecCtx->codec_id);
	if(m_pVideoDecodeCodec == nullptr)
	{
		printf("video find decoder fail\n");
		return  -1;
	}

	int nRet = -1;
	nRet = avcodec_open2(m_pVideoDecodeCodecCtx, m_pVideoDecodeCodec, nullptr);
	if(nRet < 0)
	{
		printf("video open decoder fail: %d\n", nRet);
        return  -1;
	}

	m_nFrameWidth = m_pVideoDecodeCodecCtx->width;
	m_nFrameHeight = m_pVideoDecodeCodecCtx->height;
	//m_pVideoDecodeCodecCtx->pix_fmt = AV_PIX_FMT_YUVJ422P
    printf("w=%d  h=%d\n", m_pVideoDecodeCodecCtx->width, m_pVideoDecodeCodecCtx->height);
	printf("pix_fmt=%d\n", m_pVideoDecodeCodecCtx->pix_fmt); 

	// scaler: camera format (YUVJ422P on this machine) -> YUV420P for H.264
	m_pSwsContext = sws_getContext(m_nFrameWidth, m_nFrameHeight,
				 m_pVideoDecodeCodecCtx->pix_fmt, m_nFrameWidth, m_nFrameHeight,
		         AV_PIX_FMT_YUV420P, SWS_FAST_BILINEAR, nullptr, nullptr, nullptr);


	// H.264 encoder setup
	m_pVideoEncodeCodec = avcodec_find_encoder(m_VideoCodecID);
	if (!m_pVideoEncodeCodec)
	{
		printf("Can not find encoder! \n");
		return -1;
	}


	m_pVideoEncodeCodecCtx = avcodec_alloc_context3(m_pVideoEncodeCodec);
	m_pVideoEncodeCodecCtx->pix_fmt = AV_PIX_FMT_YUV420P; // format fed to the encoder
	m_pVideoEncodeCodecCtx->width = m_pVideoDecodeCodecCtx->width;
	m_pVideoEncodeCodecCtx->height =m_pVideoDecodeCodecCtx->height;
	m_pVideoEncodeCodecCtx->time_base.num = 1;
	m_pVideoEncodeCodecCtx->time_base.den = 25;	// 25 fps
	m_pVideoEncodeCodecCtx->bit_rate = 4000000;
	m_pVideoEncodeCodecCtx->gop_size = 25;      // one keyframe per second at 25 fps
	m_pVideoEncodeCodecCtx->qmin = 10;
	m_pVideoEncodeCodecCtx->qmax = 51;
	m_pVideoEncodeCodecCtx->max_b_frames = 0;   // no B-frames
	
	// muxers that set AVFMT_GLOBALHEADER want extradata out-of-band
	if (m_pOutputFormatCtx->oformat->flags & AVFMT_GLOBALHEADER)
	{
		m_pVideoEncodeCodecCtx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
	}
	
	// x264 private options for low-latency streaming
	AVDictionary *param = 0;
	av_dict_set(&param, "preset", "fast", 0);
	av_dict_set(&param, "tune", "zerolatency", 0);
	//av_dict_set(&param, "profile", "high", 0);
	if (avcodec_open2(m_pVideoEncodeCodecCtx, m_pVideoEncodeCodec, &param) < 0)
	{
        printf("avcodec_open2 fail \n");
		return -1;
	}


	m_pVideoStream = avformat_new_stream(m_pOutputFormatCtx, m_pVideoEncodeCodec);
	if (m_pVideoStream == nullptr)
	{
		return -1;
	}
    
	m_pVideoStream->time_base = m_pVideoEncodeCodecCtx->time_base;
	m_pVideoStream->id = m_pOutputFormatCtx->nb_streams - 1;
	// NOTE(review): this format assignment is immediately overwritten by
	// avcodec_parameters_from_context on the next line.
	m_pVideoStream->codecpar->format = AV_PIX_FMT_YUV420P;
	nRet = avcodec_parameters_from_context(m_pVideoStream->codecpar, m_pVideoEncodeCodecCtx);
	


	// receives decoded camera frames (buffers come from the decoder)
	m_pFrameScale = av_frame_alloc();


	// reusable YUV420P frame fed to the encoder
	m_pEncodeYUVFrame = av_frame_alloc();
	m_pEncodeYUVFrame->format = m_pVideoEncodeCodecCtx->pix_fmt;
	m_pEncodeYUVFrame->width = m_pVideoDecodeCodecCtx->width;
	m_pEncodeYUVFrame->height = m_pVideoDecodeCodecCtx->height;
	printf("m_pEncodeYUVFrame pix_fmt=%d\n", m_pVideoDecodeCodecCtx->pix_fmt);
	av_frame_get_buffer(m_pEncodeYUVFrame, 32);

			
	// bytes of one tightly packed (align=1) YUV420P frame
	m_nYUVFrameSize = av_image_get_buffer_size(AV_PIX_FMT_YUV420P, m_nFrameWidth, m_nFrameHeight, 1);

	// byte FIFO (element size 1) holding up to ~30 raw frames; auto-grows
	m_pVideoFifo = av_fifo_alloc2(30*m_nYUVFrameSize, 1, AV_FIFO_FLAG_AUTO_GROW);
	
	return   0;
}
    

int   CVideoAudioLive::RTMP_Init(const char* pOutFileName, const char* pFormat)
{ 	
	// Creates the output (muxer) context. pFormat may be nullptr, in which
	// case the format is guessed from the file name ("output.flv" -> flv).
	// Returns 0 on success, -1 on failure.
	int nRet = avformat_alloc_output_context2(&m_pOutputFormatCtx, nullptr, pFormat, pOutFileName);
	// BUG FIX: the original dereferenced m_pOutputFormatCtx->oformat
	// without checking that allocation succeeded.
	if (nRet < 0 || m_pOutputFormatCtx == nullptr)
	{
		printf("avformat_alloc_output_context2 fail: %s\n", av_err2str2(nRet));
		return -1;
	}

	// File/network based formats need an explicit avio handle.
	if (!(m_pOutputFormatCtx->oformat->flags & AVFMT_NOFILE))
	{
		nRet = avio_open(&m_pOutputFormatCtx->pb, pOutFileName, AVIO_FLAG_WRITE);
		if (nRet < 0)
		{
			// BUG FIX: err_str was computed but never printed
			char* err_str = av_err2str2(nRet);
			printf("avio_open fail: %s\n", err_str);
			return  -1;
		}
	}

	return   0;
}


void CVideoAudioLive::VideoCapture_Thread_Fun()
{
	// Camera producer thread: reads MJPG packets from the dshow device,
	// decodes them, converts to YUV420P and appends the raw frame bytes to
	// m_pVideoFifo for the encode/write thread to consume.
	int  nRet = -1;	
	int  nvideo_count = 0;
	m_pVideoDecodePacket = av_packet_alloc();

	int y_size = m_nFrameHeight * m_nFrameWidth;
	while (true)
	{
		// the packet holds compressed MJPG data straight from the camera
		nRet = av_read_frame(m_pInputVideoFormatCtx, m_pVideoDecodePacket);
		if (nRet == AVERROR(EAGAIN) || nRet == AVERROR_EOF) {
			av_packet_unref(m_pVideoDecodePacket);
			printf("read_frame break");
			break;
		}		

		if (m_pVideoDecodePacket->stream_index == m_nVideoStream)
		{
			++nvideo_count;
			//printf("enter read video  pkt.size:%d, nvideo_count=%d\r\n", m_pVideoDecodePacket->size, nvideo_count);

			// decode MJPG -> raw camera frame (m_pFrameScale)
			int  video_send_pkt_ret = avcodec_send_packet(m_pVideoDecodeCodecCtx, m_pVideoDecodePacket);
			//printf("encode video send_packet_ret %d\n", video_send_pkt_ret);
			int video_receive_frame_ret = avcodec_receive_frame(m_pVideoDecodeCodecCtx, m_pFrameScale);
			char* err_str = av_err2str2(video_receive_frame_ret);


			if (video_receive_frame_ret == 0)
			{
				// convert the camera pixel format to YUV420P in place
				int iReturn = sws_scale(m_pSwsContext, m_pFrameScale->data,
					m_pFrameScale->linesize, 0, m_nFrameHeight,
					m_pEncodeYUVFrame->data, m_pEncodeYUVFrame->linesize);
				
				//printf("enter av_fifo_can_write:%d\r\n", av_fifo_can_write(m_pVideoFifo));
				// av_fifo_can_write returns the free element count; element
				// size is 1, so this is bytes. Frames are dropped when full.
				if (av_fifo_can_write(m_pVideoFifo) >= m_nYUVFrameSize) 
				{
					video_mutex.lock();
					// write the three planes back to back: Y, then U, then V.
					// NOTE(review): assumes each plane is tightly packed
					// (linesize == width); confirm for this resolution,
					// since av_frame_get_buffer used align=32.
					av_fifo_write(m_pVideoFifo, m_pEncodeYUVFrame->data[0], y_size);
					av_fifo_write(m_pVideoFifo, m_pEncodeYUVFrame->data[1], y_size/4);
					av_fifo_write(m_pVideoFifo, m_pEncodeYUVFrame->data[2], y_size/4);
					video_mutex.unlock();
				}							
			}          
		}
		
		av_packet_unref(m_pVideoDecodePacket);
	}
}


void CVideoAudioLive::AudioCapture_Thread_Fun()
{
	// Microphone producer thread: reads PCM packets from the dshow device,
	// decodes them and pushes the samples into m_pAudioFifo, which the
	// encode/write thread drains in 1024-sample chunks.
	int  nRet = -1;
	int i = 0, count = 0;
	m_pAudioDecodePacket = av_packet_alloc();


	while (true)
	{
		nRet = av_read_frame(m_pInputAudioFormatCtx, m_pAudioDecodePacket);
		if (nRet == AVERROR(EAGAIN) || nRet == AVERROR_EOF) {
			av_packet_unref(m_pAudioDecodePacket);
			printf("read_frame break");
			break;
		}

		// observed: packet size 88200 = 44100*2, i.e. dshow delivers large
		// PCM packets (about one second of audio)
		if (m_pAudioDecodePacket->stream_index == m_nAudioStream)
		{
			++m_naudio_count;
			//printf("enter push audio  count=%d\r\n", m_naudio_count);

			//fwrite(m_pAudioDecodePacket->data, 1, m_pAudioDecodePacket->size, m_pPCM);

			m_pDecodeAudioFrame = av_frame_alloc();

			int  audio_send_pkt_ret = avcodec_send_packet(m_pAudioDecodeCodecCtx, m_pAudioDecodePacket);
			int audio_receive_frame_ret = avcodec_receive_frame(m_pAudioDecodeCodecCtx, m_pDecodeAudioFrame);
			if (audio_receive_frame_ret < 0)
			{
				// BUG FIX: the original `continue`d here without freeing the
				// frame or unreferencing the packet, leaking both every time
				// the decoder had no frame ready.
				av_frame_free(&m_pDecodeAudioFrame);
				av_packet_unref(m_pAudioDecodePacket);
				continue;
			}

			// decoded samples live in data[0] (packed S16); nb_samples can
			// be large (e.g. 22050) because of the big capture packets

			int space = av_audio_fifo_space(m_pAudioFifo);
			//printf("space=%d, nb_samples=%d\r\n", space, m_pDecodeAudioFrame->nb_samples);
			if (av_audio_fifo_space(m_pAudioFifo) > m_pDecodeAudioFrame->nb_samples * 2)
			{
				audio_mutex.lock();
				int write_len = av_audio_fifo_write(m_pAudioFifo, (void**)&(m_pDecodeAudioFrame->data[0]), m_pDecodeAudioFrame->nb_samples);
				audio_mutex.unlock();
			}
			av_frame_free(&m_pDecodeAudioFrame);
		}
		av_packet_unref(m_pAudioDecodePacket);
	}
}


// Consumer thread: compares the next audio and video timestamps and encodes
// and muxes whichever stream is behind (sender-side A/V sync). Sync matters
// because (1) the two capture threads start at slightly different times and
// (2) video encoding is slower than audio encoding.
void  CVideoAudioLive::Encode_Write_Mutex_Thread()
{
	uint8_t* pdata = new uint8_t[m_nYUVFrameSize];
	int i = 0;
	int64_t  current_pts_video = 0, current_pts_audio = 0;
	//int  video_framesize = av_image_get_buffer_size(AV_PIX_FMT_YUV420P, m_nFrameWidth, m_nFrameHeight, 0);
	int64_t  pts = 0;
	

	int nAudioFrameCount = 0;
	while (true)
	{		
		// nRet == -1 when the video timestamp is behind the audio one.
		// Note: this compares the FRAME pts (pre-encode), not the pts of
		// the encoded packet -- using the packet pts desynchronizes badly.
		int nRet = av_compare_ts(current_pts_video, m_pVideoEncodeCodecCtx->time_base,
			             current_pts_audio, m_pAudioEncodeCodecCtx->time_base);
		if (nRet == -1)
		{			
			// ---- video turn: pull one raw YUV frame and encode it ----
			int ttt = av_fifo_can_read(m_pVideoFifo);
			//printf("enter video av_fifo_can_read:%d\r\n", ttt);
			if (av_fifo_can_read(m_pVideoFifo) >= m_nYUVFrameSize)
			{
				video_mutex.lock();
				av_fifo_read(m_pVideoFifo, pdata, m_nYUVFrameSize);								
				video_mutex.unlock();

				// map pdata into m_pEncodeYUVFrame->data/linesize
				// (av_image_fill_arrays only sets pointers; it does not copy)
				av_image_fill_arrays(m_pEncodeYUVFrame->data, m_pEncodeYUVFrame->linesize, pdata,
					AV_PIX_FMT_YUV420P, m_nFrameWidth, m_nFrameHeight, 1);

				//fwrite(m_pEncodeYUVFrame->data[0], 1, m_nFrameHeight*m_nFrameWidth, m_pOutYUV);
				//fwrite(m_pEncodeYUVFrame->data[1], 1, m_nFrameHeight*m_nFrameWidth/4, m_pOutYUV);
				//fwrite(m_pEncodeYUVFrame->data[2], 1, m_nFrameHeight*m_nFrameWidth/4, m_pOutYUV);

				// NOTE(review): video_pkt is allocated each iteration but
				// only unref'd, never av_packet_free'd -- the packet shell
				// leaks once per video frame.
				AVPacket* video_pkt = av_packet_alloc();

				m_pEncodeYUVFrame->pts = pts++;
				current_pts_video = m_pEncodeYUVFrame->pts; // frame pts drives the sync comparison
				int  video_send_frame_ret = avcodec_send_frame(m_pVideoEncodeCodecCtx, m_pEncodeYUVFrame);
				//printf("encode video send_frame %d\n", video_send_frame_ret);

				if (video_send_frame_ret >= 0) {
					int video_receive_packet_ret = avcodec_receive_packet(m_pVideoEncodeCodecCtx, video_pkt);
					//char* err_str = av_err2str2(video_receive_packet_ret);
					if (video_receive_packet_ret == AVERROR(EAGAIN) || video_receive_packet_ret == AVERROR_EOF) {
						//break;
					}
					else if (video_receive_packet_ret < 0) {						
						//break;
					}

					if (video_pkt->size > 0)
					{
						++i;
						video_pkt->stream_index = m_pVideoStream->index;				
						// rescale pts/dts/duration from encoder time base to stream time base
						av_packet_rescale_ts(video_pkt, m_pVideoEncodeCodecCtx->time_base, m_pVideoStream->time_base);

						// NOTE(review): pts is int64_t but printed with %d
						printf("video_pkt->stream_index:%d, pts=%d, count=%d\r\n", video_pkt->stream_index, video_pkt->pts, pts);

						//fwrite(video_pkt->data, 1, video_pkt->size, m_pH264File);

						int video_write_ret = av_interleaved_write_frame(m_pOutputFormatCtx, video_pkt);
						//char* err_str = av_err2str2(video_write_ret);
						//printf("video write_ret:%d\r\n", video_write_ret);
						av_packet_unref(video_pkt);
					}
				}		
			}
		}
		else
		{			
			// ---- audio turn: pull 1024 samples, convert S16->FLTP, encode ----
			int yyy = av_audio_fifo_size(m_pAudioFifo);
			if (av_audio_fifo_size(m_pAudioFifo) >= m_pAudioStream->codecpar->frame_size)
			{
				audio_mutex.lock();
				int read_size = av_audio_fifo_read(m_pAudioFifo, (void**)m_pAudioConvertFrame->data, m_pAudioConvertFrame->nb_samples);
				audio_mutex.unlock();
				
				// S16 interleaved -> FLTP planar into the two scratch planes
				int count = swr_convert(m_pSwr, m_pOutAudioBuffer, m_pAudioConvertFrame->nb_samples,
					(const uint8_t**)&(m_pAudioConvertFrame->data), m_pAudioConvertFrame->nb_samples);

				// hand the converted planes to the encode frame
				m_pAudioEncodeFrame->data[0] = m_pOutAudioBuffer[0];
				m_pAudioEncodeFrame->data[1] = m_pOutAudioBuffer[1];

				// pts advances by nb_samples (1024) per encoded frame
				m_pAudioEncodeFrame->pts = nAudioFrameCount * m_pAudioEncodeFrame->nb_samples;
				++nAudioFrameCount;
				current_pts_audio = m_pAudioEncodeFrame->pts;
				// NOTE(review): audio_pkt has the same alloc-without-free
				// leak as video_pkt above.
				AVPacket* audio_pkt = av_packet_alloc();

				int	audio_send_frame_ret = avcodec_send_frame(m_pAudioEncodeCodecCtx, m_pAudioEncodeFrame);
				if (audio_send_frame_ret == 0) {
					int  audio_receive_pkt_ret = avcodec_receive_packet(m_pAudioEncodeCodecCtx, audio_pkt);
					char* err_str = av_err2str2(audio_receive_pkt_ret);
					//printf("audio receive_packet_ret:%d, i=%d, pkt.size=%d\r\n", audio_receive_pkt_ret, i++, audio_pkt->size);

					if (audio_pkt->size > 0) {
						++i;					
						audio_pkt->stream_index = m_pAudioStream->index;
						av_packet_rescale_ts(audio_pkt, m_pAudioEncodeCodecCtx->time_base, m_pAudioStream->time_base);
						// NOTE(review): pts is int64_t but printed with %d
						printf("audio_pkt->stream_index:%d, pts=%d, count=%d\r\n", audio_pkt->stream_index, audio_pkt->pts, nAudioFrameCount);
						int write_ret = av_interleaved_write_frame(m_pOutputFormatCtx, audio_pkt);
						
					}
					av_packet_unref(audio_pkt);
				}
			}

		}
	}

	// NOTE(review): unreachable -- the loop above has no break, so in
	// practice the trailer is written by Close() instead.
	delete [] pdata;
	av_write_trailer(m_pOutputFormatCtx);
}

void  CVideoAudioLive::Close() 
{
	av_write_trailer(m_pOutputFormatCtx);

	avformat_free_context(m_pInputAudioFormatCtx);
	avformat_free_context(m_pInputVideoFormatCtx);
	avformat_free_context(m_pOutputFormatCtx);

	avcodec_close(m_pAudioDecodeCodecCtx);
	avcodec_close(m_pAudioEncodeCodecCtx);
	avcodec_close(m_pVideoDecodeCodecCtx);
	avcodec_close(m_pVideoEncodeCodecCtx);

	av_frame_free(&m_pDecodeAudioFrame);
	av_frame_free(&m_pAudioEncodeFrame);
	av_frame_free(&m_pAudioConvertFrame);
	av_frame_free(&m_pFrameScale);
	av_frame_free(&m_pEncodeYUVFrame);

	av_packet_free(&m_pAudioDecodePacket);
	av_packet_free(&m_pVideoDecodePacket);

	free(m_pOutAudioBuffer[0]);
	free(m_pOutAudioBuffer[1]);	
}


#include <iostream>
#include <Windows.h>


#include "4__VideoAudioLive/VideoAudioLive.h"


int main()
{  
    CVideoAudioLive* m_pVideoAudioLive = new CVideoAudioLive();
    m_pVideoAudioLive->Start();
   

    return 0;
}

二  注意的问题

1  摄像头为什么要解码?

答:mjpg格式,同分辨率的YUV422P,帧率低。mjpg带头,压缩的格式。

2  YUV缓存队列,AVFifo的使用  

AVFifo*           m_pVideoFifo = nullptr;

3  推流端的音视频同步: 

正确的音视频同步大概是:1个视频帧,对应1~2个音频帧。因为一般视频是每秒25帧,而音频约每秒43帧(44100/1024≈43)。

如果视频是1秒25帧, 帧间隔是40ms。
音频采样频率是44100,44100/1024==43帧, 1000/43=23.256ms。

//错误的写法
//结果:十几帧视频,才1帧音频
int nRet = av_compare_ts(current_pts_video, m_pVideoEncodeCodecCtx->time_base,
			             current_pts_audio, m_pAudioEncodeCodecCtx->time_base);
//printf("nRet:%d\r\n", nRet);
if (nRet == -1)
{
   avcodec_receive_packet(m_pVideoEncodeCodecCtx, video_pkt);
   current_pts_video = video_pkt->pts;
}
else
{
   avcodec_receive_packet(m_pAudioEncodeCodecCtx, audio_pkt);
   current_pts_audio = audio_pkt->pts;
}

4  Frame的pts:调用av_packet_rescale_ts,ffmpeg已内部计算好。 

雷神博客中,自己通过time_base计算,不需要这样做。可能是他使用的ffmpeg版本太低了。

5  我曾经犯过的错误

我自己计算pts和dts。音频:1024/48000=21.33333ms。结果我取了21.3。

导致的问题:音视频一开始是同步的,过了十几分钟后,音频落后了。

因为:每帧累积约0.033ms的误差,0.033ms×100000帧≈3.3秒。

  • 7
    点赞
  • 9
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值