由于实验室是做图像与视频处理的,所以基本上是利用OpenCV开源库。OpenCV库非常方便,也很好用。但是其底层提供的视频接口不是很多,主要是提取视频帧,后续的都变成图像处理的工作了。在做自己的论文方向题目的时候,需要用到音频特征(提取的特征越多,识别的效果应该会有所提升),这时候OpenCV就不行了。OpenCV底层的图像和视频接口也是使用ffmpeg处理的,只是用到的接口比较少而已。所以就花了一点时间看了一些ffmpeg的资料,写了下面的图片音频接口,从而算是对OpenCV的一点点扩展。
接口包括六个文件,分别如下:
FFmpeg.h和FFmpeg.cpp : 接口头文件,并提供通用的全局函数
FFAudio.h和FFAudio.cpp : 利用ffmpeg修改的音频提取接口类,并提供简单的音频特征提取函数接口
FFCapture.h和FFCapture.cpp : 利用ffmpeg修改的视频帧提取接口类,采用OpenCV图片IplImage输出结构。
使用前,需首先配置完成ffmpeg和OpenCV,这个就不在这里说了。
具体代码如下所示:
ffmpeg文件
#ifndef FFMPEG_H
#define FFMPEG_H
#include <iostream>
#include <string.h>
#include <fstream>
#include <stdio.h>
#include <stdint.h>
using namespace std;
//OpenCV headers
#include "cv.h"
#include "highgui.h"
//ffmpeg headers (C libraries, need C linkage)
extern "C"
{
#include "libavformat/avformat.h"
#include "libavcodec/avcodec.h"
#include "libswscale/swscale.h"
}
//Fixed-width aliases used by the on-disk WAV header structures.
//BUGFIX: the original `typedef unsigned long DWORD` is 64-bit on LP64
//platforms (64-bit Linux/macOS), silently breaking the 44-byte WAV header
//layout written by CFFAudio::SaveAsWAVFile; exact-width types keep the
//structures at their intended sizes on every platform.
typedef uint16_t WORD;//16-bit unsigned integer
typedef uint32_t DWORD;//32-bit unsigned integer
//Global helper functions
//Initialize the ffmpeg environment; must be called once (e.g. at program
//start-up) before any other ffmpeg-based function is used.
bool InitFFmpeg();
//Convert a rational number to a double (returns 0 when num or den is 0)
double RationalToDouble(AVRational r);
//Copy a BGR24 AVFrame into a 3-channel IplImage
void AVFrameToIplImage(AVFrame* pSrcFrame, IplImage* pDstImg);
#endif
#include "FFmpeg.h"
// Initialize the ffmpeg environment; call once (e.g. at program start-up)
// before any other ffmpeg-based function. Always returns true.
bool InitFFmpeg()
{
// register all container formats and codecs with libavformat/libavcodec
av_register_all();
return true;
}
// Convert an AVRational fraction to a double.
// Returns 0.0 when either the numerator or the denominator is zero,
// guarding against division by zero for degenerate rationals.
double RationalToDouble(AVRational r)
{
    if (r.num == 0 || r.den == 0)
    {
        return 0.;
    }
    return static_cast<double>(r.num) / static_cast<double>(r.den);
}
// Copy a BGR24 AVFrame into a pre-allocated 3-channel IplImage.
// Both images must have the same dimensions; nothing happens when a pointer
// is NULL, the frame is not BGR24, or the destination is not 3-channel.
void AVFrameToIplImage(AVFrame* pSrcFrame, IplImage* pDstImg)
{
    // reject NULL pointers, non-BGR24 frames and non-3-channel images
    if (NULL == pSrcFrame || NULL == pDstImg || pSrcFrame->format != PIX_FMT_BGR24 || pDstImg->nChannels != 3)
    {
        return;
    }
    cvZero(pDstImg);
    // BUGFIX: the original overwrote pDstImg->widthStep and ->imageSize with
    // the source stride, but the IplImage buffer was allocated with its own
    // stride by cvCreateImage — if linesize[0] exceeded the allocated stride,
    // later accesses would run past the buffer. Keep the destination geometry
    // untouched and copy row by row, honoring each image's own stride.
    const int nRowBytes = pDstImg->width * 3;
    for (int i = 0; i < pDstImg->height; i++)
    {
        uchar* ptrDst = (uchar*)(pDstImg->imageData + i * pDstImg->widthStep);
        uchar* ptrSrc = (uchar*)(pSrcFrame->data[0] + i * pSrcFrame->linesize[0]);
        // one packed BGR row at a time (replaces the per-pixel copy)
        memcpy(ptrDst, ptrSrc, nRowBytes);
    }
}
FFAudio文件
#ifndef FFAUDIO_H
#define FFAUDIO_H
#include "FFmpeg.h"
// WAV file header structures.
// For the meaning of each field see the RIFF/WAVE file format specification.
// Reference: http://blog.csdn.net/zhihu008/article/details/7854533
// FIX: the original wrote `typedef struct X { ... };` without a typedef
// name — the typedef declares nothing (compiler warning); plain struct
// definitions are the correct C++ form and keep the type names unchanged.
// NOTE(review): these structs are fwrite'n to disk verbatim; the layout is
// only the canonical 44-byte WAV header when WORD/DWORD are exact 16/32-bit
// types — verify the typedefs in FFmpeg.h.
struct RIFFChunk
{
    char szRiffID[4];     // "RIFF"
    DWORD dwRiffSize;     // file size minus 8 bytes
    char szRiffFormat[4]; // "WAVE"
};
struct PCMWAVEFORMAT
{
    WORD wFormatTag;        // encoding format (1 = PCM)
    WORD wChannels;         // channel count
    DWORD dwSamplePerSec;   // sample rate
    DWORD dwAvgBytesPerSec; // data bytes per second
    WORD wBlockAlign;       // bytes per sample frame
    WORD wBitsPerSample;    // bits per sample
};
struct FORMATChunk
{
    char szFmtID[4]; // "fmt "
    DWORD dwFmtSize; // size of wavFmt
    PCMWAVEFORMAT wavFmt;
};
struct DATAChunk
{
    char szDataID[4]; // "data"
    DWORD dwDataSize; // PCM payload size in bytes
};
// Audio class built on ffmpeg-decoded PCM data; computes simple audio features.
class CFFAudio
{
private:
    short* m_pData;        // raw PCM sample data
    int64_t m_nLength;     // data length in sizeof(short) units
    int m_nChannels;       // channel count
    int m_nSampleRate;     // sample rate
    int m_nBytesPerSample; // sample size in BITS (name is historical)
    // audio-frame attributes
    int m_nSamplePerFrame;  // samples per audio frame
    int m_nOverlapPerFrame; // overlapping samples between adjacent frames
    int m_nFrameNum;        // number of audio frames
    short** m_pFrameData;   // frame data, one row per frame
    bool m_bIsExtracted;    // features already computed (caches the result)
    int m_nAENum;           // frames whose short-time energy exceeds the segment mean
    int m_nZCRNum;          // frames whose zero-crossing rate exceeds the segment mean
protected:
    void ChangeToFrame();              // split the signal into frames
    float CalcSTEnergy(short* pFrame); // mean short-time energy of one frame
    float CalcZCRate(short* pFrame);   // short-time zero-crossing rate of one frame
public:
    CFFAudio(void);
    // build the object from raw PCM parameters
    bool CreateFFAudio(short* pData, int64_t nLength, int nChannels, int nSampleRate, AVSampleFormat nSampleFmt);
    bool LoadWAVFile(char* szFileName);   // load a WAV file
    bool SaveAsWAVFile(char* szFileName); // save the PCM data as a WAV file
    void Save(char* szFileName);          // dump samples as text, one per line
    int64_t GetLength();                  // sample count
    int64_t GetFrameNums();               // audio-frame count
    short GetSampleData(int64_t nPos);    // one sample
    short* GetAudioData();                // malloc'd copy of the PCM data (caller frees)
    bool ExtractFeature(int& nAENum, int& nZCRNum); // short-time energy / ZCR features
    virtual ~CFFAudio(void);
};
#endif
#include "FFAudio.h"
// Construct an empty audio object.
// Sentinel values (-1 / NULL / false) mark "no data loaded yet"; the
// feature counters start at zero.
CFFAudio::CFFAudio(void)
    : m_pData(NULL)
    , m_nLength(-1)
    , m_nChannels(-1)
    , m_nSampleRate(-1)
    , m_nBytesPerSample(-1)
    , m_nSamplePerFrame(-1)
    , m_nOverlapPerFrame(-1)
    , m_nFrameNum(-1)
    , m_pFrameData(NULL)
    , m_bIsExtracted(false)
    , m_nAENum(0)
    , m_nZCRNum(0)
{
}
// Release the PCM buffer and the per-frame buffers.
CFFAudio::~CFFAudio(void)
{
    if (NULL != m_pData)
    {
        free(m_pData);
    }
    // BUGFIX: guard m_pFrameData before indexing it — if ChangeToFrame()'s
    // allocation failed (or was never run) the original dereferenced NULL.
    if (NULL != m_pFrameData)
    {
        for (int i = 0; i < m_nFrameNum; i++)
        {
            free(m_pFrameData[i]);
        }
        free(m_pFrameData);
    }
}
// Build the audio object from raw PCM data.
// pData / nLength : sample buffer and its length in sizeof(short) units.
// Only mono/stereo, U8/S16 sample formats and a fixed whitelist of sample
// rates are accepted. The data is copied; the caller keeps ownership of
// pData. Returns false on any unsupported parameter or allocation failure.
bool CFFAudio::CreateFFAudio(short* pData, int64_t nLength, int nChannels, int nSampleRate, AVSampleFormat nSampleFmt)
{
    // empty PCM buffer
    if (NULL == pData || nLength <= 0)
    {
        return false;
    }
    // only mono and stereo are supported
    if (nChannels <= 0 || nChannels > 2)
    {
        return false;
    }
    // only unsigned 8-bit and signed 16-bit sample formats are supported
    // (m_nBytesPerSample actually stores BITS per sample despite its name)
    if (nSampleFmt == AV_SAMPLE_FMT_S16)
    {
        m_nBytesPerSample = 16;
    }
    else if (nSampleFmt == AV_SAMPLE_FMT_U8)
    {
        m_nBytesPerSample = 8;
    }
    else
    {
        return false;
    }
    // whitelist of supported sample rates
    const int nSupportSampleRateArr[] = {6000, 8000, 11025, 16000, 22050, 32000, 32075, 44100, 48000, 96000};
    // FIX: derive the count from the array instead of hard-coding 10
    const int nRateCount = (int)(sizeof(nSupportSampleRateArr) / sizeof(nSupportSampleRateArr[0]));
    bool bFlag = false;
    for (int i = 0; i < nRateCount; i++)
    {
        if (nSupportSampleRateArr[i] == nSampleRate)
        {
            bFlag = true;
            break;
        }
    }
    if (!bFlag) // unsupported sample rate
    {
        return false;
    }
    // copy the PCM data
    // BUGFIX: check the allocation before memcpy (the original did not)
    m_pData = (short*)malloc(nLength * sizeof(short));
    if (NULL == m_pData)
    {
        return false;
    }
    memcpy(m_pData, pData, nLength * sizeof(short));
    m_nLength = nLength;
    m_nChannels = nChannels;
    m_nSampleRate = nSampleRate; // assigned once (the original set it twice)
    return true;
}
// Load PCM data from a WAV file.
// NOTE: parsing is not implemented yet, so after the argument checks this
// always returns false (same observable behavior as the original).
bool CFFAudio::LoadWAVFile(char* szFileName)
{
    // refuse to overwrite data that is already loaded
    if (NULL != m_pData && m_nLength > 0)
    {
        return false;
    }
    // no file name given
    if (NULL == szFileName)
    {
        return false;
    }
    // BUGFIX: guard names shorter than 4 characters; the original computed
    // szFileName + strlen(...) - 4, which underflows for short names.
    size_t nNameLen = strlen(szFileName);
    if (nNameLen < 4)
    {
        return false;
    }
    // the extension must be ".wav" or ".WAV"
    char szFileType[5] = {0};
    strncpy(szFileType, szFileName + nNameLen - 4, 4);
    if (strcmp(szFileType, ".wav") != 0 && strcmp(szFileType, ".WAV") != 0)
    {
        return false;
    }
    // TODO: parse the WAV header and payload
    return false;
}
// Save the PCM data as a WAV file with the canonical 44-byte header.
// Returns false when there is no data, the name is NULL / too short / not
// *.wav or *.WAV, or the file cannot be opened.
bool CFFAudio::SaveAsWAVFile(char* szFileName)
{
    // no audio data
    if (NULL == m_pData || m_nLength <= 0)
    {
        return false;
    }
    // no file name given
    if (NULL == szFileName)
    {
        return false;
    }
    // BUGFIX: guard names shorter than 4 characters before the suffix check
    size_t nNameLen = strlen(szFileName);
    if (nNameLen < 4)
    {
        return false;
    }
    // the extension must be ".wav" or ".WAV"
    char szFileType[5] = {0};
    strncpy(szFileType, szFileName + nNameLen - 4, 4);
    if (strcmp(szFileType, ".wav") != 0 && strcmp(szFileType, ".WAV") != 0)
    {
        return false;
    }
    // data chunk
    DATAChunk data;
    strncpy(data.szDataID, "data", 4);
    data.dwDataSize = m_nLength * sizeof(short);
    // fmt chunk
    FORMATChunk fmt;
    strncpy(fmt.szFmtID, "fmt ", 4);
    fmt.dwFmtSize = sizeof(PCMWAVEFORMAT);
    fmt.wavFmt.wFormatTag = 1; // PCM
    fmt.wavFmt.wChannels = m_nChannels;
    fmt.wavFmt.dwSamplePerSec = m_nSampleRate;
    fmt.wavFmt.dwAvgBytesPerSec = m_nSampleRate * m_nChannels * m_nBytesPerSample / 8;
    // BUGFIX: block align is bytes per sample FRAME (channels * bytes per
    // sample); the original ignored the channel count.
    fmt.wavFmt.wBlockAlign = m_nChannels * m_nBytesPerSample / 8;
    fmt.wavFmt.wBitsPerSample = m_nBytesPerSample;
    // RIFF chunk
    RIFFChunk riff;
    strncpy(riff.szRiffID, "RIFF", 4);
    strncpy(riff.szRiffFormat, "WAVE", 4);
    // BUGFIX: per the RIFF spec dwRiffSize is the file size minus the 8-byte
    // RIFF header, i.e. 36 + data size; the original wrote 44 + data size.
    riff.dwRiffSize = 36 + m_nLength * sizeof(short);
    // binary output via C stdio
    FILE* fp = fopen(szFileName, "wb+");
    if (NULL == fp) // BUGFIX: the original dereferenced a possibly-NULL FILE*
    {
        return false;
    }
    fwrite(&riff, sizeof(RIFFChunk), 1, fp);
    fwrite(&fmt, sizeof(FORMATChunk), 1, fp);
    fwrite(&data, sizeof(DATAChunk), 1, fp);
    fwrite(m_pData, m_nLength * sizeof(short), 1, fp);
    fclose(fp);
    return true;
}
// Dump every sample as decimal text, one per line, appending to szFileName.
void CFFAudio::Save(char* szFileName)
{
    // BUGFIX: the original dereferenced m_pData and used szFileName without
    // any checks; bail out when there is no data or no usable file.
    if (NULL == szFileName || NULL == m_pData || m_nLength <= 0)
    {
        return;
    }
    ofstream fout;
    fout.open(szFileName, ios::out | ios::app);
    if (!fout.is_open())
    {
        return;
    }
    // FIX: iterate with int64_t — m_nLength is int64_t and an int index
    // would truncate for very long signals
    for (int64_t i = 0; i < m_nLength; i++)
    {
        fout<<m_pData[i]<<endl;
    }
    fout.close();
}
// Return the PCM data length in sizeof(short) units (-1 before data is set).
int64_t CFFAudio::GetLength()
{
return m_nLength;
}
// Return the audio-frame count produced by ChangeToFrame() (-1 before framing).
int64_t CFFAudio::GetFrameNums()
{
return m_nFrameNum;
}
// Return the sample at nPos, or SHRT_MIN as an error sentinel when the
// position is outside [0, m_nLength).
short CFFAudio::GetSampleData(int64_t nPos)
{
    const bool bInRange = (nPos >= 0 && nPos < m_nLength);
    return bInRange ? m_pData[nPos] : SHRT_MIN;
}
// Return a malloc'd copy of the PCM data; the caller must free() it.
// Returns NULL when there is no data or the allocation fails.
short* CFFAudio::GetAudioData()
{
    if (NULL == m_pData || m_nLength <= 0)
    {
        return NULL;
    }
    short* pData = (short*)malloc(m_nLength * sizeof(short));
    // BUGFIX: the original memcpy'd into a possibly-NULL buffer
    if (NULL == pData)
    {
        return NULL;
    }
    memcpy(pData, m_pData, m_nLength * sizeof(short));
    return pData;
}
// Split the PCM signal into overlapping audio frames.
// Frame length = 25 ms (sample rate / 40); adjacent frames overlap by
// 10 ms (sample rate / 100). A trailing partial frame is discarded.
void CFFAudio::ChangeToFrame()
{
    m_nSamplePerFrame = m_nSampleRate / 40;
    m_nOverlapPerFrame = m_nSampleRate / 100;
    const int nStep = m_nSamplePerFrame - m_nOverlapPerFrame; // hop size
    m_nFrameNum = (int)((m_nLength - m_nSamplePerFrame) / nStep);
    // BUGFIX: signals shorter than one frame produced a negative frame
    // count and a malloc of a negative size; treat that as "no frames".
    if (m_nFrameNum <= 0)
    {
        m_nFrameNum = 0;
        m_pFrameData = NULL;
        return;
    }
    m_pFrameData = (short**)malloc(sizeof(short*) * m_nFrameNum);
    if (NULL == m_pFrameData)
    {
        m_nFrameNum = 0;
        return;
    }
    for (int i = 0; i < m_nFrameNum; i++)
    {
        m_pFrameData[i] = (short*)malloc(sizeof(short) * m_nSamplePerFrame);
        if (NULL == m_pFrameData[i])
        {
            continue; // ExtractFeature() rejects NULL rows
        }
        // BUGFIX: the original used strncpy on raw PCM bytes; strncpy stops
        // copying at the first zero byte and zero-fills the remainder,
        // silently corrupting every frame containing a 0x00 byte. memcpy
        // copies the binary data verbatim.
        memcpy(m_pFrameData[i], m_pData + i * nStep, m_nSamplePerFrame * sizeof(short));
    }
}
//备注:在对音频信息进行特征提取的时候,通常会对音频信号进行加窗处理,这里为快速提取,故不进行加窗处理
// Mean short-time energy of one frame: sum of squared samples / frame length.
// Returns -1 for a NULL frame pointer.
float CFFAudio::CalcSTEnergy(short* pFrame)
{
    if (NULL == pFrame)
    {
        return -1;
    }
    // BUGFIX: use a 64-bit accumulator — `long` is 32 bits on some platforms
    // (e.g. Windows) and the sum of squares (up to ~2^30 per sample over a
    // few thousand samples) can overflow it.
    int64_t nEnergy = 0;
    for (int i = 0; i < m_nSamplePerFrame; i++)
    {
        nEnergy += (int64_t)pFrame[i] * pFrame[i];
    }
    return nEnergy * 1.0 / m_nSamplePerFrame;
}
// Short-time zero-crossing rate of one frame: the fraction of adjacent
// sample pairs whose signs differ. Returns -1 for a NULL frame pointer.
float CFFAudio::CalcZCRate(short* pFrame)
{
    if (NULL == pFrame)
    {
        return -1;
    }
    // BUGFIX: the original counted non-negative samples, which is not a
    // zero-crossing rate at all; count sign changes between consecutive
    // samples instead (0 is grouped with the positive half-plane).
    int nCount = 0;
    for (int i = 1; i < m_nSamplePerFrame; i++)
    {
        if ((pFrame[i - 1] >= 0) != (pFrame[i] >= 0))
        {
            nCount++;
        }
    }
    return nCount * 1.0 / m_nSamplePerFrame;
}
// Extract segment features: nAENum is the number of frames whose short-time
// energy exceeds the segment average; nZCRNum the number whose zero-crossing
// rate exceeds the segment average. Results are cached after the first call.
// Returns true on success, false when framing failed.
bool CFFAudio::ExtractFeature(int& nAENum, int& nZCRNum)
{
    // serve cached results
    if (m_bIsExtracted)
    {
        nAENum = m_nAENum;
        nZCRNum = m_nZCRNum;
        return true;
    }
    // split the signal into frames
    ChangeToFrame();
    // BUGFIX: also reject a zero frame count — the divisions below would
    // otherwise divide by zero
    if (NULL == m_pFrameData || m_nFrameNum <= 0)
    {
        return false;
    }
    for (int i = 0; i < m_nFrameNum; i++)
    {
        if (NULL == m_pFrameData[i])
        {
            return false;
        }
    }
    // per-frame short-time energy and zero-crossing rate
    float* pSTE = new float[m_nFrameNum];
    float* pZCR = new float[m_nFrameNum];
    float fSTEMean = 0; // segment mean short-time energy
    float fZCRMean = 0; // segment mean zero-crossing rate
    for (int i = 0; i < m_nFrameNum; i++)
    {
        pSTE[i] = CalcSTEnergy(m_pFrameData[i]);
        pZCR[i] = CalcZCRate(m_pFrameData[i]);
        fSTEMean += pSTE[i];
        fZCRMean += pZCR[i];
    }
    fSTEMean /= m_nFrameNum;
    fZCRMean /= m_nFrameNum;
    // count frames above each mean
    m_nZCRNum = 0;
    m_nAENum = 0;
    for (int i = 0; i < m_nFrameNum; i++)
    {
        if (pSTE[i] > fSTEMean)
        {
            m_nAENum++;
        }
        if (pZCR[i] > fZCRMean)
        {
            m_nZCRNum++;
        }
    }
    // BUGFIX: the original leaked pSTE/pZCR and fell off the end of a bool
    // function without a return statement (undefined behavior)
    delete[] pSTE;
    delete[] pZCR;
    m_bIsExtracted = true;
    nAENum = m_nAENum;
    nZCRNum = m_nZCRNum;
    return true;
}
#ifndef FFCAPTURE_H
#define FFCAPTURE_H
#include "FFmpeg.h"
#include "FFAudio.h"
// Video/audio capture class built on ffmpeg; functionally similar to
// OpenCV's CvCapture, with audio-segment extraction added.
class CFFCapture
{
private:
char m_szFileName[100];// media file path (fixed-size buffer, filled via strcpy)
AVFormatContext* m_pFormatCtx;
int m_nVideoStream;// video stream index (-1 = none)
int m_nAudioStream;// audio stream index (-1 = none)
AVCodecContext* m_pVideoCodecCtx;
AVCodecContext* m_pAudioCodecCtx;
AVCodec* m_pVideoCodec;
AVCodec* m_pAudioCodec;
uint8_t* m_pVideoBuffer;// pixel buffer backing m_pFrameRGB
AVPacket m_packet;// most recently read packet
int m_nFramePos;// current video frame position
AVFrame* m_pFrame;// decoded frame (native pixel format)
AVFrame* m_pFrameRGB;// decoded frame converted to BGR24
IplImage* m_pImgRGB;// OpenCV image holding the current frame
// video stream properties
int64_t m_nFrameNums;// total video frame count (container-reported)
int64_t m_nDuration;// duration — NOTE(review): assigned from stream->duration, which is in stream time-base units, not ms as the original comment said; verify
double m_dVideoFPS;// frame rate, used to compute frame timestamps
// audio stream properties
int m_nAudioChannels;// audio channel count
int m_nSampleRate;// audio sample rate
AVSampleFormat m_nSampleFmt;// audio sample format
protected:
bool GrabFrame();// read packets until the next VIDEO packet is in m_packet
bool SetFramePos(int64_t nPos);// seek so the current frame is nPos
short* QueryAudio(int64_t nStartPos, int64_t nStopPos, int64_t& nAudioLength);// raw audio between two video-frame positions (malloc'd; caller frees)
public:
CFFCapture(void);
bool CreateFFCapture(char* szFileName);// open a media file and set up decoding
virtual ~CFFCapture(void);
IplImage* QueryFrame();// next video frame (owned by this object — do not release)
IplImage* QueryFrame(int64_t nPos);// frame at a given position; updates the current position
CFFAudio* QueryAudio(int64_t nStartPos, int64_t nStopPos);// audio between two frame positions as a CFFAudio (caller deletes)
int GetFramePos();// current video frame position
int64_t GetVideoDuration();// duration — NOTE(review): declared but no definition appears in this file
int64_t GetFrameNums();// total video frame count
CvSize GetFrameSize();// video frame dimensions
int GetAudioChannels();// audio channel count
int GetAudioSampleRate();// audio sample rate
AVSampleFormat GetAudioSampleFmt();// audio sample format
};
#endif
#include "FFCapture.h"
// Construct an empty capture object. Every member is reset so the destructor
// is safe even when CreateFFCapture() was never called or failed part-way.
CFFCapture::CFFCapture(void)
{
    strcpy(m_szFileName, "");
    m_pFormatCtx = NULL;
    m_nVideoStream = -1;
    m_nAudioStream = -1;
    m_pVideoCodecCtx = NULL;
    m_pAudioCodecCtx = NULL;
    m_pVideoCodec = NULL;
    m_pAudioCodec = NULL;
    m_pVideoBuffer = NULL;
    m_nFramePos = -1;
    m_pFrame = NULL;
    m_pFrameRGB = NULL;
    // BUGFIX: the original left m_pImgRGB, m_packet and the video-stream
    // properties uninitialized; the destructor then called cvReleaseImage on
    // a wild pointer and av_free_packet on an uninitialized packet whenever
    // construction was not followed by a successful CreateFFCapture().
    m_pImgRGB = NULL;
    m_packet.data = NULL;
    m_packet.size = 0;
    m_nFrameNums = -1;
    m_nDuration = -1;
    m_dVideoFPS = 0.;
    m_nAudioChannels = -1;
    m_nSampleRate = -1;
    m_nSampleFmt = AV_SAMPLE_FMT_NONE;
}
// Release every resource acquired by CreateFFCapture().
// NOTE(review): relies on the constructor initializing m_pImgRGB and
// m_packet before these checks are meaningful — verify the constructor
// zero-initializes them.
CFFCapture::~CFFCapture(void)
{
if (NULL != m_pVideoBuffer)
{
av_free(m_pVideoBuffer);
}
if (NULL != m_pFrameRGB)
{
av_free(m_pFrameRGB);
}
if (NULL != m_pFrame)
{
av_free(m_pFrame);
}
if (NULL != m_pImgRGB)
{
cvReleaseImage(&m_pImgRGB);
}
// free the most recently read packet (earlier packets were never freed
// explicitly — see GrabFrame)
if (m_packet.size > 0)
{
av_free_packet(&m_packet);
}
// close the decoders
if (NULL != m_pVideoCodecCtx)
{
avcodec_close(m_pVideoCodecCtx);
}
if (NULL != m_pAudioCodecCtx)
{
avcodec_close(m_pAudioCodecCtx);
}
// close the media file
if (NULL != m_pFormatCtx)
{
av_close_input_file(m_pFormatCtx);
}
}
// Open a media file and prepare video and audio decoding.
// Returns false when the file cannot be opened, has no video or audio
// stream, or a decoder cannot be found/opened. Members are assigned as each
// step succeeds, so the destructor releases whatever was acquired before an
// early return.
bool CFFCapture::CreateFFCapture(char* szFileName)
{
    if (NULL == szFileName)
    {
        return false;
    }
    // open the media file
    if (av_open_input_file(&m_pFormatCtx, szFileName, NULL, 0, NULL) != 0)
    {
        return false;
    }
    strcpy(m_szFileName, szFileName); // keep the file name
    // read the stream information
    if (av_find_stream_info(m_pFormatCtx) < 0)
    {
        return false;
    }
    // locate the video and audio streams
    // FIX: nb_streams is unsigned; use an unsigned loop index
    for (unsigned int i = 0; i < m_pFormatCtx->nb_streams; i++)
    {
        if (m_pFormatCtx->streams[i]->codec->codec_type == CODEC_TYPE_VIDEO)
        {
            m_nVideoStream = i;
        }
        else if (m_pFormatCtx->streams[i]->codec->codec_type == CODEC_TYPE_AUDIO)
        {
            m_nAudioStream = i;
        }
    }
    // both a video and an audio stream are required
    if (m_nVideoStream == -1 || m_nAudioStream == -1)
    {
        return false;
    }
    // find and open the video decoder
    m_pVideoCodecCtx = m_pFormatCtx->streams[m_nVideoStream]->codec;
    m_pVideoCodec = avcodec_find_decoder(m_pVideoCodecCtx->codec_id);
    if (NULL == m_pVideoCodec)
    {
        return false;
    }
    if (avcodec_open(m_pVideoCodecCtx, m_pVideoCodec) < 0)
    {
        return false;
    }
    // frame count, duration and frame rate
    m_nFrameNums = m_pFormatCtx->streams[m_nVideoStream]->nb_frames;
    m_nDuration = m_pFormatCtx->streams[m_nVideoStream]->duration;
    m_dVideoFPS = RationalToDouble(m_pFormatCtx->streams[m_nVideoStream]->r_frame_rate);
    // BUGFIX: the original test `fabs(fps - 1e-11) < 0` was always false
    // (fabs never returns a negative value), so the fallback never ran.
    // Fall back to 1/time_base when the reported frame rate is ~zero.
    if (fabs(m_dVideoFPS) < 1e-9)
    {
        m_dVideoFPS = 1.0 / RationalToDouble(m_pVideoCodecCtx->time_base);
    }
    // find and open the audio decoder
    m_pAudioCodecCtx = m_pFormatCtx->streams[m_nAudioStream]->codec;
    m_pAudioCodec = avcodec_find_decoder(m_pAudioCodecCtx->codec_id);
    if (NULL == m_pAudioCodec)
    {
        return false;
    }
    if (avcodec_open(m_pAudioCodecCtx, m_pAudioCodec) < 0)
    {
        return false;
    }
    // audio stream properties
    m_nAudioChannels = m_pAudioCodecCtx->channels;
    m_nSampleRate = m_pAudioCodecCtx->sample_rate;
    m_nSampleFmt = m_pAudioCodecCtx->sample_fmt;
    // allocate the decode frame, the RGB frame and its pixel buffer
    m_pFrame = avcodec_alloc_frame();
    m_pFrameRGB = avcodec_alloc_frame();
    if (NULL == m_pFrame || NULL == m_pFrameRGB)
    {
        return false;
    }
    m_pVideoBuffer = (uint8_t*)av_malloc(sizeof(uint8_t) * avpicture_get_size(PIX_FMT_RGB24, m_pVideoCodecCtx->width, m_pVideoCodecCtx->height));
    if (NULL == m_pVideoBuffer)
    {
        return false;
    }
    avpicture_fill((AVPicture*)m_pFrameRGB, m_pVideoBuffer, PIX_FMT_RGB24, m_pVideoCodecCtx->width, m_pVideoCodecCtx->height);
    // the OpenCV image that QueryFrame() fills and returns (owned here)
    m_pImgRGB = cvCreateImage(cvSize(m_pVideoCodecCtx->width, m_pVideoCodecCtx->height), IPL_DEPTH_8U, 3);
    cvZero(m_pImgRGB);
    // prime the decoder by fetching frame 0 once (extracting the very first
    // frame fails otherwise).
    // BUGFIX: QueryFrame() returns the MEMBER image m_pImgRGB; the original
    // cvReleaseImage'd the returned pointer here, leaving m_pImgRGB dangling
    // and causing a double free in the destructor.
    QueryFrame();
    return true;
}
// Read packets until the next VIDEO packet is stored in m_packet.
// Returns false at end of stream or on a read error.
// NOTE(review): packets are never freed between reads here — only the last
// one, in the destructor; this mirrors the original behavior. Confirm
// whether the ffmpeg version in use requires av_free_packet per read.
bool CFFCapture::GrabFrame()
{
    do
    {
        if (av_read_frame(m_pFormatCtx, &m_packet) < 0)
        {
            // BUGFIX: this function returns bool; the original returned
            // NULL (implicitly false, but ill-formed and misleading)
            return false;
        }
    } while (m_packet.stream_index != m_nVideoStream);
    return true;
}
// Decode and return the next video frame as a BGR24 IplImage, or NULL at
// end of stream / on decode failure. The returned image is owned by this
// object — callers must NOT release it.
IplImage* CFFCapture::QueryFrame()
{
    // BUGFIX: the original ignored GrabFrame()'s result, so after end of
    // stream it decoded the stale packet and still advanced m_nFramePos.
    if (!GrabFrame())
    {
        return NULL;
    }
    // a video packet was grabbed: advance the current frame position
    m_nFramePos++;
    int nFrameFinished = 0; // set by the decoder when a full frame is ready
    avcodec_decode_video(m_pVideoCodecCtx, m_pFrame, &nFrameFinished, m_packet.data, m_packet.size);
    if (nFrameFinished) // decode succeeded
    {
        // conversion context: native pixel format -> BGR24, same dimensions
        SwsContext* pSwsCtx = sws_getContext(m_pVideoCodecCtx->width,
            m_pVideoCodecCtx->height,
            m_pVideoCodecCtx->pix_fmt,
            m_pVideoCodecCtx->width,
            m_pVideoCodecCtx->height,
            PIX_FMT_BGR24,
            SWS_BICUBIC,
            NULL, NULL, NULL);
        // convert the decoded (e.g. YUV420P) frame to BGR24
        sws_scale(pSwsCtx, m_pFrame->data, m_pFrame->linesize, 0, m_pVideoCodecCtx->height, m_pFrameRGB->data, m_pFrameRGB->linesize);
        // fill in the converted frame's metadata
        m_pFrameRGB->width = m_pVideoCodecCtx->width;
        m_pFrameRGB->height = m_pVideoCodecCtx->height;
        m_pFrameRGB->format = PIX_FMT_BGR24;
        sws_freeContext(pSwsCtx);
        // copy into the OpenCV image and hand it out
        AVFrameToIplImage(m_pFrameRGB, m_pImgRGB);
        return m_pImgRGB;
    }
    else
    {
        return NULL;
    }
}
// Fetch the frame at position nPos and make it the current frame.
// Returns the shared member image (do not release it), or NULL on failure.
// NOTE(review): this decodes the packet left in m_packet by SetFramePos(),
// duplicates most of QueryFrame(), and creates/destroys an SwsContext per
// call — refactoring candidates, left untouched here.
IplImage* CFFCapture::QueryFrame(int64_t nPos)
{
if (nPos < 0 || nPos >= m_nFrameNums)
{
return NULL;
}
if (!SetFramePos(nPos))
{
return NULL;
}
int nFrameFinished = 0;// set by the decoder when a full frame is ready
// decode the packet currently held in m_packet
avcodec_decode_video(m_pVideoCodecCtx, m_pFrame, &nFrameFinished, m_packet.data, m_packet.size);
if (nFrameFinished) // decode succeeded
{
// conversion context: native pixel format -> BGR24, same dimensions
SwsContext* pSwsCtx = sws_getContext(m_pVideoCodecCtx->width,
m_pVideoCodecCtx->height,
m_pVideoCodecCtx->pix_fmt,
m_pVideoCodecCtx->width,
m_pVideoCodecCtx->height,
PIX_FMT_BGR24,
SWS_BICUBIC,
NULL, NULL, NULL);
// convert the decoded (e.g. YUV420P) frame to BGR24
sws_scale(pSwsCtx, m_pFrame->data, m_pFrame->linesize, 0, m_pVideoCodecCtx->height, m_pFrameRGB->data, m_pFrameRGB->linesize);
// record the new current frame position
m_nFramePos = nPos;
// fill in the converted frame's metadata
m_pFrameRGB->width = m_pVideoCodecCtx->width;
m_pFrameRGB->height = m_pVideoCodecCtx->height;
m_pFrameRGB->format = PIX_FMT_BGR24;
// release the conversion context
sws_freeContext(pSwsCtx);
// copy into the OpenCV image and hand it out
AVFrameToIplImage(m_pFrameRGB, m_pImgRGB);
return m_pImgRGB;
}
else
{
// decode failed
return NULL;
}
}
// Extract the raw PCM audio between two video-frame positions.
// Returns a malloc'd sample buffer (caller frees) and sets nAudioLength to
// its length in sizeof(short) units, or NULL on failure. Video and audio
// streams are slightly offset in time, but for segment-level feature
// extraction video-frame positions are an acceptable reference.
short* CFFCapture::QueryAudio(int64_t nStartPos, int64_t nStopPos, int64_t& nAudioLength)
{
    // the requested range must be non-empty
    if (nStartPos >= nStopPos)
    {
        return NULL;
    }
    // BUGFIX: the original clamped nStartPos by testing `nStopPos < 0`
    nStartPos = nStartPos < 0 ? 0 : nStartPos;
    nStopPos = nStopPos > m_nFrameNums ? m_nFrameNums : nStopPos;
    int nMaxSize = AVCODEC_MAX_AUDIO_FRAME_SIZE;// capacity of one decode buffer
    short* pAudioBuffer = (short*)malloc(nMaxSize);// single-packet decode buffer
    short* pAudioResult = (short*)malloc(nMaxSize * (nStopPos - nStartPos));// accumulated result
    int nSize = 0;// decoded bytes of one packet
    nAudioLength = 0;// result length, in sizeof(short) units
    int nFramePosCp = m_nFramePos;// remember the video position to restore it afterwards
    if (NULL == pAudioBuffer || NULL == pAudioResult)
    {
        // BUGFIX: free whichever allocation succeeded (free(NULL) is a no-op)
        free(pAudioBuffer);
        free(pAudioResult);
        return NULL;
    }
    // seek to the start position
    if (!SetFramePos(nStartPos))
    {
        free(pAudioBuffer);// BUGFIX: the original leaked both buffers here
        free(pAudioResult);
        return NULL;
    }
    // read packets until the stop position is reached
    while (m_nFramePos != nStopPos)
    {
        if (av_read_frame(m_pFormatCtx, &m_packet) < 0)
        {
            free(pAudioBuffer);// BUGFIX: leaked in the original
            free(pAudioResult);
            return NULL;
        }
        if (m_packet.stream_index == m_nAudioStream) // audio packet
        {
            // BUGFIX: on input, *frame_size_ptr must be the CAPACITY of the
            // output buffer; the original passed capacity * 100, allowing
            // the decoder to overflow pAudioBuffer.
            nSize = nMaxSize;
            if (avcodec_decode_audio2(m_pAudioCodecCtx, pAudioBuffer, &nSize, m_packet.data, m_packet.size) < 0 || nSize <= 0)
            {
                // decode failed
                free(pAudioBuffer);// BUGFIX: leaked in the original
                free(pAudioResult);
                return NULL;
            }
            // append the decoded samples to the result
            memcpy(pAudioResult + nAudioLength, pAudioBuffer, nSize);
            nAudioLength += nSize / sizeof(short);
        }
        else if (m_packet.stream_index == m_nVideoStream) // video packet
        {
            // advance the video frame position
            m_nFramePos++;
        }
    }
    free(pAudioBuffer);
    // restore the original video position
    SetFramePos(nFramePosCp);
    return pAudioResult;
}
// Extract the audio between two video-frame positions as a CFFAudio object.
// Returns NULL on failure; on success the caller owns (deletes) the result.
CFFAudio* CFFCapture::QueryAudio(int64_t nStartPos, int64_t nStopPos)
{
    int64_t nAudioLength = -1;
    short* pAudioData = QueryAudio(nStartPos, nStopPos, nAudioLength);
    if (NULL == pAudioData || nAudioLength <= 0)
    {
        free(pAudioData); // free(NULL) is a no-op
        return NULL;
    }
    CFFAudio* pAudio = new CFFAudio();
    bool bCreated = pAudio->CreateFFAudio(pAudioData, nAudioLength, m_nAudioChannels, m_nSampleRate, m_nSampleFmt);
    // CreateFFAudio copies the samples, so the raw buffer is always freed
    free(pAudioData);
    if (!bCreated)
    {
        // BUGFIX: the original leaked the CFFAudio object on failure
        delete pAudio;
        return NULL;
    }
    return pAudio;
}
// Return the current video frame position (-1 before any frame was read).
int CFFCapture::GetFramePos()
{
return m_nFramePos;
}
// Position the stream so that the current frame is nPos.
// Strategy: timestamp-seek ~16 frames early (nDelta), flush the decoder,
// resync m_nFramePos from the first packet carrying a valid pts, then step
// forward one video packet at a time until nPos is reached.
// NOTE(review): m_nFramePos is assigned m_packet.pts directly, i.e. the pts
// is treated as a frame index — only correct for streams whose time base is
// one tick per frame; verify for the target files.
bool CFFCapture::SetFramePos(int64_t nPos)
{
if (nPos < 0 || m_nFrameNums < 0)
{
return false;
}
// clamp nPos to the total frame count
nPos = nPos >= m_nFrameNums ? m_nFrameNums : nPos;
// approximate the target timestamp (microseconds) from the frame rate
int nDelta = 16;// number of frames to seek ahead of the target
int64_t nStartTime = (int64_t)((nPos - nDelta) * 1.0 / m_dVideoFPS * 1000000);
nStartTime = nStartTime > 0 ? nStartTime : 0;
if (av_seek_frame(m_pFormatCtx, -1, nStartTime, AVSEEK_FLAG_BACKWARD) >= 0)
{
// drop any buffered decoder state after the seek
avcodec_flush_buffers(m_pVideoCodecCtx);
int nFrameFinished = 0;// decode flag
// grab frames until one carries a usable pts, then resync the position
while(GrabFrame())
{
// decode to keep the decoder state consistent
avcodec_decode_video(m_pVideoCodecCtx, m_pFrame, &nFrameFinished, m_packet.data, m_packet.size);
if (m_packet.pts != AV_NOPTS_VALUE)
{
m_nFramePos = m_packet.pts;
break;
}
}
// step forward until the current position reaches nPos
while (m_nFramePos <= nPos - 1)
{
// grab the next video packet
if (!GrabFrame())
{
return false;
}
else
{
// decode to keep the decoder state consistent
avcodec_decode_video(m_pVideoCodecCtx, m_pFrame, &nFrameFinished, m_packet.data, m_packet.size);
// advance the current frame position
m_nFramePos++;
}
}
return true;
}
else
{
return false;
}
}
// Return the total video frame count (as reported by the container).
int64_t CFFCapture::GetFrameNums()
{
return m_nFrameNums;
}
// Return the video frame dimensions as an OpenCV CvSize.
CvSize CFFCapture::GetFrameSize()
{
return cvSize(m_pVideoCodecCtx->width, m_pVideoCodecCtx->height);
}
// Return the audio channel count (-1 before CreateFFCapture() succeeds).
int CFFCapture::GetAudioChannels()
{
return m_nAudioChannels;
}
// Return the audio sample format (AV_SAMPLE_FMT_NONE before CreateFFCapture()).
AVSampleFormat CFFCapture::GetAudioSampleFmt()
{
return m_nSampleFmt;
}
// Return the audio sample rate (-1 before CreateFFCapture() succeeds).
int CFFCapture::GetAudioSampleRate()
{
return m_nSampleRate;
}
最后提供使用方法示例
#include <stdio.h>
#include "FFmpeg.h"
#include "FFCapture.h"
#include "FFAudio.h"
void main()
{
InitFFmpeg();
CFFCapture* pCapture = new CFFCapture();
if (!pCapture->CreateFFCapture("E:\\videos\\Swi_Fra1.avi"))
{
printf("1\n");
}
printf("%d\n", pCapture->GetFrameNums());
CFFAudio* pAudio1 = pCapture->QueryAudio(17417, 18071);
CFFAudio* pAudio2 = pCapture->QueryAudio(23121, 23256);
CFFAudio* pAudio3 = pCapture->QueryAudio(23257, 23489);
CFFAudio* pAudio4 = pCapture->QueryAudio(35826, 35893);
CFFAudio* pAudio5 = pCapture->QueryAudio(40831, 40971);
CFFAudio* pAudio6 = pCapture->QueryAudio(45944, 46235);
CFFAudio* pAudio7 = pCapture->QueryAudio(57345, 57433);
pAudio1->Save("正样本3.txt");
pAudio2->Save("正样本4.txt");
pAudio3->Save("正样本5.txt");
pAudio4->Save("正样本6.txt");
pAudio5->Save("正样本7.txt");
pAudio6->Save("正样本8.txt");
pAudio7->Save("正样本9.txt");
pAudio1->SaveAsWAVFile("正样本3.wav");
pAudio2->SaveAsWAVFile("正样本4.wav");
pAudio3->SaveAsWAVFile("正样本5.wav");
pAudio4->SaveAsWAVFile("正样本6.wav");
pAudio5->SaveAsWAVFile("正样本7.wav");
pAudio6->SaveAsWAVFile("正样本8.wav");
pAudio7->SaveAsWAVFile("正样本9.wav");
CFFAudio* pAudioy1 = pCapture->QueryAudio(1, 631);
CFFAudio* pAudioy2 = pCapture->QueryAudio(631, 684);
CFFAudio* pAudioy3 = pCapture->QueryAudio(684, 1074);
CFFAudio* pAudioy4 = pCapture->QueryAudio(4089, 4152);
CFFAudio* pAudioy5 = pCapture->QueryAudio(4343, 4401);
CFFAudio* pAudioy6 = pCapture->QueryAudio(4667, 4759);
CFFAudio* pAudioy7 = pCapture->QueryAudio(42984, 43034);
pAudioy1->Save("负样本3.txt");
pAudioy2->Save("负样本4.txt");
pAudioy3->Save("负样本5.txt");
pAudioy4->Save("负样本6.txt");
pAudioy5->Save("负样本7.txt");
pAudioy6->Save("负样本8.txt");
pAudioy7->Save("负样本9.txt");
pAudioy1->SaveAsWAVFile("负样本3.wav");
pAudioy2->SaveAsWAVFile("负样本4.wav");
pAudioy3->SaveAsWAVFile("负样本5.wav");
pAudioy4->SaveAsWAVFile("负样本6.wav");
pAudioy5->SaveAsWAVFile("负样本7.wav");
pAudioy6->SaveAsWAVFile("负样本8.wav");
pAudioy7->SaveAsWAVFile("负样本9.wav");
/*
pAudio1->SaveAsWAVFile("正样本1.wav");
pAudio2->SaveAsWAVFile("正样本2.wav");
int nAENum = 0;
int nZCRNum = 0;
ofstream fout;
fout.open("音频特征.txt", ios::out | ios::app);
fout<<"正样本:\n";
pAudio1->ExtractFeature(nAENum, nZCRNum);
fout<<"1号样本:"<<nAENum * 1.0 / pAudio1->GetFrameNums()<<" "<<nZCRNum * 1.0 / pAudio1->GetFrameNums()<<" "<<pAudio1->GetFrameNums()<<endl;
pAudio2->ExtractFeature(nAENum, nZCRNum);
fout<<"2号样本:"<<nAENum * 1.0 / pAudio2->GetFrameNums()<<" "<<nZCRNum * 1.0 / pAudio2->GetFrameNums()<<" "<<pAudio2->GetFrameNums()<<endl;
*/
/*
CFFAudio* pAudio6 = pCapture->QueryAudio(16, 286);
CFFAudio* pAudio7 = pCapture->QueryAudio(1218, 1257);
pAudio6->Save("负样本1.txt");
pAudio7->Save("负样本2.txt");
pAudio6->SaveAsWAVFile("负样本1.wav");
pAudio7->SaveAsWAVFile("负样本2.wav");
fout<<"负样本:\n";
pAudio6->ExtractFeature(nAENum, nZCRNum);
fout<<"1号样本:"<<nAENum * 1.0 / pAudio6->GetFrameNums()<<" "<<nZCRNum * 1.0 / pAudio6->GetFrameNums()<<" "<<pAudio6->GetFrameNums()<<endl;
pAudio7->ExtractFeature(nAENum, nZCRNum);
fout<<"2号样本:"<<nAENum * 1.0 / pAudio7->GetFrameNums()<<" "<<nZCRNum * 1.0 / pAudio7->GetFrameNums()<<" "<<pAudio7->GetFrameNums()<<endl;
fout.close();
*/
/*
//正样本
CFFAudio* pAudio1 = pCapture->QueryAudio(751, 1867);
CFFAudio* pAudio2 = pCapture->QueryAudio(14213, 15374);
CFFAudio* pAudio3 = pCapture->QueryAudio(19704, 22040);
CFFAudio* pAudio4 = pCapture->QueryAudio(44839, 46646);
CFFAudio* pAudio5 = pCapture->QueryAudio(59913, 61193);
pAudio1->SaveAsWAVFile("正样本1.wav");
pAudio2->SaveAsWAVFile("正样本2.wav");
pAudio3->SaveAsWAVFile("正样本3.wav");
pAudio4->SaveAsWAVFile("正样本4.wav");
pAudio5->SaveAsWAVFile("正样本5.wav");
int nAENum = 0;
int nZCRNum = 0;
ofstream fout;
fout.open("音频特征.txt", ios::out | ios::app);
fout<<"正样本:\n";
pAudio1->ExtractFeature(nAENum, nZCRNum);
fout<<"1号样本:"<<nAENum * 1.0 / pAudio1->GetFrameNums()<<" "<<nZCRNum * 1.0 / pAudio1->GetFrameNums()<<" "<<pAudio1->GetFrameNums()<<endl;
pAudio2->ExtractFeature(nAENum, nZCRNum);
fout<<"2号样本:"<<nAENum * 1.0 / pAudio2->GetFrameNums()<<" "<<nZCRNum * 1.0 / pAudio2->GetFrameNums()<<" "<<pAudio2->GetFrameNums()<<endl;
pAudio3->ExtractFeature(nAENum, nZCRNum);
fout<<"3号样本:"<<nAENum * 1.0 / pAudio3->GetFrameNums()<<" "<<nZCRNum * 1.0 / pAudio3->GetFrameNums()<<" "<<pAudio3->GetFrameNums()<<endl;
pAudio4->ExtractFeature(nAENum, nZCRNum);
fout<<"4号样本:"<<nAENum * 1.0 / pAudio4->GetFrameNums()<<" "<<nZCRNum * 1.0 / pAudio4->GetFrameNums()<<" "<<pAudio4->GetFrameNums()<<endl;
pAudio5->ExtractFeature(nAENum, nZCRNum);
fout<<"5号样本:"<<nAENum * 1.0 / pAudio5->GetFrameNums()<<" "<<nZCRNum * 1.0 / pAudio5->GetFrameNums()<<" "<<pAudio5->GetFrameNums()<<endl;
//负样本
CFFAudio* pAudio6 = pCapture->QueryAudio(4993, 5422);
CFFAudio* pAudio7 = pCapture->QueryAudio(6020, 6886);
CFFAudio* pAudio8 = pCapture->QueryAudio(8874, 10279);
CFFAudio* pAudio9 = pCapture->QueryAudio(41513, 42937);
CFFAudio* pAudio10 = pCapture->QueryAudio(55245, 55504);
pAudio6->SaveAsWAVFile("负样本1.wav");
pAudio7->SaveAsWAVFile("负样本2.wav");
pAudio8->SaveAsWAVFile("负样本3.wav");
pAudio9->SaveAsWAVFile("负样本4.wav");
pAudio10->SaveAsWAVFile("负样本5.wav");
fout<<"负样本:\n";
pAudio6->ExtractFeature(nAENum, nZCRNum);
fout<<"1号样本:"<<nAENum * 1.0 / pAudio6->GetFrameNums()<<" "<<nZCRNum * 1.0 / pAudio6->GetFrameNums()<<" "<<pAudio6->GetFrameNums()<<endl;
pAudio7->ExtractFeature(nAENum, nZCRNum);
fout<<"2号样本:"<<nAENum * 1.0 / pAudio7->GetFrameNums()<<" "<<nZCRNum * 1.0 / pAudio7->GetFrameNums()<<" "<<pAudio7->GetFrameNums()<<endl;
pAudio8->ExtractFeature(nAENum, nZCRNum);
fout<<"3号样本:"<<nAENum * 1.0 / pAudio8->GetFrameNums()<<" "<<nZCRNum * 1.0 / pAudio8->GetFrameNums()<<" "<<pAudio8->GetFrameNums()<<endl;
pAudio9->ExtractFeature(nAENum, nZCRNum);
fout<<"4号样本:"<<nAENum * 1.0 / pAudio9->GetFrameNums()<<" "<<nZCRNum * 1.0 / pAudio9->GetFrameNums()<<" "<<pAudio9->GetFrameNums()<<endl;
pAudio10->ExtractFeature(nAENum, nZCRNum);
fout<<"5号样本:"<<nAENum * 1.0 / pAudio10->GetFrameNums()<<" "<<nZCRNum * 1.0 / pAudio10->GetFrameNums()<<" "<<pAudio10->GetFrameNums()<<endl;
fout.close();
*/
}