实验一 利用ffmpeg进行视音频信息提取 |音视频技术
一、实验目的
1、深入掌握视音频的基本参数信息
2、掌握ffmpeg编译环境配置
3、掌握和熟悉提取视音频文件的基本方法
二、实验要求
1、对ffmpeg的编译环境进行配置;
2、对一个视频文件,提取基本信息(例如,封装格式,码流,视频编码方式,音频编码方式,分辨率,帧率,时长等等),并输出为txt文档。结果与MediaInfo的信息对比,并截图;
3、对该视频文件,提取视频信息,保存为yuv格式。结果利用yuv播放器播放并截图;
4、对该视频文件,提取音频信息,保存为wav格式。结果利用Adobe Audition播放并截图。
1、环境配置
项目文件夹下
2、提取基本信息并且对比存储
3、对该视频文件,提取视频信息,保存为yuv格式。结果利用yuv播放器播放并截图
4、对该视频文件,提取音频信息,保存为wav格式。结果利用Adobe Audition播放并截图
三、实验代码
#define __STDC_CONSTANT_MACROS /* must precede <stdint.h> so UINT64_C etc. exist in C++ */
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#ifdef _WIN32
//Windows
extern "C"
{
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libswscale/swscale.h"
#include "libswresample/swresample.h"
#include <libavutil/opt.h>
#include <libavutil/channel_layout.h>
#include <libavutil/samplefmt.h>
};
#else
//Linux...
#ifdef __cplusplus
extern "C"
{
#endif
#include <libavformat/avformat.h>
#ifdef __cplusplus
};
#endif
#endif
//'1': Use H.264 Bitstream Filter
#define USE_H264BSF 1
#define MAX_AUDIO_FRAME_SIZE 192000 // 1 second of 48khz 32bit audio
/*
 * Canonical 44-byte WAV (RIFF/PCM) header, split into its three chunks.
 * FIX: the original declared these fields as `unsigned long`/`unsigned short`.
 * `unsigned long` is 8 bytes on LP64 (Linux/macOS 64-bit), which silently
 * breaks the on-disk layout; fixed-width types make the layout portable.
 */
typedef struct WAVE_HEADER {        /* 12 bytes */
	char fccID[4];                  /* "RIFF" */
	uint32_t dwSize;                /* file size - 8 (excludes fccID+dwSize) */
	char fccType[4];                /* "WAVE" */
} WAVE_HEADER;
typedef struct WAVE_FMT {           /* 24 bytes */
	char fccSub1ID[4];              /* "fmt " -- note the trailing space */
	uint32_t dwSub1Size;            /* 16 = chunk bytes after this field */
	uint16_t wFormatTag;            /* 1 = PCM; other values imply compression */
	uint16_t numChannels;           /* 1 = mono, 2 = stereo, ... */
	uint32_t sampleRate;            /* e.g. 8000, 44100 */
	uint32_t byteRate;              /* sampleRate * numChannels * bitsPerSample/8 */
	uint16_t wBlockAlign;           /* numChannels * bitsPerSample/8 */
	uint16_t uiBitsPerSample;       /* 8, 16, 32, ... */
} WAVE_FMT;
typedef struct WAVE_DATA {          /* 8 bytes */
	char fccSub2ID[4];              /* "data" */
	uint32_t dwSub2Size;            /* PCM payload bytes that follow */
} WAVE_DATA;
/* total file size = dwSize + 8 = 44 + dwSub2Size  =>  dwSize = dwSub2Size + 36 */
#define SIZE_WAV_HEADER 12
#define SIZE_WAV_FMT 24
#define SIZE_WAV_DATA 8
/*
 * Build the 44-byte WAV header for 16-bit stereo 44.1 kHz PCM (the format
 * this program's resampler emits).
 * numSamples: total per-channel sample count of the PCM payload.
 * Returns a malloc()ed buffer of SIZE_WAV_HEADER+SIZE_WAV_FMT+SIZE_WAV_DATA
 * bytes -- the caller owns it and must free() it -- or NULL on OOM.
 */
char* set_wav_parm(int numSamples) {
	/* FIX: the original allocated only SIZE_WAV_HEADER (12) bytes and then
	 * wrote the FMT and DATA chunks through pointers past the end of that
	 * allocation -- a 32-byte heap buffer overflow. Allocate all 44 bytes. */
	char* p = (char*)malloc(SIZE_WAV_HEADER + SIZE_WAV_FMT + SIZE_WAV_DATA);
	if (p == NULL) {
		return NULL;
	}
	WAVE_HEADER* wavHEADER = (WAVE_HEADER*)p;
	WAVE_FMT* wavFMT = (WAVE_FMT*)(p + SIZE_WAV_HEADER);
	WAVE_DATA* wavDATA = (WAVE_DATA*)(p + SIZE_WAV_HEADER + SIZE_WAV_FMT);
	/* RIFF chunk */
	memcpy(wavHEADER->fccID, "RIFF", 4);
	memcpy(wavHEADER->fccType, "WAVE", 4);
	/* fmt chunk: hard-coded to the resampler's output format */
	memcpy(wavFMT->fccSub1ID, "fmt ", 4);
	wavFMT->dwSub1Size = 16;
	wavFMT->wFormatTag = 1;      /* PCM */
	wavFMT->numChannels = 2;
	wavFMT->sampleRate = 44100;
	wavFMT->uiBitsPerSample = 16;
	wavFMT->byteRate = (wavFMT->sampleRate * wavFMT->numChannels * wavFMT->uiBitsPerSample) / 8;
	wavFMT->wBlockAlign = (wavFMT->numChannels * wavFMT->uiBitsPerSample) / 8;
	/* data chunk */
	memcpy(wavDATA->fccSub2ID, "data", 4);
	wavDATA->dwSub2Size = (uint32_t)numSamples * wavFMT->numChannels * wavFMT->uiBitsPerSample / 8;
	wavHEADER->dwSize = wavDATA->dwSub2Size + 36;
	return p;
}
int main(int argc, char* argv[])
{
AVFormatContext* ifmt_ctx = NULL;
AVCodecContext* ifmt_CodecCtx = NULL;
AVCodec* pCodec;
AVFrame* pFrame, * pFrameYUV;
uint8_t* out_buffer;
AVPacket pkt;
int ret, i, got_picture;
int videoindex = -1, audioindex = -1;
struct SwsContext* img_convert_ctx;
const char* in_filename = "Titanic.ts";//Input file URL
const char* out_filename_v = "Titanic.h264";//Output file URL
const char* out_filename_yuv = "Titanic.yuv";
const char* out_filename_a = "Titanic.mp3";
av_register_all();
avcodec_register_all();
avformat_network_init();
ifmt_ctx = avformat_alloc_context();
//Input
if ((ret = avformat_open_input(&ifmt_ctx, in_filename, 0, 0)) < 0) {
printf("Could not open input file.");
return -1;
}
if ((ret = avformat_find_stream_info(ifmt_ctx, 0)) < 0) {
printf("Failed to retrieve input stream information");
return -1;
}
videoindex = -1;
for (i = 0; i < ifmt_ctx->nb_streams; i++) {
if (ifmt_ctx->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
videoindex = i;
}
else if (ifmt_ctx->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
audioindex = i;
}
}
//Dump Format------------------
printf("\nInput Video===========================\n");
av_dump_format(ifmt_ctx, 0, in_filename, 0);
printf("\n======================================\n");
FILE* fp_audio = fopen(out_filename_a, "wb+");
FILE* fp_video = fopen(out_filename_v, "wb+");
FILE* fp_video_yuv = fopen(out_filename_yuv, "wb+");
ifmt_CodecCtx = ifmt_ctx->streams[videoindex]->codec;
//查找解码器
pCodec = avcodec_find_decoder(ifmt_CodecCtx->codec_id);
if (pCodec == NULL) {
printf("Codec not found.\n");
return -1;
}
//打开解码器
if (avcodec_open2(ifmt_CodecCtx, pCodec, NULL) < 0) {
printf("Could not open codec.\n");
return -1;
}
//文件流形式输入到output.txt文本里
FILE* fp = fopen("output.txt", "wb+");
fprintf(fp, "封装格式:%s\n", ifmt_ctx->iformat->name);
fprintf(fp, "宽高:%d * %d\n", ifmt_ctx->streams[videoindex]->codec->width, ifmt_ctx->streams[videoindex]->codec->height);//videoindex是从上面for循环遍历得到的视频流的索引,一般来说视频流索引为0,音频为1
fprintf(fp, "时长: %d秒\n", ifmt_ctx->duration / 1000000);
fprintf(fp, "码率: %d\n", ifmt_ctx->bit_rate);
fprintf(fp, "封装格式的名称: %s\n", ifmt_ctx->iformat->name);
fprintf(fp, "封装格式的长名称: %s\n", ifmt_ctx->iformat->long_name);
fprintf(fp, "封装格式的扩展名: %s\n", ifmt_ctx->iformat->extensions);
fprintf(fp, "输入视频的AVStream个数: %d\n", ifmt_ctx->nb_streams);
fprintf(fp, "输入视频的AVStream序号: %d\n", ifmt_ctx->streams[videoindex]->id);
fprintf(fp, "输入视频的AVStream的时基: %d\n", ifmt_ctx->streams[videoindex]->time_base);
fprintf(fp, "输入视频的AVStream的帧率: %d\n", ifmt_ctx->streams[videoindex]->r_frame_rate);
fprintf(fp, "视频像素格式: %s\n", ifmt_ctx->streams[videoindex]->codec->pix_fmt);
fprintf(fp, "编解码器名称: %s\n", ifmt_ctx->streams[videoindex]->codec->codec->name);
fprintf(fp, "编解码器长名称: %s\n", ifmt_ctx->streams[videoindex]->codec->codec->long_name);
fprintf(fp, "编解码器类型: %s\n", ifmt_ctx->streams[videoindex]->codec->codec->type);
fprintf(fp, "编解码器ID: %d\n", ifmt_ctx->streams[videoindex]->codec->codec->id);
fprintf(fp, "音频采样率: %d\n", ifmt_ctx->streams[audioindex]->codec->sample_rate);
fprintf(fp, "音频声道数: %d\n", ifmt_ctx->streams[audioindex]->codec->channels);
fprintf(fp, "音频采样格式: %d\n", ifmt_ctx->streams[audioindex]->codec->sample_fmt);
fclose(fp);
/*
FIX: H.264 in some container format (FLV, MP4, MKV etc.) need
"h264_mp4toannexb" bitstream filter (BSF)
*Add SPS,PPS in front of IDR frame
*Add start code ("0,0,0,1") in front of NALU
H.264 in some container (MPEG2TS) don't need this BSF.
*/
pFrame = av_frame_alloc();
pFrameYUV = av_frame_alloc();
out_buffer = (uint8_t*)av_malloc(avpicture_get_size(PIX_FMT_YUV420P, ifmt_CodecCtx->width, ifmt_CodecCtx->height));
avpicture_fill((AVPicture*)pFrameYUV, out_buffer, PIX_FMT_YUV420P, ifmt_CodecCtx->width, ifmt_CodecCtx->height);
img_convert_ctx = sws_getContext(ifmt_CodecCtx->width, ifmt_CodecCtx->height, ifmt_CodecCtx->pix_fmt,
ifmt_CodecCtx->width, ifmt_CodecCtx->height, PIX_FMT_YUV420P, 4, NULL, NULL, NULL);
#if USE_H264BSF
AVBitStreamFilterContext* h264bsfc = av_bitstream_filter_init("h264_mp4toannexb");
#endif
int frame_cnt = 0;
while (av_read_frame(ifmt_ctx, &pkt) >= 0) {
if (pkt.stream_index == videoindex) {
#if USE_H264BSF
av_bitstream_filter_filter(h264bsfc, ifmt_ctx->streams[videoindex]->codec, NULL, &pkt.data, &pkt.size, pkt.data, pkt.size, 0);
#endif
printf("Write Video Packet. size:%d\tpts:%lld\n", pkt.size, pkt.pts);
fwrite(pkt.data, 1, pkt.size, fp_video);
//解码一帧压缩数据
ret = avcodec_decode_video2(ifmt_CodecCtx, pFrame, &got_picture, &pkt);//YUV
if (got_picture) {
sws_scale(img_convert_ctx, (const uint8_t* const*)pFrame->data, pFrame->linesize, 0, ifmt_CodecCtx->height,
pFrameYUV->data, pFrameYUV->linesize);
printf("Decoded frame index: %d\n", frame_cnt);
fwrite(pFrameYUV->data[0], 1, ifmt_CodecCtx->width * ifmt_CodecCtx->height, fp_video_yuv);
fwrite(pFrameYUV->data[1], 1, ifmt_CodecCtx->width * ifmt_CodecCtx->height / 4, fp_video_yuv);
fwrite(pFrameYUV->data[2], 1, ifmt_CodecCtx->width * ifmt_CodecCtx->height / 4, fp_video_yuv);
frame_cnt++;
}
}
else if (pkt.stream_index == audioindex) {
/*
AAC in some container format (FLV, MP4, MKV etc.) need to add 7 Bytes
ADTS Header in front of AVPacket data manually.
Other Audio Codec (MP3...) works well.
*/
printf("Write Audio Packet. size:%d\tpts:%lld\n", pkt.size, pkt.pts);
fwrite(pkt.data, 1, pkt.size, fp_audio);
}
av_free_packet(&pkt);
}
const char* input_file = "Titanic.mp3";
av_register_all();
avformat_network_init();
AVFormatContext* pFormatCtx = avformat_alloc_context();
if (avformat_open_input(&pFormatCtx, input_file, 0, 0) != 0) {
printf("Couldn't open input stream.\n");
return -1;
}
if (avformat_find_stream_info(pFormatCtx, 0) < 0) {
printf("Couldn't find stream information.\n");
return -1;
}
//av_dump_format(pFormatCtx, 0, input_file, false);
//获取音频流索引位置
int j = 0, audio_stream_idx = -1;
for (; j < pFormatCtx->nb_streams; j++) {
if (pFormatCtx->streams[j]->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
audio_stream_idx = j;
break;
}
}
if (audio_stream_idx == -1) {
printf("Didn't find a audio stream.\n");
return -1;
}
AVCodecContext* codecCtx = pFormatCtx->streams[audio_stream_idx]->codec;
AVCodec* codec = avcodec_find_decoder(codecCtx->codec_id);
if (codec == NULL) {
printf("Codec not found.\n");
return -1;
}
if (avcodec_open2(codecCtx, codec, NULL) < 0) {
printf("Could not open codec.\n");
return -1;
}
AVSampleFormat in_sample_fmt = codecCtx->sample_fmt;//输入的采样格式
int in_sample_rate = codecCtx->sample_rate;//输入的采样率
int channels = codecCtx->channels;
printf("采样率:%d ,声道数:%d\n", in_sample_rate, channels);
//重采样设置参数
AVSampleFormat out_sample_fmt = AV_SAMPLE_FMT_S16; //输出采样格式16bit PCM
//输出采样率
int out_sample_rate = 44100;
//输出的声道布局(立体声)
uint64_t out_ch_layout = AV_CH_LAYOUT_STEREO;
SwrContext* swrCtx = swr_alloc();
swrCtx = swr_alloc_set_opts(swrCtx,
out_ch_layout, out_sample_fmt, out_sample_rate,
av_get_default_channel_layout(codecCtx->channels), in_sample_fmt, in_sample_rate,
0, NULL);
/*
av_opt_set_int(swrCtx, "in_channel_layout", AV_CH_LAYOUT_MONO, 0);
av_opt_set_int(swrCtx, "in_sample_rate", in_sample_rate, 0);
av_opt_set_sample_fmt(swrCtx, "in_sample_fmt", in_sample_fmt, 0);
av_opt_set_int(swrCtx, "out_channel_layout", out_ch_layout, 0);
av_opt_set_int(swrCtx, "out_sample_rate", out_sample_rate, 0);
av_opt_set_sample_fmt(swrCtx, "out_sample_fmt", out_sample_fmt, 0);*/
swr_init(swrCtx);
//输出的声道个数
int out_channel_nb = av_get_channel_layout_nb_channels(out_ch_layout);
printf("输出pcm: 采样率:%d , 声道数:%d\n", out_sample_rate, out_channel_nb);
const char* output_wav = "Titanic.wav";
FILE* fp_wav = fopen(output_wav, "wb+");
//16bit 44100 PCM 数据
uint8_t* out_buffer1 = (uint8_t*)av_malloc(MAX_AUDIO_FRAME_SIZE * 2);
int got_frame = 0, framecnt = 0;
AVPacket* packet = (AVPacket*)av_malloc(sizeof(AVPacket));
av_init_packet(packet);
AVFrame* frame = av_frame_alloc();
int totalsamples = 0;
fseek(fp_wav, SIZE_WAV_HEADER + SIZE_WAV_FMT + SIZE_WAV_DATA, 1);
while (av_read_frame(pFormatCtx, packet) >= 0) {
if (packet->stream_index == audio_stream_idx) {
//解码
avcodec_decode_audio4(codecCtx, frame, &got_frame, packet);
if (got_frame > 0) {
//解码得到的Frame数据,转成PCM
swr_convert(swrCtx, &out_buffer1, MAX_AUDIO_FRAME_SIZE, (const uint8_t**)frame->data, frame->nb_samples);
//printf("index:%5d\t pts:%lld\t packet size:%d\n", framecnt, packet->pts, packet->size);
//Write PCM
totalsamples += frame->nb_samples;
//音频文件字节大小= 采用率*时长*通道数*采样位数/8
//计算一帧音频帧占用的字节数 通道数 * 采样点数* 采样位数/8
int out_buffer_size = av_samples_get_buffer_size(NULL, out_channel_nb,
frame->nb_samples, out_sample_fmt, 1);
//frame->nb_samples 当前帧的一个通道的采样点数(经测试,可能有的帧值不等) 和codecCtx->frame_size(音频帧的一个通道的采样点数,是固定值)
//虽然两者都是表示音频的采样点数,但是我的理解,这里用frame->nb_samples更加科学。
fwrite(out_buffer1, 1, out_buffer_size, fp_wav);
framecnt++;
}
}
av_free_packet(packet);
}
swr_free(&swrCtx);
av_frame_free(&frame);
av_free(out_buffer1);
char* p = set_wav_parm(totalsamples);
rewind(fp_wav);//文件指针回到头部
fwrite(p, 1, SIZE_WAV_HEADER + SIZE_WAV_FMT + SIZE_WAV_DATA, fp_wav);//size取1个字节没问题,可能跟p是char类型有关,取大了,报错
fclose(fp_wav);
printf("success");
avcodec_close(codecCtx);
avformat_close_input(&pFormatCtx);
#if USE_H264BSF
av_bitstream_filter_close(h264bsfc);
#endif
fclose(fp_video);
fclose(fp_video_yuv);
fclose(fp_audio);
avformat_close_input(&ifmt_ctx);
if (ret < 0){
printf("Error occurred.\n");
return -1;
}
return 0;
}