转自:http://489553433.iteye.com/blog/1636488 感谢草帽海贼团!
ffmpeg:开源的跨平台的视频和音频流方案,提供了录制、转换以及流化音视频的完整解决方案,包含先进的音频/视频编解码库libavcodec,里面提供了许多API给我们使用,但仍有一些问题需要我们自己解决,如同步问题。
ffmpeg编译后的一些文件:
ffplay:一个真正的播放器,和vlc、mplayer等类似,带有图形界面。
ffmpeg:可以理解为一种工具,利用ffmpeg提供的API,再加上其他操作,可以实现转码等一些功能。
ffserver:做服务器的,可以单播或多播一些流。ffmpeg和ffserver配合使用可以实现实时的流媒体服务。
处理音视频的一般过程:
1、从视频文件中打开视频流(解复用的过程)
2、从视频流中读取包,并将包解码到帧当中(解码的过程)
3、如果帧还不完整,跳回2
4、对该帧进行操作
5、跳回2
一、打开文件获取视频流(以下操作使用ffmpeg-0.8版本)
<1> 使用av_register_all()注册所有的文件格式和编解码器的库,该函数只需要调用一次,所以最好在main函数中调用。
<2> av_open_input_file 打开视频文件。这个函数会读取视频文件头部信息并保存在AVFormatContext中,函数在avformat.h中声明,其实现如下:
1 int av_open_input_file(AVFormatContext **ic_ptr, const char *filename, 2 AVInputFormat *fmt, 3 int buf_size, 4 AVFormatParameters *ap) 5 { 6 int err; 7 AVDictionary *opts = convert_format_parameters(ap); 8 9 if (!ap || !ap->prealloced_context) 10 *ic_ptr = NULL; 11 12 err = avformat_open_input(ic_ptr, filename, fmt, &opts); 13 14 av_dict_free(&opts); 15 return err; 16 }
函数实现在utils.c中。如果后3个参数为NULL或者0,libavformat将自动检测这些参数,该函数最终调用avformat_open_input进行操作。
1 int avformat_open_input(AVFormatContext **ps, const char *filename, AVInputFormat *fmt, AVDictionary **options) 2 { 3 return avformat_open_input_header(ps,filename,fmt,options,NULL); 4 }
avformat_open_input_header作用是读取视频文件头部信息并保存在AVFormatContext中。
<3> 根据视频文件头部信息,得到音视频流的信息,调用函数av_find_stream_info(声明在avformat.h中,实现在utils.c中)
1 int av_find_stream_info(AVFormatContext *ic) 2 { 3 int i, count, ret, read_size, j; 4 AVStream *st; 5 AVPacket pkt1, *pkt; 6 int64_t old_offset = avio_tell(ic->pb); 7 8 for(i=0;i<ic->nb_streams;i++) { 9 AVCodec *codec; 10 st = ic->streams[i]; 11 /*st->codec得到的是AVCodecContext类型,其保存了流中关于使用 12 编解码器的信息 */ 13 if (st->codec->codec_id == CODEC_ID_AAC) { 14 st->codec->sample_rate = 0; 15 st->codec->frame_size = 0; 16 st->codec->channels = 0; 17 } 18 if (st->codec->codec_type == AVMEDIA_TYPE_VIDEO || 19 st->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) { 20 /* if(!st->time_base.num) 21 st->time_base= */ 22 if(!st->codec->time_base.num) 23 /* time_base是一个AVRational(分母)结构体,保存帧率的信息,现在很多编解码器 24 都使用非整数的帧率,如NTSC使用29.97fps*/ 25 st->codec->time_base= st->time_base; 26 } 27 //only for the split stuff 28 if (!st->parser && !(ic->flags & AVFMT_FLAG_NOPARSE)) { 29 st->parser = av_parser_init(st->codec->codec_id); 30 if(st->need_parsing == AVSTREAM_PARSE_HEADERS && st->parser){ 31 st->parser->flags |= PARSER_FLAG_COMPLETE_FRAMES; 32 } 33 } 34 assert(!st->codec->codec); 35 /*找到对应的编解码器*/ 36 codec = avcodec_find_decoder(st->codec->codec_id); 37 38 /* Force decoding of at least one frame of codec data 39 * this makes sure the codec initializes the channel configuration 40 * and does not trust the values from the container. 41 */ 42 if (codec && codec->capabilities & CODEC_CAP_CHANNEL_CONF) 43 st->codec->channels = 0; 44 45 /* Ensure that subtitle_header is properly set. 
*/ 46 if (st->codec->codec_type == AVMEDIA_TYPE_SUBTITLE 47 && codec && !st->codec->codec) 48 //打开编解码器 49 avcodec_open(st->codec, codec); 50 51 //try to just open decoders, in case this is enough to get parameters 52 if(!has_codec_parameters(st->codec)){ 53 if (codec && !st->codec->codec) 54 avcodec_open(st->codec, codec); 55 } 56 } 57 58 for (i=0; i<ic->nb_streams; i++) { 59 ic->streams[i]->info->last_dts = AV_NOPTS_VALUE; 60 } 61 62 count = 0; 63 read_size = 0; 64 for(;;) { 65 if(url_interrupt_cb()){ 66 ret= AVERROR_EXIT; 67 av_log(ic, AV_LOG_DEBUG, "interrupted\n"); 68 break; 69 } 70 71 /* check if one codec still needs to be handled */ 72 for(i=0;i<ic->nb_streams;i++) { 73 int fps_analyze_framecount = 20; 74 75 st = ic->streams[i]; 76 if (!has_codec_parameters(st->codec)) 77 break; 78 /* if the timebase is coarse (like the usual millisecond precision 79 of mkv), we need to analyze more frames to reliably arrive at 80 the correct fps */ 81 if (av_q2d(st->time_base) > 0.0005) 82 fps_analyze_framecount *= 2; 83 if (ic->fps_probe_size >= 0) 84 fps_analyze_framecount = ic->fps_probe_size; 85 /* variable fps and no guess at the real fps */ 86 if( tb_unreliable(st->codec) && !(st->r_frame_rate.num && st->avg_frame_rate.num) 87 && st->info->duration_count < fps_analyze_framecount 88 && st->codec->codec_type == AVMEDIA_TYPE_VIDEO) 89 break; 90 if(st->parser && st->parser->parser->split && !st->codec->extradata) 91 break; 92 if(st->first_dts == AV_NOPTS_VALUE) 93 break; 94 } 95 if (i == ic->nb_streams) { 96 /* NOTE: if the format has no header, then we need to read 97 some packets to get most of the streams, so we cannot 98 stop here */ 99 if (!(ic->ctx_flags & AVFMTCTX_NOHEADER)) { 100 /* if we found the info for all the codecs, we can stop */ 101 ret = count; 102 av_log(ic, AV_LOG_DEBUG, "All info found\n"); 103 break; 104 } 105 } 106 /* we did not get all the codec info, but we read too much data */ 107 if (read_size >= ic->probesize) { 108 ret = count; 109 av_log(ic, 
AV_LOG_DEBUG, "Probe buffer size limit %d reached\n", ic->probesize); 110 break; 111 } 112 113 /* NOTE: a new stream can be added there if no header in file 114 (AVFMTCTX_NOHEADER) */ 115 ret = av_read_frame_internal(ic, &pkt1); 116 if (ret < 0 && ret != AVERROR(EAGAIN)) { 117 /* EOF or error */ 118 ret = -1; /* we could not have all the codec parameters before EOF */ 119 for(i=0;i<ic->nb_streams;i++) { 120 st = ic->streams[i]; 121 if (!has_codec_parameters(st->codec)){ 122 char buf[256]; 123 avcodec_string(buf, sizeof(buf), st->codec, 0); 124 av_log(ic, AV_LOG_WARNING, "Could not find codec parameters (%s)\n", buf); 125 } else { 126 ret = 0; 127 } 128 } 129 break; 130 } 131 132 if (ret == AVERROR(EAGAIN)) 133 continue; 134 135 pkt= add_to_pktbuf(&ic->packet_buffer, &pkt1, &ic->packet_buffer_end); 136 if ((ret = av_dup_packet(pkt)) < 0) 137 goto find_stream_info_err; 138 139 read_size += pkt->size; 140 141 st = ic->streams[pkt->stream_index]; 142 if (st->codec_info_nb_frames>1) { 143 int64_t t; 144 if (st->time_base.den > 0 && (t=av_rescale_q(st->info->codec_info_duration, st->time_base, AV_TIME_BASE_Q)) >= ic->max_analyze_duration) { 145 av_log(ic, AV_LOG_WARNING, "max_analyze_duration %d reached at %"PRId64"\n", ic->max_analyze_duration, t); 146 break; 147 } 148 st->info->codec_info_duration += pkt->duration; 149 } 150 { 151 int64_t last = st->info->last_dts; 152 int64_t duration= pkt->dts - last; 153 154 if(pkt->dts != AV_NOPTS_VALUE && last != AV_NOPTS_VALUE && duration>0){ 155 double dur= duration * av_q2d(st->time_base); 156 157 // if(st->codec->codec_type == AVMEDIA_TYPE_VIDEO) 158 // av_log(NULL, AV_LOG_ERROR, "%f\n", dur); 159 if (st->info->duration_count < 2) 160 memset(st->info->duration_error, 0, sizeof(st->info->duration_error)); 161 for (i=1; i<FF_ARRAY_ELEMS(st->info->duration_error); i++) { 162 int framerate= get_std_framerate(i); 163 int ticks= lrintf(dur*framerate/(1001*12)); 164 double error= dur - ticks*1001*12/(double)framerate; 165 
st->info->duration_error[i] += error*error; 166 } 167 st->info->duration_count++; 168 // ignore the first 4 values, they might have some random jitter 169 if (st->info->duration_count > 3) 170 st->info->duration_gcd = av_gcd(st->info->duration_gcd, duration); 171 } 172 if (last == AV_NOPTS_VALUE || st->info->duration_count <= 1) 173 st->info->last_dts = pkt->dts; 174 } 175 if(st->parser && st->parser->parser->split && !st->codec->extradata){ 176 int i= st->parser->parser->split(st->codec, pkt->data, pkt->size); 177 if(i){ 178 st->codec->extradata_size= i; 179 st->codec->extradata= av_malloc(st->codec->extradata_size + FF_INPUT_BUFFER_PADDING_SIZE); 180 memcpy(st->codec->extradata, pkt->data, st->codec->extradata_size); 181 memset(st->codec->extradata + i, 0, FF_INPUT_BUFFER_PADDING_SIZE); 182 } 183 } 184 185 /* if still no information, we try to open the codec and to 186 decompress the frame. We try to avoid that in most cases as 187 it takes longer and uses more memory. For MPEG-4, we need to 188 decompress for QuickTime. 
*/ 189 if (!has_codec_parameters(st->codec) || !has_decode_delay_been_guessed(st)) 190 try_decode_frame(st, pkt); 191 192 st->codec_info_nb_frames++; 193 count++; 194 } 195 196 // close codecs which were opened in try_decode_frame() 197 for(i=0;i<ic->nb_streams;i++) { 198 st = ic->streams[i]; 199 if(st->codec->codec) 200 avcodec_close(st->codec); 201 } 202 for(i=0;i<ic->nb_streams;i++) { 203 st = ic->streams[i]; 204 if (st->codec_info_nb_frames>2 && !st->avg_frame_rate.num && st->info->codec_info_duration) 205 av_reduce(&st->avg_frame_rate.num, &st->avg_frame_rate.den, 206 (st->codec_info_nb_frames-2)*(int64_t)st->time_base.den, 207 st->info->codec_info_duration*(int64_t)st->time_base.num, 60000); 208 if (st->codec->codec_type == AVMEDIA_TYPE_VIDEO) { 209 if(st->codec->codec_id == CODEC_ID_RAWVIDEO && !st->codec->codec_tag && !st->codec->bits_per_coded_sample){ 210 uint32_t tag= avcodec_pix_fmt_to_codec_tag(st->codec->pix_fmt); 211 if(ff_find_pix_fmt(ff_raw_pix_fmt_tags, tag) == st->codec->pix_fmt) 212 st->codec->codec_tag= tag; 213 } 214 215 // the check for tb_unreliable() is not completely correct, since this is not about handling 216 // a unreliable/inexact time base, but a time base that is finer than necessary, as e.g. 217 // ipmovie.c produces. 218 if (tb_unreliable(st->codec) && st->info->duration_count > 15 && st->info->duration_gcd > FFMAX(1, st->time_base.den/(500LL*st->time_base.num)) && !st->r_frame_rate.num) 219 av_reduce(&st->r_frame_rate.num, &st->r_frame_rate.den, st->time_base.den, st->time_base.num * st->info->duration_gcd, INT_MAX); 220 if (st->info->duration_count && !st->r_frame_rate.num 221 && tb_unreliable(st->codec) /*&& 222 //FIXME we should not special-case MPEG-2, but this needs testing with non-MPEG-2 ... 
223 st->time_base.num*duration_sum[i]/st->info->duration_count*101LL > st->time_base.den*/){ 224 int num = 0; 225 double best_error= 2*av_q2d(st->time_base); 226 best_error = best_error*best_error*st->info->duration_count*1000*12*30; 227 228 for (j=1; j<FF_ARRAY_ELEMS(st->info->duration_error); j++) { 229 double error = st->info->duration_error[j] * get_std_framerate(j); 230 // if(st->codec->codec_type == AVMEDIA_TYPE_VIDEO) 231 // av_log(NULL, AV_LOG_ERROR, "%f %f\n", get_std_framerate(j) / 12.0/1001, error); 232 if(error < best_error){ 233 best_error= error; 234 num = get_std_framerate(j); 235 } 236 } 237 // do not increase frame rate by more than 1 % in order to match a standard rate. 238 if (num && (!st->r_frame_rate.num || (double)num/(12*1001) < 1.01 * av_q2d(st->r_frame_rate))) 239 av_reduce(&st->r_frame_rate.num, &st->r_frame_rate.den, num, 12*1001, INT_MAX); 240 } 241 242 if (!st->r_frame_rate.num){ 243 if( st->codec->time_base.den * (int64_t)st->time_base.num 244 <= st->codec->time_base.num * st->codec->ticks_per_frame * (int64_t)st->time_base.den){ 245 st->r_frame_rate.num = st->codec->time_base.den; 246 st->r_frame_rate.den = st->codec->time_base.num * st->codec->ticks_per_frame; 247 }else{ 248 st->r_frame_rate.num = st->time_base.den; 249 st->r_frame_rate.den = st->time_base.num; 250 } 251 } 252 }else if(st->codec->codec_type == AVMEDIA_TYPE_AUDIO) { 253 if(!st->codec->bits_per_coded_sample) 254 st->codec->bits_per_coded_sample= av_get_bits_per_sample(st->codec->codec_id); 255 // set stream disposition based on audio service type 256 switch (st->codec->audio_service_type) { 257 case AV_AUDIO_SERVICE_TYPE_EFFECTS: 258 st->disposition = AV_DISPOSITION_CLEAN_EFFECTS; break; 259 case AV_AUDIO_SERVICE_TYPE_VISUALLY_IMPAIRED: 260 st->disposition = AV_DISPOSITION_VISUAL_IMPAIRED; break; 261 case AV_AUDIO_SERVICE_TYPE_HEARING_IMPAIRED: 262 st->disposition = AV_DISPOSITION_HEARING_IMPAIRED; break; 263 case AV_AUDIO_SERVICE_TYPE_COMMENTARY: 264 st->disposition = 
AV_DISPOSITION_COMMENT; break; 265 case AV_AUDIO_SERVICE_TYPE_KARAOKE: 266 st->disposition = AV_DISPOSITION_KARAOKE; break; 267 } 268 } 269 } 270 271 av_estimate_timings(ic, old_offset); 272 273 compute_chapters_end(ic); 274 275 #if 0 276 /* correct DTS for B-frame streams with no timestamps */ 277 for(i=0;i<ic->nb_streams;i++) { 278 st = ic->streams[i]; 279 if (st->codec->codec_type == AVMEDIA_TYPE_VIDEO) { 280 if(b-frames){ 281 ppktl = &ic->packet_buffer; 282 while(ppkt1){ 283 if(ppkt1->stream_index != i) 284 continue; 285 if(ppkt1->pkt->dts < 0) 286 break; 287 if(ppkt1->pkt->pts != AV_NOPTS_VALUE) 288 break; 289 ppkt1->pkt->dts -= delta; 290 ppkt1= ppkt1->next; 291 } 292 if(ppkt1) 293 continue; 294 st->cur_dts -= delta; 295 } 296 } 297 } 298 #endif 299 300 find_stream_info_err: 301 for (i=0; i < ic->nb_streams; i++) 302 av_freep(&ic->streams[i]->info); 303 return ret; 304 }
二、读取包的信息保存在帧中
<1> 分配目标帧的内存,函数为avcodec_alloc_frame() (声明在avcodec.h中,实现在libavcodec/utils.c中)
1 AVFrame *avcodec_alloc_frame(void); 2 3 /* ffmpeg/libavcodec/utils.c */ 4 AVFrame *avcodec_alloc_frame(void){ 5 AVFrame *pic= av_malloc(sizeof(AVFrame)); 6 7 if(pic==NULL) return NULL; 8 9 avcodec_get_frame_defaults(pic); 10 11 return pic; 12 }
<2> 通过读取包来读取视频流,再将它解码成帧,主要函数是av_read_frame()。这里说明一下,av_read_packet这个函数已经不用了,在ffmpeg 0.8中有进行说明。av_read_frame的函数原型如下:
1 /** 2 * Return the next frame of a stream. 3 * This function returns what is stored in the file, and does not validate 4 * that what is there are valid frames for the decoder. It will split what is 5 * stored in the file into frames and return one for each call. It will not 6 * omit invalid data between valid frames so as to give the decoder the maximum 7 * information possible for decoding. 8 * 9 * The returned packet is valid 10 * until the next av_read_frame() or until av_close_input_file() and 11 * must be freed with av_free_packet. For video, the packet contains 12 * exactly one frame. For audio, it contains an integer number of 13 * frames if each frame has a known fixed size (e.g. PCM or ADPCM 14 * data). If the audio frames have a variable size (e.g. MPEG audio), 15 * then it contains one frame. 16 * 17 * pkt->pts, pkt->dts and pkt->duration are always set to correct 18 * values in AVStream.time_base units (and guessed if the format cannot 19 * provide them). pkt->pts can be AV_NOPTS_VALUE if the video format 20 * has B-frames, so it is better to rely on pkt->dts if you do not 21 * decompress the payload. 22 * 23 * @return 0 if OK, < 0 on error or end of file 24 */ 25 int av_read_frame(AVFormatContext *s, AVPacket *pkt);
av_read_frame通常是在while循环中调用,主要是读取一个包并且将它保存在AVPacket结构体中,再使用函数avcodec_decode_video2()将包解码为帧(原先的avcodec_decode_video()方法已经不用了),ffmpeg/doc/APIChanges中的说明如下:
1 2009-04-07 - r18351 - lavc 52.23.0 - avcodec_decode_video/audio/subtitle 2 The old decoding functions are deprecated, all new code should use the 3 new functions avcodec_decode_video2(), avcodec_decode_audio3() and 4 avcodec_decode_subtitle2(). These new functions take an AVPacket *pkt 5 argument instead of a const uint8_t *buf / int buf_size pair.
解码一个包时,不一定能得到一个完整的帧,可能还需要继续读取并解码后面的包。avcodec_decode_video2会设置帧的完成标志got_picture:当该标志非0时,说明已经得到了完整的一帧,我们就可以对其进行自己所需要的操作了。