avformat_open_input
s->iformat->read_header(s)
--> mov_read_header
mov_read_default
-->mov_read_trak
-->mov_build_index
video的dts的由来:
AVIndex stream=0, sample=0x0, offset=0x30, dts=-1001, size=0x1333, distance=0, keyframe=1
AVIndex stream=0, sample=0x1, offset=0x137d, dts=0x0, size=0xf6f, distance=1, keyframe=0
AVIndex stream=0, sample=0x2, offset=0x253c, dts=0xbbb, size=0x991, distance=2, keyframe=0
AVIndex stream=0, sample=0x3, offset=0x3143, dts=0x138d, size=0x9ab, distance=3, keyframe=0
AVIndex stream=0, sample=0x4, offset=0x3ca7, dts=0x1776, size=0x8e4, distance=4, keyframe=0
00 0000 00 -->stts的version与flag
00 0041 73 -->entries=0x4173=16755 ::剩下的就是sample_count与sample_duration的entry
00 0000 01 -->sample_count=0x01 ::sample_count=1, sample_duration=1001
00 0003 e9 -->sample_duration=0x3e9
00 0000 01 -->sample_count=0x1 ::sample_count=1, sample_duration=3003
00 000b bb -->sample_duration=0xbbb
...
sound的dts的由来:
sc->chunk_count=0xe10c
AVIndex stream=1, sample=0x0, offset=0x1363, dts=0x0, size=0x1a, distance=0, keyframe=1
AVIndex stream=1, sample=0x1, offset=0x22ec, dts=0x400, size=0x9, distance=0, keyframe=1
AVIndex stream=1, sample=0x2, offset=0x22f5, dts=0x800, size=0x9, distance=0, keyframe=1
AVIndex stream=1, sample=0x3, offset=0x22fe, dts=0xc00, size=0xa1, distance=0, keyframe=1
AVIndex stream=1, sample=0x4, offset=0x239f, dts=0x1000, size=0xc9, distance=0, keyframe=1
sound没有elst这个box,所以 current_dts = 0,所以dts_0 =0
以后的dts就是dts_0与 sample_duration( 在stts中 )累加
但sound只有一个stts的entry,所以这个sample_duration=0x400,是恒定的
所以box有如下:
s->iformat->read_header(s)
--> mov_read_header
mov_read_default
-->mov_read_trak
-->mov_build_index
- /**
-  * Build the sample index of one track from its parsed sample tables.
-  *
-  * Walks every chunk in sc->chunk_offsets and, for each sample of the chunk,
-  * appends one AVIndexEntry (file offset, dts, size, keyframe flag) to
-  * st->index_entries. The first dts is -(sc->time_offset) - sc->dts_shift;
-  * every following dts adds the duration of the current stts entry.
-  * Also derives st->codec->bit_rate from the summed sample sizes.
-  *
-  * @param mov demuxer context; supplies the movie time_scale and mov->fc
-  * @param st  stream to index; st->priv_data is its MOVStreamContext
-  */
- static void mov_build_index(MOVContext *mov, AVStream *st)
- {
- MOVStreamContext *sc = st->priv_data;
- int64_t current_offset;
- int64_t current_dts = 0;
- unsigned int stts_index = 0;
- unsigned int stsc_index = 0;
- unsigned int stss_index = 0;
- unsigned int stps_index = 0;
- unsigned int i, j;
- uint64_t stream_size = 0;
-
- /* elst as read from the sample file for the video trak (the sound trak has no elst):
-  *
-  * 0000 0001 --> edit_count=1
-  * 0014 6595 --> segment duration=0x146595
-  * 0000 03e9 --> media time=0x3e9=1001 ; start time of the video, applied negated below
-  * 0001 0000 --> rate=0x10000, rate/65536=1.0
-  * 0000 0001 --> edit_count=1
-  */
- // Only the video trak has elst_count=1, so this block only derives the video start_time.
- if (sc->elst_count) { // video: elst_count=1, sound: elst_count=0
- int i, edit_start_index = 0, unsupported = 0;
- int64_t empty_duration = 0; // empty duration of the first edit list entry
- int64_t start_time = 0; // start time of the media
-
- for (i = 0; i < sc->elst_count; i++) {
- const MOVElst *e = &sc->elst_data[i];
- if (i == 0 && e->time == -1) { // video: e->time=1001, so this branch is not taken
- empty_duration = e->duration;
- edit_start_index = 1;
- } else if (i == edit_start_index && e->time >= 0) {
- start_time = e->time; // video: start_time=1001
- } else
- unsupported = 1;
- }
- /* Report edit lists this code cannot honour instead of ignoring them silently. */
- if (unsupported)
- av_log(mov->fc, AV_LOG_WARNING, "multiple edit list entries, a/v desync might occur, patch welcome\n");
- if ((empty_duration || start_time) && mov->time_scale > 0) { // sample file: empty_duration=0
- if (empty_duration)
- empty_duration = av_rescale(empty_duration, sc->time_scale, mov->time_scale);
- sc->time_offset = start_time - empty_duration; // video: sc->time_offset=1001
- current_dts = -sc->time_offset; // video: current_dts=-1001 (sound never gets here, stays 0)
- if (sc->ctts_count>0 && sc->stts_count>0 &&
- sc->ctts_data[0].duration / FFMAX(sc->stts_data[0].duration, 1) > 16) {
- sc->wrong_dts = 1;
- st->codec->has_b_frames = 1;
- }
- }
- }
-
- /* only use old uncompressed audio chunk demuxing when stts specifies it */
- if (!(st->codec->codec_type == AVMEDIA_TYPE_AUDIO &&
- sc->stts_count == 1 && sc->stts_data[0].duration == 1)) {
- unsigned int current_sample = 0;
- unsigned int stts_sample = 0;
- unsigned int sample_size;
- unsigned int distance = 0;
- unsigned int rap_group_index = 0;
- unsigned int rap_group_sample = 0;
- int64_t last_dts = 0;
- int64_t dts_correction = 0;
- int rap_group_present = sc->rap_group_count && sc->rap_group;
- int key_off = (sc->keyframe_count && sc->keyframes[0] > 0) || (sc->stps_count && sc->stps_data[0] > 0);
-
- current_dts -= sc->dts_shift;
- last_dts = current_dts; // sample file: video current_dts=-1001, sound current_dts=0
-
- /* Refuse sample counts whose index size would overflow the allocation below. */
- if (sc->sample_count >= UINT_MAX / sizeof(*st->index_entries) - st->nb_index_entries)
- return;
- /* Grow the index; on failure av_reallocp_array() has freed it, so drop the count too. */
- if (av_reallocp_array(&st->index_entries, st->nb_index_entries + sc->sample_count, sizeof(*st->index_entries)) < 0) {
- st->nb_index_entries = 0;
- return;
- }
- st->index_entries_allocated_size = (st->nb_index_entries + sc->sample_count) * sizeof(*st->index_entries);
-
- for (i = 0; i < sc->chunk_count; i++) { // chunk_count comes from "stco": video=0x4e56, sound=0xe10c
- int64_t next_offset = i+1 < sc->chunk_count ? sc->chunk_offsets[i+1] : INT64_MAX;
- current_offset = sc->chunk_offsets[i]; // chunk offsets come from "stco"; video and sound each have their own
- while (stsc_index + 1 < sc->stsc_count && i + 1 == sc->stsc_data[stsc_index + 1].first) // sample file: stsc_count=1, so stsc_index stays 0
- stsc_index++;
-
- for (j = 0; j < sc->stsc_data[stsc_index].count; j++) {
- int keyframe = 0;
-
- /* stsc may promise more samples than stsz/the index has room for. */
- if (current_sample >= sc->sample_count) {
- av_log(mov->fc, AV_LOG_ERROR, "wrong sample count\n");
- return;
- }
-
- if (!sc->keyframe_absent && (!sc->keyframe_count || current_sample+key_off == sc->keyframes[stss_index])) {
- keyframe = 1;
- if (stss_index + 1 < sc->keyframe_count)
- stss_index++; // stays 0 when there is at most one stss entry
- } else if (sc->stps_count && current_sample+key_off == sc->stps_data[stps_index]) {
- keyframe = 1;
- if (stps_index + 1 < sc->stps_count)
- stps_index++;
- }
-
- if (sc->keyframe_absent && !sc->stps_count && !rap_group_present && (st->codec->codec_type == AVMEDIA_TYPE_AUDIO || (i==0 && j==0)))
- keyframe = 1;
- if (keyframe)
- distance = 0;
- sample_size = sc->stsz_sample_size > 0 ? sc->stsz_sample_size : sc->sample_sizes[current_sample];
- if (sc->pseudo_stream_id == -1 ||
- sc->stsc_data[stsc_index].id - 1 == sc->pseudo_stream_id) {
- AVIndexEntry *e = &st->index_entries[st->nb_index_entries++];
- e->pos = current_offset;
- e->timestamp = current_dts; // this timestamp is the dts: start value plus accumulated sample durations
- e->size = sample_size;
- e->min_distance = distance;
- e->flags = keyframe ? AVINDEX_KEYFRAME : 0;
- if (st->codec->codec_type == AVMEDIA_TYPE_VIDEO && st->nb_index_entries < 100)
- ff_rfps_add_frame(mov->fc, st, current_dts);
- }
-
- current_offset += sample_size;
- stream_size += sample_size;
-
- /* A negative duration is clamped to 1 and the deficit carried in dts_correction. */
- if (sc->stts_data[stts_index].duration < 0) {
- dts_correction += sc->stts_data[stts_index].duration - 1;
- sc->stts_data[stts_index].duration = 1;
- }
- current_dts += sc->stts_data[stts_index].duration;
- /* Apply the pending correction only if dts stays strictly increasing. */
- if (!dts_correction || current_dts + dts_correction > last_dts) {
- current_dts += dts_correction;
- dts_correction = 0;
- } else {
- dts_correction += current_dts - last_dts - 1;
- current_dts = last_dts + 1;
- }
- last_dts = current_dts;
- distance++;
- stts_sample++;
- current_sample++;
- if (stts_index + 1 < sc->stts_count && stts_sample == sc->stts_data[stts_index].count) {
- stts_sample = 0; // video has many stts entries, so stts_index advances
- stts_index++; // sound has a single stts entry, so stts_index stays 0
- }
- }
- }
- /* Per-trak bit rate; sample file: video=0x7f0bb=520379, audio=0x14ec7=85703. */
- if (st->duration > 0)
- st->codec->bit_rate = stream_size*8*sc->time_scale/st->duration;
- } else {
- // this else branch (old uncompressed audio chunk demuxing) is not taken for the sample file; body omitted
- }
- }
video的dts的由来:
AVIndex stream=0, sample=0x0, offset=0x30, dts=-1001, size=0x1333, distance=0, keyframe=1
AVIndex stream=0, sample=0x1, offset=0x137d, dts=0x0, size=0xf6f, distance=1, keyframe=0
AVIndex stream=0, sample=0x2, offset=0x253c, dts=0xbbb, size=0x991, distance=2, keyframe=0
AVIndex stream=0, sample=0x3, offset=0x3143, dts=0x138d, size=0x9ab, distance=3, keyframe=0
AVIndex stream=0, sample=0x4, offset=0x3ca7, dts=0x1776, size=0x8e4, distance=4, keyframe=0
第0个dts就是从
video_trak的elst中解析出来的media time=0x3e9=1001 ,
然后把这个值 current_dts = -sc->time_offset; 取了个负值,所以dts_0 =-1001
以后的dts就是dts_0与sample_duration(在stts中)累加
00 020b a873 7474 73 -->stts的长度=0x20ba8,type=stts
00 0000 00 -->stts的version与flag
00 0041 73 -->entries=0x4173=16755 ::剩下的就是sample_count与sample_duration的entry
00 0000 01 -->sample_count=0x01 ::sample_count=1, sample_duration=1001
00 0003 e9 -->sample_duration=0x3e9
00 0000 01 -->sample_count=0x1 ::sample_count=1, sample_duration=3003
00 000b bb -->sample_duration=0xbbb
...
sound的dts的由来:
sc->chunk_count=0xe10c
AVIndex stream=1, sample=0x0, offset=0x1363, dts=0x0, size=0x1a, distance=0, keyframe=1
AVIndex stream=1, sample=0x1, offset=0x22ec, dts=0x400, size=0x9, distance=0, keyframe=1
AVIndex stream=1, sample=0x2, offset=0x22f5, dts=0x800, size=0x9, distance=0, keyframe=1
AVIndex stream=1, sample=0x3, offset=0x22fe, dts=0xc00, size=0xa1, distance=0, keyframe=1
AVIndex stream=1, sample=0x4, offset=0x239f, dts=0x1000, size=0xc9, distance=0, keyframe=1
sound没有elst这个box,所以 current_dts = 0,所以dts_0 =0
以后的dts就是dts_0与 sample_duration( 在stts中 )累加
但sound只有一个stts的entry,所以这个sample_duration=0x400,是恒定的
- 00 0000 1873 7474 73 -->stts的长度=0x18,type=stts
- 00 0000 00 -->stts的version与flag
- 00 0000 01 -->entries=0x01
- 00 00e1 0c -->sample_count=0xe10c=67612
- 00 0004 00 -->sample_duration=0x400
- /**
-  * Demuxer read_header callback: parse the atom tree and finish stream setup.
-  *
-  * Runs mov_read_default() over a synthetic "root" atom (retrying once from
-  * offset 0 on seekable input if no moov was found), then post-processes the
-  * streams: chapters and timecode tracks, timescale fixups, frame rate,
-  * bit rates, replaygain and display-matrix side data.
-  *
-  * Every error path releases the parsed state with mov_read_close().
-  *
-  * @param s format context; s->priv_data is the MOVContext
-  * @return 0 on success, a negative AVERROR code on failure
-  */
- static int mov_read_header(AVFormatContext *s)
- {
- MOVContext *mov = s->priv_data;
- AVIOContext *pb = s->pb;
- int j, err;
- MOVAtom atom = { AV_RL32("root") };
- int i;
-
- mov->fc = s;
- mov->trak_index = -1;
- /* .mov and .mp4 aren't streamable anyway (only progressive download if moov is before mdat) */
- if (pb->seekable)
- atom.size = avio_size(pb);
- else
- atom.size = INT64_MAX;
-
- /* check MOV header */
- do {
- if (mov->moov_retry)
- avio_seek(pb, 0, SEEK_SET);
- if ((err = mov_read_default(mov, pb, atom)) < 0) {
- av_log(s, AV_LOG_ERROR, "error reading header\n");
- mov_read_close(s);
- return err;
- }
- /* At most one retry: moov_retry++ makes the condition false the second time. */
- } while (pb->seekable && !mov->found_moov && !mov->moov_retry++);
- if (!mov->found_moov) {
- av_log(s, AV_LOG_ERROR, "moov atom not found\n");
- mov_read_close(s);
- return AVERROR_INVALIDDATA;
- }
- if (pb->seekable) {
- if (mov->chapter_track > 0 && !mov->ignore_chapters)
- mov_read_chapters(s);
- for (i = 0; i < s->nb_streams; i++)
- if (s->streams[i]->codec->codec_tag == AV_RL32("tmcd"))
- mov_read_timecode_track(s, s->streams[i]);
- }
-
- /* copy timecode metadata from tmcd tracks to the related video streams */
- for (i = 0; i < s->nb_streams; i++) {
- AVStream *st = s->streams[i];
- MOVStreamContext *sc = st->priv_data;
- if (sc->timecode_track > 0) {
- AVDictionaryEntry *tcr;
- int tmcd_st_id = -1;
-
- for (j = 0; j < s->nb_streams; j++)
- if (s->streams[j]->id == sc->timecode_track)
- tmcd_st_id = j;
-
- if (tmcd_st_id < 0 || tmcd_st_id == i)
- continue;
- tcr = av_dict_get(s->streams[tmcd_st_id]->metadata, "timecode", NULL, 0);
- if (tcr)
- av_dict_set(&st->metadata, "timecode", tcr->value, 0);
- }
- }
- export_orphan_timecode(s);
-
- for (i = 0; i < s->nb_streams; i++) {
- AVStream *st = s->streams[i];
- MOVStreamContext *sc = st->priv_data;
- fix_timescale(mov, sc);
- if(st->codec->codec_type == AVMEDIA_TYPE_AUDIO && st->codec->codec_id == AV_CODEC_ID_AAC) {
- st->skip_samples = sc->start_pad;
- }
- if (st->codec->codec_type == AVMEDIA_TYPE_VIDEO && sc->nb_frames_for_fps > 0 && sc->duration_for_fps > 0)
- av_reduce(&st->avg_frame_rate.num, &st->avg_frame_rate.den,
- sc->time_scale*(int64_t)sc->nb_frames_for_fps, sc->duration_for_fps, INT_MAX);
- if (st->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) {
- if (st->codec->width <= 0 || st->codec->height <= 0) {
- st->codec->width = sc->width;
- st->codec->height = sc->height;
- }
- if (st->codec->codec_id == AV_CODEC_ID_DVD_SUBTITLE) {
- if ((err = mov_rewrite_dvd_sub_extradata(st)) < 0) {
- /* release the parsed state like the other error paths do */
- mov_read_close(s);
- return err;
- }
- }
- }
- /* handbrake_version is compared against 0.10.2 encoded as major*1000000 + minor*1000 + micro */
- if (mov->handbrake_version && mov->handbrake_version <= 1000000*0 + 1000*10 + 2 && st->codec->codec_id == AV_CODEC_ID_MP3) {
- st->need_parsing = AVSTREAM_PARSE_FULL;
- }
- }
-
- if (mov->trex_data) {
- for (i = 0; i < s->nb_streams; i++) {
- AVStream *st = s->streams[i];
- MOVStreamContext *sc = st->priv_data;
- if (st->duration > 0)
- st->codec->bit_rate = sc->data_size * 8 * sc->time_scale / st->duration;
- }
- }
-
- if (mov->use_mfra_for > 0) {
- for (i = 0; i < s->nb_streams; i++) {
- AVStream *st = s->streams[i];
- MOVStreamContext *sc = st->priv_data;
- if (sc->duration_for_fps > 0) {
- st->codec->bit_rate = sc->data_size * 8 * sc->time_scale /
- sc->duration_for_fps;
- }
- }
- }
-
- /* Non-zero entries of mov->bitrates override the computed per-stream bit rates. */
- for (i = 0; i < mov->bitrates_count && i < s->nb_streams; i++) {
- if (mov->bitrates[i]) {
- s->streams[i]->codec->bit_rate = mov->bitrates[i];
- }
- }
-
- ff_rfps_calculate(s);
-
- for (i = 0; i < s->nb_streams; i++) {
- AVStream *st = s->streams[i];
- MOVStreamContext *sc = st->priv_data;
-
- switch (st->codec->codec_type) {
- case AVMEDIA_TYPE_AUDIO:
- err = ff_replaygain_export(st, s->metadata);
- if (err < 0) {
- mov_read_close(s);
- return err;
- }
- break;
- case AVMEDIA_TYPE_VIDEO:
- if (sc->display_matrix) {
- AVPacketSideData *sd, *tmp;
-
- tmp = av_realloc_array(st->side_data,
- st->nb_side_data + 1, sizeof(*tmp));
- if (!tmp) {
- /* release the parsed state like the other error paths do */
- mov_read_close(s);
- return AVERROR(ENOMEM);
- }
-
- st->side_data = tmp;
- st->nb_side_data++;
-
- /* Hand the matrix buffer over to the side-data entry; sc no longer owns it. */
- sd = &st->side_data[st->nb_side_data - 1];
- sd->type = AV_PKT_DATA_DISPLAYMATRIX;
- sd->size = sizeof(int32_t) * 9;
- sd->data = (uint8_t*)sc->display_matrix;
- sc->display_matrix = NULL;
- }
- break;
- }
- }
- ff_configure_buffers_for_index(s, AV_TIME_BASE);
-
- return 0;
- }
- /**
-  * Parse the child atoms contained in a container atom.
-  *
-  * Reads atom headers from pb until atom.size bytes are consumed or EOF is
-  * reached. Each child is dispatched to the handler registered for its type
-  * in mov_default_parse_table (with udta/ilst and meta/keys fallbacks);
-  * children without a handler are skipped. Nesting is limited through
-  * c->atom_depth, which is restored on every return path.
-  *
-  * @param c    demuxer context
-  * @param pb   I/O context positioned at the first child of atom
-  * @param atom container being parsed; a negative size means "unbounded"
-  * @return 0 on success, a negative AVERROR code on failure
-  */
- static int mov_read_default(MOVContext *c, AVIOContext *pb, MOVAtom atom)
- {
- int64_t total_size = 0;
- MOVAtom a;
- int i;
-
- if (c->atom_depth > 10) {
- return AVERROR_INVALIDDATA;
- }
- c->atom_depth ++;
-
- if (atom.size < 0)
- atom.size = INT64_MAX;
- while (total_size + 8 <= atom.size && !avio_feof(pb)) {
- int (*parse)(MOVContext*, AVIOContext*, MOVAtom) = NULL;
- a.size = atom.size;
- a.type=0;
- if (atom.size >= 8) {
- a.size = avio_rb32(pb);
- a.type = avio_rl32(pb);
- /* On the retry pass, treat a 'free' atom whose payload starts with an
-  * mvhd/cmov child header as a moov atom. */
- if (a.type == MKTAG('f','r','e','e') &&
- a.size >= 8 &&
- c->moov_retry) {
- uint8_t buf[8];
- uint32_t type;
- if (avio_read(pb, buf, 8) != 8) {
- c->atom_depth --;
- return AVERROR_INVALIDDATA;
- }
- /* Read the tag bytewise as little-endian (same convention as avio_rl32/MKTAG)
-  * instead of dereferencing a cast byte pointer. */
- type = AV_RL32(buf + 4);
- avio_seek(pb, -8, SEEK_CUR);
- if (type == MKTAG('m','v','h','d') ||
- type == MKTAG('c','m','o','v')) {
- a.type = MKTAG('m','o','o','v');
- }
- }
- /* A trak/mdat seen below anything but root/moov ends this container:
-  * rewind over its header and let the caller handle it. */
- if (atom.type != MKTAG('r','o','o','t') && atom.type != MKTAG('m','o','o','v'))
- {
- if (a.type == MKTAG('t','r','a','k') || a.type == MKTAG('m','d','a','t'))
- {
- avio_skip(pb, -8);
- c->atom_depth --;
- return 0;
- }
- }
- total_size += 8;
- if (a.size == 1 && total_size + 8 <= atom.size) { /* 64 bit extended size */
- a.size = avio_rb64(pb) - 8;
- total_size += 8;
- }
- }
- /* Size 0: the child extends to the end of the enclosing atom. */
- if (a.size == 0) {
- a.size = atom.size - total_size + 8;
- }
- a.size -= 8;
- if (a.size < 0)
- break;
- a.size = FFMIN(a.size, atom.size - total_size);
-
- for (i = 0; mov_default_parse_table[i].type; i++)
- if (mov_default_parse_table[i].type == a.type) {
- parse = mov_default_parse_table[i].parse; // match the type and pick that box's read function
- break;
- }
-
- if (!parse && (atom.type == MKTAG('u','d','t','a') ||
- atom.type == MKTAG('i','l','s','t')))
- parse = mov_read_udta_string;
-
- if (!parse && c->found_hdlr_mdta &&
- atom.type == MKTAG('m','e','t','a') &&
- a.type == MKTAG('k','e','y','s')) {
- parse = mov_read_keys;
- }
-
- if (!parse) { /* skip leaf atoms data */
- avio_skip(pb, a.size);
- } else {
- int64_t start_pos = avio_tell(pb);
- int64_t left;
- int err = parse(c, pb, a);
- if (err < 0) {
- c->atom_depth --;
- return err;
- }
- /* Stop once both moov and mdat are known and either further root atoms
-  * are to be read later (next_root_atom) or this atom ends the file. */
- if (c->found_moov && c->found_mdat &&
- ((!pb->seekable || c->fc->flags & AVFMT_FLAG_IGNIDX || c->fragment_index_complete) ||
- start_pos + a.size == avio_size(pb))) {
- if (!pb->seekable || c->fc->flags & AVFMT_FLAG_IGNIDX || c->fragment_index_complete)
- c->next_root_atom = start_pos + a.size;
- c->atom_depth --;
- return 0;
- }
- /* Re-sync to the end of the child whether the handler under- or over-read. */
- left = a.size - avio_tell(pb) + start_pos;
- if (left > 0)
- avio_skip(pb, left);
- else if (left < 0) {
- avio_seek(pb, left, SEEK_CUR);
- }
- }
-
- total_size += a.size;
- }
-
- if (total_size < atom.size && atom.size < 0x7ffff)
- avio_skip(pb, atom.size - total_size);
-
- c->atom_depth --;
- return 0;
- }
所以box有如下:
- type='ftyp' ,parent='root',sz=32 8 102119991
- type='free' ,parent='root',sz=8 40 102119991
- type='mdat' ,parent='root',sz=101281678 48 102119991
- type='moov' ,parent='root',sz=838273 101281726 102119991
- type='mvhd' ,parent='moov',sz=108 8 838265
- type='trak' ,parent='moov',sz=376733 116 838265
- type='tkhd' ,parent='trak',sz=92 8 376725
- type='edts' ,parent='trak',sz=36 100 376725
- type='elst' ,parent='edts',sz=28 8 28
- type='mdia' ,parent='trak',sz=376597 136 376725
- type='mdhd' ,parent='mdia',sz=32 8 376589
- type='hdlr' ,parent='mdia',sz=45 40 376589
- type='minf' ,parent='mdia',sz=376512 85 376589
- type='vmhd' ,parent='minf',sz=20 8 376504
- type='dinf' ,parent='minf',sz=36 28 376504
- type='dref' ,parent='dinf',sz=28 8 28
- type='stbl' ,parent='minf',sz=376448 64 376504
- type='stsd' ,parent='stbl',sz=148 8 376440
- type='avcC' ,parent='stsd',sz=46 8 46
- type='stts' ,parent='stbl',sz=134056 156 376440
- type='stss' ,parent='stbl',sz=516 134212 376440
- type='ctts' ,parent='stbl',sz=81224 134728 376440
- type='stsc' ,parent='stbl',sz=28 215952 376440
- type='stsz' ,parent='stbl',sz=80236 215980 376440
- type='stco' ,parent='stbl',sz=80232 296216 376440
- type='trak' ,parent='moov',sz=461328 376849 838265
- type='tkhd' ,parent='trak',sz=92 8 461320
- type='mdia' ,parent='trak',sz=461228 100 461320
- type='mdhd' ,parent='mdia',sz=32 8 461220
- type='hdlr' ,parent='mdia',sz=45 40 461220
- type='minf' ,parent='mdia',sz=461143 85 461220
- type='smhd' ,parent='minf',sz=16 8 461135
- type='dinf' ,parent='minf',sz=36 24 461135
- type='dref' ,parent='dinf',sz=28 8 28
- type='stbl' ,parent='minf',sz=461083 60 461135
- type='stsd' ,parent='stbl',sz=91 8 461075
- type='esds' ,parent='stsd',sz=39 8 39
- type='stts' ,parent='stbl',sz=24 99 461075
- type='stsc' ,parent='stbl',sz=28 123 461075
- type='stsz' ,parent='stbl',sz=230468 151 461075
- type='stco' ,parent='stbl',sz=230464 230619 461075
- type='udta' ,parent='moov',sz=96 838177 838265
- type='meta' ,parent='udta',sz=88 8 88
- type='hdlr' ,parent='meta',sz=33 8 76
- type='ilst' ,parent='meta',sz=43 41 76
- type='?too' ,parent='ilst',sz=35 8 35