[FFmpeg-devel] [PATCH 15/21] avformat/matroskadec: Redo level handling

Steve Lhomme robux4 at ycbcr.xyz
Sun Apr 7 10:55:00 EEST 2019


On 3/27/2019 12:18 PM, Andreas Rheinhardt via ffmpeg-devel wrote:
> This commit changes how levels are handled: If the level used for
> ebml_parse ends directly after an element that has been consumed, then
> ebml_parse ends the level itself (and any finite-sized levels that end
> there as well) and informs the caller via the return value; if the
> current level is unknown-sized, then the level is ended as soon as
> an element that is not valid on the current level is encountered.
>
> This is designed for situations where one wants to parse master elements
> incrementally, i.e. not in one go via ebml_parse_nest.
>
> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt at googlemail.com>
> ---
>   libavformat/matroskadec.c | 104 +++++++++++++++++++++++++++++++-------
>   1 file changed, 85 insertions(+), 19 deletions(-)
>
> diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
> index 42f1c21042..32cf57685f 100644
> --- a/libavformat/matroskadec.c
> +++ b/libavformat/matroskadec.c
> @@ -69,6 +69,8 @@
>   #include "qtpalette.h"
>   
>   #define EBML_UNKNOWN_LENGTH  UINT64_MAX /* EBML unknown length, in uint64_t */
> +#define LEVEL_ENDED                   2 /* return value of ebml_parse when the
> +                                         * syntax level used for parsing ended. */
>   
>   typedef enum {
>       EBML_NONE,
> @@ -1041,16 +1043,32 @@ static int ebml_parse_id(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
>                            uint32_t id, void *data)
>   {
>       int i;
> +
>       for (i = 0; syntax[i].id; i++)
>           if (id == syntax[i].id)
>               break;
> -    if (!syntax[i].id && id == MATROSKA_ID_CLUSTER &&
> -        matroska->num_levels > 0                   &&
> -        matroska->levels[matroska->num_levels - 1].length == EBML_UNKNOWN_LENGTH)
> -        return 0;  // we reached the end of an unknown size cluster
> +
>       if (!syntax[i].id && id != EBML_ID_VOID && id != EBML_ID_CRC32) {
> -        av_log(matroska->ctx, AV_LOG_DEBUG, "Unknown entry 0x%"PRIX32"\n", id);
> +        if (!matroska->num_levels || matroska->levels[matroska->num_levels - 1].length
> +                                                                != EBML_UNKNOWN_LENGTH) {
> +            av_log(matroska->ctx, AV_LOG_DEBUG, "Unknown entry 0x%"PRIX32"\n", id);

Would this mean a Segment with unknown length would produce this log ? 
Or Segments are not part of the levels at all ? If so, same question 
with level 1 elements which have an unknown length.

> +        } else if (matroska->num_levels > 1) {
> +            // Unknown-sized master elements (except root elements) end
> +            // when an id not known to be allowed in them is encountered.
> +            matroska->num_levels--;
> +            return LEVEL_ENDED;
> +        } else if (matroska->num_levels == 1) {
> +            AVIOContext *pb = matroska->ctx->pb;
> +            int64_t     pos = avio_tell(pb);
> +            // An unkown level 1 element inside an unknown-sized segment
> +            // is considered an error.
> +            av_log(matroska->ctx, AV_LOG_ERROR, "Found unknown element id 0x%"PRIX32
> +                   " at pos. %"PRIu64" (0x%"PRIx64") in an unknown-sized segment. "
> +                   "Will be treated as error.\n", id, pos, pos);
> +            return AVERROR_INVALIDDATA;
> +        }
>       }
> +
>       return ebml_parse_elem(matroska, &syntax[i], data);
>   }
>   
> @@ -1061,9 +1079,24 @@ static int ebml_parse(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
>           uint64_t id;
>           int res = ebml_read_num(matroska, matroska->ctx->pb, 4, &id, 0);
>           if (res < 0) {
> -            // in live mode, finish parsing if EOF is reached.
> -            return (matroska->is_live && matroska->ctx->pb->eof_reached &&
> -                    res == AVERROR_EOF) ? 1 : res;
> +            if (matroska->ctx->pb->eof_reached && res == AVERROR_EOF) {
> +                if (matroska->is_live)
> +                    // in live mode, finish parsing if EOF is reached.
> +                    return 1;
> +                if (matroska->num_levels) {
> +                    MatroskaLevel *level = &matroska->levels[matroska->num_levels - 1];
> +
> +                    if (level->length == EBML_UNKNOWN_LENGTH) {
> +                        // Unknown-sized elements automatically end at EOF.
> +                        matroska->num_levels = 0;
> +                        return LEVEL_ENDED;
> +                    } else if (avio_tell(matroska->ctx->pb) < level->start + level->length) {
> +                        av_log(matroska->ctx, AV_LOG_ERROR, "File ended before "
> +                               "its declared end\n");
> +                    }
> +                }
> +            }
> +            return res;
>           }
>           matroska->current_id = id | 1 << 7 * res;
>       }
> @@ -1098,10 +1131,15 @@ static int ebml_parse_nest(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
>               break;
>           }
>   
> -    while (!res && !ebml_level_end(matroska))
> +    if (!matroska->levels[matroska->num_levels - 1].length) {
> +        matroska->num_levels--;
> +        return 0;
> +    }
> +
> +    while (!res)
>           res = ebml_parse(matroska, syntax, data);
>   
> -    return res;
> +    return res == LEVEL_ENDED ? 0 : res;
>   }
>   
>   static int is_ebml_id_valid(uint32_t id)
> @@ -1169,7 +1207,7 @@ static int ebml_parse_elem(MatroskaDemuxContext *matroska,
>       AVIOContext *pb = matroska->ctx->pb;
>       uint32_t id = syntax->id;
>       uint64_t length;
> -    int res;
> +    int res, level_check;
>       void *newelem;
>       MatroskaLevel1Element *level1_elem;
>   
> @@ -1195,7 +1233,8 @@ static int ebml_parse_elem(MatroskaDemuxContext *matroska,
>                      length, max_lengths[syntax->type], syntax->type);
>               return AVERROR_INVALIDDATA;
>           }
> -        if (matroska->num_levels > 0) {
> +
> +        if (matroska->num_levels) {
>               MatroskaLevel *level = &matroska->levels[matroska->num_levels - 1];
>               int64_t pos = avio_tell(pb);
>   
> @@ -1204,18 +1243,24 @@ static int ebml_parse_elem(MatroskaDemuxContext *matroska,
>                   uint64_t elem_end = pos + length,
>                           level_end = level->start + level->length;
>   
> -                if (level_end < elem_end) {
> +                if (elem_end < level_end) {
> +                    level_check = 0;
> +                } else if (elem_end == level_end) {
> +                    level_check = LEVEL_ENDED;
> +                } else {
>                       av_log(matroska->ctx, AV_LOG_ERROR,
>                              "Element at 0x%"PRIx64" ending at 0x%"PRIx64" exceeds "
>                              "containing master element ending at 0x%"PRIx64"\n",
>                              pos, elem_end, level_end);
>                       return AVERROR_INVALIDDATA;
>                   }
> +            } else if (length != EBML_UNKNOWN_LENGTH) {
> +                level_check = 0;
>               } else if (level->length != EBML_UNKNOWN_LENGTH) {
>                   av_log(matroska->ctx, AV_LOG_ERROR, "Unknown-sized element "
>                          "at 0x%"PRIx64" inside parent with finite size\n", pos);
>                   return AVERROR_INVALIDDATA;
> -            } else if (length == EBML_UNKNOWN_LENGTH && id != MATROSKA_ID_CLUSTER) {
> +            } else if (id != MATROSKA_ID_CLUSTER) {
>                   // According to the specifications only clusters and segments
>                   // are allowed to be unknown-sized.
>                   av_log(matroska->ctx, AV_LOG_ERROR,
> @@ -1223,7 +1268,8 @@ static int ebml_parse_elem(MatroskaDemuxContext *matroska,
>                          "0x%"PRIx64". Dropping the invalid element.\n", pos);
>                   return AVERROR_INVALIDDATA;
>               }
> -        }
> +        } else
> +            level_check = 0;
>       }
>   
>       switch (syntax->type) {
> @@ -1257,19 +1303,36 @@ static int ebml_parse_elem(MatroskaDemuxContext *matroska,
>                   av_log(matroska->ctx, AV_LOG_ERROR, "Duplicate element\n");
>               level1_elem->parsed = 1;
>           }
> -        return ebml_parse_nest(matroska, syntax->def.n, data);
> +        res = ebml_parse_nest(matroska, syntax->def.n, data);
> +        break;
>       case EBML_STOP:
>           return 1;
>       default:
>           if (ffio_limit(pb, length) != length)
>               return AVERROR(EIO);
> -        return avio_skip(pb, length) < 0 ? AVERROR(EIO) : 0;
> +        res = avio_skip(pb, length) < 0 ? AVERROR(EIO) : 0;
>       }
>       if (res == AVERROR_INVALIDDATA)
>           av_log(matroska->ctx, AV_LOG_ERROR, "Invalid element\n");
>       else if (res == AVERROR(EIO))
>           av_log(matroska->ctx, AV_LOG_ERROR, "Read error\n");
> -    return res;
> +
> +    if (res < 0 || res == 1)
> +        return res;
> +
> +    if (level_check == LEVEL_ENDED && matroska->num_levels) {
> +        MatroskaLevel *level = &matroska->levels[matroska->num_levels - 1];
> +        int64_t pos = avio_tell(pb);
> +
> +        // Given that pos >= level->start no check for
> +        // level->length != EBML_UNKNOWN_LENGTH is necessary.
> +        while (matroska->num_levels && pos == level->start + level->length) {
> +            matroska->num_levels--;
> +            level--;
> +        }

It seems you're assuming that unknown length can only return to level 0 
(Segment). But an unknown length Cluster could be followed by another 
unknown length Cluster.

> +    }
> +
> +    return level_check;
>   }
>   
>   static void ebml_free(EbmlSyntax *syntax, void *data)
> @@ -3491,7 +3554,7 @@ static int matroska_parse_cluster(MatroskaDemuxContext *matroska)
>                                cluster);
>       }
>   
> -    if (!res && block->bin.size > 0) {
> +    if ((!res || res == LEVEL_ENDED) && block->bin.size > 0) {
>               int is_keyframe = block->non_simple ? block->reference == INT64_MIN : -1;
>               uint8_t* additional = block->additional.size > 0 ?
>                                       block->additional.data : NULL;
> @@ -3506,6 +3569,9 @@ static int matroska_parse_cluster(MatroskaDemuxContext *matroska)
>                                          block->discard_padding);
>       }
>   
> +    if (res == LEVEL_ENDED)
> +        cluster->pos = 0;

This seems odd.

> +
>       ebml_free(matroska_blockgroup, block);
>       memset(block, 0, sizeof(*block));
>   
> -- 
> 2.19.2
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".



More information about the ffmpeg-devel mailing list