[FFmpeg-devel] [PATCH v3 3/6] lavc/qsvdec: Replace current parser with MFXVideoDECODE_DecodeHeader()

Rogozhkin, Dmitry V dmitry.v.rogozhkin at intel.com
Sat Mar 9 02:48:16 EET 2019


On Fri, 2019-03-08 at 15:40 +0800, Zhong Li wrote:
> Using the MSDK parser can improve the qsv decoder pass rate in some
> cases (e.g. an SPS that declares a wrong level_idc, smaller than it
> should be). It is also necessary for adding new qsv decoders such as
> MJPEG and VP9, since the current parser can't provide enough
> information.
> Actually, using MFXVideoDECODE_DecodeHeader() was discussed at
> https://ffmpeg.org/pipermail/ffmpeg-devel/2015-July/175734.html and
> merged as commit 1acb19d, but it was overwritten when the libav
> patches were merged (commit 1f26a23) without any explanation.
> 
> v2: split the header decoding out of decode_init, and call it for
> every frame to detect format/resolution changes. This fixes some
> regression issues such as HEVC 10-bit decoding.
> 
> Signed-off-by: Zhong Li <zhong.li at intel.com>
> ---
>  libavcodec/qsvdec.c       | 172 ++++++++++++++++++++------------------
>  libavcodec/qsvdec.h       |   2 +
>  libavcodec/qsvdec_h2645.c |   1 +
>  libavcodec/qsvdec_other.c |   1 +
>  4 files changed, 93 insertions(+), 83 deletions(-)
> 
> diff --git a/libavcodec/qsvdec.c b/libavcodec/qsvdec.c
> index 4a0be811fb..b78026e14d 100644
> --- a/libavcodec/qsvdec.c
> +++ b/libavcodec/qsvdec.c
> @@ -120,19 +120,17 @@ static inline unsigned int qsv_fifo_size(const AVFifoBuffer* fifo)
>      return av_fifo_size(fifo) / qsv_fifo_item_size();
>  }
>  
> -static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q)
> +static int qsv_decode_preinit(AVCodecContext *avctx, QSVContext *q, enum AVPixelFormat *pix_fmts, mfxVideoParam *param)
>  {
> -    const AVPixFmtDescriptor *desc;
>      mfxSession session = NULL;
>      int iopattern = 0;
> -    mfxVideoParam param = { 0 };
> -    int frame_width  = avctx->coded_width;
> -    int frame_height = avctx->coded_height;
>      int ret;
>  
> -    desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
> -    if (!desc)
> -        return AVERROR_BUG;
> +    ret = ff_get_format(avctx, pix_fmts);
> +    if (ret < 0) {
> +        q->orig_pix_fmt = avctx->pix_fmt = AV_PIX_FMT_NONE;
> +        return ret;
> +    }
>  
>      if (!q->async_fifo) {
> +        q->async_fifo = av_fifo_alloc(q->async_depth * qsv_fifo_item_size());
> @@ -170,48 +168,72 @@ static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q)
>          return ret;
>      }
>  
> -    ret = ff_qsv_codec_id_to_mfx(avctx->codec_id);
> -    if (ret < 0)
> -        return ret;
> +    param->IOPattern   = q->iopattern;
> +    param->AsyncDepth  = q->async_depth;
> +    param->ExtParam    = q->ext_buffers;
> +    param->NumExtParam = q->nb_ext_buffers;
>  
> -    param.mfx.CodecId      = ret;
> -    param.mfx.CodecProfile = ff_qsv_profile_to_mfx(avctx->codec_id, avctx->profile);
> -    param.mfx.CodecLevel   = avctx->level == FF_LEVEL_UNKNOWN ? MFX_LEVEL_UNKNOWN : avctx->level;
> -
> -    param.mfx.FrameInfo.BitDepthLuma   = desc->comp[0].depth;
> -    param.mfx.FrameInfo.BitDepthChroma = desc->comp[0].depth;
> -    param.mfx.FrameInfo.Shift          = desc->comp[0].depth > 8;
> -    param.mfx.FrameInfo.FourCC         = q->fourcc;
> -    param.mfx.FrameInfo.Width          = frame_width;
> -    param.mfx.FrameInfo.Height         = frame_height;
> -    param.mfx.FrameInfo.ChromaFormat   = MFX_CHROMAFORMAT_YUV420;
> -
> -    switch (avctx->field_order) {
> -    case AV_FIELD_PROGRESSIVE:
> -        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_PROGRESSIVE;
> -        break;
> -    case AV_FIELD_TT:
> -        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_FIELD_TFF;
> -        break;
> -    case AV_FIELD_BB:
> -        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_FIELD_BFF;
> -        break;
> -    default:
> -        param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_UNKNOWN;
> -        break;
> -    }
> +    return 0;
> + }
>  
> -    param.IOPattern   = q->iopattern;
> -    param.AsyncDepth  = q->async_depth;
> -    param.ExtParam    = q->ext_buffers;
> -    param.NumExtParam = q->nb_ext_buffers;
> +static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q, mfxVideoParam *param)
> +{
> +    int ret;
>  
> -    ret = MFXVideoDECODE_Init(q->session, &param);
> +    avctx->width        = param->mfx.FrameInfo.CropW;
> +    avctx->height       = param->mfx.FrameInfo.CropH;
> +    avctx->coded_width  = param->mfx.FrameInfo.Width;
> +    avctx->coded_height = param->mfx.FrameInfo.Height;
> +    avctx->level        = param->mfx.CodecLevel;
> +    avctx->profile      = param->mfx.CodecProfile;
> +    avctx->field_order  = ff_qsv_map_picstruct(param->mfx.FrameInfo.PicStruct);
> +    avctx->pix_fmt      = ff_qsv_map_fourcc(param->mfx.FrameInfo.FourCC);
> +
> +    ret = MFXVideoDECODE_Init(q->session, param);
>      if (ret < 0)
>          return ff_qsv_print_error(avctx, ret,
>                                    "Error initializing the MFX video decoder");
>  
> -    q->frame_info = param.mfx.FrameInfo;
> +    q->frame_info = param->mfx.FrameInfo;
> +
> +    return 0;
> +}
> +
> +static int qsv_decode_header(AVCodecContext *avctx, QSVContext *q, AVPacket *avpkt, enum AVPixelFormat *pix_fmts, mfxVideoParam *param)
> +{
> +    int ret;
> +
> +    mfxBitstream bs = { { { 0 } } };
> +
> +    if (avpkt->size) {
> +        bs.Data       = avpkt->data;
> +        bs.DataLength = avpkt->size;
> +        bs.MaxLength  = bs.DataLength;
> +        bs.TimeStamp  = avpkt->pts;
> +        if (avctx->field_order == AV_FIELD_PROGRESSIVE)
> +            bs.DataFlag   |= MFX_BITSTREAM_COMPLETE_FRAME;
> +    } else
> +        return AVERROR_INVALIDDATA;
> +
> +
> +    if(!q->session) {
> +        ret = qsv_decode_preinit(avctx, q, pix_fmts, param);
> +        if (ret < 0)
> +            return ret;
> +    }
> +
> +    ret = ff_qsv_codec_id_to_mfx(avctx->codec_id);
> +    if (ret < 0)
> +        return ret;
> +
> +    param->mfx.CodecId = ret;
> +    ret = MFXVideoDECODE_DecodeHeader(q->session, &bs, param);
> +    if (MFX_ERR_MORE_DATA == ret) {
> +       return AVERROR(EAGAIN);
> +    }
> +    if (ret < 0)
> +        return ff_qsv_print_error(avctx, ret,
> +                "Error decoding stream header");
>  
>      return 0;
>  }
> @@ -494,7 +516,10 @@ int ff_qsv_process_data(AVCodecContext *avctx, QSVContext *q,
>      uint8_t *dummy_data;
>      int dummy_size;
>      int ret;
> -    const AVPixFmtDescriptor *desc;
> +    mfxVideoParam param = { 0 };
> +    enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,
> +                                       AV_PIX_FMT_NV12,
> +                                       AV_PIX_FMT_NONE };
>  
>      if (!q->avctx_internal) {
>          q->avctx_internal = avcodec_alloc_context3(NULL);
> @@ -508,7 +533,6 @@ int ff_qsv_process_data(AVCodecContext *avctx, QSVContext *q,
>              return AVERROR(ENOMEM);
>  
>          q->parser->flags |= PARSER_FLAG_COMPLETE_FRAMES;
> -        q->orig_pix_fmt   = AV_PIX_FMT_NONE;
>      }
>  
>      if (!pkt->size)
> @@ -521,15 +545,17 @@ int ff_qsv_process_data(AVCodecContext *avctx, QSVContext *q,
>                       pkt->data, pkt->size, pkt->pts, pkt->dts,
>                       pkt->pos);
>  
> -    avctx->field_order  = q->parser->field_order;
>      /* TODO: flush delayed frames on reinit */
> -    if (q->parser->format       != q->orig_pix_fmt    ||
> -        FFALIGN(q->parser->coded_width, 16)  != FFALIGN(avctx->coded_width, 16) ||
> -        FFALIGN(q->parser->coded_height, 16) != FFALIGN(avctx->coded_height, 16)) {
> -        enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,
> -                                           AV_PIX_FMT_NONE,
> -                                           AV_PIX_FMT_NONE };
> -        enum AVPixelFormat qsv_format;
> +
> +    // sw_pix_fmt was initialized as NV12, will be updated after header decoded if not true.
> +    if (q->orig_pix_fmt != AV_PIX_FMT_NONE)
> +        pix_fmts[1] = q->orig_pix_fmt;
Well, it is still not self-explanatory to me why we need pix_fmts[1]
here.

> +
> +    ret = qsv_decode_header(avctx, q, pkt, pix_fmts, &param);
Parsing can be expensive to perform on each frame. See more below.

> +
> +    if (ret >= 0 && (q->orig_pix_fmt != ff_qsv_map_fourcc(param.mfx.FrameInfo.FourCC) ||
> +        avctx->coded_width  != param.mfx.FrameInfo.Width ||
> +        avctx->coded_height != param.mfx.FrameInfo.Height)) {
>          AVPacket zero_pkt = {0};
You are actually trying to perform a task that mediasdk already does
for you. Moreover, you don't detect all the cases in which the mediasdk
decoder will require a reset. For example, you may have two
concatenated streams with exactly the same resolution, yet a decoder
reset is still required because of profile/level/GOP structure changes.
Memory reallocation, or rather additional allocation, could also be
required, because mediasdk may need a different (larger or smaller)
number of surfaces to handle the new bitstream properly. See the short
sketch just below and the details further down.
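
To illustrate, a minimal sketch (not FFmpeg code; 'session', 'bs' and
'work' are assumed to be set up elsewhere) of letting the decoder
itself signal that a reset is needed:

/* Sketch: react to the status reported by DecodeFrameAsync instead of
 * re-parsing the header for every packet. */
mfxFrameSurface1 *out  = NULL;
mfxSyncPoint      sync = NULL;
mfxStatus sts = MFXVideoDECODE_DecodeFrameAsync(session, &bs, work,
                                                &out, &sync);
if (sts == MFX_ERR_INCOMPATIBLE_VIDEO_PARAM) {
    /* The new bitstream cannot be decoded with the current
     * configuration: drain, re-run DecodeHeader, re-check the surface
     * requirements and reset (the full sequence is sketched further
     * down). */
}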

>  
>          if (q->buffered_count) {
> @@ -538,45 +564,24 @@ int ff_qsv_process_data(AVCodecContext *avctx, QSVContext *q,
>              q->buffered_count--;
>              return qsv_decode(avctx, q, frame, got_frame, &zero_pkt);
>          }
> -
>          q->reinit_flag = 0;
>  
> -        qsv_format = ff_qsv_map_pixfmt(q->parser->format, &q->fourcc);
> -        if (qsv_format < 0) {
> -            av_log(avctx, AV_LOG_ERROR,
> -                   "Decoding pixel format '%s' is not supported\n",
> -                   av_get_pix_fmt_name(q->parser->format));
> -            ret = AVERROR(ENOSYS);
> -            goto reinit_fail;
> -        }
> +        q->orig_pix_fmt = avctx->pix_fmt = pix_fmts[1] = ff_qsv_map_fourcc(param.mfx.FrameInfo.FourCC);
>  
> -        q->orig_pix_fmt     = q->parser->format;
> -        avctx->pix_fmt      = pix_fmts[1] = qsv_format;
> -        avctx->width        = q->parser->width;
> -        avctx->height       = q->parser->height;
> -        avctx->coded_width  = FFALIGN(q->parser->coded_width, 16);
> -        avctx->coded_height = FFALIGN(q->parser->coded_height, 16);
> -        avctx->level        = q->avctx_internal->level;
> -        avctx->profile      = q->avctx_internal->profile;
> +        avctx->coded_width  = param.mfx.FrameInfo.Width;
> +        avctx->coded_height = param.mfx.FrameInfo.Height;
>  
> -        ret = ff_get_format(avctx, pix_fmts);
> +        ret = qsv_decode_preinit(avctx, q, pix_fmts, &param);
>          if (ret < 0)
>              goto reinit_fail;
> +        q->initialized = 0;
> +    }
>  
> -        avctx->pix_fmt = ret;
> -
> -        desc = av_pix_fmt_desc_get(avctx->pix_fmt);
> -        if (!desc)
> -            goto reinit_fail;
> -
> -         if (desc->comp[0].depth > 8) {
> -            avctx->coded_width =  FFALIGN(q->parser->coded_width, 32);
> -            avctx->coded_height = FFALIGN(q->parser->coded_height, 32);
> -        }
> -
> -        ret = qsv_decode_init(avctx, q);
> +    if (!q->initialized) {
> +        ret = qsv_decode_init(avctx, q, &param);
>          if (ret < 0)
>              goto reinit_fail;
> +        q->initialized = 1;
>      }
>  
>      return qsv_decode(avctx, q, frame, got_frame, pkt);

So, from my perspective you should not attempt to call DecodeHeader on
each incoming data portion and try to interpret the results yourself.
Instead you should respect what mediasdk's DecodeFrameAsync returns: it
will return MFX_ERR_INCOMPATIBLE_VIDEO_PARAM if a decoder reset is
required. This leaves two situations in which you should call
DecodeHeader:
1. when the decoder is not initialized at all, or
2. upon receiving MFX_ERR_INCOMPATIBLE_VIDEO_PARAM.

Once you have received MFX_ERR_INCOMPATIBLE_VIDEO_PARAM, the steps to
perform are:
1. Flush the frames cached in the mediasdk decoder by passing a NULL
bitstream to DecodeFrameAsync.
2. Run DecodeHeader to get the updated parameters.
3. Check (via QueryIOSurf) and potentially reallocate the surfaces to
satisfy the requirements of the new bitstream.
4. Reset the decoder with the new parameters.

The application may optimize step 3 above and avoid surface
reallocation where possible. For example, if you have allocated
1920x1080 surfaces and the resolution changes to 720x480, you may keep
the 1920x1080 surfaces if you are willing to use more memory, hoping to
receive a 1920x1080 stream again later. This works as long as you have
a sufficient number of 1920x1080 surfaces. A rough sketch of the whole
sequence is given below.
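
For illustration, here is a rough sketch of that sequence using bare
mediasdk calls. This is not FFmpeg code: get_free_surface(),
pool_is_sufficient() and reallocate_surfaces() are hypothetical
application helpers, and error/warning handling is kept to a minimum.

#include <mfxvideo.h>

/* Hypothetical application helpers, not part of mediasdk: */
mfxFrameSurface1 *get_free_surface(void);
int  pool_is_sufficient(const mfxFrameAllocRequest *req);
void reallocate_surfaces(const mfxFrameAllocRequest *req);

/* 'par' must already carry the stream's CodecId; 'bs' points at the
 * data which triggered MFX_ERR_INCOMPATIBLE_VIDEO_PARAM. */
static mfxStatus reinit_decoder(mfxSession session, mfxBitstream *bs,
                                mfxVideoParam *par)
{
    mfxFrameAllocRequest req = { 0 };
    mfxFrameSurface1 *out;
    mfxSyncPoint sync;
    mfxStatus sts;

    /* 1. Flush the frames cached in the decoder: pass a NULL bitstream
     *    until MFX_ERR_MORE_DATA says nothing is left. */
    do {
        out  = NULL;
        sync = NULL;
        sts  = MFXVideoDECODE_DecodeFrameAsync(session, NULL,
                                               get_free_surface(),
                                               &out, &sync);
        if (sync)
            MFXVideoCORE_SyncOperation(session, sync, 60000);
    } while (sts == MFX_ERR_NONE || sts == MFX_ERR_MORE_SURFACE);

    /* 2. Re-read the stream header to get the updated parameters. */
    sts = MFXVideoDECODE_DecodeHeader(session, bs, par);
    if (sts < MFX_ERR_NONE)
        return sts;

    /* 3. Query how many surfaces the new stream needs and reallocate
     *    only if the current pool is not sufficient. */
    sts = MFXVideoDECODE_QueryIOSurf(session, par, &req);
    if (sts < MFX_ERR_NONE)
        return sts;
    if (!pool_is_sufficient(&req))
        reallocate_surfaces(&req);

    /* 4. Reset the decoder with the new parameters. */
    return MFXVideoDECODE_Reset(session, par);
}

Something along these lines would let qsvdec call DecodeHeader only for
the very first packet and after a reported parameter change, instead of
for every packet.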

> @@ -589,4 +594,5 @@ reinit_fail:
>  void ff_qsv_decode_flush(AVCodecContext *avctx, QSVContext *q)
>  {
>      q->orig_pix_fmt = AV_PIX_FMT_NONE;
> +    q->initialized = 0;
>  }
> diff --git a/libavcodec/qsvdec.h b/libavcodec/qsvdec.h
> index 111536caba..4812fb2a6b 100644
> --- a/libavcodec/qsvdec.h
> +++ b/libavcodec/qsvdec.h
> @@ -63,6 +63,8 @@ typedef struct QSVContext {
>      uint32_t fourcc;
>      mfxFrameInfo frame_info;
>  
> +    int initialized;
> +
>      // options set by the caller
>      int async_depth;
>      int iopattern;
> diff --git a/libavcodec/qsvdec_h2645.c b/libavcodec/qsvdec_h2645.c
> index 9b49f5506e..eb1dc336a4 100644
> --- a/libavcodec/qsvdec_h2645.c
> +++ b/libavcodec/qsvdec_h2645.c
> @@ -103,6 +103,7 @@ static av_cold int qsv_decode_init(AVCodecContext *avctx)
>          }
>      }
>  
> +    s->qsv.orig_pix_fmt = AV_PIX_FMT_NV12;
>      s->packet_fifo = av_fifo_alloc(sizeof(AVPacket));
>      if (!s->packet_fifo) {
>          ret = AVERROR(ENOMEM);
> diff --git a/libavcodec/qsvdec_other.c b/libavcodec/qsvdec_other.c
> index 03251d2c85..a6f1b88ca0 100644
> --- a/libavcodec/qsvdec_other.c
> +++ b/libavcodec/qsvdec_other.c
> @@ -90,6 +90,7 @@ static av_cold int qsv_decode_init(AVCodecContext *avctx)
>      }
>  #endif
>  
> +    s->qsv.orig_pix_fmt = AV_PIX_FMT_NV12;
>      s->packet_fifo = av_fifo_alloc(sizeof(AVPacket));
>      if (!s->packet_fifo) {
>          ret = AVERROR(ENOMEM);

