24 #include "config_components.h"
/* 24-bit sync/start code carried in every VP9 uncompressed frame header;
 * used to validate the bitstream before parsing the rest of the header. */
49 #define VP9_SYNCCODE 0x498342
102 f->segmentation_map =
NULL;
114 sz = 64 *
s->sb_cols *
s->sb_rows;
115 if (sz !=
s->frame_extradata_pool_size) {
119 if (!
s->frame_extradata_pool) {
120 s->frame_extradata_pool_size = 0;
124 s->frame_extradata_pool_size = sz;
132 f->segmentation_map =
f->extradata;
152 dst->segmentation_map =
src->segmentation_map;
154 dst->uses_2pass =
src->uses_2pass;
157 src->hwaccel_picture_private);
/* Upper bound on how many hardware-accelerated pixel formats may be
 * appended to the get_format() candidate list: each CONFIG_* symbol is
 * 0 or 1 depending on build configuration.  D3D11VA is weighted by 2,
 * presumably because that hwaccel exposes two pixel formats — TODO
 * confirm against the *fmtp++ assignments in update_size(). */
162 #define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \
163 CONFIG_VP9_D3D11VA_HWACCEL * 2 + \
164 CONFIG_VP9_D3D12VA_HWACCEL + \
165 CONFIG_VP9_NVDEC_HWACCEL + \
166 CONFIG_VP9_VAAPI_HWACCEL + \
167 CONFIG_VP9_VDPAU_HWACCEL + \
168 CONFIG_VP9_VIDEOTOOLBOX_HWACCEL)
172 int bytesperpixel =
s->bytesperpixel,
ret, cols, rows;
177 if (!(
s->pix_fmt ==
s->gf_fmt &&
w ==
s->w &&
h ==
s->h)) {
181 switch (
s->pix_fmt) {
184 #if CONFIG_VP9_DXVA2_HWACCEL
187 #if CONFIG_VP9_D3D11VA_HWACCEL
191 #if CONFIG_VP9_D3D12VA_HWACCEL
194 #if CONFIG_VP9_NVDEC_HWACCEL
197 #if CONFIG_VP9_VAAPI_HWACCEL
200 #if CONFIG_VP9_VDPAU_HWACCEL
203 #if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL
208 #if CONFIG_VP9_NVDEC_HWACCEL
211 #if CONFIG_VP9_VAAPI_HWACCEL
214 #if CONFIG_VP9_VDPAU_HWACCEL
221 #if CONFIG_VP9_VAAPI_HWACCEL
228 #if CONFIG_VP9_VAAPI_HWACCEL
234 *fmtp++ =
s->pix_fmt;
242 s->gf_fmt =
s->pix_fmt;
250 if (
s->intra_pred_data[0] && cols ==
s->cols && rows ==
s->rows &&
s->pix_fmt ==
s->last_fmt)
253 s->last_fmt =
s->pix_fmt;
254 s->sb_cols = (
w + 63) >> 6;
255 s->sb_rows = (
h + 63) >> 6;
256 s->cols = (
w + 7) >> 3;
257 s->rows = (
h + 7) >> 3;
/* Carve a per-column sub-array for `var` out of the single arena addressed
 * by the local cursor `p`: point `var` (cast to `type`) at the current
 * position, then advance `p` by s->sb_cols * (n) elements of *var.
 * Wrapped in do { } while (0) so the two statements act as one statement
 * and stay safe inside unbraced if/else bodies (CERT PRE10-C). */
#define assign(var, type, n) do { var = (type) p; p += s->sb_cols * (n) * sizeof(*var); } while (0)
264 p =
av_malloc(
s->sb_cols * (128 + 192 * bytesperpixel +
265 lflvl_len *
sizeof(*
s->lflvl) + 16 *
sizeof(*
s->above_mv_ctx)));
268 assign(
s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel);
269 assign(
s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel);
270 assign(
s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel);
271 assign(
s->above_y_nnz_ctx, uint8_t *, 16);
272 assign(
s->above_mode_ctx, uint8_t *, 16);
274 assign(
s->above_uv_nnz_ctx[0], uint8_t *, 16);
275 assign(
s->above_uv_nnz_ctx[1], uint8_t *, 16);
276 assign(
s->above_partition_ctx, uint8_t *, 8);
277 assign(
s->above_skip_ctx, uint8_t *, 8);
278 assign(
s->above_txfm_ctx, uint8_t *, 8);
279 assign(
s->above_segpred_ctx, uint8_t *, 8);
280 assign(
s->above_intra_ctx, uint8_t *, 8);
281 assign(
s->above_comp_ctx, uint8_t *, 8);
282 assign(
s->above_ref_ctx, uint8_t *, 8);
283 assign(
s->above_filter_ctx, uint8_t *, 8);
288 for (
i = 0;
i <
s->active_tile_cols;
i++)
292 if (
s->s.h.bpp !=
s->last_bpp) {
295 s->last_bpp =
s->s.h.bpp;
305 int chroma_blocks, chroma_eobs, bytesperpixel =
s->bytesperpixel;
312 chroma_blocks = 64 * 64 >> (
s->ss_h +
s->ss_v);
313 chroma_eobs = 16 * 16 >> (
s->ss_h +
s->ss_v);
315 int sbs =
s->sb_cols *
s->sb_rows;
319 16 * 16 + 2 * chroma_eobs) * sbs);
334 for (
i = 1;
i <
s->active_tile_cols;
i++)
337 for (
i = 0;
i <
s->active_tile_cols;
i++) {
339 s->td[
i].block_base =
av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel *
sizeof(int16_t) +
340 16 * 16 + 2 * chroma_eobs);
341 if (!
s->td[
i].b_base || !
s->td[
i].block_base)
343 s->td[
i].uvblock_base[0] =
s->td[
i].block_base + 64 * 64 * bytesperpixel;
344 s->td[
i].uvblock_base[1] =
s->td[
i].uvblock_base[0] + chroma_blocks * bytesperpixel;
345 s->td[
i].eob_base = (uint8_t *) (
s->td[
i].uvblock_base[1] + chroma_blocks * bytesperpixel);
346 s->td[
i].uveob_base[0] =
s->td[
i].eob_base + 16 * 16;
347 s->td[
i].uveob_base[1] =
s->td[
i].uveob_base[0] + chroma_eobs;
351 if (!
s->td[
i].block_structure)
356 s->block_alloc_using_2pass =
s->s.frames[
CUR_FRAME].uses_2pass;
373 return m - ((v + 1) >> 1);
380 static const uint8_t inv_map_table[255] = {
381 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
382 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
383 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
384 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
385 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
386 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
387 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
388 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
389 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
390 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
391 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
392 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
393 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
394 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
395 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
396 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
397 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
398 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
446 s->s.h.bpp = 8 +
bits * 2;
447 s->bytesperpixel = (7 +
s->s.h.bpp) >> 3;
453 s->ss_h =
s->ss_v = 0;
467 static const enum AVPixelFormat pix_fmt_for_ss[3][2 ][2 ] = {
479 s->pix_fmt = pix_fmt_for_ss[
bits][
s->ss_v][
s->ss_h];
490 s->ss_h =
s->ss_v = 1;
491 s->pix_fmt = pix_fmt_for_ss[
bits][1][1];
502 int c,
i, j, k, l, m, n,
w,
h,
max, size2,
ret, sharp;
504 const uint8_t *data2;
528 s->last_keyframe =
s->s.h.keyframe;
531 last_invisible =
s->s.h.invisible;
534 s->s.h.use_last_frame_mvs = !
s->s.h.errorres && !last_invisible;
536 if (
s->s.h.keyframe) {
544 s->s.h.refreshrefmask = 0xff;
550 s->s.h.intraonly =
s->s.h.invisible ?
get_bits1(&
s->gb) : 0;
551 s->s.h.resetctx =
s->s.h.errorres ? 0 :
get_bits(&
s->gb, 2);
552 if (
s->s.h.intraonly) {
561 s->ss_h =
s->ss_v = 1;
564 s->bytesperpixel = 1;
577 s->s.h.signbias[0] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
579 s->s.h.signbias[1] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
581 s->s.h.signbias[2] =
get_bits1(&
s->gb) && !
s->s.h.errorres;
582 if (!
s->s.refs[
s->s.h.refidx[0]].f ||
583 !
s->s.refs[
s->s.h.refidx[1]].f ||
584 !
s->s.refs[
s->s.h.refidx[2]].f) {
589 w =
s->s.refs[
s->s.h.refidx[0]].f->width;
590 h =
s->s.refs[
s->s.h.refidx[0]].f->height;
592 w =
s->s.refs[
s->s.h.refidx[1]].f->width;
593 h =
s->s.refs[
s->s.h.refidx[1]].f->height;
595 w =
s->s.refs[
s->s.h.refidx[2]].f->width;
596 h =
s->s.refs[
s->s.h.refidx[2]].f->height;
604 s->s.h.use_last_frame_mvs &=
s->s.frames[
CUR_FRAME].tf.f &&
612 s->s.h.allowcompinter =
s->s.h.signbias[0] !=
s->s.h.signbias[1] ||
613 s->s.h.signbias[0] !=
s->s.h.signbias[2];
614 if (
s->s.h.allowcompinter) {
615 if (
s->s.h.signbias[0] ==
s->s.h.signbias[1]) {
616 s->s.h.fixcompref = 2;
617 s->s.h.varcompref[0] = 0;
618 s->s.h.varcompref[1] = 1;
619 }
else if (
s->s.h.signbias[0] ==
s->s.h.signbias[2]) {
620 s->s.h.fixcompref = 1;
621 s->s.h.varcompref[0] = 0;
622 s->s.h.varcompref[1] = 2;
624 s->s.h.fixcompref = 0;
625 s->s.h.varcompref[0] = 1;
626 s->s.h.varcompref[1] = 2;
631 s->s.h.refreshctx =
s->s.h.errorres ? 0 :
get_bits1(&
s->gb);
632 s->s.h.parallelmode =
s->s.h.errorres ? 1 :
get_bits1(&
s->gb);
634 if (
s->s.h.keyframe ||
s->s.h.intraonly)
635 s->s.h.framectxid = 0;
638 if (
s->s.h.keyframe ||
s->s.h.errorres ||
s->s.h.intraonly) {
640 s->s.h.lf_delta.ref[0] = 1;
641 s->s.h.lf_delta.ref[1] = 0;
642 s->s.h.lf_delta.ref[2] = -1;
643 s->s.h.lf_delta.ref[3] = -1;
644 s->s.h.lf_delta.mode[0] = 0;
645 s->s.h.lf_delta.mode[1] = 0;
646 memset(
s->s.h.segmentation.feat, 0,
sizeof(
s->s.h.segmentation.feat));
652 if (
s->s.h.filter.sharpness != sharp) {
653 for (
i = 1;
i <= 63;
i++) {
657 limit >>= (sharp + 3) >> 2;
662 s->filter_lut.lim_lut[
i] =
limit;
663 s->filter_lut.mblim_lut[
i] = 2 * (
i + 2) +
limit;
666 s->s.h.filter.sharpness = sharp;
667 if ((
s->s.h.lf_delta.enabled =
get_bits1(&
s->gb))) {
668 if ((
s->s.h.lf_delta.updated =
get_bits1(&
s->gb))) {
669 for (
i = 0;
i < 4;
i++)
672 for (
i = 0;
i < 2;
i++)
683 s->s.h.lossless =
s->s.h.yac_qi == 0 &&
s->s.h.ydc_qdelta == 0 &&
684 s->s.h.uvdc_qdelta == 0 &&
s->s.h.uvac_qdelta == 0;
685 #if FF_API_CODEC_PROPS
693 if ((
s->s.h.segmentation.enabled =
get_bits1(&
s->gb))) {
694 if ((
s->s.h.segmentation.update_map =
get_bits1(&
s->gb))) {
695 for (
i = 0;
i < 7;
i++)
698 if ((
s->s.h.segmentation.temporal =
get_bits1(&
s->gb)))
699 for (
i = 0;
i < 3;
i++)
705 s->s.h.segmentation.absolute_vals =
get_bits1(&
s->gb);
706 for (
i = 0;
i < 8;
i++) {
707 if ((
s->s.h.segmentation.feat[
i].q_enabled =
get_bits1(&
s->gb)))
709 if ((
s->s.h.segmentation.feat[
i].lf_enabled =
get_bits1(&
s->gb)))
711 if ((
s->s.h.segmentation.feat[
i].ref_enabled =
get_bits1(&
s->gb)))
712 s->s.h.segmentation.feat[
i].ref_val =
get_bits(&
s->gb, 2);
713 s->s.h.segmentation.feat[
i].skip_enabled =
get_bits1(&
s->gb);
720 s->s.h.segmentation.temporal = 0;
721 s->s.h.segmentation.update_map = 0;
725 for (
i = 0;
i < (
s->s.h.segmentation.enabled ? 8 : 1);
i++) {
726 int qyac, qydc, quvac, quvdc, lflvl, sh;
728 if (
s->s.h.segmentation.enabled &&
s->s.h.segmentation.feat[
i].q_enabled) {
729 if (
s->s.h.segmentation.absolute_vals)
734 qyac =
s->s.h.yac_qi;
746 sh =
s->s.h.filter.level >= 32;
747 if (
s->s.h.segmentation.enabled &&
s->s.h.segmentation.feat[
i].lf_enabled) {
748 if (
s->s.h.segmentation.absolute_vals)
751 lflvl =
av_clip_uintp2(
s->s.h.filter.level +
s->s.h.segmentation.feat[
i].lf_val, 6);
753 lflvl =
s->s.h.filter.level;
755 if (
s->s.h.lf_delta.enabled) {
756 s->s.h.segmentation.feat[
i].lflvl[0][0] =
757 s->s.h.segmentation.feat[
i].lflvl[0][1] =
759 for (j = 1; j < 4; j++) {
760 s->s.h.segmentation.feat[
i].lflvl[j][0] =
762 s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
763 s->s.h.segmentation.feat[
i].lflvl[j][1] =
765 s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
768 memset(
s->s.h.segmentation.feat[
i].lflvl, lflvl,
769 sizeof(
s->s.h.segmentation.feat[
i].lflvl));
779 for (
s->s.h.tiling.log2_tile_cols = 0;
780 s->sb_cols > (64 <<
s->s.h.tiling.log2_tile_cols);
781 s->s.h.tiling.log2_tile_cols++) ;
782 for (
max = 0; (
s->sb_cols >>
max) >= 4;
max++) ;
784 while (
max >
s->s.h.tiling.log2_tile_cols) {
786 s->s.h.tiling.log2_tile_cols++;
791 s->s.h.tiling.tile_rows = 1 <<
s->s.h.tiling.log2_tile_rows;
792 if (
s->s.h.tiling.tile_cols != (1 <<
s->s.h.tiling.log2_tile_cols)) {
797 for (
i = 0;
i <
s->active_tile_cols;
i++)
802 s->s.h.tiling.tile_cols = 1 <<
s->s.h.tiling.log2_tile_cols;
804 s->s.h.tiling.tile_cols : 1;
809 n_range_coders =
s->s.h.tiling.tile_cols;
816 for (
i = 0;
i <
s->active_tile_cols;
i++) {
819 rc += n_range_coders;
824 if (!
s->s.h.keyframe && !
s->s.h.intraonly) {
825 int valid_ref_frame = 0;
826 for (
i = 0;
i < 3;
i++) {
828 int refw =
ref->width, refh =
ref->height;
832 "Ref pixfmt (%s) did not match current frame (%s)",
836 }
else if (refw ==
w && refh ==
h) {
837 s->mvscale[
i][0] =
s->mvscale[
i][1] = 0;
841 if (
w * 2 < refw ||
h * 2 < refh ||
w > 16 * refw ||
h > 16 * refh) {
843 "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
848 s->mvscale[
i][0] = (refw << 14) /
w;
849 s->mvscale[
i][1] = (refh << 14) /
h;
850 s->mvstep[
i][0] = 16 *
s->mvscale[
i][0] >> 14;
851 s->mvstep[
i][1] = 16 *
s->mvscale[
i][1] >> 14;
855 if (!valid_ref_frame) {
856 av_log(avctx,
AV_LOG_ERROR,
"No valid reference frame is found, bitstream not supported\n");
861 if (
s->s.h.keyframe ||
s->s.h.errorres || (
s->s.h.intraonly &&
s->s.h.resetctx == 3)) {
862 s->prob_ctx[0].p =
s->prob_ctx[1].p =
s->prob_ctx[2].p =
872 }
else if (
s->s.h.intraonly &&
s->s.h.resetctx == 2) {
879 s->s.h.compressed_header_size = size2 =
get_bits(&
s->gb, 16);
883 if (size2 >
size - (data2 -
data)) {
896 for (
i = 0;
i <
s->active_tile_cols;
i++) {
897 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
898 memset(
s->td[
i].counts.coef, 0,
sizeof(
s->td[0].counts.coef));
899 memset(
s->td[
i].counts.eob, 0,
sizeof(
s->td[0].counts.eob));
901 memset(&
s->td[
i].counts, 0,
sizeof(
s->td[0].counts));
903 s->td[
i].nb_block_structure = 0;
909 s->prob.p =
s->prob_ctx[
c].p;
912 if (
s->s.h.lossless) {
916 if (
s->s.h.txfmmode == 3)
920 for (
i = 0;
i < 2;
i++)
923 for (
i = 0;
i < 2;
i++)
924 for (j = 0; j < 2; j++)
926 s->prob.p.tx16p[
i][j] =
928 for (
i = 0;
i < 2;
i++)
929 for (j = 0; j < 3; j++)
931 s->prob.p.tx32p[
i][j] =
937 for (
i = 0;
i < 4;
i++) {
938 uint8_t (*
ref)[2][6][6][3] =
s->prob_ctx[
c].coef[
i];
940 for (j = 0; j < 2; j++)
941 for (k = 0; k < 2; k++)
942 for (l = 0; l < 6; l++)
943 for (m = 0; m < 6; m++) {
944 uint8_t *p =
s->prob.coef[
i][j][k][l][m];
945 uint8_t *
r =
ref[j][k][l][m];
946 if (m >= 3 && l == 0)
948 for (n = 0; n < 3; n++) {
957 for (j = 0; j < 2; j++)
958 for (k = 0; k < 2; k++)
959 for (l = 0; l < 6; l++)
960 for (m = 0; m < 6; m++) {
961 uint8_t *p =
s->prob.coef[
i][j][k][l][m];
962 uint8_t *
r =
ref[j][k][l][m];
969 if (
s->s.h.txfmmode ==
i)
974 for (
i = 0;
i < 3;
i++)
977 if (!
s->s.h.keyframe && !
s->s.h.intraonly) {
978 for (
i = 0;
i < 7;
i++)
979 for (j = 0; j < 3; j++)
981 s->prob.p.mv_mode[
i][j] =
985 for (
i = 0;
i < 4;
i++)
986 for (j = 0; j < 2; j++)
988 s->prob.p.filter[
i][j] =
991 for (
i = 0;
i < 4;
i++)
995 if (
s->s.h.allowcompinter) {
997 if (
s->s.h.comppredmode)
1000 for (
i = 0;
i < 5;
i++)
1009 for (
i = 0;
i < 5;
i++) {
1011 s->prob.p.single_ref[
i][0] =
1014 s->prob.p.single_ref[
i][1] =
1020 for (
i = 0;
i < 5;
i++)
1022 s->prob.p.comp_ref[
i] =
1026 for (
i = 0;
i < 4;
i++)
1027 for (j = 0; j < 9; j++)
1029 s->prob.p.y_mode[
i][j] =
1032 for (
i = 0;
i < 4;
i++)
1033 for (j = 0; j < 4; j++)
1034 for (k = 0; k < 3; k++)
1036 s->prob.p.partition[3 -
i][j][k] =
1038 s->prob.p.partition[3 -
i][j][k]);
1041 for (
i = 0;
i < 3;
i++)
1045 for (
i = 0;
i < 2;
i++) {
1047 s->prob.p.mv_comp[
i].sign =
1050 for (j = 0; j < 10; j++)
1052 s->prob.p.mv_comp[
i].classes[j] =
1056 s->prob.p.mv_comp[
i].class0 =
1059 for (j = 0; j < 10; j++)
1061 s->prob.p.mv_comp[
i].bits[j] =
1065 for (
i = 0;
i < 2;
i++) {
1066 for (j = 0; j < 2; j++)
1067 for (k = 0; k < 3; k++)
1069 s->prob.p.mv_comp[
i].class0_fp[j][k] =
1072 for (j = 0; j < 3; j++)
1074 s->prob.p.mv_comp[
i].fp[j] =
1078 if (
s->s.h.highprecisionmvs) {
1079 for (
i = 0;
i < 2;
i++) {
1081 s->prob.p.mv_comp[
i].class0_hp =
1085 s->prob.p.mv_comp[
i].hp =
1091 return (data2 -
data) + size2;
1095 ptrdiff_t yoff, ptrdiff_t uvoff,
enum BlockLevel bl)
1098 int c = ((
s->above_partition_ctx[col] >> (3 - bl)) & 1) |
1101 s->prob.p.partition[bl][
c];
1103 ptrdiff_t hbs = 4 >> bl;
1105 ptrdiff_t y_stride =
f->linesize[0], uv_stride =
f->linesize[1];
1106 int bytesperpixel =
s->bytesperpixel;
1111 }
else if (col + hbs < s->cols) {
1112 if (row + hbs < s->rows) {
1120 yoff += hbs * 8 * y_stride;
1121 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1126 yoff += hbs * 8 * bytesperpixel;
1127 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1131 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1133 yoff + 8 * hbs * bytesperpixel,
1134 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1135 yoff += hbs * 8 * y_stride;
1136 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1137 decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1138 decode_sb(td, row + hbs, col + hbs, lflvl,
1139 yoff + 8 * hbs * bytesperpixel,
1140 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1147 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1149 yoff + 8 * hbs * bytesperpixel,
1150 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1155 }
else if (row + hbs < s->rows) {
1158 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1159 yoff += hbs * 8 * y_stride;
1160 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1161 decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1168 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1174 ptrdiff_t yoff, ptrdiff_t uvoff,
enum BlockLevel bl)
1178 ptrdiff_t hbs = 4 >> bl;
1180 ptrdiff_t y_stride =
f->linesize[0], uv_stride =
f->linesize[1];
1181 int bytesperpixel =
s->bytesperpixel;
1186 }
else if (td->
b->
bl == bl) {
1189 yoff += hbs * 8 * y_stride;
1190 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1192 }
else if (
b->bp ==
PARTITION_V && col + hbs < s->cols) {
1193 yoff += hbs * 8 * bytesperpixel;
1194 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1199 if (col + hbs < s->cols) {
1200 if (row + hbs < s->rows) {
1201 decode_sb_mem(td, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
1202 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1203 yoff += hbs * 8 * y_stride;
1204 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1205 decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1207 yoff + 8 * hbs * bytesperpixel,
1208 uvoff + (8 * hbs * bytesperpixel >>
s->ss_h), bl + 1);
1210 yoff += hbs * 8 * bytesperpixel;
1211 uvoff += hbs * 8 * bytesperpixel >>
s->ss_h;
1212 decode_sb_mem(td, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
1214 }
else if (row + hbs < s->rows) {
1215 yoff += hbs * 8 * y_stride;
1216 uvoff += hbs * 8 * uv_stride >>
s->ss_v;
1217 decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1224 int sb_start = ( idx * n) >> log2_n;
1225 int sb_end = ((idx + 1) * n) >> log2_n;
1226 *start =
FFMIN(sb_start, n) << 3;
1227 *end =
FFMIN(sb_end, n) << 3;
1235 for (
i = 0;
i <
s->active_tile_cols;
i++)
1244 for (
int i = 0;
i < 3;
i++)
1247 for (
i = 0;
i < 8;
i++) {
1266 int row, col, tile_row, tile_col,
ret;
1268 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1270 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
1273 ls_y =
f->linesize[0];
1274 ls_uv =
f->linesize[1];
1275 bytesperpixel =
s->bytesperpixel;
1278 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1280 tile_row,
s->s.h.tiling.log2_tile_rows,
s->sb_rows);
1282 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1285 if (tile_col ==
s->s.h.tiling.tile_cols - 1 &&
1286 tile_row ==
s->s.h.tiling.tile_rows - 1) {
1293 if (tile_size >
size)
1304 for (row = tile_row_start; row < tile_row_end;
1305 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >>
s->ss_v) {
1307 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1309 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1311 tile_col,
s->s.h.tiling.log2_tile_cols,
s->sb_cols);
1316 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1325 td->
c = &td->
c_b[tile_col];
1328 for (col = tile_col_start;
1330 col += 8, yoff2 += 64 * bytesperpixel,
1331 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1335 memset(lflvl_ptr->
mask, 0,
sizeof(lflvl_ptr->
mask));
1356 if (row + 8 <
s->rows) {
1357 memcpy(
s->intra_pred_data[0],
1358 f->data[0] + yoff + 63 * ls_y,
1359 8 *
s->cols * bytesperpixel);
1360 memcpy(
s->intra_pred_data[1],
1361 f->data[1] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1362 8 *
s->cols * bytesperpixel >>
s->ss_h);
1363 memcpy(
s->intra_pred_data[2],
1364 f->data[2] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1365 8 *
s->cols * bytesperpixel >>
s->ss_h);
1369 if (
s->s.h.filter.level) {
1372 lflvl_ptr =
s->lflvl;
1373 for (col = 0; col <
s->cols;
1374 col += 8, yoff2 += 64 * bytesperpixel,
1375 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1392 int decode_tiles_mt(
AVCodecContext *avctx,
void *tdata,
int jobnr,
1397 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1398 int bytesperpixel =
s->bytesperpixel, row, col, tile_row;
1399 unsigned tile_cols_len;
1400 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1405 ls_y =
f->linesize[0];
1406 ls_uv =
f->linesize[1];
1409 jobnr,
s->s.h.tiling.log2_tile_cols,
s->sb_cols);
1411 uvoff = (64 * bytesperpixel >>
s->ss_h)*(tile_col_start >> 3);
1412 yoff = (64 * bytesperpixel)*(tile_col_start >> 3);
1413 lflvl_ptr_base =
s->lflvl+(tile_col_start >> 3);
1415 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1417 tile_row,
s->s.h.tiling.log2_tile_rows,
s->sb_rows);
1419 td->
c = &td->
c_b[tile_row];
1420 for (row = tile_row_start; row < tile_row_end;
1421 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >>
s->ss_v) {
1422 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1423 VP9Filter *lflvl_ptr = lflvl_ptr_base+
s->sb_cols*(row >> 3);
1427 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1436 for (col = tile_col_start;
1438 col += 8, yoff2 += 64 * bytesperpixel,
1439 uvoff2 += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1442 memset(lflvl_ptr->
mask, 0,
sizeof(lflvl_ptr->
mask));
1449 tile_cols_len = tile_col_end - tile_col_start;
1450 if (row + 8 <
s->rows) {
1451 memcpy(
s->intra_pred_data[0] + (tile_col_start * 8 * bytesperpixel),
1452 f->data[0] + yoff + 63 * ls_y,
1453 8 * tile_cols_len * bytesperpixel);
1454 memcpy(
s->intra_pred_data[1] + (tile_col_start * 8 * bytesperpixel >>
s->ss_h),
1455 f->data[1] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1456 8 * tile_cols_len * bytesperpixel >>
s->ss_h);
1457 memcpy(
s->intra_pred_data[2] + (tile_col_start * 8 * bytesperpixel >>
s->ss_h),
1458 f->data[2] + uvoff + ((64 >>
s->ss_v) - 1) * ls_uv,
1459 8 * tile_cols_len * bytesperpixel >>
s->ss_h);
1462 vp9_report_tile_progress(
s, row >> 3, 1);
1472 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1474 int bytesperpixel =
s->bytesperpixel, col,
i;
1478 ls_y =
f->linesize[0];
1479 ls_uv =
f->linesize[1];
1481 for (
i = 0;
i <
s->sb_rows;
i++) {
1482 vp9_await_tile_progress(
s,
i,
s->s.h.tiling.tile_cols);
1484 if (
s->s.h.filter.level) {
1485 yoff = (ls_y * 64)*
i;
1486 uvoff = (ls_uv * 64 >>
s->ss_v)*
i;
1487 lflvl_ptr =
s->lflvl+
s->sb_cols*
i;
1488 for (col = 0; col <
s->cols;
1489 col += 8, yoff += 64 * bytesperpixel,
1490 uvoff += 64 * bytesperpixel >>
s->ss_h, lflvl_ptr++) {
1503 unsigned int tile, nb_blocks = 0;
1505 if (
s->s.h.segmentation.enabled) {
1506 for (tile = 0; tile <
s->active_tile_cols; tile++)
1507 nb_blocks +=
s->td[tile].nb_block_structure;
1515 par->
qp =
s->s.h.yac_qi;
1516 par->
delta_qp[0][0] =
s->s.h.ydc_qdelta;
1517 par->
delta_qp[1][0] =
s->s.h.uvdc_qdelta;
1518 par->
delta_qp[2][0] =
s->s.h.uvdc_qdelta;
1519 par->
delta_qp[1][1] =
s->s.h.uvac_qdelta;
1520 par->
delta_qp[2][1] =
s->s.h.uvac_qdelta;
1523 unsigned int block = 0;
1524 unsigned int tile, block_tile;
1526 for (tile = 0; tile <
s->active_tile_cols; tile++) {
1533 uint8_t seg_id =
frame->segmentation_map[row * 8 *
s->sb_cols + col];
1540 if (
s->s.h.segmentation.feat[seg_id].q_enabled) {
1541 b->delta_qp =
s->s.h.segmentation.feat[seg_id].q_val;
1542 if (
s->s.h.segmentation.absolute_vals)
1543 b->delta_qp -= par->
qp;
1560 (!
s->s.h.segmentation.enabled || !
s->s.h.segmentation.update_map);
1566 }
else if (
ret == 0) {
1567 if (!
s->s.refs[
ref].f) {
1571 for (
int i = 0;
i < 8;
i++)
1586 src = !
s->s.h.keyframe && !
s->s.h.intraonly && !
s->s.h.errorres ?
1588 if (!retain_segmap_ref ||
s->s.h.keyframe ||
s->s.h.intraonly)
1595 if (
s->s.h.keyframe)
1599 if (
s->s.h.lossless)
1613 for (
i = 0;
i < 8;
i++) {
1615 s->s.h.refreshrefmask & (1 <<
i) ?
1634 memset(
s->above_partition_ctx, 0,
s->cols);
1635 memset(
s->above_skip_ctx, 0,
s->cols);
1636 if (
s->s.h.keyframe ||
s->s.h.intraonly) {
1637 memset(
s->above_mode_ctx,
DC_PRED,
s->cols * 2);
1641 memset(
s->above_y_nnz_ctx, 0,
s->sb_cols * 16);
1642 memset(
s->above_uv_nnz_ctx[0], 0,
s->sb_cols * 16 >>
s->ss_h);
1643 memset(
s->above_uv_nnz_ctx[1], 0,
s->sb_cols * 16 >>
s->ss_h);
1644 memset(
s->above_segpred_ctx, 0,
s->cols);
1649 "Failed to allocate block buffers\n");
1652 if (
s->s.h.refreshctx &&
s->s.h.parallelmode) {
1655 for (
i = 0;
i < 4;
i++) {
1656 for (j = 0; j < 2; j++)
1657 for (k = 0; k < 2; k++)
1658 for (l = 0; l < 6; l++)
1659 for (m = 0; m < 6; m++)
1660 memcpy(
s->prob_ctx[
s->s.h.framectxid].coef[
i][j][k][l][m],
1661 s->prob.coef[
i][j][k][l][m], 3);
1662 if (
s->s.h.txfmmode ==
i)
1665 s->prob_ctx[
s->s.h.framectxid].p =
s->prob.p;
1667 }
else if (!
s->s.h.refreshctx) {
1673 for (
i = 0;
i <
s->sb_rows;
i++)
1679 for (
i = 0;
i <
s->active_tile_cols;
i++) {
1680 s->td[
i].b =
s->td[
i].b_base;
1681 s->td[
i].block =
s->td[
i].block_base;
1682 s->td[
i].uvblock[0] =
s->td[
i].uvblock_base[0];
1683 s->td[
i].uvblock[1] =
s->td[
i].uvblock_base[1];
1684 s->td[
i].eob =
s->td[
i].eob_base;
1685 s->td[
i].uveob[0] =
s->td[
i].uveob_base[0];
1686 s->td[
i].uveob[1] =
s->td[
i].uveob_base[1];
1687 s->td[
i].error_info = 0;
1692 int tile_row, tile_col;
1696 for (tile_row = 0; tile_row <
s->s.h.tiling.tile_rows; tile_row++) {
1697 for (tile_col = 0; tile_col <
s->s.h.tiling.tile_cols; tile_col++) {
1700 if (tile_col ==
s->s.h.tiling.tile_cols - 1 &&
1701 tile_row ==
s->s.h.tiling.tile_rows - 1) {
1708 if (tile_size >
size)
1731 for (
i = 1;
i <
s->s.h.tiling.tile_cols;
i++)
1732 for (j = 0; j <
sizeof(
s->td[
i].counts) /
sizeof(
unsigned); j++)
1733 ((
unsigned *)&
s->td[0].counts)[j] += ((
unsigned *)&
s->td[
i].counts)[j];
1735 if (
s->pass < 2 &&
s->s.h.refreshctx && !
s->s.h.parallelmode) {
1739 }
while (
s->pass++ == 1);
1741 if (
s->td->error_info < 0) {
1743 s->td->error_info = 0;
1756 for (
int i = 0;
i < 8;
i++)
1759 if (!
s->s.h.invisible) {
1776 for (
i = 0;
i < 3;
i++)
1778 for (
i = 0;
i < 8;
i++)
1791 s->s.h.filter.sharpness = -1;
1809 for (
int i = 0;
i < 3;
i++)
1811 for (
int i = 0;
i < 8;
i++)
1814 s->frame_extradata_pool_size = ssrc->frame_extradata_pool_size;
1816 s->s.h.invisible = ssrc->s.h.invisible;
1817 s->s.h.keyframe = ssrc->s.h.keyframe;
1818 s->s.h.intraonly = ssrc->s.h.intraonly;
1819 s->ss_v = ssrc->ss_v;
1820 s->ss_h = ssrc->ss_h;
1821 s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
1822 s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
1823 s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
1824 s->bytesperpixel = ssrc->bytesperpixel;
1825 s->gf_fmt = ssrc->gf_fmt;
1828 s->s.h.bpp = ssrc->s.h.bpp;
1829 s->bpp_index = ssrc->bpp_index;
1830 s->pix_fmt = ssrc->pix_fmt;
1831 memcpy(&
s->prob_ctx, &ssrc->prob_ctx,
sizeof(
s->prob_ctx));
1832 memcpy(&
s->s.h.lf_delta, &ssrc->s.h.lf_delta,
sizeof(
s->s.h.lf_delta));
1833 memcpy(&
s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
1834 sizeof(
s->s.h.segmentation.feat));
1856 .bsfs =
"vp9_superframe_split",
1858 #if CONFIG_VP9_DXVA2_HWACCEL
1861 #if CONFIG_VP9_D3D11VA_HWACCEL
1864 #if CONFIG_VP9_D3D11VA2_HWACCEL
1867 #if CONFIG_VP9_D3D12VA_HWACCEL
1870 #if CONFIG_VP9_NVDEC_HWACCEL
1873 #if CONFIG_VP9_VAAPI_HWACCEL
1876 #if CONFIG_VP9_VDPAU_HWACCEL
1879 #if CONFIG_VP9_VIDEOTOOLBOX_HWACCEL