vp9.c
1 /*
2  * VP9 compatible video decoder
3  *
4  * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5  * Copyright (C) 2013 Clément Bœsch <u pkh me>
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 #include "avcodec.h"
25 #include "get_bits.h"
26 #include "internal.h"
27 #include "thread.h"
28 #include "videodsp.h"
29 #include "vp56.h"
30 #include "vp9.h"
31 #include "vp9data.h"
32 #include "vp9dsp.h"
33 #include "libavutil/avassert.h"
34 #include "libavutil/pixdesc.h"
35 
36 #define VP9_SYNCCODE 0x498342
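// The 24-bit frame sync code: the byte sequence 0x49 0x83 0x42 defined by the
// VP9 bitstream specification. Keyframe and intra-only headers below are
// rejected if it is absent.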
37 
38 enum CompPredMode {
39  PRED_SINGLEREF,
40  PRED_COMPREF,
41  PRED_SWITCHABLE,
42 };
43 
44 enum BlockLevel {
45  BL_64X64,
46  BL_32X32,
47  BL_16X16,
48  BL_8X8,
49 };
50 
51 enum BlockSize {
52  BS_64x64,
53  BS_64x32,
54  BS_32x64,
55  BS_32x32,
56  BS_32x16,
57  BS_16x32,
58  BS_16x16,
59  BS_16x8,
60  BS_8x16,
61  BS_8x8,
62  BS_8x4,
63  BS_4x8,
64  BS_4x4,
65  N_BS_SIZES,
66 };
67 
68 struct VP9mvrefPair {
69  VP56mv mv[2];
70  int8_t ref[2];
71 };
72 
73 typedef struct VP9Frame {
74  ThreadFrame tf;
75  AVBufferRef *extradata;
76  uint8_t *segmentation_map;
77  struct VP9mvrefPair *mv;
78  int uses_2pass;
79 } VP9Frame;
80 
81 struct VP9Filter {
82  uint8_t level[8 * 8];
83  uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
84  [8 /* rows */][4 /* 0=16, 1=8, 2=4, 3=inner4 */];
85 };
86 
87 typedef struct VP9Block {
90  VP56mv mv[4 /* b_idx */][2 /* ref */];
91  enum BlockSize bs;
92  enum TxfmMode tx, uvtx;
93  enum BlockLevel bl;
95 } VP9Block;
96 
97 typedef struct VP9Context {
98  VP9DSPContext dsp;
99  VideoDSPContext vdsp;
100  GetBitContext gb;
101  VP56RangeCoder c;
102  VP56RangeCoder *c_b;
103  unsigned c_b_size;
104  VP9Block *b_base, *b;
105  int pass;
106  int row, row7, col, col7;
107  uint8_t *dst[3];
108  ptrdiff_t y_stride, uv_stride;
109 
110  // bitstream header
131 #define CUR_FRAME 0
132 #define REF_FRAME_MVPAIR 1
133 #define REF_FRAME_SEGMAP 2
134  VP9Frame frames[3];
135 
136  struct {
137  uint8_t level;
138  int8_t sharpness;
139  uint8_t lim_lut[64];
140  uint8_t mblim_lut[64];
141  } filter;
142  struct {
143  uint8_t enabled;
144  int8_t mode[2];
145  int8_t ref[4];
146  } lf_delta;
147  uint8_t yac_qi;
148  int8_t ydc_qdelta, uvdc_qdelta, uvac_qdelta;
149  uint8_t lossless;
150 #define MAX_SEGMENT 8
151  struct {
152  uint8_t enabled;
153  uint8_t temporal;
154  uint8_t absolute_vals;
155  uint8_t update_map;
156  uint8_t ignore_refmap;
157  struct {
158  uint8_t q_enabled;
159  uint8_t lf_enabled;
160  uint8_t skip_enabled;
161  uint8_t ref_enabled;
162  uint8_t ref_val;
163  int16_t q_val;
164  int8_t lf_val;
165  int16_t qmul[2][2];
166  uint8_t lflvl[4][2];
167  } feat[MAX_SEGMENT];
168  } segmentation;
169  struct {
170  unsigned log2_tile_cols, log2_tile_rows;
171  unsigned tile_cols, tile_rows;
172  unsigned tile_row_start, tile_row_end, tile_col_start, tile_col_end;
173  } tiling;
174  unsigned sb_cols, sb_rows, rows, cols;
175  struct {
176  prob_context p;
177  uint8_t coef[4][2][2][6][6][3];
178  } prob_ctx[4];
179  struct {
180  prob_context p;
181  uint8_t coef[4][2][2][6][6][11];
182  uint8_t seg[7];
183  uint8_t segpred[3];
184  } prob;
185  struct {
186  unsigned y_mode[4][10];
187  unsigned uv_mode[10][10];
188  unsigned filter[4][3];
189  unsigned mv_mode[7][4];
190  unsigned intra[4][2];
191  unsigned comp[5][2];
192  unsigned single_ref[5][2][2];
193  unsigned comp_ref[5][2];
194  unsigned tx32p[2][4];
195  unsigned tx16p[2][3];
196  unsigned tx8p[2][2];
197  unsigned skip[3][2];
198  unsigned mv_joint[4];
199  struct {
200  unsigned sign[2];
201  unsigned classes[11];
202  unsigned class0[2];
203  unsigned bits[10][2];
204  unsigned class0_fp[2][4];
205  unsigned fp[4];
206  unsigned class0_hp[2];
207  unsigned hp[2];
208  } mv_comp[2];
209  unsigned partition[4][4][4];
210  unsigned coef[4][2][2][6][6][3];
211  unsigned eob[4][2][2][6][6][2];
212  } counts;
213  enum TxfmMode txfmmode;
214  enum CompPredMode comppredmode;
215 
216  // contextual (left/above) cache
231  // FIXME maybe merge some of the below in a flags field?
242 
243  // whole-frame cache
244  uint8_t *intra_pred_data[3];
245  struct VP9Filter *lflvl;
247 
248  // block reconstruction intermediates
249  int block_alloc_using_2pass;
250  int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
251  uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
252  struct { int x, y; } min_mv, max_mv;
253  DECLARE_ALIGNED(32, uint8_t, tmp_y)[64 * 64 * 2];
254  DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][64 * 64 * 2];
255  uint16_t mvscale[3][2];
256  uint8_t mvstep[3][2];
257 } VP9Context;
258 
259 static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
260  {
261  { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
262  { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
263  }, {
264  { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
265  { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
266  }
267 };
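// Reading aid: bwh_tab[0][bs] gives a block's width/height in 4x4-pixel
// units and bwh_tab[1][bs] in 8x8-pixel units (rounded up), indexed by
// enum BlockSize. For example, BS_64x64 maps to { 16, 16 } and { 8, 8 },
// while BS_4x4 maps to { 1, 1 } in both tables.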
268 
269 static int vp9_alloc_frame(AVCodecContext *ctx, VP9Frame *f)
270 {
271  VP9Context *s = ctx->priv_data;
272  int ret, sz;
273 
274  if ((ret = ff_thread_get_buffer(ctx, &f->tf, AV_GET_BUFFER_FLAG_REF)) < 0)
275  return ret;
276  sz = 64 * s->sb_cols * s->sb_rows;
277  if (!(f->extradata = av_buffer_allocz(sz * (1 + sizeof(struct VP9mvrefPair))))) {
278  ff_thread_release_buffer(ctx, &f->tf);
279  return AVERROR(ENOMEM);
280  }
281 
282  f->segmentation_map = f->extradata->data;
283  f->mv = (struct VP9mvrefPair *) (f->extradata->data + sz);
284 
285  return 0;
286 }
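// Layout of f->extradata as allocated above: sz bytes of segmentation map
// (one uint8_t per 8x8 block, 64 entries per 64x64 superblock), followed by
// one struct VP9mvrefPair per 8x8 block.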
287 
288 static void vp9_unref_frame(AVCodecContext *ctx, VP9Frame *f)
289 {
290  ff_thread_release_buffer(ctx, &f->tf);
291  av_buffer_unref(&f->extradata);
292 }
293 
294 static int vp9_ref_frame(AVCodecContext *ctx, VP9Frame *dst, VP9Frame *src)
295 {
296  int res;
297 
298  if ((res = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0) {
299  return res;
300  } else if (!(dst->extradata = av_buffer_ref(src->extradata))) {
301  vp9_unref_frame(ctx, dst);
302  return AVERROR(ENOMEM);
303  }
304 
305  dst->segmentation_map = src->segmentation_map;
306  dst->mv = src->mv;
307  dst->uses_2pass = src->uses_2pass;
308 
309  return 0;
310 }
311 
312 static int update_size(AVCodecContext *ctx, int w, int h, enum AVPixelFormat fmt)
313 {
314  VP9Context *s = ctx->priv_data;
315  uint8_t *p;
316  int bytesperpixel = s->bytesperpixel;
317 
318  av_assert0(w > 0 && h > 0);
319 
320  if (s->intra_pred_data[0] && w == ctx->width && h == ctx->height && ctx->pix_fmt == fmt)
321  return 0;
322 
323  ctx->width = w;
324  ctx->height = h;
325  ctx->pix_fmt = fmt;
326  s->sb_cols = (w + 63) >> 6;
327  s->sb_rows = (h + 63) >> 6;
328  s->cols = (w + 7) >> 3;
329  s->rows = (h + 7) >> 3;
330 
331 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
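// The assign() macro above carves each per-column context array out of the
// single flat allocation that follows: every array gets (n) entries per
// 64x64 superblock column, and p is advanced past each array in turn.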
332  av_freep(&s->intra_pred_data[0]);
333  // FIXME we slightly over-allocate here for subsampled chroma, but a little
334  // bit of padding shouldn't affect performance...
335  p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel +
336  sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
337  if (!p)
338  return AVERROR(ENOMEM);
339  assign(s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel);
340  assign(s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel);
341  assign(s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel);
342  assign(s->above_y_nnz_ctx, uint8_t *, 16);
343  assign(s->above_mode_ctx, uint8_t *, 16);
344  assign(s->above_mv_ctx, VP56mv(*)[2], 16);
345  assign(s->above_uv_nnz_ctx[0], uint8_t *, 16);
346  assign(s->above_uv_nnz_ctx[1], uint8_t *, 16);
347  assign(s->above_partition_ctx, uint8_t *, 8);
348  assign(s->above_skip_ctx, uint8_t *, 8);
349  assign(s->above_txfm_ctx, uint8_t *, 8);
350  assign(s->above_segpred_ctx, uint8_t *, 8);
351  assign(s->above_intra_ctx, uint8_t *, 8);
352  assign(s->above_comp_ctx, uint8_t *, 8);
353  assign(s->above_ref_ctx, uint8_t *, 8);
354  assign(s->above_filter_ctx, uint8_t *, 8);
355  assign(s->lflvl, struct VP9Filter *, 1);
356 #undef assign
357 
358  // these will be re-allocated a little later
359  av_freep(&s->b_base);
360  av_freep(&s->block_base);
361 
362  if (s->bpp != s->last_bpp) {
363  ff_vp9dsp_init(&s->dsp, s->bpp);
364  ff_videodsp_init(&s->vdsp, s->bpp);
365  s->last_bpp = s->bpp;
366  }
367 
368  return 0;
369 }
370 
371 static int update_block_buffers(AVCodecContext *ctx)
372 {
373  VP9Context *s = ctx->priv_data;
374  int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;
375 
376  if (s->b_base && s->block_base && s->block_alloc_using_2pass == s->frames[CUR_FRAME].uses_2pass)
377  return 0;
378 
379  av_free(s->b_base);
380  av_free(s->block_base);
381  chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
382  chroma_eobs = 16 * 16 >> (s->ss_h + s->ss_v);
383  if (s->frames[CUR_FRAME].uses_2pass) {
384  int sbs = s->sb_cols * s->sb_rows;
385 
386  s->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
387  s->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
388  16 * 16 + 2 * chroma_eobs) * sbs);
389  if (!s->b_base || !s->block_base)
390  return AVERROR(ENOMEM);
391  s->uvblock_base[0] = s->block_base + sbs * 64 * 64 * bytesperpixel;
392  s->uvblock_base[1] = s->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
393  s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
394  s->uveob_base[0] = s->eob_base + 16 * 16 * sbs;
395  s->uveob_base[1] = s->uveob_base[0] + chroma_eobs * sbs;
396  } else {
397  s->b_base = av_malloc(sizeof(VP9Block));
398  s->block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
399  16 * 16 + 2 * chroma_eobs);
400  if (!s->b_base || !s->block_base)
401  return AVERROR(ENOMEM);
402  s->uvblock_base[0] = s->block_base + 64 * 64 * bytesperpixel;
403  s->uvblock_base[1] = s->uvblock_base[0] + chroma_blocks * bytesperpixel;
404  s->eob_base = (uint8_t *) (s->uvblock_base[1] + chroma_blocks * bytesperpixel);
405  s->uveob_base[0] = s->eob_base + 16 * 16;
406  s->uveob_base[1] = s->uveob_base[0] + chroma_eobs;
407  }
408  s->block_alloc_using_2pass = s->frames[CUR_FRAME].uses_2pass;
409 
410  return 0;
411 }
412 
413 // for some reason the sign bit is at the end, not the start, of a bit sequence
414 static int get_sbits_inv(GetBitContext *gb, int n)
415 {
416  int v = get_bits(gb, n);
417  return get_bits1(gb) ? -v : v;
418 }
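// Example: with n = 4, the bits 0101 followed by a set sign bit decode as
// v = 5 and a negative sign, so the function returns -5.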
419 
420 static av_always_inline int inv_recenter_nonneg(int v, int m)
421 {
422  return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
423 }
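// Example: for m = 10, v = 0, 1, 2, 3, 4 map to 10, 9, 11, 8, 12 -- odd v
// steps below m, even v above it, so small codes stay close to m; any
// v > 2 * m is passed through unchanged.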
424 
425 // differential forward probability updates
426 static int update_prob(VP56RangeCoder *c, int p)
427 {
428  static const int inv_map_table[254] = {
429  7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
430  189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
431  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
432  25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
433  40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
434  55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
435  70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
436  86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
437  101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
438  116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
439  131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
440  146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
441  161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
442  177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
443  192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
444  207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
445  222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
446  237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
447  252, 253,
448  };
449  int d;
450 
451  /* This code is trying to do a differential probability update. For a
452  * current probability A in the range [1, 255], the difference to a new
453  * probability of any value can be expressed differentially as 1-A,255-A
454  * where some part of this (absolute range) exists both in positive as
455  * well as the negative part, whereas another part only exists in one
456  * half. We're trying to code this shared part differentially, i.e.
457  * times two where the value of the lowest bit specifies the sign, and
458  * the single part is then coded on top of this. This absolute difference
459  * then again has a value of [0,254], but a bigger value in this range
460  * indicates that we're further away from the original value A, so we
461  * can code this as a VLC code, since higher values are increasingly
462  * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
463  * updates vs. the 'fine, exact' updates further down the range, which
464  * adds one extra dimension to this differential update model. */
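 /* Worked example: if the first branch below is taken with a 4-bit payload
  * of 0, then d = 0 and inv_map_table[0] = 7; updating p = 128 then yields
  * 1 + inv_recenter_nonneg(7, 127) = 1 + (127 - 4) = 124. */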
465 
466  if (!vp8_rac_get(c)) {
467  d = vp8_rac_get_uint(c, 4) + 0;
468  } else if (!vp8_rac_get(c)) {
469  d = vp8_rac_get_uint(c, 4) + 16;
470  } else if (!vp8_rac_get(c)) {
471  d = vp8_rac_get_uint(c, 5) + 32;
472  } else {
473  d = vp8_rac_get_uint(c, 7);
474  if (d >= 65)
475  d = (d << 1) - 65 + vp8_rac_get(c);
476  d += 64;
477  }
478 
479  return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
480  255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
481 }
482 
483 static int read_colorspace_details(AVCodecContext *ctx)
484 {
485  static const enum AVColorSpace colorspaces[8] = {
486  AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
487  AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
488  };
489  VP9Context *s = ctx->priv_data;
490  enum AVPixelFormat res;
491  int bits = ctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12
492 
493  s->bpp_index = bits;
494  s->bpp = 8 + bits * 2;
495  s->bytesperpixel = (7 + s->bpp) >> 3;
496  ctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
497  if (ctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
498  static const enum AVPixelFormat pix_fmt_rgb[3] = {
499  AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12,
500  };
501  if (ctx->profile & 1) {
502  s->ss_h = s->ss_v = 1;
503  res = pix_fmt_rgb[bits];
504  ctx->color_range = AVCOL_RANGE_JPEG;
505  } else {
506  av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile %d\n",
507  ctx->profile);
508  return AVERROR_INVALIDDATA;
509  }
510  } else {
511  static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
512  { { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
513  { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P } },
514  { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
515  { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
516  { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
517  { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } },
518  };
519  ctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
520  if (ctx->profile & 1) {
521  s->ss_h = get_bits1(&s->gb);
522  s->ss_v = get_bits1(&s->gb);
523  if ((res = pix_fmt_for_ss[bits][s->ss_v][s->ss_h]) == AV_PIX_FMT_YUV420P) {
524  av_log(ctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n",
525  ctx->profile);
526  return AVERROR_INVALIDDATA;
527  } else if (get_bits1(&s->gb)) {
528  av_log(ctx, AV_LOG_ERROR, "Profile %d color details reserved bit set\n",
529  ctx->profile);
530  return AVERROR_INVALIDDATA;
531  }
532  } else {
533  s->ss_h = s->ss_v = 1;
534  res = pix_fmt_for_ss[bits][1][1];
535  }
536  }
537 
538  return res;
539 }
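// In short: profile 0 is 8-bit 4:2:0; profile 1 adds 8-bit 4:2:2, 4:4:0,
// 4:4:4 and RGB; profiles 2 and 3 are the 10/12-bit counterparts of
// profiles 0 and 1. Only the odd profiles code explicit subsampling bits,
// as parsed above.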
540 
541 static int decode_frame_header(AVCodecContext *ctx,
542  const uint8_t *data, int size, int *ref)
543 {
544  VP9Context *s = ctx->priv_data;
545  int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
546  enum AVPixelFormat fmt = ctx->pix_fmt;
547  int last_invisible;
548  const uint8_t *data2;
549 
550  /* general header */
551  if ((res = init_get_bits8(&s->gb, data, size)) < 0) {
552  av_log(ctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
553  return res;
554  }
555  if (get_bits(&s->gb, 2) != 0x2) { // frame marker
556  av_log(ctx, AV_LOG_ERROR, "Invalid frame marker\n");
557  return AVERROR_INVALIDDATA;
558  }
559  ctx->profile = get_bits1(&s->gb);
560  ctx->profile |= get_bits1(&s->gb) << 1;
561  if (ctx->profile == 3) ctx->profile += get_bits1(&s->gb);
562  if (ctx->profile > 3) {
563  av_log(ctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", ctx->profile);
564  return AVERROR_INVALIDDATA;
565  }
566  if (get_bits1(&s->gb)) {
567  *ref = get_bits(&s->gb, 3);
568  return 0;
569  }
570  s->last_keyframe = s->keyframe;
571  s->keyframe = !get_bits1(&s->gb);
572  last_invisible = s->invisible;
573  s->invisible = !get_bits1(&s->gb);
574  s->errorres = get_bits1(&s->gb);
575  s->use_last_frame_mvs = !s->errorres && !last_invisible;
576  if (s->keyframe) {
577  if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
578  av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
579  return AVERROR_INVALIDDATA;
580  }
581  if ((fmt = read_colorspace_details(ctx)) < 0)
582  return fmt;
583  // for profile 1, here follows the subsampling bits
584  s->refreshrefmask = 0xff;
585  w = get_bits(&s->gb, 16) + 1;
586  h = get_bits(&s->gb, 16) + 1;
587  if (get_bits1(&s->gb)) // display size
588  skip_bits(&s->gb, 32);
589  } else {
590  s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
591  s->resetctx = s->errorres ? 0 : get_bits(&s->gb, 2);
592  if (s->intraonly) {
593  if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
594  av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
595  return AVERROR_INVALIDDATA;
596  }
597  if (ctx->profile == 1) {
598  if ((fmt = read_colorspace_details(ctx)) < 0)
599  return fmt;
600  } else {
601  s->ss_h = s->ss_v = 1;
602  s->bpp = 8;
603  s->bpp_index = 0;
604  s->bytesperpixel = 1;
605  fmt = AV_PIX_FMT_YUV420P;
606  ctx->colorspace = AVCOL_SPC_BT470BG;
607  ctx->color_range = AVCOL_RANGE_MPEG;
608  }
609  s->refreshrefmask = get_bits(&s->gb, 8);
610  w = get_bits(&s->gb, 16) + 1;
611  h = get_bits(&s->gb, 16) + 1;
612  if (get_bits1(&s->gb)) // display size
613  skip_bits(&s->gb, 32);
614  } else {
615  s->refreshrefmask = get_bits(&s->gb, 8);
616  s->refidx[0] = get_bits(&s->gb, 3);
617  s->signbias[0] = get_bits1(&s->gb) && !s->errorres;
618  s->refidx[1] = get_bits(&s->gb, 3);
619  s->signbias[1] = get_bits1(&s->gb) && !s->errorres;
620  s->refidx[2] = get_bits(&s->gb, 3);
621  s->signbias[2] = get_bits1(&s->gb) && !s->errorres;
622  if (!s->refs[s->refidx[0]].f->data[0] ||
623  !s->refs[s->refidx[1]].f->data[0] ||
624  !s->refs[s->refidx[2]].f->data[0]) {
625  av_log(ctx, AV_LOG_ERROR, "Not all references are available\n");
626  return AVERROR_INVALIDDATA;
627  }
628  if (get_bits1(&s->gb)) {
629  w = s->refs[s->refidx[0]].f->width;
630  h = s->refs[s->refidx[0]].f->height;
631  } else if (get_bits1(&s->gb)) {
632  w = s->refs[s->refidx[1]].f->width;
633  h = s->refs[s->refidx[1]].f->height;
634  } else if (get_bits1(&s->gb)) {
635  w = s->refs[s->refidx[2]].f->width;
636  h = s->refs[s->refidx[2]].f->height;
637  } else {
638  w = get_bits(&s->gb, 16) + 1;
639  h = get_bits(&s->gb, 16) + 1;
640  }
641  // Note that at this point, "CUR_FRAME" has not yet been (re)allocated
642  // for the frame being decoded, so it still holds the dimensions of
643  // the _previous_ frame
644  s->use_last_frame_mvs &= s->frames[CUR_FRAME].tf.f->width == w &&
645  s->frames[CUR_FRAME].tf.f->height == h;
646  if (get_bits1(&s->gb)) // display size
647  skip_bits(&s->gb, 32);
648  s->highprecisionmvs = get_bits1(&s->gb);
649  s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
650  get_bits(&s->gb, 2);
651  s->allowcompinter = (s->signbias[0] != s->signbias[1] ||
652  s->signbias[0] != s->signbias[2]);
653  if (s->allowcompinter) {
654  if (s->signbias[0] == s->signbias[1]) {
655  s->fixcompref = 2;
656  s->varcompref[0] = 0;
657  s->varcompref[1] = 1;
658  } else if (s->signbias[0] == s->signbias[2]) {
659  s->fixcompref = 1;
660  s->varcompref[0] = 0;
661  s->varcompref[1] = 2;
662  } else {
663  s->fixcompref = 0;
664  s->varcompref[0] = 1;
665  s->varcompref[1] = 2;
666  }
667  }
668 
669  for (i = 0; i < 3; i++) {
670  AVFrame *ref = s->refs[s->refidx[i]].f;
671  int refw = ref->width, refh = ref->height;
672 
673  if (ref->format != fmt) {
674  av_log(ctx, AV_LOG_ERROR,
675  "Ref pixfmt (%s) did not match current frame (%s)",
676  av_get_pix_fmt_name(ref->format),
677  av_get_pix_fmt_name(fmt));
678  return AVERROR_INVALIDDATA;
679  } else if (refw == w && refh == h) {
680  s->mvscale[i][0] = s->mvscale[i][1] = 0;
681  } else {
682  if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
683  av_log(ctx, AV_LOG_ERROR,
684  "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
685  refw, refh, w, h);
686  return AVERROR_INVALIDDATA;
687  }
688  s->mvscale[i][0] = (refw << 14) / w;
689  s->mvscale[i][1] = (refh << 14) / h;
690  s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
691  s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;
692  }
693  }
694  }
695  }
696  s->refreshctx = s->errorres ? 0 : get_bits1(&s->gb);
697  s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
698  s->framectxid = c = get_bits(&s->gb, 2);
699 
700  /* loopfilter header data */
701  if (s->keyframe || s->errorres || s->intraonly) {
702  // reset loopfilter defaults
703  s->lf_delta.ref[0] = 1;
704  s->lf_delta.ref[1] = 0;
705  s->lf_delta.ref[2] = -1;
706  s->lf_delta.ref[3] = -1;
707  s->lf_delta.mode[0] = 0;
708  s->lf_delta.mode[1] = 0;
709  }
710  s->filter.level = get_bits(&s->gb, 6);
711  sharp = get_bits(&s->gb, 3);
712  // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
713  // the old cache values since they are still valid
714  if (s->filter.sharpness != sharp)
715  memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
716  s->filter.sharpness = sharp;
717  if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
718  if (get_bits1(&s->gb)) {
719  for (i = 0; i < 4; i++)
720  if (get_bits1(&s->gb))
721  s->lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
722  for (i = 0; i < 2; i++)
723  if (get_bits1(&s->gb))
724  s->lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
725  }
726  }
727 
728  /* quantization header data */
729  s->yac_qi = get_bits(&s->gb, 8);
730  s->ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
731  s->uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
732  s->uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
733  s->lossless = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
734  s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;
735 
736  /* segmentation header info */
737  s->segmentation.ignore_refmap = 0;
738  if ((s->segmentation.enabled = get_bits1(&s->gb))) {
739  if ((s->segmentation.update_map = get_bits1(&s->gb))) {
740  for (i = 0; i < 7; i++)
741  s->prob.seg[i] = get_bits1(&s->gb) ?
742  get_bits(&s->gb, 8) : 255;
743  if ((s->segmentation.temporal = get_bits1(&s->gb))) {
744  for (i = 0; i < 3; i++)
745  s->prob.segpred[i] = get_bits1(&s->gb) ?
746  get_bits(&s->gb, 8) : 255;
747  }
748  }
749  if ((!s->segmentation.update_map || s->segmentation.temporal) &&
750  (w != s->frames[CUR_FRAME].tf.f->width ||
751  h != s->frames[CUR_FRAME].tf.f->height)) {
752  av_log(ctx, AV_LOG_WARNING,
753  "Reference segmap (temp=%d,update=%d) enabled on size-change!\n",
754  s->segmentation.temporal, s->segmentation.update_map);
755  s->segmentation.ignore_refmap = 1;
756  //return AVERROR_INVALIDDATA;
757  }
758 
759  if (get_bits1(&s->gb)) {
760  s->segmentation.absolute_vals = get_bits1(&s->gb);
761  for (i = 0; i < 8; i++) {
762  if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
763  s->segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
764  if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
765  s->segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
766  if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
767  s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
768  s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
769  }
770  }
771  } else {
772  s->segmentation.feat[0].q_enabled = 0;
773  s->segmentation.feat[0].lf_enabled = 0;
774  s->segmentation.feat[0].skip_enabled = 0;
775  s->segmentation.feat[0].ref_enabled = 0;
776  }
777 
778  // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
779  for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
780  int qyac, qydc, quvac, quvdc, lflvl, sh;
781 
782  if (s->segmentation.feat[i].q_enabled) {
783  if (s->segmentation.absolute_vals)
784  qyac = s->segmentation.feat[i].q_val;
785  else
786  qyac = s->yac_qi + s->segmentation.feat[i].q_val;
787  } else {
788  qyac = s->yac_qi;
789  }
790  qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
791  quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
792  quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
793  qyac = av_clip_uintp2(qyac, 8);
794 
795  s->segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[s->bpp_index][qydc];
796  s->segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[s->bpp_index][qyac];
797  s->segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[s->bpp_index][quvdc];
798  s->segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[s->bpp_index][quvac];
799 
800  sh = s->filter.level >= 32;
801  if (s->segmentation.feat[i].lf_enabled) {
802  if (s->segmentation.absolute_vals)
803  lflvl = av_clip_uintp2(s->segmentation.feat[i].lf_val, 6);
804  else
805  lflvl = av_clip_uintp2(s->filter.level + s->segmentation.feat[i].lf_val, 6);
806  } else {
807  lflvl = s->filter.level;
808  }
809  if (s->lf_delta.enabled) {
810  s->segmentation.feat[i].lflvl[0][0] =
811  s->segmentation.feat[i].lflvl[0][1] =
812  av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
813  for (j = 1; j < 4; j++) {
814  s->segmentation.feat[i].lflvl[j][0] =
815  av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
816  s->lf_delta.mode[0]) * (1 << sh)), 6);
817  s->segmentation.feat[i].lflvl[j][1] =
818  av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
819  s->lf_delta.mode[1]) * (1 << sh)), 6);
820  }
821  } else {
822  memset(s->segmentation.feat[i].lflvl, lflvl,
823  sizeof(s->segmentation.feat[i].lflvl));
824  }
825  }
826 
827  /* tiling info */
828  if ((res = update_size(ctx, w, h, fmt)) < 0) {
829  av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n", w, h, fmt);
830  return res;
831  }
832  for (s->tiling.log2_tile_cols = 0;
833  (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
834  s->tiling.log2_tile_cols++) ;
835  for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
836  max = FFMAX(0, max - 1);
837  while (max > s->tiling.log2_tile_cols) {
838  if (get_bits1(&s->gb))
839  s->tiling.log2_tile_cols++;
840  else
841  break;
842  }
843  s->tiling.log2_tile_rows = decode012(&s->gb);
844  s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
845  if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
846  s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
847  s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
848  sizeof(VP56RangeCoder) * s->tiling.tile_cols);
849  if (!s->c_b) {
850  av_log(ctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n");
851  return AVERROR(ENOMEM);
852  }
853  }
854 
855  if (s->keyframe || s->errorres || s->intraonly) {
856  s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
857  s->prob_ctx[3].p = vp9_default_probs;
858  memcpy(s->prob_ctx[0].coef, vp9_default_coef_probs,
859  sizeof(vp9_default_coef_probs));
860  memcpy(s->prob_ctx[1].coef, vp9_default_coef_probs,
861  sizeof(vp9_default_coef_probs));
862  memcpy(s->prob_ctx[2].coef, vp9_default_coef_probs,
863  sizeof(vp9_default_coef_probs));
864  memcpy(s->prob_ctx[3].coef, vp9_default_coef_probs,
865  sizeof(vp9_default_coef_probs));
866  }
867 
868  // the next 16 bits hold the size of the rest of the header (arith-coded)
869  size2 = get_bits(&s->gb, 16);
870  data2 = align_get_bits(&s->gb);
871  if (size2 > size - (data2 - data)) {
872  av_log(ctx, AV_LOG_ERROR, "Invalid compressed header size\n");
873  return AVERROR_INVALIDDATA;
874  }
875  ff_vp56_init_range_decoder(&s->c, data2, size2);
876  if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
877  av_log(ctx, AV_LOG_ERROR, "Marker bit was set\n");
878  return AVERROR_INVALIDDATA;
879  }
880 
881  if (s->keyframe || s->intraonly) {
882  memset(s->counts.coef, 0, sizeof(s->counts.coef));
883  memset(s->counts.eob, 0, sizeof(s->counts.eob));
884  } else {
885  memset(&s->counts, 0, sizeof(s->counts));
886  }
887  // FIXME is it faster to not copy here, but do it down in the fw updates
888  // as explicit copies if the fw update is missing (and skip the copy upon
889  // fw update)?
890  s->prob.p = s->prob_ctx[c].p;
891 
892  // txfm updates
893  if (s->lossless) {
894  s->txfmmode = TX_4X4;
895  } else {
896  s->txfmmode = vp8_rac_get_uint(&s->c, 2);
897  if (s->txfmmode == 3)
898  s->txfmmode += vp8_rac_get(&s->c);
899 
900  if (s->txfmmode == TX_SWITCHABLE) {
901  for (i = 0; i < 2; i++)
902  if (vp56_rac_get_prob_branchy(&s->c, 252))
903  s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
904  for (i = 0; i < 2; i++)
905  for (j = 0; j < 2; j++)
906  if (vp56_rac_get_prob_branchy(&s->c, 252))
907  s->prob.p.tx16p[i][j] =
908  update_prob(&s->c, s->prob.p.tx16p[i][j]);
909  for (i = 0; i < 2; i++)
910  for (j = 0; j < 3; j++)
911  if (vp56_rac_get_prob_branchy(&s->c, 252))
912  s->prob.p.tx32p[i][j] =
913  update_prob(&s->c, s->prob.p.tx32p[i][j]);
914  }
915  }
916 
917  // coef updates
918  for (i = 0; i < 4; i++) {
919  uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
920  if (vp8_rac_get(&s->c)) {
921  for (j = 0; j < 2; j++)
922  for (k = 0; k < 2; k++)
923  for (l = 0; l < 6; l++)
924  for (m = 0; m < 6; m++) {
925  uint8_t *p = s->prob.coef[i][j][k][l][m];
926  uint8_t *r = ref[j][k][l][m];
927  if (m >= 3 && l == 0) // dc only has 3 pt
928  break;
929  for (n = 0; n < 3; n++) {
930  if (vp56_rac_get_prob_branchy(&s->c, 252)) {
931  p[n] = update_prob(&s->c, r[n]);
932  } else {
933  p[n] = r[n];
934  }
935  }
936  p[3] = 0;
937  }
938  } else {
939  for (j = 0; j < 2; j++)
940  for (k = 0; k < 2; k++)
941  for (l = 0; l < 6; l++)
942  for (m = 0; m < 6; m++) {
943  uint8_t *p = s->prob.coef[i][j][k][l][m];
944  uint8_t *r = ref[j][k][l][m];
945  if (m > 3 && l == 0) // dc only has 3 pt
946  break;
947  memcpy(p, r, 3);
948  p[3] = 0;
949  }
950  }
951  if (s->txfmmode == i)
952  break;
953  }
954 
955  // mode updates
956  for (i = 0; i < 3; i++)
957  if (vp56_rac_get_prob_branchy(&s->c, 252))
958  s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
959  if (!s->keyframe && !s->intraonly) {
960  for (i = 0; i < 7; i++)
961  for (j = 0; j < 3; j++)
962  if (vp56_rac_get_prob_branchy(&s->c, 252))
963  s->prob.p.mv_mode[i][j] =
964  update_prob(&s->c, s->prob.p.mv_mode[i][j]);
965 
966  if (s->filtermode == FILTER_SWITCHABLE)
967  for (i = 0; i < 4; i++)
968  for (j = 0; j < 2; j++)
969  if (vp56_rac_get_prob_branchy(&s->c, 252))
970  s->prob.p.filter[i][j] =
971  update_prob(&s->c, s->prob.p.filter[i][j]);
972 
973  for (i = 0; i < 4; i++)
974  if (vp56_rac_get_prob_branchy(&s->c, 252))
975  s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
976 
977  if (s->allowcompinter) {
978  s->comppredmode = vp8_rac_get(&s->c);
979  if (s->comppredmode)
980  s->comppredmode += vp8_rac_get(&s->c);
981  if (s->comppredmode == PRED_SWITCHABLE)
982  for (i = 0; i < 5; i++)
983  if (vp56_rac_get_prob_branchy(&s->c, 252))
984  s->prob.p.comp[i] =
985  update_prob(&s->c, s->prob.p.comp[i]);
986  } else {
987  s->comppredmode = PRED_SINGLEREF;
988  }
989 
990  if (s->comppredmode != PRED_COMPREF) {
991  for (i = 0; i < 5; i++) {
992  if (vp56_rac_get_prob_branchy(&s->c, 252))
993  s->prob.p.single_ref[i][0] =
994  update_prob(&s->c, s->prob.p.single_ref[i][0]);
995  if (vp56_rac_get_prob_branchy(&s->c, 252))
996  s->prob.p.single_ref[i][1] =
997  update_prob(&s->c, s->prob.p.single_ref[i][1]);
998  }
999  }
1000 
1001  if (s->comppredmode != PRED_SINGLEREF) {
1002  for (i = 0; i < 5; i++)
1003  if (vp56_rac_get_prob_branchy(&s->c, 252))
1004  s->prob.p.comp_ref[i] =
1005  update_prob(&s->c, s->prob.p.comp_ref[i]);
1006  }
1007 
1008  for (i = 0; i < 4; i++)
1009  for (j = 0; j < 9; j++)
1010  if (vp56_rac_get_prob_branchy(&s->c, 252))
1011  s->prob.p.y_mode[i][j] =
1012  update_prob(&s->c, s->prob.p.y_mode[i][j]);
1013 
1014  for (i = 0; i < 4; i++)
1015  for (j = 0; j < 4; j++)
1016  for (k = 0; k < 3; k++)
1017  if (vp56_rac_get_prob_branchy(&s->c, 252))
1018  s->prob.p.partition[3 - i][j][k] =
1019  update_prob(&s->c, s->prob.p.partition[3 - i][j][k]);
1020 
1021  // mv fields don't use the update_prob subexp model for some reason
1022  for (i = 0; i < 3; i++)
1023  if (vp56_rac_get_prob_branchy(&s->c, 252))
1024  s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1025 
1026  for (i = 0; i < 2; i++) {
1027  if (vp56_rac_get_prob_branchy(&s->c, 252))
1028  s->prob.p.mv_comp[i].sign = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1029 
1030  for (j = 0; j < 10; j++)
1031  if (vp56_rac_get_prob_branchy(&s->c, 252))
1032  s->prob.p.mv_comp[i].classes[j] =
1033  (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1034 
1035  if (vp56_rac_get_prob_branchy(&s->c, 252))
1036  s->prob.p.mv_comp[i].class0 = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1037 
1038  for (j = 0; j < 10; j++)
1039  if (vp56_rac_get_prob_branchy(&s->c, 252))
1040  s->prob.p.mv_comp[i].bits[j] =
1041  (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1042  }
1043 
1044  for (i = 0; i < 2; i++) {
1045  for (j = 0; j < 2; j++)
1046  for (k = 0; k < 3; k++)
1047  if (vp56_rac_get_prob_branchy(&s->c, 252))
1048  s->prob.p.mv_comp[i].class0_fp[j][k] =
1049  (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1050 
1051  for (j = 0; j < 3; j++)
1052  if (vp56_rac_get_prob_branchy(&s->c, 252))
1053  s->prob.p.mv_comp[i].fp[j] =
1054  (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1055  }
1056 
1057  if (s->highprecisionmvs) {
1058  for (i = 0; i < 2; i++) {
1059  if (vp56_rac_get_prob_branchy(&s->c, 252))
1060  s->prob.p.mv_comp[i].class0_hp =
1061  (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1062 
1063  if (vp56_rac_get_prob_branchy(&s->c, 252))
1064  s->prob.p.mv_comp[i].hp =
1065  (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1066  }
1067  }
1068  }
1069 
1070  return (data2 - data) + size2;
1071 }
1072 
1073 static av_always_inline void clamp_mv(VP56mv *dst, const VP56mv *src,
1074  VP9Context *s)
1075 {
1076  dst->x = av_clip(src->x, s->min_mv.x, s->max_mv.x);
1077  dst->y = av_clip(src->y, s->min_mv.y, s->max_mv.y);
1078 }
1079 
1080 static void find_ref_mvs(VP9Context *s,
1081  VP56mv *pmv, int ref, int z, int idx, int sb)
1082 {
1083  static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
1084  [BS_64x64] = {{ 3, -1 }, { -1, 3 }, { 4, -1 }, { -1, 4 },
1085  { -1, -1 }, { 0, -1 }, { -1, 0 }, { 6, -1 }},
1086  [BS_64x32] = {{ 0, -1 }, { -1, 0 }, { 4, -1 }, { -1, 2 },
1087  { -1, -1 }, { 0, -3 }, { -3, 0 }, { 2, -1 }},
1088  [BS_32x64] = {{ -1, 0 }, { 0, -1 }, { -1, 4 }, { 2, -1 },
1089  { -1, -1 }, { -3, 0 }, { 0, -3 }, { -1, 2 }},
1090  [BS_32x32] = {{ 1, -1 }, { -1, 1 }, { 2, -1 }, { -1, 2 },
1091  { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1092  [BS_32x16] = {{ 0, -1 }, { -1, 0 }, { 2, -1 }, { -1, -1 },
1093  { -1, 1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1094  [BS_16x32] = {{ -1, 0 }, { 0, -1 }, { -1, 2 }, { -1, -1 },
1095  { 1, -1 }, { -3, 0 }, { 0, -3 }, { -3, -3 }},
1096  [BS_16x16] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, 1 },
1097  { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1098  [BS_16x8] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, -1 },
1099  { 0, -2 }, { -2, 0 }, { -2, -1 }, { -1, -2 }},
1100  [BS_8x16] = {{ -1, 0 }, { 0, -1 }, { -1, 1 }, { -1, -1 },
1101  { -2, 0 }, { 0, -2 }, { -1, -2 }, { -2, -1 }},
1102  [BS_8x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1103  { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1104  [BS_8x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1105  { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1106  [BS_4x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1107  { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1108  [BS_4x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1109  { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1110  };
1111  VP9Block *b = s->b;
1112  int row = s->row, col = s->col, row7 = s->row7;
1113  const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
1114 #define INVALID_MV 0x80008000U
1115  uint32_t mem = INVALID_MV, mem_sub8x8 = INVALID_MV;
1116  int i;
1117 
1118 #define RETURN_DIRECT_MV(mv) \
1119  do { \
1120  uint32_t m = AV_RN32A(&mv); \
1121  if (!idx) { \
1122  AV_WN32A(pmv, m); \
1123  return; \
1124  } else if (mem == INVALID_MV) { \
1125  mem = m; \
1126  } else if (m != mem) { \
1127  AV_WN32A(pmv, m); \
1128  return; \
1129  } \
1130  } while (0)
1131 
1132  if (sb >= 0) {
1133  if (sb == 2 || sb == 1) {
1134  RETURN_DIRECT_MV(b->mv[0][z]);
1135  } else if (sb == 3) {
1136  RETURN_DIRECT_MV(b->mv[2][z]);
1137  RETURN_DIRECT_MV(b->mv[1][z]);
1138  RETURN_DIRECT_MV(b->mv[0][z]);
1139  }
1140 
1141 #define RETURN_MV(mv) \
1142  do { \
1143  if (sb > 0) { \
1144  VP56mv tmp; \
1145  uint32_t m; \
1146  av_assert2(idx == 1); \
1147  av_assert2(mem != INVALID_MV); \
1148  if (mem_sub8x8 == INVALID_MV) { \
1149  clamp_mv(&tmp, &mv, s); \
1150  m = AV_RN32A(&tmp); \
1151  if (m != mem) { \
1152  AV_WN32A(pmv, m); \
1153  return; \
1154  } \
1155  mem_sub8x8 = AV_RN32A(&mv); \
1156  } else if (mem_sub8x8 != AV_RN32A(&mv)) { \
1157  clamp_mv(&tmp, &mv, s); \
1158  m = AV_RN32A(&tmp); \
1159  if (m != mem) { \
1160  AV_WN32A(pmv, m); \
1161  } else { \
1162  /* BUG I'm pretty sure this isn't the intention */ \
1163  AV_WN32A(pmv, 0); \
1164  } \
1165  return; \
1166  } \
1167  } else { \
1168  uint32_t m = AV_RN32A(&mv); \
1169  if (!idx) { \
1170  clamp_mv(pmv, &mv, s); \
1171  return; \
1172  } else if (mem == INVALID_MV) { \
1173  mem = m; \
1174  } else if (m != mem) { \
1175  clamp_mv(pmv, &mv, s); \
1176  return; \
1177  } \
1178  } \
1179  } while (0)
1180 
1181  if (row > 0) {
1182  struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[(row - 1) * s->sb_cols * 8 + col];
1183  if (mv->ref[0] == ref) {
1184  RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]);
1185  } else if (mv->ref[1] == ref) {
1186  RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]);
1187  }
1188  }
1189  if (col > s->tiling.tile_col_start) {
1190  struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[row * s->sb_cols * 8 + col - 1];
1191  if (mv->ref[0] == ref) {
1192  RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][0]);
1193  } else if (mv->ref[1] == ref) {
1194  RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][1]);
1195  }
1196  }
1197  i = 2;
1198  } else {
1199  i = 0;
1200  }
1201 
1202  // previously coded MVs in this neighbourhood, using same reference frame
1203  for (; i < 8; i++) {
1204  int c = p[i][0] + col, r = p[i][1] + row;
1205 
1206  if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1207  struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1208 
1209  if (mv->ref[0] == ref) {
1210  RETURN_MV(mv->mv[0]);
1211  } else if (mv->ref[1] == ref) {
1212  RETURN_MV(mv->mv[1]);
1213  }
1214  }
1215  }
1216 
1217  // MV at this position in previous frame, using same reference frame
1218  if (s->use_last_frame_mvs) {
1219  struct VP9mvrefPair *mv = &s->frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
1220 
1221  if (!s->frames[REF_FRAME_MVPAIR].uses_2pass)
1222  ff_thread_await_progress(&s->frames[REF_FRAME_MVPAIR].tf, row >> 3, 0);
1223  if (mv->ref[0] == ref) {
1224  RETURN_MV(mv->mv[0]);
1225  } else if (mv->ref[1] == ref) {
1226  RETURN_MV(mv->mv[1]);
1227  }
1228  }
1229 
1230 #define RETURN_SCALE_MV(mv, scale) \
1231  do { \
1232  if (scale) { \
1233  VP56mv mv_temp = { -mv.x, -mv.y }; \
1234  RETURN_MV(mv_temp); \
1235  } else { \
1236  RETURN_MV(mv); \
1237  } \
1238  } while (0)
1239 
1240  // previously coded MVs in this neighbourhood, using different reference frame
1241  for (i = 0; i < 8; i++) {
1242  int c = p[i][0] + col, r = p[i][1] + row;
1243 
1244  if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1245  struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1246 
1247  if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1248  RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1249  }
1250  if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1251  // BUG - libvpx has this condition regardless of whether
1252  // we used the first ref MV and pre-scaling
1253  AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1254  RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1255  }
1256  }
1257  }
1258 
1259  // MV at this position in previous frame, using different reference frame
1260  if (s->use_last_frame_mvs) {
1261  struct VP9mvrefPair *mv = &s->frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
1262 
1263  // no need to await_progress, because we already did that above
1264  if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1265  RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1266  }
1267  if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1268  // BUG - libvpx has this condition regardless of whether
1269  // we used the first ref MV and pre-scaling
1270  AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1271  RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1272  }
1273  }
1274 
1275  AV_ZERO32(pmv);
1276  clamp_mv(pmv, pmv, s);
1277 #undef INVALID_MV
1278 #undef RETURN_MV
1279 #undef RETURN_SCALE_MV
1280 }
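// Search order used above: MVs of already-decoded sub-blocks of this block
// (for sb >= 0), the above/left MV context, the spatial neighbours listed in
// mv_ref_blk_off[], and the co-located MV of the previous frame -- first
// restricted to the same reference frame, then retried for other references
// with the sign flipped when the reference sign bias differs. If nothing
// matches, the predictor falls back to a clamped (0,0).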
1281 
1282 static av_always_inline int read_mv_component(VP9Context *s, int idx, int hp)
1283 {
1284  int bit, sign = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].sign);
1285  int n, c = vp8_rac_get_tree(&s->c, vp9_mv_class_tree,
1286  s->prob.p.mv_comp[idx].classes);
1287 
1288  s->counts.mv_comp[idx].sign[sign]++;
1289  s->counts.mv_comp[idx].classes[c]++;
1290  if (c) {
1291  int m;
1292 
1293  for (n = 0, m = 0; m < c; m++) {
1294  bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].bits[m]);
1295  n |= bit << m;
1296  s->counts.mv_comp[idx].bits[m][bit]++;
1297  }
1298  n <<= 3;
1299  bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree, s->prob.p.mv_comp[idx].fp);
1300  n |= bit << 1;
1301  s->counts.mv_comp[idx].fp[bit]++;
1302  if (hp) {
1303  bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].hp);
1304  s->counts.mv_comp[idx].hp[bit]++;
1305  n |= bit;
1306  } else {
1307  n |= 1;
1308  // bug in libvpx - we count for bw entropy purposes even if the
1309  // bit wasn't coded
1310  s->counts.mv_comp[idx].hp[1]++;
1311  }
1312  n += 8 << c;
1313  } else {
1314  n = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0);
1315  s->counts.mv_comp[idx].class0[n]++;
1316  bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree,
1317  s->prob.p.mv_comp[idx].class0_fp[n]);
1318  s->counts.mv_comp[idx].class0_fp[n][bit]++;
1319  n = (n << 3) | (bit << 1);
1320  if (hp) {
1321  bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0_hp);
1322  s->counts.mv_comp[idx].class0_hp[bit]++;
1323  n |= bit;
1324  } else {
1325  n |= 1;
1326  // bug in libvpx - we count for bw entropy purposes even if the
1327  // bit wasn't coded
1328  s->counts.mv_comp[idx].class0_hp[1]++;
1329  }
1330  }
1331 
1332  return sign ? -(n + 1) : (n + 1);
1333 }
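// The magnitude is assembled as n = (integer part << 3) | (fp << 1) | hp in
// eighth-pel units, plus an (8 << c) offset for classes > 0. Example: class 0
// with class0 = 1, fp = 2 and hp = 1 gives n = 8 | 4 | 1 = 13, returned as
// +/-(n + 1) = +/-14.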
1334 
1335 static void fill_mv(VP9Context *s,
1336  VP56mv *mv, int mode, int sb)
1337 {
1338  VP9Block *b = s->b;
1339 
1340  if (mode == ZEROMV) {
1341  AV_ZERO64(mv);
1342  } else {
1343  int hp;
1344 
1345  // FIXME cache this value and reuse for other subblocks
1346  find_ref_mvs(s, &mv[0], b->ref[0], 0, mode == NEARMV,
1347  mode == NEWMV ? -1 : sb);
1348  // FIXME maybe move this code into find_ref_mvs()
1349  if ((mode == NEWMV || sb == -1) &&
1350  !(hp = s->highprecisionmvs && abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) {
1351  if (mv[0].y & 1) {
1352  if (mv[0].y < 0)
1353  mv[0].y++;
1354  else
1355  mv[0].y--;
1356  }
1357  if (mv[0].x & 1) {
1358  if (mv[0].x < 0)
1359  mv[0].x++;
1360  else
1361  mv[0].x--;
1362  }
1363  }
1364  if (mode == NEWMV) {
1365  int j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1366  s->prob.p.mv_joint);
1367 
1368  s->counts.mv_joint[j]++;
1369  if (j >= MV_JOINT_V)
1370  mv[0].y += read_mv_component(s, 0, hp);
1371  if (j & 1)
1372  mv[0].x += read_mv_component(s, 1, hp);
1373  }
1374 
1375  if (b->comp) {
1376  // FIXME cache this value and reuse for other subblocks
1377  find_ref_mvs(s, &mv[1], b->ref[1], 1, mode == NEARMV,
1378  mode == NEWMV ? -1 : sb);
1379  if ((mode == NEWMV || sb == -1) &&
1380  !(hp = s->highprecisionmvs && abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) {
1381  if (mv[1].y & 1) {
1382  if (mv[1].y < 0)
1383  mv[1].y++;
1384  else
1385  mv[1].y--;
1386  }
1387  if (mv[1].x & 1) {
1388  if (mv[1].x < 0)
1389  mv[1].x++;
1390  else
1391  mv[1].x--;
1392  }
1393  }
1394  if (mode == NEWMV) {
1395  int j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1396  s->prob.p.mv_joint);
1397 
1398  s->counts.mv_joint[j]++;
1399  if (j >= MV_JOINT_V)
1400  mv[1].y += read_mv_component(s, 0, hp);
1401  if (j & 1)
1402  mv[1].x += read_mv_component(s, 1, hp);
1403  }
1404  }
1405  }
1406 }
1407 
1408 static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h,
1409  ptrdiff_t stride, int v)
1410 {
1411  switch (w) {
1412  case 1:
1413  do {
1414  *ptr = v;
1415  ptr += stride;
1416  } while (--h);
1417  break;
1418  case 2: {
1419  int v16 = v * 0x0101;
1420  do {
1421  AV_WN16A(ptr, v16);
1422  ptr += stride;
1423  } while (--h);
1424  break;
1425  }
1426  case 4: {
1427  uint32_t v32 = v * 0x01010101;
1428  do {
1429  AV_WN32A(ptr, v32);
1430  ptr += stride;
1431  } while (--h);
1432  break;
1433  }
1434  case 8: {
1435 #if HAVE_FAST_64BIT
1436  uint64_t v64 = v * 0x0101010101010101ULL;
1437  do {
1438  AV_WN64A(ptr, v64);
1439  ptr += stride;
1440  } while (--h);
1441 #else
1442  uint32_t v32 = v * 0x01010101;
1443  do {
1444  AV_WN32A(ptr, v32);
1445  AV_WN32A(ptr + 4, v32);
1446  ptr += stride;
1447  } while (--h);
1448 #endif
1449  break;
1450  }
1451  }
1452 }
1453 
1454 static void decode_mode(AVCodecContext *ctx)
1455 {
1456  static const uint8_t left_ctx[N_BS_SIZES] = {
1457  0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
1458  };
1459  static const uint8_t above_ctx[N_BS_SIZES] = {
1460  0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
1461  };
1462  static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
1463  TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
1464  TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
1465  };
1466  VP9Context *s = ctx->priv_data;
1467  VP9Block *b = s->b;
1468  int row = s->row, col = s->col, row7 = s->row7;
1469  enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
1470  int bw4 = bwh_tab[1][b->bs][0], w4 = FFMIN(s->cols - col, bw4);
1471  int bh4 = bwh_tab[1][b->bs][1], h4 = FFMIN(s->rows - row, bh4), y;
1472  int have_a = row > 0, have_l = col > s->tiling.tile_col_start;
1473  int vref, filter_id;
1474 
1475  if (!s->segmentation.enabled) {
1476  b->seg_id = 0;
1477  } else if (s->keyframe || s->intraonly) {
1478  b->seg_id = !s->segmentation.update_map ? 0 : vp8_rac_get_tree(&s->c, vp9_segmentation_tree, s->prob.seg);
1479  } else if (!s->segmentation.update_map ||
1480  (s->segmentation.temporal &&
1481  vp56_rac_get_prob_branchy(&s->c,
1482  s->prob.segpred[s->above_segpred_ctx[col] +
1483  s->left_segpred_ctx[row7]]))) {
1484  if (!s->errorres && !s->segmentation.ignore_refmap) {
1485  int pred = 8, x;
1486  uint8_t *refsegmap = s->frames[REF_FRAME_SEGMAP].segmentation_map;
1487 
1488  if (!s->frames[REF_FRAME_SEGMAP].uses_2pass)
1489  ff_thread_await_progress(&s->frames[REF_FRAME_SEGMAP].tf, row >> 3, 0);
1490  for (y = 0; y < h4; y++) {
1491  int idx_base = (y + row) * 8 * s->sb_cols + col;
1492  for (x = 0; x < w4; x++)
1493  pred = FFMIN(pred, refsegmap[idx_base + x]);
1494  }
1495  av_assert1(pred < 8);
1496  b->seg_id = pred;
1497  } else {
1498  b->seg_id = 0;
1499  }
1500 
1501  memset(&s->above_segpred_ctx[col], 1, w4);
1502  memset(&s->left_segpred_ctx[row7], 1, h4);
1503  } else {
1504  b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree,
1505  s->prob.seg);
1506 
1507  memset(&s->above_segpred_ctx[col], 0, w4);
1508  memset(&s->left_segpred_ctx[row7], 0, h4);
1509  }
1510  if (s->segmentation.enabled &&
1511  (s->segmentation.update_map || s->keyframe || s->intraonly)) {
1512  setctx_2d(&s->frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
1513  bw4, bh4, 8 * s->sb_cols, b->seg_id);
1514  }
1515 
1516  b->skip = s->segmentation.enabled &&
1517  s->segmentation.feat[b->seg_id].skip_enabled;
1518  if (!b->skip) {
1519  int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
1520  b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
1521  s->counts.skip[c][b->skip]++;
1522  }
1523 
1524  if (s->keyframe || s->intraonly) {
1525  b->intra = 1;
1526  } else if (s->segmentation.feat[b->seg_id].ref_enabled) {
1527  b->intra = !s->segmentation.feat[b->seg_id].ref_val;
1528  } else {
1529  int c, bit;
1530 
1531  if (have_a && have_l) {
1532  c = s->above_intra_ctx[col] + s->left_intra_ctx[row7];
1533  c += (c == 2);
1534  } else {
1535  c = have_a ? 2 * s->above_intra_ctx[col] :
1536  have_l ? 2 * s->left_intra_ctx[row7] : 0;
1537  }
1538  bit = vp56_rac_get_prob(&s->c, s->prob.p.intra[c]);
1539  s->counts.intra[c][bit]++;
1540  b->intra = !bit;
1541  }
1542 
1543  if ((b->intra || !b->skip) && s->txfmmode == TX_SWITCHABLE) {
1544  int c;
1545  if (have_a) {
1546  if (have_l) {
1547  c = (s->above_skip_ctx[col] ? max_tx :
1548  s->above_txfm_ctx[col]) +
1549  (s->left_skip_ctx[row7] ? max_tx :
1550  s->left_txfm_ctx[row7]) > max_tx;
1551  } else {
1552  c = s->above_skip_ctx[col] ? 1 :
1553  (s->above_txfm_ctx[col] * 2 > max_tx);
1554  }
1555  } else if (have_l) {
1556  c = s->left_skip_ctx[row7] ? 1 :
1557  (s->left_txfm_ctx[row7] * 2 > max_tx);
1558  } else {
1559  c = 1;
1560  }
1561  switch (max_tx) {
1562  case TX_32X32:
1563  b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][0]);
1564  if (b->tx) {
1565  b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][1]);
1566  if (b->tx == 2)
1567  b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][2]);
1568  }
1569  s->counts.tx32p[c][b->tx]++;
1570  break;
1571  case TX_16X16:
1572  b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][0]);
1573  if (b->tx)
1574  b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][1]);
1575  s->counts.tx16p[c][b->tx]++;
1576  break;
1577  case TX_8X8:
1578  b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx8p[c]);
1579  s->counts.tx8p[c][b->tx]++;
1580  break;
1581  case TX_4X4:
1582  b->tx = TX_4X4;
1583  break;
1584  }
1585  } else {
1586  b->tx = FFMIN(max_tx, s->txfmmode);
1587  }
1588 
1589  if (s->keyframe || s->intraonly) {
1590  uint8_t *a = &s->above_mode_ctx[col * 2];
1591  uint8_t *l = &s->left_mode_ctx[(row7) << 1];
1592 
1593  b->comp = 0;
1594  if (b->bs > BS_8x8) {
1595  // FIXME the memory storage intermediates here aren't really
1596  // necessary, they're just there to make the code slightly
1597  // simpler for now
1598  b->mode[0] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1599  vp9_default_kf_ymode_probs[a[0]][l[0]]);
1600  if (b->bs != BS_8x4) {
1601  b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1602  vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
1603  l[0] = a[1] = b->mode[1];
1604  } else {
1605  l[0] = a[1] = b->mode[1] = b->mode[0];
1606  }
1607  if (b->bs != BS_4x8) {
1608  b->mode[2] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1609  vp9_default_kf_ymode_probs[a[0]][l[1]]);
1610  if (b->bs != BS_8x4) {
1611  b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1612  vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
1613  l[1] = a[1] = b->mode[3];
1614  } else {
1615  l[1] = a[1] = b->mode[3] = b->mode[2];
1616  }
1617  } else {
1618  b->mode[2] = b->mode[0];
1619  l[1] = a[1] = b->mode[3] = b->mode[1];
1620  }
1621  } else {
1622  b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1623  vp9_default_kf_ymode_probs[*a][*l]);
1624  b->mode[3] = b->mode[2] = b->mode[1] = b->mode[0];
1625  // FIXME this can probably be optimized
1626  memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
1627  memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
1628  }
1629  b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1630  vp9_default_kf_uvmode_probs[b->mode[3]]);
1631  } else if (b->intra) {
1632  b->comp = 0;
1633  if (b->bs > BS_8x8) {
1634  b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1635  s->prob.p.y_mode[0]);
1636  s->counts.y_mode[0][b->mode[0]]++;
1637  if (b->bs != BS_8x4) {
1638  b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1639  s->prob.p.y_mode[0]);
1640  s->counts.y_mode[0][b->mode[1]]++;
1641  } else {
1642  b->mode[1] = b->mode[0];
1643  }
1644  if (b->bs != BS_4x8) {
1645  b->mode[2] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1646  s->prob.p.y_mode[0]);
1647  s->counts.y_mode[0][b->mode[2]]++;
1648  if (b->bs != BS_8x4) {
1649  b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1650  s->prob.p.y_mode[0]);
1651  s->counts.y_mode[0][b->mode[3]]++;
1652  } else {
1653  b->mode[3] = b->mode[2];
1654  }
1655  } else {
1656  b->mode[2] = b->mode[0];
1657  b->mode[3] = b->mode[1];
1658  }
1659  } else {
1660  static const uint8_t size_group[10] = {
1661  3, 3, 3, 3, 2, 2, 2, 1, 1, 1
1662  };
1663  int sz = size_group[b->bs];
1664 
1665  b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1666  s->prob.p.y_mode[sz]);
1667  b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1668  s->counts.y_mode[sz][b->mode[3]]++;
1669  }
1670  b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1671  s->prob.p.uv_mode[b->mode[3]]);
1672  s->counts.uv_mode[b->mode[3]][b->uvmode]++;
1673  } else {
1674  static const uint8_t inter_mode_ctx_lut[14][14] = {
1675  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1676  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1677  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1678  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1679  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1680  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1681  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1682  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1683  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1684  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1685  { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1686  { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1687  { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
1688  { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
1689  };
1690 
1691  if (s->segmentation.feat[b->seg_id].ref_enabled) {
1692  av_assert2(s->segmentation.feat[b->seg_id].ref_val != 0);
1693  b->comp = 0;
1694  b->ref[0] = s->segmentation.feat[b->seg_id].ref_val - 1;
1695  } else {
1696  // read comp_pred flag
1697  if (s->comppredmode != PRED_SWITCHABLE) {
1698  b->comp = s->comppredmode == PRED_COMPREF;
1699  } else {
1700  int c;
1701 
1702  // FIXME add intra as ref=0xff (or -1) to make these easier?
1703  if (have_a) {
1704  if (have_l) {
1705  if (s->above_comp_ctx[col] && s->left_comp_ctx[row7]) {
1706  c = 4;
1707  } else if (s->above_comp_ctx[col]) {
1708  c = 2 + (s->left_intra_ctx[row7] ||
1709  s->left_ref_ctx[row7] == s->fixcompref);
1710  } else if (s->left_comp_ctx[row7]) {
1711  c = 2 + (s->above_intra_ctx[col] ||
1712  s->above_ref_ctx[col] == s->fixcompref);
1713  } else {
1714  c = (!s->above_intra_ctx[col] &&
1715  s->above_ref_ctx[col] == s->fixcompref) ^
1716  (!s->left_intra_ctx[row7] &&
1717  s->left_ref_ctx[row & 7] == s->fixcompref);
1718  }
1719  } else {
1720  c = s->above_comp_ctx[col] ? 3 :
1721  (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->fixcompref);
1722  }
1723  } else if (have_l) {
1724  c = s->left_comp_ctx[row7] ? 3 :
1725  (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->fixcompref);
1726  } else {
1727  c = 1;
1728  }
1729  b->comp = vp56_rac_get_prob(&s->c, s->prob.p.comp[c]);
1730  s->counts.comp[c][b->comp]++;
1731  }
1732 
1733  // read actual references
1734  // FIXME probably cache a few variables here to prevent repetitive
1735  // memory accesses below
1736  if (b->comp) /* two references */ {
1737  int fix_idx = s->signbias[s->fixcompref], var_idx = !fix_idx, c, bit;
1738 
1739  b->ref[fix_idx] = s->fixcompref;
1740  // FIXME can this codeblob be replaced by some sort of LUT?
1741  if (have_a) {
1742  if (have_l) {
1743  if (s->above_intra_ctx[col]) {
1744  if (s->left_intra_ctx[row7]) {
1745  c = 2;
1746  } else {
1747  c = 1 + 2 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1748  }
1749  } else if (s->left_intra_ctx[row7]) {
1750  c = 1 + 2 * (s->above_ref_ctx[col] != s->varcompref[1]);
1751  } else {
1752  int refl = s->left_ref_ctx[row7], refa = s->above_ref_ctx[col];
1753 
1754  if (refl == refa && refa == s->varcompref[1]) {
1755  c = 0;
1756  } else if (!s->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
1757  if ((refa == s->fixcompref && refl == s->varcompref[0]) ||
1758  (refl == s->fixcompref && refa == s->varcompref[0])) {
1759  c = 4;
1760  } else {
1761  c = (refa == refl) ? 3 : 1;
1762  }
1763  } else if (!s->left_comp_ctx[row7]) {
1764  if (refa == s->varcompref[1] && refl != s->varcompref[1]) {
1765  c = 1;
1766  } else {
1767  c = (refl == s->varcompref[1] &&
1768  refa != s->varcompref[1]) ? 2 : 4;
1769  }
1770  } else if (!s->above_comp_ctx[col]) {
1771  if (refl == s->varcompref[1] && refa != s->varcompref[1]) {
1772  c = 1;
1773  } else {
1774  c = (refa == s->varcompref[1] &&
1775  refl != s->varcompref[1]) ? 2 : 4;
1776  }
1777  } else {
1778  c = (refl == refa) ? 4 : 2;
1779  }
1780  }
1781  } else {
1782  if (s->above_intra_ctx[col]) {
1783  c = 2;
1784  } else if (s->above_comp_ctx[col]) {
1785  c = 4 * (s->above_ref_ctx[col] != s->varcompref[1]);
1786  } else {
1787  c = 3 * (s->above_ref_ctx[col] != s->varcompref[1]);
1788  }
1789  }
1790  } else if (have_l) {
1791  if (s->left_intra_ctx[row7]) {
1792  c = 2;
1793  } else if (s->left_comp_ctx[row7]) {
1794  c = 4 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1795  } else {
1796  c = 3 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1797  }
1798  } else {
1799  c = 2;
1800  }
1801  bit = vp56_rac_get_prob(&s->c, s->prob.p.comp_ref[c]);
1802  b->ref[var_idx] = s->varcompref[bit];
1803  s->counts.comp_ref[c][bit]++;
1804  } else /* single reference */ {
1805  int bit, c;
1806 
1807  if (have_a && !s->above_intra_ctx[col]) {
1808  if (have_l && !s->left_intra_ctx[row7]) {
1809  if (s->left_comp_ctx[row7]) {
1810  if (s->above_comp_ctx[col]) {
1811  c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7] ||
1812  !s->above_ref_ctx[col]);
1813  } else {
1814  c = (3 * !s->above_ref_ctx[col]) +
1815  (!s->fixcompref || !s->left_ref_ctx[row7]);
1816  }
1817  } else if (s->above_comp_ctx[col]) {
1818  c = (3 * !s->left_ref_ctx[row7]) +
1819  (!s->fixcompref || !s->above_ref_ctx[col]);
1820  } else {
1821  c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
1822  }
1823  } else if (s->above_intra_ctx[col]) {
1824  c = 2;
1825  } else if (s->above_comp_ctx[col]) {
1826  c = 1 + (!s->fixcompref || !s->above_ref_ctx[col]);
1827  } else {
1828  c = 4 * (!s->above_ref_ctx[col]);
1829  }
1830  } else if (have_l && !s->left_intra_ctx[row7]) {
1831  if (s->left_intra_ctx[row7]) {
1832  c = 2;
1833  } else if (s->left_comp_ctx[row7]) {
1834  c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7]);
1835  } else {
1836  c = 4 * (!s->left_ref_ctx[row7]);
1837  }
1838  } else {
1839  c = 2;
1840  }
1841  bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][0]);
1842  s->counts.single_ref[c][0][bit]++;
1843  if (!bit) {
1844  b->ref[0] = 0;
1845  } else {
1846  // FIXME can this codeblob be replaced by some sort of LUT?
1847  if (have_a) {
1848  if (have_l) {
1849  if (s->left_intra_ctx[row7]) {
1850  if (s->above_intra_ctx[col]) {
1851  c = 2;
1852  } else if (s->above_comp_ctx[col]) {
1853  c = 1 + 2 * (s->fixcompref == 1 ||
1854  s->above_ref_ctx[col] == 1);
1855  } else if (!s->above_ref_ctx[col]) {
1856  c = 3;
1857  } else {
1858  c = 4 * (s->above_ref_ctx[col] == 1);
1859  }
1860  } else if (s->above_intra_ctx[col]) {
1861  if (s->left_intra_ctx[row7]) {
1862  c = 2;
1863  } else if (s->left_comp_ctx[row7]) {
1864  c = 1 + 2 * (s->fixcompref == 1 ||
1865  s->left_ref_ctx[row7] == 1);
1866  } else if (!s->left_ref_ctx[row7]) {
1867  c = 3;
1868  } else {
1869  c = 4 * (s->left_ref_ctx[row7] == 1);
1870  }
1871  } else if (s->above_comp_ctx[col]) {
1872  if (s->left_comp_ctx[row7]) {
1873  if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
1874  c = 3 * (s->fixcompref == 1 ||
1875  s->left_ref_ctx[row7] == 1);
1876  } else {
1877  c = 2;
1878  }
1879  } else if (!s->left_ref_ctx[row7]) {
1880  c = 1 + 2 * (s->fixcompref == 1 ||
1881  s->above_ref_ctx[col] == 1);
1882  } else {
1883  c = 3 * (s->left_ref_ctx[row7] == 1) +
1884  (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1885  }
1886  } else if (s->left_comp_ctx[row7]) {
1887  if (!s->above_ref_ctx[col]) {
1888  c = 1 + 2 * (s->fixcompref == 1 ||
1889  s->left_ref_ctx[row7] == 1);
1890  } else {
1891  c = 3 * (s->above_ref_ctx[col] == 1) +
1892  (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1893  }
1894  } else if (!s->above_ref_ctx[col]) {
1895  if (!s->left_ref_ctx[row7]) {
1896  c = 3;
1897  } else {
1898  c = 4 * (s->left_ref_ctx[row7] == 1);
1899  }
1900  } else if (!s->left_ref_ctx[row7]) {
1901  c = 4 * (s->above_ref_ctx[col] == 1);
1902  } else {
1903  c = 2 * (s->left_ref_ctx[row7] == 1) +
1904  2 * (s->above_ref_ctx[col] == 1);
1905  }
1906  } else {
1907  if (s->above_intra_ctx[col] ||
1908  (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
1909  c = 2;
1910  } else if (s->above_comp_ctx[col]) {
1911  c = 3 * (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1912  } else {
1913  c = 4 * (s->above_ref_ctx[col] == 1);
1914  }
1915  }
1916  } else if (have_l) {
1917  if (s->left_intra_ctx[row7] ||
1918  (!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
1919  c = 2;
1920  } else if (s->left_comp_ctx[row7]) {
1921  c = 3 * (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1922  } else {
1923  c = 4 * (s->left_ref_ctx[row7] == 1);
1924  }
1925  } else {
1926  c = 2;
1927  }
1928  bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][1]);
1929  s->counts.single_ref[c][1][bit]++;
1930  b->ref[0] = 1 + bit;
1931  }
1932  }
1933  }
1934 
1935  if (b->bs <= BS_8x8) {
1936  if (s->segmentation.feat[b->seg_id].skip_enabled) {
1937  b->mode[0] = b->mode[1] = b->mode[2] = b->mode[3] = ZEROMV;
1938  } else {
1939  static const uint8_t off[10] = {
1940  3, 0, 0, 1, 0, 0, 0, 0, 0, 0
1941  };
1942 
1943  // FIXME this needs to use the LUT tables from find_ref_mvs
1944  // because not all are -1,0/0,-1
1945  int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
1946  [s->left_mode_ctx[row7 + off[b->bs]]];
1947 
1947 
1948  b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1949  s->prob.p.mv_mode[c]);
1950  b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1951  s->counts.mv_mode[c][b->mode[0] - 10]++;
1952  }
1953  }
1954 
1955  if (s->filtermode == FILTER_SWITCHABLE) {
1956  int c;
1957 
1958  if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
1959  if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1960  c = s->above_filter_ctx[col] == s->left_filter_ctx[row7] ?
1961  s->left_filter_ctx[row7] : 3;
1962  } else {
1963  c = s->above_filter_ctx[col];
1964  }
1965  } else if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1966  c = s->left_filter_ctx[row7];
1967  } else {
1968  c = 3;
1969  }
1970 
1971  filter_id = vp8_rac_get_tree(&s->c, vp9_filter_tree,
1972  s->prob.p.filter[c]);
1973  s->counts.filter[c][filter_id]++;
1974  b->filter = vp9_filter_lut[filter_id];
1975  } else {
1976  b->filter = s->filtermode;
1977  }
1978 
1979  if (b->bs > BS_8x8) {
1980  int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][s->left_mode_ctx[row7]];
1981 
1982  b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1983  s->prob.p.mv_mode[c]);
1984  s->counts.mv_mode[c][b->mode[0] - 10]++;
1985  fill_mv(s, b->mv[0], b->mode[0], 0);
1986 
1987  if (b->bs != BS_8x4) {
1988  b->mode[1] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1989  s->prob.p.mv_mode[c]);
1990  s->counts.mv_mode[c][b->mode[1] - 10]++;
1991  fill_mv(s, b->mv[1], b->mode[1], 1);
1992  } else {
1993  b->mode[1] = b->mode[0];
1994  AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1995  AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1996  }
1997 
1998  if (b->bs != BS_4x8) {
1999  b->mode[2] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
2000  s->prob.p.mv_mode[c]);
2001  s->counts.mv_mode[c][b->mode[2] - 10]++;
2002  fill_mv(s, b->mv[2], b->mode[2], 2);
2003 
2004  if (b->bs != BS_8x4) {
2005  b->mode[3] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
2006  s->prob.p.mv_mode[c]);
2007  s->counts.mv_mode[c][b->mode[3] - 10]++;
2008  fill_mv(s, b->mv[3], b->mode[3], 3);
2009  } else {
2010  b->mode[3] = b->mode[2];
2011  AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
2012  AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
2013  }
2014  } else {
2015  b->mode[2] = b->mode[0];
2016  AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
2017  AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
2018  b->mode[3] = b->mode[1];
2019  AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
2020  AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
2021  }
2022  } else {
2023  fill_mv(s, b->mv[0], b->mode[0], -1);
2024  AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
2025  AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
2026  AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
2027  AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
2028  AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
2029  AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
2030  }
2031 
2032  vref = b->ref[b->comp ? s->signbias[s->varcompref[0]] : 0];
2033  }
2034 
2035 #if HAVE_FAST_64BIT
2036 #define SPLAT_CTX(var, val, n) \
2037  switch (n) { \
2038  case 1: var = val; break; \
2039  case 2: AV_WN16A(&var, val * 0x0101); break; \
2040  case 4: AV_WN32A(&var, val * 0x01010101); break; \
2041  case 8: AV_WN64A(&var, val * 0x0101010101010101ULL); break; \
2042  case 16: { \
2043  uint64_t v64 = val * 0x0101010101010101ULL; \
2044  AV_WN64A( &var, v64); \
2045  AV_WN64A(&((uint8_t *) &var)[8], v64); \
2046  break; \
2047  } \
2048  }
2049 #else
2050 #define SPLAT_CTX(var, val, n) \
2051  switch (n) { \
2052  case 1: var = val; break; \
2053  case 2: AV_WN16A(&var, val * 0x0101); break; \
2054  case 4: AV_WN32A(&var, val * 0x01010101); break; \
2055  case 8: { \
2056  uint32_t v32 = val * 0x01010101; \
2057  AV_WN32A( &var, v32); \
2058  AV_WN32A(&((uint8_t *) &var)[4], v32); \
2059  break; \
2060  } \
2061  case 16: { \
2062  uint32_t v32 = val * 0x01010101; \
2063  AV_WN32A( &var, v32); \
2064  AV_WN32A(&((uint8_t *) &var)[4], v32); \
2065  AV_WN32A(&((uint8_t *) &var)[8], v32); \
2066  AV_WN32A(&((uint8_t *) &var)[12], v32); \
2067  break; \
2068  } \
2069  }
2070 #endif
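 /*
  * SPLAT_CTX broadcasts one 8-bit context value into n consecutive bytes
  * with a single multiply plus aligned stores: multiplying a byte by
  * 0x01010101 copies it into every byte lane (e.g. 0x2A * 0x01010101 ==
  * 0x2A2A2A2A). The HAVE_FAST_64BIT variant halves the store count for
  * the 8- and 16-byte cases by using 64-bit writes.
  */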
2071 
2072  switch (bwh_tab[1][b->bs][0]) {
2073 #define SET_CTXS(dir, off, n) \
2074  do { \
2075  SPLAT_CTX(s->dir##_skip_ctx[off], b->skip, n); \
2076  SPLAT_CTX(s->dir##_txfm_ctx[off], b->tx, n); \
2077  SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
2078  if (!s->keyframe && !s->intraonly) { \
2079  SPLAT_CTX(s->dir##_intra_ctx[off], b->intra, n); \
2080  SPLAT_CTX(s->dir##_comp_ctx[off], b->comp, n); \
2081  SPLAT_CTX(s->dir##_mode_ctx[off], b->mode[3], n); \
2082  if (!b->intra) { \
2083  SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
2084  if (s->filtermode == FILTER_SWITCHABLE) { \
2085  SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \
2086  } \
2087  } \
2088  } \
2089  } while (0)
2090  case 1: SET_CTXS(above, col, 1); break;
2091  case 2: SET_CTXS(above, col, 2); break;
2092  case 4: SET_CTXS(above, col, 4); break;
2093  case 8: SET_CTXS(above, col, 8); break;
2094  }
2095  switch (bwh_tab[1][b->bs][1]) {
2096  case 1: SET_CTXS(left, row7, 1); break;
2097  case 2: SET_CTXS(left, row7, 2); break;
2098  case 4: SET_CTXS(left, row7, 4); break;
2099  case 8: SET_CTXS(left, row7, 8); break;
2100  }
2101 #undef SPLAT_CTX
2102 #undef SET_CTXS
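 /*
  * The above_/left_ context arrays keep one entry per 4x4 unit, so a
  * block's skip/tx/mode/ref state is replicated (via SET_CTXS above)
  * across every 4x4 column and row the block covers; later neighbouring
  * blocks read these entries back as their prediction context.
  */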
2103 
2104  if (!s->keyframe && !s->intraonly) {
2105  if (b->bs > BS_8x8) {
2106  int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
2107 
2108  AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
2109  AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
2110  AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][0], mv0);
2111  AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][1], mv1);
2112  AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
2113  AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
2114  AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
2115  AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
2116  } else {
2117  int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
2118 
2119  for (n = 0; n < w4 * 2; n++) {
2120  AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
2121  AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
2122  }
2123  for (n = 0; n < h4 * 2; n++) {
2124  AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][0], mv0);
2125  AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);
2126  }
2127  }
2128  }
2129 
2130  // FIXME kinda ugly
2131  for (y = 0; y < h4; y++) {
2132  int x, o = (row + y) * s->sb_cols * 8 + col;
2133  struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[o];
2134 
2135  if (b->intra) {
2136  for (x = 0; x < w4; x++) {
2137  mv[x].ref[0] =
2138  mv[x].ref[1] = -1;
2139  }
2140  } else if (b->comp) {
2141  for (x = 0; x < w4; x++) {
2142  mv[x].ref[0] = b->ref[0];
2143  mv[x].ref[1] = b->ref[1];
2144  AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2145  AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
2146  }
2147  } else {
2148  for (x = 0; x < w4; x++) {
2149  mv[x].ref[0] = b->ref[0];
2150  mv[x].ref[1] = -1;
2151  AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2152  }
2153  }
2154  }
2155 }
2156 
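 /*
  * Coefficient token decoding: cache[] holds a clamped token magnitude
  * (0..5) per already-decoded coefficient position, and the probability
  * context of the next position is the rounded average of its two scan
  * neighbours, nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1.
  * Values above 4 are decoded as a base plus literal bits with the fixed
  * probabilities below; the largest token starts at 67 and adds up to 14
  * extra bits, plus 2 or 4 more at 10/12 bits per pixel.
  */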
2157 // FIXME merge cnt/eob arguments?
2158 static av_always_inline int
2159 decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2160  int is_tx32x32, int is8bitsperpixel, int bpp, unsigned (*cnt)[6][3],
2161  unsigned (*eob)[6][2], uint8_t (*p)[6][11],
2162  int nnz, const int16_t *scan, const int16_t (*nb)[2],
2163  const int16_t *band_counts, const int16_t *qmul)
2164 {
2165  int i = 0, band = 0, band_left = band_counts[band];
2166  uint8_t *tp = p[0][nnz];
2167  uint8_t cache[1024];
2168 
2169  do {
2170  int val, rc;
2171 
2172  val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
2173  eob[band][nnz][val]++;
2174  if (!val)
2175  break;
2176 
2177  skip_eob:
2178  if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
2179  cnt[band][nnz][0]++;
2180  if (!--band_left)
2181  band_left = band_counts[++band];
2182  cache[scan[i]] = 0;
2183  nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2184  tp = p[band][nnz];
2185  if (++i == n_coeffs)
2186  break; // invalid input; blocks should end with EOB
2187  goto skip_eob;
2188  }
2189 
2190  rc = scan[i];
2191  if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
2192  cnt[band][nnz][1]++;
2193  val = 1;
2194  cache[rc] = 1;
2195  } else {
2196  // fill in p[3-10] (model fill) - only once per frame for each pos
2197  if (!tp[3])
2198  memcpy(&tp[3], vp9_model_pareto8[tp[2]], 8);
2199 
2200  cnt[band][nnz][2]++;
2201  if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
2202  if (!vp56_rac_get_prob_branchy(c, tp[4])) {
2203  cache[rc] = val = 2;
2204  } else {
2205  val = 3 + vp56_rac_get_prob(c, tp[5]);
2206  cache[rc] = 3;
2207  }
2208  } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
2209  cache[rc] = 4;
2210  if (!vp56_rac_get_prob_branchy(c, tp[7])) {
2211  val = 5 + vp56_rac_get_prob(c, 159);
2212  } else {
2213  val = 7 + (vp56_rac_get_prob(c, 165) << 1);
2214  val += vp56_rac_get_prob(c, 145);
2215  }
2216  } else { // cat 3-6
2217  cache[rc] = 5;
2218  if (!vp56_rac_get_prob_branchy(c, tp[8])) {
2219  if (!vp56_rac_get_prob_branchy(c, tp[9])) {
2220  val = 11 + (vp56_rac_get_prob(c, 173) << 2);
2221  val += (vp56_rac_get_prob(c, 148) << 1);
2222  val += vp56_rac_get_prob(c, 140);
2223  } else {
2224  val = 19 + (vp56_rac_get_prob(c, 176) << 3);
2225  val += (vp56_rac_get_prob(c, 155) << 2);
2226  val += (vp56_rac_get_prob(c, 140) << 1);
2227  val += vp56_rac_get_prob(c, 135);
2228  }
2229  } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
2230  val = 35 + (vp56_rac_get_prob(c, 180) << 4);
2231  val += (vp56_rac_get_prob(c, 157) << 3);
2232  val += (vp56_rac_get_prob(c, 141) << 2);
2233  val += (vp56_rac_get_prob(c, 134) << 1);
2234  val += vp56_rac_get_prob(c, 130);
2235  } else {
2236  val = 67;
2237  if (!is8bitsperpixel) {
2238  if (bpp == 12) {
2239  val += vp56_rac_get_prob(c, 255) << 17;
2240  val += vp56_rac_get_prob(c, 255) << 16;
2241  }
2242  val += (vp56_rac_get_prob(c, 255) << 15);
2243  val += (vp56_rac_get_prob(c, 255) << 14);
2244  }
2245  val += (vp56_rac_get_prob(c, 254) << 13);
2246  val += (vp56_rac_get_prob(c, 254) << 12);
2247  val += (vp56_rac_get_prob(c, 254) << 11);
2248  val += (vp56_rac_get_prob(c, 252) << 10);
2249  val += (vp56_rac_get_prob(c, 249) << 9);
2250  val += (vp56_rac_get_prob(c, 243) << 8);
2251  val += (vp56_rac_get_prob(c, 230) << 7);
2252  val += (vp56_rac_get_prob(c, 196) << 6);
2253  val += (vp56_rac_get_prob(c, 177) << 5);
2254  val += (vp56_rac_get_prob(c, 153) << 4);
2255  val += (vp56_rac_get_prob(c, 140) << 3);
2256  val += (vp56_rac_get_prob(c, 133) << 2);
2257  val += (vp56_rac_get_prob(c, 130) << 1);
2258  val += vp56_rac_get_prob(c, 129);
2259  }
2260  }
2261  }
2262 #define STORE_COEF(c, i, v) do { \
2263  if (is8bitsperpixel) { \
2264  c[i] = v; \
2265  } else { \
2266  AV_WN32A(&c[i * 2], v); \
2267  } \
2268 } while (0)
2269  if (!--band_left)
2270  band_left = band_counts[++band];
2271  if (is_tx32x32)
2272  STORE_COEF(coef, rc, ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2);
2273  else
2274  STORE_COEF(coef, rc, (vp8_rac_get(c) ? -val : val) * qmul[!!i]);
2275  nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2276  tp = p[band][nnz];
2277  } while (++i < n_coeffs);
2278 
2279  return i;
2280 }
2281 
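 /*
  * These thin wrappers pass compile-time constants for is_tx32x32 and
  * is8bitsperpixel; together with av_always_inline this lets the compiler
  * emit four specialized copies of decode_coeffs_b_generic with the
  * per-sample-size branches folded away.
  */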
2282 static int decode_coeffs_b_8bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2283  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2284  uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2285  const int16_t (*nb)[2], const int16_t *band_counts,
2286  const int16_t *qmul)
2287 {
2288  return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 1, 8, cnt, eob, p,
2289  nnz, scan, nb, band_counts, qmul);
2290 }
2291 
2292 static int decode_coeffs_b32_8bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2293  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2294  uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2295  const int16_t (*nb)[2], const int16_t *band_counts,
2296  const int16_t *qmul)
2297 {
2298  return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 1, 8, cnt, eob, p,
2299  nnz, scan, nb, band_counts, qmul);
2300 }
2301 
2302 static int decode_coeffs_b_16bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2303  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2304  uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2305  const int16_t (*nb)[2], const int16_t *band_counts,
2306  const int16_t *qmul)
2307 {
2308  return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 0, s->bpp, cnt, eob, p,
2309  nnz, scan, nb, band_counts, qmul);
2310 }
2311 
2312 static int decode_coeffs_b32_16bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2313  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2314  uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2315  const int16_t (*nb)[2], const int16_t *band_counts,
2316  const int16_t *qmul)
2317 {
2318  return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 0, s->bpp, cnt, eob, p,
2319  nnz, scan, nb, band_counts, qmul);
2320 }
2321 
2322 static av_always_inline int decode_coeffs(AVCodecContext *ctx, int is8bitsperpixel)
2323 {
2324  VP9Context *s = ctx->priv_data;
2325  VP9Block *b = s->b;
2326  int row = s->row, col = s->col;
2327  uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
2328  unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
2329  unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
2330  int w4 = bwh_tab[1][b->bs][0] << 1, h4 = bwh_tab[1][b->bs][1] << 1;
2331  int end_x = FFMIN(2 * (s->cols - col), w4);
2332  int end_y = FFMIN(2 * (s->rows - row), h4);
2333  int n, pl, x, y, res;
2334  int16_t (*qmul)[2] = s->segmentation.feat[b->seg_id].qmul;
2335  int tx = 4 * s->lossless + b->tx;
2336  const int16_t * const *yscans = vp9_scans[tx];
2337  const int16_t (* const *ynbs)[2] = vp9_scans_nb[tx];
2338  const int16_t *uvscan = vp9_scans[b->uvtx][DCT_DCT];
2339  const int16_t (*uvnb)[2] = vp9_scans_nb[b->uvtx][DCT_DCT];
2340  uint8_t *a = &s->above_y_nnz_ctx[col * 2];
2341  uint8_t *l = &s->left_y_nnz_ctx[(row & 7) << 1];
2342  static const int16_t band_counts[4][8] = {
2343  { 1, 2, 3, 4, 3, 16 - 13 },
2344  { 1, 2, 3, 4, 11, 64 - 21 },
2345  { 1, 2, 3, 4, 11, 256 - 21 },
2346  { 1, 2, 3, 4, 11, 1024 - 21 },
2347  };
2348  const int16_t *y_band_counts = band_counts[b->tx];
2349  const int16_t *uv_band_counts = band_counts[b->uvtx];
2350  int bytesperpixel = is8bitsperpixel ? 1 : 2;
2351  int total_coeff = 0;
2352 
2353 #define MERGE(la, end, step, rd) \
2354  for (n = 0; n < end; n += step) \
2355  la[n] = !!rd(&la[n])
2356 #define MERGE_CTX(step, rd) \
2357  do { \
2358  MERGE(l, end_y, step, rd); \
2359  MERGE(a, end_x, step, rd); \
2360  } while (0)
2361 
2362 #define DECODE_Y_COEF_LOOP(step, mode_index, v) \
2363  for (n = 0, y = 0; y < end_y; y += step) { \
2364  for (x = 0; x < end_x; x += step, n += step * step) { \
2365  enum TxfmType txtp = vp9_intra_txfm_type[b->mode[mode_index]]; \
2366  res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
2367  (s, s->block + 16 * n * bytesperpixel, 16 * step * step, \
2368  c, e, p, a[x] + l[y], yscans[txtp], \
2369  ynbs[txtp], y_band_counts, qmul[0]); \
2370  a[x] = l[y] = !!res; \
2371  total_coeff |= !!res; \
2372  if (step >= 4) { \
2373  AV_WN16A(&s->eob[n], res); \
2374  } else { \
2375  s->eob[n] = res; \
2376  } \
2377  } \
2378  }
2379 
2380 #define SPLAT(la, end, step, cond) \
2381  if (step == 2) { \
2382  for (n = 1; n < end; n += step) \
2383  la[n] = la[n - 1]; \
2384  } else if (step == 4) { \
2385  if (cond) { \
2386  for (n = 0; n < end; n += step) \
2387  AV_WN32A(&la[n], la[n] * 0x01010101); \
2388  } else { \
2389  for (n = 0; n < end; n += step) \
2390  memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
2391  } \
2392  } else /* step == 8 */ { \
2393  if (cond) { \
2394  if (HAVE_FAST_64BIT) { \
2395  for (n = 0; n < end; n += step) \
2396  AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
2397  } else { \
2398  for (n = 0; n < end; n += step) { \
2399  uint32_t v32 = la[n] * 0x01010101; \
2400  AV_WN32A(&la[n], v32); \
2401  AV_WN32A(&la[n + 4], v32); \
2402  } \
2403  } \
2404  } else { \
2405  for (n = 0; n < end; n += step) \
2406  memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
2407  } \
2408  }
2409 #define SPLAT_CTX(step) \
2410  do { \
2411  SPLAT(a, end_x, step, end_x == w4); \
2412  SPLAT(l, end_y, step, end_y == h4); \
2413  } while (0)
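 /*
  * For transforms larger than 4x4, the per-4x4 nnz contexts are first
  * merged into one flag per transform block (MERGE_CTX), used while
  * decoding, then written back over all covered 4x4 units (SPLAT_CTX).
  * The cond argument (end_x == w4, resp. end_y == h4) selects wide
  * aligned stores when the block lies fully inside the visible frame.
  */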
2414 
2415  /* y tokens */
2416  switch (b->tx) {
2417  case TX_4X4:
2418  DECODE_Y_COEF_LOOP(1, b->bs > BS_8x8 ? n : 0,);
2419  break;
2420  case TX_8X8:
2421  MERGE_CTX(2, AV_RN16A);
2422  DECODE_Y_COEF_LOOP(2, 0,);
2423  SPLAT_CTX(2);
2424  break;
2425  case TX_16X16:
2426  MERGE_CTX(4, AV_RN32A);
2427  DECODE_Y_COEF_LOOP(4, 0,);
2428  SPLAT_CTX(4);
2429  break;
2430  case TX_32X32:
2431  MERGE_CTX(8, AV_RN64A);
2432  DECODE_Y_COEF_LOOP(8, 0, 32);
2433  SPLAT_CTX(8);
2434  break;
2435  }
2436 
2437 #define DECODE_UV_COEF_LOOP(step, v) \
2438  for (n = 0, y = 0; y < end_y; y += step) { \
2439  for (x = 0; x < end_x; x += step, n += step * step) { \
2440  res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
2441  (s, s->uvblock[pl] + 16 * n * bytesperpixel, \
2442  16 * step * step, c, e, p, a[x] + l[y], \
2443  uvscan, uvnb, uv_band_counts, qmul[1]); \
2444  a[x] = l[y] = !!res; \
2445  total_coeff |= !!res; \
2446  if (step >= 4) { \
2447  AV_WN16A(&s->uveob[pl][n], res); \
2448  } else { \
2449  s->uveob[pl][n] = res; \
2450  } \
2451  } \
2452  }
2453 
2454  p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
2455  c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
2456  e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
2457  w4 >>= s->ss_h;
2458  end_x >>= s->ss_h;
2459  h4 >>= s->ss_v;
2460  end_y >>= s->ss_v;
2461  for (pl = 0; pl < 2; pl++) {
2462  a = &s->above_uv_nnz_ctx[pl][col << !s->ss_h];
2463  l = &s->left_uv_nnz_ctx[pl][(row & 7) << !s->ss_v];
2464  switch (b->uvtx) {
2465  case TX_4X4:
2466  DECODE_UV_COEF_LOOP(1,);
2467  break;
2468  case TX_8X8:
2469  MERGE_CTX(2, AV_RN16A);
2470  DECODE_UV_COEF_LOOP(2,);
2471  SPLAT_CTX(2);
2472  break;
2473  case TX_16X16:
2474  MERGE_CTX(4, AV_RN32A);
2475  DECODE_UV_COEF_LOOP(4,);
2476  SPLAT_CTX(4);
2477  break;
2478  case TX_32X32:
2479  MERGE_CTX(8, AV_RN64A);
2480  DECODE_UV_COEF_LOOP(8, 32);
2481  SPLAT_CTX(8);
2482  break;
2483  }
2484  }
2485 
2486  return total_coeff;
2487 }
2488 
2489 static int decode_coeffs_8bpp(AVCodecContext *ctx)
2490 {
2491  return decode_coeffs(ctx, 1);
2492 }
2493 
2494 static int decode_coeffs_16bpp(AVCodecContext *ctx)
2495 {
2496  return decode_coeffs(ctx, 0);
2497 }
2498 
2499 static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a,
2500  uint8_t *dst_edge, ptrdiff_t stride_edge,
2501  uint8_t *dst_inner, ptrdiff_t stride_inner,
2502  uint8_t *l, int col, int x, int w,
2503  int row, int y, enum TxfmMode tx,
2504  int p, int ss_h, int ss_v, int bytesperpixel)
2505 {
2506  int have_top = row > 0 || y > 0;
2507  int have_left = col > s->tiling.tile_col_start || x > 0;
2508  int have_right = x < w - 1;
2509  int bpp = s->bpp;
2510  static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
2511  [VERT_PRED] = { { DC_127_PRED, VERT_PRED },
2512  { DC_127_PRED, VERT_PRED } },
2513  [HOR_PRED] = { { DC_129_PRED, DC_129_PRED },
2514  { HOR_PRED, HOR_PRED } },
2515  [DC_PRED] = { { DC_128_PRED, TOP_DC_PRED },
2516  { LEFT_DC_PRED, DC_PRED } },
2517  [DIAG_DOWN_LEFT_PRED] = { { DC_127_PRED, DIAG_DOWN_LEFT_PRED },
2518  { DC_127_PRED, DIAG_DOWN_LEFT_PRED } },
2519  [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
2520  { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
2521  [VERT_RIGHT_PRED] = { { VERT_RIGHT_PRED, VERT_RIGHT_PRED },
2522  { VERT_RIGHT_PRED, VERT_RIGHT_PRED } },
2523  [HOR_DOWN_PRED] = { { HOR_DOWN_PRED, HOR_DOWN_PRED },
2524  { HOR_DOWN_PRED, HOR_DOWN_PRED } },
2525  [VERT_LEFT_PRED] = { { DC_127_PRED, VERT_LEFT_PRED },
2526  { DC_127_PRED, VERT_LEFT_PRED } },
2527  [HOR_UP_PRED] = { { DC_129_PRED, DC_129_PRED },
2528  { HOR_UP_PRED, HOR_UP_PRED } },
2529  [TM_VP8_PRED] = { { DC_129_PRED, VERT_PRED },
2530  { HOR_PRED, TM_VP8_PRED } },
2531  };
2532  static const struct {
2533  uint8_t needs_left:1;
2534  uint8_t needs_top:1;
2535  uint8_t needs_topleft:1;
2536  uint8_t needs_topright:1;
2537  uint8_t invert_left:1;
2538  } edges[N_INTRA_PRED_MODES] = {
2539  [VERT_PRED] = { .needs_top = 1 },
2540  [HOR_PRED] = { .needs_left = 1 },
2541  [DC_PRED] = { .needs_top = 1, .needs_left = 1 },
2542  [DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2543  [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2544  [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2545  [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2546  [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2547  [HOR_UP_PRED] = { .needs_left = 1, .invert_left = 1 },
2548  [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2549  [LEFT_DC_PRED] = { .needs_left = 1 },
2550  [TOP_DC_PRED] = { .needs_top = 1 },
2551  [DC_128_PRED] = { 0 },
2552  [DC_127_PRED] = { 0 },
2553  [DC_129_PRED] = { 0 }
2554  };
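 /*
  * mode_conv[] substitutes a DC variant when the samples a mode needs are
  * missing (e.g. VERT_PRED without a top row becomes DC_127_PRED), and
  * edges[] lists which neighbour sets each resulting mode reads. The
  * DC_127/DC_129 fills use (128 << (bpp - 8)) - 1 and + 1 respectively,
  * as in memset_val()/assign_val() below.
  */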
2555 
2556  av_assert2(mode >= 0 && mode < 10);
2557  mode = mode_conv[mode][have_left][have_top];
2558  if (edges[mode].needs_top) {
2559  uint8_t *top, *topleft;
2560  int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !ss_h) - x) * 4;
2561  int n_px_need_tr = 0;
2562 
2563  if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
2564  n_px_need_tr = 4;
2565 
2566  // if top of sb64-row, use s->intra_pred_data[] instead of
2567  // dst[-stride] for intra prediction (it contains pre- instead of
2568  // post-loopfilter data)
2569  if (have_top) {
2570  top = !(row & 7) && !y ?
2571  s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
2572  y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
2573  if (have_left)
2574  topleft = !(row & 7) && !y ?
2575  s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
2576  y == 0 || x == 0 ? &dst_edge[-stride_edge] :
2577  &dst_inner[-stride_inner];
2578  }
2579 
2580  if (have_top &&
2581  (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
2582  (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
2583  n_px_need + n_px_need_tr <= n_px_have) {
2584  *a = top;
2585  } else {
2586  if (have_top) {
2587  if (n_px_need <= n_px_have) {
2588  memcpy(*a, top, n_px_need * bytesperpixel);
2589  } else {
2590 #define memset_bpp(c, i1, v, i2, num) do { \
2591  if (bytesperpixel == 1) { \
2592  memset(&(c)[(i1)], (v)[(i2)], (num)); \
2593  } else { \
2594  int n, val = AV_RN16A(&(v)[(i2) * 2]); \
2595  for (n = 0; n < (num); n++) { \
2596  AV_WN16A(&(c)[((i1) + n) * 2], val); \
2597  } \
2598  } \
2599 } while (0)
2600  memcpy(*a, top, n_px_have * bytesperpixel);
2601  memset_bpp(*a, n_px_have, (*a), n_px_have - 1, n_px_need - n_px_have);
2602  }
2603  } else {
2604 #define memset_val(c, val, num) do { \
2605  if (bytesperpixel == 1) { \
2606  memset((c), (val), (num)); \
2607  } else { \
2608  int n; \
2609  for (n = 0; n < (num); n++) { \
2610  AV_WN16A(&(c)[n * 2], (val)); \
2611  } \
2612  } \
2613 } while (0)
2614  memset_val(*a, (128 << (bpp - 8)) - 1, n_px_need);
2615  }
2616  if (edges[mode].needs_topleft) {
2617  if (have_left && have_top) {
2618 #define assign_bpp(c, i1, v, i2) do { \
2619  if (bytesperpixel == 1) { \
2620  (c)[(i1)] = (v)[(i2)]; \
2621  } else { \
2622  AV_COPY16(&(c)[(i1) * 2], &(v)[(i2) * 2]); \
2623  } \
2624 } while (0)
2625  assign_bpp(*a, -1, topleft, -1);
2626  } else {
2627 #define assign_val(c, i, v) do { \
2628  if (bytesperpixel == 1) { \
2629  (c)[(i)] = (v); \
2630  } else { \
2631  AV_WN16A(&(c)[(i) * 2], (v)); \
2632  } \
2633 } while (0)
2634  assign_val((*a), -1, (128 << (bpp - 8)) + (have_top ? +1 : -1));
2635  }
2636  }
2637  if (tx == TX_4X4 && edges[mode].needs_topright) {
2638  if (have_top && have_right &&
2639  n_px_need + n_px_need_tr <= n_px_have) {
2640  memcpy(&(*a)[4 * bytesperpixel], &top[4 * bytesperpixel], 4 * bytesperpixel);
2641  } else {
2642  memset_bpp(*a, 4, *a, 3, 4);
2643  }
2644  }
2645  }
2646  }
2647  if (edges[mode].needs_left) {
2648  if (have_left) {
2649  int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !ss_v) - y) * 4;
2650  uint8_t *dst = x == 0 ? dst_edge : dst_inner;
2651  ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
2652 
2653  if (edges[mode].invert_left) {
2654  if (n_px_need <= n_px_have) {
2655  for (i = 0; i < n_px_need; i++)
2656  assign_bpp(l, i, &dst[i * stride], -1);
2657  } else {
2658  for (i = 0; i < n_px_have; i++)
2659  assign_bpp(l, i, &dst[i * stride], -1);
2660  memset_bpp(l, n_px_have, l, n_px_have - 1, n_px_need - n_px_have);
2661  }
2662  } else {
2663  if (n_px_need <= n_px_have) {
2664  for (i = 0; i < n_px_need; i++)
2665  assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
2666  } else {
2667  for (i = 0; i < n_px_have; i++)
2668  assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
2669  memset_bpp(l, 0, l, n_px_need - n_px_have, n_px_need - n_px_have);
2670  }
2671  }
2672  } else {
2673  memset_val(l, (128 << (bpp - 8)) + 1, 4 << tx);
2674  }
2675  }
2676 
2677  return mode;
2678 }
2679 
2680 static av_always_inline void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off,
2681  ptrdiff_t uv_off, int bytesperpixel)
2682 {
2683  VP9Context *s = ctx->priv_data;
2684  VP9Block *b = s->b;
2685  int row = s->row, col = s->col;
2686  int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2687  int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2688  int end_x = FFMIN(2 * (s->cols - col), w4);
2689  int end_y = FFMIN(2 * (s->rows - row), h4);
2690  int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2691  int uvstep1d = 1 << b->uvtx, p;
2692  uint8_t *dst = s->dst[0], *dst_r = s->frames[CUR_FRAME].tf.f->data[0] + y_off;
2693  LOCAL_ALIGNED_32(uint8_t, a_buf, [96]);
2694  LOCAL_ALIGNED_32(uint8_t, l, [64]);
2695 
2696  for (n = 0, y = 0; y < end_y; y += step1d) {
2697  uint8_t *ptr = dst, *ptr_r = dst_r;
2698  for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d * bytesperpixel,
2699  ptr_r += 4 * step1d * bytesperpixel, n += step) {
2700  int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
2701  y * 2 + x : 0];
2702  uint8_t *a = &a_buf[32];
2703  enum TxfmType txtp = vp9_intra_txfm_type[mode];
2704  int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2705 
2706  mode = check_intra_mode(s, mode, &a, ptr_r,
2707  s->frames[CUR_FRAME].tf.f->linesize[0],
2708  ptr, s->y_stride, l,
2709  col, x, w4, row, y, b->tx, 0, 0, 0, bytesperpixel);
2710  s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a);
2711  if (eob)
2712  s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride,
2713  s->block + 16 * n * bytesperpixel, eob);
2714  }
2715  dst_r += 4 * step1d * s->frames[CUR_FRAME].tf.f->linesize[0];
2716  dst += 4 * step1d * s->y_stride;
2717  }
2718 
2719  // U/V
2720  w4 >>= s->ss_h;
2721  end_x >>= s->ss_h;
2722  end_y >>= s->ss_v;
2723  step = 1 << (b->uvtx * 2);
2724  for (p = 0; p < 2; p++) {
2725  dst = s->dst[1 + p];
2726  dst_r = s->frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
2727  for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2728  uint8_t *ptr = dst, *ptr_r = dst_r;
2729  for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d * bytesperpixel,
2730  ptr_r += 4 * uvstep1d * bytesperpixel, n += step) {
2731  int mode = b->uvmode;
2732  uint8_t *a = &a_buf[32];
2733  int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2734 
2735  mode = check_intra_mode(s, mode, &a, ptr_r,
2736  s->frames[CUR_FRAME].tf.f->linesize[1],
2737  ptr, s->uv_stride, l, col, x, w4, row, y,
2738  b->uvtx, p + 1, s->ss_h, s->ss_v, bytesperpixel);
2739  s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a);
2740  if (eob)
2741  s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2742  s->uvblock[p] + 16 * n * bytesperpixel, eob);
2743  }
2744  dst_r += 4 * uvstep1d * s->frames[CUR_FRAME].tf.f->linesize[1];
2745  dst += 4 * uvstep1d * s->uv_stride;
2746  }
2747  }
2748 }
2749 
2750 static void intra_recon_8bpp(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2751 {
2752  intra_recon(ctx, y_off, uv_off, 1);
2753 }
2754 
2755 static void intra_recon_16bpp(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2756 {
2757  intra_recon(ctx, y_off, uv_off, 2);
2758 }
2759 
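 /*
  * Scaled inter prediction: scale[] is the 14-bit fixed-point ratio
  * between reference and current frame size, and step[] the resulting
  * per-output-pixel increment in 1/16-pel units, so scale_mv() maps
  * positions and motion vectors into the reference grid.
  * ff_thread_await_progress() blocks until the thread owning the
  * reference frame has decoded the rows this block will read.
  */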
2760 static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
2761  uint8_t *dst, ptrdiff_t dst_stride,
2762  const uint8_t *ref, ptrdiff_t ref_stride,
2763  ThreadFrame *ref_frame,
2764  ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
2765  int px, int py, int pw, int ph,
2766  int bw, int bh, int w, int h, int bytesperpixel,
2767  const uint16_t *scale, const uint8_t *step)
2768 {
2769 #define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)
2770  int mx, my;
2771  int refbw_m1, refbh_m1;
2772  int th;
2773  VP56mv mv;
2774 
2775  mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 3, (s->cols * 8 - x + px + 3) << 3);
2776  mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 3, (s->rows * 8 - y + py + 3) << 3);
2777  // BUG libvpx seems to scale the two components separately. This introduces
2778  // rounding errors but we have to reproduce them to be exactly compatible
2779  // with the output from libvpx...
2780  mx = scale_mv(mv.x * 2, 0) + scale_mv(x * 16, 0);
2781  my = scale_mv(mv.y * 2, 1) + scale_mv(y * 16, 1);
2782 
2783  y = my >> 4;
2784  x = mx >> 4;
2785  ref += y * ref_stride + x * bytesperpixel;
2786  mx &= 15;
2787  my &= 15;
2788  refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2789  refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2790  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2791  // we use +7 because the last 7 pixels of each sbrow can still be changed
2792  // by the strongest loopfilter of the next sbrow
2793  th = (y + refbh_m1 + 4 + 7) >> 6;
2794  ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2795  if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
2796  s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2797  ref - 3 * ref_stride - 3 * bytesperpixel,
2798  288, ref_stride,
2799  refbw_m1 + 8, refbh_m1 + 8,
2800  x - 3, y - 3, w, h);
2801  ref = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2802  ref_stride = 288;
2803  }
2804  smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
2805 }
2806 
2807 static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
2808  uint8_t *dst_u, uint8_t *dst_v,
2809  ptrdiff_t dst_stride,
2810  const uint8_t *ref_u, ptrdiff_t src_stride_u,
2811  const uint8_t *ref_v, ptrdiff_t src_stride_v,
2812  ThreadFrame *ref_frame,
2813  ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
2814  int px, int py, int pw, int ph,
2815  int bw, int bh, int w, int h, int bytesperpixel,
2816  const uint16_t *scale, const uint8_t *step)
2817 {
2818  int mx, my;
2819  int refbw_m1, refbh_m1;
2820  int th;
2821  VP56mv mv;
2822 
2823  if (s->ss_h) {
2824  // BUG https://code.google.com/p/webm/issues/detail?id=820
2825  mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 4, (s->cols * 4 - x + px + 3) << 4);
2826  mx = scale_mv(mv.x, 0) + (scale_mv(x * 16, 0) & ~15) + (scale_mv(x * 32, 0) & 15);
2827  } else {
2828  mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 3, (s->cols * 8 - x + px + 3) << 3);
2829  mx = scale_mv(mv.x << 1, 0) + scale_mv(x * 16, 0);
2830  }
2831  if (s->ss_v) {
2832  // BUG https://code.google.com/p/webm/issues/detail?id=820
2833  mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 4, (s->rows * 4 - y + py + 3) << 4);
2834  my = scale_mv(mv.y, 1) + (scale_mv(y * 16, 1) & ~15) + (scale_mv(y * 32, 1) & 15);
2835  } else {
2836  mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 3, (s->rows * 8 - y + py + 3) << 3);
2837  my = scale_mv(mv.y << 1, 1) + scale_mv(y * 16, 1);
2838  }
2839 #undef scale_mv
2840  y = my >> 4;
2841  x = mx >> 4;
2842  ref_u += y * src_stride_u + x * bytesperpixel;
2843  ref_v += y * src_stride_v + x * bytesperpixel;
2844  mx &= 15;
2845  my &= 15;
2846  refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2847  refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2848  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2849  // we use +7 because the last 7 pixels of each sbrow can still be changed
2850  // by the strongest loopfilter of the next sbrow
2851  th = (y + refbh_m1 + 4 + 7) >> (6 - s->ss_v);
2852  ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2853  if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
2854  s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2855  ref_u - 3 * src_stride_u - 3 * bytesperpixel,
2856  288, src_stride_u,
2857  refbw_m1 + 8, refbh_m1 + 8,
2858  x - 3, y - 3, w, h);
2859  ref_u = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2860  smc(dst_u, dst_stride, ref_u, 288, bh, mx, my, step[0], step[1]);
2861 
2862  s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2863  ref_v - 3 * src_stride_v - 3 * bytesperpixel,
2864  288, src_stride_v,
2865  refbw_m1 + 8, refbh_m1 + 8,
2866  x - 3, y - 3, w, h);
2867  ref_v = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2868  smc(dst_v, dst_stride, ref_v, 288, bh, mx, my, step[0], step[1]);
2869  } else {
2870  smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
2871  smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
2872  }
2873 }
2874 
2875 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
2876  px, py, pw, ph, bw, bh, w, h, i) \
2877  mc_luma_scaled(s, s->dsp.s##mc, dst, dst_ls, src, src_ls, tref, row, col, \
2878  mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
2879  s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
2880 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2881  row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
2882  mc_chroma_scaled(s, s->dsp.s##mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2883  row, col, mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
2884  s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
2885 #define SCALED 1
2886 #define FN(x) x##_scaled_8bpp
2887 #define BYTES_PER_PIXEL 1
2888 #include "vp9_mc_template.c"
2889 #undef FN
2890 #undef BYTES_PER_PIXEL
2891 #define FN(x) x##_scaled_16bpp
2892 #define BYTES_PER_PIXEL 2
2893 #include "vp9_mc_template.c"
2894 #undef mc_luma_dir
2895 #undef mc_chroma_dir
2896 #undef FN
2897 #undef BYTES_PER_PIXEL
2898 #undef SCALED
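 /*
  * vp9_mc_template.c is included four times in this file; each inclusion
  * generates one variant of the inter prediction code
  * (inter_pred_scaled_8bpp, inter_pred_scaled_16bpp, inter_pred_8bpp,
  * inter_pred_16bpp, as called from inter_recon() below), with the
  * mc_luma_dir/mc_chroma_dir macros routing to the scaled or unscaled
  * helpers.
  */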
2899 
2900 static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
2901  uint8_t *dst, ptrdiff_t dst_stride,
2902  const uint8_t *ref, ptrdiff_t ref_stride,
2903  ThreadFrame *ref_frame,
2904  ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2905  int bw, int bh, int w, int h, int bytesperpixel)
2906 {
2907  int mx = mv->x, my = mv->y, th;
2908 
2909  y += my >> 3;
2910  x += mx >> 3;
2911  ref += y * ref_stride + x * bytesperpixel;
2912  mx &= 7;
2913  my &= 7;
2914  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2915  // we use +7 because the last 7 pixels of each sbrow can still be changed
2916  // by the strongest loopfilter of the next sbrow
2917  th = (y + bh + 4 * !!my + 7) >> 6;
2918  ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2919  if (x < !!mx * 3 || y < !!my * 3 ||
2920  x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2921  s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2922  ref - !!my * 3 * ref_stride - !!mx * 3 * bytesperpixel,
2923  160, ref_stride,
2924  bw + !!mx * 7, bh + !!my * 7,
2925  x - !!mx * 3, y - !!my * 3, w, h);
2926  ref = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2927  ref_stride = 160;
2928  }
2929  mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
2930 }
2931 
2932 static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
2933  uint8_t *dst_u, uint8_t *dst_v,
2934  ptrdiff_t dst_stride,
2935  const uint8_t *ref_u, ptrdiff_t src_stride_u,
2936  const uint8_t *ref_v, ptrdiff_t src_stride_v,
2937  ThreadFrame *ref_frame,
2938  ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2939  int bw, int bh, int w, int h, int bytesperpixel)
2940 {
2941  int mx = mv->x << !s->ss_h, my = mv->y << !s->ss_v, th;
2942 
2943  y += my >> 4;
2944  x += mx >> 4;
2945  ref_u += y * src_stride_u + x * bytesperpixel;
2946  ref_v += y * src_stride_v + x * bytesperpixel;
2947  mx &= 15;
2948  my &= 15;
2949  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2950  // we use +7 because the last 7 pixels of each sbrow can still be changed
2951  // by the strongest loopfilter of the next sbrow
2952  th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);
2953  ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2954  if (x < !!mx * 3 || y < !!my * 3 ||
2955  x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2957  ref_u - !!my * 3 * src_stride_u - !!mx * 3 * bytesperpixel,
2958  160, src_stride_u,
2959  bw + !!mx * 7, bh + !!my * 7,
2960  x - !!mx * 3, y - !!my * 3, w, h);
2961  ref_u = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2962  mc[!!mx][!!my](dst_u, dst_stride, ref_u, 160, bh, mx, my);
2963 
2964  s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2965  ref_v - !!my * 3 * src_stride_v - !!mx * 3 * bytesperpixel,
2966  160, src_stride_v,
2967  bw + !!mx * 7, bh + !!my * 7,
2968  x - !!mx * 3, y - !!my * 3, w, h);
2969  ref_v = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2970  mc[!!mx][!!my](dst_v, dst_stride, ref_v, 160, bh, mx, my);
2971  } else {
2972  mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
2973  mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
2974  }
2975 }
2976 
2977 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
2978  px, py, pw, ph, bw, bh, w, h, i) \
2979  mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
2980  mv, bw, bh, w, h, bytesperpixel)
2981 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2982  row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
2983  mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2984  row, col, mv, bw, bh, w, h, bytesperpixel)
2985 #define SCALED 0
2986 #define FN(x) x##_8bpp
2987 #define BYTES_PER_PIXEL 1
2988 #include "vp9_mc_template.c"
2989 #undef FN
2990 #undef BYTES_PER_PIXEL
2991 #define FN(x) x##_16bpp
2992 #define BYTES_PER_PIXEL 2
2993 #include "vp9_mc_template.c"
2994 #undef mc_luma_dir
2995 #undef mc_chroma_dir
2996 #undef FN
2997 #undef BYTES_PER_PIXEL
2998 #undef SCALED
2999 
3000 static av_always_inline void inter_recon(AVCodecContext *ctx, int bytesperpixel)
3001 {
3002  VP9Context *s = ctx->priv_data;
3003  VP9Block *b = s->b;
3004  int row = s->row, col = s->col;
3005 
3006  if (s->mvscale[b->ref[0]][0] || (b->comp && s->mvscale[b->ref[1]][0])) {
3007  if (bytesperpixel == 1) {
3008  inter_pred_scaled_8bpp(ctx);
3009  } else {
3010  inter_pred_scaled_16bpp(ctx);
3011  }
3012  } else {
3013  if (bytesperpixel == 1) {
3014  inter_pred_8bpp(ctx);
3015  } else {
3016  inter_pred_16bpp(ctx);
3017  }
3018  }
3019  if (!b->skip) {
3020  /* mostly copied intra_recon() */
3021 
3022  int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
3023  int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
3024  int end_x = FFMIN(2 * (s->cols - col), w4);
3025  int end_y = FFMIN(2 * (s->rows - row), h4);
3026  int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
3027  int uvstep1d = 1 << b->uvtx, p;
3028  uint8_t *dst = s->dst[0];
3029 
3030  // y itxfm add
3031  for (n = 0, y = 0; y < end_y; y += step1d) {
3032  uint8_t *ptr = dst;
3033  for (x = 0; x < end_x; x += step1d,
3034  ptr += 4 * step1d * bytesperpixel, n += step) {
3035  int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
3036 
3037  if (eob)
3038  s->dsp.itxfm_add[tx][DCT_DCT](ptr, s->y_stride,
3039  s->block + 16 * n * bytesperpixel, eob);
3040  }
3041  dst += 4 * s->y_stride * step1d;
3042  }
3043 
3044  // uv itxfm add
3045  end_x >>= s->ss_h;
3046  end_y >>= s->ss_v;
3047  step = 1 << (b->uvtx * 2);
3048  for (p = 0; p < 2; p++) {
3049  dst = s->dst[p + 1];
3050  for (n = 0, y = 0; y < end_y; y += uvstep1d) {
3051  uint8_t *ptr = dst;
3052  for (x = 0; x < end_x; x += uvstep1d,
3053  ptr += 4 * uvstep1d * bytesperpixel, n += step) {
3054  int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
3055 
3056  if (eob)
3057  s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
3058  s->uvblock[p] + 16 * n * bytesperpixel, eob);
3059  }
3060  dst += 4 * uvstep1d * s->uv_stride;
3061  }
3062  }
3063  }
3064 }
3065 
3066 static void inter_recon_8bpp(AVCodecContext *ctx)
3067 {
3068  inter_recon(ctx, 1);
3069 }
3070 
3071 static void inter_recon_16bpp(AVCodecContext *ctx)
3072 {
3073  inter_recon(ctx, 2);
3074 }
3075 
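 /*
  * mask_edges() builds the loopfilter bitmasks for one superblock:
  * mask[0] collects column (vertical) edges and mask[1] row (horizontal)
  * edges; each of the 8 entries covers one 8-px row, each bit one 8-px
  * column, and the last index selects the filter width (0 = 16-wide,
  * 1 = 8-wide, 2 = 4-wide, 3 = inner 4-wide).
  */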
3076 static av_always_inline void mask_edges(uint8_t (*mask)[8][4], int ss_h, int ss_v,
3077  int row_and_7, int col_and_7,
3078  int w, int h, int col_end, int row_end,
3079  enum TxfmMode tx, int skip_inter)
3080 {
3081  static const unsigned wide_filter_col_mask[2] = { 0x11, 0x01 };
3082  static const unsigned wide_filter_row_mask[2] = { 0x03, 0x07 };
3083 
3084  // FIXME I'm pretty sure all loops can be replaced by a single LUT if
3085  // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
3086  // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
3087  // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
3088 
3089  // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
3090  // edges. This means that for UV, we work on two subsampled blocks at
3091  // a time, and we only use the topleft block's mode information to set
3092  // things like block strength. Thus, for any block size smaller than
3093  // 16x16, ignore the odd portion of the block.
3094  if (tx == TX_4X4 && (ss_v | ss_h)) {
3095  if (h == ss_v) {
3096  if (row_and_7 & 1)
3097  return;
3098  if (!row_end)
3099  h += 1;
3100  }
3101  if (w == ss_h) {
3102  if (col_and_7 & 1)
3103  return;
3104  if (!col_end)
3105  w += 1;
3106  }
3107  }
3108 
3109  if (tx == TX_4X4 && !skip_inter) {
3110  int t = 1 << col_and_7, m_col = (t << w) - t, y;
3111  // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
3112  int m_row_8 = m_col & wide_filter_col_mask[ss_h], m_row_4 = m_col - m_row_8;
3113 
3114  for (y = row_and_7; y < h + row_and_7; y++) {
3115  int col_mask_id = 2 - !(y & wide_filter_row_mask[ss_v]);
3116 
3117  mask[0][y][1] |= m_row_8;
3118  mask[0][y][2] |= m_row_4;
3119  // for odd lines, if the odd col is not being filtered,
3120  // skip odd row also:
3121  // .---. <-- a
3122  // | |
3123  // |___| <-- b
3124  // ^ ^
3125  // c d
3126  //
3127  // if a/c are even row/col and b/d are odd, and d is skipped,
3128  // e.g. right edge of size-66x66.webm, then skip b also (bug)
3129  if ((ss_h & ss_v) && (col_end & 1) && (y & 1)) {
3130  mask[1][y][col_mask_id] |= (t << (w - 1)) - t;
3131  } else {
3132  mask[1][y][col_mask_id] |= m_col;
3133  }
3134  if (!ss_h)
3135  mask[0][y][3] |= m_col;
3136  if (!ss_v) {
3137  if (ss_h && (col_end & 1))
3138  mask[1][y][3] |= (t << (w - 1)) - t;
3139  else
3140  mask[1][y][3] |= m_col;
3141  }
3142  }
3143  } else {
3144  int y, t = 1 << col_and_7, m_col = (t << w) - t;
3145 
3146  if (!skip_inter) {
3147  int mask_id = (tx == TX_8X8);
3148  static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
3149  int l2 = tx + ss_h - 1, step1d;
3150  int m_row = m_col & masks[l2];
3151 
3152  // at odd UV col/row edges of tx16/tx32 blocks, force the 8-wide
3153  // loopfilter to prevent it from running off the visible edge.
3154  if (ss_h && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
3155  int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
3156  int m_row_8 = m_row - m_row_16;
3157 
3158  for (y = row_and_7; y < h + row_and_7; y++) {
3159  mask[0][y][0] |= m_row_16;
3160  mask[0][y][1] |= m_row_8;
3161  }
3162  } else {
3163  for (y = row_and_7; y < h + row_and_7; y++)
3164  mask[0][y][mask_id] |= m_row;
3165  }
3166 
3167  l2 = tx + ss_v - 1;
3168  step1d = 1 << l2;
3169  if (ss_v && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
3170  for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
3171  mask[1][y][0] |= m_col;
3172  if (y - row_and_7 == h - 1)
3173  mask[1][y][1] |= m_col;
3174  } else {
3175  for (y = row_and_7; y < h + row_and_7; y += step1d)
3176  mask[1][y][mask_id] |= m_col;
3177  }
3178  } else if (tx != TX_4X4) {
3179  int mask_id;
3180 
3181  mask_id = (tx == TX_8X8) || (h == ss_v);
3182  mask[1][row_and_7][mask_id] |= m_col;
3183  mask_id = (tx == TX_8X8) || (w == ss_h);
3184  for (y = row_and_7; y < h + row_and_7; y++)
3185  mask[0][y][mask_id] |= t;
3186  } else {
3187  int t8 = t & wide_filter_col_mask[ss_h], t4 = t - t8;
3188 
3189  for (y = row_and_7; y < h + row_and_7; y++) {
3190  mask[0][y][2] |= t4;
3191  mask[0][y][1] |= t8;
3192  }
3193  mask[1][row_and_7][2 - !(row_and_7 & wide_filter_row_mask[ss_v])] |= m_col;
3194  }
3195  }
3196 }
3197 
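 /*
  * decode_b() processes one coding block. In single-pass operation it
  * parses modes and coefficients and reconstructs immediately; with frame
  * threading, pass 1 only parses and stores per-block state (advancing
  * s->b and the block/eob buffers), and pass 2 replays that state for
  * reconstruction, so that bitstream parsing does not have to wait on
  * reference frames.
  */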
3198 static void decode_b(AVCodecContext *ctx, int row, int col,
3199  struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
3200  enum BlockLevel bl, enum BlockPartition bp)
3201 {
3202  VP9Context *s = ctx->priv_data;
3203  VP9Block *b = s->b;
3204  enum BlockSize bs = bl * 3 + bp;
3205  int bytesperpixel = s->bytesperpixel;
3206  int w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
3207  int emu[2];
3208  AVFrame *f = s->frames[CUR_FRAME].tf.f;
3209 
3210  s->row = row;
3211  s->row7 = row & 7;
3212  s->col = col;
3213  s->col7 = col & 7;
3214  s->min_mv.x = -(128 + col * 64);
3215  s->min_mv.y = -(128 + row * 64);
3216  s->max_mv.x = 128 + (s->cols - col - w4) * 64;
3217  s->max_mv.y = 128 + (s->rows - row - h4) * 64;
3218  if (s->pass < 2) {
3219  b->bs = bs;
3220  b->bl = bl;
3221  b->bp = bp;
3222  decode_mode(ctx);
3223  b->uvtx = b->tx - ((s->ss_h && w4 * 2 == (1 << b->tx)) ||
3224  (s->ss_v && h4 * 2 == (1 << b->tx)));
3225 
3226  if (!b->skip) {
3227  int has_coeffs;
3228 
3229  if (bytesperpixel == 1) {
3230  has_coeffs = decode_coeffs_8bpp(ctx);
3231  } else {
3232  has_coeffs = decode_coeffs_16bpp(ctx);
3233  }
3234  if (!has_coeffs && b->bs <= BS_8x8 && !b->intra) {
3235  b->skip = 1;
3236  memset(&s->above_skip_ctx[col], 1, w4);
3237  memset(&s->left_skip_ctx[s->row7], 1, h4);
3238  }
3239  } else {
3240  int row7 = s->row7;
3241 
3242 #define SPLAT_ZERO_CTX(v, n) \
3243  switch (n) { \
3244  case 1: v = 0; break; \
3245  case 2: AV_ZERO16(&v); break; \
3246  case 4: AV_ZERO32(&v); break; \
3247  case 8: AV_ZERO64(&v); break; \
3248  case 16: AV_ZERO128(&v); break; \
3249  }
3250 #define SPLAT_ZERO_YUV(dir, var, off, n, dir2) \
3251  do { \
3252  SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
3253  if (s->ss_##dir2) { \
3254  SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
3255  SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
3256  } else { \
3257  SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off * 2], n * 2); \
3258  SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off * 2], n * 2); \
3259  } \
3260  } while (0)
3261 
3262  switch (w4) {
3263  case 1: SPLAT_ZERO_YUV(above, nnz_ctx, col, 1, h); break;
3264  case 2: SPLAT_ZERO_YUV(above, nnz_ctx, col, 2, h); break;
3265  case 4: SPLAT_ZERO_YUV(above, nnz_ctx, col, 4, h); break;
3266  case 8: SPLAT_ZERO_YUV(above, nnz_ctx, col, 8, h); break;
3267  }
3268  switch (h4) {
3269  case 1: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 1, v); break;
3270  case 2: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 2, v); break;
3271  case 4: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 4, v); break;
3272  case 8: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 8, v); break;
3273  }
3274  }
3275  if (s->pass == 1) {
3276  s->b++;
3277  s->block += w4 * h4 * 64 * bytesperpixel;
3278  s->uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
3279  s->uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
3280  s->eob += 4 * w4 * h4;
3281  s->uveob[0] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
3282  s->uveob[1] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
3283 
3284  return;
3285  }
3286  }
3287 
3288  // use emulated overhangs if the stride of the target buffer can't hold
3289  // them; this allows us to support emu-edge and the like even with large
3290  // block overhangs
3291  emu[0] = (col + w4) * 8 > f->linesize[0] ||
3292  (row + h4) > s->rows;
3293  emu[1] = (col + w4) * 4 > f->linesize[1] ||
3294  (row + h4) > s->rows;
3295  if (emu[0]) {
3296  s->dst[0] = s->tmp_y;
3297  s->y_stride = 128;
3298  } else {
3299  s->dst[0] = f->data[0] + yoff;
3300  s->y_stride = f->linesize[0];
3301  }
3302  if (emu[1]) {
3303  s->dst[1] = s->tmp_uv[0];
3304  s->dst[2] = s->tmp_uv[1];
3305  s->uv_stride = 128;
3306  } else {
3307  s->dst[1] = f->data[1] + uvoff;
3308  s->dst[2] = f->data[2] + uvoff;
3309  s->uv_stride = f->linesize[1];
3310  }
3311  if (b->intra) {
3312  if (s->bpp > 8) {
3313  intra_recon_16bpp(ctx, yoff, uvoff);
3314  } else {
3315  intra_recon_8bpp(ctx, yoff, uvoff);
3316  }
3317  } else {
3318  if (s->bpp > 8) {
3319  inter_recon_16bpp(ctx);
3320  } else {
3321  inter_recon_8bpp(ctx);
3322  }
3323  }
3324  if (emu[0]) {
3325  int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
3326 
3327  for (n = 0; o < w; n++) {
3328  int bw = 64 >> n;
3329 
3330  av_assert2(n <= 4);
3331  if (w & bw) {
3332  s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o, f->linesize[0],
3333  s->tmp_y + o, 128, h, 0, 0);
3334  o += bw * bytesperpixel;
3335  }
3336  }
3337  }
3338  if (emu[1]) {
3339  int w = FFMIN(s->cols - col, w4) * 8 >> s->ss_h;
3340  int h = FFMIN(s->rows - row, h4) * 8 >> s->ss_v, n, o = 0;
3341 
3342  for (n = s->ss_h; o < w; n++) {
3343  int bw = 64 >> n;
3344 
3345  av_assert2(n <= 4);
3346  if (w & bw) {
3347  s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o, f->linesize[1],
3348  s->tmp_uv[0] + o, 128, h, 0, 0);
3349  s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o, f->linesize[2],
3350  s->tmp_uv[1] + o, 128, h, 0, 0);
3351  o += bw * bytesperpixel;
3352  }
3353  }
3354  }
3355 
3356  // pick filter level and find edges to apply filter to
3357  if (s->filter.level &&
3358  (lvl = s->segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
3359  [b->mode[3] != ZEROMV]) > 0) {
3360  int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
3361  int skip_inter = !b->intra && b->skip, col7 = s->col7, row7 = s->row7;
3362 
3363  setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
3364  mask_edges(lflvl->mask[0], 0, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
3365  if (s->ss_h || s->ss_v)
3366  mask_edges(lflvl->mask[1], s->ss_h, s->ss_v, row7, col7, x_end, y_end,
3367  s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
3368  s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
3369  b->uvtx, skip_inter);
3370 
3371  if (!s->filter.lim_lut[lvl]) {
3372  int sharp = s->filter.sharpness;
3373  int limit = lvl;
3374 
3375  if (sharp > 0) {
3376  limit >>= (sharp + 3) >> 2;
3377  limit = FFMIN(limit, 9 - sharp);
3378  }
3379  limit = FFMAX(limit, 1);
3380 
3381  s->filter.lim_lut[lvl] = limit;
3382  s->filter.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
3383  }
3384  }
3385 
3386  if (s->pass == 2) {
3387  s->b++;
3388  s->block += w4 * h4 * 64 * bytesperpixel;
3389  s->uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
3390  s->uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
3391  s->eob += 4 * w4 * h4;
3392  s->uveob[0] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
3393  s->uveob[1] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
3394  }
3395 }
3396 
3397 static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3398  ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3399 {
3400  VP9Context *s = ctx->priv_data;
3401  int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
3402  (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
3403  const uint8_t *p = s->keyframe || s->intraonly ? vp9_default_kf_partition_probs[bl][c] :
3404  s->prob.p.partition[bl][c];
3405  enum BlockPartition bp;
3406  ptrdiff_t hbs = 4 >> bl;
3407  AVFrame *f = s->frames[CUR_FRAME].tf.f;
3408  ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3409  int bytesperpixel = s->bytesperpixel;
3410 
3411  if (bl == BL_8X8) {
3412  bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3413  decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3414  } else if (col + hbs < s->cols) { // FIXME why not <=?
3415  if (row + hbs < s->rows) { // FIXME why not <=?
3416  bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3417  switch (bp) {
3418  case PARTITION_NONE:
3419  decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3420  break;
3421  case PARTITION_H:
3422  decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3423  yoff += hbs * 8 * y_stride;
3424  uvoff += hbs * 8 * uv_stride >> s->ss_v;
3425  decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
3426  break;
3427  case PARTITION_V:
3428  decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3429  yoff += hbs * 8 * bytesperpixel;
3430  uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
3431  decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
3432  break;
3433  case PARTITION_SPLIT:
3434  decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3435  decode_sb(ctx, row, col + hbs, lflvl,
3436  yoff + 8 * hbs * bytesperpixel,
3437  uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3438  yoff += hbs * 8 * y_stride;
3439  uvoff += hbs * 8 * uv_stride >> s->ss_v;
3440  decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3441  decode_sb(ctx, row + hbs, col + hbs, lflvl,
3442  yoff + 8 * hbs * bytesperpixel,
3443  uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3444  break;
3445  default:
3446  av_assert0(0);
3447  }
3448  } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
3449  bp = PARTITION_SPLIT;
3450  decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3451  decode_sb(ctx, row, col + hbs, lflvl,
3452  yoff + 8 * hbs * bytesperpixel,
3453  uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3454  } else {
3455  bp = PARTITION_H;
3456  decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3457  }
3458  } else if (row + hbs < s->rows) { // FIXME why not <=?
3459  if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
3460  bp = PARTITION_SPLIT;
3461  decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3462  yoff += hbs * 8 * y_stride;
3463  uvoff += hbs * 8 * uv_stride >> s->ss_v;
3464  decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3465  } else {
3466  bp = PARTITION_V;
3467  decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3468  }
3469  } else {
3470  bp = PARTITION_SPLIT;
3471  decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3472  }
3473  s->counts.partition[bl][c][bp]++;
3474 }
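 /*
  * The partition context c packs one bit each from the above and left
  * partition maps. At the right/bottom frame edge only a subset of
  * partitions is representable, so a single branch probability (p[1] at
  * the right edge, p[2] at the bottom) decides between SPLIT and the one
  * non-split partition that still fits.
  */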
3475 
3476 static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3477  ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3478 {
3479  VP9Context *s = ctx->priv_data;
3480  VP9Block *b = s->b;
3481  ptrdiff_t hbs = 4 >> bl;
3482  AVFrame *f = s->frames[CUR_FRAME].tf.f;
3483  ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3484  int bytesperpixel = s->bytesperpixel;
3485 
3486  if (bl == BL_8X8) {
3487  av_assert2(b->bl == BL_8X8);
3488  decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3489  } else if (s->b->bl == bl) {
3490  decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3491  if (b->bp == PARTITION_H && row + hbs < s->rows) {
3492  yoff += hbs * 8 * y_stride;
3493  uvoff += hbs * 8 * uv_stride >> s->ss_v;
3494  decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
3495  } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
3496  yoff += hbs * 8 * bytesperpixel;
3497  uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
3498  decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
3499  }
3500  } else {
3501  decode_sb_mem(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3502  if (col + hbs < s->cols) { // FIXME why not <=?
3503  if (row + hbs < s->rows) {
3504  decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
3505  uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3506  yoff += hbs * 8 * y_stride;
3507  uvoff += hbs * 8 * uv_stride >> s->ss_v;
3508  decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3509  decode_sb_mem(ctx, row + hbs, col + hbs, lflvl,
3510  yoff + 8 * hbs * bytesperpixel,
3511  uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3512  } else {
3513  yoff += hbs * 8 * bytesperpixel;
3514  uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
3515  decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
3516  }
3517  } else if (row + hbs < s->rows) {
3518  yoff += hbs * 8 * y_stride;
3519  uvoff += hbs * 8 * uv_stride >> s->ss_v;
3520  decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3521  }
3522  }
3523 }
3524 
3525 static av_always_inline void filter_plane_cols(VP9Context *s, int col, int ss_h, int ss_v,
3526  uint8_t *lvl, uint8_t (*mask)[4],
3527  uint8_t *dst, ptrdiff_t ls)
3528 {
3529  int y, x, bytesperpixel = s->bytesperpixel;
3530 
3531  // filter edges between columns (e.g. block1 | block2)
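 // L below is the 6-bit filter level of the current edge; E (edge limit)
 // and I (interior limit) come from level-indexed LUTs, and H (the
 // high-edge-variance threshold) is level >> 4. The loop_filter_mix2
 // calls handle two adjacent 8px edges at once by packing the second
 // edge's E/I/H values into the upper byte of each parameter.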
3532  for (y = 0; y < 8; y += 2 << ss_v, dst += 16 * ls, lvl += 16 << ss_v) {
3533  uint8_t *ptr = dst, *l = lvl, *hmask1 = mask[y], *hmask2 = mask[y + 1 + ss_v];
3534  unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
3535  unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
3536  unsigned hm = hm1 | hm2 | hm13 | hm23;
3537 
3538  for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8 * bytesperpixel >> ss_h) {
3539  if (col || x > 1) {
3540  if (hm1 & x) {
3541  int L = *l, H = L >> 4;
3542  int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3543 
3544  if (hmask1[0] & x) {
3545  if (hmask2[0] & x) {
3546  av_assert2(l[8 << ss_v] == L);
3547  s->dsp.loop_filter_16[0](ptr, ls, E, I, H);
3548  } else {
3549  s->dsp.loop_filter_8[2][0](ptr, ls, E, I, H);
3550  }
3551  } else if (hm2 & x) {
3552  L = l[8 << ss_v];
3553  H |= (L >> 4) << 8;
3554  E |= s->filter.mblim_lut[L] << 8;
3555  I |= s->filter.lim_lut[L] << 8;
3556  s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3557  [!!(hmask2[1] & x)]
3558  [0](ptr, ls, E, I, H);
3559  } else {
3560  s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3561  [0](ptr, ls, E, I, H);
3562  }
3563  } else if (hm2 & x) {
3564  int L = l[8 << ss_v], H = L >> 4;
3565  int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3566 
3567  s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3568  [0](ptr + 8 * ls, ls, E, I, H);
3569  }
3570  }
3571  if (ss_h) {
3572  if (x & 0xAA)
3573  l += 2;
3574  } else {
3575  if (hm13 & x) {
3576  int L = *l, H = L >> 4;
3577  int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3578 
3579  if (hm23 & x) {
3580  L = l[8 << ss_v];
3581  H |= (L >> 4) << 8;
3582  E |= s->filter.mblim_lut[L] << 8;
3583  I |= s->filter.lim_lut[L] << 8;
3584  s->dsp.loop_filter_mix2[0][0][0](ptr + 4 * bytesperpixel, ls, E, I, H);
3585  } else {
3586  s->dsp.loop_filter_8[0][0](ptr + 4 * bytesperpixel, ls, E, I, H);
3587  }
3588  } else if (hm23 & x) {
3589  int L = l[8 << ss_v], H = L >> 4;
3590  int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3591 
3592  s->dsp.loop_filter_8[0][0](ptr + 8 * ls + 4 * bytesperpixel, ls, E, I, H);
3593  }
3594  l++;
3595  }
3596  }
3597  }
3598 }
3599 
3600 static av_always_inline void filter_plane_rows(VP9Context *s, int row, int ss_h, int ss_v,
3601  uint8_t *lvl, uint8_t (*mask)[4],
3602  uint8_t *dst, ptrdiff_t ls)
3603 {
3604  int y, x, bytesperpixel = s->bytesperpixel;
3605 
3606  // block1
3607  // filter edges between rows (e.g. ------)
3608  // block2
3609  for (y = 0; y < 8; y++, dst += 8 * ls >> ss_v) {
3610  uint8_t *ptr = dst, *l = lvl, *vmask = mask[y];
3611  unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
3612 
3613  for (x = 1; vm & ~(x - 1); x <<= (2 << ss_h), ptr += 16 * bytesperpixel, l += 2 << ss_h) {
3614  if (row || y) {
3615  if (vm & x) {
3616  int L = *l, H = L >> 4;
3617  int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3618 
3619  if (vmask[0] & x) {
3620  if (vmask[0] & (x << (1 + ss_h))) {
3621  av_assert2(l[1 + ss_h] == L);
3622  s->dsp.loop_filter_16[1](ptr, ls, E, I, H);
3623  } else {
3624  s->dsp.loop_filter_8[2][1](ptr, ls, E, I, H);
3625  }
3626  } else if (vm & (x << (1 + ss_h))) {
3627  L = l[1 + ss_h];
3628  H |= (L >> 4) << 8;
3629  E |= s->filter.mblim_lut[L] << 8;
3630  I |= s->filter.lim_lut[L] << 8;
3631  s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3632  [!!(vmask[1] & (x << (1 + ss_h)))]
3633  [1](ptr, ls, E, I, H);
3634  } else {
3635  s->dsp.loop_filter_8[!!(vmask[1] & x)]
3636  [1](ptr, ls, E, I, H);
3637  }
3638  } else if (vm & (x << (1 + ss_h))) {
3639  int L = l[1 + ss_h], H = L >> 4;
3640  int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3641 
3642  s->dsp.loop_filter_8[!!(vmask[1] & (x << (1 + ss_h)))]
3643  [1](ptr + 8 * bytesperpixel, ls, E, I, H);
3644  }
3645  }
3646  if (!ss_v) {
3647  if (vm3 & x) {
3648  int L = *l, H = L >> 4;
3649  int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3650 
3651  if (vm3 & (x << (1 + ss_h))) {
3652  L = l[1 + ss_h];
3653  H |= (L >> 4) << 8;
3654  E |= s->filter.mblim_lut[L] << 8;
3655  I |= s->filter.lim_lut[L] << 8;
3656  s->dsp.loop_filter_mix2[0][0][1](ptr + ls * 4, ls, E, I, H);
3657  } else {
3658  s->dsp.loop_filter_8[0][1](ptr + ls * 4, ls, E, I, H);
3659  }
3660  } else if (vm3 & (x << (1 + ss_h))) {
3661  int L = l[1 + ss_h], H = L >> 4;
3662  int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3663 
3664  s->dsp.loop_filter_8[0][1](ptr + ls * 4 + 8 * bytesperpixel, ls, E, I, H);
3665  }
3666  }
3667  }
3668  if (ss_v) {
3669  if (y & 1)
3670  lvl += 16;
3671  } else {
3672  lvl += 8;
3673  }
3674  }
3675 }
3676 
3677 static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
3678  int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
3679 {
3680  VP9Context *s = ctx->priv_data;
3681  AVFrame *f = s->frames[CUR_FRAME].tf.f;
3682  uint8_t *dst = f->data[0] + yoff;
3683  ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
3684  uint8_t (*uv_masks)[8][4] = lflvl->mask[s->ss_h | s->ss_v];
3685  int p;
3686 
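 // chroma uses the second mask set whenever either direction is
 // subsampled (s->ss_h | s->ss_v); for 4:4:4 content, chroma edges align
 // with luma and the luma masks are shared.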
3687  // FIXME to what extent can we interleave the v/h loopfilter calls? E.g.
3688  // if you think of them as acting on an 8x8 block max, we could interleave
3689  // v/h within the single x loop, but that only works if we operate on
3690  // 8-pixel blocks, and we won't always do that (we want at least 16px
3691  // to use SSE2 optimizations, perhaps 32 for AVX2)
3692 
3693  filter_plane_cols(s, col, 0, 0, lflvl->level, lflvl->mask[0][0], dst, ls_y);
3694  filter_plane_rows(s, row, 0, 0, lflvl->level, lflvl->mask[0][1], dst, ls_y);
3695 
3696  for (p = 0; p < 2; p++) {
3697  dst = f->data[1 + p] + uvoff;
3698  filter_plane_cols(s, col, s->ss_h, s->ss_v, lflvl->level, uv_masks[0], dst, ls_uv);
3699  filter_plane_rows(s, row, s->ss_h, s->ss_v, lflvl->level, uv_masks[1], dst, ls_uv);
3700  }
3701 }
3702 
3703 static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
3704 {
3705  int sb_start = ( idx * n) >> log2_n;
3706  int sb_end = ((idx + 1) * n) >> log2_n;
3707  *start = FFMIN(sb_start, n) << 3;
3708  *end = FFMIN(sb_end, n) << 3;
3709 }
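// e.g. with n = 8 sb64 rows and log2_n = 1 (two tile rows), idx 0 covers
// sb rows [0,4) and idx 1 covers [4,8); the << 3 converts superblock
// units into 8px-block units, so the ranges become [0,32) and [32,64).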
3710 
3711 static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
3712  int max_count, int update_factor)
3713 {
3714  unsigned ct = ct0 + ct1, p2, p1;
3715 
3716  if (!ct)
3717  return;
3718 
3719  p1 = *p;
3720  p2 = ((ct0 << 8) + (ct >> 1)) / ct;
3721  p2 = av_clip(p2, 1, 255);
3722  ct = FFMIN(ct, max_count);
3723  update_factor = FASTDIV(update_factor * ct, max_count);
3724 
3725  // (p1 * (256 - update_factor) + p2 * update_factor + 128) >> 8
3726  *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
3727 }
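// Worked example: p1 = 128, ct0 = 3, ct1 = 1, max_count = 20, uf = 128:
// p2 = ((3 << 8) + 2) / 4 = 192, update_factor = 128 * 4 / 20 = 25, so
// *p = 128 + (((192 - 128) * 25 + 128) >> 8) = 134, i.e. the probability
// moves toward the observed ratio by an amount scaled by the sample count.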
3728 
3729 static void adapt_probs(VP9Context *s)
3730 {
3731  int i, j, k, l, m;
3732  prob_context *p = &s->prob_ctx[s->framectxid].p;
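 // coefficient probs adapt faster (update factor 128/256) on the first
 // inter frame after a key frame; all other frames use 112/256.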
3733  int uf = (s->keyframe || s->intraonly || !s->last_keyframe) ? 112 : 128;
3734 
3735  // coefficients
3736  for (i = 0; i < 4; i++)
3737  for (j = 0; j < 2; j++)
3738  for (k = 0; k < 2; k++)
3739  for (l = 0; l < 6; l++)
3740  for (m = 0; m < 6; m++) {
3741  uint8_t *pp = s->prob_ctx[s->framectxid].coef[i][j][k][l][m];
3742  unsigned *e = s->counts.eob[i][j][k][l][m];
3743  unsigned *c = s->counts.coef[i][j][k][l][m];
3744 
3745  if (l == 0 && m >= 3) // the dc band only has 3 coef contexts
3746  break;
3747 
3748  adapt_prob(&pp[0], e[0], e[1], 24, uf);
3749  adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
3750  adapt_prob(&pp[2], c[1], c[2], 24, uf);
3751  }
3752 
3753  if (s->keyframe || s->intraonly) {
3754  memcpy(p->skip, s->prob.p.skip, sizeof(p->skip));
3755  memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p));
3756  memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p));
3757  memcpy(p->tx8p, s->prob.p.tx8p, sizeof(p->tx8p));
3758  return;
3759  }
3760 
3761  // skip flag
3762  for (i = 0; i < 3; i++)
3763  adapt_prob(&p->skip[i], s->counts.skip[i][0], s->counts.skip[i][1], 20, 128);
3764 
3765  // intra/inter flag
3766  for (i = 0; i < 4; i++)
3767  adapt_prob(&p->intra[i], s->counts.intra[i][0], s->counts.intra[i][1], 20, 128);
3768 
3769  // comppred flag
3770  if (s->comppredmode == PRED_SWITCHABLE) {
3771  for (i = 0; i < 5; i++)
3772  adapt_prob(&p->comp[i], s->counts.comp[i][0], s->counts.comp[i][1], 20, 128);
3773  }
3774 
3775  // reference frames
3776  if (s->comppredmode != PRED_SINGLEREF) {
3777  for (i = 0; i < 5; i++)
3778  adapt_prob(&p->comp_ref[i], s->counts.comp_ref[i][0],
3779  s->counts.comp_ref[i][1], 20, 128);
3780  }
3781 
3782  if (s->comppredmode != PRED_COMPREF) {
3783  for (i = 0; i < 5; i++) {
3784  uint8_t *pp = p->single_ref[i];
3785  unsigned (*c)[2] = s->counts.single_ref[i];
3786 
3787  adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
3788  adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
3789  }
3790  }
3791 
3792  // block partitioning
3793  for (i = 0; i < 4; i++)
3794  for (j = 0; j < 4; j++) {
3795  uint8_t *pp = p->partition[i][j];
3796  unsigned *c = s->counts.partition[i][j];
3797 
3798  adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3799  adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3800  adapt_prob(&pp[2], c[2], c[3], 20, 128);
3801  }
3802 
3803  // tx size
3804  if (s->txfmmode == TX_SWITCHABLE) {
3805  for (i = 0; i < 2; i++) {
3806  unsigned *c16 = s->counts.tx16p[i], *c32 = s->counts.tx32p[i];
3807 
3808  adapt_prob(&p->tx8p[i], s->counts.tx8p[i][0], s->counts.tx8p[i][1], 20, 128);
3809  adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128);
3810  adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128);
3811  adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
3812  adapt_prob(&p->tx32p[i][1], c32[1], c32[2] + c32[3], 20, 128);
3813  adapt_prob(&p->tx32p[i][2], c32[2], c32[3], 20, 128);
3814  }
3815  }
3816 
3817  // interpolation filter
3818  if (s->filtermode == FILTER_SWITCHABLE) {
3819  for (i = 0; i < 4; i++) {
3820  uint8_t *pp = p->filter[i];
3821  unsigned *c = s->counts.filter[i];
3822 
3823  adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
3824  adapt_prob(&pp[1], c[1], c[2], 20, 128);
3825  }
3826  }
3827 
3828  // inter modes
3829  for (i = 0; i < 7; i++) {
3830  uint8_t *pp = p->mv_mode[i];
3831  unsigned *c = s->counts.mv_mode[i];
3832 
3833  adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
3834  adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
3835  adapt_prob(&pp[2], c[1], c[3], 20, 128);
3836  }
3837 
3838  // mv joints
3839  {
3840  uint8_t *pp = p->mv_joint;
3841  unsigned *c = s->counts.mv_joint;
3842 
3843  adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3844  adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3845  adapt_prob(&pp[2], c[2], c[3], 20, 128);
3846  }
3847 
3848  // mv components
3849  for (i = 0; i < 2; i++) {
3850  uint8_t *pp;
3851  unsigned *c, (*c2)[2], sum;
3852 
3853  adapt_prob(&p->mv_comp[i].sign, s->counts.mv_comp[i].sign[0],
3854  s->counts.mv_comp[i].sign[1], 20, 128);
3855 
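 // the mv class tree has 11 leaves; each adapt_prob() call below splits
 // one subtree off against the sum of everything still to its right, so
 // `sum` shrinks as the walk descends the tree.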
3856  pp = p->mv_comp[i].classes;
3857  c = s->counts.mv_comp[i].classes;
3858  sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
3859  adapt_prob(&pp[0], c[0], sum, 20, 128);
3860  sum -= c[1];
3861  adapt_prob(&pp[1], c[1], sum, 20, 128);
3862  sum -= c[2] + c[3];
3863  adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
3864  adapt_prob(&pp[3], c[2], c[3], 20, 128);
3865  sum -= c[4] + c[5];
3866  adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
3867  adapt_prob(&pp[5], c[4], c[5], 20, 128);
3868  sum -= c[6];
3869  adapt_prob(&pp[6], c[6], sum, 20, 128);
3870  adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
3871  adapt_prob(&pp[8], c[7], c[8], 20, 128);
3872  adapt_prob(&pp[9], c[9], c[10], 20, 128);
3873 
3874  adapt_prob(&p->mv_comp[i].class0, s->counts.mv_comp[i].class0[0],
3875  s->counts.mv_comp[i].class0[1], 20, 128);
3876  pp = p->mv_comp[i].bits;
3877  c2 = s->counts.mv_comp[i].bits;
3878  for (j = 0; j < 10; j++)
3879  adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);
3880 
3881  for (j = 0; j < 2; j++) {
3882  pp = p->mv_comp[i].class0_fp[j];
3883  c = s->counts.mv_comp[i].class0_fp[j];
3884  adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3885  adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3886  adapt_prob(&pp[2], c[2], c[3], 20, 128);
3887  }
3888  pp = p->mv_comp[i].fp;
3889  c = s->counts.mv_comp[i].fp;
3890  adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3891  adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3892  adapt_prob(&pp[2], c[2], c[3], 20, 128);
3893 
3894  if (s->highprecisionmvs) {
3895  adapt_prob(&p->mv_comp[i].class0_hp, s->counts.mv_comp[i].class0_hp[0],
3896  s->counts.mv_comp[i].class0_hp[1], 20, 128);
3897  adapt_prob(&p->mv_comp[i].hp, s->counts.mv_comp[i].hp[0],
3898  s->counts.mv_comp[i].hp[1], 20, 128);
3899  }
3900  }
3901 
3902  // y intra modes
3903  for (i = 0; i < 4; i++) {
3904  uint8_t *pp = p->y_mode[i];
3905  unsigned *c = s->counts.y_mode[i], sum, s2;
3906 
3907  sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3908  adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3909  sum -= c[TM_VP8_PRED];
3910  adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3911  sum -= c[VERT_PRED];
3912  adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3913  s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3914  sum -= s2;
3915  adapt_prob(&pp[3], s2, sum, 20, 128);
3916  s2 -= c[HOR_PRED];
3917  adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3918  adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3919  sum -= c[DIAG_DOWN_LEFT_PRED];
3920  adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3921  sum -= c[VERT_LEFT_PRED];
3922  adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3923  adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
3924  }
3925 
3926  // uv intra modes
3927  for (i = 0; i < 10; i++) {
3928  uint8_t *pp = p->uv_mode[i];
3929  unsigned *c = s->counts.uv_mode[i], sum, s2;
3930 
3931  sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3932  adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3933  sum -= c[TM_VP8_PRED];
3934  adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3935  sum -= c[VERT_PRED];
3936  adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3937  s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3938  sum -= s2;
3939  adapt_prob(&pp[3], s2, sum, 20, 128);
3940  s2 -= c[HOR_PRED];
3941  adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3942  adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3943  sum -= c[DIAG_DOWN_LEFT_PRED];
3944  adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3945  sum -= c[VERT_LEFT_PRED];
3946  adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3947  adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
3948  }
3949 }
3950 
3951 static void free_buffers(VP9Context *s)
3952 {
3953  av_freep(&s->intra_pred_data[0]);
3954  av_freep(&s->b_base);
3955  av_freep(&s->block_base);
3956 }
3957 
3958 static av_cold int vp9_decode_free(AVCodecContext *ctx)
3959 {
3960  VP9Context *s = ctx->priv_data;
3961  int i;
3962 
3963  for (i = 0; i < 3; i++) {
3964  if (s->frames[i].tf.f->data[0])
3965  vp9_unref_frame(ctx, &s->frames[i]);
3966  av_frame_free(&s->frames[i].tf.f);
3967  }
3968  for (i = 0; i < 8; i++) {
3969  if (s->refs[i].f->data[0])
3970  ff_thread_release_buffer(ctx, &s->refs[i]);
3971  av_frame_free(&s->refs[i].f);
3972  if (s->next_refs[i].f->data[0])
3973  ff_thread_release_buffer(ctx, &s->next_refs[i]);
3974  av_frame_free(&s->next_refs[i].f);
3975  }
3976  free_buffers(s);
3977  av_freep(&s->c_b);
3978  s->c_b_size = 0;
3979 
3980  return 0;
3981 }
3982 
3983 
3984 static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
3985  int *got_frame, AVPacket *pkt)
3986 {
3987  const uint8_t *data = pkt->data;
3988  int size = pkt->size;
3989  VP9Context *s = ctx->priv_data;
3990  int res, tile_row, tile_col, i, ref, row, col;
3991  int retain_segmap_ref = s->segmentation.enabled && !s->segmentation.update_map;
3992  ptrdiff_t yoff, uvoff, ls_y, ls_uv;
3993  AVFrame *f;
3994  int bytesperpixel;
3995 
3996  if ((res = decode_frame_header(ctx, data, size, &ref)) < 0) {
3997  return res;
3998  } else if (res == 0) {
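 // a zero return from decode_frame_header() means "show existing frame":
 // the packet carries no coded data, it just re-outputs reference slot `ref`.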
3999  if (!s->refs[ref].f->data[0]) {
4000  av_log(ctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
4001  return AVERROR_INVALIDDATA;
4002  }
4003  if ((res = av_frame_ref(frame, s->refs[ref].f)) < 0)
4004  return res;
4005  ((AVFrame *)frame)->pkt_pts = pkt->pts;
4006  ((AVFrame *)frame)->pkt_dts = pkt->dts;
4007  for (i = 0; i < 8; i++) {
4008  if (s->next_refs[i].f->data[0])
4009  ff_thread_release_buffer(ctx, &s->next_refs[i]);
4010  if (s->refs[i].f->data[0] &&
4011  (res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i])) < 0)
4012  return res;
4013  }
4014  *got_frame = 1;
4015  return pkt->size;
4016  }
4017  data += res;
4018  size -= res;
4019 
4020  if (!retain_segmap_ref) {
4021  if (s->frames[REF_FRAME_SEGMAP].tf.f->data[0])
4022  vp9_unref_frame(ctx, &s->frames[REF_FRAME_SEGMAP]);
4023  if (!s->keyframe && !s->intraonly && !s->errorres && s->frames[CUR_FRAME].tf.f->data[0] &&
4024  (res = vp9_ref_frame(ctx, &s->frames[REF_FRAME_SEGMAP], &s->frames[CUR_FRAME])) < 0)
4025  return res;
4026  }
4027  if (s->frames[REF_FRAME_MVPAIR].tf.f->data[0])
4028  vp9_unref_frame(ctx, &s->frames[REF_FRAME_MVPAIR]);
4029  if (!s->intraonly && !s->keyframe && !s->errorres && s->frames[CUR_FRAME].tf.f->data[0] &&
4030  (res = vp9_ref_frame(ctx, &s->frames[REF_FRAME_MVPAIR], &s->frames[CUR_FRAME])) < 0)
4031  return res;
4032  if (s->frames[CUR_FRAME].tf.f->data[0])
4033  vp9_unref_frame(ctx, &s->frames[CUR_FRAME]);
4034  if ((res = vp9_alloc_frame(ctx, &s->frames[CUR_FRAME])) < 0)
4035  return res;
4036  f = s->frames[CUR_FRAME].tf.f;
4037  f->key_frame = s->keyframe;
4038  f->pict_type = (s->keyframe || s->intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
4039  ls_y = f->linesize[0];
4040  ls_uv = f->linesize[1];
4041 
4042  // ref frame setup
4043  for (i = 0; i < 8; i++) {
4044  if (s->next_refs[i].f->data[0])
4045  ff_thread_release_buffer(ctx, &s->next_refs[i]);
4046  if (s->refreshrefmask & (1 << i)) {
4047  res = ff_thread_ref_frame(&s->next_refs[i], &s->frames[CUR_FRAME].tf);
4048  } else if (s->refs[i].f->data[0]) {
4049  res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i]);
4050  }
4051  if (res < 0)
4052  return res;
4053  }
4054 
4055  // main tile decode loop
4056  bytesperpixel = s->bytesperpixel;
4057  memset(s->above_partition_ctx, 0, s->cols);
4058  memset(s->above_skip_ctx, 0, s->cols);
4059  if (s->keyframe || s->intraonly) {
4060  memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
4061  } else {
4062  memset(s->above_mode_ctx, NEARESTMV, s->cols);
4063  }
4064  memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
4065  memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
4066  memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
4067  memset(s->above_segpred_ctx, 0, s->cols);
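 // frame threading uses a 2-pass scheme when backward probability updates
 // are pending: pass 1 only parses symbols and stores per-block state,
 // pass 2 reconstructs pixels, so adaptation can finish (and dependent
 // frames can start) before reconstruction completes.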
4068  s->pass = s->frames[CUR_FRAME].uses_2pass =
4069  ctx->active_thread_type == FF_THREAD_FRAME && s->refreshctx && !s->parallelmode;
4070  if ((res = update_block_buffers(ctx)) < 0) {
4071  av_log(ctx, AV_LOG_ERROR,
4072  "Failed to allocate block buffers\n");
4073  return res;
4074  }
4075  if (s->refreshctx && s->parallelmode) {
4076  int j, k, l, m;
4077 
4078  for (i = 0; i < 4; i++) {
4079  for (j = 0; j < 2; j++)
4080  for (k = 0; k < 2; k++)
4081  for (l = 0; l < 6; l++)
4082  for (m = 0; m < 6; m++)
4083  memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
4084  s->prob.coef[i][j][k][l][m], 3);
4085  if (s->txfmmode == i)
4086  break;
4087  }
4088  s->prob_ctx[s->framectxid].p = s->prob.p;
4089  ff_thread_finish_setup(ctx);
4090  } else if (!s->refreshctx) {
4091  ff_thread_finish_setup(ctx);
4092  }
4093 
4094  do {
4095  yoff = uvoff = 0;
4096  s->b = s->b_base;
4097  s->block = s->block_base;
4098  s->uvblock[0] = s->uvblock_base[0];
4099  s->uvblock[1] = s->uvblock_base[1];
4100  s->eob = s->eob_base;
4101  s->uveob[0] = s->uveob_base[0];
4102  s->uveob[1] = s->uveob_base[1];
4103 
4104  for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
4105  set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
4106  tile_row, s->tiling.log2_tile_rows, s->sb_rows);
4107  if (s->pass != 2) {
4108  for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
4109  int64_t tile_size;
4110 
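 // every tile except the last one in the frame is prefixed with an
 // explicit 32-bit big-endian byte size; the last tile simply spans
 // the remainder of the packet.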
4111  if (tile_col == s->tiling.tile_cols - 1 &&
4112  tile_row == s->tiling.tile_rows - 1) {
4113  tile_size = size;
4114  } else {
4115  tile_size = AV_RB32(data);
4116  data += 4;
4117  size -= 4;
4118  }
4119  if (tile_size > size) {
4120  ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
4121  return AVERROR_INVALIDDATA;
4122  }
4123  ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
4124  if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
4125  ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
4126  return AVERROR_INVALIDDATA;
4127  }
4128  data += tile_size;
4129  size -= tile_size;
4130  }
4131  }
4132 
4133  for (row = s->tiling.tile_row_start; row < s->tiling.tile_row_end;
4134  row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
4135  struct VP9Filter *lflvl_ptr = s->lflvl;
4136  ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
4137 
4138  for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
4139  set_tile_offset(&s->tiling.tile_col_start, &s->tiling.tile_col_end,
4140  tile_col, s->tiling.log2_tile_cols, s->sb_cols);
4141 
4142  if (s->pass != 2) {
4143  memset(s->left_partition_ctx, 0, 8);
4144  memset(s->left_skip_ctx, 0, 8);
4145  if (s->keyframe || s->intraonly) {
4146  memset(s->left_mode_ctx, DC_PRED, 16);
4147  } else {
4148  memset(s->left_mode_ctx, NEARESTMV, 8);
4149  }
4150  memset(s->left_y_nnz_ctx, 0, 16);
4151  memset(s->left_uv_nnz_ctx, 0, 32);
4152  memset(s->left_segpred_ctx, 0, 8);
4153 
4154  memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
4155  }
4156 
4157  for (col = s->tiling.tile_col_start;
4158  col < s->tiling.tile_col_end;
4159  col += 8, yoff2 += 64 * bytesperpixel,
4160  uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
4161  // FIXME integrate with the loopfilter code, i.e. zero the masks after
4162  // each use, as is done for the inverse-transform coefficients
4163  if (s->pass != 1) {
4164  memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
4165  }
4166 
4167  if (s->pass == 2) {
4168  decode_sb_mem(ctx, row, col, lflvl_ptr,
4169  yoff2, uvoff2, BL_64X64);
4170  } else {
4171  decode_sb(ctx, row, col, lflvl_ptr,
4172  yoff2, uvoff2, BL_64X64);
4173  }
4174  }
4175  if (s->pass != 2) {
4176  memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
4177  }
4178  }
4179 
4180  if (s->pass == 1) {
4181  continue;
4182  }
4183 
4184  // backup pre-loopfilter reconstruction data for intra
4185  // prediction of next row of sb64s
4186  if (row + 8 < s->rows) {
4187  memcpy(s->intra_pred_data[0],
4188  f->data[0] + yoff + 63 * ls_y,
4189  8 * s->cols * bytesperpixel);
4190  memcpy(s->intra_pred_data[1],
4191  f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
4192  8 * s->cols * bytesperpixel >> s->ss_h);
4193  memcpy(s->intra_pred_data[2],
4194  f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
4195  8 * s->cols * bytesperpixel >> s->ss_h);
4196  }
4197 
4198  // loopfilter one row
4199  if (s->filter.level) {
4200  yoff2 = yoff;
4201  uvoff2 = uvoff;
4202  lflvl_ptr = s->lflvl;
4203  for (col = 0; col < s->cols;
4204  col += 8, yoff2 += 64 * bytesperpixel,
4205  uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
4206  loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2);
4207  }
4208  }
4209 
4210  // FIXME maybe we can make this more fine-grained by running the
4211  // loopfilter per-block instead of after each sbrow; that would also
4212  // make preparing the left edge for intra prediction easier
4213  ff_thread_report_progress(&s->frames[CUR_FRAME].tf, row >> 3, 0);
4214  }
4215  }
4216 
4217  if (s->pass < 2 && s->refreshctx && !s->parallelmode) {
4218  adapt_probs(s);
4219  ff_thread_finish_setup(ctx);
4220  }
4221  } while (s->pass++ == 1);
4222  ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
4223 
4224  // ref frame setup
4225  for (i = 0; i < 8; i++) {
4226  if (s->refs[i].f->data[0])
4227  ff_thread_release_buffer(ctx, &s->refs[i]);
4228  ff_thread_ref_frame(&s->refs[i], &s->next_refs[i]);
4229  }
4230 
4231  if (!s->invisible) {
4232  if ((res = av_frame_ref(frame, s->frames[CUR_FRAME].tf.f)) < 0)
4233  return res;
4234  *got_frame = 1;
4235  }
4236 
4237  return pkt->size;
4238 }
4239 
4240 static void vp9_decode_flush(AVCodecContext *ctx)
4241 {
4242  VP9Context *s = ctx->priv_data;
4243  int i;
4244 
4245  for (i = 0; i < 3; i++)
4246  vp9_unref_frame(ctx, &s->frames[i]);
4247  for (i = 0; i < 8; i++)
4248  ff_thread_release_buffer(ctx, &s->refs[i]);
4249 }
4250 
4251 static int init_frames(AVCodecContext *ctx)
4252 {
4253  VP9Context *s = ctx->priv_data;
4254  int i;
4255 
4256  for (i = 0; i < 3; i++) {
4257  s->frames[i].tf.f = av_frame_alloc();
4258  if (!s->frames[i].tf.f) {
4259  vp9_decode_free(ctx);
4260  av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4261  return AVERROR(ENOMEM);
4262  }
4263  }
4264  for (i = 0; i < 8; i++) {
4265  s->refs[i].f = av_frame_alloc();
4266  s->next_refs[i].f = av_frame_alloc();
4267  if (!s->refs[i].f || !s->next_refs[i].f) {
4268  vp9_decode_free(ctx);
4269  av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4270  return AVERROR(ENOMEM);
4271  }
4272  }
4273 
4274  return 0;
4275 }
4276 
4277 static av_cold int vp9_decode_init(AVCodecContext *ctx)
4278 {
4279  VP9Context *s = ctx->priv_data;
4280 
4281  ctx->internal->allocate_progress = 1;
4282  s->last_bpp = 0;
4283  s->filter.sharpness = -1;
4284 
4285  return init_frames(ctx);
4286 }
4287 
4288 static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
4289 {
4290  return init_frames(avctx);
4291 }
4292 
4293 static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
4294 {
4295  int i, res;
4296  VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
4297 
4298  // detect size changes in other threads
4299  if (s->intra_pred_data[0] &&
4300  (!ssrc->intra_pred_data[0] || s->cols != ssrc->cols || s->rows != ssrc->rows)) {
4301  free_buffers(s);
4302  }
4303 
4304  for (i = 0; i < 3; i++) {
4305  if (s->frames[i].tf.f->data[0])
4306  vp9_unref_frame(dst, &s->frames[i]);
4307  if (ssrc->frames[i].tf.f->data[0]) {
4308  if ((res = vp9_ref_frame(dst, &s->frames[i], &ssrc->frames[i])) < 0)
4309  return res;
4310  }
4311  }
4312  for (i = 0; i < 8; i++) {
4313  if (s->refs[i].f->data[0])
4314  ff_thread_release_buffer(dst, &s->refs[i]);
4315  if (ssrc->next_refs[i].f->data[0]) {
4316  if ((res = ff_thread_ref_frame(&s->refs[i], &ssrc->next_refs[i])) < 0)
4317  return res;
4318  }
4319  }
4320 
4321  s->invisible = ssrc->invisible;
4322  s->keyframe = ssrc->keyframe;
4323  s->ss_v = ssrc->ss_v;
4324  s->ss_h = ssrc->ss_h;
4325  s->segmentation.enabled = ssrc->segmentation.enabled;
4326  s->segmentation.update_map = ssrc->segmentation.update_map;
4327  s->bytesperpixel = ssrc->bytesperpixel;
4328  s->bpp = ssrc->bpp;
4329  s->bpp_index = ssrc->bpp_index;
4330  memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
4331  memcpy(&s->lf_delta, &ssrc->lf_delta, sizeof(s->lf_delta));
4332  if (ssrc->segmentation.enabled) {
4333  memcpy(&s->segmentation.feat, &ssrc->segmentation.feat,
4334  sizeof(s->segmentation.feat));
4335  }
4336 
4337  return 0;
4338 }
4339 
4340 static const AVProfile profiles[] = {
4341  { FF_PROFILE_VP9_0, "Profile 0" },
4342  { FF_PROFILE_VP9_1, "Profile 1" },
4343  { FF_PROFILE_VP9_2, "Profile 2" },
4344  { FF_PROFILE_VP9_3, "Profile 3" },
4345  { FF_PROFILE_UNKNOWN },
4346 };
4347 
4349  .name = "vp9",
4350  .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
4351  .type = AVMEDIA_TYPE_VIDEO,
4352  .id = AV_CODEC_ID_VP9,
4353  .priv_data_size = sizeof(VP9Context),
4354  .init = vp9_decode_init,
4355  .close = vp9_decode_free,
4356  .decode = vp9_decode_frame,
4357  .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
4358  .flush = vp9_decode_flush,
4359  .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
4360  .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),
4361  .profiles = NULL_IF_CONFIG_SMALL(profiles),
4362 };