vp9block.c
1 /*
2  * VP9 compatible video decoder
3  *
4  * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5  * Copyright (C) 2013 Clément Bœsch <u pkh me>
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 #include "libavutil/avassert.h"
25 
26 #include "threadframe.h"
27 #include "vp89_rac.h"
28 #include "vp9.h"
29 #include "vp9data.h"
30 #include "vp9dec.h"
31 #include "vpx_rac.h"
32 
33 static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h,
34  ptrdiff_t stride, int v)
35 {
36  switch (w) {
37  case 1:
38  do {
39  *ptr = v;
40  ptr += stride;
41  } while (--h);
42  break;
43  case 2: {
44  int v16 = v * 0x0101;
45  do {
46  AV_WN16A(ptr, v16);
47  ptr += stride;
48  } while (--h);
49  break;
50  }
51  case 4: {
52  uint32_t v32 = v * 0x01010101;
53  do {
54  AV_WN32A(ptr, v32);
55  ptr += stride;
56  } while (--h);
57  break;
58  }
59  case 8: {
60 #if HAVE_FAST_64BIT
61  uint64_t v64 = v * 0x0101010101010101ULL;
62  do {
63  AV_WN64A(ptr, v64);
64  ptr += stride;
65  } while (--h);
66 #else
67  uint32_t v32 = v * 0x01010101;
68  do {
69  AV_WN32A(ptr, v32);
70  AV_WN32A(ptr + 4, v32);
71  ptr += stride;
72  } while (--h);
73 #endif
74  break;
75  }
76  }
77 }
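/* A worked example of the byte-splat used above: with v = 3 and w = 4,
 * v * 0x01010101 == 0x03030303, so one aligned 32-bit store writes the
 * context value into four adjacent bytes per row. The same pattern
 * scales to 16-bit stores for w == 2 and 64-bit stores for w == 8. */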
78 
79 static void decode_mode(VP9TileData *td)
80 {
81  static const uint8_t left_ctx[N_BS_SIZES] = {
82  0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
83  };
84  static const uint8_t above_ctx[N_BS_SIZES] = {
85  0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
86  };
87  static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
88  TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
89  TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
90  };
91  const VP9Context *s = td->s;
92  VP9Block *b = td->b;
93  int row = td->row, col = td->col, row7 = td->row7;
94  enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
95  int bw4 = ff_vp9_bwh_tab[1][b->bs][0], w4 = FFMIN(s->cols - col, bw4);
96  int bh4 = ff_vp9_bwh_tab[1][b->bs][1], h4 = FFMIN(s->rows - row, bh4), y;
97  int have_a = row > 0, have_l = col > td->tile_col_start;
98  int vref, filter_id;
99 
100  if (!s->s.h.segmentation.enabled) {
101  b->seg_id = 0;
102  } else if (s->s.h.keyframe || s->s.h.intraonly) {
103  b->seg_id = !s->s.h.segmentation.update_map ? 0 :
104  vp89_rac_get_tree(td->c, ff_vp9_segmentation_tree,
105  s->s.h.segmentation.prob);
106  } else if (!s->s.h.segmentation.update_map ||
107  (s->s.h.segmentation.temporal &&
108  vpx_rac_get_prob(td->c,
109  s->s.h.segmentation.pred_prob[s->above_segpred_ctx[col] +
110  td->left_segpred_ctx[row7]]))) {
111  if (!s->s.h.errorres && s->s.frames[REF_FRAME_SEGMAP].segmentation_map) {
112  int pred = 8, x;
113  uint8_t *refsegmap = s->s.frames[REF_FRAME_SEGMAP].segmentation_map;
114 
115  if (!s->s.frames[REF_FRAME_SEGMAP].uses_2pass)
116  ff_thread_await_progress(&s->s.frames[REF_FRAME_SEGMAP].tf, row >> 3, 0);
117  for (y = 0; y < h4; y++) {
118  int idx_base = (y + row) * 8 * s->sb_cols + col;
119  for (x = 0; x < w4; x++)
120  pred = FFMIN(pred, refsegmap[idx_base + x]);
121  }
122  av_assert1(pred < 8);
123  b->seg_id = pred;
124  } else {
125  b->seg_id = 0;
126  }
127 
128  memset(&s->above_segpred_ctx[col], 1, w4);
129  memset(&td->left_segpred_ctx[row7], 1, h4);
130  } else {
131  b->seg_id = vp89_rac_get_tree(td->c, ff_vp9_segmentation_tree,
132  s->s.h.segmentation.prob);
133 
134  memset(&s->above_segpred_ctx[col], 0, w4);
135  memset(&td->left_segpred_ctx[row7], 0, h4);
136  }
137  if (s->s.h.segmentation.enabled &&
138  (s->s.h.segmentation.update_map || s->s.h.keyframe || s->s.h.intraonly)) {
139  setctx_2d(&s->s.frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
140  bw4, bh4, 8 * s->sb_cols, b->seg_id);
141  }
142 
143  b->skip = s->s.h.segmentation.enabled &&
144  s->s.h.segmentation.feat[b->seg_id].skip_enabled;
145  if (!b->skip) {
146  int c = td->left_skip_ctx[row7] + s->above_skip_ctx[col];
147  b->skip = vpx_rac_get_prob(td->c, s->prob.p.skip[c]);
148  td->counts.skip[c][b->skip]++;
149  }
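/* Note: td->counts tallies every (context, value) pair decoded here;
 * VP9 adapts its probabilities backward from these counts at the end
 * of the frame, which is why each decode is paired with a counter. */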
150 
151  if (s->s.h.keyframe || s->s.h.intraonly) {
152  b->intra = 1;
153  } else if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].ref_enabled) {
154  b->intra = !s->s.h.segmentation.feat[b->seg_id].ref_val;
155  } else {
156  int c, bit;
157 
158  if (have_a && have_l) {
159  c = s->above_intra_ctx[col] + td->left_intra_ctx[row7];
160  c += (c == 2);
161  } else {
162  c = have_a ? 2 * s->above_intra_ctx[col] :
163  have_l ? 2 * td->left_intra_ctx[row7] : 0;
164  }
165  bit = vpx_rac_get_prob(td->c, s->prob.p.intra[c]);
166  td->counts.intra[c][bit]++;
167  b->intra = !bit;
168  }
169 
170  if ((b->intra || !b->skip) && s->s.h.txfmmode == TX_SWITCHABLE) {
171  int c;
172  if (have_a) {
173  if (have_l) {
174  c = (s->above_skip_ctx[col] ? max_tx :
175  s->above_txfm_ctx[col]) +
176  (td->left_skip_ctx[row7] ? max_tx :
177  td->left_txfm_ctx[row7]) > max_tx;
178  } else {
179  c = s->above_skip_ctx[col] ? 1 :
180  (s->above_txfm_ctx[col] * 2 > max_tx);
181  }
182  } else if (have_l) {
183  c = td->left_skip_ctx[row7] ? 1 :
184  (td->left_txfm_ctx[row7] * 2 > max_tx);
185  } else {
186  c = 1;
187  }
188  switch (max_tx) {
189  case TX_32X32:
190  b->tx = vpx_rac_get_prob(td->c, s->prob.p.tx32p[c][0]);
191  if (b->tx) {
192  b->tx += vpx_rac_get_prob(td->c, s->prob.p.tx32p[c][1]);
193  if (b->tx == 2)
194  b->tx += vpx_rac_get_prob(td->c, s->prob.p.tx32p[c][2]);
195  }
196  td->counts.tx32p[c][b->tx]++;
197  break;
198  case TX_16X16:
199  b->tx = vpx_rac_get_prob(td->c, s->prob.p.tx16p[c][0]);
200  if (b->tx)
201  b->tx += vpx_rac_get_prob(td->c, s->prob.p.tx16p[c][1]);
202  td->counts.tx16p[c][b->tx]++;
203  break;
204  case TX_8X8:
205  b->tx = vpx_rac_get_prob(td->c, s->prob.p.tx8p[c]);
206  td->counts.tx8p[c][b->tx]++;
207  break;
208  case TX_4X4:
209  b->tx = TX_4X4;
210  break;
211  }
212  } else {
213  b->tx = FFMIN(max_tx, s->s.h.txfmmode);
214  }
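/* The transform size is coded as a short unary-style tree: each extra
 * bit steps the size up (4x4 -> 8x8 -> 16x16 -> 32x32), capped at
 * max_tx for this block size, or is forced from the frame-wide
 * txfmmode when that mode is not TX_SWITCHABLE. */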
215 
216  if (s->s.h.keyframe || s->s.h.intraonly) {
217  uint8_t *a = &s->above_mode_ctx[col * 2];
218  uint8_t *l = &td->left_mode_ctx[(row7) << 1];
219 
220  b->comp = 0;
221  if (b->bs > BS_8x8) {
222  // FIXME the memory storage intermediates here aren't really
223  // necessary, they're just there to make the code slightly
224  // simpler for now
225  b->mode[0] =
226  a[0] = vp89_rac_get_tree(td->c, ff_vp9_intramode_tree,
227  ff_vp9_default_kf_ymode_probs[a[0]][l[0]]);
228  if (b->bs != BS_8x4) {
229  b->mode[1] = vp89_rac_get_tree(td->c, ff_vp9_intramode_tree,
230  ff_vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
231  l[0] =
232  a[1] = b->mode[1];
233  } else {
234  l[0] =
235  a[1] =
236  b->mode[1] = b->mode[0];
237  }
238  if (b->bs != BS_4x8) {
239  b->mode[2] =
240  a[0] = vp89_rac_get_tree(td->c, ff_vp9_intramode_tree,
241  ff_vp9_default_kf_ymode_probs[a[0]][l[1]]);
242  if (b->bs != BS_8x4) {
243  b->mode[3] = vp89_rac_get_tree(td->c, ff_vp9_intramode_tree,
244  ff_vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
245  l[1] =
246  a[1] = b->mode[3];
247  } else {
248  l[1] =
249  a[1] =
250  b->mode[3] = b->mode[2];
251  }
252  } else {
253  b->mode[2] = b->mode[0];
254  l[1] =
255  a[1] =
256  b->mode[3] = b->mode[1];
257  }
258  } else {
259  b->mode[0] = vp89_rac_get_tree(td->c, ff_vp9_intramode_tree,
260  ff_vp9_default_kf_ymode_probs[*a][*l]);
261  b->mode[3] =
262  b->mode[2] =
263  b->mode[1] = b->mode[0];
264  // FIXME this can probably be optimized
265  memset(a, b->mode[0], ff_vp9_bwh_tab[0][b->bs][0]);
266  memset(l, b->mode[0], ff_vp9_bwh_tab[0][b->bs][1]);
267  }
268  b->uvmode = vp89_rac_get_tree(td->c, ff_vp9_intramode_tree,
269  ff_vp9_default_kf_uvmode_probs[b->mode[3]]);
270  } else if (b->intra) {
271  b->comp = 0;
272  if (b->bs > BS_8x8) {
273  b->mode[0] = vp89_rac_get_tree(td->c, ff_vp9_intramode_tree,
274  s->prob.p.y_mode[0]);
275  td->counts.y_mode[0][b->mode[0]]++;
276  if (b->bs != BS_8x4) {
277  b->mode[1] = vp89_rac_get_tree(td->c, ff_vp9_intramode_tree,
278  s->prob.p.y_mode[0]);
279  td->counts.y_mode[0][b->mode[1]]++;
280  } else {
281  b->mode[1] = b->mode[0];
282  }
283  if (b->bs != BS_4x8) {
284  b->mode[2] = vp89_rac_get_tree(td->c, ff_vp9_intramode_tree,
285  s->prob.p.y_mode[0]);
286  td->counts.y_mode[0][b->mode[2]]++;
287  if (b->bs != BS_8x4) {
288  b->mode[3] = vp89_rac_get_tree(td->c, ff_vp9_intramode_tree,
289  s->prob.p.y_mode[0]);
290  td->counts.y_mode[0][b->mode[3]]++;
291  } else {
292  b->mode[3] = b->mode[2];
293  }
294  } else {
295  b->mode[2] = b->mode[0];
296  b->mode[3] = b->mode[1];
297  }
298  } else {
299  static const uint8_t size_group[10] = {
300  3, 3, 3, 3, 2, 2, 2, 1, 1, 1
301  };
302  int sz = size_group[b->bs];
303 
304  b->mode[0] = vp89_rac_get_tree(td->c, ff_vp9_intramode_tree,
305  s->prob.p.y_mode[sz]);
306  b->mode[1] =
307  b->mode[2] =
308  b->mode[3] = b->mode[0];
309  td->counts.y_mode[sz][b->mode[3]]++;
310  }
311  b->uvmode = vp89_rac_get_tree(td->c, ff_vp9_intramode_tree,
312  s->prob.p.uv_mode[b->mode[3]]);
313  td->counts.uv_mode[b->mode[3]][b->uvmode]++;
314  } else {
315  static const uint8_t inter_mode_ctx_lut[14][14] = {
316  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
317  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
318  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
319  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
320  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
321  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
322  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
323  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
324  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
325  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
326  { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
327  { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
328  { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
329  { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
330  };
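/* inter_mode_ctx_lut maps the (above, left) mode pair to one of seven
 * inter-mode coding contexts; rows/columns 0-9 cover the intra modes
 * and 10-13 the inter modes (NEARESTMV..NEWMV), hence the 14x14 size. */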
331 
332  if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].ref_enabled) {
333  av_assert2(s->s.h.segmentation.feat[b->seg_id].ref_val != 0);
334  b->comp = 0;
335  b->ref[0] = s->s.h.segmentation.feat[b->seg_id].ref_val - 1;
336  } else {
337  // read comp_pred flag
338  if (s->s.h.comppredmode != PRED_SWITCHABLE) {
339  b->comp = s->s.h.comppredmode == PRED_COMPREF;
340  } else {
341  int c;
342 
343  // FIXME add intra as ref=0xff (or -1) to make these easier?
344  if (have_a) {
345  if (have_l) {
346  if (s->above_comp_ctx[col] && td->left_comp_ctx[row7]) {
347  c = 4;
348  } else if (s->above_comp_ctx[col]) {
349  c = 2 + (td->left_intra_ctx[row7] ||
350  td->left_ref_ctx[row7] == s->s.h.fixcompref);
351  } else if (td->left_comp_ctx[row7]) {
352  c = 2 + (s->above_intra_ctx[col] ||
353  s->above_ref_ctx[col] == s->s.h.fixcompref);
354  } else {
355  c = (!s->above_intra_ctx[col] &&
356  s->above_ref_ctx[col] == s->s.h.fixcompref) ^
357  (!td->left_intra_ctx[row7] &&
358  td->left_ref_ctx[row & 7] == s->s.h.fixcompref);
359  }
360  } else {
361  c = s->above_comp_ctx[col] ? 3 :
362  (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->s.h.fixcompref);
363  }
364  } else if (have_l) {
365  c = td->left_comp_ctx[row7] ? 3 :
366  (!td->left_intra_ctx[row7] && td->left_ref_ctx[row7] == s->s.h.fixcompref);
367  } else {
368  c = 1;
369  }
370  b->comp = vpx_rac_get_prob(td->c, s->prob.p.comp[c]);
371  td->counts.comp[c][b->comp]++;
372  }
373 
374  // read actual references
375  // FIXME probably cache a few variables here to prevent repetitive
376  // memory accesses below
377  if (b->comp) { /* two references */
378  int fix_idx = s->s.h.signbias[s->s.h.fixcompref], var_idx = !fix_idx, c, bit;
379 
380  b->ref[fix_idx] = s->s.h.fixcompref;
381  // FIXME can this codeblob be replaced by some sort of LUT?
382  if (have_a) {
383  if (have_l) {
384  if (s->above_intra_ctx[col]) {
385  if (td->left_intra_ctx[row7]) {
386  c = 2;
387  } else {
388  c = 1 + 2 * (td->left_ref_ctx[row7] != s->s.h.varcompref[1]);
389  }
390  } else if (td->left_intra_ctx[row7]) {
391  c = 1 + 2 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]);
392  } else {
393  int refl = td->left_ref_ctx[row7], refa = s->above_ref_ctx[col];
394 
395  if (refl == refa && refa == s->s.h.varcompref[1]) {
396  c = 0;
397  } else if (!td->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
398  if ((refa == s->s.h.fixcompref && refl == s->s.h.varcompref[0]) ||
399  (refl == s->s.h.fixcompref && refa == s->s.h.varcompref[0])) {
400  c = 4;
401  } else {
402  c = (refa == refl) ? 3 : 1;
403  }
404  } else if (!td->left_comp_ctx[row7]) {
405  if (refa == s->s.h.varcompref[1] && refl != s->s.h.varcompref[1]) {
406  c = 1;
407  } else {
408  c = (refl == s->s.h.varcompref[1] &&
409  refa != s->s.h.varcompref[1]) ? 2 : 4;
410  }
411  } else if (!s->above_comp_ctx[col]) {
412  if (refl == s->s.h.varcompref[1] && refa != s->s.h.varcompref[1]) {
413  c = 1;
414  } else {
415  c = (refa == s->s.h.varcompref[1] &&
416  refl != s->s.h.varcompref[1]) ? 2 : 4;
417  }
418  } else {
419  c = (refl == refa) ? 4 : 2;
420  }
421  }
422  } else {
423  if (s->above_intra_ctx[col]) {
424  c = 2;
425  } else if (s->above_comp_ctx[col]) {
426  c = 4 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]);
427  } else {
428  c = 3 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]);
429  }
430  }
431  } else if (have_l) {
432  if (td->left_intra_ctx[row7]) {
433  c = 2;
434  } else if (td->left_comp_ctx[row7]) {
435  c = 4 * (td->left_ref_ctx[row7] != s->s.h.varcompref[1]);
436  } else {
437  c = 3 * (td->left_ref_ctx[row7] != s->s.h.varcompref[1]);
438  }
439  } else {
440  c = 2;
441  }
442  bit = vpx_rac_get_prob(td->c, s->prob.p.comp_ref[c]);
443  b->ref[var_idx] = s->s.h.varcompref[bit];
444  td->counts.comp_ref[c][bit]++;
445  } else /* single reference */ {
446  int bit, c;
447 
448  if (have_a && !s->above_intra_ctx[col]) {
449  if (have_l && !td->left_intra_ctx[row7]) {
450  if (td->left_comp_ctx[row7]) {
451  if (s->above_comp_ctx[col]) {
452  c = 1 + (!s->s.h.fixcompref || !td->left_ref_ctx[row7] ||
453  !s->above_ref_ctx[col]);
454  } else {
455  c = (3 * !s->above_ref_ctx[col]) +
456  (!s->s.h.fixcompref || !td->left_ref_ctx[row7]);
457  }
458  } else if (s->above_comp_ctx[col]) {
459  c = (3 * !td->left_ref_ctx[row7]) +
460  (!s->s.h.fixcompref || !s->above_ref_ctx[col]);
461  } else {
462  c = 2 * !td->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
463  }
464  } else if (s->above_intra_ctx[col]) {
465  c = 2;
466  } else if (s->above_comp_ctx[col]) {
467  c = 1 + (!s->s.h.fixcompref || !s->above_ref_ctx[col]);
468  } else {
469  c = 4 * (!s->above_ref_ctx[col]);
470  }
471  } else if (have_l && !td->left_intra_ctx[row7]) {
472  if (td->left_intra_ctx[row7]) {
473  c = 2;
474  } else if (td->left_comp_ctx[row7]) {
475  c = 1 + (!s->s.h.fixcompref || !td->left_ref_ctx[row7]);
476  } else {
477  c = 4 * (!td->left_ref_ctx[row7]);
478  }
479  } else {
480  c = 2;
481  }
482  bit = vpx_rac_get_prob(td->c, s->prob.p.single_ref[c][0]);
483  td->counts.single_ref[c][0][bit]++;
484  if (!bit) {
485  b->ref[0] = 0;
486  } else {
487  // FIXME can this codeblob be replaced by some sort of LUT?
488  if (have_a) {
489  if (have_l) {
490  if (td->left_intra_ctx[row7]) {
491  if (s->above_intra_ctx[col]) {
492  c = 2;
493  } else if (s->above_comp_ctx[col]) {
494  c = 1 + 2 * (s->s.h.fixcompref == 1 ||
495  s->above_ref_ctx[col] == 1);
496  } else if (!s->above_ref_ctx[col]) {
497  c = 3;
498  } else {
499  c = 4 * (s->above_ref_ctx[col] == 1);
500  }
501  } else if (s->above_intra_ctx[col]) {
502  if (td->left_intra_ctx[row7]) {
503  c = 2;
504  } else if (td->left_comp_ctx[row7]) {
505  c = 1 + 2 * (s->s.h.fixcompref == 1 ||
506  td->left_ref_ctx[row7] == 1);
507  } else if (!td->left_ref_ctx[row7]) {
508  c = 3;
509  } else {
510  c = 4 * (td->left_ref_ctx[row7] == 1);
511  }
512  } else if (s->above_comp_ctx[col]) {
513  if (td->left_comp_ctx[row7]) {
514  if (td->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
515  c = 3 * (s->s.h.fixcompref == 1 ||
516  td->left_ref_ctx[row7] == 1);
517  } else {
518  c = 2;
519  }
520  } else if (!td->left_ref_ctx[row7]) {
521  c = 1 + 2 * (s->s.h.fixcompref == 1 ||
522  s->above_ref_ctx[col] == 1);
523  } else {
524  c = 3 * (td->left_ref_ctx[row7] == 1) +
525  (s->s.h.fixcompref == 1 || s->above_ref_ctx[col] == 1);
526  }
527  } else if (td->left_comp_ctx[row7]) {
528  if (!s->above_ref_ctx[col]) {
529  c = 1 + 2 * (s->s.h.fixcompref == 1 ||
530  td->left_ref_ctx[row7] == 1);
531  } else {
532  c = 3 * (s->above_ref_ctx[col] == 1) +
533  (s->s.h.fixcompref == 1 || td->left_ref_ctx[row7] == 1);
534  }
535  } else if (!s->above_ref_ctx[col]) {
536  if (!td->left_ref_ctx[row7]) {
537  c = 3;
538  } else {
539  c = 4 * (td->left_ref_ctx[row7] == 1);
540  }
541  } else if (!td->left_ref_ctx[row7]) {
542  c = 4 * (s->above_ref_ctx[col] == 1);
543  } else {
544  c = 2 * (td->left_ref_ctx[row7] == 1) +
545  2 * (s->above_ref_ctx[col] == 1);
546  }
547  } else {
548  if (s->above_intra_ctx[col] ||
549  (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
550  c = 2;
551  } else if (s->above_comp_ctx[col]) {
552  c = 3 * (s->s.h.fixcompref == 1 || s->above_ref_ctx[col] == 1);
553  } else {
554  c = 4 * (s->above_ref_ctx[col] == 1);
555  }
556  }
557  } else if (have_l) {
558  if (td->left_intra_ctx[row7] ||
559  (!td->left_comp_ctx[row7] && !td->left_ref_ctx[row7])) {
560  c = 2;
561  } else if (td->left_comp_ctx[row7]) {
562  c = 3 * (s->s.h.fixcompref == 1 || td->left_ref_ctx[row7] == 1);
563  } else {
564  c = 4 * (td->left_ref_ctx[row7] == 1);
565  }
566  } else {
567  c = 2;
568  }
569  bit = vpx_rac_get_prob(td->c, s->prob.p.single_ref[c][1]);
570  td->counts.single_ref[c][1][bit]++;
571  b->ref[0] = 1 + bit;
572  }
573  }
574  }
575 
576  if (b->bs <= BS_8x8) {
577  if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].skip_enabled) {
578  b->mode[0] =
579  b->mode[1] =
580  b->mode[2] =
581  b->mode[3] = ZEROMV;
582  } else {
583  static const uint8_t off[10] = {
584  3, 0, 0, 1, 0, 0, 0, 0, 0, 0
585  };
586 
587  // FIXME this needs to use the LUT tables from find_ref_mvs
588  // because not all are -1,0/0,-1
589  int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
590  [td->left_mode_ctx[row7 + off[b->bs]]];
591 
592  b->mode[0] = vp89_rac_get_tree(td->c, ff_vp9_inter_mode_tree,
593  s->prob.p.mv_mode[c]);
594  b->mode[1] =
595  b->mode[2] =
596  b->mode[3] = b->mode[0];
597  td->counts.mv_mode[c][b->mode[0] - 10]++;
598  }
599  }
600 
601  if (s->s.h.filtermode == FILTER_SWITCHABLE) {
602  int c;
603 
604  if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
605  if (have_l && td->left_mode_ctx[row7] >= NEARESTMV) {
606  c = s->above_filter_ctx[col] == td->left_filter_ctx[row7] ?
607  td->left_filter_ctx[row7] : 3;
608  } else {
609  c = s->above_filter_ctx[col];
610  }
611  } else if (have_l && td->left_mode_ctx[row7] >= NEARESTMV) {
612  c = td->left_filter_ctx[row7];
613  } else {
614  c = 3;
615  }
616 
617  filter_id = vp89_rac_get_tree(td->c, ff_vp9_filter_tree,
618  s->prob.p.filter[c]);
619  td->counts.filter[c][filter_id]++;
620  b->filter = ff_vp9_filter_lut[filter_id];
621  } else {
622  b->filter = s->s.h.filtermode;
623  }
624 
625  if (b->bs > BS_8x8) {
626  int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][td->left_mode_ctx[row7]];
627 
628  b->mode[0] = vp89_rac_get_tree(td->c, ff_vp9_inter_mode_tree,
629  s->prob.p.mv_mode[c]);
630  td->counts.mv_mode[c][b->mode[0] - 10]++;
631  ff_vp9_fill_mv(td, b->mv[0], b->mode[0], 0);
632 
633  if (b->bs != BS_8x4) {
634  b->mode[1] = vp89_rac_get_tree(td->c, ff_vp9_inter_mode_tree,
635  s->prob.p.mv_mode[c]);
636  td->counts.mv_mode[c][b->mode[1] - 10]++;
637  ff_vp9_fill_mv(td, b->mv[1], b->mode[1], 1);
638  } else {
639  b->mode[1] = b->mode[0];
640  AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
641  AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
642  }
643 
644  if (b->bs != BS_4x8) {
645  b->mode[2] = vp89_rac_get_tree(td->c, ff_vp9_inter_mode_tree,
646  s->prob.p.mv_mode[c]);
647  td->counts.mv_mode[c][b->mode[2] - 10]++;
648  ff_vp9_fill_mv(td, b->mv[2], b->mode[2], 2);
649 
650  if (b->bs != BS_8x4) {
651  b->mode[3] = vp89_rac_get_tree(td->c, ff_vp9_inter_mode_tree,
652  s->prob.p.mv_mode[c]);
653  td->counts.mv_mode[c][b->mode[3] - 10]++;
654  ff_vp9_fill_mv(td, b->mv[3], b->mode[3], 3);
655  } else {
656  b->mode[3] = b->mode[2];
657  AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
658  AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
659  }
660  } else {
661  b->mode[2] = b->mode[0];
662  AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
663  AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
664  b->mode[3] = b->mode[1];
665  AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
666  AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
667  }
668  } else {
669  ff_vp9_fill_mv(td, b->mv[0], b->mode[0], -1);
670  AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
671  AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
672  AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
673  AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
674  AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
675  AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
676  }
677 
678  vref = b->ref[b->comp ? s->s.h.signbias[s->s.h.varcompref[0]] : 0];
679  }
680 
681 #if HAVE_FAST_64BIT
682 #define SPLAT_CTX(var, val, n) \
683  switch (n) { \
684  case 1: var = val; break; \
685  case 2: AV_WN16A(&var, val * 0x0101); break; \
686  case 4: AV_WN32A(&var, val * 0x01010101); break; \
687  case 8: AV_WN64A(&var, val * 0x0101010101010101ULL); break; \
688  case 16: { \
689  uint64_t v64 = val * 0x0101010101010101ULL; \
690  AV_WN64A( &var, v64); \
691  AV_WN64A(&((uint8_t *) &var)[8], v64); \
692  break; \
693  } \
694  }
695 #else
696 #define SPLAT_CTX(var, val, n) \
697  switch (n) { \
698  case 1: var = val; break; \
699  case 2: AV_WN16A(&var, val * 0x0101); break; \
700  case 4: AV_WN32A(&var, val * 0x01010101); break; \
701  case 8: { \
702  uint32_t v32 = val * 0x01010101; \
703  AV_WN32A( &var, v32); \
704  AV_WN32A(&((uint8_t *) &var)[4], v32); \
705  break; \
706  } \
707  case 16: { \
708  uint32_t v32 = val * 0x01010101; \
709  AV_WN32A( &var, v32); \
710  AV_WN32A(&((uint8_t *) &var)[4], v32); \
711  AV_WN32A(&((uint8_t *) &var)[8], v32); \
712  AV_WN32A(&((uint8_t *) &var)[12], v32); \
713  break; \
714  } \
715  }
716 #endif
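/* SPLAT_CTX/SET_CTXS replicate this block's decoded state (skip flag,
 * tx size, partition, mode, reference and filter) into the above/left
 * context arrays for every unit the block spans, so that subsequent
 * blocks can derive their coding contexts from their neighbours. */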
717 
718  switch (ff_vp9_bwh_tab[1][b->bs][0]) {
719 #define SET_CTXS(perf, dir, off, n) \
720  do { \
721  SPLAT_CTX(perf->dir##_skip_ctx[off], b->skip, n); \
722  SPLAT_CTX(perf->dir##_txfm_ctx[off], b->tx, n); \
723  SPLAT_CTX(perf->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
724  if (!s->s.h.keyframe && !s->s.h.intraonly) { \
725  SPLAT_CTX(perf->dir##_intra_ctx[off], b->intra, n); \
726  SPLAT_CTX(perf->dir##_comp_ctx[off], b->comp, n); \
727  SPLAT_CTX(perf->dir##_mode_ctx[off], b->mode[3], n); \
728  if (!b->intra) { \
729  SPLAT_CTX(perf->dir##_ref_ctx[off], vref, n); \
730  if (s->s.h.filtermode == FILTER_SWITCHABLE) { \
731  SPLAT_CTX(perf->dir##_filter_ctx[off], filter_id, n); \
732  } \
733  } \
734  } \
735  } while (0)
736  case 1: SET_CTXS(s, above, col, 1); break;
737  case 2: SET_CTXS(s, above, col, 2); break;
738  case 4: SET_CTXS(s, above, col, 4); break;
739  case 8: SET_CTXS(s, above, col, 8); break;
740  }
741  switch (ff_vp9_bwh_tab[1][b->bs][1]) {
742  case 1: SET_CTXS(td, left, row7, 1); break;
743  case 2: SET_CTXS(td, left, row7, 2); break;
744  case 4: SET_CTXS(td, left, row7, 4); break;
745  case 8: SET_CTXS(td, left, row7, 8); break;
746  }
747 #undef SPLAT_CTX
748 #undef SET_CTXS
749 
750  if (!s->s.h.keyframe && !s->s.h.intraonly) {
751  if (b->bs > BS_8x8) {
752  int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
753 
754  AV_COPY32(&td->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
755  AV_COPY32(&td->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
756  AV_WN32A(&td->left_mv_ctx[row7 * 2 + 1][0], mv0);
757  AV_WN32A(&td->left_mv_ctx[row7 * 2 + 1][1], mv1);
758  AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
759  AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
760  AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
761  AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
762  } else {
763  int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
764 
765  for (n = 0; n < w4 * 2; n++) {
766  AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
767  AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
768  }
769  for (n = 0; n < h4 * 2; n++) {
770  AV_WN32A(&td->left_mv_ctx[row7 * 2 + n][0], mv0);
771  AV_WN32A(&td->left_mv_ctx[row7 * 2 + n][1], mv1);
772  }
773  }
774  }
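/* Note the asymmetry above: for sub-8x8 partitions the above row keeps
 * the bottom-edge vectors (mv[2], mv[3]) and the left column the
 * right-edge vectors (mv[1], mv[3]); otherwise mv[3] (equal to mv[0])
 * is splatted across the whole span. */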
775 
776  // FIXME kinda ugly
777  for (y = 0; y < h4; y++) {
778  int x, o = (row + y) * s->sb_cols * 8 + col;
779  VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[o];
780 
781  if (b->intra) {
782  for (x = 0; x < w4; x++) {
783  mv[x].ref[0] =
784  mv[x].ref[1] = -1;
785  }
786  } else if (b->comp) {
787  for (x = 0; x < w4; x++) {
788  mv[x].ref[0] = b->ref[0];
789  mv[x].ref[1] = b->ref[1];
790  AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
791  AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
792  }
793  } else {
794  for (x = 0; x < w4; x++) {
795  mv[x].ref[0] = b->ref[0];
796  mv[x].ref[1] = -1;
797  AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
798  }
799  }
800  }
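/* The loop above writes per-4x4 reference indices and motion vectors
 * into the frame-wide VP9mvrefPair buffer; intra positions are marked
 * with ref = -1, and the next frame reads this buffer for temporal MV
 * prediction. */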
801 }
802 
803 // FIXME merge cnt/eob arguments?
804 static av_always_inline int
805 decode_coeffs_b_generic(VPXRangeCoder *c, int16_t *coef, int n_coeffs,
806  int is_tx32x32, int is8bitsperpixel, int bpp, unsigned (*cnt)[6][3],
807  unsigned (*eob)[6][2], const uint8_t (*p)[6][11],
808  int nnz, const int16_t *scan, const int16_t (*nb)[2],
809  const int16_t *band_counts, const int16_t *qmul)
810 {
811  int i = 0, band = 0, band_left = band_counts[band];
812  const uint8_t *tp = p[0][nnz];
813  uint8_t cache[1024];
814 
815  do {
816  int val, rc;
817 
818  val = vpx_rac_get_prob_branchy(c, tp[0]); // eob
819  eob[band][nnz][val]++;
820  if (!val)
821  break;
822 
823 skip_eob:
824  if (!vpx_rac_get_prob_branchy(c, tp[1])) { // zero
825  cnt[band][nnz][0]++;
826  if (!--band_left)
827  band_left = band_counts[++band];
828  cache[scan[i]] = 0;
829  nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
830  tp = p[band][nnz];
831  if (++i == n_coeffs)
832  break; // invalid input; blocks should end with EOB
833  goto skip_eob;
834  }
835 
836  rc = scan[i];
837  if (!vpx_rac_get_prob_branchy(c, tp[2])) { // one
838  cnt[band][nnz][1]++;
839  val = 1;
840  cache[rc] = 1;
841  } else {
842  cnt[band][nnz][2]++;
843  if (!vpx_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
844  if (!vpx_rac_get_prob_branchy(c, tp[4])) {
845  cache[rc] = val = 2;
846  } else {
847  val = 3 + vpx_rac_get_prob(c, tp[5]);
848  cache[rc] = 3;
849  }
850  } else if (!vpx_rac_get_prob_branchy(c, tp[6])) { // cat1/2
851  cache[rc] = 4;
852  if (!vpx_rac_get_prob_branchy(c, tp[7])) {
853  val = vpx_rac_get_prob(c, 159) + 5;
854  } else {
855  val = (vpx_rac_get_prob(c, 165) << 1) + 7;
856  val += vpx_rac_get_prob(c, 145);
857  }
858  } else { // cat 3-6
859  cache[rc] = 5;
860  if (!vpx_rac_get_prob_branchy(c, tp[8])) {
861  if (!vpx_rac_get_prob_branchy(c, tp[9])) {
862  val = 11 + (vpx_rac_get_prob(c, 173) << 2);
863  val += (vpx_rac_get_prob(c, 148) << 1);
864  val += vpx_rac_get_prob(c, 140);
865  } else {
866  val = 19 + (vpx_rac_get_prob(c, 176) << 3);
867  val += (vpx_rac_get_prob(c, 155) << 2);
868  val += (vpx_rac_get_prob(c, 140) << 1);
869  val += vpx_rac_get_prob(c, 135);
870  }
871  } else if (!vpx_rac_get_prob_branchy(c, tp[10])) {
872  val = (vpx_rac_get_prob(c, 180) << 4) + 35;
873  val += (vpx_rac_get_prob(c, 157) << 3);
874  val += (vpx_rac_get_prob(c, 141) << 2);
875  val += (vpx_rac_get_prob(c, 134) << 1);
876  val += vpx_rac_get_prob(c, 130);
877  } else {
878  val = 67;
879  if (!is8bitsperpixel) {
880  if (bpp == 12) {
881  val += vpx_rac_get_prob(c, 255) << 17;
882  val += vpx_rac_get_prob(c, 255) << 16;
883  }
884  val += (vpx_rac_get_prob(c, 255) << 15);
885  val += (vpx_rac_get_prob(c, 255) << 14);
886  }
887  val += (vpx_rac_get_prob(c, 254) << 13);
888  val += (vpx_rac_get_prob(c, 254) << 12);
889  val += (vpx_rac_get_prob(c, 254) << 11);
890  val += (vpx_rac_get_prob(c, 252) << 10);
891  val += (vpx_rac_get_prob(c, 249) << 9);
892  val += (vpx_rac_get_prob(c, 243) << 8);
893  val += (vpx_rac_get_prob(c, 230) << 7);
894  val += (vpx_rac_get_prob(c, 196) << 6);
895  val += (vpx_rac_get_prob(c, 177) << 5);
896  val += (vpx_rac_get_prob(c, 153) << 4);
897  val += (vpx_rac_get_prob(c, 140) << 3);
898  val += (vpx_rac_get_prob(c, 133) << 2);
899  val += (vpx_rac_get_prob(c, 130) << 1);
900  val += vpx_rac_get_prob(c, 129);
901  }
902  }
903  }
904 #define STORE_COEF(c, i, v) do { \
905  if (is8bitsperpixel) { \
906  c[i] = v; \
907  } else { \
908  AV_WN32A(&c[i * 2], v); \
909  } \
910 } while (0)
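/* For 8 bpp a coefficient always fits in one int16_t; at 10/12 bpp the
 * dequantized value can exceed 16 bits, so it is written as a 32-bit
 * word spanning two int16_t slots. */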
911  if (!--band_left)
912  band_left = band_counts[++band];
913  if (is_tx32x32)
914  STORE_COEF(coef, rc, (int)((vp89_rac_get(c) ? -val : val) * (unsigned)qmul[!!i]) / 2);
915  else
916  STORE_COEF(coef, rc, (vp89_rac_get(c) ? -val : val) * (unsigned)qmul[!!i]);
917  nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
918  tp = p[band][nnz];
919  } while (++i < n_coeffs);
920 
921  return i;
922 }
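/* Token ranges produced by the category branches above: "one" = 1,
 * two/three/four = 2-4, cat1 = 5-6, cat2 = 7-10, cat3 = 11-18,
 * cat4 = 19-34, cat5 = 35-66, and cat6 = 67 plus up to 14 (8 bpp),
 * 16 (10 bpp) or 18 (12 bpp) extra bits. cache[] keeps each position's
 * token class (clamped at 5) so the next coefficient's context is the
 * rounded mean of its two scan neighbours. */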
923 
924 static int decode_coeffs_b_8bpp(VP9TileData *td, int16_t *coef, int n_coeffs,
925  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
926  const uint8_t (*p)[6][11], int nnz, const int16_t *scan,
927  const int16_t (*nb)[2], const int16_t *band_counts,
928  const int16_t *qmul)
929 {
930  return decode_coeffs_b_generic(td->c, coef, n_coeffs, 0, 1, 8, cnt, eob, p,
931  nnz, scan, nb, band_counts, qmul);
932 }
933 
934 static int decode_coeffs_b32_8bpp(VP9TileData *td, int16_t *coef, int n_coeffs,
935  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
936  const uint8_t (*p)[6][11], int nnz, const int16_t *scan,
937  const int16_t (*nb)[2], const int16_t *band_counts,
938  const int16_t *qmul)
939 {
940  return decode_coeffs_b_generic(td->c, coef, n_coeffs, 1, 1, 8, cnt, eob, p,
941  nnz, scan, nb, band_counts, qmul);
942 }
943 
944 static int decode_coeffs_b_16bpp(VP9TileData *td, int16_t *coef, int n_coeffs,
945  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
946  const uint8_t (*p)[6][11], int nnz, const int16_t *scan,
947  const int16_t (*nb)[2], const int16_t *band_counts,
948  const int16_t *qmul)
949 {
950  return decode_coeffs_b_generic(td->c, coef, n_coeffs, 0, 0, td->s->s.h.bpp, cnt, eob, p,
951  nnz, scan, nb, band_counts, qmul);
952 }
953 
954 static int decode_coeffs_b32_16bpp(VP9TileData *td, int16_t *coef, int n_coeffs,
955  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
956  const uint8_t (*p)[6][11], int nnz, const int16_t *scan,
957  const int16_t (*nb)[2], const int16_t *band_counts,
958  const int16_t *qmul)
959 {
960  return decode_coeffs_b_generic(td->c, coef, n_coeffs, 1, 0, td->s->s.h.bpp, cnt, eob, p,
961  nnz, scan, nb, band_counts, qmul);
962 }
963 
964 static av_always_inline int decode_coeffs(VP9TileData *td, int is8bitsperpixel)
965 {
966  const VP9Context *s = td->s;
967  VP9Block *b = td->b;
968  int row = td->row, col = td->col;
969  const uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
970  unsigned (*c)[6][3] = td->counts.coef[b->tx][0 /* y */][!b->intra];
971  unsigned (*e)[6][2] = td->counts.eob[b->tx][0 /* y */][!b->intra];
972  int w4 = ff_vp9_bwh_tab[1][b->bs][0] << 1, h4 = ff_vp9_bwh_tab[1][b->bs][1] << 1;
973  int end_x = FFMIN(2 * (s->cols - col), w4);
974  int end_y = FFMIN(2 * (s->rows - row), h4);
975  int n, pl, x, y, ret;
976  const int16_t (*qmul)[2] = s->s.h.segmentation.feat[b->seg_id].qmul;
977  int tx = 4 * s->s.h.lossless + b->tx;
978  const int16_t * const *yscans = ff_vp9_scans[tx];
979  const int16_t (* const * ynbs)[2] = ff_vp9_scans_nb[tx];
980  const int16_t *uvscan = ff_vp9_scans[b->uvtx][DCT_DCT];
981  const int16_t (*uvnb)[2] = ff_vp9_scans_nb[b->uvtx][DCT_DCT];
982  uint8_t *a = &s->above_y_nnz_ctx[col * 2];
983  uint8_t *l = &td->left_y_nnz_ctx[(row & 7) << 1];
984  static const int16_t band_counts[4][8] = {
985  { 1, 2, 3, 4, 3, 16 - 13 },
986  { 1, 2, 3, 4, 11, 64 - 21 },
987  { 1, 2, 3, 4, 11, 256 - 21 },
988  { 1, 2, 3, 4, 11, 1024 - 21 },
989  };
990  const int16_t *y_band_counts = band_counts[b->tx];
991  const int16_t *uv_band_counts = band_counts[b->uvtx];
992  int bytesperpixel = is8bitsperpixel ? 1 : 2;
993  int total_coeff = 0;
994 
995 #define MERGE(la, end, step, rd) \
996  for (n = 0; n < end; n += step) \
997  la[n] = !!rd(&la[n])
998 #define MERGE_CTX(step, rd) \
999  do { \
1000  MERGE(l, end_y, step, rd); \
1001  MERGE(a, end_x, step, rd); \
1002  } while (0)
1003 
1004 #define DECODE_Y_COEF_LOOP(step, mode_index, v) \
1005  for (n = 0, y = 0; y < end_y; y += step) { \
1006  for (x = 0; x < end_x; x += step, n += step * step) { \
1007  enum TxfmType txtp = ff_vp9_intra_txfm_type[b->mode[mode_index]]; \
1008  ret = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
1009  (td, td->block + 16 * n * bytesperpixel, 16 * step * step, \
1010  c, e, p, a[x] + l[y], yscans[txtp], \
1011  ynbs[txtp], y_band_counts, qmul[0]); \
1012  a[x] = l[y] = !!ret; \
1013  total_coeff |= !!ret; \
1014  if (step >= 4) { \
1015  AV_WN16A(&td->eob[n], ret); \
1016  } else { \
1017  td->eob[n] = ret; \
1018  } \
1019  } \
1020  }
1021 
1022 #define SPLAT(la, end, step, cond) \
1023  if (step == 2) { \
1024  for (n = 1; n < end; n += step) \
1025  la[n] = la[n - 1]; \
1026  } else if (step == 4) { \
1027  if (cond) { \
1028  for (n = 0; n < end; n += step) \
1029  AV_WN32A(&la[n], la[n] * 0x01010101); \
1030  } else { \
1031  for (n = 0; n < end; n += step) \
1032  memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
1033  } \
1034  } else /* step == 8 */ { \
1035  if (cond) { \
1036  if (HAVE_FAST_64BIT) { \
1037  for (n = 0; n < end; n += step) \
1038  AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
1039  } else { \
1040  for (n = 0; n < end; n += step) { \
1041  uint32_t v32 = la[n] * 0x01010101; \
1042  AV_WN32A(&la[n], v32); \
1043  AV_WN32A(&la[n + 4], v32); \
1044  } \
1045  } \
1046  } else { \
1047  for (n = 0; n < end; n += step) \
1048  memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
1049  } \
1050  }
1051 #define SPLAT_CTX(step) \
1052  do { \
1053  SPLAT(a, end_x, step, end_x == w4); \
1054  SPLAT(l, end_y, step, end_y == h4); \
1055  } while (0)
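/* MERGE_CTX collapses the per-4x4 nnz flags covering one larger
 * transform into a single context bit before decoding, and SPLAT_CTX
 * broadcasts the result back afterwards; the "cond" argument is true
 * when the block lies fully inside the frame, enabling wide vector
 * stores instead of an edge-clipped memset. */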
1056 
1057  /* y tokens */
1058  switch (b->tx) {
1059  case TX_4X4:
1060  DECODE_Y_COEF_LOOP(1, b->bs > BS_8x8 ? n : 0,);
1061  break;
1062  case TX_8X8:
1063  MERGE_CTX(2, AV_RN16A);
1064  DECODE_Y_COEF_LOOP(2, 0,);
1065  SPLAT_CTX(2);
1066  break;
1067  case TX_16X16:
1068  MERGE_CTX(4, AV_RN32A);
1069  DECODE_Y_COEF_LOOP(4, 0,);
1070  SPLAT_CTX(4);
1071  break;
1072  case TX_32X32:
1073  MERGE_CTX(8, AV_RN64A);
1074  DECODE_Y_COEF_LOOP(8, 0, 32);
1075  SPLAT_CTX(8);
1076  break;
1077  }
1078 
1079 #define DECODE_UV_COEF_LOOP(step, v) \
1080  for (n = 0, y = 0; y < end_y; y += step) { \
1081  for (x = 0; x < end_x; x += step, n += step * step) { \
1082  ret = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
1083  (td, td->uvblock[pl] + 16 * n * bytesperpixel, \
1084  16 * step * step, c, e, p, a[x] + l[y], \
1085  uvscan, uvnb, uv_band_counts, qmul[1]); \
1086  a[x] = l[y] = !!ret; \
1087  total_coeff |= !!ret; \
1088  if (step >= 4) { \
1089  AV_WN16A(&td->uveob[pl][n], ret); \
1090  } else { \
1091  td->uveob[pl][n] = ret; \
1092  } \
1093  } \
1094  }
1095 
1096  p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
1097  c = td->counts.coef[b->uvtx][1 /* uv */][!b->intra];
1098  e = td->counts.eob[b->uvtx][1 /* uv */][!b->intra];
1099  w4 >>= s->ss_h;
1100  end_x >>= s->ss_h;
1101  h4 >>= s->ss_v;
1102  end_y >>= s->ss_v;
1103  for (pl = 0; pl < 2; pl++) {
1104  a = &s->above_uv_nnz_ctx[pl][col << !s->ss_h];
1105  l = &td->left_uv_nnz_ctx[pl][(row & 7) << !s->ss_v];
1106  switch (b->uvtx) {
1107  case TX_4X4:
1108  DECODE_UV_COEF_LOOP(1,);
1109  break;
1110  case TX_8X8:
1111  MERGE_CTX(2, AV_RN16A);
1112  DECODE_UV_COEF_LOOP(2,);
1113  SPLAT_CTX(2);
1114  break;
1115  case TX_16X16:
1116  MERGE_CTX(4, AV_RN32A);
1117  DECODE_UV_COEF_LOOP(4,);
1118  SPLAT_CTX(4);
1119  break;
1120  case TX_32X32:
1121  MERGE_CTX(8, AV_RN64A);
1122  DECODE_UV_COEF_LOOP(8, 32);
1123  SPLAT_CTX(8);
1124  break;
1125  }
1126  }
1127 
1128  return total_coeff;
1129 }
1130 
1131 static int decode_coeffs_8bpp(VP9TileData *td)
1132 {
1133  return decode_coeffs(td, 1);
1134 }
1135 
1136 static int decode_coeffs_16bpp(VP9TileData *td)
1137 {
1138  return decode_coeffs(td, 0);
1139 }
1140 
1141 static av_always_inline void mask_edges(uint8_t (*mask)[8][4], int ss_h, int ss_v,
1142  int row_and_7, int col_and_7,
1143  int w, int h, int col_end, int row_end,
1144  enum TxfmMode tx, int skip_inter)
1145 {
1146  static const unsigned wide_filter_col_mask[2] = { 0x11, 0x01 };
1147  static const unsigned wide_filter_row_mask[2] = { 0x03, 0x07 };
1148 
1149  // FIXME I'm pretty sure all loops can be replaced by a single LUT if
1150  // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
1151  // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
1152  // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
1153 
1154  // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
1155  // edges. This means that for UV, we work on two subsampled blocks at
1156  // a time, and we only use the topleft block's mode information to set
1157  // things like block strength. Thus, for any block size smaller than
1158  // 16x16, ignore the odd portion of the block.
1159  if (tx == TX_4X4 && (ss_v | ss_h)) {
1160  if (h == ss_v) {
1161  if (row_and_7 & 1)
1162  return;
1163  if (!row_end)
1164  h += 1;
1165  }
1166  if (w == ss_h) {
1167  if (col_and_7 & 1)
1168  return;
1169  if (!col_end)
1170  w += 1;
1171  }
1172  }
1173 
1174  if (tx == TX_4X4 && !skip_inter) {
1175  int t = 1 << col_and_7, m_col = (t << w) - t, y;
1176  // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
1177  int m_row_8 = m_col & wide_filter_col_mask[ss_h], m_row_4 = m_col - m_row_8;
1178 
1179  for (y = row_and_7; y < h + row_and_7; y++) {
1180  int col_mask_id = 2 - !(y & wide_filter_row_mask[ss_v]);
1181 
1182  mask[0][y][1] |= m_row_8;
1183  mask[0][y][2] |= m_row_4;
1184  // for odd lines, if the odd col is not being filtered,
1185  // skip odd row also:
1186  // .---. <-- a
1187  // | |
1188  // |___| <-- b
1189  // ^ ^
1190  // c d
1191  //
1192  // if a/c are even row/col and b/d are odd, and d is skipped,
1193  // e.g. right edge of size-66x66.webm, then skip b also (bug)
1194  if ((ss_h & ss_v) && (col_end & 1) && (y & 1)) {
1195  mask[1][y][col_mask_id] |= (t << (w - 1)) - t;
1196  } else {
1197  mask[1][y][col_mask_id] |= m_col;
1198  }
1199  if (!ss_h)
1200  mask[0][y][3] |= m_col;
1201  if (!ss_v) {
1202  if (ss_h && (col_end & 1))
1203  mask[1][y][3] |= (t << (w - 1)) - t;
1204  else
1205  mask[1][y][3] |= m_col;
1206  }
1207  }
1208  } else {
1209  int y, t = 1 << col_and_7, m_col = (t << w) - t;
1210 
1211  if (!skip_inter) {
1212  int mask_id = (tx == TX_8X8);
1213  int l2 = tx + ss_h - 1, step1d;
1214  static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
1215  int m_row = m_col & masks[l2];
1216 
1217  // at odd UV col/row tx16/tx32 loopfilter edges, force the 8-wide
1218  // loopfilter to prevent going off the visible edge.
1219  if (ss_h && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
1220  int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
1221  int m_row_8 = m_row - m_row_16;
1222 
1223  for (y = row_and_7; y < h + row_and_7; y++) {
1224  mask[0][y][0] |= m_row_16;
1225  mask[0][y][1] |= m_row_8;
1226  }
1227  } else {
1228  for (y = row_and_7; y < h + row_and_7; y++)
1229  mask[0][y][mask_id] |= m_row;
1230  }
1231 
1232  l2 = tx + ss_v - 1;
1233  step1d = 1 << l2;
1234  if (ss_v && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
1235  for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
1236  mask[1][y][0] |= m_col;
1237  if (y - row_and_7 == h - 1)
1238  mask[1][y][1] |= m_col;
1239  } else {
1240  for (y = row_and_7; y < h + row_and_7; y += step1d)
1241  mask[1][y][mask_id] |= m_col;
1242  }
1243  } else if (tx != TX_4X4) {
1244  int mask_id;
1245 
1246  mask_id = (tx == TX_8X8) || (h == ss_v);
1247  mask[1][row_and_7][mask_id] |= m_col;
1248  mask_id = (tx == TX_8X8) || (w == ss_h);
1249  for (y = row_and_7; y < h + row_and_7; y++)
1250  mask[0][y][mask_id] |= t;
1251  } else {
1252  int t8 = t & wide_filter_col_mask[ss_h], t4 = t - t8;
1253 
1254  for (y = row_and_7; y < h + row_and_7; y++) {
1255  mask[0][y][2] |= t4;
1256  mask[0][y][1] |= t8;
1257  }
1258  mask[1][row_and_7][2 - !(row_and_7 & wide_filter_row_mask[ss_v])] |= m_col;
1259  }
1260  }
1261 }
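/* In the resulting VP9Filter, mask[0] accumulates vertical-edge
 * (column) filter bits and mask[1] horizontal-edge (row) bits, indexed
 * by 8x8 row within the superblock and by filter-width category, with
 * one bit per 8-pixel column. */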
1262 
1263 void ff_vp9_decode_block(VP9TileData *td, int row, int col,
1264  VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
1265  enum BlockLevel bl, enum BlockPartition bp)
1266 {
1267  const VP9Context *s = td->s;
1268  VP9Block *b = td->b;
1269  enum BlockSize bs = bl * 3 + bp;
1270  int bytesperpixel = s->bytesperpixel;
1271  int w4 = ff_vp9_bwh_tab[1][bs][0], h4 = ff_vp9_bwh_tab[1][bs][1], lvl;
1272  int emu[2];
1273  AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
1274 
1275  td->row = row;
1276  td->row7 = row & 7;
1277  td->col = col;
1278  td->col7 = col & 7;
1279 
1280  td->min_mv.x = -(128 + col * 64);
1281  td->min_mv.y = -(128 + row * 64);
1282  td->max_mv.x = 128 + (s->cols - col - w4) * 64;
1283  td->max_mv.y = 128 + (s->rows - row - h4) * 64;
1284 
1285  if (s->pass < 2) {
1286  b->bs = bs;
1287  b->bl = bl;
1288  b->bp = bp;
1289  decode_mode(td);
1290  b->uvtx = b->tx - ((s->ss_h && w4 * 2 == (1 << b->tx)) ||
1291  (s->ss_v && h4 * 2 == (1 << b->tx)));
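/* Chroma uses a transform one step smaller than luma whenever
 * subsampling makes the luma tx size as large as the whole block. */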
1292 
1293  if (td->block_structure) {
1294  td->block_structure[td->nb_block_structure].row = row;
1295  td->block_structure[td->nb_block_structure].col = col;
1296  td->block_structure[td->nb_block_structure].block_size_idx_x = av_log2(w4);
1297  td->block_structure[td->nb_block_structure].block_size_idx_y = av_log2(h4);
1298  td->nb_block_structure++;
1299  }
1300 
1301  if (!b->skip) {
1302  int has_coeffs;
1303 
1304  if (bytesperpixel == 1) {
1305  has_coeffs = decode_coeffs_8bpp(td);
1306  } else {
1307  has_coeffs = decode_coeffs_16bpp(td);
1308  }
1309  if (!has_coeffs && b->bs <= BS_8x8 && !b->intra) {
1310  b->skip = 1;
1311  memset(&s->above_skip_ctx[col], 1, w4);
1312  memset(&td->left_skip_ctx[td->row7], 1, h4);
1313  }
1314  } else {
1315  int row7 = td->row7;
1316 
1317 #define SPLAT_ZERO_CTX(v, n) \
1318  switch (n) { \
1319  case 1: v = 0; break; \
1320  case 2: AV_ZERO16(&v); break; \
1321  case 4: AV_ZERO32(&v); break; \
1322  case 8: AV_ZERO64(&v); break; \
1323  case 16: AV_ZERO128(&v); break; \
1324  }
1325 #define SPLAT_ZERO_YUV(dir, var, off, n, dir2) \
1326  do { \
1327  SPLAT_ZERO_CTX(dir##_y_##var[off * 2], n * 2); \
1328  if (s->ss_##dir2) { \
1329  SPLAT_ZERO_CTX(dir##_uv_##var[0][off], n); \
1330  SPLAT_ZERO_CTX(dir##_uv_##var[1][off], n); \
1331  } else { \
1332  SPLAT_ZERO_CTX(dir##_uv_##var[0][off * 2], n * 2); \
1333  SPLAT_ZERO_CTX(dir##_uv_##var[1][off * 2], n * 2); \
1334  } \
1335  } while (0)
1336 
1337  switch (w4) {
1338  case 1: SPLAT_ZERO_YUV(s->above, nnz_ctx, col, 1, h); break;
1339  case 2: SPLAT_ZERO_YUV(s->above, nnz_ctx, col, 2, h); break;
1340  case 4: SPLAT_ZERO_YUV(s->above, nnz_ctx, col, 4, h); break;
1341  case 8: SPLAT_ZERO_YUV(s->above, nnz_ctx, col, 8, h); break;
1342  }
1343  switch (h4) {
1344  case 1: SPLAT_ZERO_YUV(td->left, nnz_ctx, row7, 1, v); break;
1345  case 2: SPLAT_ZERO_YUV(td->left, nnz_ctx, row7, 2, v); break;
1346  case 4: SPLAT_ZERO_YUV(td->left, nnz_ctx, row7, 4, v); break;
1347  case 8: SPLAT_ZERO_YUV(td->left, nnz_ctx, row7, 8, v); break;
1348  }
1349  }
1350 
1351  if (s->pass == 1) {
1352  s->td[0].b++;
1353  s->td[0].block += w4 * h4 * 64 * bytesperpixel;
1354  s->td[0].uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
1355  s->td[0].uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
1356  s->td[0].eob += 4 * w4 * h4;
1357  s->td[0].uveob[0] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
1358  s->td[0].uveob[1] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
1359 
1360  return;
1361  }
1362  }
1363 
1364  // use emulated overhangs if the stride of the target buffer can't hold
1365  // them; this makes it possible to support emu-edge and so on even with
1366  // large block overhangs
1367  emu[0] = (col + w4) * 8 * bytesperpixel > f->linesize[0] ||
1368  (row + h4) > s->rows;
1369  emu[1] = ((col + w4) * 8 >> s->ss_h) * bytesperpixel > f->linesize[1] ||
1370  (row + h4) > s->rows;
1371  if (emu[0]) {
1372  td->dst[0] = td->tmp_y;
1373  td->y_stride = 128;
1374  } else {
1375  td->dst[0] = f->data[0] + yoff;
1376  td->y_stride = f->linesize[0];
1377  }
1378  if (emu[1]) {
1379  td->dst[1] = td->tmp_uv[0];
1380  td->dst[2] = td->tmp_uv[1];
1381  td->uv_stride = 128;
1382  } else {
1383  td->dst[1] = f->data[1] + uvoff;
1384  td->dst[2] = f->data[2] + uvoff;
1385  td->uv_stride = f->linesize[1];
1386  }
1387  if (b->intra) {
1388  if (s->s.h.bpp > 8) {
1389  ff_vp9_intra_recon_16bpp(td, yoff, uvoff);
1390  } else {
1391  ff_vp9_intra_recon_8bpp(td, yoff, uvoff);
1392  }
1393  } else {
1394  if (s->s.h.bpp > 8) {
1395  ff_vp9_inter_recon_16bpp(td);
1396  } else {
1397  ff_vp9_inter_recon_8bpp(td);
1398  }
1399  }
1400  if (emu[0]) {
1401  int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
1402 
1403  for (n = 0; o < w; n++) {
1404  int bw = 64 >> n;
1405 
1406  av_assert2(n <= 4);
1407  if (w & bw) {
1408  s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o * bytesperpixel, f->linesize[0],
1409  td->tmp_y + o * bytesperpixel, 128, h, 0, 0);
1410  o += bw;
1411  }
1412  }
1413  }
1414  if (emu[1]) {
1415  int w = FFMIN(s->cols - col, w4) * 8 >> s->ss_h;
1416  int h = FFMIN(s->rows - row, h4) * 8 >> s->ss_v, n, o = 0;
1417 
1418  for (n = s->ss_h; o < w; n++) {
1419  int bw = 64 >> n;
1420 
1421  av_assert2(n <= 4);
1422  if (w & bw) {
1423  s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o * bytesperpixel, f->linesize[1],
1424  td->tmp_uv[0] + o * bytesperpixel, 128, h, 0, 0);
1425  s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o * bytesperpixel, f->linesize[2],
1426  td->tmp_uv[1] + o * bytesperpixel, 128, h, 0, 0);
1427  o += bw;
1428  }
1429  }
1430  }
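/* When emu[] was set, the block was reconstructed into 128-byte-stride
 * scratch buffers; the loops above copy the visible part back into the
 * frame using the plain (unfiltered, mx = my = 0) put functions in
 * power-of-two-wide chunks. */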
1431 
1432  // pick filter level and find edges to apply filter to
1433  if (s->s.h.filter.level &&
1434  (lvl = s->s.h.segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
1435  [b->mode[3] != ZEROMV]) > 0) {
1436  int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
1437  int skip_inter = !b->intra && b->skip, col7 = td->col7, row7 = td->row7;
1438 
1439  setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
1440  mask_edges(lflvl->mask[0], 0, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
1441  if (s->ss_h || s->ss_v)
1442  mask_edges(lflvl->mask[1], s->ss_h, s->ss_v, row7, col7, x_end, y_end,
1443  s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
1444  s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
1445  b->uvtx, skip_inter);
1446  }
1447 
1448  if (s->pass == 2) {
1449  s->td[0].b++;
1450  s->td[0].block += w4 * h4 * 64 * bytesperpixel;
1451  s->td[0].uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
1452  s->td[0].uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
1453  s->td[0].eob += 4 * w4 * h4;
1454  s->td[0].uveob[0] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
1455  s->td[0].uveob[1] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
1456  }
1457 }