/*
 * VP9 compatible video decoder
 *
 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
 * Copyright (C) 2013 Clément Bœsch <u pkh me>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/avassert.h"

#include "threadframe.h"
#include "vp56.h"
#include "vp9.h"
#include "vp9data.h"
#include "vp9dec.h"

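// splat the value v over a w x h rectangle of context bytes,
// using aligned 16/32/64-bit stores where the width allows it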
static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h,
                                       ptrdiff_t stride, int v)
{
    switch (w) {
    case 1:
        do {
            *ptr = v;
            ptr += stride;
        } while (--h);
        break;
    case 2: {
        int v16 = v * 0x0101;
        do {
            AV_WN16A(ptr, v16);
            ptr += stride;
        } while (--h);
        break;
    }
    case 4: {
        uint32_t v32 = v * 0x01010101;
        do {
            AV_WN32A(ptr, v32);
            ptr += stride;
        } while (--h);
        break;
    }
    case 8: {
#if HAVE_FAST_64BIT
        uint64_t v64 = v * 0x0101010101010101ULL;
        do {
            AV_WN64A(ptr, v64);
            ptr += stride;
        } while (--h);
#else
        uint32_t v32 = v * 0x01010101;
        do {
            AV_WN32A(ptr, v32);
            AV_WN32A(ptr + 4, v32);
            ptr += stride;
        } while (--h);
#endif
        break;
    }
    }
}

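// decode all mode information for one block: segment id, skip flag,
// intra/inter flag, transform size, prediction modes, reference frames,
// interpolation filter and motion vectors, then update the above/left
// prediction contexts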
static void decode_mode(VP9TileData *td)
{
    static const uint8_t left_ctx[N_BS_SIZES] = {
        0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
    };
    static const uint8_t above_ctx[N_BS_SIZES] = {
        0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
    };
    static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
        TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
        TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
    };
    VP9Context *s = td->s;
    VP9Block *b = td->b;
    int row = td->row, col = td->col, row7 = td->row7;
    enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
    int bw4 = ff_vp9_bwh_tab[1][b->bs][0], w4 = FFMIN(s->cols - col, bw4);
    int bh4 = ff_vp9_bwh_tab[1][b->bs][1], h4 = FFMIN(s->rows - row, bh4), y;
    int have_a = row > 0, have_l = col > td->tile_col_start;
    int vref, filter_id;

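    // segment id: either explicitly coded, predicted from the reference
    // frame's segmentation map, or 0 if segmentation is disabled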
    if (!s->s.h.segmentation.enabled) {
        b->seg_id = 0;
    } else if (s->s.h.keyframe || s->s.h.intraonly) {
        b->seg_id = !s->s.h.segmentation.update_map ? 0 :
                    vp8_rac_get_tree(td->c, ff_vp9_segmentation_tree, s->s.h.segmentation.prob);
    } else if (!s->s.h.segmentation.update_map ||
               (s->s.h.segmentation.temporal &&
                vp56_rac_get_prob_branchy(td->c,
                    s->s.h.segmentation.pred_prob[s->above_segpred_ctx[col] +
                                                  td->left_segpred_ctx[row7]]))) {
        if (!s->s.h.errorres && s->s.frames[REF_FRAME_SEGMAP].segmentation_map) {
            int pred = 8, x;
            uint8_t *refsegmap = s->s.frames[REF_FRAME_SEGMAP].segmentation_map;

            if (!s->s.frames[REF_FRAME_SEGMAP].uses_2pass)
                ff_thread_await_progress(&s->s.frames[REF_FRAME_SEGMAP].tf, row >> 3, 0);
            for (y = 0; y < h4; y++) {
                int idx_base = (y + row) * 8 * s->sb_cols + col;
                for (x = 0; x < w4; x++)
                    pred = FFMIN(pred, refsegmap[idx_base + x]);
            }
            av_assert1(pred < 8);
            b->seg_id = pred;
        } else {
            b->seg_id = 0;
        }

        memset(&s->above_segpred_ctx[col], 1, w4);
        memset(&td->left_segpred_ctx[row7], 1, h4);
    } else {
        b->seg_id = vp8_rac_get_tree(td->c, ff_vp9_segmentation_tree,
                                     s->s.h.segmentation.prob);

        memset(&s->above_segpred_ctx[col], 0, w4);
        memset(&td->left_segpred_ctx[row7], 0, h4);
    }
    if (s->s.h.segmentation.enabled &&
        (s->s.h.segmentation.update_map || s->s.h.keyframe || s->s.h.intraonly)) {
        setctx_2d(&s->s.frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
                  bw4, bh4, 8 * s->sb_cols, b->seg_id);
    }

    b->skip = s->s.h.segmentation.enabled &&
              s->s.h.segmentation.feat[b->seg_id].skip_enabled;
    if (!b->skip) {
        int c = td->left_skip_ctx[row7] + s->above_skip_ctx[col];
        b->skip = vp56_rac_get_prob(td->c, s->prob.p.skip[c]);
        td->counts.skip[c][b->skip]++;
    }

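    // intra flag: forced for key/intra-only frames, implied by the segment's
    // reference feature, or coded with the above/left intra contexts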
    if (s->s.h.keyframe || s->s.h.intraonly) {
        b->intra = 1;
    } else if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].ref_enabled) {
        b->intra = !s->s.h.segmentation.feat[b->seg_id].ref_val;
    } else {
        int c, bit;

        if (have_a && have_l) {
            c = s->above_intra_ctx[col] + td->left_intra_ctx[row7];
            c += (c == 2);
        } else {
            c = have_a ? 2 * s->above_intra_ctx[col] :
                have_l ? 2 * td->left_intra_ctx[row7] : 0;
        }
        bit = vp56_rac_get_prob(td->c, s->prob.p.intra[c]);
        td->counts.intra[c][bit]++;
        b->intra = !bit;
    }

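    // transform size: coded only when the frame-level mode is TX_SWITCHABLE,
    // otherwise the minimum of the frame transform mode and the block's maximum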
    if ((b->intra || !b->skip) && s->s.h.txfmmode == TX_SWITCHABLE) {
        int c;
        if (have_a) {
            if (have_l) {
                c = (s->above_skip_ctx[col] ? max_tx :
                     s->above_txfm_ctx[col]) +
                    (td->left_skip_ctx[row7] ? max_tx :
                     td->left_txfm_ctx[row7]) > max_tx;
            } else {
                c = s->above_skip_ctx[col] ? 1 :
                    (s->above_txfm_ctx[col] * 2 > max_tx);
            }
        } else if (have_l) {
            c = td->left_skip_ctx[row7] ? 1 :
                (td->left_txfm_ctx[row7] * 2 > max_tx);
        } else {
            c = 1;
        }
        switch (max_tx) {
        case TX_32X32:
            b->tx = vp56_rac_get_prob(td->c, s->prob.p.tx32p[c][0]);
            if (b->tx) {
                b->tx += vp56_rac_get_prob(td->c, s->prob.p.tx32p[c][1]);
                if (b->tx == 2)
                    b->tx += vp56_rac_get_prob(td->c, s->prob.p.tx32p[c][2]);
            }
            td->counts.tx32p[c][b->tx]++;
            break;
        case TX_16X16:
            b->tx = vp56_rac_get_prob(td->c, s->prob.p.tx16p[c][0]);
            if (b->tx)
                b->tx += vp56_rac_get_prob(td->c, s->prob.p.tx16p[c][1]);
            td->counts.tx16p[c][b->tx]++;
            break;
        case TX_8X8:
            b->tx = vp56_rac_get_prob(td->c, s->prob.p.tx8p[c]);
            td->counts.tx8p[c][b->tx]++;
            break;
        case TX_4X4:
            b->tx = TX_4X4;
            break;
        }
    } else {
        b->tx = FFMIN(max_tx, s->s.h.txfmmode);
    }

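    // prediction modes: key/intra-only frames use fixed mode probabilities
    // indexed by the above/left modes; inter frames code intra modes,
    // references, filter and motion vectors below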
    if (s->s.h.keyframe || s->s.h.intraonly) {
        uint8_t *a = &s->above_mode_ctx[col * 2];
        uint8_t *l = &td->left_mode_ctx[(row7) << 1];

        b->comp = 0;
        if (b->bs > BS_8x8) {
            // FIXME the memory storage intermediates here aren't really
            // necessary, they're just there to make the code slightly
            // simpler for now
            b->mode[0] =
            a[0]       = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree,
                                          ff_vp9_default_kf_ymode_probs[a[0]][l[0]]);
            if (b->bs != BS_8x4) {
                b->mode[1] = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree,
                                              ff_vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
                l[0]       =
                a[1]       = b->mode[1];
            } else {
                l[0]       =
                a[1]       =
                b->mode[1] = b->mode[0];
            }
            if (b->bs != BS_4x8) {
                b->mode[2] =
                a[0]       = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree,
                                              ff_vp9_default_kf_ymode_probs[a[0]][l[1]]);
                if (b->bs != BS_8x4) {
                    b->mode[3] = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree,
                                                  ff_vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
                    l[1]       =
                    a[1]       = b->mode[3];
                } else {
                    l[1]       =
                    a[1]       =
                    b->mode[3] = b->mode[2];
                }
            } else {
                b->mode[2] = b->mode[0];
                l[1]       =
                a[1]       =
                b->mode[3] = b->mode[1];
            }
        } else {
            b->mode[0] = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree,
                                          ff_vp9_default_kf_ymode_probs[*a][*l]);
            b->mode[3] =
            b->mode[2] =
            b->mode[1] = b->mode[0];
            // FIXME this can probably be optimized
            memset(a, b->mode[0], ff_vp9_bwh_tab[0][b->bs][0]);
            memset(l, b->mode[0], ff_vp9_bwh_tab[0][b->bs][1]);
        }
        b->uvmode = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree,
                                     ff_vp9_default_kf_uvmode_probs[b->mode[3]]);
    } else if (b->intra) {
        b->comp = 0;
        if (b->bs > BS_8x8) {
            b->mode[0] = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree,
                                          s->prob.p.y_mode[0]);
            td->counts.y_mode[0][b->mode[0]]++;
            if (b->bs != BS_8x4) {
                b->mode[1] = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree,
                                              s->prob.p.y_mode[0]);
                td->counts.y_mode[0][b->mode[1]]++;
            } else {
                b->mode[1] = b->mode[0];
            }
            if (b->bs != BS_4x8) {
                b->mode[2] = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree,
                                              s->prob.p.y_mode[0]);
                td->counts.y_mode[0][b->mode[2]]++;
                if (b->bs != BS_8x4) {
                    b->mode[3] = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree,
                                                  s->prob.p.y_mode[0]);
                    td->counts.y_mode[0][b->mode[3]]++;
                } else {
                    b->mode[3] = b->mode[2];
                }
            } else {
                b->mode[2] = b->mode[0];
                b->mode[3] = b->mode[1];
            }
        } else {
            static const uint8_t size_group[10] = {
                3, 3, 3, 3, 2, 2, 2, 1, 1, 1
            };
            int sz = size_group[b->bs];

            b->mode[0] = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree,
                                          s->prob.p.y_mode[sz]);
            b->mode[1] =
            b->mode[2] =
            b->mode[3] = b->mode[0];
            td->counts.y_mode[sz][b->mode[3]]++;
        }
        b->uvmode = vp8_rac_get_tree(td->c, ff_vp9_intramode_tree,
                                     s->prob.p.uv_mode[b->mode[3]]);
        td->counts.uv_mode[b->mode[3]][b->uvmode]++;
    } else {
        static const uint8_t inter_mode_ctx_lut[14][14] = {
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
        };

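        // reference frame(s): either fixed by the segment's ref feature, or
        // coded as a compound/single prediction decision plus the ref index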
        if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].ref_enabled) {
            av_assert2(s->s.h.segmentation.feat[b->seg_id].ref_val != 0);
            b->comp = 0;
            b->ref[0] = s->s.h.segmentation.feat[b->seg_id].ref_val - 1;
        } else {
            // read comp_pred flag
            if (s->s.h.comppredmode != PRED_SWITCHABLE) {
                b->comp = s->s.h.comppredmode == PRED_COMPREF;
            } else {
                int c;

                // FIXME add intra as ref=0xff (or -1) to make these easier?
                if (have_a) {
                    if (have_l) {
                        if (s->above_comp_ctx[col] && td->left_comp_ctx[row7]) {
                            c = 4;
                        } else if (s->above_comp_ctx[col]) {
                            c = 2 + (td->left_intra_ctx[row7] ||
                                     td->left_ref_ctx[row7] == s->s.h.fixcompref);
                        } else if (td->left_comp_ctx[row7]) {
                            c = 2 + (s->above_intra_ctx[col] ||
                                     s->above_ref_ctx[col] == s->s.h.fixcompref);
                        } else {
                            c = (!s->above_intra_ctx[col] &&
                                 s->above_ref_ctx[col] == s->s.h.fixcompref) ^
                                (!td->left_intra_ctx[row7] &&
                                 td->left_ref_ctx[row & 7] == s->s.h.fixcompref);
                        }
                    } else {
                        c = s->above_comp_ctx[col] ? 3 :
                            (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->s.h.fixcompref);
                    }
                } else if (have_l) {
                    c = td->left_comp_ctx[row7] ? 3 :
                        (!td->left_intra_ctx[row7] && td->left_ref_ctx[row7] == s->s.h.fixcompref);
                } else {
                    c = 1;
                }
                b->comp = vp56_rac_get_prob(td->c, s->prob.p.comp[c]);
                td->counts.comp[c][b->comp]++;
            }

            // read actual references
            // FIXME probably cache a few variables here to prevent repetitive
            // memory accesses below
            if (b->comp) { /* two references */
                int fix_idx = s->s.h.signbias[s->s.h.fixcompref], var_idx = !fix_idx, c, bit;

                b->ref[fix_idx] = s->s.h.fixcompref;
                // FIXME can this codeblob be replaced by some sort of LUT?
                if (have_a) {
                    if (have_l) {
                        if (s->above_intra_ctx[col]) {
                            if (td->left_intra_ctx[row7]) {
                                c = 2;
                            } else {
                                c = 1 + 2 * (td->left_ref_ctx[row7] != s->s.h.varcompref[1]);
                            }
                        } else if (td->left_intra_ctx[row7]) {
                            c = 1 + 2 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]);
                        } else {
                            int refl = td->left_ref_ctx[row7], refa = s->above_ref_ctx[col];

                            if (refl == refa && refa == s->s.h.varcompref[1]) {
                                c = 0;
                            } else if (!td->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
                                if ((refa == s->s.h.fixcompref && refl == s->s.h.varcompref[0]) ||
                                    (refl == s->s.h.fixcompref && refa == s->s.h.varcompref[0])) {
                                    c = 4;
                                } else {
                                    c = (refa == refl) ? 3 : 1;
                                }
                            } else if (!td->left_comp_ctx[row7]) {
                                if (refa == s->s.h.varcompref[1] && refl != s->s.h.varcompref[1]) {
                                    c = 1;
                                } else {
                                    c = (refl == s->s.h.varcompref[1] &&
                                         refa != s->s.h.varcompref[1]) ? 2 : 4;
                                }
                            } else if (!s->above_comp_ctx[col]) {
                                if (refl == s->s.h.varcompref[1] && refa != s->s.h.varcompref[1]) {
                                    c = 1;
                                } else {
                                    c = (refa == s->s.h.varcompref[1] &&
                                         refl != s->s.h.varcompref[1]) ? 2 : 4;
                                }
                            } else {
                                c = (refl == refa) ? 4 : 2;
                            }
                        }
                    } else {
                        if (s->above_intra_ctx[col]) {
                            c = 2;
                        } else if (s->above_comp_ctx[col]) {
                            c = 4 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]);
                        } else {
                            c = 3 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]);
                        }
                    }
                } else if (have_l) {
                    if (td->left_intra_ctx[row7]) {
                        c = 2;
                    } else if (td->left_comp_ctx[row7]) {
                        c = 4 * (td->left_ref_ctx[row7] != s->s.h.varcompref[1]);
                    } else {
                        c = 3 * (td->left_ref_ctx[row7] != s->s.h.varcompref[1]);
                    }
                } else {
                    c = 2;
                }
                bit = vp56_rac_get_prob(td->c, s->prob.p.comp_ref[c]);
                b->ref[var_idx] = s->s.h.varcompref[bit];
                td->counts.comp_ref[c][bit]++;
            } else /* single reference */ {
                int bit, c;

                if (have_a && !s->above_intra_ctx[col]) {
                    if (have_l && !td->left_intra_ctx[row7]) {
                        if (td->left_comp_ctx[row7]) {
                            if (s->above_comp_ctx[col]) {
                                c = 1 + (!s->s.h.fixcompref || !td->left_ref_ctx[row7] ||
                                         !s->above_ref_ctx[col]);
                            } else {
                                c = (3 * !s->above_ref_ctx[col]) +
                                    (!s->s.h.fixcompref || !td->left_ref_ctx[row7]);
                            }
                        } else if (s->above_comp_ctx[col]) {
                            c = (3 * !td->left_ref_ctx[row7]) +
                                (!s->s.h.fixcompref || !s->above_ref_ctx[col]);
                        } else {
                            c = 2 * !td->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
                        }
                    } else if (s->above_intra_ctx[col]) {
                        c = 2;
                    } else if (s->above_comp_ctx[col]) {
                        c = 1 + (!s->s.h.fixcompref || !s->above_ref_ctx[col]);
                    } else {
                        c = 4 * (!s->above_ref_ctx[col]);
                    }
                } else if (have_l && !td->left_intra_ctx[row7]) {
                    if (td->left_intra_ctx[row7]) {
                        c = 2;
                    } else if (td->left_comp_ctx[row7]) {
                        c = 1 + (!s->s.h.fixcompref || !td->left_ref_ctx[row7]);
                    } else {
                        c = 4 * (!td->left_ref_ctx[row7]);
                    }
                } else {
                    c = 2;
                }
                bit = vp56_rac_get_prob(td->c, s->prob.p.single_ref[c][0]);
                td->counts.single_ref[c][0][bit]++;
                if (!bit) {
                    b->ref[0] = 0;
                } else {
                    // FIXME can this codeblob be replaced by some sort of LUT?
                    if (have_a) {
                        if (have_l) {
                            if (td->left_intra_ctx[row7]) {
                                if (s->above_intra_ctx[col]) {
                                    c = 2;
                                } else if (s->above_comp_ctx[col]) {
                                    c = 1 + 2 * (s->s.h.fixcompref == 1 ||
                                                 s->above_ref_ctx[col] == 1);
                                } else if (!s->above_ref_ctx[col]) {
                                    c = 3;
                                } else {
                                    c = 4 * (s->above_ref_ctx[col] == 1);
                                }
                            } else if (s->above_intra_ctx[col]) {
                                if (td->left_intra_ctx[row7]) {
                                    c = 2;
                                } else if (td->left_comp_ctx[row7]) {
                                    c = 1 + 2 * (s->s.h.fixcompref == 1 ||
                                                 td->left_ref_ctx[row7] == 1);
                                } else if (!td->left_ref_ctx[row7]) {
                                    c = 3;
                                } else {
                                    c = 4 * (td->left_ref_ctx[row7] == 1);
                                }
                            } else if (s->above_comp_ctx[col]) {
                                if (td->left_comp_ctx[row7]) {
                                    if (td->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
                                        c = 3 * (s->s.h.fixcompref == 1 ||
                                                 td->left_ref_ctx[row7] == 1);
                                    } else {
                                        c = 2;
                                    }
                                } else if (!td->left_ref_ctx[row7]) {
                                    c = 1 + 2 * (s->s.h.fixcompref == 1 ||
                                                 s->above_ref_ctx[col] == 1);
                                } else {
                                    c = 3 * (td->left_ref_ctx[row7] == 1) +
                                        (s->s.h.fixcompref == 1 || s->above_ref_ctx[col] == 1);
                                }
                            } else if (td->left_comp_ctx[row7]) {
                                if (!s->above_ref_ctx[col]) {
                                    c = 1 + 2 * (s->s.h.fixcompref == 1 ||
                                                 td->left_ref_ctx[row7] == 1);
                                } else {
                                    c = 3 * (s->above_ref_ctx[col] == 1) +
                                        (s->s.h.fixcompref == 1 || td->left_ref_ctx[row7] == 1);
                                }
                            } else if (!s->above_ref_ctx[col]) {
                                if (!td->left_ref_ctx[row7]) {
                                    c = 3;
                                } else {
                                    c = 4 * (td->left_ref_ctx[row7] == 1);
                                }
                            } else if (!td->left_ref_ctx[row7]) {
                                c = 4 * (s->above_ref_ctx[col] == 1);
                            } else {
                                c = 2 * (td->left_ref_ctx[row7] == 1) +
                                    2 * (s->above_ref_ctx[col] == 1);
                            }
                        } else {
                            if (s->above_intra_ctx[col] ||
                                (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
                                c = 2;
                            } else if (s->above_comp_ctx[col]) {
                                c = 3 * (s->s.h.fixcompref == 1 || s->above_ref_ctx[col] == 1);
                            } else {
                                c = 4 * (s->above_ref_ctx[col] == 1);
                            }
                        }
                    } else if (have_l) {
                        if (td->left_intra_ctx[row7] ||
                            (!td->left_comp_ctx[row7] && !td->left_ref_ctx[row7])) {
                            c = 2;
                        } else if (td->left_comp_ctx[row7]) {
                            c = 3 * (s->s.h.fixcompref == 1 || td->left_ref_ctx[row7] == 1);
                        } else {
                            c = 4 * (td->left_ref_ctx[row7] == 1);
                        }
                    } else {
                        c = 2;
                    }
                    bit = vp56_rac_get_prob(td->c, s->prob.p.single_ref[c][1]);
                    td->counts.single_ref[c][1][bit]++;
                    b->ref[0] = 1 + bit;
                }
            }
        }

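        // for blocks of 8x8 and larger (bs <= BS_8x8; smaller enum values
        // mean larger blocks), a single inter mode covers the whole block;
        // skip-enabled segments force ZEROMV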
        if (b->bs <= BS_8x8) {
            if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].skip_enabled) {
                b->mode[0] =
                b->mode[1] =
                b->mode[2] =
                b->mode[3] = ZEROMV;
            } else {
                static const uint8_t off[10] = {
                    3, 0, 0, 1, 0, 0, 0, 0, 0, 0
                };

                // FIXME this needs to use the LUT tables from find_ref_mvs
                // because not all are -1,0/0,-1
                int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
                                          [td->left_mode_ctx[row7 + off[b->bs]]];

                b->mode[0] = vp8_rac_get_tree(td->c, ff_vp9_inter_mode_tree,
                                              s->prob.p.mv_mode[c]);
                b->mode[1] =
                b->mode[2] =
                b->mode[3] = b->mode[0];
                td->counts.mv_mode[c][b->mode[0] - 10]++;
            }
        }

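        // interpolation filter: predicted from the above/left filter
        // contexts when the frame filter mode is switchable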
        if (s->s.h.filtermode == FILTER_SWITCHABLE) {
            int c;

            if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
                if (have_l && td->left_mode_ctx[row7] >= NEARESTMV) {
                    c = s->above_filter_ctx[col] == td->left_filter_ctx[row7] ?
                        td->left_filter_ctx[row7] : 3;
                } else {
                    c = s->above_filter_ctx[col];
                }
            } else if (have_l && td->left_mode_ctx[row7] >= NEARESTMV) {
                c = td->left_filter_ctx[row7];
            } else {
                c = 3;
            }

            filter_id = vp8_rac_get_tree(td->c, ff_vp9_filter_tree,
                                         s->prob.p.filter[c]);
            td->counts.filter[c][filter_id]++;
            b->filter = ff_vp9_filter_lut[filter_id];
        } else {
            b->filter = s->s.h.filtermode;
        }

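        // motion vectors: sub-8x8 blocks code a mode and MV per sub-block,
        // larger blocks reuse the single mode coded above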
        if (b->bs > BS_8x8) {
            int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][td->left_mode_ctx[row7]];

            b->mode[0] = vp8_rac_get_tree(td->c, ff_vp9_inter_mode_tree,
                                          s->prob.p.mv_mode[c]);
            td->counts.mv_mode[c][b->mode[0] - 10]++;
            ff_vp9_fill_mv(td, b->mv[0], b->mode[0], 0);

            if (b->bs != BS_8x4) {
                b->mode[1] = vp8_rac_get_tree(td->c, ff_vp9_inter_mode_tree,
                                              s->prob.p.mv_mode[c]);
                td->counts.mv_mode[c][b->mode[1] - 10]++;
                ff_vp9_fill_mv(td, b->mv[1], b->mode[1], 1);
            } else {
                b->mode[1] = b->mode[0];
                AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
                AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
            }

            if (b->bs != BS_4x8) {
                b->mode[2] = vp8_rac_get_tree(td->c, ff_vp9_inter_mode_tree,
                                              s->prob.p.mv_mode[c]);
                td->counts.mv_mode[c][b->mode[2] - 10]++;
                ff_vp9_fill_mv(td, b->mv[2], b->mode[2], 2);

                if (b->bs != BS_8x4) {
                    b->mode[3] = vp8_rac_get_tree(td->c, ff_vp9_inter_mode_tree,
                                                  s->prob.p.mv_mode[c]);
                    td->counts.mv_mode[c][b->mode[3] - 10]++;
                    ff_vp9_fill_mv(td, b->mv[3], b->mode[3], 3);
                } else {
                    b->mode[3] = b->mode[2];
                    AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
                    AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
                }
            } else {
                b->mode[2] = b->mode[0];
                AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
                AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
                b->mode[3] = b->mode[1];
                AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
                AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
            }
        } else {
            ff_vp9_fill_mv(td, b->mv[0], b->mode[0], -1);
            AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
            AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
            AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
            AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
            AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
            AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
        }

        vref = b->ref[b->comp ? s->s.h.signbias[s->s.h.varcompref[0]] : 0];
    }

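// splat this block's decisions into the above/left context arrays, writing
// n context bytes at once with a single aligned store where possible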
#if HAVE_FAST_64BIT
#define SPLAT_CTX(var, val, n) \
    switch (n) { \
    case 1:  var = val;                                    break; \
    case 2:  AV_WN16A(&var, val * 0x0101);                 break; \
    case 4:  AV_WN32A(&var, val * 0x01010101);             break; \
    case 8:  AV_WN64A(&var, val * 0x0101010101010101ULL);  break; \
    case 16: { \
        uint64_t v64 = val * 0x0101010101010101ULL; \
        AV_WN64A(              &var,     v64); \
        AV_WN64A(&((uint8_t *) &var)[8], v64); \
        break; \
    } \
    }
#else
#define SPLAT_CTX(var, val, n) \
    switch (n) { \
    case 1:  var = val;                         break; \
    case 2:  AV_WN16A(&var, val * 0x0101);      break; \
    case 4:  AV_WN32A(&var, val * 0x01010101);  break; \
    case 8: { \
        uint32_t v32 = val * 0x01010101; \
        AV_WN32A(              &var,     v32); \
        AV_WN32A(&((uint8_t *) &var)[4], v32); \
        break; \
    } \
    case 16: { \
        uint32_t v32 = val * 0x01010101; \
        AV_WN32A(              &var,      v32); \
        AV_WN32A(&((uint8_t *) &var)[4],  v32); \
        AV_WN32A(&((uint8_t *) &var)[8],  v32); \
        AV_WN32A(&((uint8_t *) &var)[12], v32); \
        break; \
    } \
    }
#endif

    switch (ff_vp9_bwh_tab[1][b->bs][0]) {
#define SET_CTXS(perf, dir, off, n) \
    do { \
        SPLAT_CTX(perf->dir##_skip_ctx[off],      b->skip,          n); \
        SPLAT_CTX(perf->dir##_txfm_ctx[off],      b->tx,            n); \
        SPLAT_CTX(perf->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
        if (!s->s.h.keyframe && !s->s.h.intraonly) { \
            SPLAT_CTX(perf->dir##_intra_ctx[off], b->intra,   n); \
            SPLAT_CTX(perf->dir##_comp_ctx[off],  b->comp,    n); \
            SPLAT_CTX(perf->dir##_mode_ctx[off],  b->mode[3], n); \
            if (!b->intra) { \
                SPLAT_CTX(perf->dir##_ref_ctx[off], vref, n); \
                if (s->s.h.filtermode == FILTER_SWITCHABLE) { \
                    SPLAT_CTX(perf->dir##_filter_ctx[off], filter_id, n); \
                } \
            } \
        } \
    } while (0)
    case 1: SET_CTXS(s, above, col, 1); break;
    case 2: SET_CTXS(s, above, col, 2); break;
    case 4: SET_CTXS(s, above, col, 4); break;
    case 8: SET_CTXS(s, above, col, 8); break;
    }
    switch (ff_vp9_bwh_tab[1][b->bs][1]) {
    case 1: SET_CTXS(td, left, row7, 1); break;
    case 2: SET_CTXS(td, left, row7, 2); break;
    case 4: SET_CTXS(td, left, row7, 4); break;
    case 8: SET_CTXS(td, left, row7, 8); break;
    }
#undef SPLAT_CTX
#undef SET_CTXS

    if (!s->s.h.keyframe && !s->s.h.intraonly) {
        if (b->bs > BS_8x8) {
            int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);

            AV_COPY32(&td->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
            AV_COPY32(&td->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
            AV_WN32A(&td->left_mv_ctx[row7 * 2 + 1][0], mv0);
            AV_WN32A(&td->left_mv_ctx[row7 * 2 + 1][1], mv1);
            AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
            AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
            AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
            AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
        } else {
            int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);

            for (n = 0; n < w4 * 2; n++) {
                AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
                AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
            }
            for (n = 0; n < h4 * 2; n++) {
                AV_WN32A(&td->left_mv_ctx[row7 * 2 + n][0], mv0);
                AV_WN32A(&td->left_mv_ctx[row7 * 2 + n][1], mv1);
            }
        }
    }

    // FIXME kinda ugly
    for (y = 0; y < h4; y++) {
        int x, o = (row + y) * s->sb_cols * 8 + col;
        VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[o];

        if (b->intra) {
            for (x = 0; x < w4; x++) {
                mv[x].ref[0] =
                mv[x].ref[1] = -1;
            }
        } else if (b->comp) {
            for (x = 0; x < w4; x++) {
                mv[x].ref[0] = b->ref[0];
                mv[x].ref[1] = b->ref[1];
                AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
                AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
            }
        } else {
            for (x = 0; x < w4; x++) {
                mv[x].ref[0] = b->ref[0];
                mv[x].ref[1] = -1;
                AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
            }
        }
    }
}

// FIXME merge cnt/eob arguments?
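// decode the coefficients of one transform block; returns the number of
// coefficients read (the EOB position). nnz is the initial nonzero context,
// scan/nb the scan order and its neighbor table, qmul the DC/AC dequantizers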
static av_always_inline int
decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
                        int is_tx32x32, int is8bitsperpixel, int bpp, unsigned (*cnt)[6][3],
                        unsigned (*eob)[6][2], uint8_t (*p)[6][11],
                        int nnz, const int16_t *scan, const int16_t (*nb)[2],
                        const int16_t *band_counts, int16_t *qmul)
{
    int i = 0, band = 0, band_left = band_counts[band];
    const uint8_t *tp = p[0][nnz];
    uint8_t cache[1024];

    do {
        int val, rc;

        val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
        eob[band][nnz][val]++;
        if (!val)
            break;

skip_eob:
        if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
            cnt[band][nnz][0]++;
            if (!--band_left)
                band_left = band_counts[++band];
            cache[scan[i]] = 0;
            nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
            tp = p[band][nnz];
            if (++i == n_coeffs)
                break; //invalid input; blocks should end with EOB
            goto skip_eob;
        }

        rc = scan[i];
        if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
            cnt[band][nnz][1]++;
            val = 1;
            cache[rc] = 1;
        } else {
            cnt[band][nnz][2]++;
            if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
                if (!vp56_rac_get_prob_branchy(c, tp[4])) {
                    cache[rc] = val = 2;
                } else {
                    val = 3 + vp56_rac_get_prob(c, tp[5]);
                    cache[rc] = 3;
                }
            } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
                cache[rc] = 4;
                if (!vp56_rac_get_prob_branchy(c, tp[7])) {
                    val = vp56_rac_get_prob(c, 159) + 5;
                } else {
                    val = (vp56_rac_get_prob(c, 165) << 1) + 7;
                    val += vp56_rac_get_prob(c, 145);
                }
            } else { // cat 3-6
                cache[rc] = 5;
                if (!vp56_rac_get_prob_branchy(c, tp[8])) {
                    if (!vp56_rac_get_prob_branchy(c, tp[9])) {
                        val = 11 + (vp56_rac_get_prob(c, 173) << 2);
                        val += (vp56_rac_get_prob(c, 148) << 1);
                        val += vp56_rac_get_prob(c, 140);
                    } else {
                        val = 19 + (vp56_rac_get_prob(c, 176) << 3);
                        val += (vp56_rac_get_prob(c, 155) << 2);
                        val += (vp56_rac_get_prob(c, 140) << 1);
                        val += vp56_rac_get_prob(c, 135);
                    }
                } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
                    val = (vp56_rac_get_prob(c, 180) << 4) + 35;
                    val += (vp56_rac_get_prob(c, 157) << 3);
                    val += (vp56_rac_get_prob(c, 141) << 2);
                    val += (vp56_rac_get_prob(c, 134) << 1);
                    val += vp56_rac_get_prob(c, 130);
                } else {
                    val = 67;
                    if (!is8bitsperpixel) {
                        if (bpp == 12) {
                            val += vp56_rac_get_prob(c, 255) << 17;
                            val += vp56_rac_get_prob(c, 255) << 16;
                        }
                        val += (vp56_rac_get_prob(c, 255) << 15);
                        val += (vp56_rac_get_prob(c, 255) << 14);
                    }
                    val += (vp56_rac_get_prob(c, 254) << 13);
                    val += (vp56_rac_get_prob(c, 254) << 12);
                    val += (vp56_rac_get_prob(c, 254) << 11);
                    val += (vp56_rac_get_prob(c, 252) << 10);
                    val += (vp56_rac_get_prob(c, 249) << 9);
                    val += (vp56_rac_get_prob(c, 243) << 8);
                    val += (vp56_rac_get_prob(c, 230) << 7);
                    val += (vp56_rac_get_prob(c, 196) << 6);
                    val += (vp56_rac_get_prob(c, 177) << 5);
                    val += (vp56_rac_get_prob(c, 153) << 4);
                    val += (vp56_rac_get_prob(c, 140) << 3);
                    val += (vp56_rac_get_prob(c, 133) << 2);
                    val += (vp56_rac_get_prob(c, 130) << 1);
                    val += vp56_rac_get_prob(c, 129);
                }
            }
        }
#define STORE_COEF(c, i, v) do { \
    if (is8bitsperpixel) { \
        c[i] = v; \
    } else { \
        AV_WN32A(&c[i * 2], v); \
    } \
} while (0)
        if (!--band_left)
            band_left = band_counts[++band];
        if (is_tx32x32)
            STORE_COEF(coef, rc, (int)((vp8_rac_get(c) ? -val : val) * (unsigned)qmul[!!i]) / 2);
        else
            STORE_COEF(coef, rc, (vp8_rac_get(c) ? -val : val) * (unsigned)qmul[!!i]);
        nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
        tp = p[band][nnz];
    } while (++i < n_coeffs);

    return i;
}

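// thin wrappers that let the compiler specialize decode_coeffs_b_generic
// for 8/16 bpp storage and for the 32x32 transform's halved dequant scale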
static int decode_coeffs_b_8bpp(VP9TileData *td, int16_t *coef, int n_coeffs,
                                unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
                                uint8_t (*p)[6][11], int nnz, const int16_t *scan,
                                const int16_t (*nb)[2], const int16_t *band_counts,
                                int16_t *qmul)
{
    return decode_coeffs_b_generic(td->c, coef, n_coeffs, 0, 1, 8, cnt, eob, p,
                                   nnz, scan, nb, band_counts, qmul);
}

static int decode_coeffs_b32_8bpp(VP9TileData *td, int16_t *coef, int n_coeffs,
                                  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
                                  uint8_t (*p)[6][11], int nnz, const int16_t *scan,
                                  const int16_t (*nb)[2], const int16_t *band_counts,
                                  int16_t *qmul)
{
    return decode_coeffs_b_generic(td->c, coef, n_coeffs, 1, 1, 8, cnt, eob, p,
                                   nnz, scan, nb, band_counts, qmul);
}

static int decode_coeffs_b_16bpp(VP9TileData *td, int16_t *coef, int n_coeffs,
                                 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
                                 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
                                 const int16_t (*nb)[2], const int16_t *band_counts,
                                 int16_t *qmul)
{
    return decode_coeffs_b_generic(td->c, coef, n_coeffs, 0, 0, td->s->s.h.bpp, cnt, eob, p,
                                   nnz, scan, nb, band_counts, qmul);
}

static int decode_coeffs_b32_16bpp(VP9TileData *td, int16_t *coef, int n_coeffs,
                                   unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
                                   uint8_t (*p)[6][11], int nnz, const int16_t *scan,
                                   const int16_t (*nb)[2], const int16_t *band_counts,
                                   int16_t *qmul)
{
    return decode_coeffs_b_generic(td->c, coef, n_coeffs, 1, 0, td->s->s.h.bpp, cnt, eob, p,
                                   nnz, scan, nb, band_counts, qmul);
}

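// decode all luma, then chroma, coefficient blocks of the current block,
// maintaining the above/left nonzero contexts; returns whether any
// coefficients were coded at all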
static av_always_inline int decode_coeffs(VP9TileData *td, int is8bitsperpixel)
{
    VP9Context *s = td->s;
    VP9Block *b = td->b;
    int row = td->row, col = td->col;
    uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
    unsigned (*c)[6][3] = td->counts.coef[b->tx][0 /* y */][!b->intra];
    unsigned (*e)[6][2] = td->counts.eob[b->tx][0 /* y */][!b->intra];
    int w4 = ff_vp9_bwh_tab[1][b->bs][0] << 1, h4 = ff_vp9_bwh_tab[1][b->bs][1] << 1;
    int end_x = FFMIN(2 * (s->cols - col), w4);
    int end_y = FFMIN(2 * (s->rows - row), h4);
    int n, pl, x, y, ret;
    int16_t (*qmul)[2] = s->s.h.segmentation.feat[b->seg_id].qmul;
    int tx = 4 * s->s.h.lossless + b->tx;
    const int16_t * const *yscans = ff_vp9_scans[tx];
    const int16_t (* const * ynbs)[2] = ff_vp9_scans_nb[tx];
    const int16_t *uvscan = ff_vp9_scans[b->uvtx][DCT_DCT];
    const int16_t (*uvnb)[2] = ff_vp9_scans_nb[b->uvtx][DCT_DCT];
    uint8_t *a = &s->above_y_nnz_ctx[col * 2];
    uint8_t *l = &td->left_y_nnz_ctx[(row & 7) << 1];
    static const int16_t band_counts[4][8] = {
        { 1, 2, 3, 4,  3,   16 - 13 },
        { 1, 2, 3, 4, 11,   64 - 21 },
        { 1, 2, 3, 4, 11,  256 - 21 },
        { 1, 2, 3, 4, 11, 1024 - 21 },
    };
    const int16_t *y_band_counts = band_counts[b->tx];
    const int16_t *uv_band_counts = band_counts[b->uvtx];
    int bytesperpixel = is8bitsperpixel ? 1 : 2;
    int total_coeff = 0;

#define MERGE(la, end, step, rd) \
    for (n = 0; n < end; n += step) \
        la[n] = !!rd(&la[n])
#define MERGE_CTX(step, rd) \
    do { \
        MERGE(l, end_y, step, rd); \
        MERGE(a, end_x, step, rd); \
    } while (0)

#define DECODE_Y_COEF_LOOP(step, mode_index, v) \
    for (n = 0, y = 0; y < end_y; y += step) { \
        for (x = 0; x < end_x; x += step, n += step * step) { \
            enum TxfmType txtp = ff_vp9_intra_txfm_type[b->mode[mode_index]]; \
            ret = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
                      (td, td->block + 16 * n * bytesperpixel, 16 * step * step, \
                       c, e, p, a[x] + l[y], yscans[txtp], \
                       ynbs[txtp], y_band_counts, qmul[0]); \
            a[x] = l[y] = !!ret; \
            total_coeff |= !!ret; \
            if (step >= 4) { \
                AV_WN16A(&td->eob[n], ret); \
            } else { \
                td->eob[n] = ret; \
            } \
        } \
    }

#define SPLAT(la, end, step, cond) \
    if (step == 2) { \
        for (n = 1; n < end; n += step) \
            la[n] = la[n - 1]; \
    } else if (step == 4) { \
        if (cond) { \
            for (n = 0; n < end; n += step) \
                AV_WN32A(&la[n], la[n] * 0x01010101); \
        } else { \
            for (n = 0; n < end; n += step) \
                memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
        } \
    } else /* step == 8 */ { \
        if (cond) { \
            if (HAVE_FAST_64BIT) { \
                for (n = 0; n < end; n += step) \
                    AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
            } else { \
                for (n = 0; n < end; n += step) { \
                    uint32_t v32 = la[n] * 0x01010101; \
                    AV_WN32A(&la[n],     v32); \
                    AV_WN32A(&la[n + 4], v32); \
                } \
            } \
        } else { \
            for (n = 0; n < end; n += step) \
                memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
        } \
    }
#define SPLAT_CTX(step) \
    do { \
        SPLAT(a, end_x, step, end_x == w4); \
        SPLAT(l, end_y, step, end_y == h4); \
    } while (0)

    /* y tokens */
    switch (b->tx) {
    case TX_4X4:
        DECODE_Y_COEF_LOOP(1, b->bs > BS_8x8 ? n : 0,);
        break;
    case TX_8X8:
        MERGE_CTX(2, AV_RN16A);
        DECODE_Y_COEF_LOOP(2, 0,);
        SPLAT_CTX(2);
        break;
    case TX_16X16:
        MERGE_CTX(4, AV_RN32A);
        DECODE_Y_COEF_LOOP(4, 0,);
        SPLAT_CTX(4);
        break;
    case TX_32X32:
        MERGE_CTX(8, AV_RN64A);
        DECODE_Y_COEF_LOOP(8, 0, 32);
        SPLAT_CTX(8);
        break;
    }

#define DECODE_UV_COEF_LOOP(step, v) \
    for (n = 0, y = 0; y < end_y; y += step) { \
        for (x = 0; x < end_x; x += step, n += step * step) { \
            ret = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
                      (td, td->uvblock[pl] + 16 * n * bytesperpixel, \
                       16 * step * step, c, e, p, a[x] + l[y], \
                       uvscan, uvnb, uv_band_counts, qmul[1]); \
            a[x] = l[y] = !!ret; \
            total_coeff |= !!ret; \
            if (step >= 4) { \
                AV_WN16A(&td->uveob[pl][n], ret); \
            } else { \
                td->uveob[pl][n] = ret; \
            } \
        } \
    }

    p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
    c = td->counts.coef[b->uvtx][1 /* uv */][!b->intra];
    e = td->counts.eob[b->uvtx][1 /* uv */][!b->intra];
    w4 >>= s->ss_h;
    end_x >>= s->ss_h;
    h4 >>= s->ss_v;
    end_y >>= s->ss_v;
    for (pl = 0; pl < 2; pl++) {
        a = &s->above_uv_nnz_ctx[pl][col << !s->ss_h];
        l = &td->left_uv_nnz_ctx[pl][(row & 7) << !s->ss_v];
        switch (b->uvtx) {
        case TX_4X4:
            DECODE_UV_COEF_LOOP(1,);
            break;
        case TX_8X8:
            MERGE_CTX(2, AV_RN16A);
            DECODE_UV_COEF_LOOP(2,);
            SPLAT_CTX(2);
            break;
        case TX_16X16:
            MERGE_CTX(4, AV_RN32A);
            DECODE_UV_COEF_LOOP(4,);
            SPLAT_CTX(4);
            break;
        case TX_32X32:
            MERGE_CTX(8, AV_RN64A);
            DECODE_UV_COEF_LOOP(8, 32);
            SPLAT_CTX(8);
            break;
        }
    }

    return total_coeff;
}

static int decode_coeffs_8bpp(VP9TileData *td)
{
    return decode_coeffs(td, 1);
}

static int decode_coeffs_16bpp(VP9TileData *td)
{
    return decode_coeffs(td, 0);
}

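// mark which horizontal/vertical edges of this block the loop filter must
// process, and at which filter width, as bitmasks in VP9Filter.mask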
static av_always_inline void mask_edges(uint8_t (*mask)[8][4], int ss_h, int ss_v,
                                        int row_and_7, int col_and_7,
                                        int w, int h, int col_end, int row_end,
                                        enum TxfmMode tx, int skip_inter)
{
    static const unsigned wide_filter_col_mask[2] = { 0x11, 0x01 };
    static const unsigned wide_filter_row_mask[2] = { 0x03, 0x07 };

    // FIXME I'm pretty sure all loops can be replaced by a single LUT if
    // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
    // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
    // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)

    // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
    // edges. This means that for UV, we work on two subsampled blocks at
    // a time, and we only use the topleft block's mode information to set
    // things like block strength. Thus, for any block size smaller than
    // 16x16, ignore the odd portion of the block.
    if (tx == TX_4X4 && (ss_v | ss_h)) {
        if (h == ss_v) {
            if (row_and_7 & 1)
                return;
            if (!row_end)
                h += 1;
        }
        if (w == ss_h) {
            if (col_and_7 & 1)
                return;
            if (!col_end)
                w += 1;
        }
    }

    if (tx == TX_4X4 && !skip_inter) {
        int t = 1 << col_and_7, m_col = (t << w) - t, y;
        // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
        int m_row_8 = m_col & wide_filter_col_mask[ss_h], m_row_4 = m_col - m_row_8;

        for (y = row_and_7; y < h + row_and_7; y++) {
            int col_mask_id = 2 - !(y & wide_filter_row_mask[ss_v]);

            mask[0][y][1] |= m_row_8;
            mask[0][y][2] |= m_row_4;
            // for odd lines, if the odd col is not being filtered,
            // skip odd row also:
            // .---. <-- a
            // |   |
            // |___| <-- b
            // ^   ^
            // c   d
            //
            // if a/c are even row/col and b/d are odd, and d is skipped,
            // e.g. right edge of size-66x66.webm, then skip b also (bug)
            if ((ss_h & ss_v) && (col_end & 1) && (y & 1)) {
                mask[1][y][col_mask_id] |= (t << (w - 1)) - t;
            } else {
                mask[1][y][col_mask_id] |= m_col;
            }
            if (!ss_h)
                mask[0][y][3] |= m_col;
            if (!ss_v) {
                if (ss_h && (col_end & 1))
                    mask[1][y][3] |= (t << (w - 1)) - t;
                else
                    mask[1][y][3] |= m_col;
            }
        }
    } else {
        int y, t = 1 << col_and_7, m_col = (t << w) - t;

        if (!skip_inter) {
            int mask_id = (tx == TX_8X8);
            int l2 = tx + ss_h - 1, step1d;
            static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
            int m_row = m_col & masks[l2];

            // at odd UV col/row edges tx16/tx32 loopfilter edges, force
            // 8wd loopfilter to prevent going off the visible edge.
            if (ss_h && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
                int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
                int m_row_8 = m_row - m_row_16;

                for (y = row_and_7; y < h + row_and_7; y++) {
                    mask[0][y][0] |= m_row_16;
                    mask[0][y][1] |= m_row_8;
                }
            } else {
                for (y = row_and_7; y < h + row_and_7; y++)
                    mask[0][y][mask_id] |= m_row;
            }

            l2 = tx + ss_v - 1;
            step1d = 1 << l2;
            if (ss_v && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
                for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
                    mask[1][y][0] |= m_col;
                if (y - row_and_7 == h - 1)
                    mask[1][y][1] |= m_col;
            } else {
                for (y = row_and_7; y < h + row_and_7; y += step1d)
                    mask[1][y][mask_id] |= m_col;
            }
        } else if (tx != TX_4X4) {
            int mask_id;

            mask_id = (tx == TX_8X8) || (h == ss_v);
            mask[1][row_and_7][mask_id] |= m_col;
            mask_id = (tx == TX_8X8) || (w == ss_h);
            for (y = row_and_7; y < h + row_and_7; y++)
                mask[0][y][mask_id] |= t;
        } else {
            int t8 = t & wide_filter_col_mask[ss_h], t4 = t - t8;

            for (y = row_and_7; y < h + row_and_7; y++) {
                mask[0][y][2] |= t4;
                mask[0][y][1] |= t8;
            }
            mask[1][row_and_7][2 - !(row_and_7 & wide_filter_row_mask[ss_v])] |= m_col;
        }
    }
}

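// decode one block: mode and coefficient parsing (pass < 2), intra
// prediction or motion compensation, and loop-filter mask setup;
// yoff/uvoff are the byte offsets of the block inside the current frame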
void ff_vp9_decode_block(VP9TileData *td, int row, int col,
                         VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
                         enum BlockLevel bl, enum BlockPartition bp)
{
    VP9Context *s = td->s;
    VP9Block *b = td->b;
    enum BlockSize bs = bl * 3 + bp;
    int bytesperpixel = s->bytesperpixel;
    int w4 = ff_vp9_bwh_tab[1][bs][0], h4 = ff_vp9_bwh_tab[1][bs][1], lvl;
    int emu[2];
    AVFrame *f = s->s.frames[CUR_FRAME].tf.f;

    td->row = row;
    td->row7 = row & 7;
    td->col = col;
    td->col7 = col & 7;

    td->min_mv.x = -(128 + col * 64);
    td->min_mv.y = -(128 + row * 64);
    td->max_mv.x = 128 + (s->cols - col - w4) * 64;
    td->max_mv.y = 128 + (s->rows - row - h4) * 64;

    if (s->pass < 2) {
        b->bs = bs;
        b->bl = bl;
        b->bp = bp;
        decode_mode(td);
        b->uvtx = b->tx - ((s->ss_h && w4 * 2 == (1 << b->tx)) ||
                           (s->ss_v && h4 * 2 == (1 << b->tx)));

        if (td->block_structure) {
            td->block_structure[td->nb_block_structure].row = row;
            td->block_structure[td->nb_block_structure].col = col;
            td->block_structure[td->nb_block_structure].block_size_idx_x = av_log2(w4);
            td->block_structure[td->nb_block_structure].block_size_idx_y = av_log2(h4);
            td->nb_block_structure++;
        }

        if (!b->skip) {
            int has_coeffs;

            if (bytesperpixel == 1) {
                has_coeffs = decode_coeffs_8bpp(td);
            } else {
                has_coeffs = decode_coeffs_16bpp(td);
            }
            if (!has_coeffs && b->bs <= BS_8x8 && !b->intra) {
                b->skip = 1;
                memset(&s->above_skip_ctx[col], 1, w4);
                memset(&td->left_skip_ctx[td->row7], 1, h4);
            }
        } else {
            int row7 = td->row7;

#define SPLAT_ZERO_CTX(v, n) \
    switch (n) { \
    case 1:  v = 0;          break; \
    case 2:  AV_ZERO16(&v);  break; \
    case 4:  AV_ZERO32(&v);  break; \
    case 8:  AV_ZERO64(&v);  break; \
    case 16: AV_ZERO128(&v); break; \
    }
#define SPLAT_ZERO_YUV(dir, var, off, n, dir2) \
    do { \
        SPLAT_ZERO_CTX(dir##_y_##var[off * 2], n * 2); \
        if (s->ss_##dir2) { \
            SPLAT_ZERO_CTX(dir##_uv_##var[0][off], n); \
            SPLAT_ZERO_CTX(dir##_uv_##var[1][off], n); \
        } else { \
            SPLAT_ZERO_CTX(dir##_uv_##var[0][off * 2], n * 2); \
            SPLAT_ZERO_CTX(dir##_uv_##var[1][off * 2], n * 2); \
        } \
    } while (0)

            switch (w4) {
            case 1: SPLAT_ZERO_YUV(s->above, nnz_ctx, col, 1, h); break;
            case 2: SPLAT_ZERO_YUV(s->above, nnz_ctx, col, 2, h); break;
            case 4: SPLAT_ZERO_YUV(s->above, nnz_ctx, col, 4, h); break;
            case 8: SPLAT_ZERO_YUV(s->above, nnz_ctx, col, 8, h); break;
            }
            switch (h4) {
            case 1: SPLAT_ZERO_YUV(td->left, nnz_ctx, row7, 1, v); break;
            case 2: SPLAT_ZERO_YUV(td->left, nnz_ctx, row7, 2, v); break;
            case 4: SPLAT_ZERO_YUV(td->left, nnz_ctx, row7, 4, v); break;
            case 8: SPLAT_ZERO_YUV(td->left, nnz_ctx, row7, 8, v); break;
            }
        }

        if (s->pass == 1) {
            s->td[0].b++;
            s->td[0].block += w4 * h4 * 64 * bytesperpixel;
            s->td[0].uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
            s->td[0].uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
            s->td[0].eob += 4 * w4 * h4;
            s->td[0].uveob[0] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
            s->td[0].uveob[1] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);

            return;
        }
    }

    // emulated overhangs if the stride of the target buffer can't hold. This
    // makes it possible to support emu-edge and so on even if we have large block
    // overhangs
    emu[0] = (col + w4) * 8 * bytesperpixel > f->linesize[0] ||
             (row + h4) > s->rows;
    emu[1] = ((col + w4) * 8 >> s->ss_h) * bytesperpixel > f->linesize[1] ||
             (row + h4) > s->rows;
    if (emu[0]) {
        td->dst[0] = td->tmp_y;
        td->y_stride = 128;
    } else {
        td->dst[0] = f->data[0] + yoff;
        td->y_stride = f->linesize[0];
    }
    if (emu[1]) {
        td->dst[1] = td->tmp_uv[0];
        td->dst[2] = td->tmp_uv[1];
        td->uv_stride = 128;
    } else {
        td->dst[1] = f->data[1] + uvoff;
        td->dst[2] = f->data[2] + uvoff;
        td->uv_stride = f->linesize[1];
    }
    if (b->intra) {
        if (s->s.h.bpp > 8) {
            ff_vp9_intra_recon_16bpp(td, yoff, uvoff);
        } else {
            ff_vp9_intra_recon_8bpp(td, yoff, uvoff);
        }
    } else {
        if (s->s.h.bpp > 8) {
            ff_vp9_inter_recon_16bpp(td);
        } else {
            ff_vp9_inter_recon_8bpp(td);
        }
    }
    if (emu[0]) {
        int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;

        for (n = 0; o < w; n++) {
            int bw = 64 >> n;

            av_assert2(n <= 4);
            if (w & bw) {
                s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o * bytesperpixel, f->linesize[0],
                                         td->tmp_y + o * bytesperpixel, 128, h, 0, 0);
                o += bw;
            }
        }
    }
    if (emu[1]) {
        int w = FFMIN(s->cols - col, w4) * 8 >> s->ss_h;
        int h = FFMIN(s->rows - row, h4) * 8 >> s->ss_v, n, o = 0;

        for (n = s->ss_h; o < w; n++) {
            int bw = 64 >> n;

            av_assert2(n <= 4);
            if (w & bw) {
                s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o * bytesperpixel, f->linesize[1],
                                         td->tmp_uv[0] + o * bytesperpixel, 128, h, 0, 0);
                s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o * bytesperpixel, f->linesize[2],
                                         td->tmp_uv[1] + o * bytesperpixel, 128, h, 0, 0);
                o += bw;
            }
        }
    }

    // pick filter level and find edges to apply filter to
    if (s->s.h.filter.level &&
        (lvl = s->s.h.segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
                                                        [b->mode[3] != ZEROMV]) > 0) {
        int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
        int skip_inter = !b->intra && b->skip, col7 = td->col7, row7 = td->row7;

        setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
        mask_edges(lflvl->mask[0], 0, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
        if (s->ss_h || s->ss_v)
            mask_edges(lflvl->mask[1], s->ss_h, s->ss_v, row7, col7, x_end, y_end,
                       s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
                       s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
                       b->uvtx, skip_inter);
    }

    if (s->pass == 2) {
        s->td[0].b++;
        s->td[0].block += w4 * h4 * 64 * bytesperpixel;
        s->td[0].uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
        s->td[0].uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
        s->td[0].eob += 4 * w4 * h4;
        s->td[0].uveob[0] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
        s->td[0].uveob[1] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
    }
}