FFmpeg
vp9recon.c
Go to the documentation of this file.
1 /*
2  * VP9 compatible video decoder
3  *
4  * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5  * Copyright (C) 2013 Clément Bœsch <u pkh me>
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 #include "libavutil/avassert.h"
25 #include "libavutil/mem_internal.h"
26 
27 #include "videodsp.h"
28 #include "vp9data.h"
29 #include "vp9dec.h"
30 
/*
 * Build the "above" (*a) and "left" (l) reference-pixel arrays needed to
 * intra-predict one transform block, substituting constant DC fill values
 * where neighbouring pixels do not exist, and return the prediction mode
 * remapped (via mode_conv[]) to one that is valid for the neighbours that
 * are actually available.
 *
 * NOTE(review): this is a Doxygen listing export; original listing lines
 * 50-60 and 62 (the remaining rows of mode_conv[]) are missing from this
 * view — restore them from the upstream vp9recon.c before compiling.
 */
31 static av_always_inline int check_intra_mode(VP9TileData *td, int mode, uint8_t **a,
32  uint8_t *dst_edge, ptrdiff_t stride_edge,
33  uint8_t *dst_inner, ptrdiff_t stride_inner,
34  uint8_t *l, int col, int x, int w,
35  int row, int y, enum TxfmMode tx,
36  int p, int ss_h, int ss_v, int bytesperpixel)
37 {
38  VP9Context *s = td->s;
39  int have_top = row > 0 || y > 0;
40  int have_left = col > td->tile_col_start || x > 0;
41  int have_right = x < w - 1;
42  int bpp = s->s.h.bpp;
 // How each mode degrades when the left and/or top neighbours are absent.
43  static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
44  [VERT_PRED] = { { DC_127_PRED, VERT_PRED },
45  { DC_127_PRED, VERT_PRED } },
46  [HOR_PRED] = { { DC_129_PRED, DC_129_PRED },
47  { HOR_PRED, HOR_PRED } },
48  [DC_PRED] = { { DC_128_PRED, TOP_DC_PRED },
49  { LEFT_DC_PRED, DC_PRED } },
 // NOTE(review): table rows for the remaining modes are missing here
 // (listing lines 50-60 and 62 were dropped by the export).
61  { HOR_UP_PRED, HOR_UP_PRED } },
63  { HOR_PRED, TM_VP8_PRED } },
64  };
 // Which neighbouring pixels each (remapped) prediction mode actually reads.
65  static const struct {
66  uint8_t needs_left:1;
67  uint8_t needs_top:1;
68  uint8_t needs_topleft:1;
69  uint8_t needs_topright:1;
70  uint8_t invert_left:1;
71  } edges[N_INTRA_PRED_MODES] = {
72  [VERT_PRED] = { .needs_top = 1 },
73  [HOR_PRED] = { .needs_left = 1 },
74  [DC_PRED] = { .needs_top = 1, .needs_left = 1 },
75  [DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
76  [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1,
77  .needs_topleft = 1 },
78  [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1,
79  .needs_topleft = 1 },
80  [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1,
81  .needs_topleft = 1 },
82  [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
83  [HOR_UP_PRED] = { .needs_left = 1, .invert_left = 1 },
84  [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1,
85  .needs_topleft = 1 },
86  [LEFT_DC_PRED] = { .needs_left = 1 },
87  [TOP_DC_PRED] = { .needs_top = 1 },
88  [DC_128_PRED] = { 0 },
89  [DC_127_PRED] = { 0 },
90  [DC_129_PRED] = { 0 }
91  };
92 
 // Remap the mode for the neighbours we have, then gather edge pixels.
93  av_assert2(mode >= 0 && mode < 10);
94  mode = mode_conv[mode][have_left][have_top];
95  if (edges[mode].needs_top) {
96  uint8_t *top, *topleft;
97  int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !ss_h) - x) * 4;
98  int n_px_need_tr = 0;
99 
100  if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
101  n_px_need_tr = 4;
102 
103  // if top of sb64-row, use s->intra_pred_data[] instead of
104  // dst[-stride] for intra prediction (it contains pre- instead of
105  // post-loopfilter data)
106  if (have_top) {
107  top = !(row & 7) && !y ?
108  s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
109  y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
110  if (have_left)
111  topleft = !(row & 7) && !y ?
112  s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
113  y == 0 || x == 0 ? &dst_edge[-stride_edge] :
114  &dst_inner[-stride_inner];
115  }
116 
 // Fast path: point *a at the existing row when it is contiguous and long
 // enough; otherwise copy/extend/fill into the caller-provided buffer.
117  if (have_top &&
118  (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
119  (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
120  n_px_need + n_px_need_tr <= n_px_have) {
121  *a = top;
122  } else {
123  if (have_top) {
124  if (n_px_need <= n_px_have) {
125  memcpy(*a, top, n_px_need * bytesperpixel);
126  } else {
127 #define memset_bpp(c, i1, v, i2, num) do { \
128  if (bytesperpixel == 1) { \
129  memset(&(c)[(i1)], (v)[(i2)], (num)); \
130  } else { \
131  int n, val = AV_RN16A(&(v)[(i2) * 2]); \
132  for (n = 0; n < (num); n++) { \
133  AV_WN16A(&(c)[((i1) + n) * 2], val); \
134  } \
135  } \
136 } while (0)
137  memcpy(*a, top, n_px_have * bytesperpixel);
138  memset_bpp(*a, n_px_have, (*a), n_px_have - 1, n_px_need - n_px_have);
139  }
140  } else {
141 #define memset_val(c, val, num) do { \
142  if (bytesperpixel == 1) { \
143  memset((c), (val), (num)); \
144  } else { \
145  int n; \
146  for (n = 0; n < (num); n++) { \
147  AV_WN16A(&(c)[n * 2], (val)); \
148  } \
149  } \
150 } while (0)
151  memset_val(*a, (128 << (bpp - 8)) - 1, n_px_need);
152  }
153  if (edges[mode].needs_topleft) {
154  if (have_left && have_top) {
155 #define assign_bpp(c, i1, v, i2) do { \
156  if (bytesperpixel == 1) { \
157  (c)[(i1)] = (v)[(i2)]; \
158  } else { \
159  AV_COPY16(&(c)[(i1) * 2], &(v)[(i2) * 2]); \
160  } \
161 } while (0)
162  assign_bpp(*a, -1, topleft, -1);
163  } else {
164 #define assign_val(c, i, v) do { \
165  if (bytesperpixel == 1) { \
166  (c)[(i)] = (v); \
167  } else { \
168  AV_WN16A(&(c)[(i) * 2], (v)); \
169  } \
170 } while (0)
171  assign_val((*a), -1, (128 << (bpp - 8)) + (have_top ? +1 : -1));
172  }
173  }
174  if (tx == TX_4X4 && edges[mode].needs_topright) {
175  if (have_top && have_right &&
176  n_px_need + n_px_need_tr <= n_px_have) {
177  memcpy(&(*a)[4 * bytesperpixel], &top[4 * bytesperpixel], 4 * bytesperpixel);
178  } else {
179  memset_bpp(*a, 4, *a, 3, 4);
180  }
181  }
182  }
183  }
 // Gather the left edge into l[]; storage order depends on invert_left
 // (set only for HOR_UP_PRED in edges[] above).
184  if (edges[mode].needs_left) {
185  if (have_left) {
186  int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !ss_v) - y) * 4;
187  uint8_t *dst = x == 0 ? dst_edge : dst_inner;
188  ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
189 
190  if (edges[mode].invert_left) {
191  if (n_px_need <= n_px_have) {
192  for (i = 0; i < n_px_need; i++)
193  assign_bpp(l, i, &dst[i * stride], -1);
194  } else {
195  for (i = 0; i < n_px_have; i++)
196  assign_bpp(l, i, &dst[i * stride], -1);
197  memset_bpp(l, n_px_have, l, n_px_have - 1, n_px_need - n_px_have);
198  }
199  } else {
200  if (n_px_need <= n_px_have) {
201  for (i = 0; i < n_px_need; i++)
202  assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
203  } else {
204  for (i = 0; i < n_px_have; i++)
205  assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
206  memset_bpp(l, 0, l, n_px_need - n_px_have, n_px_need - n_px_have);
207  }
208  }
209  } else {
 // No left neighbour: constant fill.
210  memset_val(l, (128 << (bpp - 8)) + 1, 4 << tx);
211  }
212  }
213 
214  return mode;
215 }
216 
/*
 * Intra reconstruction for one block: for each luma transform sub-block,
 * prepare the edge pixels (check_intra_mode()), run the spatial predictor
 * (s->dsp.intra_pred[]) and, when coefficients are present (eob != 0),
 * add the inverse transform; then do the same for both chroma planes,
 * which share the single mode b->uvmode and always use DCT_DCT.
 */
217 static av_always_inline void intra_recon(VP9TileData *td, ptrdiff_t y_off,
218  ptrdiff_t uv_off, int bytesperpixel)
219 {
220  VP9Context *s = td->s;
221  VP9Block *b = td->b;
222  int row = td->row, col = td->col;
223  int w4 = ff_vp9_bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
224  int h4 = ff_vp9_bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
225  int end_x = FFMIN(2 * (s->cols - col), w4);
226  int end_y = FFMIN(2 * (s->rows - row), h4);
227  int tx = 4 * s->s.h.lossless + b->tx, uvtx = b->uvtx + 4 * s->s.h.lossless;
228  int uvstep1d = 1 << b->uvtx, p;
229  uint8_t *dst = td->dst[0], *dst_r = s->s.frames[CUR_FRAME].tf.f->data[0] + y_off;
230  LOCAL_ALIGNED_32(uint8_t, a_buf, [96]);
231  LOCAL_ALIGNED_32(uint8_t, l, [64]);
232 
 // Luma: iterate transform blocks in raster order; n indexes the
 // per-block coefficient (td->block) and eob (td->eob) storage.
233  for (n = 0, y = 0; y < end_y; y += step1d) {
234  uint8_t *ptr = dst, *ptr_r = dst_r;
235  for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d * bytesperpixel,
236  ptr_r += 4 * step1d * bytesperpixel, n += step) {
237  int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
238  y * 2 + x : 0];
239  uint8_t *a = &a_buf[32];
240  enum TxfmType txtp = ff_vp9_intra_txfm_type[mode];
241  int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&td->eob[n]) : td->eob[n];
242 
243  mode = check_intra_mode(td, mode, &a, ptr_r,
244  s->s.frames[CUR_FRAME].tf.f->linesize[0],
245  ptr, td->y_stride, l,
246  col, x, w4, row, y, b->tx, 0, 0, 0, bytesperpixel);
247  s->dsp.intra_pred[b->tx][mode](ptr, td->y_stride, l, a);
248  if (eob)
249  s->dsp.itxfm_add[tx][txtp](ptr, td->y_stride,
250  td->block + 16 * n * bytesperpixel, eob);
251  }
252  dst_r += 4 * step1d * s->s.frames[CUR_FRAME].tf.f->linesize[0];
253  dst += 4 * step1d * td->y_stride;
254  }
255 
256  // U/V
257  w4 >>= s->ss_h;
258  end_x >>= s->ss_h;
259  end_y >>= s->ss_v;
260  step = 1 << (b->uvtx * 2);
261  for (p = 0; p < 2; p++) {
262  dst = td->dst[1 + p];
263  dst_r = s->s.frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
264  for (n = 0, y = 0; y < end_y; y += uvstep1d) {
265  uint8_t *ptr = dst, *ptr_r = dst_r;
266  for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d * bytesperpixel,
267  ptr_r += 4 * uvstep1d * bytesperpixel, n += step) {
268  int mode = b->uvmode;
269  uint8_t *a = &a_buf[32];
270  int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&td->uveob[p][n]) : td->uveob[p][n];
271 
272  mode = check_intra_mode(td, mode, &a, ptr_r,
273  s->s.frames[CUR_FRAME].tf.f->linesize[1],
274  ptr, td->uv_stride, l, col, x, w4, row, y,
275  b->uvtx, p + 1, s->ss_h, s->ss_v, bytesperpixel);
276  s->dsp.intra_pred[b->uvtx][mode](ptr, td->uv_stride, l, a);
277  if (eob)
278  s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, td->uv_stride,
279  td->uvblock[p] + 16 * n * bytesperpixel, eob);
280  }
281  dst_r += 4 * uvstep1d * s->s.frames[CUR_FRAME].tf.f->linesize[1];
282  dst += 4 * uvstep1d * td->uv_stride;
283  }
284  }
285 }
286 
287 void ff_vp9_intra_recon_8bpp(VP9TileData *td, ptrdiff_t y_off, ptrdiff_t uv_off)
288 {
289  intra_recon(td, y_off, uv_off, 1);
290 }
291 
292 void ff_vp9_intra_recon_16bpp(VP9TileData *td, ptrdiff_t y_off, ptrdiff_t uv_off)
293 {
294  intra_recon(td, y_off, uv_off, 2);
295 }
296 
298  uint8_t *dst, ptrdiff_t dst_stride,
299  const uint8_t *ref, ptrdiff_t ref_stride,
300  ThreadFrame *ref_frame,
301  ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
302  int bw, int bh, int w, int h, int bytesperpixel)
303 {
304  VP9Context *s = td->s;
305  int mx = mv->x, my = mv->y, th;
306 
307  y += my >> 3;
308  x += mx >> 3;
309  ref += y * ref_stride + x * bytesperpixel;
310  mx &= 7;
311  my &= 7;
312  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
313  // we use +7 because the last 7 pixels of each sbrow can be changed in
314  // the longest loopfilter of the next sbrow
315  th = (y + bh + 4 * !!my + 7) >> 6;
316  ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
317  // The arm/aarch64 _hv filters read one more row than what actually is
318  // needed, so switch to emulated edge one pixel sooner vertically
319  // (!!my * 5) than horizontally (!!mx * 4).
320  if (x < !!mx * 3 || y < !!my * 3 ||
321  x + !!mx * 4 > w - bw || y + !!my * 5 > h - bh) {
322  s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
323  ref - !!my * 3 * ref_stride - !!mx * 3 * bytesperpixel,
324  160, ref_stride,
325  bw + !!mx * 7, bh + !!my * 7,
326  x - !!mx * 3, y - !!my * 3, w, h);
327  ref = td->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
328  ref_stride = 160;
329  }
330  mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
331 }
332 
334  uint8_t *dst_u, uint8_t *dst_v,
335  ptrdiff_t dst_stride,
336  const uint8_t *ref_u, ptrdiff_t src_stride_u,
337  const uint8_t *ref_v, ptrdiff_t src_stride_v,
338  ThreadFrame *ref_frame,
339  ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
340  int bw, int bh, int w, int h, int bytesperpixel)
341 {
342  VP9Context *s = td->s;
343  int mx = mv->x * (1 << !s->ss_h), my = mv->y * (1 << !s->ss_v), th;
344 
345  y += my >> 4;
346  x += mx >> 4;
347  ref_u += y * src_stride_u + x * bytesperpixel;
348  ref_v += y * src_stride_v + x * bytesperpixel;
349  mx &= 15;
350  my &= 15;
351  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
352  // we use +7 because the last 7 pixels of each sbrow can be changed in
353  // the longest loopfilter of the next sbrow
354  th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);
355  ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
356  // The arm/aarch64 _hv filters read one more row than what actually is
357  // needed, so switch to emulated edge one pixel sooner vertically
358  // (!!my * 5) than horizontally (!!mx * 4).
359  if (x < !!mx * 3 || y < !!my * 3 ||
360  x + !!mx * 4 > w - bw || y + !!my * 5 > h - bh) {
361  s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
362  ref_u - !!my * 3 * src_stride_u - !!mx * 3 * bytesperpixel,
363  160, src_stride_u,
364  bw + !!mx * 7, bh + !!my * 7,
365  x - !!mx * 3, y - !!my * 3, w, h);
366  ref_u = td->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
367  mc[!!mx][!!my](dst_u, dst_stride, ref_u, 160, bh, mx, my);
368 
369  s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
370  ref_v - !!my * 3 * src_stride_v - !!mx * 3 * bytesperpixel,
371  160, src_stride_v,
372  bw + !!mx * 7, bh + !!my * 7,
373  x - !!mx * 3, y - !!my * 3, w, h);
374  ref_v = td->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
375  mc[!!mx][!!my](dst_v, dst_stride, ref_v, 160, bh, mx, my);
376  } else {
377  mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
378  mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
379  }
380 }
381 
// Bind mc_luma_dir()/mc_chroma_dir() to the *unscaled* motion-compensation
// helpers, then include vp9_mc_template.c twice to stamp out the inter
// prediction entry points used by inter_recon(): the _8bpp and _16bpp
// variants (the px/py/pw/ph and i template arguments are unused here).
382 #define mc_luma_dir(td, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
383  px, py, pw, ph, bw, bh, w, h, i) \
384  mc_luma_unscaled(td, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
385  mv, bw, bh, w, h, bytesperpixel)
386 #define mc_chroma_dir(td, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
387  row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
388  mc_chroma_unscaled(td, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
389  row, col, mv, bw, bh, w, h, bytesperpixel)
390 #define SCALED 0
391 #define FN(x) x##_8bpp
392 #define BYTES_PER_PIXEL 1
393 #include "vp9_mc_template.c"
394 #undef FN
395 #undef BYTES_PER_PIXEL
396 #define FN(x) x##_16bpp
397 #define BYTES_PER_PIXEL 2
398 #include "vp9_mc_template.c"
399 #undef mc_luma_dir
400 #undef mc_chroma_dir
401 #undef FN
402 #undef BYTES_PER_PIXEL
403 #undef SCALED
404 
406  vp9_mc_func (*mc)[2],
407  uint8_t *dst, ptrdiff_t dst_stride,
408  const uint8_t *ref, ptrdiff_t ref_stride,
409  ThreadFrame *ref_frame,
410  ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
411  int px, int py, int pw, int ph,
412  int bw, int bh, int w, int h, int bytesperpixel,
413  const uint16_t *scale, const uint8_t *step)
414 {
415  VP9Context *s = td->s;
416  if (s->s.frames[CUR_FRAME].tf.f->width == ref_frame->f->width &&
417  s->s.frames[CUR_FRAME].tf.f->height == ref_frame->f->height) {
418  mc_luma_unscaled(td, mc, dst, dst_stride, ref, ref_stride, ref_frame,
419  y, x, in_mv, bw, bh, w, h, bytesperpixel);
420  } else {
421 #define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)
422  int mx, my;
423  int refbw_m1, refbh_m1;
424  int th;
425  VP56mv mv;
426 
427  mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 8, (s->cols * 8 - x + px + 3) * 8);
428  mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 8, (s->rows * 8 - y + py + 3) * 8);
429  // BUG libvpx seems to scale the two components separately. This introduces
430  // rounding errors but we have to reproduce them to be exactly compatible
431  // with the output from libvpx...
432  mx = scale_mv(mv.x * 2, 0) + scale_mv(x * 16, 0);
433  my = scale_mv(mv.y * 2, 1) + scale_mv(y * 16, 1);
434 
435  y = my >> 4;
436  x = mx >> 4;
437  ref += y * ref_stride + x * bytesperpixel;
438  mx &= 15;
439  my &= 15;
440  refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
441  refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
442  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
443  // we use +7 because the last 7 pixels of each sbrow can be changed in
444  // the longest loopfilter of the next sbrow
445  th = (y + refbh_m1 + 4 + 7) >> 6;
446  ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
447  // The arm/aarch64 _hv filters read one more row than what actually is
448  // needed, so switch to emulated edge one pixel sooner vertically
449  // (y + 5 >= h - refbh_m1) than horizontally (x + 4 >= w - refbw_m1).
450  if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 5 >= h - refbh_m1) {
451  s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
452  ref - 3 * ref_stride - 3 * bytesperpixel,
453  288, ref_stride,
454  refbw_m1 + 8, refbh_m1 + 8,
455  x - 3, y - 3, w, h);
456  ref = td->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
457  ref_stride = 288;
458  }
459  smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
460  }
461 }
462 
464  vp9_mc_func (*mc)[2],
465  uint8_t *dst_u, uint8_t *dst_v,
466  ptrdiff_t dst_stride,
467  const uint8_t *ref_u, ptrdiff_t src_stride_u,
468  const uint8_t *ref_v, ptrdiff_t src_stride_v,
469  ThreadFrame *ref_frame,
470  ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
471  int px, int py, int pw, int ph,
472  int bw, int bh, int w, int h, int bytesperpixel,
473  const uint16_t *scale, const uint8_t *step)
474 {
475  VP9Context *s = td->s;
476  if (s->s.frames[CUR_FRAME].tf.f->width == ref_frame->f->width &&
477  s->s.frames[CUR_FRAME].tf.f->height == ref_frame->f->height) {
478  mc_chroma_unscaled(td, mc, dst_u, dst_v, dst_stride, ref_u, src_stride_u,
479  ref_v, src_stride_v, ref_frame,
480  y, x, in_mv, bw, bh, w, h, bytesperpixel);
481  } else {
482  int mx, my;
483  int refbw_m1, refbh_m1;
484  int th;
485  VP56mv mv;
486 
487  if (s->ss_h) {
488  // BUG https://code.google.com/p/webm/issues/detail?id=820
489  mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 16, (s->cols * 4 - x + px + 3) * 16);
490  mx = scale_mv(mv.x, 0) + (scale_mv(x * 16, 0) & ~15) + (scale_mv(x * 32, 0) & 15);
491  } else {
492  mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 8, (s->cols * 8 - x + px + 3) * 8);
493  mx = scale_mv(mv.x * 2, 0) + scale_mv(x * 16, 0);
494  }
495  if (s->ss_v) {
496  // BUG https://code.google.com/p/webm/issues/detail?id=820
497  mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 16, (s->rows * 4 - y + py + 3) * 16);
498  my = scale_mv(mv.y, 1) + (scale_mv(y * 16, 1) & ~15) + (scale_mv(y * 32, 1) & 15);
499  } else {
500  mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 8, (s->rows * 8 - y + py + 3) * 8);
501  my = scale_mv(mv.y * 2, 1) + scale_mv(y * 16, 1);
502  }
503 #undef scale_mv
504  y = my >> 4;
505  x = mx >> 4;
506  ref_u += y * src_stride_u + x * bytesperpixel;
507  ref_v += y * src_stride_v + x * bytesperpixel;
508  mx &= 15;
509  my &= 15;
510  refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
511  refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
512  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
513  // we use +7 because the last 7 pixels of each sbrow can be changed in
514  // the longest loopfilter of the next sbrow
515  th = (y + refbh_m1 + 4 + 7) >> (6 - s->ss_v);
516  ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
517  // The arm/aarch64 _hv filters read one more row than what actually is
518  // needed, so switch to emulated edge one pixel sooner vertically
519  // (y + 5 >= h - refbh_m1) than horizontally (x + 4 >= w - refbw_m1).
520  if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 5 >= h - refbh_m1) {
521  s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
522  ref_u - 3 * src_stride_u - 3 * bytesperpixel,
523  288, src_stride_u,
524  refbw_m1 + 8, refbh_m1 + 8,
525  x - 3, y - 3, w, h);
526  ref_u = td->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
527  smc(dst_u, dst_stride, ref_u, 288, bh, mx, my, step[0], step[1]);
528 
529  s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
530  ref_v - 3 * src_stride_v - 3 * bytesperpixel,
531  288, src_stride_v,
532  refbw_m1 + 8, refbh_m1 + 8,
533  x - 3, y - 3, w, h);
534  ref_v = td->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
535  smc(dst_v, dst_stride, ref_v, 288, bh, mx, my, step[0], step[1]);
536  } else {
537  smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
538  smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
539  }
540  }
541 }
542 
// Same instantiation trick for the *scaled* MC path: rebind the dir macros
// to the scaled helpers (which additionally take the per-reference
// s->mvscale/s->mvstep tables) and include vp9_mc_template.c twice more
// to produce the _scaled_8bpp and _scaled_16bpp entry points.
543 #define mc_luma_dir(td, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
544  px, py, pw, ph, bw, bh, w, h, i) \
545  mc_luma_scaled(td, s->dsp.s##mc, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
546  mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
547  s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
548 #define mc_chroma_dir(td, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
549  row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
550  mc_chroma_scaled(td, s->dsp.s##mc, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
551  row, col, mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
552  s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
553 #define SCALED 1
554 #define FN(x) x##_scaled_8bpp
555 #define BYTES_PER_PIXEL 1
556 #include "vp9_mc_template.c"
557 #undef FN
558 #undef BYTES_PER_PIXEL
559 #define FN(x) x##_scaled_16bpp
560 #define BYTES_PER_PIXEL 2
561 #include "vp9_mc_template.c"
562 #undef mc_luma_dir
563 #undef mc_chroma_dir
564 #undef FN
565 #undef BYTES_PER_PIXEL
566 #undef SCALED
567 
/*
 * Inter reconstruction for one block: perform motion compensation for the
 * whole block (scaled or unscaled path chosen per the references'
 * s->mvscale entries), then, unless the block is skipped, add the
 * inverse-transformed residual for luma and both chroma planes.
 */
568 static av_always_inline void inter_recon(VP9TileData *td, int bytesperpixel)
569 {
570  VP9Context *s = td->s;
571  VP9Block *b = td->b;
572  int row = td->row, col = td->col;
573 
 // A reference marked REF_INVALID_SCALE has unusable dimensions; record
 // the error once (first occurrence wins) and skip reconstruction.
574  if (s->mvscale[b->ref[0]][0] == REF_INVALID_SCALE ||
575  (b->comp && s->mvscale[b->ref[1]][0] == REF_INVALID_SCALE)) {
576  if (!s->td->error_info) {
577  s->td->error_info = AVERROR_INVALIDDATA;
578  av_log(NULL, AV_LOG_ERROR, "Bitstream not supported, "
579  "reference frame has invalid dimensions\n");
580  }
581  return;
582  }
583 
 // Non-zero mvscale means the reference has a different size than the
 // current frame, so the scaled MC path is required.
584  if (s->mvscale[b->ref[0]][0] || (b->comp && s->mvscale[b->ref[1]][0])) {
585  if (bytesperpixel == 1) {
586  inter_pred_scaled_8bpp(td);
587  } else {
588  inter_pred_scaled_16bpp(td);
589  }
590  } else {
591  if (bytesperpixel == 1) {
592  inter_pred_8bpp(td);
593  } else {
594  inter_pred_16bpp(td);
595  }
596  }
597 
598  if (!b->skip) {
599  /* mostly copied intra_recon() */
600 
601  int w4 = ff_vp9_bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
602  int h4 = ff_vp9_bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
603  int end_x = FFMIN(2 * (s->cols - col), w4);
604  int end_y = FFMIN(2 * (s->rows - row), h4);
605  int tx = 4 * s->s.h.lossless + b->tx, uvtx = b->uvtx + 4 * s->s.h.lossless;
606  int uvstep1d = 1 << b->uvtx, p;
607  uint8_t *dst = td->dst[0];
608 
609  // y itxfm add
610  for (n = 0, y = 0; y < end_y; y += step1d) {
611  uint8_t *ptr = dst;
612  for (x = 0; x < end_x; x += step1d,
613  ptr += 4 * step1d * bytesperpixel, n += step) {
614  int eob = b->tx > TX_8X8 ? AV_RN16A(&td->eob[n]) : td->eob[n];
615 
616  if (eob)
617  s->dsp.itxfm_add[tx][DCT_DCT](ptr, td->y_stride,
618  td->block + 16 * n * bytesperpixel, eob);
619  }
620  dst += 4 * td->y_stride * step1d;
621  }
622 
623  // uv itxfm add
624  end_x >>= s->ss_h;
625  end_y >>= s->ss_v;
626  step = 1 << (b->uvtx * 2);
627  for (p = 0; p < 2; p++) {
628  dst = td->dst[p + 1];
629  for (n = 0, y = 0; y < end_y; y += uvstep1d) {
630  uint8_t *ptr = dst;
631  for (x = 0; x < end_x; x += uvstep1d,
632  ptr += 4 * uvstep1d * bytesperpixel, n += step) {
633  int eob = b->uvtx > TX_8X8 ? AV_RN16A(&td->uveob[p][n]) : td->uveob[p][n];
634 
635  if (eob)
636  s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, td->uv_stride,
637  td->uvblock[p] + 16 * n * bytesperpixel, eob);
638  }
639  dst += 4 * uvstep1d * td->uv_stride;
640  }
641  }
642  }
643 }
644 
646 {
647  inter_recon(td, 1);
648 }
649 
651 {
652  inter_recon(td, 2);
653 }
stride
int stride
Definition: mace.c:144
td
#define td
Definition: regdef.h:70
av_clip
#define av_clip
Definition: common.h:96
VP56mv::x
int16_t x
Definition: vp56.h:69
mem_internal.h
DC_128_PRED
@ DC_128_PRED
Definition: vp9.h:58
mv
static const int8_t mv[256][2]
Definition: 4xm.c:79
TM_VP8_PRED
@ TM_VP8_PRED
Definition: vp9.h:55
step
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But a word about which is also called distortion Distortion can be quantified by almost any quality measurement one chooses the sum of squared differences is used but more complex methods that consider psychovisual effects can be used as well It makes no difference in this discussion First step
Definition: rate_distortion.txt:58
AVFrame::width
int width
Definition: frame.h:361
w
uint8_t w
Definition: llviddspenc.c:38
DC_PRED
@ DC_PRED
Definition: vp9.h:48
b
#define b
Definition: input.c:40
VERT_LEFT_PRED
@ VERT_LEFT_PRED
Definition: vp9.h:53
inter_recon
static av_always_inline void inter_recon(VP9TileData *td, int bytesperpixel)
Definition: vp9recon.c:568
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
ff_thread_await_progress
the pkt_dts and pkt_pts fields in AVFrame will work as usual Restrictions on codec whose streams don t reset across will not work because their bitstreams cannot be decoded in parallel *The contents of buffers must not be read before ff_thread_await_progress() has been called on them. reget_buffer() and buffer age optimizations no longer work. *The contents of buffers must not be written to after ff_thread_report_progress() has been called on them. This includes draw_edges(). Porting codecs to frame threading
ff_vp9_intra_recon_16bpp
void ff_vp9_intra_recon_16bpp(VP9TileData *td, ptrdiff_t y_off, ptrdiff_t uv_off)
Definition: vp9recon.c:292
ThreadFrame::f
AVFrame * f
Definition: thread.h:35
VP9Block
Definition: vp9dec.h:82
DC_127_PRED
@ DC_127_PRED
Definition: vp9.h:59
VERT_PRED
@ VERT_PRED
Definition: vp9.h:46
assign_val
#define assign_val(c, i, v)
check_intra_mode
static av_always_inline int check_intra_mode(VP9TileData *td, int mode, uint8_t **a, uint8_t *dst_edge, ptrdiff_t stride_edge, uint8_t *dst_inner, ptrdiff_t stride_inner, uint8_t *l, int col, int x, int w, int row, int y, enum TxfmMode tx, int p, int ss_h, int ss_v, int bytesperpixel)
Definition: vp9recon.c:31
DIAG_DOWN_RIGHT_PRED
@ DIAG_DOWN_RIGHT_PRED
Definition: vp9.h:50
avassert.h
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:180
HOR_PRED
@ HOR_PRED
Definition: vp9.h:47
vp9_scaled_mc_func
void(* vp9_scaled_mc_func)(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *ref, ptrdiff_t ref_stride, int h, int mx, int my, int dx, int dy)
Definition: vp9dsp.h:35
s
#define s(width, name)
Definition: cbs_vp9.c:257
vp9data.h
LEFT_DC_PRED
@ LEFT_DC_PRED
Definition: vp9.h:56
VP56mv::y
int16_t y
Definition: vp56.h:70
VP56mv
Definition: vp56.h:68
memset_val
#define memset_val(c, val, num)
ff_vp9_inter_recon_16bpp
void ff_vp9_inter_recon_16bpp(VP9TileData *td)
Definition: vp9recon.c:650
assign_bpp
#define assign_bpp(c, i1, v, i2)
NULL
#define NULL
Definition: coverity.c:32
LOCAL_ALIGNED_32
#define LOCAL_ALIGNED_32(t, v,...)
Definition: mem_internal.h:136
VP9Context
Definition: vp9dec.h:94
mc_chroma_scaled
static av_always_inline void mc_chroma_scaled(VP9TileData *td, vp9_scaled_mc_func smc, vp9_mc_func(*mc)[2], uint8_t *dst_u, uint8_t *dst_v, ptrdiff_t dst_stride, const uint8_t *ref_u, ptrdiff_t src_stride_u, const uint8_t *ref_v, ptrdiff_t src_stride_v, ThreadFrame *ref_frame, ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv, int px, int py, int pw, int ph, int bw, int bh, int w, int h, int bytesperpixel, const uint16_t *scale, const uint8_t *step)
Definition: vp9recon.c:463
TX_8X8
@ TX_8X8
Definition: vp9.h:29
vp9_mc_template.c
TxfmMode
TxfmMode
Definition: vp9.h:27
DCT_DCT
@ DCT_DCT
Definition: vp9.h:38
TxfmType
TxfmType
Definition: vp9.h:37
N_INTRA_PRED_MODES
@ N_INTRA_PRED_MODES
Definition: vp9.h:61
REF_INVALID_SCALE
#define REF_INVALID_SCALE
Definition: vp9dec.h:40
mc_luma_unscaled
static av_always_inline void mc_luma_unscaled(VP9TileData *td, vp9_mc_func(*mc)[2], uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *ref, ptrdiff_t ref_stride, ThreadFrame *ref_frame, ptrdiff_t y, ptrdiff_t x, const VP56mv *mv, int bw, int bh, int w, int h, int bytesperpixel)
Definition: vp9recon.c:297
VERT_RIGHT_PRED
@ VERT_RIGHT_PRED
Definition: vp9.h:51
BS_8x8
@ BS_8x8
Definition: vp9shared.h:87
scale_mv
#define scale_mv(n, dim)
TX_4X4
@ TX_4X4
Definition: vp9.h:28
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
th
#define th
Definition: regdef.h:75
mc_chroma_unscaled
static av_always_inline void mc_chroma_unscaled(VP9TileData *td, vp9_mc_func(*mc)[2], uint8_t *dst_u, uint8_t *dst_v, ptrdiff_t dst_stride, const uint8_t *ref_u, ptrdiff_t src_stride_u, const uint8_t *ref_v, ptrdiff_t src_stride_v, ThreadFrame *ref_frame, ptrdiff_t y, ptrdiff_t x, const VP56mv *mv, int bw, int bh, int w, int h, int bytesperpixel)
Definition: vp9recon.c:333
av_assert2
#define av_assert2(cond)
assert() equivalent, that does lie in speed critical code.
Definition: avassert.h:64
i
int i
Definition: input.c:406
DC_129_PRED
@ DC_129_PRED
Definition: vp9.h:60
ff_vp9_intra_txfm_type
enum TxfmType ff_vp9_intra_txfm_type[14]
Definition: vp9data.c:437
av_always_inline
#define av_always_inline
Definition: attributes.h:49
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
mc_luma_scaled
static av_always_inline void mc_luma_scaled(VP9TileData *td, vp9_scaled_mc_func smc, vp9_mc_func(*mc)[2], uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *ref, ptrdiff_t ref_stride, ThreadFrame *ref_frame, ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv, int px, int py, int pw, int ph, int bw, int bh, int w, int h, int bytesperpixel, const uint16_t *scale, const uint8_t *step)
Definition: vp9recon.c:405
VP9TileData
Definition: vp9dec.h:164
AVFrame::height
int height
Definition: frame.h:361
ThreadFrame
Definition: thread.h:34
HOR_UP_PRED
@ HOR_UP_PRED
Definition: vp9.h:54
mode
mode
Definition: ebur128.h:83
ff_vp9_bwh_tab
const uint8_t ff_vp9_bwh_tab[2][N_BS_SIZES][2]
Definition: vp9data.c:25
AV_RN16A
#define AV_RN16A(p)
Definition: intreadwrite.h:522
vp9_mc_func
void(* vp9_mc_func)(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *ref, ptrdiff_t ref_stride, int h, int mx, int my)
Definition: vp9dsp.h:32
ref
static int ref[MAX_W *MAX_W]
Definition: jpeg2000dwt.c:107
HOR_DOWN_PRED
@ HOR_DOWN_PRED
Definition: vp9.h:52
vp9dec.h
CUR_FRAME
#define CUR_FRAME
Definition: vp9shared.h:163
TOP_DC_PRED
@ TOP_DC_PRED
Definition: vp9.h:57
videodsp.h
DIAG_DOWN_LEFT_PRED
@ DIAG_DOWN_LEFT_PRED
Definition: vp9.h:49
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:28
memset_bpp
#define memset_bpp(c, i1, v, i2, num)
ff_vp9_inter_recon_8bpp
void ff_vp9_inter_recon_8bpp(VP9TileData *td)
Definition: vp9recon.c:645
AVERROR_INVALIDDATA
#define AVERROR_INVALIDDATA
Invalid data found when processing input.
Definition: error.h:61
h
h
Definition: vp9dsp_template.c:2038
intra_recon
static av_always_inline void intra_recon(VP9TileData *td, ptrdiff_t y_off, ptrdiff_t uv_off, int bytesperpixel)
Definition: vp9recon.c:217
ff_vp9_intra_recon_8bpp
void ff_vp9_intra_recon_8bpp(VP9TileData *td, ptrdiff_t y_off, ptrdiff_t uv_off)
Definition: vp9recon.c:287
mc
#define mc
Definition: vf_colormatrix.c:102