FFmpeg: vp8.c
1 /*
2  * VP8 compatible video decoder
3  *
4  * Copyright (C) 2010 David Conrad
5  * Copyright (C) 2010 Ronald S. Bultje
6  * Copyright (C) 2010 Jason Garrett-Glaser
7  * Copyright (C) 2012 Daniel Kang
8  *
9  * This file is part of FFmpeg.
10  *
11  * FFmpeg is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * FFmpeg is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with FFmpeg; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24  */
25 
26 #include "libavutil/imgutils.h"
27 #include "avcodec.h"
28 #include "internal.h"
29 #include "vp8.h"
30 #include "vp8data.h"
31 #include "rectangle.h"
32 #include "thread.h"
33 
34 #if ARCH_ARM
35 # include "arm/vp8.h"
36 #endif
37 
38 static void free_buffers(VP8Context *s)
39 {
40  int i;
41  if (s->thread_data)
42  for (i = 0; i < MAX_THREADS; i++) {
43 #if HAVE_THREADS
44  pthread_cond_destroy(&s->thread_data[i].cond);
45  pthread_mutex_destroy(&s->thread_data[i].lock);
46 #endif
47  av_freep(&s->thread_data[i].filter_strength);
48  av_freep(&s->thread_data[i].edge_emu_buffer);
49  }
50  av_freep(&s->thread_data);
51  av_freep(&s->macroblocks_base);
52  av_freep(&s->intra4x4_pred_mode_top);
53  av_freep(&s->top_nnz);
54  av_freep(&s->top_border);
55 
56  s->macroblocks = NULL;
57 }
58 
59 static int vp8_alloc_frame(VP8Context *s, AVFrame *f)
60 {
61  int ret;
62  if ((ret = ff_thread_get_buffer(s->avctx, f)) < 0)
63  return ret;
64  if (s->num_maps_to_be_freed && !s->maps_are_invalid) {
65  f->ref_index[0] = s->segmentation_maps[--s->num_maps_to_be_freed];
66  } else if (!(f->ref_index[0] = av_mallocz(s->mb_width * s->mb_height))) {
67  ff_thread_release_buffer(s->avctx, f);
68  return AVERROR(ENOMEM);
69  }
70  return 0;
71 }
72 
73 static void vp8_release_frame(VP8Context *s, AVFrame *f, int prefer_delayed_free, int can_direct_free)
74 {
75  if (f->ref_index[0]) {
76  if (prefer_delayed_free) {
77  /* Upon a size change, we want to free the maps but other threads may still
78  * be using them, so queue them. Upon a seek, all threads are inactive so
79  * we want to cache one to prevent re-allocation in the next decoding
80  * iteration, but the rest we can free directly. */
81  int max_queued_maps = can_direct_free ? 1 : FF_ARRAY_ELEMS(s->segmentation_maps);
82  if (s->num_maps_to_be_freed < max_queued_maps) {
83  s->segmentation_maps[s->num_maps_to_be_freed++] = f->ref_index[0];
84  } else if (can_direct_free) /* vp8_decode_flush(), but our queue is full */ {
85  av_free(f->ref_index[0]);
86  } /* else: MEMLEAK (should never happen, but better that than crash) */
87  f->ref_index[0] = NULL;
88  } else /* vp8_decode_free() */ {
89  av_free(f->ref_index[0]);
90  }
91  }
92  ff_thread_release_buffer(s->avctx, f);
93 }
94 
95 static void vp8_decode_flush_impl(AVCodecContext *avctx,
96  int prefer_delayed_free, int can_direct_free, int free_mem)
97 {
98  VP8Context *s = avctx->priv_data;
99  int i;
100 
101  if (!avctx->internal->is_copy) {
102  for (i = 0; i < 5; i++)
103  if (s->frames[i].data[0])
104  vp8_release_frame(s, &s->frames[i], prefer_delayed_free, can_direct_free);
105  }
106  memset(s->framep, 0, sizeof(s->framep));
107 
108  if (free_mem) {
109  free_buffers(s);
110  s->maps_are_invalid = 1;
111  }
112 }
113 
114 static void vp8_decode_flush(AVCodecContext *avctx)
115 {
116  vp8_decode_flush_impl(avctx, 1, 1, 0);
117 }
118 
119 static int update_dimensions(VP8Context *s, int width, int height)
120 {
121  AVCodecContext *avctx = s->avctx;
122  int i;
123 
124  if (width != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
125  height != s->avctx->height) {
126  if (av_image_check_size(width, height, 0, s->avctx))
127  return AVERROR_INVALIDDATA;
128 
129  vp8_decode_flush_impl(s->avctx, 1, 0, 1);
130 
131  avcodec_set_dimensions(s->avctx, width, height);
132  }
133 
134  s->mb_width = (s->avctx->coded_width +15) / 16;
135  s->mb_height = (s->avctx->coded_height+15) / 16;
136 
137  s->mb_layout = (avctx->active_thread_type == FF_THREAD_SLICE) && (FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1);
138  if (!s->mb_layout) { // Frame threading and one thread
139  s->macroblocks_base = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
140  s->intra4x4_pred_mode_top = av_mallocz(4*s->mb_width);
141  }
142  else // Sliced threading
143  s->macroblocks_base = av_mallocz((s->mb_width+2)*(s->mb_height+2)*sizeof(*s->macroblocks));
144  s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
145  s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
146  s->thread_data = av_mallocz(MAX_THREADS*sizeof(VP8ThreadData));
147 
148  for (i = 0; i < MAX_THREADS; i++) {
149  s->thread_data[i].filter_strength = av_mallocz(s->mb_width*sizeof(*s->thread_data[0].filter_strength));
150 #if HAVE_THREADS
151  pthread_mutex_init(&s->thread_data[i].lock, NULL);
152  pthread_cond_init(&s->thread_data[i].cond, NULL);
153 #endif
154  }
155 
156  if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
157  (!s->intra4x4_pred_mode_top && !s->mb_layout))
158  return AVERROR(ENOMEM);
159 
160  s->macroblocks = s->macroblocks_base + 1;
161 
162  return 0;
163 }
164 
165 static void parse_segment_info(VP8Context *s)
166 {
167  VP56RangeCoder *c = &s->c;
168  int i;
169 
170  s->segmentation.update_map = vp8_rac_get(c);
171 
172  if (vp8_rac_get(c)) { // update segment feature data
173  s->segmentation.absolute_vals = vp8_rac_get(c);
174 
175  for (i = 0; i < 4; i++)
176  s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);
177 
178  for (i = 0; i < 4; i++)
179  s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
180  }
181  if (s->segmentation.update_map)
182  for (i = 0; i < 3; i++)
183  s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
184 }
185 
186 static void update_lf_deltas(VP8Context *s)
187 {
188  VP56RangeCoder *c = &s->c;
189  int i;
190 
191  for (i = 0; i < 4; i++) {
192  if (vp8_rac_get(c)) {
193  s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);
194 
195  if (vp8_rac_get(c))
196  s->lf_delta.ref[i] = -s->lf_delta.ref[i];
197  }
198  }
199 
200  for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
201  if (vp8_rac_get(c)) {
202  s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);
203 
204  if (vp8_rac_get(c))
205  s->lf_delta.mode[i] = -s->lf_delta.mode[i];
206  }
207  }
208 }
209 
210 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
211 {
212  const uint8_t *sizes = buf;
213  int i;
214 
215  s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
216 
217  buf += 3*(s->num_coeff_partitions-1);
218  buf_size -= 3*(s->num_coeff_partitions-1);
219  if (buf_size < 0)
220  return -1;
221 
222  for (i = 0; i < s->num_coeff_partitions-1; i++) {
223  int size = AV_RL24(sizes + 3*i);
224  if (buf_size - size < 0)
225  return -1;
226 
227  ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
228  buf += size;
229  buf_size -= size;
230  }
231  ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
232 
233  return 0;
234 }
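/* [Editor's note, added commentary; not in the original source]
 * The residual data of a VP8 frame is split into 1, 2, 4 or 8 token
 * partitions: a 2-bit field gives log2 of the count, the sizes of all
 * partitions except the last are stored up front as 3-byte little-endian
 * values, and the last partition takes whatever remains of the buffer.
 * E.g. with 4 partitions, the 3*(4-1) = 9 size bytes are skipped first,
 * then three explicitly sized range coders plus one covering the tail
 * are initialized. */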
235 
236 static void get_quants(VP8Context *s)
237 {
238  VP56RangeCoder *c = &s->c;
239  int i, base_qi;
240 
241  int yac_qi = vp8_rac_get_uint(c, 7);
242  int ydc_delta = vp8_rac_get_sint(c, 4);
243  int y2dc_delta = vp8_rac_get_sint(c, 4);
244  int y2ac_delta = vp8_rac_get_sint(c, 4);
245  int uvdc_delta = vp8_rac_get_sint(c, 4);
246  int uvac_delta = vp8_rac_get_sint(c, 4);
247 
248  for (i = 0; i < 4; i++) {
249  if (s->segmentation.enabled) {
250  base_qi = s->segmentation.base_quant[i];
251  if (!s->segmentation.absolute_vals)
252  base_qi += yac_qi;
253  } else
254  base_qi = yac_qi;
255 
256  s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
257  s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi , 7)];
258  s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
259  /* 101581>>16 is equivalent to 155/100 */
260  s->qmat[i].luma_dc_qmul[1] = (101581 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)]) >> 16;
261  s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
262  s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
263 
264  s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
265  s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
266  }
267 }
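/* [Editor's note, added commentary; not in the original source]
 * Checking the fixed-point scale above: the Y2 AC factor is the AC lookup
 * value times 155/100, e.g. a lookup value of 100 gives
 * (101581 * 100) >> 16 = 10158100 / 65536 = 155. The FFMAX/FFMIN lines
 * then apply the clamps the format requires: the Y2 AC factor may not
 * drop below 8 and the chroma DC factor may not exceed 132. */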
268 
269 /**
270  * Determine which buffers golden and altref should be updated with after this frame.
271  * The spec isn't clear here, so I'm going by my understanding of what libvpx does.
272  *
273  * Intra frames update all 3 references
274  * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
275  * If the update (golden|altref) flag is set, it's updated with the current frame
276  * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
277  * If the flag is not set, the number read means:
278  * 0: no update
279  * 1: VP56_FRAME_PREVIOUS
280  * 2: update golden with altref, or update altref with golden
281  */
282 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
283 {
284  VP56RangeCoder *c = &s->c;
285 
286  if (update)
287  return VP56_FRAME_CURRENT;
288 
289  switch (vp8_rac_get_uint(c, 2)) {
290  case 1:
291  return VP56_FRAME_PREVIOUS;
292  case 2:
293  return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
294  }
295  return VP56_FRAME_NONE;
296 }
297 
298 static void update_refs(VP8Context *s)
299 {
300  VP56RangeCoder *c = &s->c;
301 
302  int update_golden = vp8_rac_get(c);
303  int update_altref = vp8_rac_get(c);
304 
305  s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
306  s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
307 }
308 
309 static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
310 {
311  VP56RangeCoder *c = &s->c;
312  int header_size, hscale, vscale, i, j, k, l, m, ret;
313  int width = s->avctx->width;
314  int height = s->avctx->height;
315 
316  s->keyframe = !(buf[0] & 1);
317  s->profile = (buf[0]>>1) & 7;
318  s->invisible = !(buf[0] & 0x10);
319  header_size = AV_RL24(buf) >> 5;
320  buf += 3;
321  buf_size -= 3;
322 
323  if (s->profile > 3)
324  av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
325 
326  if (!s->profile)
327  memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
328  else // profile 1-3 use bilinear, 4+ aren't defined so whatever
329  memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));
330 
331  if (header_size > buf_size - 7*s->keyframe) {
332  av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
333  return AVERROR_INVALIDDATA;
334  }
335 
336  if (s->keyframe) {
337  if (AV_RL24(buf) != 0x2a019d) {
338  av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
339  return AVERROR_INVALIDDATA;
340  }
341  width = AV_RL16(buf+3) & 0x3fff;
342  height = AV_RL16(buf+5) & 0x3fff;
343  hscale = buf[4] >> 6;
344  vscale = buf[6] >> 6;
345  buf += 7;
346  buf_size -= 7;
347 
348  if (hscale || vscale)
349  av_log_missing_feature(s->avctx, "Upscaling", 1);
350 
351  s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
352  for (i = 0; i < 4; i++)
353  for (j = 0; j < 16; j++)
354  memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
355  sizeof(s->prob->token[i][j]));
356  memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
357  memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
358  memcpy(s->prob->mvc , vp8_mv_default_prob , sizeof(s->prob->mvc));
359  memset(&s->segmentation, 0, sizeof(s->segmentation));
360  memset(&s->lf_delta, 0, sizeof(s->lf_delta));
361  }
362 
363  ff_vp56_init_range_decoder(c, buf, header_size);
364  buf += header_size;
365  buf_size -= header_size;
366 
367  if (s->keyframe) {
368  if (vp8_rac_get(c))
369  av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
370  vp8_rac_get(c); // whether we can skip clamping in dsp functions
371  }
372 
373  if ((s->segmentation.enabled = vp8_rac_get(c)))
374  parse_segment_info(s);
375  else
376  s->segmentation.update_map = 0; // FIXME: move this to some init function?
377 
378  s->filter.simple = vp8_rac_get(c);
379  s->filter.level = vp8_rac_get_uint(c, 6);
380  s->filter.sharpness = vp8_rac_get_uint(c, 3);
381 
382  if ((s->lf_delta.enabled = vp8_rac_get(c)))
383  if (vp8_rac_get(c))
384  update_lf_deltas(s);
385 
386  if (setup_partitions(s, buf, buf_size)) {
387  av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
388  return AVERROR_INVALIDDATA;
389  }
390 
391  if (!s->macroblocks_base || /* first frame */
392  width != s->avctx->width || height != s->avctx->height || (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) {
393  if ((ret = update_dimensions(s, width, height)) < 0)
394  return ret;
395  }
396 
397  get_quants(s);
398 
399  if (!s->keyframe) {
400  update_refs(s);
401  s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c);
402  s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
403  }
404 
405  // if we aren't saving this frame's probabilities for future frames,
406  // make a copy of the current probabilities
407  if (!(s->update_probabilities = vp8_rac_get(c)))
408  s->prob[1] = s->prob[0];
409 
410  s->update_last = s->keyframe || vp8_rac_get(c);
411 
412  for (i = 0; i < 4; i++)
413  for (j = 0; j < 8; j++)
414  for (k = 0; k < 3; k++)
415  for (l = 0; l < NUM_DCT_TOKENS-1; l++)
416  if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
417  int prob = vp8_rac_get_uint(c, 8);
418  for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
419  s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
420  }
421 
422  if ((s->mbskip_enabled = vp8_rac_get(c)))
423  s->prob->mbskip = vp8_rac_get_uint(c, 8);
424 
425  if (!s->keyframe) {
426  s->prob->intra = vp8_rac_get_uint(c, 8);
427  s->prob->last = vp8_rac_get_uint(c, 8);
428  s->prob->golden = vp8_rac_get_uint(c, 8);
429 
430  if (vp8_rac_get(c))
431  for (i = 0; i < 4; i++)
432  s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
433  if (vp8_rac_get(c))
434  for (i = 0; i < 3; i++)
435  s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
436 
437  // 17.2 MV probability update
438  for (i = 0; i < 2; i++)
439  for (j = 0; j < 19; j++)
440  if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
441  s->prob->mvc[i][j] = vp8_rac_get_nn(c);
442  }
443 
444  return 0;
445 }
446 
447 static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
448 {
449  dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
450  dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
451 }
452 
453 /**
454  * Motion vector coding, 17.1.
455  */
456 static av_always_inline int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
457 {
458  int bit, x = 0;
459 
460  if (vp56_rac_get_prob_branchy(c, p[0])) {
461  int i;
462 
463  for (i = 0; i < 3; i++)
464  x += vp56_rac_get_prob(c, p[9 + i]) << i;
465  for (i = 9; i > 3; i--)
466  x += vp56_rac_get_prob(c, p[9 + i]) << i;
467  if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
468  x += 8;
469  } else {
470  // small_mvtree
471  const uint8_t *ps = p+2;
472  bit = vp56_rac_get_prob(c, *ps);
473  ps += 1 + 3*bit;
474  x += 4*bit;
475  bit = vp56_rac_get_prob(c, *ps);
476  ps += 1 + bit;
477  x += 2*bit;
478  x += vp56_rac_get_prob(c, *ps);
479  }
480 
481  return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
482 }
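/* [Editor's note, added commentary; not in the original source]
 * Magnitudes 0..7 use the small tree in the else branch; larger values
 * are coded as 10 raw bits in the unusual order 0,1,2 then 9 down to 4,
 * with bit 3 last. Long coding is only used for magnitudes >= 8, so bit 3
 * is implicitly 1 when bits 4..9 are all zero (the !(x & 0xFFF0) test)
 * and is read from the bitstream otherwise. p[1] finally codes the sign;
 * the result is in quarter-pel units. */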
483 
484 static av_always_inline
485 const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
486 {
487  if (left == top)
488  return vp8_submv_prob[4-!!left];
489  if (!top)
490  return vp8_submv_prob[2];
491  return vp8_submv_prob[1-!!left];
492 }
493 
494 /**
495  * Split motion vector prediction, 16.4.
496  * @returns the number of motion vectors parsed (2, 4 or 16)
497  */
498 static av_always_inline
499 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int layout)
500 {
501  int part_idx;
502  int n, num;
503  VP8Macroblock *top_mb;
504  VP8Macroblock *left_mb = &mb[-1];
505  const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
506  *mbsplits_top,
507  *mbsplits_cur, *firstidx;
508  VP56mv *top_mv;
509  VP56mv *left_mv = left_mb->bmv;
510  VP56mv *cur_mv = mb->bmv;
511 
512  if (!layout) // layout is inlined, s->mb_layout is not
513  top_mb = &mb[2];
514  else
515  top_mb = &mb[-s->mb_width-1];
516  mbsplits_top = vp8_mbsplits[top_mb->partitioning];
517  top_mv = top_mb->bmv;
518 
522  } else {
523  part_idx = VP8_SPLITMVMODE_8x8;
524  }
525  } else {
526  part_idx = VP8_SPLITMVMODE_4x4;
527  }
528 
529  num = vp8_mbsplit_count[part_idx];
530  mbsplits_cur = vp8_mbsplits[part_idx],
531  firstidx = vp8_mbfirstidx[part_idx];
532  mb->partitioning = part_idx;
533 
534  for (n = 0; n < num; n++) {
535  int k = firstidx[n];
536  uint32_t left, above;
537  const uint8_t *submv_prob;
538 
539  if (!(k & 3))
540  left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
541  else
542  left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
543  if (k <= 3)
544  above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
545  else
546  above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
547 
548  submv_prob = get_submv_prob(left, above);
549 
550  if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
551  if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
552  if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
553  mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
554  mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
555  } else {
556  AV_ZERO32(&mb->bmv[n]);
557  }
558  } else {
559  AV_WN32A(&mb->bmv[n], above);
560  }
561  } else {
562  AV_WN32A(&mb->bmv[n], left);
563  }
564  }
565 
566  return num;
567 }
568 
569 static av_always_inline
570 void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout)
571 {
572  VP8Macroblock *mb_edge[3] = { 0 /* top */,
573  mb - 1 /* left */,
574  0 /* top-left */ };
575  enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
576  enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
577  int idx = CNT_ZERO;
578  int cur_sign_bias = s->sign_bias[mb->ref_frame];
579  int8_t *sign_bias = s->sign_bias;
580  VP56mv near_mv[4];
581  uint8_t cnt[4] = { 0 };
582  VP56RangeCoder *c = &s->c;
583 
584  if (!layout) { // layout is inlined (s->mb_layout is not)
585  mb_edge[0] = mb + 2;
586  mb_edge[2] = mb + 1;
587  }
588  else {
589  mb_edge[0] = mb - s->mb_width-1;
590  mb_edge[2] = mb - s->mb_width-2;
591  }
592 
593  AV_ZERO32(&near_mv[0]);
594  AV_ZERO32(&near_mv[1]);
595  AV_ZERO32(&near_mv[2]);
596 
597  /* Process MB on top, left and top-left */
598  #define MV_EDGE_CHECK(n)\
599  {\
600  VP8Macroblock *edge = mb_edge[n];\
601  int edge_ref = edge->ref_frame;\
602  if (edge_ref != VP56_FRAME_CURRENT) {\
603  uint32_t mv = AV_RN32A(&edge->mv);\
604  if (mv) {\
605  if (cur_sign_bias != sign_bias[edge_ref]) {\
606  /* SWAR negate of the values in mv. */\
607  mv = ~mv;\
608  mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
609  }\
610  if (!n || mv != AV_RN32A(&near_mv[idx]))\
611  AV_WN32A(&near_mv[++idx], mv);\
612  cnt[idx] += 1 + (n != 2);\
613  } else\
614  cnt[CNT_ZERO] += 1 + (n != 2);\
615  }\
616  }
617 
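/* [Editor's note, added commentary; not in the original source]
 * The "SWAR negate" in MV_EDGE_CHECK flips the sign of both 16-bit MV
 * components packed in one 32-bit word. One component as an example:
 * x = 5 = 0x0005; ~x = 0xFFFA; (0xFFFA & 0x7FFF) + 1 = 0x7FFB;
 * 0x7FFB ^ 0x8000 = 0xFFFB = -5 in two's complement. Masking the sign
 * bits off before the +0x00010001 guarantees the per-half increment
 * cannot carry from the low half into the high half; the final XOR adds
 * the top bit of each half back in without a carry, which is all that
 * two's complement negation needs at that bit. */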
618  MV_EDGE_CHECK(0)
619  MV_EDGE_CHECK(1)
620  MV_EDGE_CHECK(2)
621 
622  mb->partitioning = VP8_SPLITMVMODE_NONE;
623  if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
624  mb->mode = VP8_MVMODE_MV;
625 
626  /* If we have three distinct MVs, merge first and last if they're the same */
627  if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
628  cnt[CNT_NEAREST] += 1;
629 
630  /* Swap near and nearest if necessary */
631  if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
632  FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
633  FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
634  }
635 
636  if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
637  if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
638 
639  /* Choose the best mv out of 0,0 and the nearest mv */
640  clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
641  cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
642  (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
643  (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
644 
645  if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
646  mb->mode = VP8_MVMODE_SPLIT;
647  mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout) - 1];
648  } else {
649  mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
650  mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
651  mb->bmv[0] = mb->mv;
652  }
653  } else {
654  clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
655  mb->bmv[0] = mb->mv;
656  }
657  } else {
658  clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
659  mb->bmv[0] = mb->mv;
660  }
661  } else {
662  mb->mode = VP8_MVMODE_ZERO;
663  AV_ZERO32(&mb->mv);
664  mb->bmv[0] = mb->mv;
665  }
666 }
667 
668 static av_always_inline
669 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
670  int mb_x, int keyframe, int layout)
671 {
672  uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
673 
674  if (layout == 1) {
675  VP8Macroblock *mb_top = mb - s->mb_width - 1;
676  memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
677  }
678  if (keyframe) {
679  int x, y;
680  uint8_t* top;
681  uint8_t* const left = s->intra4x4_pred_mode_left;
682  if (layout == 1)
683  top = mb->intra4x4_pred_mode_top;
684  else
685  top = s->intra4x4_pred_mode_top + 4 * mb_x;
686  for (y = 0; y < 4; y++) {
687  for (x = 0; x < 4; x++) {
688  const uint8_t *ctx;
689  ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
690  *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
691  left[y] = top[x] = *intra4x4;
692  intra4x4++;
693  }
694  }
695  } else {
696  int i;
697  for (i = 0; i < 16; i++)
698  intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
699  }
700 }
701 
702 static av_always_inline
703 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
704  uint8_t *segment, uint8_t *ref, int layout)
705 {
706  VP56RangeCoder *c = &s->c;
707 
708  if (s->segmentation.update_map) {
709  int bit = vp56_rac_get_prob(c, s->prob->segmentid[0]);
710  *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
711  } else if (s->segmentation.enabled)
712  *segment = ref ? *ref : *segment;
713  mb->segment = *segment;
714 
715  mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
716 
717  if (s->keyframe) {
718  mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);
719 
720  if (mb->mode == MODE_I4x4) {
721  decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
722  } else {
723  const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
724  if (s->mb_layout == 1)
725  AV_WN32A(mb->intra4x4_pred_mode_top, modes);
726  else
727  AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
728  AV_WN32A( s->intra4x4_pred_mode_left, modes);
729  }
730 
731  mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
732  mb->ref_frame = VP56_FRAME_CURRENT;
733  } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
734  // inter MB, 16.2
735  if (vp56_rac_get_prob_branchy(c, s->prob->last))
736  mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
737  VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
738  else
739  mb->ref_frame = VP56_FRAME_PREVIOUS;
740  s->ref_count[mb->ref_frame-1]++;
741 
742  // motion vectors, 16.3
743  decode_mvs(s, mb, mb_x, mb_y, layout);
744  } else {
745  // intra MB, 16.1
746  mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
747 
748  if (mb->mode == MODE_I4x4)
749  decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
750 
754  AV_ZERO32(&mb->bmv[0]);
755  }
756 }
757 
758 #ifndef decode_block_coeffs_internal
759 /**
760  * @param r arithmetic bitstream reader context
761  * @param block destination for block coefficients
762  * @param probs probabilities to use when reading trees from the bitstream
763  * @param i initial coeff index, 0 unless a separate DC block is coded
764  * @param qmul array holding the dc/ac dequant factor at position 0/1
765  * @return 0 if no coeffs were decoded
766  * otherwise, the index of the last coeff decoded plus one
767  */
768 static int decode_block_coeffs_internal(VP56RangeCoder *r, DCTELEM block[16],
769  uint8_t probs[16][3][NUM_DCT_TOKENS-1],
770  int i, uint8_t *token_prob, int16_t qmul[2])
771 {
772  VP56RangeCoder c = *r;
773  goto skip_eob;
774  do {
775  int coeff;
776  if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB
777  break;
778 
779 skip_eob:
780  if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
781  if (++i == 16)
782  break; // invalid input; blocks should end with EOB
783  token_prob = probs[i][0];
784  goto skip_eob;
785  }
786 
787  if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
788  coeff = 1;
789  token_prob = probs[i+1][1];
790  } else {
791  if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
792  coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
793  if (coeff)
794  coeff += vp56_rac_get_prob(&c, token_prob[5]);
795  coeff += 2;
796  } else {
797  // DCT_CAT*
798  if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
799  if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
800  coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
801  } else { // DCT_CAT2
802  coeff = 7;
803  coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
804  coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
805  }
806  } else { // DCT_CAT3 and up
807  int a = vp56_rac_get_prob(&c, token_prob[8]);
808  int b = vp56_rac_get_prob(&c, token_prob[9+a]);
809  int cat = (a<<1) + b;
810  coeff = 3 + (8<<cat);
811  coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
812  }
813  }
814  token_prob = probs[i+1][2];
815  }
816  block[zigzag_scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
817  } while (++i < 16);
818 
819  *r = c;
820  return i;
821 }
822 #endif
823 
824 /**
825  * @param c arithmetic bitstream reader context
826  * @param block destination for block coefficients
827  * @param probs probabilities to use when reading trees from the bitstream
828  * @param i initial coeff index, 0 unless a separate DC block is coded
829  * @param zero_nhood the initial prediction context for number of surrounding
830  * all-zero blocks (only left/top, so 0-2)
831  * @param qmul array holding the dc/ac dequant factor at position 0/1
832  * @return 0 if no coeffs were decoded
833  * otherwise, the index of the last coeff decoded plus one
834  */
835 static av_always_inline
836 int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
837  uint8_t probs[16][3][NUM_DCT_TOKENS-1],
838  int i, int zero_nhood, int16_t qmul[2])
839 {
840  uint8_t *token_prob = probs[i][zero_nhood];
841  if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
842  return 0;
843  return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
844 }
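/* [Editor's note, added commentary; not in the original source]
 * The wrapper above handles the most common case, a block that is all
 * EOB, without calling the out-of-line function at all. The internal
 * function in turn works on a local copy of the range coder
 * (VP56RangeCoder c = *r), which appears intended to let the compiler
 * keep the coder state in registers across the hot token loop, writing
 * it back to memory only once at the end. */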
845 
846 static av_always_inline
847 void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Macroblock *mb,
848  uint8_t t_nnz[9], uint8_t l_nnz[9])
849 {
850  int i, x, y, luma_start = 0, luma_ctx = 3;
851  int nnz_pred, nnz, nnz_total = 0;
852  int segment = mb->segment;
853  int block_dc = 0;
854 
855  if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
856  nnz_pred = t_nnz[8] + l_nnz[8];
857 
858  // decode DC values and do hadamard
859  nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0, nnz_pred,
860  s->qmat[segment].luma_dc_qmul);
861  l_nnz[8] = t_nnz[8] = !!nnz;
862  if (nnz) {
863  nnz_total += nnz;
864  block_dc = 1;
865  if (nnz == 1)
866  s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
867  else
868  s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
869  }
870  luma_start = 1;
871  luma_ctx = 0;
872  }
873 
874  // luma blocks
875  for (y = 0; y < 4; y++)
876  for (x = 0; x < 4; x++) {
877  nnz_pred = l_nnz[y] + t_nnz[x];
878  nnz = decode_block_coeffs(c, td->block[y][x], s->prob->token[luma_ctx], luma_start,
879  nnz_pred, s->qmat[segment].luma_qmul);
880  // nnz+block_dc may be one more than the actual last index, but we don't care
881  td->non_zero_count_cache[y][x] = nnz + block_dc;
882  t_nnz[x] = l_nnz[y] = !!nnz;
883  nnz_total += nnz;
884  }
885 
886  // chroma blocks
887  // TODO: what to do about dimensions? 2nd dim for luma is x,
888  // but for chroma it's (y<<1)|x
889  for (i = 4; i < 6; i++)
890  for (y = 0; y < 2; y++)
891  for (x = 0; x < 2; x++) {
892  nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
893  nnz = decode_block_coeffs(c, td->block[i][(y<<1)+x], s->prob->token[2], 0,
894  nnz_pred, s->qmat[segment].chroma_qmul);
895  td->non_zero_count_cache[i][(y<<1)+x] = nnz;
896  t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
897  nnz_total += nnz;
898  }
899 
900  // if there were no coded coeffs despite the macroblock not being marked skip,
901  // we MUST not do the inner loop filter and should not do IDCT
902  // Since skip isn't used for bitstream prediction, just manually set it.
903  if (!nnz_total)
904  mb->skip = 1;
905 }
906 
907 static av_always_inline
908 void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
909  int linesize, int uvlinesize, int simple)
910 {
911  AV_COPY128(top_border, src_y + 15*linesize);
912  if (!simple) {
913  AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
914  AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
915  }
916 }
917 
918 static av_always_inline
919 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
920  int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
921  int simple, int xchg)
922 {
923  uint8_t *top_border_m1 = top_border-32; // for TL prediction
924  src_y -= linesize;
925  src_cb -= uvlinesize;
926  src_cr -= uvlinesize;
927 
928 #define XCHG(a,b,xchg) do { \
929  if (xchg) AV_SWAP64(b,a); \
930  else AV_COPY64(b,a); \
931  } while (0)
932 
933  XCHG(top_border_m1+8, src_y-8, xchg);
934  XCHG(top_border, src_y, xchg);
935  XCHG(top_border+8, src_y+8, 1);
936  if (mb_x < mb_width-1)
937  XCHG(top_border+32, src_y+16, 1);
938 
939  // only copy chroma for normal loop filter
940  // or to initialize the top row to 127
941  if (!simple || !mb_y) {
942  XCHG(top_border_m1+16, src_cb-8, xchg);
943  XCHG(top_border_m1+24, src_cr-8, xchg);
944  XCHG(top_border+16, src_cb, 1);
945  XCHG(top_border+24, src_cr, 1);
946  }
947 }
948 
949 static av_always_inline
950 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
951 {
952  if (!mb_x) {
953  return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
954  } else {
955  return mb_y ? mode : LEFT_DC_PRED8x8;
956  }
957 }
958 
959 static av_always_inline
960 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
961 {
962  if (!mb_x) {
963  return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
964  } else {
965  return mb_y ? mode : HOR_PRED8x8;
966  }
967 }
968 
969 static av_always_inline
970 int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
971 {
972  if (mode == DC_PRED8x8) {
973  return check_dc_pred8x8_mode(mode, mb_x, mb_y);
974  } else {
975  return mode;
976  }
977 }
978 
979 static av_always_inline
980 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
981 {
982  switch (mode) {
983  case DC_PRED8x8:
984  return check_dc_pred8x8_mode(mode, mb_x, mb_y);
985  case VERT_PRED8x8:
986  return !mb_y ? DC_127_PRED8x8 : mode;
987  case HOR_PRED8x8:
988  return !mb_x ? DC_129_PRED8x8 : mode;
989  case PLANE_PRED8x8 /*TM*/:
990  return check_tm_pred8x8_mode(mode, mb_x, mb_y);
991  }
992  return mode;
993 }
994 
995 static av_always_inline
996 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
997 {
998  if (!mb_x) {
999  return mb_y ? VERT_VP8_PRED : DC_129_PRED;
1000  } else {
1001  return mb_y ? mode : HOR_VP8_PRED;
1002  }
1003 }
1004 
1005 static av_always_inline
1006 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
1007 {
1008  switch (mode) {
1009  case VERT_PRED:
1010  if (!mb_x && mb_y) {
1011  *copy_buf = 1;
1012  return mode;
1013  }
1014  /* fall-through */
1015  case DIAG_DOWN_LEFT_PRED:
1016  case VERT_LEFT_PRED:
1017  return !mb_y ? DC_127_PRED : mode;
1018  case HOR_PRED:
1019  if (!mb_y) {
1020  *copy_buf = 1;
1021  return mode;
1022  }
1023  /* fall-through */
1024  case HOR_UP_PRED:
1025  return !mb_x ? DC_129_PRED : mode;
1026  case TM_VP8_PRED:
1027  return check_tm_pred4x4_mode(mode, mb_x, mb_y);
1028  case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
1029  case DIAG_DOWN_RIGHT_PRED:
1030  case VERT_RIGHT_PRED:
1031  case HOR_DOWN_PRED:
1032  if (!mb_y || !mb_x)
1033  *copy_buf = 1;
1034  return mode;
1035  }
1036  return mode;
1037 }
1038 
1039 static av_always_inline
1040 void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1041  VP8Macroblock *mb, int mb_x, int mb_y)
1042 {
1043  AVCodecContext *avctx = s->avctx;
1044  int x, y, mode, nnz;
1045  uint32_t tr;
1046 
1047  // for the first row, we need to run xchg_mb_border to init the top edge to 127
1048  // otherwise, skip it if we aren't going to deblock
1049  if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1050  xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1051  s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1052  s->filter.simple, 1);
1053 
1054  if (mb->mode < MODE_I4x4) {
1055  if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
1056  mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
1057  } else {
1058  mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
1059  }
1060  s->hpc.pred16x16[mode](dst[0], s->linesize);
1061  } else {
1062  uint8_t *ptr = dst[0];
1063  uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1064  uint8_t tr_top[4] = { 127, 127, 127, 127 };
1065 
1066  // all blocks on the right edge of the macroblock use bottom edge
1067  // the top macroblock for their topright edge
1068  uint8_t *tr_right = ptr - s->linesize + 16;
1069 
1070  // if we're on the right edge of the frame, said edge is extended
1071  // from the top macroblock
1072  if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
1073  mb_x == s->mb_width-1) {
1074  tr = tr_right[-1]*0x01010101u;
1075  tr_right = (uint8_t *)&tr;
1076  }
1077 
1078  if (mb->skip)
1079  AV_ZERO128(td->non_zero_count_cache);
1080 
1081  for (y = 0; y < 4; y++) {
1082  uint8_t *topright = ptr + 4 - s->linesize;
1083  for (x = 0; x < 4; x++) {
1084  int copy = 0, linesize = s->linesize;
1085  uint8_t *dst = ptr+4*x;
1086  DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];
1087 
1088  if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
1089  topright = tr_top;
1090  } else if (x == 3)
1091  topright = tr_right;
1092 
1093  if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
1094  mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
1095  if (copy) {
1096  dst = copy_dst + 12;
1097  linesize = 8;
1098  if (!(mb_y + y)) {
1099  copy_dst[3] = 127U;
1100  AV_WN32A(copy_dst+4, 127U * 0x01010101U);
1101  } else {
1102  AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
1103  if (!(mb_x + x)) {
1104  copy_dst[3] = 129U;
1105  } else {
1106  copy_dst[3] = ptr[4*x-s->linesize-1];
1107  }
1108  }
1109  if (!(mb_x + x)) {
1110  copy_dst[11] =
1111  copy_dst[19] =
1112  copy_dst[27] =
1113  copy_dst[35] = 129U;
1114  } else {
1115  copy_dst[11] = ptr[4*x -1];
1116  copy_dst[19] = ptr[4*x+s->linesize -1];
1117  copy_dst[27] = ptr[4*x+s->linesize*2-1];
1118  copy_dst[35] = ptr[4*x+s->linesize*3-1];
1119  }
1120  }
1121  } else {
1122  mode = intra4x4[x];
1123  }
1124  s->hpc.pred4x4[mode](dst, topright, linesize);
1125  if (copy) {
1126  AV_COPY32(ptr+4*x , copy_dst+12);
1127  AV_COPY32(ptr+4*x+s->linesize , copy_dst+20);
1128  AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
1129  AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
1130  }
1131 
1132  nnz = td->non_zero_count_cache[y][x];
1133  if (nnz) {
1134  if (nnz == 1)
1135  s->vp8dsp.vp8_idct_dc_add(ptr+4*x, td->block[y][x], s->linesize);
1136  else
1137  s->vp8dsp.vp8_idct_add(ptr+4*x, td->block[y][x], s->linesize);
1138  }
1139  topright += 4;
1140  }
1141 
1142  ptr += 4*s->linesize;
1143  intra4x4 += 4;
1144  }
1145  }
1146 
1147  if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
1148  mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode, mb_x, mb_y);
1149  } else {
1150  mode = check_intra_pred8x8_mode(mb->chroma_pred_mode, mb_x, mb_y);
1151  }
1152  s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1153  s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
1154 
1155  if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1156  xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1157  s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1158  s->filter.simple, 0);
1159 }
1160 
1161 static const uint8_t subpel_idx[3][8] = {
1162  { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
1163  // also function pointer index
1164  { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
1165  { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
1166 };
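/* [Editor's note, added commentary; not in the original source]
 * Luma MVs are quarter-pel; the << 1 in vp8_mc_luma converts them to the
 * eighth-pel phase 0..7 indexing this table (chroma MVs are eighth-pel
 * already). Phase 0 needs no filtering. Odd phases use the 4-tap filters
 * and need 1 extra source pixel on the left and 2 on the right (3 total);
 * phases 2/4/6 use the 6-tap filters and need 2 left and 3 right
 * (5 total). Row 0 doubles as the index into the mc_func[3][3] table,
 * row 1 is the total padding passed to the edge emulation, and row 2 is
 * the right/bottom share of it. */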
1167 
1168 /**
1169  * luma MC function
1170  *
1171  * @param s VP8 decoding context
1172  * @param dst target buffer for block data at block position
1173  * @param ref reference picture buffer at origin (0, 0)
1174  * @param mv motion vector (relative to block position) to get pixel data from
1175  * @param x_off horizontal position of block from origin (0, 0)
1176  * @param y_off vertical position of block from origin (0, 0)
1177  * @param block_w width of block (16, 8 or 4)
1178  * @param block_h height of block (always same as block_w)
1179  * @param width width of src/dst plane data
1180  * @param height height of src/dst plane data
1181  * @param linesize size of a single line of plane data, including padding
1182  * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1183  */
1184 static av_always_inline
1185 void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1186  AVFrame *ref, const VP56mv *mv,
1187  int x_off, int y_off, int block_w, int block_h,
1188  int width, int height, int linesize,
1189  vp8_mc_func mc_func[3][3])
1190 {
1191  uint8_t *src = ref->data[0];
1192 
1193  if (AV_RN32A(mv)) {
1194 
1195  int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
1196  int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];
1197 
1198  x_off += mv->x >> 2;
1199  y_off += mv->y >> 2;
1200 
1201  // edge emulation
1202  ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1203  src += y_off * linesize + x_off;
1204  if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1205  y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1206  s->vdsp.emulated_edge_mc(td->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
1207  block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1208  x_off - mx_idx, y_off - my_idx, width, height);
1209  src = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1210  }
1211  mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
1212  } else {
1213  ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1214  mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
1215  }
1216 }
1217 
1218 /**
1219  * chroma MC function
1220  *
1221  * @param s VP8 decoding context
1222  * @param dst1 target buffer for block data at block position (U plane)
1223  * @param dst2 target buffer for block data at block position (V plane)
1224  * @param ref reference picture buffer at origin (0, 0)
1225  * @param mv motion vector (relative to block position) to get pixel data from
1226  * @param x_off horizontal position of block from origin (0, 0)
1227  * @param y_off vertical position of block from origin (0, 0)
1228  * @param block_w width of block (16, 8 or 4)
1229  * @param block_h height of block (always same as block_w)
1230  * @param width width of src/dst plane data
1231  * @param height height of src/dst plane data
1232  * @param linesize size of a single line of plane data, including padding
1233  * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1234  */
1235 static av_always_inline
1236 void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst2,
1237  AVFrame *ref, const VP56mv *mv, int x_off, int y_off,
1238  int block_w, int block_h, int width, int height, int linesize,
1239  vp8_mc_func mc_func[3][3])
1240 {
1241  uint8_t *src1 = ref->data[1], *src2 = ref->data[2];
1242 
1243  if (AV_RN32A(mv)) {
1244  int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
1245  int my = mv->y&7, my_idx = subpel_idx[0][my];
1246 
1247  x_off += mv->x >> 3;
1248  y_off += mv->y >> 3;
1249 
1250  // edge emulation
1251  src1 += y_off * linesize + x_off;
1252  src2 += y_off * linesize + x_off;
1253  ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1254  if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1255  y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1256  s->vdsp.emulated_edge_mc(td->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
1257  block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1258  x_off - mx_idx, y_off - my_idx, width, height);
1259  src1 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1260  mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1261 
1262  s->vdsp.emulated_edge_mc(td->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
1263  block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1264  x_off - mx_idx, y_off - my_idx, width, height);
1265  src2 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1266  mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1267  } else {
1268  mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1269  mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1270  }
1271  } else {
1272  ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1273  mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1274  mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1275  }
1276 }
1277 
1278 static av_always_inline
1279 void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1280  AVFrame *ref_frame, int x_off, int y_off,
1281  int bx_off, int by_off,
1282  int block_w, int block_h,
1283  int width, int height, VP56mv *mv)
1284 {
1285  VP56mv uvmv = *mv;
1286 
1287  /* Y */
1288  vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
1289  ref_frame, mv, x_off + bx_off, y_off + by_off,
1290  block_w, block_h, width, height, s->linesize,
1291  s->put_pixels_tab[block_w == 8]);
1292 
1293  /* U/V */
1294  if (s->profile == 3) {
1295  uvmv.x &= ~7;
1296  uvmv.y &= ~7;
1297  }
1298  x_off >>= 1; y_off >>= 1;
1299  bx_off >>= 1; by_off >>= 1;
1300  width >>= 1; height >>= 1;
1301  block_w >>= 1; block_h >>= 1;
1302  vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
1303  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1304  &uvmv, x_off + bx_off, y_off + by_off,
1305  block_w, block_h, width, height, s->uvlinesize,
1306  s->put_pixels_tab[1 + (block_w == 4)]);
1307 }
1308 
1309 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1310  * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1311 static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
1312 {
1313  /* Don't prefetch refs that haven't been used very often this frame. */
1314  if (s->ref_count[ref-1] > (mb_xy >> 5)) {
1315  int x_off = mb_x << 4, y_off = mb_y << 4;
1316  int mx = (mb->mv.x>>2) + x_off + 8;
1317  int my = (mb->mv.y>>2) + y_off;
1318  uint8_t **src= s->framep[ref]->data;
1319  int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
1320  /* For threading, a ff_thread_await_progress here might be useful, but
1321  * it actually slows down the decoder. Since a bad prefetch doesn't
1322  * generate bad decoder output, we don't run it here. */
1323  s->vdsp.prefetch(src[0]+off, s->linesize, 4);
1324  off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
1325  s->vdsp.prefetch(src[1]+off, src[2]-src[1], 2);
1326  }
1327 }
1328 
1329 /**
1330  * Apply motion vectors to prediction buffer, chapter 18.
1331  */
1332 static av_always_inline
1333 void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1334  VP8Macroblock *mb, int mb_x, int mb_y)
1335 {
1336  int x_off = mb_x << 4, y_off = mb_y << 4;
1337  int width = 16*s->mb_width, height = 16*s->mb_height;
1338  AVFrame *ref = s->framep[mb->ref_frame];
1339  VP56mv *bmv = mb->bmv;
1340 
1341  switch (mb->partitioning) {
1342  case VP8_SPLITMVMODE_NONE:
1343  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1344  0, 0, 16, 16, width, height, &mb->mv);
1345  break;
1346  case VP8_SPLITMVMODE_4x4: {
1347  int x, y;
1348  VP56mv uvmv;
1349 
1350  /* Y */
1351  for (y = 0; y < 4; y++) {
1352  for (x = 0; x < 4; x++) {
1353  vp8_mc_luma(s, td, dst[0] + 4*y*s->linesize + x*4,
1354  ref, &bmv[4*y + x],
1355  4*x + x_off, 4*y + y_off, 4, 4,
1356  width, height, s->linesize,
1357  s->put_pixels_tab[2]);
1358  }
1359  }
1360 
1361  /* U/V */
1362  x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
1363  for (y = 0; y < 2; y++) {
1364  for (x = 0; x < 2; x++) {
1365  uvmv.x = mb->bmv[ 2*y * 4 + 2*x ].x +
1366  mb->bmv[ 2*y * 4 + 2*x+1].x +
1367  mb->bmv[(2*y+1) * 4 + 2*x ].x +
1368  mb->bmv[(2*y+1) * 4 + 2*x+1].x;
1369  uvmv.y = mb->bmv[ 2*y * 4 + 2*x ].y +
1370  mb->bmv[ 2*y * 4 + 2*x+1].y +
1371  mb->bmv[(2*y+1) * 4 + 2*x ].y +
1372  mb->bmv[(2*y+1) * 4 + 2*x+1].y;
1373  uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
1374  uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
1375  if (s->profile == 3) {
1376  uvmv.x &= ~7;
1377  uvmv.y &= ~7;
1378  }
1379  vp8_mc_chroma(s, td, dst[1] + 4*y*s->uvlinesize + x*4,
1380  dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
1381  4*x + x_off, 4*y + y_off, 4, 4,
1382  width, height, s->uvlinesize,
1383  s->put_pixels_tab[2]);
1384  }
1385  }
1386  break;
1387  }
1388  case VP8_SPLITMVMODE_16x8:
1389  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1390  0, 0, 16, 8, width, height, &bmv[0]);
1391  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1392  0, 8, 16, 8, width, height, &bmv[1]);
1393  break;
1394  case VP8_SPLITMVMODE_8x16:
1395  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1396  0, 0, 8, 16, width, height, &bmv[0]);
1397  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1398  8, 0, 8, 16, width, height, &bmv[1]);
1399  break;
1400  case VP8_SPLITMVMODE_8x8:
1401  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1402  0, 0, 8, 8, width, height, &bmv[0]);
1403  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1404  8, 0, 8, 8, width, height, &bmv[1]);
1405  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1406  0, 8, 8, 8, width, height, &bmv[2]);
1407  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1408  8, 8, 8, 8, width, height, &bmv[3]);
1409  break;
1410  }
1411 }
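/* [Editor's note, added commentary; not in the original source]
 * In the 4x4 split case each 4x4 chroma block takes the average of the
 * four luma MVs covering it. (v + 2 + (v >> (INT_BIT-1))) >> 2 divides
 * the four-component sum by 4 rounding to nearest, symmetrically for
 * negative sums: 6 -> (6+2)>>2 = 2 and -6 -> (-6+2-1)>>2 = -2, so +/-1.5
 * both round away from zero. Profile 3 then drops the fractional chroma
 * bits entirely (full-pel chroma MC). */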
1412 
1413 static av_always_inline void idct_mb(VP8Context *s, VP8ThreadData *td,
1414  uint8_t *dst[3], VP8Macroblock *mb)
1415 {
1416  int x, y, ch;
1417 
1418  if (mb->mode != MODE_I4x4) {
1419  uint8_t *y_dst = dst[0];
1420  for (y = 0; y < 4; y++) {
1421  uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
1422  if (nnz4) {
1423  if (nnz4&~0x01010101) {
1424  for (x = 0; x < 4; x++) {
1425  if ((uint8_t)nnz4 == 1)
1426  s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, td->block[y][x], s->linesize);
1427  else if((uint8_t)nnz4 > 1)
1428  s->vp8dsp.vp8_idct_add(y_dst+4*x, td->block[y][x], s->linesize);
1429  nnz4 >>= 8;
1430  if (!nnz4)
1431  break;
1432  }
1433  } else {
1434  s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
1435  }
1436  }
1437  y_dst += 4*s->linesize;
1438  }
1439  }
1440 
1441  for (ch = 0; ch < 2; ch++) {
1442  uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4+ch]);
1443  if (nnz4) {
1444  uint8_t *ch_dst = dst[1+ch];
1445  if (nnz4&~0x01010101) {
1446  for (y = 0; y < 2; y++) {
1447  for (x = 0; x < 2; x++) {
1448  if ((uint8_t)nnz4 == 1)
1449  s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
1450  else if((uint8_t)nnz4 > 1)
1451  s->vp8dsp.vp8_idct_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
1452  nnz4 >>= 8;
1453  if (!nnz4)
1454  goto chroma_idct_end;
1455  }
1456  ch_dst += 4*s->uvlinesize;
1457  }
1458  } else {
1459  s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4+ch], s->uvlinesize);
1460  }
1461  }
1462 chroma_idct_end: ;
1463  }
1464 }
1465 
1465 
1466 static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f)
1467 {
1468  int interior_limit, filter_level;
1469 
1470  if (s->segmentation.enabled) {
1471  filter_level = s->segmentation.filter_level[mb->segment];
1472  if (!s->segmentation.absolute_vals)
1473  filter_level += s->filter.level;
1474  } else
1475  filter_level = s->filter.level;
1476 
1477  if (s->lf_delta.enabled) {
1478  filter_level += s->lf_delta.ref[mb->ref_frame];
1479  filter_level += s->lf_delta.mode[mb->mode];
1480  }
1481 
1482  filter_level = av_clip_uintp2(filter_level, 6);
1483 
1484  interior_limit = filter_level;
1485  if (s->filter.sharpness) {
1486  interior_limit >>= (s->filter.sharpness + 3) >> 2;
1487  interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
1488  }
1489  interior_limit = FFMAX(interior_limit, 1);
1490 
1491  f->filter_level = filter_level;
1492  f->inner_limit = interior_limit;
1493  f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
1494 }
1495 
1496 static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
1497 {
1498  int mbedge_lim, bedge_lim, hev_thresh;
1499  int filter_level = f->filter_level;
1500  int inner_limit = f->inner_limit;
1501  int inner_filter = f->inner_filter;
1502  int linesize = s->linesize;
1503  int uvlinesize = s->uvlinesize;
1504  static const uint8_t hev_thresh_lut[2][64] = {
1505  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1506  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1507  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
1508  3, 3, 3, 3 },
1509  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1510  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1511  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1512  2, 2, 2, 2 }
1513  };
1514 
1515  if (!filter_level)
1516  return;
1517 
1518  bedge_lim = 2*filter_level + inner_limit;
1519  mbedge_lim = bedge_lim + 4;
1520 
1521  hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
1522 
1523  if (mb_x) {
1524  s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
1525  mbedge_lim, inner_limit, hev_thresh);
1526  s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
1527  mbedge_lim, inner_limit, hev_thresh);
1528  }
1529 
1530  if (inner_filter) {
1531  s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
1532  inner_limit, hev_thresh);
1533  s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
1534  inner_limit, hev_thresh);
1535  s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
1536  inner_limit, hev_thresh);
1537  s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
1538  uvlinesize, bedge_lim,
1539  inner_limit, hev_thresh);
1540  }
1541 
1542  if (mb_y) {
1543  s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
1544  mbedge_lim, inner_limit, hev_thresh);
1545  s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
1546  mbedge_lim, inner_limit, hev_thresh);
1547  }
1548 
1549  if (inner_filter) {
1550  s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
1551  linesize, bedge_lim,
1552  inner_limit, hev_thresh);
1553  s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
1554  linesize, bedge_lim,
1555  inner_limit, hev_thresh);
1556  s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
1557  linesize, bedge_lim,
1558  inner_limit, hev_thresh);
1559  s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
1560  dst[2] + 4 * uvlinesize,
1561  uvlinesize, bedge_lim,
1562  inner_limit, hev_thresh);
1563  }
1564 }
1565 
1565 
1566 static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
1567 {
1568  int mbedge_lim, bedge_lim;
1569  int filter_level = f->filter_level;
1570  int inner_limit = f->inner_limit;
1571  int inner_filter = f->inner_filter;
1572  int linesize = s->linesize;
1573 
1574  if (!filter_level)
1575  return;
1576 
1577  bedge_lim = 2*filter_level + inner_limit;
1578  mbedge_lim = bedge_lim + 4;
1579 
1580  if (mb_x)
1581  s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
1582  if (inner_filter) {
1583  s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
1584  s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
1585  s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
1586  }
1587 
1588  if (mb_y)
1589  s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
1590  if (inner_filter) {
1591  s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
1592  s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
1593  s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
1594  }
1595 }
1596 
1597 static void release_queued_segmaps(VP8Context *s, int is_close)
1598 {
1599  int leave_behind = is_close ? 0 : !s->maps_are_invalid;
1600  while (s->num_maps_to_be_freed > leave_behind)
1601  av_free(s->segmentation_maps[--s->num_maps_to_be_freed]);
1602  s->maps_are_invalid = 0;
1603 }
1604 
1605 #define MARGIN (16 << 2)
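/* [Editor's note, added commentary; not in the original source]
 * MARGIN is 16 pixels expressed in the quarter-pel units MVs are stored
 * in (16 << 2 == 64). mv_min/mv_max clamp MVs so a predicted block can
 * reach at most one macroblock beyond the frame edge, which is what the
 * frame padding and edge emulation are sized for; the "-= 64" updates in
 * the decode loops slide this clamping window one macroblock at a time. */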
1606 static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, AVFrame *curframe,
1607  AVFrame *prev_frame)
1608 {
1609  VP8Context *s = avctx->priv_data;
1610  int mb_x, mb_y;
1611 
1612  s->mv_min.y = -MARGIN;
1613  s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
1614  for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
1615  VP8Macroblock *mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
1616  int mb_xy = mb_y*s->mb_width;
1617 
1618  AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
1619 
1620  s->mv_min.x = -MARGIN;
1621  s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
1622  for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
1623  if (mb_y == 0)
1624  AV_WN32A((mb-s->mb_width-1)->intra4x4_pred_mode_top, DC_PRED*0x01010101);
1625  decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
1626  prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL, 1);
1627  s->mv_min.x -= 64;
1628  s->mv_max.x -= 64;
1629  }
1630  s->mv_min.y -= 64;
1631  s->mv_max.y -= 64;
1632  }
1633 }
1634 
1635 #if HAVE_THREADS
1636 #define check_thread_pos(td, otd, mb_x_check, mb_y_check)\
1637  do {\
1638  int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF);\
1639  if (otd->thread_mb_pos < tmp) {\
1640  pthread_mutex_lock(&otd->lock);\
1641  td->wait_mb_pos = tmp;\
1642  do {\
1643  if (otd->thread_mb_pos >= tmp)\
1644  break;\
1645  pthread_cond_wait(&otd->cond, &otd->lock);\
1646  } while (1);\
1647  td->wait_mb_pos = INT_MAX;\
1648  pthread_mutex_unlock(&otd->lock);\
1649  }\
1650  } while(0);
1651 
1652 #define update_pos(td, mb_y, mb_x)\
1653  do {\
1654  int pos = (mb_y << 16) | (mb_x & 0xFFFF);\
1655  int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && (num_jobs > 1);\
1656  int is_null = (next_td == NULL) || (prev_td == NULL);\
1657  int pos_check = (is_null) ? 1 :\
1658  (next_td != td && pos >= next_td->wait_mb_pos) ||\
1659  (prev_td != td && pos >= prev_td->wait_mb_pos);\
1660  td->thread_mb_pos = pos;\
1661  if (sliced_threading && pos_check) {\
1662  pthread_mutex_lock(&td->lock);\
1663  pthread_cond_broadcast(&td->cond);\
1664  pthread_mutex_unlock(&td->lock);\
1665  }\
1666  } while(0);
1667 #else
1668 #define check_thread_pos(td, otd, mb_x_check, mb_y_check)
1669 #define update_pos(td, mb_y, mb_x)
1670 #endif
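/* [Editor's note, added commentary; not in the original source]
 * Sliced threading synchronizes rows through a single int per thread,
 * packed as (mb_y << 16) | mb_x, so one integer compare orders positions
 * row-major: any position in an earlier row is smaller than any position
 * in a later row regardless of mb_x. check_thread_pos sleeps on the other
 * thread's condition variable until that thread's packed position reaches
 * the requested one; update_pos only takes the lock and broadcasts when
 * sliced threading is active and another thread may actually be waiting. */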
1671 
1672 static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
1673  int jobnr, int threadnr)
1674 {
1675  VP8Context *s = avctx->priv_data;
1676  VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
1677  int mb_y = td->thread_mb_pos>>16;
1678  int i, y, mb_x, mb_xy = mb_y*s->mb_width;
1679  int num_jobs = s->num_jobs;
1680  AVFrame *curframe = s->curframe, *prev_frame = s->prev_frame;
1681  VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
1682  VP8Macroblock *mb;
1683  uint8_t *dst[3] = {
1684  curframe->data[0] + 16*mb_y*s->linesize,
1685  curframe->data[1] + 8*mb_y*s->uvlinesize,
1686  curframe->data[2] + 8*mb_y*s->uvlinesize
1687  };
1688  if (mb_y == 0) prev_td = td;
1689  else prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
1690  if (mb_y == s->mb_height-1) next_td = td;
1691  else next_td = &s->thread_data[(jobnr + 1)%num_jobs];
1692  if (s->mb_layout == 1)
1693  mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
1694  else {
1695  mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
1696  memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
1697  AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
1698  }
1699 
1700  memset(td->left_nnz, 0, sizeof(td->left_nnz));
1701  // left edge of 129 for intra prediction
1702  if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1703  for (i = 0; i < 3; i++)
1704  for (y = 0; y < 16>>!!i; y++)
1705  dst[i][y*curframe->linesize[i]-1] = 129;
1706  if (mb_y == 1) {
1707  s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
1708  }
1709  }
1710 
1711  s->mv_min.x = -MARGIN;
1712  s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
1713 
1714  for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
1715  // Wait for previous thread to read mb_x+2, and reach mb_y-1.
1716  if (prev_td != td) {
1717  if (threadnr != 0) {
1718  check_thread_pos(td, prev_td, mb_x+1, mb_y-1);
1719  } else {
1720  check_thread_pos(td, prev_td, (s->mb_width+3) + (mb_x+1), mb_y-1);
1721  }
1722  }
1723 
1724  s->vdsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
1725  s->vdsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
1726 
1727  if (!s->mb_layout)
1728  decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
1729  prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL, 0);
1730 
1731  prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
1732 
1733  if (!mb->skip)
1734  decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz);
1735 
1736  if (mb->mode <= MODE_I4x4)
1737  intra_predict(s, td, dst, mb, mb_x, mb_y);
1738  else
1739  inter_predict(s, td, dst, mb, mb_x, mb_y);
1740 
1741  prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
1742 
1743  if (!mb->skip) {
1744  idct_mb(s, td, dst, mb);
1745  } else {
1746  AV_ZERO64(td->left_nnz);
1747  AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
1748 
1749  // Reset DC block predictors if they would exist if the mb had coefficients
1750  if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
1751  td->left_nnz[8] = 0;
1752  s->top_nnz[mb_x][8] = 0;
1753  }
1754  }
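 /* Note (editor's addition): modes MODE_I4x4 and VP8_MVMODE_SPLIT code no
  * luma DC (Y2) block, so for them entry 8 of the 9-entry nnz contexts is
  * left untouched and carries over; for every other skipped mode the implied
  * all-zero Y2 block must reset the DC predictor, which is what the two
  * assignments above do. Entries 0..7 are the per-4x4 edge contexts. */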
1755 
1756  if (s->deblock_filter)
1757  filter_level_for_mb(s, mb, &td->filter_strength[mb_x]);
1758 
1759  if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs-1) {
1760  if (s->filter.simple)
1761  backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
1762  else
1763  backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
1764  }
1765 
1766  prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
1767 
1768  dst[0] += 16;
1769  dst[1] += 8;
1770  dst[2] += 8;
1771  s->mv_min.x -= 64;
1772  s->mv_max.x -= 64;
1773 
1774  if (mb_x == s->mb_width+1) {
1775  update_pos(td, mb_y, s->mb_width+3);
1776  } else {
1777  update_pos(td, mb_y, mb_x);
1778  }
1779  }
1780 }
1781 
1782 static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
1783  int jobnr, int threadnr)
1784 {
1785  VP8Context *s = avctx->priv_data;
1786  VP8ThreadData *td = &s->thread_data[threadnr];
1787  int mb_x, mb_y = td->thread_mb_pos>>16, num_jobs = s->num_jobs;
1788  AVFrame *curframe = s->curframe;
1789  VP8Macroblock *mb;
1790  VP8ThreadData *prev_td, *next_td;
1791  uint8_t *dst[3] = {
1792  curframe->data[0] + 16*mb_y*s->linesize,
1793  curframe->data[1] + 8*mb_y*s->uvlinesize,
1794  curframe->data[2] + 8*mb_y*s->uvlinesize
1795  };
1796 
1797  if (s->mb_layout == 1)
1798  mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
1799  else
1800  mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
1801 
1802  if (mb_y == 0) prev_td = td;
1803  else prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
1804  if (mb_y == s->mb_height-1) next_td = td;
1805  else next_td = &s->thread_data[(jobnr + 1)%num_jobs];
1806 
1807  for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
1808  VP8FilterStrength *f = &td->filter_strength[mb_x];
1809  if (prev_td != td) {
1810  check_thread_pos(td, prev_td, (mb_x+1) + (s->mb_width+3), mb_y-1);
1811  }
1812  if (next_td != td)
1813  if (next_td != &s->thread_data[0]) {
1814  check_thread_pos(td, next_td, mb_x+1, mb_y+1);
1815  }
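 /* Note (editor's addition): this wait is the decode pass's dependency in
  * reverse: filtering row mb_y rewrites pixels in place that the job decoding
  * row mb_y+1 may still be reading, so the filter only advances once that job
  * has moved past this column. */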
1816 
1817  if (num_jobs == 1) {
1818  if (s->filter.simple)
1819  backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
1820  else
1821  backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
1822  }
1823 
1824  if (s->filter.simple)
1825  filter_mb_simple(s, dst[0], f, mb_x, mb_y);
1826  else
1827  filter_mb(s, dst, f, mb_x, mb_y);
1828  dst[0] += 16;
1829  dst[1] += 8;
1830  dst[2] += 8;
1831 
1832  update_pos(td, mb_y, (s->mb_width+3) + mb_x);
1833  }
1834 }
1835 
1836 static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
1837  int jobnr, int threadnr)
1838 {
1839  VP8Context *s = avctx->priv_data;
1840  VP8ThreadData *td = &s->thread_data[jobnr];
1841  VP8ThreadData *next_td = NULL, *prev_td = NULL;
1842  AVFrame *curframe = s->curframe;
1843  int mb_y, num_jobs = s->num_jobs;
1844  td->thread_nr = threadnr;
1845  for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
1846  if (mb_y >= s->mb_height) break;
1847  td->thread_mb_pos = mb_y<<16;
1848  vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
1849  if (s->deblock_filter)
1850  vp8_filter_mb_row(avctx, tdata, jobnr, threadnr);
1851  update_pos(td, mb_y, INT_MAX & 0xFFFF);
1852 
1853  s->mv_min.y -= 64;
1854  s->mv_max.y -= 64;
1855 
1856  if (avctx->active_thread_type == FF_THREAD_FRAME)
1857  ff_thread_report_progress(curframe, mb_y, 0);
1858  }
1859 
1860  return 0;
1861 }
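/* Sketch (editor's addition): slice threading interleaves whole macroblock
 * rows across jobs, e.g. with num_jobs == 3:
 *
 *     job 0: mb_y = 0, 3, 6, ...
 *     job 1: mb_y = 1, 4, 7, ...
 *     job 2: mb_y = 2, 5, 8, ...
 *
 * Each job decodes a row and, if the loop filter is enabled, filters it too;
 * update_pos(td, mb_y, INT_MAX & 0xFFFF) then marks the row finished so jobs
 * blocked in check_thread_pos() on this row can continue. */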
1862 
1863 static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
1864  AVPacket *avpkt)
1865 {
1866  VP8Context *s = avctx->priv_data;
1867  int ret, i, referenced, num_jobs;
1868  enum AVDiscard skip_thresh;
1869  AVFrame *av_uninit(curframe), *prev_frame;
1870 
1871  release_queued_segmaps(s, 0);
1872 
1873  if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
1874  goto err;
1875 
1876  prev_frame = s->framep[VP56_FRAME_CURRENT];
1877 
 1878  referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
 1879  || s->update_altref == VP56_FRAME_CURRENT;
 1880 
 1881  skip_thresh = !referenced ? AVDISCARD_NONREF :
 1882  !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;
 1883 
1884  if (avctx->skip_frame >= skip_thresh) {
1885  s->invisible = 1;
1886  memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
1887  goto skip_decode;
1888  }
1889  s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
1890 
1891  // release no longer referenced frames
1892  for (i = 0; i < 5; i++)
1893  if (s->frames[i].data[0] &&
1894  &s->frames[i] != prev_frame &&
1895  &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1896  &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1897  &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
1898  vp8_release_frame(s, &s->frames[i], 1, 0);
1899 
1900  // find a free buffer
1901  for (i = 0; i < 5; i++)
1902  if (&s->frames[i] != prev_frame &&
1903  &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1904  &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1905  &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
1906  curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
1907  break;
1908  }
1909  if (i == 5) {
1910  av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
1911  abort();
1912  }
1913  if (curframe->data[0])
1914  vp8_release_frame(s, curframe, 1, 0);
1915 
 1916  // Given that arithmetic probabilities are updated every frame, it's quite likely
 1917  // that the values we have on a random interframe are complete junk if we didn't
 1918  // start decoding at a keyframe. So just display nothing rather than junk.
1919  if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
1920  !s->framep[VP56_FRAME_GOLDEN] ||
1921  !s->framep[VP56_FRAME_GOLDEN2])) {
1922  av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
1923  ret = AVERROR_INVALIDDATA;
1924  goto err;
1925  }
1926 
1927  curframe->key_frame = s->keyframe;
1928  curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
1929  curframe->reference = referenced ? 3 : 0;
1930  if ((ret = vp8_alloc_frame(s, curframe))) {
1931  av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
1932  goto err;
1933  }
1934 
1935  // check if golden and altref are swapped
 1936  if (s->update_altref != VP56_FRAME_NONE) {
 1937  s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
 1938  } else {
 1939  s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
 1940  }
 1941  if (s->update_golden != VP56_FRAME_NONE) {
 1942  s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
 1943  } else {
 1944  s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
 1945  }
1946  if (s->update_last) {
1947  s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
 1948  } else {
 1949  s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
 1950  }
1951  s->next_framep[VP56_FRAME_CURRENT] = curframe;
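 /* Note (editor's addition): this block is VP8 reference rotation. The header
  * flags update_last/update_golden/update_altref name, per reference slot, the
  * frame that should occupy it next: VP56_FRAME_CURRENT promotes curframe,
  * another slot value copies that reference across (the golden/altref swap the
  * comment above refers to), and VP56_FRAME_NONE keeps the old frame. The
  * result is staged in next_framep[] and only committed to framep[] after the
  * rows decode, so the error path can keep the previous reference state. */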
1952 
1953  ff_thread_finish_setup(avctx);
1954 
1955  s->linesize = curframe->linesize[0];
1956  s->uvlinesize = curframe->linesize[1];
1957 
1958  if (!s->thread_data[0].edge_emu_buffer)
 1959  for (i = 0; i < MAX_THREADS; i++)
 1960  s->thread_data[i].edge_emu_buffer = av_malloc(21*s->linesize);
 1961 
1962  memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
1963  /* Zero macroblock structures for top/top-left prediction from outside the frame. */
1964  if (!s->mb_layout)
1965  memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
1966  if (!s->mb_layout && s->keyframe)
1967  memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
1968 
1969  // top edge of 127 for intra prediction
1970  if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1971  s->top_border[0][15] = s->top_border[0][23] = 127;
1972  s->top_border[0][31] = 127;
1973  memset(s->top_border[1], 127, s->mb_width*sizeof(*s->top_border));
1974  }
1975  memset(s->ref_count, 0, sizeof(s->ref_count));
1976 
1977 
1978  // Make sure the previous frame has read its segmentation map,
1979  // if we re-use the same map.
1980  if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map)
1981  ff_thread_await_progress(prev_frame, 1, 0);
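 /* Note (editor's addition): ff_thread_report_progress()/_await_progress()
  * implement a per-frame row-counter handshake between frame threads. Waiting
  * for prev_frame to reach progress 1 ensures the thread decoding it is past
  * the point where the shared segmentation map is consumed, so reusing the
  * map here cannot race with it. */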
1982 
1983  if (s->mb_layout == 1)
1984  vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
1985 
1986  if (avctx->active_thread_type == FF_THREAD_FRAME)
1987  num_jobs = 1;
1988  else
1989  num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
1990  s->num_jobs = num_jobs;
1991  s->curframe = curframe;
1992  s->prev_frame = prev_frame;
1993  s->mv_min.y = -MARGIN;
1994  s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
1995  for (i = 0; i < MAX_THREADS; i++) {
1996  s->thread_data[i].thread_mb_pos = 0;
1997  s->thread_data[i].wait_mb_pos = INT_MAX;
1998  }
1999  avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL, num_jobs);
2000 
2001  ff_thread_report_progress(curframe, INT_MAX, 0);
2002  memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
2003 
2004 skip_decode:
2005  // if future frames don't use the updated probabilities,
2006  // reset them to the values we saved
2007  if (!s->update_probabilities)
2008  s->prob[0] = s->prob[1];
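 /* Note (editor's addition): s->prob[] double-buffers the bool-coder
  * probability tables: prob[0] is the live set mutated while parsing the
  * header, and decode_frame_header() snapshots it into prob[1] when the
  * header marks its updates as non-persistent. The struct assignment above
  * restores that snapshot, keeping per-frame updates from leaking into
  * subsequent frames. */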
2009 
2010  if (!s->invisible) {
2011  *(AVFrame*)data = *curframe;
2012  *got_frame = 1;
2013  }
2014 
2015  return avpkt->size;
2016 err:
2017  memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2018  return ret;
2019 }
2020 
 2021 static av_cold int vp8_decode_init(AVCodecContext *avctx)
 2022 {
2023  VP8Context *s = avctx->priv_data;
2024 
2025  s->avctx = avctx;
2026  avctx->pix_fmt = AV_PIX_FMT_YUV420P;
2027 
 2028  ff_videodsp_init(&s->vdsp, 8);
 2029  ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
 2030  ff_vp8dsp_init(&s->vp8dsp);
2031 
2032  return 0;
2033 }
2034 
 2035 static av_cold int vp8_decode_free(AVCodecContext *avctx)
 2036 {
2037  vp8_decode_flush_impl(avctx, 0, 1, 1);
2038  release_queued_segmaps(avctx->priv_data, 1);
2039  return 0;
2040 }
2041 
 2042 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
 2043 {
2044  VP8Context *s = avctx->priv_data;
2045 
2046  s->avctx = avctx;
2047 
2048  return 0;
2049 }
2050 
2051 #define REBASE(pic) \
2052  pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL
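/* Sketch (editor's addition, not in vp8.c): REBASE() translates a frame
 * pointer from the source context's frames[] array to the same slot in the
 * destination's array by plain pointer arithmetic, conceptually:
 *
 *     ptrdiff_t slot = pic - &s_src->frames[0];   // index 0..4 in src
 *     AVFrame  *dstp = &s->frames[slot];          // matching slot in dst
 *
 * This is valid because both contexts keep the same five-element frames[]
 * array, which vp8_decode_update_thread_context() copies across below. */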
2053 
 2054 static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
 2055 {
2056  VP8Context *s = dst->priv_data, *s_src = src->priv_data;
2057 
2058  if (s->macroblocks_base &&
2059  (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
2060  free_buffers(s);
2061  s->maps_are_invalid = 1;
2062  s->mb_width = s_src->mb_width;
2063  s->mb_height = s_src->mb_height;
2064  }
2065 
2066  s->prob[0] = s_src->prob[!s_src->update_probabilities];
2067  s->segmentation = s_src->segmentation;
2068  s->lf_delta = s_src->lf_delta;
2069  memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
2070 
2071  memcpy(&s->frames, &s_src->frames, sizeof(s->frames));
2072  s->framep[0] = REBASE(s_src->next_framep[0]);
2073  s->framep[1] = REBASE(s_src->next_framep[1]);
2074  s->framep[2] = REBASE(s_src->next_framep[2]);
2075  s->framep[3] = REBASE(s_src->next_framep[3]);
2076 
2077  return 0;
2078 }
2079 
 2080 AVCodec ff_vp8_decoder = {
 2081  .name = "vp8",
2082  .type = AVMEDIA_TYPE_VIDEO,
2083  .id = AV_CODEC_ID_VP8,
2084  .priv_data_size = sizeof(VP8Context),
 2085  .init = vp8_decode_init,
 2086  .close = vp8_decode_free,
 2087  .decode = vp8_decode_frame,
 2088  .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
 2089  .flush = vp8_decode_flush,
 2090  .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
 2091  .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
 2092  .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
 2093 };