vp8.c
1 /*
2  * VP8 compatible video decoder
3  *
4  * Copyright (C) 2010 David Conrad
5  * Copyright (C) 2010 Ronald S. Bultje
6  * Copyright (C) 2010 Jason Garrett-Glaser
7  * Copyright (C) 2012 Daniel Kang
8  *
9  * This file is part of FFmpeg.
10  *
11  * FFmpeg is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * FFmpeg is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with FFmpeg; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24  */
25 
26 #include "libavutil/imgutils.h"
27 #include "avcodec.h"
28 #include "internal.h"
29 #include "vp8.h"
30 #include "vp8data.h"
31 #include "rectangle.h"
32 #include "thread.h"
33 
34 #if ARCH_ARM
35 # include "arm/vp8.h"
36 #endif
37 
38 static void free_buffers(VP8Context *s)
39 {
40  int i;
41  if (s->thread_data)
42  for (i = 0; i < MAX_THREADS; i++) {
43 #if HAVE_THREADS
44  pthread_cond_destroy(&s->thread_data[i].cond);
45  pthread_mutex_destroy(&s->thread_data[i].lock);
46 #endif
47  av_freep(&s->thread_data[i].filter_strength);
48  av_freep(&s->thread_data[i].edge_emu_buffer);
49  }
50  av_freep(&s->thread_data);
51  av_freep(&s->macroblocks_base);
52  av_freep(&s->intra4x4_pred_mode_top);
53  av_freep(&s->top_nnz);
54  av_freep(&s->top_border);
55 
56  s->macroblocks = NULL;
57 }
58 
59 static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
60 {
61  int ret;
62  if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
63  ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
64  return ret;
65  if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) {
66  ff_thread_release_buffer(s->avctx, &f->tf);
67  return AVERROR(ENOMEM);
68  }
69  return 0;
70 }
71 
72 static void vp8_release_frame(VP8Context *s, VP8Frame *f)
73 {
74  av_buffer_unref(&f->seg_map);
75  ff_thread_release_buffer(s->avctx, &f->tf);
76 }
77 
78 static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
79 {
80  int ret;
81 
82  vp8_release_frame(s, dst);
83 
84  if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
85  return ret;
86  if (src->seg_map &&
87  !(dst->seg_map = av_buffer_ref(src->seg_map))) {
88  vp8_release_frame(s, dst);
89  return AVERROR(ENOMEM);
90  }
91 
92  return 0;
93 }
94 
95 
96 static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
97 {
98  VP8Context *s = avctx->priv_data;
99  int i;
100 
101  for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
102  vp8_release_frame(s, &s->frames[i]);
103  memset(s->framep, 0, sizeof(s->framep));
104 
105  if (free_mem)
106  free_buffers(s);
107 }
108 
109 static void vp8_decode_flush(AVCodecContext *avctx)
110 {
111  vp8_decode_flush_impl(avctx, 0);
112 }
113 
114 static int update_dimensions(VP8Context *s, int width, int height)
115 {
116  AVCodecContext *avctx = s->avctx;
117  int i;
118 
119  if (width != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
120  height != s->avctx->height) {
121  if (av_image_check_size(width, height, 0, s->avctx))
122  return AVERROR_INVALIDDATA;
123 
124  vp8_decode_flush_impl(s->avctx, 1);
125 
126  avcodec_set_dimensions(s->avctx, width, height);
127  }
128 
129  s->mb_width = (s->avctx->coded_width +15) / 16;
130  s->mb_height = (s->avctx->coded_height+15) / 16;
131 
132  s->mb_layout = (avctx->active_thread_type == FF_THREAD_SLICE) && (FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1);
133  if (!s->mb_layout) { // Frame threading and one thread
134  s->macroblocks_base = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
135  s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
136  }
137  else // Sliced threading
138  s->macroblocks_base = av_mallocz((s->mb_width+2)*(s->mb_height+2)*sizeof(*s->macroblocks));
139  s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
140  s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
141  s->thread_data = av_mallocz(MAX_THREADS*sizeof(VP8ThreadData));
142 
143  for (i = 0; i < MAX_THREADS; i++) {
144  s->thread_data[i].filter_strength = av_mallocz(s->mb_width*sizeof(*s->thread_data[0].filter_strength));
145 #if HAVE_THREADS
146  pthread_mutex_init(&s->thread_data[i].lock, NULL);
147  pthread_cond_init(&s->thread_data[i].cond, NULL);
148 #endif
149  }
150 
151  if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
152  (!s->intra4x4_pred_mode_top && !s->mb_layout))
153  return AVERROR(ENOMEM);
154 
155  s->macroblocks = s->macroblocks_base + 1;
156 
157  return 0;
158 }
159 
160 static void parse_segment_info(VP8Context *s)
161 {
162  VP56RangeCoder *c = &s->c;
163  int i;
164 
165  s->segmentation.update_map = vp8_rac_get(c);
166 
167  if (vp8_rac_get(c)) { // update segment feature data
168  s->segmentation.absolute_vals = vp8_rac_get(c);
169 
170  for (i = 0; i < 4; i++)
171  s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);
172 
173  for (i = 0; i < 4; i++)
174  s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
175  }
176  if (s->segmentation.update_map)
177  for (i = 0; i < 3; i++)
178  s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
179 }
180 
181 static void update_lf_deltas(VP8Context *s)
182 {
183  VP56RangeCoder *c = &s->c;
184  int i;
185 
186  for (i = 0; i < 4; i++) {
187  if (vp8_rac_get(c)) {
188  s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);
189 
190  if (vp8_rac_get(c))
191  s->lf_delta.ref[i] = -s->lf_delta.ref[i];
192  }
193  }
194 
195  for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
196  if (vp8_rac_get(c)) {
197  s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);
198 
199  if (vp8_rac_get(c))
200  s->lf_delta.mode[i] = -s->lf_delta.mode[i];
201  }
202  }
203 }
204 
205 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
206 {
207  const uint8_t *sizes = buf;
208  int i;
209 
210  s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
211 
212  buf += 3*(s->num_coeff_partitions-1);
213  buf_size -= 3*(s->num_coeff_partitions-1);
214  if (buf_size < 0)
215  return -1;
216 
217  for (i = 0; i < s->num_coeff_partitions-1; i++) {
218  int size = AV_RL24(sizes + 3*i);
219  if (buf_size - size < 0)
220  return -1;
221 
222  ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
223  buf += size;
224  buf_size -= size;
225  }
226  ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
227 
228  return 0;
229 }
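/* Editorial note, not part of the original vp8.c: the layout parsed above is
 * a prefix table of 3-byte little-endian sizes, one per coefficient partition
 * except the last, which simply consumes the remaining bytes. With
 * num_coeff_partitions == 2, for instance, buf carries one AV_RL24 size
 * entry; the first partition starts at buf + 3 with that length, and the
 * second runs from there to the end of the input. */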
230 
231 static void get_quants(VP8Context *s)
232 {
233  VP56RangeCoder *c = &s->c;
234  int i, base_qi;
235 
236  int yac_qi = vp8_rac_get_uint(c, 7);
237  int ydc_delta = vp8_rac_get_sint(c, 4);
238  int y2dc_delta = vp8_rac_get_sint(c, 4);
239  int y2ac_delta = vp8_rac_get_sint(c, 4);
240  int uvdc_delta = vp8_rac_get_sint(c, 4);
241  int uvac_delta = vp8_rac_get_sint(c, 4);
242 
243  for (i = 0; i < 4; i++) {
244  if (s->segmentation.enabled) {
245  base_qi = s->segmentation.base_quant[i];
246  if (!s->segmentation.absolute_vals)
247  base_qi += yac_qi;
248  } else
249  base_qi = yac_qi;
250 
251  s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
252  s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi , 7)];
253  s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
254  /* 101581>>16 is equivalent to 155/100 */
255  s->qmat[i].luma_dc_qmul[1] = (101581 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)]) >> 16;
256  s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
257  s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
258 
259  s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
260  s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
261  }
262 }
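/* Editorial sketch, not part of the original vp8.c: av_clip_uintp2(v, 7)
 * above clamps v to the unsigned 7-bit range [0, 127], the valid index range
 * of the vp8_dc_qlookup/vp8_ac_qlookup tables, so e.g. base_qi = 120 with
 * y2dc_delta = +15 still reads entry 127 instead of overrunning the table.
 * A plain-C equivalent of that clamp:
 *
 *     static int clip_uint7(int v)
 *     {
 *         return v < 0 ? 0 : v > 127 ? 127 : v;
 *     }
 */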
263 
264 /**
265  * Determine which buffers golden and altref should be updated with after this frame.
266  * The spec isn't clear here, so I'm going by my understanding of what libvpx does.
267  *
268  * Intra frames update all 3 references
269  * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
270  * If the update (golden|altref) flag is set, it's updated with the current frame
271  * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
272  * If the flag is not set, the number read means:
273  * 0: no update
274  * 1: VP56_FRAME_PREVIOUS
275  * 2: update golden with altref, or update altref with golden
276  */
277 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
278 {
279  VP56RangeCoder *c = &s->c;
280 
281  if (update)
282  return VP56_FRAME_CURRENT;
283 
284  switch (vp8_rac_get_uint(c, 2)) {
285  case 1:
286  return VP56_FRAME_PREVIOUS;
287  case 2:
288  return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
289  }
290  return VP56_FRAME_NONE;
291 }
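/* Editorial note, not part of the original vp8.c: vp8_rac_get_uint(c, 2)
 * above returns a raw two-bit value 0..3; only 1 and 2 trigger a copy, so
 * both 0 and the spec-unused value 3 fall through to VP56_FRAME_NONE. */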
292 
293 static void update_refs(VP8Context *s)
294 {
295  VP56RangeCoder *c = &s->c;
296 
297  int update_golden = vp8_rac_get(c);
298  int update_altref = vp8_rac_get(c);
299 
300  s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
301  s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
302 }
303 
304 static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
305 {
306  VP56RangeCoder *c = &s->c;
307  int header_size, hscale, vscale, i, j, k, l, m, ret;
308  int width = s->avctx->width;
309  int height = s->avctx->height;
310 
311  s->keyframe = !(buf[0] & 1);
312  s->profile = (buf[0]>>1) & 7;
313  s->invisible = !(buf[0] & 0x10);
314  header_size = AV_RL24(buf) >> 5;
315  buf += 3;
316  buf_size -= 3;
317 
318  if (s->profile > 3)
319  av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
320 
321  if (!s->profile)
322  memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
323  else // profile 1-3 use bilinear, 4+ aren't defined so whatever
324  memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));
325 
326  if (header_size > buf_size - 7*s->keyframe) {
327  av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
328  return AVERROR_INVALIDDATA;
329  }
330 
331  if (s->keyframe) {
332  if (AV_RL24(buf) != 0x2a019d) {
333  av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
334  return AVERROR_INVALIDDATA;
335  }
336  width = AV_RL16(buf+3) & 0x3fff;
337  height = AV_RL16(buf+5) & 0x3fff;
338  hscale = buf[4] >> 6;
339  vscale = buf[6] >> 6;
340  buf += 7;
341  buf_size -= 7;
342 
343  if (hscale || vscale)
344  avpriv_request_sample(s->avctx, "Upscaling");
345 
346  s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
347  for (i = 0; i < 4; i++)
348  for (j = 0; j < 16; j++)
349  memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
350  sizeof(s->prob->token[i][j]));
351  memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
352  memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
353  memcpy(s->prob->mvc , vp8_mv_default_prob , sizeof(s->prob->mvc));
354  memset(&s->segmentation, 0, sizeof(s->segmentation));
355  memset(&s->lf_delta, 0, sizeof(s->lf_delta));
356  }
357 
358  ff_vp56_init_range_decoder(c, buf, header_size);
359  buf += header_size;
360  buf_size -= header_size;
361 
362  if (s->keyframe) {
363  if (vp8_rac_get(c))
364  av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
365  vp8_rac_get(c); // whether we can skip clamping in dsp functions
366  }
367 
368  if ((s->segmentation.enabled = vp8_rac_get(c)))
369  parse_segment_info(s);
370  else
371  s->segmentation.update_map = 0; // FIXME: move this to some init function?
372 
373  s->filter.simple = vp8_rac_get(c);
374  s->filter.level = vp8_rac_get_uint(c, 6);
375  s->filter.sharpness = vp8_rac_get_uint(c, 3);
376 
377  if ((s->lf_delta.enabled = vp8_rac_get(c)))
378  if (vp8_rac_get(c))
379  update_lf_deltas(s);
380 
381  if (setup_partitions(s, buf, buf_size)) {
382  av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
383  return AVERROR_INVALIDDATA;
384  }
385 
386  if (!s->macroblocks_base || /* first frame */
387  width != s->avctx->width || height != s->avctx->height || (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) {
388  if ((ret = update_dimensions(s, width, height)) < 0)
389  return ret;
390  }
391 
392  get_quants(s);
393 
394  if (!s->keyframe) {
395  update_refs(s);
396  s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c);
397  s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
398  }
399 
400  // if we aren't saving this frame's probabilities for future frames,
401  // make a copy of the current probabilities
402  if (!(s->update_probabilities = vp8_rac_get(c)))
403  s->prob[1] = s->prob[0];
404 
405  s->update_last = s->keyframe || vp8_rac_get(c);
406 
407  for (i = 0; i < 4; i++)
408  for (j = 0; j < 8; j++)
409  for (k = 0; k < 3; k++)
410  for (l = 0; l < NUM_DCT_TOKENS-1; l++)
411  if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
412  int prob = vp8_rac_get_uint(c, 8);
413  for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
414  s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
415  }
416 
417  if ((s->mbskip_enabled = vp8_rac_get(c)))
418  s->prob->mbskip = vp8_rac_get_uint(c, 8);
419 
420  if (!s->keyframe) {
421  s->prob->intra = vp8_rac_get_uint(c, 8);
422  s->prob->last = vp8_rac_get_uint(c, 8);
423  s->prob->golden = vp8_rac_get_uint(c, 8);
424 
425  if (vp8_rac_get(c))
426  for (i = 0; i < 4; i++)
427  s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
428  if (vp8_rac_get(c))
429  for (i = 0; i < 3; i++)
430  s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
431 
432  // 17.2 MV probability update
433  for (i = 0; i < 2; i++)
434  for (j = 0; j < 19; j++)
435  if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
436  s->prob->mvc[i][j] = vp8_rac_get_nn(c);
437  }
438 
439  return 0;
440 }
441 
442 static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
443 {
444  dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
445  dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
446 }
447 
448 /**
449  * Motion vector coding, 17.1.
450  */
451 static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
452 {
453  int bit, x = 0;
454 
455  if (vp56_rac_get_prob_branchy(c, p[0])) {
456  int i;
457 
458  for (i = 0; i < 3; i++)
459  x += vp56_rac_get_prob(c, p[9 + i]) << i;
460  for (i = 9; i > 3; i--)
461  x += vp56_rac_get_prob(c, p[9 + i]) << i;
462  if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
463  x += 8;
464  } else {
465  // small_mvtree
466  const uint8_t *ps = p+2;
467  bit = vp56_rac_get_prob(c, *ps);
468  ps += 1 + 3*bit;
469  x += 4*bit;
470  bit = vp56_rac_get_prob(c, *ps);
471  ps += 1 + bit;
472  x += 2*bit;
473  x += vp56_rac_get_prob(c, *ps);
474  }
475 
476  return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
477 }
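/* Editorial note, not part of the original vp8.c: in the long-vector branch
 * above, the magnitude bits arrive low-three-first (bits 0..2), then bits 9
 * down to 4. Bit 3 is only read from the bitstream when a higher bit is
 * already set (x & 0xFFF0); otherwise it is forced to 1 (x += 8) so the
 * magnitude stays in the long-vector range of 8 and up. The final p[1] read
 * supplies the sign. */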
478 
479 static av_always_inline
480 const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
481 {
482  if (left == top)
483  return vp8_submv_prob[4-!!left];
484  if (!top)
485  return vp8_submv_prob[2];
486  return vp8_submv_prob[1-!!left];
487 }
488 
489 /**
490  * Split motion vector prediction, 16.4.
491  * @returns the number of motion vectors parsed (2, 4 or 16)
492  */
493 static av_always_inline
494 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int layout)
495 {
496  int part_idx;
497  int n, num;
498  VP8Macroblock *top_mb;
499  VP8Macroblock *left_mb = &mb[-1];
500  const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
501  *mbsplits_top,
502  *mbsplits_cur, *firstidx;
503  VP56mv *top_mv;
504  VP56mv *left_mv = left_mb->bmv;
505  VP56mv *cur_mv = mb->bmv;
506 
507  if (!layout) // layout is inlined, s->mb_layout is not
508  top_mb = &mb[2];
509  else
510  top_mb = &mb[-s->mb_width-1];
511  mbsplits_top = vp8_mbsplits[top_mb->partitioning];
512  top_mv = top_mb->bmv;
513 
514  if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
515  if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
516  part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
517  } else {
518  part_idx = VP8_SPLITMVMODE_8x8;
519  }
520  } else {
521  part_idx = VP8_SPLITMVMODE_4x4;
522  }
523 
524  num = vp8_mbsplit_count[part_idx];
525  mbsplits_cur = vp8_mbsplits[part_idx],
526  firstidx = vp8_mbfirstidx[part_idx];
527  mb->partitioning = part_idx;
528 
529  for (n = 0; n < num; n++) {
530  int k = firstidx[n];
531  uint32_t left, above;
532  const uint8_t *submv_prob;
533 
534  if (!(k & 3))
535  left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
536  else
537  left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
538  if (k <= 3)
539  above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
540  else
541  above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
542 
543  submv_prob = get_submv_prob(left, above);
544 
545  if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
546  if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
547  if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
548  mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
549  mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
550  } else {
551  AV_ZERO32(&mb->bmv[n]);
552  }
553  } else {
554  AV_WN32A(&mb->bmv[n], above);
555  }
556  } else {
557  AV_WN32A(&mb->bmv[n], left);
558  }
559  }
560 
561  return num;
562 }
563 
564 static av_always_inline
565 void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout)
566 {
567  VP8Macroblock *mb_edge[3] = { 0 /* top */,
568  mb - 1 /* left */,
569  0 /* top-left */ };
570  enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
571  enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
572  int idx = CNT_ZERO;
573  int cur_sign_bias = s->sign_bias[mb->ref_frame];
574  int8_t *sign_bias = s->sign_bias;
575  VP56mv near_mv[4];
576  uint8_t cnt[4] = { 0 };
577  VP56RangeCoder *c = &s->c;
578 
579  if (!layout) { // layout is inlined (s->mb_layout is not)
580  mb_edge[0] = mb + 2;
581  mb_edge[2] = mb + 1;
582  }
583  else {
584  mb_edge[0] = mb - s->mb_width-1;
585  mb_edge[2] = mb - s->mb_width-2;
586  }
587 
588  AV_ZERO32(&near_mv[0]);
589  AV_ZERO32(&near_mv[1]);
590  AV_ZERO32(&near_mv[2]);
591 
592  /* Process MB on top, left and top-left */
593  #define MV_EDGE_CHECK(n)\
594  {\
595  VP8Macroblock *edge = mb_edge[n];\
596  int edge_ref = edge->ref_frame;\
597  if (edge_ref != VP56_FRAME_CURRENT) {\
598  uint32_t mv = AV_RN32A(&edge->mv);\
599  if (mv) {\
600  if (cur_sign_bias != sign_bias[edge_ref]) {\
601  /* SWAR negate of the values in mv. */\
602  mv = ~mv;\
603  mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
604  }\
605  if (!n || mv != AV_RN32A(&near_mv[idx]))\
606  AV_WN32A(&near_mv[++idx], mv);\
607  cnt[idx] += 1 + (n != 2);\
608  } else\
609  cnt[CNT_ZERO] += 1 + (n != 2);\
610  }\
611  }
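/* Editorial note, not part of the original vp8.c: the SWAR negation in the
 * macro above flips both packed int16 MV components in one 32-bit operation.
 * Per 16-bit lane it computes ((~v & 0x7fff) + 1) ^ (~v & 0x8000); for a lane
 * holding +1: ~0x0001 = 0xFFFE, 0x7FFE + 1 = 0x7FFF, and 0x7FFF ^ 0x8000 =
 * 0xFFFF = -1. Masking the sign bits out of the addition keeps the +1 carry
 * from spilling into the neighbouring component. */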
612 
613  MV_EDGE_CHECK(0)
614  MV_EDGE_CHECK(1)
615  MV_EDGE_CHECK(2)
616 
617  mb->partitioning = VP8_SPLITMVMODE_NONE;
618  if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
619  mb->mode = VP8_MVMODE_MV;
620 
621  /* If we have three distinct MVs, merge first and last if they're the same */
622  if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
623  cnt[CNT_NEAREST] += 1;
624 
625  /* Swap near and nearest if necessary */
626  if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
627  FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
628  FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
629  }
630 
631  if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
632  if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
633 
634  /* Choose the best mv out of 0,0 and the nearest mv */
635  clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
636  cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
637  (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
638  (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
639 
640  if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
641  mb->mode = VP8_MVMODE_SPLIT;
642  mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout) - 1];
643  } else {
644  mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
645  mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
646  mb->bmv[0] = mb->mv;
647  }
648  } else {
649  clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
650  mb->bmv[0] = mb->mv;
651  }
652  } else {
653  clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
654  mb->bmv[0] = mb->mv;
655  }
656  } else {
657  mb->mode = VP8_MVMODE_ZERO;
658  AV_ZERO32(&mb->mv);
659  mb->bmv[0] = mb->mv;
660  }
661 }
662 
663 static av_always_inline
664 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
665  int mb_x, int keyframe, int layout)
666 {
667  uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
668 
669  if (layout == 1) {
670  VP8Macroblock *mb_top = mb - s->mb_width - 1;
671  memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
672  }
673  if (keyframe) {
674  int x, y;
675  uint8_t* top;
676  uint8_t* const left = s->intra4x4_pred_mode_left;
677  if (layout == 1)
678  top = mb->intra4x4_pred_mode_top;
679  else
680  top = s->intra4x4_pred_mode_top + 4 * mb_x;
681  for (y = 0; y < 4; y++) {
682  for (x = 0; x < 4; x++) {
683  const uint8_t *ctx;
684  ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
685  *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
686  left[y] = top[x] = *intra4x4;
687  intra4x4++;
688  }
689  }
690  } else {
691  int i;
692  for (i = 0; i < 16; i++)
693  intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
694  }
695 }
696 
697 static av_always_inline
698 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
699  uint8_t *segment, uint8_t *ref, int layout)
700 {
701  VP56RangeCoder *c = &s->c;
702 
703  if (s->segmentation.update_map) {
704  int bit = vp56_rac_get_prob(c, s->prob->segmentid[0]);
705  *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
706  } else if (s->segmentation.enabled)
707  *segment = ref ? *ref : *segment;
708  mb->segment = *segment;
709 
710  mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
711 
712  if (s->keyframe) {
713  mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);
714 
715  if (mb->mode == MODE_I4x4) {
716  decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
717  } else {
718  const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
719  if (s->mb_layout == 1)
720  AV_WN32A(mb->intra4x4_pred_mode_top, modes);
721  else
722  AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
723  AV_WN32A( s->intra4x4_pred_mode_left, modes);
724  }
725 
726  mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
727  mb->ref_frame = VP56_FRAME_CURRENT;
728  } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
729  // inter MB, 16.2
730  if (vp56_rac_get_prob_branchy(c, s->prob->last))
731  mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
732  VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
733  else
734  mb->ref_frame = VP56_FRAME_PREVIOUS;
735  s->ref_count[mb->ref_frame-1]++;
736 
737  // motion vectors, 16.3
738  decode_mvs(s, mb, mb_x, mb_y, layout);
739  } else {
740  // intra MB, 16.1
741  mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
742 
743  if (mb->mode == MODE_I4x4)
744  decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
745 
746  mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
747  mb->ref_frame = VP56_FRAME_CURRENT;
748  mb->partitioning = VP8_SPLITMVMODE_NONE;
749  AV_ZERO32(&mb->bmv[0]);
750  }
751 }
752 
753 #ifndef decode_block_coeffs_internal
754 /**
755  * @param r arithmetic bitstream reader context
756  * @param block destination for block coefficients
757  * @param probs probabilities to use when reading trees from the bitstream
758  * @param i initial coeff index, 0 unless a separate DC block is coded
759  * @param qmul array holding the dc/ac dequant factor at position 0/1
760  * @return 0 if no coeffs were decoded
761  * otherwise, the index of the last coeff decoded plus one
762  */
763 static int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
764  uint8_t probs[16][3][NUM_DCT_TOKENS-1],
765  int i, uint8_t *token_prob, int16_t qmul[2])
766 {
767  VP56RangeCoder c = *r;
768  goto skip_eob;
769  do {
770  int coeff;
771  if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB
772  break;
773 
774 skip_eob:
775  if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
776  if (++i == 16)
777  break; // invalid input; blocks should end with EOB
778  token_prob = probs[i][0];
779  goto skip_eob;
780  }
781 
782  if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
783  coeff = 1;
784  token_prob = probs[i+1][1];
785  } else {
786  if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
787  coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
788  if (coeff)
789  coeff += vp56_rac_get_prob(&c, token_prob[5]);
790  coeff += 2;
791  } else {
792  // DCT_CAT*
793  if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
794  if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
795  coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
796  } else { // DCT_CAT2
797  coeff = 7;
798  coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
799  coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
800  }
801  } else { // DCT_CAT3 and up
802  int a = vp56_rac_get_prob(&c, token_prob[8]);
803  int b = vp56_rac_get_prob(&c, token_prob[9+a]);
804  int cat = (a<<1) + b;
805  coeff = 3 + (8<<cat);
806  coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
807  }
808  }
809  token_prob = probs[i+1][2];
810  }
811  block[zigzag_scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
812  } while (++i < 16);
813 
814  *r = c;
815  return i;
816 }
817 #endif
818 
819 /**
820  * @param c arithmetic bitstream reader context
821  * @param block destination for block coefficients
822  * @param probs probabilities to use when reading trees from the bitstream
823  * @param i initial coeff index, 0 unless a separate DC block is coded
824  * @param zero_nhood the initial prediction context for number of surrounding
825  * all-zero blocks (only left/top, so 0-2)
826  * @param qmul array holding the dc/ac dequant factor at position 0/1
827  * @return 0 if no coeffs were decoded
828  * otherwise, the index of the last coeff decoded plus one
829  */
830 static av_always_inline
831 int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
832  uint8_t probs[16][3][NUM_DCT_TOKENS-1],
833  int i, int zero_nhood, int16_t qmul[2])
834 {
835  uint8_t *token_prob = probs[i][zero_nhood];
836  if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
837  return 0;
838  return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
839 }
840 
841 static av_always_inline
842 void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Macroblock *mb,
843  uint8_t t_nnz[9], uint8_t l_nnz[9])
844 {
845  int i, x, y, luma_start = 0, luma_ctx = 3;
846  int nnz_pred, nnz, nnz_total = 0;
847  int segment = mb->segment;
848  int block_dc = 0;
849 
850  if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
851  nnz_pred = t_nnz[8] + l_nnz[8];
852 
853  // decode DC values and do hadamard
854  nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0, nnz_pred,
855  s->qmat[segment].luma_dc_qmul);
856  l_nnz[8] = t_nnz[8] = !!nnz;
857  if (nnz) {
858  nnz_total += nnz;
859  block_dc = 1;
860  if (nnz == 1)
861  s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
862  else
863  s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
864  }
865  luma_start = 1;
866  luma_ctx = 0;
867  }
868 
869  // luma blocks
870  for (y = 0; y < 4; y++)
871  for (x = 0; x < 4; x++) {
872  nnz_pred = l_nnz[y] + t_nnz[x];
873  nnz = decode_block_coeffs(c, td->block[y][x], s->prob->token[luma_ctx], luma_start,
874  nnz_pred, s->qmat[segment].luma_qmul);
875  // nnz+block_dc may be one more than the actual last index, but we don't care
876  td->non_zero_count_cache[y][x] = nnz + block_dc;
877  t_nnz[x] = l_nnz[y] = !!nnz;
878  nnz_total += nnz;
879  }
880 
881  // chroma blocks
882  // TODO: what to do about dimensions? 2nd dim for luma is x,
883  // but for chroma it's (y<<1)|x
884  for (i = 4; i < 6; i++)
885  for (y = 0; y < 2; y++)
886  for (x = 0; x < 2; x++) {
887  nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
888  nnz = decode_block_coeffs(c, td->block[i][(y<<1)+x], s->prob->token[2], 0,
889  nnz_pred, s->qmat[segment].chroma_qmul);
890  td->non_zero_count_cache[i][(y<<1)+x] = nnz;
891  t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
892  nnz_total += nnz;
893  }
894 
895  // if there were no coded coeffs despite the macroblock not being marked skip,
896  // we MUST not do the inner loop filter and should not do IDCT
897  // Since skip isn't used for bitstream prediction, just manually set it.
898  if (!nnz_total)
899  mb->skip = 1;
900 }
901 
902 static av_always_inline
903 void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
904  int linesize, int uvlinesize, int simple)
905 {
906  AV_COPY128(top_border, src_y + 15*linesize);
907  if (!simple) {
908  AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
909  AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
910  }
911 }
912 
913 static av_always_inline
914 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
915  int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
916  int simple, int xchg)
917 {
918  uint8_t *top_border_m1 = top_border-32; // for TL prediction
919  src_y -= linesize;
920  src_cb -= uvlinesize;
921  src_cr -= uvlinesize;
922 
923 #define XCHG(a,b,xchg) do { \
924  if (xchg) AV_SWAP64(b,a); \
925  else AV_COPY64(b,a); \
926  } while (0)
927 
928  XCHG(top_border_m1+8, src_y-8, xchg);
929  XCHG(top_border, src_y, xchg);
930  XCHG(top_border+8, src_y+8, 1);
931  if (mb_x < mb_width-1)
932  XCHG(top_border+32, src_y+16, 1);
933 
934  // only copy chroma for normal loop filter
935  // or to initialize the top row to 127
936  if (!simple || !mb_y) {
937  XCHG(top_border_m1+16, src_cb-8, xchg);
938  XCHG(top_border_m1+24, src_cr-8, xchg);
939  XCHG(top_border+16, src_cb, 1);
940  XCHG(top_border+24, src_cr, 1);
941  }
942 }
943 
944 static av_always_inline
945 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
946 {
947  if (!mb_x) {
948  return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
949  } else {
950  return mb_y ? mode : LEFT_DC_PRED8x8;
951  }
952 }
953 
954 static av_always_inline
955 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
956 {
957  if (!mb_x) {
958  return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
959  } else {
960  return mb_y ? mode : HOR_PRED8x8;
961  }
962 }
963 
964 static av_always_inline
965 int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
966 {
967  if (mode == DC_PRED8x8) {
968  return check_dc_pred8x8_mode(mode, mb_x, mb_y);
969  } else {
970  return mode;
971  }
972 }
973 
974 static av_always_inline
975 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
976 {
977  switch (mode) {
978  case DC_PRED8x8:
979  return check_dc_pred8x8_mode(mode, mb_x, mb_y);
980  case VERT_PRED8x8:
981  return !mb_y ? DC_127_PRED8x8 : mode;
982  case HOR_PRED8x8:
983  return !mb_x ? DC_129_PRED8x8 : mode;
984  case PLANE_PRED8x8 /*TM*/:
985  return check_tm_pred8x8_mode(mode, mb_x, mb_y);
986  }
987  return mode;
988 }
989 
990 static av_always_inline
991 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
992 {
993  if (!mb_x) {
994  return mb_y ? VERT_VP8_PRED : DC_129_PRED;
995  } else {
996  return mb_y ? mode : HOR_VP8_PRED;
997  }
998 }
999 
1000 static av_always_inline
1001 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
1002 {
1003  switch (mode) {
1004  case VERT_PRED:
1005  if (!mb_x && mb_y) {
1006  *copy_buf = 1;
1007  return mode;
1008  }
1009  /* fall-through */
1010  case DIAG_DOWN_LEFT_PRED:
1011  case VERT_LEFT_PRED:
1012  return !mb_y ? DC_127_PRED : mode;
1013  case HOR_PRED:
1014  if (!mb_y) {
1015  *copy_buf = 1;
1016  return mode;
1017  }
1018  /* fall-through */
1019  case HOR_UP_PRED:
1020  return !mb_x ? DC_129_PRED : mode;
1021  case TM_VP8_PRED:
1022  return check_tm_pred4x4_mode(mode, mb_x, mb_y);
1023  case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
1024  case DIAG_DOWN_RIGHT_PRED:
1025  case VERT_RIGHT_PRED:
1026  case HOR_DOWN_PRED:
1027  if (!mb_y || !mb_x)
1028  *copy_buf = 1;
1029  return mode;
1030  }
1031  return mode;
1032 }
1033 
1034 static av_always_inline
1035 void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1036  VP8Macroblock *mb, int mb_x, int mb_y)
1037 {
1038  AVCodecContext *avctx = s->avctx;
1039  int x, y, mode, nnz;
1040  uint32_t tr;
1041 
1042  // for the first row, we need to run xchg_mb_border to init the top edge to 127
1043  // otherwise, skip it if we aren't going to deblock
1044  if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1045  xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1046  s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1047  s->filter.simple, 1);
1048 
1049  if (mb->mode < MODE_I4x4) {
1050  if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
1051  mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
1052  } else {
1053  mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
1054  }
1055  s->hpc.pred16x16[mode](dst[0], s->linesize);
1056  } else {
1057  uint8_t *ptr = dst[0];
1058  uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1059  uint8_t tr_top[4] = { 127, 127, 127, 127 };
1060 
1061  // all blocks on the right edge of the macroblock use the bottom edge of
1062  // the top macroblock for their topright edge
1063  uint8_t *tr_right = ptr - s->linesize + 16;
1064 
1065  // if we're on the right edge of the frame, said edge is extended
1066  // from the top macroblock
1067  if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
1068  mb_x == s->mb_width-1) {
1069  tr = tr_right[-1]*0x01010101u;
1070  tr_right = (uint8_t *)&tr;
1071  }
1072 
1073  if (mb->skip)
1074  AV_ZERO128(td->non_zero_count_cache);
1075 
1076  for (y = 0; y < 4; y++) {
1077  uint8_t *topright = ptr + 4 - s->linesize;
1078  for (x = 0; x < 4; x++) {
1079  int copy = 0, linesize = s->linesize;
1080  uint8_t *dst = ptr+4*x;
1081  DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];
1082 
1083  if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
1084  topright = tr_top;
1085  } else if (x == 3)
1086  topright = tr_right;
1087 
1088  if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
1089  mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
1090  if (copy) {
1091  dst = copy_dst + 12;
1092  linesize = 8;
1093  if (!(mb_y + y)) {
1094  copy_dst[3] = 127U;
1095  AV_WN32A(copy_dst+4, 127U * 0x01010101U);
1096  } else {
1097  AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
1098  if (!(mb_x + x)) {
1099  copy_dst[3] = 129U;
1100  } else {
1101  copy_dst[3] = ptr[4*x-s->linesize-1];
1102  }
1103  }
1104  if (!(mb_x + x)) {
1105  copy_dst[11] =
1106  copy_dst[19] =
1107  copy_dst[27] =
1108  copy_dst[35] = 129U;
1109  } else {
1110  copy_dst[11] = ptr[4*x -1];
1111  copy_dst[19] = ptr[4*x+s->linesize -1];
1112  copy_dst[27] = ptr[4*x+s->linesize*2-1];
1113  copy_dst[35] = ptr[4*x+s->linesize*3-1];
1114  }
1115  }
1116  } else {
1117  mode = intra4x4[x];
1118  }
1119  s->hpc.pred4x4[mode](dst, topright, linesize);
1120  if (copy) {
1121  AV_COPY32(ptr+4*x , copy_dst+12);
1122  AV_COPY32(ptr+4*x+s->linesize , copy_dst+20);
1123  AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
1124  AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
1125  }
1126 
1127  nnz = td->non_zero_count_cache[y][x];
1128  if (nnz) {
1129  if (nnz == 1)
1130  s->vp8dsp.vp8_idct_dc_add(ptr+4*x, td->block[y][x], s->linesize);
1131  else
1132  s->vp8dsp.vp8_idct_add(ptr+4*x, td->block[y][x], s->linesize);
1133  }
1134  topright += 4;
1135  }
1136 
1137  ptr += 4*s->linesize;
1138  intra4x4 += 4;
1139  }
1140  }
1141 
1142  if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
1143  mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode, mb_x, mb_y);
1144  } else {
1145  mode = check_intra_pred8x8_mode(mb->chroma_pred_mode, mb_x, mb_y);
1146  }
1147  s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1148  s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
1149 
1150  if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1151  xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1152  s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1153  s->filter.simple, 0);
1154 }
1155 
1156 static const uint8_t subpel_idx[3][8] = {
1157  { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
1158  // also function pointer index
1159  { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
1160  { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
1161 };
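/* Editorial example, not part of the original vp8.c: for the six-tap luma
 * filter at mx = 2, subpel_idx gives 2 extra pixels on the left and 3 on the
 * right, so filtering a 16-pixel row needs 16 + 5 source pixels; this is the
 * block_w + subpel_idx[1][mx] width requested from emulated_edge_mc below. */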
1162 
1163 /**
1164  * luma MC function
1165  *
1166  * @param s VP8 decoding context
1167  * @param dst target buffer for block data at block position
1168  * @param ref reference picture buffer at origin (0, 0)
1169  * @param mv motion vector (relative to block position) to get pixel data from
1170  * @param x_off horizontal position of block from origin (0, 0)
1171  * @param y_off vertical position of block from origin (0, 0)
1172  * @param block_w width of block (16, 8 or 4)
1173  * @param block_h height of block (always same as block_w)
1174  * @param width width of src/dst plane data
1175  * @param height height of src/dst plane data
1176  * @param linesize size of a single line of plane data, including padding
1177  * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1178  */
1179 static av_always_inline
1180 void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1181  ThreadFrame *ref, const VP56mv *mv,
1182  int x_off, int y_off, int block_w, int block_h,
1183  int width, int height, int linesize,
1184  vp8_mc_func mc_func[3][3])
1185 {
1186  uint8_t *src = ref->f->data[0];
1187 
1188  if (AV_RN32A(mv)) {
1189 
1190  int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
1191  int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];
1192 
1193  x_off += mv->x >> 2;
1194  y_off += mv->y >> 2;
1195 
1196  // edge emulation
1197  ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1198  src += y_off * linesize + x_off;
1199  if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1200  y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1201  s->vdsp.emulated_edge_mc(td->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
1202  block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1203  x_off - mx_idx, y_off - my_idx, width, height);
1204  src = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1205  }
1206  mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
1207  } else {
1208  ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1209  mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
1210  }
1211 }
1212 
1213 /**
1214  * chroma MC function
1215  *
1216  * @param s VP8 decoding context
1217  * @param dst1 target buffer for block data at block position (U plane)
1218  * @param dst2 target buffer for block data at block position (V plane)
1219  * @param ref reference picture buffer at origin (0, 0)
1220  * @param mv motion vector (relative to block position) to get pixel data from
1221  * @param x_off horizontal position of block from origin (0, 0)
1222  * @param y_off vertical position of block from origin (0, 0)
1223  * @param block_w width of block (16, 8 or 4)
1224  * @param block_h height of block (always same as block_w)
1225  * @param width width of src/dst plane data
1226  * @param height height of src/dst plane data
1227  * @param linesize size of a single line of plane data, including padding
1228  * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1229  */
1230 static av_always_inline
1231 void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst2,
1232  ThreadFrame *ref, const VP56mv *mv, int x_off, int y_off,
1233  int block_w, int block_h, int width, int height, int linesize,
1234  vp8_mc_func mc_func[3][3])
1235 {
1236  uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];
1237 
1238  if (AV_RN32A(mv)) {
1239  int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
1240  int my = mv->y&7, my_idx = subpel_idx[0][my];
1241 
1242  x_off += mv->x >> 3;
1243  y_off += mv->y >> 3;
1244 
1245  // edge emulation
1246  src1 += y_off * linesize + x_off;
1247  src2 += y_off * linesize + x_off;
1248  ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1249  if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1250  y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1251  s->vdsp.emulated_edge_mc(td->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
1252  block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1253  x_off - mx_idx, y_off - my_idx, width, height);
1254  src1 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1255  mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1256 
1257  s->vdsp.emulated_edge_mc(td->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
1258  block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1259  x_off - mx_idx, y_off - my_idx, width, height);
1260  src2 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1261  mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1262  } else {
1263  mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1264  mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1265  }
1266  } else {
1267  ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1268  mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1269  mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1270  }
1271 }
1272 
1273 static av_always_inline
1274 void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1275  ThreadFrame *ref_frame, int x_off, int y_off,
1276  int bx_off, int by_off,
1277  int block_w, int block_h,
1278  int width, int height, VP56mv *mv)
1279 {
1280  VP56mv uvmv = *mv;
1281 
1282  /* Y */
1283  vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
1284  ref_frame, mv, x_off + bx_off, y_off + by_off,
1285  block_w, block_h, width, height, s->linesize,
1286  s->put_pixels_tab[block_w == 8]);
1287 
1288  /* U/V */
1289  if (s->profile == 3) {
1290  uvmv.x &= ~7;
1291  uvmv.y &= ~7;
1292  }
1293  x_off >>= 1; y_off >>= 1;
1294  bx_off >>= 1; by_off >>= 1;
1295  width >>= 1; height >>= 1;
1296  block_w >>= 1; block_h >>= 1;
1297  vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
1298  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1299  &uvmv, x_off + bx_off, y_off + by_off,
1300  block_w, block_h, width, height, s->uvlinesize,
1301  s->put_pixels_tab[1 + (block_w == 4)]);
1302 }
1303 
1304 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1305  * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1306 static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
1307 {
1308  /* Don't prefetch refs that haven't been used very often this frame. */
1309  if (s->ref_count[ref-1] > (mb_xy >> 5)) {
1310  int x_off = mb_x << 4, y_off = mb_y << 4;
1311  int mx = (mb->mv.x>>2) + x_off + 8;
1312  int my = (mb->mv.y>>2) + y_off;
1313  uint8_t **src= s->framep[ref]->tf.f->data;
1314  int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
1315  /* For threading, a ff_thread_await_progress here might be useful, but
1316  * it actually slows down the decoder. Since a bad prefetch doesn't
1317  * generate bad decoder output, we don't run it here. */
1318  s->vdsp.prefetch(src[0]+off, s->linesize, 4);
1319  off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
1320  s->vdsp.prefetch(src[1]+off, src[2]-src[1], 2);
1321  }
1322 }
1323 
1324 /**
1325  * Apply motion vectors to prediction buffer, chapter 18.
1326  */
1327 static av_always_inline
1328 void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1329  VP8Macroblock *mb, int mb_x, int mb_y)
1330 {
1331  int x_off = mb_x << 4, y_off = mb_y << 4;
1332  int width = 16*s->mb_width, height = 16*s->mb_height;
1333  ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
1334  VP56mv *bmv = mb->bmv;
1335 
1336  switch (mb->partitioning) {
1337  case VP8_SPLITMVMODE_NONE:
1338  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1339  0, 0, 16, 16, width, height, &mb->mv);
1340  break;
1341  case VP8_SPLITMVMODE_4x4: {
1342  int x, y;
1343  VP56mv uvmv;
1344 
1345  /* Y */
1346  for (y = 0; y < 4; y++) {
1347  for (x = 0; x < 4; x++) {
1348  vp8_mc_luma(s, td, dst[0] + 4*y*s->linesize + x*4,
1349  ref, &bmv[4*y + x],
1350  4*x + x_off, 4*y + y_off, 4, 4,
1351  width, height, s->linesize,
1352  s->put_pixels_tab[2]);
1353  }
1354  }
1355 
1356  /* U/V */
1357  x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
1358  for (y = 0; y < 2; y++) {
1359  for (x = 0; x < 2; x++) {
1360  uvmv.x = mb->bmv[ 2*y * 4 + 2*x ].x +
1361  mb->bmv[ 2*y * 4 + 2*x+1].x +
1362  mb->bmv[(2*y+1) * 4 + 2*x ].x +
1363  mb->bmv[(2*y+1) * 4 + 2*x+1].x;
1364  uvmv.y = mb->bmv[ 2*y * 4 + 2*x ].y +
1365  mb->bmv[ 2*y * 4 + 2*x+1].y +
1366  mb->bmv[(2*y+1) * 4 + 2*x ].y +
1367  mb->bmv[(2*y+1) * 4 + 2*x+1].y;
1368  uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
1369  uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
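 /* Editorial note, not part of the original vp8.c: the two lines above
  * average the four luma MVs with rounding half away from zero; adding
  * the sign bit (uvmv.x >> (INT_BIT-1) is -1 for negative values) before
  * the >> 2 makes +6 round to 2 and -6 to -2 instead of flooring. */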
1370  if (s->profile == 3) {
1371  uvmv.x &= ~7;
1372  uvmv.y &= ~7;
1373  }
1374  vp8_mc_chroma(s, td, dst[1] + 4*y*s->uvlinesize + x*4,
1375  dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
1376  4*x + x_off, 4*y + y_off, 4, 4,
1377  width, height, s->uvlinesize,
1378  s->put_pixels_tab[2]);
1379  }
1380  }
1381  break;
1382  }
1383  case VP8_SPLITMVMODE_16x8:
1384  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1385  0, 0, 16, 8, width, height, &bmv[0]);
1386  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1387  0, 8, 16, 8, width, height, &bmv[1]);
1388  break;
1389  case VP8_SPLITMVMODE_8x16:
1390  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1391  0, 0, 8, 16, width, height, &bmv[0]);
1392  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1393  8, 0, 8, 16, width, height, &bmv[1]);
1394  break;
1395  case VP8_SPLITMVMODE_8x8:
1396  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1397  0, 0, 8, 8, width, height, &bmv[0]);
1398  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1399  8, 0, 8, 8, width, height, &bmv[1]);
1400  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1401  0, 8, 8, 8, width, height, &bmv[2]);
1402  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1403  8, 8, 8, 8, width, height, &bmv[3]);
1404  break;
1405  }
1406 }
1407 
1408 static av_always_inline void idct_mb(VP8Context *s, VP8ThreadData *td,
1409  uint8_t *dst[3], VP8Macroblock *mb)
1410 {
1411  int x, y, ch;
1412 
1413  if (mb->mode != MODE_I4x4) {
1414  uint8_t *y_dst = dst[0];
1415  for (y = 0; y < 4; y++) {
1416  uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
1417  if (nnz4) {
1418  if (nnz4&~0x01010101) {
1419  for (x = 0; x < 4; x++) {
1420  if ((uint8_t)nnz4 == 1)
1421  s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, td->block[y][x], s->linesize);
1422  else if((uint8_t)nnz4 > 1)
1423  s->vp8dsp.vp8_idct_add(y_dst+4*x, td->block[y][x], s->linesize);
1424  nnz4 >>= 8;
1425  if (!nnz4)
1426  break;
1427  }
1428  } else {
1429  s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
1430  }
1431  }
1432  y_dst += 4*s->linesize;
1433  }
1434  }
1435 
1436  for (ch = 0; ch < 2; ch++) {
1437  uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4+ch]);
1438  if (nnz4) {
1439  uint8_t *ch_dst = dst[1+ch];
1440  if (nnz4&~0x01010101) {
1441  for (y = 0; y < 2; y++) {
1442  for (x = 0; x < 2; x++) {
1443  if ((uint8_t)nnz4 == 1)
1444  s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
1445  else if((uint8_t)nnz4 > 1)
1446  s->vp8dsp.vp8_idct_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
1447  nnz4 >>= 8;
1448  if (!nnz4)
1449  goto chroma_idct_end;
1450  }
1451  ch_dst += 4*s->uvlinesize;
1452  }
1453  } else {
1454  s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4+ch], s->uvlinesize);
1455  }
1456  }
1457 chroma_idct_end: ;
1458  }
1459 }
1460 
1461 static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f)
1462 {
1463  int interior_limit, filter_level;
1464 
1465  if (s->segmentation.enabled) {
1466  filter_level = s->segmentation.filter_level[mb->segment];
1467  if (!s->segmentation.absolute_vals)
1468  filter_level += s->filter.level;
1469  } else
1470  filter_level = s->filter.level;
1471 
1472  if (s->lf_delta.enabled) {
1473  filter_level += s->lf_delta.ref[mb->ref_frame];
1474  filter_level += s->lf_delta.mode[mb->mode];
1475  }
1476 
1477  filter_level = av_clip_uintp2(filter_level, 6);
1478 
1479  interior_limit = filter_level;
1480  if (s->filter.sharpness) {
1481  interior_limit >>= (s->filter.sharpness + 3) >> 2;
1482  interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
1483  }
1484  interior_limit = FFMAX(interior_limit, 1);
1485 
1486  f->filter_level = filter_level;
1487  f->inner_limit = interior_limit;
1488  f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
1489 }
1490 
1491 static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
1492 {
1493  int mbedge_lim, bedge_lim, hev_thresh;
1494  int filter_level = f->filter_level;
1495  int inner_limit = f->inner_limit;
1496  int inner_filter = f->inner_filter;
1497  int linesize = s->linesize;
1498  int uvlinesize = s->uvlinesize;
1499  static const uint8_t hev_thresh_lut[2][64] = {
1500  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1501  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1502  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
1503  3, 3, 3, 3 },
1504  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1505  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1506  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1507  2, 2, 2, 2 }
1508  };
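 /* Editorial note, not part of the original vp8.c: the LUT above flattens
  * the spec's high-edge-variance thresholds into a direct filter_level
  * lookup: for inter frames (row 0) the threshold becomes 1 at level 15,
  * 2 at 20 and 3 at 40; for key frames (row 1) it becomes 1 at 15 and 2
  * at 40. */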
1509 
1510  if (!filter_level)
1511  return;
1512 
1513  bedge_lim = 2*filter_level + inner_limit;
1514  mbedge_lim = bedge_lim + 4;
1515 
1516  hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
1517 
1518  if (mb_x) {
1519  s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
1520  mbedge_lim, inner_limit, hev_thresh);
1521  s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
1522  mbedge_lim, inner_limit, hev_thresh);
1523  }
1524 
1525  if (inner_filter) {
1526  s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
1527  inner_limit, hev_thresh);
1528  s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
1529  inner_limit, hev_thresh);
1530  s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
1531  inner_limit, hev_thresh);
1532  s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
1533  uvlinesize, bedge_lim,
1534  inner_limit, hev_thresh);
1535  }
1536 
1537  if (mb_y) {
1538  s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
1539  mbedge_lim, inner_limit, hev_thresh);
1540  s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
1541  mbedge_lim, inner_limit, hev_thresh);
1542  }
1543 
1544  if (inner_filter) {
1545  s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
1546  linesize, bedge_lim,
1547  inner_limit, hev_thresh);
1548  s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
1549  linesize, bedge_lim,
1550  inner_limit, hev_thresh);
1551  s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
1552  linesize, bedge_lim,
1553  inner_limit, hev_thresh);
1554  s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
1555  dst[2] + 4 * uvlinesize,
1556  uvlinesize, bedge_lim,
1557  inner_limit, hev_thresh);
1558  }
1559 }
1560 
1561 static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
1562 {
1563  int mbedge_lim, bedge_lim;
1564  int filter_level = f->filter_level;
1565  int inner_limit = f->inner_limit;
1566  int inner_filter = f->inner_filter;
1567  int linesize = s->linesize;
1568 
1569  if (!filter_level)
1570  return;
1571 
1572  bedge_lim = 2*filter_level + inner_limit;
1573  mbedge_lim = bedge_lim + 4;
1574 
1575  if (mb_x)
1576  s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
1577  if (inner_filter) {
1578  s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
1579  s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
1580  s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
1581  }
1582 
1583  if (mb_y)
1584  s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
1585  if (inner_filter) {
1586  s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
1587  s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
1588  s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
1589  }
1590 }
1591 
1592 #define MARGIN (16 << 2)
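/* Editorial note, not part of the original vp8.c: motion vectors here are in
 * quarter-pel units, so MARGIN is a 16-pixel border expressed in qpel, and
 * the mv_min/mv_max bounds set below (((mb_width - 1) << 6) being
 * (mb_width - 1) macroblocks of 16 px times 4) are the qpel clamping limits
 * used by clamp_mv(). */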
1593 static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
1594  VP8Frame *prev_frame)
1595 {
1596  VP8Context *s = avctx->priv_data;
1597  int mb_x, mb_y;
1598 
1599  s->mv_min.y = -MARGIN;
1600  s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
1601  for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
1602  VP8Macroblock *mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
1603  int mb_xy = mb_y*s->mb_width;
1604 
1605  AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
1606 
1607  s->mv_min.x = -MARGIN;
1608  s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
1609  for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
1610  if (mb_y == 0)
1611  AV_WN32A((mb-s->mb_width-1)->intra4x4_pred_mode_top, DC_PRED*0x01010101);
1612  decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
1613  prev_frame && prev_frame->seg_map ?
1614  prev_frame->seg_map->data + mb_xy : NULL, 1);
1615  s->mv_min.x -= 64;
1616  s->mv_max.x -= 64;
1617  }
1618  s->mv_min.y -= 64;
1619  s->mv_max.y -= 64;
1620  }
1621 }
1622 
1623 #if HAVE_THREADS
1624 #define check_thread_pos(td, otd, mb_x_check, mb_y_check)\
1625  do {\
1626  int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF);\
1627  if (otd->thread_mb_pos < tmp) {\
1628  pthread_mutex_lock(&otd->lock);\
1629  td->wait_mb_pos = tmp;\
1630  do {\
1631  if (otd->thread_mb_pos >= tmp)\
1632  break;\
1633  pthread_cond_wait(&otd->cond, &otd->lock);\
1634  } while (1);\
1635  td->wait_mb_pos = INT_MAX;\
1636  pthread_mutex_unlock(&otd->lock);\
1637  }\
1638  } while(0);
1639 
1640 #define update_pos(td, mb_y, mb_x)\
1641  do {\
1642  int pos = (mb_y << 16) | (mb_x & 0xFFFF);\
1643  int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && (num_jobs > 1);\
1644  int is_null = (next_td == NULL) || (prev_td == NULL);\
1645  int pos_check = (is_null) ? 1 :\
1646  (next_td != td && pos >= next_td->wait_mb_pos) ||\
1647  (prev_td != td && pos >= prev_td->wait_mb_pos);\
1648  td->thread_mb_pos = pos;\
1649  if (sliced_threading && pos_check) {\
1650  pthread_mutex_lock(&td->lock);\
1651  pthread_cond_broadcast(&td->cond);\
1652  pthread_mutex_unlock(&td->lock);\
1653  }\
1654  } while(0);
1655 #else
1656 #define check_thread_pos(td, otd, mb_x_check, mb_y_check)
1657 #define update_pos(td, mb_y, mb_x)
1658 #endif
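/* Editorial note, not part of the original vp8.c: inter-thread progress is
 * exchanged as a single int packing (mb_y << 16) | mb_x, so "row y, column x"
 * positions compare with one integer test; mb_y = 3, mb_x = 5 encodes as
 * 0x00030005, and waiting for (mb_y-1, mb_x+2) is just a < comparison. */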
1659 
1660 static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
1661  int jobnr, int threadnr)
1662 {
1663  VP8Context *s = avctx->priv_data;
1664  VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
1665  int mb_y = td->thread_mb_pos>>16;
1666  int i, y, mb_x, mb_xy = mb_y*s->mb_width;
1667  int num_jobs = s->num_jobs;
1668  VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
1669  VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
1670  VP8Macroblock *mb;
1671  uint8_t *dst[3] = {
1672  curframe->tf.f->data[0] + 16*mb_y*s->linesize,
1673  curframe->tf.f->data[1] + 8*mb_y*s->uvlinesize,
1674  curframe->tf.f->data[2] + 8*mb_y*s->uvlinesize
1675  };
1676  if (mb_y == 0) prev_td = td;
1677  else prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
1678  if (mb_y == s->mb_height-1) next_td = td;
1679  else next_td = &s->thread_data[(jobnr + 1)%num_jobs];
1680  if (s->mb_layout == 1)
1681  mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
1682  else {
1683  // Make sure the previous frame has read its segmentation map,
1684  // if we re-use the same map.
1685  if (prev_frame && s->segmentation.enabled &&
1686  !s->segmentation.update_map)
1687  ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
1688  mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
1689  memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
1690  AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
1691  }
1692 
1693  memset(td->left_nnz, 0, sizeof(td->left_nnz));
1694  // left edge of 129 for intra prediction
1695  if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1696  for (i = 0; i < 3; i++)
1697  for (y = 0; y < 16>>!!i; y++)
1698  dst[i][y*curframe->tf.f->linesize[i]-1] = 129;
1699  if (mb_y == 1) {
1700  s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
1701  }
1702  }
1703 
1704  s->mv_min.x = -MARGIN;
1705  s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
1706 
1707  for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
1708  // Wait for previous thread to read mb_x+2, and reach mb_y-1.
1709  if (prev_td != td) {
1710  if (threadnr != 0) {
1711  check_thread_pos(td, prev_td, mb_x+1, mb_y-1);
1712  } else {
1713  check_thread_pos(td, prev_td, (s->mb_width+3) + (mb_x+1), mb_y-1);
1714  }
1715  }
1716 
1717  s->vdsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
1718  s->vdsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
1719 
1720  if (!s->mb_layout)
1721  decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
1722  prev_frame && prev_frame->seg_map ?
1723  prev_frame->seg_map->data + mb_xy : NULL, 0);
1724 
1725  prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
1726 
1727  if (!mb->skip)
1728  decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz);
1729 
1730  if (mb->mode <= MODE_I4x4)
1731  intra_predict(s, td, dst, mb, mb_x, mb_y);
1732  else
1733  inter_predict(s, td, dst, mb, mb_x, mb_y);
1734 
1735  prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
1736 
1737  if (!mb->skip) {
1738  idct_mb(s, td, dst, mb);
1739  } else {
1740  AV_ZERO64(td->left_nnz);
1741  AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
1742 
1743  // Reset DC block predictors if they would exist if the mb had coefficients
1744  if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
1745  td->left_nnz[8] = 0;
1746  s->top_nnz[mb_x][8] = 0;
1747  }
1748  }
1749 
1750  if (s->deblock_filter)
1751  filter_level_for_mb(s, mb, &td->filter_strength[mb_x]);
1752 
1753  if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs-1) {
1754  if (s->filter.simple)
1755  backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
1756  else
1757  backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
1758  }
1759 
1760  prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
1761 
1762  dst[0] += 16;
1763  dst[1] += 8;
1764  dst[2] += 8;
1765  s->mv_min.x -= 64;
1766  s->mv_max.x -= 64;
1767 
1768  if (mb_x == s->mb_width+1) {
1769  update_pos(td, mb_y, s->mb_width+3);
1770  } else {
1771  update_pos(td, mb_y, mb_x);
1772  }
1773  }
1774 }
1775 
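The row loops above synchronise through check_thread_pos() and update_pos(), defined earlier in this file: a macroblock position is packed as (mb_y << 16) | mb_x, so a single integer comparison orders progress both across and within rows. A minimal sketch of the idea, assuming the per-thread lock/cond pair that VP8ThreadData carries under HAVE_THREADS (the helper names and exact signatures here are illustrative, not the file's):

    /* Block until `other` has decoded past (mb_x, mb_y). */
    static void wait_for_pos_sketch(VP8ThreadData *td, VP8ThreadData *other,
                                    int mb_x, int mb_y)
    {
        int wanted = (mb_y << 16) | (mb_x & 0xFFFF);
        if (other->thread_mb_pos >= wanted)
            return;                            /* already far enough */
        pthread_mutex_lock(&other->lock);
        td->wait_mb_pos = wanted;              /* advertise the dependency */
        while (other->thread_mb_pos < wanted)
            pthread_cond_wait(&other->cond, &other->lock);
        td->wait_mb_pos = INT_MAX;             /* not waiting any more */
        pthread_mutex_unlock(&other->lock);
    }

    /* Publish our own progress and wake anyone blocked on it. */
    static void publish_pos_sketch(VP8ThreadData *td, int mb_x, int mb_y)
    {
        pthread_mutex_lock(&td->lock);
        td->thread_mb_pos = (mb_y << 16) | (mb_x & 0xFFFF);
        pthread_cond_broadcast(&td->cond);
        pthread_mutex_unlock(&td->lock);
    }

The real update_pos() additionally skips the lock when no neighbour is actually waiting (it checks the neighbours' wait_mb_pos first). Decode and filter share one counter per row: the decode pass reports columns 0..mb_width, while the filter pass below reports them offset by mb_width+3, which is why the check_thread_pos() calls compare against (s->mb_width+3) + mb_x.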
1776 static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
1777  int jobnr, int threadnr)
1778 {
1779  VP8Context *s = avctx->priv_data;
1780  VP8ThreadData *td = &s->thread_data[threadnr];
1781  int mb_x, mb_y = td->thread_mb_pos>>16, num_jobs = s->num_jobs;
1782  AVFrame *curframe = s->curframe->tf.f;
1783  VP8Macroblock *mb;
1784  VP8ThreadData *prev_td, *next_td;
1785  uint8_t *dst[3] = {
1786  curframe->data[0] + 16*mb_y*s->linesize,
1787  curframe->data[1] + 8*mb_y*s->uvlinesize,
1788  curframe->data[2] + 8*mb_y*s->uvlinesize
1789  };
1790 
1791  if (s->mb_layout == 1)
1792  mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
1793  else
1794  mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
1795 
1796  if (mb_y == 0) prev_td = td;
1797  else prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
1798  if (mb_y == s->mb_height-1) next_td = td;
1799  else next_td = &s->thread_data[(jobnr + 1)%num_jobs];
1800 
1801  for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
1802  VP8FilterStrength *f = &td->filter_strength[mb_x];
1803  if (prev_td != td) {
1804  check_thread_pos(td, prev_td, (mb_x+1) + (s->mb_width+3), mb_y-1);
1805  }
1806  if (next_td != td)
1807  if (next_td != &s->thread_data[0]) {
1808  check_thread_pos(td, next_td, mb_x+1, mb_y+1);
1809  }
1810 
1811  if (num_jobs == 1) {
1812  if (s->filter.simple)
1813  backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
1814  else
1815  backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
1816  }
1817 
1818  if (s->filter.simple)
1819  filter_mb_simple(s, dst[0], f, mb_x, mb_y);
1820  else
1821  filter_mb(s, dst, f, mb_x, mb_y);
1822  dst[0] += 16;
1823  dst[1] += 8;
1824  dst[2] += 8;
1825 
1826  update_pos(td, mb_y, (s->mb_width+3) + mb_x);
1827  }
1828 }
1829 
1830 static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
1831  int jobnr, int threadnr)
1832 {
1833  VP8Context *s = avctx->priv_data;
1834  VP8ThreadData *td = &s->thread_data[jobnr];
1835  VP8ThreadData *next_td = NULL, *prev_td = NULL;
1836  VP8Frame *curframe = s->curframe;
1837  int mb_y, num_jobs = s->num_jobs;
1838  td->thread_nr = threadnr;
1839  for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
1840  if (mb_y >= s->mb_height) break;
1841  td->thread_mb_pos = mb_y<<16;
1842  vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
1843  if (s->deblock_filter)
1844  vp8_filter_mb_row(avctx, tdata, jobnr, threadnr);
1845  update_pos(td, mb_y, INT_MAX & 0xFFFF);
1846 
1847  s->mv_min.y -= 64;
1848  s->mv_max.y -= 64;
1849 
1850  if (avctx->active_thread_type == FF_THREAD_FRAME)
1851  ff_thread_report_progress(&curframe->tf, mb_y, 0);
1852  }
1853 
1854  return 0;
1855 }
1856 
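Rows are dealt out round-robin across jobs: with num_jobs == 3, for example, job 0 takes rows 0, 3, 6, ..., job 1 takes rows 1, 4, 7, ..., and each row blocks on the job that owns the row above through the prev_td checks. The final update_pos(td, mb_y, INT_MAX & 0xFFFF) stamps the row with the largest possible column (0xFFFF), marking both decode and filter of that row complete so no later job can block on it again.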
1857 static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
1858  AVPacket *avpkt)
1859 {
1860  VP8Context *s = avctx->priv_data;
1861  int ret, i, referenced, num_jobs;
1862  enum AVDiscard skip_thresh;
1863  VP8Frame *av_uninit(curframe), *prev_frame;
1864 
1865  if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
1866  goto err;
1867 
1868  prev_frame = s->framep[VP56_FRAME_CURRENT];
1869 
1870  referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
1871  || s->update_altref == VP56_FRAME_CURRENT;
1872 
1873  skip_thresh = !referenced ? AVDISCARD_NONREF :
1874  !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;
1875 
1876  if (avctx->skip_frame >= skip_thresh) {
1877  s->invisible = 1;
1878  memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
1879  goto skip_decode;
1880  }
1881  s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
1882 
1883  // release no longer referenced frames
1884  for (i = 0; i < 5; i++)
1885  if (s->frames[i].tf.f->data[0] &&
1886  &s->frames[i] != prev_frame &&
1887  &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1888  &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1889  &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
1890  vp8_release_frame(s, &s->frames[i]);
1891 
1892  // find a free buffer
1893  for (i = 0; i < 5; i++)
1894  if (&s->frames[i] != prev_frame &&
1895  &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1896  &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1897  &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
1898  curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
1899  break;
1900  }
1901  if (i == 5) {
1902  av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
1903  abort();
1904  }
1905  if (curframe->tf.f->data[0])
1906  vp8_release_frame(s, curframe);
1907 
1908  // Given that arithmetic probabilities are updated every frame, it's quite likely
1909  // that the values we have on a random interframe are complete junk if we didn't
1910  // start decode on a keyframe. So just don't display anything rather than junk.
1911  if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
1912  !s->framep[VP56_FRAME_GOLDEN] ||
1913  !s->framep[VP56_FRAME_GOLDEN2])) {
1914  av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
1915  ret = AVERROR_INVALIDDATA;
1916  goto err;
1917  }
1918 
1919  curframe->tf.f->key_frame = s->keyframe;
1920  curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
1921  if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
1922  goto err;
1923 
1924  // check if golden and altref are swapped
1925  if (s->update_altref != VP56_FRAME_NONE) {
1926  s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
1927  } else {
1928  s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
1929  }
1930  if (s->update_golden != VP56_FRAME_NONE) {
1931  s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
1932  } else {
1933  s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
1934  }
1935  if (s->update_last) {
1936  s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
1937  } else {
1938  s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
1939  }
1940  s->next_framep[VP56_FRAME_CURRENT] = curframe;
1941 
1942  ff_thread_finish_setup(avctx);
1943 
1944  s->linesize = curframe->tf.f->linesize[0];
1945  s->uvlinesize = curframe->tf.f->linesize[1];
1946 
1947  if (!s->thread_data[0].edge_emu_buffer)
1948  for (i = 0; i < MAX_THREADS; i++)
1949  s->thread_data[i].edge_emu_buffer = av_malloc(21*s->linesize);
1950 
1951  memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
1952  /* Zero macroblock structures for top/top-left prediction from outside the frame. */
1953  if (!s->mb_layout)
1954  memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
1955  if (!s->mb_layout && s->keyframe)
1956  memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
1957 
1958  // top edge of 127 for intra prediction
1959  if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1960  s->top_border[0][15] = s->top_border[0][23] = 127;
1961  s->top_border[0][31] = 127;
1962  memset(s->top_border[1], 127, s->mb_width*sizeof(*s->top_border));
1963  }
1964  memset(s->ref_count, 0, sizeof(s->ref_count));
1965 
1966 
1967  if (s->mb_layout == 1) {
1968  // Make sure the previous frame has read its segmentation map,
1969  // if we re-use the same map.
1970  if (prev_frame && s->segmentation.enabled &&
1971  !s->segmentation.update_map)
1972  ff_thread_await_progress(&prev_frame->tf, 1, 0);
1973  vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
1974  }
1975 
1976  if (avctx->active_thread_type == FF_THREAD_FRAME)
1977  num_jobs = 1;
1978  else
1979  num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
1980  s->num_jobs = num_jobs;
1981  s->curframe = curframe;
1982  s->prev_frame = prev_frame;
1983  s->mv_min.y = -MARGIN;
1984  s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
1985  for (i = 0; i < MAX_THREADS; i++) {
1986  s->thread_data[i].thread_mb_pos = 0;
1987  s->thread_data[i].wait_mb_pos = INT_MAX;
1988  }
1989  avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL, num_jobs);
1990 
1991  ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
1992  memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
1993 
1994 skip_decode:
1995  // if future frames don't use the updated probabilities,
1996  // reset them to the values we saved
1997  if (!s->update_probabilities)
1998  s->prob[0] = s->prob[1];
1999 
2000  if (!s->invisible) {
2001  if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
2002  return ret;
2003  *got_frame = 1;
2004  }
2005 
2006  return avpkt->size;
2007 err:
2008  memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2009  return ret;
2010 }
2011 
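vp8_decode_frame() recycles a fixed pool of five VP8Frames: at most four can be pinned at once (the frame being decoded plus the three named references PREVIOUS/last, GOLDEN and GOLDEN2/altref), so the "find a free buffer" loop always succeeds and the abort() above is effectively an assertion. A hedged restatement of that loop (hypothetical helper, not in the file):

    static VP8Frame *find_free_frame_sketch(VP8Context *s, VP8Frame *prev_frame)
    {
        int i;
        for (i = 0; i < 5; i++)
            if (&s->frames[i] != prev_frame &&
                &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
                &s->frames[i] != s->framep[VP56_FRAME_GOLDEN]   &&
                &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
                return &s->frames[i];    /* first unpinned slot */
        return NULL;                     /* unreachable: at most 4 of 5 pinned */
    }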
2012 static av_cold int vp8_decode_free(AVCodecContext *avctx)
2013 {
2014  VP8Context *s = avctx->priv_data;
2015  int i;
2016 
2017  vp8_decode_flush_impl(avctx, 1);
2018  for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
2019  av_frame_free(&s->frames[i].tf.f);
2020 
2021  return 0;
2022 }
2023 
2024 static av_cold int vp8_init_frames(VP8Context *s)
2025 {
2026  int i;
2027  for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
2028  s->frames[i].tf.f = av_frame_alloc();
2029  if (!s->frames[i].tf.f)
2030  return AVERROR(ENOMEM);
2031  }
2032  return 0;
2033 }
2034 
2035 static av_cold int vp8_decode_init(AVCodecContext *avctx)
2036 {
2037  VP8Context *s = avctx->priv_data;
2038  int ret;
2039 
2040  s->avctx = avctx;
2041  avctx->pix_fmt = AV_PIX_FMT_YUV420P;
2042  avctx->internal->allocate_progress = 1;
2043 
2044  ff_videodsp_init(&s->vdsp, 8);
2045  ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
2046  ff_vp8dsp_init(&s->vp8dsp);
2047 
2048  if ((ret = vp8_init_frames(s)) < 0) {
2049  vp8_decode_free(avctx);
2050  return ret;
2051  }
2052 
2053  return 0;
2054 }
2055 
2056 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
2057 {
2058  VP8Context *s = avctx->priv_data;
2059  int ret;
2060 
2061  s->avctx = avctx;
2062 
2063  if ((ret = vp8_init_frames(s)) < 0) {
2064  vp8_decode_free(avctx);
2065  return ret;
2066  }
2067 
2068  return 0;
2069 }
2070 
2071 #define REBASE(pic) \
2072  pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL
2073 
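REBASE translates a frame pointer that points into the source context's frames[] array into the pointer for the same slot in the destination context; this is needed because vp8_decode_update_thread_context() below copies state between per-thread VP8Contexts whose frame arrays live at different addresses. An equivalent, more explicit form (hypothetical helper, same semantics as the macro):

    static VP8Frame *rebase_sketch(VP8Context *s, VP8Context *s_src, VP8Frame *pic)
    {
        if (!pic)
            return NULL;
        return &s->frames[pic - &s_src->frames[0]]; /* same index, dst's array */
    }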
2074 static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
2075 {
2076  VP8Context *s = dst->priv_data, *s_src = src->priv_data;
2077  int i;
2078 
2079  if (s->macroblocks_base &&
2080  (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
2081  free_buffers(s);
2082  s->mb_width = s_src->mb_width;
2083  s->mb_height = s_src->mb_height;
2084  }
2085 
2086  s->prob[0] = s_src->prob[!s_src->update_probabilities];
2087  s->segmentation = s_src->segmentation;
2088  s->lf_delta = s_src->lf_delta;
2089  memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
2090 
2091  for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
2092  if (s_src->frames[i].tf.f->data[0]) {
2093  int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
2094  if (ret < 0)
2095  return ret;
2096  }
2097  }
2098 
2099  s->framep[0] = REBASE(s_src->next_framep[0]);
2100  s->framep[1] = REBASE(s_src->next_framep[1]);
2101  s->framep[2] = REBASE(s_src->next_framep[2]);
2102  s->framep[3] = REBASE(s_src->next_framep[3]);
2103 
2104  return 0;
2105 }
2106 
2107 static unsigned apply_padding(unsigned size) { return size + (size & 1); }
2108 
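RIFF chunks are word-aligned: an odd-sized payload is followed by one pad byte that is not counted in the stored size, so apply_padding() rounds sizes up to even when skipping chunks. For example, apply_padding(7) == 8 while apply_padding(8) == 8.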
2109 static int webp_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
2110  AVPacket *avpkt)
2111 {
2112  const uint8_t *buf = avpkt->data;
2113  int buf_size = avpkt->size;
2114  AVPacket pkt = *avpkt;
2115 
2116  if (buf_size >= 16
2117  && AV_RL32(buf ) == AV_RL32("RIFF")
2118  && AV_RL32(buf+ 8) == AV_RL32("WEBP")) {
2119  unsigned riff_size = apply_padding(AV_RL32(buf+4)) + 8;
2120  buf += 12; // Skip over main header
2121  buf_size -= 12;
2122  if (buf_size < 8 || riff_size < 8) {
2123  av_log(avctx, AV_LOG_ERROR, "Incomplete header.\n");
2124  return AVERROR_INVALIDDATA;
2125  }
2126  if (AV_RL32(buf) == AV_RL32("VP8L")) {
2127  av_log(avctx, AV_LOG_ERROR, "Unsupported WebP lossless format.\n");
2128  return AVERROR_PATCHWELCOME;
2129  }
2130  if (AV_RL32(buf) == AV_RL32("VP8X") && AV_RL32(buf+4) < (unsigned)buf_size) {
2131  unsigned size = apply_padding(AV_RL32(buf+4) + 8);
2132  buf += size;
2133  buf_size -= size;
2134  }
2135  if (buf_size >= 8
2136  && AV_RL32(buf) == AV_RL32("ALPH") && AV_RL32(buf+4) < (unsigned)buf_size) {
2137  unsigned size = apply_padding(AV_RL32(buf+4) + 8);
2138  buf += size;
2139  buf_size -= size;
2140  av_log(avctx, AV_LOG_WARNING, "Skipping alpha plane\n");
2141  }
2142  if (buf_size >= 8 && AV_RL32(buf) == AV_RL32("VP8 ")) {
2143  buf += 8;
2144  buf_size -= 8;
2145  }
2146  }
2147  pkt.data = buf;
2148  pkt.size = buf_size;
2149 
2150  return vp8_decode_frame(avctx, data, data_size, &pkt);
2151 }
2152 
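webp_decode_frame() above handles only the simple lossy WebP case: it peels the RIFF container off in place and hands the raw VP8 key frame to vp8_decode_frame(). The layout it expects (all sizes little-endian):

    bytes 0-3    "RIFF"
    bytes 4-7    file size minus 8
    bytes 8-11   "WEBP"
    then         optional "VP8X" (extended features) and "ALPH" (alpha plane,
                 skipped with a warning) chunks, each fourcc + 32-bit size +
                 padded payload
    then         "VP8 " fourcc + 32-bit size, followed by the VP8 bitstream

Lossless "VP8L" data is rejected with AVERROR_PATCHWELCOME.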
2153 AVCodec ff_vp8_decoder = {
2154  .name = "vp8",
2155  .type = AVMEDIA_TYPE_VIDEO,
2156  .id = AV_CODEC_ID_VP8,
2157  .priv_data_size = sizeof(VP8Context),
2158  .init = vp8_decode_init,
2159  .close = vp8_decode_free,
2160  .decode = vp8_decode_frame,
2161  .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
2162  .flush = vp8_decode_flush,
2163  .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
2164  .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
2165  .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
2166 };
2167 
2168 AVCodec ff_webp_decoder = {
2169  .name = "webp",
2170  .type = AVMEDIA_TYPE_VIDEO,
2171  .id = AV_CODEC_ID_WEBP,
2172  .priv_data_size = sizeof(VP8Context),
2173  .init = vp8_decode_init,
2174  .close = vp8_decode_free,
2175  .decode = webp_decode_frame,
2176  .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
2177  .flush = vp8_decode_flush,
2178  .long_name = NULL_IF_CONFIG_SMALL("WebP"),
2179  .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
2180  .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
2181 };