/*
 * VP7/VP8 compatible video decoder
 *
 * Copyright (C) 2010 David Conrad
 * Copyright (C) 2010 Ronald S. Bultje
 * Copyright (C) 2010 Fiona Glaser
 * Copyright (C) 2012 Daniel Kang
 * Copyright (C) 2014 Peter Ross
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/imgutils.h"

#include "avcodec.h"
#include "internal.h"
#include "rectangle.h"
#include "thread.h"
#include "vp8.h"
#include "vp8data.h"

#if ARCH_ARM
#   include "arm/vp8.h"
#endif

#if CONFIG_VP7_DECODER && CONFIG_VP8_DECODER
#define VPX(vp7, f) (vp7 ? vp7_ ## f : vp8_ ## f)
#elif CONFIG_VP7_DECODER
#define VPX(vp7, f) vp7_ ## f
#else // CONFIG_VP8_DECODER
#define VPX(vp7, f) vp8_ ## f
#endif
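/* Illustrative note (not in the original source): with both decoders compiled
 * in, VPX(vp7, decode_frame_header) expands to a runtime ternary selecting
 * vp7_decode_frame_header or vp8_decode_frame_header; with only one decoder
 * enabled the prefixed name is pasted directly, so the dispatch costs nothing. */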

static void free_buffers(VP8Context *s)
{
    int i;
    if (s->thread_data)
        for (i = 0; i < MAX_THREADS; i++) {
#if HAVE_THREADS
            pthread_cond_destroy(&s->thread_data[i].cond);
            pthread_mutex_destroy(&s->thread_data[i].lock);
#endif
            av_freep(&s->thread_data[i].filter_strength);
        }
    av_freep(&s->thread_data);
    av_freep(&s->macroblocks_base);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->top_border);

    s->macroblocks = NULL;
}

static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
{
    int ret;
    if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
                                    ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
        return ret;
    if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) {
        ff_thread_release_buffer(s->avctx, &f->tf);
        return AVERROR(ENOMEM);
    }
    return 0;
}

static void vp8_release_frame(VP8Context *s, VP8Frame *f)
{
    av_buffer_unref(&f->seg_map);
    ff_thread_release_buffer(s->avctx, &f->tf);
}

#if CONFIG_VP8_DECODER
static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
{
    int ret;

    vp8_release_frame(s, dst);

    if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
        return ret;
    if (src->seg_map &&
        !(dst->seg_map = av_buffer_ref(src->seg_map))) {
        vp8_release_frame(s, dst);
        return AVERROR(ENOMEM);
    }

    return 0;
}
#endif /* CONFIG_VP8_DECODER */

static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
{
    VP8Context *s = avctx->priv_data;
    int i;

    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        vp8_release_frame(s, &s->frames[i]);
    memset(s->framep, 0, sizeof(s->framep));

    if (free_mem)
        free_buffers(s);
}

static void vp8_decode_flush(AVCodecContext *avctx)
{
    vp8_decode_flush_impl(avctx, 0);
}

static VP8Frame *vp8_find_free_buffer(VP8Context *s)
{
    VP8Frame *frame = NULL;
    int i;

    // find a free buffer
    for (i = 0; i < 5; i++)
        if (&s->frames[i] != s->framep[VP56_FRAME_CURRENT]  &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN]   &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
            frame = &s->frames[i];
            break;
        }
    if (i == 5) {
        av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
        abort();
    }
    if (frame->tf.f->data[0])
        vp8_release_frame(s, frame);

    return frame;
}

static av_always_inline
int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
{
    AVCodecContext *avctx = s->avctx;
    int i, ret;

    if (width  != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
        height != s->avctx->height) {
        vp8_decode_flush_impl(s->avctx, 1);

        ret = ff_set_dimensions(s->avctx, width, height);
        if (ret < 0)
            return ret;
    }

    s->mb_width  = (s->avctx->coded_width  + 15) / 16;
    s->mb_height = (s->avctx->coded_height + 15) / 16;

    s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE &&
                   FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1;
    if (!s->mb_layout) { // Frame threading and one thread
        s->macroblocks_base       = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
                                               sizeof(*s->macroblocks));
        s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
    } else // Sliced threading
        s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
                                         sizeof(*s->macroblocks));
    s->top_nnz     = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
    s->top_border  = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
    s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));

    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].filter_strength =
            av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
#if HAVE_THREADS
        pthread_mutex_init(&s->thread_data[i].lock, NULL);
        pthread_cond_init(&s->thread_data[i].cond, NULL);
#endif
    }

    if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
        (!s->intra4x4_pred_mode_top && !s->mb_layout))
        return AVERROR(ENOMEM);

    s->macroblocks = s->macroblocks_base + 1;

    return 0;
}
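/* Reader's note (not in the original source): when s->mb_layout is 0 (frame
 * threading or a single thread), only one row of macroblocks is live at a
 * time, so top prediction modes are kept in the separate
 * s->intra4x4_pred_mode_top array allocated above; with sliced threading the
 * whole macroblock grid persists and each VP8Macroblock carries its own
 * intra4x4_pred_mode_top, which is why that array is not allocated in the
 * second branch (see decode_intra4x4_modes() below). */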

static int vp7_update_dimensions(VP8Context *s, int width, int height)
{
    return update_dimensions(s, width, height, IS_VP7);
}

static int vp8_update_dimensions(VP8Context *s, int width, int height)
{
    return update_dimensions(s, width, height, IS_VP8);
}

static void parse_segment_info(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    s->segmentation.update_map = vp8_rac_get(c);

    if (vp8_rac_get(c)) { // update segment feature data
        s->segmentation.absolute_vals = vp8_rac_get(c);

        for (i = 0; i < 4; i++)
            s->segmentation.base_quant[i]   = vp8_rac_get_sint(c, 7);

        for (i = 0; i < 4; i++)
            s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
    }
    if (s->segmentation.update_map)
        for (i = 0; i < 3; i++)
            s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
}

static void update_lf_deltas(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    for (i = 0; i < 4; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.ref[i] = -s->lf_delta.ref[i];
        }
    }

    for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.mode[i] = -s->lf_delta.mode[i];
        }
    }
}

static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
{
    const uint8_t *sizes = buf;
    int i;

    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);

    buf      += 3 * (s->num_coeff_partitions - 1);
    buf_size -= 3 * (s->num_coeff_partitions - 1);
    if (buf_size < 0)
        return -1;

    for (i = 0; i < s->num_coeff_partitions - 1; i++) {
        int size = AV_RL24(sizes + 3 * i);
        if (buf_size - size < 0)
            return -1;

        ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
        buf      += size;
        buf_size -= size;
    }
    ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);

    return 0;
}
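/* Worked example (not in the original source): if the 2-bit field read above
 * is 2, there are 1 << 2 = 4 coefficient partitions. The first 3 * (4 - 1) = 9
 * bytes hold three little-endian 24-bit partition sizes; the last partition
 * has no explicit size and simply runs to the end of the buffer. */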

static void vp7_get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    int yac_qi  = vp8_rac_get_uint(c, 7);
    int ydc_qi  = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int y2dc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int y2ac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int uvdc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int uvac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;

    s->qmat[0].luma_qmul[0]    = vp7_ydc_qlookup[ydc_qi];
    s->qmat[0].luma_qmul[1]    = vp7_yac_qlookup[yac_qi];
    s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi];
    s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi];
    s->qmat[0].chroma_qmul[0]  = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132);
    s->qmat[0].chroma_qmul[1]  = vp7_yac_qlookup[uvac_qi];
}

static void vp8_get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, base_qi;

    int yac_qi     = vp8_rac_get_uint(c, 7);
    int ydc_delta  = vp8_rac_get_sint(c, 4);
    int y2dc_delta = vp8_rac_get_sint(c, 4);
    int y2ac_delta = vp8_rac_get_sint(c, 4);
    int uvdc_delta = vp8_rac_get_sint(c, 4);
    int uvac_delta = vp8_rac_get_sint(c, 4);

    for (i = 0; i < 4; i++) {
        if (s->segmentation.enabled) {
            base_qi = s->segmentation.base_quant[i];
            if (!s->segmentation.absolute_vals)
                base_qi += yac_qi;
        } else
            base_qi = yac_qi;

        s->qmat[i].luma_qmul[0]    = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta,  7)];
        s->qmat[i].luma_qmul[1]    = vp8_ac_qlookup[av_clip_uintp2(base_qi,              7)];
        s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)] * 2;
        /* 101581>>16 is equivalent to 155/100 */
        s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] * 101581 >> 16;
        s->qmat[i].chroma_qmul[0]  = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
        s->qmat[i].chroma_qmul[1]  = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];

        s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
        s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
    }
}
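/* Worked example (not in the original source): 155/100 = 1.55 while
 * 101581 / 65536 = 1.54999..., so the multiply-and-shift above reproduces the
 * spec's 155/100 scaling of the second-order luma DC quantizer without a
 * division; the FFMAX clamp then enforces the spec's minimum value of 8. */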

/**
 * Determine which buffers golden and altref should be updated with after this frame.
 * The spec isn't clear here, so I'm going by my understanding of what libvpx does.
 *
 * Intra frames update all 3 references.
 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set.
 * If the update (golden|altref) flag is set, it's updated with the current frame
 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
 * If the flag is not set, the number read means:
 * 0: no update
 * 1: VP56_FRAME_PREVIOUS
 * 2: update golden with altref, or update altref with golden
 */
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
{
    VP56RangeCoder *c = &s->c;

    if (update)
        return VP56_FRAME_CURRENT;

    switch (vp8_rac_get_uint(c, 2)) {
    case 1:
        return VP56_FRAME_PREVIOUS;
    case 2:
        return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
    }
    return VP56_FRAME_NONE;
}

static void vp78_reset_probability_tables(VP8Context *s)
{
    int i, j;
    for (i = 0; i < 4; i++)
        for (j = 0; j < 16; j++)
            memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
                   sizeof(s->prob->token[i][j]));
}

static void vp78_update_probability_tables(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, j, k, l, m;

    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS-1; l++)
                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp8_rac_get_uint(c, 8);
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
                    }
}

#define VP7_MVC_SIZE 17
#define VP8_MVC_SIZE 19

static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s,
                                                            int mvc_size)
{
    VP56RangeCoder *c = &s->c;
    int i, j;

    if (vp8_rac_get(c))
        for (i = 0; i < 4; i++)
            s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
    if (vp8_rac_get(c))
        for (i = 0; i < 3; i++)
            s->prob->pred8x8c[i]  = vp8_rac_get_uint(c, 8);

    // 17.2 MV probability update
    for (i = 0; i < 2; i++)
        for (j = 0; j < mvc_size; j++)
            if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
                s->prob->mvc[i][j] = vp8_rac_get_nn(c);
}

static void update_refs(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    int update_golden = vp8_rac_get(c);
    int update_altref = vp8_rac_get(c);

    s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
}

static void copy_chroma(AVFrame *dst, AVFrame *src, int width, int height)
{
    int i, j;

    for (j = 1; j < 3; j++) {
        for (i = 0; i < height / 2; i++)
            memcpy(dst->data[j] + i * dst->linesize[j],
                   src->data[j] + i * src->linesize[j], width / 2);
    }
}

static void fade(uint8_t *dst, int dst_linesize,
                 const uint8_t *src, int src_linesize,
                 int width, int height,
                 int alpha, int beta)
{
    int i, j;
    for (j = 0; j < height; j++) {
        for (i = 0; i < width; i++) {
            uint8_t y = src[j * src_linesize + i];
            dst[j * dst_linesize + i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
        }
    }
}
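/* Worked example (not in the original source): each luma sample is mapped to
 * clip(y + y * beta / 256 + alpha). With alpha = -16 and beta = 0 every pixel
 * darkens by a constant 16; with alpha = 0 and beta = -64 the frame fades
 * towards black by a quarter of each pixel's own brightness. */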

static int vp7_fade_frame(VP8Context *s, VP56RangeCoder *c)
{
    int alpha = (int8_t) vp8_rac_get_uint(c, 8);
    int beta  = (int8_t) vp8_rac_get_uint(c, 8);
    int ret;

    if (!s->keyframe && (alpha || beta)) {
        int width  = s->mb_width * 16;
        int height = s->mb_height * 16;
        AVFrame *src, *dst;

        if (!s->framep[VP56_FRAME_PREVIOUS] ||
            !s->framep[VP56_FRAME_GOLDEN]) {
            av_log(s->avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
            return AVERROR_INVALIDDATA;
        }

        dst =
        src = s->framep[VP56_FRAME_PREVIOUS]->tf.f;

        /* preserve the golden frame, write a new previous frame */
        if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) {
            s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s);
            if ((ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1)) < 0)
                return ret;

            dst = s->framep[VP56_FRAME_PREVIOUS]->tf.f;

            copy_chroma(dst, src, width, height);
        }

        fade(dst->data[0], dst->linesize[0],
             src->data[0], src->linesize[0],
             width, height, alpha, beta);
    }

    return 0;
}

static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int part1_size, hscale, vscale, i, j, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    s->profile = (buf[0] >> 1) & 7;
    if (s->profile > 1) {
        avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
        return AVERROR_INVALIDDATA;
    }

    s->keyframe  = !(buf[0] & 1);
    s->invisible = 0;
    part1_size   = AV_RL24(buf) >> 4;

    if (buf_size < 4 - s->profile + part1_size) {
        av_log(s->avctx, AV_LOG_ERROR, "Buffer size %d is too small, needed : %d\n", buf_size, 4 - s->profile + part1_size);
        return AVERROR_INVALIDDATA;
    }

    buf      += 4 - s->profile;
    buf_size -= 4 - s->profile;

    memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));

    ff_vp56_init_range_decoder(c, buf, part1_size);
    buf      += part1_size;
    buf_size -= part1_size;

    /* A. Dimension information (keyframes only) */
    if (s->keyframe) {
        width  = vp8_rac_get_uint(c, 12);
        height = vp8_rac_get_uint(c, 12);
        hscale = vp8_rac_get_uint(c, 2);
        vscale = vp8_rac_get_uint(c, 2);
        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp7_pred16x16_prob,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp7_pred8x8c_prob,
               sizeof(s->prob->pred8x8c));
        for (i = 0; i < 2; i++)
            memcpy(s->prob->mvc[i], vp7_mv_default_prob[i],
                   sizeof(vp7_mv_default_prob[i]));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
        memcpy(s->prob[0].scan, zigzag_scan, sizeof(s->prob[0].scan));
    }

    if (s->keyframe || s->profile > 0)
        memset(s->inter_dc_pred, 0, sizeof(s->inter_dc_pred));

    /* B. Decoding information for all four macroblock-level features */
    for (i = 0; i < 4; i++) {
        s->feature_enabled[i] = vp8_rac_get(c);
        if (s->feature_enabled[i]) {
            s->feature_present_prob[i] = vp8_rac_get_uint(c, 8);

            for (j = 0; j < 3; j++)
                s->feature_index_prob[i][j] =
                    vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;

            if (vp7_feature_value_size[s->profile][i])
                for (j = 0; j < 4; j++)
                    s->feature_value[i][j] =
                        vp8_rac_get(c) ? vp8_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;
        }
    }

    s->segmentation.enabled    = 0;
    s->segmentation.update_map = 0;
    s->lf_delta.enabled        = 0;

    s->num_coeff_partitions = 1;
    ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size);

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height ||
        (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) {
        if ((ret = vp7_update_dimensions(s, width, height)) < 0)
            return ret;
    }

    /* C. Dequantization indices */
    vp7_get_quants(s);

    /* D. Golden frame update flag (a Flag) for interframes only */
    if (!s->keyframe) {
        s->update_golden = vp8_rac_get(c) ? VP56_FRAME_CURRENT : VP56_FRAME_NONE;
        s->sign_bias[VP56_FRAME_GOLDEN] = 0;
    }

    s->update_last          = 1;
    s->update_probabilities = 1;
    s->fade_present         = 1;

    if (s->profile > 0) {
        s->update_probabilities = vp8_rac_get(c);
        if (!s->update_probabilities)
            s->prob[1] = s->prob[0];

        if (!s->keyframe)
            s->fade_present = vp8_rac_get(c);
    }

    /* E. Fading information for previous frame */
    if (s->fade_present && vp8_rac_get(c)) {
        if ((ret = vp7_fade_frame(s, c)) < 0)
            return ret;
    }

    /* F. Loop filter type */
    if (!s->profile)
        s->filter.simple = vp8_rac_get(c);

    /* G. DCT coefficient ordering specification */
    if (vp8_rac_get(c))
        for (i = 1; i < 16; i++)
            s->prob[0].scan[i] = zigzag_scan[vp8_rac_get_uint(c, 4)];

    /* H. Loop filter levels */
    if (s->profile > 0)
        s->filter.simple = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
    vp78_update_probability_tables(s);

    s->mbskip_enabled = 0;

    /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
    if (!s->keyframe) {
        s->prob->intra = vp8_rac_get_uint(c, 8);
        s->prob->last  = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE);
    }

    return 0;
}

static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    s->keyframe  = !(buf[0] & 1);
    s->profile   = (buf[0]>>1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size  = AV_RL24(buf) >> 5;
    buf      += 3;
    buf_size -= 3;

    if (s->profile > 3)
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

    if (!s->profile)
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
               sizeof(s->put_pixels_tab));
    else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
               sizeof(s->put_pixels_tab));

    if (header_size > buf_size - 7 * s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe) {
        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR,
                   "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        }
        width  = AV_RL16(buf + 3) & 0x3fff;
        height = AV_RL16(buf + 5) & 0x3fff;
        hscale = buf[4] >> 6;
        vscale = buf[6] >> 6;
        buf      += 7;
        buf_size -= 7;

        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc, vp8_mv_default_prob,
               sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
    }

    ff_vp56_init_range_decoder(c, buf, header_size);
    buf      += header_size;
    buf_size -= header_size;

    if (s->keyframe) {
        if (vp8_rac_get(c))
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        vp8_rac_get(c); // whether we can skip clamping in dsp functions
    }

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
    else
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple    = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c)))
        if (vp8_rac_get(c))
            update_lf_deltas(s);

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    }

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height ||
        (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height)
        if ((ret = vp8_update_dimensions(s, width, height)) < 0)
            return ret;

    vp8_get_quants(s);

    if (!s->keyframe) {
        update_refs(s);
        s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    }

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    vp78_update_probability_tables(s);

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    if (!s->keyframe) {
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
    }

    return 0;
}

static av_always_inline
void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
{
    dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
    dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
}

/**
 * Motion vector coding, 17.1.
 */
static int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
{
    int bit, x = 0;

    if (vp56_rac_get_prob_branchy(c, p[0])) {
        int i;

        for (i = 0; i < 3; i++)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        for (i = (vp7 ? 7 : 9); i > 3; i--)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12]))
            x += 8;
    } else {
        // small_mvtree
        const uint8_t *ps = p + 2;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + 3 * bit;
        x  += 4 * bit;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + bit;
        x  += 2 * bit;
        x  += vp56_rac_get_prob(c, *ps);
    }

    return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
}
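/* Reader's note (not in the original source): large magnitudes take the first
 * branch and are coded bit by bit — the low three bits first, then the high
 * bits downwards (8 bits total for VP7, 10 for VP8), with bit 3 handled
 * specially so that an all-zero high part still yields a magnitude >= 8.
 * Small magnitudes use the "small" probability tree; p[1] codes the sign. */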

static int vp7_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    return read_mv_component(c, p, 1);
}

static int vp8_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    return read_mv_component(c, p, 0);
}

static av_always_inline
const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
{
    if (is_vp7)
        return vp7_submv_prob;

    if (left == top)
        return vp8_submv_prob[4 - !!left];
    if (!top)
        return vp8_submv_prob[2];
    return vp8_submv_prob[1 - !!left];
}

/**
 * Split motion vector prediction, 16.4.
 * @returns the number of motion vectors parsed (2, 4 or 16)
 */
static av_always_inline
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                    int layout, int is_vp7)
{
    int part_idx;
    int n, num;
    VP8Macroblock *top_mb;
    VP8Macroblock *left_mb = &mb[-1];
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
    const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
    VP56mv *top_mv;
    VP56mv *left_mv = left_mb->bmv;
    VP56mv *cur_mv  = mb->bmv;

    if (!layout) // layout is inlined, s->mb_layout is not
        top_mb = &mb[2];
    else
        top_mb = &mb[-s->mb_width - 1];
    mbsplits_top = vp8_mbsplits[top_mb->partitioning];
    top_mv       = top_mb->bmv;

    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
        else
            part_idx = VP8_SPLITMVMODE_8x8;
    } else {
        part_idx = VP8_SPLITMVMODE_4x4;
    }

    num          = vp8_mbsplit_count[part_idx];
    mbsplits_cur = vp8_mbsplits[part_idx],
    firstidx     = vp8_mbfirstidx[part_idx];
    mb->partitioning = part_idx;

    for (n = 0; n < num; n++) {
        int k = firstidx[n];
        uint32_t left, above;
        const uint8_t *submv_prob;

        if (!(k & 3))
            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
        else
            left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
        if (k <= 3)
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
        else
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);

        submv_prob = get_submv_prob(left, above, is_vp7);

        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
                    mb->bmv[n].y = mb->mv.y +
                                   read_mv_component(c, s->prob->mvc[0], is_vp7);
                    mb->bmv[n].x = mb->mv.x +
                                   read_mv_component(c, s->prob->mvc[1], is_vp7);
                } else {
                    AV_ZERO32(&mb->bmv[n]);
                }
            } else {
                AV_WN32A(&mb->bmv[n], above);
            }
        } else {
            AV_WN32A(&mb->bmv[n], left);
        }
    }

    return num;
}

/**
 * The vp7 reference decoder uses a padding macroblock column (added to right
 * edge of the frame) to guard against illegal macroblock offsets. The
 * algorithm has bugs that permit offsets to straddle the padding column.
 * This function replicates those bugs.
 *
 * @param[out] edge_x macroblock x address
 * @param[out] edge_y macroblock y address
 *
 * @return macroblock offset legal (boolean)
 */
static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
                                   int xoffset, int yoffset, int boundary,
                                   int *edge_x, int *edge_y)
{
    int vwidth = mb_width + 1;
    int new = (mb_y + yoffset) * vwidth + mb_x + xoffset;
    if (new < boundary || new % vwidth == vwidth - 1)
        return 0;
    *edge_y = new / vwidth;
    *edge_x = new % vwidth;
    return 1;
}
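/* Worked example (not in the original source): with mb_width = 10 the virtual
 * row stride is vwidth = 11, column 10 being the padding column. For mb_x = 9,
 * mb_y = 1, xoffset = +2, yoffset = 0: new = 1 * 11 + 9 + 2 = 22, and
 * 22 % 11 = 0, which is not the padding column, so the offset is reported as
 * legal even though it wrapped through the padding onto the first macroblock
 * of the next row — mirroring the reference decoder's behaviour. */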

static const VP56mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock)
{
    return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0];
}

static av_always_inline
void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
                    int mb_x, int mb_y, int layout)
{
    VP8Macroblock *mb_edge[12];
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    VP56mv near_mv[3];
    uint8_t cnt[3] = { 0 };
    VP56RangeCoder *c = &s->c;
    int i;

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
        const VP7MVPred * pred = &vp7_mv_pred[i];
        int edge_x, edge_y;

        if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
                                    pred->yoffset, !s->profile, &edge_x, &edge_y)) {
            VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1)
                                             ? s->macroblocks_base + 1 + edge_x +
                                               (s->mb_width + 1) * (edge_y + 1)
                                             : s->macroblocks + edge_x +
                                               (s->mb_height - edge_y - 1) * 2;
            uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
            if (mv) {
                if (AV_RN32A(&near_mv[CNT_NEAREST])) {
                    if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
                        idx = CNT_NEAREST;
                    } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
                        if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
                            continue;
                        idx = CNT_NEAR;
                    } else {
                        AV_WN32A(&near_mv[CNT_NEAR], mv);
                        idx = CNT_NEAR;
                    }
                } else {
                    AV_WN32A(&near_mv[CNT_NEAREST], mv);
                    idx = CNT_NEAREST;
                }
            } else {
                idx = CNT_ZERO;
            }
        } else {
            idx = CNT_ZERO;
        }
        cnt[idx] += vp7_mv_pred[i].score;
    }

    mb->partitioning = VP8_SPLITMVMODE_NONE;

    if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {

            if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {

                if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
                else
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR]    ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));

                if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
                } else {
                    mb->mv.y += vp7_read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x += vp7_read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                mb->mv = near_mv[CNT_NEAR];
                mb->bmv[0] = mb->mv;
            }
        } else {
            mb->mv = near_mv[CNT_NEAREST];
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}

static av_always_inline
void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb,
                    int mb_x, int mb_y, int layout)
{
    VP8Macroblock *mb_edge[3] = { 0      /* top */,
                                  mb - 1 /* left */,
                                  0      /* top-left */ };
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
    int8_t *sign_bias = s->sign_bias;
    VP56mv near_mv[4];
    uint8_t cnt[4] = { 0 };
    VP56RangeCoder *c = &s->c;

    if (!layout) { // layout is inlined (s->mb_layout is not)
        mb_edge[0] = mb + 2;
        mb_edge[2] = mb + 1;
    } else {
        mb_edge[0] = mb - s->mb_width - 1;
        mb_edge[2] = mb - s->mb_width - 2;
    }

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    /* Process MB on top, left and top-left */
#define MV_EDGE_CHECK(n)                                                      \
    {                                                                         \
        VP8Macroblock *edge = mb_edge[n];                                     \
        int edge_ref = edge->ref_frame;                                       \
        if (edge_ref != VP56_FRAME_CURRENT) {                                 \
            uint32_t mv = AV_RN32A(&edge->mv);                                \
            if (mv) {                                                         \
                if (cur_sign_bias != sign_bias[edge_ref]) {                   \
                    /* SWAR negate of the values in mv. */                    \
                    mv = ~mv;                                                 \
                    mv = ((mv & 0x7fff7fff) +                                 \
                          0x00010001) ^ (mv & 0x80008000);                    \
                }                                                             \
                if (!n || mv != AV_RN32A(&near_mv[idx]))                      \
                    AV_WN32A(&near_mv[++idx], mv);                            \
                cnt[idx] += 1 + (n != 2);                                     \
            } else                                                            \
                cnt[CNT_ZERO] += 1 + (n != 2);                                \
        }                                                                     \
    }
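/* Worked example (not in the original source): the SWAR negate flips both
 * packed 16-bit components at once. For mv = 0x00010001 (y = 1, x = 1):
 * ~mv = 0xfffefffe; (0x7ffe7ffe + 0x00010001) = 0x7fff7fff, and XORing the
 * preserved sign bits 0x80008000 back in yields 0xffffffff, i.e. (-1, -1),
 * without letting the per-component carries cross the 16-bit boundary. */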

    MV_EDGE_CHECK(0)
    MV_EDGE_CHECK(1)
    MV_EDGE_CHECK(2)

    mb->partitioning = VP8_SPLITMVMODE_NONE;
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        /* If we have three distinct MVs, merge first and last if they're the same */
        if (cnt[CNT_SPLITMV] &&
            AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
            cnt[CNT_NEAREST] += 1;

        /* Swap near and nearest if necessary */
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
            FFSWAP(uint8_t, cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
            FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
        }

        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
                /* Choose the best mv out of 0,0 and the nearest mv */
                clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
                                    (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
                                    (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);

                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1];
                } else {
                    mb->mv.y += vp8_read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x += vp8_read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
                mb->bmv[0] = mb->mv;
            }
        } else {
            clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}

static av_always_inline
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                           int mb_x, int keyframe, int layout)
{
    uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;

    if (layout) {
        VP8Macroblock *mb_top = mb - s->mb_width - 1;
        memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
    }
    if (keyframe) {
        int x, y;
        uint8_t *top;
        uint8_t *const left = s->intra4x4_pred_mode_left;
        if (layout)
            top = mb->intra4x4_pred_mode_top;
        else
            top = s->intra4x4_pred_mode_top + 4 * mb_x;
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                const uint8_t *ctx;
                ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
                left[y]   = top[x] = *intra4x4;
                intra4x4++;
            }
        }
    } else {
        int i;
        for (i = 0; i < 16; i++)
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree,
                                           vp8_pred4x4_prob_inter);
    }
}

static av_always_inline
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                    uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
{
    VP56RangeCoder *c = &s->c;
    const char *vp7_feature_name[] = { "q-index",
                                       "lf-delta",
                                       "partial-golden-update",
                                       "blit-pitch" };
    if (is_vp7) {
        int i;
        *segment = 0;
        for (i = 0; i < 4; i++) {
            if (s->feature_enabled[i]) {
                if (vp56_rac_get_prob_branchy(c, s->feature_present_prob[i])) {
                    int index = vp8_rac_get_tree(c, vp7_feature_index_tree,
                                                 s->feature_index_prob[i]);
                    av_log(s->avctx, AV_LOG_WARNING,
                           "Feature %s present in macroblock (value 0x%x)\n",
                           vp7_feature_name[i], s->feature_value[i][index]);
                }
            }
        }
    } else if (s->segmentation.update_map) {
        int bit  = vp56_rac_get_prob(c, s->prob->segmentid[0]);
        *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
    } else if (s->segmentation.enabled)
        *segment = ref ? *ref : *segment;
    mb->segment = *segment;

    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;

    if (s->keyframe) {
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra,
                                    vp8_pred16x16_prob_intra);

        if (mb->mode == MODE_I4x4) {
            decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
        } else {
            const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
                                           : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
            if (s->mb_layout)
                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
            else
                AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
            AV_WN32A(s->intra4x4_pred_mode_left, modes);
        }

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
                                                vp8_pred8x8c_prob_intra);
        mb->ref_frame        = VP56_FRAME_CURRENT;
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
        // inter MB, 16.2
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
            mb->ref_frame =
                (!is_vp7 && vp56_rac_get_prob(c, s->prob->golden)) ? VP56_FRAME_GOLDEN2 /* altref */
                                                                   : VP56_FRAME_GOLDEN;
        else
            mb->ref_frame = VP56_FRAME_PREVIOUS;
        s->ref_count[mb->ref_frame - 1]++;

        // motion vectors, 16.3
        if (is_vp7)
            vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
        else
            vp8_decode_mvs(s, mb, mb_x, mb_y, layout);
    } else {
        // intra MB, 16.1
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);

        if (mb->mode == MODE_I4x4)
            decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
                                                s->prob->pred8x8c);
        mb->ref_frame        = VP56_FRAME_CURRENT;
        mb->partitioning     = VP8_SPLITMVMODE_NONE;
        AV_ZERO32(&mb->bmv[0]);
    }
}

/**
 * @param r arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 *
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static av_always_inline
int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
                                 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                 int i, uint8_t *token_prob, int16_t qmul[2],
                                 const uint8_t scan[16], int vp7)
{
    VP56RangeCoder c = *r;
    goto skip_eob;
    do {
        int coeff;
restart:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB
            break;

skip_eob:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
            if (++i == 16)
                break; // invalid input; blocks should end with EOB
            token_prob = probs[i][0];
            if (vp7)
                goto restart;
            goto skip_eob;
        }

        if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
            coeff = 1;
            token_prob = probs[i + 1][1];
        } else {
            if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
                coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
                if (coeff)
                    coeff += vp56_rac_get_prob(&c, token_prob[5]);
                coeff += 2;
            } else {
                // DCT_CAT*
                if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
                    if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
                        coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
                    } else { // DCT_CAT2
                        coeff  = 7;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
                    }
                } else { // DCT_CAT3 and up
                    int a   = vp56_rac_get_prob(&c, token_prob[8]);
                    int b   = vp56_rac_get_prob(&c, token_prob[9 + a]);
                    int cat = (a << 1) + b;
                    coeff   = 3 + (8 << cat);
                    coeff  += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
                }
            }
            token_prob = probs[i + 1][2];
        }
        block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
    } while (++i < 16);

    *r = c;
    return i;
}

static av_always_inline
int inter_predict_dc(int16_t block[16], int16_t pred[2])
{
    int16_t dc = block[0];
    int ret = 0;

    if (pred[1] > 3) {
        dc += pred[0];
        ret = 1;
    }

    if (!pred[0] | !dc | ((int32_t)pred[0] ^ (int32_t)dc) >> 31) {
        block[0] = pred[0] = dc;
        pred[1]  = 0;
    } else {
        if (pred[0] == dc)
            pred[1]++;
        block[0] = pred[0] = dc;
    }

    return ret;
}
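/* Reader's note (not in the original source): pred[0] holds the last inter DC
 * value and pred[1] counts consecutive occurrences of that same value; once
 * the count exceeds 3, the stored DC is added to the decoded block's DC (VP7
 * inter DC prediction). The branch-free bitwise-OR test resets the counter
 * whenever either value is zero or the two values differ in sign. */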

static int vp7_decode_block_coeffs_internal(VP56RangeCoder *r,
                                            int16_t block[16],
                                            uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                            int i, uint8_t *token_prob,
                                            int16_t qmul[2],
                                            const uint8_t scan[16])
{
    return decode_block_coeffs_internal(r, block, probs, i,
                                        token_prob, qmul, scan, IS_VP7);
}

#ifndef vp8_decode_block_coeffs_internal
static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r,
                                            int16_t block[16],
                                            uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                            int i, uint8_t *token_prob,
                                            int16_t qmul[2])
{
    return decode_block_coeffs_internal(r, block, probs, i,
                                        token_prob, qmul, zigzag_scan, IS_VP8);
}
#endif

/**
 * @param c arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param zero_nhood the initial prediction context for number of surrounding
 *                   all-zero blocks (only left/top, so 0-2)
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 * @param scan scan pattern (VP7 only)
 *
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static av_always_inline
int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
                        uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                        int i, int zero_nhood, int16_t qmul[2],
                        const uint8_t scan[16], int vp7)
{
    uint8_t *token_prob = probs[i][zero_nhood];
    if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
        return 0;
    return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i,
                                                  token_prob, qmul, scan)
               : vp8_decode_block_coeffs_internal(c, block, probs, i,
                                                  token_prob, qmul);
}

static av_always_inline
void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c,
                      VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9],
                      int is_vp7)
{
    int i, x, y, luma_start = 0, luma_ctx = 3;
    int nnz_pred, nnz, nnz_total = 0;
    int segment = mb->segment;
    int block_dc = 0;

    if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
        nnz_pred = t_nnz[8] + l_nnz[8];

        // decode DC values and do hadamard
        nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
                                  nnz_pred, s->qmat[segment].luma_dc_qmul,
                                  zigzag_scan, is_vp7);
        l_nnz[8] = t_nnz[8] = !!nnz;

        if (is_vp7 && mb->mode > MODE_I4x4) {
            nnz |= inter_predict_dc(td->block_dc,
                                    s->inter_dc_pred[mb->ref_frame - 1]);
        }

        if (nnz) {
            nnz_total += nnz;
            block_dc   = 1;
            if (nnz == 1)
                s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
            else
                s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
        }
        luma_start = 1;
        luma_ctx   = 0;
    }

    // luma blocks
    for (y = 0; y < 4; y++)
        for (x = 0; x < 4; x++) {
            nnz_pred = l_nnz[y] + t_nnz[x];
            nnz = decode_block_coeffs(c, td->block[y][x],
                                      s->prob->token[luma_ctx],
                                      luma_start, nnz_pred,
                                      s->qmat[segment].luma_qmul,
                                      s->prob[0].scan, is_vp7);
            /* nnz+block_dc may be one more than the actual last index,
             * but we don't care */
            td->non_zero_count_cache[y][x] = nnz + block_dc;
            t_nnz[x] = l_nnz[y] = !!nnz;
            nnz_total += nnz;
        }

    // chroma blocks
    // TODO: what to do about dimensions? 2nd dim for luma is x,
    // but for chroma it's (y<<1)|x
    for (i = 4; i < 6; i++)
        for (y = 0; y < 2; y++)
            for (x = 0; x < 2; x++) {
                nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
                nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x],
                                          s->prob->token[2], 0, nnz_pred,
                                          s->qmat[segment].chroma_qmul,
                                          s->prob[0].scan, is_vp7);
                td->non_zero_count_cache[i][(y << 1) + x] = nnz;
                t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
                nnz_total += nnz;
            }

    // if there were no coded coeffs despite the macroblock not being marked skip,
    // we MUST not do the inner loop filter and should not do IDCT
    // Since skip isn't used for bitstream prediction, just manually set it.
    if (!nnz_total)
        mb->skip = 1;
}

static av_always_inline
void backup_mb_border(uint8_t *top_border, uint8_t *src_y,
                      uint8_t *src_cb, uint8_t *src_cr,
                      int linesize, int uvlinesize, int simple)
{
    AV_COPY128(top_border, src_y + 15 * linesize);
    if (!simple) {
        AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
        AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
    }
}

static av_always_inline
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
                    uint8_t *src_cr, int linesize, int uvlinesize, int mb_x,
                    int mb_y, int mb_width, int simple, int xchg)
{
    uint8_t *top_border_m1 = top_border - 32;     // for TL prediction
    src_y  -= linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

#define XCHG(a, b, xchg)                                                      \
    do {                                                                      \
        if (xchg)                                                             \
            AV_SWAP64(b, a);                                                  \
        else                                                                  \
            AV_COPY64(b, a);                                                  \
    } while (0)

    XCHG(top_border_m1 + 8, src_y - 8, xchg);
    XCHG(top_border, src_y, xchg);
    XCHG(top_border + 8, src_y + 8, 1);
    if (mb_x < mb_width - 1)
        XCHG(top_border + 32, src_y + 16, 1);

    // only copy chroma for normal loop filter
    // or to initialize the top row to 127
    if (!simple || !mb_y) {
        XCHG(top_border_m1 + 16, src_cb - 8, xchg);
        XCHG(top_border_m1 + 24, src_cr - 8, xchg);
        XCHG(top_border + 16, src_cb, 1);
        XCHG(top_border + 24, src_cr, 1);
    }
}

static av_always_inline
int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
{
    if (!mb_x)
        return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
    else
        return mb_y ? mode : LEFT_DC_PRED8x8;
}

static av_always_inline
int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
{
    if (!mb_x)
        return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
    else
        return mb_y ? mode : HOR_PRED8x8;
}

static av_always_inline
int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
{
    switch (mode) {
    case DC_PRED8x8:
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
    case VERT_PRED8x8:
        return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
    case HOR_PRED8x8:
        return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
    case PLANE_PRED8x8: /* TM */
        return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);
    }
    return mode;
}

static av_always_inline
int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
{
    if (!mb_x) {
        return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
    } else {
        return mb_y ? mode : HOR_VP8_PRED;
    }
}

static av_always_inline
int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y,
                                     int *copy_buf, int vp7)
{
    switch (mode) {
    case VERT_PRED:
        if (!mb_x && mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case DIAG_DOWN_LEFT_PRED:
    case VERT_LEFT_PRED:
        return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
    case HOR_PRED:
        if (!mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case HOR_UP_PRED:
        return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
    case TM_VP8_PRED:
        return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
    case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions
                   * as 16x16/8x8 DC */
    case DIAG_DOWN_RIGHT_PRED:
    case VERT_RIGHT_PRED:
    case HOR_DOWN_PRED:
        if (!mb_y || !mb_x)
            *copy_buf = 1;
        return mode;
    }
    return mode;
}

static av_always_inline
void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
{
    int x, y, mode, nnz;
    uint32_t tr;

    /* for the first row, we need to run xchg_mb_border to init the top edge
     * to 127; otherwise, skip it if we aren't going to deblock */
    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 1);

    if (mb->mode < MODE_I4x4) {
        mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
        s->hpc.pred16x16[mode](dst[0], s->linesize);
    } else {
        uint8_t *ptr = dst[0];
        uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
        const uint8_t lo = is_vp7 ? 128 : 127;
        const uint8_t hi = is_vp7 ? 128 : 129;
        uint8_t tr_top[4] = { lo, lo, lo, lo };

        // all blocks on the right edge of the macroblock use the bottom edge
        // of the top macroblock for their topright edge
        uint8_t *tr_right = ptr - s->linesize + 16;

        // if we're on the right edge of the frame, said edge is extended
        // from the top macroblock
        if (mb_y && mb_x == s->mb_width - 1) {
            tr = tr_right[-1] * 0x01010101u;
            tr_right = (uint8_t *) &tr;
        }

        if (mb->skip)
            AV_ZERO128(td->non_zero_count_cache);

        for (y = 0; y < 4; y++) {
            uint8_t *topright = ptr + 4 - s->linesize;
            for (x = 0; x < 4; x++) {
                int copy = 0, linesize = s->linesize;
                uint8_t *dst = ptr + 4 * x;
                DECLARE_ALIGNED(4, uint8_t, copy_dst)[5 * 8];

                if ((y == 0 || x == 3) && mb_y == 0) {
                    topright = tr_top;
                } else if (x == 3)
                    topright = tr_right;

                mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
                                                        mb_y + y, &copy, is_vp7);
                if (copy) {
                    dst = copy_dst + 12;
                    linesize = 8;
                    if (!(mb_y + y)) {
                        copy_dst[3] = lo;
                        AV_WN32A(copy_dst + 4, lo * 0x01010101U);
                    } else {
                        AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
                        if (!(mb_x + x)) {
                            copy_dst[3] = hi;
                        } else {
                            copy_dst[3] = ptr[4 * x - s->linesize - 1];
                        }
                    }
                    if (!(mb_x + x)) {
                        copy_dst[11] =
                        copy_dst[19] =
                        copy_dst[27] =
                        copy_dst[35] = hi;
                    } else {
                        copy_dst[11] = ptr[4 * x                   - 1];
                        copy_dst[19] = ptr[4 * x + s->linesize     - 1];
                        copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
                        copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
                    }
                }
                s->hpc.pred4x4[mode](dst, topright, linesize);
                if (copy) {
                    AV_COPY32(ptr + 4 * x,                   copy_dst + 12);
                    AV_COPY32(ptr + 4 * x + s->linesize,     copy_dst + 20);
                    AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
                    AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
                }

                nnz = td->non_zero_count_cache[y][x];
                if (nnz) {
                    if (nnz == 1)
                        s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
                                                  td->block[y][x], s->linesize);
                    else
                        s->vp8dsp.vp8_idct_add(ptr + 4 * x,
                                               td->block[y][x], s->linesize);
                }
                topright += 4;
            }

            ptr      += 4 * s->linesize;
            intra4x4 += 4;
        }
    }

    mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
                                            mb_x, mb_y, is_vp7);
    s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
    s->hpc.pred8x8[mode](dst[2], s->uvlinesize);

    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 0);
}

static const uint8_t subpel_idx[3][8] = {
    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
                                // also function pointer index
    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
};
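/* Worked example (not in the original source): a luma mv->x of 3 (quarter-pel
 * units) gives an eighth-pel phase mx = (3 << 1) & 7 = 6, so the 6-tap filter
 * needs subpel_idx[0][6] = 2 extra pixels on the left and subpel_idx[2][6] = 3
 * on the right, i.e. block_w + subpel_idx[1][6] = block_w + 5 source pixels
 * per row; phase 0 needs no extra pixels and takes the plain copy path. */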

/**
 * luma MC function
 *
 * @param s VP8 decoding context
 * @param dst target buffer for block data at block position
 * @param ref reference picture buffer at origin (0, 0)
 * @param mv motion vector (relative to block position) to get pixel data from
 * @param x_off horizontal position of block from origin (0, 0)
 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block (16, 8 or 4)
 * @param block_h height of block (always same as block_w)
 * @param width width of src/dst plane data
 * @param height height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
                 ThreadFrame *ref, const VP56mv *mv,
                 int x_off, int y_off, int block_w, int block_h,
                 int width, int height, ptrdiff_t linesize,
                 vp8_mc_func mc_func[3][3])
{
    uint8_t *src = ref->f->data[0];

    if (AV_RN32A(mv)) {
        int src_linesize = linesize;

        int mx = (mv->x << 1) & 7, mx_idx = subpel_idx[0][mx];
        int my = (mv->y << 1) & 7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 2;
        y_off += mv->y >> 2;

        // edge emulation
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
        src += y_off * linesize + x_off;
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx,
                                     width, height);
            src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            src_linesize = EDGE_EMU_LINESIZE;
        }
        mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
    } else {
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
        mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
                      linesize, block_h, 0, 0);
    }
}

/**
 * chroma MC function
 *
 * @param s VP8 decoding context
 * @param dst1 target buffer for block data at block position (U plane)
 * @param dst2 target buffer for block data at block position (V plane)
 * @param ref reference picture buffer at origin (0, 0)
 * @param mv motion vector (relative to block position) to get pixel data from
 * @param x_off horizontal position of block from origin (0, 0)
 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block (16, 8 or 4)
 * @param block_h height of block (always same as block_w)
 * @param width width of src/dst plane data
 * @param height height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
                   uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv,
                   int x_off, int y_off, int block_w, int block_h,
                   int width, int height, ptrdiff_t linesize,
                   vp8_mc_func mc_func[3][3])
{
    uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];

    if (AV_RN32A(mv)) {
        int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
        int my = mv->y & 7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 3;
        y_off += mv->y >> 3;

        // edge emulation
        src1 += y_off * linesize + x_off;
        src2 += y_off * linesize + x_off;
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src1 - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);

            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src2 - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
        } else {
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        }
    } else {
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
        mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
        mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
    }
}

static av_always_inline
void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                 ThreadFrame *ref_frame, int x_off, int y_off,
                 int bx_off, int by_off, int block_w, int block_h,
                 int width, int height, VP56mv *mv)
{
    VP56mv uvmv = *mv;

    /* Y */
    vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
                ref_frame, mv, x_off + bx_off, y_off + by_off,
                block_w, block_h, width, height, s->linesize,
                s->put_pixels_tab[block_w == 8]);

    /* U/V */
    if (s->profile == 3) {
        /* this block only applies to VP8; it is safe to check
         * only the profile, as VP7 profile <= 1 */
        uvmv.x &= ~7;
        uvmv.y &= ~7;
    }
    x_off   >>= 1;
    y_off   >>= 1;
    bx_off  >>= 1;
    by_off  >>= 1;
    width   >>= 1;
    height  >>= 1;
    block_w >>= 1;
    block_h >>= 1;
    vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
                  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
                  &uvmv, x_off + bx_off, y_off + by_off,
                  block_w, block_h, width, height, s->uvlinesize,
                  s->put_pixels_tab[1 + (block_w == 4)]);
}

/* Fetch pixels for estimated mv 4 macroblocks ahead.
 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
static av_always_inline
void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                     int mb_xy, int ref)
{
    /* Don't prefetch refs that haven't been used very often this frame. */
    if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
        int x_off = mb_x << 4, y_off = mb_y << 4;
        int mx = (mb->mv.x >> 2) + x_off + 8;
        int my = (mb->mv.y >> 2) + y_off;
        uint8_t **src = s->framep[ref]->tf.f->data;
        int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
        /* For threading, a ff_thread_await_progress here might be useful, but
         * it actually slows down the decoder. Since a bad prefetch doesn't
         * generate bad decoder output, we don't run it here. */
        s->vdsp.prefetch(src[0] + off, s->linesize, 4);
        off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
        s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
    }
}
1870 
1871 /**
1872  * Apply motion vectors to prediction buffer, chapter 18.
1873  */
1874 static av_always_inline
1876  VP8Macroblock *mb, int mb_x, int mb_y)
1877 {
1878  int x_off = mb_x << 4, y_off = mb_y << 4;
1879  int width = 16 * s->mb_width, height = 16 * s->mb_height;
1880  ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
1881  VP56mv *bmv = mb->bmv;
1882 
1883  switch (mb->partitioning) {
1884  case VP8_SPLITMVMODE_NONE:
1885  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1886  0, 0, 16, 16, width, height, &mb->mv);
1887  break;
1888  case VP8_SPLITMVMODE_4x4: {
1889  int x, y;
1890  VP56mv uvmv;
1891 
1892  /* Y */
1893  for (y = 0; y < 4; y++) {
1894  for (x = 0; x < 4; x++) {
1895  vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
1896  ref, &bmv[4 * y + x],
1897  4 * x + x_off, 4 * y + y_off, 4, 4,
1898  width, height, s->linesize,
1899  s->put_pixels_tab[2]);
1900  }
1901  }
1902 
1903  /* U/V */
1904  x_off >>= 1;
1905  y_off >>= 1;
1906  width >>= 1;
1907  height >>= 1;
1908  for (y = 0; y < 2; y++) {
1909  for (x = 0; x < 2; x++) {
1910  uvmv.x = mb->bmv[2 * y * 4 + 2 * x ].x +
1911  mb->bmv[2 * y * 4 + 2 * x + 1].x +
1912  mb->bmv[(2 * y + 1) * 4 + 2 * x ].x +
1913  mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
1914  uvmv.y = mb->bmv[2 * y * 4 + 2 * x ].y +
1915  mb->bmv[2 * y * 4 + 2 * x + 1].y +
1916  mb->bmv[(2 * y + 1) * 4 + 2 * x ].y +
1917  mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
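 /* Average the four luma MVs covering each chroma 4x4 block (an 8x8
  * luma area); adding the sign bit before the shift turns this into
  * a divide by 4 that rounds to nearest, ties away from zero. */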
1918  uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT - 1))) >> 2;
1919  uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT - 1))) >> 2;
1920  if (s->profile == 3) {
1921  uvmv.x &= ~7;
1922  uvmv.y &= ~7;
1923  }
1924  vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
1925  dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
1926  &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
1927  width, height, s->uvlinesize,
1928  s->put_pixels_tab[2]);
1929  }
1930  }
1931  break;
1932  }
1933  case VP8_SPLITMVMODE_16x8:
1934  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1935  0, 0, 16, 8, width, height, &bmv[0]);
1936  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1937  0, 8, 16, 8, width, height, &bmv[1]);
1938  break;
1939  case VP8_SPLITMVMODE_8x16:
1940  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1941  0, 0, 8, 16, width, height, &bmv[0]);
1942  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1943  8, 0, 8, 16, width, height, &bmv[1]);
1944  break;
1945  case VP8_SPLITMVMODE_8x8:
1946  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1947  0, 0, 8, 8, width, height, &bmv[0]);
1948  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1949  8, 0, 8, 8, width, height, &bmv[1]);
1950  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1951  0, 8, 8, 8, width, height, &bmv[2]);
1952  vp8_mc_part(s, td, dst, ref, x_off, y_off,
1953  8, 8, 8, 8, width, height, &bmv[3]);
1954  break;
1955  }
1956 }
1957 
1958 static av_always_inline
1959 void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb)
1960 {
1961  int x, y, ch;
1962 
1963  if (mb->mode != MODE_I4x4) {
1964  uint8_t *y_dst = dst[0];
1965  for (y = 0; y < 4; y++) {
1966  uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
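 /* nnz4 packs the row's four per-block coefficient counts into one
  * word; if no byte exceeds 1, every block is DC-only and a single
  * combined dc_add4y call handles the whole row. */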
1967  if (nnz4) {
1968  if (nnz4 & ~0x01010101) {
1969  for (x = 0; x < 4; x++) {
1970  if ((uint8_t) nnz4 == 1)
1971  s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
1972  td->block[y][x],
1973  s->linesize);
1974  else if ((uint8_t) nnz4 > 1)
1975  s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
1976  td->block[y][x],
1977  s->linesize);
1978  nnz4 >>= 8;
1979  if (!nnz4)
1980  break;
1981  }
1982  } else {
1983  s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
1984  }
1985  }
1986  y_dst += 4 * s->linesize;
1987  }
1988  }
1989 
1990  for (ch = 0; ch < 2; ch++) {
1991  uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
1992  if (nnz4) {
1993  uint8_t *ch_dst = dst[1 + ch];
1994  if (nnz4 & ~0x01010101) {
1995  for (y = 0; y < 2; y++) {
1996  for (x = 0; x < 2; x++) {
1997  if ((uint8_t) nnz4 == 1)
1998  s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
1999  td->block[4 + ch][(y << 1) + x],
2000  s->uvlinesize);
2001  else if ((uint8_t) nnz4 > 1)
2002  s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
2003  td->block[4 + ch][(y << 1) + x],
2004  s->uvlinesize);
2005  nnz4 >>= 8;
2006  if (!nnz4)
2007  goto chroma_idct_end;
2008  }
2009  ch_dst += 4 * s->uvlinesize;
2010  }
2011  } else {
2012  s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
2013  }
2014  }
2015 chroma_idct_end:
2016  ;
2017  }
2018 }
2019 
2020 static av_always_inline
2021 void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb,
2022  VP8FilterStrength *f, int is_vp7)
2023 {
2024  int interior_limit, filter_level;
2025 
2026  if (s->segmentation.enabled) {
2027  filter_level = s->segmentation.filter_level[mb->segment];
2028  if (!s->segmentation.absolute_vals)
2029  filter_level += s->filter.level;
2030  } else
2031  filter_level = s->filter.level;
2032 
2033  if (s->lf_delta.enabled) {
2034  filter_level += s->lf_delta.ref[mb->ref_frame];
2035  filter_level += s->lf_delta.mode[mb->mode];
2036  }
2037 
2038  filter_level = av_clip_uintp2(filter_level, 6);
2039 
2040  interior_limit = filter_level;
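 /* Higher sharpness shrinks the interior limit, filtering inner block
  * edges less aggressively to preserve detail. */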
2041  if (s->filter.sharpness) {
2042  interior_limit >>= (s->filter.sharpness + 3) >> 2;
2043  interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
2044  }
2045  interior_limit = FFMAX(interior_limit, 1);
2046 
2047  f->filter_level = filter_level;
2048  f->inner_limit = interior_limit;
2049  f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 ||
2050  mb->mode == VP8_MVMODE_SPLIT;
2051 }
2052 
2053 static av_always_inline
2054 void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f,
2055  int mb_x, int mb_y, int is_vp7)
2056 {
2057  int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
2058  int filter_level = f->filter_level;
2059  int inner_limit = f->inner_limit;
2060  int inner_filter = f->inner_filter;
2061  int linesize = s->linesize;
2062  int uvlinesize = s->uvlinesize;
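 /* High-edge-variance thresholds indexed by filter level; row [0] is
  * used for inter frames, row [1] for keyframes (cf. RFC 6386). */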
2063  static const uint8_t hev_thresh_lut[2][64] = {
2064  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2065  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2066  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2067  3, 3, 3, 3 },
2068  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2069  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2070  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2071  2, 2, 2, 2 }
2072  };
2073 
2074  if (!filter_level)
2075  return;
2076 
2077  if (is_vp7) {
2078  bedge_lim_y = filter_level;
2079  bedge_lim_uv = filter_level * 2;
2080  mbedge_lim = filter_level + 2;
2081  } else {
2082  bedge_lim_y =
2083  bedge_lim_uv = filter_level * 2 + inner_limit;
2084  mbedge_lim = bedge_lim_y + 4;
2085  }
2086 
2087  hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
2088 
2089  if (mb_x) {
2090  s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
2091  mbedge_lim, inner_limit, hev_thresh);
2092  s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
2093  mbedge_lim, inner_limit, hev_thresh);
2094  }
2095 
2096 #define H_LOOP_FILTER_16Y_INNER(cond) \
2097  if (cond && inner_filter) { \
2098  s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 4, linesize, \
2099  bedge_lim_y, inner_limit, \
2100  hev_thresh); \
2101  s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 8, linesize, \
2102  bedge_lim_y, inner_limit, \
2103  hev_thresh); \
2104  s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize, \
2105  bedge_lim_y, inner_limit, \
2106  hev_thresh); \
2107  s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, \
2108  uvlinesize, bedge_lim_uv, \
2109  inner_limit, hev_thresh); \
2110  }
2111 
2112  H_LOOP_FILTER_16Y_INNER(!is_vp7)
2113 
2114  if (mb_y) {
2115  s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
2116  mbedge_lim, inner_limit, hev_thresh);
2117  s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
2118  mbedge_lim, inner_limit, hev_thresh);
2119  }
2120 
2121  if (inner_filter) {
2122  s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 4 * linesize,
2123  linesize, bedge_lim_y,
2124  inner_limit, hev_thresh);
2125  s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 8 * linesize,
2126  linesize, bedge_lim_y,
2127  inner_limit, hev_thresh);
2128  s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
2129  linesize, bedge_lim_y,
2130  inner_limit, hev_thresh);
2131  s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
2132  dst[2] + 4 * uvlinesize,
2133  uvlinesize, bedge_lim_uv,
2134  inner_limit, hev_thresh);
2135  }
2136 
2137  H_LOOP_FILTER_16Y_INNER(is_vp7)
2138 }
2139 
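/* The "simple" loop filter variant touches only the luma plane and uses
 * plain edge limits, with no high-edge-variance test. */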
2140 static av_always_inline
2141 void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f,
2142  int mb_x, int mb_y)
2143 {
2144  int mbedge_lim, bedge_lim;
2145  int filter_level = f->filter_level;
2146  int inner_limit = f->inner_limit;
2147  int inner_filter = f->inner_filter;
2148  int linesize = s->linesize;
2149 
2150  if (!filter_level)
2151  return;
2152 
2153  bedge_lim = 2 * filter_level + inner_limit;
2154  mbedge_lim = bedge_lim + 4;
2155 
2156  if (mb_x)
2157  s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
2158  if (inner_filter) {
2159  s->vp8dsp.vp8_h_loop_filter_simple(dst + 4, linesize, bedge_lim);
2160  s->vp8dsp.vp8_h_loop_filter_simple(dst + 8, linesize, bedge_lim);
2161  s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
2162  }
2163 
2164  if (mb_y)
2165  s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
2166  if (inner_filter) {
2167  s->vp8dsp.vp8_v_loop_filter_simple(dst + 4 * linesize, linesize, bedge_lim);
2168  s->vp8dsp.vp8_v_loop_filter_simple(dst + 8 * linesize, linesize, bedge_lim);
2169  s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
2170  }
2171 }
2172 
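/* MV clamping margin: one 16-pixel macroblock, expressed in the
 * quarter-pel units used by s->mv_min/s->mv_max. */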
2173 #define MARGIN (16 << 2)
2174 static av_always_inline
2175 void vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
2176  VP8Frame *prev_frame, int is_vp7)
2177 {
2178  VP8Context *s = avctx->priv_data;
2179  int mb_x, mb_y;
2180 
2181  s->mv_min.y = -MARGIN;
2182  s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2183  for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
2184  VP8Macroblock *mb = s->macroblocks_base +
2185  ((s->mb_width + 1) * (mb_y + 1) + 1);
2186  int mb_xy = mb_y * s->mb_width;
2187 
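 /* Multiplying DC_PRED by 0x01010101 replicates it into all four bytes,
  * resetting the left intra4x4 mode cache in a single aligned store. */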
2188  AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2189 
2190  s->mv_min.x = -MARGIN;
2191  s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2192  for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2193  if (mb_y == 0)
2194  AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
2195  DC_PRED * 0x01010101);
2196  decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2197  prev_frame && prev_frame->seg_map ?
2198  prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
2199  s->mv_min.x -= 64;
2200  s->mv_max.x -= 64;
2201  }
2202  s->mv_min.y -= 64;
2203  s->mv_max.y -= 64;
2204  }
2205 }
2206 
2207 static void vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2208  VP8Frame *prev_frame)
2209 {
2210  vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7);
2211 }
2212 
2213 static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2214  VP8Frame *prev_frame)
2215 {
2216  vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8);
2217 }
2218 
2219 #if HAVE_THREADS
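/* Sliced-threading progress is one int per thread: mb_y in the high 16
 * bits, mb_x in the low 16, so a single compare orders (row, column)
 * pairs. check_thread_pos blocks until `otd` passes the given position;
 * update_pos publishes our position and wakes any thread waiting on it. */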
2220 #define check_thread_pos(td, otd, mb_x_check, mb_y_check) \
2221  do { \
2222  int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF); \
2223  if (otd->thread_mb_pos < tmp) { \
2224  pthread_mutex_lock(&otd->lock); \
2225  td->wait_mb_pos = tmp; \
2226  do { \
2227  if (otd->thread_mb_pos >= tmp) \
2228  break; \
2229  pthread_cond_wait(&otd->cond, &otd->lock); \
2230  } while (1); \
2231  td->wait_mb_pos = INT_MAX; \
2232  pthread_mutex_unlock(&otd->lock); \
2233  } \
2234  } while (0);
2235 
2236 #define update_pos(td, mb_y, mb_x) \
2237  do { \
2238  int pos = (mb_y << 16) | (mb_x & 0xFFFF); \
2239  int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
2240  (num_jobs > 1); \
2241  int is_null = (next_td == NULL) || (prev_td == NULL); \
2242  int pos_check = (is_null) ? 1 \
2243  : (next_td != td && \
2244  pos >= next_td->wait_mb_pos) || \
2245  (prev_td != td && \
2246  pos >= prev_td->wait_mb_pos); \
2247  td->thread_mb_pos = pos; \
2248  if (sliced_threading && pos_check) { \
2249  pthread_mutex_lock(&td->lock); \
2250  pthread_cond_broadcast(&td->cond); \
2251  pthread_mutex_unlock(&td->lock); \
2252  } \
2253  } while (0);
2254 #else
2255 #define check_thread_pos(td, otd, mb_x_check, mb_y_check)
2256 #define update_pos(td, mb_y, mb_x)
2257 #endif
2258 
2259 static av_always_inline void decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2260  int jobnr, int threadnr, int is_vp7)
2261 {
2262  VP8Context *s = avctx->priv_data;
2263  VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
2264  int mb_y = td->thread_mb_pos >> 16;
2265  int mb_x, mb_xy = mb_y * s->mb_width;
2266  int num_jobs = s->num_jobs;
2267  VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
2268  VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
2269  VP8Macroblock *mb;
2270  uint8_t *dst[3] = {
2271  curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
2272  curframe->tf.f->data[1] + 8 * mb_y * s->uvlinesize,
2273  curframe->tf.f->data[2] + 8 * mb_y * s->uvlinesize
2274  };
2275  if (mb_y == 0)
2276  prev_td = td;
2277  else
2278  prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2279  if (mb_y == s->mb_height - 1)
2280  next_td = td;
2281  else
2282  next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2283  if (s->mb_layout == 1)
2284  mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2285  else {
2286  // Make sure the previous frame has finished writing its
2287  // segmentation map, if we re-use the same map.
2288  if (prev_frame && s->segmentation.enabled &&
2289  !s->segmentation.update_map)
2290  ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
2291  mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2292  memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
2293  AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2294  }
2295 
2296  if (!is_vp7 || mb_y == 0)
2297  memset(td->left_nnz, 0, sizeof(td->left_nnz));
2298 
2299  s->mv_min.x = -MARGIN;
2300  s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2301 
2302  for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2303  // Wait for the previous-row thread to pass mb_x+2 (VP7) or mb_x+1 (VP8).
2304  if (prev_td != td) {
2305  if (threadnr != 0) {
2306  check_thread_pos(td, prev_td,
2307  mb_x + (is_vp7 ? 2 : 1),
2308  mb_y - (is_vp7 ? 2 : 1));
2309  } else {
2310  check_thread_pos(td, prev_td,
2311  mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3,
2312  mb_y - (is_vp7 ? 2 : 1));
2313  }
2314  }
2315 
2316  s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
2317  s->linesize, 4);
2318  s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
2319  dst[2] - dst[1], 2);
2320 
2321  if (!s->mb_layout)
2322  decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2323  prev_frame && prev_frame->seg_map ?
2324  prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);
2325 
2326  prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
2327 
2328  if (!mb->skip)
2329  decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7);
2330 
2331  if (mb->mode <= MODE_I4x4)
2332  intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7);
2333  else
2334  inter_predict(s, td, dst, mb, mb_x, mb_y);
2335 
2336  prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
2337 
2338  if (!mb->skip) {
2339  idct_mb(s, td, dst, mb);
2340  } else {
2341  AV_ZERO64(td->left_nnz);
2342  AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
2343 
2344  /* Reset DC block predictors if they would exist
2345  * if the mb had coefficients */
2346  if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
2347  td->left_nnz[8] = 0;
2348  s->top_nnz[mb_x][8] = 0;
2349  }
2350  }
2351 
2352  if (s->deblock_filter)
2353  filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7);
2354 
2355  if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
2356  if (s->filter.simple)
2357  backup_mb_border(s->top_border[mb_x + 1], dst[0],
2358  NULL, NULL, s->linesize, 0, 1);
2359  else
2360  backup_mb_border(s->top_border[mb_x + 1], dst[0],
2361  dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2362  }
2363 
2364  prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
2365 
2366  dst[0] += 16;
2367  dst[1] += 8;
2368  dst[2] += 8;
2369  s->mv_min.x -= 64;
2370  s->mv_max.x -= 64;
2371 
2372  if (mb_x == s->mb_width + 1) {
2373  update_pos(td, mb_y, s->mb_width + 3);
2374  } else {
2375  update_pos(td, mb_y, mb_x);
2376  }
2377  }
2378 }
2379 
2380 static void vp7_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2381  int jobnr, int threadnr)
2382 {
2383  decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 1);
2384 }
2385 
2386 static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2387  int jobnr, int threadnr)
2388 {
2389  decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 0);
2390 }
2391 
2392 static av_always_inline void filter_mb_row(AVCodecContext *avctx, void *tdata,
2393  int jobnr, int threadnr, int is_vp7)
2394 {
2395  VP8Context *s = avctx->priv_data;
2396  VP8ThreadData *td = &s->thread_data[threadnr];
2397  int mb_x, mb_y = td->thread_mb_pos >> 16, num_jobs = s->num_jobs;
2398  AVFrame *curframe = s->curframe->tf.f;
2399  VP8Macroblock *mb;
2400  VP8ThreadData *prev_td, *next_td;
2401  uint8_t *dst[3] = {
2402  curframe->data[0] + 16 * mb_y * s->linesize,
2403  curframe->data[1] + 8 * mb_y * s->uvlinesize,
2404  curframe->data[2] + 8 * mb_y * s->uvlinesize
2405  };
2406 
2407  if (s->mb_layout == 1)
2408  mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2409  else
2410  mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2411 
2412  if (mb_y == 0)
2413  prev_td = td;
2414  else
2415  prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2416  if (mb_y == s->mb_height - 1)
2417  next_td = td;
2418  else
2419  next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2420 
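 /* The filter pass reports progress at mb_x + s->mb_width + 3, so it can
  * share the per-row position counter with the decode pass above. */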
2421  for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
2422  VP8FilterStrength *f = &td->filter_strength[mb_x];
2423  if (prev_td != td)
2424  check_thread_pos(td, prev_td,
2425  (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
2426  if (next_td != td)
2427  if (next_td != &s->thread_data[0])
2428  check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);
2429 
2430  if (num_jobs == 1) {
2431  if (s->filter.simple)
2432  backup_mb_border(s->top_border[mb_x + 1], dst[0],
2433  NULL, NULL, s->linesize, 0, 1);
2434  else
2435  backup_mb_border(s->top_border[mb_x + 1], dst[0],
2436  dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2437  }
2438 
2439  if (s->filter.simple)
2440  filter_mb_simple(s, dst[0], f, mb_x, mb_y);
2441  else
2442  filter_mb(s, dst, f, mb_x, mb_y, is_vp7);
2443  dst[0] += 16;
2444  dst[1] += 8;
2445  dst[2] += 8;
2446 
2447  update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
2448  }
2449 }
2450 
2451 static void vp7_filter_mb_row(AVCodecContext *avctx, void *tdata,
2452  int jobnr, int threadnr)
2453 {
2454  filter_mb_row(avctx, tdata, jobnr, threadnr, 1);
2455 }
2456 
2457 static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
2458  int jobnr, int threadnr)
2459 {
2460  filter_mb_row(avctx, tdata, jobnr, threadnr, 0);
2461 }
2462 
2463 static av_always_inline
2464 int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
2465  int threadnr, int is_vp7)
2466 {
2467  VP8Context *s = avctx->priv_data;
2468  VP8ThreadData *td = &s->thread_data[jobnr];
2469  VP8ThreadData *next_td = NULL, *prev_td = NULL;
2470  VP8Frame *curframe = s->curframe;
2471  int mb_y, num_jobs = s->num_jobs;
2472 
2473  td->thread_nr = threadnr;
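 /* Rows are interleaved across jobs: job N decodes rows N, N + num_jobs,
  * N + 2 * num_jobs, and so on. */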
2474  for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
2475  if (mb_y >= s->mb_height)
2476  break;
2477  td->thread_mb_pos = mb_y << 16;
2478  s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
2479  if (s->deblock_filter)
2480  s->filter_mb_row(avctx, tdata, jobnr, threadnr);
2481  update_pos(td, mb_y, INT_MAX & 0xFFFF);
2482 
2483  s->mv_min.y -= 64;
2484  s->mv_max.y -= 64;
2485 
2486  if (avctx->active_thread_type == FF_THREAD_FRAME)
2487  ff_thread_report_progress(&curframe->tf, mb_y, 0);
2488  }
2489 
2490  return 0;
2491 }
2492 
2493 static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2494  int jobnr, int threadnr)
2495 {
2496  return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7);
2497 }
2498 
2499 static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2500  int jobnr, int threadnr)
2501 {
2502  return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);
2503 }
2504 
2505 
2506 static av_always_inline
2507 int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2508  AVPacket *avpkt, int is_vp7)
2509 {
2510  VP8Context *s = avctx->priv_data;
2511  int ret, i, referenced, num_jobs;
2512  enum AVDiscard skip_thresh;
2513  VP8Frame *av_uninit(curframe), *prev_frame;
2514 
2515  if (is_vp7)
2516  ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
2517  else
2518  ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size);
2519 
2520  if (ret < 0)
2521  goto err;
2522 
2523  prev_frame = s->framep[VP56_FRAME_CURRENT];
2524 
2525  referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
2526  s->update_altref == VP56_FRAME_CURRENT;
2527 
2528  skip_thresh = !referenced ? AVDISCARD_NONREF
2529  : !s->keyframe ? AVDISCARD_NONKEY
2530  : AVDISCARD_ALL;
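 /* Unreferenced frames may be dropped once skip_frame reaches
  * AVDISCARD_NONREF, referenced inter frames at AVDISCARD_NONKEY,
  * and keyframes only at AVDISCARD_ALL. */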
2531 
2532  if (avctx->skip_frame >= skip_thresh) {
2533  s->invisible = 1;
2534  memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2535  goto skip_decode;
2536  }
2537  s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
2538 
2539  // release no longer referenced frames
2540  for (i = 0; i < 5; i++)
2541  if (s->frames[i].tf.f->data[0] &&
2542  &s->frames[i] != prev_frame &&
2543  &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
2544  &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
2545  &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
2546  vp8_release_frame(s, &s->frames[i]);
2547 
2548  curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s);
2549 
2550  /* Given that arithmetic probabilities are updated every frame, it's quite
2551  * likely that the values we have on a random interframe are complete
2552  * junk if we didn't start decode on a keyframe. So just don't display
2553  * anything rather than junk. */
2554  if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
2555  !s->framep[VP56_FRAME_GOLDEN] ||
2556  !s->framep[VP56_FRAME_GOLDEN2])) {
2557  av_log(avctx, AV_LOG_WARNING,
2558  "Discarding interframe without a prior keyframe!\n");
2559  ret = AVERROR_INVALIDDATA;
2560  goto err;
2561  }
2562 
2563  curframe->tf.f->key_frame = s->keyframe;
2564  curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
2565  : AV_PICTURE_TYPE_P;
2566  if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
2567  goto err;
2568 
2569  // check if golden and altref are swapped
2570  if (s->update_altref != VP56_FRAME_NONE)
2571  s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
2572  else
2573  s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
2574 
2575  if (s->update_golden != VP56_FRAME_NONE)
2576  s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
2577  else
2578  s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
2579 
2580  if (s->update_last)
2581  s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
2582  else
2583  s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
2584 
2585  s->next_framep[VP56_FRAME_CURRENT] = curframe;
2586 
2587  if (avctx->codec->update_thread_context)
2588  ff_thread_finish_setup(avctx);
2589 
2590  s->linesize = curframe->tf.f->linesize[0];
2591  s->uvlinesize = curframe->tf.f->linesize[1];
2592 
2593  memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
2594  /* Zero macroblock structures for top/top-left prediction
2595  * from outside the frame. */
2596  if (!s->mb_layout)
2597  memset(s->macroblocks + s->mb_height * 2 - 1, 0,
2598  (s->mb_width + 1) * sizeof(*s->macroblocks));
2599  if (!s->mb_layout && s->keyframe)
2600  memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);
2601 
2602  memset(s->ref_count, 0, sizeof(s->ref_count));
2603 
2604  if (s->mb_layout == 1) {
2605  // Make sure the previous frame has finished writing its
2606  // segmentation map, if we re-use the same map.
2607  if (prev_frame && s->segmentation.enabled &&
2608  !s->segmentation.update_map)
2609  ff_thread_await_progress(&prev_frame->tf, 1, 0);
2610  if (is_vp7)
2611  vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
2612  else
2613  vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
2614  }
2615 
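 /* With frame threading each thread owns a whole frame, so one job is
  * enough; with slice threading, row jobs are capped by the number of
  * coefficient partitions, since concurrent rows read from distinct
  * partitions. */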
2616  if (avctx->active_thread_type == FF_THREAD_FRAME)
2617  num_jobs = 1;
2618  else
2619  num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
2620  s->num_jobs = num_jobs;
2621  s->curframe = curframe;
2622  s->prev_frame = prev_frame;
2623  s->mv_min.y = -MARGIN;
2624  s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2625  for (i = 0; i < MAX_THREADS; i++) {
2626  s->thread_data[i].thread_mb_pos = 0;
2627  s->thread_data[i].wait_mb_pos = INT_MAX;
2628  }
2629  if (is_vp7)
2630  avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
2631  num_jobs);
2632  else
2633  avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
2634  num_jobs);
2635 
2636  ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
2637  memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
2638 
2639 skip_decode:
2640  // if future frames don't use the updated probabilities,
2641  // reset them to the values we saved
2642  if (!s->update_probabilities)
2643  s->prob[0] = s->prob[1];
2644 
2645  if (!s->invisible) {
2646  if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
2647  return ret;
2648  *got_frame = 1;
2649  }
2650 
2651  return avpkt->size;
2652 err:
2653  memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2654  return ret;
2655 }
2656 
2657 int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2658  AVPacket *avpkt)
2659 {
2660  return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP8);
2661 }
2662 
2663 #if CONFIG_VP7_DECODER
2664 static int vp7_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2665  AVPacket *avpkt)
2666 {
2667  return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP7);
2668 }
2669 #endif /* CONFIG_VP7_DECODER */
2670 
2671 av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
2672 {
2673  VP8Context *s = avctx->priv_data;
2674  int i;
2675 
2676  vp8_decode_flush_impl(avctx, 1);
2677  for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
2678  av_frame_free(&s->frames[i].tf.f);
2679 
2680  return 0;
2681 }
2682 
2683 static av_cold int vp8_init_frames(VP8Context *s)
2684 {
2685  int i;
2686  for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
2687  s->frames[i].tf.f = av_frame_alloc();
2688  if (!s->frames[i].tf.f)
2689  return AVERROR(ENOMEM);
2690  }
2691  return 0;
2692 }
2693 
2694 static av_always_inline
2695 int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
2696 {
2697  VP8Context *s = avctx->priv_data;
2698  int ret;
2699 
2700  s->avctx = avctx;
2701  s->vp7 = avctx->codec->id == AV_CODEC_ID_VP7;
2702  avctx->pix_fmt = AV_PIX_FMT_YUV420P;
2703  avctx->internal->allocate_progress = 1;
2704 
2705  ff_videodsp_init(&s->vdsp, 8);
2706 
2707  ff_vp78dsp_init(&s->vp8dsp);
2708  if (CONFIG_VP7_DECODER && is_vp7) {
2709  ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1);
2710  ff_vp7dsp_init(&s->vp8dsp);
2711  s->decode_mb_row_no_filter = vp7_decode_mb_row_no_filter;
2712  s->filter_mb_row = vp7_filter_mb_row;
2713  } else if (CONFIG_VP8_DECODER && !is_vp7) {
2714  ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
2715  ff_vp8dsp_init(&s->vp8dsp);
2716  s->decode_mb_row_no_filter = vp8_decode_mb_row_no_filter;
2717  s->filter_mb_row = vp8_filter_mb_row;
2718  }
2719 
2720  /* does not change for VP8 */
2721  memcpy(s->prob[0].scan, zigzag_scan, sizeof(s->prob[0].scan));
2722 
2723  if ((ret = vp8_init_frames(s)) < 0) {
2724  ff_vp8_decode_free(avctx);
2725  return ret;
2726  }
2727 
2728  return 0;
2729 }
2730 
2731 #if CONFIG_VP7_DECODER
2732 static int vp7_decode_init(AVCodecContext *avctx)
2733 {
2734  return vp78_decode_init(avctx, IS_VP7);
2735 }
2736 #endif /* CONFIG_VP7_DECODER */
2737 
2738 av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
2739 {
2740  return vp78_decode_init(avctx, IS_VP8);
2741 }
2742 
2743 #if CONFIG_VP8_DECODER
2744 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
2745 {
2746  VP8Context *s = avctx->priv_data;
2747  int ret;
2748 
2749  s->avctx = avctx;
2750 
2751  if ((ret = vp8_init_frames(s)) < 0) {
2752  ff_vp8_decode_free(avctx);
2753  return ret;
2754  }
2755 
2756  return 0;
2757 }
2758 
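/* Translate a frame pointer from the source context into the matching
 * slot of this context's frames[] array. */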
2759 #define REBASE(pic) ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)
2760 
2761 static int vp8_decode_update_thread_context(AVCodecContext *dst,
2762  const AVCodecContext *src)
2763 {
2764  VP8Context *s = dst->priv_data, *s_src = src->priv_data;
2765  int i;
2766 
2767  if (s->macroblocks_base &&
2768  (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
2769  free_buffers(s);
2770  s->mb_width = s_src->mb_width;
2771  s->mb_height = s_src->mb_height;
2772  }
2773 
2774  s->prob[0] = s_src->prob[!s_src->update_probabilities];
2775  s->segmentation = s_src->segmentation;
2776  s->lf_delta = s_src->lf_delta;
2777  memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
2778 
2779  for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
2780  if (s_src->frames[i].tf.f->data[0]) {
2781  int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
2782  if (ret < 0)
2783  return ret;
2784  }
2785  }
2786 
2787  s->framep[0] = REBASE(s_src->next_framep[0]);
2788  s->framep[1] = REBASE(s_src->next_framep[1]);
2789  s->framep[2] = REBASE(s_src->next_framep[2]);
2790  s->framep[3] = REBASE(s_src->next_framep[3]);
2791 
2792  return 0;
2793 }
2794 #endif /* CONFIG_VP8_DECODER */
2795 
2796 #if CONFIG_VP7_DECODER
2797 AVCodec ff_vp7_decoder = {
2798  .name = "vp7",
2799  .long_name = NULL_IF_CONFIG_SMALL("On2 VP7"),
2800  .type = AVMEDIA_TYPE_VIDEO,
2801  .id = AV_CODEC_ID_VP7,
2802  .priv_data_size = sizeof(VP8Context),
2803  .init = vp7_decode_init,
2804  .close = ff_vp8_decode_free,
2805  .decode = vp7_decode_frame,
2806  .capabilities = CODEC_CAP_DR1,
2807  .flush = vp8_decode_flush,
2808 };
2809 #endif /* CONFIG_VP7_DECODER */
2810 
2811 #if CONFIG_VP8_DECODER
2812 AVCodec ff_vp8_decoder = {
2813  .name = "vp8",
2814  .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
2815  .type = AVMEDIA_TYPE_VIDEO,
2816  .id = AV_CODEC_ID_VP8,
2817  .priv_data_size = sizeof(VP8Context),
2818  .init = ff_vp8_decode_init,
2819  .close = ff_vp8_decode_free,
2820  .decode = ff_vp8_decode_frame,
2821  .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
2822  .flush = vp8_decode_flush,
2823  .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
2824  .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
2825 };
2826 #endif /* CONFIG_VP8_DECODER */