FFmpeg
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
proresenc_kostya.c
Go to the documentation of this file.
1 /*
2  * Apple ProRes encoder
3  *
4  * Copyright (c) 2012 Konstantin Shishkov
5  *
6  * This encoder appears to be based on Anatoliy Wasserman's encoder,
7  * considering the similarities in the bugs.
8  *
9  * This file is part of Libav.
10  *
11  * Libav is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * Libav is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with Libav; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24  */
25 
26 #include "libavutil/opt.h"
27 #include "avcodec.h"
28 #include "dsputil.h"
29 #include "put_bits.h"
30 #include "bytestream.h"
31 #include "internal.h"
32 #include "proresdsp.h"
33 #include "proresdata.h"
34 
/* Chroma format identifiers: compared against ctx->chroma_factor and shifted
 * into the frame flags byte when the frame header is written. */
35 #define CFACTOR_Y422 2
36 #define CFACTOR_Y444 3
37 
/* Upper limit of the "mbs_per_slice" option; also sizes the per-plane
 * coefficient buffers. */
38 #define MAX_MBS_PER_SLICE 8
39 
/* Number of planes the per-thread coefficient buffers are dimensioned for. */
40 #define MAX_PLANES 3 // should be increased to 4 when there's AV_PIX_FMT_YUV444AP10
41 
/* NOTE(review): the enumerators of both enums are missing from this listing.
 * Judging by the constants used further down, the first enum presumably
 * declares PRORES_PROFILE_* and the second QUANT_MAT_* -- confirm against
 * the original file. */
42 enum {
47 };
48 
49 enum {
55 };
56 
/* Base quantisation matrices, 64 entries each; the comment on every row group
 * names the matrix. The encoder multiplies a matrix by the quantiser value to
 * obtain the effective per-coefficient divisors.
 * NOTE(review): rows are presumably selected through the QUANT_MAT_* indices;
 * the selecting code is not visible in this listing. */
57 static const uint8_t prores_quant_matrices[][64] = {
58  { // proxy
59  4, 7, 9, 11, 13, 14, 15, 63,
60  7, 7, 11, 12, 14, 15, 63, 63,
61  9, 11, 13, 14, 15, 63, 63, 63,
62  11, 11, 13, 14, 63, 63, 63, 63,
63  11, 13, 14, 63, 63, 63, 63, 63,
64  13, 14, 63, 63, 63, 63, 63, 63,
65  13, 63, 63, 63, 63, 63, 63, 63,
66  63, 63, 63, 63, 63, 63, 63, 63,
67  },
68  { // LT
69  4, 5, 6, 7, 9, 11, 13, 15,
70  5, 5, 7, 8, 11, 13, 15, 17,
71  6, 7, 9, 11, 13, 15, 15, 17,
72  7, 7, 9, 11, 13, 15, 17, 19,
73  7, 9, 11, 13, 14, 16, 19, 23,
74  9, 11, 13, 14, 16, 19, 23, 29,
75  9, 11, 13, 15, 17, 21, 28, 35,
76  11, 13, 16, 17, 21, 28, 35, 41,
77  },
78  { // standard
79  4, 4, 5, 5, 6, 7, 7, 9,
80  4, 4, 5, 6, 7, 7, 9, 9,
81  5, 5, 6, 7, 7, 9, 9, 10,
82  5, 5, 6, 7, 7, 9, 9, 10,
83  5, 6, 7, 7, 8, 9, 10, 12,
84  6, 7, 7, 8, 9, 10, 12, 15,
85  6, 7, 7, 9, 10, 11, 14, 17,
86  7, 7, 9, 10, 11, 14, 17, 21,
87  },
88  { // high quality
89  4, 4, 4, 4, 4, 4, 4, 4,
90  4, 4, 4, 4, 4, 4, 4, 4,
91  4, 4, 4, 4, 4, 4, 4, 4,
92  4, 4, 4, 4, 4, 4, 4, 5,
93  4, 4, 4, 4, 4, 4, 5, 5,
94  4, 4, 4, 4, 4, 5, 5, 6,
95  4, 4, 4, 4, 5, 5, 6, 7,
96  4, 4, 4, 4, 5, 6, 7, 7,
97  },
98  { // codec default
99  4, 4, 4, 4, 4, 4, 4, 4,
100  4, 4, 4, 4, 4, 4, 4, 4,
101  4, 4, 4, 4, 4, 4, 4, 4,
102  4, 4, 4, 4, 4, 4, 4, 4,
103  4, 4, 4, 4, 4, 4, 4, 4,
104  4, 4, 4, 4, 4, 4, 4, 4,
105  4, 4, 4, 4, 4, 4, 4, 4,
106  4, 4, 4, 4, 4, 4, 4, 4,
107  },
108 };
109 
/* Macroblock-count thresholds. encode_init() looks for the first entry that
 * is >= the number of macroblocks per frame and uses its index to pick the
 * default bits-per-macroblock value from the profile's br_tab. */
110 #define NUM_MB_LIMITS 4
111 static const int prores_mb_limits[NUM_MB_LIMITS] = {
112  1620, // up to 720x576
113  2700, // up to 960x720
114  6075, // up to 1440x1080
115  9216, // up to 2048x1152
116 };
117 
/* Per-profile parameters: display name, codec tag, quantiser range, default
 * bits-per-macroblock table and the quantisation matrix used by default.
 * NOTE(review): the declarations of min_quant, max_quant and br_tab are
 * missing from this listing although the initialisers below set them;
 * br_tab presumably has NUM_MB_LIMITS entries -- confirm against the
 * original file. */
118 static const struct prores_profile {
119  const char *full_name;
120  uint32_t tag;
124  int quant;
125 } prores_profile_info[4] = {
126  {
127  .full_name = "proxy",
128  .tag = MKTAG('a', 'p', 'c', 'o'),
129  .min_quant = 4,
130  .max_quant = 8,
131  .br_tab = { 300, 242, 220, 194 },
132  .quant = QUANT_MAT_PROXY,
133  },
134  {
135  .full_name = "LT",
136  .tag = MKTAG('a', 'p', 'c', 's'),
137  .min_quant = 1,
138  .max_quant = 9,
139  .br_tab = { 720, 560, 490, 440 },
140  .quant = QUANT_MAT_LT,
141  },
142  {
143  .full_name = "standard",
144  .tag = MKTAG('a', 'p', 'c', 'n'),
145  .min_quant = 1,
146  .max_quant = 6,
147  .br_tab = { 1050, 808, 710, 632 },
148  .quant = QUANT_MAT_STANDARD,
149  },
150  {
151  .full_name = "high quality",
152  .tag = MKTAG('a', 'p', 'c', 'h'),
153  .min_quant = 1,
154  .max_quant = 6,
155  .br_tab = { 1566, 1216, 1070, 950 },
156  .quant = QUANT_MAT_HQ,
157  }
158 // for 4444 profile bitrate numbers are { 2350, 1828, 1600, 1425 }
159 };
160 
/* Number of trellis nodes reserved per slice position; nodes inside one
 * position are indexed by quantiser. */
#define TRELLIS_WIDTH 16
/* Score that marks a rate-control path as unusable. Parenthesised so that
 * the macro keeps its value when expanded inside a larger expression. */
#define SCORE_LIMIT   (INT_MAX / 2)
163 
/* One node of the quantiser-selection trellis.
 * NOTE(review): a member line is missing from this listing; the code below
 * also accesses a `prev_node` member -- confirm against the original file. */
164 struct TrellisNode {
166  int quant; // quantiser this node stands for
167  int bits; // accumulated bits along the best path reaching this node
168  int score; // accumulated error score along that path
169 };
170 
/* Number of quantiser values with a precomputed matrix in ctx->quants;
 * larger quantisers build a matrix on the fly in custom_q. */
171 #define MAX_STORED_Q 16
172 
/* Scratch state owned by one worker thread during quantiser search.
 * NOTE(review): the end of this struct is missing from this listing; the
 * code below also uses a `nodes` member -- confirm against the original. */
173 typedef struct ProresThreadData {
174  DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
175  DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
176  int16_t custom_q[64];
179 
/* Private encoder context.
 * NOTE(review): many member declarations are missing from this listing
 * (the code below uses, among others, dsp, scantable, blocks, quant_mat,
 * mbs_per_slice, mb_width, mb_height, slices_width, slices_per_picture,
 * cur_picture_idx, num_planes, bits_per_mb, force_quant, chroma_factor,
 * frame_size_upper_bound, quant_sel, profile_info and tdata) -- confirm
 * against the original file. */
180 typedef struct ProresContext {
181  AVClass *class;
183  DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
184  int16_t quants[MAX_STORED_Q][64]; // quant_mat scaled by each stored quantiser
185  int16_t custom_q[64]; // matrix built on demand for quantisers >= MAX_STORED_Q
187 
190 
196  int pictures_per_frame; // 1 for progressive, 2 for interlaced
201 
202  char *vendor; // "vendor" option; must be exactly 4 characters long
204 
206 
207  int profile; // "profile" option
209 
210  int *slice_q; // quantiser chosen for every slice of a picture
211 
213 } ProresContext;
214 
215 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
216  int linesize, int x, int y, int w, int h,
217  int16_t *blocks, uint16_t *emu_buf,
218  int mbs_per_slice, int blocks_per_mb, int is_chroma)
219 {
220  const uint16_t *esrc;
221  const int mb_width = 4 * blocks_per_mb;
222  int elinesize;
223  int i, j, k;
224 
225  for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
226  if (x >= w) {
227  memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
228  * sizeof(*blocks));
229  return;
230  }
231  if (x + mb_width <= w && y + 16 <= h) {
232  esrc = src;
233  elinesize = linesize;
234  } else {
235  int bw, bh, pix;
236 
237  esrc = emu_buf;
238  elinesize = 16 * sizeof(*emu_buf);
239 
240  bw = FFMIN(w - x, mb_width);
241  bh = FFMIN(h - y, 16);
242 
243  for (j = 0; j < bh; j++) {
244  memcpy(emu_buf + j * 16,
245  (const uint8_t*)src + j * linesize,
246  bw * sizeof(*src));
247  pix = emu_buf[j * 16 + bw - 1];
248  for (k = bw; k < mb_width; k++)
249  emu_buf[j * 16 + k] = pix;
250  }
251  for (; j < 16; j++)
252  memcpy(emu_buf + j * 16,
253  emu_buf + (bh - 1) * 16,
254  mb_width * sizeof(*emu_buf));
255  }
256  if (!is_chroma) {
257  ctx->dsp.fdct(esrc, elinesize, blocks);
258  blocks += 64;
259  if (blocks_per_mb > 2) {
260  ctx->dsp.fdct(esrc + 8, elinesize, blocks);
261  blocks += 64;
262  }
263  ctx->dsp.fdct(esrc + elinesize * 4, elinesize, blocks);
264  blocks += 64;
265  if (blocks_per_mb > 2) {
266  ctx->dsp.fdct(esrc + elinesize * 4 + 8, elinesize, blocks);
267  blocks += 64;
268  }
269  } else {
270  ctx->dsp.fdct(esrc, elinesize, blocks);
271  blocks += 64;
272  ctx->dsp.fdct(esrc + elinesize * 4, elinesize, blocks);
273  blocks += 64;
274  if (blocks_per_mb > 2) {
275  ctx->dsp.fdct(esrc + 8, elinesize, blocks);
276  blocks += 64;
277  ctx->dsp.fdct(esrc + elinesize * 4 + 8, elinesize, blocks);
278  blocks += 64;
279  }
280  }
281 
282  x += mb_width;
283  }
284 }
285 
286 /**
287  * Write an unsigned rice/exp golomb codeword.
288  */
289 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
290 {
291  unsigned int rice_order, exp_order, switch_bits, switch_val;
292  int exponent;
293 
294  /* number of prefix bits to switch between Rice and expGolomb */
295  switch_bits = (codebook & 3) + 1;
296  rice_order = codebook >> 5; /* rice code order */
297  exp_order = (codebook >> 2) & 7; /* exp golomb code order */
298 
299  switch_val = switch_bits << rice_order;
300 
301  if (val >= switch_val) {
302  val -= switch_val - (1 << exp_order);
303  exponent = av_log2(val);
304 
305  put_bits(pb, exponent - exp_order + switch_bits, 0);
306  put_bits(pb, exponent + 1, val);
307  } else {
308  exponent = val >> rice_order;
309 
310  if (exponent)
311  put_bits(pb, exponent, 0);
312  put_bits(pb, 1, 1);
313  if (rice_order)
314  put_sbits(pb, rice_order, val);
315  }
316 }
317 
/* GET_SIGN(x): x shifted right by 31 bits; for a 32-bit int with arithmetic
 * right shift this is 0 for non-negative x and -1 for negative x
 * (NOTE(review): right-shifting a negative value is implementation-defined).
 * MAKE_CODE(x): folds a signed value into a non-negative code,
 * 0, -1, 1, -2, ... -> 0, 1, 2, 3, ... */
318 #define GET_SIGN(x) ((x) >> 31)
319 #define MAKE_CODE(x) (((x) << 1) ^ GET_SIGN(x))
320 
/*
 * Write the DC coefficients of a slice.
 *
 * Every DC is taken from the first entry of a 64-coefficient block, offset
 * by 0x4000 and divided by scale. From the second block on, the difference
 * to the previous DC is written with a codebook chosen from the previous
 * code.
 * NOTE(review): one statement after the prev_dc computation is missing from
 * this listing; presumably it writes the first DC -- confirm against the
 * original file.
 */
321 static void encode_dcs(PutBitContext *pb, int16_t *blocks,
322  int blocks_per_slice, int scale)
323 {
324  int i;
325  int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
326 
327  prev_dc = (blocks[0] - 0x4000) / scale;
329  sign = 0;
330  codebook = 3;
331  blocks += 64;
332 
333  for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
334  dc = (blocks[0] - 0x4000) / scale;
335  delta = dc - prev_dc;
336  new_sign = GET_SIGN(delta);
 // flip the delta when the previous delta was negative
337  delta = (delta ^ sign) - sign;
338  code = MAKE_CODE(delta);
339  encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
 // next codebook index is ceil(code / 2), capped at 3
340  codebook = (code + (code & 1)) >> 1;
341  codebook = FFMIN(codebook, 3);
342  sign = new_sign;
343  prev_dc = dc;
344  }
345 }
346 
/*
 * Write the AC coefficients of a slice as (run, level, sign) triples.
 *
 * Coefficients are visited scan position by scan position, each position
 * across all blocks of the slice; zero levels extend the current run.
 * plane_size_factor is not used by the visible code.
 * NOTE(review): the line that starts the level codeword call is missing from
 * this listing (only its continuation "abs_level - 1);" is left) -- confirm
 * against the original file.
 */
347 static void encode_acs(PutBitContext *pb, int16_t *blocks,
348  int blocks_per_slice,
349  int plane_size_factor,
350  const uint8_t *scan, const int16_t *qmat)
351 {
352  int idx, i;
353  int run, level, run_cb, lev_cb;
354  int max_coeffs, abs_level;
355 
356  max_coeffs = blocks_per_slice << 6;
357  run_cb = ff_prores_run_to_cb_index[4];
358  lev_cb = ff_prores_lev_to_cb_index[2];
359  run = 0;
360 
 // i starts at 1: scan position 0 is the DC coefficient
361  for (i = 1; i < 64; i++) {
362  for (idx = scan[i]; idx < max_coeffs; idx += 64) {
363  level = blocks[idx] / qmat[scan[i]];
364  if (level) {
365  abs_level = FFABS(level);
366  encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
368  abs_level - 1);
369  put_sbits(pb, 1, GET_SIGN(level));
370 
 // codebooks for the next pair depend on the run and level just coded
371  run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
372  lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
373  run = 0;
374  } else {
375  run++;
376  }
377  }
378  }
379 }
380 
/* NOTE(review): the first line of this function's signature is missing from
 * this listing; callers invoke it as encode_slice_plane(ctx, pb, ...).
 * The function writes the DC and AC coefficients of one plane of a slice,
 * flushes the bit writer and returns the number of bytes produced. */
382  const uint16_t *src, int linesize,
383  int mbs_per_slice, int16_t *blocks,
384  int blocks_per_mb, int plane_size_factor,
385  const int16_t *qmat)
386 {
387  int blocks_per_slice, saved_pos;
388 
389  saved_pos = put_bits_count(pb);
390  blocks_per_slice = mbs_per_slice * blocks_per_mb;
391 
 // qmat[0] is the divisor applied to the DC coefficients
392  encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
393  encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
394  ctx->scantable.permutated, qmat);
395  flush_put_bits(pb);
396 
 // bit positions -> byte count
397  return (put_bits_count(pb) - saved_pos) >> 3;
398 }
399 
400 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
401  PutBitContext *pb,
402  int sizes[4], int x, int y, int quant,
403  int mbs_per_slice)
404 {
405  ProresContext *ctx = avctx->priv_data;
406  int i, xp, yp;
407  int total_size = 0;
408  const uint16_t *src;
409  int slice_width_factor = av_log2(mbs_per_slice);
410  int num_cblocks, pwidth, linesize, line_add;
411  int plane_factor, is_chroma;
412  uint16_t *qmat;
413 
414  if (ctx->pictures_per_frame == 1)
415  line_add = 0;
416  else
417  line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
418 
419  if (ctx->force_quant) {
420  qmat = ctx->quants[0];
421  } else if (quant < MAX_STORED_Q) {
422  qmat = ctx->quants[quant];
423  } else {
424  qmat = ctx->custom_q;
425  for (i = 0; i < 64; i++)
426  qmat[i] = ctx->quant_mat[i] * quant;
427  }
428 
429  for (i = 0; i < ctx->num_planes; i++) {
430  is_chroma = (i == 1 || i == 2);
431  plane_factor = slice_width_factor + 2;
432  if (is_chroma)
433  plane_factor += ctx->chroma_factor - 3;
434  if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
435  xp = x << 4;
436  yp = y << 4;
437  num_cblocks = 4;
438  pwidth = avctx->width;
439  } else {
440  xp = x << 3;
441  yp = y << 4;
442  num_cblocks = 2;
443  pwidth = avctx->width >> 1;
444  }
445 
446  linesize = pic->linesize[i] * ctx->pictures_per_frame;
447  src = (const uint16_t*)(pic->data[i] + yp * linesize +
448  line_add * pic->linesize[i]) + xp;
449 
450  get_slice_data(ctx, src, linesize, xp, yp,
451  pwidth, avctx->height / ctx->pictures_per_frame,
452  ctx->blocks[0], ctx->emu_buf,
453  mbs_per_slice, num_cblocks, is_chroma);
454  sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
455  mbs_per_slice, ctx->blocks[0],
456  num_cblocks, plane_factor,
457  qmat);
458  total_size += sizes[i];
459  }
460  return total_size;
461 }
462 
/**
 * Compute the length in bits of the codeword encode_vlc_codeword() would
 * write for val with the given codebook.
 *
 * @param codebook  packed codebook descriptor (same layout as for encoding)
 * @param val       value whose codeword length is wanted
 * @return number of bits
 */
static inline int estimate_vlc(unsigned codebook, int val)
{
    /* number of prefix bits to switch between Rice and expGolomb */
    const unsigned int switch_bits = (codebook & 3) + 1;
    const unsigned int exp_order   = (codebook >> 2) & 7;
    const unsigned int rice_order  = codebook >> 5;
    const unsigned int switch_val  = switch_bits << rice_order;
    int exponent;

    /* Rice code: unary prefix, stop bit and rice_order suffix bits */
    if (val < switch_val)
        return (val >> rice_order) + rice_order + 1;

    /* exp-Golomb code */
    val     -= switch_val - (1 << exp_order);
    exponent = av_log2(val);

    return exponent * 2 - exp_order + switch_bits + 1;
}
484 
485 static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
486  int scale)
487 {
488  int i;
489  int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
490  int bits;
491 
492  prev_dc = (blocks[0] - 0x4000) / scale;
493  bits = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
494  sign = 0;
495  codebook = 3;
496  blocks += 64;
497  *error += FFABS(blocks[0] - 0x4000) % scale;
498 
499  for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
500  dc = (blocks[0] - 0x4000) / scale;
501  *error += FFABS(blocks[0] - 0x4000) % scale;
502  delta = dc - prev_dc;
503  new_sign = GET_SIGN(delta);
504  delta = (delta ^ sign) - sign;
505  code = MAKE_CODE(delta);
506  bits += estimate_vlc(ff_prores_dc_codebook[codebook], code);
507  codebook = (code + (code & 1)) >> 1;
508  codebook = FFMIN(codebook, 3);
509  sign = new_sign;
510  prev_dc = dc;
511  }
512 
513  return bits;
514 }
515 
516 static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
517  int plane_size_factor,
518  const uint8_t *scan, const int16_t *qmat)
519 {
520  int idx, i;
521  int run, level, run_cb, lev_cb;
522  int max_coeffs, abs_level;
523  int bits = 0;
524 
525  max_coeffs = blocks_per_slice << 6;
526  run_cb = ff_prores_run_to_cb_index[4];
527  lev_cb = ff_prores_lev_to_cb_index[2];
528  run = 0;
529 
530  for (i = 1; i < 64; i++) {
531  for (idx = scan[i]; idx < max_coeffs; idx += 64) {
532  level = blocks[idx] / qmat[scan[i]];
533  *error += FFABS(blocks[idx]) % qmat[scan[i]];
534  if (level) {
535  abs_level = FFABS(level);
536  bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
537  bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
538  abs_level - 1) + 1;
539 
540  run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
541  lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
542  run = 0;
543  } else {
544  run++;
545  }
546  }
547  }
548 
549  return bits;
550 }
551 
552 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
553  const uint16_t *src, int linesize,
554  int mbs_per_slice,
555  int blocks_per_mb, int plane_size_factor,
556  const int16_t *qmat, ProresThreadData *td)
557 {
558  int blocks_per_slice;
559  int bits;
560 
561  blocks_per_slice = mbs_per_slice * blocks_per_mb;
562 
563  bits = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
564  bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
565  plane_size_factor, ctx->scantable.permutated, qmat);
566 
567  return FFALIGN(bits, 8);
568 }
569 
570 static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic,
571  int trellis_node, int x, int y, int mbs_per_slice,
572  ProresThreadData *td)
573 {
574  ProresContext *ctx = avctx->priv_data;
575  int i, q, pq, xp, yp;
576  const uint16_t *src;
577  int slice_width_factor = av_log2(mbs_per_slice);
578  int num_cblocks[MAX_PLANES], pwidth;
579  int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
580  const int min_quant = ctx->profile_info->min_quant;
581  const int max_quant = ctx->profile_info->max_quant;
582  int error, bits, bits_limit;
583  int mbs, prev, cur, new_score;
584  int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
585  int overquant;
586  uint16_t *qmat;
587  int linesize[4], line_add;
588 
589  if (ctx->pictures_per_frame == 1)
590  line_add = 0;
591  else
592  line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
593  mbs = x + mbs_per_slice;
594 
595  for (i = 0; i < ctx->num_planes; i++) {
596  is_chroma[i] = (i == 1 || i == 2);
597  plane_factor[i] = slice_width_factor + 2;
598  if (is_chroma[i])
599  plane_factor[i] += ctx->chroma_factor - 3;
600  if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
601  xp = x << 4;
602  yp = y << 4;
603  num_cblocks[i] = 4;
604  pwidth = avctx->width;
605  } else {
606  xp = x << 3;
607  yp = y << 4;
608  num_cblocks[i] = 2;
609  pwidth = avctx->width >> 1;
610  }
611 
612  linesize[i] = pic->linesize[i] * ctx->pictures_per_frame;
613  src = (const uint16_t*)(pic->data[i] + yp * linesize[i] +
614  line_add * pic->linesize[i]) + xp;
615 
616  get_slice_data(ctx, src, linesize[i], xp, yp,
617  pwidth, avctx->height / ctx->pictures_per_frame,
618  td->blocks[i], td->emu_buf,
619  mbs_per_slice, num_cblocks[i], is_chroma[i]);
620  }
621 
622  for (q = min_quant; q < max_quant + 2; q++) {
623  td->nodes[trellis_node + q].prev_node = -1;
624  td->nodes[trellis_node + q].quant = q;
625  }
626 
627  // todo: maybe perform coarser quantising to fit into frame size when needed
628  for (q = min_quant; q <= max_quant; q++) {
629  bits = 0;
630  error = 0;
631  for (i = 0; i < ctx->num_planes; i++) {
632  bits += estimate_slice_plane(ctx, &error, i,
633  src, linesize[i],
634  mbs_per_slice,
635  num_cblocks[i], plane_factor[i],
636  ctx->quants[q], td);
637  }
638  if (bits > 65000 * 8) {
639  error = SCORE_LIMIT;
640  break;
641  }
642  slice_bits[q] = bits;
643  slice_score[q] = error;
644  }
645  if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
646  slice_bits[max_quant + 1] = slice_bits[max_quant];
647  slice_score[max_quant + 1] = slice_score[max_quant] + 1;
648  overquant = max_quant;
649  } else {
650  for (q = max_quant + 1; q < 128; q++) {
651  bits = 0;
652  error = 0;
653  if (q < MAX_STORED_Q) {
654  qmat = ctx->quants[q];
655  } else {
656  qmat = td->custom_q;
657  for (i = 0; i < 64; i++)
658  qmat[i] = ctx->quant_mat[i] * q;
659  }
660  for (i = 0; i < ctx->num_planes; i++) {
661  bits += estimate_slice_plane(ctx, &error, i,
662  src, linesize[i],
663  mbs_per_slice,
664  num_cblocks[i], plane_factor[i],
665  qmat, td);
666  }
667  if (bits <= ctx->bits_per_mb * mbs_per_slice)
668  break;
669  }
670 
671  slice_bits[max_quant + 1] = bits;
672  slice_score[max_quant + 1] = error;
673  overquant = q;
674  }
675  td->nodes[trellis_node + max_quant + 1].quant = overquant;
676 
677  bits_limit = mbs * ctx->bits_per_mb;
678  for (pq = min_quant; pq < max_quant + 2; pq++) {
679  prev = trellis_node - TRELLIS_WIDTH + pq;
680 
681  for (q = min_quant; q < max_quant + 2; q++) {
682  cur = trellis_node + q;
683 
684  bits = td->nodes[prev].bits + slice_bits[q];
685  error = slice_score[q];
686  if (bits > bits_limit)
687  error = SCORE_LIMIT;
688 
689  if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
690  new_score = td->nodes[prev].score + error;
691  else
692  new_score = SCORE_LIMIT;
693  if (td->nodes[cur].prev_node == -1 ||
694  td->nodes[cur].score >= new_score) {
695 
696  td->nodes[cur].bits = bits;
697  td->nodes[cur].score = new_score;
698  td->nodes[cur].prev_node = prev;
699  }
700  }
701  }
702 
703  error = td->nodes[trellis_node + min_quant].score;
704  pq = trellis_node + min_quant;
705  for (q = min_quant + 1; q < max_quant + 2; q++) {
706  if (td->nodes[trellis_node + q].score <= error) {
707  error = td->nodes[trellis_node + q].score;
708  pq = trellis_node + q;
709  }
710  }
711 
712  return pq;
713 }
714 
715 static int find_quant_thread(AVCodecContext *avctx, void *arg,
716  int jobnr, int threadnr)
717 {
718  ProresContext *ctx = avctx->priv_data;
719  ProresThreadData *td = ctx->tdata + threadnr;
720  int mbs_per_slice = ctx->mbs_per_slice;
721  int x, y = jobnr, mb, q = 0;
722 
723  for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
724  while (ctx->mb_width - x < mbs_per_slice)
725  mbs_per_slice >>= 1;
726  q = find_slice_quant(avctx, avctx->coded_frame,
727  (mb + 1) * TRELLIS_WIDTH, x, y,
728  mbs_per_slice, td);
729  }
730 
731  for (x = ctx->slices_width - 1; x >= 0; x--) {
732  ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant;
733  q = td->nodes[q].prev_node;
734  }
735 
736  return 0;
737 }
738 
/* NOTE(review): the first line of this function's signature and two further
 * lines (after the coded_frame copy and inside the picture loop header) are
 * missing from this listing; the codec descriptor registers this function as
 * encode_frame -- confirm against the original file.
 * The function writes the frame container, the frame header and, for every
 * picture, a picture header, a slice size table and the encoded slices into
 * pkt. It returns 0 on success or the error code of a failing helper. */
740  const AVFrame *pic, int *got_packet)
741 {
742  ProresContext *ctx = avctx->priv_data;
743  uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
744  uint8_t *picture_size_pos;
745  PutBitContext pb;
746  int x, y, i, mb, q = 0;
747  int sizes[4] = { 0 };
 // one quantiser/size byte pair plus a 16-bit size for all planes but the last
748  int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
749  int frame_size, picture_size, slice_size;
750  int pkt_size, ret;
751  uint8_t frame_flags;
752 
753  *avctx->coded_frame = *pic;
755  avctx->coded_frame->key_frame = 1;
756 
757  pkt_size = ctx->frame_size_upper_bound + FF_MIN_BUFFER_SIZE;
758 
759  if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size)) < 0)
760  return ret;
761 
762  orig_buf = pkt->data;
763 
764  // frame atom
765  orig_buf += 4; // frame size
766  bytestream_put_be32 (&orig_buf, FRAME_ID); // frame container ID
767  buf = orig_buf;
768 
769  // frame header
770  tmp = buf;
771  buf += 2; // frame header size will be stored here
772  bytestream_put_be16 (&buf, 0); // version 1
773  bytestream_put_buffer(&buf, ctx->vendor, 4);
774  bytestream_put_be16 (&buf, avctx->width);
775  bytestream_put_be16 (&buf, avctx->height);
776 
777  frame_flags = ctx->chroma_factor << 6;
778  if (avctx->flags & CODEC_FLAG_INTERLACED_DCT)
779  frame_flags |= pic->top_field_first ? 0x04 : 0x08;
780  bytestream_put_byte (&buf, frame_flags);
781 
782  bytestream_put_byte (&buf, 0); // reserved
783  bytestream_put_byte (&buf, avctx->color_primaries);
784  bytestream_put_byte (&buf, avctx->color_trc);
785  bytestream_put_byte (&buf, avctx->colorspace);
786  bytestream_put_byte (&buf, 0x40); // source format and alpha information
787  bytestream_put_byte (&buf, 0); // reserved
788  if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
789  bytestream_put_byte (&buf, 0x03); // matrix flags - both matrices are present
790  // luma quantisation matrix
791  for (i = 0; i < 64; i++)
792  bytestream_put_byte(&buf, ctx->quant_mat[i]);
793  // chroma quantisation matrix
 // the same matrix is written for luma and chroma
794  for (i = 0; i < 64; i++)
795  bytestream_put_byte(&buf, ctx->quant_mat[i]);
796  } else {
797  bytestream_put_byte (&buf, 0x00); // matrix flags - default matrices are used
798  }
799  bytestream_put_be16 (&tmp, buf - orig_buf); // write back frame header size
800 
801  for (ctx->cur_picture_idx = 0;
803  ctx->cur_picture_idx++) {
804  // picture header
805  picture_size_pos = buf + 1;
806  bytestream_put_byte (&buf, 0x40); // picture header size (in bits)
807  buf += 4; // picture data size will be stored here
808  bytestream_put_be16 (&buf, ctx->slices_per_picture);
809  bytestream_put_byte (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
810 
811  // seek table - will be filled during slice encoding
812  slice_sizes = buf;
813  buf += ctx->slices_per_picture * 2;
814 
815  // slices
 // without a forced quantiser, choose per-slice quantisers one job per MB row
816  if (!ctx->force_quant) {
817  ret = avctx->execute2(avctx, find_quant_thread, NULL, NULL,
818  ctx->mb_height);
819  if (ret)
820  return ret;
821  }
822 
823  for (y = 0; y < ctx->mb_height; y++) {
824  int mbs_per_slice = ctx->mbs_per_slice;
825  for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
826  q = ctx->force_quant ? ctx->force_quant
827  : ctx->slice_q[mb + y * ctx->slices_width];
828 
 // halve the slice width until it fits into the rest of the row
829  while (ctx->mb_width - x < mbs_per_slice)
830  mbs_per_slice >>= 1;
831 
832  bytestream_put_byte(&buf, slice_hdr_size << 3);
 // remember where the rest of the slice header goes; it is filled in
 // once the plane sizes are known
833  slice_hdr = buf;
834  buf += slice_hdr_size - 1;
835  init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)) * 8);
836  encode_slice(avctx, pic, &pb, sizes, x, y, q, mbs_per_slice);
837 
838  bytestream_put_byte(&slice_hdr, q);
 // the size of the last plane is not stored in the slice header
839  slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
840  for (i = 0; i < ctx->num_planes - 1; i++) {
841  bytestream_put_be16(&slice_hdr, sizes[i]);
842  slice_size += sizes[i];
843  }
844  bytestream_put_be16(&slice_sizes, slice_size);
845  buf += slice_size - slice_hdr_size;
846  }
847  }
848 
849  picture_size = buf - (picture_size_pos - 1);
850  bytestream_put_be32(&picture_size_pos, picture_size);
851  }
852 
 // step back over the container ID and size field to the packet start
853  orig_buf -= 8;
854  frame_size = buf - orig_buf;
855  bytestream_put_be32(&orig_buf, frame_size);
856 
857  pkt->size = frame_size;
858  pkt->flags |= AV_PKT_FLAG_KEY;
859  *got_packet = 1;
860 
861  return 0;
862 }
863 
/* NOTE(review): the signature line of this function is missing from this
 * listing; it is called as encode_close(avctx) and registered as the codec's
 * close callback -- confirm against the original file.
 * Releases the coded frame, the per-thread trellis storage and the slice
 * quantiser table; always returns 0. */
865 {
866  ProresContext *ctx = avctx->priv_data;
867  int i;
868 
869  av_freep(&avctx->coded_frame);
870 
 // tdata may still be NULL when this runs from an init failure path
871  if (ctx->tdata) {
872  for (i = 0; i < avctx->thread_count; i++)
873  av_free(ctx->tdata[i].nodes);
874  }
875  av_freep(&ctx->tdata);
876  av_freep(&ctx->slice_q);
877 
878  return 0;
879 }
880 
/* NOTE(review): the signature line and several statements of this function
 * are missing from this listing (end of the scantable setup, the left-hand
 * side of the chroma factor assignment, both branches of the quant_sel
 * selection and the left-hand side of the frame size bound) -- confirm
 * against the original file. The codec descriptor registers the function as
 * encode_init.
 * The function validates the options, derives the slice layout, prepares
 * the quantiser tables and the rate-control storage and computes an upper
 * bound for the size of an encoded frame. */
882 {
883  ProresContext *ctx = avctx->priv_data;
884  int mps;
885  int i, j;
886  int min_quant, max_quant;
887  int interlaced = !!(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
888 
889  avctx->bits_per_raw_sample = 10;
890  avctx->coded_frame = avcodec_alloc_frame();
891  if (!avctx->coded_frame)
892  return AVERROR(ENOMEM);
893 
894  ff_proresdsp_init(&ctx->dsp, avctx);
895  ff_init_scantable(ctx->dsp.dct_permutation, &ctx->scantable,
896  interlaced ? ff_prores_interlaced_scan
898 
899  mps = ctx->mbs_per_slice;
 // a power of two has a single bit set, so mps & (mps - 1) must be zero
 // NOTE(review): this and later error returns leave coded_frame allocated
 // unless the caller invokes the close callback after a failed init
900  if (mps & (mps - 1)) {
901  av_log(avctx, AV_LOG_ERROR,
902  "there should be an integer power of two MBs per slice\n");
903  return AVERROR(EINVAL);
904  }
905 
907  ? CFACTOR_Y422
908  : CFACTOR_Y444;
910  ctx->num_planes = 3;
911 
912  ctx->mb_width = FFALIGN(avctx->width, 16) >> 4;
913 
 // interlaced pictures hold every other line, hence the 32-line alignment
914  if (interlaced)
915  ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
916  else
917  ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
918 
 // full-width slices plus one slice per set bit of the remaining width,
 // matching the repeated halving done while encoding a row
919  ctx->slices_width = ctx->mb_width / mps;
920  ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
921  ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
922  ctx->pictures_per_frame = 1 + interlaced;
923 
924  if (ctx->quant_sel == -1)
926  else
928 
929  if (strlen(ctx->vendor) != 4) {
930  av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
931  return AVERROR_INVALIDDATA;
932  }
933 
 // a non-zero global quality forces one quantiser for all slices
934  ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
935  if (!ctx->force_quant) {
936  if (!ctx->bits_per_mb) {
 // no explicit budget: take the profile default for this frame size
937  for (i = 0; i < NUM_MB_LIMITS - 1; i++)
938  if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
939  ctx->pictures_per_frame)
940  break;
941  ctx->bits_per_mb = ctx->profile_info->br_tab[i];
942  } else if (ctx->bits_per_mb < 128) {
943  av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
944  return AVERROR_INVALIDDATA;
945  }
946 
947  min_quant = ctx->profile_info->min_quant;
948  max_quant = ctx->profile_info->max_quant;
 // precompute the scaled matrix for every stored quantiser
949  for (i = min_quant; i < MAX_STORED_Q; i++) {
950  for (j = 0; j < 64; j++)
951  ctx->quants[i][j] = ctx->quant_mat[j] * i;
952  }
953 
954  ctx->slice_q = av_malloc(ctx->slices_per_picture * sizeof(*ctx->slice_q));
955  if (!ctx->slice_q) {
956  encode_close(avctx);
957  return AVERROR(ENOMEM);
958  }
959 
960  ctx->tdata = av_mallocz(avctx->thread_count * sizeof(*ctx->tdata));
961  if (!ctx->tdata) {
962  encode_close(avctx);
963  return AVERROR(ENOMEM);
964  }
965 
966  for (j = 0; j < avctx->thread_count; j++) {
 // one trellis column per slice plus a leading start column
967  ctx->tdata[j].nodes = av_malloc((ctx->slices_width + 1)
968  * TRELLIS_WIDTH
969  * sizeof(*ctx->tdata->nodes));
970  if (!ctx->tdata[j].nodes) {
971  encode_close(avctx);
972  return AVERROR(ENOMEM);
973  }
 // start column: zero cost and no predecessor
974  for (i = min_quant; i < max_quant + 2; i++) {
975  ctx->tdata[j].nodes[i].prev_node = -1;
976  ctx->tdata[j].nodes[i].bits = 0;
977  ctx->tdata[j].nodes[i].score = 0;
978  }
979  }
980  } else {
981  int ls = 0;
982 
983  if (ctx->force_quant > 64) {
984  av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
985  return AVERROR_INVALIDDATA;
986  }
987 
 // the forced-quantiser matrix is stored in slot 0; ls accumulates a
 // per-block bit estimate used for the frame size bound
988  for (j = 0; j < 64; j++) {
989  ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
990  ls += av_log2((1 << 11) / ctx->quants[0][j]) * 2 + 1;
991  }
992 
993  ctx->bits_per_mb = ls * 8;
994  if (ctx->chroma_factor == CFACTOR_Y444)
995  ctx->bits_per_mb += ls * 4;
996  if (ctx->num_planes == 4)
997  ctx->bits_per_mb += ls * 4;
998  }
999 
1001  ctx->slices_per_picture *
1002  (2 + 2 * ctx->num_planes +
1003  (mps * ctx->bits_per_mb) / 8)
1004  + 200;
1005 
1006  avctx->codec_tag = ctx->profile_info->tag;
1007 
1008  av_log(avctx, AV_LOG_DEBUG,
1009  "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1010  ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
1011  interlaced ? "yes" : "no", ctx->bits_per_mb);
1012  av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
1013  ctx->frame_size_upper_bound);
1014 
1015  return 0;
1016 }
1017 
/* OFFSET(x): byte offset of member x inside ProresContext.
 * VE: flag set shared by all options (video + encoding parameter). */
1018 #define OFFSET(x) offsetof(ProresContext, x)
1019 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
1020 
/* Private options of the encoder. Entries of type AV_OPT_TYPE_CONST are the
 * named values of the option whose unit string they share ("profile" or
 * "quant_mat"). */
1021 static const AVOption options[] = {
1022  { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1023  AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
1024  { "profile", NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1025  { .i64 = PRORES_PROFILE_STANDARD },
1026  PRORES_PROFILE_PROXY, PRORES_PROFILE_HQ, VE, "profile" },
1027  { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1028  0, 0, VE, "profile" },
1029  { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1030  0, 0, VE, "profile" },
1031  { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1032  0, 0, VE, "profile" },
1033  { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1034  0, 0, VE, "profile" },
 // encode_init() rejects vendor strings that are not exactly 4 characters long
1035  { "vendor", "vendor ID", OFFSET(vendor),
1036  AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE },
 // 0 lets encode_init() pick the profile default; it rejects other values below 128
1037  { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1038  AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
1039  { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1040  { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1041  { "auto", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
1042  0, 0, VE, "quant_mat" },
1043  { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
1044  0, 0, VE, "quant_mat" },
1045  { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
1046  0, 0, VE, "quant_mat" },
1047  { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
1048  0, 0, VE, "quant_mat" },
1049  { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
1050  0, 0, VE, "quant_mat" },
1051  { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
1052  0, 0, VE, "quant_mat" },
1053  { NULL }
1054 };
1055 
/* AVClass that ties the private option table to the encoder context; it is
 * referenced by the codec descriptor's priv_class member. */
1056 static const AVClass proresenc_class = {
1057  .class_name = "ProRes encoder",
1058  .item_name = av_default_item_name,
1059  .option = options,
1060  .version = LIBAVUTIL_VERSION_INT,
1061 };
1062 
/* NOTE(review): the declaration line of this codec descriptor and the
 * entries of the pix_fmts list are missing from this listing -- confirm
 * against the original file.
 * Registers the encoder callbacks and advertises slice threading. */
1064  .name = "prores_kostya",
1065  .type = AVMEDIA_TYPE_VIDEO,
1066  .id = AV_CODEC_ID_PRORES,
1067  .priv_data_size = sizeof(ProresContext),
1068  .init = encode_init,
1069  .close = encode_close,
1070  .encode2 = encode_frame,
1071  .capabilities = CODEC_CAP_SLICE_THREADS,
1072  .long_name = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1073  .pix_fmts = (const enum AVPixelFormat[]) {
1075  },
1076  .priv_class = &proresenc_class,
1077 };