FFmpeg
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
proresenc_kostya.c
Go to the documentation of this file.
1 /*
2  * Apple ProRes encoder
3  *
4  * Copyright (c) 2012 Konstantin Shishkov
5  *
6  * This encoder appears to be based on Anatoliy Wasserman's encoder,
7  * considering the similarities in the bugs.
8  *
9  * This file is part of Libav.
10  *
11  * Libav is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * Libav is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with Libav; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24  */
25 
26 #include "libavutil/opt.h"
27 #include "avcodec.h"
28 #include "put_bits.h"
29 #include "bytestream.h"
30 #include "internal.h"
31 #include "proresdsp.h"
32 #include "proresdata.h"
33 
/* Chroma format codes; stored in the top bits of the frame flags byte. */
#define CFACTOR_Y422        2
#define CFACTOR_Y444        3

/* Upper bound for the "mbs_per_slice" option. */
#define MAX_MBS_PER_SLICE   8

/* should be increased to 4 when there's AV_PIX_FMT_YUV444AP10 */
#define MAX_PLANES          3
40 
/**
 * Encoder profiles selectable through the "profile" option.
 * The order matches the entries of prores_profile_info[].
 */
enum {
    PRORES_PROFILE_PROXY = 0,
    PRORES_PROFILE_LT,
    PRORES_PROFILE_STANDARD,
    PRORES_PROFILE_HQ,
};
47 
/**
 * Quantisation matrix selectors.
 * The order matches the rows of prores_quant_matrices[].
 */
enum {
    QUANT_MAT_PROXY = 0,
    QUANT_MAT_LT,
    QUANT_MAT_STANDARD,
    QUANT_MAT_HQ,
    QUANT_MAT_DEFAULT,
};
55 
/**
 * Base quantisation matrices, one 8x8 table (64 entries) per quality level.
 * Entries are multiplied by the slice quantiser before use.
 */
static const uint8_t prores_quant_matrices[][64] = {
    /* proxy */
    {
         4,  7,  9, 11, 13, 14, 15, 63,
         7,  7, 11, 12, 14, 15, 63, 63,
         9, 11, 13, 14, 15, 63, 63, 63,
        11, 11, 13, 14, 63, 63, 63, 63,
        11, 13, 14, 63, 63, 63, 63, 63,
        13, 14, 63, 63, 63, 63, 63, 63,
        13, 63, 63, 63, 63, 63, 63, 63,
        63, 63, 63, 63, 63, 63, 63, 63,
    },
    /* LT */
    {
         4,  5,  6,  7,  9, 11, 13, 15,
         5,  5,  7,  8, 11, 13, 15, 17,
         6,  7,  9, 11, 13, 15, 15, 17,
         7,  7,  9, 11, 13, 15, 17, 19,
         7,  9, 11, 13, 14, 16, 19, 23,
         9, 11, 13, 14, 16, 19, 23, 29,
         9, 11, 13, 15, 17, 21, 28, 35,
        11, 13, 16, 17, 21, 28, 35, 41,
    },
    /* standard */
    {
         4,  4,  5,  5,  6,  7,  7,  9,
         4,  4,  5,  6,  7,  7,  9,  9,
         5,  5,  6,  7,  7,  9,  9, 10,
         5,  5,  6,  7,  7,  9,  9, 10,
         5,  6,  7,  7,  8,  9, 10, 12,
         6,  7,  7,  8,  9, 10, 12, 15,
         6,  7,  7,  9, 10, 11, 14, 17,
         7,  7,  9, 10, 11, 14, 17, 21,
    },
    /* high quality */
    {
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  5,
         4,  4,  4,  4,  4,  4,  5,  5,
         4,  4,  4,  4,  4,  5,  5,  6,
         4,  4,  4,  4,  5,  5,  6,  7,
         4,  4,  4,  4,  5,  6,  7,  7,
    },
    /* codec default */
    {
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
    },
};
108 
#define NUM_MB_LIMITS 4

/**
 * Macroblock-count thresholds used to pick an entry of a profile's
 * bitrate table; each entry is the largest count for its frame-size class.
 */
static const int prores_mb_limits[NUM_MB_LIMITS] = {
    1620,   /* up to 720x576   */
    2700,   /* up to 960x720   */
    6075,   /* up to 1440x1080 */
    9216,   /* up to 2048x1152 */
};
116 
117 static const struct prores_profile {
118  const char *full_name;
119  uint32_t tag;
123  int quant;
124 } prores_profile_info[4] = {
125  {
126  .full_name = "proxy",
127  .tag = MKTAG('a', 'p', 'c', 'o'),
128  .min_quant = 4,
129  .max_quant = 8,
130  .br_tab = { 300, 242, 220, 194 },
131  .quant = QUANT_MAT_PROXY,
132  },
133  {
134  .full_name = "LT",
135  .tag = MKTAG('a', 'p', 'c', 's'),
136  .min_quant = 1,
137  .max_quant = 9,
138  .br_tab = { 720, 560, 490, 440 },
139  .quant = QUANT_MAT_LT,
140  },
141  {
142  .full_name = "standard",
143  .tag = MKTAG('a', 'p', 'c', 'n'),
144  .min_quant = 1,
145  .max_quant = 6,
146  .br_tab = { 1050, 808, 710, 632 },
147  .quant = QUANT_MAT_STANDARD,
148  },
149  {
150  .full_name = "high quality",
151  .tag = MKTAG('a', 'p', 'c', 'h'),
152  .min_quant = 1,
153  .max_quant = 6,
154  .br_tab = { 1566, 1216, 1070, 950 },
155  .quant = QUANT_MAT_HQ,
156  }
157 // for 4444 profile bitrate numbers are { 2350, 1828, 1600, 1425 }
158 };
159 
/* Number of trellis node slots reserved per slice column. */
#define TRELLIS_WIDTH 16
/*
 * Score value meaning "not allowed". Parenthesised so the macro behaves
 * as a single value inside larger expressions.
 */
#define SCORE_LIMIT   (INT_MAX / 2)
162 
/**
 * One node of the per-row quantiser trellis.
 * prev_node is restored here: the trellis code reads and writes it.
 */
struct TrellisNode {
    int prev_node;  ///< index of the best predecessor node, -1 if none yet
    int quant;      ///< quantiser represented by this node
    int bits;       ///< accumulated bits along the best path to this node
    int score;      ///< accumulated error score along the best path
};
169 
170 #define MAX_STORED_Q 16
171 
172 typedef struct ProresThreadData {
174  DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
175  int16_t custom_q[64];
178 
179 typedef struct ProresContext {
180  AVClass *class;
182  DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
183  int16_t quants[MAX_STORED_Q][64];
184  int16_t custom_q[64];
186 
189 
195  int pictures_per_frame; // 1 for progressive, 2 for interlaced
200 
201  char *vendor;
203 
205 
206  int profile;
208 
209  int *slice_q;
210 
212 } ProresContext;
213 
214 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
215  int linesize, int x, int y, int w, int h,
216  DCTELEM *blocks, uint16_t *emu_buf,
217  int mbs_per_slice, int blocks_per_mb, int is_chroma)
218 {
219  const uint16_t *esrc;
220  const int mb_width = 4 * blocks_per_mb;
221  int elinesize;
222  int i, j, k;
223 
224  for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
225  if (x >= w) {
226  memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
227  * sizeof(*blocks));
228  return;
229  }
230  if (x + mb_width <= w && y + 16 <= h) {
231  esrc = src;
232  elinesize = linesize;
233  } else {
234  int bw, bh, pix;
235 
236  esrc = emu_buf;
237  elinesize = 16 * sizeof(*emu_buf);
238 
239  bw = FFMIN(w - x, mb_width);
240  bh = FFMIN(h - y, 16);
241 
242  for (j = 0; j < bh; j++) {
243  memcpy(emu_buf + j * 16,
244  (const uint8_t*)src + j * linesize,
245  bw * sizeof(*src));
246  pix = emu_buf[j * 16 + bw - 1];
247  for (k = bw; k < mb_width; k++)
248  emu_buf[j * 16 + k] = pix;
249  }
250  for (; j < 16; j++)
251  memcpy(emu_buf + j * 16,
252  emu_buf + (bh - 1) * 16,
253  mb_width * sizeof(*emu_buf));
254  }
255  if (!is_chroma) {
256  ctx->dsp.fdct(esrc, elinesize, blocks);
257  blocks += 64;
258  if (blocks_per_mb > 2) {
259  ctx->dsp.fdct(esrc + 8, elinesize, blocks);
260  blocks += 64;
261  }
262  ctx->dsp.fdct(esrc + elinesize * 4, elinesize, blocks);
263  blocks += 64;
264  if (blocks_per_mb > 2) {
265  ctx->dsp.fdct(esrc + elinesize * 4 + 8, elinesize, blocks);
266  blocks += 64;
267  }
268  } else {
269  ctx->dsp.fdct(esrc, elinesize, blocks);
270  blocks += 64;
271  ctx->dsp.fdct(esrc + elinesize * 4, elinesize, blocks);
272  blocks += 64;
273  if (blocks_per_mb > 2) {
274  ctx->dsp.fdct(esrc + 8, elinesize, blocks);
275  blocks += 64;
276  ctx->dsp.fdct(esrc + elinesize * 4 + 8, elinesize, blocks);
277  blocks += 64;
278  }
279  }
280 
281  x += mb_width;
282  }
283 }
284 
285 /**
286  * Write an unsigned rice/exp golomb codeword.
287  */
288 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
289 {
290  unsigned int rice_order, exp_order, switch_bits, switch_val;
291  int exponent;
292 
293  /* number of prefix bits to switch between Rice and expGolomb */
294  switch_bits = (codebook & 3) + 1;
295  rice_order = codebook >> 5; /* rice code order */
296  exp_order = (codebook >> 2) & 7; /* exp golomb code order */
297 
298  switch_val = switch_bits << rice_order;
299 
300  if (val >= switch_val) {
301  val -= switch_val - (1 << exp_order);
302  exponent = av_log2(val);
303 
304  put_bits(pb, exponent - exp_order + switch_bits, 0);
305  put_bits(pb, exponent + 1, val);
306  } else {
307  exponent = val >> rice_order;
308 
309  if (exponent)
310  put_bits(pb, exponent, 0);
311  put_bits(pb, 1, 1);
312  if (rice_order)
313  put_sbits(pb, rice_order, val);
314  }
315 }
316 
/*
 * GET_SIGN(x) is -1 for negative x and 0 otherwise. Written as a comparison
 * because right-shifting a negative int is implementation-defined.
 */
#define GET_SIGN(x)  (-((x) < 0))
/*
 * Map a signed value to an unsigned code: 0, -1, 1, -2, ... -> 0, 1, 2, 3, ...
 * Uses a multiplication because left-shifting a negative int is undefined.
 */
#define MAKE_CODE(x) ((((x)) * 2) ^ GET_SIGN(x))
319 
320 static void encode_dcs(PutBitContext *pb, DCTELEM *blocks,
321  int blocks_per_slice, int scale)
322 {
323  int i;
324  int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
325 
326  prev_dc = (blocks[0] - 0x4000) / scale;
328  sign = 0;
329  codebook = 3;
330  blocks += 64;
331 
332  for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
333  dc = (blocks[0] - 0x4000) / scale;
334  delta = dc - prev_dc;
335  new_sign = GET_SIGN(delta);
336  delta = (delta ^ sign) - sign;
337  code = MAKE_CODE(delta);
338  encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
339  codebook = (code + (code & 1)) >> 1;
340  codebook = FFMIN(codebook, 3);
341  sign = new_sign;
342  prev_dc = dc;
343  }
344 }
345 
346 static void encode_acs(PutBitContext *pb, DCTELEM *blocks,
347  int blocks_per_slice,
348  int plane_size_factor,
349  const uint8_t *scan, const int16_t *qmat)
350 {
351  int idx, i;
352  int run, level, run_cb, lev_cb;
353  int max_coeffs, abs_level;
354 
355  max_coeffs = blocks_per_slice << 6;
356  run_cb = ff_prores_run_to_cb_index[4];
357  lev_cb = ff_prores_lev_to_cb_index[2];
358  run = 0;
359 
360  for (i = 1; i < 64; i++) {
361  for (idx = scan[i]; idx < max_coeffs; idx += 64) {
362  level = blocks[idx] / qmat[scan[i]];
363  if (level) {
364  abs_level = FFABS(level);
365  encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
367  abs_level - 1);
368  put_sbits(pb, 1, GET_SIGN(level));
369 
370  run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
371  lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
372  run = 0;
373  } else {
374  run++;
375  }
376  }
377  }
378 }
379 
381  const uint16_t *src, int linesize,
382  int mbs_per_slice, DCTELEM *blocks,
383  int blocks_per_mb, int plane_size_factor,
384  const int16_t *qmat)
385 {
386  int blocks_per_slice, saved_pos;
387 
388  saved_pos = put_bits_count(pb);
389  blocks_per_slice = mbs_per_slice * blocks_per_mb;
390 
391  encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
392  encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
393  ctx->scantable.permutated, qmat);
394  flush_put_bits(pb);
395 
396  return (put_bits_count(pb) - saved_pos) >> 3;
397 }
398 
399 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
400  PutBitContext *pb,
401  int sizes[4], int x, int y, int quant,
402  int mbs_per_slice)
403 {
404  ProresContext *ctx = avctx->priv_data;
405  int i, xp, yp;
406  int total_size = 0;
407  const uint16_t *src;
408  int slice_width_factor = av_log2(mbs_per_slice);
409  int num_cblocks, pwidth, linesize, line_add;
410  int plane_factor, is_chroma;
411  uint16_t *qmat;
412 
413  if (ctx->pictures_per_frame == 1)
414  line_add = 0;
415  else
416  line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
417 
418  if (ctx->force_quant) {
419  qmat = ctx->quants[0];
420  } else if (quant < MAX_STORED_Q) {
421  qmat = ctx->quants[quant];
422  } else {
423  qmat = ctx->custom_q;
424  for (i = 0; i < 64; i++)
425  qmat[i] = ctx->quant_mat[i] * quant;
426  }
427 
428  for (i = 0; i < ctx->num_planes; i++) {
429  is_chroma = (i == 1 || i == 2);
430  plane_factor = slice_width_factor + 2;
431  if (is_chroma)
432  plane_factor += ctx->chroma_factor - 3;
433  if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
434  xp = x << 4;
435  yp = y << 4;
436  num_cblocks = 4;
437  pwidth = avctx->width;
438  } else {
439  xp = x << 3;
440  yp = y << 4;
441  num_cblocks = 2;
442  pwidth = avctx->width >> 1;
443  }
444 
445  linesize = pic->linesize[i] * ctx->pictures_per_frame;
446  src = (const uint16_t*)(pic->data[i] + yp * linesize +
447  line_add * pic->linesize[i]) + xp;
448 
449  get_slice_data(ctx, src, linesize, xp, yp,
450  pwidth, avctx->height / ctx->pictures_per_frame,
451  ctx->blocks[0], ctx->emu_buf,
452  mbs_per_slice, num_cblocks, is_chroma);
453  sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
454  mbs_per_slice, ctx->blocks[0],
455  num_cblocks, plane_factor,
456  qmat);
457  total_size += sizes[i];
458  }
459  return total_size;
460 }
461 
/**
 * Compute the length in bits of the codeword encode_vlc_codeword() would
 * write for the given codebook descriptor and value.
 */
static inline int estimate_vlc(unsigned codebook, int val)
{
    /* number of prefix bits to switch between Rice and expGolomb */
    const unsigned switch_bits = (codebook & 3) + 1;
    const unsigned rice_order  = codebook >> 5;         /* rice code order */
    const unsigned exp_order   = (codebook >> 2) & 7;   /* exp golomb code order */
    const unsigned switch_val  = switch_bits << rice_order;
    int exponent;

    /* Rice code: unary quotient + stop bit + remainder bits */
    if (val < switch_val)
        return (val >> rice_order) + rice_order + 1;

    /* exp-Golomb code: zero prefix followed by exponent + 1 value bits */
    val     -= switch_val - (1 << exp_order);
    exponent = av_log2(val);

    return exponent * 2 - exp_order + switch_bits + 1;
}
483 
484 static int estimate_dcs(int *error, DCTELEM *blocks, int blocks_per_slice,
485  int scale)
486 {
487  int i;
488  int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
489  int bits;
490 
491  prev_dc = (blocks[0] - 0x4000) / scale;
492  bits = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
493  sign = 0;
494  codebook = 3;
495  blocks += 64;
496  *error += FFABS(blocks[0] - 0x4000) % scale;
497 
498  for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
499  dc = (blocks[0] - 0x4000) / scale;
500  *error += FFABS(blocks[0] - 0x4000) % scale;
501  delta = dc - prev_dc;
502  new_sign = GET_SIGN(delta);
503  delta = (delta ^ sign) - sign;
504  code = MAKE_CODE(delta);
505  bits += estimate_vlc(ff_prores_dc_codebook[codebook], code);
506  codebook = (code + (code & 1)) >> 1;
507  codebook = FFMIN(codebook, 3);
508  sign = new_sign;
509  prev_dc = dc;
510  }
511 
512  return bits;
513 }
514 
515 static int estimate_acs(int *error, DCTELEM *blocks, int blocks_per_slice,
516  int plane_size_factor,
517  const uint8_t *scan, const int16_t *qmat)
518 {
519  int idx, i;
520  int run, level, run_cb, lev_cb;
521  int max_coeffs, abs_level;
522  int bits = 0;
523 
524  max_coeffs = blocks_per_slice << 6;
525  run_cb = ff_prores_run_to_cb_index[4];
526  lev_cb = ff_prores_lev_to_cb_index[2];
527  run = 0;
528 
529  for (i = 1; i < 64; i++) {
530  for (idx = scan[i]; idx < max_coeffs; idx += 64) {
531  level = blocks[idx] / qmat[scan[i]];
532  *error += FFABS(blocks[idx]) % qmat[scan[i]];
533  if (level) {
534  abs_level = FFABS(level);
535  bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
536  bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
537  abs_level - 1) + 1;
538 
539  run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
540  lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
541  run = 0;
542  } else {
543  run++;
544  }
545  }
546  }
547 
548  return bits;
549 }
550 
551 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
552  const uint16_t *src, int linesize,
553  int mbs_per_slice,
554  int blocks_per_mb, int plane_size_factor,
555  const int16_t *qmat, ProresThreadData *td)
556 {
557  int blocks_per_slice;
558  int bits;
559 
560  blocks_per_slice = mbs_per_slice * blocks_per_mb;
561 
562  bits = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
563  bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
564  plane_size_factor, ctx->scantable.permutated, qmat);
565 
566  return FFALIGN(bits, 8);
567 }
568 
569 static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic,
570  int trellis_node, int x, int y, int mbs_per_slice,
571  ProresThreadData *td)
572 {
573  ProresContext *ctx = avctx->priv_data;
574  int i, q, pq, xp, yp;
575  const uint16_t *src;
576  int slice_width_factor = av_log2(mbs_per_slice);
577  int num_cblocks[MAX_PLANES], pwidth;
578  int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
579  const int min_quant = ctx->profile_info->min_quant;
580  const int max_quant = ctx->profile_info->max_quant;
581  int error, bits, bits_limit;
582  int mbs, prev, cur, new_score;
583  int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
584  int overquant;
585  uint16_t *qmat;
586  int linesize[4], line_add;
587 
588  if (ctx->pictures_per_frame == 1)
589  line_add = 0;
590  else
591  line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
592  mbs = x + mbs_per_slice;
593 
594  for (i = 0; i < ctx->num_planes; i++) {
595  is_chroma[i] = (i == 1 || i == 2);
596  plane_factor[i] = slice_width_factor + 2;
597  if (is_chroma[i])
598  plane_factor[i] += ctx->chroma_factor - 3;
599  if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
600  xp = x << 4;
601  yp = y << 4;
602  num_cblocks[i] = 4;
603  pwidth = avctx->width;
604  } else {
605  xp = x << 3;
606  yp = y << 4;
607  num_cblocks[i] = 2;
608  pwidth = avctx->width >> 1;
609  }
610 
611  linesize[i] = pic->linesize[i] * ctx->pictures_per_frame;
612  src = (const uint16_t*)(pic->data[i] + yp * linesize[i] +
613  line_add * pic->linesize[i]) + xp;
614 
615  get_slice_data(ctx, src, linesize[i], xp, yp,
616  pwidth, avctx->height / ctx->pictures_per_frame,
617  td->blocks[i], td->emu_buf,
618  mbs_per_slice, num_cblocks[i], is_chroma[i]);
619  }
620 
621  for (q = min_quant; q < max_quant + 2; q++) {
622  td->nodes[trellis_node + q].prev_node = -1;
623  td->nodes[trellis_node + q].quant = q;
624  }
625 
626  // todo: maybe perform coarser quantising to fit into frame size when needed
627  for (q = min_quant; q <= max_quant; q++) {
628  bits = 0;
629  error = 0;
630  for (i = 0; i < ctx->num_planes; i++) {
631  bits += estimate_slice_plane(ctx, &error, i,
632  src, linesize[i],
633  mbs_per_slice,
634  num_cblocks[i], plane_factor[i],
635  ctx->quants[q], td);
636  }
637  if (bits > 65000 * 8) {
638  error = SCORE_LIMIT;
639  break;
640  }
641  slice_bits[q] = bits;
642  slice_score[q] = error;
643  }
644  if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
645  slice_bits[max_quant + 1] = slice_bits[max_quant];
646  slice_score[max_quant + 1] = slice_score[max_quant] + 1;
647  overquant = max_quant;
648  } else {
649  for (q = max_quant + 1; q < 128; q++) {
650  bits = 0;
651  error = 0;
652  if (q < MAX_STORED_Q) {
653  qmat = ctx->quants[q];
654  } else {
655  qmat = td->custom_q;
656  for (i = 0; i < 64; i++)
657  qmat[i] = ctx->quant_mat[i] * q;
658  }
659  for (i = 0; i < ctx->num_planes; i++) {
660  bits += estimate_slice_plane(ctx, &error, i,
661  src, linesize[i],
662  mbs_per_slice,
663  num_cblocks[i], plane_factor[i],
664  qmat, td);
665  }
666  if (bits <= ctx->bits_per_mb * mbs_per_slice)
667  break;
668  }
669 
670  slice_bits[max_quant + 1] = bits;
671  slice_score[max_quant + 1] = error;
672  overquant = q;
673  }
674  td->nodes[trellis_node + max_quant + 1].quant = overquant;
675 
676  bits_limit = mbs * ctx->bits_per_mb;
677  for (pq = min_quant; pq < max_quant + 2; pq++) {
678  prev = trellis_node - TRELLIS_WIDTH + pq;
679 
680  for (q = min_quant; q < max_quant + 2; q++) {
681  cur = trellis_node + q;
682 
683  bits = td->nodes[prev].bits + slice_bits[q];
684  error = slice_score[q];
685  if (bits > bits_limit)
686  error = SCORE_LIMIT;
687 
688  if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
689  new_score = td->nodes[prev].score + error;
690  else
691  new_score = SCORE_LIMIT;
692  if (td->nodes[cur].prev_node == -1 ||
693  td->nodes[cur].score >= new_score) {
694 
695  td->nodes[cur].bits = bits;
696  td->nodes[cur].score = new_score;
697  td->nodes[cur].prev_node = prev;
698  }
699  }
700  }
701 
702  error = td->nodes[trellis_node + min_quant].score;
703  pq = trellis_node + min_quant;
704  for (q = min_quant + 1; q < max_quant + 2; q++) {
705  if (td->nodes[trellis_node + q].score <= error) {
706  error = td->nodes[trellis_node + q].score;
707  pq = trellis_node + q;
708  }
709  }
710 
711  return pq;
712 }
713 
714 static int find_quant_thread(AVCodecContext *avctx, void *arg,
715  int jobnr, int threadnr)
716 {
717  ProresContext *ctx = avctx->priv_data;
718  ProresThreadData *td = ctx->tdata + threadnr;
719  int mbs_per_slice = ctx->mbs_per_slice;
720  int x, y = jobnr, mb, q = 0;
721 
722  for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
723  while (ctx->mb_width - x < mbs_per_slice)
724  mbs_per_slice >>= 1;
725  q = find_slice_quant(avctx, avctx->coded_frame,
726  (mb + 1) * TRELLIS_WIDTH, x, y,
727  mbs_per_slice, td);
728  }
729 
730  for (x = ctx->slices_width - 1; x >= 0; x--) {
731  ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant;
732  q = td->nodes[q].prev_node;
733  }
734 
735  return 0;
736 }
737 
739  const AVFrame *pic, int *got_packet)
740 {
741  ProresContext *ctx = avctx->priv_data;
742  uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
743  uint8_t *picture_size_pos;
744  PutBitContext pb;
745  int x, y, i, mb, q = 0;
746  int sizes[4] = { 0 };
747  int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
748  int frame_size, picture_size, slice_size;
749  int pkt_size, ret;
750  uint8_t frame_flags;
751 
752  *avctx->coded_frame = *pic;
754  avctx->coded_frame->key_frame = 1;
755 
756  pkt_size = ctx->frame_size_upper_bound + FF_MIN_BUFFER_SIZE;
757 
758  if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size)) < 0)
759  return ret;
760 
761  orig_buf = pkt->data;
762 
763  // frame atom
764  orig_buf += 4; // frame size
765  bytestream_put_be32 (&orig_buf, FRAME_ID); // frame container ID
766  buf = orig_buf;
767 
768  // frame header
769  tmp = buf;
770  buf += 2; // frame header size will be stored here
771  bytestream_put_be16 (&buf, 0); // version 1
772  bytestream_put_buffer(&buf, ctx->vendor, 4);
773  bytestream_put_be16 (&buf, avctx->width);
774  bytestream_put_be16 (&buf, avctx->height);
775 
776  frame_flags = ctx->chroma_factor << 6;
777  if (avctx->flags & CODEC_FLAG_INTERLACED_DCT)
778  frame_flags |= pic->top_field_first ? 0x04 : 0x08;
779  bytestream_put_byte (&buf, frame_flags);
780 
781  bytestream_put_byte (&buf, 0); // reserved
782  bytestream_put_byte (&buf, avctx->color_primaries);
783  bytestream_put_byte (&buf, avctx->color_trc);
784  bytestream_put_byte (&buf, avctx->colorspace);
785  bytestream_put_byte (&buf, 0x40); // source format and alpha information
786  bytestream_put_byte (&buf, 0); // reserved
787  if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
788  bytestream_put_byte (&buf, 0x03); // matrix flags - both matrices are present
789  // luma quantisation matrix
790  for (i = 0; i < 64; i++)
791  bytestream_put_byte(&buf, ctx->quant_mat[i]);
792  // chroma quantisation matrix
793  for (i = 0; i < 64; i++)
794  bytestream_put_byte(&buf, ctx->quant_mat[i]);
795  } else {
796  bytestream_put_byte (&buf, 0x00); // matrix flags - default matrices are used
797  }
798  bytestream_put_be16 (&tmp, buf - orig_buf); // write back frame header size
799 
800  for (ctx->cur_picture_idx = 0;
802  ctx->cur_picture_idx++) {
803  // picture header
804  picture_size_pos = buf + 1;
805  bytestream_put_byte (&buf, 0x40); // picture header size (in bits)
806  buf += 4; // picture data size will be stored here
807  bytestream_put_be16 (&buf, ctx->slices_per_picture);
808  bytestream_put_byte (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
809 
810  // seek table - will be filled during slice encoding
811  slice_sizes = buf;
812  buf += ctx->slices_per_picture * 2;
813 
814  // slices
815  if (!ctx->force_quant) {
816  ret = avctx->execute2(avctx, find_quant_thread, NULL, NULL,
817  ctx->mb_height);
818  if (ret)
819  return ret;
820  }
821 
822  for (y = 0; y < ctx->mb_height; y++) {
823  int mbs_per_slice = ctx->mbs_per_slice;
824  for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
825  q = ctx->force_quant ? ctx->force_quant
826  : ctx->slice_q[mb + y * ctx->slices_width];
827 
828  while (ctx->mb_width - x < mbs_per_slice)
829  mbs_per_slice >>= 1;
830 
831  bytestream_put_byte(&buf, slice_hdr_size << 3);
832  slice_hdr = buf;
833  buf += slice_hdr_size - 1;
834  init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)) * 8);
835  encode_slice(avctx, pic, &pb, sizes, x, y, q, mbs_per_slice);
836 
837  bytestream_put_byte(&slice_hdr, q);
838  slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
839  for (i = 0; i < ctx->num_planes - 1; i++) {
840  bytestream_put_be16(&slice_hdr, sizes[i]);
841  slice_size += sizes[i];
842  }
843  bytestream_put_be16(&slice_sizes, slice_size);
844  buf += slice_size - slice_hdr_size;
845  }
846  }
847 
848  picture_size = buf - (picture_size_pos - 1);
849  bytestream_put_be32(&picture_size_pos, picture_size);
850  }
851 
852  orig_buf -= 8;
853  frame_size = buf - orig_buf;
854  bytestream_put_be32(&orig_buf, frame_size);
855 
856  pkt->size = frame_size;
857  pkt->flags |= AV_PKT_FLAG_KEY;
858  *got_packet = 1;
859 
860  return 0;
861 }
862 
864 {
865  ProresContext *ctx = avctx->priv_data;
866  int i;
867 
868  av_freep(&avctx->coded_frame);
869 
870  if (ctx->tdata) {
871  for (i = 0; i < avctx->thread_count; i++)
872  av_free(ctx->tdata[i].nodes);
873  }
874  av_freep(&ctx->tdata);
875  av_freep(&ctx->slice_q);
876 
877  return 0;
878 }
879 
881 {
882  ProresContext *ctx = avctx->priv_data;
883  int mps;
884  int i, j;
885  int min_quant, max_quant;
886  int interlaced = !!(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
887 
888  avctx->bits_per_raw_sample = 10;
889  avctx->coded_frame = avcodec_alloc_frame();
890  if (!avctx->coded_frame)
891  return AVERROR(ENOMEM);
892 
893  ff_proresdsp_init(&ctx->dsp, avctx);
894  ff_init_scantable(ctx->dsp.dct_permutation, &ctx->scantable,
895  interlaced ? ff_prores_interlaced_scan
897 
898  mps = ctx->mbs_per_slice;
899  if (mps & (mps - 1)) {
900  av_log(avctx, AV_LOG_ERROR,
901  "there should be an integer power of two MBs per slice\n");
902  return AVERROR(EINVAL);
903  }
904 
906  ? CFACTOR_Y422
907  : CFACTOR_Y444;
909  ctx->num_planes = 3;
910 
911  ctx->mb_width = FFALIGN(avctx->width, 16) >> 4;
912 
913  if (interlaced)
914  ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
915  else
916  ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
917 
918  ctx->slices_width = ctx->mb_width / mps;
919  ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
920  ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
921  ctx->pictures_per_frame = 1 + interlaced;
922 
923  if (ctx->quant_sel == -1)
925  else
927 
928  if (strlen(ctx->vendor) != 4) {
929  av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
930  return AVERROR_INVALIDDATA;
931  }
932 
933  ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
934  if (!ctx->force_quant) {
935  if (!ctx->bits_per_mb) {
936  for (i = 0; i < NUM_MB_LIMITS - 1; i++)
937  if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
938  ctx->pictures_per_frame)
939  break;
940  ctx->bits_per_mb = ctx->profile_info->br_tab[i];
941  } else if (ctx->bits_per_mb < 128) {
942  av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
943  return AVERROR_INVALIDDATA;
944  }
945 
946  min_quant = ctx->profile_info->min_quant;
947  max_quant = ctx->profile_info->max_quant;
948  for (i = min_quant; i < MAX_STORED_Q; i++) {
949  for (j = 0; j < 64; j++)
950  ctx->quants[i][j] = ctx->quant_mat[j] * i;
951  }
952 
953  ctx->slice_q = av_malloc(ctx->slices_per_picture * sizeof(*ctx->slice_q));
954  if (!ctx->slice_q) {
955  encode_close(avctx);
956  return AVERROR(ENOMEM);
957  }
958 
959  ctx->tdata = av_mallocz(avctx->thread_count * sizeof(*ctx->tdata));
960  if (!ctx->tdata) {
961  encode_close(avctx);
962  return AVERROR(ENOMEM);
963  }
964 
965  for (j = 0; j < avctx->thread_count; j++) {
966  ctx->tdata[j].nodes = av_malloc((ctx->slices_width + 1)
967  * TRELLIS_WIDTH
968  * sizeof(*ctx->tdata->nodes));
969  if (!ctx->tdata[j].nodes) {
970  encode_close(avctx);
971  return AVERROR(ENOMEM);
972  }
973  for (i = min_quant; i < max_quant + 2; i++) {
974  ctx->tdata[j].nodes[i].prev_node = -1;
975  ctx->tdata[j].nodes[i].bits = 0;
976  ctx->tdata[j].nodes[i].score = 0;
977  }
978  }
979  } else {
980  int ls = 0;
981 
982  if (ctx->force_quant > 64) {
983  av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
984  return AVERROR_INVALIDDATA;
985  }
986 
987  for (j = 0; j < 64; j++) {
988  ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
989  ls += av_log2((1 << 11) / ctx->quants[0][j]) * 2 + 1;
990  }
991 
992  ctx->bits_per_mb = ls * 8;
993  if (ctx->chroma_factor == CFACTOR_Y444)
994  ctx->bits_per_mb += ls * 4;
995  if (ctx->num_planes == 4)
996  ctx->bits_per_mb += ls * 4;
997  }
998 
1000  ctx->slices_per_picture *
1001  (2 + 2 * ctx->num_planes +
1002  (mps * ctx->bits_per_mb) / 8)
1003  + 200;
1004 
1005  avctx->codec_tag = ctx->profile_info->tag;
1006 
1007  av_log(avctx, AV_LOG_DEBUG,
1008  "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1009  ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
1010  interlaced ? "yes" : "no", ctx->bits_per_mb);
1011  av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
1012  ctx->frame_size_upper_bound);
1013 
1014  return 0;
1015 }
1016 
/* Byte offset of a ProresContext member, for the option table. */
#define OFFSET(x) offsetof(ProresContext, x)
/* Flags shared by all options; parenthesised so it expands as one value. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
1019 
1020 static const AVOption options[] = {
1021  { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1022  AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
1023  { "profile", NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1024  { .i64 = PRORES_PROFILE_STANDARD },
1025  PRORES_PROFILE_PROXY, PRORES_PROFILE_HQ, VE, "profile" },
1026  { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1027  0, 0, VE, "profile" },
1028  { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1029  0, 0, VE, "profile" },
1030  { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1031  0, 0, VE, "profile" },
1032  { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1033  0, 0, VE, "profile" },
1034  { "vendor", "vendor ID", OFFSET(vendor),
1035  AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE },
1036  { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1037  AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
1038  { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1039  { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1040  { "auto", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
1041  0, 0, VE, "quant_mat" },
1042  { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
1043  0, 0, VE, "quant_mat" },
1044  { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
1045  0, 0, VE, "quant_mat" },
1046  { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
1047  0, 0, VE, "quant_mat" },
1048  { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
1049  0, 0, VE, "quant_mat" },
1050  { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
1051  0, 0, VE, "quant_mat" },
1052  { NULL }
1053 };
1054 
1055 static const AVClass proresenc_class = {
1056  .class_name = "ProRes encoder",
1057  .item_name = av_default_item_name,
1058  .option = options,
1059  .version = LIBAVUTIL_VERSION_INT,
1060 };
1061 
1063  .name = "prores_kostya",
1064  .type = AVMEDIA_TYPE_VIDEO,
1065  .id = AV_CODEC_ID_PRORES,
1066  .priv_data_size = sizeof(ProresContext),
1067  .init = encode_init,
1068  .close = encode_close,
1069  .encode2 = encode_frame,
1070  .capabilities = CODEC_CAP_SLICE_THREADS,
1071  .long_name = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1072  .pix_fmts = (const enum AVPixelFormat[]) {
1074  },
1075  .priv_class = &proresenc_class,
1076 };