FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
alacenc.c
Go to the documentation of this file.
1 /*
2  * ALAC audio encoder
3  * Copyright (c) 2008 Jaikrishnan Menon <realityman@gmx.net>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #include "avcodec.h"
23 #include "put_bits.h"
24 #include "internal.h"
25 #include "lpc.h"
26 #include "mathops.h"
27 #include "alac_data.h"
28 
/* Frame length threshold: get_max_frame_size() budgets 32 additional header
 * bits for frames shorter than this. */
29 #define DEFAULT_FRAME_SIZE 4096
/* Stored as the first big-endian 32-bit word of the extradata; the last field
 * written there starts at byte offset 32. */
30 #define ALAC_EXTRADATA_SIZE 36
/* NOTE(review): 55 equals the 23 + 32 header bits used by
 * get_max_frame_size() -- presumably a bit count, confirm. */
31 #define ALAC_FRAME_HEADER_SIZE 55
/* NOTE(review): presumably the 3-bit TYPE_END tag that write_frame() emits
 * last -- confirm. */
32 #define ALAC_FRAME_FOOTER_SIZE 3
33 
/* 0x1FF is nine one-bits. Its use is not visible in this listing (the line
 * following the "write escape code" comment in encode_scalar() is missing). */
34 #define ALAC_ESCAPE_CODE 0x1FF
/* NOTE(review): the uses of the three order limits below are not visible in
 * this listing; presumably they bound/default the LPC order -- confirm. */
35 #define ALAC_MAX_LPC_ORDER 30
36 #define DEFAULT_MAX_PRED_ORDER 6
37 #define DEFAULT_MIN_PRED_ORDER 4
/* Passed to ff_lpc_calc_coefs() as the coefficient precision. */
38 #define ALAC_MAX_LPC_PRECISION 9
39 #define ALAC_MAX_LPC_SHIFT 9
40 
/* Stereo modes; the values coincide with the score indices returned by
 * estimate_stereo_mode() (0: left+right, 1: left+side, 2: right+side,
 * 3: mid+side). */
41 #define ALAC_CHMODE_LEFT_RIGHT 0
42 #define ALAC_CHMODE_LEFT_SIDE 1
43 #define ALAC_CHMODE_RIGHT_SIDE 2
44 #define ALAC_CHMODE_MID_SIDE 3
45 
46 typedef struct RiceContext {
51 } RiceContext;
52 
53 typedef struct AlacLPCContext {
54  int lpc_order;
56  int lpc_quant;
58 
59 typedef struct AlacEncodeContext {
60  int frame_size; /**< current frame size */
61  int verbatim; /**< current frame verbatim mode flag */
78 
79 
/*
 * Copy the planar input samples of the current frame into s->sample_buf.
 *
 * For each of the `channels` channels, s->frame_size samples are read from
 * samples[ch], shifted right by `shift`, and stored as int32_t in
 * s->sample_buf[ch].
 *
 * NOTE(review): listing lines 84-85 and 96-97 are missing from this extract.
 * The declaration of `shift` and the condition / first branch in front of the
 * trailing `else` are therefore not visible; presumably the first branch
 * handles 32-bit input -- confirm against the upstream file.
 */
80 static void init_sample_buffers(AlacEncodeContext *s, int channels,
81  uint8_t const *samples[2])
82 {
83  int ch, i;
86 
/* Expands to the per-channel copy loop for input samples of the given C type. */
87 #define COPY_SAMPLES(type) do { \
88  for (ch = 0; ch < channels; ch++) { \
89  int32_t *bptr = s->sample_buf[ch]; \
90  const type *sptr = (const type *)samples[ch]; \
91  for (i = 0; i < s->frame_size; i++) \
92  bptr[i] = sptr[i] >> shift; \
93  } \
94  } while (0)
95 
/* Fallback branch: the input planes are read as int16_t. */
98  else
99  COPY_SAMPLES(int16_t);
100 }
101 
/*
 * Write the value x to s->pbctx as a unary quotient followed by a remainder
 * field, or via the escape path when the quotient is too large.
 *
 * k is first capped at s->rc.k_modifier. x is then split into quotient q and
 * remainder r with respect to (2^k - 1).
 *
 * s                  encoder context; output goes to s->pbctx
 * x                  value to encode
 * k                  code parameter (number of remainder bits before capping)
 * write_sample_size  number of bits used for x on the escape path
 *
 * NOTE(review): listing line 114 is missing from this extract. Going by the
 * comment on line 113 it presumably writes the escape code ahead of x --
 * confirm against the upstream file.
 */
102 static void encode_scalar(AlacEncodeContext *s, int x,
103  int k, int write_sample_size)
104 {
105  int divisor, q, r;
106 
107  k = FFMIN(k, s->rc.k_modifier);
108  divisor = (1<<k) - 1;
109  q = x / divisor;
110  r = x % divisor;
111 
/* Quotients above 8 are not sent in unary; x itself is written instead. */
112  if (q > 8) {
113  // write escape code and sample value directly
115  put_bits(&s->pbctx, write_sample_size, x);
116  } else {
/* Unary prefix: q one-bits (skipped when q is 0) closed by a single zero bit. */
117  if (q)
118  put_bits(&s->pbctx, q, (1<<q) - 1);
119  put_bits(&s->pbctx, 1, 0);
120 
/* No remainder field is written when k is 1 (the divisor is then 1). */
121  if (k != 1) {
/* A non-zero remainder is stored as r+1 in k bits; a zero remainder
 * takes only k-1 zero bits. */
122  if (r > 0)
123  put_bits(&s->pbctx, k, r+1);
124  else
125  put_bits(&s->pbctx, k-1, 0);
126  }
127  }
128 }
129 
131  enum AlacRawDataBlockType element,
132  int instance)
133 {
134  int encode_fs = 0;
135 
137  encode_fs = 1;
138 
139  put_bits(&s->pbctx, 3, element); // element type
140  put_bits(&s->pbctx, 4, instance); // element instance
141  put_bits(&s->pbctx, 12, 0); // unused header bits
142  put_bits(&s->pbctx, 1, encode_fs); // Sample count is in the header
143  put_bits(&s->pbctx, 2, s->extra_bits >> 3); // Extra bytes (for 24-bit)
144  put_bits(&s->pbctx, 1, s->verbatim); // Audio block is verbatim
145  if (encode_fs)
146  put_bits32(&s->pbctx, s->frame_size); // No. of samples in the frame
147 }
148 
150 {
152  int shift[MAX_LPC_ORDER];
153  int opt_order;
154 
155  if (s->compression_level == 1) {
156  s->lpc[ch].lpc_order = 6;
157  s->lpc[ch].lpc_quant = 6;
158  s->lpc[ch].lpc_coeff[0] = 160;
159  s->lpc[ch].lpc_coeff[1] = -190;
160  s->lpc[ch].lpc_coeff[2] = 170;
161  s->lpc[ch].lpc_coeff[3] = -130;
162  s->lpc[ch].lpc_coeff[4] = 80;
163  s->lpc[ch].lpc_coeff[5] = -25;
164  } else {
165  opt_order = ff_lpc_calc_coefs(&s->lpc_ctx, s->sample_buf[ch],
166  s->frame_size,
169  ALAC_MAX_LPC_PRECISION, coefs, shift,
172 
173  s->lpc[ch].lpc_order = opt_order;
174  s->lpc[ch].lpc_quant = shift[opt_order-1];
175  memcpy(s->lpc[ch].lpc_coeff, coefs[opt_order-1], opt_order*sizeof(int));
176  }
177 }
178 
/**
 * Estimate which stereo decorrelation mode yields the smallest residual.
 *
 * For every sample index i >= 2 the 2nd-order difference
 * x[i] - 2*x[i-1] + x[i-2] is computed for both channels. The absolute
 * values of the left, right, mid ((l + r) >> 1) and side (l - r) variants
 * are accumulated and combined into one score per mode.
 *
 * All intermediate arithmetic is done in 64 bits so that full-range int32_t
 * input cannot trigger signed overflow; for inputs where 32-bit arithmetic
 * does not overflow the result is unchanged.
 *
 * @param left_ch   left channel samples (only read)
 * @param right_ch  right channel samples (only read)
 * @param n         number of samples per channel; for n < 3 nothing is
 *                  accumulated and 0 is returned
 * @return index of the lowest score: 0 = left+right, 1 = left+side,
 *         2 = right+side, 3 = mid+side (same values as ALAC_CHMODE_*);
 *         ties are resolved in favour of the lowest index
 */
static int estimate_stereo_mode(const int32_t *left_ch, const int32_t *right_ch, int n)
{
    uint64_t sum[4] = { 0, 0, 0, 0 };
    uint64_t score[4];
    int i, best;

    /* calculate sum of 2nd order residual for each channel */
    for (i = 2; i < n; i++) {
        const int64_t lt   = (int64_t)left_ch[i]  - 2 * (int64_t)left_ch[i - 1]  + left_ch[i - 2];
        const int64_t rt   = (int64_t)right_ch[i] - 2 * (int64_t)right_ch[i - 1] + right_ch[i - 2];
        const int64_t mid  = (lt + rt) >> 1;
        const int64_t side = lt - rt;

        sum[0] += (uint64_t)(lt   < 0 ? -lt   : lt);
        sum[1] += (uint64_t)(rt   < 0 ? -rt   : rt);
        sum[2] += (uint64_t)(mid  < 0 ? -mid  : mid);
        sum[3] += (uint64_t)(side < 0 ? -side : side);
    }

    /* calculate score for each mode */
    score[0] = sum[0] + sum[1];
    score[1] = sum[0] + sum[3];
    score[2] = sum[1] + sum[3];
    score[3] = sum[2] + sum[3];

    /* return mode with lowest score; strict '<' keeps the first of equals */
    best = 0;
    for (i = 1; i < 4; i++) {
        if (score[i] < score[best])
            best = i;
    }
    return best;
}
211 
213 {
214  int32_t *left = s->sample_buf[0], *right = s->sample_buf[1];
215  int i, mode, n = s->frame_size;
216  int32_t tmp;
217 
218  mode = estimate_stereo_mode(left, right, n);
219 
220  switch (mode) {
222  s->interlacing_leftweight = 0;
223  s->interlacing_shift = 0;
224  break;
226  for (i = 0; i < n; i++)
227  right[i] = left[i] - right[i];
228  s->interlacing_leftweight = 1;
229  s->interlacing_shift = 0;
230  break;
232  for (i = 0; i < n; i++) {
233  tmp = right[i];
234  right[i] = left[i] - right[i];
235  left[i] = tmp + (right[i] >> 31);
236  }
237  s->interlacing_leftweight = 1;
238  s->interlacing_shift = 31;
239  break;
240  default:
241  for (i = 0; i < n; i++) {
242  tmp = left[i];
243  left[i] = (tmp + right[i]) >> 1;
244  right[i] = tmp - right[i];
245  }
246  s->interlacing_leftweight = 1;
247  s->interlacing_shift = 1;
248  break;
249  }
250 }
251 
253 {
254  int i;
255  AlacLPCContext lpc = s->lpc[ch];
256  int32_t *residual = s->predictor_buf[ch];
257 
258  if (lpc.lpc_order == 31) {
259  residual[0] = s->sample_buf[ch][0];
260 
261  for (i = 1; i < s->frame_size; i++) {
262  residual[i] = s->sample_buf[ch][i ] -
263  s->sample_buf[ch][i - 1];
264  }
265 
266  return;
267  }
268 
269  // generalised linear predictor
270 
271  if (lpc.lpc_order > 0) {
272  int32_t *samples = s->sample_buf[ch];
273 
274  // generate warm-up samples
275  residual[0] = samples[0];
276  for (i = 1; i <= lpc.lpc_order; i++)
277  residual[i] = sign_extend(samples[i] - samples[i-1], s->write_sample_size);
278 
279  // perform lpc on remaining samples
280  for (i = lpc.lpc_order + 1; i < s->frame_size; i++) {
281  int sum = 1 << (lpc.lpc_quant - 1), res_val, j;
282 
283  for (j = 0; j < lpc.lpc_order; j++) {
284  sum += (samples[lpc.lpc_order-j] - samples[0]) *
285  lpc.lpc_coeff[j];
286  }
287 
288  sum >>= lpc.lpc_quant;
289  sum += samples[0];
290  residual[i] = sign_extend(samples[lpc.lpc_order+1] - sum,
291  s->write_sample_size);
292  res_val = residual[i];
293 
294  if (res_val) {
295  int index = lpc.lpc_order - 1;
296  int neg = (res_val < 0);
297 
298  while (index >= 0 && (neg ? (res_val < 0) : (res_val > 0))) {
299  int val = samples[0] - samples[lpc.lpc_order - index];
300  int sign = (val ? FFSIGN(val) : 0);
301 
302  if (neg)
303  sign *= -1;
304 
305  lpc.lpc_coeff[index] -= sign;
306  val *= sign;
307  res_val -= (val >> lpc.lpc_quant) * (lpc.lpc_order - index);
308  index--;
309  }
310  }
311  samples++;
312  }
313  }
314 }
315 
317 {
318  unsigned int history = s->rc.initial_history;
319  int sign_modifier = 0, i, k;
320  int32_t *samples = s->predictor_buf[ch];
321 
322  for (i = 0; i < s->frame_size;) {
323  int x;
324 
325  k = av_log2((history >> 9) + 3);
326 
327  x = -2 * (*samples) -1;
328  x ^= x >> 31;
329 
330  samples++;
331  i++;
332 
333  encode_scalar(s, x - sign_modifier, k, s->write_sample_size);
334 
335  history += x * s->rc.history_mult -
336  ((history * s->rc.history_mult) >> 9);
337 
338  sign_modifier = 0;
339  if (x > 0xFFFF)
340  history = 0xFFFF;
341 
342  if (history < 128 && i < s->frame_size) {
343  unsigned int block_size = 0;
344 
345  k = 7 - av_log2(history) + ((history + 16) >> 6);
346 
347  while (*samples == 0 && i < s->frame_size) {
348  samples++;
349  i++;
350  block_size++;
351  }
352  encode_scalar(s, block_size, k, 16);
353  sign_modifier = (block_size <= 0xFFFF);
354  history = 0;
355  }
356 
357  }
358 }
359 
361  enum AlacRawDataBlockType element, int instance,
362  const uint8_t *samples0, const uint8_t *samples1)
363 {
364  uint8_t const *samples[2] = { samples0, samples1 };
365  int i, j, channels;
366  int prediction_type = 0;
367  PutBitContext *pb = &s->pbctx;
368 
369  channels = element == TYPE_CPE ? 2 : 1;
370 
371  if (s->verbatim) {
372  write_element_header(s, element, instance);
373  /* samples are channel-interleaved in verbatim mode */
374  if (s->avctx->sample_fmt == AV_SAMPLE_FMT_S32P) {
375  int shift = 32 - s->avctx->bits_per_raw_sample;
376  int32_t const *samples_s32[2] = { (const int32_t *)samples0,
377  (const int32_t *)samples1 };
378  for (i = 0; i < s->frame_size; i++)
379  for (j = 0; j < channels; j++)
381  samples_s32[j][i] >> shift);
382  } else {
383  int16_t const *samples_s16[2] = { (const int16_t *)samples0,
384  (const int16_t *)samples1 };
385  for (i = 0; i < s->frame_size; i++)
386  for (j = 0; j < channels; j++)
388  samples_s16[j][i]);
389  }
390  } else {
392  channels - 1;
393 
394  init_sample_buffers(s, channels, samples);
395  write_element_header(s, element, instance);
396 
397  // extract extra bits if needed
398  if (s->extra_bits) {
399  uint32_t mask = (1 << s->extra_bits) - 1;
400  for (j = 0; j < channels; j++) {
401  int32_t *extra = s->predictor_buf[j];
402  int32_t *smp = s->sample_buf[j];
403  for (i = 0; i < s->frame_size; i++) {
404  extra[i] = smp[i] & mask;
405  smp[i] >>= s->extra_bits;
406  }
407  }
408  }
409 
410  if (channels == 2)
412  else
414  put_bits(pb, 8, s->interlacing_shift);
415  put_bits(pb, 8, s->interlacing_leftweight);
416 
417  for (i = 0; i < channels; i++) {
418  calc_predictor_params(s, i);
419 
420  put_bits(pb, 4, prediction_type);
421  put_bits(pb, 4, s->lpc[i].lpc_quant);
422 
423  put_bits(pb, 3, s->rc.rice_modifier);
424  put_bits(pb, 5, s->lpc[i].lpc_order);
425  // predictor coeff. table
426  for (j = 0; j < s->lpc[i].lpc_order; j++)
427  put_sbits(pb, 16, s->lpc[i].lpc_coeff[j]);
428  }
429 
430  // write extra bits if needed
431  if (s->extra_bits) {
432  uint32_t mask = (1 << s->extra_bits) - 1;
433  for (i = 0; i < s->frame_size; i++) {
434  for (j = 0; j < channels; j++) {
435  put_bits(pb, s->extra_bits, s->predictor_buf[j][i] & mask);
436  }
437  }
438  }
439 
440  // apply lpc and entropy coding to audio samples
441  for (i = 0; i < channels; i++) {
442  alac_linear_predictor(s, i);
443 
444  // TODO: determine when this will actually help. for now it's not used.
445  if (prediction_type == 15) {
446  // 2nd pass 1st order filter
447  int32_t *residual = s->predictor_buf[channels];
448  for (j = s->frame_size - 1; j > 0; j--)
449  residual[j] -= residual[j - 1];
450  }
451  alac_entropy_coder(s, i);
452  }
453  }
454 }
455 
457  uint8_t * const *samples)
458 {
459  PutBitContext *pb = &s->pbctx;
460  const enum AlacRawDataBlockType *ch_elements = ff_alac_channel_elements[s->avctx->channels - 1];
461  const uint8_t *ch_map = ff_alac_channel_layout_offsets[s->avctx->channels - 1];
462  int ch, element, sce, cpe;
463 
464  init_put_bits(pb, avpkt->data, avpkt->size);
465 
466  ch = element = sce = cpe = 0;
467  while (ch < s->avctx->channels) {
468  if (ch_elements[element] == TYPE_CPE) {
469  write_element(s, TYPE_CPE, cpe, samples[ch_map[ch]],
470  samples[ch_map[ch + 1]]);
471  cpe++;
472  ch += 2;
473  } else {
474  write_element(s, TYPE_SCE, sce, samples[ch_map[ch]], NULL);
475  sce++;
476  ch++;
477  }
478  element++;
479  }
480 
481  put_bits(pb, 3, TYPE_END);
482  flush_put_bits(pb);
483 
484  return put_bits_count(pb) >> 3;
485 }
486 
487 static av_always_inline int get_max_frame_size(int frame_size, int ch, int bps)
488 {
489  int header_bits = 23 + 32 * (frame_size < DEFAULT_FRAME_SIZE);
490  return FFALIGN(header_bits + bps * ch * frame_size + 3, 8) / 8;
491 }
492 
494 {
495  AlacEncodeContext *s = avctx->priv_data;
496  ff_lpc_end(&s->lpc_ctx);
497  av_freep(&avctx->extradata);
498  avctx->extradata_size = 0;
499  return 0;
500 }
501 
503 {
504  AlacEncodeContext *s = avctx->priv_data;
505  int ret;
506  uint8_t *alac_extradata;
507 
509 
510  if (avctx->sample_fmt == AV_SAMPLE_FMT_S32P) {
511  if (avctx->bits_per_raw_sample != 24)
512  av_log(avctx, AV_LOG_WARNING, "encoding as 24 bits-per-sample\n");
513  avctx->bits_per_raw_sample = 24;
514  } else {
515  avctx->bits_per_raw_sample = 16;
516  s->extra_bits = 0;
517  }
518 
519  // Set default compression level
521  s->compression_level = 2;
522  else
523  s->compression_level = av_clip(avctx->compression_level, 0, 2);
524 
525  // Initialize default Rice parameters
526  s->rc.history_mult = 40;
527  s->rc.initial_history = 10;
528  s->rc.k_modifier = 14;
529  s->rc.rice_modifier = 4;
530 
532  avctx->channels,
533  avctx->bits_per_raw_sample);
534 
536  if (!avctx->extradata) {
537  ret = AVERROR(ENOMEM);
538  goto error;
539  }
541 
542  alac_extradata = avctx->extradata;
543  AV_WB32(alac_extradata, ALAC_EXTRADATA_SIZE);
544  AV_WB32(alac_extradata+4, MKBETAG('a','l','a','c'));
545  AV_WB32(alac_extradata+12, avctx->frame_size);
546  AV_WB8 (alac_extradata+17, avctx->bits_per_raw_sample);
547  AV_WB8 (alac_extradata+21, avctx->channels);
548  AV_WB32(alac_extradata+24, s->max_coded_frame_size);
549  AV_WB32(alac_extradata+28,
550  avctx->sample_rate * avctx->channels * avctx->bits_per_raw_sample); // average bitrate
551  AV_WB32(alac_extradata+32, avctx->sample_rate);
552 
553  // Set relevant extradata fields
554  if (s->compression_level > 0) {
555  AV_WB8(alac_extradata+18, s->rc.history_mult);
556  AV_WB8(alac_extradata+19, s->rc.initial_history);
557  AV_WB8(alac_extradata+20, s->rc.k_modifier);
558  }
559 
561  if (avctx->min_prediction_order >= 0) {
562  if (avctx->min_prediction_order < MIN_LPC_ORDER ||
564  av_log(avctx, AV_LOG_ERROR, "invalid min prediction order: %d\n",
565  avctx->min_prediction_order);
566  ret = AVERROR(EINVAL);
567  goto error;
568  }
569 
571  }
572 
574  if (avctx->max_prediction_order >= 0) {
575  if (avctx->max_prediction_order < MIN_LPC_ORDER ||
577  av_log(avctx, AV_LOG_ERROR, "invalid max prediction order: %d\n",
578  avctx->max_prediction_order);
579  ret = AVERROR(EINVAL);
580  goto error;
581  }
582 
584  }
585 
587  av_log(avctx, AV_LOG_ERROR,
588  "invalid prediction orders: min=%d max=%d\n",
590  ret = AVERROR(EINVAL);
591  goto error;
592  }
593 
594  s->avctx = avctx;
595 
596  if ((ret = ff_lpc_init(&s->lpc_ctx, avctx->frame_size,
598  FF_LPC_TYPE_LEVINSON)) < 0) {
599  goto error;
600  }
601 
602  return 0;
603 error:
604  alac_encode_close(avctx);
605  return ret;
606 }
607 
608 static int alac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
609  const AVFrame *frame, int *got_packet_ptr)
610 {
611  AlacEncodeContext *s = avctx->priv_data;
612  int out_bytes, max_frame_size, ret;
613 
614  s->frame_size = frame->nb_samples;
615 
616  if (frame->nb_samples < DEFAULT_FRAME_SIZE)
617  max_frame_size = get_max_frame_size(s->frame_size, avctx->channels,
618  avctx->bits_per_raw_sample);
619  else
620  max_frame_size = s->max_coded_frame_size;
621 
622  if ((ret = ff_alloc_packet2(avctx, avpkt, 2 * max_frame_size)) < 0)
623  return ret;
624 
625  /* use verbatim mode for compression_level 0 */
626  if (s->compression_level) {
627  s->verbatim = 0;
628  s->extra_bits = avctx->bits_per_raw_sample - 16;
629  } else {
630  s->verbatim = 1;
631  s->extra_bits = 0;
632  }
633 
634  out_bytes = write_frame(s, avpkt, frame->extended_data);
635 
636  if (out_bytes > max_frame_size) {
637  /* frame too large. use verbatim mode */
638  s->verbatim = 1;
639  s->extra_bits = 0;
640  out_bytes = write_frame(s, avpkt, frame->extended_data);
641  }
642 
643  avpkt->size = out_bytes;
644  *got_packet_ptr = 1;
645  return 0;
646 }
647 
649  .name = "alac",
650  .long_name = NULL_IF_CONFIG_SMALL("ALAC (Apple Lossless Audio Codec)"),
651  .type = AVMEDIA_TYPE_AUDIO,
652  .id = AV_CODEC_ID_ALAC,
653  .priv_data_size = sizeof(AlacEncodeContext),
655  .encode2 = alac_encode_frame,
657  .capabilities = CODEC_CAP_SMALL_LAST_FRAME,
658  .channel_layouts = ff_alac_channel_layouts,
659  .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S32P,
662 };