FFmpeg
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
alac.c
Go to the documentation of this file.
1 /*
2  * ALAC (Apple Lossless Audio Codec) decoder
3  * Copyright (c) 2005 David Hammerton
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * ALAC (Apple Lossless Audio Codec) decoder
25  * @author 2005 David Hammerton
26  * @see http://crazney.net/programs/itunes/alac.html
27  *
28  * Note: This decoder expects a 36-byte QuickTime atom to be
29  * passed through the extradata[_size] fields. This atom is tacked onto
30  * the end of an 'alac' stsd atom and has the following format:
31  *
32  * 32bit atom size
33  * 32bit tag ("alac")
34  * 32bit tag version (0)
35  * 32bit samples per frame (used when not set explicitly in the frames)
36  * 8bit compatible version (0)
37  * 8bit sample size
38  * 8bit history mult (40)
39  * 8bit initial history (14)
40  * 8bit rice param limit (10)
41  * 8bit channels
42  * 16bit maxRun (255)
43  * 32bit max coded frame size (0 means unknown)
44  * 32bit average bitrate (0 means unknown)
45  * 32bit samplerate
46  */
47 
49 #include "avcodec.h"
50 #include "get_bits.h"
51 #include "bytestream.h"
52 #include "internal.h"
53 #include "unary.h"
54 #include "mathops.h"
55 #include "alac_data.h"
56 
/* Size in bytes of the 'alac' extradata atom whose layout is listed in the
 * file header comment (the fields there add up to 36 bytes). */
57 #define ALAC_EXTRADATA_SIZE 36
58 
/*
 * Private decoder state for one ALAC stream.
 *
 * NOTE(review): the functions in this file also access the members avctx,
 * gb, max_samples_per_frame, sample_size, rice_history_mult,
 * rice_initial_history, rice_limit and direct_output, none of which are
 * declared in the lines visible here. This listing appears to have dropped
 * them; confirm against the upstream file before building.
 */
59 typedef struct {
62  int channels; /* channel count: read from extradata, may be replaced by avctx->channels at init */
63 
  /* Per-element work buffers. Only two entries each, because an element
   * carries at most two channels (allocation loops over FFMIN(channels, 2)). */
64  int32_t *predict_error_buffer[2]; /* Rice-decoded prediction residuals */
65  int32_t *output_samples_buffer[2]; /* reconstructed samples; may alias the frame planes when direct_output is set */
66  int32_t *extra_bits_buffer[2]; /* raw low-order bits read separately from the bitstream */
67 
73 
74  int extra_bits; /**< number of extra bits beyond 16-bit */
75  int nb_samples; /**< number of samples in the current frame */
76 
78 } ALACContext;
79 
80 static inline unsigned int decode_scalar(GetBitContext *gb, int k, int bps)
81 {
82  unsigned int x = get_unary_0_9(gb);
83 
84  if (x > 8) { /* RICE THRESHOLD */
85  /* use alternative encoding */
86  x = get_bits_long(gb, bps);
87  } else if (k != 1) {
88  int extrabits = show_bits(gb, k);
89 
90  /* multiply x by 2^k - 1, as part of their strange algorithm */
91  x = (x << k) - x;
92 
93  if (extrabits > 1) {
94  x += extrabits - 1;
95  skip_bits(gb, k);
96  } else
97  skip_bits(gb, k - 1);
98  }
99  return x;
100 }
101 
102 static int rice_decompress(ALACContext *alac, int32_t *output_buffer,
103  int nb_samples, int bps, int rice_history_mult)
104 {
105  int i;
106  unsigned int history = alac->rice_initial_history;
107  int sign_modifier = 0;
108 
109  for (i = 0; i < nb_samples; i++) {
110  int k;
111  unsigned int x;
112 
113  if(get_bits_left(&alac->gb) <= 0)
114  return -1;
115 
116  /* calculate rice param and decode next value */
117  k = av_log2((history >> 9) + 3);
118  k = FFMIN(k, alac->rice_limit);
119  x = decode_scalar(&alac->gb, k, bps);
120  x += sign_modifier;
121  sign_modifier = 0;
122  output_buffer[i] = (x >> 1) ^ -(x & 1);
123 
124  /* update the history */
125  if (x > 0xffff)
126  history = 0xffff;
127  else
128  history += x * rice_history_mult -
129  ((history * rice_history_mult) >> 9);
130 
131  /* special case: there may be compressed blocks of 0 */
132  if ((history < 128) && (i + 1 < nb_samples)) {
133  int block_size;
134 
135  /* calculate rice param and decode block size */
136  k = 7 - av_log2(history) + ((history + 16) >> 6);
137  k = FFMIN(k, alac->rice_limit);
138  block_size = decode_scalar(&alac->gb, k, 16);
139 
140  if (block_size > 0) {
141  if (block_size >= nb_samples - i) {
142  av_log(alac->avctx, AV_LOG_ERROR,
143  "invalid zero block size of %d %d %d\n", block_size,
144  nb_samples, i);
145  block_size = nb_samples - i - 1;
146  }
147  memset(&output_buffer[i + 1], 0,
148  block_size * sizeof(*output_buffer));
149  i += block_size;
150  }
151  if (block_size <= 0xffff)
152  sign_modifier = 1;
153  history = 0;
154  }
155  }
156  return 0;
157 }
158 
/**
 * Signum of an integer.
 *
 * @return 1 if v is positive, -1 if v is negative, 0 if v is zero
 */
static inline int sign_only(int v)
{
    return (v > 0) - (v < 0);
}
163 
/**
 * Reconstruct samples from prediction residuals with ALAC's adaptive
 * linear predictor.
 *
 * @param error_buffer residuals, nb_samples entries
 * @param buffer_out   receives the reconstructed samples; may be the same
 *                     buffer as error_buffer (the type-15 pre-pass in this
 *                     file calls it that way)
 * @param nb_samples   number of samples to process
 * @param bps          sample width used for sign extension of the results
 * @param lpc_coefs    lpc_order predictor coefficients, adapted in place;
 *                     not accessed when lpc_order is 0 or 31
 * @param lpc_order    number of coefficients; 0 copies the residuals
 *                     unchanged, 31 selects plain first-order prediction
 * @param lpc_quant    right shift applied to the prediction sum; a value of
 *                     0 is accepted and means "no scaling, no rounding"
 */
static void lpc_prediction(int32_t *error_buffer, int32_t *buffer_out,
                           int nb_samples, int bps, int16_t *lpc_coefs,
                           int lpc_order, int lpc_quant)
{
    int i;
    int32_t *pred = buffer_out;
    /* Rounding offset for the quantisation shift. lpc_quant comes from a
     * 4-bit bitstream field and can be 0; 1 << (0 - 1) would be a negative
     * shift count, which is undefined behaviour, so use no offset then. */
    const int round = lpc_quant > 0 ? 1 << (lpc_quant - 1) : 0;

    /* first sample always copies */
    *buffer_out = *error_buffer;

    if (nb_samples <= 1)
        return;

    if (!lpc_order) {
        memcpy(&buffer_out[1], &error_buffer[1],
               (nb_samples - 1) * sizeof(*buffer_out));
        return;
    }

    if (lpc_order == 31) {
        /* simple 1st-order prediction */
        for (i = 1; i < nb_samples; i++) {
            buffer_out[i] = sign_extend(buffer_out[i - 1] + error_buffer[i],
                                        bps);
        }
        return;
    }

    /* read warm-up samples */
    for (i = 1; i <= lpc_order && i < nb_samples; i++)
        buffer_out[i] = sign_extend(buffer_out[i - 1] + error_buffer[i], bps);

    /* NOTE: 4 and 8 are very common cases that could be optimized. */

    for (; i < nb_samples; i++) {
        int j;
        int val = 0;
        int error_val = error_buffer[i];
        int error_sign;
        /* d is the oldest sample of the window; pred then points at the
         * lpc_order samples that follow it */
        int d = *pred++;

        /* LPC prediction */
        for (j = 0; j < lpc_order; j++)
            val += (pred[j] - d) * lpc_coefs[j];
        val = (val + round) >> lpc_quant;
        val += d + error_val;
        buffer_out[i] = sign_extend(val, bps);

        /* adapt LPC coefficients */
        error_sign = sign_only(error_val);
        if (error_sign) {
            /* nudge coefficients until the remaining error changes sign
             * or all coefficients were visited */
            for (j = 0; j < lpc_order && error_val * error_sign > 0; j++) {
                int sign;
                val = d - pred[j];
                sign = sign_only(val) * error_sign;
                lpc_coefs[j] -= sign;
                val *= sign;
                error_val -= (val >> lpc_quant) * (j + 1);
            }
        }
    }
}
226 
/*
 * Undo the inter-channel decorrelation of a two-channel element, in place.
 *
 * NOTE(review): the first line of this definition (return type, function
 * name and leading parameters) is missing from this listing. From the body,
 * the leading parameters are a two-entry array of sample buffers named
 * "buffer" and a sample count named "nb_samples"; confirm against upstream.
 *
 * For every sample index i:
 *   a = buffer[0][i] - ((buffer[1][i] * decorr_left_weight) >> decorr_shift)
 *   b = buffer[1][i] + a
 * and the results are stored as buffer[0][i] = b, buffer[1][i] = a.
 */
228  int decorr_shift, int decorr_left_weight)
229 {
230  int i;
231 
232  for (i = 0; i < nb_samples; i++) {
233  int32_t a, b;
234 
235  a = buffer[0][i];
236  b = buffer[1][i];
237 
238  a -= (b * decorr_left_weight) >> decorr_shift;
239  b += a;
240 
  /* note the swap: the sum goes to channel 0, the difference to channel 1 */
241  buffer[0][i] = b;
242  buffer[1][i] = a;
243  }
244 }
245 
/**
 * Merge the separately stored low-order bits back into the decoded samples.
 *
 * Each sample becomes (sample << extra_bits) | extra_bits_buffer value.
 *
 * @param buffer            per-channel decoded samples, updated in place
 * @param extra_bits_buffer per-channel low-order bits, one entry per sample
 * @param extra_bits        number of low-order bits to append
 * @param channels          number of channels to process (at most 2)
 * @param nb_samples        number of samples per channel
 */
static void append_extra_bits(int32_t *buffer[2], int32_t *extra_bits_buffer[2],
                              int extra_bits, int channels, int nb_samples)
{
    int i, ch;

    for (ch = 0; ch < channels; ch++)
        for (i = 0; i < nb_samples; i++) {
            /* shift in unsigned arithmetic: left-shifting a negative signed
             * value is undefined behaviour in C */
            uint32_t merged = ((uint32_t)buffer[ch][i] << extra_bits) |
                              (uint32_t)extra_bits_buffer[ch][i];
            buffer[ch][i] = (int32_t)merged;
        }
}
255 
/*
 * Decode one syntax element (one or two channels) of the current packet
 * from alac->gb and store its samples in the output frame.
 *
 * avctx     codec context; its priv_data is the ALACContext
 * frame     output frame; its buffer is requested here when the first
 *           element of a packet is decoded (alac->nb_samples still 0)
 * ch_index  index of the element's first channel in the output
 * channels  number of channels in this element; the per-element arrays
 *           below have two entries
 *
 * Returns 0 on success or a negative value on error.
 * NOTE(review): the "bits left" checks return a bare -1 rather than an
 * AVERROR code.
 * NOTE(review): four lines that open function calls are missing from this
 * listing (marked below); confirm against the upstream file.
 */
256 static int decode_element(AVCodecContext *avctx, AVFrame *frame, int ch_index,
257  int channels)
258 {
259  ALACContext *alac = avctx->priv_data;
260  int has_size, bps, is_compressed, decorr_shift, decorr_left_weight, ret;
261  uint32_t output_samples;
262  int i, ch;
263 
264  skip_bits(&alac->gb, 4); /* element instance tag */
265  skip_bits(&alac->gb, 12); /* unused header bits */
266 
267  /* the number of output samples is stored in the frame */
268  has_size = get_bits1(&alac->gb);
269 
  /* 2-bit field counted in bytes, converted to a bit count (0, 8, 16 or 24) */
270  alac->extra_bits = get_bits(&alac->gb, 2) << 3;
  /* coded width: sample size minus the separately stored bits, plus one
   * extra bit for a two-channel element */
271  bps = alac->sample_size - alac->extra_bits + channels - 1;
  /* the unsigned comparison also rejects a negative bps */
272  if (bps > 32U) {
273  av_log(avctx, AV_LOG_ERROR, "bps is unsupported: %d\n", bps);
274  return AVERROR_PATCHWELCOME;
275  }
276 
277  /* whether the frame is compressed */
278  is_compressed = !get_bits1(&alac->gb);
279 
280  if (has_size)
281  output_samples = get_bits_long(&alac->gb, 32);
282  else
283  output_samples = alac->max_samples_per_frame;
  /* the work buffers are sized for max_samples_per_frame, so larger counts
   * must be rejected */
284  if (!output_samples || output_samples > alac->max_samples_per_frame) {
285  av_log(avctx, AV_LOG_ERROR, "invalid samples per frame: %d\n",
286  output_samples);
287  return AVERROR_INVALIDDATA;
288  }
  /* first element of the packet: request the output buffer; every later
   * element must carry the same sample count */
289  if (!alac->nb_samples) {
290  /* get output buffer */
291  frame->nb_samples = output_samples;
292  if ((ret = ff_get_buffer(avctx, frame)) < 0) {
293  av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
294  return ret;
295  }
296  } else if (output_samples != alac->nb_samples) {
297  av_log(avctx, AV_LOG_ERROR, "sample count mismatch: %u != %d\n",
298  output_samples, alac->nb_samples);
299  return AVERROR_INVALIDDATA;
300  }
301  alac->nb_samples = output_samples;
  /* in direct-output mode decode straight into the frame's channel planes */
302  if (alac->direct_output) {
303  for (ch = 0; ch < channels; ch++)
304  alac->output_samples_buffer[ch] = (int32_t *)frame->extended_data[ch_index + ch];
305  }
306 
307  if (is_compressed) {
308  int16_t lpc_coefs[2][32];
309  int lpc_order[2];
310  int prediction_type[2];
311  int lpc_quant[2];
312  int rice_history_mult[2];
313 
314  decorr_shift = get_bits(&alac->gb, 8);
315  decorr_left_weight = get_bits(&alac->gb, 8);
316 
  /* per-channel predictor header */
317  for (ch = 0; ch < channels; ch++) {
318  prediction_type[ch] = get_bits(&alac->gb, 4);
  /* NOTE(review): this 4-bit field can be 0 and is not validated here */
319  lpc_quant[ch] = get_bits(&alac->gb, 4);
320  rice_history_mult[ch] = get_bits(&alac->gb, 3);
  /* 5-bit field, so at most 31 coefficients; lpc_coefs has room for 32 */
321  lpc_order[ch] = get_bits(&alac->gb, 5);
322 
323  /* read the predictor table */
  /* coefficients are stored in the bitstream in reverse index order */
324  for (i = lpc_order[ch] - 1; i >= 0; i--)
325  lpc_coefs[ch][i] = get_sbits(&alac->gb, 16);
326  }
327 
  /* the low-order bits of all samples precede the Rice-coded data,
   * interleaved by channel */
328  if (alac->extra_bits) {
329  for (i = 0; i < alac->nb_samples; i++) {
330  if(get_bits_left(&alac->gb) <= 0)
331  return -1;
332  for (ch = 0; ch < channels; ch++)
333  alac->extra_bits_buffer[ch][i] = get_bits(&alac->gb, alac->extra_bits);
334  }
335  }
336  for (ch = 0; ch < channels; ch++) {
  /* this inner "ret" shadows the function-level one */
337  int ret=rice_decompress(alac, alac->predict_error_buffer[ch],
338  alac->nb_samples, bps,
339  rice_history_mult[ch] * alac->rice_history_mult / 4);
340  if(ret<0)
341  return ret;
342 
343  /* adaptive FIR filter */
344  if (prediction_type[ch] == 15) {
345  /* Prediction type 15 runs the adaptive FIR twice.
346  * The first pass uses the special-case coef_num = 31, while
347  * the second pass uses the coefs from the bitstream.
348  *
349  * However, this prediction type is not currently used by the
350  * reference encoder.
351  */
  /* NOTE(review): the line opening this call is missing from the listing;
   * the visible arguments match an in-place lpc_prediction() pass over
   * predict_error_buffer[ch] — confirm against upstream */
353  alac->predict_error_buffer[ch],
354  alac->nb_samples, bps, NULL, 31, 0);
355  } else if (prediction_type[ch] > 0) {
356  av_log(avctx, AV_LOG_WARNING, "unknown prediction type: %i\n",
357  prediction_type[ch]);
358  }
  /* NOTE(review): the line opening this call is missing from the listing;
   * the visible arguments match lpc_prediction() writing into
   * output_samples_buffer[ch] — confirm against upstream */
360  alac->output_samples_buffer[ch], alac->nb_samples,
361  bps, lpc_coefs[ch], lpc_order[ch], lpc_quant[ch]);
362  }
363  } else {
364  /* not compressed, easy case */
365  for (i = 0; i < alac->nb_samples; i++) {
366  if(get_bits_left(&alac->gb) <= 0)
367  return -1;
368  for (ch = 0; ch < channels; ch++) {
369  alac->output_samples_buffer[ch][i] =
370  get_sbits_long(&alac->gb, alac->sample_size);
371  }
372  }
  /* uncompressed samples are stored whole: disable the post-processing below */
373  alac->extra_bits = 0;
374  decorr_shift = 0;
375  decorr_left_weight = 0;
376  }
377 
378  if (channels == 2 && decorr_left_weight) {
  /* NOTE(review): the line opening this call is missing from the listing;
   * presumably the stereo decorrelation helper applied to
   * output_samples_buffer — confirm against upstream */
380  decorr_shift, decorr_left_weight);
381  }
382 
383  if (alac->extra_bits) {
  /* NOTE(review): the line opening this call is missing from the listing;
   * the visible arguments match the tail of append_extra_bits() — confirm
   * against upstream */
385  alac->extra_bits, channels, alac->nb_samples);
386  }
387 
  /* convert the 32-bit work samples to the output sample format */
388  if(av_sample_fmt_is_planar(avctx->sample_fmt)) {
389  switch(alac->sample_size) {
390  case 16: {
  /* narrow each channel into its own 16-bit plane */
391  for (ch = 0; ch < channels; ch++) {
392  int16_t *outbuffer = (int16_t *)frame->extended_data[ch_index + ch];
393  for (i = 0; i < alac->nb_samples; i++)
394  *outbuffer++ = alac->output_samples_buffer[ch][i];
395  }}
396  break;
397  case 24: {
  /* scale 24-bit samples to fill 32 bits, in place; presumably
   * output_samples_buffer aliases the frame planes here (direct_output)
   * — TODO confirm */
398  for (ch = 0; ch < channels; ch++) {
399  for (i = 0; i < alac->nb_samples; i++)
400  alac->output_samples_buffer[ch][i] <<= 8;
401  }}
402  break;
403  }
404  }else{
  /* interleaved output: write this element's channels starting at ch_index
   * and step over the remaining channels of each sample frame */
405  switch(alac->sample_size) {
406  case 16: {
407  int16_t *outbuffer = ((int16_t *)frame->extended_data[0]) + ch_index;
408  for (i = 0; i < alac->nb_samples; i++) {
409  for (ch = 0; ch < channels; ch++)
410  *outbuffer++ = alac->output_samples_buffer[ch][i];
411  outbuffer += alac->channels - channels;
412  }
413  }
414  break;
415  case 24: {
416  int32_t *outbuffer = ((int32_t *)frame->extended_data[0]) + ch_index;
417  for (i = 0; i < alac->nb_samples; i++) {
418  for (ch = 0; ch < channels; ch++)
419  *outbuffer++ = alac->output_samples_buffer[ch][i] << 8;
420  outbuffer += alac->channels - channels;
421  }
422  }
423  break;
424  case 32: {
425  int32_t *outbuffer = ((int32_t *)frame->extended_data[0]) + ch_index;
426  for (i = 0; i < alac->nb_samples; i++) {
427  for (ch = 0; ch < channels; ch++)
428  *outbuffer++ = alac->output_samples_buffer[ch][i];
429  outbuffer += alac->channels - channels;
430  }
431  }
432  break;
433  }
434  }
435 
436  return 0;
437 }
438 
/*
 * Decode one packet: read 3-bit element tags and decode each element with
 * decode_element() until the TYPE_END tag is found.
 *
 * avctx          codec context; its priv_data is the ALACContext
 * data           output AVFrame
 * got_frame_ptr  set to 1 when the end tag was reached
 * avpkt          input packet
 *
 * Returns avpkt->size on success or a negative AVERROR code.
 * NOTE(review): *got_frame_ptr is set to 1 whenever the end tag is found,
 * even if no element was decoded or fewer channels than alac->channels were
 * produced — confirm this is intended.
 */
439 static int alac_decode_frame(AVCodecContext *avctx, void *data,
440  int *got_frame_ptr, AVPacket *avpkt)
441 {
442  ALACContext *alac = avctx->priv_data;
443  AVFrame *frame = data;
444  enum AlacRawDataBlockType element;
445  int channels;
446  int ch, ret, got_end;
447 
448  init_get_bits(&alac->gb, avpkt->data, avpkt->size * 8);
449 
450  got_end = 0;
  /* nb_samples == 0 tells decode_element() that no output buffer has been
   * requested for this packet yet */
451  alac->nb_samples = 0;
452  ch = 0;
453  while (get_bits_left(&alac->gb) >= 3) {
454  element = get_bits(&alac->gb, 3);
455  if (element == TYPE_END) {
456  got_end = 1;
457  break;
458  }
  /* only element types up to TYPE_CPE, plus TYPE_LFE, are handled */
459  if (element > TYPE_CPE && element != TYPE_LFE) {
460  av_log(avctx, AV_LOG_ERROR, "syntax element unsupported: %d\n", element);
461  return AVERROR_PATCHWELCOME;
462  }
463 
  /* a TYPE_CPE element carries two channels, every other accepted type one */
464  channels = (element == TYPE_CPE) ? 2 : 1;
  /* reject elements that would exceed the stream's channel count, both by
   * running count and by mapped output position */
465  if ( ch + channels > alac->channels
466  || ff_alac_channel_layout_offsets[alac->channels - 1][ch] + channels > alac->channels
467  ) {
468  av_log(avctx, AV_LOG_ERROR, "invalid element channel count\n");
469  return AVERROR_INVALIDDATA;
470  }
471 
  /* NOTE(review): the line holding the third argument (the output channel
   * index) is missing from this listing; presumably the same
   * ff_alac_channel_layout_offsets lookup as in the check above — confirm
   * against upstream */
472  ret = decode_element(avctx, frame,
474  channels);
  /* an element error is only propagated while unread bits remain */
475  if (ret < 0 && get_bits_left(&alac->gb))
476  return ret;
477 
478  ch += channels;
479  }
480  if (!got_end) {
481  av_log(avctx, AV_LOG_ERROR, "no end tag found. incomplete packet.\n");
482  return AVERROR_INVALIDDATA;
483  }
484 
  /* more than one byte of unread data is logged but not treated as fatal */
485  if (avpkt->size * 8 - get_bits_count(&alac->gb) > 8) {
486  av_log(avctx, AV_LOG_ERROR, "Error : %d bits left\n",
487  avpkt->size * 8 - get_bits_count(&alac->gb));
488  }
489 
490  *got_frame_ptr = 1;
491 
492  return avpkt->size;
493 }
494 
/*
 * Release the per-channel work buffers of the decoder.
 *
 * NOTE(review): the line with this function's signature is missing from
 * this listing. The body uses a parameter named "avctx", and
 * allocate_buffers() calls alac_decode_close(alac->avctx) — confirm the
 * exact declaration against upstream.
 *
 * Always returns 0.
 */
496 {
497  ALACContext *alac = avctx->priv_data;
498 
499  int ch;
  /* at most two work buffers of each kind exist, matching allocate_buffers() */
500  for (ch = 0; ch < FFMIN(alac->channels, 2); ch++) {
501  av_freep(&alac->predict_error_buffer[ch]);
  /* in direct-output mode these pointers refer to frame data that this
   * decoder did not allocate, so they are not freed here */
502  if (!alac->direct_output)
503  av_freep(&alac->output_samples_buffer[ch]);
504  av_freep(&alac->extra_bits_buffer[ch]);
505  }
506 
507  return 0;
508 }
509 
/*
 * Allocate the per-channel work buffers, each large enough for
 * max_samples_per_frame 32-bit samples.
 *
 * Returns 0 on success. On failure the buffers are released through
 * alac_decode_close() and AVERROR(ENOMEM) is returned.
 *
 * NOTE(review): the opening lines of two FF_ALLOC_OR_GOTO invocations are
 * missing from this listing (marked below); confirm against upstream.
 */
510 static int allocate_buffers(ALACContext *alac)
511 {
512  int ch;
513  int buf_size;
514 
  /* make sure the byte size computed below fits in an int */
515  if (alac->max_samples_per_frame > INT_MAX / sizeof(int32_t))
516  goto buf_alloc_fail;
517  buf_size = alac->max_samples_per_frame * sizeof(int32_t);
518 
  /* an element has at most two channels, so two sets of buffers suffice */
519  for (ch = 0; ch < FFMIN(alac->channels, 2); ch++) {
  /* NOTE(review): opening line missing; presumably allocates
   * predict_error_buffer[ch], which alac_decode_close() frees */
521  buf_size, buf_alloc_fail);
522 
  /* samples wider than 16 bits in a planar format are decoded straight into
   * the output frame, so no separate output buffer is needed then */
523  alac->direct_output = alac->sample_size > 16 && av_sample_fmt_is_planar(alac->avctx->sample_fmt);
524  if (!alac->direct_output) {
  /* NOTE(review): opening line missing; presumably allocates
   * output_samples_buffer[ch] */
526  buf_size, buf_alloc_fail);
527  }
528 
529  FF_ALLOC_OR_GOTO(alac->avctx, alac->extra_bits_buffer[ch],
530  buf_size, buf_alloc_fail);
531  }
532  return 0;
533 buf_alloc_fail:
534  alac_decode_close(alac->avctx);
535  return AVERROR(ENOMEM);
536 }
537 
538 static int alac_set_info(ALACContext *alac)
539 {
540  GetByteContext gb;
541 
542  bytestream2_init(&gb, alac->avctx->extradata,
543  alac->avctx->extradata_size);
544 
545  bytestream2_skipu(&gb, 12); // size:4, alac:4, version:4
546 
547  alac->max_samples_per_frame = bytestream2_get_be32u(&gb);
548  if (!alac->max_samples_per_frame || alac->max_samples_per_frame > INT_MAX) {
549  av_log(alac->avctx, AV_LOG_ERROR, "max samples per frame invalid: %u\n",
550  alac->max_samples_per_frame);
551  return AVERROR_INVALIDDATA;
552  }
553  bytestream2_skipu(&gb, 1); // compatible version
554  alac->sample_size = bytestream2_get_byteu(&gb);
555  alac->rice_history_mult = bytestream2_get_byteu(&gb);
556  alac->rice_initial_history = bytestream2_get_byteu(&gb);
557  alac->rice_limit = bytestream2_get_byteu(&gb);
558  alac->channels = bytestream2_get_byteu(&gb);
559  bytestream2_get_be16u(&gb); // maxRun
560  bytestream2_get_be32u(&gb); // max coded frame size
561  bytestream2_get_be32u(&gb); // average bitrate
562  bytestream2_get_be32u(&gb); // samplerate
563 
564  return 0;
565 }
566 
/*
 * Decoder initialisation: parse the extradata, choose the output sample
 * format, settle the channel count and layout, and allocate work buffers.
 *
 * NOTE(review): the line with this function's signature is missing from
 * this listing (the body uses a parameter named "avctx"), as are two
 * statements marked below; confirm against upstream.
 *
 * Returns 0 on success or a negative value on error.
 */
568 {
569  int ret;
570  int req_packed;
571  ALACContext *alac = avctx->priv_data;
572  alac->avctx = avctx;
573 
574  /* initialize from the extradata */
  /* NOTE(review): the "if" line that opens this block is missing from the
   * listing; judging by the message it checks the extradata size,
   * presumably against ALAC_EXTRADATA_SIZE */
576  av_log(avctx, AV_LOG_ERROR, "extradata is too small\n");
577  return AVERROR_INVALIDDATA;
578  }
  /* NOTE(review): the error code returned by alac_set_info() is dropped and
   * a bare -1 is returned instead */
579  if (alac_set_info(alac)) {
580  av_log(avctx, AV_LOG_ERROR, "set_info failed\n");
581  return -1;
582  }
583 
  /* NOTE(review): the statement that assigns req_packed is missing from the
   * listing; the variable selects packed over planar formats below */
585  switch (alac->sample_size) {
586  case 16: avctx->sample_fmt = req_packed ? AV_SAMPLE_FMT_S16 : AV_SAMPLE_FMT_S16P;
587  break;
  /* 24-bit samples share the 32-bit output formats with 32-bit samples */
588  case 24:
589  case 32: avctx->sample_fmt = req_packed ? AV_SAMPLE_FMT_S32 : AV_SAMPLE_FMT_S32P;
590  break;
591  default: av_log_ask_for_sample(avctx, "Sample depth %d is not supported.\n",
592  alac->sample_size);
593  return AVERROR_PATCHWELCOME;
594  }
595  avctx->bits_per_raw_sample = alac->sample_size;
596 
  /* use the extradata channel count when it is within 1..ALAC_MAX_CHANNELS;
   * otherwise fall back to the count already present in avctx */
597  if (alac->channels < 1) {
598  av_log(avctx, AV_LOG_WARNING, "Invalid channel count\n");
599  alac->channels = avctx->channels;
600  } else {
601  if (alac->channels > ALAC_MAX_CHANNELS)
602  alac->channels = avctx->channels;
603  else
604  avctx->channels = alac->channels;
605  }
  /* after the fallback both counts are equal, so this check also keeps the
   * table lookup below in range */
606  if (avctx->channels > ALAC_MAX_CHANNELS || avctx->channels <= 0 ) {
607  av_log(avctx, AV_LOG_ERROR, "Unsupported channel count: %d\n",
608  avctx->channels);
609  return AVERROR_PATCHWELCOME;
610  }
611  avctx->channel_layout = ff_alac_channel_layouts[alac->channels - 1];
612 
613  if ((ret = allocate_buffers(alac)) < 0) {
614  av_log(avctx, AV_LOG_ERROR, "Error allocating buffers\n");
615  return ret;
616  }
617 
618  return 0;
619 }
620 
/*
 * Codec registration entry for the ALAC decoder.
 *
 * NOTE(review): the line that opens this initializer (type and variable
 * name) and listing lines 626-628 are missing here; presumably the latter
 * hook up the init, close and decode callbacks defined in this file —
 * confirm against upstream.
 */
622  .name = "alac",
623  .type = AVMEDIA_TYPE_AUDIO,
624  .id = AV_CODEC_ID_ALAC,
  /* size of the private context that callbacks receive as avctx->priv_data */
625  .priv_data_size = sizeof(ALACContext),
629  .capabilities = CODEC_CAP_DR1,
630  .long_name = NULL_IF_CONFIG_SMALL("ALAC (Apple Lossless Audio Codec)"),
631 };