FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
alac.c
Go to the documentation of this file.
1 /*
2  * ALAC (Apple Lossless Audio Codec) decoder
3  * Copyright (c) 2005 David Hammerton
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * ALAC (Apple Lossless Audio Codec) decoder
25  * @author 2005 David Hammerton
26  * @see http://crazney.net/programs/itunes/alac.html
27  *
28  * Note: This decoder expects a 36-byte QuickTime atom to be
29  * passed through the extradata[_size] fields. This atom is tacked onto
30  * the end of an 'alac' stsd atom and has the following format:
31  *
32  * 32bit atom size
33  * 32bit tag ("alac")
34  * 32bit tag version (0)
35  * 32bit samples per frame (used when not set explicitly in the frames)
36  * 8bit compatible version (0)
37  * 8bit sample size
38  * 8bit history mult (40)
39  * 8bit initial history (10)
40  * 8bit rice param limit (14)
41  * 8bit channels
42  * 16bit maxRun (255)
43  * 32bit max coded frame size (0 means unknown)
44  * 32bit average bitrate (0 means unknown)
45  * 32bit samplerate
46  */
47 
49 #include "avcodec.h"
50 #include "get_bits.h"
51 #include "bytestream.h"
52 #include "internal.h"
53 #include "thread.h"
54 #include "unary.h"
55 #include "mathops.h"
56 #include "alac_data.h"
57 
58 #define ALAC_EXTRADATA_SIZE 36
59 
60 typedef struct {
63  int channels;
64 
65  int32_t *predict_error_buffer[2];
66  int32_t *output_samples_buffer[2];
67  int32_t *extra_bits_buffer[2];
68 
74 
75  int extra_bits; /**< number of extra bits beyond 16-bit */
76  int nb_samples; /**< number of samples in the current frame */
77 
79 } ALACContext;
80 
81 static inline unsigned int decode_scalar(GetBitContext *gb, int k, int bps)
82 {
83  unsigned int x = get_unary_0_9(gb);
84 
85  if (x > 8) { /* RICE THRESHOLD */
86  /* use alternative encoding */
87  x = get_bits_long(gb, bps);
88  } else if (k != 1) {
89  int extrabits = show_bits(gb, k);
90 
91  /* multiply x by 2^k - 1, as part of their strange algorithm */
92  x = (x << k) - x;
93 
94  if (extrabits > 1) {
95  x += extrabits - 1;
96  skip_bits(gb, k);
97  } else
98  skip_bits(gb, k - 1);
99  }
100  return x;
101 }
102 
103 static int rice_decompress(ALACContext *alac, int32_t *output_buffer,
104  int nb_samples, int bps, int rice_history_mult)
105 {
106  int i;
107  unsigned int history = alac->rice_initial_history;
108  int sign_modifier = 0;
109 
110  for (i = 0; i < nb_samples; i++) {
111  int k;
112  unsigned int x;
113 
114  if(get_bits_left(&alac->gb) <= 0)
115  return -1;
116 
117  /* calculate rice param and decode next value */
118  k = av_log2((history >> 9) + 3);
119  k = FFMIN(k, alac->rice_limit);
120  x = decode_scalar(&alac->gb, k, bps);
121  x += sign_modifier;
122  sign_modifier = 0;
123  output_buffer[i] = (x >> 1) ^ -(x & 1);
124 
125  /* update the history */
126  if (x > 0xffff)
127  history = 0xffff;
128  else
129  history += x * rice_history_mult -
130  ((history * rice_history_mult) >> 9);
131 
132  /* special case: there may be compressed blocks of 0 */
133  if ((history < 128) && (i + 1 < nb_samples)) {
134  int block_size;
135 
136  /* calculate rice param and decode block size */
137  k = 7 - av_log2(history) + ((history + 16) >> 6);
138  k = FFMIN(k, alac->rice_limit);
139  block_size = decode_scalar(&alac->gb, k, 16);
140 
141  if (block_size > 0) {
142  if (block_size >= nb_samples - i) {
143  av_log(alac->avctx, AV_LOG_ERROR,
144  "invalid zero block size of %d %d %d\n", block_size,
145  nb_samples, i);
146  block_size = nb_samples - i - 1;
147  }
148  memset(&output_buffer[i + 1], 0,
149  block_size * sizeof(*output_buffer));
150  i += block_size;
151  }
152  if (block_size <= 0xffff)
153  sign_modifier = 1;
154  history = 0;
155  }
156  }
157  return 0;
158 }
159 
/**
 * Reduce a value to its sign.
 *
 * @return -1, 0 or 1 for negative, zero and positive v respectively
 */
static inline int sign_only(int v)
{
    return (v > 0) - (v < 0);
}
164 
/**
 * Reconstruct samples from prediction residuals with an adaptive LPC filter.
 *
 * @param error_buffer residuals, nb_samples entries
 * @param buffer_out   receives the reconstructed samples
 * @param nb_samples   number of samples to process
 * @param bps          sample width; results are sign-extended to this width
 * @param lpc_coefs    lpc_order filter coefficients, adapted in place; not
 *                     accessed when lpc_order is 0 or 31
 * @param lpc_order    filter order; 0 copies the residuals through and 31
 *                     selects plain first-order prediction
 * @param lpc_quant    right shift applied to the filter accumulator
 */
static void lpc_prediction(int32_t *error_buffer, int32_t *buffer_out,
                           int nb_samples, int bps, int16_t *lpc_coefs,
                           int lpc_order, int lpc_quant)
{
    int i;
    int32_t *pred = buffer_out;
    /* Rounding term for the quantizer shift. A zero shift needs no rounding,
     * and evaluating 1 << (0 - 1) would be undefined behaviour. */
    const int64_t rounding = lpc_quant > 0 ? (int64_t)1 << (lpc_quant - 1) : 0;

    /* first sample always copies */
    *buffer_out = *error_buffer;

    if (nb_samples <= 1)
        return;

    if (!lpc_order) {
        memcpy(&buffer_out[1], &error_buffer[1],
               (nb_samples - 1) * sizeof(*buffer_out));
        return;
    }

    if (lpc_order == 31) {
        /* simple 1st-order prediction */
        for (i = 1; i < nb_samples; i++) {
            buffer_out[i] = sign_extend(buffer_out[i - 1] + error_buffer[i],
                                        bps);
        }
        return;
    }

    /* read warm-up samples */
    for (i = 1; i <= lpc_order && i < nb_samples; i++)
        buffer_out[i] = sign_extend(buffer_out[i - 1] + error_buffer[i], bps);

    /* NOTE: 4 and 8 are very common cases that could be optimized. */

    for (; i < nb_samples; i++) {
        int j;
        unsigned int acc = 0;
        int val;
        int error_val = error_buffer[i];
        int error_sign;
        int d = *pred++;

        /* LPC prediction. The sum is accumulated in unsigned arithmetic so
         * that corrupt input wraps instead of overflowing a signed int. */
        for (j = 0; j < lpc_order; j++)
            acc += ((unsigned int)pred[j] - (unsigned int)d) *
                   (unsigned int)lpc_coefs[j];
        val = (int)(((int)acc + rounding) >> lpc_quant);
        val = (int)((unsigned int)val + (unsigned int)d +
                    (unsigned int)error_val);
        buffer_out[i] = sign_extend(val, bps);

        /* adapt LPC coefficients */
        error_sign = sign_only(error_val);
        if (error_sign) {
            /* nudge coefficients until the residual changes sign or is used up */
            for (j = 0; j < lpc_order && error_val * error_sign > 0; j++) {
                int sign;
                val  = d - pred[j];
                sign = sign_only(val) * error_sign;
                lpc_coefs[j] -= sign;
                val *= sign;
                error_val -= (val >> lpc_quant) * (j + 1);
            }
        }
    }
}
227 
/**
 * Undo the inter-channel decorrelation of a channel pair in place.
 *
 * On entry buffer[0] and buffer[1] hold the two coded channels; on return
 * they hold the reconstructed pair.
 *
 * @param buffer             the two channel buffers, nb_samples entries each
 * @param nb_samples         number of samples per channel
 * @param decorr_shift       right shift applied to the weighted channel
 * @param decorr_left_weight weight applied to buffer[1] before shifting
 */
static void decorrelate_stereo(int32_t *buffer[2], int nb_samples,
                               int decorr_shift, int decorr_left_weight)
{
    /* Shifting a 32-bit value by 32 or more is undefined; shifting by 31
     * already yields the pure sign fill a larger shift would produce. */
    const int shift = decorr_shift > 31 ? 31 : decorr_shift;
    int i;

    for (i = 0; i < nb_samples; i++) {
        /* unsigned arithmetic: wraps on overflow instead of being undefined */
        uint32_t a = (uint32_t)buffer[0][i];
        uint32_t b = (uint32_t)buffer[1][i];

        a -= (uint32_t)((int32_t)(b * (uint32_t)decorr_left_weight) >> shift);
        b += a;

        buffer[0][i] = (int32_t)b;
        buffer[1][i] = (int32_t)a;
    }
}
246 
/**
 * Merge the separately coded low-order bits back into the samples.
 *
 * Each sample is shifted left by extra_bits and OR-ed with the matching
 * entry of extra_bits_buffer.
 *
 * @param buffer            per-channel sample buffers, modified in place
 * @param extra_bits_buffer per-channel low-order bits
 * @param extra_bits        number of low-order bits per sample
 * @param channels          number of channels to process
 * @param nb_samples        number of samples per channel
 */
static void append_extra_bits(int32_t *buffer[2], int32_t *extra_bits_buffer[2],
                              int extra_bits, int channels, int nb_samples)
{
    int i, ch;

    for (ch = 0; ch < channels; ch++) {
        for (i = 0; i < nb_samples; i++) {
            /* shift as unsigned: left-shifting a negative value is undefined */
            uint32_t high = (uint32_t)buffer[ch][i] << extra_bits;

            buffer[ch][i] = (int32_t)(high | (uint32_t)extra_bits_buffer[ch][i]);
        }
    }
}
256 
257 static int decode_element(AVCodecContext *avctx, AVFrame *frame, int ch_index,
258  int channels)
259 {
260  ALACContext *alac = avctx->priv_data;
261  int has_size, bps, is_compressed, decorr_shift, decorr_left_weight, ret;
262  uint32_t output_samples;
263  int i, ch;
264 
265  skip_bits(&alac->gb, 4); /* element instance tag */
266  skip_bits(&alac->gb, 12); /* unused header bits */
267 
268  /* the number of output samples is stored in the frame */
269  has_size = get_bits1(&alac->gb);
270 
271  alac->extra_bits = get_bits(&alac->gb, 2) << 3;
272  bps = alac->sample_size - alac->extra_bits + channels - 1;
273  if (bps > 32U) {
274  av_log(avctx, AV_LOG_ERROR, "bps is unsupported: %d\n", bps);
275  return AVERROR_PATCHWELCOME;
276  }
277 
278  /* whether the frame is compressed */
279  is_compressed = !get_bits1(&alac->gb);
280 
281  if (has_size)
282  output_samples = get_bits_long(&alac->gb, 32);
283  else
284  output_samples = alac->max_samples_per_frame;
285  if (!output_samples || output_samples > alac->max_samples_per_frame) {
286  av_log(avctx, AV_LOG_ERROR, "invalid samples per frame: %d\n",
287  output_samples);
288  return AVERROR_INVALIDDATA;
289  }
290  if (!alac->nb_samples) {
291  ThreadFrame tframe = { .f = frame };
292  /* get output buffer */
293  frame->nb_samples = output_samples;
294  if ((ret = ff_thread_get_buffer(avctx, &tframe, 0)) < 0)
295  return ret;
296  } else if (output_samples != alac->nb_samples) {
297  av_log(avctx, AV_LOG_ERROR, "sample count mismatch: %u != %d\n",
298  output_samples, alac->nb_samples);
299  return AVERROR_INVALIDDATA;
300  }
301  alac->nb_samples = output_samples;
302  if (alac->direct_output) {
303  for (ch = 0; ch < channels; ch++)
304  alac->output_samples_buffer[ch] = (int32_t *)frame->extended_data[ch_index + ch];
305  }
306 
307  if (is_compressed) {
308  int16_t lpc_coefs[2][32];
309  int lpc_order[2];
310  int prediction_type[2];
311  int lpc_quant[2];
312  int rice_history_mult[2];
313 
314  decorr_shift = get_bits(&alac->gb, 8);
315  decorr_left_weight = get_bits(&alac->gb, 8);
316 
317  for (ch = 0; ch < channels; ch++) {
318  prediction_type[ch] = get_bits(&alac->gb, 4);
319  lpc_quant[ch] = get_bits(&alac->gb, 4);
320  rice_history_mult[ch] = get_bits(&alac->gb, 3);
321  lpc_order[ch] = get_bits(&alac->gb, 5);
322 
323  if (lpc_order[ch] >= alac->max_samples_per_frame)
324  return AVERROR_INVALIDDATA;
325 
326  /* read the predictor table */
327  for (i = lpc_order[ch] - 1; i >= 0; i--)
328  lpc_coefs[ch][i] = get_sbits(&alac->gb, 16);
329  }
330 
331  if (alac->extra_bits) {
332  for (i = 0; i < alac->nb_samples; i++) {
333  if(get_bits_left(&alac->gb) <= 0)
334  return -1;
335  for (ch = 0; ch < channels; ch++)
336  alac->extra_bits_buffer[ch][i] = get_bits(&alac->gb, alac->extra_bits);
337  }
338  }
339  for (ch = 0; ch < channels; ch++) {
340  int ret=rice_decompress(alac, alac->predict_error_buffer[ch],
341  alac->nb_samples, bps,
342  rice_history_mult[ch] * alac->rice_history_mult / 4);
343  if(ret<0)
344  return ret;
345 
346  /* adaptive FIR filter */
347  if (prediction_type[ch] == 15) {
348  /* Prediction type 15 runs the adaptive FIR twice.
349  * The first pass uses the special-case coef_num = 31, while
350  * the second pass uses the coefs from the bitstream.
351  *
352  * However, this prediction type is not currently used by the
353  * reference encoder.
354  */
356  alac->predict_error_buffer[ch],
357  alac->nb_samples, bps, NULL, 31, 0);
358  } else if (prediction_type[ch] > 0) {
359  av_log(avctx, AV_LOG_WARNING, "unknown prediction type: %i\n",
360  prediction_type[ch]);
361  }
363  alac->output_samples_buffer[ch], alac->nb_samples,
364  bps, lpc_coefs[ch], lpc_order[ch], lpc_quant[ch]);
365  }
366  } else {
367  /* not compressed, easy case */
368  for (i = 0; i < alac->nb_samples; i++) {
369  if(get_bits_left(&alac->gb) <= 0)
370  return -1;
371  for (ch = 0; ch < channels; ch++) {
372  alac->output_samples_buffer[ch][i] =
373  get_sbits_long(&alac->gb, alac->sample_size);
374  }
375  }
376  alac->extra_bits = 0;
377  decorr_shift = 0;
378  decorr_left_weight = 0;
379  }
380 
381  if (channels == 2 && decorr_left_weight) {
383  decorr_shift, decorr_left_weight);
384  }
385 
386  if (alac->extra_bits) {
388  alac->extra_bits, channels, alac->nb_samples);
389  }
390 
391  if(av_sample_fmt_is_planar(avctx->sample_fmt)) {
392  switch(alac->sample_size) {
393  case 16: {
394  for (ch = 0; ch < channels; ch++) {
395  int16_t *outbuffer = (int16_t *)frame->extended_data[ch_index + ch];
396  for (i = 0; i < alac->nb_samples; i++)
397  *outbuffer++ = alac->output_samples_buffer[ch][i];
398  }}
399  break;
400  case 24: {
401  for (ch = 0; ch < channels; ch++) {
402  for (i = 0; i < alac->nb_samples; i++)
403  alac->output_samples_buffer[ch][i] <<= 8;
404  }}
405  break;
406  }
407  }else{
408  switch(alac->sample_size) {
409  case 16: {
410  int16_t *outbuffer = ((int16_t *)frame->extended_data[0]) + ch_index;
411  for (i = 0; i < alac->nb_samples; i++) {
412  for (ch = 0; ch < channels; ch++)
413  *outbuffer++ = alac->output_samples_buffer[ch][i];
414  outbuffer += alac->channels - channels;
415  }
416  }
417  break;
418  case 24: {
419  int32_t *outbuffer = ((int32_t *)frame->extended_data[0]) + ch_index;
420  for (i = 0; i < alac->nb_samples; i++) {
421  for (ch = 0; ch < channels; ch++)
422  *outbuffer++ = alac->output_samples_buffer[ch][i] << 8;
423  outbuffer += alac->channels - channels;
424  }
425  }
426  break;
427  case 32: {
428  int32_t *outbuffer = ((int32_t *)frame->extended_data[0]) + ch_index;
429  for (i = 0; i < alac->nb_samples; i++) {
430  for (ch = 0; ch < channels; ch++)
431  *outbuffer++ = alac->output_samples_buffer[ch][i];
432  outbuffer += alac->channels - channels;
433  }
434  }
435  break;
436  }
437  }
438 
439  return 0;
440 }
441 
442 static int alac_decode_frame(AVCodecContext *avctx, void *data,
443  int *got_frame_ptr, AVPacket *avpkt)
444 {
445  ALACContext *alac = avctx->priv_data;
446  AVFrame *frame = data;
447  enum AlacRawDataBlockType element;
448  int channels;
449  int ch, ret, got_end;
450 
451  if ((ret = init_get_bits8(&alac->gb, avpkt->data, avpkt->size)) < 0)
452  return ret;
453 
454  got_end = 0;
455  alac->nb_samples = 0;
456  ch = 0;
457  while (get_bits_left(&alac->gb) >= 3) {
458  element = get_bits(&alac->gb, 3);
459  if (element == TYPE_END) {
460  got_end = 1;
461  break;
462  }
463  if (element > TYPE_CPE && element != TYPE_LFE) {
464  av_log(avctx, AV_LOG_ERROR, "syntax element unsupported: %d\n", element);
465  return AVERROR_PATCHWELCOME;
466  }
467 
468  channels = (element == TYPE_CPE) ? 2 : 1;
469  if (ch + channels > alac->channels ||
470  ff_alac_channel_layout_offsets[alac->channels - 1][ch] + channels > alac->channels) {
471  av_log(avctx, AV_LOG_ERROR, "invalid element channel count\n");
472  return AVERROR_INVALIDDATA;
473  }
474 
475  ret = decode_element(avctx, frame,
477  channels);
478  if (ret < 0 && get_bits_left(&alac->gb))
479  return ret;
480 
481  ch += channels;
482  }
483  if (!got_end) {
484  av_log(avctx, AV_LOG_ERROR, "no end tag found. incomplete packet.\n");
485  return AVERROR_INVALIDDATA;
486  }
487 
488  if (avpkt->size * 8 - get_bits_count(&alac->gb) > 8) {
489  av_log(avctx, AV_LOG_ERROR, "Error : %d bits left\n",
490  avpkt->size * 8 - get_bits_count(&alac->gb));
491  }
492 
493  if (alac->channels == ch)
494  *got_frame_ptr = 1;
495 
496  return avpkt->size;
497 }
498 
500 {
501  ALACContext *alac = avctx->priv_data;
502 
503  int ch;
504  for (ch = 0; ch < FFMIN(alac->channels, 2); ch++) {
505  av_freep(&alac->predict_error_buffer[ch]);
506  if (!alac->direct_output)
507  av_freep(&alac->output_samples_buffer[ch]);
508  av_freep(&alac->extra_bits_buffer[ch]);
509  }
510 
511  return 0;
512 }
513 
514 static int allocate_buffers(ALACContext *alac)
515 {
516  int ch;
517  int buf_size = alac->max_samples_per_frame * sizeof(int32_t);
518 
519  for (ch = 0; ch < FFMIN(alac->channels, 2); ch++) {
521  buf_size, buf_alloc_fail);
522 
523  alac->direct_output = alac->sample_size > 16 && av_sample_fmt_is_planar(alac->avctx->sample_fmt);
524  if (!alac->direct_output) {
526  buf_size, buf_alloc_fail);
527  }
528 
529  FF_ALLOC_OR_GOTO(alac->avctx, alac->extra_bits_buffer[ch],
530  buf_size, buf_alloc_fail);
531  }
532  return 0;
533 buf_alloc_fail:
534  alac_decode_close(alac->avctx);
535  return AVERROR(ENOMEM);
536 }
537 
538 static int alac_set_info(ALACContext *alac)
539 {
540  GetByteContext gb;
541 
542  bytestream2_init(&gb, alac->avctx->extradata,
543  alac->avctx->extradata_size);
544 
545  bytestream2_skipu(&gb, 12); // size:4, alac:4, version:4
546 
547  alac->max_samples_per_frame = bytestream2_get_be32u(&gb);
548  if (!alac->max_samples_per_frame ||
549  alac->max_samples_per_frame > INT_MAX / sizeof(int32_t)) {
550  av_log(alac->avctx, AV_LOG_ERROR, "max samples per frame invalid: %u\n",
551  alac->max_samples_per_frame);
552  return AVERROR_INVALIDDATA;
553  }
554  bytestream2_skipu(&gb, 1); // compatible version
555  alac->sample_size = bytestream2_get_byteu(&gb);
556  alac->rice_history_mult = bytestream2_get_byteu(&gb);
557  alac->rice_initial_history = bytestream2_get_byteu(&gb);
558  alac->rice_limit = bytestream2_get_byteu(&gb);
559  alac->channels = bytestream2_get_byteu(&gb);
560  bytestream2_get_be16u(&gb); // maxRun
561  bytestream2_get_be32u(&gb); // max coded frame size
562  bytestream2_get_be32u(&gb); // average bitrate
563  bytestream2_get_be32u(&gb); // samplerate
564 
565  return 0;
566 }
567 
/* Decoder initialisation: reads the stream parameters from the extradata,
 * selects the output sample format, reconciles the channel count with the
 * codec context and allocates the work buffers.
 *
 * NOTE(review): the function header (listing line 568) is not present in
 * this listing; presumably this is the codec's init callback taking an
 * AVCodecContext *avctx - confirm against the real file. */
569 {
570  int ret;
571  int req_packed;
572  ALACContext *alac = avctx->priv_data;
573  alac->avctx = avctx;
574 
575  /* initialize from the extradata */
/* NOTE(review): the condition opening this block (listing line 576) is
 * missing; presumably it compares the extradata size against
 * ALAC_EXTRADATA_SIZE - confirm. */
577  av_log(avctx, AV_LOG_ERROR, "extradata is too small\n");
578  return AVERROR_INVALIDDATA;
579  }
580  if (alac_set_info(alac)) {
581  av_log(avctx, AV_LOG_ERROR, "set_info failed\n");
/* NOTE(review): bare -1 here, whereas the other failure paths return
 * AVERROR codes. */
582  return -1;
583  }
584 
/* NOTE(review): req_packed is read below but no assignment is visible;
 * listing line 585 is missing and presumably sets it. */
/* Choose packed or planar output for the stream's sample size. */
586  switch (alac->sample_size) {
587  case 16: avctx->sample_fmt = req_packed ? AV_SAMPLE_FMT_S16 : AV_SAMPLE_FMT_S16P;
588  break;
589  case 24:
590  case 32: avctx->sample_fmt = req_packed ? AV_SAMPLE_FMT_S32 : AV_SAMPLE_FMT_S32P;
591  break;
592  default: avpriv_request_sample(avctx, "Sample depth %d", alac->sample_size);
593  return AVERROR_PATCHWELCOME;
594  }
595  avctx->bits_per_raw_sample = alac->sample_size;
596 
/* Reconcile the channel count: an extradata value outside
 * 1..ALAC_MAX_CHANNELS is replaced by the codec context's count, otherwise
 * the extradata value is written to the codec context. */
597  if (alac->channels < 1) {
598  av_log(avctx, AV_LOG_WARNING, "Invalid channel count\n");
599  alac->channels = avctx->channels;
600  } else {
601  if (alac->channels > ALAC_MAX_CHANNELS)
602  alac->channels = avctx->channels;
603  else
604  avctx->channels = alac->channels;
605  }
606  if (avctx->channels > ALAC_MAX_CHANNELS || avctx->channels <= 0 ) {
607  av_log(avctx, AV_LOG_ERROR, "Unsupported channel count: %d\n",
608  avctx->channels);
609  return AVERROR_PATCHWELCOME;
610  }
611  avctx->channel_layout = ff_alac_channel_layouts[alac->channels - 1];
612 
613  if ((ret = allocate_buffers(alac)) < 0) {
614  av_log(avctx, AV_LOG_ERROR, "Error allocating buffers\n");
615  return ret;
616  }
617 
618  return 0;
619 }
620 
/* Sets the context's back-pointer to avctx and allocates its work buffers
 * via allocate_buffers(), whose result is returned.
 *
 * NOTE(review): the function header (listing line 621) is not present in
 * this listing; presumably this is the per-thread copy initialiser used
 * with frame threading - confirm. */
622 {
623  ALACContext *alac = avctx->priv_data;
624  alac->avctx = avctx;
625  return allocate_buffers(alac);
626 }
627 
/* Codec descriptor for the ALAC decoder.
 *
 * NOTE(review): the opening line of this initializer (listing line 628) and
 * listing lines 634-637 are not present; presumably they declare the codec
 * object and its callback fields - confirm against the real file. */
629  .name = "alac",
630  .long_name = NULL_IF_CONFIG_SMALL("ALAC (Apple Lossless Audio Codec)"),
631  .type = AVMEDIA_TYPE_AUDIO,
632  .id = AV_CODEC_ID_ALAC,
633  .priv_data_size = sizeof(ALACContext),
638  .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
639 };