FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
sonic.c
Go to the documentation of this file.
1 /*
2  * Simple free lossless/lossy audio codec
3  * Copyright (c) 2004 Alex Beregszaszi
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 #include "avcodec.h"
22 #include "get_bits.h"
23 #include "golomb.h"
24 #include "internal.h"
25 
26 /**
27  * @file
28  * Simple free lossless/lossy audio codec
29  * Based on Paul Francis Harrison's Bonk (http://www.logarithmic.net/pfh/bonk)
30  * Written and designed by Alex Beregszaszi
31  *
32  * TODO:
33  * - CABAC put/get_symbol
34  * - independent quantizer for channels
35  * - >2 channels support
36  * - more decorrelation types
37  * - more tap_quant tests
38  * - selectable intlist writers/readers (bonk-style, golomb, cabac)
39  */
40 
/* Maximum number of interleaved channels the codec handles. */
#define MAX_CHANNELS 2

/* Inter-channel decorrelation modes stored in the stream header.
 * The encoder stores the value 3 for mono streams (no decorrelation). */
#define MID_SIDE 0
#define LEFT_SIDE 1
#define RIGHT_SIDE 2

/**
 * Private codec state shared by the Sonic encoders and the decoder.
 */
typedef struct SonicContext {
    int lossless;       ///< non-zero for the lossless variant
    int decorrelation;  ///< MID_SIDE, LEFT_SIDE, RIGHT_SIDE, or 3 for none

    int num_taps;       ///< predictor order (a multiple of 32)
    int downsampling;   ///< input samples summed into one coded sample
    double quantization; ///< rate-control scale (0.0 lossless, 1.0 lossy)

    int channels, samplerate;
    int block_align;    ///< coded samples per channel per frame
    int frame_size;     ///< interleaved input samples per frame (all channels)

    int *tap_quant;     ///< per-tap quantizer, num_taps entries
    int *int_samples;   ///< frame_size interleaved working samples
    int *coded_samples[MAX_CHANNELS]; ///< block_align residuals per channel

    // for encoding
    int *tail;          ///< last tail_size samples of the previous frame
    int tail_size;      ///< num_taps * channels
    int *window;        ///< tail + frame + zero padding analysis window
    int window_size;    ///< 2 * tail_size + frame_size

    // for decoding
    int *predictor_k;   ///< lattice coefficients, num_taps entries
    int *predictor_state[MAX_CHANNELS]; ///< per-channel lattice state
} SonicContext;
69 
/* Fixed-point precision: lattice coefficients carry LATTICE_SHIFT fractional
 * bits, lossy-mode samples carry SAMPLE_SHIFT fractional bits. */
#define LATTICE_SHIFT   10
#define SAMPLE_SHIFT    4
#define LATTICE_FACTOR  (1 << LATTICE_SHIFT)
#define SAMPLE_FACTOR   (1 << SAMPLE_SHIFT)

/* Tuning constants of the encoder's simple rate control. */
#define BASE_QUANT      0.6
#define RATE_VARIATION  3.0
77 
/**
 * Right-shift a by b bits after adding half of the divisor, i.e. a rounded
 * division by 2^b.
 *
 * @param a value to scale down
 * @param b shift amount; must be at least 1 (b - 1 is used as a shift count)
 * @return (a + 2^(b-1)) >> b
 */
static inline int shift(int a,int b)
{
    const int half = 1 << (b - 1);

    return (a + half) >> b;
}
82 
/**
 * Right-shift a by b bits and add one to the result when a is negative.
 *
 * @param a value to scale down
 * @param b shift amount
 * @return (a >> b) for a >= 0, (a >> b) + 1 for a < 0
 */
static inline int shift_down(int a,int b)
{
    int shifted = a >> b;

    if (a < 0)
        shifted++;

    return shifted;
}
87 
88 #if 1
89 static inline int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
90 {
91  int i;
92 
93  for (i = 0; i < entries; i++)
94  set_se_golomb(pb, buf[i]);
95 
96  return 1;
97 }
98 
99 static inline int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
100 {
101  int i;
102 
103  for (i = 0; i < entries; i++)
104  buf[i] = get_se_golomb(gb);
105 
106  return 1;
107 }
108 
109 #else
110 
111 #define ADAPT_LEVEL 8
112 
/**
 * Count the bits needed to represent x.
 *
 * @param x value to measure
 * @return position of the highest set bit plus one; 0 for x == 0
 */
static int bits_to_store(uint64_t x)
{
    int count;

    for (count = 0; x != 0; x >>= 1)
        count++;

    return count;
}
124 
125 static void write_uint_max(PutBitContext *pb, unsigned int value, unsigned int max)
126 {
127  int i, bits;
128 
129  if (!max)
130  return;
131 
132  bits = bits_to_store(max);
133 
134  for (i = 0; i < bits-1; i++)
135  put_bits(pb, 1, value & (1 << i));
136 
137  if ( (value | (1 << (bits-1))) <= max)
138  put_bits(pb, 1, value & (1 << (bits-1)));
139 }
140 
141 static unsigned int read_uint_max(GetBitContext *gb, int max)
142 {
143  int i, bits, value = 0;
144 
145  if (!max)
146  return 0;
147 
148  bits = bits_to_store(max);
149 
150  for (i = 0; i < bits-1; i++)
151  if (get_bits1(gb))
152  value += 1 << i;
153 
154  if ( (value | (1<<(bits-1))) <= max)
155  if (get_bits1(gb))
156  value += 1 << (bits-1);
157 
158  return value;
159 }
160 
161 static int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
162 {
163  int i, j, x = 0, low_bits = 0, max = 0;
164  int step = 256, pos = 0, dominant = 0, any = 0;
165  int *copy, *bits;
166 
167  copy = av_calloc(entries, sizeof(*copy));
168  if (!copy)
169  return AVERROR(ENOMEM);
170 
171  if (base_2_part)
172  {
173  int energy = 0;
174 
175  for (i = 0; i < entries; i++)
176  energy += abs(buf[i]);
177 
178  low_bits = bits_to_store(energy / (entries * 2));
179  if (low_bits > 15)
180  low_bits = 15;
181 
182  put_bits(pb, 4, low_bits);
183  }
184 
185  for (i = 0; i < entries; i++)
186  {
187  put_bits(pb, low_bits, abs(buf[i]));
188  copy[i] = abs(buf[i]) >> low_bits;
189  if (copy[i] > max)
190  max = abs(copy[i]);
191  }
192 
193  bits = av_calloc(entries*max, sizeof(*bits));
194  if (!bits)
195  {
196 // av_free(copy);
197  return AVERROR(ENOMEM);
198  }
199 
200  for (i = 0; i <= max; i++)
201  {
202  for (j = 0; j < entries; j++)
203  if (copy[j] >= i)
204  bits[x++] = copy[j] > i;
205  }
206 
207  // store bitstream
208  while (pos < x)
209  {
210  int steplet = step >> 8;
211 
212  if (pos + steplet > x)
213  steplet = x - pos;
214 
215  for (i = 0; i < steplet; i++)
216  if (bits[i+pos] != dominant)
217  any = 1;
218 
219  put_bits(pb, 1, any);
220 
221  if (!any)
222  {
223  pos += steplet;
224  step += step / ADAPT_LEVEL;
225  }
226  else
227  {
228  int interloper = 0;
229 
230  while (((pos + interloper) < x) && (bits[pos + interloper] == dominant))
231  interloper++;
232 
233  // note change
234  write_uint_max(pb, interloper, (step >> 8) - 1);
235 
236  pos += interloper + 1;
237  step -= step / ADAPT_LEVEL;
238  }
239 
240  if (step < 256)
241  {
242  step = 65536 / step;
243  dominant = !dominant;
244  }
245  }
246 
247  // store signs
248  for (i = 0; i < entries; i++)
249  if (buf[i])
250  put_bits(pb, 1, buf[i] < 0);
251 
252 // av_free(bits);
253 // av_free(copy);
254 
255  return 0;
256 }
257 
258 static int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
259 {
260  int i, low_bits = 0, x = 0;
261  int n_zeros = 0, step = 256, dominant = 0;
262  int pos = 0, level = 0;
263  int *bits = av_calloc(entries, sizeof(*bits));
264 
265  if (!bits)
266  return AVERROR(ENOMEM);
267 
268  if (base_2_part)
269  {
270  low_bits = get_bits(gb, 4);
271 
272  if (low_bits)
273  for (i = 0; i < entries; i++)
274  buf[i] = get_bits(gb, low_bits);
275  }
276 
277 // av_log(NULL, AV_LOG_INFO, "entries: %d, low bits: %d\n", entries, low_bits);
278 
279  while (n_zeros < entries)
280  {
281  int steplet = step >> 8;
282 
283  if (!get_bits1(gb))
284  {
285  for (i = 0; i < steplet; i++)
286  bits[x++] = dominant;
287 
288  if (!dominant)
289  n_zeros += steplet;
290 
291  step += step / ADAPT_LEVEL;
292  }
293  else
294  {
295  int actual_run = read_uint_max(gb, steplet-1);
296 
297 // av_log(NULL, AV_LOG_INFO, "actual run: %d\n", actual_run);
298 
299  for (i = 0; i < actual_run; i++)
300  bits[x++] = dominant;
301 
302  bits[x++] = !dominant;
303 
304  if (!dominant)
305  n_zeros += actual_run;
306  else
307  n_zeros++;
308 
309  step -= step / ADAPT_LEVEL;
310  }
311 
312  if (step < 256)
313  {
314  step = 65536 / step;
315  dominant = !dominant;
316  }
317  }
318 
319  // reconstruct unsigned values
320  n_zeros = 0;
321  for (i = 0; n_zeros < entries; i++)
322  {
323  while(1)
324  {
325  if (pos >= entries)
326  {
327  pos = 0;
328  level += 1 << low_bits;
329  }
330 
331  if (buf[pos] >= level)
332  break;
333 
334  pos++;
335  }
336 
337  if (bits[i])
338  buf[pos] += 1 << low_bits;
339  else
340  n_zeros++;
341 
342  pos++;
343  }
344 // av_free(bits);
345 
346  // read signs
347  for (i = 0; i < entries; i++)
348  if (buf[i] && get_bits1(gb))
349  buf[i] = -buf[i];
350 
351 // av_log(NULL, AV_LOG_INFO, "zeros: %d pos: %d\n", n_zeros, pos);
352 
353  return 0;
354 }
355 #endif
356 
357 static void predictor_init_state(int *k, int *state, int order)
358 {
359  int i;
360 
361  for (i = order-2; i >= 0; i--)
362  {
363  int j, p, x = state[i];
364 
365  for (j = 0, p = i+1; p < order; j++,p++)
366  {
367  int tmp = x + shift_down(k[j] * state[p], LATTICE_SHIFT);
368  state[p] += shift_down(k[j]*x, LATTICE_SHIFT);
369  x = tmp;
370  }
371  }
372 }
373 
374 static int predictor_calc_error(int *k, int *state, int order, int error)
375 {
376  int i, x = error - shift_down(k[order-1] * state[order-1], LATTICE_SHIFT);
377 
378 #if 1
379  int *k_ptr = &(k[order-2]),
380  *state_ptr = &(state[order-2]);
381  for (i = order-2; i >= 0; i--, k_ptr--, state_ptr--)
382  {
383  int k_value = *k_ptr, state_value = *state_ptr;
384  x -= shift_down(k_value * state_value, LATTICE_SHIFT);
385  state_ptr[1] = state_value + shift_down(k_value * x, LATTICE_SHIFT);
386  }
387 #else
388  for (i = order-2; i >= 0; i--)
389  {
390  x -= shift_down(k[i] * state[i], LATTICE_SHIFT);
391  state[i+1] = state[i] + shift_down(k[i] * x, LATTICE_SHIFT);
392  }
393 #endif
394 
395  // don't drift too far, to avoid overflows
396  if (x > (SAMPLE_FACTOR<<16)) x = (SAMPLE_FACTOR<<16);
397  if (x < -(SAMPLE_FACTOR<<16)) x = -(SAMPLE_FACTOR<<16);
398 
399  state[0] = x;
400 
401  return x;
402 }
403 
404 #if CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER
405 // Heavily modified Levinson-Durbin algorithm which
406 // copes better with quantization, and calculates the
407 // actual whitened result as it goes.
408 
409 static void modified_levinson_durbin(int *window, int window_entries,
410  int *out, int out_entries, int channels, int *tap_quant)
411 {
412  int i;
413  int *state = av_calloc(window_entries, sizeof(*state));
414 
415  memcpy(state, window, 4* window_entries);
416 
417  for (i = 0; i < out_entries; i++)
418  {
419  int step = (i+1)*channels, k, j;
420  double xx = 0.0, xy = 0.0;
421 #if 1
422  int *x_ptr = &(window[step]);
423  int *state_ptr = &(state[0]);
424  j = window_entries - step;
425  for (;j>0;j--,x_ptr++,state_ptr++)
426  {
427  double x_value = *x_ptr;
428  double state_value = *state_ptr;
429  xx += state_value*state_value;
430  xy += x_value*state_value;
431  }
432 #else
433  for (j = 0; j <= (window_entries - step); j++);
434  {
435  double stepval = window[step+j];
436  double stateval = window[j];
437 // xx += (double)window[j]*(double)window[j];
438 // xy += (double)window[step+j]*(double)window[j];
439  xx += stateval*stateval;
440  xy += stepval*stateval;
441  }
442 #endif
443  if (xx == 0.0)
444  k = 0;
445  else
446  k = (int)(floor(-xy/xx * (double)LATTICE_FACTOR / (double)(tap_quant[i]) + 0.5));
447 
448  if (k > (LATTICE_FACTOR/tap_quant[i]))
449  k = LATTICE_FACTOR/tap_quant[i];
450  if (-k > (LATTICE_FACTOR/tap_quant[i]))
451  k = -(LATTICE_FACTOR/tap_quant[i]);
452 
453  out[i] = k;
454  k *= tap_quant[i];
455 
456 #if 1
457  x_ptr = &(window[step]);
458  state_ptr = &(state[0]);
459  j = window_entries - step;
460  for (;j>0;j--,x_ptr++,state_ptr++)
461  {
462  int x_value = *x_ptr;
463  int state_value = *state_ptr;
464  *x_ptr = x_value + shift_down(k*state_value,LATTICE_SHIFT);
465  *state_ptr = state_value + shift_down(k*x_value, LATTICE_SHIFT);
466  }
467 #else
468  for (j=0; j <= (window_entries - step); j++)
469  {
470  int stepval = window[step+j];
471  int stateval=state[j];
472  window[step+j] += shift_down(k * stateval, LATTICE_SHIFT);
473  state[j] += shift_down(k * stepval, LATTICE_SHIFT);
474  }
475 #endif
476  }
477 
478  av_free(state);
479 }
480 
481 static inline int code_samplerate(int samplerate)
482 {
483  switch (samplerate)
484  {
485  case 44100: return 0;
486  case 22050: return 1;
487  case 11025: return 2;
488  case 96000: return 3;
489  case 48000: return 4;
490  case 32000: return 5;
491  case 24000: return 6;
492  case 16000: return 7;
493  case 8000: return 8;
494  }
495  return AVERROR(EINVAL);
496 }
497 
498 static av_cold int sonic_encode_init(AVCodecContext *avctx)
499 {
500  SonicContext *s = avctx->priv_data;
501  PutBitContext pb;
502  int i, version = 0;
503 
504  if (avctx->channels > MAX_CHANNELS)
505  {
506  av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
507  return AVERROR(EINVAL); /* only stereo or mono for now */
508  }
509 
510  if (avctx->channels == 2)
511  s->decorrelation = MID_SIDE;
512  else
513  s->decorrelation = 3;
514 
515  if (avctx->codec->id == AV_CODEC_ID_SONIC_LS)
516  {
517  s->lossless = 1;
518  s->num_taps = 32;
519  s->downsampling = 1;
520  s->quantization = 0.0;
521  }
522  else
523  {
524  s->num_taps = 128;
525  s->downsampling = 2;
526  s->quantization = 1.0;
527  }
528 
529  // max tap 2048
530  if (s->num_taps < 32 || s->num_taps > 1024 || s->num_taps % 32) {
531  av_log(avctx, AV_LOG_ERROR, "Invalid number of taps\n");
532  return AVERROR_INVALIDDATA;
533  }
534 
535  // generate taps
536  s->tap_quant = av_calloc(s->num_taps, sizeof(*s->tap_quant));
537  for (i = 0; i < s->num_taps; i++)
538  s->tap_quant[i] = ff_sqrt(i+1);
539 
540  s->channels = avctx->channels;
541  s->samplerate = avctx->sample_rate;
542 
543  s->block_align = 2048LL*s->samplerate/(44100*s->downsampling);
545 
546  s->tail_size = s->num_taps*s->channels;
547  s->tail = av_calloc(s->tail_size, sizeof(*s->tail));
548  if (!s->tail)
549  return AVERROR(ENOMEM);
550 
551  s->predictor_k = av_calloc(s->num_taps, sizeof(*s->predictor_k) );
552  if (!s->predictor_k)
553  return AVERROR(ENOMEM);
554 
555  for (i = 0; i < s->channels; i++)
556  {
557  s->coded_samples[i] = av_calloc(s->block_align, sizeof(**s->coded_samples));
558  if (!s->coded_samples[i])
559  return AVERROR(ENOMEM);
560  }
561 
562  s->int_samples = av_calloc(s->frame_size, sizeof(*s->int_samples));
563 
564  s->window_size = ((2*s->tail_size)+s->frame_size);
565  s->window = av_calloc(s->window_size, sizeof(*s->window));
566  if (!s->window)
567  return AVERROR(ENOMEM);
568 
569  avctx->extradata = av_mallocz(16);
570  if (!avctx->extradata)
571  return AVERROR(ENOMEM);
572  init_put_bits(&pb, avctx->extradata, 16*8);
573 
574  put_bits(&pb, 2, version); // version
575  if (version == 1)
576  {
577  put_bits(&pb, 2, s->channels);
578  put_bits(&pb, 4, code_samplerate(s->samplerate));
579  }
580  put_bits(&pb, 1, s->lossless);
581  if (!s->lossless)
582  put_bits(&pb, 3, SAMPLE_SHIFT); // XXX FIXME: sample precision
583  put_bits(&pb, 2, s->decorrelation);
584  put_bits(&pb, 2, s->downsampling);
585  put_bits(&pb, 5, (s->num_taps >> 5)-1); // 32..1024
586  put_bits(&pb, 1, 0); // XXX FIXME: no custom tap quant table
587 
588  flush_put_bits(&pb);
589  avctx->extradata_size = put_bits_count(&pb)/8;
590 
591  av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
592  version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
593 
594  avctx->frame_size = s->block_align*s->downsampling;
595 
596  return 0;
597 }
598 
599 static av_cold int sonic_encode_close(AVCodecContext *avctx)
600 {
601  SonicContext *s = avctx->priv_data;
602  int i;
603 
604  for (i = 0; i < s->channels; i++)
605  av_freep(&s->coded_samples[i]);
606 
607  av_freep(&s->predictor_k);
608  av_freep(&s->tail);
609  av_freep(&s->tap_quant);
610  av_freep(&s->window);
611  av_freep(&s->int_samples);
612 
613  return 0;
614 }
615 
616 static int sonic_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
617  const AVFrame *frame, int *got_packet_ptr)
618 {
619  SonicContext *s = avctx->priv_data;
620  PutBitContext pb;
621  int i, j, ch, quant = 0, x = 0;
622  int ret;
623  const short *samples = (const int16_t*)frame->data[0];
624 
625  if ((ret = ff_alloc_packet2(avctx, avpkt, s->frame_size * 5 + 1000)) < 0)
626  return ret;
627 
628  init_put_bits(&pb, avpkt->data, avpkt->size);
629 
630  // short -> internal
631  for (i = 0; i < s->frame_size; i++)
632  s->int_samples[i] = samples[i];
633 
634  if (!s->lossless)
635  for (i = 0; i < s->frame_size; i++)
636  s->int_samples[i] = s->int_samples[i] << SAMPLE_SHIFT;
637 
638  switch(s->decorrelation)
639  {
640  case MID_SIDE:
641  for (i = 0; i < s->frame_size; i += s->channels)
642  {
643  s->int_samples[i] += s->int_samples[i+1];
644  s->int_samples[i+1] -= shift(s->int_samples[i], 1);
645  }
646  break;
647  case LEFT_SIDE:
648  for (i = 0; i < s->frame_size; i += s->channels)
649  s->int_samples[i+1] -= s->int_samples[i];
650  break;
651  case RIGHT_SIDE:
652  for (i = 0; i < s->frame_size; i += s->channels)
653  s->int_samples[i] -= s->int_samples[i+1];
654  break;
655  }
656 
657  memset(s->window, 0, 4* s->window_size);
658 
659  for (i = 0; i < s->tail_size; i++)
660  s->window[x++] = s->tail[i];
661 
662  for (i = 0; i < s->frame_size; i++)
663  s->window[x++] = s->int_samples[i];
664 
665  for (i = 0; i < s->tail_size; i++)
666  s->window[x++] = 0;
667 
668  for (i = 0; i < s->tail_size; i++)
669  s->tail[i] = s->int_samples[s->frame_size - s->tail_size + i];
670 
671  // generate taps
672  modified_levinson_durbin(s->window, s->window_size,
673  s->predictor_k, s->num_taps, s->channels, s->tap_quant);
674  if ((ret = intlist_write(&pb, s->predictor_k, s->num_taps, 0)) < 0)
675  return ret;
676 
677  for (ch = 0; ch < s->channels; ch++)
678  {
679  x = s->tail_size+ch;
680  for (i = 0; i < s->block_align; i++)
681  {
682  int sum = 0;
683  for (j = 0; j < s->downsampling; j++, x += s->channels)
684  sum += s->window[x];
685  s->coded_samples[ch][i] = sum;
686  }
687  }
688 
689  // simple rate control code
690  if (!s->lossless)
691  {
692  double energy1 = 0.0, energy2 = 0.0;
693  for (ch = 0; ch < s->channels; ch++)
694  {
695  for (i = 0; i < s->block_align; i++)
696  {
697  double sample = s->coded_samples[ch][i];
698  energy2 += sample*sample;
699  energy1 += fabs(sample);
700  }
701  }
702 
703  energy2 = sqrt(energy2/(s->channels*s->block_align));
704  energy1 = sqrt(2.0)*energy1/(s->channels*s->block_align);
705 
706  // increase bitrate when samples are like a gaussian distribution
707  // reduce bitrate when samples are like a two-tailed exponential distribution
708 
709  if (energy2 > energy1)
710  energy2 += (energy2-energy1)*RATE_VARIATION;
711 
712  quant = (int)(BASE_QUANT*s->quantization*energy2/SAMPLE_FACTOR);
713 // av_log(avctx, AV_LOG_DEBUG, "quant: %d energy: %f / %f\n", quant, energy1, energy2);
714 
715  quant = av_clip(quant, 1, 65534);
716 
717  set_ue_golomb(&pb, quant);
718 
719  quant *= SAMPLE_FACTOR;
720  }
721 
722  // write out coded samples
723  for (ch = 0; ch < s->channels; ch++)
724  {
725  if (!s->lossless)
726  for (i = 0; i < s->block_align; i++)
727  s->coded_samples[ch][i] = ROUNDED_DIV(s->coded_samples[ch][i], quant);
728 
729  if ((ret = intlist_write(&pb, s->coded_samples[ch], s->block_align, 1)) < 0)
730  return ret;
731  }
732 
733 // av_log(avctx, AV_LOG_DEBUG, "used bytes: %d\n", (put_bits_count(&pb)+7)/8);
734 
735  flush_put_bits(&pb);
736  avpkt->size = (put_bits_count(&pb)+7)/8;
737  *got_packet_ptr = 1;
738  return 0;
739 }
740 #endif /* CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER */
741 
742 #if CONFIG_SONIC_DECODER
743 static const int samplerate_table[] =
744  { 44100, 22050, 11025, 96000, 48000, 32000, 24000, 16000, 8000 };
745 
746 static av_cold int sonic_decode_init(AVCodecContext *avctx)
747 {
748  SonicContext *s = avctx->priv_data;
749  GetBitContext gb;
750  int i, version;
751 
752  s->channels = avctx->channels;
753  s->samplerate = avctx->sample_rate;
754 
755  if (!avctx->extradata)
756  {
757  av_log(avctx, AV_LOG_ERROR, "No mandatory headers present\n");
758  return AVERROR_INVALIDDATA;
759  }
760 
761  init_get_bits8(&gb, avctx->extradata, avctx->extradata_size);
762 
763  version = get_bits(&gb, 2);
764  if (version > 1)
765  {
766  av_log(avctx, AV_LOG_ERROR, "Unsupported Sonic version, please report\n");
767  return AVERROR_INVALIDDATA;
768  }
769 
770  if (version == 1)
771  {
772  s->channels = get_bits(&gb, 2);
773  s->samplerate = samplerate_table[get_bits(&gb, 4)];
774  av_log(avctx, AV_LOG_INFO, "Sonicv2 chans: %d samprate: %d\n",
775  s->channels, s->samplerate);
776  }
777 
778  if (s->channels > MAX_CHANNELS)
779  {
780  av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
781  return AVERROR_INVALIDDATA;
782  }
783 
784  s->lossless = get_bits1(&gb);
785  if (!s->lossless)
786  skip_bits(&gb, 3); // XXX FIXME
787  s->decorrelation = get_bits(&gb, 2);
788  if (s->decorrelation != 3 && s->channels != 2) {
789  av_log(avctx, AV_LOG_ERROR, "invalid decorrelation %d\n", s->decorrelation);
790  return AVERROR_INVALIDDATA;
791  }
792 
793  s->downsampling = get_bits(&gb, 2);
794  if (!s->downsampling) {
795  av_log(avctx, AV_LOG_ERROR, "invalid downsampling value\n");
796  return AVERROR_INVALIDDATA;
797  }
798 
799  s->num_taps = (get_bits(&gb, 5)+1)<<5;
800  if (get_bits1(&gb)) // XXX FIXME
801  av_log(avctx, AV_LOG_INFO, "Custom quant table\n");
802 
803  s->block_align = 2048LL*s->samplerate/(44100*s->downsampling);
805 // avctx->frame_size = s->block_align;
806 
807  av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
808  version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
809 
810  // generate taps
811  s->tap_quant = av_calloc(s->num_taps, sizeof(*s->tap_quant));
812  for (i = 0; i < s->num_taps; i++)
813  s->tap_quant[i] = ff_sqrt(i+1);
814 
815  s->predictor_k = av_calloc(s->num_taps, sizeof(*s->predictor_k));
816 
817  for (i = 0; i < s->channels; i++)
818  {
819  s->predictor_state[i] = av_calloc(s->num_taps, sizeof(**s->predictor_state));
820  if (!s->predictor_state[i])
821  return AVERROR(ENOMEM);
822  }
823 
824  for (i = 0; i < s->channels; i++)
825  {
826  s->coded_samples[i] = av_calloc(s->block_align, sizeof(**s->coded_samples));
827  if (!s->coded_samples[i])
828  return AVERROR(ENOMEM);
829  }
830  s->int_samples = av_calloc(s->frame_size, sizeof(*s->int_samples));
831 
832  avctx->sample_fmt = AV_SAMPLE_FMT_S16;
833  return 0;
834 }
835 
836 static av_cold int sonic_decode_close(AVCodecContext *avctx)
837 {
838  SonicContext *s = avctx->priv_data;
839  int i;
840 
841  av_freep(&s->int_samples);
842  av_freep(&s->tap_quant);
843  av_freep(&s->predictor_k);
844 
845  for (i = 0; i < s->channels; i++)
846  {
847  av_freep(&s->predictor_state[i]);
848  av_freep(&s->coded_samples[i]);
849  }
850 
851  return 0;
852 }
853 
854 static int sonic_decode_frame(AVCodecContext *avctx,
855  void *data, int *got_frame_ptr,
856  AVPacket *avpkt)
857 {
858  const uint8_t *buf = avpkt->data;
859  int buf_size = avpkt->size;
860  SonicContext *s = avctx->priv_data;
861  GetBitContext gb;
862  int i, quant, ch, j, ret;
863  int16_t *samples;
864  AVFrame *frame = data;
865 
866  if (buf_size == 0) return 0;
867 
868  frame->nb_samples = s->frame_size / avctx->channels;
869  if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
870  return ret;
871  samples = (int16_t *)frame->data[0];
872 
873 // av_log(NULL, AV_LOG_INFO, "buf_size: %d\n", buf_size);
874 
875  init_get_bits8(&gb, buf, buf_size);
876 
877  intlist_read(&gb, s->predictor_k, s->num_taps, 0);
878 
879  // dequantize
880  for (i = 0; i < s->num_taps; i++)
881  s->predictor_k[i] *= s->tap_quant[i];
882 
883  if (s->lossless)
884  quant = 1;
885  else
886  quant = get_ue_golomb(&gb) * SAMPLE_FACTOR;
887 
888 // av_log(NULL, AV_LOG_INFO, "quant: %d\n", quant);
889 
890  for (ch = 0; ch < s->channels; ch++)
891  {
892  int x = ch;
893 
895 
896  intlist_read(&gb, s->coded_samples[ch], s->block_align, 1);
897 
898  for (i = 0; i < s->block_align; i++)
899  {
900  for (j = 0; j < s->downsampling - 1; j++)
901  {
903  x += s->channels;
904  }
905 
906  s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, s->coded_samples[ch][i] * quant);
907  x += s->channels;
908  }
909 
910  for (i = 0; i < s->num_taps; i++)
911  s->predictor_state[ch][i] = s->int_samples[s->frame_size - s->channels + ch - i*s->channels];
912  }
913 
914  switch(s->decorrelation)
915  {
916  case MID_SIDE:
917  for (i = 0; i < s->frame_size; i += s->channels)
918  {
919  s->int_samples[i+1] += shift(s->int_samples[i], 1);
920  s->int_samples[i] -= s->int_samples[i+1];
921  }
922  break;
923  case LEFT_SIDE:
924  for (i = 0; i < s->frame_size; i += s->channels)
925  s->int_samples[i+1] += s->int_samples[i];
926  break;
927  case RIGHT_SIDE:
928  for (i = 0; i < s->frame_size; i += s->channels)
929  s->int_samples[i] += s->int_samples[i+1];
930  break;
931  }
932 
933  if (!s->lossless)
934  for (i = 0; i < s->frame_size; i++)
935  s->int_samples[i] = shift(s->int_samples[i], SAMPLE_SHIFT);
936 
937  // internal -> short
938  for (i = 0; i < s->frame_size; i++)
939  samples[i] = av_clip_int16(s->int_samples[i]);
940 
941  align_get_bits(&gb);
942 
943  *got_frame_ptr = 1;
944 
945  return (get_bits_count(&gb)+7)/8;
946 }
947 
948 AVCodec ff_sonic_decoder = {
949  .name = "sonic",
950  .long_name = NULL_IF_CONFIG_SMALL("Sonic"),
951  .type = AVMEDIA_TYPE_AUDIO,
952  .id = AV_CODEC_ID_SONIC,
953  .priv_data_size = sizeof(SonicContext),
954  .init = sonic_decode_init,
955  .close = sonic_decode_close,
956  .decode = sonic_decode_frame,
957  .capabilities = CODEC_CAP_DR1 | CODEC_CAP_EXPERIMENTAL,
958 };
959 #endif /* CONFIG_SONIC_DECODER */
960 
961 #if CONFIG_SONIC_ENCODER
962 AVCodec ff_sonic_encoder = {
963  .name = "sonic",
964  .long_name = NULL_IF_CONFIG_SMALL("Sonic"),
965  .type = AVMEDIA_TYPE_AUDIO,
966  .id = AV_CODEC_ID_SONIC,
967  .priv_data_size = sizeof(SonicContext),
968  .init = sonic_encode_init,
969  .encode2 = sonic_encode_frame,
971  .capabilities = CODEC_CAP_EXPERIMENTAL,
972  .close = sonic_encode_close,
973 };
974 #endif
975 
976 #if CONFIG_SONIC_LS_ENCODER
977 AVCodec ff_sonic_ls_encoder = {
978  .name = "sonicls",
979  .long_name = NULL_IF_CONFIG_SMALL("Sonic lossless"),
980  .type = AVMEDIA_TYPE_AUDIO,
981  .id = AV_CODEC_ID_SONIC_LS,
982  .priv_data_size = sizeof(SonicContext),
983  .init = sonic_encode_init,
984  .encode2 = sonic_encode_frame,
986  .capabilities = CODEC_CAP_EXPERIMENTAL,
987  .close = sonic_encode_close,
988 };
989 #endif