FFmpeg
sonic.c
Go to the documentation of this file.
1 /*
2  * Simple free lossless/lossy audio codec
3  * Copyright (c) 2004 Alex Beregszaszi
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 #include "avcodec.h"
22 #include "get_bits.h"
23 #include "golomb.h"
24 #include "internal.h"
25 #include "rangecoder.h"
26 
27 
28 /**
29  * @file
30  * Simple free lossless/lossy audio codec
31  * Based on Paul Francis Harrison's Bonk (http://www.logarithmic.net/pfh/bonk)
32  * Written and designed by Alex Beregszaszi
33  *
34  * TODO:
35  * - CABAC put/get_symbol
36  * - independent quantizer for channels
37  * - >2 channels support
38  * - more decorrelation types
39  * - more tap_quant tests
40  * - selectable intlist writers/readers (bonk-style, golomb, cabac)
41  */
42 
43 #define MAX_CHANNELS 2
44 
45 #define MID_SIDE 0
46 #define LEFT_SIDE 1
47 #define RIGHT_SIDE 2
48 
49 typedef struct SonicContext {
50  int version;
53 
55  double quantization;
56 
58 
59  int *tap_quant;
62 
63  // for encoding
64  int *tail;
65  int tail_size;
66  int *window;
68 
69  // for decoding
72 } SonicContext;
73 
74 #define LATTICE_SHIFT 10
75 #define SAMPLE_SHIFT 4
76 #define LATTICE_FACTOR (1 << LATTICE_SHIFT)
77 #define SAMPLE_FACTOR (1 << SAMPLE_SHIFT)
78 
79 #define BASE_QUANT 0.6
80 #define RATE_VARIATION 3.0
81 
/**
 * Arithmetic right shift of @p a by @p b bits with rounding (adds half of
 * the divisor before shifting).
 *
 * The rounding term is added in unsigned arithmetic so that values near
 * INT_MAX wrap instead of triggering signed-overflow undefined behaviour.
 *
 * @param a value to scale down
 * @param b shift amount, must be >= 1
 */
static inline int shift(int a,int b)
{
    return (int)(a + (1U << (b - 1))) >> b;
}
86 
/**
 * Right shift @p a by @p b bits and add one to the result when @p a is
 * negative.
 */
static inline int shift_down(int a,int b)
{
    const int shifted = a >> b;

    return a < 0 ? shifted + 1 : shifted;
}
91 
92 static av_always_inline av_flatten void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed, uint64_t rc_stat[256][2], uint64_t rc_stat2[32][2]){
93  int i;
94 
95 #define put_rac(C,S,B) \
96 do{\
97  if(rc_stat){\
98  rc_stat[*(S)][B]++;\
99  rc_stat2[(S)-state][B]++;\
100  }\
101  put_rac(C,S,B);\
102 }while(0)
103 
104  if(v){
105  const int a= FFABS(v);
106  const int e= av_log2(a);
107  put_rac(c, state+0, 0);
108  if(e<=9){
109  for(i=0; i<e; i++){
110  put_rac(c, state+1+i, 1); //1..10
111  }
112  put_rac(c, state+1+i, 0);
113 
114  for(i=e-1; i>=0; i--){
115  put_rac(c, state+22+i, (a>>i)&1); //22..31
116  }
117 
118  if(is_signed)
119  put_rac(c, state+11 + e, v < 0); //11..21
120  }else{
121  for(i=0; i<e; i++){
122  put_rac(c, state+1+FFMIN(i,9), 1); //1..10
123  }
124  put_rac(c, state+1+9, 0);
125 
126  for(i=e-1; i>=0; i--){
127  put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
128  }
129 
130  if(is_signed)
131  put_rac(c, state+11 + 10, v < 0); //11..21
132  }
133  }else{
134  put_rac(c, state+0, 1);
135  }
136 #undef put_rac
137 }
138 
139 static inline av_flatten int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
140  if(get_rac(c, state+0))
141  return 0;
142  else{
143  int i, e;
144  unsigned a;
145  e= 0;
146  while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
147  e++;
148  if (e > 31)
149  return AVERROR_INVALIDDATA;
150  }
151 
152  a= 1;
153  for(i=e-1; i>=0; i--){
154  a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
155  }
156 
157  e= -(is_signed && get_rac(c, state+11 + FFMIN(e, 10))); //11..21
158  return (a^e)-e;
159  }
160 }
161 
162 #if 1
163 static inline int intlist_write(RangeCoder *c, uint8_t *state, int *buf, int entries, int base_2_part)
164 {
165  int i;
166 
167  for (i = 0; i < entries; i++)
168  put_symbol(c, state, buf[i], 1, NULL, NULL);
169 
170  return 1;
171 }
172 
173 static inline int intlist_read(RangeCoder *c, uint8_t *state, int *buf, int entries, int base_2_part)
174 {
175  int i;
176 
177  for (i = 0; i < entries; i++)
178  buf[i] = get_symbol(c, state, 1);
179 
180  return 1;
181 }
182 #elif 1
183 static inline int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
184 {
185  int i;
186 
187  for (i = 0; i < entries; i++)
188  set_se_golomb(pb, buf[i]);
189 
190  return 1;
191 }
192 
193 static inline int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
194 {
195  int i;
196 
197  for (i = 0; i < entries; i++)
198  buf[i] = get_se_golomb(gb);
199 
200  return 1;
201 }
202 
203 #else
204 
205 #define ADAPT_LEVEL 8
206 
/**
 * Number of bits needed to represent @p x (0 for x == 0).
 */
static int bits_to_store(uint64_t x)
{
    int width;

    for (width = 0; x != 0; x >>= 1)
        width++;

    return width;
}
218 
219 static void write_uint_max(PutBitContext *pb, unsigned int value, unsigned int max)
220 {
221  int i, bits;
222 
223  if (!max)
224  return;
225 
226  bits = bits_to_store(max);
227 
228  for (i = 0; i < bits-1; i++)
229  put_bits(pb, 1, value & (1 << i));
230 
231  if ( (value | (1 << (bits-1))) <= max)
232  put_bits(pb, 1, value & (1 << (bits-1)));
233 }
234 
235 static unsigned int read_uint_max(GetBitContext *gb, int max)
236 {
237  int i, bits, value = 0;
238 
239  if (!max)
240  return 0;
241 
242  bits = bits_to_store(max);
243 
244  for (i = 0; i < bits-1; i++)
245  if (get_bits1(gb))
246  value += 1 << i;
247 
248  if ( (value | (1<<(bits-1))) <= max)
249  if (get_bits1(gb))
250  value += 1 << (bits-1);
251 
252  return value;
253 }
254 
/*
 * Bonk-style list writer (only compiled if the two preprocessor branches
 * above are disabled).
 *
 * Stores the magnitudes of buf[0..entries-1] as optional raw low bits plus
 * a unary-style bit plane sequence that is run-length coded with an
 * adaptive step, then stores one sign bit per non-zero entry.
 *
 * Returns 0 on success or AVERROR(ENOMEM) when a scratch buffer cannot be
 * allocated.
 */
255 static int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
256 {
257  int i, j, x = 0, low_bits = 0, max = 0;
258  int step = 256, pos = 0, dominant = 0, any = 0;
259  int *copy, *bits;
260 
261  copy = av_calloc(entries, sizeof(*copy));
262  if (!copy)
263  return AVERROR(ENOMEM);
264 
 /* choose how many low bits are stored verbatim from the mean magnitude */
265  if (base_2_part)
266  {
267  int energy = 0;
268 
269  for (i = 0; i < entries; i++)
270  energy += abs(buf[i]);
271 
272  low_bits = bits_to_store(energy / (entries * 2));
273  if (low_bits > 15)
274  low_bits = 15;
275 
276  put_bits(pb, 4, low_bits);
277  }
278 
 /* NOTE(review): abs(buf[i]) is passed unmasked although only low_bits
  * bits are requested - confirm put_bits() tolerates wider values */
279  for (i = 0; i < entries; i++)
280  {
281  put_bits(pb, low_bits, abs(buf[i]));
282  copy[i] = abs(buf[i]) >> low_bits;
283  if (copy[i] > max)
284  max = abs(copy[i]);
285  }
286 
 /* NOTE(review): entries*max elements are allocated, but the loop below
  * runs for i = 0..max inclusive and can emit up to entries*(max+1)
  * values - looks like it can overrun; confirm */
287  bits = av_calloc(entries*max, sizeof(*bits));
288  if (!bits)
289  {
290  av_free(copy);
291  return AVERROR(ENOMEM);
292  }
293 
 /* level by level: one "is it larger than i" flag for every value >= i */
294  for (i = 0; i <= max; i++)
295  {
296  for (j = 0; j < entries; j++)
297  if (copy[j] >= i)
298  bits[x++] = copy[j] > i;
299  }
300 
301  // store bitstream
302  while (pos < x)
303  {
304  int steplet = step >> 8;
305 
306  if (pos + steplet > x)
307  steplet = x - pos;
308 
 /* NOTE(review): any is never reset to 0 inside this loop - confirm */
309  for (i = 0; i < steplet; i++)
310  if (bits[i+pos] != dominant)
311  any = 1;
312 
313  put_bits(pb, 1, any);
314 
315  if (!any)
316  {
 /* whole run was the dominant symbol: grow the run length */
317  pos += steplet;
318  step += step / ADAPT_LEVEL;
319  }
320  else
321  {
322  int interloper = 0;
323 
324  while (((pos + interloper) < x) && (bits[pos + interloper] == dominant))
325  interloper++;
326 
327  // note change
328  write_uint_max(pb, interloper, (step >> 8) - 1);
329 
330  pos += interloper + 1;
331  step -= step / ADAPT_LEVEL;
332  }
333 
 /* runs got shorter than one symbol: swap which symbol is dominant */
334  if (step < 256)
335  {
336  step = 65536 / step;
337  dominant = !dominant;
338  }
339  }
340 
341  // store signs
342  for (i = 0; i < entries; i++)
343  if (buf[i])
344  put_bits(pb, 1, buf[i] < 0);
345 
346  av_free(bits);
347  av_free(copy);
348 
349  return 0;
350 }
351 
/*
 * Bonk-style list reader, counterpart of the intlist_write() above (only
 * compiled if the two preprocessor branches above are disabled).
 *
 * Reads optional raw low bits, expands the adaptive run-length coded flag
 * sequence into a scratch buffer, rebuilds the magnitudes level by level
 * and finally applies one sign bit per non-zero entry.
 *
 * Returns 0 on success or AVERROR(ENOMEM) when the scratch buffer cannot
 * be allocated.
 */
352 static int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
353 {
354  int i, low_bits = 0, x = 0;
355  int n_zeros = 0, step = 256, dominant = 0;
356  int pos = 0, level = 0;
 /* NOTE(review): only entries elements are allocated while x below is
  * advanced without a visible bound check - confirm this cannot overrun */
357  int *bits = av_calloc(entries, sizeof(*bits));
358 
359  if (!bits)
360  return AVERROR(ENOMEM);
361 
 /* NOTE(review): buf is only written here when base_2_part and low_bits
  * are both non-zero, yet it is read during reconstruction below -
  * presumably the caller passes a zeroed buffer; verify */
362  if (base_2_part)
363  {
364  low_bits = get_bits(gb, 4);
365 
366  if (low_bits)
367  for (i = 0; i < entries; i++)
368  buf[i] = get_bits(gb, low_bits);
369  }
370 
371 // av_log(NULL, AV_LOG_INFO, "entries: %d, low bits: %d\n", entries, low_bits);
372 
 /* expand runs until one terminating zero flag per entry has been seen */
373  while (n_zeros < entries)
374  {
375  int steplet = step >> 8;
376 
377  if (!get_bits1(gb))
378  {
379  for (i = 0; i < steplet; i++)
380  bits[x++] = dominant;
381 
382  if (!dominant)
383  n_zeros += steplet;
384 
385  step += step / ADAPT_LEVEL;
386  }
387  else
388  {
389  int actual_run = read_uint_max(gb, steplet-1);
390 
391 // av_log(NULL, AV_LOG_INFO, "actual run: %d\n", actual_run);
392 
393  for (i = 0; i < actual_run; i++)
394  bits[x++] = dominant;
395 
396  bits[x++] = !dominant;
397 
398  if (!dominant)
399  n_zeros += actual_run;
400  else
401  n_zeros++;
402 
403  step -= step / ADAPT_LEVEL;
404  }
405 
 /* same dominant-symbol swap as performed by the writer */
406  if (step < 256)
407  {
408  step = 65536 / step;
409  dominant = !dominant;
410  }
411  }
412 
413  // reconstruct unsigned values
414  n_zeros = 0;
415  for (i = 0; n_zeros < entries; i++)
416  {
 /* find the next entry that is still "alive" at the current level */
417  while(1)
418  {
419  if (pos >= entries)
420  {
421  pos = 0;
422  level += 1 << low_bits;
423  }
424 
425  if (buf[pos] >= level)
426  break;
427 
428  pos++;
429  }
430 
431  if (bits[i])
432  buf[pos] += 1 << low_bits;
433  else
434  n_zeros++;
435 
436  pos++;
437  }
438  av_free(bits);
439 
440  // read signs
441  for (i = 0; i < entries; i++)
442  if (buf[i] && get_bits1(gb))
443  buf[i] = -buf[i];
444 
445 // av_log(NULL, AV_LOG_INFO, "zeros: %d pos: %d\n", n_zeros, pos);
446 
447  return 0;
448 }
449 #endif
450 
451 static void predictor_init_state(int *k, int *state, int order)
452 {
453  int i;
454 
455  for (i = order-2; i >= 0; i--)
456  {
457  int j, p, x = state[i];
458 
459  for (j = 0, p = i+1; p < order; j++,p++)
460  {
461  int tmp = x + shift_down(k[j] * (unsigned)state[p], LATTICE_SHIFT);
462  state[p] += shift_down(k[j]* (unsigned)x, LATTICE_SHIFT);
463  x = tmp;
464  }
465  }
466 }
467 
468 static int predictor_calc_error(int *k, int *state, int order, int error)
469 {
470  int i, x = error - shift_down(k[order-1] * (unsigned)state[order-1], LATTICE_SHIFT);
471 
472 #if 1
473  int *k_ptr = &(k[order-2]),
474  *state_ptr = &(state[order-2]);
475  for (i = order-2; i >= 0; i--, k_ptr--, state_ptr--)
476  {
477  int k_value = *k_ptr, state_value = *state_ptr;
478  x -= shift_down(k_value * (unsigned)state_value, LATTICE_SHIFT);
479  state_ptr[1] = state_value + shift_down(k_value * (unsigned)x, LATTICE_SHIFT);
480  }
481 #else
482  for (i = order-2; i >= 0; i--)
483  {
484  x -= shift_down(k[i] * state[i], LATTICE_SHIFT);
485  state[i+1] = state[i] + shift_down(k[i] * x, LATTICE_SHIFT);
486  }
487 #endif
488 
489  // don't drift too far, to avoid overflows
490  if (x > (SAMPLE_FACTOR<<16)) x = (SAMPLE_FACTOR<<16);
491  if (x < -(SAMPLE_FACTOR<<16)) x = -(SAMPLE_FACTOR<<16);
492 
493  state[0] = x;
494 
495  return x;
496 }
497 
498 #if CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER
499 // Heavily modified Levinson-Durbin algorithm which
500 // copes better with quantization, and calculates the
501 // actual whitened result as it goes.
502 
503 static void modified_levinson_durbin(int *window, int window_entries,
504  int *out, int out_entries, int channels, int *tap_quant)
505 {
506  int i;
507  int *state = window + window_entries;
508 
509  memcpy(state, window, window_entries * sizeof(*state));
510 
511  for (i = 0; i < out_entries; i++)
512  {
513  int step = (i+1)*channels, k, j;
514  double xx = 0.0, xy = 0.0;
515 #if 1
516  int *x_ptr = &(window[step]);
517  int *state_ptr = &(state[0]);
518  j = window_entries - step;
519  for (;j>0;j--,x_ptr++,state_ptr++)
520  {
521  double x_value = *x_ptr;
522  double state_value = *state_ptr;
523  xx += state_value*state_value;
524  xy += x_value*state_value;
525  }
526 #else
527  for (j = 0; j <= (window_entries - step); j++);
528  {
529  double stepval = window[step+j];
530  double stateval = window[j];
531 // xx += (double)window[j]*(double)window[j];
532 // xy += (double)window[step+j]*(double)window[j];
533  xx += stateval*stateval;
534  xy += stepval*stateval;
535  }
536 #endif
537  if (xx == 0.0)
538  k = 0;
539  else
540  k = (int)(floor(-xy/xx * (double)LATTICE_FACTOR / (double)(tap_quant[i]) + 0.5));
541 
542  if (k > (LATTICE_FACTOR/tap_quant[i]))
543  k = LATTICE_FACTOR/tap_quant[i];
544  if (-k > (LATTICE_FACTOR/tap_quant[i]))
545  k = -(LATTICE_FACTOR/tap_quant[i]);
546 
547  out[i] = k;
548  k *= tap_quant[i];
549 
550 #if 1
551  x_ptr = &(window[step]);
552  state_ptr = &(state[0]);
553  j = window_entries - step;
554  for (;j>0;j--,x_ptr++,state_ptr++)
555  {
556  int x_value = *x_ptr;
557  int state_value = *state_ptr;
558  *x_ptr = x_value + shift_down(k*state_value,LATTICE_SHIFT);
559  *state_ptr = state_value + shift_down(k*x_value, LATTICE_SHIFT);
560  }
561 #else
562  for (j=0; j <= (window_entries - step); j++)
563  {
564  int stepval = window[step+j];
565  int stateval=state[j];
566  window[step+j] += shift_down(k * stateval, LATTICE_SHIFT);
567  state[j] += shift_down(k * stepval, LATTICE_SHIFT);
568  }
569 #endif
570  }
571 }
572 
/**
 * Map a sample rate to the 4-bit code stored in the stream header.
 * @return the code (0..8), or AVERROR(EINVAL) for an unsupported rate
 */
static inline int code_samplerate(int samplerate)
{
    static const int known_rates[] = {
        44100, 22050, 11025, 96000, 48000, 32000, 24000, 16000, 8000
    };
    const int count = sizeof(known_rates) / sizeof(known_rates[0]);
    int code;

    for (code = 0; code < count; code++)
        if (known_rates[code] == samplerate)
            return code;

    return AVERROR(EINVAL);
}
589 
590 static av_cold int sonic_encode_init(AVCodecContext *avctx)
591 {
592  SonicContext *s = avctx->priv_data;
593  int *coded_samples;
594  PutBitContext pb;
595  int i;
596 
597  s->version = 2;
598 
599  if (avctx->channels > MAX_CHANNELS)
600  {
601  av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
602  return AVERROR(EINVAL); /* only stereo or mono for now */
603  }
604 
605  if (avctx->channels == 2)
606  s->decorrelation = MID_SIDE;
607  else
608  s->decorrelation = 3;
609 
610  if (avctx->codec->id == AV_CODEC_ID_SONIC_LS)
611  {
612  s->lossless = 1;
613  s->num_taps = 32;
614  s->downsampling = 1;
615  s->quantization = 0.0;
616  }
617  else
618  {
619  s->num_taps = 128;
620  s->downsampling = 2;
621  s->quantization = 1.0;
622  }
623 
624  // max tap 2048
625  if (s->num_taps < 32 || s->num_taps > 1024 || s->num_taps % 32) {
626  av_log(avctx, AV_LOG_ERROR, "Invalid number of taps\n");
627  return AVERROR_INVALIDDATA;
628  }
629 
630  // generate taps
631  s->tap_quant = av_calloc(s->num_taps, sizeof(*s->tap_quant));
632  if (!s->tap_quant)
633  return AVERROR(ENOMEM);
634 
635  for (i = 0; i < s->num_taps; i++)
636  s->tap_quant[i] = ff_sqrt(i+1);
637 
638  s->channels = avctx->channels;
639  s->samplerate = avctx->sample_rate;
640 
641  s->block_align = 2048LL*s->samplerate/(44100*s->downsampling);
643 
644  s->tail_size = s->num_taps*s->channels;
645  s->tail = av_calloc(s->tail_size, sizeof(*s->tail));
646  if (!s->tail)
647  return AVERROR(ENOMEM);
648 
649  s->predictor_k = av_calloc(s->num_taps, sizeof(*s->predictor_k) );
650  if (!s->predictor_k)
651  return AVERROR(ENOMEM);
652 
653  coded_samples = av_calloc(s->block_align, s->channels * sizeof(**s->coded_samples));
654  if (!coded_samples)
655  return AVERROR(ENOMEM);
656  for (i = 0; i < s->channels; i++, coded_samples += s->block_align)
657  s->coded_samples[i] = coded_samples;
658 
659  s->int_samples = av_calloc(s->frame_size, sizeof(*s->int_samples));
660 
661  s->window_size = ((2*s->tail_size)+s->frame_size);
662  s->window = av_calloc(s->window_size, 2 * sizeof(*s->window));
663  if (!s->window || !s->int_samples)
664  return AVERROR(ENOMEM);
665 
666  avctx->extradata = av_mallocz(16);
667  if (!avctx->extradata)
668  return AVERROR(ENOMEM);
669  init_put_bits(&pb, avctx->extradata, 16*8);
670 
671  put_bits(&pb, 2, s->version); // version
672  if (s->version >= 1)
673  {
674  if (s->version >= 2) {
675  put_bits(&pb, 8, s->version);
676  put_bits(&pb, 8, s->minor_version);
677  }
678  put_bits(&pb, 2, s->channels);
679  put_bits(&pb, 4, code_samplerate(s->samplerate));
680  }
681  put_bits(&pb, 1, s->lossless);
682  if (!s->lossless)
683  put_bits(&pb, 3, SAMPLE_SHIFT); // XXX FIXME: sample precision
684  put_bits(&pb, 2, s->decorrelation);
685  put_bits(&pb, 2, s->downsampling);
686  put_bits(&pb, 5, (s->num_taps >> 5)-1); // 32..1024
687  put_bits(&pb, 1, 0); // XXX FIXME: no custom tap quant table
688 
689  flush_put_bits(&pb);
690  avctx->extradata_size = put_bits_count(&pb)/8;
691 
692  av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d.%d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
694 
695  avctx->frame_size = s->block_align*s->downsampling;
696 
697  return 0;
698 }
699 
700 static av_cold int sonic_encode_close(AVCodecContext *avctx)
701 {
702  SonicContext *s = avctx->priv_data;
703 
704  av_freep(&s->coded_samples[0]);
705  av_freep(&s->predictor_k);
706  av_freep(&s->tail);
707  av_freep(&s->tap_quant);
708  av_freep(&s->window);
709  av_freep(&s->int_samples);
710 
711  return 0;
712 }
713 
714 static int sonic_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
715  const AVFrame *frame, int *got_packet_ptr)
716 {
717  SonicContext *s = avctx->priv_data;
718  RangeCoder c;
719  int i, j, ch, quant = 0, x = 0;
720  int ret;
721  const short *samples = (const int16_t*)frame->data[0];
722  uint8_t state[32];
723 
724  if ((ret = ff_alloc_packet2(avctx, avpkt, s->frame_size * 5 + 1000, 0)) < 0)
725  return ret;
726 
727  ff_init_range_encoder(&c, avpkt->data, avpkt->size);
728  ff_build_rac_states(&c, 0.05*(1LL<<32), 256-8);
729  memset(state, 128, sizeof(state));
730 
731  // short -> internal
732  for (i = 0; i < s->frame_size; i++)
733  s->int_samples[i] = samples[i];
734 
735  if (!s->lossless)
736  for (i = 0; i < s->frame_size; i++)
737  s->int_samples[i] = s->int_samples[i] << SAMPLE_SHIFT;
738 
739  switch(s->decorrelation)
740  {
741  case MID_SIDE:
742  for (i = 0; i < s->frame_size; i += s->channels)
743  {
744  s->int_samples[i] += s->int_samples[i+1];
745  s->int_samples[i+1] -= shift(s->int_samples[i], 1);
746  }
747  break;
748  case LEFT_SIDE:
749  for (i = 0; i < s->frame_size; i += s->channels)
750  s->int_samples[i+1] -= s->int_samples[i];
751  break;
752  case RIGHT_SIDE:
753  for (i = 0; i < s->frame_size; i += s->channels)
754  s->int_samples[i] -= s->int_samples[i+1];
755  break;
756  }
757 
758  memset(s->window, 0, s->window_size * sizeof(*s->window));
759 
760  for (i = 0; i < s->tail_size; i++)
761  s->window[x++] = s->tail[i];
762 
763  for (i = 0; i < s->frame_size; i++)
764  s->window[x++] = s->int_samples[i];
765 
766  for (i = 0; i < s->tail_size; i++)
767  s->window[x++] = 0;
768 
769  for (i = 0; i < s->tail_size; i++)
770  s->tail[i] = s->int_samples[s->frame_size - s->tail_size + i];
771 
772  // generate taps
773  modified_levinson_durbin(s->window, s->window_size,
774  s->predictor_k, s->num_taps, s->channels, s->tap_quant);
775 
776  if ((ret = intlist_write(&c, state, s->predictor_k, s->num_taps, 0)) < 0)
777  return ret;
778 
779  for (ch = 0; ch < s->channels; ch++)
780  {
781  x = s->tail_size+ch;
782  for (i = 0; i < s->block_align; i++)
783  {
784  int sum = 0;
785  for (j = 0; j < s->downsampling; j++, x += s->channels)
786  sum += s->window[x];
787  s->coded_samples[ch][i] = sum;
788  }
789  }
790 
791  // simple rate control code
792  if (!s->lossless)
793  {
794  double energy1 = 0.0, energy2 = 0.0;
795  for (ch = 0; ch < s->channels; ch++)
796  {
797  for (i = 0; i < s->block_align; i++)
798  {
799  double sample = s->coded_samples[ch][i];
800  energy2 += sample*sample;
801  energy1 += fabs(sample);
802  }
803  }
804 
805  energy2 = sqrt(energy2/(s->channels*s->block_align));
806  energy1 = M_SQRT2*energy1/(s->channels*s->block_align);
807 
808  // increase bitrate when samples are like a gaussian distribution
809  // reduce bitrate when samples are like a two-tailed exponential distribution
810 
811  if (energy2 > energy1)
812  energy2 += (energy2-energy1)*RATE_VARIATION;
813 
814  quant = (int)(BASE_QUANT*s->quantization*energy2/SAMPLE_FACTOR);
815 // av_log(avctx, AV_LOG_DEBUG, "quant: %d energy: %f / %f\n", quant, energy1, energy2);
816 
817  quant = av_clip(quant, 1, 65534);
818 
819  put_symbol(&c, state, quant, 0, NULL, NULL);
820 
821  quant *= SAMPLE_FACTOR;
822  }
823 
824  // write out coded samples
825  for (ch = 0; ch < s->channels; ch++)
826  {
827  if (!s->lossless)
828  for (i = 0; i < s->block_align; i++)
829  s->coded_samples[ch][i] = ROUNDED_DIV(s->coded_samples[ch][i], quant);
830 
831  if ((ret = intlist_write(&c, state, s->coded_samples[ch], s->block_align, 1)) < 0)
832  return ret;
833  }
834 
835 // av_log(avctx, AV_LOG_DEBUG, "used bytes: %d\n", (put_bits_count(&pb)+7)/8);
836 
837  avpkt->size = ff_rac_terminate(&c, 0);
838  *got_packet_ptr = 1;
839  return 0;
840 
841 }
842 #endif /* CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER */
843 
844 #if CONFIG_SONIC_DECODER
/* Sample rates selectable by the 4-bit header code; the index is the code. */
static const int samplerate_table[] = {
    44100, 22050, 11025,
    96000, 48000, 32000,
    24000, 16000,  8000,
};
847 
848 static av_cold int sonic_decode_init(AVCodecContext *avctx)
849 {
850  SonicContext *s = avctx->priv_data;
851  int *tmp;
852  GetBitContext gb;
853  int i;
854  int ret;
855 
856  s->channels = avctx->channels;
857  s->samplerate = avctx->sample_rate;
858 
859  if (!avctx->extradata)
860  {
861  av_log(avctx, AV_LOG_ERROR, "No mandatory headers present\n");
862  return AVERROR_INVALIDDATA;
863  }
864 
865  ret = init_get_bits8(&gb, avctx->extradata, avctx->extradata_size);
866  if (ret < 0)
867  return ret;
868 
869  s->version = get_bits(&gb, 2);
870  if (s->version >= 2) {
871  s->version = get_bits(&gb, 8);
872  s->minor_version = get_bits(&gb, 8);
873  }
874  if (s->version != 2)
875  {
876  av_log(avctx, AV_LOG_ERROR, "Unsupported Sonic version, please report\n");
877  return AVERROR_INVALIDDATA;
878  }
879 
880  if (s->version >= 1)
881  {
882  int sample_rate_index;
883  s->channels = get_bits(&gb, 2);
884  sample_rate_index = get_bits(&gb, 4);
885  if (sample_rate_index >= FF_ARRAY_ELEMS(samplerate_table)) {
886  av_log(avctx, AV_LOG_ERROR, "Invalid sample_rate_index %d\n", sample_rate_index);
887  return AVERROR_INVALIDDATA;
888  }
889  s->samplerate = samplerate_table[sample_rate_index];
890  av_log(avctx, AV_LOG_INFO, "Sonicv2 chans: %d samprate: %d\n",
891  s->channels, s->samplerate);
892  }
893 
894  if (s->channels > MAX_CHANNELS || s->channels < 1)
895  {
896  av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
897  return AVERROR_INVALIDDATA;
898  }
899  avctx->channels = s->channels;
900 
901  s->lossless = get_bits1(&gb);
902  if (!s->lossless)
903  skip_bits(&gb, 3); // XXX FIXME
904  s->decorrelation = get_bits(&gb, 2);
905  if (s->decorrelation != 3 && s->channels != 2) {
906  av_log(avctx, AV_LOG_ERROR, "invalid decorrelation %d\n", s->decorrelation);
907  return AVERROR_INVALIDDATA;
908  }
909 
910  s->downsampling = get_bits(&gb, 2);
911  if (!s->downsampling) {
912  av_log(avctx, AV_LOG_ERROR, "invalid downsampling value\n");
913  return AVERROR_INVALIDDATA;
914  }
915 
916  s->num_taps = (get_bits(&gb, 5)+1)<<5;
917  if (get_bits1(&gb)) // XXX FIXME
918  av_log(avctx, AV_LOG_INFO, "Custom quant table\n");
919 
920  s->block_align = 2048LL*s->samplerate/(44100*s->downsampling);
922 // avctx->frame_size = s->block_align;
923 
924  if (s->num_taps * s->channels > s->frame_size) {
925  av_log(avctx, AV_LOG_ERROR,
926  "number of taps times channels (%d * %d) larger than frame size %d\n",
927  s->num_taps, s->channels, s->frame_size);
928  return AVERROR_INVALIDDATA;
929  }
930 
931  av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d.%d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
933 
934  // generate taps
935  s->tap_quant = av_calloc(s->num_taps, sizeof(*s->tap_quant));
936  if (!s->tap_quant)
937  return AVERROR(ENOMEM);
938 
939  for (i = 0; i < s->num_taps; i++)
940  s->tap_quant[i] = ff_sqrt(i+1);
941 
942  s->predictor_k = av_calloc(s->num_taps, sizeof(*s->predictor_k));
943 
944  tmp = av_calloc(s->num_taps, s->channels * sizeof(**s->predictor_state));
945  if (!tmp)
946  return AVERROR(ENOMEM);
947  for (i = 0; i < s->channels; i++, tmp += s->num_taps)
948  s->predictor_state[i] = tmp;
949 
950  tmp = av_calloc(s->block_align, s->channels * sizeof(**s->coded_samples));
951  if (!tmp)
952  return AVERROR(ENOMEM);
953  for (i = 0; i < s->channels; i++, tmp += s->block_align)
954  s->coded_samples[i] = tmp;
955 
956  s->int_samples = av_calloc(s->frame_size, sizeof(*s->int_samples));
957  if (!s->int_samples)
958  return AVERROR(ENOMEM);
959 
960  avctx->sample_fmt = AV_SAMPLE_FMT_S16;
961  return 0;
962 }
963 
964 static av_cold int sonic_decode_close(AVCodecContext *avctx)
965 {
966  SonicContext *s = avctx->priv_data;
967 
968  av_freep(&s->int_samples);
969  av_freep(&s->tap_quant);
970  av_freep(&s->predictor_k);
971  av_freep(&s->predictor_state[0]);
972  av_freep(&s->coded_samples[0]);
973 
974  return 0;
975 }
976 
977 static int sonic_decode_frame(AVCodecContext *avctx,
978  void *data, int *got_frame_ptr,
979  AVPacket *avpkt)
980 {
981  const uint8_t *buf = avpkt->data;
982  int buf_size = avpkt->size;
983  SonicContext *s = avctx->priv_data;
984  RangeCoder c;
985  uint8_t state[32];
986  int i, quant, ch, j, ret;
987  int16_t *samples;
988  AVFrame *frame = data;
989 
990  if (buf_size == 0) return 0;
991 
992  frame->nb_samples = s->frame_size / avctx->channels;
993  if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
994  return ret;
995  samples = (int16_t *)frame->data[0];
996 
997 // av_log(NULL, AV_LOG_INFO, "buf_size: %d\n", buf_size);
998 
999  memset(state, 128, sizeof(state));
1000  ff_init_range_decoder(&c, buf, buf_size);
1001  ff_build_rac_states(&c, 0.05*(1LL<<32), 256-8);
1002 
1003  intlist_read(&c, state, s->predictor_k, s->num_taps, 0);
1004 
1005  // dequantize
1006  for (i = 0; i < s->num_taps; i++)
1007  s->predictor_k[i] *= s->tap_quant[i];
1008 
1009  if (s->lossless)
1010  quant = 1;
1011  else
1012  quant = get_symbol(&c, state, 0) * SAMPLE_FACTOR;
1013 
1014 // av_log(NULL, AV_LOG_INFO, "quant: %d\n", quant);
1015 
1016  for (ch = 0; ch < s->channels; ch++)
1017  {
1018  int x = ch;
1019 
1020  if (c.overread > MAX_OVERREAD)
1021  return AVERROR_INVALIDDATA;
1022 
1024 
1025  intlist_read(&c, state, s->coded_samples[ch], s->block_align, 1);
1026 
1027  for (i = 0; i < s->block_align; i++)
1028  {
1029  for (j = 0; j < s->downsampling - 1; j++)
1030  {
1032  x += s->channels;
1033  }
1034 
1035  s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, s->coded_samples[ch][i] * (unsigned)quant);
1036  x += s->channels;
1037  }
1038 
1039  for (i = 0; i < s->num_taps; i++)
1040  s->predictor_state[ch][i] = s->int_samples[s->frame_size - s->channels + ch - i*s->channels];
1041  }
1042 
1043  switch(s->decorrelation)
1044  {
1045  case MID_SIDE:
1046  for (i = 0; i < s->frame_size; i += s->channels)
1047  {
1048  s->int_samples[i+1] += shift(s->int_samples[i], 1);
1049  s->int_samples[i] -= s->int_samples[i+1];
1050  }
1051  break;
1052  case LEFT_SIDE:
1053  for (i = 0; i < s->frame_size; i += s->channels)
1054  s->int_samples[i+1] += s->int_samples[i];
1055  break;
1056  case RIGHT_SIDE:
1057  for (i = 0; i < s->frame_size; i += s->channels)
1058  s->int_samples[i] += s->int_samples[i+1];
1059  break;
1060  }
1061 
1062  if (!s->lossless)
1063  for (i = 0; i < s->frame_size; i++)
1064  s->int_samples[i] = shift(s->int_samples[i], SAMPLE_SHIFT);
1065 
1066  // internal -> short
1067  for (i = 0; i < s->frame_size; i++)
1068  samples[i] = av_clip_int16(s->int_samples[i]);
1069 
1070  *got_frame_ptr = 1;
1071 
1072  return buf_size;
1073 }
1074 
1076  .name = "sonic",
1077  .long_name = NULL_IF_CONFIG_SMALL("Sonic"),
1078  .type = AVMEDIA_TYPE_AUDIO,
1079  .id = AV_CODEC_ID_SONIC,
1080  .priv_data_size = sizeof(SonicContext),
1081  .init = sonic_decode_init,
1082  .close = sonic_decode_close,
1083  .decode = sonic_decode_frame,
1084  .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_EXPERIMENTAL,
1085  .caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
1086 };
1087 #endif /* CONFIG_SONIC_DECODER */
1088 
1089 #if CONFIG_SONIC_ENCODER
1091  .name = "sonic",
1092  .long_name = NULL_IF_CONFIG_SMALL("Sonic"),
1093  .type = AVMEDIA_TYPE_AUDIO,
1094  .id = AV_CODEC_ID_SONIC,
1095  .priv_data_size = sizeof(SonicContext),
1096  .init = sonic_encode_init,
1097  .encode2 = sonic_encode_frame,
1099  .capabilities = AV_CODEC_CAP_EXPERIMENTAL,
1100  .caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
1101  .close = sonic_encode_close,
1102 };
1103 #endif
1104 
1105 #if CONFIG_SONIC_LS_ENCODER
1107  .name = "sonicls",
1108  .long_name = NULL_IF_CONFIG_SMALL("Sonic lossless"),
1109  .type = AVMEDIA_TYPE_AUDIO,
1110  .id = AV_CODEC_ID_SONIC_LS,
1111  .priv_data_size = sizeof(SonicContext),
1112  .init = sonic_encode_init,
1113  .encode2 = sonic_encode_frame,
1115  .capabilities = AV_CODEC_CAP_EXPERIMENTAL,
1116  .caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
1117  .close = sonic_encode_close,
1118 };
1119 #endif
#define FF_CODEC_CAP_INIT_CLEANUP
The codec allows calling the close function for deallocation even if the init function returned a failure.
Definition: internal.h:48
#define NULL
Definition: coverity.c:32
const struct AVCodec * codec
Definition: avcodec.h:535
int * int_samples
Definition: sonic.c:60
int * tail
Definition: sonic.c:64
int samplerate
Definition: sonic.c:57
#define LATTICE_FACTOR
Definition: sonic.c:76
#define AVERROR_INVALIDDATA
Invalid data found when processing input.
Definition: error.h:59
static int shift(int a, int b)
Definition: sonic.c:82
static void copy(const float *p1, float *p2, const int length)
This structure describes decoded (raw) audio or video data.
Definition: frame.h:308
int lossless
Definition: sonic.c:52
ptrdiff_t const GLvoid * data
Definition: opengl_enc.c:100
static int get_se_golomb(GetBitContext *gb)
read signed exp golomb code.
Definition: golomb.h:241
static void put_bits(Jpeg2000EncoderContext *s, int val, int n)
put n times val bit
Definition: j2kenc.c:218
static unsigned int get_bits(GetBitContext *s, int n)
Read 1-25 bits.
Definition: get_bits.h:379
int * predictor_state[MAX_CHANNELS]
Definition: sonic.c:71
static av_cold int init(AVCodecContext *avctx)
Definition: avrndec.c:35
Range coder.
int size
Definition: packet.h:364
#define LATTICE_SHIFT
Definition: sonic.c:74
int av_log2(unsigned v)
Definition: intmath.c:26
The reader does not expect b to be semantically unsigned here, and if the code is changed by maybe adding a cast, a division or other, the signedness will almost certainly be mistaken. To avoid this confusion a new type was introduced: SUINT is the C unsigned type, but it holds a signed int. To use the same example: SUINT a
Definition: undefined.txt:36
#define AV_CODEC_CAP_EXPERIMENTAL
Codec is experimental and is thus avoided in favor of non experimental encoders.
Definition: codec.h:98
int version
Definition: sonic.c:50
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if availab...
Definition: mem.c:237
int * tap_quant
Definition: sonic.c:59
static void error(const char *err)
#define sample
AVCodec.
Definition: codec.h:190
static void decode(AVCodecContext *dec_ctx, AVPacket *pkt, AVFrame *frame, FILE *outfile)
Definition: decode_audio.c:71
void * av_calloc(size_t nmemb, size_t size)
Non-inlined equivalent of av_mallocz_array().
Definition: mem.c:245
static __device__ float floor(float a)
Definition: cuda_runtime.h:173
#define MID_SIDE
Definition: sonic.c:45
int ff_alloc_packet2(AVCodecContext *avctx, AVPacket *avpkt, int64_t size, int64_t min_size)
Check AVPacket size and/or allocate data.
Definition: encode.c:33
AVCodec ff_sonic_ls_encoder
enum AVSampleFormat sample_fmt
audio sample format
Definition: avcodec.h:1194
uint8_t
#define av_cold
Definition: attributes.h:88
static int get_rac(RangeCoder *c, uint8_t *const state)
Definition: rangecoder.h:136
#define MAX_CHANNELS
Definition: sonic.c:43
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space. Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can and has in some cases led to effects beyond the output of computations. The signed integer overflow problem in speed critical code: Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
GLsizei GLboolean const GLfloat * value
Definition: opengl_enc.c:108
uint8_t * extradata
some codecs need / can use extradata like Huffman tables.
Definition: avcodec.h:627
uint8_t * data
Definition: packet.h:363
bitstream reader API header.
#define max(a, b)
Definition: cuda_runtime.h:33
#define RIGHT_SIDE
Definition: sonic.c:47
channels
Definition: aptx.h:33
#define av_log(a,...)
#define ff_sqrt
Definition: mathops.h:206
#define ROUNDED_DIV(a, b)
Definition: common.h:56
enum AVCodecID id
Definition: codec.h:204
int channels
Definition: sonic.c:57
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:194
static __device__ float fabs(float a)
Definition: cuda_runtime.h:182
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
Definition: internal.h:153
unsigned int pos
Definition: spdifenc.c:410
const char * name
Name of the codec implementation.
Definition: codec.h:197
uint8_t bits
Definition: vp3data.h:202
static int put_bits_count(PutBitContext *s)
Definition: put_bits.h:83
AVCodec ff_sonic_decoder
#define av_flatten
Definition: attributes.h:94
#define b
Definition: input.c:41
static struct @322 state
#define FFMIN(a, b)
Definition: common.h:96
static av_flatten int get_symbol(RangeCoder *c, uint8_t *state, int is_signed)
Definition: sonic.c:139
int block_align
Definition: sonic.c:57
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame For filters that do not use the this method is called when a frame is wanted on an output For a it should directly call filter_frame on the corresponding output For a if there are queued frames already one of these frames should be pushed If the filter should request a frame on one of its repeatedly until at least one frame has been pushed Return or at least make progress towards producing a frame
void ff_build_rac_states(RangeCoder *c, int factor, int max_p)
Definition: rangecoder.c:68
#define FFABS(a)
Absolute value. Note: INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
Definition: common.h:72
#define s(width, name)
Definition: cbs_vp9.c:257
#define RATE_VARIATION
Definition: sonic.c:80
#define FF_ARRAY_ELEMS(a)
if(ret)
int frame_size
Number of samples per channel in an audio frame.
Definition: avcodec.h:1206
AVCodec ff_sonic_encoder
#define AV_LOG_INFO
Standard information.
Definition: log.h:205
Libavcodec external API header.
static void set_se_golomb(PutBitContext *pb, int i)
Write a signed Exp-Golomb code.
Definition: golomb.h:667
AVSampleFormat
Audio sample formats.
Definition: samplefmt.h:58
int sample_rate
samples per second
Definition: avcodec.h:1186
static int init_get_bits8(GetBitContext *s, const uint8_t *buffer, int byte_size)
Initialize GetBitContext.
Definition: get_bits.h:677
#define abs(x)
Definition: cuda_runtime.h:35
int * predictor_k
Definition: sonic.c:70
main external API structure.
Definition: avcodec.h:526
int ff_get_buffer(AVCodecContext *avctx, AVFrame *frame, int flags)
Get a buffer for a frame.
Definition: decode.c:1879
int tail_size
Definition: sonic.c:65
int extradata_size
Definition: avcodec.h:628
static unsigned int get_bits1(GetBitContext *s)
Definition: get_bits.h:498
static void skip_bits(GetBitContext *s, int n)
Definition: get_bits.h:467
av_cold void ff_init_range_encoder(RangeCoder *c, uint8_t *buf, int buf_size)
Definition: rangecoder.c:42
av_cold void ff_init_range_decoder(RangeCoder *c, const uint8_t *buf, int buf_size)
Definition: rangecoder.c:53
#define LEFT_SIDE
Definition: sonic.c:46
#define MAX_OVERREAD
Definition: lagarithrac.h:51
static int intlist_write(RangeCoder *c, uint8_t *state, int *buf, int entries, int base_2_part)
Definition: sonic.c:163
static void predictor_init_state(int *k, int *state, int order)
Definition: sonic.c:451
const uint8_t * quant
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:322
uint8_t level
Definition: svq3.c:205
#define BASE_QUANT
Definition: sonic.c:79
#define SAMPLE_SHIFT
Definition: sonic.c:75
#define put_rac(C, S, B)
int ff_rac_terminate(RangeCoder *c, int version)
Terminates the range coder.
Definition: rangecoder.c:109
#define M_SQRT2
Definition: mathematics.h:61
int
int downsampling
Definition: sonic.c:54
int decorrelation
Definition: sonic.c:52
common internal api header.
static void flush_put_bits(PutBitContext *s)
Pad the end of the output stream with zeros.
Definition: put_bits.h:117
signed 16 bits
Definition: samplefmt.h:61
int overread
Definition: rangecoder.h:45
int window_size
Definition: sonic.c:67
static void init_put_bits(PutBitContext *s, uint8_t *buffer, int buffer_size)
Initialize the PutBitContext s.
Definition: put_bits.h:64
void * priv_data
Definition: avcodec.h:553
#define av_free(p)
int channels
number of audio channels
Definition: avcodec.h:1187
double quantization
Definition: sonic.c:55
int * coded_samples[MAX_CHANNELS]
Definition: sonic.c:61
static int predictor_calc_error(int *k, int *state, int order, int error)
Definition: sonic.c:468
int frame_size
Definition: sonic.c:57
int num_taps
Definition: sonic.c:54
static enum AVSampleFormat sample_fmts[]
Definition: adpcmenc.c:940
int minor_version
Definition: sonic.c:51
FILE * out
Definition: movenc.c:54
#define SAMPLE_FACTOR
Definition: sonic.c:77
Filter the word “frame” indicates either a video frame or a group of audio samples
#define av_freep(p)
#define av_always_inline
Definition: attributes.h:45
static int shift_down(int a, int b)
Definition: sonic.c:87
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
exp golomb vlc stuff
This structure stores compressed data.
Definition: packet.h:340
int nb_samples
number of audio samples (per channel) described by this frame
Definition: frame.h:374
#define AV_CODEC_CAP_DR1
Codec uses get_buffer() for allocating buffers and supports custom allocators.
Definition: codec.h:50
static int intlist_read(RangeCoder *c, uint8_t *state, int *buf, int entries, int base_2_part)
Definition: sonic.c:173
for(j=16;j >0;--j)
int i
Definition: input.c:407
static av_always_inline av_flatten void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed, uint64_t rc_stat[256][2], uint64_t rc_stat2[32][2])
Definition: sonic.c:92
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But a word about which is also called distortion Distortion can be quantified by almost any quality measurement one chooses the sum of squared differences is used but more complex methods that consider psychovisual effects can be used as well It makes no difference in this discussion First step
int * window
Definition: sonic.c:66
static uint8_t tmp[11]
Definition: aes_ctr.c:26