FFmpeg
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
nellymoserenc.c
Go to the documentation of this file.
1 /*
2  * Nellymoser encoder
3  * This code is developed as part of Google Summer of Code 2008 Program.
4  *
5  * Copyright (c) 2008 Bartlomiej Wolowiec
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 /**
25  * @file
26  * Nellymoser encoder
27  * by Bartlomiej Wolowiec
28  *
29  * Generic codec information: libavcodec/nellymoserdec.c
30  *
31  * Some information also from: http://samples.mplayerhq.hu/A-codecs/Nelly_Moser/ASAO/ASAO.zip
32  * (Copyright Joseph Artsimovich and UAB "DKD")
33  *
34  * for more information about nellymoser format, visit:
35  * http://wiki.multimedia.cx/index.php?title=Nellymoser
36  */
37 
38 #include "libavutil/float_dsp.h"
39 #include "libavutil/mathematics.h"
40 #include "nellymoser.h"
41 #include "avcodec.h"
42 #include "audio_frame_queue.h"
43 #include "fft.h"
44 #include "internal.h"
45 #include "sinewin.h"
46 
47 #define BITSTREAM_WRITER_LE
48 #include "put_bits.h"
49 
/** Number of entries in pow_table; the fill loop steps the exponent by 1/2048 per entry. */
50 #define POW_TABLE_SIZE (1<<11)
/** Bias added to the exponent when pow_table is filled and added back to the shift count when it is read. */
51 #define POW_TABLE_OFFSET 3
/** Row length of the opt/path trellis arrays used by get_exponent_dynamic(). */
52 #define OPT_SIZE ((1<<15) + 3000)
53 
/*
 * Encoder private context.
 * NOTE(review): this listing omits several members and the closing brace of
 * the struct (listing lines 55-61 and 64-65); other code in this file also
 * reads avctx, last_frame, fdsp, mdct_ctx, afq, mdct_out, in_buff and path
 * from it -- confirm against the real header of the struct.
 */
54 typedef struct NellyMoserEncodeContext {
62  DECLARE_ALIGNED(32, float, buf)[3 * NELLY_BUF_LEN]; ///< sample buffer
 /* trellis cost rows, OPT_SIZE entries each; allocated in the init code only when avctx->trellis is set */
63  float (*opt )[OPT_SIZE];
66 
67 static float pow_table[POW_TABLE_SIZE]; ///< -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET), filled by the init code
68 
/**
 * First-guess table for the band-0 exponent search.
 * get_exponent_greedy() passes it to find_best() with LUT_add = -20 and
 * LUT_size = 96, so it is indexed by av_clip((lrintf(val) >> 8) - 20, 0, 95)
 * and yields a starting index into ff_nelly_init_table.
 */
69 static const uint8_t sf_lut[96] = {
70  0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4,
71  5, 5, 5, 6, 7, 7, 8, 8, 9, 10, 11, 11, 12, 13, 13, 14,
72  15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
73  27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
74  41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
75  54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
76 };
77 
/**
 * First-guess table for the exponent deltas of bands 1 and up.
 * get_exponent_greedy() passes it to find_best() with LUT_add = 37 and
 * LUT_size = 78, so it is indexed by av_clip((lrintf(val) >> 8) + 37, 0, 77)
 * and yields a starting index into ff_nelly_delta_table.
 */
78 static const uint8_t sf_delta_lut[78] = {
79  0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4,
80  4, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 10, 10, 11, 11, 12,
81  13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
82  23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
83  28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
84 };
85 
/**
 * First-guess quantizer indices, one section per bit count.
 * encode_block() computes coeff * quant_lut_mul[bits] + quant_lut_add[bits],
 * clips it to [quant_lut_offset[bits], quant_lut_offset[bits + 1] - 1] and
 * uses the entry found there as the starting index into the dequantization
 * table for that bit count. The blank-line separated groups below have
 * 1, 3, 7, 21, 49 and 149 entries, matching the quant_lut_offset boundaries.
 */
86 static const uint8_t quant_lut[230] = {
87  0,
88 
89  0, 1, 2,
90 
91  0, 1, 2, 3, 4, 5, 6,
92 
93  0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11,
94  12, 13, 13, 13, 14,
95 
96  0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8,
97  8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
98  22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
99  30,
100 
101  0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3,
102  4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9,
103  10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
104  15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
105  21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
106  33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
107  46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
108  53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
109  58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
110  61, 61, 61, 61, 62,
111 };
112 
/** Per-bit-count scale applied to a coefficient before the quant_lut lookup in encode_block(). */
113 static const float quant_lut_mul[7] = { 0.0, 0.0, 2.0, 2.0, 5.0, 12.0, 36.6 };
/** Per-bit-count bias added after scaling, before the quant_lut lookup in encode_block(). */
114 static const float quant_lut_add[7] = { 0.0, 0.0, 2.0, 7.0, 21.0, 56.0, 157.0 };
/** Start of each bit count's section in quant_lut; entry [bits + 1] - 1 is used as the upper clip bound. */
115 static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 };
116 
/*
 * NOTE(review): the signature line of this function (listing line 117) is
 * missing from this listing. Presumably this is apply_mdct(), which
 * encode_block() calls with the encoder context `s` -- confirm.
 *
 * Windows consecutive NELLY_BUF_LEN-sized parts of s->buf with ff_sine_128
 * into s->in_buff and transforms the first windowed block into s->mdct_out.
 */
118 {
119  float *in0 = s->buf;
120  float *in1 = s->buf + NELLY_BUF_LEN;
121  float *in2 = s->buf + 2 * NELLY_BUF_LEN;
122 
 /* first block: in0 goes to the first half of in_buff, in1 to the second half */
123  s->fdsp.vector_fmul (s->in_buff, in0, ff_sine_128, NELLY_BUF_LEN);
124  s->fdsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in1, ff_sine_128, NELLY_BUF_LEN);
125  s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);
126 
 /* second block: same windowing applied to in1 and in2 */
127  s->fdsp.vector_fmul (s->in_buff, in1, ff_sine_128, NELLY_BUF_LEN);
128  s->fdsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in2, ff_sine_128, NELLY_BUF_LEN);
 /* NOTE(review): listing line 129 is missing; no transform of the second block is visible here,
  * yet encode_block() reads s->mdct_out[i + NELLY_BUF_LEN] -- confirm against the real file */
130 }
131 
/*
 * NOTE(review): the signature line and listing line 134 (which presumably
 * declares `s`) are missing from this listing. Presumably this is
 * encode_end(avctx): the init code calls it on its error path and the codec
 * descriptor installs it as .close -- confirm.
 *
 * Releases what the init code set up: the MDCT context, the trellis arrays
 * and the audio frame queue. Returns 0.
 */
133 {
135 
136  ff_mdct_end(&s->mdct_ctx);
137 
 /* opt and path are only allocated when trellis search is enabled */
138  if (s->avctx->trellis) {
139  av_free(s->opt);
140  av_free(s->path);
141  }
142  ff_af_queue_close(&s->afq);
143 #if FF_API_OLD_ENCODE_AUDIO
144  av_freep(&avctx->coded_frame);
145 #endif
146 
147  return 0;
148 }
149 
/*
 * NOTE(review): the signature line and listing lines 152, 163, 174 and 177
 * are missing from this listing. Presumably this is encode_init(avctx),
 * which the codec descriptor installs as .init -- confirm.
 *
 * Validates the channel count and sample rate, sets frame_size and delay,
 * initialises the frame queue and the MDCT, fills pow_table and, when
 * avctx->trellis is set, allocates the opt/path arrays.
 * Returns 0 on success; otherwise an AVERROR code, after calling
 * encode_end() for the failures that jump to the error label.
 */
151 {
153  int i, ret;
154 
155  if (avctx->channels != 1) {
156  av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
157  return AVERROR(EINVAL);
158  }
159 
 /* NOTE(review): the last clause of this condition (listing line 163) is missing here */
160  if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 &&
161  avctx->sample_rate != 11025 &&
162  avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
164  av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
165  return AVERROR(EINVAL);
166  }
167 
168  avctx->frame_size = NELLY_SAMPLES;
169  avctx->delay = NELLY_BUF_LEN;
170  ff_af_queue_init(avctx, &s->afq);
 /* stored before anything can fail, because encode_end() reads s->avctx */
171  s->avctx = avctx;
172  if ((ret = ff_mdct_init(&s->mdct_ctx, 8, 0, 32768.0)) < 0)
173  goto error;
175 
 /* NOTE(review): the statement this comment refers to (listing line 177) is missing here */
176  /* Generate overlap window */
 /* with POW_TABLE_OFFSET == 3 the "- 3.0 + POW_TABLE_OFFSET" terms cancel, so entry i is -pow(2, -i / 2048.0) */
178  for (i = 0; i < POW_TABLE_SIZE; i++)
179  pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET);
180 
 /* working arrays for get_exponent_dynamic(): NELLY_BANDS rows of OPT_SIZE entries each */
181  if (s->avctx->trellis) {
182  s->opt = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float ));
183  s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
184  if (!s->opt || !s->path) {
185  ret = AVERROR(ENOMEM);
186  goto error;
187  }
188  }
189 
190 #if FF_API_OLD_ENCODE_AUDIO
191  avctx->coded_frame = avcodec_alloc_frame();
192  if (!avctx->coded_frame) {
193  ret = AVERROR(ENOMEM);
194  goto error;
195  }
196 #endif
197 
198  return 0;
199 error:
200  encode_end(avctx);
201  return ret;
202 }
203 
204 #define find_best(val, table, LUT, LUT_add, LUT_size) \
205  best_idx = \
206  LUT[av_clip ((lrintf(val) >> 8) + LUT_add, 0, LUT_size - 1)]; \
207  if (fabs(val - table[best_idx]) > fabs(val - table[best_idx + 1])) \
208  best_idx++;
209 
210 static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
211 {
212  int band, best_idx, power_idx = 0;
213  float power_candidate;
214 
215  //base exponent
216  find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
217  idx_table[0] = best_idx;
218  power_idx = ff_nelly_init_table[best_idx];
219 
220  for (band = 1; band < NELLY_BANDS; band++) {
221  power_candidate = cand[band] - power_idx;
222  find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78);
223  idx_table[band] = best_idx;
224  power_idx += ff_nelly_delta_table[best_idx];
225  }
226 }
227 
/**
 * Squared difference between two values.
 *
 * @param x    first value
 * @param y    second value
 * @param band band index; accepted but not used in the computation
 * @return (x - y) * (x - y)
 */
static inline float distance(float x, float y, int band)
{
    /* rounded to float before squaring */
    const float diff = x - y;

    return diff * diff;
}
234 
235 static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
236 {
237  int i, j, band, best_idx;
238  float power_candidate, best_val;
239 
240  float (*opt )[OPT_SIZE] = s->opt ;
241  uint8_t(*path)[OPT_SIZE] = s->path;
242 
243  for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
244  opt[0][i] = INFINITY;
245  }
246 
247  for (i = 0; i < 64; i++) {
248  opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
249  path[0][ff_nelly_init_table[i]] = i;
250  }
251 
252  for (band = 1; band < NELLY_BANDS; band++) {
253  int q, c = 0;
254  float tmp;
255  int idx_min, idx_max, idx;
256  power_candidate = cand[band];
257  for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
258  idx_min = FFMAX(0, cand[band] - q);
259  idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
260  for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
261  if ( isinf(opt[band - 1][i]) )
262  continue;
263  for (j = 0; j < 32; j++) {
264  idx = i + ff_nelly_delta_table[j];
265  if (idx > idx_max)
266  break;
267  if (idx >= idx_min) {
268  tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
269  if (opt[band][idx] > tmp) {
270  opt[band][idx] = tmp;
271  path[band][idx] = j;
272  c = 1;
273  }
274  }
275  }
276  }
277  }
278  assert(c); //FIXME
279  }
280 
281  best_val = INFINITY;
282  best_idx = -1;
283  band = NELLY_BANDS - 1;
284  for (i = 0; i < OPT_SIZE; i++) {
285  if (best_val > opt[band][i]) {
286  best_val = opt[band][i];
287  best_idx = i;
288  }
289  }
290  for (band = NELLY_BANDS - 1; band >= 0; band--) {
291  idx_table[band] = path[band][best_idx];
292  if (band) {
293  best_idx -= ff_nelly_delta_table[path[band][best_idx]];
294  }
295  }
296 }
297 
298 /**
299  * Encode NELLY_SAMPLES samples. It assumes that the sample buffer (s->buf)
299  * contains 3 * NELLY_BUF_LEN values.
299  * The unused tail of the output buffer is zero-filled.
300  * @param s encoder context
301  * @param output output buffer
302  * @param output_size size of output buffer
303  */
304 static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
305 {
306  PutBitContext pb;
307  int i, j, band, block, best_idx, power_idx = 0;
308  float power_val, coeff, coeff_sum;
309  float pows[NELLY_FILL_LEN];
310  int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
311  float cand[NELLY_BANDS];
312 
313  apply_mdct(s);
314 
315  init_put_bits(&pb, output, output_size * 8);
316 
 /* per-band target exponent: 1024 * log2 of the band energy summed over both
  * blocks, divided by (band size << 7) and floored at 1.0 */
317  i = 0;
318  for (band = 0; band < NELLY_BANDS; band++) {
319  coeff_sum = 0;
320  for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
321  coeff_sum += s->mdct_out[i ] * s->mdct_out[i ]
322  + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
323  }
324  cand[band] =
325  log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2;
326  }
327 
 /* turn the targets into table indices: trellis search if enabled, greedy otherwise */
328  if (s->avctx->trellis) {
329  get_exponent_dynamic(s, cand, idx_table);
330  } else {
331  get_exponent_greedy(s, cand, idx_table);
332  }
333 
 /* write the exponent indices (6 bits for band 0, 5 bits for the rest) and
  * scale the coefficients of both blocks by the factor derived from power_idx */
334  i = 0;
335  for (band = 0; band < NELLY_BANDS; band++) {
336  if (band) {
337  power_idx += ff_nelly_delta_table[idx_table[band]];
338  put_bits(&pb, 5, idx_table[band]);
339  } else {
340  power_idx = ff_nelly_init_table[idx_table[0]];
341  put_bits(&pb, 6, idx_table[0]);
342  }
 /* low 11 bits of power_idx select the pow_table entry, the higher bits give a power-of-two divisor */
343  power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
344  for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
345  s->mdct_out[i] *= power_val;
346  s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
347  pows[i] = power_idx;
348  }
349  }
350 
 /* fills bits[] from the per-coefficient exponents in pows[] */
351  ff_nelly_get_sample_bits(pows, bits);
352 
 /* quantize both blocks: quant_lut gives a first guess, then the next table entry is tried */
353  for (block = 0; block < 2; block++) {
354  for (i = 0; i < NELLY_FILL_LEN; i++) {
355  if (bits[i] > 0) {
356  const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
357  coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
358  best_idx =
359  quant_lut[av_clip (
360  coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
361  quant_lut_offset[bits[i]],
362  quant_lut_offset[bits[i]+1] - 1
363  )];
364  if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
365  best_idx++;
366 
367  put_bits(&pb, bits[i], best_idx);
368  }
369  }
 /* NOTE(review): the statement controlled by this `if` (listing line 371) is missing from this listing */
370  if (!block)
372  }
373 
374  flush_put_bits(&pb);
 /* zero whatever remains of the output buffer after the written bits */
375  memset(put_bits_ptr(&pb), 0, output + output_size - put_bits_ptr(&pb));
376 }
377 
/**
 * Encode one input frame, or a block of silence when frame is NULL, into avpkt.
 *
 * @param avctx          codec context
 * @param avpkt          output packet; NELLY_BLOCK_LEN bytes are requested for it
 * @param frame          input samples, or NULL
 * @param got_packet_ptr set to 1 when a packet was produced; not written on the
 *                       early return taken once s->last_frame is set
 * @return 0 on success, or the negative error code returned by
 *         ff_af_queue_add() or ff_alloc_packet2()
 */
378 static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
379  const AVFrame *frame, int *got_packet_ptr)
380 {
 /* NOTE(review): listing line 381, which presumably declares `s`, is missing from this listing */
382  int ret;
383 
 /* nothing more to emit once the final block has been produced */
384  if (s->last_frame)
385  return 0;
386 
 /* move NELLY_BUF_LEN samples from offset NELLY_SAMPLES to the front of the buffer */
387  memcpy(s->buf, s->buf + NELLY_SAMPLES, NELLY_BUF_LEN * sizeof(*s->buf));
388  if (frame) {
389  memcpy(s->buf + NELLY_BUF_LEN, frame->data[0],
390  frame->nb_samples * sizeof(*s->buf));
 /* short frame: zero-pad up to NELLY_SAMPLES */
391  if (frame->nb_samples < NELLY_SAMPLES) {
392  memset(s->buf + NELLY_BUF_LEN + frame->nb_samples, 0,
393  (NELLY_SAMPLES - frame->nb_samples) * sizeof(*s->buf));
394  if (frame->nb_samples >= NELLY_BUF_LEN)
395  s->last_frame = 1;
396  }
397  if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
398  return ret;
399  } else {
 /* no input: encode silence and mark this as the final block */
400  memset(s->buf + NELLY_BUF_LEN, 0, NELLY_SAMPLES * sizeof(*s->buf));
401  s->last_frame = 1;
402  }
403 
404  if ((ret = ff_alloc_packet2(avctx, avpkt, NELLY_BLOCK_LEN)) < 0)
405  return ret;
406  encode_block(s, avpkt->data, avpkt->size);
407 
408  /* Get the next frame pts/duration */
409  ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
410  &avpkt->duration);
411 
412  *got_packet_ptr = 1;
413  return 0;
414 }
415 
/*
 * Codec descriptor wiring the init/encode/close callbacks defined in this file.
 * NOTE(review): the declaration line and listing lines 419, 424 and 427 are
 * missing from this listing, so the initializer below is incomplete --
 * confirm against the real file.
 */
417  .name = "nellymoser",
418  .type = AVMEDIA_TYPE_AUDIO,
420  .priv_data_size = sizeof(NellyMoserEncodeContext),
421  .init = encode_init,
422  .encode2 = encode_frame,
423  .close = encode_end,
425  .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
426  .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLT,
428 };