FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
aacenc.c
Go to the documentation of this file.
1 /*
2  * AAC encoder
3  * Copyright (C) 2008 Konstantin Shishkov
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * AAC encoder
25  */
26 
27 /***********************************
28  * TODOs:
29  * add sane pulse detection
30  * add temporal noise shaping
31  ***********************************/
32 
33 #include "libavutil/float_dsp.h"
34 #include "libavutil/opt.h"
35 #include "avcodec.h"
36 #include "put_bits.h"
37 #include "internal.h"
38 #include "mpeg4audio.h"
39 #include "kbdwin.h"
40 #include "sinewin.h"
41 
42 #include "aac.h"
43 #include "aactab.h"
44 #include "aacenc.h"
45 
46 #include "psymodel.h"
47 
48 #define AAC_MAX_CHANNELS 6
49 
/**
 * Log an error and return AVERROR(EINVAL) from the enclosing function when
 * cond is true.
 *
 * The expansion refers to a variable named avctx, which must be in scope at
 * the call site, and contains a return statement, so it may only be used in
 * functions returning int.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement: "if (x) ERROR_IF(...); else ..." parses as intended
 * instead of binding the else to the macro's hidden if.
 */
#define ERROR_IF(cond, ...) \
    do { \
        if (cond) { \
            av_log(avctx, AV_LOG_ERROR, __VA_ARGS__); \
            return AVERROR(EINVAL); \
        } \
    } while (0)

/**
 * Log a warning when cond is true; execution always continues.
 *
 * Like ERROR_IF, this expects avctx to be in scope and is wrapped in
 * do { } while (0) so it is safe inside unbraced if/else bodies.
 */
#define WARN_IF(cond, ...) \
    do { \
        if (cond) { \
            av_log(avctx, AV_LOG_WARNING, __VA_ARGS__); \
        } \
    } while (0)
60 
61 float ff_aac_pow34sf_tab[428];
62 
/*
 * Band-size tables used with the 1024-coefficient transform, one table per
 * sample-rate class. aac_encode_init() selects one through
 * swb_size_1024[sample-rate index] and passes it to ff_psy_init() as the first
 * of its two size tables.
 * NOTE(review): the numeric suffix presumably is the nominal sample rate in
 * kHz - confirm against the AAC specification tables.
 */
63 static const uint8_t swb_size_1024_96[] = {
64  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8,
65  12, 12, 12, 12, 12, 16, 16, 24, 28, 36, 44,
66  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
67 };
68 
69 static const uint8_t swb_size_1024_64[] = {
70  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8,
71  12, 12, 12, 16, 16, 16, 20, 24, 24, 28, 36,
72  40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40
73 };
74 
75 static const uint8_t swb_size_1024_48[] = {
76  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
77  12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
78  32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
79  96
80 };
81 
82 static const uint8_t swb_size_1024_32[] = {
83  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
84  12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
85  32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
86 };
87 
88 static const uint8_t swb_size_1024_24[] = {
89  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
90  12, 12, 12, 12, 16, 16, 16, 20, 20, 24, 24, 28, 28,
91  32, 36, 36, 40, 44, 48, 52, 52, 64, 64, 64, 64, 64
92 };
93 
94 static const uint8_t swb_size_1024_16[] = {
95  8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
96  12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 16, 16, 16, 20, 20, 20, 24, 24, 28, 28,
97  32, 36, 40, 40, 44, 48, 52, 56, 60, 64, 64, 64
98 };
99 
100 static const uint8_t swb_size_1024_8[] = {
101  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
102  16, 16, 16, 16, 16, 16, 16, 20, 20, 20, 20, 24, 24, 24, 28, 28,
103  32, 36, 36, 40, 44, 48, 52, 56, 60, 64, 80
104 };
105 
/*
 * Lookup from sample-rate index to the matching table above.
 * NOTE(review): listing lines 107-110 are absent from this extract, so only
 * the final entry of the initializer is visible here.
 */
106 static const uint8_t *swb_size_1024[] = {
111  swb_size_1024_8
112 };
113 
/*
 * Band-size tables used with the 128-coefficient transform, one table per
 * sample-rate class. aac_encode_init() selects one through
 * swb_size_128[sample-rate index] and passes it to ff_psy_init() as the second
 * of its two size tables.
 * NOTE(review): the numeric suffix presumably is the nominal sample rate in
 * kHz - confirm against the AAC specification tables.
 */
114 static const uint8_t swb_size_128_96[] = {
115  4, 4, 4, 4, 4, 4, 8, 8, 8, 16, 28, 36
116 };
117 
118 static const uint8_t swb_size_128_48[] = {
119  4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 12, 16, 16, 16
120 };
121 
122 static const uint8_t swb_size_128_24[] = {
123  4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 20
124 };
125 
126 static const uint8_t swb_size_128_16[] = {
127  4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 12, 12, 16, 20, 20
128 };
129 
130 static const uint8_t swb_size_128_8[] = {
131  4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 12, 16, 20, 20
132 };
133 
/*
 * Lookup from sample-rate index to the matching table above.
 * NOTE(review): listing lines 137-140 are absent from this extract, so only
 * the final entry of the initializer is visible here.
 */
134 static const uint8_t *swb_size_128[] = {
135  /* the last entry on the following row is swb_size_128_64 but is a
136  duplicate of swb_size_128_96 */
141  swb_size_128_8
142 };
143 
144 /** default channel configurations */
/*
 * Row n-1 describes an n-channel stream: the first byte is the number of
 * syntactic elements and the following bytes are their types, in order.
 * NOTE(review): presumably this is what s->chan_map points at (the assignment
 * is not visible in this extract); aac_encode_frame() reads chan_map[0] as the
 * element count and chan_map[i+1] as the element tag.
 */
145 static const uint8_t aac_chan_configs[6][5] = {
146  {1, TYPE_SCE}, // 1 channel - single channel element
147  {1, TYPE_CPE}, // 2 channels - channel pair
148  {2, TYPE_SCE, TYPE_CPE}, // 3 channels - center + stereo
149  {3, TYPE_SCE, TYPE_CPE, TYPE_SCE}, // 4 channels - front center + stereo + back center
150  {3, TYPE_SCE, TYPE_CPE, TYPE_CPE}, // 5 channels - front center + stereo + back stereo
151  {4, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_LFE}, // 6 channels - front center + stereo + back stereo + LFE
152 };
153 
154 /**
155  * Table to remap channels from libavcodec's default order to AAC order.
156  */
/*
 * Row n-1 is used for an n-channel stream: copy_input_samples() reads
 * aac_chan_maps[s->channels - 1][ch] to pick the input plane that feeds
 * encoder channel ch.
 * NOTE(review): listing line 157 (the array declaration) is absent from this
 * extract; only the initializer rows are visible.
 */
158  { 0 },
159  { 0, 1 },
160  { 2, 0, 1 },
161  { 2, 0, 1, 3 },
162  { 2, 0, 1, 3, 4 },
163  { 2, 0, 1, 4, 5, 3 },
164 };
165 
166 /**
167  * Make AAC audio config object.
168  * @see 1.6.2.1 "Syntax - AudioSpecificConfig"
169  */
/*
 * Writes the configuration into avctx->extradata using a local bit writer
 * bounded by avctx->extradata_size. The fields below add up to 33 bits
 * (5+4+4+1+1+1+11+5+1), i.e. 5 bytes after flushing.
 * NOTE(review): listing line 170 (the function signature) is absent from this
 * extract.
 */
171 {
172  PutBitContext pb;
173  AACEncContext *s = avctx->priv_data;
174 
175  init_put_bits(&pb, avctx->extradata, avctx->extradata_size);
176  put_bits(&pb, 5, 2); //object type - AAC-LC
177  put_bits(&pb, 4, s->samplerate_index); //sample rate index
178  put_bits(&pb, 4, s->channels);
179  //GASpecificConfig
180  put_bits(&pb, 1, 0); //frame length - 1024 samples
181  put_bits(&pb, 1, 0); //does not depend on core coder
182  put_bits(&pb, 1, 0); //is not extension
183 
184  //Explicitly Mark SBR absent
185  put_bits(&pb, 11, 0x2b7); //sync extension
186  put_bits(&pb, 5, AOT_SBR);
187  put_bits(&pb, 1, 0);
      // push any bits still buffered in pb out to extradata
188  flush_put_bits(&pb);
189 }
190 
/*
 * Expands to the header of a static windowing function named
 * apply_<type>_window(fdsp, sce, audio). The function body follows the macro
 * invocation; all functions built this way share one signature so they can be
 * stored in the apply_window[] dispatch table.
 */
191 #define WINDOW_FUNC(type) \
192 static void apply_ ##type ##_window(AVFloatDSPContext *fdsp, \
193  SingleChannelElement *sce, \
194  const float *audio)
195 
196 WINDOW_FUNC(only_long)
197 {
198  const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
199  const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
200  float *out = sce->ret_buf;
201 
202  fdsp->vector_fmul (out, audio, lwindow, 1024);
203  fdsp->vector_fmul_reverse(out + 1024, audio + 1024, pwindow, 1024);
204 }
205 
206 WINDOW_FUNC(long_start)
207 {
208  const float *lwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
209  const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
210  float *out = sce->ret_buf;
211 
212  fdsp->vector_fmul(out, audio, lwindow, 1024);
213  memcpy(out + 1024, audio + 1024, sizeof(out[0]) * 448);
214  fdsp->vector_fmul_reverse(out + 1024 + 448, audio + 1024 + 448, swindow, 128);
215  memset(out + 1024 + 576, 0, sizeof(out[0]) * 448);
216 }
217 
218 WINDOW_FUNC(long_stop)
219 {
220  const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
221  const float *swindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
222  float *out = sce->ret_buf;
223 
224  memset(out, 0, sizeof(out[0]) * 448);
225  fdsp->vector_fmul(out + 448, audio + 448, swindow, 128);
226  memcpy(out + 576, audio + 576, sizeof(out[0]) * 448);
227  fdsp->vector_fmul_reverse(out + 1024, audio + 1024, lwindow, 1024);
228 }
229 
230 WINDOW_FUNC(eight_short)
231 {
232  const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
233  const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
234  const float *in = audio + 448;
235  float *out = sce->ret_buf;
236  int w;
237 
238  for (w = 0; w < 8; w++) {
239  fdsp->vector_fmul (out, in, w ? pwindow : swindow, 128);
240  out += 128;
241  in += 128;
242  fdsp->vector_fmul_reverse(out, in, swindow, 128);
243  out += 128;
244  }
245 }
246 
/*
 * Dispatch table from window sequence to the matching windowing function;
 * apply_window_and_mdct() indexes it with sce->ics.window_sequence[0].
 * NOTE(review): listing line 248 (the middle parameter of the function
 * pointer type) is absent from this extract.
 */
247 static void (*const apply_window[4])(AVFloatDSPContext *fdsp,
249  const float *audio) = {
250  [ONLY_LONG_SEQUENCE] = apply_only_long_window,
251  [LONG_START_SEQUENCE] = apply_long_start_window,
252  [EIGHT_SHORT_SEQUENCE] = apply_eight_short_window,
253  [LONG_STOP_SEQUENCE] = apply_long_stop_window
254 };
255 
/*
 * Window one channel's samples and transform them into sce->coeffs.
 * NOTE(review): listing line 256 (start of the signature) and line 264 (the
 * condition choosing between the two transform paths) are absent from this
 * extract.
 */
257  float *audio)
258 {
259  int i;
260  float *output = sce->ret_buf;
261 
      // windowed samples land in sce->ret_buf, which is what output points at
262  apply_window[sce->ics.window_sequence[0]](s->fdsp, sce, audio);
263 
      // one 1024-coefficient transform over the whole buffer ...
265  s->mdct1024.mdct_calc(&s->mdct1024, sce->coeffs, output);
266  else
      // ... or eight transforms, each writing 128 coefficients and reading
      // from a 256-sample stride in output
267  for (i = 0; i < 1024; i += 128)
268  s->mdct128.mdct_calc(&s->mdct128, sce->coeffs + i, output + i*2);
      // slide the input: samples [1024, 2048) become samples [0, 1024)
269  memcpy(audio, audio + 1024, sizeof(audio[0]) * 1024);
      // keep an untouched copy of the coefficients in pcoeffs
270  memcpy(sce->pcoeffs, sce->coeffs, sizeof(sce->pcoeffs));
271 }
272 
273 /**
274  * Encode ics_info element.
275  * @see Table 4.6 (syntax of ics_info)
276  */
/*
 * Writes to s->pb. Every sequence gets the reserved bit, a 2-bit window
 * sequence and a 1-bit window shape. Sequences other than EIGHT_SHORT follow
 * with a 6-bit max_sfb and a zero prediction flag; EIGHT_SHORT follows with a
 * 4-bit max_sfb and seven grouping bits for windows 1..7.
 * NOTE(review): listing line 277 (the function signature) is absent from this
 * extract.
 */
278 {
279  int w;
280 
281  put_bits(&s->pb, 1, 0); // ics_reserved bit
282  put_bits(&s->pb, 2, info->window_sequence[0]);
283  put_bits(&s->pb, 1, info->use_kb_window[0]);
284  if (info->window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
285  put_bits(&s->pb, 6, info->max_sfb);
286  put_bits(&s->pb, 1, 0); // no prediction
287  } else {
288  put_bits(&s->pb, 4, info->max_sfb);
      // bit is 1 when group_len[w] is 0; presumably that marks a window that
      // continues the previous group - TODO confirm
289  for (w = 1; w < 8; w++)
290  put_bits(&s->pb, 1, !info->group_len[w]);
291  }
292 }
293 
294 /**
295  * Encode MS data.
296  * @see 4.6.8.1 "Joint Coding - M/S Stereo"
297  */
/*
 * Writes the 2-bit cpe->ms_mode. Only when ms_mode is 1 does it also write
 * one mask bit per band below max_sfb for every window group, taken from
 * cpe->ms_mask[w*16 + i] with w the first window of the group.
 * NOTE(review): listing line 298 (the function signature) is absent from this
 * extract.
 */
299 {
300  int i, w;
301 
302  put_bits(pb, 2, cpe->ms_mode);
303  if (cpe->ms_mode == 1)
304  for (w = 0; w < cpe->ch[0].ics.num_windows; w += cpe->ch[0].ics.group_len[w])
305  for (i = 0; i < cpe->ch[0].ics.max_sfb; i++)
306  put_bits(pb, 1, cpe->ms_mask[w*16 + i]);
307 }
308 
309 /**
310  * Produce integer coefficients from scalefactors provided by the model.
311  */
312 static void adjust_frame_information(ChannelElement *cpe, int chans)
313 {
314  int i, w, w2, g, ch;
315  int start, maxsfb, cmaxsfb;
316 
317  for (ch = 0; ch < chans; ch++) {
318  IndividualChannelStream *ics = &cpe->ch[ch].ics;
319  start = 0;
320  maxsfb = 0;
321  cpe->ch[ch].pulse.num_pulse = 0;
322  for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
323  for (w2 = 0; w2 < ics->group_len[w]; w2++) {
324  start = (w+w2) * 128;
325  for (g = 0; g < ics->num_swb; g++) {
326  //apply M/S
327  if (cpe->common_window && !ch && cpe->ms_mask[w*16 + g]) {
328  for (i = 0; i < ics->swb_sizes[g]; i++) {
329  cpe->ch[0].coeffs[start+i] = (cpe->ch[0].pcoeffs[start+i] + cpe->ch[1].pcoeffs[start+i]) * 0.5f;
330  cpe->ch[1].coeffs[start+i] = cpe->ch[0].coeffs[start+i] - cpe->ch[1].pcoeffs[start+i];
331  }
332  }
333  start += ics->swb_sizes[g];
334  }
335  for (cmaxsfb = ics->num_swb; cmaxsfb > 0 && cpe->ch[ch].zeroes[w*16+cmaxsfb-1]; cmaxsfb--)
336  ;
337  maxsfb = FFMAX(maxsfb, cmaxsfb);
338  }
339  }
340  ics->max_sfb = maxsfb;
341 
342  //adjust zero bands for window groups
343  for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
344  for (g = 0; g < ics->max_sfb; g++) {
345  i = 1;
346  for (w2 = w; w2 < w + ics->group_len[w]; w2++) {
347  if (!cpe->ch[ch].zeroes[w2*16 + g]) {
348  i = 0;
349  break;
350  }
351  }
352  cpe->ch[ch].zeroes[w*16 + g] = i;
353  }
354  }
355  }
356 
357  if (chans > 1 && cpe->common_window) {
358  IndividualChannelStream *ics0 = &cpe->ch[0].ics;
359  IndividualChannelStream *ics1 = &cpe->ch[1].ics;
360  int msc = 0;
361  ics0->max_sfb = FFMAX(ics0->max_sfb, ics1->max_sfb);
362  ics1->max_sfb = ics0->max_sfb;
363  for (w = 0; w < ics0->num_windows*16; w += 16)
364  for (i = 0; i < ics0->max_sfb; i++)
365  if (cpe->ms_mask[w+i])
366  msc++;
367  if (msc == 0 || ics0->max_sfb == 0)
368  cpe->ms_mode = 0;
369  else
370  cpe->ms_mode = msc < ics0->max_sfb * ics0->num_windows ? 1 : 2;
371  }
372 }
373 
374 /**
375  * Encode scalefactor band coding type.
376  */
/*
 * Delegates to the selected coder: for each window group, calls
 * s->coder->encode_window_bands_info() with the group's first window, the
 * group length and the current lambda.
 * NOTE(review): listing line 377 (the function signature) is absent from this
 * extract.
 */
378 {
379  int w;
380 
381  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
382  s->coder->encode_window_bands_info(s, sce, w, sce->ics.group_len[w], s->lambda);
383 }
384 
385 /**
386  * Encode scalefactors.
387  */
/*
 * Scalefactors are handled differentially: off starts at sce->sf_idx[0] and is
 * updated to the scalefactor of each band that is not flagged in zeroes[].
 * NOTE(review): listing lines 388-389 (the function signature) and line 400
 * (presumably the statement that writes the code for diff - TODO confirm) are
 * absent from this extract.
 */
390 {
391  int off = sce->sf_idx[0], diff;
392  int i, w;
393 
394  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
395  for (i = 0; i < sce->ics.max_sfb; i++) {
396  if (!sce->zeroes[w*16 + i]) {
      // difference to the previous coded scalefactor, biased by SCALE_DIFF_ZERO
397  diff = sce->sf_idx[w*16 + i] - off + SCALE_DIFF_ZERO;
      // aborts if the biased difference falls outside [0, 120]
398  av_assert0(diff >= 0 && diff <= 120);
399  off = sce->sf_idx[w*16 + i];
401  }
402  }
403  }
404 }
405 
406 /**
407  * Encode pulse data.
408  */
409 static void encode_pulses(AACEncContext *s, Pulse *pulse)
410 {
411  int i;
412 
413  put_bits(&s->pb, 1, !!pulse->num_pulse);
414  if (!pulse->num_pulse)
415  return;
416 
417  put_bits(&s->pb, 2, pulse->num_pulse - 1);
418  put_bits(&s->pb, 6, pulse->start);
419  for (i = 0; i < pulse->num_pulse; i++) {
420  put_bits(&s->pb, 5, pulse->pos[i]);
421  put_bits(&s->pb, 4, pulse->amp[i]);
422  }
423 }
424 
425 /**
426  * Encode spectral coefficients processed by psychoacoustic model.
427  */
/*
 * Walks every window group and every band below max_sfb. Bands flagged in
 * zeroes[] are skipped; every other band is passed to
 * s->coder->quantize_and_encode_band() once per window of the group, using the
 * scalefactor and band type stored for the group's first window.
 * NOTE(review): listing line 428 (the function signature) is absent from this
 * extract.
 */
429 {
430  int start, i, w, w2;
431 
432  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
      // start is the coefficient offset of band i inside one window
433  start = 0;
434  for (i = 0; i < sce->ics.max_sfb; i++) {
435  if (sce->zeroes[w*16 + i]) {
      // still advance past the skipped band
436  start += sce->ics.swb_sizes[i];
437  continue;
438  }
      // each window occupies 128 coefficients in sce->coeffs
439  for (w2 = w; w2 < w + sce->ics.group_len[w]; w2++)
440  s->coder->quantize_and_encode_band(s, &s->pb, sce->coeffs + start + w2*128,
441  sce->ics.swb_sizes[i],
442  sce->sf_idx[w*16 + i],
443  sce->band_type[w*16 + i],
444  s->lambda);
445  start += sce->ics.swb_sizes[i];
446  }
447  }
448 }
449 
450 /**
451  * Encode one channel of audio data.
452  */
/*
 * Writes, in order: the 8-bit first scalefactor index, the ics_info (only when
 * the channel does not share a common window), band info, scalefactors, pulse
 * data, two zero flag bits and the spectral coefficients. Always returns 0.
 * NOTE(review): listing lines 453-454 (start of the signature) are absent from
 * this extract.
 */
455  int common_window)
456 {
457  put_bits(&s->pb, 8, sce->sf_idx[0]);
      // with a common window the caller has already written ics_info once
458  if (!common_window)
459  put_ics_info(s, &sce->ics);
460  encode_band_info(s, sce);
461  encode_scale_factors(avctx, s, sce);
462  encode_pulses(s, &sce->pulse);
463  put_bits(&s->pb, 1, 0); //tns
464  put_bits(&s->pb, 1, 0); //ssr
465  encode_spectral_coeffs(s, sce);
466  return 0;
467 }
468 
469 /**
470  * Write some auxiliary information about the created AAC file.
471  */
/*
 * Emits a TYPE_FIL element carrying the bytes of name (without its
 * terminating NUL).
 * NOTE(review): listing line 483 is absent from this extract; given that
 * padbits is computed just before it and subtracted at the end, it presumably
 * aligns the bit writer to a byte boundary - TODO confirm.
 */
472 static void put_bitstream_info(AACEncContext *s, const char *name)
473 {
474  int i, namelen, padbits;
475 
476  namelen = strlen(name) + 2;
477  put_bits(&s->pb, 3, TYPE_FIL);
      // 4-bit count saturates at 15; larger counts add an 8-bit extension
478  put_bits(&s->pb, 4, FFMIN(namelen, 15));
479  if (namelen >= 15)
480  put_bits(&s->pb, 8, namelen - 14);
481  put_bits(&s->pb, 4, 0); //extension type - filler
      // number of bits (0..7) needed to reach the next multiple of 8
482  padbits = -put_bits_count(&s->pb) & 7;
484  for (i = 0; i < namelen - 2; i++)
485  put_bits(&s->pb, 8, name[i]);
486  put_bits(&s->pb, 12 - padbits, 0);
487 }
488 
489 /*
490  * Copy input samples.
491  * Channels are reordered from libavcodec's default order to AAC order.
492  */
/*
 * Each s->planar_samples[ch] buffer is used as 3072 samples here: the block at
 * [2048, 3072) is moved down to [1024, 2048), then the new frame (if any) is
 * written at 2048 and everything after it up to 3072 is zeroed. A NULL frame
 * therefore zero-fills the whole newest block.
 * NOTE(review): listing line 493 (the function signature) is absent from this
 * extract.
 */
494 {
495  int ch;
      // first index after the newly copied samples
496  int end = 2048 + (frame ? frame->nb_samples : 0);
497  const uint8_t *channel_map = aac_chan_maps[s->channels - 1];
498 
499  /* copy and remap input samples */
500  for (ch = 0; ch < s->channels; ch++) {
501  /* copy last 1024 samples of previous frame to the start of the current frame */
502  memcpy(&s->planar_samples[ch][1024], &s->planar_samples[ch][2048], 1024 * sizeof(s->planar_samples[0][0]));
503 
504  /* copy new samples and zero any remaining samples */
505  if (frame) {
506  memcpy(&s->planar_samples[ch][2048],
507  frame->extended_data[channel_map[ch]],
508  frame->nb_samples * sizeof(s->planar_samples[0][0]));
509  }
510  memset(&s->planar_samples[ch][end], 0,
511  (3072 - end) * sizeof(s->planar_samples[0][0]));
512  }
513 }
514 
/**
 * Encode one frame of audio into an AAC packet.
 *
 * The new samples are queued and copied into the per-channel buffers, every
 * channel is windowed and transformed, and the frame is then written
 * repeatedly, with s->lambda rescaled after each attempt, until it fits into
 * 6144 * channels - 3 bits.
 *
 * @param avctx          codec context; priv_data is the AACEncContext
 * @param avpkt          output packet, allocated here
 * @param frame          input samples, or NULL when flushing
 * @param got_packet_ptr set to 1 when a packet was produced; not written on
 *                       the early-return paths
 * @return 0 on success (including the early returns that emit no packet),
 *         AVERROR(EINVAL) when a transformed channel starts with NaN, or the
 *         negative error returned by ff_af_queue_add() / ff_alloc_packet2()
 *
 * NOTE(review): listing lines 523, 536 and 596 are absent from this extract
 * (they presumably hold the declaration of windows, the statement guarded by
 * "if (s->psypp)" and the statement guarded by the frame_number test); the
 * visible lines are kept verbatim around those gaps.
 */
515 static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
516  const AVFrame *frame, int *got_packet_ptr)
517 {
518  AACEncContext *s = avctx->priv_data;
519  float **samples = s->planar_samples, *samples2, *la, *overlap;
520  ChannelElement *cpe;
521  int i, ch, w, g, chans, tag, start_ch, ret, ms_mode = 0;
522  int chan_el_counter[4];
524 
      /* last_frame counts the frames encoded without input; after two of them
       * nothing more is emitted */
525  if (s->last_frame == 2)
526  return 0;
527 
528  /* add current frame to queue */
529  if (frame) {
530  if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
531  return ret;
532  }
533 
534  copy_input_samples(s, frame);
535  if (s->psypp)
537 
      /* the very first call only primes the sample buffers */
538  if (!avctx->frame_number)
539  return 0;
540 
      /* pass 1: decide the window layout of every channel and transform it */
541  start_ch = 0;
542  for (i = 0; i < s->chan_map[0]; i++) {
543  FFPsyWindowInfo* wi = windows + start_ch;
544  tag = s->chan_map[i+1];
545  chans = tag == TYPE_CPE ? 2 : 1;
546  cpe = &s->cpe[i];
547  for (ch = 0; ch < chans; ch++) {
548  IndividualChannelStream *ics = &cpe->ch[ch].ics;
549  int cur_channel = start_ch + ch;
550  overlap = &samples[cur_channel][0];
551  samples2 = overlap + 1024;
552  la = samples2 + (448+64);
      /* no lookahead pointer is handed to the model when flushing */
553  if (!frame)
554  la = NULL;
555  if (tag == TYPE_LFE) {
556  wi[ch].window_type[0] = ONLY_LONG_SEQUENCE;
557  wi[ch].window_shape = 0;
558  wi[ch].num_windows = 1;
559  wi[ch].grouping[0] = 1;
560 
561  /* Only the lowest 12 coefficients are used in a LFE channel.
562  * The expression below results in only the bottom 8 coefficients
563  * being used for 11.025kHz to 16kHz sample rates.
564  */
565  ics->num_swb = s->samplerate_index >= 8 ? 1 : 3;
566  } else {
567  wi[ch] = s->psy.model->window(&s->psy, samples2, la, cur_channel,
568  ics->window_sequence[0]);
569  }
      /* index 1 keeps the previous frame's value, index 0 the current one */
570  ics->window_sequence[1] = ics->window_sequence[0];
571  ics->window_sequence[0] = wi[ch].window_type[0];
572  ics->use_kb_window[1] = ics->use_kb_window[0];
573  ics->use_kb_window[0] = wi[ch].window_shape;
574  ics->num_windows = wi[ch].num_windows;
575  ics->swb_sizes = s->psy.bands [ics->num_windows == 8];
576  ics->num_swb = tag == TYPE_LFE ? ics->num_swb : s->psy.num_bands[ics->num_windows == 8];
577  for (w = 0; w < ics->num_windows; w++)
578  ics->group_len[w] = wi[ch].grouping[w];
579 
580  apply_window_and_mdct(s, &cpe->ch[ch], overlap);
      /* test the channel that was just transformed; testing ch[0] only would
       * never look at the second channel of a pair */
581  if (isnan(cpe->ch[ch].coeffs[0])) {
582  av_log(avctx, AV_LOG_ERROR, "Input contains NaN\n");
583  return AVERROR(EINVAL);
584  }
585  }
586  start_ch += chans;
587  }
588  if ((ret = ff_alloc_packet2(avctx, avpkt, 8192 * s->channels)) < 0)
589  return ret;
      /* pass 2: write the frame, retrying with an adjusted lambda until it
       * fits the bit limit */
590  do {
591  int frame_bits;
592 
593  init_put_bits(&s->pb, avpkt->data, avpkt->size);
594 
595  if ((avctx->frame_number & 0xFF)==1 && !(avctx->flags & CODEC_FLAG_BITEXACT))
597  start_ch = 0;
598  memset(chan_el_counter, 0, sizeof(chan_el_counter));
599  for (i = 0; i < s->chan_map[0]; i++) {
600  FFPsyWindowInfo* wi = windows + start_ch;
601  const float *coeffs[2];
602  tag = s->chan_map[i+1];
603  chans = tag == TYPE_CPE ? 2 : 1;
604  cpe = &s->cpe[i];
      /* element header: 3-bit type, then a 4-bit per-type instance counter */
605  put_bits(&s->pb, 3, tag);
606  put_bits(&s->pb, 4, chan_el_counter[tag]++);
607  for (ch = 0; ch < chans; ch++)
608  coeffs[ch] = cpe->ch[ch].coeffs;
609  s->psy.model->analyze(&s->psy, start_ch, coeffs, wi);
610  for (ch = 0; ch < chans; ch++) {
611  s->cur_channel = start_ch + ch;
612  s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda);
613  }
      /* a pair shares one window description only when type, shape and
       * grouping all match */
614  cpe->common_window = 0;
615  if (chans > 1
616  && wi[0].window_type[0] == wi[1].window_type[0]
617  && wi[0].window_shape == wi[1].window_shape) {
618 
619  cpe->common_window = 1;
620  for (w = 0; w < wi[0].num_windows; w++) {
621  if (wi[0].grouping[w] != wi[1].grouping[w]) {
622  cpe->common_window = 0;
623  break;
624  }
625  }
626  }
627  s->cur_channel = start_ch;
      /* stereo_mode > 0 masks every band; a negative value lets the coder
       * search when it provides search_for_ms */
628  if (s->options.stereo_mode && cpe->common_window) {
629  if (s->options.stereo_mode > 0) {
630  IndividualChannelStream *ics = &cpe->ch[0].ics;
631  for (w = 0; w < ics->num_windows; w += ics->group_len[w])
632  for (g = 0; g < ics->num_swb; g++)
633  cpe->ms_mask[w*16+g] = 1;
634  } else if (s->coder->search_for_ms) {
635  s->coder->search_for_ms(s, cpe, s->lambda);
636  }
637  }
638  adjust_frame_information(cpe, chans);
639  if (chans == 2) {
640  put_bits(&s->pb, 1, cpe->common_window);
641  if (cpe->common_window) {
642  put_ics_info(s, &cpe->ch[0].ics);
643  encode_ms_info(&s->pb, cpe);
644  if (cpe->ms_mode) ms_mode = 1;
645  }
646  }
647  for (ch = 0; ch < chans; ch++) {
648  s->cur_channel = start_ch + ch;
649  encode_individual_channel(avctx, s, &cpe->ch[ch], cpe->common_window);
650  }
651  start_ch += chans;
652  }
653 
654  frame_bits = put_bits_count(&s->pb);
      /* the 3 spare bits are for the TYPE_END tag written after the loop */
655  if (frame_bits <= 6144 * s->channels - 3) {
656  s->psy.bitres.bits = frame_bits / s->channels;
657  break;
658  }
659  if (ms_mode) {
660  for (i = 0; i < s->chan_map[0]; i++) {
661  // Must restore coeffs
      /* look the element type up again: tag still holds the type of the last
       * element written above, which is wrong for every other element */
662  chans = s->chan_map[i+1] == TYPE_CPE ? 2 : 1;
663  cpe = &s->cpe[i];
664  for (ch = 0; ch < chans; ch++)
665  memcpy(cpe->ch[ch].coeffs, cpe->ch[ch].pcoeffs, sizeof(cpe->ch[ch].coeffs));
666  }
667  }
668 
      /* scale lambda by target bits per frame over bits actually used */
669  s->lambda *= avctx->bit_rate * 1024.0f / avctx->sample_rate / frame_bits;
670 
671  } while (1);
672 
673  put_bits(&s->pb, 3, TYPE_END);
674  flush_put_bits(&s->pb);
675  avctx->frame_bits = put_bits_count(&s->pb);
676 
677  // rate control stuff
678  if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
679  float ratio = avctx->bit_rate * 1024.0f / avctx->sample_rate / avctx->frame_bits;
680  s->lambda *= ratio;
681  s->lambda = FFMIN(s->lambda, 65536.f);
682  }
683 
684  if (!frame)
685  s->last_frame++;
686 
687  ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
688  &avpkt->duration);
689 
      /* packet size in whole bytes */
690  avpkt->size = put_bits_count(&s->pb) >> 3;
691  *got_packet_ptr = 1;
692  return 0;
693 }
694 
/*
 * Release everything owned by the encoder context: both MDCT contexts, the
 * psychoacoustic model, the sample buffer, the channel elements, the float DSP
 * context and the audio frame queue. Always returns 0.
 * NOTE(review): listing line 695 (the function signature) and line 703 (the
 * statement guarded by "if (s->psypp)") are absent from this extract; as
 * listed, the if would appear to guard the av_freep() that follows it.
 */
696 {
697  AACEncContext *s = avctx->priv_data;
698 
699  ff_mdct_end(&s->mdct1024);
700  ff_mdct_end(&s->mdct128);
701  ff_psy_end(&s->psy);
702  if (s->psypp)
704  av_freep(&s->buffer.samples);
705  av_freep(&s->cpe);
706  av_freep(&s->fdsp);
707  ff_af_queue_close(&s->afq);
708  return 0;
709 }
710 
/*
 * Set up the DSP helpers of the encoder context: the float DSP context and the
 * two MDCT contexts. Returns 0 on success, AVERROR(ENOMEM) when s->fdsp is
 * NULL, or the non-zero result of a failing ff_mdct_init().
 * NOTE(review): listing line 711 (the function signature), line 715
 * (presumably the allocation of s->fdsp) and lines 720-723 (the window
 * initialisation announced by the comment) are absent from this extract.
 */
712 {
713  int ret = 0;
714 
716  if (!s->fdsp)
717  return AVERROR(ENOMEM);
718 
719  // window init
724 
      // the assignments inside the conditions are intentional: any non-zero
      // return value is propagated to the caller
725  if (ret = ff_mdct_init(&s->mdct1024, 11, 0, 32768.0))
726  return ret;
727  if (ret = ff_mdct_init(&s->mdct128, 8, 0, 32768.0))
728  return ret;
729 
730  return 0;
731 }
732 
/*
 * Allocate the zero-initialised buffers of the encoder: the shared sample
 * buffer (3 * 1024 samples per channel), the channel elements (one per entry
 * counted by s->chan_map[0]) and the extradata (5 bytes plus input padding).
 * Returns 0 on success and AVERROR(ENOMEM) when any allocation fails; nothing
 * is freed here on failure.
 * NOTE(review): listing line 733 (the function signature) is absent from this
 * extract.
 */
734 {
735  int ch;
736  FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->buffer.samples, s->channels, 3 * 1024 * sizeof(s->buffer.samples[0]), alloc_fail);
737  FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->cpe, s->chan_map[0], sizeof(ChannelElement), alloc_fail);
738  FF_ALLOCZ_OR_GOTO(avctx, avctx->extradata, 5 + FF_INPUT_BUFFER_PADDING_SIZE, alloc_fail);
739 
      // carve the shared buffer into one 3072-sample slice per channel
740  for(ch = 0; ch < s->channels; ch++)
741  s->planar_samples[ch] = s->buffer.samples + 3 * 1024 * ch;
742 
743  return 0;
744 alloc_fail:
745  return AVERROR(ENOMEM);
746 }
747 
/*
 * Validate the requested configuration and initialise the encoder context.
 * On any failure after validation the partially initialised state is torn
 * down through aac_encode_end() and the error code is returned.
 * NOTE(review): several listing lines are absent from this extract: 748 (the
 * function signature), 759 (the condition of the sample-rate search loop),
 * 768 and 770 (the heads of two ERROR_IF checks), 781, 790, 802, 805 and 809.
 * The visible lines are kept verbatim around those gaps.
 */
749 {
750  AACEncContext *s = avctx->priv_data;
751  int i, ret = 0;
752  const uint8_t *sizes[2];
753  uint8_t grouping[AAC_MAX_CHANNELS];
754  int lengths[2];
755 
756  avctx->frame_size = 1024;
757 
      // search for the sample-rate index; i == 16 afterwards means no match
758  for (i = 0; i < 16; i++)
760  break;
761 
762  s->channels = avctx->channels;
763 
764  ERROR_IF(i == 16
765  || i >= (sizeof(swb_size_1024) / sizeof(*swb_size_1024))
766  || i >= (sizeof(swb_size_128) / sizeof(*swb_size_128)),
767  "Unsupported sample rate %d\n", avctx->sample_rate);
769  "Unsupported number of channels: %d\n", s->channels);
771  "Unsupported profile %d\n", avctx->profile);
772  WARN_IF(1024.0 * avctx->bit_rate / avctx->sample_rate > 6144 * s->channels,
773  "Too many bits per frame requested, clamping to max\n");
774 
      // cap the bit rate at 6144 bits per channel per 1024-sample frame
775  avctx->bit_rate = (int)FFMIN(
776  6144 * s->channels / 1024.0 * avctx->sample_rate,
777  avctx->bit_rate);
778 
779  s->samplerate_index = i;
780 
782 
783  if ((ret = dsp_init(avctx, s)) < 0)
784  goto fail;
785 
786  if ((ret = alloc_buffers(avctx, s)) < 0)
787  goto fail;
788 
789  avctx->extradata_size = 5;
791 
      // index 0 describes the 1024-coefficient layout, index 1 the
      // 128-coefficient layout
792  sizes[0] = swb_size_1024[i];
793  sizes[1] = swb_size_128[i];
794  lengths[0] = ff_aac_num_swb_1024[i];
795  lengths[1] = ff_aac_num_swb_128[i];
      // i is reused as the element index from here on
796  for (i = 0; i < s->chan_map[0]; i++)
797  grouping[i] = s->chan_map[i + 1] == TYPE_CPE;
798  if ((ret = ff_psy_init(&s->psy, avctx, 2, sizes, lengths,
799  s->chan_map[0], grouping)) < 0)
800  goto fail;
801  s->psypp = ff_psy_preprocess_init(avctx);
803 
804  if (HAVE_MIPSDSPR1)
806 
      // a positive global_quality is used as the starting lambda, else 120
807  s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120;
808 
810 
      // sqrt(x * sqrt(x)) equals x raised to the power 3/4
811  for (i = 0; i < 428; i++)
812  ff_aac_pow34sf_tab[i] = sqrt(ff_aac_pow2sf_tab[i] * sqrt(ff_aac_pow2sf_tab[i]));
813 
814  avctx->initial_padding = 1024;
815  ff_af_queue_init(avctx, &s->afq);
816 
817  return 0;
818 fail:
819  aac_encode_end(avctx);
820  return ret;
821 }
822 
/*
 * Option flags shared by every entry of aacenc_options[]. The expression is
 * parenthesized so the macro stays a single operand wherever it is expanded
 * (for example next to & or ==, which bind tighter than |).
 */
#define AACENC_FLAGS (AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM)
/*
 * Private options of the encoder. Two integer options are defined, each
 * followed by the named constants that belong to its unit:
 *  - stereo_mode (range -1..1, default 0), stored in options.stereo_mode
 *  - aac_coder (range 0..AAC_CODER_NB-1, default AAC_CODER_TWOLOOP), stored
 *    in options.aac_coder
 * The table ends with the {NULL} sentinel.
 */
824 static const AVOption aacenc_options[] = {
825  {"stereo_mode", "Stereo coding method", offsetof(AACEncContext, options.stereo_mode), AV_OPT_TYPE_INT, {.i64 = 0}, -1, 1, AACENC_FLAGS, "stereo_mode"},
826  {"auto", "Selected by the Encoder", 0, AV_OPT_TYPE_CONST, {.i64 = -1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
827  {"ms_off", "Disable Mid/Side coding", 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
828  {"ms_force", "Force Mid/Side for the whole frame if possible", 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
      // NOTE(review): this option has an empty help string
829  {"aac_coder", "", offsetof(AACEncContext, options.aac_coder), AV_OPT_TYPE_INT, {.i64 = AAC_CODER_TWOLOOP}, 0, AAC_CODER_NB-1, AACENC_FLAGS, "aac_coder"},
830  {"faac", "FAAC-inspired method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAAC}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
831  {"anmr", "ANMR method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_ANMR}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
832  {"twoloop", "Two loop searching method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_TWOLOOP}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
833  {"fast", "Constant quantizer", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAST}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
834  {NULL}
835 };
836 
/*
 * AVClass of the encoder, referenced as .priv_class by the codec definition.
 * NOTE(review): listing lines 839-841 (the remaining initializers) are absent
 * from this extract; only the class name is visible.
 */
837 static const AVClass aacenc_class = {
838  "AAC encoder",
842 };
843 
844 /* duplicated from avpriv_mpeg4audio_sample_rates to avoid shared build
845  * failures */
/*
 * Only 13 of the 16 elements are initialised explicitly; the remaining three
 * are implicitly 0.
 */
846 static const int mpeg4audio_sample_rates[16] = {
847  96000, 88200, 64000, 48000, 44100, 32000,
848  24000, 22050, 16000, 12000, 11025, 8000, 7350
849 };
850 
/*
 * Codec registration entry: names the encoder "aac" and wires it to
 * aac_encode_frame() for encoding and aac_encode_end() for teardown, with
 * AACEncContext as private data and aacenc_class as its option class.
 * NOTE(review): listing lines 851 (the declaration), 857, 860, 862 and 864
 * are absent from this extract, so the initializer below is incomplete as
 * shown.
 */
852  .name = "aac",
853  .long_name = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
854  .type = AVMEDIA_TYPE_AUDIO,
855  .id = AV_CODEC_ID_AAC,
856  .priv_data_size = sizeof(AACEncContext),
858  .encode2 = aac_encode_frame,
859  .close = aac_encode_end,
861  .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY |
863  .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP,
865  .priv_class = &aacenc_class,
866 };