FFmpeg
aacpsy.c
Go to the documentation of this file.
1 /*
2  * AAC encoder psychoacoustic model
3  * Copyright (C) 2008 Konstantin Shishkov
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * AAC encoder psychoacoustic model
25  */
26 
27 #include "libavutil/attributes.h"
28 #include "libavutil/ffmath.h"
29 #include "libavutil/mem.h"
30 
31 #include "avcodec.h"
32 #include "aac.h"
33 #include "psymodel.h"
34 
35 /***********************************
36  * TODOs:
37  * try other bitrate controlling mechanism (maybe use ratecontrol.c?)
38  * control quality for quality-based output
39  **********************************/
40 
/**
 * constants for 3GPP AAC psychoacoustic model
 * @{
 */
#define PSY_3GPP_THR_SPREAD_HI   1.5f // spreading factor for low-to-hi threshold spreading  (15 dB/Bark)
#define PSY_3GPP_THR_SPREAD_LOW  3.0f // spreading factor for hi-to-low threshold spreading  (30 dB/Bark)
/* spreading factor for low-to-hi energy spreading, long block, > 22kbps/channel (20dB/Bark) */
#define PSY_3GPP_EN_SPREAD_HI_L1 2.0f
/* spreading factor for low-to-hi energy spreading, long block, <= 22kbps/channel (15dB/Bark) */
#define PSY_3GPP_EN_SPREAD_HI_L2 1.5f
/* spreading factor for low-to-hi energy spreading, short block (15 dB/Bark) */
#define PSY_3GPP_EN_SPREAD_HI_S  1.5f
/* spreading factor for hi-to-low energy spreading, long block (30dB/Bark) */
#define PSY_3GPP_EN_SPREAD_LOW_L 3.0f
/* spreading factor for hi-to-low energy spreading, short block (20dB/Bark) */
#define PSY_3GPP_EN_SPREAD_LOW_S 2.0f

/* pre-echo control: lower bound factor on a band's own threshold */
#define PSY_3GPP_RPEMIN      0.01f
/* pre-echo control: factor applied to the previous frame's threshold in quiet */
#define PSY_3GPP_RPELEV      2.0f

/* constants of the perceptual entropy formula, see calc_pe_3gpp() */
#define PSY_3GPP_C1          3.0f        /* log2(8) */
#define PSY_3GPP_C2          1.3219281f  /* log2(2.5) */
#define PSY_3GPP_C3          0.55935729f /* 1 - C2 / C1 */

/* clipping bounds for the per-band minimal SNR */
#define PSY_SNR_1DB          7.9432821e-1f /* -1dB */
#define PSY_SNR_25DB         3.1622776e-3f /* -25dB */

/* bit-save / bit-spend line parameters and reservoir fill clipping,
 * long (_L) and short (_S) windows, see calc_bit_demand() */
#define PSY_3GPP_SAVE_SLOPE_L  -0.46666667f
#define PSY_3GPP_SAVE_SLOPE_S  -0.36363637f
#define PSY_3GPP_SAVE_ADD_L    -0.84285712f
#define PSY_3GPP_SAVE_ADD_S    -0.75f
#define PSY_3GPP_SPEND_SLOPE_L  0.66666669f
#define PSY_3GPP_SPEND_SLOPE_S  0.81818181f
#define PSY_3GPP_SPEND_ADD_L   -0.35f
#define PSY_3GPP_SPEND_ADD_S   -0.26111111f
#define PSY_3GPP_CLIP_LO_L      0.2f
#define PSY_3GPP_CLIP_LO_S      0.2f
#define PSY_3GPP_CLIP_HI_L      0.95f
#define PSY_3GPP_CLIP_HI_S      0.75f

/* spread-energy fraction used when selecting bands for hole avoidance */
#define PSY_3GPP_AH_THR_LONG    0.5f
#define PSY_3GPP_AH_THR_SHORT   0.63f

/* weight of the old pe.min in the running "forgetful" minimum */
#define PSY_PE_FORGET_SLOPE  511
85 
/* Hole-avoidance state stored in AacPsyBand.avoid_holes. */
enum {
    PSY_3GPP_AH_NONE,
    PSY_3GPP_AH_INACTIVE,
    PSY_3GPP_AH_ACTIVE
};
91 
/* conversion between a bit count and perceptual entropy */
#define PSY_3GPP_BITS_TO_PE(bits) ((bits) * 1.18f)
#define PSY_3GPP_PE_TO_BITS(pe)   ((pe) / 1.18f)

/* LAME psy model constants */
#define PSY_LAME_FIR_LEN 21         ///< LAME psy model FIR order
#define AAC_BLOCK_SIZE_LONG 1024    ///< long block size
#define AAC_BLOCK_SIZE_SHORT 128    ///< short block size
#define AAC_NUM_BLOCKS_SHORT 8      ///< number of blocks in a short sequence
#define PSY_LAME_NUM_SUBBLOCKS 2    ///< Number of sub-blocks in each short block

/* Pre-echo-aware attack detection: the LAME ratio test misses gentler attacks after a quiet
 * gap, which then stay long and pre-echo. For an isolated onset (long for PSY_LAME_PE_GAP
 * frames) whose pre-onset is below PSY_LAME_PE_QUIET of the frame peak, scale the threshold by
 * PSY_LAME_PE_RED so it switches short; dense-transient content never qualifies. */
#define PSY_LAME_PE_GAP   12        ///< min consecutive long frames before the relaxation applies
#define PSY_LAME_PE_QUIET 0.4f      ///< pre-onset must be below this fraction of the frame peak
#define PSY_LAME_PE_RED   0.45f     ///< attack-threshold multiplier for a qualifying isolated onset

/**
 * @}
 */
113 
/**
 * information for single band used by 3GPP TS26.403-inspired psychoacoustic model
 */
typedef struct AacPsyBand{
    float energy;       ///< band energy
    float thr;          ///< energy threshold
    float thr_quiet;    ///< threshold in quiet
    float nz_lines;     ///< number of non-zero spectral lines
    float active_lines; ///< number of active spectral lines
    float pe;           ///< perceptual entropy
    float pe_const;     ///< constant part of the PE calculation
    float norm_fac;     ///< normalization factor for linearization
    int   avoid_holes;  ///< hole avoidance flag
}AacPsyBand;
128 
129 /**
130  * single/pair channel context for psychoacoustic model
131  */
132 typedef struct AacPsyChannel{
133  AacPsyBand band[128]; ///< bands information
134  AacPsyBand prev_band[128]; ///< bands information from the previous frame
135 
136  float win_energy; ///< sliding average of channel energy
137  float iir_state[2]; ///< hi-pass IIR filter state
138  uint8_t next_grouping; ///< stored grouping scheme for the next frame (in case of 8 short window sequence)
139  enum WindowSequence next_window_seq; ///< window sequence to be used in the next frame
140  /* LAME psy model specific members */
141  float attack_threshold; ///< attack threshold for this channel
143  int prev_attack; ///< attack value for the last short block in the previous sequence
144  int next_attack0_zero; ///< whether attack[0] of the next frame is zero
145  int frames_since_short; ///< consecutive long frames (pre-echo-aware isolated-onset gate)
146 
147  /* rate-loop re-analysis rewind state, see psy_3gpp_analyze() */
148  int64_t rc_frame_num; ///< frame this channel last saved rewind state for
149  AacPsyBand rc_prev_band[128]; ///< prev_band as it was entering the frame
151 
/**
 * psychoacoustic model frame type-dependent coefficients
 *
 * One entry per scalefactor band; separate tables are kept for long and
 * short windows.
 */
typedef struct AacPsyCoeffs{
    float ath;           ///< absolute threshold of hearing per bands
    float barks;         ///< Bark value for each spectral band
    float spread_low[2]; ///< hi-to-low spreading factors: [0] threshold, [1] energy
    float spread_hi [2]; ///< low-to-hi spreading factors: [0] threshold, [1] energy
    float min_snr;       ///< minimal SNR
}AacPsyCoeffs;
162 
163 /**
164  * 3GPP TS26.403-inspired psychoacoustic model specific data
165  */
166 typedef struct AacPsyContext{
167  int chan_bitrate; ///< bitrate per channel
168  int frame_bits; ///< average bits per frame
169  int fill_level; ///< bit reservoir fill level
170  struct {
171  float min; ///< minimum allowed PE for bit factor calculation
172  float max; ///< maximum allowed PE for bit factor calculation
173  float previous; ///< allowed PE of the previous frame
174  float correction; ///< PE correction factor
175  } pe;
178  float global_quality; ///< normalized global quality taken from avctx
179 
180  /* rate-loop re-analysis rewind state, see psy_3gpp_analyze() */
181  int64_t rc_frame_num; ///< frame the rewind state was saved for
182  int rc_first_ch; ///< first channel analyzed in that frame
186 
/**
 * LAME psy model preset struct
 */
typedef struct PsyLamePreset {
    int   quality;  ///< Quality to map the rest of the values to.
     /* This is overloaded to be both kbps per channel in ABR mode, and
      * requested quality in constant quality mode.
      */
    float st_lrm;   ///< short threshold for L, R, and M channels
} PsyLamePreset;
197 
198 /**
199  * LAME psy model preset table for ABR
200  */
201 static const PsyLamePreset psy_abr_map[] = {
202 /* TODO: Tuning. These were taken from LAME. */
203 /* kbps/ch st_lrm */
204  { 8, 7.60},
205  { 16, 7.60},
206  { 24, 7.60},
207  { 32, 7.60},
208  { 40, 7.60},
209  { 48, 7.60},
210  { 56, 7.60},
211  { 64, 7.40},
212  { 80, 7.00},
213  { 96, 6.60},
214  {112, 6.20},
215  {128, 6.20},
216  {160, 6.20}
217 };
218 
219 /**
220 * LAME psy model preset table for constant quality
221 */
222 static const PsyLamePreset psy_vbr_map[] = {
223 /* vbr_q st_lrm */
224  { 0, 4.20},
225  { 1, 4.20},
226  { 2, 4.20},
227  { 3, 4.20},
228  { 4, 4.20},
229  { 5, 4.20},
230  { 6, 4.20},
231  { 7, 4.20},
232  { 8, 4.20},
233  { 9, 4.20},
234  {10, 4.20}
235 };
236 
/**
 * LAME psy model FIR coefficient table
 *
 * Ten coefficients, each stored pre-multiplied by 2; consumed pairwise by
 * psy_hp_filter().
 */
static const float psy_fir_coeffs[] = {
    -8.65163e-18 * 2, -0.00851586 * 2, -6.74764e-18 * 2, 0.0209036 * 2,
    -3.36639e-17 * 2, -0.0438162 * 2,  -1.54175e-17 * 2, 0.0931738 * 2,
    -5.52212e-17 * 2, -0.313819 * 2
};
245 
246 /**
247  * Calculate the ABR attack threshold from the above LAME psymodel table.
248  */
250 {
251  /* Assume max bitrate to start with */
252  int lower_range = 12, upper_range = 12;
253  int lower_range_kbps = psy_abr_map[12].quality;
254  int upper_range_kbps = psy_abr_map[12].quality;
255  int i;
256 
257  /* Determine which bitrates the value specified falls between.
258  * If the loop ends without breaking our above assumption of 320kbps was correct.
259  */
260  for (i = 1; i < 13; i++) {
262  upper_range = i;
263  upper_range_kbps = psy_abr_map[i ].quality;
264  lower_range = i - 1;
265  lower_range_kbps = psy_abr_map[i - 1].quality;
266  break; /* Upper range found */
267  }
268  }
269 
270  /* Determine which range the value specified is closer to */
271  if ((upper_range_kbps - bitrate) > (bitrate - lower_range_kbps))
272  return psy_abr_map[lower_range].st_lrm;
273  return psy_abr_map[upper_range].st_lrm;
274 }
275 
276 /**
277  * LAME psy model specific initialization
278  */
280 {
281  int i, j;
282 
283  for (i = 0; i < avctx->ch_layout.nb_channels; i++) {
284  AacPsyChannel *pch = &ctx->ch[i];
285 
286  if (avctx->flags & AV_CODEC_FLAG_QSCALE)
288  else
290 
291  for (j = 0; j < AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS; j++)
292  pch->prev_energy_subshort[j] = 10.0f;
293  }
294 }
295 
296 /**
297  * Calculate Bark value for given line.
298  */
299 static av_cold float calc_bark(float f)
300 {
301  return 13.3f * atanf(0.00076f * f) + 3.5f * atanf((f / 7500.0f) * (f / 7500.0f));
302 }
303 
304 #define ATH_ADD 4
305 /**
306  * Calculate ATH value for given frequency.
307  * Borrowed from Lame.
308  */
309 static av_cold float ath(float f, float add)
310 {
311  f /= 1000.0f;
312  return 3.64 * pow(f, -0.8)
313  - 6.8 * exp(-0.6 * (f - 3.4) * (f - 3.4))
314  + 6.0 * exp(-0.15 * (f - 8.7) * (f - 8.7))
315  + (0.6 + 0.04 * add) * 0.001 * f * f * f * f;
316 }
317 
319  AacPsyContext *pctx;
320  float bark;
321  int i, j, g, start;
322  float prev, minscale, minath, minsnr, pe_min;
323  int chan_bitrate = ctx->avctx->bit_rate / ((ctx->avctx->flags & AV_CODEC_FLAG_QSCALE) ? 2.0f : ctx->avctx->ch_layout.nb_channels);
324 
325  const int bandwidth = ctx->cutoff ? ctx->cutoff : AAC_CUTOFF(ctx->avctx);
326  const float num_bark = calc_bark((float)bandwidth);
327 
328  if (bandwidth <= 0)
329  return AVERROR(EINVAL);
330 
331  ctx->model_priv_data = av_mallocz(sizeof(AacPsyContext));
332  if (!ctx->model_priv_data)
333  return AVERROR(ENOMEM);
334  pctx = ctx->model_priv_data;
335  pctx->global_quality = (ctx->avctx->global_quality ? ctx->avctx->global_quality : 120) * 0.01f;
336 
337  if (ctx->avctx->flags & AV_CODEC_FLAG_QSCALE) {
338  /* Use the target average bitrate to compute spread parameters */
339  chan_bitrate = (int)(chan_bitrate / 120.0 * (ctx->avctx->global_quality ? ctx->avctx->global_quality : 120));
340  }
341 
342  pctx->chan_bitrate = chan_bitrate;
343  pctx->frame_bits = FFMIN(2560, chan_bitrate * AAC_BLOCK_SIZE_LONG / ctx->avctx->sample_rate);
344  pctx->pe.min = 8.0f * AAC_BLOCK_SIZE_LONG * bandwidth / (ctx->avctx->sample_rate * 2.0f);
345  pctx->pe.max = 12.0f * AAC_BLOCK_SIZE_LONG * bandwidth / (ctx->avctx->sample_rate * 2.0f);
346  ctx->bitres.size = 6144 - pctx->frame_bits;
347  ctx->bitres.size -= ctx->bitres.size % 8;
348  pctx->fill_level = ctx->bitres.size;
349  minath = ath(3410 - 0.733 * ATH_ADD, ATH_ADD);
350  for (j = 0; j < 2; j++) {
351  AacPsyCoeffs *coeffs = pctx->psy_coef[j];
352  const uint8_t *band_sizes = ctx->bands[j];
353  float line_to_frequency = ctx->avctx->sample_rate / (j ? 256.f : 2048.0f);
354  float avg_chan_bits = chan_bitrate * (j ? 128.0f : 1024.0f) / ctx->avctx->sample_rate;
355  /* reference encoder uses 2.4% here instead of 60% like the spec says */
356  float bark_pe = 0.024f * PSY_3GPP_BITS_TO_PE(avg_chan_bits) / num_bark;
357  float en_spread_low = j ? PSY_3GPP_EN_SPREAD_LOW_S : PSY_3GPP_EN_SPREAD_LOW_L;
358  /* High energy spreading for long blocks <= 22kbps/channel and short blocks are the same. */
359  float en_spread_hi = (j || (chan_bitrate <= 22.0f)) ? PSY_3GPP_EN_SPREAD_HI_S : PSY_3GPP_EN_SPREAD_HI_L1;
360 
361  i = 0;
362  prev = 0.0;
363  for (g = 0; g < ctx->num_bands[j]; g++) {
364  i += band_sizes[g];
365  bark = calc_bark((i-1) * line_to_frequency);
366  coeffs[g].barks = (bark + prev) / 2.0;
367  prev = bark;
368  }
369  for (g = 0; g < ctx->num_bands[j] - 1; g++) {
370  AacPsyCoeffs *coeff = &coeffs[g];
371  float bark_width = coeffs[g+1].barks - coeffs->barks;
372  coeff->spread_low[0] = ff_exp10(-bark_width * PSY_3GPP_THR_SPREAD_LOW);
373  coeff->spread_hi [0] = ff_exp10(-bark_width * PSY_3GPP_THR_SPREAD_HI);
374  coeff->spread_low[1] = ff_exp10(-bark_width * en_spread_low);
375  coeff->spread_hi [1] = ff_exp10(-bark_width * en_spread_hi);
376  pe_min = bark_pe * bark_width;
377  minsnr = exp2(pe_min / band_sizes[g]) - 1.5f;
378  coeff->min_snr = av_clipf(1.0f / minsnr, PSY_SNR_25DB, PSY_SNR_1DB);
379  }
380  start = 0;
381  for (g = 0; g < ctx->num_bands[j]; g++) {
382  minscale = ath(start * line_to_frequency, ATH_ADD);
383  for (i = 1; i < band_sizes[g]; i++)
384  minscale = FFMIN(minscale, ath((start + i) * line_to_frequency, ATH_ADD));
385  coeffs[g].ath = minscale - minath;
386  start += band_sizes[g];
387  }
388  }
389 
390  pctx->ch = av_calloc(ctx->avctx->ch_layout.nb_channels, sizeof(*pctx->ch));
391  if (!pctx->ch) {
392  av_freep(&ctx->model_priv_data);
393  return AVERROR(ENOMEM);
394  }
395 
396  pctx->rc_frame_num = -1;
397  for (i = 0; i < ctx->avctx->ch_layout.nb_channels; i++)
398  pctx->ch[i].rc_frame_num = -1;
399 
400  lame_window_init(pctx, ctx->avctx);
401 
402  return 0;
403 }
404 
/**
 * IIR filter used in block switching decision
 *
 * @param in    next input sample
 * @param state filter memory: state[0] holds the previous input, state[1]
 *              the previous output; both are updated
 * @return filtered sample
 */
static float iir_filter(int in, float state[2])
{
    float ret;

    ret = 0.7548f * (in - state[0]) + 0.5095f * state[1];
    state[0] = in;
    state[1] = ret;
    return ret;
}
417 
/**
 * window grouping information stored as bits (0 - new group, 1 - group continues)
 *
 * Indexed by the 1-based position of the first detected attack window;
 * index 0 is used when no attack was found.
 */
static const uint8_t window_grouping[9] = {
    0xB6, 0x6C, 0xD8, 0xB2, 0x66, 0xC6, 0x96, 0x36, 0x36
};
424 
425 /**
426  * Tell encoder which window types to use.
427  * @see 3GPP TS26.403 5.4.1 "Blockswitching"
428  */
430  const int16_t *audio,
431  const int16_t *la,
432  int channel, int prev_type)
433 {
434  int i, j;
435  int br = ((AacPsyContext*)ctx->model_priv_data)->chan_bitrate;
436  int attack_ratio = br <= 16000 ? 18 : 10;
437  AacPsyContext *pctx = (AacPsyContext*) ctx->model_priv_data;
438  AacPsyChannel *pch = &pctx->ch[channel];
439  uint8_t grouping = 0;
440  int next_type = pch->next_window_seq;
441  FFPsyWindowInfo wi = { { 0 } };
442 
443  if (la) {
444  float s[8], v;
445  int switch_to_eight = 0;
446  float sum = 0.0, sum2 = 0.0;
447  int attack_n = 0;
448  int stay_short = 0;
449  for (i = 0; i < 8; i++) {
450  for (j = 0; j < 128; j++) {
451  v = iir_filter(la[i*128+j], pch->iir_state);
452  sum += v*v;
453  }
454  s[i] = sum;
455  sum2 += sum;
456  }
457  for (i = 0; i < 8; i++) {
458  if (s[i] > pch->win_energy * attack_ratio) {
459  attack_n = i + 1;
460  switch_to_eight = 1;
461  break;
462  }
463  }
464  pch->win_energy = pch->win_energy*7/8 + sum2/64;
465 
466  wi.window_type[1] = prev_type;
467  switch (prev_type) {
468  case ONLY_LONG_SEQUENCE:
469  wi.window_type[0] = switch_to_eight ? LONG_START_SEQUENCE : ONLY_LONG_SEQUENCE;
470  next_type = switch_to_eight ? EIGHT_SHORT_SEQUENCE : ONLY_LONG_SEQUENCE;
471  break;
472  case LONG_START_SEQUENCE:
473  wi.window_type[0] = EIGHT_SHORT_SEQUENCE;
474  grouping = pch->next_grouping;
475  next_type = switch_to_eight ? EIGHT_SHORT_SEQUENCE : LONG_STOP_SEQUENCE;
476  break;
477  case LONG_STOP_SEQUENCE:
478  wi.window_type[0] = switch_to_eight ? LONG_START_SEQUENCE : ONLY_LONG_SEQUENCE;
479  next_type = switch_to_eight ? EIGHT_SHORT_SEQUENCE : ONLY_LONG_SEQUENCE;
480  break;
482  stay_short = next_type == EIGHT_SHORT_SEQUENCE || switch_to_eight;
483  wi.window_type[0] = stay_short ? EIGHT_SHORT_SEQUENCE : LONG_STOP_SEQUENCE;
484  grouping = next_type == EIGHT_SHORT_SEQUENCE ? pch->next_grouping : 0;
485  next_type = switch_to_eight ? EIGHT_SHORT_SEQUENCE : LONG_STOP_SEQUENCE;
486  break;
487  }
488 
489  pch->next_grouping = window_grouping[attack_n];
490  pch->next_window_seq = next_type;
491  } else {
492  for (i = 0; i < 3; i++)
493  wi.window_type[i] = prev_type;
494  grouping = (prev_type == EIGHT_SHORT_SEQUENCE) ? window_grouping[0] : 0;
495  }
496 
497  wi.window_shape = 1;
498  if (wi.window_type[0] != EIGHT_SHORT_SEQUENCE) {
499  wi.num_windows = 1;
500  wi.grouping[0] = 1;
501  } else {
502  int lastgrp = 0;
503  wi.num_windows = 8;
504  for (i = 0; i < 8; i++) {
505  if (!((grouping >> i) & 1))
506  lastgrp = i;
507  wi.grouping[lastgrp]++;
508  }
509  }
510 
511  return wi;
512 }
513 
514 /* 5.6.1.2 "Calculation of Bit Demand" */
515 static int calc_bit_demand(AacPsyContext *ctx, float pe, int bits, int size,
516  int short_window)
517 {
518  const float bitsave_slope = short_window ? PSY_3GPP_SAVE_SLOPE_S : PSY_3GPP_SAVE_SLOPE_L;
519  const float bitsave_add = short_window ? PSY_3GPP_SAVE_ADD_S : PSY_3GPP_SAVE_ADD_L;
520  const float bitspend_slope = short_window ? PSY_3GPP_SPEND_SLOPE_S : PSY_3GPP_SPEND_SLOPE_L;
521  const float bitspend_add = short_window ? PSY_3GPP_SPEND_ADD_S : PSY_3GPP_SPEND_ADD_L;
522  const float clip_low = short_window ? PSY_3GPP_CLIP_LO_S : PSY_3GPP_CLIP_LO_L;
523  const float clip_high = short_window ? PSY_3GPP_CLIP_HI_S : PSY_3GPP_CLIP_HI_L;
524  float clipped_pe, bit_save, bit_spend, bit_factor, fill_level, forgetful_min_pe;
525 
526  ctx->fill_level += ctx->frame_bits - bits;
527  ctx->fill_level = av_clip(ctx->fill_level, 0, size);
528  fill_level = av_clipf((float)ctx->fill_level / size, clip_low, clip_high);
529  clipped_pe = av_clipf(pe, ctx->pe.min, ctx->pe.max);
530  bit_save = (fill_level + bitsave_add) * bitsave_slope;
531  assert(bit_save <= 0.3f && bit_save >= -0.05000001f);
532  bit_spend = (fill_level + bitspend_add) * bitspend_slope;
533  assert(bit_spend <= 0.5f && bit_spend >= -0.1f);
534  /* The bit factor graph in the spec is obviously incorrect.
535  * bit_spend + ((bit_spend - bit_spend))...
536  * The reference encoder subtracts everything from 1, but also seems incorrect.
537  * 1 - bit_save + ((bit_spend + bit_save))...
538  * Hopefully below is correct.
539  */
540  bit_factor = 1.0f - bit_save + ((bit_spend - bit_save) / (ctx->pe.max - ctx->pe.min)) * (clipped_pe - ctx->pe.min);
541  /* NOTE: The reference encoder attempts to center pe max/min around the current pe.
542  * Here we do that by slowly forgetting pe.min when pe stays in a range that makes
543  * it unlikely (ie: above the mean)
544  */
545  ctx->pe.max = FFMAX(pe, ctx->pe.max);
546  forgetful_min_pe = ((ctx->pe.min * PSY_PE_FORGET_SLOPE)
547  + FFMAX(ctx->pe.min, pe * (pe / ctx->pe.max))) / (PSY_PE_FORGET_SLOPE + 1);
548  ctx->pe.min = FFMIN(pe, forgetful_min_pe);
549 
550  /* NOTE: allocate a minimum of 1/8th average frame bits, to avoid
551  * reservoir starvation from producing zero-bit frames
552  */
553  return FFMIN(
554  ctx->frame_bits * bit_factor,
555  FFMAX(ctx->frame_bits + size - bits, ctx->frame_bits / 8));
556 }
557 
558 static float calc_pe_3gpp(AacPsyBand *band)
559 {
560  float pe, a;
561 
562  band->pe = 0.0f;
563  band->pe_const = 0.0f;
564  band->active_lines = 0.0f;
565  if (band->energy > band->thr) {
566  a = log2f(band->energy);
567  pe = a - log2f(band->thr);
568  band->active_lines = band->nz_lines;
569  if (pe < PSY_3GPP_C1) {
570  pe = pe * PSY_3GPP_C3 + PSY_3GPP_C2;
571  a = a * PSY_3GPP_C3 + PSY_3GPP_C2;
572  band->active_lines *= PSY_3GPP_C3;
573  }
574  band->pe = pe * band->nz_lines;
575  band->pe_const = a * band->nz_lines;
576  }
577 
578  return band->pe;
579 }
580 
/**
 * Estimate the value to add to the fourth root of the band thresholds so
 * that the perceptual entropy drops from pe to desired_pe.
 *
 * @param a            sum of the constant PE parts of the bands considered
 * @param desired_pe   target perceptual entropy
 * @param pe           current perceptual entropy
 * @param active_lines sum of active lines of the bands considered
 * @return non-negative reduction value; 0 when there are no active lines
 */
static float calc_reduction_3gpp(float a, float desired_pe, float pe,
                                 float active_lines)
{
    float thr_avg, reduction;

    if(active_lines == 0.0)
        return 0;

    thr_avg   = exp2f((a - pe) / (4.0f * active_lines));
    reduction = exp2f((a - desired_pe) / (4.0f * active_lines)) - thr_avg;

    return FFMAX(reduction, 0.0f);
}
594 
595 static float calc_reduced_thr_3gpp(AacPsyBand *band, float min_snr,
596  float reduction)
597 {
598  float thr = band->thr;
599 
600  if (band->energy > thr) {
601  thr = sqrtf(thr);
602  thr = sqrtf(thr) + reduction;
603  thr *= thr;
604  thr *= thr;
605 
606  /* This deviates from the 3GPP spec to match the reference encoder.
607  * It performs min(thr_reduced, max(thr, energy/min_snr)) only for bands
608  * that have hole avoidance on (active or inactive). It always reduces the
609  * threshold of bands with hole avoidance off.
610  */
611  if (thr > band->energy * min_snr && band->avoid_holes != PSY_3GPP_AH_NONE) {
612  thr = FFMAX(band->thr, band->energy * min_snr);
614  }
615  }
616 
617  return thr;
618 }
619 
620 static void calc_thr_3gpp(const FFPsyWindowInfo *wi, const int num_bands, AacPsyChannel *pch,
621  const uint8_t *band_sizes, const float *coefs, const int cutoff)
622 {
623  int i, w, g;
624  int start = 0, wstart = 0;
625  for (w = 0; w < wi->num_windows*16; w += 16) {
626  wstart = 0;
627  for (g = 0; g < num_bands; g++) {
628  AacPsyBand *band = &pch->band[w+g];
629 
630  float form_factor = 0.0f;
631  float Temp;
632  band->energy = 0.0f;
633  if (wstart < cutoff) {
634  for (i = 0; i < band_sizes[g]; i++) {
635  band->energy += coefs[start+i] * coefs[start+i];
636  form_factor += sqrtf(fabs(coefs[start+i]));
637  }
638  }
639  Temp = band->energy > 0 ? sqrtf((float)band_sizes[g] / band->energy) : 0;
640  band->thr = band->energy * 0.001258925f;
641  band->nz_lines = form_factor * sqrtf(Temp);
642 
643  start += band_sizes[g];
644  wstart += band_sizes[g];
645  }
646  }
647 }
648 
649 static void psy_hp_filter(const float *firbuf, float *hpfsmpl, const float *psy_fir_coeffs)
650 {
651  int i, j;
652  for (i = 0; i < AAC_BLOCK_SIZE_LONG; i++) {
653  float sum1, sum2;
654  sum1 = firbuf[i + (PSY_LAME_FIR_LEN - 1) / 2];
655  sum2 = 0.0;
656  for (j = 0; j < ((PSY_LAME_FIR_LEN - 1) / 2) - 1; j += 2) {
657  sum1 += psy_fir_coeffs[j] * (firbuf[i + j] + firbuf[i + PSY_LAME_FIR_LEN - j]);
658  sum2 += psy_fir_coeffs[j + 1] * (firbuf[i + j + 1] + firbuf[i + PSY_LAME_FIR_LEN - j - 1]);
659  }
660  /* NOTE: The LAME psymodel expects it's input in the range -32768 to 32768.
661  * Tuning this for normalized floats would be difficult. */
662  hpfsmpl[i] = (sum1 + sum2) * 32768.0f;
663  }
664 }
665 
666 /**
667  * Calculate band thresholds as suggested in 3GPP TS26.403
668  */
670  const float *coefs, const FFPsyWindowInfo *wi)
671 {
672  AacPsyContext *pctx = (AacPsyContext*) ctx->model_priv_data;
673  AacPsyChannel *pch = &pctx->ch[channel];
674  int i, w, g;
675  float desired_bits, desired_pe, delta_pe, reduction= NAN, spread_en[128] = {0};
676  float a = 0.0f, active_lines = 0.0f, norm_fac = 0.0f;
677  float pe = pctx->chan_bitrate > 32000 ? 0.0f : FFMAX(50.0f, 100.0f - pctx->chan_bitrate * 100.0f / 32000.0f);
678  const int num_bands = ctx->num_bands[wi->num_windows == 8];
679  const uint8_t *band_sizes = ctx->bands[wi->num_windows == 8];
680  AacPsyCoeffs *coeffs = pctx->psy_coef[wi->num_windows == 8];
681  const float avoid_hole_thr = wi->num_windows == 8 ? PSY_3GPP_AH_THR_SHORT : PSY_3GPP_AH_THR_LONG;
682  const int bandwidth = ctx->cutoff ? ctx->cutoff : AAC_CUTOFF(ctx->avctx);
683  const int cutoff = bandwidth * 2048 / wi->num_windows / ctx->avctx->sample_rate;
684 
685  //calculate energies, initial thresholds and related values - 5.4.2 "Threshold Calculation"
686  calc_thr_3gpp(wi, num_bands, pch, band_sizes, coefs, cutoff);
687 
688  //modify thresholds and energies - spread, threshold in quiet, pre-echo control
689  for (w = 0; w < wi->num_windows*16; w += 16) {
690  AacPsyBand *bands = &pch->band[w];
691 
692  /* 5.4.2.3 "Spreading" & 5.4.3 "Spread Energy Calculation" */
693  spread_en[0] = bands[0].energy;
694  for (g = 1; g < num_bands; g++) {
695  bands[g].thr = FFMAX(bands[g].thr, bands[g-1].thr * coeffs[g].spread_hi[0]);
696  spread_en[w+g] = FFMAX(bands[g].energy, spread_en[w+g-1] * coeffs[g].spread_hi[1]);
697  }
698  for (g = num_bands - 2; g >= 0; g--) {
699  bands[g].thr = FFMAX(bands[g].thr, bands[g+1].thr * coeffs[g].spread_low[0]);
700  spread_en[w+g] = FFMAX(spread_en[w+g], spread_en[w+g+1] * coeffs[g].spread_low[1]);
701  }
702  //5.4.2.4 "Threshold in quiet"
703  for (g = 0; g < num_bands; g++) {
704  AacPsyBand *band = &bands[g];
705 
706  band->thr_quiet = band->thr = FFMAX(band->thr, coeffs[g].ath);
707  //5.4.2.5 "Pre-echo control"
708  if (!(wi->window_type[0] == LONG_STOP_SEQUENCE || (!w && wi->window_type[1] == LONG_START_SEQUENCE)))
709  band->thr = FFMAX(PSY_3GPP_RPEMIN*band->thr, FFMIN(band->thr,
710  PSY_3GPP_RPELEV*pch->prev_band[w+g].thr_quiet));
711 
712  /* 5.6.1.3.1 "Preparatory steps of the perceptual entropy calculation" */
713  pe += calc_pe_3gpp(band);
714  a += band->pe_const;
715  active_lines += band->active_lines;
716 
717  /* 5.6.1.3.3 "Selection of the bands for avoidance of holes" */
718  if (spread_en[w+g] * avoid_hole_thr > band->energy || coeffs[g].min_snr > 1.0f)
720  else
722  }
723  }
724 
725  /* 5.6.1.3.2 "Calculation of the desired perceptual entropy" */
726  ctx->ch[channel].entropy = pe;
727  if (ctx->avctx->flags & AV_CODEC_FLAG_QSCALE) {
728  /* (2.5 * 120) achieves almost transparent rate, and we want to give
729  * ample room downwards, so we make that equivalent to QSCALE=2.4
730  */
731  desired_pe = pe * (ctx->avctx->global_quality ? ctx->avctx->global_quality : 120) / (2 * 2.5f * 120.0f);
732  desired_bits = FFMIN(2560, PSY_3GPP_PE_TO_BITS(desired_pe));
733  desired_pe = PSY_3GPP_BITS_TO_PE(desired_bits); // reflect clipping
734 
735  /* PE slope smoothing */
736  if (ctx->bitres.bits > 0) {
737  desired_bits = FFMIN(2560, PSY_3GPP_PE_TO_BITS(desired_pe));
738  desired_pe = PSY_3GPP_BITS_TO_PE(desired_bits); // reflect clipping
739  }
740 
741  pctx->pe.max = FFMAX(pe, pctx->pe.max);
742  pctx->pe.min = FFMIN(pe, pctx->pe.min);
743  } else {
744  desired_bits = calc_bit_demand(pctx, pe, ctx->bitres.bits, ctx->bitres.size, wi->num_windows == 8);
745  desired_pe = PSY_3GPP_BITS_TO_PE(desired_bits);
746 
747  /* NOTE: PE correction is kept simple. During initial testing it had very
748  * little effect on the final bitrate. Probably a good idea to come
749  * back and do more testing later.
750  */
751  if (ctx->bitres.bits > 0)
752  desired_pe *= av_clipf(pctx->pe.previous / PSY_3GPP_BITS_TO_PE(ctx->bitres.bits),
753  0.85f, 1.15f);
754  }
755  pctx->pe.previous = PSY_3GPP_BITS_TO_PE(desired_bits);
756  ctx->bitres.alloc = desired_bits;
757 
758  if (desired_pe < pe) {
759  /* 5.6.1.3.4 "First Estimation of the reduction value" */
760  for (w = 0; w < wi->num_windows*16; w += 16) {
761  reduction = calc_reduction_3gpp(a, desired_pe, pe, active_lines);
762  pe = 0.0f;
763  a = 0.0f;
764  active_lines = 0.0f;
765  for (g = 0; g < num_bands; g++) {
766  AacPsyBand *band = &pch->band[w+g];
767 
768  band->thr = calc_reduced_thr_3gpp(band, coeffs[g].min_snr, reduction);
769  /* recalculate PE */
770  pe += calc_pe_3gpp(band);
771  a += band->pe_const;
772  active_lines += band->active_lines;
773  }
774  }
775 
776  /* 5.6.1.3.5 "Second Estimation of the reduction value" */
777  for (i = 0; i < 2; i++) {
778  float pe_no_ah = 0.0f, desired_pe_no_ah;
779  active_lines = a = 0.0f;
780  for (w = 0; w < wi->num_windows*16; w += 16) {
781  for (g = 0; g < num_bands; g++) {
782  AacPsyBand *band = &pch->band[w+g];
783 
784  if (band->avoid_holes != PSY_3GPP_AH_ACTIVE) {
785  pe_no_ah += band->pe;
786  a += band->pe_const;
787  active_lines += band->active_lines;
788  }
789  }
790  }
791  desired_pe_no_ah = FFMAX(desired_pe - (pe - pe_no_ah), 0.0f);
792  if (active_lines > 0.0f)
793  reduction = calc_reduction_3gpp(a, desired_pe_no_ah, pe_no_ah, active_lines);
794 
795  pe = 0.0f;
796  for (w = 0; w < wi->num_windows*16; w += 16) {
797  for (g = 0; g < num_bands; g++) {
798  AacPsyBand *band = &pch->band[w+g];
799 
800  if (active_lines > 0.0f)
801  band->thr = calc_reduced_thr_3gpp(band, coeffs[g].min_snr, reduction);
802  pe += calc_pe_3gpp(band);
803  if (band->thr > 0.0f)
804  band->norm_fac = band->active_lines / band->thr;
805  else
806  band->norm_fac = 0.0f;
807  norm_fac += band->norm_fac;
808  }
809  }
810  delta_pe = desired_pe - pe;
811  if (fabs(delta_pe) > 0.05f * desired_pe)
812  break;
813  }
814 
815  if (pe < 1.15f * desired_pe) {
816  /* 6.6.1.3.6 "Final threshold modification by linearization" */
817  norm_fac = norm_fac ? 1.0f / norm_fac : 0;
818  for (w = 0; w < wi->num_windows*16; w += 16) {
819  for (g = 0; g < num_bands; g++) {
820  AacPsyBand *band = &pch->band[w+g];
821 
822  if (band->active_lines > 0.5f) {
823  float delta_sfb_pe = band->norm_fac * norm_fac * delta_pe;
824  float thr = band->thr;
825 
826  thr *= exp2f(delta_sfb_pe / band->active_lines);
827  if (thr > coeffs[g].min_snr * band->energy && band->avoid_holes == PSY_3GPP_AH_INACTIVE)
828  thr = FFMAX(band->thr, coeffs[g].min_snr * band->energy);
829  band->thr = thr;
830  }
831  }
832  }
833  } else {
834  /* 5.6.1.3.7 "Further perceptual entropy reduction" */
835  g = num_bands;
836  while (pe > desired_pe && g--) {
837  for (w = 0; w < wi->num_windows*16; w+= 16) {
838  AacPsyBand *band = &pch->band[w+g];
839  if (band->avoid_holes != PSY_3GPP_AH_NONE && coeffs[g].min_snr < PSY_SNR_1DB) {
840  coeffs[g].min_snr = PSY_SNR_1DB;
841  band->thr = band->energy * PSY_SNR_1DB;
842  pe += band->active_lines * 1.5f - band->pe;
843  }
844  }
845  }
846  /* TODO: allow more holes (unused without mid/side) */
847  }
848  }
849 
850  for (w = 0; w < wi->num_windows*16; w += 16) {
851  for (g = 0; g < num_bands; g++) {
852  AacPsyBand *band = &pch->band[w+g];
853  FFPsyBand *psy_band = &ctx->ch[channel].psy_bands[w+g];
854 
855  psy_band->threshold = band->thr;
856  psy_band->energy = band->energy;
857  psy_band->spread = band->active_lines * 2.0f / band_sizes[g];
858  psy_band->bits = PSY_3GPP_PE_TO_BITS(band->pe);
859  }
860  }
861 
862  memcpy(pch->prev_band, pch->band, sizeof(pch->band));
863 }
864 
866  const float **coeffs, const FFPsyWindowInfo *wi)
867 {
868  int ch;
870  AacPsyContext *pctx = ctx->model_priv_data;
871 
872  /* The encoder's rate-control loop may re-run the analysis for the same
873  * frame; carried state (bit reservoir, PE history, previous-frame
874  * thresholds) must advance exactly once per frame, so save it on the
875  * frame's first run and rewind on re-runs. */
876  if (ctx->avctx->frame_num != pctx->rc_frame_num) {
877  pctx->rc_frame_num = ctx->avctx->frame_num;
878  pctx->rc_first_ch = channel;
879  pctx->rc_fill_level = pctx->fill_level;
880  pctx->rc_pe_min = pctx->pe.min;
881  pctx->rc_pe_max = pctx->pe.max;
882  pctx->rc_pe_previous = pctx->pe.previous;
883  } else if (channel == pctx->rc_first_ch) {
884  pctx->fill_level = pctx->rc_fill_level;
885  pctx->pe.min = pctx->rc_pe_min;
886  pctx->pe.max = pctx->rc_pe_max;
887  pctx->pe.previous = pctx->rc_pe_previous;
888  }
889 
890  for (ch = 0; ch < group->num_ch; ch++) {
891  AacPsyChannel *pch = &pctx->ch[channel + ch];
892  if (ctx->avctx->frame_num != pch->rc_frame_num) {
893  pch->rc_frame_num = ctx->avctx->frame_num;
894  memcpy(pch->rc_prev_band, pch->prev_band, sizeof(pch->prev_band));
895  } else {
896  memcpy(pch->prev_band, pch->rc_prev_band, sizeof(pch->prev_band));
897  }
898  psy_3gpp_analyze_channel(ctx, channel + ch, coeffs[ch], &wi[ch]);
899  }
900 }
901 
903 {
905  if (pctx)
906  av_freep(&pctx->ch);
907  av_freep(&apc->model_priv_data);
908 }
909 
910 static void lame_apply_block_type(AacPsyChannel *ctx, FFPsyWindowInfo *wi, int uselongblock)
911 {
912  int blocktype = ONLY_LONG_SEQUENCE;
913  if (uselongblock) {
914  if (ctx->next_window_seq == EIGHT_SHORT_SEQUENCE)
915  blocktype = LONG_STOP_SEQUENCE;
916  } else {
917  blocktype = EIGHT_SHORT_SEQUENCE;
918  if (ctx->next_window_seq == ONLY_LONG_SEQUENCE)
919  ctx->next_window_seq = LONG_START_SEQUENCE;
920  if (ctx->next_window_seq == LONG_STOP_SEQUENCE)
921  ctx->next_window_seq = EIGHT_SHORT_SEQUENCE;
922  }
923 
924  wi->window_type[0] = ctx->next_window_seq;
925  ctx->next_window_seq = blocktype;
926 }
927 
928 static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, const float *audio,
929  const float *la, int channel, int prev_type)
930 {
931  AacPsyContext *pctx = (AacPsyContext*) ctx->model_priv_data;
932  AacPsyChannel *pch = &pctx->ch[channel];
933  int grouping = 0;
934  int uselongblock = 1;
935  int attacks[AAC_NUM_BLOCKS_SHORT + 1] = { 0 };
936  int i;
937  FFPsyWindowInfo wi = { { 0 } };
938 
939  if (la) {
940  float hpfsmpl[AAC_BLOCK_SIZE_LONG];
941  const float *pf = hpfsmpl;
942  float attack_intensity[(AAC_NUM_BLOCKS_SHORT + 1) * PSY_LAME_NUM_SUBBLOCKS];
943  float energy_subshort[(AAC_NUM_BLOCKS_SHORT + 1) * PSY_LAME_NUM_SUBBLOCKS];
944  float energy_short[AAC_NUM_BLOCKS_SHORT + 1] = { 0 };
945  const float *firbuf = la + (AAC_BLOCK_SIZE_SHORT/4 - PSY_LAME_FIR_LEN);
946  int att_sum = 0;
947 
948  /* LAME comment: apply high pass filter of fs/4 */
949  psy_hp_filter(firbuf, hpfsmpl, psy_fir_coeffs);
950 
951  /* Calculate the energies of each sub-shortblock */
952  for (i = 0; i < PSY_LAME_NUM_SUBBLOCKS; i++) {
953  energy_subshort[i] = pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 1) * PSY_LAME_NUM_SUBBLOCKS)];
954  assert(pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 1) * PSY_LAME_NUM_SUBBLOCKS - 2)] > 0);
955  attack_intensity[i] = energy_subshort[i] / pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 1) * PSY_LAME_NUM_SUBBLOCKS - 2)];
956  energy_short[0] += energy_subshort[i];
957  }
958 
959  for (i = 0; i < AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS; i++) {
960  const float *const pfe = pf + AAC_BLOCK_SIZE_LONG / (AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS);
961  float p = 1.0f;
962  for (; pf < pfe; pf++)
963  p = FFMAX(p, fabsf(*pf));
964  pch->prev_energy_subshort[i] = energy_subshort[i + PSY_LAME_NUM_SUBBLOCKS] = p;
965  energy_short[1 + i / PSY_LAME_NUM_SUBBLOCKS] += p;
966 
967  /* NOTE: The indexes below are [i + 3 - 2] in the LAME source. Compare each sub-block to sub-block - 2 */
968  if (p > energy_subshort[i + PSY_LAME_NUM_SUBBLOCKS - 2])
969  p = p / energy_subshort[i + PSY_LAME_NUM_SUBBLOCKS - 2];
970  else if (energy_subshort[i + PSY_LAME_NUM_SUBBLOCKS - 2] > p * 10.0f)
971  p = energy_subshort[i + PSY_LAME_NUM_SUBBLOCKS - 2] / (p * 10.0f);
972  else
973  p = 0.0;
974 
975  attack_intensity[i + PSY_LAME_NUM_SUBBLOCKS] = p;
976  }
977 
978  { /* pre-echo-aware threshold relaxation, see PSY_LAME_PE_* */
979  float frame_peak = 1.0f;
981  frame_peak = FFMAX(frame_peak, energy_subshort[i]);
982  for (i = 0; i < (AAC_NUM_BLOCKS_SHORT + 1) * PSY_LAME_NUM_SUBBLOCKS; i++)
983  if (!attacks[i / PSY_LAME_NUM_SUBBLOCKS]) {
984  float thr = pch->attack_threshold;
985  if (i >= PSY_LAME_NUM_SUBBLOCKS &&
986  pch->frames_since_short >= PSY_LAME_PE_GAP &&
987  energy_subshort[i - PSY_LAME_NUM_SUBBLOCKS] < PSY_LAME_PE_QUIET * frame_peak)
988  thr *= PSY_LAME_PE_RED;
989  if (attack_intensity[i] > thr)
990  attacks[i / PSY_LAME_NUM_SUBBLOCKS] = (i % PSY_LAME_NUM_SUBBLOCKS) + 1;
991  }
992  }
993 
994  /* should have energy change between short blocks, in order to avoid periodic signals */
995  /* Good samples to show the effect are Trumpet test songs */
996  /* GB: tuned (1) to avoid too many short blocks for test sample TRUMPET */
997  /* RH: tuned (2) to let enough short blocks through for test sample FSOL and SNAPS */
998  for (i = 1; i < AAC_NUM_BLOCKS_SHORT + 1; i++) {
999  const float u = energy_short[i - 1];
1000  const float v = energy_short[i];
1001  const float m = FFMAX(u, v);
1002  if (m < 40000) { /* (2) */
1003  if (u < 2.3f * v && v < 2.3f * u) { /* (1) */
1004  if (i == 1 && attacks[0] < attacks[i])
1005  attacks[0] = 0;
1006  attacks[i] = 0;
1007  }
1008  }
1009  att_sum += attacks[i];
1010  }
1011 
1012  if (pch->next_attack0_zero)
1013  attacks[0] = 0;
1014  pch->next_attack0_zero = !attacks[AAC_NUM_BLOCKS_SHORT];
1015 
1016  if (attacks[0] <= pch->prev_attack)
1017  attacks[0] = 0;
1018 
1019  att_sum += attacks[0];
1020 
1021  /* If the previous attack happened in the last sub-block of the previous sequence,
1022  * or if there's a new attack, use short window */
1023  if (pch->prev_attack == PSY_LAME_NUM_SUBBLOCKS || att_sum) {
1024  uselongblock = 0;
1025 
1026  for (i = 1; i < AAC_NUM_BLOCKS_SHORT + 1; i++)
1027  if (attacks[i] && attacks[i-1])
1028  attacks[i] = 0;
1029  }
1030 
1031  pch->frames_since_short = uselongblock ? pch->frames_since_short + 1 : 0;
1032  } else {
1033  /* We have no lookahead info, so just use same type as the previous sequence. */
1034  uselongblock = !(prev_type == EIGHT_SHORT_SEQUENCE);
1035  }
1036 
1037  lame_apply_block_type(pch, &wi, uselongblock);
1038 
1039  wi.window_type[1] = prev_type;
1040  if (wi.window_type[0] != EIGHT_SHORT_SEQUENCE) {
1041 
1042  wi.num_windows = 1;
1043  wi.grouping[0] = 1;
1044  if (wi.window_type[0] == LONG_START_SEQUENCE)
1045  wi.window_shape = 0;
1046  else
1047  wi.window_shape = 1;
1048 
1049  } else {
1050  int lastgrp = 0;
1051 
1052  wi.num_windows = 8;
1053  wi.window_shape = 0;
1054  for (i = 0; i < 8; i++) {
1055  if (!((pch->next_grouping >> i) & 1))
1056  lastgrp = i;
1057  wi.grouping[lastgrp]++;
1058  }
1059  }
1060 
1061  /* Determine grouping, based on the location of the first attack, and save for
1062  * the next frame.
1063  * FIXME: Move this to analysis.
1064  * TODO: Tune groupings depending on attack location
1065  * TODO: Handle more than one attack in a group
1066  */
1067  for (i = 0; i < 9; i++) {
1068  if (attacks[i]) {
1069  grouping = i;
1070  break;
1071  }
1072  }
1073  pch->next_grouping = window_grouping[grouping];
1074 
1075  pch->prev_attack = attacks[AAC_NUM_BLOCKS_SHORT - 1];
1076 
1077  return wi;
1078 }
1079 
1081 {
1082  .name = "3GPP TS 26.403-inspired model",
1083  .init = psy_3gpp_init,
1084  .window = psy_lame_window,
1085  .analyze = psy_3gpp_analyze,
1086  .end = psy_3gpp_end,
1087 };
PSY_LAME_PE_QUIET
#define PSY_LAME_PE_QUIET
pre-onset must be below this fraction of the frame peak
Definition: aacpsy.c:107
PSY_LAME_PE_RED
#define PSY_LAME_PE_RED
attack-threshold multiplier for a qualifying isolated onset
Definition: aacpsy.c:108
AacPsyCoeffs::spread_low
float spread_low[2]
spreading factor for high-to-low threshold spreading in long frame
Definition: aacpsy.c:158
ff_exp10
static av_always_inline double ff_exp10(double x)
Compute 10^x for floating point values.
Definition: ffmath.h:42
av_clip
#define av_clip
Definition: common.h:100
psy_3gpp_init
static av_cold int psy_3gpp_init(FFPsyContext *ctx)
Definition: aacpsy.c:318
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
AacPsyContext::rc_pe_min
float rc_pe_min
Definition: aacpsy.c:184
psy_3gpp_window
static av_unused FFPsyWindowInfo psy_3gpp_window(FFPsyContext *ctx, const int16_t *audio, const int16_t *la, int channel, int prev_type)
Tell encoder which window types to use.
Definition: aacpsy.c:429
lame_calc_attack_threshold
static float lame_calc_attack_threshold(int bitrate)
Calculate the ABR attack threshold from the above LAME psymodel table.
Definition: aacpsy.c:249
FFPsyModel::name
const char * name
Definition: psymodel.h:115
PSY_PE_FORGET_SLOPE
#define PSY_PE_FORGET_SLOPE
Definition: aacpsy.c:84
psy_lame_window
static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, const float *audio, const float *la, int channel, int prev_type)
Definition: aacpsy.c:928
log2f
#define log2f(x)
Definition: libm.h:411
AacPsyBand::thr
float thr
energy threshold
Definition: aacpsy.c:119
calc_thr_3gpp
static void calc_thr_3gpp(const FFPsyWindowInfo *wi, const int num_bands, AacPsyChannel *pch, const uint8_t *band_sizes, const float *coefs, const int cutoff)
Definition: aacpsy.c:620
PSY_3GPP_PE_TO_BITS
#define PSY_3GPP_PE_TO_BITS(bits)
Definition: aacpsy.c:93
AV_CODEC_FLAG_QSCALE
#define AV_CODEC_FLAG_QSCALE
Use fixed qscale.
Definition: avcodec.h:213
calc_bark
static av_cold float calc_bark(float f)
Calculate Bark value for given line.
Definition: aacpsy.c:299
av_cold
#define av_cold
Definition: attributes.h:119
int64_t
long long int64_t
Definition: coverity.c:34
AacPsyBand::nz_lines
float nz_lines
number of non-zero spectral lines
Definition: aacpsy.c:121
PSY_3GPP_CLIP_LO_S
#define PSY_3GPP_CLIP_LO_S
Definition: aacpsy.c:77
u
#define u(width, name, range_min, range_max)
Definition: cbs_apv.c:68
AacPsyContext::rc_pe_max
float rc_pe_max
Definition: aacpsy.c:184
PSY_3GPP_AH_THR_LONG
#define PSY_3GPP_AH_THR_LONG
Definition: aacpsy.c:81
FFPsyWindowInfo::window_shape
int window_shape
window shape (sine/KBD/whatever)
Definition: psymodel.h:79
PSY_SNR_1DB
#define PSY_SNR_1DB
Definition: aacpsy.c:65
calc_pe_3gpp
static float calc_pe_3gpp(AacPsyBand *band)
Definition: aacpsy.c:558
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
AacPsyContext::min
float min
minimum allowed PE for bit factor calculation
Definition: aacpsy.c:171
PSY_3GPP_SPEND_SLOPE_L
#define PSY_3GPP_SPEND_SLOPE_L
Definition: aacpsy.c:72
PSY_3GPP_THR_SPREAD_HI
#define PSY_3GPP_THR_SPREAD_HI
constants for 3GPP AAC psychoacoustic model
Definition: aacpsy.c:45
AacPsyContext::fill_level
int fill_level
bit reservoir fill level
Definition: aacpsy.c:169
AVChannelLayout::nb_channels
int nb_channels
Number of channels in this layout.
Definition: channel_layout.h:329
AacPsyCoeffs::spread_hi
float spread_hi[2]
spreading factor for low-to-high threshold spreading in long frame
Definition: aacpsy.c:159
quality
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But a word about quality
Definition: rate_distortion.txt:12
lame_apply_block_type
static void lame_apply_block_type(AacPsyChannel *ctx, FFPsyWindowInfo *wi, int uselongblock)
Definition: aacpsy.c:910
AacPsyCoeffs
psychoacoustic model frame type-dependent coefficients
Definition: aacpsy.c:155
AacPsyContext::rc_frame_num
int64_t rc_frame_num
frame the rewind state was saved for
Definition: aacpsy.c:181
AVCodecContext::ch_layout
AVChannelLayout ch_layout
Audio channel layout.
Definition: avcodec.h:1055
lame_window_init
static av_cold void lame_window_init(AacPsyContext *ctx, AVCodecContext *avctx)
LAME psy model specific initialization.
Definition: aacpsy.c:279
PsyLamePreset::st_lrm
float st_lrm
short threshold for L, R, and M channels
Definition: aacpsy.c:195
PSY_3GPP_EN_SPREAD_HI_S
#define PSY_3GPP_EN_SPREAD_HI_S
Definition: aacpsy.c:52
PSY_LAME_PE_GAP
#define PSY_LAME_PE_GAP
min consecutive long frames before the relaxation applies
Definition: aacpsy.c:106
PSY_3GPP_SPEND_ADD_L
#define PSY_3GPP_SPEND_ADD_L
Definition: aacpsy.c:74
AVCodecContext::flags
int flags
AV_CODEC_FLAG_*.
Definition: avcodec.h:500
AacPsyCoeffs::barks
float barks
Bark value for each spectral band in long frame.
Definition: aacpsy.c:157
AacPsyChannel::prev_energy_subshort
float prev_energy_subshort[AAC_NUM_BLOCKS_SHORT *PSY_LAME_NUM_SUBBLOCKS]
Definition: aacpsy.c:142
fabsf
static __device__ float fabsf(float a)
Definition: cuda_runtime.h:181
av_unused
#define av_unused
Definition: attributes.h:164
FFPsyWindowInfo
windowing related information
Definition: psymodel.h:77
ATH_ADD
#define ATH_ADD
Definition: aacpsy.c:304
AVFormatContext::bit_rate
int64_t bit_rate
Total stream bitrate in bit/s, 0 if not available.
Definition: avformat.h:1456
AacPsyContext::previous
float previous
allowed PE of the previous frame
Definition: aacpsy.c:173
ff_aac_psy_model
const FFPsyModel ff_aac_psy_model
Definition: aacpsy.c:1080
AacPsyContext::ch
AacPsyChannel * ch
Definition: aacpsy.c:177
FFPsyChannelGroup::num_ch
uint8_t num_ch
number of channels in this group
Definition: psymodel.h:70
AacPsyContext::rc_first_ch
int rc_first_ch
first channel analyzed in that frame
Definition: aacpsy.c:182
PsyLamePreset
LAME psy model preset struct.
Definition: aacpsy.c:190
AacPsyChannel::rc_frame_num
int64_t rc_frame_num
frame this channel last saved rewind state for
Definition: aacpsy.c:148
PSY_3GPP_CLIP_HI_S
#define PSY_3GPP_CLIP_HI_S
Definition: aacpsy.c:79
AacPsyBand
information for single band used by 3GPP TS26.403-inspired psychoacoustic model
Definition: aacpsy.c:117
AVCodecContext::global_quality
int global_quality
Global quality for codecs which cannot change it per frame.
Definition: avcodec.h:1235
AVFormatContext::flags
int flags
Flags modifying the (de)muxer behaviour.
Definition: avformat.h:1465
bitrate
int64_t bitrate
Definition: av1_levels.c:47
g
const char * g
Definition: vf_curves.c:128
EIGHT_SHORT_SEQUENCE
@ EIGHT_SHORT_SEQUENCE
Definition: aac.h:66
state
static struct @595 state
PsyLamePreset::quality
int quality
Quality to map the rest of the values to.
Definition: aacpsy.c:191
AacPsyBand::pe_const
float pe_const
constant part of the PE calculation
Definition: aacpsy.c:124
bits
uint8_t bits
Definition: vp3data.h:128
AacPsyContext
3GPP TS26.403-inspired psychoacoustic model specific data
Definition: aacpsy.c:166
AacPsyChannel::next_attack0_zero
int next_attack0_zero
whether attack[0] of the next frame is zero
Definition: aacpsy.c:144
AacPsyCoeffs::min_snr
float min_snr
minimal SNR
Definition: aacpsy.c:160
ctx
static AVFormatContext * ctx
Definition: movenc.c:49
exp2f
#define exp2f(x)
Definition: libm.h:295
calc_reduction_3gpp
static float calc_reduction_3gpp(float a, float desired_pe, float pe, float active_lines)
Definition: aacpsy.c:581
window_grouping
static const uint8_t window_grouping[9]
window grouping information stored as bits (0 - new group, 1 - group continues)
Definition: aacpsy.c:421
AAC_BLOCK_SIZE_SHORT
#define AAC_BLOCK_SIZE_SHORT
short block size
Definition: aacpsy.c:98
AacPsyContext::pe
struct AacPsyContext::@41 pe
bands
static const float bands[]
Definition: af_superequalizer.c:56
ath
static av_cold float ath(float f, float add)
Calculate ATH value for given frequency.
Definition: aacpsy.c:309
av_mallocz
#define av_mallocz(s)
Definition: tableprint_vlc.h:31
calc_bit_demand
static int calc_bit_demand(AacPsyContext *ctx, float pe, int bits, int size, int short_window)
Definition: aacpsy.c:515
NAN
#define NAN
Definition: mathematics.h:115
PSY_3GPP_AH_THR_SHORT
#define PSY_3GPP_AH_THR_SHORT
Definition: aacpsy.c:82
psy_hp_filter
static void psy_hp_filter(const float *firbuf, float *hpfsmpl, const float *psy_fir_coeffs)
Definition: aacpsy.c:649
if
if(ret)
Definition: filter_design.txt:179
iir_filter
static float iir_filter(int in, float state[2])
IIR filter used in block switching decision.
Definition: aacpsy.c:408
psy_vbr_map
static const PsyLamePreset psy_vbr_map[]
LAME psy model preset table for constant quality.
Definition: aacpsy.c:222
AAC_CUTOFF
#define AAC_CUTOFF(s)
Definition: psymodel.h:41
FFPsyWindowInfo::window_type
int window_type[3]
window type (short/long/transitional, etc.) - current, previous and next
Definition: psymodel.h:78
FFPsyBand::bits
int bits
Definition: psymodel.h:51
fabs
static __device__ float fabs(float a)
Definition: cuda_runtime.h:182
PSY_3GPP_RPEMIN
#define PSY_3GPP_RPEMIN
Definition: aacpsy.c:58
psy_abr_map
static const PsyLamePreset psy_abr_map[]
LAME psy model preset table for ABR.
Definition: aacpsy.c:201
PSY_3GPP_C1
#define PSY_3GPP_C1
Definition: aacpsy.c:61
AVCodecContext::bit_rate
int64_t bit_rate
the average bitrate
Definition: avcodec.h:493
psy_3gpp_end
static av_cold void psy_3gpp_end(FFPsyContext *apc)
Definition: aacpsy.c:902
PSY_3GPP_BITS_TO_PE
#define PSY_3GPP_BITS_TO_PE(bits)
Definition: aacpsy.c:92
FFPsyBand
single band psychoacoustic information
Definition: psymodel.h:50
aac.h
sqrtf
static __device__ float sqrtf(float a)
Definition: cuda_runtime.h:184
FFPsyWindowInfo::grouping
int grouping[8]
window grouping (for e.g. AAC)
Definition: psymodel.h:81
attributes.h
av_clipf
av_clipf
Definition: af_crystalizer.c:122
AacPsyContext::max
float max
maximum allowed PE for bit factor calculation
Definition: aacpsy.c:172
exp
int8_t exp
Definition: eval.c:76
AacPsyChannel::iir_state
float iir_state[2]
hi-pass IIR filter state
Definition: aacpsy.c:137
AacPsyContext::psy_coef
AacPsyCoeffs psy_coef[2][64]
Definition: aacpsy.c:176
AacPsyBand::thr_quiet
float thr_quiet
threshold in quiet
Definition: aacpsy.c:120
AAC_BLOCK_SIZE_LONG
#define AAC_BLOCK_SIZE_LONG
long block size
Definition: aacpsy.c:97
f
f
Definition: af_crystalizer.c:122
ONLY_LONG_SEQUENCE
@ ONLY_LONG_SEQUENCE
Definition: aac.h:64
AacPsyChannel::band
AacPsyBand band[128]
bands information
Definition: aacpsy.c:133
i
#define i(width, name, range_min, range_max)
Definition: cbs_h264.c:63
size
int size
Definition: twinvq_data.h:10344
calc_reduced_thr_3gpp
static float calc_reduced_thr_3gpp(AacPsyBand *band, float min_snr, float reduction)
Definition: aacpsy.c:595
AacPsyCoeffs::ath
float ath
absolute threshold of hearing per bands
Definition: aacpsy.c:156
AacPsyBand::active_lines
float active_lines
number of active spectral lines
Definition: aacpsy.c:122
AAC_NUM_BLOCKS_SHORT
#define AAC_NUM_BLOCKS_SHORT
number of blocks in a short sequence
Definition: aacpsy.c:99
PSY_LAME_FIR_LEN
#define PSY_LAME_FIR_LEN
LAME psy model FIR order.
Definition: aacpsy.c:96
AacPsyChannel::rc_prev_band
AacPsyBand rc_prev_band[128]
prev_band as it was entering the frame
Definition: aacpsy.c:149
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
PSY_3GPP_CLIP_LO_L
#define PSY_3GPP_CLIP_LO_L
Definition: aacpsy.c:76
AacPsyBand::avoid_holes
int avoid_holes
hole avoidance flag
Definition: aacpsy.c:126
PSY_3GPP_THR_SPREAD_LOW
#define PSY_3GPP_THR_SPREAD_LOW
Definition: aacpsy.c:46
PSY_3GPP_SAVE_ADD_S
#define PSY_3GPP_SAVE_ADD_S
Definition: aacpsy.c:71
PSY_3GPP_SPEND_ADD_S
#define PSY_3GPP_SPEND_ADD_S
Definition: aacpsy.c:75
AacPsyChannel::frames_since_short
int frames_since_short
consecutive long frames (pre-echo-aware isolated-onset gate)
Definition: aacpsy.c:145
psy_fir_coeffs
static const float psy_fir_coeffs[]
LAME psy model FIR coefficient table.
Definition: aacpsy.c:240
AacPsyChannel::attack_threshold
float attack_threshold
attack threshold for this channel
Definition: aacpsy.c:141
AacPsyBand::norm_fac
float norm_fac
normalization factor for linearization
Definition: aacpsy.c:125
FFPsyBand::threshold
float threshold
Definition: psymodel.h:53
AacPsyContext::rc_fill_level
int rc_fill_level
Definition: aacpsy.c:183
PSY_3GPP_CLIP_HI_L
#define PSY_3GPP_CLIP_HI_L
Definition: aacpsy.c:78
LONG_STOP_SEQUENCE
@ LONG_STOP_SEQUENCE
Definition: aac.h:67
s
uint8_t s
Definition: llvidencdsp.c:39
atanf
#define atanf(x)
Definition: libm.h:42
exp2
#define exp2(x)
Definition: libm.h:290
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
PSY_3GPP_RPELEV
#define PSY_3GPP_RPELEV
Definition: aacpsy.c:59
AacPsyBand::pe
float pe
perceptual entropy
Definition: aacpsy.c:123
av_calloc
void * av_calloc(size_t nmemb, size_t size)
Definition: mem.c:264
AacPsyBand::energy
float energy
band energy
Definition: aacpsy.c:118
avcodec.h
FFPsyChannelGroup
psychoacoustic information for an arbitrary group of channels
Definition: psymodel.h:68
AacPsyChannel::next_window_seq
enum WindowSequence next_window_seq
window sequence to be used in the next frame
Definition: aacpsy.c:139
AacPsyChannel::win_energy
float win_energy
sliding average of channel energy
Definition: aacpsy.c:136
ret
ret
Definition: filter_design.txt:187
AacPsyChannel
single/pair channel context for psychoacoustic model
Definition: aacpsy.c:132
AacPsyContext::correction
float correction
PE correction factor.
Definition: aacpsy.c:174
FFPsyContext::model_priv_data
void * model_priv_data
psychoacoustic model implementation private data
Definition: psymodel.h:108
LONG_START_SEQUENCE
@ LONG_START_SEQUENCE
Definition: aac.h:65
PSY_3GPP_AH_NONE
@ PSY_3GPP_AH_NONE
Definition: aacpsy.c:87
PSY_3GPP_SAVE_SLOPE_S
#define PSY_3GPP_SAVE_SLOPE_S
Definition: aacpsy.c:69
PSY_3GPP_EN_SPREAD_HI_L1
#define PSY_3GPP_EN_SPREAD_HI_L1
Definition: aacpsy.c:48
AacPsyChannel::next_grouping
uint8_t next_grouping
stored grouping scheme for the next frame (in case of 8 short window sequence)
Definition: aacpsy.c:138
FFPsyBand::energy
float energy
Definition: psymodel.h:52
AVCodecContext
main external API structure.
Definition: avcodec.h:443
PSY_3GPP_AH_ACTIVE
@ PSY_3GPP_AH_ACTIVE
Definition: aacpsy.c:89
PSY_LAME_NUM_SUBBLOCKS
#define PSY_LAME_NUM_SUBBLOCKS
Number of sub-blocks in each short block.
Definition: aacpsy.c:100
PSY_SNR_25DB
#define PSY_SNR_25DB
Definition: aacpsy.c:66
AacPsyContext::global_quality
float global_quality
normalized global quality taken from avctx
Definition: aacpsy.c:178
psy_3gpp_analyze_channel
static void psy_3gpp_analyze_channel(FFPsyContext *ctx, int channel, const float *coefs, const FFPsyWindowInfo *wi)
Calculate band thresholds as suggested in 3GPP TS26.403.
Definition: aacpsy.c:669
FFPsyModel
codec-specific psychoacoustic model implementation
Definition: psymodel.h:114
Windows::Graphics::DirectX::Direct3D11::p
IDirect3DDxgiInterfaceAccess _COM_Outptr_ void ** p
Definition: vsrc_gfxcapture_winrt.hpp:53
AacPsyContext::frame_bits
int frame_bits
average bits per frame
Definition: aacpsy.c:168
ffmath.h
ff_psy_find_group
FFPsyChannelGroup * ff_psy_find_group(FFPsyContext *ctx, int channel)
Determine what group a channel belongs to.
Definition: psymodel.c:67
psy_3gpp_analyze
static void psy_3gpp_analyze(FFPsyContext *ctx, int channel, const float **coeffs, const FFPsyWindowInfo *wi)
Definition: aacpsy.c:865
PSY_3GPP_C3
#define PSY_3GPP_C3
Definition: aacpsy.c:63
mem.h
PSY_3GPP_EN_SPREAD_LOW_L
#define PSY_3GPP_EN_SPREAD_LOW_L
Definition: aacpsy.c:54
PSY_3GPP_AH_INACTIVE
@ PSY_3GPP_AH_INACTIVE
Definition: aacpsy.c:88
AacPsyContext::rc_pe_previous
float rc_pe_previous
Definition: aacpsy.c:184
w
uint8_t w
Definition: llvidencdsp.c:39
AacPsyContext::chan_bitrate
int chan_bitrate
bitrate per channel
Definition: aacpsy.c:167
PSY_3GPP_SAVE_SLOPE_L
#define PSY_3GPP_SAVE_SLOPE_L
Definition: aacpsy.c:68
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:35
PSY_3GPP_C2
#define PSY_3GPP_C2
Definition: aacpsy.c:62
coeff
static const double coeff[2][5]
Definition: vf_owdenoise.c:80
PSY_3GPP_SPEND_SLOPE_S
#define PSY_3GPP_SPEND_SLOPE_S
Definition: aacpsy.c:73
WindowSequence
WindowSequence
Definition: aac.h:63
FFPsyBand::spread
float spread
Definition: psymodel.h:54
FF_QP2LAMBDA
#define FF_QP2LAMBDA
factor to convert from H.263 QP to lambda
Definition: avutil.h:226
PSY_3GPP_EN_SPREAD_LOW_S
#define PSY_3GPP_EN_SPREAD_LOW_S
Definition: aacpsy.c:56
AacPsyChannel::prev_attack
int prev_attack
attack value for the last short block in the previous sequence
Definition: aacpsy.c:143
FFPsyContext
context used by psychoacoustic model
Definition: psymodel.h:89
AacPsyChannel::prev_band
AacPsyBand prev_band[128]
bands information from the previous frame
Definition: aacpsy.c:134
psymodel.h
channel
channel
Definition: ebur128.h:39
FFPsyWindowInfo::num_windows
int num_windows
number of windows in a frame
Definition: psymodel.h:80
PSY_3GPP_SAVE_ADD_L
#define PSY_3GPP_SAVE_ADD_L
Definition: aacpsy.c:70