FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
aacenc_is.c
Go to the documentation of this file.
1 /*
2  * AAC encoder intensity stereo
3  * Copyright (C) 2015 Rostislav Pehlivanov
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * AAC encoder Intensity Stereo
25  * @author Rostislav Pehlivanov ( atomnuker gmail com )
26  */
27 
28 #include "aacenc.h"
29 #include "aacenc_utils.h"
30 #include "aacenc_is.h"
31 #include "aacenc_quantization.h"
32 
34  int start, int w, int g, float ener0,
35  float ener1, float ener01,
36  int use_pcoeffs, int phase)
37 {
38  int i, w2;
39  SingleChannelElement *sce0 = &cpe->ch[0];
40  SingleChannelElement *sce1 = &cpe->ch[1];
41  float *L = use_pcoeffs ? sce0->pcoeffs : sce0->coeffs;
42  float *R = use_pcoeffs ? sce1->pcoeffs : sce1->coeffs;
43  float *L34 = &s->scoefs[256*0], *R34 = &s->scoefs[256*1];
44  float *IS = &s->scoefs[256*2], *I34 = &s->scoefs[256*3];
45  float dist1 = 0.0f, dist2 = 0.0f;
46  struct AACISError is_error = {0};
47 
48  for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
49  FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
50  FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
51  int is_band_type, is_sf_idx = FFMAX(1, sce0->sf_idx[(w+w2)*16+g]-4);
52  float e01_34 = phase*pow(ener1/ener0, 3.0/4.0);
53  float maxval, dist_spec_err = 0.0f;
54  float minthr = FFMIN(band0->threshold, band1->threshold);
55  for (i = 0; i < sce0->ics.swb_sizes[g]; i++)
56  IS[i] = (L[start+(w+w2)*128+i] + phase*R[start+(w+w2)*128+i])*sqrt(ener0/ener01);
57  abs_pow34_v(L34, &L[start+(w+w2)*128], sce0->ics.swb_sizes[g]);
58  abs_pow34_v(R34, &R[start+(w+w2)*128], sce0->ics.swb_sizes[g]);
59  abs_pow34_v(I34, IS, sce0->ics.swb_sizes[g]);
60  maxval = find_max_val(1, sce0->ics.swb_sizes[g], I34);
61  is_band_type = find_min_book(maxval, is_sf_idx);
62  dist1 += quantize_band_cost(s, &L[start + (w+w2)*128], L34,
63  sce0->ics.swb_sizes[g],
64  sce0->sf_idx[(w+w2)*16+g],
65  sce0->band_type[(w+w2)*16+g],
66  s->lambda / band0->threshold, INFINITY, NULL, 0);
67  dist1 += quantize_band_cost(s, &R[start + (w+w2)*128], R34,
68  sce1->ics.swb_sizes[g],
69  sce1->sf_idx[(w+w2)*16+g],
70  sce1->band_type[(w+w2)*16+g],
71  s->lambda / band1->threshold, INFINITY, NULL, 0);
72  dist2 += quantize_band_cost(s, IS, I34, sce0->ics.swb_sizes[g],
73  is_sf_idx, is_band_type,
74  s->lambda / minthr, INFINITY, NULL, 0);
75  for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
76  dist_spec_err += (L34[i] - I34[i])*(L34[i] - I34[i]);
77  dist_spec_err += (R34[i] - I34[i]*e01_34)*(R34[i] - I34[i]*e01_34);
78  }
79  dist_spec_err *= s->lambda / minthr;
80  dist2 += dist_spec_err;
81  }
82 
83  is_error.pass = dist2 <= dist1;
84  is_error.phase = phase;
85  is_error.error = fabsf(dist1 - dist2);
86  is_error.dist1 = dist1;
87  is_error.dist2 = dist2;
88 
89  return is_error;
90 }
91 
93 {
94  SingleChannelElement *sce0 = &cpe->ch[0];
95  SingleChannelElement *sce1 = &cpe->ch[1];
96  int start = 0, count = 0, w, w2, g, i;
97  const float freq_mult = avctx->sample_rate/(1024.0f/sce0->ics.num_windows)/2.0f;
98 
99  if (!cpe->common_window)
100  return;
101 
102  for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
103  start = 0;
104  for (g = 0; g < sce0->ics.num_swb; g++) {
105  if (start*freq_mult > INT_STEREO_LOW_LIMIT*(s->lambda/170.0f) &&
106  cpe->ch[0].band_type[w*16+g] != NOISE_BT && !cpe->ch[0].zeroes[w*16+g] &&
107  cpe->ch[1].band_type[w*16+g] != NOISE_BT && !cpe->ch[1].zeroes[w*16+g]) {
108  float ener0 = 0.0f, ener1 = 0.0f, ener01 = 0.0f;
109  struct AACISError ph_err1, ph_err2, *erf;
110  for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
111  for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
112  float coef0 = sce0->pcoeffs[start+(w+w2)*128+i];
113  float coef1 = sce1->pcoeffs[start+(w+w2)*128+i];
114  ener0 += coef0*coef0;
115  ener1 += coef1*coef1;
116  ener01 += (coef0 + coef1)*(coef0 + coef1);
117  }
118  }
119  ph_err1 = ff_aac_is_encoding_err(s, cpe, start, w, g,
120  ener0, ener1, ener01, 0, -1);
121  ph_err2 = ff_aac_is_encoding_err(s, cpe, start, w, g,
122  ener0, ener1, ener01, 0, +1);
123  erf = ph_err1.error < ph_err2.error ? &ph_err1 : &ph_err2;
124  if (erf->pass) {
125  cpe->is_mask[w*16+g] = 1;
126  cpe->ch[0].is_ener[w*16+g] = sqrt(ener0/ener01);
127  cpe->ch[1].is_ener[w*16+g] = ener0/ener1;
128  cpe->ch[1].band_type[w*16+g] = erf->phase ? INTENSITY_BT : INTENSITY_BT2;
129  count++;
130  }
131  }
132  start += sce0->ics.swb_sizes[g];
133  }
134  }
135  cpe->is_mode = !!count;
136 }
#define NULL
Definition: coverity.c:32
const char * s
Definition: avisynth_c.h:631
float dist2
Definition: aacenc_is.h:41
static void abs_pow34_v(float *out, const float *in, const int size)
Definition: aacenc_utils.h:39
float pcoeffs[1024]
coefficients for IMDCT, pristine
Definition: aac.h:257
const char * g
Definition: vf_curves.c:108
int common_window
Set if channels share a common 'IndividualChannelStream' in bitstream.
Definition: aac.h:273
float lambda
Definition: aacenc.h:101
Spectral data are scaled white noise not coded in the bitstream.
Definition: aac.h:87
AAC encoder quantizer.
AAC encoder context.
Definition: aacenc.h:80
SingleChannelElement ch[2]
Definition: aac.h:279
int pass
Definition: aacenc_is.h:37
Scalefactor data are intensity stereo positions (in phase).
Definition: aac.h:89
single band psychoacoustic information
Definition: psymodel.h:37
float coeffs[1024]
coefficients for IMDCT, maybe processed
Definition: aac.h:258
float dist1
Definition: aacenc_is.h:40
float is_ener[128]
Intensity stereo pos (used by encoder)
Definition: aac.h:255
GLsizei count
Definition: opengl_enc.c:109
int num_swb
number of scalefactor window bands
Definition: aac.h:180
#define FFMAX(a, b)
Definition: common.h:79
float error
Definition: aacenc_is.h:39
#define FFMIN(a, b)
Definition: common.h:81
AAC encoder Intensity Stereo.
#define L(x)
Definition: vp56_arith.h:36
int phase
Definition: aacenc_is.h:38
#define INFINITY
Definition: math.h:27
void ff_aac_search_for_is(AACEncContext *s, AVCodecContext *avctx, ChannelElement *cpe)
Definition: aacenc_is.c:92
static int find_min_book(float maxval, int sf)
Definition: aacenc_utils.h:86
int sample_rate
samples per second
Definition: avcodec.h:2262
main external API structure.
Definition: avcodec.h:1502
IndividualChannelStream ics
Definition: aac.h:246
uint8_t group_len[8]
Definition: aac.h:176
const uint8_t * swb_sizes
table of scalefactor band sizes for a particular window
Definition: aac.h:179
uint8_t zeroes[128]
band is not coded (used by encoder)
Definition: aac.h:254
int sf_idx[128]
scalefactor indices (used by encoder)
Definition: aac.h:253
uint8_t is_mode
Set if any bands have been encoded using intensity stereo (used by encoder)
Definition: aac.h:275
Scalefactor data are intensity stereo positions (out of phase).
Definition: aac.h:88
AAC encoder utilities.
Single Channel Element - used for both SCE and LFE elements.
Definition: aac.h:245
struct AACISError ff_aac_is_encoding_err(AACEncContext *s, ChannelElement *cpe, int start, int w, int g, float ener0, float ener1, float ener01, int use_pcoeffs, int phase)
Definition: aacenc_is.c:33
channel element - generic struct for SCE/CPE/CCE/LFE
Definition: aac.h:270
enum BandType band_type[128]
band types
Definition: aac.h:249
#define INT_STEREO_LOW_LIMIT
Frequency in Hz for lower limit of intensity stereo.
Definition: aacenc_is.h:34
static float find_max_val(int group_len, int swb_size, const float *scaled)
Definition: aacenc_utils.h:74
void INT64 start
Definition: avisynth_c.h:553
Definition: vf_geq.c:46
uint8_t is_mask[128]
Set if intensity stereo is used (used by encoder)
Definition: aac.h:277
float threshold
Definition: psymodel.h:40
static float quantize_band_cost(struct AACEncContext *s, const float *in, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits, int rtz)