FFmpeg
enc_psy.c
Go to the documentation of this file.
1 /*
2  * Opus encoder
3  * Copyright (c) 2017 Rostislav Pehlivanov <atomnuker@gmail.com>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #include <float.h>
23 
24 #include "libavutil/mem.h"
25 #include "enc_psy.h"
26 #include "celt.h"
27 #include "pvq.h"
28 #include "tab.h"
30 
31 static float pvq_band_cost(CeltPVQ *pvq, CeltFrame *f, OpusRangeCoder *rc, int band,
32  float *bits, float lambda)
33 {
34  int i, b = 0;
35  uint32_t cm[2] = { (1 << f->blocks) - 1, (1 << f->blocks) - 1 };
36  const int band_size = ff_celt_freq_range[band] << f->size;
37  float buf[176 * 2], lowband_scratch[176], norm1[176], norm2[176];
38  float dist, cost, err_x = 0.0f, err_y = 0.0f;
39  float *X = buf;
40  float *X_orig = f->block[0].coeffs + (ff_celt_freq_bands[band] << f->size);
41  float *Y = (f->channels == 2) ? &buf[176] : NULL;
42  float *Y_orig = f->block[1].coeffs + (ff_celt_freq_bands[band] << f->size);
44 
45  memcpy(X, X_orig, band_size*sizeof(float));
46  if (Y)
47  memcpy(Y, Y_orig, band_size*sizeof(float));
48 
49  f->remaining2 = ((f->framebits << 3) - f->anticollapse_needed) - opus_rc_tell_frac(rc) - 1;
50  if (band <= f->coded_bands - 1) {
51  int curr_balance = f->remaining / FFMIN(3, f->coded_bands - band);
52  b = av_clip_uintp2(FFMIN(f->remaining2 + 1, f->pulses[band] + curr_balance), 14);
53  }
54 
55  if (f->dual_stereo) {
56  pvq->quant_band(pvq, f, rc, band, X, NULL, band_size, b / 2, f->blocks, NULL,
57  f->size, norm1, 0, 1.0f, lowband_scratch, cm[0]);
58 
59  pvq->quant_band(pvq, f, rc, band, Y, NULL, band_size, b / 2, f->blocks, NULL,
60  f->size, norm2, 0, 1.0f, lowband_scratch, cm[1]);
61  } else {
62  pvq->quant_band(pvq, f, rc, band, X, Y, band_size, b, f->blocks, NULL, f->size,
63  norm1, 0, 1.0f, lowband_scratch, cm[0] | cm[1]);
64  }
65 
66  for (i = 0; i < band_size; i++) {
67  err_x += (X[i] - X_orig[i])*(X[i] - X_orig[i]);
68  if (Y)
69  err_y += (Y[i] - Y_orig[i])*(Y[i] - Y_orig[i]);
70  }
71 
72  dist = sqrtf(err_x) + sqrtf(err_y);
73  cost = OPUS_RC_CHECKPOINT_BITS(rc)/8.0f;
74  *bits += cost;
75 
77 
78  return lambda*dist*cost;
79 }
80 
81 /* Populate metrics without taking into consideration neighbouring steps */
83 {
84  int silence = 0, ch, i, j;
85  /* The MDCT analysis covers 2 * OPUS_BLOCK_SIZE(bsize_analysis) samples.
86  * Each bufqueue entry holds avctx->frame_size samples (120 historically,
87  * 960 after c3aea7628c for default settings). steps_per_half is how many
88  * bufqueue entries fill one MDCT half-window.
89  *
90  * bufqueue[0] is reserved for the previous packet's overlap (initially the
91  * empty padding frame). The audio that step N analyzes starts at
92  * bufqueue[index + 1]; bufqueue[index] is its preceding overlap. */
93  const int half_samples = OPUS_BLOCK_SIZE(s->bsize_analysis);
94  const int step_samples = s->avctx->frame_size;
95  const int steps_per_half = half_samples / step_samples;
96  OpusPsyStep *st = s->steps[index];
97 
98  st->index = index;
99 
100  for (ch = 0; ch < s->avctx->ch_layout.nb_channels; ch++) {
101  memset(s->scratch, 0, sizeof(float) * (half_samples << 1));
102 
103  for (i = 1; i <= FFMIN(steps_per_half, index + 1); i++) {
104  const int offset = (steps_per_half - i) * step_samples;
105  AVFrame *cur = ff_bufqueue_peek(s->bufqueue, index + 1 - i);
106  memcpy(&s->scratch[offset], cur->extended_data[ch], cur->nb_samples*sizeof(float));
107  }
108  for (i = 0; i < steps_per_half; i++) {
109  if (index + 1 + i >= s->bufqueue->available)
110  break;
111  const int offset = (steps_per_half + i) * step_samples;
112  AVFrame *cur = ff_bufqueue_peek(s->bufqueue, index + 1 + i);
113  memcpy(&s->scratch[offset], cur->extended_data[ch], cur->nb_samples*sizeof(float));
114  }
115 
116  s->dsp->vector_fmul(s->scratch, s->scratch, s->window[s->bsize_analysis],
117  (OPUS_BLOCK_SIZE(s->bsize_analysis) << 1));
118 
119  s->mdct_fn[s->bsize_analysis](s->mdct[s->bsize_analysis], st->coeffs[ch],
120  s->scratch, sizeof(float));
121 
122  for (i = 0; i < CELT_MAX_BANDS; i++)
123  st->bands[ch][i] = &st->coeffs[ch][ff_celt_freq_bands[i] << s->bsize_analysis];
124  }
125 
126  for (ch = 0; ch < s->avctx->ch_layout.nb_channels; ch++) {
127  for (i = 0; i < CELT_MAX_BANDS; i++) {
128  float avg_c_s, energy = 0.0f, dist_dev = 0.0f;
129  const int range = ff_celt_freq_range[i] << s->bsize_analysis;
130  const float *coeffs = st->bands[ch][i];
131  for (j = 0; j < range; j++)
132  energy += coeffs[j]*coeffs[j];
133 
134  st->energy[ch][i] += sqrtf(energy);
135  silence |= !!st->energy[ch][i];
136  avg_c_s = energy / range;
137 
138  for (j = 0; j < range; j++) {
139  const float c_s = coeffs[j]*coeffs[j];
140  dist_dev += (avg_c_s - c_s)*(avg_c_s - c_s);
141  }
142 
143  st->tone[ch][i] += sqrtf(dist_dev);
144  }
145  }
146 
147  st->silence = !silence;
148 
149  if (s->avctx->ch_layout.nb_channels > 1) {
150  for (i = 0; i < CELT_MAX_BANDS; i++) {
151  float incompat = 0.0f;
152  const float *coeffs1 = st->bands[0][i];
153  const float *coeffs2 = st->bands[1][i];
154  const int range = ff_celt_freq_range[i] << s->bsize_analysis;
155  for (j = 0; j < range; j++)
156  incompat += (coeffs1[j] - coeffs2[j])*(coeffs1[j] - coeffs2[j]);
157  st->stereo[i] = sqrtf(incompat);
158  }
159  }
160 
161  for (ch = 0; ch < s->avctx->ch_layout.nb_channels; ch++) {
162  for (i = 0; i < CELT_MAX_BANDS; i++) {
163  OpusBandExcitation *ex = &s->ex[ch][i];
164  float bp_e = bessel_filter(&s->bfilter_lo[ch][i], st->energy[ch][i]);
165  bp_e = bessel_filter(&s->bfilter_hi[ch][i], bp_e);
166  bp_e *= bp_e;
167  if (bp_e > ex->excitation) {
168  st->change_amp[ch][i] = bp_e - ex->excitation;
169  st->total_change += st->change_amp[ch][i];
170  ex->excitation = ex->excitation_init = bp_e;
171  ex->excitation_dist = 0.0f;
172  }
173  if (ex->excitation > 0.0f) {
174  ex->excitation -= av_clipf((1/expf(ex->excitation_dist)), ex->excitation_init/20, ex->excitation_init/1.09);
175  ex->excitation = FFMAX(ex->excitation, 0.0f);
176  ex->excitation_dist += 1.0f;
177  }
178  }
179  }
180 }
181 
182 static void search_for_change_points(OpusPsyContext *s, float tgt_change,
183  int offset_s, int offset_e, int resolution,
184  int level)
185 {
186  int i;
187  float c_change = 0.0f;
188  if ((offset_e - offset_s) <= resolution)
189  return;
190  for (i = offset_s; i < offset_e; i++) {
191  c_change += s->steps[i]->total_change;
192  if (c_change > tgt_change)
193  break;
194  }
195  if (i == offset_e)
196  return;
197  search_for_change_points(s, tgt_change / 2.0f, offset_s, i + 0, resolution, level + 1);
198  s->inflection_points[s->inflection_points_count++] = i;
199  search_for_change_points(s, tgt_change / 2.0f, i + 1, offset_e, resolution, level + 1);
200 }
201 
203 {
204  /* buffered_steps and silent_frames count psy steps, each of which is
205  * avctx->frame_size samples (120 historically, up to 960 after
206  * c3aea7628c). The CELT framesize fsize we pick must consume at least
207  * one psy step per emitted packet, otherwise postencode_update would
208  * compute steps_out = 0 and the psy state would stall while bufqueue
209  * and afq drain. */
210  const int step_samples = s->avctx->frame_size;
211  int fsize, silent_frames;
212 
213  for (silent_frames = 0; silent_frames < s->buffered_steps; silent_frames++)
214  if (!s->steps[silent_frames]->silence)
215  break;
216  if (--silent_frames < 0)
217  return 0;
218 
220  const int packet_samples = OPUS_BLOCK_SIZE(fsize);
221  const int steps_per_packet = packet_samples / step_samples;
222 
223  if (steps_per_packet < 1 || silent_frames < steps_per_packet)
224  continue;
225  /* 48 * CELT_SHORT_BLOCKSIZE = 5760 samples = 120 ms; matches the
226  * historical (48 >> fsize) cap for frame_size = 120. */
227  s->p.frames = FFMIN(silent_frames / steps_per_packet,
228  (48 * CELT_SHORT_BLOCKSIZE) / packet_samples);
229  s->p.framesize = fsize;
230  return 1;
231  }
232 
233  return 0;
234 }
235 
236 /* Main function which decides frame size and frames per current packet */
238 {
239  int max_delay_samples = (s->options->max_delay_ms*s->avctx->sample_rate)/1000;
240  int max_bsize = FFMIN(OPUS_SAMPLES_TO_BLOCK_SIZE(max_delay_samples), CELT_BLOCK_960);
241 
242  /* These don't change for now */
243  s->p.mode = OPUS_MODE_CELT;
244  s->p.bandwidth = OPUS_BANDWIDTH_FULLBAND;
245 
246  /* Flush silent frames ASAP */
247  if (s->steps[0]->silence && flush_silent_frames(s))
248  return;
249 
250  s->p.framesize = FFMIN(max_bsize, CELT_BLOCK_960);
251  s->p.frames = 1;
252 }
253 
255 {
256  int i;
257  float total_energy_change = 0.0f;
258 
259  if (s->buffered_steps < s->max_steps && !s->eof) {
260  /* awin counts how many bufqueue entries fill one MDCT half-window.
261  * With frame_size=120 this is 8 (matches the historical 1 << bsize_analysis);
262  * with frame_size=960 it is 1, so we collect every call. */
263  const int awin = OPUS_BLOCK_SIZE(s->bsize_analysis) / s->avctx->frame_size;
264  if (++s->steps_to_process >= awin) {
265  step_collect_psy_metrics(s, s->buffered_steps - awin + 1);
266  s->steps_to_process = 0;
267  }
268  if ((++s->buffered_steps) < s->max_steps)
269  return 1;
270  }
271 
272  for (i = 0; i < s->buffered_steps; i++)
273  total_energy_change += s->steps[i]->total_change;
274 
275  search_for_change_points(s, total_energy_change / 2.0f, 0,
276  s->buffered_steps, 1, 0);
277 
279 
280  p->frames = s->p.frames;
281  p->framesize = s->p.framesize;
282  p->mode = s->p.mode;
283  p->bandwidth = s->p.bandwidth;
284 
285  return 0;
286 }
287 
289 {
290  int i, neighbouring_points = 0, start_offset = 0;
291  int steps_per_frame = OPUS_BLOCK_SIZE(s->p.framesize) / s->avctx->frame_size;
292  int step_offset = steps_per_frame*index;
293  int silence = 1;
294 
295  f->start_band = (s->p.mode == OPUS_MODE_HYBRID) ? 17 : 0;
296  f->end_band = ff_celt_band_end[s->p.bandwidth];
297  f->channels = s->avctx->ch_layout.nb_channels;
298  f->size = s->p.framesize;
299 
300  for (i = 0; i < steps_per_frame; i++)
301  silence &= s->steps[index * steps_per_frame + i]->silence;
302 
303  /* If we reach EOF with fewer collected psy steps than this packet wants
304  * to encode, the slots beyond buffered_steps were zeroed by the previous
305  * postencode_update and contain no valid analysis data. Encoding garbage
306  * with the full rate budget can overrun the range coder buffer (rng_bytes
307  * exceeds the per-frame size passed to ff_opus_rc_enc_end), so force a
308  * silent packet instead. */
309  if (s->eof && step_offset >= s->buffered_steps)
310  silence = 1;
311 
312  f->silence = silence;
313  if (f->silence) {
314  f->framebits = 0; /* Otherwise the silence flag eats up 16(!) bits */
315  return;
316  }
317 
318  for (i = 0; i < s->inflection_points_count; i++) {
319  if (s->inflection_points[i] >= step_offset) {
320  start_offset = i;
321  break;
322  }
323  }
324 
325  for (i = start_offset; i < FFMIN(steps_per_frame, s->inflection_points_count - start_offset); i++) {
326  if (s->inflection_points[i] < (step_offset + steps_per_frame)) {
327  neighbouring_points++;
328  }
329  }
330 
331  /* Transient flagging */
332  f->transient = neighbouring_points > 0;
333  f->blocks = f->transient ? OPUS_BLOCK_SIZE(s->p.framesize)/CELT_OVERLAP : 1;
334 
335  /* Some sane defaults */
336  f->pfilter = 0;
337  f->pf_gain = 0.5f;
338  f->pf_octave = 2;
339  f->pf_period = 1;
340  f->pf_tapset = 2;
341 
342  /* More sane defaults */
343  f->tf_select = 0;
344  f->anticollapse = 1;
345  f->alloc_trim = 5;
346  f->skip_band_floor = f->end_band;
347  f->intensity_stereo = f->end_band;
348  f->dual_stereo = 0;
349  f->spread = CELT_SPREAD_NORMAL;
350  memset(f->tf_change, 0, sizeof(int)*CELT_MAX_BANDS);
351  memset(f->alloc_boost, 0, sizeof(int)*CELT_MAX_BANDS);
352 }
353 
355  CeltFrame *f_out)
356 {
357  int i, f, ch;
358  int frame_size = OPUS_BLOCK_SIZE(s->p.framesize);
359  int steps_per_frame = frame_size / s->avctx->frame_size;
360  float rate, frame_bits = 0;
361 
362  /* Used for the global ROTATE flag */
363  float tonal = 0.0f;
364 
365  /* Pseudo-weights */
366  float band_score[CELT_MAX_BANDS] = { 0 };
367  float max_score = 1.0f;
368 
369  /* Pass one - one loop around each band, computing unquant stuff */
370  for (i = 0; i < CELT_MAX_BANDS; i++) {
371  float weight = 0.0f;
372  float tonal_contrib = 0.0f;
373  for (f = 0; f < steps_per_frame; f++) {
374  weight = start[f]->stereo[i];
375  for (ch = 0; ch < s->avctx->ch_layout.nb_channels; ch++) {
376  weight += start[f]->change_amp[ch][i] + start[f]->tone[ch][i] + start[f]->energy[ch][i];
377  tonal_contrib += start[f]->tone[ch][i];
378  }
379  }
380  tonal += tonal_contrib;
381  band_score[i] = weight;
382  }
383 
384  tonal /= (float)CELT_MAX_BANDS;
385 
386  for (i = 0; i < CELT_MAX_BANDS; i++) {
387  if (band_score[i] > max_score)
388  max_score = band_score[i];
389  }
390 
391  for (i = 0; i < CELT_MAX_BANDS; i++) {
392  f_out->alloc_boost[i] = (int)((band_score[i]/max_score)*3.0f);
393  //TODO: implements frame_bits adjustment.
394  }
395 
396  tonal /= 1333136.0f;
397  f_out->spread = av_clip_uintp2(lrintf(tonal), 2);
398 
399  rate = ((float)s->avctx->bit_rate) + frame_bits*frame_size*16;
400  rate *= s->lambda;
401  rate /= s->avctx->sample_rate/frame_size;
402 
403  f_out->framebits = lrintf(rate);
404  f_out->framebits = FFMIN(f_out->framebits, OPUS_MAX_FRAME_SIZE * 8);
405  f_out->framebits = FFALIGN(f_out->framebits, 8);
406 }
407 
408 static int bands_dist(OpusPsyContext *s, CeltFrame *f, float *total_dist)
409 {
410  int i, tdist = 0.0f;
411  OpusRangeCoder dump;
412 
413  ff_opus_rc_enc_init(&dump);
414  ff_celt_bitalloc(f, &dump, 1);
415 
416  for (i = 0; i < CELT_MAX_BANDS; i++) {
417  float bits = 0.0f;
418  float dist = pvq_band_cost(f->pvq, f, &dump, i, &bits, s->lambda);
419  tdist += dist;
420  }
421 
422  *total_dist = tdist;
423 
424  return 0;
425 }
426 
428 {
429  float td1, td2;
430  f->dual_stereo = 0;
431 
432  if (s->avctx->ch_layout.nb_channels < 2)
433  return;
434 
435  bands_dist(s, f, &td1);
436  f->dual_stereo = 1;
437  bands_dist(s, f, &td2);
438 
439  f->dual_stereo = td2 < td1;
440  s->dual_stereo_used += td2 < td1;
441 }
442 
444 {
445  int i, best_band = CELT_MAX_BANDS - 1;
446  float dist, best_dist = FLT_MAX;
447  /* TODO: fix, make some heuristic up here using the lambda value */
448  float end_band = 0;
449 
450  if (s->avctx->ch_layout.nb_channels < 2)
451  return;
452 
453  for (i = f->end_band; i >= end_band; i--) {
454  f->intensity_stereo = i;
455  bands_dist(s, f, &dist);
456  if (best_dist > dist) {
457  best_dist = dist;
458  best_band = i;
459  }
460  }
461 
462  f->intensity_stereo = best_band;
463  s->avg_is_band = (s->avg_is_band + f->intensity_stereo)/2.0f;
464 }
465 
467 {
468  int i, j, k, cway, config[2][CELT_MAX_BANDS] = { { 0 } };
469  int steps_per_frame = OPUS_BLOCK_SIZE(f->size) / s->avctx->frame_size;
470  float score[2] = { 0 };
471 
472  for (cway = 0; cway < 2; cway++) {
473  int mag[2];
474  int base = f->transient ? 120 : 960;
475 
476  for (i = 0; i < 2; i++) {
477  int c = ff_celt_tf_select[f->size][f->transient][cway][i];
478  mag[i] = c < 0 ? base >> FFABS(c) : base << FFABS(c);
479  }
480 
481  for (i = 0; i < CELT_MAX_BANDS; i++) {
482  float iscore0 = 0.0f;
483  float iscore1 = 0.0f;
484  for (j = 0; j < steps_per_frame; j++) {
485  for (k = 0; k < s->avctx->ch_layout.nb_channels; k++) {
486  iscore0 += start[j]->tone[k][i]*start[j]->change_amp[k][i]/mag[0];
487  iscore1 += start[j]->tone[k][i]*start[j]->change_amp[k][i]/mag[1];
488  }
489  }
490  config[cway][i] = FFABS(iscore0 - 1.0f) < FFABS(iscore1 - 1.0f);
491  score[cway] += config[cway][i] ? iscore1 : iscore0;
492  }
493  }
494 
495  f->tf_select = score[0] < score[1];
496  memcpy(f->tf_change, config[f->tf_select], sizeof(int)*CELT_MAX_BANDS);
497 
498  return 0;
499 }
500 
502 {
503  int start_transient_flag = f->transient;
504  int steps_per_frame = OPUS_BLOCK_SIZE(s->p.framesize) / s->avctx->frame_size;
505  OpusPsyStep **start = &s->steps[index * steps_per_frame];
506 
507  if (f->silence)
508  return 0;
509 
510  celt_gauge_psy_weight(s, start, f);
513  celt_search_for_tf(s, start, f);
514 
515  if (f->transient != start_transient_flag) {
516  f->blocks = f->transient ? OPUS_BLOCK_SIZE(s->p.framesize)/CELT_OVERLAP : 1;
517  return 1;
518  }
519 
520  return 0;
521 }
522 
524 {
525  int i, frame_size = OPUS_BLOCK_SIZE(s->p.framesize);
526  int steps_out = s->p.frames*(frame_size/s->avctx->frame_size);
527  void *tmp[FF_BUFQUEUE_SIZE];
528  float ideal_fbits;
529 
530  for (i = 0; i < steps_out; i++)
531  memset(s->steps[i], 0, sizeof(OpusPsyStep));
532 
533  for (i = 0; i < s->max_steps; i++)
534  tmp[i] = s->steps[i];
535 
536  for (i = 0; i < s->max_steps; i++) {
537  const int i_new = i - steps_out;
538  s->steps[i_new < 0 ? s->max_steps + i_new : i_new] = tmp[i];
539  }
540 
541  for (i = steps_out; i < s->buffered_steps; i++)
542  s->steps[i]->index -= steps_out;
543 
544  ideal_fbits = s->avctx->bit_rate/(s->avctx->sample_rate/frame_size);
545 
546  for (i = 0; i < s->p.frames; i++) {
547  s->avg_is_band += f[i].intensity_stereo;
548  if (f[i].framebits > 0)
549  s->lambda *= ideal_fbits / f[i].framebits;
550  }
551 
552  s->avg_is_band /= (s->p.frames + 1);
553 
554  s->steps_to_process = 0;
555  s->buffered_steps -= steps_out;
556  s->total_packets_out += s->p.frames;
557  s->inflection_points_count = 0;
558 }
559 
561  struct FFBufQueue *bufqueue, OpusEncOptions *options)
562 {
563  int i, ch, ret;
564 
565  s->lambda = 1.0f;
566  s->options = options;
567  s->avctx = avctx;
568  s->bufqueue = bufqueue;
569  s->max_steps = ceilf(s->options->max_delay_ms * avctx->sample_rate /
570  (1000.0f * avctx->frame_size));
571 
572  s->bsize_analysis = CELT_BLOCK_960;
573  s->avg_is_band = CELT_MAX_BANDS - 1;
574  s->inflection_points_count = 0;
575 
576  s->inflection_points = av_mallocz(sizeof(*s->inflection_points)*s->max_steps);
577  if (!s->inflection_points) {
578  ret = AVERROR(ENOMEM);
579  goto fail;
580  }
581 
583  if (!s->dsp) {
584  ret = AVERROR(ENOMEM);
585  goto fail;
586  }
587 
588  for (ch = 0; ch < s->avctx->ch_layout.nb_channels; ch++) {
589  for (i = 0; i < CELT_MAX_BANDS; i++) {
590  bessel_init(&s->bfilter_hi[ch][i], 1.0f, 19.0f, 100.0f, 1);
591  bessel_init(&s->bfilter_lo[ch][i], 1.0f, 20.0f, 100.0f, 0);
592  }
593  }
594 
595  for (i = 0; i < s->max_steps; i++) {
596  s->steps[i] = av_mallocz(sizeof(OpusPsyStep));
597  if (!s->steps[i]) {
598  ret = AVERROR(ENOMEM);
599  goto fail;
600  }
601  }
602 
603  for (i = 0; i < CELT_BLOCK_NB; i++) {
604  float tmp;
605  const int len = OPUS_BLOCK_SIZE(i);
606  const float scale = 68 << (CELT_BLOCK_NB - 1 - i);
607  s->window[i] = av_malloc(2*len*sizeof(float));
608  if (!s->window[i]) {
609  ret = AVERROR(ENOMEM);
610  goto fail;
611  }
612  generate_window_func(s->window[i], 2*len, WFUNC_SINE, &tmp);
613  ret = av_tx_init(&s->mdct[i], &s->mdct_fn[i], AV_TX_FLOAT_MDCT,
614  0, 15 << (i + 3), &scale, 0);
615  if (ret < 0)
616  goto fail;
617  }
618 
619  return 0;
620 
621 fail:
622  av_freep(&s->inflection_points);
623  av_freep(&s->dsp);
624 
625  for (i = 0; i < CELT_BLOCK_NB; i++) {
626  av_tx_uninit(&s->mdct[i]);
627  av_freep(&s->window[i]);
628  }
629 
630  for (i = 0; i < s->max_steps; i++)
631  av_freep(&s->steps[i]);
632 
633  return ret;
634 }
635 
637 {
638  s->eof = 1;
639 }
640 
642 {
643  int i;
644 
645  av_freep(&s->inflection_points);
646  av_freep(&s->dsp);
647 
648  for (i = 0; i < CELT_BLOCK_NB; i++) {
649  av_tx_uninit(&s->mdct[i]);
650  av_freep(&s->window[i]);
651  }
652 
653  for (i = 0; i < s->max_steps; i++)
654  av_freep(&s->steps[i]);
655 
656  av_log(s->avctx, AV_LOG_INFO, "Average Intensity Stereo band: %0.1f\n", s->avg_is_band);
657  av_log(s->avctx, AV_LOG_INFO, "Dual Stereo used: %0.2f%%\n", ((float)s->dual_stereo_used/s->total_packets_out)*100.0f);
658 
659  return 0;
660 }
OpusPsyStep::stereo
float stereo[CELT_MAX_BANDS]
Definition: enc_psy.h:38
celt_search_for_dual_stereo
static void celt_search_for_dual_stereo(OpusPsyContext *s, CeltFrame *f)
Definition: enc_psy.c:427
AVCodecContext::frame_size
int frame_size
Number of samples per channel in an audio frame.
Definition: avcodec.h:1068
flush_silent_frames
static int flush_silent_frames(OpusPsyContext *s)
Definition: enc_psy.c:202
level
uint8_t level
Definition: svq3.c:208
bands_dist
static int bands_dist(OpusPsyContext *s, CeltFrame *f, float *total_dist)
Definition: enc_psy.c:408
CELT_MAX_BANDS
#define CELT_MAX_BANDS
Definition: celt.h:43
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
AVCodecContext::sample_rate
int sample_rate
samples per second
Definition: avcodec.h:1040
OpusBandExcitation::excitation
float excitation
Definition: enc_psy.h:47
av_clip_uintp2
#define av_clip_uintp2
Definition: common.h:124
OPUS_MAX_FRAME_SIZE
#define OPUS_MAX_FRAME_SIZE
Definition: opus.h:28
search_for_change_points
static void search_for_change_points(OpusPsyContext *s, float tgt_change, int offset_s, int offset_e, int resolution, int level)
Definition: enc_psy.c:182
step_collect_psy_metrics
static void step_collect_psy_metrics(OpusPsyContext *s, int index)
Definition: enc_psy.c:82
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:459
CELT_BLOCK_NB
@ CELT_BLOCK_NB
Definition: celt.h:68
OpusBandExcitation::excitation_init
float excitation_init
Definition: enc_psy.h:49
FF_BUFQUEUE_SIZE
#define FF_BUFQUEUE_SIZE
Definition: audiotoolboxenc.c:25
CeltFrame::spread
enum CeltSpread spread
Definition: celt.h:130
celt_search_for_tf
static int celt_search_for_tf(OpusPsyContext *s, OpusPsyStep **start, CeltFrame *f)
Definition: enc_psy.c:466
b
#define b
Definition: input.c:43
expf
#define expf(x)
Definition: libm.h:285
base
uint8_t base
Definition: vp3data.h:128
float.h
OpusPsyStep::silence
int silence
Definition: enc_psy.h:35
OPUS_BANDWIDTH_FULLBAND
@ OPUS_BANDWIDTH_FULLBAND
Definition: opus.h:54
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
CeltFrame::framebits
int framebits
Definition: celt.h:139
X
@ X
Definition: vf_addroi.c:27
OpusPsyStep::coeffs
float coeffs[OPUS_MAX_CHANNELS][OPUS_BLOCK_SIZE(CELT_BLOCK_960)]
Definition: enc_psy.h:43
tf_sess_config.config
config
Definition: tf_sess_config.py:33
ceilf
static __device__ float ceilf(float a)
Definition: cuda_runtime.h:175
av_tx_init
av_cold int av_tx_init(AVTXContext **ctx, av_tx_fn *tx, enum AVTXType type, int inv, int len, const void *scale, uint64_t flags)
Initialize a transform context with the given configuration (i)MDCTs with an odd length are currently...
Definition: tx.c:903
OPUS_RC_CHECKPOINT_SPAWN
#define OPUS_RC_CHECKPOINT_SPAWN(rc)
Definition: rc.h:117
CeltPVQ
Definition: pvq.h:37
ff_opus_rc_enc_init
void ff_opus_rc_enc_init(OpusRangeCoder *rc)
Definition: rc.c:402
ff_opus_psy_postencode_update
void ff_opus_psy_postencode_update(OpusPsyContext *s, CeltFrame *f)
Definition: enc_psy.c:523
fail
#define fail()
Definition: checkasm.h:225
resolution
The official guide to swscale for confused that consecutive non overlapping rectangles of slice_bottom special converter These generally are unscaled converters of common like for each output line the vertical scaler pulls lines from a ring buffer When the ring buffer does not contain the wanted then it is pulled from the input slice through the input converter and horizontal scaler The result is also stored in the ring buffer to serve future vertical scaler requests When no more output can be generated because lines from a future slice would be then all remaining lines in the current slice are horizontally scaled and put in the ring buffer[This is done for luma and chroma, each with possibly different numbers of lines per picture.] Input to YUV Converter When the input to the main path is not planar bits per component YUV or bit it is converted to planar bit YUV Two sets of converters exist for this the other leaves the full chroma resolution
Definition: swscale.txt:54
OpusPsyStep::change_amp
float change_amp[OPUS_MAX_CHANNELS][CELT_MAX_BANDS]
Definition: enc_psy.h:39
AVCodecContext::flags
int flags
AV_CODEC_FLAG_*.
Definition: avcodec.h:500
weight
const h264_weight_func weight
Definition: h264dsp_init.c:33
ff_opus_psy_signal_eof
void ff_opus_psy_signal_eof(OpusPsyContext *s)
Definition: enc_psy.c:636
OPUS_SAMPLES_TO_BLOCK_SIZE
#define OPUS_SAMPLES_TO_BLOCK_SIZE(x)
Definition: enc.h:41
celt_gauge_psy_weight
static void celt_gauge_psy_weight(OpusPsyContext *s, OpusPsyStep **start, CeltFrame *f_out)
Definition: enc_psy.c:354
bessel_filter
static float bessel_filter(FFBesselFilter *s, float x)
Definition: enc_utils.h:79
av_cold
#define av_cold
Definition: attributes.h:119
CeltPVQ::quant_band
QUANT_FN * quant_band
Definition: pvq.h:42
float
float
Definition: af_crystalizer.c:122
AV_TX_FLOAT_MDCT
@ AV_TX_FLOAT_MDCT
Standard MDCT with a sample data type of float, double or int32_t, respectively.
Definition: tx.h:68
celt_search_for_intensity
static void celt_search_for_intensity(OpusPsyContext *s, CeltFrame *f)
Definition: enc_psy.c:443
OpusPsyStep
Definition: enc_psy.h:33
s
#define s(width, name)
Definition: cbs_vp9.c:198
frame_size
int frame_size
Definition: mxfenc.c:2489
pvq.h
bits
uint8_t bits
Definition: vp3data.h:128
CeltFrame::alloc_boost
int alloc_boost[CELT_MAX_BANDS]
Definition: celt.h:120
av_mallocz
#define av_mallocz(s)
Definition: tableprint_vlc.h:31
fsize
static int64_t fsize(FILE *f)
Definition: audiomatch.c:29
OPUS_BLOCK_SIZE
#define OPUS_BLOCK_SIZE(x)
Definition: enc.h:39
OpusPsyContext
Definition: enc_psy.h:52
tmp
static uint8_t tmp[40]
Definition: aes_ctr.c:52
FFABS
#define FFABS(a)
Absolute value, Note, INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
Definition: common.h:74
if
if(ret)
Definition: filter_design.txt:179
OpusBandExcitation::excitation_dist
float excitation_dist
Definition: enc_psy.h:48
OpusPacketInfo
Definition: enc.h:48
OPUS_MODE_CELT
@ OPUS_MODE_CELT
Definition: opus.h:44
ff_celt_tf_select
const int8_t ff_celt_tf_select[4][2][2][2]
Definition: tab.c:846
CELT_BLOCK_960
@ CELT_BLOCK_960
Definition: celt.h:66
opus_rc_tell_frac
static av_always_inline uint32_t opus_rc_tell_frac(const OpusRangeCoder *rc)
Definition: rc.h:67
NULL
#define NULL
Definition: coverity.c:32
ff_celt_freq_range
const uint8_t ff_celt_freq_range[]
Definition: tab.c:836
OpusPsyStep::total_change
float total_change
Definition: enc_psy.h:40
ff_opus_psy_end
av_cold int ff_opus_psy_end(OpusPsyContext *s)
Definition: enc_psy.c:641
psy_output_groups
static void psy_output_groups(OpusPsyContext *s)
Definition: enc_psy.c:237
options
Definition: swscale.c:45
sqrtf
static __device__ float sqrtf(float a)
Definition: cuda_runtime.h:184
generate_window_func
static void generate_window_func(float *lut, int N, int win_func, float *overlap)
Definition: window_func.h:63
enc_psy.h
av_clipf
av_clipf
Definition: af_crystalizer.c:122
ff_opus_psy_celt_frame_init
void ff_opus_psy_celt_frame_init(OpusPsyContext *s, CeltFrame *f, int index)
Definition: enc_psy.c:288
index
int index
Definition: gxfenc.c:90
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
options
const OptionDef options[]
OpusPsyStep::bands
float * bands[OPUS_MAX_CHANNELS][CELT_MAX_BANDS]
Definition: enc_psy.h:42
OpusPsyStep::index
int index
Definition: enc_psy.h:34
f
f
Definition: af_crystalizer.c:122
i
#define i(width, name, range_min, range_max)
Definition: cbs_h264.c:63
for
for(k=2;k<=8;++k)
Definition: h264pred_template.c:424
OpusPsyStep::tone
float tone[OPUS_MAX_CHANNELS][CELT_MAX_BANDS]
Definition: enc_psy.h:37
OpusRangeCoder
Definition: rc.h:41
range
enum AVColorRange range
Definition: mediacodec_wrapper.c:2594
ff_opus_psy_process
int ff_opus_psy_process(OpusPsyContext *s, OpusPacketInfo *p)
Definition: enc_psy.c:254
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
ff_opus_psy_celt_frame_process
int ff_opus_psy_celt_frame_process(OpusPsyContext *s, CeltFrame *f, int index)
Definition: enc_psy.c:501
bessel_init
static int bessel_init(FFBesselFilter *s, float n, float f0, float fs, int highpass)
Definition: enc_utils.h:72
Y
#define Y
Definition: boxblur.h:37
av_tx_uninit
av_cold void av_tx_uninit(AVTXContext **ctx)
Frees a context and sets *ctx to NULL, does nothing when *ctx == NULL.
Definition: tx.c:295
AV_LOG_INFO
#define AV_LOG_INFO
Standard information.
Definition: log.h:221
pvq_band_cost
static float pvq_band_cost(CeltPVQ *pvq, CeltFrame *f, OpusRangeCoder *rc, int band, float *bits, float lambda)
Definition: enc_psy.c:31
AVFrame::nb_samples
int nb_samples
number of audio samples (per channel) described by this frame
Definition: frame.h:539
tab.h
lrintf
#define lrintf(x)
Definition: libm_mips.h:72
av_malloc
#define av_malloc(s)
Definition: ops_asmgen.c:44
ff_bufqueue_peek
static AVFrame * ff_bufqueue_peek(struct FFBufQueue *queue, unsigned index)
Get a buffer from the queue without altering it.
Definition: bufferqueue.h:87
FFBufQueue
Structure holding the queue.
Definition: bufferqueue.h:49
AVFrame::extended_data
uint8_t ** extended_data
pointers to the data planes/channels.
Definition: frame.h:520
CELT_SHORT_BLOCKSIZE
#define CELT_SHORT_BLOCKSIZE
Definition: celt.h:39
CELT_BLOCK_120
@ CELT_BLOCK_120
Definition: celt.h:63
OPUS_MODE_HYBRID
@ OPUS_MODE_HYBRID
Definition: opus.h:43
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
OpusPsyStep::energy
float energy[OPUS_MAX_CHANNELS][CELT_MAX_BANDS]
Definition: enc_psy.h:36
len
int len
Definition: vorbis_enc_data.h:426
celt.h
ret
ret
Definition: filter_design.txt:187
ff_celt_bitalloc
void ff_celt_bitalloc(CeltFrame *f, OpusRangeCoder *rc, int encode)
Definition: celt.c:137
CELT_OVERLAP
#define CELT_OVERLAP
Definition: celt.h:40
window_func.h
AVCodecContext
main external API structure.
Definition: avcodec.h:443
WFUNC_SINE
@ WFUNC_SINE
Definition: window_func.h:31
cm
#define cm
Definition: dvbsubdec.c:40
Windows::Graphics::DirectX::Direct3D11::p
IDirect3DDxgiInterfaceAccess _COM_Outptr_ void ** p
Definition: vsrc_gfxcapture_winrt.hpp:53
OpusBandExcitation
Definition: enc_psy.h:46
mem.h
AV_CODEC_FLAG_BITEXACT
#define AV_CODEC_FLAG_BITEXACT
Use only bitexact stuff (except (I)DCT).
Definition: avcodec.h:322
scale
static void scale(int *out, const int *in, const int w, const int h, const int shift)
Definition: intra.c:278
OpusEncOptions
Definition: enc.h:43
FFALIGN
#define FFALIGN(x, a)
Definition: macros.h:78
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:35
avpriv_float_dsp_alloc
av_cold AVFloatDSPContext * avpriv_float_dsp_alloc(int bit_exact)
Allocate a float DSP context.
Definition: float_dsp.c:135
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
OPUS_RC_CHECKPOINT_ROLLBACK
#define OPUS_RC_CHECKPOINT_ROLLBACK(rc)
Definition: rc.h:124
ff_celt_band_end
const uint8_t ff_celt_band_end[]
Definition: tab.c:29
ff_celt_freq_bands
const uint8_t ff_celt_freq_bands[]
Definition: tab.c:832
ff_opus_psy_init
av_cold int ff_opus_psy_init(OpusPsyContext *s, AVCodecContext *avctx, struct FFBufQueue *bufqueue, OpusEncOptions *options)
Definition: enc_psy.c:560
OPUS_RC_CHECKPOINT_BITS
#define OPUS_RC_CHECKPOINT_BITS(rc)
Definition: rc.h:121
CELT_SPREAD_NORMAL
@ CELT_SPREAD_NORMAL
Definition: celt.h:58
CeltFrame
Definition: celt.h:98