FFmpeg: af_atempo.c

/*
 * Copyright (c) 2012 Pavel Koshevoy <pkoshevoy at gmail dot com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * tempo scaling audio filter -- an implementation of the WSOLA algorithm
 *
 * Based on MIT licensed yaeAudioTempoFilter.h and yaeAudioFragment.h
 * from the Apprentice Video player by Pavel Koshevoy.
 * https://sourceforge.net/projects/apprenticevideo/
 *
 * An explanation of the SOLA algorithm is available at
 * http://www.surina.net/article/time-and-pitch-scaling.html
 *
 * WSOLA is very similar to SOLA; the one major difference between the
 * two is that SOLA shifts audio fragments along the output stream,
 * whereas WSOLA shifts audio fragments along the input stream.
 *
 * The advantage of the WSOLA algorithm is that the overlap region size
 * is always the same, therefore the blending function is constant and
 * can be precomputed.
 */
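
/*
 * Usage illustration (not part of the original source): the filter is
 * typically invoked from the command line, e.g.
 *
 *   ffmpeg -i input.wav -filter:a "atempo=1.25" output.wav
 *
 * plays the audio 25% faster without changing its pitch.  Accepted
 * tempo values lie in [YAE_ATEMPO_MIN, YAE_ATEMPO_MAX] as defined
 * below, i.e. [0.5, 100.0]; the "tempo" option can also be changed at
 * runtime via process_command().
 */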

#include <float.h>
#include "libavutil/avassert.h"
#include "libavutil/channel_layout.h"
#include "libavutil/mem.h"
#include "libavutil/opt.h"
#include "libavutil/samplefmt.h"
#include "libavutil/tx.h"
#include "avfilter.h"
#include "audio.h"
#include "internal.h"

/**
 * A fragment of audio waveform
 */
typedef struct AudioFragment {
    // index of the first sample of this fragment in the overall waveform;
    // 0: input sample position
    // 1: output sample position
    int64_t position[2];

    // original packed multi-channel samples:
    uint8_t *data;

    // number of samples in this fragment:
    int nsamples;

    // rDFT transform of the down-mixed mono fragment, used for
    // fast waveform alignment via correlation in the frequency domain:
    float *xdat_in;
    float *xdat;
} AudioFragment;

/**
 * Filter state machine states
 */
typedef enum {
    YAE_LOAD_FRAGMENT,
    YAE_ADJUST_POSITION,
    YAE_RELOAD_FRAGMENT,
    YAE_OUTPUT_OVERLAP_ADD,
    YAE_FLUSH_OUTPUT,
} FilterState;

/**
 * Filter state machine
 */
typedef struct ATempoContext {
    const AVClass *class;

    // ring-buffer of input samples, necessary because sometimes
    // the input fragment position may be adjusted backwards:
    uint8_t *buffer;

    // ring-buffer maximum capacity, expressed in samples:
    int ring;

    // ring-buffer housekeeping:
    int size;
    int head;
    int tail;

    // 0: input sample position corresponding to the ring buffer tail
    // 1: output sample position
    int64_t position[2];

    // first input timestamp; all other timestamps are offset by this one
    int64_t start_pts;

    // sample format:
    enum AVSampleFormat format;

    // number of channels:
    int channels;

    // number of bytes to skip from one sample to the next, across
    // multiple channels;
    // stride = (number-of-channels * bits-per-sample-per-channel) / 8
    int stride;

    // fragment window size, power-of-two integer:
    int window;

    // Hann window coefficients, for feathering
    // (blending) the overlapping fragment region:
    float *hann;

    // tempo scaling factor:
    double tempo;

    // a snapshot of the previous fragment's input and output positions,
    // captured when the tempo scale factor was most recently set:
    int64_t origin[2];

    // current/previous fragment ring-buffer:
    AudioFragment frag[2];

    // current fragment index:
    uint64_t nfrag;

    // current state:
    FilterState state;

    // for fast correlation calculation in the frequency domain:
    AVTXContext *real_to_complex;
    AVTXContext *complex_to_real;
    av_tx_fn r2c_fn, c2r_fn;
    float *correlation_in;
    float *correlation;

    // for managing AVFilterPad.request_frame and AVFilterPad.filter_frame
    AVFrame *dst_buffer;
    uint8_t *dst;
    uint8_t *dst_end;
    uint64_t nsamples_in;
    uint64_t nsamples_out;
} ATempoContext;

#define YAE_ATEMPO_MIN 0.5
#define YAE_ATEMPO_MAX 100.0

#define OFFSET(x) offsetof(ATempoContext, x)

static const AVOption atempo_options[] = {
    { "tempo", "set tempo scale factor",
      OFFSET(tempo), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 },
      YAE_ATEMPO_MIN, YAE_ATEMPO_MAX,
      AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_FILTERING_PARAM |
      AV_OPT_FLAG_RUNTIME_PARAM },
    { NULL }
};

AVFILTER_DEFINE_CLASS(atempo);

static AudioFragment *yae_curr_frag(ATempoContext *atempo)
{
    return &atempo->frag[atempo->nfrag % 2];
}

static AudioFragment *yae_prev_frag(ATempoContext *atempo)
{
    return &atempo->frag[(atempo->nfrag + 1) % 2];
}

/**
 * Reset filter to initial state; do not deallocate existing local buffers.
 */
static void yae_clear(ATempoContext *atempo)
{
    atempo->size = 0;
    atempo->head = 0;
    atempo->tail = 0;

    atempo->nfrag = 0;
    atempo->state = YAE_LOAD_FRAGMENT;
    atempo->start_pts = AV_NOPTS_VALUE;

    atempo->position[0] = 0;
    atempo->position[1] = 0;

    atempo->origin[0] = 0;
    atempo->origin[1] = 0;

    atempo->frag[0].position[0] = 0;
    atempo->frag[0].position[1] = 0;
    atempo->frag[0].nsamples = 0;

    atempo->frag[1].position[0] = 0;
    atempo->frag[1].position[1] = 0;
    atempo->frag[1].nsamples = 0;

    // shift the position of the 1st fragment left by half a window
    // so that no re-normalization is required for the left half
    // of the 1st fragment:
    atempo->frag[0].position[0] = -(int64_t)(atempo->window / 2);
    atempo->frag[0].position[1] = -(int64_t)(atempo->window / 2);

    av_frame_free(&atempo->dst_buffer);
    atempo->dst = NULL;
    atempo->dst_end = NULL;

    atempo->nsamples_in = 0;
    atempo->nsamples_out = 0;
}
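
/*
 * Worked example of the shift above (illustration only): with
 * window == 2048 the 1st fragment starts at sample -1024, so its left
 * half lies before the first real input sample and is filled with
 * zeros by yae_load_frag().  The audible output thus begins in the
 * right half of the window, which is the region the next fragment's
 * complementary Hann half blends against, so no separate
 * re-normalization pass is needed.
 */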

/**
 * Reset filter to initial state and deallocate all buffers.
 */
static void yae_release_buffers(ATempoContext *atempo)
{
    yae_clear(atempo);

    av_freep(&atempo->frag[0].data);
    av_freep(&atempo->frag[1].data);
    av_freep(&atempo->frag[0].xdat_in);
    av_freep(&atempo->frag[1].xdat_in);
    av_freep(&atempo->frag[0].xdat);
    av_freep(&atempo->frag[1].xdat);

    av_freep(&atempo->buffer);
    av_freep(&atempo->hann);
    av_freep(&atempo->correlation_in);
    av_freep(&atempo->correlation);

    av_tx_uninit(&atempo->real_to_complex);
    av_tx_uninit(&atempo->complex_to_real);
}

/* av_realloc is not aligned enough; fortunately, the data does not need to
 * be preserved */
#define RE_MALLOC_OR_FAIL(field, field_size, element_size) \
    do {                                                   \
        av_freep(&field);                                  \
        field = av_calloc(field_size, element_size);       \
        if (!field) {                                      \
            yae_release_buffers(atempo);                   \
            return AVERROR(ENOMEM);                        \
        }                                                  \
    } while (0)

/**
 * Prepare filter for processing audio data of given format,
 * sample rate and number of channels.
 */
static int yae_reset(ATempoContext *atempo,
                     enum AVSampleFormat format,
                     int sample_rate,
                     int channels)
{
    const int sample_size = av_get_bytes_per_sample(format);
    uint32_t nlevels = 0;
    float scale = 1.f, iscale = 1.f;
    uint32_t pot;
    int i;

    atempo->format = format;
    atempo->channels = channels;
    atempo->stride = sample_size * channels;

    // pick a segment window size:
    atempo->window = sample_rate / 24;

    // adjust window size to be a power-of-two integer:
    nlevels = av_log2(atempo->window);
    pot = 1 << nlevels;
    av_assert0(pot <= atempo->window);

    if (pot < atempo->window) {
        atempo->window = pot * 2;
        nlevels++;
    }

    // initialize audio fragment buffers:
    RE_MALLOC_OR_FAIL(atempo->frag[0].data, atempo->window, atempo->stride);
    RE_MALLOC_OR_FAIL(atempo->frag[1].data, atempo->window, atempo->stride);
    RE_MALLOC_OR_FAIL(atempo->frag[0].xdat_in, (atempo->window + 1), sizeof(AVComplexFloat));
    RE_MALLOC_OR_FAIL(atempo->frag[1].xdat_in, (atempo->window + 1), sizeof(AVComplexFloat));
    RE_MALLOC_OR_FAIL(atempo->frag[0].xdat, (atempo->window + 1), sizeof(AVComplexFloat));
    RE_MALLOC_OR_FAIL(atempo->frag[1].xdat, (atempo->window + 1), sizeof(AVComplexFloat));

    // initialize rDFT contexts:
    av_tx_uninit(&atempo->real_to_complex);
    av_tx_uninit(&atempo->complex_to_real);

    av_tx_init(&atempo->real_to_complex, &atempo->r2c_fn, AV_TX_FLOAT_RDFT, 0, 1 << (nlevels + 1), &scale, 0);
    if (!atempo->real_to_complex) {
        yae_release_buffers(atempo);
        return AVERROR(ENOMEM);
    }

    av_tx_init(&atempo->complex_to_real, &atempo->c2r_fn, AV_TX_FLOAT_RDFT, 1, 1 << (nlevels + 1), &iscale, 0);
    if (!atempo->complex_to_real) {
        yae_release_buffers(atempo);
        return AVERROR(ENOMEM);
    }

    RE_MALLOC_OR_FAIL(atempo->correlation_in, (atempo->window + 1), sizeof(AVComplexFloat));
    RE_MALLOC_OR_FAIL(atempo->correlation, atempo->window, sizeof(AVComplexFloat));

    atempo->ring = atempo->window * 3;
    RE_MALLOC_OR_FAIL(atempo->buffer, atempo->ring, atempo->stride);

    // initialize the Hann window function:
    RE_MALLOC_OR_FAIL(atempo->hann, atempo->window, sizeof(float));

    for (i = 0; i < atempo->window; i++) {
        double t = (double)i / (double)(atempo->window - 1);
        double h = 0.5 * (1.0 - cos(2.0 * M_PI * t));
        atempo->hann[i] = (float)h;
    }

    yae_clear(atempo);
    return 0;
}
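
/*
 * Worked example of the window sizing above (illustration only): at a
 * 44100 Hz sample rate the initial window is 44100 / 24 == 1837 samples;
 * av_log2(1837) == 10, so pot == 1024 < 1837 and the window is rounded
 * up to the next power of two, 2048 (nlevels becomes 11).  The rDFT
 * length is then 1 << (nlevels + 1) == 4096, and the sliding-window
 * ring buffer holds 3 * 2048 == 6144 samples.
 */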

static int yae_update(AVFilterContext *ctx)
{
    const AudioFragment *prev;
    ATempoContext *atempo = ctx->priv;

    prev = yae_prev_frag(atempo);
    atempo->origin[0] = prev->position[0] + atempo->window / 2;
    atempo->origin[1] = prev->position[1] + atempo->window / 2;
    return 0;
}

/**
 * A helper macro for initializing a complex data buffer with scalar data
 * of a given type.
 */
#define yae_init_xdat(scalar_type, scalar_max)                          \
    do {                                                                \
        const uint8_t *src_end = src +                                  \
            frag->nsamples * atempo->channels * sizeof(scalar_type);    \
                                                                        \
        float *xdat = frag->xdat_in;                                    \
        scalar_type tmp;                                                \
                                                                        \
        if (atempo->channels == 1) {                                    \
            for (; src < src_end; xdat++) {                             \
                tmp = *(const scalar_type *)src;                        \
                src += sizeof(scalar_type);                             \
                                                                        \
                *xdat = (float)tmp;                                     \
            }                                                           \
        } else {                                                        \
            float s, max, ti, si;                                       \
            int i;                                                      \
                                                                        \
            for (; src < src_end; xdat++) {                             \
                tmp = *(const scalar_type *)src;                        \
                src += sizeof(scalar_type);                             \
                                                                        \
                max = (float)tmp;                                       \
                s = FFMIN((float)scalar_max,                            \
                          (float)fabsf(max));                           \
                                                                        \
                for (i = 1; i < atempo->channels; i++) {                \
                    tmp = *(const scalar_type *)src;                    \
                    src += sizeof(scalar_type);                         \
                                                                        \
                    ti = (float)tmp;                                    \
                    si = FFMIN((float)scalar_max,                       \
                               (float)fabsf(ti));                       \
                                                                        \
                    if (s < si) {                                       \
                        s = si;                                         \
                        max = ti;                                       \
                    }                                                   \
                }                                                       \
                                                                        \
                *xdat = max;                                            \
            }                                                           \
        }                                                               \
    } while (0)

/**
 * Initialize the complex data buffer of a given audio fragment
 * with down-mixed mono data of the appropriate scalar type.
 */
static void yae_downmix(ATempoContext *atempo, AudioFragment *frag)
{
    // shortcuts:
    const uint8_t *src = frag->data;

    // init the complex data buffer used for the FFT and correlation:
    memset(frag->xdat_in, 0, sizeof(AVComplexFloat) * (atempo->window + 1));

    if (atempo->format == AV_SAMPLE_FMT_U8) {
        yae_init_xdat(uint8_t, 127);
    } else if (atempo->format == AV_SAMPLE_FMT_S16) {
        yae_init_xdat(int16_t, 32767);
    } else if (atempo->format == AV_SAMPLE_FMT_S32) {
        yae_init_xdat(int, 2147483647);
    } else if (atempo->format == AV_SAMPLE_FMT_FLT) {
        yae_init_xdat(float, 1);
    } else if (atempo->format == AV_SAMPLE_FMT_DBL) {
        yae_init_xdat(double, 1);
    }
}
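
/*
 * Note on the "down-mix" policy above (interpretation): rather than
 * averaging channels, yae_init_xdat() keeps, for every sample period,
 * the single channel value with the largest magnitude.  For a stereo
 * sample pair (L == 0.2, R == -0.7) the mono value stored in xdat_in
 * is -0.7.  This preserves the transient peaks that the correlation
 * based alignment keys on, which an average could smear or cancel.
 */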

/**
 * Populate the internal data buffer on an as-needed basis.
 *
 * @return
 *   0 if requested data was already available or was successfully loaded,
 *   AVERROR(EAGAIN) if more input data is required.
 */
static int yae_load_data(ATempoContext *atempo,
                         const uint8_t **src_ref,
                         const uint8_t *src_end,
                         int64_t stop_here)
{
    // shortcut:
    const uint8_t *src = *src_ref;
    const int read_size = stop_here - atempo->position[0];

    if (stop_here <= atempo->position[0]) {
        return 0;
    }

    // samples are not expected to be skipped, unless tempo is greater than 2:
    av_assert0(read_size <= atempo->ring || atempo->tempo > 2.0);

    while (atempo->position[0] < stop_here && src < src_end) {
        int src_samples = (src_end - src) / atempo->stride;

        // load data piece-wise, in order to avoid complicating the logic:
        int nsamples = FFMIN(read_size, src_samples);
        int na;
        int nb;

        nsamples = FFMIN(nsamples, atempo->ring);
        na = FFMIN(nsamples, atempo->ring - atempo->tail);
        nb = FFMIN(nsamples - na, atempo->ring);

        if (na) {
            uint8_t *a = atempo->buffer + atempo->tail * atempo->stride;
            memcpy(a, src, na * atempo->stride);

            src += na * atempo->stride;
            atempo->position[0] += na;

            atempo->size = FFMIN(atempo->size + na, atempo->ring);
            atempo->tail = (atempo->tail + na) % atempo->ring;
            atempo->head =
                atempo->size < atempo->ring ?
                atempo->tail - atempo->size :
                atempo->tail;
        }

        if (nb) {
            uint8_t *b = atempo->buffer;
            memcpy(b, src, nb * atempo->stride);

            src += nb * atempo->stride;
            atempo->position[0] += nb;

            atempo->size = FFMIN(atempo->size + nb, atempo->ring);
            atempo->tail = (atempo->tail + nb) % atempo->ring;
            atempo->head =
                atempo->size < atempo->ring ?
                atempo->tail - atempo->size :
                atempo->tail;
        }
    }

    // pass back the updated source buffer pointer:
    *src_ref = src;

    // sanity check:
    av_assert0(atempo->position[0] <= stop_here);

    return atempo->position[0] == stop_here ? 0 : AVERROR(EAGAIN);
}
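
/*
 * Ring-buffer wrap illustration for the split copy above (hypothetical
 * numbers): with ring == 8 and tail == 6, loading 5 samples yields
 * na == FFMIN(5, 8 - 6) == 2 (written at indices 6..7) and
 * nb == 5 - 2 == 3 (written at indices 0..2), after which
 * tail == (6 + 2 + 3) % 8 == 3.
 */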

/**
 * Populate the current audio fragment data buffer.
 *
 * @return
 *   0 when the fragment is ready,
 *   AVERROR(EAGAIN) if more input data is required.
 */
static int yae_load_frag(ATempoContext *atempo,
                         const uint8_t **src_ref,
                         const uint8_t *src_end)
{
    // shortcuts:
    AudioFragment *frag = yae_curr_frag(atempo);
    uint8_t *dst;
    int64_t missing, start, zeros;
    uint32_t nsamples;
    const uint8_t *a, *b;
    int i0, i1, n0, n1, na, nb;

    int64_t stop_here = frag->position[0] + atempo->window;
    if (src_ref && yae_load_data(atempo, src_ref, src_end, stop_here) != 0) {
        return AVERROR(EAGAIN);
    }

    // calculate the number of samples we don't have:
    missing =
        stop_here > atempo->position[0] ?
        stop_here - atempo->position[0] : 0;

    nsamples =
        missing < (int64_t)atempo->window ?
        (uint32_t)(atempo->window - missing) : 0;

    // set up the output buffer:
    frag->nsamples = nsamples;
    dst = frag->data;

    start = atempo->position[0] - atempo->size;

    // what we don't have we substitute with zeros:
    zeros =
        frag->position[0] < start ?
        FFMIN(start - frag->position[0], (int64_t)nsamples) : 0;

    if (zeros == nsamples) {
        return 0;
    }

    if (frag->position[0] < start) {
        memset(dst, 0, zeros * atempo->stride);
        dst += zeros * atempo->stride;
    }

    // get the remaining data from the ring buffer:
    na = (atempo->head < atempo->tail ?
          atempo->tail - atempo->head :
          atempo->ring - atempo->head);

    nb = atempo->head < atempo->tail ? 0 : atempo->tail;

    // sanity check:
    av_assert0(nsamples <= zeros + na + nb);

    a = atempo->buffer + atempo->head * atempo->stride;
    b = atempo->buffer;

    i0 = frag->position[0] + zeros - start;
    i1 = i0 < na ? 0 : i0 - na;

    n0 = i0 < na ? FFMIN(na - i0, (int)(nsamples - zeros)) : 0;
    n1 = nsamples - zeros - n0;

    if (n0) {
        memcpy(dst, a + i0 * atempo->stride, n0 * atempo->stride);
        dst += n0 * atempo->stride;
    }

    if (n1) {
        memcpy(dst, b + i1 * atempo->stride, n1 * atempo->stride);
    }

    return 0;
}

/**
 * Prepare for loading the next audio fragment.
 */
static void yae_advance_to_next_frag(ATempoContext *atempo)
{
    const double fragment_step = atempo->tempo * (double)(atempo->window / 2);

    const AudioFragment *prev;
    AudioFragment *frag;

    atempo->nfrag++;
    prev = yae_prev_frag(atempo);
    frag = yae_curr_frag(atempo);

    frag->position[0] = prev->position[0] + (int64_t)fragment_step;
    frag->position[1] = prev->position[1] + atempo->window / 2;
    frag->nsamples = 0;
}
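
/*
 * Illustration of the fragment stepping above: output positions always
 * advance by window / 2 samples, while input positions advance by
 * tempo * window / 2.  With window == 2048 and tempo == 2.0 the input
 * step is 2048 against an output step of 1024, halving the duration;
 * with tempo == 0.5 the input step is 512, doubling it.
 */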

/**
 * Calculate cross-correlation via rDFT.
 *
 * Multiply two vectors of complex numbers (result of real_to_complex rDFT)
 * and transform back via complex_to_real rDFT.
 */
static void yae_xcorr_via_rdft(float *xcorr_in,
                               float *xcorr,
                               AVTXContext *complex_to_real,
                               av_tx_fn c2r_fn,
                               const AVComplexFloat *xa,
                               const AVComplexFloat *xb,
                               const int window)
{
    AVComplexFloat *xc = (AVComplexFloat *)xcorr_in;
    int i;

    for (i = 0; i <= window; i++, xa++, xb++, xc++) {
        xc->re = (xa->re * xb->re + xa->im * xb->im);
        xc->im = (xa->im * xb->re - xa->re * xb->im);
    }

    // apply inverse rDFT:
    c2r_fn(complex_to_real, xcorr, xcorr_in, sizeof(*xc));
}
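
/*
 * The loop above forms Xa[k] * conj(Xb[k]) for every rDFT bin; by the
 * correlation theorem the inverse rDFT of that product is the circular
 * cross-correlation of the two time-domain fragments.  This costs
 * O(N log N) versus the O(N^2) of evaluating the correlation directly.
 */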

/**
 * Calculate the alignment offset for a given fragment
 * relative to the previous fragment.
 *
 * @return alignment offset of the current fragment relative to the previous.
 */
static int yae_align(AudioFragment *frag,
                     const AudioFragment *prev,
                     const int window,
                     const int delta_max,
                     const int drift,
                     float *correlation_in,
                     float *correlation,
                     AVTXContext *complex_to_real,
                     av_tx_fn c2r_fn)
{
    int best_offset = -drift;
    float best_metric = -FLT_MAX;
    float *xcorr;

    int i0;
    int i1;
    int i;

    yae_xcorr_via_rdft(correlation_in,
                       correlation,
                       complex_to_real,
                       c2r_fn,
                       (const AVComplexFloat *)prev->xdat,
                       (const AVComplexFloat *)frag->xdat,
                       window);

    // identify search window boundaries:
    i0 = FFMAX(window / 2 - delta_max - drift, 0);
    i0 = FFMIN(i0, window);

    i1 = FFMIN(window / 2 + delta_max - drift, window - window / 16);
    i1 = FFMAX(i1, 0);

    // identify cross-correlation peaks within the search window:
    xcorr = correlation + i0;

    for (i = i0; i < i1; i++, xcorr++) {
        float metric = *xcorr;

        // normalize:
        float drifti = (float)(drift + i);
        metric *= drifti * (float)(i - i0) * (float)(i1 - i);

        if (metric > best_metric) {
            best_metric = metric;
            best_offset = i - window / 2;
        }
    }

    return best_offset;
}
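
/*
 * Note on the weighting above (interpretation): the factor
 * (i - i0) * (i1 - i) vanishes at both ends of the search window, so
 * correlation peaks at the very edges of the admissible offset range
 * are de-emphasized in favor of peaks nearer the middle of the range.
 */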

/**
 * Adjust the current fragment position for better alignment
 * with the previous fragment.
 *
 * @return alignment correction.
 */
static int yae_adjust_position(ATempoContext *atempo)
{
    const AudioFragment *prev = yae_prev_frag(atempo);
    AudioFragment *frag = yae_curr_frag(atempo);

    const double prev_output_position =
        (double)(prev->position[1] - atempo->origin[1] + atempo->window / 2) *
        atempo->tempo;

    const double ideal_output_position =
        (double)(prev->position[0] - atempo->origin[0] + atempo->window / 2);

    const int drift = (int)(prev_output_position - ideal_output_position);

    const int delta_max = atempo->window / 2;
    const int correction = yae_align(frag,
                                     prev,
                                     atempo->window,
                                     delta_max,
                                     drift,
                                     atempo->correlation_in,
                                     atempo->correlation,
                                     atempo->complex_to_real,
                                     atempo->c2r_fn);

    if (correction) {
        // adjust the fragment position:
        frag->position[0] -= correction;

        // clear so that the fragment can be reloaded:
        frag->nsamples = 0;
    }

    return correction;
}

/**
 * A helper macro for blending the overlap region of the previous
 * and current audio fragments.
 */
#define yae_blend(scalar_type)                                          \
    do {                                                                \
        const scalar_type *aaa = (const scalar_type *)a;                \
        const scalar_type *bbb = (const scalar_type *)b;                \
                                                                        \
        scalar_type *out = (scalar_type *)dst;                          \
        scalar_type *out_end = (scalar_type *)dst_end;                  \
        int64_t i;                                                      \
                                                                        \
        for (i = 0; i < overlap && out < out_end;                       \
             i++, atempo->position[1]++, wa++, wb++) {                  \
            float w0 = *wa;                                             \
            float w1 = *wb;                                             \
            int j;                                                      \
                                                                        \
            for (j = 0; j < atempo->channels;                           \
                 j++, aaa++, bbb++, out++) {                            \
                float t0 = (float)*aaa;                                 \
                float t1 = (float)*bbb;                                 \
                                                                        \
                *out =                                                  \
                    frag->position[0] + i < 0 ?                         \
                    *aaa :                                              \
                    (scalar_type)(t0 * w0 + t1 * w1);                   \
            }                                                           \
        }                                                               \
        dst = (uint8_t *)out;                                           \
    } while (0)

/**
 * Blend the overlap region of the previous and current audio fragments
 * and output the results to the given destination buffer.
 *
 * @return
 *   0 if the overlap region was completely stored in the dst buffer,
 *   AVERROR(EAGAIN) if more destination buffer space is required.
 */
static int yae_overlap_add(ATempoContext *atempo,
                           uint8_t **dst_ref,
                           uint8_t *dst_end)
{
    // shortcuts:
    const AudioFragment *prev = yae_prev_frag(atempo);
    const AudioFragment *frag = yae_curr_frag(atempo);

    const int64_t start_here = FFMAX(atempo->position[1],
                                     frag->position[1]);

    const int64_t stop_here = FFMIN(prev->position[1] + prev->nsamples,
                                    frag->position[1] + frag->nsamples);

    const int64_t overlap = stop_here - start_here;

    const int64_t ia = start_here - prev->position[1];
    const int64_t ib = start_here - frag->position[1];

    const float *wa = atempo->hann + ia;
    const float *wb = atempo->hann + ib;

    const uint8_t *a = prev->data + ia * atempo->stride;
    const uint8_t *b = frag->data + ib * atempo->stride;

    uint8_t *dst = *dst_ref;

    av_assert0(start_here <= stop_here &&
               frag->position[1] <= start_here &&
               overlap <= frag->nsamples);

    if (atempo->format == AV_SAMPLE_FMT_U8) {
        yae_blend(uint8_t);
    } else if (atempo->format == AV_SAMPLE_FMT_S16) {
        yae_blend(int16_t);
    } else if (atempo->format == AV_SAMPLE_FMT_S32) {
        yae_blend(int);
    } else if (atempo->format == AV_SAMPLE_FMT_FLT) {
        yae_blend(float);
    } else if (atempo->format == AV_SAMPLE_FMT_DBL) {
        yae_blend(double);
    }

    // pass back the updated destination buffer pointer:
    *dst_ref = dst;

    return atempo->position[1] == stop_here ? 0 : AVERROR(EAGAIN);
}
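
/*
 * Why no renormalization is needed (illustration): consecutive
 * fragments are offset by window / 2 in the output, so in the overlap
 * region ia == ib + window / 2 and each output sample is weighted by
 * hann[ib + window / 2] + hann[ib].  For the Hann window these two
 * coefficients sum to (approximately) 1, so the cross-fade preserves
 * the signal amplitude.
 */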

/**
 * Feed as much data to the filter as it is able to consume
 * and receive as much processed data in the destination buffer
 * as it is able to produce or store.
 */
static void
yae_apply(ATempoContext *atempo,
          const uint8_t **src_ref,
          const uint8_t *src_end,
          uint8_t **dst_ref,
          uint8_t *dst_end)
{
    while (1) {
        if (atempo->state == YAE_LOAD_FRAGMENT) {
            // load additional data for the current fragment:
            if (yae_load_frag(atempo, src_ref, src_end) != 0) {
                break;
            }

            // down-mix to mono:
            yae_downmix(atempo, yae_curr_frag(atempo));

            // apply rDFT:
            atempo->r2c_fn(atempo->real_to_complex, yae_curr_frag(atempo)->xdat, yae_curr_frag(atempo)->xdat_in, sizeof(float));

            // must load the second fragment before alignment can start:
            if (!atempo->nfrag) {
                yae_advance_to_next_frag(atempo);
                continue;
            }

            atempo->state = YAE_ADJUST_POSITION;
        }

        if (atempo->state == YAE_ADJUST_POSITION) {
            // adjust the position for better alignment:
            if (yae_adjust_position(atempo)) {
                // reload the fragment at the corrected position, so that the
                // Hann window blending would not require normalization:
                atempo->state = YAE_RELOAD_FRAGMENT;
            } else {
                atempo->state = YAE_OUTPUT_OVERLAP_ADD;
            }
        }

        if (atempo->state == YAE_RELOAD_FRAGMENT) {
            // load additional data if necessary due to position adjustment:
            if (yae_load_frag(atempo, src_ref, src_end) != 0) {
                break;
            }

            // down-mix to mono:
            yae_downmix(atempo, yae_curr_frag(atempo));

            // apply rDFT:
            atempo->r2c_fn(atempo->real_to_complex, yae_curr_frag(atempo)->xdat, yae_curr_frag(atempo)->xdat_in, sizeof(float));

            atempo->state = YAE_OUTPUT_OVERLAP_ADD;
        }

        if (atempo->state == YAE_OUTPUT_OVERLAP_ADD) {
            // overlap-add and output the result:
            if (yae_overlap_add(atempo, dst_ref, dst_end) != 0) {
                break;
            }

            // advance to the next fragment, repeat:
            yae_advance_to_next_frag(atempo);
            atempo->state = YAE_LOAD_FRAGMENT;
        }
    }
}
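
/*
 * State transitions driven by the loop above (summary):
 *
 *   YAE_LOAD_FRAGMENT      -> YAE_ADJUST_POSITION
 *   YAE_ADJUST_POSITION    -> YAE_RELOAD_FRAGMENT    (nonzero correction)
 *   YAE_ADJUST_POSITION    -> YAE_OUTPUT_OVERLAP_ADD (no correction)
 *   YAE_RELOAD_FRAGMENT    -> YAE_OUTPUT_OVERLAP_ADD
 *   YAE_OUTPUT_OVERLAP_ADD -> YAE_LOAD_FRAGMENT
 *
 * The loop exits whenever input is exhausted (EAGAIN from
 * yae_load_frag) or the destination buffer fills up (EAGAIN from
 * yae_overlap_add).
 */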

/**
 * Flush any buffered data from the filter.
 *
 * @return
 *   0 if all data was completely stored in the dst buffer,
 *   AVERROR(EAGAIN) if more destination buffer space is required.
 */
static int yae_flush(ATempoContext *atempo,
                     uint8_t **dst_ref,
                     uint8_t *dst_end)
{
    AudioFragment *frag = yae_curr_frag(atempo);
    int64_t overlap_end;
    int64_t start_here;
    int64_t stop_here;
    int64_t offset;

    const uint8_t *src;
    uint8_t *dst;

    int src_size;
    int dst_size;
    int nbytes;

    atempo->state = YAE_FLUSH_OUTPUT;

    if (!atempo->nfrag) {
        // there is nothing to flush:
        return 0;
    }

    if (atempo->position[0] == frag->position[0] + frag->nsamples &&
        atempo->position[1] == frag->position[1] + frag->nsamples) {
        // the current fragment is already flushed:
        return 0;
    }

    if (frag->position[0] + frag->nsamples < atempo->position[0]) {
        // finish loading the current (possibly partial) fragment:
        yae_load_frag(atempo, NULL, NULL);

        if (atempo->nfrag) {
            // down-mix to mono:
            yae_downmix(atempo, frag);

            // apply rDFT:
            atempo->r2c_fn(atempo->real_to_complex, frag->xdat, frag->xdat_in, sizeof(float));

            // align the current fragment to the previous fragment:
            if (yae_adjust_position(atempo)) {
                // reload the current fragment due to the adjusted position:
                yae_load_frag(atempo, NULL, NULL);
            }
        }
    }

    // flush the overlap region:
    overlap_end = frag->position[1] + FFMIN(atempo->window / 2,
                                            frag->nsamples);

    while (atempo->position[1] < overlap_end) {
        if (yae_overlap_add(atempo, dst_ref, dst_end) != 0) {
            return AVERROR(EAGAIN);
        }
    }

    // check whether all of the input samples have been consumed:
    if (frag->position[0] + frag->nsamples < atempo->position[0]) {
        yae_advance_to_next_frag(atempo);
        return AVERROR(EAGAIN);
    }

    // flush the remainder of the current fragment:
    start_here = FFMAX(atempo->position[1], overlap_end);
    stop_here = frag->position[1] + frag->nsamples;
    offset = start_here - frag->position[1];
    av_assert0(start_here <= stop_here && frag->position[1] <= start_here);

    src = frag->data + offset * atempo->stride;
    dst = (uint8_t *)*dst_ref;

    src_size = (int)(stop_here - start_here) * atempo->stride;
    dst_size = dst_end - dst;
    nbytes = FFMIN(src_size, dst_size);

    memcpy(dst, src, nbytes);
    dst += nbytes;

    atempo->position[1] += (nbytes / atempo->stride);

    // pass back the updated destination buffer pointer:
    *dst_ref = (uint8_t *)dst;

    return atempo->position[1] == stop_here ? 0 : AVERROR(EAGAIN);
}

static av_cold int init(AVFilterContext *ctx)
{
    ATempoContext *atempo = ctx->priv;
    atempo->format = AV_SAMPLE_FMT_NONE;
    atempo->state = YAE_LOAD_FRAGMENT;
    return 0;
}

static av_cold void uninit(AVFilterContext *ctx)
{
    ATempoContext *atempo = ctx->priv;
    yae_release_buffers(atempo);
}

// WSOLA necessitates an internal sliding-window ring buffer
// for the incoming audio stream.
//
// Planar sample formats are too cumbersome to store in a ring buffer,
// therefore planar sample formats are not supported.
//
static const enum AVSampleFormat sample_fmts[] = {
    AV_SAMPLE_FMT_U8,
    AV_SAMPLE_FMT_S16,
    AV_SAMPLE_FMT_S32,
    AV_SAMPLE_FMT_FLT,
    AV_SAMPLE_FMT_DBL,
    AV_SAMPLE_FMT_NONE
};

static int config_props(AVFilterLink *inlink)
{
    AVFilterContext *ctx = inlink->dst;
    ATempoContext *atempo = ctx->priv;

    enum AVSampleFormat format = inlink->format;
    int sample_rate = (int)inlink->sample_rate;

    return yae_reset(atempo, format, sample_rate, inlink->ch_layout.nb_channels);
}

static int push_samples(ATempoContext *atempo,
                        AVFilterLink *outlink,
                        int n_out)
{
    int ret;

    atempo->dst_buffer->sample_rate = outlink->sample_rate;
    atempo->dst_buffer->nb_samples = n_out;

    // adjust the PTS:
    atempo->dst_buffer->pts = atempo->start_pts +
        av_rescale_q(atempo->nsamples_out,
                     (AVRational){ 1, outlink->sample_rate },
                     outlink->time_base);

    ret = ff_filter_frame(outlink, atempo->dst_buffer);
    atempo->dst_buffer = NULL;
    atempo->dst = NULL;
    atempo->dst_end = NULL;
    if (ret < 0)
        return ret;

    atempo->nsamples_out += n_out;
    return 0;
}
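
/*
 * PTS illustration for push_samples() above: output timestamps are
 * derived purely from the count of samples emitted so far.  With a
 * 48000 Hz output and a 1/48000 time base, a frame sent after 9600
 * samples have already been output gets
 * pts == start_pts + av_rescale_q(9600, (AVRational){ 1, 48000 },
 * (AVRational){ 1, 48000 }) == start_pts + 9600.
 */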

static int filter_frame(AVFilterLink *inlink, AVFrame *src_buffer)
{
    AVFilterContext *ctx = inlink->dst;
    ATempoContext *atempo = ctx->priv;
    AVFilterLink *outlink = ctx->outputs[0];

    int ret = 0;
    int n_in = src_buffer->nb_samples;
    int n_out = (int)(0.5 + ((double)n_in) / atempo->tempo);

    const uint8_t *src = src_buffer->data[0];
    const uint8_t *src_end = src + n_in * atempo->stride;

    if (atempo->start_pts == AV_NOPTS_VALUE)
        atempo->start_pts = av_rescale_q(src_buffer->pts,
                                         inlink->time_base,
                                         outlink->time_base);

    while (src < src_end) {
        if (!atempo->dst_buffer) {
            atempo->dst_buffer = ff_get_audio_buffer(outlink, n_out);
            if (!atempo->dst_buffer) {
                av_frame_free(&src_buffer);
                return AVERROR(ENOMEM);
            }
            av_frame_copy_props(atempo->dst_buffer, src_buffer);

            atempo->dst = atempo->dst_buffer->data[0];
            atempo->dst_end = atempo->dst + n_out * atempo->stride;
        }

        yae_apply(atempo, &src, src_end, &atempo->dst, atempo->dst_end);

        if (atempo->dst == atempo->dst_end) {
            int n_samples = ((atempo->dst - atempo->dst_buffer->data[0]) /
                             atempo->stride);
            ret = push_samples(atempo, outlink, n_samples);
            if (ret < 0)
                goto end;
        }
    }

    atempo->nsamples_in += n_in;
end:
    av_frame_free(&src_buffer);
    return ret;
}
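
/*
 * Sizing illustration for filter_frame() above: the output frame is
 * allocated for n_out == round(n_in / tempo) samples, e.g. a 1024
 * sample input frame at tempo == 2.0 reserves room for 512 output
 * samples, while at tempo == 0.5 it reserves 2048.
 */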

static int request_frame(AVFilterLink *outlink)
{
    AVFilterContext *ctx = outlink->src;
    ATempoContext *atempo = ctx->priv;
    int ret;

    ret = ff_request_frame(ctx->inputs[0]);

    if (ret == AVERROR_EOF) {
        // flush the filter:
        int n_max = atempo->ring;
        int n_out;
        int err = AVERROR(EAGAIN);

        while (err == AVERROR(EAGAIN)) {
            if (!atempo->dst_buffer) {
                atempo->dst_buffer = ff_get_audio_buffer(outlink, n_max);
                if (!atempo->dst_buffer)
                    return AVERROR(ENOMEM);

                atempo->dst = atempo->dst_buffer->data[0];
                atempo->dst_end = atempo->dst + n_max * atempo->stride;
            }

            err = yae_flush(atempo, &atempo->dst, atempo->dst_end);

            n_out = ((atempo->dst - atempo->dst_buffer->data[0]) /
                     atempo->stride);

            if (n_out) {
                ret = push_samples(atempo, outlink, n_out);
                if (ret < 0)
                    return ret;
            }
        }

        av_frame_free(&atempo->dst_buffer);
        atempo->dst = NULL;
        atempo->dst_end = NULL;

        return AVERROR_EOF;
    }

    return ret;
}

static int process_command(AVFilterContext *ctx,
                           const char *cmd,
                           const char *arg,
                           char *res,
                           int res_len,
                           int flags)
{
    int ret = ff_filter_process_command(ctx, cmd, arg, res, res_len, flags);

    if (ret < 0)
        return ret;

    return yae_update(ctx);
}

static const AVFilterPad atempo_inputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_AUDIO,
        .filter_frame = filter_frame,
        .config_props = config_props,
    },
};

static const AVFilterPad atempo_outputs[] = {
    {
        .name          = "default",
        .request_frame = request_frame,
        .type          = AVMEDIA_TYPE_AUDIO,
    },
};

const AVFilter ff_af_atempo = {
    .name            = "atempo",
    .description     = NULL_IF_CONFIG_SMALL("Adjust audio tempo."),
    .init            = init,
    .uninit          = uninit,
    .process_command = process_command,
    .priv_size       = sizeof(ATempoContext),
    .priv_class      = &atempo_class,
    FILTER_INPUTS(atempo_inputs),
    FILTER_OUTPUTS(atempo_outputs),
    FILTER_SAMPLEFMTS_ARRAY(sample_fmts),
};