FFmpeg
af_axcorrelate.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019 Paul B Mahol
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/avassert.h"
22 #include "libavutil/audio_fifo.h"
24 #include "libavutil/common.h"
25 #include "libavutil/opt.h"
26 
27 #include "audio.h"
28 #include "avfilter.h"
29 #include "formats.h"
30 #include "filters.h"
31 #include "internal.h"
32 
33 typedef struct AudioXCorrelateContext {
34  const AVClass *class;
35 
36  int size;
37  int algo;
38  int64_t pts;
39 
45  int used;
46 
49 
51 {
54  static const enum AVSampleFormat sample_fmts[] = {
57  };
58  int ret;
59 
61  if (!layouts)
62  return AVERROR(ENOMEM);
64  if (ret < 0)
65  return ret;
66 
68  if (!formats)
69  return AVERROR(ENOMEM);
71  if (ret < 0)
72  return ret;
73 
75  if (!formats)
76  return AVERROR(ENOMEM);
78 }
79 
/**
 * Accumulate the first @p size samples of @p in.
 *
 * Note that, despite the name, no division takes place here: the value
 * returned is the plain sum, which callers divide by the window length
 * themselves when they need the mean.
 *
 * @param in    samples to add up
 * @param size  number of samples to read from @p in
 * @return      the sum, or 0 when @p size is not positive
 */
static float mean_sum(const float *in, int size)
{
    float total = 0.f;
    int idx = 0;

    while (idx < size) {
        total += in[idx];
        idx++;
    }

    return total;
}
89 
/**
 * Sum of the element-wise products of two sample windows.
 *
 * Passing the same pointer for both arguments yields the sum of squares
 * of that window.
 *
 * @param x     first window
 * @param y     second window
 * @param size  number of samples read from each window
 * @return      x[0]*y[0] + ... + x[size-1]*y[size-1], or 0 when @p size
 *              is not positive
 */
static float square_sum(const float *x, const float *y, int size)
{
    float acc = 0.f;
    int k = 0;

    while (k < size) {
        const float product = x[k] * y[k];

        acc += product;
        k++;
    }

    return acc;
}
99 
100 static float xcorrelate(const float *x, const float *y, float sumx, float sumy, int size)
101 {
102  const float xm = sumx / size, ym = sumy / size;
103  float num = 0.f, den, den0 = 0.f, den1 = 0.f;
104 
105  for (int i = 0; i < size; i++) {
106  float xd = x[i] - xm;
107  float yd = y[i] - ym;
108 
109  num += xd * yd;
110  den0 += xd * xd;
111  den1 += yd * yd;
112  }
113 
114  num /= size;
115  den = sqrtf((den0 * den1) / (size * size));
116 
117  return den <= 1e-6f ? 0.f : num / den;
118 }
119 
121 {
122  AudioXCorrelateContext *s = ctx->priv;
123  const int size = s->size;
124  int used;
125 
126  for (int ch = 0; ch < out->channels; ch++) {
127  const float *x = (const float *)s->cache[0]->extended_data[ch];
128  const float *y = (const float *)s->cache[1]->extended_data[ch];
129  float *sumx = (float *)s->mean_sum[0]->extended_data[ch];
130  float *sumy = (float *)s->mean_sum[1]->extended_data[ch];
131  float *dst = (float *)out->extended_data[ch];
132 
133  used = s->used;
134  if (!used) {
135  sumx[0] = mean_sum(x, size);
136  sumy[0] = mean_sum(y, size);
137  used = 1;
138  }
139 
140  for (int n = 0; n < out->nb_samples; n++) {
141  dst[n] = xcorrelate(x + n, y + n, sumx[0], sumy[0], size);
142 
143  sumx[0] -= x[n];
144  sumx[0] += x[n + size];
145  sumy[0] -= y[n];
146  sumy[0] += y[n + size];
147  }
148  }
149 
150  return used;
151 }
152 
154 {
155  AudioXCorrelateContext *s = ctx->priv;
156  const int size = s->size;
157  int used;
158 
159  for (int ch = 0; ch < out->channels; ch++) {
160  const float *x = (const float *)s->cache[0]->extended_data[ch];
161  const float *y = (const float *)s->cache[1]->extended_data[ch];
162  float *num_sum = (float *)s->num_sum->extended_data[ch];
163  float *den_sumx = (float *)s->den_sum[0]->extended_data[ch];
164  float *den_sumy = (float *)s->den_sum[1]->extended_data[ch];
165  float *dst = (float *)out->extended_data[ch];
166 
167  used = s->used;
168  if (!used) {
169  num_sum[0] = square_sum(x, y, size);
170  den_sumx[0] = square_sum(x, x, size);
171  den_sumy[0] = square_sum(y, y, size);
172  used = 1;
173  }
174 
175  for (int n = 0; n < out->nb_samples; n++) {
176  float num, den;
177 
178  num = num_sum[0] / size;
179  den = sqrtf((den_sumx[0] * den_sumy[0]) / (size * size));
180 
181  dst[n] = den <= 1e-6f ? 0.f : num / den;
182 
183  num_sum[0] -= x[n] * y[n];
184  num_sum[0] += x[n + size] * y[n + size];
185  den_sumx[0] -= x[n] * x[n];
186  den_sumx[0] = FFMAX(den_sumx[0], 0.f);
187  den_sumx[0] += x[n + size] * x[n + size];
188  den_sumy[0] -= y[n] * y[n];
189  den_sumy[0] = FFMAX(den_sumy[0], 0.f);
190  den_sumy[0] += y[n + size] * y[n + size];
191  }
192  }
193 
194  return used;
195 }
196 
198 {
199  AudioXCorrelateContext *s = ctx->priv;
200  AVFrame *frame = NULL;
201  int ret, status;
202  int available;
203  int64_t pts;
204 
206 
207  for (int i = 0; i < 2; i++) {
208  ret = ff_inlink_consume_frame(ctx->inputs[i], &frame);
209  if (ret > 0) {
210  if (s->pts == AV_NOPTS_VALUE)
211  s->pts = frame->pts;
212  ret = av_audio_fifo_write(s->fifo[i], (void **)frame->extended_data,
213  frame->nb_samples);
215  if (ret < 0)
216  return ret;
217  }
218  }
219 
220  available = FFMIN(av_audio_fifo_size(s->fifo[0]), av_audio_fifo_size(s->fifo[1]));
221  if (available > s->size) {
222  const int out_samples = available - s->size;
223  AVFrame *out;
224 
225  if (!s->cache[0] || s->cache[0]->nb_samples < available) {
226  av_frame_free(&s->cache[0]);
227  s->cache[0] = ff_get_audio_buffer(ctx->outputs[0], available);
228  if (!s->cache[0])
229  return AVERROR(ENOMEM);
230  }
231 
232  if (!s->cache[1] || s->cache[1]->nb_samples < available) {
233  av_frame_free(&s->cache[1]);
234  s->cache[1] = ff_get_audio_buffer(ctx->outputs[0], available);
235  if (!s->cache[1])
236  return AVERROR(ENOMEM);
237  }
238 
239  ret = av_audio_fifo_peek(s->fifo[0], (void **)s->cache[0]->extended_data, available);
240  if (ret < 0)
241  return ret;
242 
243  ret = av_audio_fifo_peek(s->fifo[1], (void **)s->cache[1]->extended_data, available);
244  if (ret < 0)
245  return ret;
246 
247  out = ff_get_audio_buffer(ctx->outputs[0], out_samples);
248  if (!out)
249  return AVERROR(ENOMEM);
250 
251  s->used = s->xcorrelate(ctx, out);
252 
253  out->pts = s->pts;
254  s->pts += out_samples;
255 
256  av_audio_fifo_drain(s->fifo[0], out_samples);
257  av_audio_fifo_drain(s->fifo[1], out_samples);
258 
259  return ff_filter_frame(ctx->outputs[0], out);
260  }
261 
262  if (av_audio_fifo_size(s->fifo[0]) > s->size &&
263  av_audio_fifo_size(s->fifo[1]) > s->size) {
265  return 0;
266  }
267 
268  for (int i = 0; i < 2; i++) {
269  if (ff_inlink_acknowledge_status(ctx->inputs[i], &status, &pts)) {
270  ff_outlink_set_status(ctx->outputs[0], status, pts);
271  return 0;
272  }
273  }
274 
275  if (ff_outlink_frame_wanted(ctx->outputs[0])) {
276  for (int i = 0; i < 2; i++) {
277  if (av_audio_fifo_size(s->fifo[i]) > s->size)
278  continue;
279  ff_inlink_request_frame(ctx->inputs[i]);
280  return 0;
281  }
282  }
283 
284  return FFERROR_NOT_READY;
285 }
286 
287 static int config_output(AVFilterLink *outlink)
288 {
289  AVFilterContext *ctx = outlink->src;
290  AVFilterLink *inlink = ctx->inputs[0];
291  AudioXCorrelateContext *s = ctx->priv;
292 
293  s->pts = AV_NOPTS_VALUE;
294 
295  outlink->format = inlink->format;
296  outlink->channels = inlink->channels;
297  s->fifo[0] = av_audio_fifo_alloc(outlink->format, outlink->channels, s->size);
298  s->fifo[1] = av_audio_fifo_alloc(outlink->format, outlink->channels, s->size);
299  if (!s->fifo[0] || !s->fifo[1])
300  return AVERROR(ENOMEM);
301 
302  s->mean_sum[0] = ff_get_audio_buffer(outlink, 1);
303  s->mean_sum[1] = ff_get_audio_buffer(outlink, 1);
304  s->num_sum = ff_get_audio_buffer(outlink, 1);
305  s->den_sum[0] = ff_get_audio_buffer(outlink, 1);
306  s->den_sum[1] = ff_get_audio_buffer(outlink, 1);
307  if (!s->mean_sum[0] || !s->mean_sum[1] || !s->num_sum ||
308  !s->den_sum[0] || !s->den_sum[1])
309  return AVERROR(ENOMEM);
310 
311  switch (s->algo) {
312  case 0: s->xcorrelate = xcorrelate_slow; break;
313  case 1: s->xcorrelate = xcorrelate_fast; break;
314  }
315 
316  return 0;
317 }
318 
320 {
321  AudioXCorrelateContext *s = ctx->priv;
322 
323  av_audio_fifo_free(s->fifo[0]);
324  av_audio_fifo_free(s->fifo[1]);
325  av_frame_free(&s->cache[0]);
326  av_frame_free(&s->cache[1]);
327  av_frame_free(&s->mean_sum[0]);
328  av_frame_free(&s->mean_sum[1]);
329  av_frame_free(&s->num_sum);
330  av_frame_free(&s->den_sum[0]);
331  av_frame_free(&s->den_sum[1]);
332 }
333 
334 static const AVFilterPad inputs[] = {
335  {
336  .name = "axcorrelate0",
337  .type = AVMEDIA_TYPE_AUDIO,
338  },
339  {
340  .name = "axcorrelate1",
341  .type = AVMEDIA_TYPE_AUDIO,
342  },
343  { NULL }
344 };
345 
346 static const AVFilterPad outputs[] = {
347  {
348  .name = "default",
349  .type = AVMEDIA_TYPE_AUDIO,
350  .config_props = config_output,
351  },
352  { NULL }
353 };
354 
355 #define AF AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
356 #define OFFSET(x) offsetof(AudioXCorrelateContext, x)
357 
358 static const AVOption axcorrelate_options[] = {
359  { "size", "set segment size", OFFSET(size), AV_OPT_TYPE_INT, {.i64=256}, 2, 131072, AF },
360  { "algo", "set alghorithm", OFFSET(algo), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, AF, "algo" },
361  { "slow", "slow algorithm", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, AF, "algo" },
362  { "fast", "fast algorithm", 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, AF, "algo" },
363  { NULL }
364 };
365 
366 AVFILTER_DEFINE_CLASS(axcorrelate);
367 
369  .name = "axcorrelate",
370  .description = NULL_IF_CONFIG_SMALL("Cross-correlate two audio streams."),
371  .priv_size = sizeof(AudioXCorrelateContext),
372  .priv_class = &axcorrelate_class,
374  .activate = activate,
375  .uninit = uninit,
376  .inputs = inputs,
377  .outputs = outputs,
378 };
av_audio_fifo_free
void av_audio_fifo_free(AVAudioFifo *af)
Free an AVAudioFifo.
Definition: audio_fifo.c:45
formats
formats
Definition: signature.h:48
ff_get_audio_buffer
AVFrame * ff_get_audio_buffer(AVFilterLink *link, int nb_samples)
Request an audio samples buffer with a specific set of permissions.
Definition: audio.c:86
AV_SAMPLE_FMT_FLTP
@ AV_SAMPLE_FMT_FLTP
float, planar
Definition: samplefmt.h:69
AVFilterChannelLayouts
A list of supported channel layouts.
Definition: formats.h:86
status
They must not be accessed directly. The fifo field contains the frames that are queued in the input for processing by the filter. The status_in and status_out fields contain the queued status (EOF or error) of the link.
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
opt.h
ff_make_format_list
AVFilterFormats * ff_make_format_list(const int *fmts)
Create a list of supported formats.
Definition: formats.c:286
out
FILE * out
Definition: movenc.c:54
ff_filter_frame
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:978
sample_fmts
static enum AVSampleFormat sample_fmts[]
Definition: adpcmenc.c:953
layouts
enum MovChannelLayoutTag * layouts
Definition: mov_chan.c:434
FFERROR_NOT_READY
return FFERROR_NOT_READY
Definition: filter_design.txt:204
inlink
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink
Definition: filter_design.txt:212
AVFILTER_DEFINE_CLASS
AVFILTER_DEFINE_CLASS(axcorrelate)
av_frame_free
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:111
ff_all_channel_counts
AVFilterChannelLayouts * ff_all_channel_counts(void)
Construct an AVFilterChannelLayouts coding for any channel layout, with known or unknown disposition.
Definition: formats.c:429
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:303
AVOption
AVOption.
Definition: opt.h:248
AudioXCorrelateContext::fifo
AVAudioFifo * fifo[2]
Definition: af_axcorrelate.c:40
AVFilter::name
const char * name
Filter name.
Definition: avfilter.h:149
query_formats
static int query_formats(AVFilterContext *ctx)
Definition: af_axcorrelate.c:50
AVFilterFormats
A list of supported formats for one end of a filter link.
Definition: formats.h:65
formats.h
ff_inlink_consume_frame
int ff_inlink_consume_frame(AVFilterLink *link, AVFrame **rframe)
Take a frame from the link's FIFO and update the link's stats.
Definition: avfilter.c:1376
FF_FILTER_FORWARD_STATUS_BACK_ALL
#define FF_FILTER_FORWARD_STATUS_BACK_ALL(outlink, filter)
Forward the status on an output link to all input links.
Definition: filters.h:212
AVAudioFifo
Context for an Audio FIFO Buffer.
Definition: audio_fifo.c:34
av_audio_fifo_drain
int av_audio_fifo_drain(AVAudioFifo *af, int nb_samples)
Drain data from an AVAudioFifo.
Definition: audio_fifo.c:201
pts
static int64_t pts
Definition: transcode_aac.c:652
xcorrelate_fast
static int xcorrelate_fast(AVFilterContext *ctx, AVFrame *out)
Definition: af_axcorrelate.c:153
AVFilterPad
A filter pad used for either input or output.
Definition: internal.h:54
square_sum
static float square_sum(const float *x, const float *y, int size)
Definition: af_axcorrelate.c:90
avassert.h
av_cold
#define av_cold
Definition: attributes.h:90
ff_set_common_formats
int ff_set_common_formats(AVFilterContext *ctx, AVFilterFormats *formats)
A helper for query_formats() which sets all links to the same list of formats.
Definition: formats.c:580
ff_outlink_set_status
static void ff_outlink_set_status(AVFilterLink *link, int status, int64_t pts)
Set the status field of a link from the source filter.
Definition: filters.h:189
OFFSET
#define OFFSET(x)
Definition: af_axcorrelate.c:356
ff_inlink_request_frame
void ff_inlink_request_frame(AVFilterLink *link)
Mark that a frame is wanted on the link.
Definition: avfilter.c:1502
AudioXCorrelateContext::num_sum
AVFrame * num_sum
Definition: af_axcorrelate.c:43
s
#define s(width, name)
Definition: cbs_vp9.c:257
config_output
static int config_output(AVFilterLink *outlink)
Definition: af_axcorrelate.c:287
av_audio_fifo_write
int av_audio_fifo_write(AVAudioFifo *af, void **data, int nb_samples)
Write data to an AVAudioFifo.
Definition: audio_fifo.c:112
AudioXCorrelateContext::used
int used
Definition: af_axcorrelate.c:45
AVMEDIA_TYPE_AUDIO
@ AVMEDIA_TYPE_AUDIO
Definition: avutil.h:202
filters.h
AudioXCorrelateContext
Definition: af_axcorrelate.c:33
ctx
AVFormatContext * ctx
Definition: movenc.c:48
ff_af_axcorrelate
const AVFilter ff_af_axcorrelate
Definition: af_axcorrelate.c:368
f
#define f(width, name)
Definition: cbs_vp9.c:255
mean_sum
static float mean_sum(const float *in, int size)
Definition: af_axcorrelate.c:80
if
if(ret)
Definition: filter_design.txt:179
AVClass
Describe the class of an AVClass context structure.
Definition: log.h:67
NULL
#define NULL
Definition: coverity.c:32
av_audio_fifo_alloc
AVAudioFifo * av_audio_fifo_alloc(enum AVSampleFormat sample_fmt, int channels, int nb_samples)
Allocate an AVAudioFifo.
Definition: audio_fifo.c:59
AudioXCorrelateContext::mean_sum
AVFrame * mean_sum[2]
Definition: af_axcorrelate.c:42
AudioXCorrelateContext::algo
int algo
Definition: af_axcorrelate.c:37
ff_inlink_acknowledge_status
int ff_inlink_acknowledge_status(AVFilterLink *link, int *rstatus, int64_t *rpts)
Test and acknowledge the change of status on the link.
Definition: avfilter.c:1331
xcorrelate_slow
static int xcorrelate_slow(AVFilterContext *ctx, AVFrame *out)
Definition: af_axcorrelate.c:120
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:117
FFMAX
#define FFMAX(a, b)
Definition: common.h:103
AF
#define AF
Definition: af_axcorrelate.c:355
AV_SAMPLE_FMT_NONE
@ AV_SAMPLE_FMT_NONE
Definition: samplefmt.h:59
size
int size
Definition: twinvq_data.h:10344
AV_NOPTS_VALUE
#define AV_NOPTS_VALUE
Undefined timestamp value.
Definition: avutil.h:248
AudioXCorrelateContext::size
int size
Definition: af_axcorrelate.c:36
FFMIN
#define FFMIN(a, b)
Definition: common.h:105
AudioXCorrelateContext::xcorrelate
int(* xcorrelate)(AVFilterContext *ctx, AVFrame *out)
Definition: af_axcorrelate.c:47
av_audio_fifo_size
int av_audio_fifo_size(AVAudioFifo *af)
Get the current number of samples in the AVAudioFifo available for reading.
Definition: audio_fifo.c:228
uninit
static av_cold void uninit(AVFilterContext *ctx)
Definition: af_axcorrelate.c:319
internal.h
AudioXCorrelateContext::cache
AVFrame * cache[2]
Definition: af_axcorrelate.c:41
i
int i
Definition: input.c:407
algo
Definition: dct.c:53
AudioXCorrelateContext::den_sum
AVFrame * den_sum[2]
Definition: af_axcorrelate.c:44
available
if no frame is available
Definition: filter_design.txt:166
common.h
axcorrelate_options
static const AVOption axcorrelate_options[]
Definition: af_axcorrelate.c:358
AVSampleFormat
AVSampleFormat
Audio sample formats.
Definition: samplefmt.h:58
audio_fifo.h
inputs
static const AVFilterPad inputs[]
Definition: af_axcorrelate.c:334
AVFilterPad::name
const char * name
Pad name.
Definition: internal.h:60
AVFilter
Filter definition.
Definition: avfilter.h:145
ret
ret
Definition: filter_design.txt:187
frame
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame For filters that do not use the this method is called when a frame is wanted on an output For a it should directly call filter_frame on the corresponding output For a if there are queued frames already one of these frames should be pushed If the filter should request a frame on one of its repeatedly until at least one frame has been pushed Return or at least make progress towards producing a frame
Definition: filter_design.txt:264
activate
static int activate(AVFilterContext *ctx)
Definition: af_axcorrelate.c:197
ff_all_samplerates
AVFilterFormats * ff_all_samplerates(void)
Definition: formats.c:414
channel_layout.h
AudioXCorrelateContext::pts
int64_t pts
Definition: af_axcorrelate.c:38
AV_OPT_TYPE_INT
@ AV_OPT_TYPE_INT
Definition: opt.h:225
avfilter.h
AVFilterContext
An instance of a filter.
Definition: avfilter.h:333
audio.h
ff_set_common_samplerates
int ff_set_common_samplerates(AVFilterContext *ctx, AVFilterFormats *samplerates)
Definition: formats.c:568
ff_outlink_frame_wanted
The definition of that something depends on the semantics of the filter. The callback must examine the status of the filter's links and proceed accordingly. The status of output links is stored in the status_in and status_out fields and tested by the ff_outlink_frame_wanted() function. If this function returns true…
xcorrelate
static float xcorrelate(const float *x, const float *y, float sumx, float sumy, int size)
Definition: af_axcorrelate.c:100
int
int
Definition: ffmpeg_filter.c:156
av_audio_fifo_peek
int av_audio_fifo_peek(AVAudioFifo *af, void **data, int nb_samples)
Peek data from an AVAudioFifo.
Definition: audio_fifo.c:138
AV_OPT_TYPE_CONST
@ AV_OPT_TYPE_CONST
Definition: opt.h:234
ff_filter_set_ready
void ff_filter_set_ready(AVFilterContext *filter, unsigned priority)
Mark a filter ready and schedule it for activation.
Definition: avfilter.c:186
ff_set_common_channel_layouts
int ff_set_common_channel_layouts(AVFilterContext *ctx, AVFilterChannelLayouts *channel_layouts)
A helper for query_formats() which sets all links to the same list of channel layouts/sample rates.
Definition: formats.c:561
outputs
static const AVFilterPad outputs[]
Definition: af_axcorrelate.c:346