FFmpeg
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
af_amix.c
Go to the documentation of this file.
1 /*
2  * Audio Mix Filter
3  * Copyright (c) 2012 Justin Ruggles <justin.ruggles@gmail.com>
4  *
5  * This file is part of Libav.
6  *
7  * Libav is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * Libav is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with Libav; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * Audio Mix Filter
25  *
26  * Mixes audio from multiple sources into a single output. The channel layout,
27  * sample rate, and sample format will be the same for all inputs and the
28  * output.
29  */
30 
#include "libavutil/audio_fifo.h"
#include "libavutil/avassert.h"
#include "libavutil/avstring.h"
#include "libavutil/channel_layout.h"
#include "libavutil/common.h"
#include "libavutil/float_dsp.h"
#include "libavutil/mathematics.h"
#include "libavutil/opt.h"
#include "libavutil/samplefmt.h"

#include "audio.h"
#include "avfilter.h"
#include "formats.h"
#include "internal.h"
45 
/** Per-input state values stored in MixContext.input_state[]. */
enum {
    INPUT_OFF      = 0, /**< input has reached EOF */
    INPUT_ON       = 1, /**< input is active */
    INPUT_INACTIVE = 2, /**< input is on, but is currently inactive */
};

/** Values of the "duration" option (MixContext.duration_mode). */
enum {
    DURATION_LONGEST  = 0,
    DURATION_SHORTEST = 1,
    DURATION_FIRST    = 2,
};
53 
54 
/**
 * One entry of the frame list: the size and timestamp of a frame that was
 * written to the first input's FIFO.
 */
typedef struct FrameInfo {
    int nb_samples;         /**< samples of this frame not yet removed from the list */
    int64_t pts;            /**< timestamp of the first remaining sample */
    struct FrameInfo *next; /**< next entry, or NULL for the last one */
} FrameInfo;
60 
61 /**
62  * Linked list used to store timestamps and frame sizes of all frames in the
63  * FIFO for the first input.
64  *
65  * This is needed to keep timestamps synchronized for the case where multiple
66  * input frames are pushed to the filter for processing before a frame is
67  * requested by the output link.
68  */
69 typedef struct FrameList {
70  int nb_frames;
74 } FrameList;
75 
76 static void frame_list_clear(FrameList *frame_list)
77 {
78  if (frame_list) {
79  while (frame_list->list) {
80  FrameInfo *info = frame_list->list;
81  frame_list->list = info->next;
82  av_free(info);
83  }
84  frame_list->nb_frames = 0;
85  frame_list->nb_samples = 0;
86  frame_list->end = NULL;
87  }
88 }
89 
90 static int frame_list_next_frame_size(FrameList *frame_list)
91 {
92  if (!frame_list->list)
93  return 0;
94  return frame_list->list->nb_samples;
95 }
96 
97 static int64_t frame_list_next_pts(FrameList *frame_list)
98 {
99  if (!frame_list->list)
100  return AV_NOPTS_VALUE;
101  return frame_list->list->pts;
102 }
103 
104 static void frame_list_remove_samples(FrameList *frame_list, int nb_samples)
105 {
106  if (nb_samples >= frame_list->nb_samples) {
107  frame_list_clear(frame_list);
108  } else {
109  int samples = nb_samples;
110  while (samples > 0) {
111  FrameInfo *info = frame_list->list;
112  av_assert0(info != NULL);
113  if (info->nb_samples <= samples) {
114  samples -= info->nb_samples;
115  frame_list->list = info->next;
116  if (!frame_list->list)
117  frame_list->end = NULL;
118  frame_list->nb_frames--;
119  frame_list->nb_samples -= info->nb_samples;
120  av_free(info);
121  } else {
122  info->nb_samples -= samples;
123  info->pts += samples;
124  frame_list->nb_samples -= samples;
125  samples = 0;
126  }
127  }
128  }
129 }
130 
131 static int frame_list_add_frame(FrameList *frame_list, int nb_samples, int64_t pts)
132 {
133  FrameInfo *info = av_malloc(sizeof(*info));
134  if (!info)
135  return AVERROR(ENOMEM);
136  info->nb_samples = nb_samples;
137  info->pts = pts;
138  info->next = NULL;
139 
140  if (!frame_list->list) {
141  frame_list->list = info;
142  frame_list->end = info;
143  } else {
144  av_assert0(frame_list->end != NULL);
145  frame_list->end->next = info;
146  frame_list->end = info;
147  }
148  frame_list->nb_frames++;
149  frame_list->nb_samples += nb_samples;
150 
151  return 0;
152 }
153 
154 
155 typedef struct MixContext {
156  const AVClass *class; /**< class for AVOptions */
158 
159  int nb_inputs; /**< number of inputs */
160  int active_inputs; /**< number of input currently active */
161  int duration_mode; /**< mode for determining duration */
162  float dropout_transition; /**< transition time when an input drops out */
163 
164  int nb_channels; /**< number of channels */
165  int sample_rate; /**< sample rate */
166  int planar;
167  AVAudioFifo **fifos; /**< audio fifo for each input */
168  uint8_t *input_state; /**< current state of each input */
169  float *input_scale; /**< mixing scale factor for each input */
170  float scale_norm; /**< normalization factor for all inputs */
171  int64_t next_pts; /**< calculated pts for next output frame */
172  FrameList *frame_list; /**< list of frame info for the first input */
173 } MixContext;
174 
175 #define OFFSET(x) offsetof(MixContext, x)
176 #define A AV_OPT_FLAG_AUDIO_PARAM
177 #define F AV_OPT_FLAG_FILTERING_PARAM
178 static const AVOption amix_options[] = {
179  { "inputs", "Number of inputs.",
180  OFFSET(nb_inputs), AV_OPT_TYPE_INT, { .i64 = 2 }, 1, 32, A|F },
181  { "duration", "How to determine the end-of-stream.",
182  OFFSET(duration_mode), AV_OPT_TYPE_INT, { .i64 = DURATION_LONGEST }, 0, 2, A|F, "duration" },
183  { "longest", "Duration of longest input.", 0, AV_OPT_TYPE_CONST, { .i64 = DURATION_LONGEST }, INT_MIN, INT_MAX, A|F, "duration" },
184  { "shortest", "Duration of shortest input.", 0, AV_OPT_TYPE_CONST, { .i64 = DURATION_SHORTEST }, INT_MIN, INT_MAX, A|F, "duration" },
185  { "first", "Duration of first input.", 0, AV_OPT_TYPE_CONST, { .i64 = DURATION_FIRST }, INT_MIN, INT_MAX, A|F, "duration" },
186  { "dropout_transition", "Transition time, in seconds, for volume "
187  "renormalization when an input stream ends.",
188  OFFSET(dropout_transition), AV_OPT_TYPE_FLOAT, { .dbl = 2.0 }, 0, INT_MAX, A|F },
189  { NULL },
190 };
191 
193 
194 /**
195  * Update the scaling factors to apply to each input during mixing.
196  *
197  * This balances the full volume range between active inputs and handles
198  * volume transitions when EOF is encountered on an input but mixing continues
199  * with the remaining inputs.
200  */
202 {
203  int i;
204 
205  if (s->scale_norm > s->active_inputs) {
206  s->scale_norm -= nb_samples / (s->dropout_transition * s->sample_rate);
208  }
209 
210  for (i = 0; i < s->nb_inputs; i++) {
211  if (s->input_state[i] == INPUT_ON)
212  s->input_scale[i] = 1.0f / s->scale_norm;
213  else
214  s->input_scale[i] = 0.0f;
215  }
216 }
217 
218 static int config_output(AVFilterLink *outlink)
219 {
220  AVFilterContext *ctx = outlink->src;
221  MixContext *s = ctx->priv;
222  int i;
223  char buf[64];
224 
225  s->planar = av_sample_fmt_is_planar(outlink->format);
226  s->sample_rate = outlink->sample_rate;
227  outlink->time_base = (AVRational){ 1, outlink->sample_rate };
229 
230  s->frame_list = av_mallocz(sizeof(*s->frame_list));
231  if (!s->frame_list)
232  return AVERROR(ENOMEM);
233 
234  s->fifos = av_mallocz(s->nb_inputs * sizeof(*s->fifos));
235  if (!s->fifos)
236  return AVERROR(ENOMEM);
237 
239  for (i = 0; i < s->nb_inputs; i++) {
240  s->fifos[i] = av_audio_fifo_alloc(outlink->format, s->nb_channels, 1024);
241  if (!s->fifos[i])
242  return AVERROR(ENOMEM);
243  }
244 
246  if (!s->input_state)
247  return AVERROR(ENOMEM);
248  memset(s->input_state, INPUT_ON, s->nb_inputs);
249  s->active_inputs = s->nb_inputs;
250 
251  s->input_scale = av_mallocz(s->nb_inputs * sizeof(*s->input_scale));
252  if (!s->input_scale)
253  return AVERROR(ENOMEM);
254  s->scale_norm = s->active_inputs;
255  calculate_scales(s, 0);
256 
257  av_get_channel_layout_string(buf, sizeof(buf), -1, outlink->channel_layout);
258 
259  av_log(ctx, AV_LOG_VERBOSE,
260  "inputs:%d fmt:%s srate:%d cl:%s\n", s->nb_inputs,
261  av_get_sample_fmt_name(outlink->format), outlink->sample_rate, buf);
262 
263  return 0;
264 }
265 
266 /**
267  * Read samples from the input FIFOs, mix, and write to the output link.
268  */
269 static int output_frame(AVFilterLink *outlink, int nb_samples)
270 {
271  AVFilterContext *ctx = outlink->src;
272  MixContext *s = ctx->priv;
273  AVFilterBufferRef *out_buf, *in_buf;
274  int i;
275 
276  calculate_scales(s, nb_samples);
277 
278  out_buf = ff_get_audio_buffer(outlink, AV_PERM_WRITE, nb_samples);
279  if (!out_buf)
280  return AVERROR(ENOMEM);
281 
282  in_buf = ff_get_audio_buffer(outlink, AV_PERM_WRITE, nb_samples);
283  if (!in_buf) {
284  avfilter_unref_buffer(out_buf);
285  return AVERROR(ENOMEM);
286  }
287 
288  for (i = 0; i < s->nb_inputs; i++) {
289  if (s->input_state[i] == INPUT_ON) {
290  int planes, plane_size, p;
291 
292  av_audio_fifo_read(s->fifos[i], (void **)in_buf->extended_data,
293  nb_samples);
294 
295  planes = s->planar ? s->nb_channels : 1;
296  plane_size = nb_samples * (s->planar ? 1 : s->nb_channels);
297  plane_size = FFALIGN(plane_size, 16);
298 
299  for (p = 0; p < planes; p++) {
300  s->fdsp.vector_fmac_scalar((float *)out_buf->extended_data[p],
301  (float *) in_buf->extended_data[p],
302  s->input_scale[i], plane_size);
303  }
304  }
305  }
306  avfilter_unref_buffer(in_buf);
307 
308  out_buf->pts = s->next_pts;
309  if (s->next_pts != AV_NOPTS_VALUE)
310  s->next_pts += nb_samples;
311 
312  return ff_filter_frame(outlink, out_buf);
313 }
314 
315 /**
316  * Returns the smallest number of samples available in the input FIFOs other
317  * than that of the first input.
318  */
320 {
321  int i;
322  int available_samples = INT_MAX;
323 
324  av_assert0(s->nb_inputs > 1);
325 
326  for (i = 1; i < s->nb_inputs; i++) {
327  int nb_samples;
328  if (s->input_state[i] == INPUT_OFF)
329  continue;
330  nb_samples = av_audio_fifo_size(s->fifos[i]);
331  available_samples = FFMIN(available_samples, nb_samples);
332  }
333  if (available_samples == INT_MAX)
334  return 0;
335  return available_samples;
336 }
337 
338 /**
339  * Requests a frame, if needed, from each input link other than the first.
340  */
341 static int request_samples(AVFilterContext *ctx, int min_samples)
342 {
343  MixContext *s = ctx->priv;
344  int i, ret;
345 
346  av_assert0(s->nb_inputs > 1);
347 
348  for (i = 1; i < s->nb_inputs; i++) {
349  ret = 0;
350  if (s->input_state[i] == INPUT_OFF)
351  continue;
352  while (!ret && av_audio_fifo_size(s->fifos[i]) < min_samples)
353  ret = ff_request_frame(ctx->inputs[i]);
354  if (ret == AVERROR_EOF) {
355  if (av_audio_fifo_size(s->fifos[i]) == 0) {
356  s->input_state[i] = INPUT_OFF;
357  continue;
358  }
359  } else if (ret < 0)
360  return ret;
361  }
362  return 0;
363 }
364 
365 /**
366  * Calculates the number of active inputs and determines EOF based on the
367  * duration option.
368  *
369  * @return 0 if mixing should continue, or AVERROR_EOF if mixing should stop.
370  */
372 {
373  int i;
374  int active_inputs = 0;
375  for (i = 0; i < s->nb_inputs; i++)
376  active_inputs += !!(s->input_state[i] != INPUT_OFF);
377  s->active_inputs = active_inputs;
378 
379  if (!active_inputs ||
380  (s->duration_mode == DURATION_FIRST && s->input_state[0] == INPUT_OFF) ||
381  (s->duration_mode == DURATION_SHORTEST && active_inputs != s->nb_inputs))
382  return AVERROR_EOF;
383  return 0;
384 }
385 
386 static int request_frame(AVFilterLink *outlink)
387 {
388  AVFilterContext *ctx = outlink->src;
389  MixContext *s = ctx->priv;
390  int ret;
391  int wanted_samples, available_samples;
392 
393  ret = calc_active_inputs(s);
394  if (ret < 0)
395  return ret;
396 
397  if (s->input_state[0] == INPUT_OFF) {
398  ret = request_samples(ctx, 1);
399  if (ret < 0)
400  return ret;
401 
402  ret = calc_active_inputs(s);
403  if (ret < 0)
404  return ret;
405 
406  available_samples = get_available_samples(s);
407  if (!available_samples)
408  return AVERROR(EAGAIN);
409 
410  return output_frame(outlink, available_samples);
411  }
412 
413  if (s->frame_list->nb_frames == 0) {
414  ret = ff_request_frame(ctx->inputs[0]);
415  if (ret == AVERROR_EOF) {
416  s->input_state[0] = INPUT_OFF;
417  if (s->nb_inputs == 1)
418  return AVERROR_EOF;
419  else
420  return AVERROR(EAGAIN);
421  } else if (ret < 0)
422  return ret;
423  }
425 
426  wanted_samples = frame_list_next_frame_size(s->frame_list);
427 
428  if (s->active_inputs > 1) {
429  ret = request_samples(ctx, wanted_samples);
430  if (ret < 0)
431  return ret;
432 
433  ret = calc_active_inputs(s);
434  if (ret < 0)
435  return ret;
436  }
437 
438  if (s->active_inputs > 1) {
439  available_samples = get_available_samples(s);
440  if (!available_samples)
441  return AVERROR(EAGAIN);
442  available_samples = FFMIN(available_samples, wanted_samples);
443  } else {
444  available_samples = wanted_samples;
445  }
446 
448  frame_list_remove_samples(s->frame_list, available_samples);
449 
450  return output_frame(outlink, available_samples);
451 }
452 
453 static int filter_frame(AVFilterLink *inlink, AVFilterBufferRef *buf)
454 {
455  AVFilterContext *ctx = inlink->dst;
456  MixContext *s = ctx->priv;
457  AVFilterLink *outlink = ctx->outputs[0];
458  int i, ret = 0;
459 
460  for (i = 0; i < ctx->nb_inputs; i++)
461  if (ctx->inputs[i] == inlink)
462  break;
463  if (i >= ctx->nb_inputs) {
464  av_log(ctx, AV_LOG_ERROR, "unknown input link\n");
465  ret = AVERROR(EINVAL);
466  goto fail;
467  }
468 
469  if (i == 0) {
470  int64_t pts = av_rescale_q(buf->pts, inlink->time_base,
471  outlink->time_base);
472  ret = frame_list_add_frame(s->frame_list, buf->audio->nb_samples, pts);
473  if (ret < 0)
474  goto fail;
475  }
476 
477  ret = av_audio_fifo_write(s->fifos[i], (void **)buf->extended_data,
478  buf->audio->nb_samples);
479 
480 fail:
482 
483  return ret;
484 }
485 
486 static int init(AVFilterContext *ctx, const char *args)
487 {
488  MixContext *s = ctx->priv;
489  int i, ret;
490 
491  s->class = &amix_class;
493 
494  if ((ret = av_set_options_string(s, args, "=", ":")) < 0)
495  return ret;
496  av_opt_free(s);
497 
498  for (i = 0; i < s->nb_inputs; i++) {
499  char name[32];
500  AVFilterPad pad = { 0 };
501 
502  snprintf(name, sizeof(name), "input%d", i);
503  pad.type = AVMEDIA_TYPE_AUDIO;
504  pad.name = av_strdup(name);
506 
507  ff_insert_inpad(ctx, i, &pad);
508  }
509 
510  avpriv_float_dsp_init(&s->fdsp, 0);
511 
512  return 0;
513 }
514 
515 static void uninit(AVFilterContext *ctx)
516 {
517  int i;
518  MixContext *s = ctx->priv;
519 
520  if (s->fifos) {
521  for (i = 0; i < s->nb_inputs; i++)
522  av_audio_fifo_free(s->fifos[i]);
523  av_freep(&s->fifos);
524  }
526  av_freep(&s->frame_list);
527  av_freep(&s->input_state);
528  av_freep(&s->input_scale);
529 
530  for (i = 0; i < ctx->nb_inputs; i++)
531  av_freep(&ctx->input_pads[i].name);
532 }
533 
535 {
537  ff_add_format(&formats, AV_SAMPLE_FMT_FLT);
539  ff_set_common_formats(ctx, formats);
542  return 0;
543 }
544 
546  {
547  .name = "default",
548  .type = AVMEDIA_TYPE_AUDIO,
549  .config_props = config_output,
550  .request_frame = request_frame
551  },
552  { NULL }
553 };
554 
556  .name = "amix",
557  .description = NULL_IF_CONFIG_SMALL("Audio mixing."),
558  .priv_size = sizeof(MixContext),
559 
560  .init = init,
561  .uninit = uninit,
563 
564  .inputs = NULL,
565  .outputs = avfilter_af_amix_outputs,
566  .priv_class = &amix_class,
567 };