FFmpeg
af_speechnorm.c
/*
 * Copyright (c) 2020 Paul B Mahol
 *
 * Speech Normalizer
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * Speech Normalizer
 */
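
/*
 * Illustrative usage (the option values here are examples, not a
 * recommendation):
 *
 *     ffmpeg -i input.wav -af speechnorm=e=6.25:r=0.00001:l=1 output.wav
 *
 * which allows quiet speech to be expanded by up to 6.25x, ramps the
 * gain very slowly between half-periods, and links all channels so they
 * share one gain.
 */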

#include <float.h>

#include "libavutil/avassert.h"
#include "libavutil/opt.h"

#define FF_BUFQUEUE_SIZE (1024)
#include "bufferqueue.h"

#include "audio.h"
#include "avfilter.h"
#include "filters.h"
#include "internal.h"

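/*
 * MAX_ITEMS bounds the ring buffer of analyzed half-periods; 882000 is
 * 20 * 44100, i.e. roughly 20 seconds of worst-case one-sample
 * half-periods at 44.1 kHz. MIN_PEAK is the magnitude of the smallest
 * nonzero 16-bit sample; half-periods quieter than this are merged with
 * their neighbours rather than committed for gain analysis.
 */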
#define MAX_ITEMS  882000
#define MIN_PEAK (1. / 32768.)

typedef struct PeriodItem {
    int size;
    int type;
    double max_peak;
} PeriodItem;

typedef struct ChannelContext {
    int state;
    int bypass;
    PeriodItem pi[MAX_ITEMS];
    double gain_state;
    double pi_max_peak;
    int pi_start;
    int pi_end;
    int pi_size;
} ChannelContext;

typedef struct SpeechNormalizerContext {
    const AVClass *class;

    double peak_value;
    double max_expansion;
    double max_compression;
    double threshold_value;
    double raise_amount;
    double fall_amount;
    uint64_t channels;
    int invert;
    int link;

    ChannelContext *cc;
    double prev_gain;

    int max_period;
    int eof;
    int64_t pts;

    struct FFBufQueue queue;

    void (*analyze_channel)(AVFilterContext *ctx, ChannelContext *cc,
                            const uint8_t *srcp, int nb_samples);
    void (*filter_channels[2])(AVFilterContext *ctx,
                               AVFrame *in, int nb_samples);
} SpeechNormalizerContext;

#define OFFSET(x) offsetof(SpeechNormalizerContext, x)
#define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM

static const AVOption speechnorm_options[] = {
    { "peak", "set the peak value", OFFSET(peak_value), AV_OPT_TYPE_DOUBLE, {.dbl=0.95}, 0.0, 1.0, FLAGS },
    { "p",    "set the peak value", OFFSET(peak_value), AV_OPT_TYPE_DOUBLE, {.dbl=0.95}, 0.0, 1.0, FLAGS },
    { "expansion", "set the max expansion factor", OFFSET(max_expansion), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
    { "e",         "set the max expansion factor", OFFSET(max_expansion), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
    { "compression", "set the max compression factor", OFFSET(max_compression), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
    { "c",           "set the max compression factor", OFFSET(max_compression), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
    { "threshold", "set the threshold value", OFFSET(threshold_value), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0.0, 1.0, FLAGS },
    { "t",         "set the threshold value", OFFSET(threshold_value), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0.0, 1.0, FLAGS },
    { "raise", "set the expansion raising amount", OFFSET(raise_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
    { "r",     "set the expansion raising amount", OFFSET(raise_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
    { "fall", "set the compression raising amount", OFFSET(fall_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
    { "f",    "set the compression raising amount", OFFSET(fall_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
    { "channels", "set channels to filter", OFFSET(channels), AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64=-1}, INT64_MIN, INT64_MAX, FLAGS },
    { "h",        "set channels to filter", OFFSET(channels), AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64=-1}, INT64_MIN, INT64_MAX, FLAGS },
    { "invert", "set inverted filtering", OFFSET(invert), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
    { "i",      "set inverted filtering", OFFSET(invert), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
    { "link", "set linked channels filtering", OFFSET(link), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
    { "l",    "set linked channels filtering", OFFSET(link), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
    { NULL }
};

AVFILTER_DEFINE_CLASS(speechnorm);

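/* Advertise planar float/double samples with any channel count and any
 * sample rate; the analysis and filtering kernels below are instantiated
 * for both sample formats. */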
static int query_formats(AVFilterContext *ctx)
{
    AVFilterFormats *formats;
    AVFilterChannelLayouts *layouts;
    static const enum AVSampleFormat sample_fmts[] = {
        AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_DBLP,
        AV_SAMPLE_FMT_NONE
    };
    int ret;

    layouts = ff_all_channel_counts();
    if (!layouts)
        return AVERROR(ENOMEM);
    ret = ff_set_common_channel_layouts(ctx, layouts);
    if (ret < 0)
        return ret;

    formats = ff_make_format_list(sample_fmts);
    if (!formats)
        return AVERROR(ENOMEM);
    ret = ff_set_common_formats(ctx, formats);
    if (ret < 0)
        return ret;

    formats = ff_all_samplerates();
    if (!formats)
        return AVERROR(ENOMEM);
    return ff_set_common_samplerates(ctx, formats);
}

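/* Sum the sizes of the committed (type != 0) period items queued between
 * start and end of the ring buffer, on top of the samples remaining in
 * the item currently being consumed. */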
static int get_pi_samples(PeriodItem *pi, int start, int end, int remain)
{
    int sum;

    if (pi[start].type == 0)
        return remain;

    sum = remain;
    while (start != end) {
        start++;
        if (start >= MAX_ITEMS)
            start = 0;
        if (pi[start].type == 0)
            break;
        av_assert0(pi[start].size > 0);
        sum += pi[start].size;
    }

    return sum;
}

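/* Return the number of fully analyzed samples on the channel with the
 * fewest of them; a queued frame can only be filtered once this count
 * covers it completely. */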
static int available_samples(AVFilterContext *ctx)
{
    SpeechNormalizerContext *s = ctx->priv;
    AVFilterLink *inlink = ctx->inputs[0];
    int min_pi_nb_samples;

    min_pi_nb_samples = get_pi_samples(s->cc[0].pi, s->cc[0].pi_start, s->cc[0].pi_end, s->cc[0].pi_size);
    for (int ch = 1; ch < inlink->channels && min_pi_nb_samples > 0; ch++) {
        ChannelContext *cc = &s->cc[ch];

        min_pi_nb_samples = FFMIN(min_pi_nb_samples, get_pi_samples(cc->pi, cc->pi_start, cc->pi_end, cc->pi_size));
    }

    return min_pi_nb_samples;
}

static void consume_pi(ChannelContext *cc, int nb_samples)
{
    if (cc->pi_size >= nb_samples) {
        cc->pi_size -= nb_samples;
    } else {
        av_assert0(0);
    }
}

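/* Compute the next gain for one period item: while the peak classifies the
 * item as speech (or as noise, when invert is set), the gain ramps up by
 * raise_amount towards the allowed expansion; otherwise it falls by
 * fall_amount towards 1/max_compression. */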
static double next_gain(AVFilterContext *ctx, double pi_max_peak, int bypass, double state)
{
    SpeechNormalizerContext *s = ctx->priv;
    const double expansion = FFMIN(s->max_expansion, s->peak_value / pi_max_peak);
    const double compression = 1. / s->max_compression;
    const int type = s->invert ? pi_max_peak <= s->threshold_value : pi_max_peak >= s->threshold_value;

    if (bypass) {
        return 1.;
    } else if (type) {
        return FFMIN(expansion, state + s->raise_amount);
    } else {
        return FFMIN(expansion, FFMAX(compression, state - s->fall_amount));
    }
}

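/* Once the current period item is fully consumed, pop the next one from
 * the ring buffer and update the channel's gain state for it. */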
static void next_pi(AVFilterContext *ctx, ChannelContext *cc, int bypass)
{
    av_assert0(cc->pi_size >= 0);
    if (cc->pi_size == 0) {
        SpeechNormalizerContext *s = ctx->priv;
        int start = cc->pi_start;

        av_assert0(cc->pi[start].size > 0);
        av_assert0(cc->pi[start].type > 0 || s->eof);
        cc->pi_size = cc->pi[start].size;
        cc->pi_max_peak = cc->pi[start].max_peak;
        av_assert0(cc->pi_start != cc->pi_end || s->eof);
        start++;
        if (start >= MAX_ITEMS)
            start = 0;
        cc->pi_start = start;
        cc->gain_state = next_gain(ctx, cc->pi_max_peak, bypass, cc->gain_state);
    }
}

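/* Look ahead over queued period items, without consuming them, and return
 * the smallest gain that would occur within the next max_size samples;
 * used by the linked-channels mode below. */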
static double min_gain(AVFilterContext *ctx, ChannelContext *cc, int max_size)
{
    SpeechNormalizerContext *s = ctx->priv;
    double min_gain = s->max_expansion;
    double gain_state = cc->gain_state;
    int size = cc->pi_size;
    int idx = cc->pi_start;

    min_gain = FFMIN(min_gain, gain_state);
    while (size <= max_size) {
        if (idx == cc->pi_end)
            break;
        gain_state = next_gain(ctx, cc->pi[idx].max_peak, 0, gain_state);
        min_gain = FFMIN(min_gain, gain_state);
        size += cc->pi[idx].size;
        idx++;
        if (idx >= MAX_ITEMS)
            idx = 0;
    }

    return min_gain;
}

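/* Analysis pass: split each channel into half-periods at zero crossings,
 * recording the size and absolute peak of each. Half-periods quieter than
 * MIN_PEAK keep accumulating until either the peak or max_period is
 * exceeded, at which point the item is committed (type = 1). */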
#define ANALYZE_CHANNEL(name, ptype, zero)                                \
static void analyze_channel_## name (AVFilterContext *ctx, ChannelContext *cc, \
                                     const uint8_t *srcp, int nb_samples) \
{                                                                         \
    SpeechNormalizerContext *s = ctx->priv;                               \
    const ptype *src = (const ptype *)srcp;                               \
    int n = 0;                                                            \
                                                                          \
    if (cc->state < 0)                                                    \
        cc->state = src[0] >= zero;                                       \
                                                                          \
    while (n < nb_samples) {                                              \
        if ((cc->state != (src[n] >= zero)) ||                            \
            (cc->pi[cc->pi_end].size > s->max_period)) {                  \
            double max_peak = cc->pi[cc->pi_end].max_peak;                \
            int state = cc->state;                                        \
            cc->state = src[n] >= zero;                                   \
            av_assert0(cc->pi[cc->pi_end].size > 0);                      \
            if (cc->pi[cc->pi_end].max_peak >= MIN_PEAK ||                \
                cc->pi[cc->pi_end].size > s->max_period) {                \
                cc->pi[cc->pi_end].type = 1;                              \
                cc->pi_end++;                                             \
                if (cc->pi_end >= MAX_ITEMS)                              \
                    cc->pi_end = 0;                                       \
                if (cc->state != state)                                   \
                    cc->pi[cc->pi_end].max_peak = DBL_MIN;                \
                else                                                      \
                    cc->pi[cc->pi_end].max_peak = max_peak;               \
                cc->pi[cc->pi_end].type = 0;                              \
                cc->pi[cc->pi_end].size = 0;                              \
                av_assert0(cc->pi_end != cc->pi_start);                   \
            }                                                             \
        }                                                                 \
                                                                          \
        if (cc->state) {                                                  \
            while (src[n] >= zero) {                                      \
                cc->pi[cc->pi_end].max_peak = FFMAX(cc->pi[cc->pi_end].max_peak, src[n]); \
                cc->pi[cc->pi_end].size++;                                \
                n++;                                                      \
                if (n >= nb_samples)                                      \
                    break;                                                \
            }                                                             \
        } else {                                                          \
            while (src[n] < zero) {                                       \
                cc->pi[cc->pi_end].max_peak = FFMAX(cc->pi[cc->pi_end].max_peak, -src[n]); \
                cc->pi[cc->pi_end].size++;                                \
                n++;                                                      \
                if (n >= nb_samples)                                      \
                    break;                                                \
            }                                                             \
        }                                                                 \
    }                                                                     \
}

ANALYZE_CHANNEL(dbl, double, 0.0)
ANALYZE_CHANNEL(flt, float, 0.f)

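/* Per-channel filtering: walk the analyzed period items and multiply each
 * half-period by its own gain, so the gain only changes at zero crossings. */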
#define FILTER_CHANNELS(name, ptype)                                      \
static void filter_channels_## name (AVFilterContext *ctx,                \
                                     AVFrame *in, int nb_samples)         \
{                                                                         \
    SpeechNormalizerContext *s = ctx->priv;                               \
    AVFilterLink *inlink = ctx->inputs[0];                                \
                                                                          \
    for (int ch = 0; ch < inlink->channels; ch++) {                       \
        ChannelContext *cc = &s->cc[ch];                                  \
        ptype *dst = (ptype *)in->extended_data[ch];                      \
        const int bypass = !(av_channel_layout_extract_channel(inlink->channel_layout, ch) & s->channels); \
        int n = 0;                                                        \
                                                                          \
        while (n < nb_samples) {                                          \
            ptype gain;                                                   \
            int size;                                                     \
                                                                          \
            next_pi(ctx, cc, bypass);                                     \
            size = FFMIN(nb_samples - n, cc->pi_size);                    \
            av_assert0(size > 0);                                         \
            gain = cc->gain_state;                                        \
            consume_pi(cc, size);                                         \
            for (int i = n; i < n + size; i++)                            \
                dst[i] *= gain;                                           \
            n += size;                                                    \
        }                                                                 \
    }                                                                     \
}

FILTER_CHANNELS(dbl, double)
FILTER_CHANNELS(flt, float)

static double lerp(double min, double max, double mix)
{
    return min + (max - min) * mix;
}

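/* Linked filtering: all non-bypassed channels share one gain, the minimum
 * over the channels' look-ahead gains, interpolated from the previous
 * block's gain with lerp() to avoid steps at block boundaries. */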
#define FILTER_LINK_CHANNELS(name, ptype)                                 \
static void filter_link_channels_## name (AVFilterContext *ctx,           \
                                          AVFrame *in, int nb_samples)    \
{                                                                         \
    SpeechNormalizerContext *s = ctx->priv;                               \
    AVFilterLink *inlink = ctx->inputs[0];                                \
    int n = 0;                                                            \
                                                                          \
    while (n < nb_samples) {                                              \
        int min_size = nb_samples - n;                                    \
        int max_size = 1;                                                 \
        ptype gain = s->max_expansion;                                    \
                                                                          \
        for (int ch = 0; ch < inlink->channels; ch++) {                   \
            ChannelContext *cc = &s->cc[ch];                              \
                                                                          \
            cc->bypass = !(av_channel_layout_extract_channel(inlink->channel_layout, ch) & s->channels); \
                                                                          \
            next_pi(ctx, cc, cc->bypass);                                 \
            min_size = FFMIN(min_size, cc->pi_size);                      \
            max_size = FFMAX(max_size, cc->pi_size);                      \
        }                                                                 \
                                                                          \
        av_assert0(min_size > 0);                                         \
        for (int ch = 0; ch < inlink->channels; ch++) {                   \
            ChannelContext *cc = &s->cc[ch];                              \
                                                                          \
            if (cc->bypass)                                               \
                continue;                                                 \
            gain = FFMIN(gain, min_gain(ctx, cc, max_size));              \
        }                                                                 \
                                                                          \
        for (int ch = 0; ch < inlink->channels; ch++) {                   \
            ChannelContext *cc = &s->cc[ch];                              \
            ptype *dst = (ptype *)in->extended_data[ch];                  \
                                                                          \
            consume_pi(cc, min_size);                                     \
            if (cc->bypass)                                               \
                continue;                                                 \
                                                                          \
            for (int i = n; i < n + min_size; i++) {                      \
                ptype g = lerp(s->prev_gain, gain, (i - n) / (double)min_size); \
                dst[i] *= g;                                              \
            }                                                             \
        }                                                                 \
                                                                          \
        s->prev_gain = gain;                                              \
        n += min_size;                                                    \
    }                                                                     \
}

FILTER_LINK_CHANNELS(dbl, double)
FILTER_LINK_CHANNELS(flt, float)

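/* Output path: while enough analyzed samples are queued (or at EOF), pop
 * one frame, apply the selected filtering mode and send it on; otherwise
 * pull new input frames and feed them to the analysis pass. */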
static int filter_frame(AVFilterContext *ctx)
{
    SpeechNormalizerContext *s = ctx->priv;
    AVFilterLink *outlink = ctx->outputs[0];
    AVFilterLink *inlink = ctx->inputs[0];
    int ret;

    while (s->queue.available > 0) {
        int min_pi_nb_samples;
        AVFrame *in;

        in = ff_bufqueue_peek(&s->queue, 0);
        if (!in)
            break;

        min_pi_nb_samples = available_samples(ctx);
        if (min_pi_nb_samples < in->nb_samples && !s->eof)
            break;

        in = ff_bufqueue_get(&s->queue);

        av_frame_make_writable(in);

        s->filter_channels[s->link](ctx, in, in->nb_samples);

        s->pts = in->pts + in->nb_samples;

        return ff_filter_frame(outlink, in);
    }

    for (int f = 0; f < ff_inlink_queued_frames(inlink); f++) {
        AVFrame *in;

        ret = ff_inlink_consume_frame(inlink, &in);
        if (ret < 0)
            return ret;
        if (ret == 0)
            break;

        ff_bufqueue_add(ctx, &s->queue, in);

        for (int ch = 0; ch < inlink->channels; ch++) {
            ChannelContext *cc = &s->cc[ch];

            s->analyze_channel(ctx, cc, in->extended_data[ch], in->nb_samples);
        }
    }

    return 1;
}

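/* Activation callback driving the filter: forwards status, runs
 * filter_frame(), acknowledges EOF on the input and reschedules itself
 * while buffered data remains. */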
static int activate(AVFilterContext *ctx)
{
    AVFilterLink *inlink = ctx->inputs[0];
    AVFilterLink *outlink = ctx->outputs[0];
    SpeechNormalizerContext *s = ctx->priv;
    int ret, status;
    int64_t pts;

    FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);

    ret = filter_frame(ctx);
    if (ret <= 0)
        return ret;

    if (!s->eof && ff_inlink_acknowledge_status(inlink, &status, &pts)) {
        if (status == AVERROR_EOF)
            s->eof = 1;
    }

    if (s->eof && ff_inlink_queued_samples(inlink) == 0 &&
        s->queue.available == 0) {
        ff_outlink_set_status(outlink, AVERROR_EOF, s->pts);
        return 0;
    }

    if (s->queue.available > 0) {
        AVFrame *in = ff_bufqueue_peek(&s->queue, 0);
        const int nb_samples = available_samples(ctx);

        if (nb_samples >= in->nb_samples || s->eof) {
            ff_filter_set_ready(ctx, 10);
            return 0;
        }
    }

    FF_FILTER_FORWARD_WANTED(outlink, inlink);

    return FFERROR_NOT_READY;
}

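/* Input configuration: a half-period longer than a tenth of a second is
 * force-committed, per-channel state is allocated, and the kernels for
 * the negotiated sample format are selected. */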
static int config_input(AVFilterLink *inlink)
{
    AVFilterContext *ctx = inlink->dst;
    SpeechNormalizerContext *s = ctx->priv;

    s->max_period = inlink->sample_rate / 10;

    s->prev_gain = 1.;
    s->cc = av_calloc(inlink->channels, sizeof(*s->cc));
    if (!s->cc)
        return AVERROR(ENOMEM);

    for (int ch = 0; ch < inlink->channels; ch++) {
        ChannelContext *cc = &s->cc[ch];

        cc->state = -1;
        cc->gain_state = 1.;
    }

    switch (inlink->format) {
    case AV_SAMPLE_FMT_FLTP:
        s->analyze_channel = analyze_channel_flt;
        s->filter_channels[0] = filter_channels_flt;
        s->filter_channels[1] = filter_link_channels_flt;
        break;
    case AV_SAMPLE_FMT_DBLP:
        s->analyze_channel = analyze_channel_dbl;
        s->filter_channels[0] = filter_channels_dbl;
        s->filter_channels[1] = filter_link_channels_dbl;
        break;
    default:
        av_assert0(0);
    }

    return 0;
}

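/* Runtime option handling; when the "link" mode is toggled mid-stream, the
 * shared gain history is reset to unity. */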
static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
                           char *res, int res_len, int flags)
{
    SpeechNormalizerContext *s = ctx->priv;
    int link = s->link;
    int ret;

    ret = ff_filter_process_command(ctx, cmd, args, res, res_len, flags);
    if (ret < 0)
        return ret;
    if (link != s->link)
        s->prev_gain = 1.;

    return 0;
}

static av_cold void uninit(AVFilterContext *ctx)
{
    SpeechNormalizerContext *s = ctx->priv;

    ff_bufqueue_discard_all(&s->queue);
    av_freep(&s->cc);
}

static const AVFilterPad inputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_AUDIO,
        .config_props = config_input,
    },
    { NULL }
};

static const AVFilterPad outputs[] = {
    {
        .name = "default",
        .type = AVMEDIA_TYPE_AUDIO,
    },
    { NULL }
};

AVFilter ff_af_speechnorm = {
    .name            = "speechnorm",
    .description     = NULL_IF_CONFIG_SMALL("Speech Normalizer."),
    .query_formats   = query_formats,
    .priv_size       = sizeof(SpeechNormalizerContext),
    .priv_class      = &speechnorm_class,
    .activate        = activate,
    .uninit          = uninit,
    .inputs          = inputs,
    .outputs         = outputs,
    .process_command = process_command,
};