FFmpeg
af_speechnorm.c
/*
 * Copyright (c) 2020 Paul B Mahol
 *
 * Speech Normalizer
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * Speech Normalizer
 */

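/*
 * Overview: the filter splits each channel into half-periods (runs of
 * samples between zero crossings), records the absolute peak of each
 * run, and applies a smoothly evolving per-period gain: periods on one
 * side of the threshold have their gain raised toward peak_value
 * (bounded by the expansion factor), periods on the other side have it
 * lowered (bounded by the compression factor).
 *
 * A typical invocation (the option values here are illustrative, not
 * taken from this file) might be:
 *
 *     ffmpeg -i voice.wav -af speechnorm=e=4:r=0.0005:l=1 out.wav
 *
 * where "e" caps the expansion factor, "r" limits how fast the gain
 * may rise per period, and "l" links all channels to a common gain.
 */
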
#include <float.h>

#include "libavutil/avassert.h"
#include "libavutil/channel_layout.h"
#include "libavutil/opt.h"

#define FF_BUFQUEUE_SIZE (1024)
#include "bufferqueue.h"

#include "audio.h"
#include "avfilter.h"
#include "filters.h"
#include "internal.h"

#define MAX_ITEMS  882000
#define MIN_PEAK (1. / 32768.)

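/*
 * One half-period of the waveform: a run of consecutive samples that
 * stay on the same side of zero. "size" is the run length in samples,
 * "max_peak" the largest absolute sample value seen in it, and "type"
 * flags whether the period has been closed by the analysis pass (1)
 * or is still accumulating (0).
 */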
typedef struct PeriodItem {
    int size;
    int type;
    double max_peak;
} PeriodItem;

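/*
 * Per-channel state: "pi" is a ring buffer of half-periods delimited
 * by pi_start/pi_end, "pi_size" counts the samples left to consume
 * from the current period, and "gain_state" carries the smoothed gain
 * across periods. "state" tracks the current waveform sign.
 */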
typedef struct ChannelContext {
    int state;
    int bypass;
    PeriodItem pi[MAX_ITEMS];
    double gain_state;
    double pi_max_peak;
    int pi_start;
    int pi_end;
    int pi_size;
} ChannelContext;

typedef struct SpeechNormalizerContext {
    const AVClass *class;

    double peak_value;
    double max_expansion;
    double max_compression;
    double threshold_value;
    double raise_amount;
    double fall_amount;
    char *ch_layout_str;
    AVChannelLayout ch_layout;
    int invert;
    int link;

    ChannelContext *cc;
    double prev_gain;

    int max_period;
    int eof;
    int64_t pts;

    struct FFBufQueue queue;

    void (*analyze_channel)(AVFilterContext *ctx, ChannelContext *cc,
                            const uint8_t *srcp, int nb_samples);
    void (*filter_channels[2])(AVFilterContext *ctx,
                               AVFrame *in, AVFrame *out, int nb_samples);
} SpeechNormalizerContext;

#define OFFSET(x) offsetof(SpeechNormalizerContext, x)
#define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM

static const AVOption speechnorm_options[] = {
    { "peak", "set the peak value", OFFSET(peak_value), AV_OPT_TYPE_DOUBLE, {.dbl=0.95}, 0.0, 1.0, FLAGS },
    { "p", "set the peak value", OFFSET(peak_value), AV_OPT_TYPE_DOUBLE, {.dbl=0.95}, 0.0, 1.0, FLAGS },
    { "expansion", "set the max expansion factor", OFFSET(max_expansion), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
    { "e", "set the max expansion factor", OFFSET(max_expansion), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
    { "compression", "set the max compression factor", OFFSET(max_compression), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
    { "c", "set the max compression factor", OFFSET(max_compression), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
    { "threshold", "set the threshold value", OFFSET(threshold_value), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0.0, 1.0, FLAGS },
    { "t", "set the threshold value", OFFSET(threshold_value), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0.0, 1.0, FLAGS },
    { "raise", "set the expansion raising amount", OFFSET(raise_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
    { "r", "set the expansion raising amount", OFFSET(raise_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
    { "fall", "set the compression raising amount", OFFSET(fall_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
    { "f", "set the compression raising amount", OFFSET(fall_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
    { "channels", "set channels to filter", OFFSET(ch_layout_str), AV_OPT_TYPE_STRING, {.str="all"}, 0, 0, FLAGS },
    { "h", "set channels to filter", OFFSET(ch_layout_str), AV_OPT_TYPE_STRING, {.str="all"}, 0, 0, FLAGS },
    { "invert", "set inverted filtering", OFFSET(invert), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
    { "i", "set inverted filtering", OFFSET(invert), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
    { "link", "set linked channels filtering", OFFSET(link), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
    { "l", "set linked channels filtering", OFFSET(link), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
    { NULL }
};

AVFILTER_DEFINE_CLASS(speechnorm);

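/*
 * Return how many samples are ready for filtering: the "remain"
 * samples left in the current period plus the sizes of all following
 * periods in the ring buffer that the analysis pass has already
 * closed (type != 0).
 */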
static int get_pi_samples(PeriodItem *pi, int start, int end, int remain)
{
    int sum;

    if (pi[start].type == 0)
        return remain;

    sum = remain;
    while (start != end) {
        start++;
        if (start >= MAX_ITEMS)
            start = 0;
        if (pi[start].type == 0)
            break;
        av_assert1(pi[start].size > 0);
        sum += pi[start].size;
    }

    return sum;
}

static int available_samples(AVFilterContext *ctx)
{
    SpeechNormalizerContext *s = ctx->priv;
    AVFilterLink *inlink = ctx->inputs[0];
    int min_pi_nb_samples;

    min_pi_nb_samples = get_pi_samples(s->cc[0].pi, s->cc[0].pi_start, s->cc[0].pi_end, s->cc[0].pi_size);
    for (int ch = 1; ch < inlink->ch_layout.nb_channels && min_pi_nb_samples > 0; ch++) {
        ChannelContext *cc = &s->cc[ch];

        min_pi_nb_samples = FFMIN(min_pi_nb_samples, get_pi_samples(cc->pi, cc->pi_start, cc->pi_end, cc->pi_size));
    }

    return min_pi_nb_samples;
}

static void consume_pi(ChannelContext *cc, int nb_samples)
{
    if (cc->pi_size >= nb_samples) {
        cc->pi_size -= nb_samples;
    } else {
        av_assert1(0);
    }
}

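/*
 * Compute the gain for the next period. "expansion" caps the gain so
 * the period's peak lands at most on peak_value; periods on the active
 * side of the threshold have their gain raised by at most raise_amount,
 * the rest fall by fall_amount, floored at 1/max_compression. Bypassed
 * channels keep unity gain.
 */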
static double next_gain(AVFilterContext *ctx, double pi_max_peak, int bypass, double state)
{
    SpeechNormalizerContext *s = ctx->priv;
    const double expansion = FFMIN(s->max_expansion, s->peak_value / pi_max_peak);
    const double compression = 1. / s->max_compression;
    const int type = s->invert ? pi_max_peak <= s->threshold_value : pi_max_peak >= s->threshold_value;

    if (bypass) {
        return 1.;
    } else if (type) {
        return FFMIN(expansion, state + s->raise_amount);
    } else {
        return FFMIN(expansion, FFMAX(compression, state - s->fall_amount));
    }
}

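/*
 * Once the current period is fully consumed, pop the next analyzed
 * period from the ring buffer and advance the smoothed gain state.
 */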
static void next_pi(AVFilterContext *ctx, ChannelContext *cc, int bypass)
{
    av_assert1(cc->pi_size >= 0);
    if (cc->pi_size == 0) {
        SpeechNormalizerContext *s = ctx->priv;
        int start = cc->pi_start;

        av_assert1(cc->pi[start].size > 0);
        av_assert0(cc->pi[start].type > 0 || s->eof);
        cc->pi_size = cc->pi[start].size;
        cc->pi_max_peak = cc->pi[start].max_peak;
        av_assert1(cc->pi_start != cc->pi_end || s->eof);
        start++;
        if (start >= MAX_ITEMS)
            start = 0;
        cc->pi_start = start;
        cc->gain_state = next_gain(ctx, cc->pi_max_peak, bypass, cc->gain_state);
    }
}

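/*
 * Walk up to max_size samples ahead through the queued periods and
 * return the smallest gain the smoothing would reach; linked-channel
 * mode uses this so the loudest channel bounds the common gain.
 */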
static double min_gain(AVFilterContext *ctx, ChannelContext *cc, int max_size)
{
    SpeechNormalizerContext *s = ctx->priv;
    double min_gain = s->max_expansion;
    double gain_state = cc->gain_state;
    int size = cc->pi_size;
    int idx = cc->pi_start;

    min_gain = FFMIN(min_gain, gain_state);
    while (size <= max_size) {
        if (idx == cc->pi_end)
            break;
        gain_state = next_gain(ctx, cc->pi[idx].max_peak, 0, gain_state);
        min_gain = FFMIN(min_gain, gain_state);
        size += cc->pi[idx].size;
        idx++;
        if (idx >= MAX_ITEMS)
            idx = 0;
    }

    return min_gain;
}

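/*
 * Analysis pass, instantiated per sample format below: splits the
 * input into half-periods, closing a period when the signal crosses
 * zero or its length exceeds max_period. Runs whose peak never
 * reaches min_peak are merged into the next period rather than
 * closed, which avoids treating near-silent ripples as separate
 * periods.
 */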
#define ANALYZE_CHANNEL(name, ptype, zero, min_peak)                            \
static void analyze_channel_## name (AVFilterContext *ctx, ChannelContext *cc, \
                                     const uint8_t *srcp, int nb_samples)      \
{                                                                               \
    SpeechNormalizerContext *s = ctx->priv;                                     \
    const ptype *src = (const ptype *)srcp;                                     \
    const int max_period = s->max_period;                                       \
    PeriodItem *pi = (PeriodItem *)&cc->pi;                                     \
    int pi_end = cc->pi_end;                                                    \
    int n = 0;                                                                  \
                                                                                \
    if (cc->state < 0)                                                          \
        cc->state = src[0] >= zero;                                             \
                                                                                \
    while (n < nb_samples) {                                                    \
        ptype new_max_peak;                                                     \
        int new_size;                                                           \
                                                                                \
        if ((cc->state != (src[n] >= zero)) ||                                  \
            (pi[pi_end].size > max_period)) {                                   \
            ptype max_peak = pi[pi_end].max_peak;                               \
            int state = cc->state;                                              \
                                                                                \
            cc->state = src[n] >= zero;                                         \
            av_assert1(pi[pi_end].size > 0);                                    \
            if (max_peak >= min_peak ||                                         \
                pi[pi_end].size > max_period) {                                 \
                pi[pi_end].type = 1;                                            \
                pi_end++;                                                       \
                if (pi_end >= MAX_ITEMS)                                        \
                    pi_end = 0;                                                 \
                if (cc->state != state)                                         \
                    pi[pi_end].max_peak = DBL_MIN;                              \
                else                                                            \
                    pi[pi_end].max_peak = max_peak;                             \
                pi[pi_end].type = 0;                                            \
                pi[pi_end].size = 0;                                            \
                av_assert1(pi_end != cc->pi_start);                             \
            }                                                                   \
        }                                                                       \
                                                                                \
        new_max_peak = pi[pi_end].max_peak;                                     \
        new_size = pi[pi_end].size;                                             \
        if (cc->state) {                                                        \
            while (src[n] >= zero) {                                            \
                new_max_peak = FFMAX(new_max_peak, src[n]);                     \
                new_size++;                                                     \
                n++;                                                            \
                if (n >= nb_samples)                                            \
                    break;                                                      \
            }                                                                   \
        } else {                                                                \
            while (src[n] < zero) {                                             \
                new_max_peak = FFMAX(new_max_peak, -src[n]);                    \
                new_size++;                                                     \
                n++;                                                            \
                if (n >= nb_samples)                                            \
                    break;                                                      \
            }                                                                   \
        }                                                                       \
                                                                                \
        pi[pi_end].max_peak = new_max_peak;                                     \
        pi[pi_end].size = new_size;                                             \
    }                                                                           \
    cc->pi_end = pi_end;                                                        \
}

ANALYZE_CHANNEL(dbl, double, 0.0, MIN_PEAK)
ANALYZE_CHANNEL(flt, float, 0.f, (float)MIN_PEAK)

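/*
 * Independent-channel filtering: each channel consumes its own
 * periods and applies its own gain, held constant across each
 * half-period. Channels excluded by the "channels" option are
 * processed with unity gain.
 */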
#define FILTER_CHANNELS(name, ptype)                                            \
static void filter_channels_## name (AVFilterContext *ctx,                      \
                                     AVFrame *in, AVFrame *out, int nb_samples) \
{                                                                               \
    SpeechNormalizerContext *s = ctx->priv;                                     \
    AVFilterLink *inlink = ctx->inputs[0];                                      \
                                                                                \
    for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {                \
        ChannelContext *cc = &s->cc[ch];                                        \
        const ptype *src = (const ptype *)in->extended_data[ch];                \
        ptype *dst = (ptype *)out->extended_data[ch];                           \
        enum AVChannel channel = av_channel_layout_channel_from_index(&inlink->ch_layout, ch); \
        const int bypass = av_channel_layout_index_from_channel(&s->ch_layout, channel) < 0;   \
        int n = 0;                                                              \
                                                                                \
        while (n < nb_samples) {                                                \
            ptype gain;                                                         \
            int size;                                                           \
                                                                                \
            next_pi(ctx, cc, bypass);                                           \
            size = FFMIN(nb_samples - n, cc->pi_size);                          \
            av_assert1(size > 0);                                               \
            gain = cc->gain_state;                                              \
            consume_pi(cc, size);                                               \
            for (int i = n; !ctx->is_disabled && i < n + size; i++)             \
                dst[i] = src[i] * gain;                                         \
            n += size;                                                          \
        }                                                                       \
    }                                                                           \
}

FILTER_CHANNELS(dbl, double)
FILTER_CHANNELS(flt, float)

static double dlerp(double min, double max, double mix)
{
    return min + (max - min) * mix;
}

static float flerp(float min, float max, float mix)
{
    return min + (max - min) * mix;
}

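/*
 * Linked-channel filtering: all channels share one gain, the minimum
 * of min_gain() over the non-bypassed channels, and the output ramps
 * linearly from prev_gain to that value across the block so relative
 * channel balance is preserved.
 */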
#define FILTER_LINK_CHANNELS(name, ptype, tlerp)                                \
static void filter_link_channels_## name (AVFilterContext *ctx,                 \
                                          AVFrame *in, AVFrame *out,            \
                                          int nb_samples)                       \
{                                                                               \
    SpeechNormalizerContext *s = ctx->priv;                                     \
    AVFilterLink *inlink = ctx->inputs[0];                                      \
    int n = 0;                                                                  \
                                                                                \
    while (n < nb_samples) {                                                    \
        int min_size = nb_samples - n;                                          \
        int max_size = 1;                                                       \
        ptype gain = s->max_expansion;                                          \
                                                                                \
        for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {            \
            ChannelContext *cc = &s->cc[ch];                                    \
                                                                                \
            enum AVChannel channel = av_channel_layout_channel_from_index(&inlink->ch_layout, ch); \
            cc->bypass = av_channel_layout_index_from_channel(&s->ch_layout, channel) < 0;         \
                                                                                \
            next_pi(ctx, cc, cc->bypass);                                       \
            min_size = FFMIN(min_size, cc->pi_size);                            \
            max_size = FFMAX(max_size, cc->pi_size);                            \
        }                                                                       \
                                                                                \
        av_assert1(min_size > 0);                                               \
        for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {            \
            ChannelContext *cc = &s->cc[ch];                                    \
                                                                                \
            if (cc->bypass)                                                     \
                continue;                                                       \
            gain = FFMIN(gain, min_gain(ctx, cc, max_size));                    \
        }                                                                       \
                                                                                \
        for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {            \
            ChannelContext *cc = &s->cc[ch];                                    \
            const ptype *src = (const ptype *)in->extended_data[ch];            \
            ptype *dst = (ptype *)out->extended_data[ch];                       \
                                                                                \
            consume_pi(cc, min_size);                                           \
            if (cc->bypass)                                                     \
                continue;                                                       \
                                                                                \
            for (int i = n; !ctx->is_disabled && i < n + min_size; i++) {       \
                ptype g = tlerp(s->prev_gain, gain, (i - n) / (ptype)min_size); \
                dst[i] = src[i] * g;                                            \
            }                                                                   \
        }                                                                       \
                                                                                \
        s->prev_gain = gain;                                                    \
        n += min_size;                                                          \
    }                                                                           \
}

FILTER_LINK_CHANNELS(dbl, double, dlerp)
FILTER_LINK_CHANNELS(flt, float, flerp)

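/*
 * Queue incoming frames, analyze them per channel, and release the
 * head of the queue only once every channel has enough closed periods
 * to cover it (or EOF was seen); the configured filter_channels
 * callback then applies the gains.
 */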
static int filter_frame(AVFilterContext *ctx)
{
    SpeechNormalizerContext *s = ctx->priv;
    AVFilterLink *outlink = ctx->outputs[0];
    AVFilterLink *inlink = ctx->inputs[0];
    int ret;

    while (s->queue.available > 0) {
        int min_pi_nb_samples;
        AVFrame *in, *out;

        in = ff_bufqueue_peek(&s->queue, 0);
        if (!in)
            break;

        min_pi_nb_samples = available_samples(ctx);
        if (min_pi_nb_samples < in->nb_samples && !s->eof)
            break;

        in = ff_bufqueue_get(&s->queue);

        if (av_frame_is_writable(in)) {
            out = in;
        } else {
            out = ff_get_audio_buffer(outlink, in->nb_samples);
            if (!out) {
                av_frame_free(&in);
                return AVERROR(ENOMEM);
            }
            av_frame_copy_props(out, in);
        }

        s->filter_channels[s->link](ctx, in, out, in->nb_samples);

        s->pts = in->pts + av_rescale_q(in->nb_samples, av_make_q(1, outlink->sample_rate),
                                        outlink->time_base);

        if (out != in)
            av_frame_free(&in);
        return ff_filter_frame(outlink, out);
    }

    for (int f = 0; f < ff_inlink_queued_frames(inlink); f++) {
        AVFrame *in;

        ret = ff_inlink_consume_frame(inlink, &in);
        if (ret < 0)
            return ret;
        if (ret == 0)
            break;

        ff_bufqueue_add(ctx, &s->queue, in);

        for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
            ChannelContext *cc = &s->cc[ch];

            s->analyze_channel(ctx, cc, in->extended_data[ch], in->nb_samples);
        }
    }

    return 1;
}

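/*
 * Standard activate callback: refresh the channel selection from the
 * "channels" option, try to emit one filtered frame, latch EOF from
 * the input status, and otherwise either signal EOF downstream,
 * reschedule when enough periods are ready, or request more input.
 */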
static int activate(AVFilterContext *ctx)
{
    AVFilterLink *inlink = ctx->inputs[0];
    AVFilterLink *outlink = ctx->outputs[0];
    SpeechNormalizerContext *s = ctx->priv;
    int ret, status;
    int64_t pts;

    ret = av_channel_layout_copy(&s->ch_layout, &inlink->ch_layout);
    if (ret < 0)
        return ret;
    if (strcmp(s->ch_layout_str, "all"))
        av_channel_layout_from_string(&s->ch_layout,
                                      s->ch_layout_str);

    FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);

    ret = filter_frame(ctx);
    if (ret <= 0)
        return ret;

    if (!s->eof && ff_inlink_acknowledge_status(inlink, &status, &pts)) {
        if (status == AVERROR_EOF)
            s->eof = 1;
    }

    if (s->eof && ff_inlink_queued_samples(inlink) == 0 &&
        s->queue.available == 0) {
        ff_outlink_set_status(outlink, AVERROR_EOF, s->pts);
        return 0;
    }

    if (s->queue.available > 0) {
        AVFrame *in = ff_bufqueue_peek(&s->queue, 0);
        const int nb_samples = available_samples(ctx);

        if (nb_samples >= in->nb_samples || s->eof) {
            ff_filter_set_ready(ctx, 10);
            return 0;
        }
    }

    FF_FILTER_FORWARD_WANTED(outlink, inlink);

    return FFERROR_NOT_READY;
}

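/*
 * Allocate per-channel state and select the sample-format specific
 * callbacks. max_period is a tenth of the sample rate, i.e. 100 ms,
 * the longest run that is still treated as a single period.
 */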
static int config_input(AVFilterLink *inlink)
{
    AVFilterContext *ctx = inlink->dst;
    SpeechNormalizerContext *s = ctx->priv;

    s->max_period = inlink->sample_rate / 10;

    s->prev_gain = 1.;
    s->cc = av_calloc(inlink->ch_layout.nb_channels, sizeof(*s->cc));
    if (!s->cc)
        return AVERROR(ENOMEM);

    for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
        ChannelContext *cc = &s->cc[ch];

        cc->state = -1;
        cc->gain_state = 1.;
    }

    switch (inlink->format) {
    case AV_SAMPLE_FMT_FLTP:
        s->analyze_channel = analyze_channel_flt;
        s->filter_channels[0] = filter_channels_flt;
        s->filter_channels[1] = filter_link_channels_flt;
        break;
    case AV_SAMPLE_FMT_DBLP:
        s->analyze_channel = analyze_channel_dbl;
        s->filter_channels[0] = filter_channels_dbl;
        s->filter_channels[1] = filter_link_channels_dbl;
        break;
    default:
        av_assert1(0);
    }

    return 0;
}

static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
                           char *res, int res_len, int flags)
{
    SpeechNormalizerContext *s = ctx->priv;
    int link = s->link;
    int ret;

    ret = ff_filter_process_command(ctx, cmd, args, res, res_len, flags);
    if (ret < 0)
        return ret;
    if (link != s->link)
        s->prev_gain = 1.;

    return 0;
}

static av_cold void uninit(AVFilterContext *ctx)
{
    SpeechNormalizerContext *s = ctx->priv;

    ff_bufqueue_discard_all(&s->queue);
    av_channel_layout_uninit(&s->ch_layout);
    av_freep(&s->cc);
}

static const AVFilterPad inputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_AUDIO,
        .config_props = config_input,
    },
};

static const AVFilterPad outputs[] = {
    {
        .name = "default",
        .type = AVMEDIA_TYPE_AUDIO,
    },
};

const AVFilter ff_af_speechnorm = {
    .name            = "speechnorm",
    .description     = NULL_IF_CONFIG_SMALL("Speech Normalizer."),
    .priv_size       = sizeof(SpeechNormalizerContext),
    .priv_class      = &speechnorm_class,
    .activate        = activate,
    .uninit          = uninit,
    FILTER_INPUTS(inputs),
    FILTER_OUTPUTS(outputs),
    FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_DBLP),
    .flags           = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
    .process_command = process_command,
};