FFmpeg
Data Structures | Macros | Functions | Variables
af_speechnorm.c File Reference
#include <float.h>
#include "libavutil/avassert.h"
#include "libavutil/channel_layout.h"
#include "libavutil/opt.h"
#include "bufferqueue.h"
#include "audio.h"
#include "avfilter.h"
#include "filters.h"
#include "internal.h"

Go to the source code of this file.

Data Structures

struct  PeriodItem
 
struct  ChannelContext
 
struct  SpeechNormalizerContext
 

Macros

#define FF_BUFQUEUE_SIZE   (1024)
 
#define MAX_ITEMS   882000
 
#define MIN_PEAK   (1. / 32768.)
 
#define OFFSET(x)   offsetof(SpeechNormalizerContext, x)
 
#define FLAGS   AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
 
#define ANALYZE_CHANNEL(name, ptype, zero, min_peak)
 
#define FILTER_CHANNELS(name, ptype)
 
#define FILTER_LINK_CHANNELS(name, ptype, tlerp)
 

Functions

 AVFILTER_DEFINE_CLASS (speechnorm)
 
static int get_pi_samples (PeriodItem *pi, int start, int end, int remain)
 
static int available_samples (AVFilterContext *ctx)
 
static void consume_pi (ChannelContext *cc, int nb_samples)
 
static double next_gain (AVFilterContext *ctx, double pi_max_peak, int bypass, double state, double pi_rms_sum, int pi_size)
 
static void next_pi (AVFilterContext *ctx, ChannelContext *cc, int bypass)
 
static double min_gain (AVFilterContext *ctx, ChannelContext *cc, int max_size)
 
static double dlerp (double min, double max, double mix)
 
static float flerp (float min, float max, float mix)
 
static int filter_frame (AVFilterContext *ctx)
 
static int activate (AVFilterContext *ctx)
 
static int config_input (AVFilterLink *inlink)
 
static int process_command (AVFilterContext *ctx, const char *cmd, const char *args, char *res, int res_len, int flags)
 
static av_cold void uninit (AVFilterContext *ctx)
 

Variables

static const AVOption speechnorm_options []
 
static const AVFilterPad inputs []
 
const AVFilter ff_af_speechnorm
 

Detailed Description

Speech Normalizer

Definition in file af_speechnorm.c.

Macro Definition Documentation

◆ FF_BUFQUEUE_SIZE

#define FF_BUFQUEUE_SIZE   (1024)

Definition at line 34 of file af_speechnorm.c.

◆ MAX_ITEMS

#define MAX_ITEMS   882000

Definition at line 42 of file af_speechnorm.c.

◆ MIN_PEAK

#define MIN_PEAK   (1. / 32768.)

Definition at line 43 of file af_speechnorm.c.

◆ OFFSET

#define OFFSET (   x)    offsetof(SpeechNormalizerContext, x)

Definition at line 94 of file af_speechnorm.c.

◆ FLAGS

#define FLAGS   AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM

Definition at line 95 of file af_speechnorm.c.

◆ ANALYZE_CHANNEL

#define ANALYZE_CHANNEL (   name,
  ptype,
  zero,
  min_peak 
)

Definition at line 235 of file af_speechnorm.c.

◆ FILTER_CHANNELS

#define FILTER_CHANNELS (   name,
  ptype 
)
Value:
/* Expands to filter_channels_<name>(): scales every channel of `in` into `out` by that channel's own gain_state. */ \
static void filter_channels_## name (AVFilterContext *ctx, \
                                     AVFrame *in, AVFrame *out, int nb_samples) \
{ \
    SpeechNormalizerContext *s = ctx->priv; \
    AVFilterLink *inlink = ctx->inputs[0]; \
    \
    for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) { \
        ChannelContext *cc = &s->cc[ch]; \
        const ptype *src = (const ptype *)in->extended_data[ch]; \
        ptype *dst = (ptype *)out->extended_data[ch]; \
        /* Translate the plane index into a channel id so it can be looked up in s->ch_layout. */ \
        enum AVChannel channel = av_channel_layout_channel_from_index(&inlink->ch_layout, ch); \
        /* Channels not present in s->ch_layout are handed to next_pi() with bypass set. */ \
        const int bypass = av_channel_layout_index_from_channel(&s->ch_layout, channel) < 0; \
        int n = 0; \
        \
        /* Walk the frame in runs, each run limited to what is left of the current period item. */ \
        while (n < nb_samples) { \
            ptype gain; \
            int size; \
            \
            next_pi(ctx, cc, bypass); \
            size = FFMIN(nb_samples - n, cc->pi_size); \
            av_assert1(size > 0); \
            /* Read the gain before consume_pi() so the whole run uses one value. */ \
            gain = cc->gain_state; \
            consume_pi(cc, size); \
            /* With ctx->is_disabled set nothing is written, but the period items above are still consumed. */ \
            for (int i = n; !ctx->is_disabled && i < n + size; i++) \
                dst[i] = src[i] * gain; \
            n += size; \
        } \
    } \
}

Definition at line 314 of file af_speechnorm.c.

◆ FILTER_LINK_CHANNELS

#define FILTER_LINK_CHANNELS (   name,
  ptype,
  tlerp 
)

Definition at line 358 of file af_speechnorm.c.

Function Documentation

◆ AVFILTER_DEFINE_CLASS()

AVFILTER_DEFINE_CLASS ( speechnorm  )

◆ get_pi_samples()

static int get_pi_samples ( PeriodItem pi,
int  start,
int  end,
int  remain 
)
static

Definition at line 123 of file af_speechnorm.c.

Referenced by available_samples().

◆ available_samples()

static int available_samples ( AVFilterContext ctx)
static

Definition at line 144 of file af_speechnorm.c.

Referenced by activate(), and filter_frame().

◆ consume_pi()

static void consume_pi ( ChannelContext cc,
int  nb_samples 
)
static

Definition at line 160 of file af_speechnorm.c.

◆ next_gain()

static double next_gain ( AVFilterContext ctx,
double  pi_max_peak,
int  bypass,
double  state,
double  pi_rms_sum,
int  pi_size 
)
static

Definition at line 169 of file af_speechnorm.c.

Referenced by min_gain(), and next_pi().

◆ next_pi()

static void next_pi ( AVFilterContext ctx,
ChannelContext cc,
int  bypass 
)
static

Definition at line 189 of file af_speechnorm.c.

◆ min_gain()

static double min_gain ( AVFilterContext ctx,
ChannelContext cc,
int  max_size 
)
static

Definition at line 211 of file af_speechnorm.c.

◆ dlerp()

static double dlerp ( double  min,
double  max,
double  mix 
)
static

Definition at line 348 of file af_speechnorm.c.

◆ flerp()

static float flerp ( float  min,
float  max,
float  mix 
)
static

Definition at line 353 of file af_speechnorm.c.

◆ filter_frame()

static int filter_frame ( AVFilterContext ctx)
static

Definition at line 413 of file af_speechnorm.c.

Referenced by activate().

◆ activate()

static int activate ( AVFilterContext ctx)
static

Definition at line 476 of file af_speechnorm.c.

◆ config_input()

static int config_input ( AVFilterLink inlink)
static

Definition at line 523 of file af_speechnorm.c.

◆ process_command()

static int process_command ( AVFilterContext ctx,
const char *  cmd,
const char *  args,
char *  res,
int  res_len,
int  flags 
)
static

Definition at line 560 of file af_speechnorm.c.

◆ uninit()

static av_cold void uninit ( AVFilterContext ctx)
static

Definition at line 576 of file af_speechnorm.c.

Variable Documentation

◆ speechnorm_options

const AVOption speechnorm_options[]
static
Initial value:
= {
/* Option table for the speechnorm filter. Every option is listed twice: a long
 * name followed by a short alias that targets the same context field with the
 * same type, default and range. All entries use FLAGS, which includes
 * AV_OPT_FLAG_RUNTIME_PARAM. */
/* peak_value: double, default 0.95, range [0, 1]. */
{ "peak", "set the peak value", OFFSET(peak_value), AV_OPT_TYPE_DOUBLE, {.dbl=0.95}, 0.0, 1.0, FLAGS },
{ "p", "set the peak value", OFFSET(peak_value), AV_OPT_TYPE_DOUBLE, {.dbl=0.95}, 0.0, 1.0, FLAGS },
/* max_expansion: double, default 2.0, range [1, 50]. */
{ "expansion", "set the max expansion factor", OFFSET(max_expansion), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
{ "e", "set the max expansion factor", OFFSET(max_expansion), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
/* max_compression: double, default 2.0, range [1, 50]. */
{ "compression", "set the max compression factor", OFFSET(max_compression), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
{ "c", "set the max compression factor", OFFSET(max_compression), AV_OPT_TYPE_DOUBLE, {.dbl=2.0}, 1.0, 50.0, FLAGS },
/* threshold_value: double, default 0, range [0, 1]. */
{ "threshold", "set the threshold value", OFFSET(threshold_value), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0.0, 1.0, FLAGS },
{ "t", "set the threshold value", OFFSET(threshold_value), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0.0, 1.0, FLAGS },
/* raise_amount: double, default 0.001, range [0, 1]. */
{ "raise", "set the expansion raising amount", OFFSET(raise_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
{ "r", "set the expansion raising amount", OFFSET(raise_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
/* fall_amount: double, default 0.001, range [0, 1]. */
{ "fall", "set the compression raising amount", OFFSET(fall_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
{ "f", "set the compression raising amount", OFFSET(fall_amount), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0.0, 1.0, FLAGS },
/* ch_layout_str: string, default "all"; note the short alias is "h", not "c". */
{ "channels", "set channels to filter", OFFSET(ch_layout_str), AV_OPT_TYPE_STRING, {.str="all"}, 0, 0, FLAGS },
{ "h", "set channels to filter", OFFSET(ch_layout_str), AV_OPT_TYPE_STRING, {.str="all"}, 0, 0, FLAGS },
/* invert: boolean, default off. */
{ "invert", "set inverted filtering", OFFSET(invert), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
{ "i", "set inverted filtering", OFFSET(invert), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
/* link: boolean, default off. */
{ "link", "set linked channels filtering", OFFSET(link), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
{ "l", "set linked channels filtering", OFFSET(link), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
/* rms_value: double, default 0.0, range [0, 1]. */
{ "rms", "set the RMS value", OFFSET(rms_value), AV_OPT_TYPE_DOUBLE, {.dbl=0.0}, 0.0, 1.0, FLAGS },
{ "m", "set the RMS value", OFFSET(rms_value), AV_OPT_TYPE_DOUBLE, {.dbl=0.0}, 0.0, 1.0, FLAGS },
/* Terminating sentinel entry. */
{ NULL }
}

Definition at line 97 of file af_speechnorm.c.

◆ inputs

const AVFilterPad inputs[]
static
Initial value:
= {
    /* The filter's single input pad; config_input() is registered as its config_props callback. */
    {
        .name         = "default",
        /* State the media type explicitly: an omitted .type is zero-initialized,
         * which is not AVMEDIA_TYPE_AUDIO. */
        .type         = AVMEDIA_TYPE_AUDIO,
        .config_props = config_input,
    },
}

Definition at line 585 of file af_speechnorm.c.

◆ ff_af_speechnorm

const AVFilter ff_af_speechnorm
Initial value:
= {
    /* Public filter definition for "speechnorm". */
    .name            = "speechnorm",
    .description     = NULL_IF_CONFIG_SMALL("Speech Normalizer."),
    .priv_size       = sizeof(SpeechNormalizerContext),
    .priv_class      = &speechnorm_class,
    /* Scheduling and teardown callbacks defined in this file. */
    .activate        = activate,
    .uninit          = uninit,
    /* One custom input pad (inputs[]) and the stock default audio output pad. */
    FILTER_INPUTS(inputs),
    FILTER_OUTPUTS(ff_audio_default_filterpad),
    /* Planar float and planar double are the supported sample formats. */
    FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_DBLP),
    /* Timeline support is handled inside the filter: the FILTER_CHANNELS loop checks ctx->is_disabled itself. */
    .flags           = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
    .process_command = process_command,
}

Definition at line 593 of file af_speechnorm.c.

inputs
static const AVFilterPad inputs[]
Definition: af_speechnorm.c:585
AV_SAMPLE_FMT_FLTP
@ AV_SAMPLE_FMT_FLTP
float, planar
Definition: samplefmt.h:66
name
it's the only field you need to keep assuming you have a context There is some magic you don't need to care about around this just let it vf default minimum maximum flags name is the option name
Definition: writing_filters.txt:88
out
FILE * out
Definition: movenc.c:54
process_command
static int process_command(AVFilterContext *ctx, const char *cmd, const char *args, char *res, int res_len, int flags)
Definition: af_speechnorm.c:560
inlink
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink
Definition: filter_design.txt:212
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:375
av_channel_layout_channel_from_index
enum AVChannel av_channel_layout_channel_from_index(const AVChannelLayout *channel_layout, unsigned int idx)
Get the channel with the given index in a channel layout.
Definition: channel_layout.c:664
s
#define s(width, name)
Definition: cbs_vp9.c:198
AV_OPT_TYPE_DOUBLE
@ AV_OPT_TYPE_DOUBLE
Definition: opt.h:237
AVMEDIA_TYPE_AUDIO
@ AVMEDIA_TYPE_AUDIO
Definition: avutil.h:202
ctx
AVFormatContext * ctx
Definition: movenc.c:48
FILTER_INPUTS
#define FILTER_INPUTS(array)
Definition: internal.h:182
link
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a link
Definition: filter_design.txt:23
NULL
#define NULL
Definition: coverity.c:32
ff_audio_default_filterpad
const AVFilterPad ff_audio_default_filterpad[1]
An AVFilterPad array whose only entry has name "default" and is of type AVMEDIA_TYPE_AUDIO.
Definition: audio.c:33
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:106
for
for(k=2;k<=8;++k)
Definition: h264pred_template.c:425
size
int size
Definition: twinvq_data.h:10344
SpeechNormalizerContext
Definition: af_speechnorm.c:64
AVChannel
AVChannel
Definition: channel_layout.h:47
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:255
invert
static void invert(float *h, int n)
Definition: asrc_sinc.c:184
FLAGS
#define FLAGS
Definition: af_speechnorm.c:95
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
activate
static int activate(AVFilterContext *ctx)
Definition: af_speechnorm.c:476
uninit
static av_cold void uninit(AVFilterContext *ctx)
Definition: af_speechnorm.c:576
config_input
static int config_input(AVFilterLink *inlink)
Definition: af_speechnorm.c:523
av_channel_layout_index_from_channel
int av_channel_layout_index_from_channel(const AVChannelLayout *channel_layout, enum AVChannel channel)
Get the index of a given channel in a channel layout.
Definition: channel_layout.c:704
next_pi
static void next_pi(AVFilterContext *ctx, ChannelContext *cc, int bypass)
Definition: af_speechnorm.c:189
AV_SAMPLE_FMT_DBLP
@ AV_SAMPLE_FMT_DBLP
double, planar
Definition: samplefmt.h:67
AVFilterContext
An instance of a filter.
Definition: avfilter.h:407
AV_OPT_TYPE_BOOL
@ AV_OPT_TYPE_BOOL
Definition: opt.h:251
FILTER_OUTPUTS
#define FILTER_OUTPUTS(array)
Definition: internal.h:183
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL
#define AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL
Same as AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC, except that the filter will have its filter_frame() c...
Definition: avfilter.h:155
OFFSET
#define OFFSET(x)
Definition: af_speechnorm.c:94
AV_OPT_TYPE_STRING
@ AV_OPT_TYPE_STRING
Definition: opt.h:239
channel
channel
Definition: ebur128.h:39
FILTER_SAMPLEFMTS
#define FILTER_SAMPLEFMTS(...)
Definition: internal.h:170