libavcodec/wmavoice.c File Reference

Windows Media Audio Voice compatible decoder. More...

#include <math.h>
#include "avcodec.h"
#include "get_bits.h"
#include "put_bits.h"
#include "wmavoice_data.h"
#include "celp_math.h"
#include "celp_filters.h"
#include "acelp_vectors.h"
#include "acelp_filters.h"
#include "lsp.h"
#include "libavutil/lzo.h"
#include "dct.h"
#include "rdft.h"
#include "sinewin.h"

Go to the source code of this file.

Data Structures

struct frame_type_desc

Description of frame types. More...

struct WMAVoiceContext

WMA Voice decoding context. More...

Defines

#define UNCHECKED_BITSTREAM_READER   1

#define MAX_BLOCKS   8

maximum number of blocks per frame

#define MAX_LSPS   16

maximum filter order

#define MAX_LSPS_ALIGN16   16

same as MAX_LSPS; needs to be multiple of 16 for ASM input buffer alignment

#define MAX_FRAMES   3

maximum number of frames per superframe

#define MAX_FRAMESIZE   160

maximum number of samples per frame

#define MAX_SIGNAL_HISTORY   416

maximum excitation signal history

#define MAX_SFRAMESIZE   (MAX_FRAMESIZE * MAX_FRAMES)

maximum number of samples per superframe

#define SFRAME_CACHE_MAXSIZE   256

maximum cache size for frame data that was split over two packets

#define VLC_NBITS   6

number of bits to read per VLC iteration

#define log_range(var, assign)

Enumerations

enum { ACB_TYPE_NONE = 0, ACB_TYPE_ASYMMETRIC = 1, ACB_TYPE_HAMMING = 2 }

Adaptive codebook types. More...

enum { FCB_TYPE_SILENCE = 0, FCB_TYPE_HARDCODED = 1, FCB_TYPE_AW_PULSES = 2, FCB_TYPE_EXC_PULSES = 3 }

Fixed codebook types. More...

Functions

static av_cold int decode_vbmtree (GetBitContext *gb, int8_t vbm_tree[25])

Set up the variable bit mode (VBM) tree from container extradata.

static av_cold int wmavoice_decode_init (AVCodecContext *ctx)

Set up decoder with parameters from demuxer (extradata etc.).

static void dequant_lsps (double *lsps, int num, const uint16_t *values, const uint16_t *sizes, int n_stages, const uint8_t *table, const double *mul_q, const double *base_q)

Dequantize LSPs.

static int pRNG (int frame_cntr, int block_num, int block_size)

Generate a random number from frame_cntr and block_num, which will lie in the range [0, 1000 - block_size] (so it can be used as an index in a table of size 1000 of which you want to read block_size entries).

static void synth_block_hardcoded (WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, const struct frame_type_desc *frame_desc, float *excitation)

Parse hardcoded signal for a single block.

static void synth_block_fcb_acb (WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, int block_pitch_sh2, const struct frame_type_desc *frame_desc, float *excitation)

Parse FCB/ACB signal for a single block.

static void synth_block (WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, int block_pitch_sh2, const double *lsps, const double *prev_lsps, const struct frame_type_desc *frame_desc, float *excitation, float *synth)

Parse data in a single block.

static int synth_frame (AVCodecContext *ctx, GetBitContext *gb, int frame_idx, float *samples, const double *lsps, const double *prev_lsps, float *excitation, float *synth)

Synthesize output samples for a single frame.

static void stabilize_lsps (double *lsps, int num)

Ensure minimum value for first item, maximum value for last value, proper spacing between each value and proper ordering.

static int check_bits_for_superframe (GetBitContext *orig_gb, WMAVoiceContext *s)

Test if there's enough bits to read 1 superframe.

static int synth_superframe (AVCodecContext *ctx, int *got_frame_ptr)

Synthesize output samples for a single superframe.

static int parse_packet_header (WMAVoiceContext *s)

Parse the packet header at the start of each packet (input data to this decoder).

static void copy_bits (PutBitContext *pb, const uint8_t *data, int size, GetBitContext *gb, int nbits)

Copy (unaligned) bits from gb/data/size to pb.

static int wmavoice_decode_packet (AVCodecContext *ctx, void *data, int *got_frame_ptr, AVPacket *avpkt)

Packet decoding: a packet is anything that the (ASF) demuxer contains, and we expect that the demuxer / application provides it to us as such (else you'll probably get garbage as output).

static av_cold int wmavoice_decode_end (AVCodecContext *ctx)

static av_cold void wmavoice_flush (AVCodecContext *ctx)

Postfilter functions

Postfilter functions (gain control, wiener denoise filter, DC filter, kalman smoothening, plus surrounding code to wrap it)

static void adaptive_gain_control (float *out, const float *in, const float *speech_synth, int size, float alpha, float *gain_mem)

Adaptive gain control (as used in postfilter).

static int kalman_smoothen (WMAVoiceContext *s, int pitch, const float *in, float *out, int size)

Kalman smoothing function.

static float tilt_factor (const float *lpcs, int n_lpcs)

Get the tilt factor of a formant filter from its transfer function.

static void calc_input_response (WMAVoiceContext *s, float *lpcs, int fcb_type, float *coeffs, int remainder)

Derive denoise filter coefficients (in real domain) from the LPCs.

static void wiener_denoise (WMAVoiceContext *s, int fcb_type, float *synth_pf, int size, const float *lpcs)

This function applies a Wiener filter on the (noisy) speech signal as a means to denoise it.

static void postfilter (WMAVoiceContext *s, const float *synth, float *samples, int size, const float *lpcs, float *zero_exc_pf, int fcb_type, int pitch)

Averaging projection filter, the postfilter used in WMAVoice.

LSP dequantization routines

LSP dequantization routines, for 10/16LSPs and independent/residual coding.

Note:
we assume enough bits are available, caller should check. lsp10i() consumes 24 bits; lsp10r() consumes an additional 24 bits; lsp16i() consumes 34 bits; lsp16r() consumes an additional 26 bits.

static void dequant_lsp10i (GetBitContext *gb, double *lsps)

Parse 10 independently-coded LSPs.

static void dequant_lsp10r (GetBitContext *gb, double *i_lsps, const double *old, double *a1, double *a2, int q_mode)

Parse 10 independently-coded LSPs, and then derive the tables to generate LSPs for the other frames from them (residual coding).

static void dequant_lsp16i (GetBitContext *gb, double *lsps)

Parse 16 independently-coded LSPs.

static void dequant_lsp16r (GetBitContext *gb, double *i_lsps, const double *old, double *a1, double *a2, int q_mode)

Parse 16 independently-coded LSPs, and then derive the tables to generate LSPs for the other frames from them (residual coding).

Pitch-adaptive window coding functions

The next few functions are for pitch-adaptive window coding.

static void aw_parse_coords (WMAVoiceContext *s, GetBitContext *gb, const int *pitch)

Parse the offset of the first pitch-adaptive window pulses, and the distribution of pulses between the two blocks in this frame.

static void aw_pulse_set2 (WMAVoiceContext *s, GetBitContext *gb, int block_idx, AMRFixed *fcb)

Apply second set of pitch-adaptive window pulses.

static void aw_pulse_set1 (WMAVoiceContext *s, GetBitContext *gb, int block_idx, AMRFixed *fcb)

Apply first set of pitch-adaptive window pulses.

Variables

static VLC frame_type_vlc

Frame type VLC coding.

static struct frame_type_desc frame_descs [17]

Description of frame types.

AVCodec ff_wmavoice_decoder

Detailed Description

Windows Media Audio Voice compatible decoder.

Author:: Ronald S. Bultje <rsbultje@gmail.com>

Definition in file wmavoice.c.

Define Documentation

#define log_range	(	var,
		assign		)

Value:

do { \
        float tmp = log10f(assign);  var = tmp; \
        max       = FFMAX(max, tmp); min = FFMIN(min, tmp); \
    } while (0)

Referenced by calc_input_response().

#define MAX_BLOCKS 8

maximum number of blocks per frame

Definition at line 45 of file wmavoice.c.

Referenced by synth_frame().

#define MAX_FRAMES 3

maximum number of frames per superframe

Definition at line 49 of file wmavoice.c.

#define MAX_FRAMESIZE 160

maximum number of samples per frame

Definition at line 50 of file wmavoice.c.

#define MAX_LSPS 16

maximum filter order

Definition at line 46 of file wmavoice.c.

Referenced by synth_block(), synth_frame(), synth_superframe(), and wmavoice_flush().

#define MAX_LSPS_ALIGN16 16

same as MAX_LSPS; needs to be multiple of 16 for ASM input buffer alignment

Definition at line 47 of file wmavoice.c.

Referenced by postfilter(), and wmavoice_flush().

#define MAX_SFRAMESIZE (MAX_FRAMESIZE * MAX_FRAMES)

maximum number of samples per superframe

Definition at line 52 of file wmavoice.c.

Referenced by synth_superframe().

#define MAX_SIGNAL_HISTORY 416

maximum excitation signal history

Definition at line 51 of file wmavoice.c.

Referenced by synth_superframe(), wmavoice_decode_init(), and wmavoice_flush().

#define SFRAME_CACHE_MAXSIZE 256

maximum cache size for frame data that was split over two packets

Definition at line 54 of file wmavoice.c.

Referenced by wmavoice_decode_packet().

#define UNCHECKED_BITSTREAM_READER 1

Definition at line 28 of file wmavoice.c.

#define VLC_NBITS 6

number of bits to read per VLC iteration

Definition at line 56 of file wmavoice.c.

Referenced by decode_vbmtree().

Enumeration Type Documentation

anonymous enum

Adaptive codebook types.

Enumerator:

ACB_TYPE_NONE

no adaptive codebook (only hardcoded fixed)

ACB_TYPE_ASYMMETRIC

adaptive codebook with per-frame pitch, which we interpolate to get a per-sample pitch.

Signal is generated using an asymmetric sinc window function

Note:: see wmavoice_ipol1_coeffs

ACB_TYPE_HAMMING

Per-block pitch with signal generation using a Hamming sinc window function.

Note:: see wmavoice_ipol2_coeffs

Definition at line 66 of file wmavoice.c.

anonymous enum

Fixed codebook types.

Enumerator:

FCB_TYPE_SILENCE	comfort noise during silence generated from a hardcoded (fixed) codebook with per-frame (low) gain values
FCB_TYPE_HARDCODED	hardcoded (fixed) codebook with per-block gain values
FCB_TYPE_AW_PULSES	Pitch-adaptive window (AW) pulse signals, used in particular for low-bitrate streams.
FCB_TYPE_EXC_PULSES	Innovation (fixed) codebook pulse sets in combinations of either single pulses or pulse pairs.

Definition at line 81 of file wmavoice.c.

Function Documentation

static void adaptive_gain_control	(	float *	out,
		const float *	in,
		const float *	speech_synth,
		int	size,
		float	alpha,
		float *	gain_mem
	)			`[static]`

Adaptive gain control (as used in postfilter).

Identical to ff_adaptive_gain_control() in acelp_vectors.c, except that the energy here is calculated using sum(abs(...)), whereas the other codecs (e.g. AMR-NB, SIPRO) use sqrt(dotproduct(...)).

Parameters:

	out	output buffer for filtered samples
	in	input buffer containing the samples as they are after the postfilter steps so far
	speech_synth	input buffer containing speech synth before postfilter
	size	input buffer size
	alpha	exponential filter factor
	gain_mem	pointer to filter memory (single float)

Definition at line 471 of file wmavoice.c.

Referenced by postfilter().

static void aw_parse_coords	(	WMAVoiceContext *	s,
		GetBitContext *	gb,
		const int *	pitch
	)			`[static]`

Parse the offset of the first pitch-adaptive window pulses, and the distribution of pulses between the two blocks in this frame.

Parameters:

	s	WMA Voice decoding context private data
	gb	bit I/O context
	pitch	pitch for each block in this frame

Definition at line 999 of file wmavoice.c.

Referenced by synth_frame().

static void aw_pulse_set1	(	WMAVoiceContext *	s,
		GetBitContext *	gb,
		int	block_idx,
		AMRFixed *	fcb
	)			`[static]`

Apply first set of pitch-adaptive window pulses.

Parameters:

	s	WMA Voice decoding context private data
	gb	bit I/O context
	block_idx	block index in frame [0, 1]
	fcb	storage location for fixed codebook pulse info

Definition at line 1139 of file wmavoice.c.

Referenced by synth_block_fcb_acb().

static void aw_pulse_set2	(	WMAVoiceContext *	s,
		GetBitContext *	gb,
		int	block_idx,
		AMRFixed *	fcb
	)			`[static]`

Apply second set of pitch-adaptive window pulses.

Parameters:

	s	WMA Voice decoding context private data
	gb	bit I/O context
	block_idx	block index in frame [0, 1]
	fcb	structure containing fixed codebook vector info

Definition at line 1050 of file wmavoice.c.

Referenced by synth_block_fcb_acb().

static void calc_input_response	(	WMAVoiceContext *	s,
		float *	lpcs,
		int	fcb_type,
		float *	coeffs,
		int	remainder
	)			`[static]`

Derive denoise filter coefficients (in real domain) from the LPCs.

Definition at line 570 of file wmavoice.c.

Referenced by wiener_denoise().

static int check_bits_for_superframe	(	GetBitContext *	orig_gb,
		WMAVoiceContext *	s
	)			`[static]`

Test if there's enough bits to read 1 superframe.

Parameters:

	orig_gb	bit I/O context used for reading. This function does not modify the state of the bitreader; it only uses it to copy the current stream position
	s	WMA Voice decoding context private data

Returns:: -1 if unsupported, 1 on not enough bits or 0 if OK.

Definition at line 1647 of file wmavoice.c.

Referenced by synth_superframe().

static void copy_bits	(	PutBitContext *	pb,
		const uint8_t *	data,
		int	size,
		GetBitContext *	gb,
		int	nbits
	)			`[static]`

Copy (unaligned) bits from gb/data/size to pb.

Parameters:

	pb	target buffer to copy bits into
	data	source buffer to copy bits from
	size	size of the source data, in bytes
	gb	bit I/O context specifying the current position in the source data. This function might use this to align the bit position to a whole-byte boundary before calling avpriv_copy_bits() on aligned source data
	nbits	the amount of bits to copy from source to target

Note:: after calling this function, the current position in the input bit I/O context is undefined.

Definition at line 1902 of file wmavoice.c.

static av_cold int decode_vbmtree	(	GetBitContext *	gb,
		int8_t	vbm_tree[25]
	)			`[static]`

Set up the variable bit mode (VBM) tree from container extradata.

Parameters:

	gb	bit I/O context. The bit context (s->gb) should be loaded with byte 23-46 of the container extradata (i.e. the ones containing the VBM tree).
	vbm_tree	pointer to array to which the decoded VBM tree will be written.

Returns:: 0 on success, <0 on error.

Definition at line 304 of file wmavoice.c.

Referenced by wmavoice_decode_init().

static void dequant_lsp10i	(	GetBitContext *	gb,
		double *	lsps
	)			`[static]`

Parse 10 independently-coded LSPs.

Definition at line 854 of file wmavoice.c.

Referenced by dequant_lsp10r(), and synth_superframe().

static void dequant_lsp10r	(	GetBitContext *	gb,
		double *	i_lsps,
		const double *	old,
		double *	a1,
		double *	a2,
		int	q_mode
	)			`[static]`

Parse 10 independently-coded LSPs, and then derive the tables to generate LSPs for the other frames from them (residual coding).

Definition at line 880 of file wmavoice.c.

Referenced by synth_superframe().

static void dequant_lsp16i	(	GetBitContext *	gb,
		double *	lsps
	)			`[static]`

Parse 16 independently-coded LSPs.

Definition at line 916 of file wmavoice.c.

Referenced by dequant_lsp16r(), and synth_superframe().

static void dequant_lsp16r	(	GetBitContext *	gb,
		double *	i_lsps,
		const double *	old,
		double *	a1,
		double *	a2,
		int	q_mode
	)			`[static]`

Parse 16 independently-coded LSPs, and then derive the tables to generate LSPs for the other frames from them (residual coding).

Definition at line 949 of file wmavoice.c.

Referenced by synth_superframe().

static void dequant_lsps	(	double *	lsps,
		int	num,
		const uint16_t *	values,
		const uint16_t *	sizes,
		int	n_stages,
		const uint8_t *	table,
		const double *	mul_q,
		const double *	base_q
	)			`[static]`

Dequantize LSPs.

Parameters:

	lsps	output pointer to the array that will hold the LSPs
	num	number of LSPs to be dequantized
	values	quantized values, contains n_stages values
	sizes	range (i.e. max value) of each quantized value
	n_stages	number of dequantization runs
	table	dequantization table to be used
	mul_q	LSF multiplier
	base_q	base (lowest) LSF values

Definition at line 822 of file wmavoice.c.

Referenced by dequant_lsp10i(), dequant_lsp10r(), dequant_lsp16i(), and dequant_lsp16r().

static int kalman_smoothen	(	WMAVoiceContext *	s,
		int	pitch,
		const float *	in,
		float *	out,
		int	size
	)			`[static]`

Kalman smoothing function.

This function looks pitch +/- 3 samples back into history to find the best fitting curve (the one giving the optimal gain of the two signals, i.e. the highest dot product between the two), and then uses that signal history to smoothen the output of the speech synthesis filter.

Parameters:

	s	WMA Voice decoding context
	pitch	pitch of the speech signal
	in	input speech signal
	out	output pointer for smoothened signal
	size	input/output buffer size

Returns:: -1 if no smoothening took place, e.g. because no optimal fit could be found, or 0 on success.

Definition at line 511 of file wmavoice.c.

Referenced by postfilter().

static int parse_packet_header ( WMAVoiceContext * s ) [static]

Parse the packet header at the start of each packet (input data to this decoder).

Parameters:

s

WMA Voice decoding context private data

Returns:: 1 if not enough bits were available, or 0 on success.

Definition at line 1867 of file wmavoice.c.

Referenced by wmavoice_decode_packet().

static void postfilter	(	WMAVoiceContext *	s,
		const float *	synth,
		float *	samples,
		int	size,
		const float *	lpcs,
		float *	zero_exc_pf,
		int	fcb_type,
		int	pitch
	)			`[static]`

Averaging projection filter, the postfilter used in WMAVoice.

This uses the following steps:

A zero-synthesis filter (generate excitation from synth signal)
Kalman smoothing on excitation, based on pitch
Re-synthesized smoothened output
Iterative Wiener denoise filter
Adaptive gain filter
DC filter

Parameters:

	s	WMAVoice decoding context
	synth	Speech synthesis output (before postfilter)
	samples	Output buffer for filtered samples
	size	Buffer size of synth & samples
	lpcs	Generated LPCs used for speech synthesis
	zero_exc_pf	destination for zero synthesis filter (16-byte aligned)
	fcb_type	Frame type (silence, hardcoded, AW-pulses or FCB-pulses)
	pitch	Pitch of the input signal

Definition at line 768 of file wmavoice.c.

static int pRNG	(	int	frame_cntr,
		int	block_num,
		int	block_size
	)			`[static]`

Generate a random number from frame_cntr and block_num, which will lie in the range [0, 1000 - block_size] (so it can be used as an index in a table of size 1000 of which you want to read block_size entries).

Parameters:

	frame_cntr	current frame number
	block_num	current block index
	block_size	amount of entries we want to read from a table that has 1000 entries

Returns:: a (non-)random number in the [0, 1000 - block_size] range.

Definition at line 1200 of file wmavoice.c.

Referenced by synth_block_hardcoded().

static void stabilize_lsps	(	double *	lsps,
		int	num
	)			`[static]`

Ensure minimum value for first item, maximum value for last value, proper spacing between each value and proper ordering.

Parameters:

	lsps	array of LSPs
	num	size of LSP array

Note:: basically a double version of ff_acelp_reorder_lsf(), might be useful to put in a generic location later on. Parts are also present in ff_set_min_dist_lsf() + ff_sort_nearly_sorted_floats(), which is in float.

Definition at line 1609 of file wmavoice.c.

Referenced by synth_superframe().

static void synth_block	(	WMAVoiceContext *	s,
		GetBitContext *	gb,
		int	block_idx,
		int	size,
		int	block_pitch_sh2,
		const double *	lsps,
		const double *	prev_lsps,
		const struct frame_type_desc *	frame_desc,
		float *	excitation,
		float *	synth
	)			`[static]`

Parse data in a single block.

Note:: we assume enough bits are available, caller should check.

Parameters:

	s	WMA Voice decoding context private data
	gb	bit I/O context
	block_idx	index of the to-be-read block
	size	amount of samples to be read in this block
	block_pitch_sh2	pitch for this block << 2
	lsps	LSPs for (the end of) this frame
	prev_lsps	LSPs for the last frame
	frame_desc	frame type descriptor
	excitation	target memory for the ACB+FCB interpolated signal
	synth	target memory for the speech synthesis filter output

Returns:: 0 on success, <0 on error.

Definition at line 1390 of file wmavoice.c.

Referenced by synth_frame().

static void synth_block_fcb_acb	(	WMAVoiceContext *	s,
		GetBitContext *	gb,
		int	block_idx,
		int	size,
		int	block_pitch_sh2,
		const struct frame_type_desc *	frame_desc,
		float *	excitation
	)			`[static]`

Parse FCB/ACB signal for a single block.

Note:: see synth_block().

Definition at line 1267 of file wmavoice.c.

Referenced by synth_block().

static void synth_block_hardcoded	(	WMAVoiceContext *	s,
		GetBitContext *	gb,
		int	block_idx,
		int	size,
		const struct frame_type_desc *	frame_desc,
		float *	excitation
	)			`[static]`

Parse hardcoded signal for a single block.

Note:: see synth_block().

Definition at line 1236 of file wmavoice.c.

Referenced by synth_block().

static int synth_frame	(	AVCodecContext *	ctx,
		GetBitContext *	gb,
		int	frame_idx,
		float *	samples,
		const double *	lsps,
		const double *	prev_lsps,
		float *	excitation,
		float *	synth
	)			`[static]`

Synthesize output samples for a single frame.

Note:: we assume enough bits are available, caller should check.

Parameters:

	ctx	WMA Voice decoder context
	gb	bit I/O context (s->gb or one for cross-packet superframes)
	frame_idx	Frame number within superframe [0-2]
	samples	pointer to output sample buffer, has space for at least 160 samples
	lsps	LSP array
	prev_lsps	array of previous frame's LSPs
	excitation	target buffer for excitation signal
	synth	target buffer for synthesized speech data

Returns:: 0 on success, <0 on error.

Definition at line 1433 of file wmavoice.c.

Referenced by synth_superframe().

static int synth_superframe	(	AVCodecContext *	ctx,
		int *	got_frame_ptr
	)			`[static]`

Synthesize output samples for a single superframe.

If we have any data cached in s->sframe_cache, that will be used instead of whatever is loaded in s->gb.

WMA Voice superframes contain 3 frames, each containing 160 audio samples, to give a total of 480 samples per superframe. See synth_frame() for frame parsing. In addition to 3 frames, superframes can also contain the LSPs (if these are globally specified for all frames (residually); they can also be specified individually per-frame, see the s->has_residual_lsps option), and can specify the number of samples encoded in this superframe (if less than 480), usually used to prevent blanks at track boundaries.

Parameters:

	ctx	WMA Voice decoder context
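	got_frame_ptr	not documented in the original; presumably set to indicate whether a frame was output (confirm against the source)
	samples	pointer to output buffer for voice samples (stale: not a parameter in the current signature)
	data_size	pointer containing the size of samples on input, and the amount of samples filled on output (stale: not a parameter in the current signature)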

Returns:: 0 on success, <0 on error or 1 if there was not enough data to fully parse the superframe

Definition at line 1735 of file wmavoice.c.

Referenced by wmavoice_decode_packet().

static float tilt_factor	(	const float *	lpcs,
		int	n_lpcs
	)			`[static]`

Get the tilt factor of a formant filter from its transfer function.

See also:: tilt_factor() in amrnbdec.c, which does essentially the same, but somehow (??) it does a speech synthesis filter in the middle, which is missing here

Parameters:

	lpcs	LPC coefficients
	n_lpcs	Size of LPC buffer

Returns:: the tilt factor

Definition at line 557 of file wmavoice.c.

static void wiener_denoise	(	WMAVoiceContext *	s,
		int	fcb_type,
		float *	synth_pf,
		int	size,
		const float *	lpcs
	)			`[static]`

This function applies a Wiener filter on the (noisy) speech signal as a means to denoise it.

take RDFT of LPCs to get the power spectrum of the noise + speech;
using this power spectrum, calculate (for each frequency) the Wiener filter gain, which depends on the frequency power and desired level of noise subtraction (when set too high, this leads to artifacts) We can do this symmetrically over the X-axis (so 0-4kHz is the inverse of 4-8kHz);
by doing a phase shift, calculate the Hilbert transform of this array of per-frequency filter-gains to get the filtering coefficients;
smoothen/normalize/de-tilt these filter coefficients as desired;
take RDFT of noisy sound, apply the coefficients and take its IRDFT to get the denoised speech signal;
the leftover (i.e. output of the IRDFT on denoised speech data beyond the frame boundary) are saved and applied to subsequent frames by an overlap-add method (otherwise you get clicking-artifacts).

Parameters:

	s	WMA Voice decoding context
	fcb_type	Frame (codebook) type
	synth_pf	input: the noisy speech signal, output: denoised speech data; should be 16-byte aligned (for ASM purposes)
	size	size of the speech data
	lpcs	LPCs used to synthesize this frame's speech data

Definition at line 686 of file wmavoice.c.

Referenced by postfilter().

static av_cold int wmavoice_decode_end ( AVCodecContext * ctx ) [static]

Definition at line 2008 of file wmavoice.c.

static av_cold int wmavoice_decode_init ( AVCodecContext * ctx ) [static]

Set up decoder with parameters from demuxer (extradata etc.).

Extradata layout:

byte 0-18: WMAPro-in-WMAVoice extradata (see wmaprodec.c),
byte 19-22: flags field (annoyingly in LE; see below for known values),
byte 23-46: variable bitmode tree (really just 17 * 3 bits, rest is 0).

Definition at line 339 of file wmavoice.c.

static int wmavoice_decode_packet	(	AVCodecContext *	ctx,
		void *	data,
		int *	got_frame_ptr,
		AVPacket *	avpkt
	)			`[static]`

Packet decoding: a packet is anything that the (ASF) demuxer contains, and we expect that the demuxer / application provides it to us as such (else you'll probably get garbage as output).

Every packet has a size of ctx->block_align bytes, starts with a packet header (see parse_packet_header()), and then a series of superframes. Superframe boundaries may exceed packets, i.e. superframes can split data over multiple (two) packets.

For more information about frames, see synth_superframe().

Definition at line 1931 of file wmavoice.c.

static av_cold void wmavoice_flush ( AVCodecContext * ctx ) [static]

Definition at line 2022 of file wmavoice.c.

Variable Documentation

AVCodec ff_wmavoice_decoder

Initial value:

 {
    .name           = "wmavoice",
    .type           = AVMEDIA_TYPE_AUDIO,
    .id             = CODEC_ID_WMAVOICE,
    .priv_data_size = sizeof(WMAVoiceContext),
    .init           = wmavoice_decode_init,
    .close          = wmavoice_decode_end,
    .decode         = wmavoice_decode_packet,
    .capabilities   = CODEC_CAP_SUBFRAMES | CODEC_CAP_DR1,
    .flush     = wmavoice_flush,
    .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio Voice"),
}

Definition at line 2050 of file wmavoice.c.

struct frame_type_desc frame_descs[17] [static]

Description of frame types.

Referenced by check_bits_for_superframe(), and synth_frame().

VLC frame_type_vlc [static]

Frame type VLC coding.

Definition at line 61 of file wmavoice.c.


Data Structures
struct	frame_type_desc
	Description of frame types. More...
struct	WMAVoiceContext
	WMA Voice decoding context. More...
Defines
#define	UNCHECKED_BITSTREAM_READER 1
#define	MAX_BLOCKS 8
	maximum number of blocks per frame
#define	MAX_LSPS 16
	maximum filter order
#define	MAX_LSPS_ALIGN16 16
	same as MAX_LSPS; needs to be multiple of 16 for ASM input buffer alignment
#define	MAX_FRAMES 3
	maximum number of frames per superframe
#define	MAX_FRAMESIZE 160
	maximum number of samples per frame
#define	MAX_SIGNAL_HISTORY 416
	maximum excitation signal history
#define	MAX_SFRAMESIZE (MAX_FRAMESIZE * MAX_FRAMES)
	maximum number of samples per superframe
#define	SFRAME_CACHE_MAXSIZE 256
	maximum cache size for frame data that was split over two packets
#define	VLC_NBITS 6
	number of bits to read per VLC iteration
#define	log_range(var, assign)
Enumerations
enum	{ ACB_TYPE_NONE = 0, ACB_TYPE_ASYMMETRIC = 1, ACB_TYPE_HAMMING = 2 }
	Adaptive codebook types. More...
enum	{ FCB_TYPE_SILENCE = 0, FCB_TYPE_HARDCODED = 1, FCB_TYPE_AW_PULSES = 2, FCB_TYPE_EXC_PULSES = 3 }
	Fixed codebook types. More...
Functions
static av_cold int	decode_vbmtree (GetBitContext *gb, int8_t vbm_tree[25])
	Set up the variable bit mode (VBM) tree from container extradata.
static av_cold int	wmavoice_decode_init (AVCodecContext *ctx)
	Set up decoder with parameters from demuxer (extradata etc.).
static void	dequant_lsps (double *lsps, int num, const uint16_t *values, const uint16_t *sizes, int n_stages, const uint8_t *table, const double *mul_q, const double *base_q)
	Dequantize LSPs.
static int	pRNG (int frame_cntr, int block_num, int block_size)
	Generate a random number from frame_cntr and block_num, which will lie in the range [0, 1000 - block_size] (so it can be used as an index in a table of size 1000 of which you want to read block_size entries).
static void	synth_block_hardcoded (WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, const struct frame_type_desc *frame_desc, float *excitation)
	Parse hardcoded signal for a single block.
static void	synth_block_fcb_acb (WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, int block_pitch_sh2, const struct frame_type_desc *frame_desc, float *excitation)
	Parse FCB/ACB signal for a single block.
static void	synth_block (WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, int block_pitch_sh2, const double *lsps, const double *prev_lsps, const struct frame_type_desc *frame_desc, float *excitation, float *synth)
	Parse data in a single block.
static int	synth_frame (AVCodecContext *ctx, GetBitContext *gb, int frame_idx, float *samples, const double *lsps, const double *prev_lsps, float *excitation, float *synth)
	Synthesize output samples for a single frame.
static void	stabilize_lsps (double *lsps, int num)
	Ensure minimum value for first item, maximum value for last value, proper spacing between each value and proper ordering.
static int	check_bits_for_superframe (GetBitContext *orig_gb, WMAVoiceContext *s)
	Test if there's enough bits to read 1 superframe.
static int	synth_superframe (AVCodecContext *ctx, int *got_frame_ptr)
	Synthesize output samples for a single superframe.
static int	parse_packet_header (WMAVoiceContext *s)
	Parse the packet header at the start of each packet (input data to this decoder).
static void	copy_bits (PutBitContext *pb, const uint8_t *data, int size, GetBitContext *gb, int nbits)
	Copy (unaligned) bits from gb/data/size to pb.
static int	wmavoice_decode_packet (AVCodecContext *ctx, void *data, int *got_frame_ptr, AVPacket *avpkt)
	Packet decoding: a packet is anything that the (ASF) demuxer contains, and we expect that the demuxer / application provides it to us as such (else you'll probably get garbage as output).
static av_cold int	wmavoice_decode_end (AVCodecContext *ctx)
static av_cold void	wmavoice_flush (AVCodecContext *ctx)
Postfilter functions
Postfilter functions (gain control, wiener denoise filter, DC filter, kalman smoothening, plus surrounding code to wrap it)
static void	adaptive_gain_control (float *out, const float *in, const float *speech_synth, int size, float alpha, float *gain_mem)
	Adaptive gain control (as used in postfilter).
static int	kalman_smoothen (WMAVoiceContext *s, int pitch, const float *in, float *out, int size)
	Kalman smoothing function.
static float	tilt_factor (const float *lpcs, int n_lpcs)
	Get the tilt factor of a formant filter from its transfer function.
static void	calc_input_response (WMAVoiceContext *s, float *lpcs, int fcb_type, float *coeffs, int remainder)
	Derive denoise filter coefficients (in real domain) from the LPCs.
static void	wiener_denoise (WMAVoiceContext *s, int fcb_type, float *synth_pf, int size, const float *lpcs)
	This function applies a Wiener filter on the (noisy) speech signal as a means to denoise it.
static void	postfilter (WMAVoiceContext *s, const float *synth, float *samples, int size, const float *lpcs, float *zero_exc_pf, int fcb_type, int pitch)
	Averaging projection filter, the postfilter used in WMAVoice.
LSP dequantization routines
LSP dequantization routines, for 10/16LSPs and independent/residual coding. Note: we assume enough bits are available, caller should check. lsp10i() consumes 24 bits; lsp10r() consumes an additional 24 bits; lsp16i() consumes 34 bits; lsp16r() consumes an additional 26 bits.
static void	dequant_lsp10i (GetBitContext *gb, double *lsps)
	Parse 10 independently-coded LSPs.
static void	dequant_lsp10r (GetBitContext *gb, double *i_lsps, const double *old, double *a1, double *a2, int q_mode)
	Parse 10 independently-coded LSPs, and then derive the tables to generate LSPs for the other frames from them (residual coding).
static void	dequant_lsp16i (GetBitContext *gb, double *lsps)
	Parse 16 independently-coded LSPs.
static void	dequant_lsp16r (GetBitContext *gb, double *i_lsps, const double *old, double *a1, double *a2, int q_mode)
	Parse 16 independently-coded LSPs, and then derive the tables to generate LSPs for the other frames from them (residual coding).
Pitch-adaptive window coding functions
The next few functions are for pitch-adaptive window coding.
static void	aw_parse_coords (WMAVoiceContext *s, GetBitContext *gb, const int *pitch)
	Parse the offset of the first pitch-adaptive window pulses, and the distribution of pulses between the two blocks in this frame.
static void	aw_pulse_set2 (WMAVoiceContext *s, GetBitContext *gb, int block_idx, AMRFixed *fcb)
	Apply second set of pitch-adaptive window pulses.
static void	aw_pulse_set1 (WMAVoiceContext *s, GetBitContext *gb, int block_idx, AMRFixed *fcb)
	Apply first set of pitch-adaptive window pulses.
Variables
static VLC	frame_type_vlc
	Frame type VLC coding.
static struct frame_type_desc	frame_descs [17]
	Description of frame types.
AVCodec	ff_wmavoice_decoder