FFmpeg
wmavoice.c
Go to the documentation of this file.
1 /*
2  * Windows Media Audio Voice decoder.
3  * Copyright (c) 2009 Ronald S. Bultje
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * @brief Windows Media Audio Voice compatible decoder
25  * @author Ronald S. Bultje <rsbultje@gmail.com>
26  */
27 
28 #include <math.h>
29 
31 #include "libavutil/float_dsp.h"
32 #include "libavutil/mem.h"
33 #include "libavutil/thread.h"
34 #include "avcodec.h"
35 #include "internal.h"
36 #include "get_bits.h"
37 #include "put_bits.h"
38 #include "wmavoice_data.h"
39 #include "celp_filters.h"
40 #include "acelp_vectors.h"
41 #include "acelp_filters.h"
42 #include "lsp.h"
43 #include "dct.h"
44 #include "rdft.h"
45 #include "sinewin.h"
46 
47 #define MAX_BLOCKS 8 ///< maximum number of blocks per frame
48 #define MAX_LSPS 16 ///< maximum filter order
49 #define MAX_LSPS_ALIGN16 16 ///< same as #MAX_LSPS; needs to be multiple
50  ///< of 16 for ASM input buffer alignment
51 #define MAX_FRAMES 3 ///< maximum number of frames per superframe
52 #define MAX_FRAMESIZE 160 ///< maximum number of samples per frame
53 #define MAX_SIGNAL_HISTORY 416 ///< maximum excitation signal history
54 #define MAX_SFRAMESIZE (MAX_FRAMESIZE * MAX_FRAMES)
55  ///< maximum number of samples per superframe
56 #define SFRAME_CACHE_MAXSIZE 256 ///< maximum cache size for frame data that
57  ///< was split over two packets
58 #define VLC_NBITS 6 ///< number of bits to read per VLC iteration
59 
60 /**
61  * Frame type VLC coding.
62  */
64 
65 /**
66  * Adaptive codebook types.
67  */
68 enum {
69  ACB_TYPE_NONE = 0, ///< no adaptive codebook (only hardcoded fixed)
70  ACB_TYPE_ASYMMETRIC = 1, ///< adaptive codebook with per-frame pitch, which
71  ///< we interpolate to get a per-sample pitch.
72  ///< Signal is generated using an asymmetric sinc
73  ///< window function
74  ///< @note see #wmavoice_ipol1_coeffs
75  ACB_TYPE_HAMMING = 2 ///< Per-block pitch with signal generation using
76  ///< a Hamming sinc window function
77  ///< @note see #wmavoice_ipol2_coeffs
78 };
79 
80 /**
81  * Fixed codebook types.
82  */
83 enum {
84  FCB_TYPE_SILENCE = 0, ///< comfort noise during silence
85  ///< generated from a hardcoded (fixed) codebook
86  ///< with per-frame (low) gain values
87  FCB_TYPE_HARDCODED = 1, ///< hardcoded (fixed) codebook with per-block
88  ///< gain values
89  FCB_TYPE_AW_PULSES = 2, ///< Pitch-adaptive window (AW) pulse signals,
90  ///< used in particular for low-bitrate streams
91  FCB_TYPE_EXC_PULSES = 3, ///< Innovation (fixed) codebook pulse sets in
92  ///< combinations of either single pulses or
93  ///< pulse pairs
94 };
95 
96 /**
97  * Description of frame types.
98  */
99 static const struct frame_type_desc {
100  uint8_t n_blocks; ///< amount of blocks per frame (each block
101  ///< (contains 160/#n_blocks samples)
102  uint8_t log_n_blocks; ///< log2(#n_blocks)
103  uint8_t acb_type; ///< Adaptive codebook type (ACB_TYPE_*)
104  uint8_t fcb_type; ///< Fixed codebook type (FCB_TYPE_*)
105  uint8_t dbl_pulses; ///< how many pulse vectors have pulse pairs
106  ///< (rather than just one single pulse)
107  ///< only if #fcb_type == #FCB_TYPE_EXC_PULSES
108 } frame_descs[17] = {
109  { 1, 0, ACB_TYPE_NONE, FCB_TYPE_SILENCE, 0 },
110  { 2, 1, ACB_TYPE_NONE, FCB_TYPE_HARDCODED, 0 },
126 };
127 
128 /**
129  * WMA Voice decoding context.
130  */
131 typedef struct WMAVoiceContext {
132  /**
133  * @name Global values specified in the stream header / extradata or used all over.
134  * @{
135  */
136  GetBitContext gb; ///< packet bitreader. During decoder init,
137  ///< it contains the extradata from the
138  ///< demuxer. During decoding, it contains
139  ///< packet data.
140  int8_t vbm_tree[25]; ///< converts VLC codes to frame type
141 
142  int spillover_bitsize; ///< number of bits used to specify
143  ///< #spillover_nbits in the packet header
144  ///< = ceil(log2(ctx->block_align << 3))
145  int history_nsamples; ///< number of samples in history for signal
146  ///< prediction (through ACB)
147 
148  /* postfilter specific values */
149  int do_apf; ///< whether to apply the averaged
150  ///< projection filter (APF)
151  int denoise_strength; ///< strength of denoising in Wiener filter
152  ///< [0-11]
153  int denoise_tilt_corr; ///< Whether to apply tilt correction to the
154  ///< Wiener filter coefficients (postfilter)
155  int dc_level; ///< Predicted amount of DC noise, based
156  ///< on which a DC removal filter is used
157 
158  int lsps; ///< number of LSPs per frame [10 or 16]
159  int lsp_q_mode; ///< defines quantizer defaults [0, 1]
160  int lsp_def_mode; ///< defines different sets of LSP defaults
161  ///< [0, 1]
162 
163  int min_pitch_val; ///< base value for pitch parsing code
164  int max_pitch_val; ///< max value + 1 for pitch parsing
165  int pitch_nbits; ///< number of bits used to specify the
166  ///< pitch value in the frame header
167  int block_pitch_nbits; ///< number of bits used to specify the
168  ///< first block's pitch value
169  int block_pitch_range; ///< range of the block pitch
170  int block_delta_pitch_nbits; ///< number of bits used to specify the
171  ///< delta pitch between this and the last
172  ///< block's pitch value, used in all but
173  ///< first block
174  int block_delta_pitch_hrange; ///< 1/2 range of the delta (full range is
175  ///< from -this to +this-1)
176  uint16_t block_conv_table[4]; ///< boundaries for block pitch unit/scale
177  ///< conversion
178 
179  /**
180  * @}
181  *
182  * @name Packet values specified in the packet header or related to a packet.
183  *
184  * A packet is considered to be a single unit of data provided to this
185  * decoder by the demuxer.
186  * @{
187  */
188  int spillover_nbits; ///< number of bits of the previous packet's
189  ///< last superframe preceding this
190  ///< packet's first full superframe (useful
191  ///< for re-synchronization also)
192  int has_residual_lsps; ///< if set, superframes contain one set of
193  ///< LSPs that cover all frames, encoded as
194  ///< independent and residual LSPs; if not
195  ///< set, each frame contains its own, fully
196  ///< independent, LSPs
197  int skip_bits_next; ///< number of bits to skip at the next call
198  ///< to #wmavoice_decode_packet() (since
199  ///< they're part of the previous superframe)
200 
202  ///< cache for superframe data split over
203  ///< multiple packets
204  int sframe_cache_size; ///< set to >0 if we have data from an
205  ///< (incomplete) superframe from a previous
206  ///< packet that spilled over in the current
207  ///< packet; specifies the amount of bits in
208  ///< #sframe_cache
209  PutBitContext pb; ///< bitstream writer for #sframe_cache
210 
211  /**
212  * @}
213  *
214  * @name Frame and superframe values
215  * Superframe and frame data - these can change from frame to frame,
216  * although some of them do in that case serve as a cache / history for
217  * the next frame or superframe.
218  * @{
219  */
220  double prev_lsps[MAX_LSPS]; ///< LSPs of the last frame of the previous
221  ///< superframe
222  int last_pitch_val; ///< pitch value of the previous frame
223  int last_acb_type; ///< frame type [0-2] of the previous frame
224  int pitch_diff_sh16; ///< ((cur_pitch_val - #last_pitch_val)
225  ///< << 16) / #MAX_FRAMESIZE
226  float silence_gain; ///< set for use in blocks if #ACB_TYPE_NONE
227 
228  int aw_idx_is_ext; ///< whether the AW index was encoded in
229  ///< 8 bits (instead of 6)
230  int aw_pulse_range; ///< the range over which #aw_pulse_set1()
231  ///< can apply the pulse, relative to the
232  ///< value in aw_first_pulse_off. The exact
233  ///< position of the first AW-pulse is within
234  ///< [pulse_off, pulse_off + this], and
235  ///< depends on bitstream values; [16 or 24]
236  int aw_n_pulses[2]; ///< number of AW-pulses in each block; note
237  ///< that this number can be negative (in
238  ///< which case it basically means "zero")
239  int aw_first_pulse_off[2]; ///< index of first sample to which to
240  ///< apply AW-pulses, or -0xff if unset
241  int aw_next_pulse_off_cache; ///< the position (relative to start of the
242  ///< second block) at which pulses should
243  ///< start to be positioned, serves as a
244  ///< cache for pitch-adaptive window pulses
245  ///< between blocks
246 
247  int frame_cntr; ///< current frame index [0 - 0xFFFE]; is
248  ///< only used for comfort noise in #pRNG()
249  int nb_superframes; ///< number of superframes in current packet
250  float gain_pred_err[6]; ///< cache for gain prediction
252  ///< cache of the signal of previous
253  ///< superframes, used as a history for
254  ///< signal generation
255  float synth_history[MAX_LSPS]; ///< see #excitation_history
256  /**
257  * @}
258  *
259  * @name Postfilter values
260  *
261  * Variables used for postfilter implementation, mostly history for
262  * smoothing and so on, and context variables for FFT/iFFT.
263  * @{
264  */
265  RDFTContext rdft, irdft; ///< contexts for FFT-calculation in the
266  ///< postfilter (for denoise filter)
267  DCTContext dct, dst; ///< contexts for phase shift (in Hilbert
268  ///< transform, part of postfilter)
269  float sin[511], cos[511]; ///< 8-bit cosine/sine windows over [-pi,pi]
270  ///< range
271  float postfilter_agc; ///< gain control memory, used in
272  ///< #adaptive_gain_control()
273  float dcf_mem[2]; ///< DC filter history
275  ///< zero filter output (i.e. excitation)
276  ///< by postfilter
278  int denoise_filter_cache_size; ///< samples in #denoise_filter_cache
279  DECLARE_ALIGNED(32, float, tilted_lpcs_pf)[0x80];
280  ///< aligned buffer for LPC tilting
282  ///< aligned buffer for denoise coefficients
284  ///< aligned buffer for postfilter speech
285  ///< synthesis
286  /**
287  * @}
288  */
290 
291 /**
292  * Set up the variable bit mode (VBM) tree from container extradata.
293  * @param gb bit I/O context.
294  * The bit context (s->gb) should be loaded with byte 23-46 of the
295  * container extradata (i.e. the ones containing the VBM tree).
296  * @param vbm_tree pointer to array to which the decoded VBM tree will be
297  * written.
298  * @return 0 on success, <0 on error.
299  */
300 static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25])
301 {
302  int cntr[8] = { 0 }, n, res;
303 
304  memset(vbm_tree, 0xff, sizeof(vbm_tree[0]) * 25);
305  for (n = 0; n < 17; n++) {
306  res = get_bits(gb, 3);
307  if (cntr[res] > 3) // should be >= 3 + (res == 7))
308  return -1;
309  vbm_tree[res * 3 + cntr[res]++] = n;
310  }
311  return 0;
312 }
313 
315 {
316  static const uint8_t bits[] = {
317  2, 2, 2, 4, 4, 4,
318  6, 6, 6, 8, 8, 8,
319  10, 10, 10, 12, 12, 12,
320  14, 14, 14, 14
321  };
322  static const uint16_t codes[] = {
323  0x0000, 0x0001, 0x0002, // 00/01/10
324  0x000c, 0x000d, 0x000e, // 11+00/01/10
325  0x003c, 0x003d, 0x003e, // 1111+00/01/10
326  0x00fc, 0x00fd, 0x00fe, // 111111+00/01/10
327  0x03fc, 0x03fd, 0x03fe, // 11111111+00/01/10
328  0x0ffc, 0x0ffd, 0x0ffe, // 1111111111+00/01/10
329  0x3ffc, 0x3ffd, 0x3ffe, 0x3fff // 111111111111+xx
330  };
331 
333  bits, 1, 1, codes, 2, 2, 132);
334 }
335 
337 {
339  int n;
340 
341  s->postfilter_agc = 0;
342  s->sframe_cache_size = 0;
343  s->skip_bits_next = 0;
344  for (n = 0; n < s->lsps; n++)
345  s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
346  memset(s->excitation_history, 0,
347  sizeof(*s->excitation_history) * MAX_SIGNAL_HISTORY);
348  memset(s->synth_history, 0,
349  sizeof(*s->synth_history) * MAX_LSPS);
350  memset(s->gain_pred_err, 0,
351  sizeof(s->gain_pred_err));
352 
353  if (s->do_apf) {
354  memset(&s->synth_filter_out_buf[MAX_LSPS_ALIGN16 - s->lsps], 0,
355  sizeof(*s->synth_filter_out_buf) * s->lsps);
356  memset(s->dcf_mem, 0,
357  sizeof(*s->dcf_mem) * 2);
358  memset(s->zero_exc_pf, 0,
359  sizeof(*s->zero_exc_pf) * s->history_nsamples);
360  memset(s->denoise_filter_cache, 0, sizeof(s->denoise_filter_cache));
361  }
362 }
363 
364 /**
365  * Set up decoder with parameters from demuxer (extradata etc.).
366  */
368 {
369  static AVOnce init_static_once = AV_ONCE_INIT;
370  int n, flags, pitch_range, lsp16_flag;
372 
373  ff_thread_once(&init_static_once, wmavoice_init_static_data);
374 
375  /**
376  * Extradata layout:
377  * - byte 0-18: WMAPro-in-WMAVoice extradata (see wmaprodec.c),
378  * - byte 19-22: flags field (annoyingly in LE; see below for known
379  * values),
380  * - byte 23-46: variable bitmode tree (really just 17 * 3 bits,
381  * rest is 0).
382  */
383  if (ctx->extradata_size != 46) {
385  "Invalid extradata size %d (should be 46)\n",
386  ctx->extradata_size);
387  return AVERROR_INVALIDDATA;
388  }
389  if (ctx->block_align <= 0 || ctx->block_align > (1<<22)) {
390  av_log(ctx, AV_LOG_ERROR, "Invalid block alignment %d.\n", ctx->block_align);
391  return AVERROR_INVALIDDATA;
392  }
393 
394  flags = AV_RL32(ctx->extradata + 18);
395  s->spillover_bitsize = 3 + av_ceil_log2(ctx->block_align);
396  s->do_apf = flags & 0x1;
397  if (s->do_apf) {
398  ff_rdft_init(&s->rdft, 7, DFT_R2C);
399  ff_rdft_init(&s->irdft, 7, IDFT_C2R);
400  ff_dct_init(&s->dct, 6, DCT_I);
401  ff_dct_init(&s->dst, 6, DST_I);
402 
403  ff_sine_window_init(s->cos, 256);
404  memcpy(&s->sin[255], s->cos, 256 * sizeof(s->cos[0]));
405  for (n = 0; n < 255; n++) {
406  s->sin[n] = -s->sin[510 - n];
407  s->cos[510 - n] = s->cos[n];
408  }
409  }
410  s->denoise_strength = (flags >> 2) & 0xF;
411  if (s->denoise_strength >= 12) {
413  "Invalid denoise filter strength %d (max=11)\n",
414  s->denoise_strength);
415  return AVERROR_INVALIDDATA;
416  }
417  s->denoise_tilt_corr = !!(flags & 0x40);
418  s->dc_level = (flags >> 7) & 0xF;
419  s->lsp_q_mode = !!(flags & 0x2000);
420  s->lsp_def_mode = !!(flags & 0x4000);
421  lsp16_flag = flags & 0x1000;
422  if (lsp16_flag) {
423  s->lsps = 16;
424  } else {
425  s->lsps = 10;
426  }
427  for (n = 0; n < s->lsps; n++)
428  s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
429 
430  init_get_bits(&s->gb, ctx->extradata + 22, (ctx->extradata_size - 22) << 3);
431  if (decode_vbmtree(&s->gb, s->vbm_tree) < 0) {
432  av_log(ctx, AV_LOG_ERROR, "Invalid VBM tree; broken extradata?\n");
433  return AVERROR_INVALIDDATA;
434  }
435 
436  if (ctx->sample_rate >= INT_MAX / (256 * 37))
437  return AVERROR_INVALIDDATA;
438 
439  s->min_pitch_val = ((ctx->sample_rate << 8) / 400 + 50) >> 8;
440  s->max_pitch_val = ((ctx->sample_rate << 8) * 37 / 2000 + 50) >> 8;
441  pitch_range = s->max_pitch_val - s->min_pitch_val;
442  if (pitch_range <= 0) {
443  av_log(ctx, AV_LOG_ERROR, "Invalid pitch range; broken extradata?\n");
444  return AVERROR_INVALIDDATA;
445  }
446  s->pitch_nbits = av_ceil_log2(pitch_range);
447  s->last_pitch_val = 40;
448  s->last_acb_type = ACB_TYPE_NONE;
449  s->history_nsamples = s->max_pitch_val + 8;
450 
451  if (s->min_pitch_val < 1 || s->history_nsamples > MAX_SIGNAL_HISTORY) {
452  int min_sr = ((((1 << 8) - 50) * 400) + 0xFF) >> 8,
453  max_sr = ((((MAX_SIGNAL_HISTORY - 8) << 8) + 205) * 2000 / 37) >> 8;
454 
456  "Unsupported samplerate %d (min=%d, max=%d)\n",
457  ctx->sample_rate, min_sr, max_sr); // 322-22097 Hz
458 
459  return AVERROR(ENOSYS);
460  }
461 
462  s->block_conv_table[0] = s->min_pitch_val;
463  s->block_conv_table[1] = (pitch_range * 25) >> 6;
464  s->block_conv_table[2] = (pitch_range * 44) >> 6;
465  s->block_conv_table[3] = s->max_pitch_val - 1;
466  s->block_delta_pitch_hrange = (pitch_range >> 3) & ~0xF;
467  if (s->block_delta_pitch_hrange <= 0) {
468  av_log(ctx, AV_LOG_ERROR, "Invalid delta pitch hrange; broken extradata?\n");
469  return AVERROR_INVALIDDATA;
470  }
471  s->block_delta_pitch_nbits = 1 + av_ceil_log2(s->block_delta_pitch_hrange);
472  s->block_pitch_range = s->block_conv_table[2] +
473  s->block_conv_table[3] + 1 +
474  2 * (s->block_conv_table[1] - 2 * s->min_pitch_val);
475  s->block_pitch_nbits = av_ceil_log2(s->block_pitch_range);
476 
477  ctx->channels = 1;
478  ctx->channel_layout = AV_CH_LAYOUT_MONO;
479  ctx->sample_fmt = AV_SAMPLE_FMT_FLT;
480 
481  return 0;
482 }
483 
484 /**
485  * @name Postfilter functions
486  * Postfilter functions (gain control, wiener denoise filter, DC filter,
487  * kalman smoothening, plus surrounding code to wrap it)
488  * @{
489  */
490 /**
491  * Adaptive gain control (as used in postfilter).
492  *
493  * Identical to #ff_adaptive_gain_control() in acelp_vectors.c, except
494  * that the energy here is calculated using sum(abs(...)), whereas the
495  * other codecs (e.g. AMR-NB, SIPRO) use sqrt(dotproduct(...)).
496  *
497  * @param out output buffer for filtered samples
498  * @param in input buffer containing the samples as they are after the
499  * postfilter steps so far
500  * @param speech_synth input buffer containing speech synth before postfilter
501  * @param size input buffer size
502  * @param alpha exponential filter factor
503  * @param gain_mem pointer to filter memory (single float)
504  */
505 static void adaptive_gain_control(float *out, const float *in,
506  const float *speech_synth,
507  int size, float alpha, float *gain_mem)
508 {
509  int i;
510  float speech_energy = 0.0, postfilter_energy = 0.0, gain_scale_factor;
511  float mem = *gain_mem;
512 
513  for (i = 0; i < size; i++) {
514  speech_energy += fabsf(speech_synth[i]);
515  postfilter_energy += fabsf(in[i]);
516  }
517  gain_scale_factor = postfilter_energy == 0.0 ? 0.0 :
518  (1.0 - alpha) * speech_energy / postfilter_energy;
519 
520  for (i = 0; i < size; i++) {
521  mem = alpha * mem + gain_scale_factor;
522  out[i] = in[i] * mem;
523  }
524 
525  *gain_mem = mem;
526 }
527 
528 /**
529  * Kalman smoothing function.
530  *
531  * This function looks back pitch +/- 3 samples back into history to find
532  * the best fitting curve (that one giving the optimal gain of the two
533  * signals, i.e. the highest dot product between the two), and then
534  * uses that signal history to smoothen the output of the speech synthesis
535  * filter.
536  *
537  * @param s WMA Voice decoding context
538  * @param pitch pitch of the speech signal
539  * @param in input speech signal
540  * @param out output pointer for smoothened signal
541  * @param size input/output buffer size
542  *
543  * @returns -1 if no smoothening took place, e.g. because no optimal
544  * fit could be found, or 0 on success.
545  */
546 static int kalman_smoothen(WMAVoiceContext *s, int pitch,
547  const float *in, float *out, int size)
548 {
549  int n;
550  float optimal_gain = 0, dot;
551  const float *ptr = &in[-FFMAX(s->min_pitch_val, pitch - 3)],
552  *end = &in[-FFMIN(s->max_pitch_val, pitch + 3)],
553  *best_hist_ptr = NULL;
554 
555  /* find best fitting point in history */
556  do {
557  dot = avpriv_scalarproduct_float_c(in, ptr, size);
558  if (dot > optimal_gain) {
559  optimal_gain = dot;
560  best_hist_ptr = ptr;
561  }
562  } while (--ptr >= end);
563 
564  if (optimal_gain <= 0)
565  return -1;
566  dot = avpriv_scalarproduct_float_c(best_hist_ptr, best_hist_ptr, size);
567  if (dot <= 0) // would be 1.0
568  return -1;
569 
570  if (optimal_gain <= dot) {
571  dot = dot / (dot + 0.6 * optimal_gain); // 0.625-1.000
572  } else
573  dot = 0.625;
574 
575  /* actual smoothing */
576  for (n = 0; n < size; n++)
577  out[n] = best_hist_ptr[n] + dot * (in[n] - best_hist_ptr[n]);
578 
579  return 0;
580 }
581 
582 /**
583  * Get the tilt factor of a formant filter from its transfer function
584  * @see #tilt_factor() in amrnbdec.c, which does essentially the same,
585  * but somehow (??) it does a speech synthesis filter in the
586  * middle, which is missing here
587  *
588  * @param lpcs LPC coefficients
589  * @param n_lpcs Size of LPC buffer
590  * @returns the tilt factor
591  */
592 static float tilt_factor(const float *lpcs, int n_lpcs)
593 {
594  float rh0, rh1;
595 
596  rh0 = 1.0 + avpriv_scalarproduct_float_c(lpcs, lpcs, n_lpcs);
597  rh1 = lpcs[0] + avpriv_scalarproduct_float_c(lpcs, &lpcs[1], n_lpcs - 1);
598 
599  return rh1 / rh0;
600 }
601 
602 /**
603  * Derive denoise filter coefficients (in real domain) from the LPCs.
604  */
605 static void calc_input_response(WMAVoiceContext *s, float *lpcs,
606  int fcb_type, float *coeffs, int remainder)
607 {
608  float last_coeff, min = 15.0, max = -15.0;
609  float irange, angle_mul, gain_mul, range, sq;
610  int n, idx;
611 
612  /* Create frequency power spectrum of speech input (i.e. RDFT of LPCs) */
613  s->rdft.rdft_calc(&s->rdft, lpcs);
614 #define log_range(var, assign) do { \
615  float tmp = log10f(assign); var = tmp; \
616  max = FFMAX(max, tmp); min = FFMIN(min, tmp); \
617  } while (0)
618  log_range(last_coeff, lpcs[1] * lpcs[1]);
619  for (n = 1; n < 64; n++)
620  log_range(lpcs[n], lpcs[n * 2] * lpcs[n * 2] +
621  lpcs[n * 2 + 1] * lpcs[n * 2 + 1]);
622  log_range(lpcs[0], lpcs[0] * lpcs[0]);
623 #undef log_range
624  range = max - min;
625  lpcs[64] = last_coeff;
626 
627  /* Now, use this spectrum to pick out these frequencies with higher
628  * (relative) power/energy (which we then take to be "not noise"),
629  * and set up a table (still in lpc[]) of (relative) gains per frequency.
630  * These frequencies will be maintained, while others ("noise") will be
631  * decreased in the filter output. */
632  irange = 64.0 / range; // so irange*(max-value) is in the range [0, 63]
633  gain_mul = range * (fcb_type == FCB_TYPE_HARDCODED ? (5.0 / 13.0) :
634  (5.0 / 14.7));
635  angle_mul = gain_mul * (8.0 * M_LN10 / M_PI);
636  for (n = 0; n <= 64; n++) {
637  float pwr;
638 
639  idx = lrint((max - lpcs[n]) * irange - 1);
640  idx = FFMAX(0, idx);
641  pwr = wmavoice_denoise_power_table[s->denoise_strength][idx];
642  lpcs[n] = angle_mul * pwr;
643 
644  /* 70.57 =~ 1/log10(1.0331663) */
645  idx = av_clipf((pwr * gain_mul - 0.0295) * 70.570526123, 0, INT_MAX / 2);
646 
647  if (idx > 127) { // fall back if index falls outside table range
648  coeffs[n] = wmavoice_energy_table[127] *
649  powf(1.0331663, idx - 127);
650  } else
651  coeffs[n] = wmavoice_energy_table[FFMAX(0, idx)];
652  }
653 
654  /* calculate the Hilbert transform of the gains, which we do (since this
655  * is a sine input) by doing a phase shift (in theory, H(sin())=cos()).
656  * Hilbert_Transform(RDFT(x)) = Laplace_Transform(x), which calculates the
657  * "moment" of the LPCs in this filter. */
658  s->dct.dct_calc(&s->dct, lpcs);
659  s->dst.dct_calc(&s->dst, lpcs);
660 
661  /* Split out the coefficient indexes into phase/magnitude pairs */
662  idx = 255 + av_clip(lpcs[64], -255, 255);
663  coeffs[0] = coeffs[0] * s->cos[idx];
664  idx = 255 + av_clip(lpcs[64] - 2 * lpcs[63], -255, 255);
665  last_coeff = coeffs[64] * s->cos[idx];
666  for (n = 63;; n--) {
667  idx = 255 + av_clip(-lpcs[64] - 2 * lpcs[n - 1], -255, 255);
668  coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
669  coeffs[n * 2] = coeffs[n] * s->cos[idx];
670 
671  if (!--n) break;
672 
673  idx = 255 + av_clip( lpcs[64] - 2 * lpcs[n - 1], -255, 255);
674  coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
675  coeffs[n * 2] = coeffs[n] * s->cos[idx];
676  }
677  coeffs[1] = last_coeff;
678 
679  /* move into real domain */
680  s->irdft.rdft_calc(&s->irdft, coeffs);
681 
682  /* tilt correction and normalize scale */
683  memset(&coeffs[remainder], 0, sizeof(coeffs[0]) * (128 - remainder));
684  if (s->denoise_tilt_corr) {
685  float tilt_mem = 0;
686 
687  coeffs[remainder - 1] = 0;
688  ff_tilt_compensation(&tilt_mem,
689  -1.8 * tilt_factor(coeffs, remainder - 1),
690  coeffs, remainder);
691  }
692  sq = (1.0 / 64.0) * sqrtf(1 / avpriv_scalarproduct_float_c(coeffs, coeffs,
693  remainder));
694  for (n = 0; n < remainder; n++)
695  coeffs[n] *= sq;
696 }
697 
698 /**
699  * This function applies a Wiener filter on the (noisy) speech signal as
700  * a means to denoise it.
701  *
702  * - take RDFT of LPCs to get the power spectrum of the noise + speech;
703  * - using this power spectrum, calculate (for each frequency) the Wiener
704  * filter gain, which depends on the frequency power and desired level
705  * of noise subtraction (when set too high, this leads to artifacts)
706  * We can do this symmetrically over the X-axis (so 0-4kHz is the inverse
707  * of 4-8kHz);
708  * - by doing a phase shift, calculate the Hilbert transform of this array
709  * of per-frequency filter-gains to get the filtering coefficients;
710  * - smoothen/normalize/de-tilt these filter coefficients as desired;
711  * - take RDFT of noisy sound, apply the coefficients and take its IRDFT
712  * to get the denoised speech signal;
713  * - the leftover (i.e. output of the IRDFT on denoised speech data beyond
714  * the frame boundary) are saved and applied to subsequent frames by an
715  * overlap-add method (otherwise you get clicking-artifacts).
716  *
717  * @param s WMA Voice decoding context
718  * @param fcb_type Frame (codebook) type
719  * @param synth_pf input: the noisy speech signal, output: denoised speech
720  * data; should be 16-byte aligned (for ASM purposes)
721  * @param size size of the speech data
722  * @param lpcs LPCs used to synthesize this frame's speech data
723  */
724 static void wiener_denoise(WMAVoiceContext *s, int fcb_type,
725  float *synth_pf, int size,
726  const float *lpcs)
727 {
728  int remainder, lim, n;
729 
730  if (fcb_type != FCB_TYPE_SILENCE) {
731  float *tilted_lpcs = s->tilted_lpcs_pf,
732  *coeffs = s->denoise_coeffs_pf, tilt_mem = 0;
733 
734  tilted_lpcs[0] = 1.0;
735  memcpy(&tilted_lpcs[1], lpcs, sizeof(lpcs[0]) * s->lsps);
736  memset(&tilted_lpcs[s->lsps + 1], 0,
737  sizeof(tilted_lpcs[0]) * (128 - s->lsps - 1));
738  ff_tilt_compensation(&tilt_mem, 0.7 * tilt_factor(lpcs, s->lsps),
739  tilted_lpcs, s->lsps + 2);
740 
741  /* The IRDFT output (127 samples for 7-bit filter) beyond the frame
742  * size is applied to the next frame. All input beyond this is zero,
743  * and thus all output beyond this will go towards zero, hence we can
744  * limit to min(size-1, 127-size) as a performance consideration. */
745  remainder = FFMIN(127 - size, size - 1);
746  calc_input_response(s, tilted_lpcs, fcb_type, coeffs, remainder);
747 
748  /* apply coefficients (in frequency spectrum domain), i.e. complex
749  * number multiplication */
750  memset(&synth_pf[size], 0, sizeof(synth_pf[0]) * (128 - size));
751  s->rdft.rdft_calc(&s->rdft, synth_pf);
752  s->rdft.rdft_calc(&s->rdft, coeffs);
753  synth_pf[0] *= coeffs[0];
754  synth_pf[1] *= coeffs[1];
755  for (n = 1; n < 64; n++) {
756  float v1 = synth_pf[n * 2], v2 = synth_pf[n * 2 + 1];
757  synth_pf[n * 2] = v1 * coeffs[n * 2] - v2 * coeffs[n * 2 + 1];
758  synth_pf[n * 2 + 1] = v2 * coeffs[n * 2] + v1 * coeffs[n * 2 + 1];
759  }
760  s->irdft.rdft_calc(&s->irdft, synth_pf);
761  }
762 
763  /* merge filter output with the history of previous runs */
764  if (s->denoise_filter_cache_size) {
765  lim = FFMIN(s->denoise_filter_cache_size, size);
766  for (n = 0; n < lim; n++)
767  synth_pf[n] += s->denoise_filter_cache[n];
768  s->denoise_filter_cache_size -= lim;
769  memmove(s->denoise_filter_cache, &s->denoise_filter_cache[size],
770  sizeof(s->denoise_filter_cache[0]) * s->denoise_filter_cache_size);
771  }
772 
773  /* move remainder of filter output into a cache for future runs */
774  if (fcb_type != FCB_TYPE_SILENCE) {
775  lim = FFMIN(remainder, s->denoise_filter_cache_size);
776  for (n = 0; n < lim; n++)
777  s->denoise_filter_cache[n] += synth_pf[size + n];
778  if (lim < remainder) {
779  memcpy(&s->denoise_filter_cache[lim], &synth_pf[size + lim],
780  sizeof(s->denoise_filter_cache[0]) * (remainder - lim));
781  s->denoise_filter_cache_size = remainder;
782  }
783  }
784 }
785 
786 /**
787  * Averaging projection filter, the postfilter used in WMAVoice.
788  *
789  * This uses the following steps:
790  * - A zero-synthesis filter (generate excitation from synth signal)
791  * - Kalman smoothing on excitation, based on pitch
792  * - Re-synthesized smoothened output
793  * - Iterative Wiener denoise filter
794  * - Adaptive gain filter
795  * - DC filter
796  *
797  * @param s WMAVoice decoding context
798  * @param synth Speech synthesis output (before postfilter)
799  * @param samples Output buffer for filtered samples
800  * @param size Buffer size of synth & samples
801  * @param lpcs Generated LPCs used for speech synthesis
802  * @param zero_exc_pf destination for zero synthesis filter (16-byte aligned)
803  * @param fcb_type Frame type (silence, hardcoded, AW-pulses or FCB-pulses)
804  * @param pitch Pitch of the input signal
805  */
806 static void postfilter(WMAVoiceContext *s, const float *synth,
807  float *samples, int size,
808  const float *lpcs, float *zero_exc_pf,
809  int fcb_type, int pitch)
810 {
811  float synth_filter_in_buf[MAX_FRAMESIZE / 2],
812  *synth_pf = &s->synth_filter_out_buf[MAX_LSPS_ALIGN16],
813  *synth_filter_in = zero_exc_pf;
814 
815  av_assert0(size <= MAX_FRAMESIZE / 2);
816 
817  /* generate excitation from input signal */
818  ff_celp_lp_zero_synthesis_filterf(zero_exc_pf, lpcs, synth, size, s->lsps);
819 
820  if (fcb_type >= FCB_TYPE_AW_PULSES &&
821  !kalman_smoothen(s, pitch, zero_exc_pf, synth_filter_in_buf, size))
822  synth_filter_in = synth_filter_in_buf;
823 
824  /* re-synthesize speech after smoothening, and keep history */
825  ff_celp_lp_synthesis_filterf(synth_pf, lpcs,
826  synth_filter_in, size, s->lsps);
827  memcpy(&synth_pf[-s->lsps], &synth_pf[size - s->lsps],
828  sizeof(synth_pf[0]) * s->lsps);
829 
830  wiener_denoise(s, fcb_type, synth_pf, size, lpcs);
831 
832  adaptive_gain_control(samples, synth_pf, synth, size, 0.99,
833  &s->postfilter_agc);
834 
835  if (s->dc_level > 8) {
836  /* remove ultra-low frequency DC noise / highpass filter;
837  * coefficients are identical to those used in SIPR decoding,
838  * and very closely resemble those used in AMR-NB decoding. */
840  (const float[2]) { -1.99997, 1.0 },
841  (const float[2]) { -1.9330735188, 0.93589198496 },
842  0.93980580475, s->dcf_mem, size);
843  }
844 }
845 /**
846  * @}
847  */
848 
849 /**
850  * Dequantize LSPs
851  * @param lsps output pointer to the array that will hold the LSPs
852  * @param num number of LSPs to be dequantized
853  * @param values quantized values, contains n_stages values
854  * @param sizes range (i.e. max value) of each quantized value
855  * @param n_stages number of dequantization runs
856  * @param table dequantization table to be used
857  * @param mul_q LSF multiplier
858  * @param base_q base (lowest) LSF values
859  */
860 static void dequant_lsps(double *lsps, int num,
861  const uint16_t *values,
862  const uint16_t *sizes,
863  int n_stages, const uint8_t *table,
864  const double *mul_q,
865  const double *base_q)
866 {
867  int n, m;
868 
869  memset(lsps, 0, num * sizeof(*lsps));
870  for (n = 0; n < n_stages; n++) {
871  const uint8_t *t_off = &table[values[n] * num];
872  double base = base_q[n], mul = mul_q[n];
873 
874  for (m = 0; m < num; m++)
875  lsps[m] += base + mul * t_off[m];
876 
877  table += sizes[n] * num;
878  }
879 }
880 
881 /**
882  * @name LSP dequantization routines
883  * LSP dequantization routines, for 10/16LSPs and independent/residual coding.
884  * lsp10i() consumes 24 bits; lsp10r() consumes an additional 24 bits;
885  * lsp16i() consumes 34 bits; lsp16r() consumes an additional 26 bits.
886  * @{
887  */
888 /**
889  * Parse 10 independently-coded LSPs.
890  */
891 static void dequant_lsp10i(GetBitContext *gb, double *lsps)
892 {
893  static const uint16_t vec_sizes[4] = { 256, 64, 32, 32 };
894  static const double mul_lsf[4] = {
895  5.2187144800e-3, 1.4626986422e-3,
896  9.6179549166e-4, 1.1325736225e-3
897  };
898  static const double base_lsf[4] = {
899  M_PI * -2.15522e-1, M_PI * -6.1646e-2,
900  M_PI * -3.3486e-2, M_PI * -5.7408e-2
901  };
902  uint16_t v[4];
903 
904  v[0] = get_bits(gb, 8);
905  v[1] = get_bits(gb, 6);
906  v[2] = get_bits(gb, 5);
907  v[3] = get_bits(gb, 5);
908 
909  dequant_lsps(lsps, 10, v, vec_sizes, 4, wmavoice_dq_lsp10i,
910  mul_lsf, base_lsf);
911 }
912 
913 /**
914  * Parse 10 independently-coded LSPs, and then derive the tables to
915  * generate LSPs for the other frames from them (residual coding).
916  */
918  double *i_lsps, const double *old,
919  double *a1, double *a2, int q_mode)
920 {
921  static const uint16_t vec_sizes[3] = { 128, 64, 64 };
922  static const double mul_lsf[3] = {
923  2.5807601174e-3, 1.2354460219e-3, 1.1763821673e-3
924  };
925  static const double base_lsf[3] = {
926  M_PI * -1.07448e-1, M_PI * -5.2706e-2, M_PI * -5.1634e-2
927  };
928  const float (*ipol_tab)[2][10] = q_mode ?
930  uint16_t interpol, v[3];
931  int n;
932 
933  dequant_lsp10i(gb, i_lsps);
934 
935  interpol = get_bits(gb, 5);
936  v[0] = get_bits(gb, 7);
937  v[1] = get_bits(gb, 6);
938  v[2] = get_bits(gb, 6);
939 
940  for (n = 0; n < 10; n++) {
941  double delta = old[n] - i_lsps[n];
942  a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];
943  a1[10 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n];
944  }
945 
946  dequant_lsps(a2, 20, v, vec_sizes, 3, wmavoice_dq_lsp10r,
947  mul_lsf, base_lsf);
948 }
949 
950 /**
951  * Parse 16 independently-coded LSPs.
952  */
953 static void dequant_lsp16i(GetBitContext *gb, double *lsps)
954 {
955  static const uint16_t vec_sizes[5] = { 256, 64, 128, 64, 128 };
956  static const double mul_lsf[5] = {
957  3.3439586280e-3, 6.9908173703e-4,
958  3.3216608306e-3, 1.0334960326e-3,
959  3.1899104283e-3
960  };
961  static const double base_lsf[5] = {
962  M_PI * -1.27576e-1, M_PI * -2.4292e-2,
963  M_PI * -1.28094e-1, M_PI * -3.2128e-2,
964  M_PI * -1.29816e-1
965  };
966  uint16_t v[5];
967 
968  v[0] = get_bits(gb, 8);
969  v[1] = get_bits(gb, 6);
970  v[2] = get_bits(gb, 7);
971  v[3] = get_bits(gb, 6);
972  v[4] = get_bits(gb, 7);
973 
974  dequant_lsps( lsps, 5, v, vec_sizes, 2,
975  wmavoice_dq_lsp16i1, mul_lsf, base_lsf);
976  dequant_lsps(&lsps[5], 5, &v[2], &vec_sizes[2], 2,
977  wmavoice_dq_lsp16i2, &mul_lsf[2], &base_lsf[2]);
978  dequant_lsps(&lsps[10], 6, &v[4], &vec_sizes[4], 1,
979  wmavoice_dq_lsp16i3, &mul_lsf[4], &base_lsf[4]);
980 }
981 
982 /**
983  * Parse 16 independently-coded LSPs, and then derive the tables to
984  * generate LSPs for the other frames from them (residual coding).
985  */
987  double *i_lsps, const double *old,
988  double *a1, double *a2, int q_mode)
989 {
990  static const uint16_t vec_sizes[3] = { 128, 128, 128 };
991  static const double mul_lsf[3] = {
992  1.2232979501e-3, 1.4062241527e-3, 1.6114744851e-3
993  };
994  static const double base_lsf[3] = {
995  M_PI * -5.5830e-2, M_PI * -5.2908e-2, M_PI * -5.4776e-2
996  };
997  const float (*ipol_tab)[2][16] = q_mode ?
999  uint16_t interpol, v[3];
1000  int n;
1001 
1002  dequant_lsp16i(gb, i_lsps);
1003 
1004  interpol = get_bits(gb, 5);
1005  v[0] = get_bits(gb, 7);
1006  v[1] = get_bits(gb, 7);
1007  v[2] = get_bits(gb, 7);
1008 
1009  for (n = 0; n < 16; n++) {
1010  double delta = old[n] - i_lsps[n];
1011  a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];
1012  a1[16 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n];
1013  }
1014 
1015  dequant_lsps( a2, 10, v, vec_sizes, 1,
1016  wmavoice_dq_lsp16r1, mul_lsf, base_lsf);
1017  dequant_lsps(&a2[10], 10, &v[1], &vec_sizes[1], 1,
1018  wmavoice_dq_lsp16r2, &mul_lsf[1], &base_lsf[1]);
1019  dequant_lsps(&a2[20], 12, &v[2], &vec_sizes[2], 1,
1020  wmavoice_dq_lsp16r3, &mul_lsf[2], &base_lsf[2]);
1021 }
1022 
1023 /**
1024  * @}
1025  * @name Pitch-adaptive window coding functions
1026  * The next few functions are for pitch-adaptive window coding.
1027  * @{
1028  */
1029 /**
1030  * Parse the offset of the first pitch-adaptive window pulses, and
1031  * the distribution of pulses between the two blocks in this frame.
1032  * @param s WMA Voice decoding context private data
1033  * @param gb bit I/O context
1034  * @param pitch pitch for each block in this frame
1035  */
1037  const int *pitch)
1038 {
1039  static const int16_t start_offset[94] = {
1040  -11, -9, -7, -5, -3, -1, 1, 3, 5, 7, 9, 11,
1041  13, 15, 18, 17, 19, 20, 21, 22, 23, 24, 25, 26,
1042  27, 28, 29, 30, 31, 32, 33, 35, 37, 39, 41, 43,
1043  45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67,
1044  69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91,
1045  93, 95, 97, 99, 101, 103, 105, 107, 109, 111, 113, 115,
1046  117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139,
1047  141, 143, 145, 147, 149, 151, 153, 155, 157, 159
1048  };
1049  int bits, offset;
1050 
1051  /* position of pulse */
1052  s->aw_idx_is_ext = 0;
1053  if ((bits = get_bits(gb, 6)) >= 54) {
1054  s->aw_idx_is_ext = 1;
1055  bits += (bits - 54) * 3 + get_bits(gb, 2);
1056  }
1057 
1058  /* for a repeated pulse at pulse_off with a pitch_lag of pitch[], count
1059  * the distribution of the pulses in each block contained in this frame. */
1060  s->aw_pulse_range = FFMIN(pitch[0], pitch[1]) > 32 ? 24 : 16;
1061  for (offset = start_offset[bits]; offset < 0; offset += pitch[0]) ;
1062  s->aw_n_pulses[0] = (pitch[0] - 1 + MAX_FRAMESIZE / 2 - offset) / pitch[0];
1063  s->aw_first_pulse_off[0] = offset - s->aw_pulse_range / 2;
1064  offset += s->aw_n_pulses[0] * pitch[0];
1065  s->aw_n_pulses[1] = (pitch[1] - 1 + MAX_FRAMESIZE - offset) / pitch[1];
1066  s->aw_first_pulse_off[1] = offset - (MAX_FRAMESIZE + s->aw_pulse_range) / 2;
1067 
1068  /* if continuing from a position before the block, reset position to
1069  * start of block (when corrected for the range over which it can be
1070  * spread in aw_pulse_set1()). */
1071  if (start_offset[bits] < MAX_FRAMESIZE / 2) {
1072  while (s->aw_first_pulse_off[1] - pitch[1] + s->aw_pulse_range > 0)
1073  s->aw_first_pulse_off[1] -= pitch[1];
1074  if (start_offset[bits] < 0)
1075  while (s->aw_first_pulse_off[0] - pitch[0] + s->aw_pulse_range > 0)
1076  s->aw_first_pulse_off[0] -= pitch[0];
1077  }
1078 }
1079 
1080 /**
1081  * Apply second set of pitch-adaptive window pulses.
1082  * @param s WMA Voice decoding context private data
1083  * @param gb bit I/O context
1084  * @param block_idx block index in frame [0, 1]
1085  * @param fcb structure containing fixed codebook vector info
1086  * @return -1 on error, 0 otherwise
1087  */
1089  int block_idx, AMRFixed *fcb)
1090 {
1091  uint16_t use_mask_mem[9]; // only 5 are used, rest is padding
1092  uint16_t *use_mask = use_mask_mem + 2;
1093  /* in this function, idx is the index in the 80-bit (+ padding) use_mask
1094  * bit-array. Since use_mask consists of 16-bit values, the lower 4 bits
1095  * of idx are the position of the bit within a particular item in the
1096  * array (0 being the most significant bit, and 15 being the least
1097  * significant bit), and the remainder (>> 4) is the index in the
1098  * use_mask[]-array. This is faster and uses less memory than using a
1099  * 80-byte/80-int array. */
1100  int pulse_off = s->aw_first_pulse_off[block_idx],
1101  pulse_start, n, idx, range, aidx, start_off = 0;
1102 
1103  /* set offset of first pulse to within this block */
1104  if (s->aw_n_pulses[block_idx] > 0)
1105  while (pulse_off + s->aw_pulse_range < 1)
1106  pulse_off += fcb->pitch_lag;
1107 
1108  /* find range per pulse */
1109  if (s->aw_n_pulses[0] > 0) {
1110  if (block_idx == 0) {
1111  range = 32;
1112  } else /* block_idx = 1 */ {
1113  range = 8;
1114  if (s->aw_n_pulses[block_idx] > 0)
1115  pulse_off = s->aw_next_pulse_off_cache;
1116  }
1117  } else
1118  range = 16;
1119  pulse_start = s->aw_n_pulses[block_idx] > 0 ? pulse_off - range / 2 : 0;
1120 
1121  /* aw_pulse_set1() already applies pulses around pulse_off (to be exactly,
1122  * in the range of [pulse_off, pulse_off + s->aw_pulse_range], and thus
1123  * we exclude that range from being pulsed again in this function. */
1124  memset(&use_mask[-2], 0, 2 * sizeof(use_mask[0]));
1125  memset( use_mask, -1, 5 * sizeof(use_mask[0]));
1126  memset(&use_mask[5], 0, 2 * sizeof(use_mask[0]));
1127  if (s->aw_n_pulses[block_idx] > 0)
1128  for (idx = pulse_off; idx < MAX_FRAMESIZE / 2; idx += fcb->pitch_lag) {
1129  int excl_range = s->aw_pulse_range; // always 16 or 24
1130  uint16_t *use_mask_ptr = &use_mask[idx >> 4];
1131  int first_sh = 16 - (idx & 15);
1132  *use_mask_ptr++ &= 0xFFFFu << first_sh;
1133  excl_range -= first_sh;
1134  if (excl_range >= 16) {
1135  *use_mask_ptr++ = 0;
1136  *use_mask_ptr &= 0xFFFF >> (excl_range - 16);
1137  } else
1138  *use_mask_ptr &= 0xFFFF >> excl_range;
1139  }
1140 
1141  /* find the 'aidx'th offset that is not excluded */
1142  aidx = get_bits(gb, s->aw_n_pulses[0] > 0 ? 5 - 2 * block_idx : 4);
1143  for (n = 0; n <= aidx; pulse_start++) {
1144  for (idx = pulse_start; idx < 0; idx += fcb->pitch_lag) ;
1145  if (idx >= MAX_FRAMESIZE / 2) { // find from zero
1146  if (use_mask[0]) idx = 0x0F;
1147  else if (use_mask[1]) idx = 0x1F;
1148  else if (use_mask[2]) idx = 0x2F;
1149  else if (use_mask[3]) idx = 0x3F;
1150  else if (use_mask[4]) idx = 0x4F;
1151  else return -1;
1152  idx -= av_log2_16bit(use_mask[idx >> 4]);
1153  }
1154  if (use_mask[idx >> 4] & (0x8000 >> (idx & 15))) {
1155  use_mask[idx >> 4] &= ~(0x8000 >> (idx & 15));
1156  n++;
1157  start_off = idx;
1158  }
1159  }
1160 
1161  fcb->x[fcb->n] = start_off;
1162  fcb->y[fcb->n] = get_bits1(gb) ? -1.0 : 1.0;
1163  fcb->n++;
1164 
1165  /* set offset for next block, relative to start of that block */
1166  n = (MAX_FRAMESIZE / 2 - start_off) % fcb->pitch_lag;
1167  s->aw_next_pulse_off_cache = n ? fcb->pitch_lag - n : 0;
1168  return 0;
1169 }
1170 
1171 /**
1172  * Apply first set of pitch-adaptive window pulses.
1173  * @param s WMA Voice decoding context private data
1174  * @param gb bit I/O context
1175  * @param block_idx block index in frame [0, 1]
1176  * @param fcb storage location for fixed codebook pulse info
1177  */
1179  int block_idx, AMRFixed *fcb)
1180 {
1181  int val = get_bits(gb, 12 - 2 * (s->aw_idx_is_ext && !block_idx));
1182  float v;
1183 
1184  if (s->aw_n_pulses[block_idx] > 0) {
1185  int n, v_mask, i_mask, sh, n_pulses;
1186 
1187  if (s->aw_pulse_range == 24) { // 3 pulses, 1:sign + 3:index each
1188  n_pulses = 3;
1189  v_mask = 8;
1190  i_mask = 7;
1191  sh = 4;
1192  } else { // 4 pulses, 1:sign + 2:index each
1193  n_pulses = 4;
1194  v_mask = 4;
1195  i_mask = 3;
1196  sh = 3;
1197  }
1198 
1199  for (n = n_pulses - 1; n >= 0; n--, val >>= sh) {
1200  fcb->y[fcb->n] = (val & v_mask) ? -1.0 : 1.0;
1201  fcb->x[fcb->n] = (val & i_mask) * n_pulses + n +
1202  s->aw_first_pulse_off[block_idx];
1203  while (fcb->x[fcb->n] < 0)
1204  fcb->x[fcb->n] += fcb->pitch_lag;
1205  if (fcb->x[fcb->n] < MAX_FRAMESIZE / 2)
1206  fcb->n++;
1207  }
1208  } else {
1209  int num2 = (val & 0x1FF) >> 1, delta, idx;
1210 
1211  if (num2 < 1 * 79) { delta = 1; idx = num2 + 1; }
1212  else if (num2 < 2 * 78) { delta = 3; idx = num2 + 1 - 1 * 77; }
1213  else if (num2 < 3 * 77) { delta = 5; idx = num2 + 1 - 2 * 76; }
1214  else { delta = 7; idx = num2 + 1 - 3 * 75; }
1215  v = (val & 0x200) ? -1.0 : 1.0;
1216 
1217  fcb->no_repeat_mask |= 3 << fcb->n;
1218  fcb->x[fcb->n] = idx - delta;
1219  fcb->y[fcb->n] = v;
1220  fcb->x[fcb->n + 1] = idx;
1221  fcb->y[fcb->n + 1] = (val & 1) ? -v : v;
1222  fcb->n += 2;
1223  }
1224 }
1225 
1226 /**
1227  * @}
1228  *
1229  * Generate a random number from frame_cntr and block_idx, which will live
1230  * in the range [0, 1000 - block_size] (so it can be used as an index in a
1231  * table of size 1000 of which you want to read block_size entries).
1232  *
1233  * @param frame_cntr current frame number
1234  * @param block_num current block index
1235  * @param block_size amount of entries we want to read from a table
1236  * that has 1000 entries
1237  * @return a (non-)random number in the [0, 1000 - block_size] range.
1238  */
1239 static int pRNG(int frame_cntr, int block_num, int block_size)
1240 {
1241  /* array to simplify the calculation of z:
1242  * y = (x % 9) * 5 + 6;
1243  * z = (49995 * x) / y;
1244  * Since y only has 9 values, we can remove the division by using a
1245  * LUT and using FASTDIV-style divisions. For each of the 9 values
1246  * of y, we can rewrite z as:
1247  * z = x * (49995 / y) + x * ((49995 % y) / y)
1248  * In this table, each col represents one possible value of y, the
1249  * first number is 49995 / y, and the second is the FASTDIV variant
1250  * of 49995 % y / y. */
1251  static const unsigned int div_tbl[9][2] = {
1252  { 8332, 3 * 715827883U }, // y = 6
1253  { 4545, 0 * 390451573U }, // y = 11
1254  { 3124, 11 * 268435456U }, // y = 16
1255  { 2380, 15 * 204522253U }, // y = 21
1256  { 1922, 23 * 165191050U }, // y = 26
1257  { 1612, 23 * 138547333U }, // y = 31
1258  { 1388, 27 * 119304648U }, // y = 36
1259  { 1219, 16 * 104755300U }, // y = 41
1260  { 1086, 39 * 93368855U } // y = 46
1261  };
1262  unsigned int z, y, x = MUL16(block_num, 1877) + frame_cntr;
1263  if (x >= 0xFFFF) x -= 0xFFFF; // max value of x is 8*1877+0xFFFE=0x13AA6,
1264  // so this is effectively a modulo (%)
1265  y = x - 9 * MULH(477218589, x); // x % 9
1266  z = (uint16_t) (x * div_tbl[y][0] + UMULH(x, div_tbl[y][1]));
1267  // z = x * 49995 / (y * 5 + 6)
1268  return z % (1000 - block_size);
1269 }
1270 
1271 /**
1272  * Parse hardcoded signal for a single block.
1273  * @note see #synth_block().
1274  */
1276  int block_idx, int size,
1277  const struct frame_type_desc *frame_desc,
1278  float *excitation)
1279 {
1280  float gain;
1281  int n, r_idx;
1282 
1284 
1285  /* Set the offset from which we start reading wmavoice_std_codebook */
1286  if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
1287  r_idx = pRNG(s->frame_cntr, block_idx, size);
1288  gain = s->silence_gain;
1289  } else /* FCB_TYPE_HARDCODED */ {
1290  r_idx = get_bits(gb, 8);
1291  gain = wmavoice_gain_universal[get_bits(gb, 6)];
1292  }
1293 
1294  /* Clear gain prediction parameters */
1295  memset(s->gain_pred_err, 0, sizeof(s->gain_pred_err));
1296 
1297  /* Apply gain to hardcoded codebook and use that as excitation signal */
1298  for (n = 0; n < size; n++)
1299  excitation[n] = wmavoice_std_codebook[r_idx + n] * gain;
1300 }
1301 
1302 /**
1303  * Parse FCB/ACB signal for a single block.
1304  * @note see #synth_block().
1305  */
1307  int block_idx, int size,
1308  int block_pitch_sh2,
1309  const struct frame_type_desc *frame_desc,
1310  float *excitation)
1311 {
1312  static const float gain_coeff[6] = {
1313  0.8169, -0.06545, 0.1726, 0.0185, -0.0359, 0.0458
1314  };
1315  float pulses[MAX_FRAMESIZE / 2], pred_err, acb_gain, fcb_gain;
1316  int n, idx, gain_weight;
1317  AMRFixed fcb;
1318 
1319  av_assert0(size <= MAX_FRAMESIZE / 2);
1320  memset(pulses, 0, sizeof(*pulses) * size);
1321 
1322  fcb.pitch_lag = block_pitch_sh2 >> 2;
1323  fcb.pitch_fac = 1.0;
1324  fcb.no_repeat_mask = 0;
1325  fcb.n = 0;
1326 
1327  /* For the other frame types, this is where we apply the innovation
1328  * (fixed) codebook pulses of the speech signal. */
1329  if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
1330  aw_pulse_set1(s, gb, block_idx, &fcb);
1331  if (aw_pulse_set2(s, gb, block_idx, &fcb)) {
1332  /* Conceal the block with silence and return.
1333  * Skip the correct amount of bits to read the next
1334  * block from the correct offset. */
1335  int r_idx = pRNG(s->frame_cntr, block_idx, size);
1336 
1337  for (n = 0; n < size; n++)
1338  excitation[n] =
1339  wmavoice_std_codebook[r_idx + n] * s->silence_gain;
1340  skip_bits(gb, 7 + 1);
1341  return;
1342  }
1343  } else /* FCB_TYPE_EXC_PULSES */ {
1344  int offset_nbits = 5 - frame_desc->log_n_blocks;
1345 
1346  fcb.no_repeat_mask = -1;
1347  /* similar to ff_decode_10_pulses_35bits(), but with single pulses
1348  * (instead of double) for a subset of pulses */
1349  for (n = 0; n < 5; n++) {
1350  float sign;
1351  int pos1, pos2;
1352 
1353  sign = get_bits1(gb) ? 1.0 : -1.0;
1354  pos1 = get_bits(gb, offset_nbits);
1355  fcb.x[fcb.n] = n + 5 * pos1;
1356  fcb.y[fcb.n++] = sign;
1357  if (n < frame_desc->dbl_pulses) {
1358  pos2 = get_bits(gb, offset_nbits);
1359  fcb.x[fcb.n] = n + 5 * pos2;
1360  fcb.y[fcb.n++] = (pos1 < pos2) ? -sign : sign;
1361  }
1362  }
1363  }
1364  ff_set_fixed_vector(pulses, &fcb, 1.0, size);
1365 
1366  /* Calculate gain for adaptive & fixed codebook signal.
1367  * see ff_amr_set_fixed_gain(). */
1368  idx = get_bits(gb, 7);
1369  fcb_gain = expf(avpriv_scalarproduct_float_c(s->gain_pred_err,
1370  gain_coeff, 6) -
1371  5.2409161640 + wmavoice_gain_codebook_fcb[idx]);
1372  acb_gain = wmavoice_gain_codebook_acb[idx];
1373  pred_err = av_clipf(wmavoice_gain_codebook_fcb[idx],
1374  -2.9957322736 /* log(0.05) */,
1375  1.6094379124 /* log(5.0) */);
1376 
1377  gain_weight = 8 >> frame_desc->log_n_blocks;
1378  memmove(&s->gain_pred_err[gain_weight], s->gain_pred_err,
1379  sizeof(*s->gain_pred_err) * (6 - gain_weight));
1380  for (n = 0; n < gain_weight; n++)
1381  s->gain_pred_err[n] = pred_err;
1382 
1383  /* Calculation of adaptive codebook */
1384  if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
1385  int len;
1386  for (n = 0; n < size; n += len) {
1387  int next_idx_sh16;
1388  int abs_idx = block_idx * size + n;
1389  int pitch_sh16 = (s->last_pitch_val << 16) +
1390  s->pitch_diff_sh16 * abs_idx;
1391  int pitch = (pitch_sh16 + 0x6FFF) >> 16;
1392  int idx_sh16 = ((pitch << 16) - pitch_sh16) * 8 + 0x58000;
1393  idx = idx_sh16 >> 16;
1394  if (s->pitch_diff_sh16) {
1395  if (s->pitch_diff_sh16 > 0) {
1396  next_idx_sh16 = (idx_sh16) &~ 0xFFFF;
1397  } else
1398  next_idx_sh16 = (idx_sh16 + 0x10000) &~ 0xFFFF;
1399  len = av_clip((idx_sh16 - next_idx_sh16) / s->pitch_diff_sh16 / 8,
1400  1, size - n);
1401  } else
1402  len = size;
1403 
1404  ff_acelp_interpolatef(&excitation[n], &excitation[n - pitch],
1406  idx, 9, len);
1407  }
1408  } else /* ACB_TYPE_HAMMING */ {
1409  int block_pitch = block_pitch_sh2 >> 2;
1410  idx = block_pitch_sh2 & 3;
1411  if (idx) {
1412  ff_acelp_interpolatef(excitation, &excitation[-block_pitch],
1414  idx, 8, size);
1415  } else
1416  av_memcpy_backptr((uint8_t *) excitation, sizeof(float) * block_pitch,
1417  sizeof(float) * size);
1418  }
1419 
1420  /* Interpolate ACB/FCB and use as excitation signal */
1421  ff_weighted_vector_sumf(excitation, excitation, pulses,
1422  acb_gain, fcb_gain, size);
1423 }
1424 
1425 /**
1426  * Parse data in a single block.
1427  *
1428  * @param s WMA Voice decoding context private data
1429  * @param gb bit I/O context
1430  * @param block_idx index of the to-be-read block
1431  * @param size amount of samples to be read in this block
1432  * @param block_pitch_sh2 pitch for this block << 2
1433  * @param lsps LSPs for (the end of) this frame
1434  * @param prev_lsps LSPs for the last frame
1435  * @param frame_desc frame type descriptor
1436  * @param excitation target memory for the ACB+FCB interpolated signal
1437  * @param synth target memory for the speech synthesis filter output
1438  * @return 0 on success, <0 on error.
1439  */
1441  int block_idx, int size,
1442  int block_pitch_sh2,
1443  const double *lsps, const double *prev_lsps,
1444  const struct frame_type_desc *frame_desc,
1445  float *excitation, float *synth)
1446 {
1447  double i_lsps[MAX_LSPS];
1448  float lpcs[MAX_LSPS];
1449  float fac;
1450  int n;
1451 
1452  if (frame_desc->acb_type == ACB_TYPE_NONE)
1453  synth_block_hardcoded(s, gb, block_idx, size, frame_desc, excitation);
1454  else
1455  synth_block_fcb_acb(s, gb, block_idx, size, block_pitch_sh2,
1456  frame_desc, excitation);
1457 
1458  /* convert interpolated LSPs to LPCs */
1459  fac = (block_idx + 0.5) / frame_desc->n_blocks;
1460  for (n = 0; n < s->lsps; n++) // LSF -> LSP
1461  i_lsps[n] = cos(prev_lsps[n] + fac * (lsps[n] - prev_lsps[n]));
1462  ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
1463 
1464  /* Speech synthesis */
1465  ff_celp_lp_synthesis_filterf(synth, lpcs, excitation, size, s->lsps);
1466 }
1467 
1468 /**
1469  * Synthesize output samples for a single frame.
1470  *
1471  * @param ctx WMA Voice decoder context
1472  * @param gb bit I/O context (s->gb or one for cross-packet superframes)
1473  * @param frame_idx Frame number within superframe [0-2]
1474  * @param samples pointer to output sample buffer, has space for at least 160
1475  * samples
1476  * @param lsps LSP array
1477  * @param prev_lsps array of previous frame's LSPs
1478  * @param excitation target buffer for excitation signal
1479  * @param synth target buffer for synthesized speech data
1480  * @return 0 on success, <0 on error.
1481  */
1482 static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx,
1483  float *samples,
1484  const double *lsps, const double *prev_lsps,
1485  float *excitation, float *synth)
1486 {
1488  int n, n_blocks_x2, log_n_blocks_x2, av_uninit(cur_pitch_val);
1489  int pitch[MAX_BLOCKS], av_uninit(last_block_pitch);
1490 
1491  /* Parse frame type ("frame header"), see frame_descs */
1492  int bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)], block_nsamples;
1493 
1494  if (bd_idx < 0) {
1496  "Invalid frame type VLC code, skipping\n");
1497  return AVERROR_INVALIDDATA;
1498  }
1499 
1500  block_nsamples = MAX_FRAMESIZE / frame_descs[bd_idx].n_blocks;
1501 
1502  /* Pitch calculation for ACB_TYPE_ASYMMETRIC ("pitch-per-frame") */
1503  if (frame_descs[bd_idx].acb_type == ACB_TYPE_ASYMMETRIC) {
1504  /* Pitch is provided per frame, which is interpreted as the pitch of
1505  * the last sample of the last block of this frame. We can interpolate
1506  * the pitch of other blocks (and even pitch-per-sample) by gradually
1507  * incrementing/decrementing prev_frame_pitch to cur_pitch_val. */
1508  n_blocks_x2 = frame_descs[bd_idx].n_blocks << 1;
1509  log_n_blocks_x2 = frame_descs[bd_idx].log_n_blocks + 1;
1510  cur_pitch_val = s->min_pitch_val + get_bits(gb, s->pitch_nbits);
1511  cur_pitch_val = FFMIN(cur_pitch_val, s->max_pitch_val - 1);
1512  if (s->last_acb_type == ACB_TYPE_NONE ||
1513  20 * abs(cur_pitch_val - s->last_pitch_val) >
1514  (cur_pitch_val + s->last_pitch_val))
1515  s->last_pitch_val = cur_pitch_val;
1516 
1517  /* pitch per block */
1518  for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
1519  int fac = n * 2 + 1;
1520 
1521  pitch[n] = (MUL16(fac, cur_pitch_val) +
1522  MUL16((n_blocks_x2 - fac), s->last_pitch_val) +
1523  frame_descs[bd_idx].n_blocks) >> log_n_blocks_x2;
1524  }
1525 
1526  /* "pitch-diff-per-sample" for calculation of pitch per sample */
1527  s->pitch_diff_sh16 =
1528  (cur_pitch_val - s->last_pitch_val) * (1 << 16) / MAX_FRAMESIZE;
1529  }
1530 
1531  /* Global gain (if silence) and pitch-adaptive window coordinates */
1532  switch (frame_descs[bd_idx].fcb_type) {
1533  case FCB_TYPE_SILENCE:
1534  s->silence_gain = wmavoice_gain_silence[get_bits(gb, 8)];
1535  break;
1536  case FCB_TYPE_AW_PULSES:
1537  aw_parse_coords(s, gb, pitch);
1538  break;
1539  }
1540 
1541  for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
1542  int bl_pitch_sh2;
1543 
1544  /* Pitch calculation for ACB_TYPE_HAMMING ("pitch-per-block") */
1545  switch (frame_descs[bd_idx].acb_type) {
1546  case ACB_TYPE_HAMMING: {
1547  /* Pitch is given per block. Per-block pitches are encoded as an
1548  * absolute value for the first block, and then delta values
1549  * relative to this value) for all subsequent blocks. The scale of
1550  * this pitch value is semi-logarithmic compared to its use in the
1551  * decoder, so we convert it to normal scale also. */
1552  int block_pitch,
1553  t1 = (s->block_conv_table[1] - s->block_conv_table[0]) << 2,
1554  t2 = (s->block_conv_table[2] - s->block_conv_table[1]) << 1,
1555  t3 = s->block_conv_table[3] - s->block_conv_table[2] + 1;
1556 
1557  if (n == 0) {
1558  block_pitch = get_bits(gb, s->block_pitch_nbits);
1559  } else
1560  block_pitch = last_block_pitch - s->block_delta_pitch_hrange +
1561  get_bits(gb, s->block_delta_pitch_nbits);
1562  /* Convert last_ so that any next delta is within _range */
1563  last_block_pitch = av_clip(block_pitch,
1564  s->block_delta_pitch_hrange,
1565  s->block_pitch_range -
1566  s->block_delta_pitch_hrange);
1567 
1568  /* Convert semi-log-style scale back to normal scale */
1569  if (block_pitch < t1) {
1570  bl_pitch_sh2 = (s->block_conv_table[0] << 2) + block_pitch;
1571  } else {
1572  block_pitch -= t1;
1573  if (block_pitch < t2) {
1574  bl_pitch_sh2 =
1575  (s->block_conv_table[1] << 2) + (block_pitch << 1);
1576  } else {
1577  block_pitch -= t2;
1578  if (block_pitch < t3) {
1579  bl_pitch_sh2 =
1580  (s->block_conv_table[2] + block_pitch) << 2;
1581  } else
1582  bl_pitch_sh2 = s->block_conv_table[3] << 2;
1583  }
1584  }
1585  pitch[n] = bl_pitch_sh2 >> 2;
1586  break;
1587  }
1588 
1589  case ACB_TYPE_ASYMMETRIC: {
1590  bl_pitch_sh2 = pitch[n] << 2;
1591  break;
1592  }
1593 
1594  default: // ACB_TYPE_NONE has no pitch
1595  bl_pitch_sh2 = 0;
1596  break;
1597  }
1598 
1599  synth_block(s, gb, n, block_nsamples, bl_pitch_sh2,
1600  lsps, prev_lsps, &frame_descs[bd_idx],
1601  &excitation[n * block_nsamples],
1602  &synth[n * block_nsamples]);
1603  }
1604 
1605  /* Averaging projection filter, if applicable. Else, just copy samples
1606  * from synthesis buffer */
1607  if (s->do_apf) {
1608  double i_lsps[MAX_LSPS];
1609  float lpcs[MAX_LSPS];
1610 
1611  for (n = 0; n < s->lsps; n++) // LSF -> LSP
1612  i_lsps[n] = cos(0.5 * (prev_lsps[n] + lsps[n]));
1613  ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
1614  postfilter(s, synth, samples, 80, lpcs,
1615  &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx],
1616  frame_descs[bd_idx].fcb_type, pitch[0]);
1617 
1618  for (n = 0; n < s->lsps; n++) // LSF -> LSP
1619  i_lsps[n] = cos(lsps[n]);
1620  ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
1621  postfilter(s, &synth[80], &samples[80], 80, lpcs,
1622  &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx + 80],
1623  frame_descs[bd_idx].fcb_type, pitch[0]);
1624  } else
1625  memcpy(samples, synth, 160 * sizeof(synth[0]));
1626 
1627  /* Cache values for next frame */
1628  s->frame_cntr++;
1629  if (s->frame_cntr >= 0xFFFF) s->frame_cntr -= 0xFFFF; // i.e. modulo (%)
1630  s->last_acb_type = frame_descs[bd_idx].acb_type;
1631  switch (frame_descs[bd_idx].acb_type) {
1632  case ACB_TYPE_NONE:
1633  s->last_pitch_val = 0;
1634  break;
1635  case ACB_TYPE_ASYMMETRIC:
1636  s->last_pitch_val = cur_pitch_val;
1637  break;
1638  case ACB_TYPE_HAMMING:
1639  s->last_pitch_val = pitch[frame_descs[bd_idx].n_blocks - 1];
1640  break;
1641  }
1642 
1643  return 0;
1644 }
1645 
1646 /**
1647  * Ensure minimum value for first item, maximum value for last value,
1648  * proper spacing between each value and proper ordering.
1649  *
1650  * @param lsps array of LSPs
1651  * @param num size of LSP array
1652  *
1653  * @note basically a double version of #ff_acelp_reorder_lsf(), might be
1654  * useful to put in a generic location later on. Parts are also
1655  * present in #ff_set_min_dist_lsf() + #ff_sort_nearly_sorted_floats(),
1656  * which is in float.
1657  */
1658 static void stabilize_lsps(double *lsps, int num)
1659 {
1660  int n, m, l;
1661 
1662  /* set minimum value for first, maximum value for last and minimum
1663  * spacing between LSF values.
1664  * Very similar to ff_set_min_dist_lsf(), but in double. */
1665  lsps[0] = FFMAX(lsps[0], 0.0015 * M_PI);
1666  for (n = 1; n < num; n++)
1667  lsps[n] = FFMAX(lsps[n], lsps[n - 1] + 0.0125 * M_PI);
1668  lsps[num - 1] = FFMIN(lsps[num - 1], 0.9985 * M_PI);
1669 
1670  /* reorder (looks like one-time / non-recursed bubblesort).
1671  * Very similar to ff_sort_nearly_sorted_floats(), but in double. */
1672  for (n = 1; n < num; n++) {
1673  if (lsps[n] < lsps[n - 1]) {
1674  for (m = 1; m < num; m++) {
1675  double tmp = lsps[m];
1676  for (l = m - 1; l >= 0; l--) {
1677  if (lsps[l] <= tmp) break;
1678  lsps[l + 1] = lsps[l];
1679  }
1680  lsps[l + 1] = tmp;
1681  }
1682  break;
1683  }
1684  }
1685 }
1686 
1687 /**
1688  * Synthesize output samples for a single superframe. If we have any data
1689  * cached in s->sframe_cache, that will be used instead of whatever is loaded
1690  * in s->gb.
1691  *
1692  * WMA Voice superframes contain 3 frames, each containing 160 audio samples,
1693  * to give a total of 480 samples per frame. See #synth_frame() for frame
1694  * parsing. In addition to 3 frames, superframes can also contain the LSPs
1695  * (if these are globally specified for all frames (residually); they can
1696  * also be specified individually per-frame. See the s->has_residual_lsps
1697  * option), and can specify the number of samples encoded in this superframe
1698  * (if less than 480), usually used to prevent blanks at track boundaries.
1699  *
1700  * @param ctx WMA Voice decoder context
1701  * @return 0 on success, <0 on error or 1 if there was not enough data to
1702  * fully parse the superframe
1703  */
1705  int *got_frame_ptr)
1706 {
1708  GetBitContext *gb = &s->gb, s_gb;
1709  int n, res, n_samples = MAX_SFRAMESIZE;
1710  double lsps[MAX_FRAMES][MAX_LSPS];
1711  const double *mean_lsf = s->lsps == 16 ?
1712  wmavoice_mean_lsf16[s->lsp_def_mode] : wmavoice_mean_lsf10[s->lsp_def_mode];
1713  float excitation[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE + 12];
1714  float synth[MAX_LSPS + MAX_SFRAMESIZE];
1715  float *samples;
1716 
1717  memcpy(synth, s->synth_history,
1718  s->lsps * sizeof(*synth));
1719  memcpy(excitation, s->excitation_history,
1720  s->history_nsamples * sizeof(*excitation));
1721 
1722  if (s->sframe_cache_size > 0) {
1723  gb = &s_gb;
1724  init_get_bits(gb, s->sframe_cache, s->sframe_cache_size);
1725  s->sframe_cache_size = 0;
1726  }
1727 
1728  /* First bit is speech/music bit, it differentiates between WMAVoice
1729  * speech samples (the actual codec) and WMAVoice music samples, which
1730  * are really WMAPro-in-WMAVoice-superframes. I've never seen those in
1731  * the wild yet. */
1732  if (!get_bits1(gb)) {
1733  avpriv_request_sample(ctx, "WMAPro-in-WMAVoice");
1734  return AVERROR_PATCHWELCOME;
1735  }
1736 
1737  /* (optional) nr. of samples in superframe; always <= 480 and >= 0 */
1738  if (get_bits1(gb)) {
1739  if ((n_samples = get_bits(gb, 12)) > MAX_SFRAMESIZE) {
1741  "Superframe encodes > %d samples (%d), not allowed\n",
1742  MAX_SFRAMESIZE, n_samples);
1743  return AVERROR_INVALIDDATA;
1744  }
1745  }
1746 
1747  /* Parse LSPs, if global for the superframe (can also be per-frame). */
1748  if (s->has_residual_lsps) {
1749  double prev_lsps[MAX_LSPS], a1[MAX_LSPS * 2], a2[MAX_LSPS * 2];
1750 
1751  for (n = 0; n < s->lsps; n++)
1752  prev_lsps[n] = s->prev_lsps[n] - mean_lsf[n];
1753 
1754  if (s->lsps == 10) {
1755  dequant_lsp10r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
1756  } else /* s->lsps == 16 */
1757  dequant_lsp16r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
1758 
1759  for (n = 0; n < s->lsps; n++) {
1760  lsps[0][n] = mean_lsf[n] + (a1[n] - a2[n * 2]);
1761  lsps[1][n] = mean_lsf[n] + (a1[s->lsps + n] - a2[n * 2 + 1]);
1762  lsps[2][n] += mean_lsf[n];
1763  }
1764  for (n = 0; n < 3; n++)
1765  stabilize_lsps(lsps[n], s->lsps);
1766  }
1767 
1768  /* synth_superframe can run multiple times per packet
1769  * free potential previous frame */
1771 
1772  /* get output buffer */
1773  frame->nb_samples = MAX_SFRAMESIZE;
1774  if ((res = ff_get_buffer(ctx, frame, 0)) < 0)
1775  return res;
1776  frame->nb_samples = n_samples;
1777  samples = (float *)frame->data[0];
1778 
1779  /* Parse frames, optionally preceded by per-frame (independent) LSPs. */
1780  for (n = 0; n < 3; n++) {
1781  if (!s->has_residual_lsps) {
1782  int m;
1783 
1784  if (s->lsps == 10) {
1785  dequant_lsp10i(gb, lsps[n]);
1786  } else /* s->lsps == 16 */
1787  dequant_lsp16i(gb, lsps[n]);
1788 
1789  for (m = 0; m < s->lsps; m++)
1790  lsps[n][m] += mean_lsf[m];
1791  stabilize_lsps(lsps[n], s->lsps);
1792  }
1793 
1794  if ((res = synth_frame(ctx, gb, n,
1795  &samples[n * MAX_FRAMESIZE],
1796  lsps[n], n == 0 ? s->prev_lsps : lsps[n - 1],
1797  &excitation[s->history_nsamples + n * MAX_FRAMESIZE],
1798  &synth[s->lsps + n * MAX_FRAMESIZE]))) {
1799  *got_frame_ptr = 0;
1800  return res;
1801  }
1802  }
1803 
1804  /* Statistics? FIXME - we don't check for length, a slight overrun
1805  * will be caught by internal buffer padding, and anything else
1806  * will be skipped, not read. */
1807  if (get_bits1(gb)) {
1808  res = get_bits(gb, 4);
1809  skip_bits(gb, 10 * (res + 1));
1810  }
1811 
1812  if (get_bits_left(gb) < 0) {
1814  return AVERROR_INVALIDDATA;
1815  }
1816 
1817  *got_frame_ptr = 1;
1818 
1819  /* Update history */
1820  memcpy(s->prev_lsps, lsps[2],
1821  s->lsps * sizeof(*s->prev_lsps));
1822  memcpy(s->synth_history, &synth[MAX_SFRAMESIZE],
1823  s->lsps * sizeof(*synth));
1824  memcpy(s->excitation_history, &excitation[MAX_SFRAMESIZE],
1825  s->history_nsamples * sizeof(*excitation));
1826  if (s->do_apf)
1827  memmove(s->zero_exc_pf, &s->zero_exc_pf[MAX_SFRAMESIZE],
1828  s->history_nsamples * sizeof(*s->zero_exc_pf));
1829 
1830  return 0;
1831 }
1832 
1833 /**
1834  * Parse the packet header at the start of each packet (input data to this
1835  * decoder).
1836  *
1837  * @param s WMA Voice decoding context private data
1838  * @return <0 on error, nb_superframes on success.
1839  */
1841 {
1842  GetBitContext *gb = &s->gb;
1843  unsigned int res, n_superframes = 0;
1844 
1845  skip_bits(gb, 4); // packet sequence number
1846  s->has_residual_lsps = get_bits1(gb);
1847  do {
1848  if (get_bits_left(gb) < 6 + s->spillover_bitsize)
1849  return AVERROR_INVALIDDATA;
1850 
1851  res = get_bits(gb, 6); // number of superframes per packet
1852  // (minus first one if there is spillover)
1853  n_superframes += res;
1854  } while (res == 0x3F);
1855  s->spillover_nbits = get_bits(gb, s->spillover_bitsize);
1856 
1857  return get_bits_left(gb) >= 0 ? n_superframes : AVERROR_INVALIDDATA;
1858 }
1859 
1860 /**
1861  * Copy (unaligned) bits from gb/data/size to pb.
1862  *
1863  * @param pb target buffer to copy bits into
1864  * @param data source buffer to copy bits from
1865  * @param size size of the source data, in bytes
1866  * @param gb bit I/O context specifying the current position in the source.
1867  * data. This function might use this to align the bit position to
1868  * a whole-byte boundary before calling #avpriv_copy_bits() on aligned
1869  * source data
1870  * @param nbits the amount of bits to copy from source to target
1871  *
1872  * @note after calling this function, the current position in the input bit
1873  * I/O context is undefined.
1874  */
1875 static void copy_bits(PutBitContext *pb,
1876  const uint8_t *data, int size,
1877  GetBitContext *gb, int nbits)
1878 {
1879  int rmn_bytes, rmn_bits;
1880 
1881  rmn_bits = rmn_bytes = get_bits_left(gb);
1882  if (rmn_bits < nbits)
1883  return;
1884  if (nbits > pb->size_in_bits - put_bits_count(pb))
1885  return;
1886  rmn_bits &= 7; rmn_bytes >>= 3;
1887  if ((rmn_bits = FFMIN(rmn_bits, nbits)) > 0)
1888  put_bits(pb, rmn_bits, get_bits(gb, rmn_bits));
1889  avpriv_copy_bits(pb, data + size - rmn_bytes,
1890  FFMIN(nbits - rmn_bits, rmn_bytes << 3));
1891 }
1892 
1893 /**
1894  * Packet decoding: a packet is anything that the (ASF) demuxer contains,
1895  * and we expect that the demuxer / application provides it to us as such
1896  * (else you'll probably get garbage as output). Every packet has a size of
1897  * ctx->block_align bytes, starts with a packet header (see
1898  * #parse_packet_header()), and then a series of superframes. Superframe
1899  * boundaries may exceed packets, i.e. superframes can split data over
1900  * multiple (two) packets.
1901  *
1902  * For more information about frames, see #synth_superframe().
1903  */
1905  int *got_frame_ptr, AVPacket *avpkt)
1906 {
1908  GetBitContext *gb = &s->gb;
1909  int size, res, pos;
1910 
1911  /* Packets are sometimes a multiple of ctx->block_align, with a packet
1912  * header at each ctx->block_align bytes. However, FFmpeg's ASF demuxer
1913  * feeds us ASF packets, which may concatenate multiple "codec" packets
1914  * in a single "muxer" packet, so we artificially emulate that by
1915  * capping the packet size at ctx->block_align. */
1916  for (size = avpkt->size; size > ctx->block_align; size -= ctx->block_align);
1917  init_get_bits8(&s->gb, avpkt->data, size);
1918 
1919  /* size == ctx->block_align is used to indicate whether we are dealing with
1920  * a new packet or a packet of which we already read the packet header
1921  * previously. */
1922  if (!(size % ctx->block_align)) { // new packet header
1923  if (!size) {
1924  s->spillover_nbits = 0;
1925  s->nb_superframes = 0;
1926  } else {
1927  if ((res = parse_packet_header(s)) < 0)
1928  return res;
1929  s->nb_superframes = res;
1930  }
1931 
1932  /* If the packet header specifies a s->spillover_nbits, then we want
1933  * to push out all data of the previous packet (+ spillover) before
1934  * continuing to parse new superframes in the current packet. */
1935  if (s->sframe_cache_size > 0) {
1936  int cnt = get_bits_count(gb);
1937  if (cnt + s->spillover_nbits > avpkt->size * 8) {
1938  s->spillover_nbits = avpkt->size * 8 - cnt;
1939  }
1940  copy_bits(&s->pb, avpkt->data, size, gb, s->spillover_nbits);
1941  flush_put_bits(&s->pb);
1942  s->sframe_cache_size += s->spillover_nbits;
1943  if ((res = synth_superframe(ctx, data, got_frame_ptr)) == 0 &&
1944  *got_frame_ptr) {
1945  cnt += s->spillover_nbits;
1946  s->skip_bits_next = cnt & 7;
1947  res = cnt >> 3;
1948  return res;
1949  } else
1950  skip_bits_long (gb, s->spillover_nbits - cnt +
1951  get_bits_count(gb)); // resync
1952  } else if (s->spillover_nbits) {
1953  skip_bits_long(gb, s->spillover_nbits); // resync
1954  }
1955  } else if (s->skip_bits_next)
1956  skip_bits(gb, s->skip_bits_next);
1957 
1958  /* Try parsing superframes in current packet */
1959  s->sframe_cache_size = 0;
1960  s->skip_bits_next = 0;
1961  pos = get_bits_left(gb);
1962  if (s->nb_superframes-- == 0) {
1963  *got_frame_ptr = 0;
1964  return size;
1965  } else if (s->nb_superframes > 0) {
1966  if ((res = synth_superframe(ctx, data, got_frame_ptr)) < 0) {
1967  return res;
1968  } else if (*got_frame_ptr) {
1969  int cnt = get_bits_count(gb);
1970  s->skip_bits_next = cnt & 7;
1971  res = cnt >> 3;
1972  return res;
1973  }
1974  } else if ((s->sframe_cache_size = pos) > 0) {
1975  /* ... cache it for spillover in next packet */
1976  init_put_bits(&s->pb, s->sframe_cache, SFRAME_CACHE_MAXSIZE);
1977  copy_bits(&s->pb, avpkt->data, size, gb, s->sframe_cache_size);
1978  // FIXME bad - just copy bytes as whole and add use the
1979  // skip_bits_next field
1980  }
1981 
1982  return size;
1983 }
1984 
1986 {
1988 
1989  if (s->do_apf) {
1990  ff_rdft_end(&s->rdft);
1991  ff_rdft_end(&s->irdft);
1992  ff_dct_end(&s->dct);
1993  ff_dct_end(&s->dst);
1994  }
1995 
1996  return 0;
1997 }
1998 
2000  .name = "wmavoice",
2001  .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio Voice"),
2002  .type = AVMEDIA_TYPE_AUDIO,
2003  .id = AV_CODEC_ID_WMAVOICE,
2004  .priv_data_size = sizeof(WMAVoiceContext),
2006  .close = wmavoice_decode_end,
2009  .flush = wmavoice_flush,
2010 };
AVCodec
AVCodec.
Definition: avcodec.h:3481
WMAVoiceContext::has_residual_lsps
int has_residual_lsps
if set, superframes contain one set of LSPs that cover all frames, encoded as independent and residua...
Definition: wmavoice.c:192
skip_bits_long
static void skip_bits_long(GetBitContext *s, int n)
Skips the specified number of bits.
Definition: get_bits.h:291
AMRFixed::x
int x[10]
Definition: acelp_vectors.h:55
DCT_I
@ DCT_I
Definition: avfft.h:96
wmavoice_std_codebook
static const float wmavoice_std_codebook[1000]
Definition: wmavoice_data.h:2585
interpol
static int interpol(MBContext *s, uint32_t *color, int x, int y, int linesize)
Definition: vsrc_mandelbrot.c:195
MAX_LSPS
#define MAX_LSPS
maximum filter order
Definition: wmavoice.c:48
WMAVoiceContext::aw_next_pulse_off_cache
int aw_next_pulse_off_cache
the position (relative to start of the second block) at which pulses should start to be positioned,...
Definition: wmavoice.c:241
WMAVoiceContext::max_pitch_val
int max_pitch_val
max value + 1 for pitch parsing
Definition: wmavoice.c:164
aw_pulse_set2
static int aw_pulse_set2(WMAVoiceContext *s, GetBitContext *gb, int block_idx, AMRFixed *fcb)
Apply second set of pitch-adaptive window pulses.
Definition: wmavoice.c:1088
init
static av_cold int init(AVCodecContext *avctx)
Definition: avrndec.c:35
acelp_vectors.h
get_bits_left
static int get_bits_left(GetBitContext *gb)
Definition: get_bits.h:849
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
wmavoice_dq_lsp10i
static const uint8_t wmavoice_dq_lsp10i[0xf00]
Definition: wmavoice_data.h:33
out
FILE * out
Definition: movenc.c:54
n
int n
Definition: avisynth_c.h:760
u
#define u(width, name, range_min, range_max)
Definition: cbs_h2645.c:252
thread.h
frame_descs
static const struct frame_type_desc frame_descs[17]
INIT_VLC_STATIC
#define INIT_VLC_STATIC(vlc, bits, a, b, c, d, e, f, g, static_size)
Definition: vlc.h:75
AV_CH_LAYOUT_MONO
#define AV_CH_LAYOUT_MONO
Definition: channel_layout.h:85
rdft.h
wmavoice_dq_lsp16r3
static const uint8_t wmavoice_dq_lsp16r3[0x600]
Definition: wmavoice_data.h:1526
dequant_lsps
static void dequant_lsps(double *lsps, int num, const uint16_t *values, const uint16_t *sizes, int n_stages, const uint8_t *table, const double *mul_q, const double *base_q)
Dequantize LSPs.
Definition: wmavoice.c:860
init_put_bits
static void init_put_bits(PutBitContext *s, uint8_t *buffer, int buffer_size)
Initialize the PutBitContext s.
Definition: put_bits.h:48
WMAVoiceContext::excitation_history
float excitation_history[MAX_SIGNAL_HISTORY]
cache of the signal of previous superframes, used as a history for signal generation
Definition: wmavoice.c:251
get_bits_count
static int get_bits_count(const GetBitContext *s)
Definition: get_bits.h:219
end
static av_cold int end(AVCodecContext *avctx)
Definition: avrndec.c:90
av_log2_16bit
int av_log2_16bit(unsigned v)
Definition: intmath.c:31
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:295
put_bits
static void put_bits(Jpeg2000EncoderContext *s, int val, int n)
put n times val bit
Definition: j2kenc.c:208
tmp
static uint8_t tmp[11]
Definition: aes_ctr.c:26
aw_pulse_set1
static void aw_pulse_set1(WMAVoiceContext *s, GetBitContext *gb, int block_idx, AMRFixed *fcb)
Apply first set of pitch-adaptive window pulses.
Definition: wmavoice.c:1178
ff_acelp_apply_order_2_transfer_function
void ff_acelp_apply_order_2_transfer_function(float *out, const float *in, const float zero_coeffs[2], const float pole_coeffs[2], float gain, float mem[2], int n)
Apply an order 2 rational transfer function in-place.
Definition: acelp_filters.c:119
WMAVoiceContext::irdft
RDFTContext irdft
contexts for FFT-calculation in the postfilter (for denoise filter)
Definition: wmavoice.c:265
internal.h
AVPacket::data
uint8_t * data
Definition: avcodec.h:1477
pRNG
static int pRNG(int frame_cntr, int block_num, int block_size)
Generate a random number from frame_cntr and block_idx, which will live in the range [0,...
Definition: wmavoice.c:1239
table
static const uint16_t table[]
Definition: prosumer.c:206
data
const char data[16]
Definition: mxf.c:91
WMAVoiceContext::silence_gain
float silence_gain
set for use in blocks if ACB_TYPE_NONE
Definition: wmavoice.c:226
expf
#define expf(x)
Definition: libm.h:283
WMAVoiceContext::denoise_filter_cache_size
int denoise_filter_cache_size
samples in denoise_filter_cache
Definition: wmavoice.c:278
wmavoice_denoise_power_table
static const float wmavoice_denoise_power_table[12][64]
LUT for f(x,y) = pow((y + 6.9) / 64, 0.025 * (x + 1)).
Definition: wmavoice_data.h:3064
wmavoice_gain_codebook_acb
static const float wmavoice_gain_codebook_acb[128]
Definition: wmavoice_data.h:2874
get_vlc2
static av_always_inline int get_vlc2(GetBitContext *s, VLC_TYPE(*table)[2], int bits, int max_depth)
Parse a vlc code.
Definition: get_bits.h:797
base
uint8_t base
Definition: vp3data.h:202
t1
#define t1
Definition: regdef.h:29
max
#define max(a, b)
Definition: cuda_runtime.h:33
ff_celp_lp_synthesis_filterf
void ff_celp_lp_synthesis_filterf(float *out, const float *filter_coeffs, const float *in, int buffer_length, int filter_length)
LP synthesis filter.
Definition: celp_filters.c:84
WMAVoiceContext::aw_idx_is_ext
int aw_idx_is_ext
whether the AW index was encoded in 8 bits (instead of 6)
Definition: wmavoice.c:228
init_get_bits
static int init_get_bits(GetBitContext *s, const uint8_t *buffer, int bit_size)
Initialize GetBitContext.
Definition: get_bits.h:659
WMAVoiceContext::dc_level
int dc_level
Predicted amount of DC noise, based on which a DC removal filter is used.
Definition: wmavoice.c:155
wmavoice_dq_lsp16i1
static const uint8_t wmavoice_dq_lsp16i1[0x640]
Definition: wmavoice_data.h:420
WMAVoiceContext::block_conv_table
uint16_t block_conv_table[4]
boundaries for block pitch unit/scale conversion
Definition: wmavoice.c:176
frame_type_desc::log_n_blocks
uint8_t log_n_blocks
log2(n_blocks)
Definition: wmavoice.c:102
skip_bits
static void skip_bits(GetBitContext *s, int n)
Definition: get_bits.h:467
mem
int mem
Definition: avisynth_c.h:916
WMAVoiceContext::aw_pulse_range
int aw_pulse_range
the range over which aw_pulse_set1() can apply the pulse, relative to the value in aw_first_pulse_off...
Definition: wmavoice.c:230
get_bits
static unsigned int get_bits(GetBitContext *s, int n)
Read 1-25 bits.
Definition: get_bits.h:379
wmavoice_decode_packet
static int wmavoice_decode_packet(AVCodecContext *ctx, void *data, int *got_frame_ptr, AVPacket *avpkt)
Packet decoding: a packet is anything that the (ASF) demuxer contains, and we expect that the demuxer...
Definition: wmavoice.c:1904
U
#define U(x)
Definition: vp56_arith.h:37
WMAVoiceContext::tilted_lpcs_pf
float tilted_lpcs_pf[0x80]
aligned buffer for LPC tilting
Definition: wmavoice.c:279
PutBitContext::size_in_bits
int size_in_bits
Definition: put_bits.h:39
AMRFixed::pitch_fac
float pitch_fac
Definition: acelp_vectors.h:59
GetBitContext
Definition: get_bits.h:61
MULH
#define MULH
Definition: mathops.h:42
FCB_TYPE_SILENCE
@ FCB_TYPE_SILENCE
comfort noise during silence generated from a hardcoded (fixed) codebook with per-frame (low) gain va...
Definition: wmavoice.c:84
wmavoice_flush
static av_cold void wmavoice_flush(AVCodecContext *ctx)
Definition: wmavoice.c:336
IDFT_C2R
@ IDFT_C2R
Definition: avfft.h:73
ff_rdft_end
av_cold void ff_rdft_end(RDFTContext *s)
Definition: rdft.c:114
calc_input_response
static void calc_input_response(WMAVoiceContext *s, float *lpcs, int fcb_type, float *coeffs, int remainder)
Derive denoise filter coefficients (in real domain) from the LPCs.
Definition: wmavoice.c:605
frame_type_desc::n_blocks
uint8_t n_blocks
amount of blocks per frame (each block (contains 160/n_blocks samples)
Definition: wmavoice.c:100
dequant_lsp10i
static void dequant_lsp10i(GetBitContext *gb, double *lsps)
Parse 10 independently-coded LSPs.
Definition: wmavoice.c:891
synth_block
static void synth_block(WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, int block_pitch_sh2, const double *lsps, const double *prev_lsps, const struct frame_type_desc *frame_desc, float *excitation, float *synth)
Parse data in a single block.
Definition: wmavoice.c:1440
MAX_SFRAMESIZE
#define MAX_SFRAMESIZE
maximum number of samples per superframe
Definition: wmavoice.c:54
wmavoice_gain_codebook_fcb
static const float wmavoice_gain_codebook_fcb[128]
Definition: wmavoice_data.h:2893
WMAVoiceContext::denoise_filter_cache
float denoise_filter_cache[MAX_FRAMESIZE]
Definition: wmavoice.c:277
FCB_TYPE_EXC_PULSES
@ FCB_TYPE_EXC_PULSES
Innovation (fixed) codebook pulse sets in combinations of either single pulses or pulse pairs.
Definition: wmavoice.c:91
WMAVoiceContext::sin
float sin[511]
Definition: wmavoice.c:269
a1
#define a1
Definition: regdef.h:47
AV_CODEC_ID_WMAVOICE
@ AV_CODEC_ID_WMAVOICE
Definition: avcodec.h:600
lrint
#define lrint
Definition: tablegen.h:53
MUL16
#define MUL16(ra, rb)
Definition: mathops.h:88
ff_thread_once
static int ff_thread_once(char *control, void(*routine)(void))
Definition: thread.h:162
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:176
av_cold
#define av_cold
Definition: attributes.h:84
dct.h
init_get_bits8
static int init_get_bits8(GetBitContext *s, const uint8_t *buffer, int byte_size)
Initialize GetBitContext.
Definition: get_bits.h:677
MAX_LSPS_ALIGN16
#define MAX_LSPS_ALIGN16
same as MAX_LSPS; needs to be multiple
Definition: wmavoice.c:49
av_memcpy_backptr
void av_memcpy_backptr(uint8_t *dst, int back, int cnt)
Overlapping memcpy() implementation.
Definition: mem.c:426
decode
static void decode(AVCodecContext *dec_ctx, AVPacket *pkt, AVFrame *frame, FILE *outfile)
Definition: decode_audio.c:42
wmavoice_dq_lsp10r
static const uint8_t wmavoice_dq_lsp10r[0x1400]
Definition: wmavoice_data.h:749
WMAVoiceContext::sframe_cache_size
int sframe_cache_size
set to >0 if we have data from an (incomplete) superframe from a previous packet that spilled over in...
Definition: wmavoice.c:204
s
#define s(width, name)
Definition: cbs_vp9.c:257
WMAVoiceContext::lsp_q_mode
int lsp_q_mode
defines quantizer defaults [0, 1]
Definition: wmavoice.c:159
frame_type_desc::fcb_type
uint8_t fcb_type
Fixed codebook type (FCB_TYPE_*)
Definition: wmavoice.c:104
log_range
#define log_range(var, assign)
WMAVoiceContext::prev_lsps
double prev_lsps[MAX_LSPS]
LSPs of the last frame of the previous superframe.
Definition: wmavoice.c:220
AVMEDIA_TYPE_AUDIO
@ AVMEDIA_TYPE_AUDIO
Definition: avutil.h:202
WMAVoiceContext::aw_n_pulses
int aw_n_pulses[2]
number of AW-pulses in each block; note that this number can be negative (in which case it basically ...
Definition: wmavoice.c:236
AMRFixed
Sparse representation for the algebraic codebook (fixed) vector.
Definition: acelp_vectors.h:53
bits
uint8_t bits
Definition: vp3data.h:202
adaptive_gain_control
static void adaptive_gain_control(float *out, const float *in, const float *speech_synth, int size, float alpha, float *gain_mem)
Adaptive gain control (as used in postfilter).
Definition: wmavoice.c:505
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:37
wmavoice_lsp16_intercoeff_a
static const float wmavoice_lsp16_intercoeff_a[32][2][16]
Definition: wmavoice_data.h:2047
ctx
AVFormatContext * ctx
Definition: movenc.c:48
get_bits.h
wmavoice_mean_lsf10
static const double wmavoice_mean_lsf10[2][10]
Definition: wmavoice_data.h:2565
WMAVoiceContext::spillover_nbits
int spillover_nbits
number of bits of the previous packet's last superframe preceding this packet's first full superframe...
Definition: wmavoice.c:188
UMULH
static av_always_inline unsigned UMULH(unsigned a, unsigned b)
Definition: mathops.h:68
AMRFixed::y
float y[10]
Definition: acelp_vectors.h:56
wmavoice_gain_silence
static const float wmavoice_gain_silence[256]
Definition: wmavoice_data.h:2788
PutBitContext
Definition: put_bits.h:35
WMAVoiceContext::vbm_tree
int8_t vbm_tree[25]
converts VLC codes to frame type
Definition: wmavoice.c:140
wmavoice_dq_lsp16i3
static const uint8_t wmavoice_dq_lsp16i3[0x300]
Definition: wmavoice_data.h:682
if
if(ret)
Definition: filter_design.txt:179
AMRFixed::no_repeat_mask
int no_repeat_mask
Definition: acelp_vectors.h:57
postfilter
static void postfilter(WMAVoiceContext *s, const float *synth, float *samples, int size, const float *lpcs, float *zero_exc_pf, int fcb_type, int pitch)
Averaging projection filter, the postfilter used in WMAVoice.
Definition: wmavoice.c:806
AV_ONCE_INIT
#define AV_ONCE_INIT
Definition: thread.h:160
flush
static void flush(AVCodecContext *avctx)
Definition: aacdec_template.c:500
NULL
#define NULL
Definition: coverity.c:32
sizes
static const int sizes[][2]
Definition: img2dec.c:53
WMAVoiceContext::history_nsamples
int history_nsamples
number of samples in history for signal prediction (through ACB)
Definition: wmavoice.c:145
WMAVoiceContext::synth_history
float synth_history[MAX_LSPS]
see excitation_history
Definition: wmavoice.c:255
AVERROR_PATCHWELCOME
#define AVERROR_PATCHWELCOME
Not yet implemented in FFmpeg, patches welcome.
Definition: error.h:62
last_coeff
static const uint8_t last_coeff[3]
Definition: qdm2data.h:257
WMAVoiceContext::denoise_strength
int denoise_strength
strength of denoising in Wiener filter [0-11]
Definition: wmavoice.c:151
MAX_SIGNAL_HISTORY
#define MAX_SIGNAL_HISTORY
maximum excitation signal history
Definition: wmavoice.c:53
WMAVoiceContext::sframe_cache
uint8_t sframe_cache[SFRAME_CACHE_MAXSIZE+AV_INPUT_BUFFER_PADDING_SIZE]
cache for superframe data split over multiple packets
Definition: wmavoice.c:201
get_bits1
static unsigned int get_bits1(GetBitContext *s)
Definition: get_bits.h:498
dequant_lsp10r
static void dequant_lsp10r(GetBitContext *gb, double *i_lsps, const double *old, double *a1, double *a2, int q_mode)
Parse 10 independently-coded LSPs, and then derive the tables to generate LSPs for the other frames f...
Definition: wmavoice.c:917
WMAVoiceContext::pitch_nbits
int pitch_nbits
number of bits used to specify the pitch value in the frame header
Definition: wmavoice.c:165
WMAVoiceContext::block_delta_pitch_nbits
int block_delta_pitch_nbits
number of bits used to specify the delta pitch between this and the last block's pitch value,...
Definition: wmavoice.c:170
kalman_smoothen
static int kalman_smoothen(WMAVoiceContext *s, int pitch, const float *in, float *out, int size)
Kalman smoothing function.
Definition: wmavoice.c:546
WMAVoiceContext::denoise_coeffs_pf
float denoise_coeffs_pf[0x80]
aligned buffer for denoise coefficients
Definition: wmavoice.c:281
WMAVoiceContext::skip_bits_next
int skip_bits_next
number of bits to skip at the next call to wmavoice_decode_packet() (since they're part of the previo...
Definition: wmavoice.c:197
DFT_R2C
@ DFT_R2C
Definition: avfft.h:72
abs
#define abs(x)
Definition: cuda_runtime.h:35
FCB_TYPE_AW_PULSES
@ FCB_TYPE_AW_PULSES
Pitch-adaptive window (AW) pulse signals, used in particular for low-bitrate streams.
Definition: wmavoice.c:89
celp_filters.h
MAX_FRAMESIZE
#define MAX_FRAMESIZE
maximum number of samples per frame
Definition: wmavoice.c:52
MAX_FRAMES
#define MAX_FRAMES
maximum number of frames per superframe
Definition: wmavoice.c:51
decode_vbmtree
static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25])
Set up the variable bit mode (VBM) tree from container extradata.
Definition: wmavoice.c:300
AVOnce
#define AVOnce
Definition: thread.h:159
aw_parse_coords
static void aw_parse_coords(WMAVoiceContext *s, GetBitContext *gb, const int *pitch)
Parse the offset of the first pitch-adaptive window pulses, and the distribution of pulses between th...
Definition: wmavoice.c:1036
wmavoice_init_static_data
static av_cold void wmavoice_init_static_data(void)
Definition: wmavoice.c:314
float_dsp.h
for
for(j=16;j >0;--j)
Definition: h264pred_template.c:469
avpriv_copy_bits
void avpriv_copy_bits(PutBitContext *pb, const uint8_t *src, int length)
Copy the content of src to the bitstream.
Definition: bitstream.c:64
WMAVoiceContext::dcf_mem
float dcf_mem[2]
DC filter history.
Definition: wmavoice.c:273
ff_get_buffer
int ff_get_buffer(AVCodecContext *avctx, AVFrame *frame, int flags)
Get a buffer for a frame.
Definition: decode.c:1965
AV_CODEC_CAP_DR1
#define AV_CODEC_CAP_DR1
Codec uses get_buffer() for allocating buffers and supports custom allocators.
Definition: avcodec.h:981
parse_packet_header
static int parse_packet_header(WMAVoiceContext *s)
Parse the packet header at the start of each packet (input data to this decoder).
Definition: wmavoice.c:1840
AVPacket::size
int size
Definition: avcodec.h:1478
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:188
powf
#define powf(x, y)
Definition: libm.h:50
frame_type_vlc
static VLC frame_type_vlc
Frame type VLC coding.
Definition: wmavoice.c:63
ff_wmavoice_decoder
AVCodec ff_wmavoice_decoder
Definition: wmavoice.c:1999
WMAVoiceContext::spillover_bitsize
int spillover_bitsize
number of bits used to specify spillover_nbits in the packet header = ceil(log2(ctx->block_align << 3...
Definition: wmavoice.c:142
FFMAX
#define FFMAX(a, b)
Definition: common.h:94
WMAVoiceContext::pb
PutBitContext pb
bitstream writer for sframe_cache
Definition: wmavoice.c:209
WMAVoiceContext::last_pitch_val
int last_pitch_val
pitch value of the previous frame
Definition: wmavoice.c:222
size
int size
Definition: twinvq_data.h:11134
wiener_denoise
static void wiener_denoise(WMAVoiceContext *s, int fcb_type, float *synth_pf, int size, const float *lpcs)
This function applies a Wiener filter on the (noisy) speech signal as a means to denoise it.
Definition: wmavoice.c:724
ACB_TYPE_NONE
@ ACB_TYPE_NONE
no adaptive codebook (only hardcoded fixed)
Definition: wmavoice.c:69
wmavoice_lsp10_intercoeff_b
static const float wmavoice_lsp10_intercoeff_b[32][2][10]
Definition: wmavoice_data.h:1852
WMAVoiceContext::dct
DCTContext dct
Definition: wmavoice.c:267
dequant_lsp16i
static void dequant_lsp16i(GetBitContext *gb, double *lsps)
Parse 16 independently-coded LSPs.
Definition: wmavoice.c:953
val
const char const char void * val
Definition: avisynth_c.h:863
wmavoice_dq_lsp16r1
static const uint8_t wmavoice_dq_lsp16r1[0x500]
Definition: wmavoice_data.h:1264
WMAVoiceContext::aw_first_pulse_off
int aw_first_pulse_off[2]
index of first sample to which to apply AW-pulses, or -0xff if unset
Definition: wmavoice.c:239
FFMIN
#define FFMIN(a, b)
Definition: common.h:96
WMAVoiceContext::zero_exc_pf
float zero_exc_pf[MAX_SIGNAL_HISTORY+MAX_SFRAMESIZE]
zero filter output (i.e.
Definition: wmavoice.c:274
sinewin.h
wmavoice_dq_lsp16r2
static const uint8_t wmavoice_dq_lsp16r2[0x500]
Definition: wmavoice_data.h:1395
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
frame_type_desc
Description of frame types.
Definition: wmavoice.c:99
WMAVoiceContext::block_pitch_range
int block_pitch_range
range of the block pitch
Definition: wmavoice.c:169
stabilize_lsps
static void stabilize_lsps(double *lsps, int num)
Ensure minimum value for first item, maximum value for last value, proper spacing between each value ...
Definition: wmavoice.c:1658
ff_dct_end
av_cold void ff_dct_end(DCTContext *s)
Definition: dct.c:221
M_PI
#define M_PI
Definition: mathematics.h:52
ff_tilt_compensation
void ff_tilt_compensation(float *mem, float tilt, float *samples, int size)
Apply tilt compensation filter, 1 - tilt * z-1.
Definition: acelp_filters.c:136
wmavoice_energy_table
static const float wmavoice_energy_table[128]
LUT for 1.071575641632 * pow(1.0331663, n - 127)
Definition: wmavoice_data.h:3026
wmavoice_decode_init
static av_cold int wmavoice_decode_init(AVCodecContext *ctx)
Set up decoder with parameters from demuxer (extradata etc.).
Definition: wmavoice.c:367
WMAVoiceContext::block_delta_pitch_hrange
int block_delta_pitch_hrange
1/2 range of the delta (full range is from -this to +this-1)
Definition: wmavoice.c:174
DST_I
@ DST_I
Definition: avfft.h:97
wmavoice_ipol2_coeffs
static const float wmavoice_ipol2_coeffs[32]
Hamming-window sinc function (num = 32, x = [ 0, 31 ]): (0.54 + 0.46 * cos(2 * M_PI * x / (num - 1)))...
Definition: wmavoice_data.h:3012
ff_rdft_init
av_cold int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans)
Set up a real FFT.
Definition: rdft.c:88
DECLARE_ALIGNED
#define DECLARE_ALIGNED(n, t, v)
Definition: mem.h:112
WMAVoiceContext::pitch_diff_sh16
int pitch_diff_sh16
((cur_pitch_val - last_pitch_val) << 16) / MAX_FRAMESIZE
Definition: wmavoice.c:224
in
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31)))) #define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac) { } void ff_audio_convert_free(AudioConvert **ac) { if(! *ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);} AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map) { AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method !=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2) { ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc) { av_free(ac);return NULL;} return ac;} in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar) { ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar ? ac->channels :1;} else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;} int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in) { int use_generic=1;int len=in->nb_samples;int p;if(ac->dc) { av_log(ac->avr, AV_LOG_TRACE, "%d samples - audio_convert: %s to %s (dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> in
Definition: audio_convert.c:326
WMAVoiceContext::gain_pred_err
float gain_pred_err[6]
cache for gain prediction
Definition: wmavoice.c:250
ff_dct_init
av_cold int ff_dct_init(DCTContext *s, int nbits, enum DCTTransformType inverse)
Set up DCT.
Definition: dct.c:177
FCB_TYPE_HARDCODED
@ FCB_TYPE_HARDCODED
hardcoded (fixed) codebook with per-block gain values
Definition: wmavoice.c:87
ACB_TYPE_ASYMMETRIC
@ ACB_TYPE_ASYMMETRIC
adaptive codebook with per-frame pitch, which we interpolate to get a per-sample pitch.
Definition: wmavoice.c:70
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:259
put_bits_count
static int put_bits_count(PutBitContext *s)
Definition: put_bits.h:85
WMAVoiceContext::nb_superframes
int nb_superframes
number of superframes in current packet
Definition: wmavoice.c:249
ff_sine_window_init
void AAC_RENAME() ff_sine_window_init(INTFLOAT *window, int n)
Generate a sine window.
Definition: sinewin_tablegen.h:70
t3
#define t3
Definition: regdef.h:31
WMAVoiceContext::cos
float cos[511]
8-bit cosine/sine windows over [-pi,pi] range
Definition: wmavoice.c:269
RDFTContext
Definition: rdft.h:28
a2
#define a2
Definition: regdef.h:48
WMAVoiceContext::denoise_tilt_corr
int denoise_tilt_corr
Whether to apply tilt correction to the Wiener filter coefficients (postfilter)
Definition: wmavoice.c:153
delta
float delta
Definition: vorbis_enc_data.h:457
wmavoice_lsp16_intercoeff_b
static const float wmavoice_lsp16_intercoeff_b[32][2][16]
Definition: wmavoice_data.h:2306
uint8_t
uint8_t
Definition: audio_convert.c:194
av_frame_unref
void av_frame_unref(AVFrame *frame)
Unreference all the buffers referenced by frame and reset the frame fields.
Definition: frame.c:553
acelp_filters.h
ff_weighted_vector_sumf
void ff_weighted_vector_sumf(float *out, const float *in_a, const float *in_b, float weight_coeff_a, float weight_coeff_b, int length)
float implementation of weighted sum of two vectors.
Definition: acelp_vectors.c:193
DCTContext
Definition: dct.h:32
WMAVoiceContext::lsp_def_mode
int lsp_def_mode
defines different sets of LSP defaults [0, 1]
Definition: wmavoice.c:160
wmavoice_gain_universal
static const float wmavoice_gain_universal[64]
Definition: wmavoice_data.h:2855
AVCodec::name
const char * name
Name of the codec implementation.
Definition: avcodec.h:3488
ACB_TYPE_HAMMING
@ ACB_TYPE_HAMMING
Per-block pitch with signal generation using a Hamming sinc window function.
Definition: wmavoice.c:75
len
int len
Definition: vorbis_enc_data.h:452
WMAVoiceContext::synth_filter_out_buf
float synth_filter_out_buf[0x80+MAX_LSPS_ALIGN16]
aligned buffer for postfilter speech synthesis
Definition: wmavoice.c:283
tilt_factor
static float tilt_factor(const float *lpcs, int n_lpcs)
Get the tilt factor of a formant filter from its transfer function.
Definition: wmavoice.c:592
WMAVoiceContext::rdft
RDFTContext rdft
Definition: wmavoice.c:265
VLC_NBITS
#define VLC_NBITS
number of bits to read per VLC iteration
Definition: wmavoice.c:58
wmavoice_data.h
Windows Media Voice (WMAVoice) tables.
avcodec.h
WMAVoiceContext::min_pitch_val
int min_pitch_val
base value for pitch parsing code
Definition: wmavoice.c:163
WMAVoiceContext::last_acb_type
int last_acb_type
frame type [0-2] of the previous frame
Definition: wmavoice.c:223
av_uninit
#define av_uninit(x)
Definition: attributes.h:148
frame
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame For filters that do not use the this method is called when a frame is wanted on an output For a it should directly call filter_frame on the corresponding output For a if there are queued frames already one of these frames should be pushed If the filter should request a frame on one of its repeatedly until at least one frame has been pushed Return or at least make progress towards producing a frame
Definition: filter_design.txt:264
lsp.h
ff_celp_lp_zero_synthesis_filterf
void ff_celp_lp_zero_synthesis_filterf(float *out, const float *filter_coeffs, const float *in, int buffer_length, int filter_length)
LP zero synthesis filter.
Definition: celp_filters.c:199
WMAVoiceContext::do_apf
int do_apf
whether to apply the averaged projection filter (APF)
Definition: wmavoice.c:149
AMRFixed::n
int n
Definition: acelp_vectors.h:54
wmavoice_dq_lsp16i2
static const uint8_t wmavoice_dq_lsp16i2[0x3c0]
Definition: wmavoice_data.h:583
AV_INPUT_BUFFER_PADDING_SIZE
#define AV_INPUT_BUFFER_PADDING_SIZE
Definition: avcodec.h:790
wmavoice_mean_lsf16
static const double wmavoice_mean_lsf16[2][16]
Definition: wmavoice_data.h:2574
AV_RL32
uint64_t_TMPL AV_WL64 unsigned int_TMPL AV_RL32
Definition: bytestream.h:88
wmavoice_decode_end
static av_cold int wmavoice_decode_end(AVCodecContext *ctx)
Definition: wmavoice.c:1985
WMAVoiceContext::lsps
int lsps
number of LSPs per frame [10 or 16]
Definition: wmavoice.c:158
AVCodecContext
main external API structure.
Definition: avcodec.h:1565
channel_layout.h
t2
#define t2
Definition: regdef.h:30
WMAVoiceContext::dst
DCTContext dst
contexts for phase shift (in Hilbert transform, part of postfilter)
Definition: wmavoice.c:267
WMAVoiceContext::block_pitch_nbits
int block_pitch_nbits
number of bits used to specify the first block's pitch value
Definition: wmavoice.c:167
VLC
Definition: vlc.h:26
synth_superframe
static int synth_superframe(AVCodecContext *ctx, AVFrame *frame, int *got_frame_ptr)
Synthesize output samples for a single superframe.
Definition: wmavoice.c:1704
WMAVoiceContext::frame_cntr
int frame_cntr
current frame index [0 - 0xFFFE]; is only used for comfort noise in pRNG()
Definition: wmavoice.c:247
wmavoice_ipol1_coeffs
static const float wmavoice_ipol1_coeffs[17 *9]
Definition: wmavoice_data.h:2960
values
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame For filters that do not use the this method is called when a frame is wanted on an output For a it should directly call filter_frame on the corresponding output For a if there are queued frames already one of these frames should be pushed If the filter should request a frame on one of its repeatedly until at least one frame has been pushed Return values
Definition: filter_design.txt:263
ff_set_fixed_vector
void ff_set_fixed_vector(float *out, const AMRFixed *in, float scale, int size)
Add fixed vector to an array from a sparse representation.
Definition: acelp_vectors.c:235
mean_lsf
static const float mean_lsf[10]
Definition: siprdata.h:27
AV_CODEC_CAP_DELAY
#define AV_CODEC_CAP_DELAY
Encoder or decoder requires flushing with NULL input at the end in order to give the complete and cor...
Definition: avcodec.h:1006
samples
Filter the word “frame” indicates either a video frame or a group of audio samples
Definition: filter_design.txt:8
copy_bits
static void copy_bits(PutBitContext *pb, const uint8_t *data, int size, GetBitContext *gb, int nbits)
Copy (unaligned) bits from gb/data/size to pb.
Definition: wmavoice.c:1875
avpriv_scalarproduct_float_c
float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len)
Return the scalar product of two vectors.
Definition: float_dsp.c:124
synth_frame
static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx, float *samples, const double *lsps, const double *prev_lsps, float *excitation, float *synth)
Synthesize output samples for a single frame.
Definition: wmavoice.c:1482
mem.h
AV_CODEC_CAP_SUBFRAMES
#define AV_CODEC_CAP_SUBFRAMES
Codec can output multiple frames per AVPacket Normally demuxers return one frame at a time,...
Definition: avcodec.h:1024
M_LN10
#define M_LN10
Definition: mathematics.h:43
WMAVoiceContext::gb
GetBitContext gb
packet bitreader.
Definition: wmavoice.c:136
avpriv_request_sample
#define avpriv_request_sample(...)
Definition: tableprint_vlc.h:39
synth_block_fcb_acb
static void synth_block_fcb_acb(WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, int block_pitch_sh2, const struct frame_type_desc *frame_desc, float *excitation)
Parse FCB/ACB signal for a single block.
Definition: wmavoice.c:1306
flush_put_bits
static void flush_put_bits(PutBitContext *s)
Pad the end of the output stream with zeros.
Definition: put_bits.h:101
alpha
static const int16_t alpha[]
Definition: ilbcdata.h:55
AVPacket
This structure stores compressed data.
Definition: avcodec.h:1454
synth_block_hardcoded
static void synth_block_hardcoded(WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, const struct frame_type_desc *frame_desc, float *excitation)
Parse hardcoded signal for a single block.
Definition: wmavoice.c:1275
SFRAME_CACHE_MAXSIZE
#define SFRAME_CACHE_MAXSIZE
maximum cache size for frame data that
Definition: wmavoice.c:56
AMRFixed::pitch_lag
int pitch_lag
Definition: acelp_vectors.h:58
flags
#define flags(name, subs,...)
Definition: cbs_av1.c:565
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:28
wmavoice_lsp10_intercoeff_a
static const float wmavoice_lsp10_intercoeff_a[32][2][10]
Definition: wmavoice_data.h:1657
AVERROR_INVALIDDATA
#define AVERROR_INVALIDDATA
Invalid data found when processing input.
Definition: error.h:59
dequant_lsp16r
static void dequant_lsp16r(GetBitContext *gb, double *i_lsps, const double *old, double *a1, double *a2, int q_mode)
Parse 16 independently-coded LSPs, and then derive the tables to generate LSPs for the other frames f...
Definition: wmavoice.c:986
frame_type_desc::dbl_pulses
uint8_t dbl_pulses
how many pulse vectors have pulse pairs (rather than just one single pulse) only if fcb_type == FCB_T...
Definition: wmavoice.c:105
frame_type_desc::acb_type
uint8_t acb_type
Adaptive codebook type (ACB_TYPE_*)
Definition: wmavoice.c:103
MAX_BLOCKS
#define MAX_BLOCKS
maximum number of blocks per frame
Definition: wmavoice.c:47
ff_acelp_lspd2lpc
void ff_acelp_lspd2lpc(const double *lsp, float *lpc, int lp_half_order)
Reconstruct LPC coefficients from the line spectral pair frequencies.
Definition: lsp.c:209
WMAVoiceContext::postfilter_agc
float postfilter_agc
gain control memory, used in adaptive_gain_control()
Definition: wmavoice.c:271
put_bits.h
pulses
static const int8_t pulses[4]
Number of non-zero pulses in the MP-MLQ excitation.
Definition: g723_1.h:723
ff_acelp_interpolatef
void ff_acelp_interpolatef(float *out, const float *in, const float *filter_coeffs, int precision, int frac_pos, int filter_length, int length)
Floating point version of ff_acelp_interpolate()
Definition: acelp_filters.c:78
VLC::table
VLC_TYPE(* table)[2]
code, bits
Definition: vlc.h:28
AVFormatContext::priv_data
void * priv_data
Format private data.
Definition: avformat.h:1370
AV_SAMPLE_FMT_FLT
@ AV_SAMPLE_FMT_FLT
float
Definition: samplefmt.h:63
min
float min
Definition: vorbis_enc_data.h:456
WMAVoiceContext
WMA Voice decoding context.
Definition: wmavoice.c:131