FFmpeg: libavcodec/wmavoice.c Source File

00001 /*
00002  * Windows Media Audio Voice decoder.
00003  * Copyright (c) 2009 Ronald S. Bultje
00004  *
00005  * This file is part of FFmpeg.
00006  *
00007  * FFmpeg is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public
00009  * License as published by the Free Software Foundation; either
00010  * version 2.1 of the License, or (at your option) any later version.
00011  *
00012  * FFmpeg is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with FFmpeg; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00020  */
00021 
/* NOTE(review): defined ahead of the get_bits.h include below; presumably it
 * selects the bit reader variant without per-read bounds checks - confirm
 * against get_bits.h. */
00028 #define UNCHECKED_BITSTREAM_READER 1
00029 
00030 #include <math.h>
00031 
00032 #include "libavutil/channel_layout.h"
00033 #include "libavutil/mem.h"
00034 #include "dsputil.h"
00035 #include "avcodec.h"
00036 #include "internal.h"
00037 #include "get_bits.h"
00038 #include "put_bits.h"
00039 #include "wmavoice_data.h"
00040 #include "celp_filters.h"
00041 #include "acelp_vectors.h"
00042 #include "acelp_filters.h"
00043 #include "lsp.h"
00044 #include "dct.h"
00045 #include "rdft.h"
00046 #include "sinewin.h"
00047 
00048 #define MAX_BLOCKS           8   ///< largest n_blocks value found in frame_descs[]
00049 #define MAX_LSPS             16  ///< largest LSP count (wmavoice_decode_init() selects 10 or 16)
00050 #define MAX_LSPS_ALIGN16     16  ///< MAX_LSPS rounded up to a multiple of 16; head-room in front of synth_filter_out_buf
00051 
00052 #define MAX_FRAMES           3   ///< NOTE(review): presumably the number of frames per superframe - confirm
00053 #define MAX_FRAMESIZE        160 ///< frame size in samples; postfilter() accepts at most MAX_FRAMESIZE / 2 per call
00054 #define MAX_SIGNAL_HISTORY   416 ///< upper bound enforced on WMAVoiceContext.history_nsamples
00055 #define MAX_SFRAMESIZE       (MAX_FRAMESIZE * MAX_FRAMES) ///< MAX_FRAMES frames of MAX_FRAMESIZE samples
00057 #define SFRAME_CACHE_MAXSIZE 256 ///< payload bytes in WMAVoiceContext.sframe_cache[] (padding is added on top)
00058 
00059 #define VLC_NBITS            6   ///< nb_bits argument used when frame_type_vlc is initialised
00060 
00061 
/** Frame type VLC; its static table is set up by decode_vbmtree(). */
00064 static VLC frame_type_vlc;
00065 
/**
 * Values stored in frame_type_desc.acb_type and WMAVoiceContext.last_acb_type.
 * NOTE(review): "ACB" presumably stands for adaptive codebook - confirm.
 */
00069 enum {
00070     ACB_TYPE_NONE       = 0, ///< used by frame_descs[0..1]; initial value of last_acb_type
00071     ACB_TYPE_ASYMMETRIC = 1, ///< used by frame_descs[2..7]
00072 
00073 
00074 
00075 
00076     ACB_TYPE_HAMMING    = 2  ///< used by frame_descs[8..16]
00077 
00078 
00079 };
00080 
/**
 * Values stored in frame_type_desc.fcb_type. The numeric order matters:
 * postfilter() applies kalman_smoothen() only for values
 * >= FCB_TYPE_AW_PULSES.
 * NOTE(review): "FCB" presumably stands for fixed codebook - confirm.
 */
00084 enum {
00085     FCB_TYPE_SILENCE    = 0, ///< frame_descs[0]; wiener_denoise() skips the filter computation for it
00086 
00087 
00088     FCB_TYPE_HARDCODED  = 1, ///< frame_descs[1]; calc_input_response() uses gain factor 5/13 instead of 5/14.7
00089 
00090     FCB_TYPE_AW_PULSES  = 2, ///< frame_descs[2]
00091 
00092     FCB_TYPE_EXC_PULSES = 3, ///< frame_descs[3..16]
00093 
00094 
00095 };
00096 
/**
 * Static description of each of the 17 frame types; decode_vbmtree() assigns
 * the indexes 0..16 of this table to slots of the VBM tree.
 */
00100 static const struct frame_type_desc {
00101     uint8_t n_blocks;     ///< number of blocks (1, 2, 4 or 8 in the table below)
00102 
00103     uint8_t log_n_blocks; ///< log2(n_blocks)
00104     uint8_t acb_type;     ///< one of the ACB_TYPE_* values
00105     uint8_t fcb_type;     ///< one of the FCB_TYPE_* values
00106     uint8_t dbl_pulses;   ///< 0, 2 or 5 in the table; NOTE(review): presumably a count of extra pulses - confirm
00107 
00108 
00109     uint16_t frame_size;  ///< NOTE(review): presumably the coded size of the frame in bits - confirm
00110 
00111 } frame_descs[17] = {
00112     { 1, 0, ACB_TYPE_NONE,       FCB_TYPE_SILENCE,    0,   0 },
00113     { 2, 1, ACB_TYPE_NONE,       FCB_TYPE_HARDCODED,  0,  28 },
00114     { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_AW_PULSES,  0,  46 },
00115     { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2,  80 },
00116     { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 104 },
00117     { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 0, 108 },
00118     { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2, 132 },
00119     { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 168 },
00120     { 2, 1, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 0,  64 },
00121     { 2, 1, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 2,  80 },
00122     { 2, 1, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 5, 104 },
00123     { 4, 2, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 0, 108 },
00124     { 4, 2, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 2, 132 },
00125     { 4, 2, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 5, 168 },
00126     { 8, 3, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 0, 176 },
00127     { 8, 3, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 2, 208 },
00128     { 8, 3, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 5, 256 }
00129 };
00130 
/**
 * Decoder private context. Fields without a comment are not written by the
 * code visible next to this definition, so nothing is asserted about them.
 */
00134 typedef struct {
00139     AVFrame frame;                ///< ctx->coded_frame is pointed at this in wmavoice_decode_init()
00140     GetBitContext gb;             ///< bit reader; wmavoice_decode_init() uses it to parse the VBM tree from extradata
00141 
00142 
00143 
00144     int8_t vbm_tree[25];          ///< frame type slots filled by decode_vbmtree(); unused slots keep 0xff (-1)
00145 
00146     int spillover_bitsize;        ///< 3 + ceil(log2(block_align))
00147 
00148 
00149     int history_nsamples;         ///< max_pitch_val + 8; never more than MAX_SIGNAL_HISTORY
00150 
00151 
00152     /* postfilter specific values */
00153     int do_apf;                   ///< extradata flag bit 0; when set, the RDFT/DCT contexts and sin/cos tables are initialised
00154 
00155     int denoise_strength;         ///< extradata flag bits 2-5, valid range 0-11; row index into wmavoice_denoise_power_table
00156 
00157     int denoise_tilt_corr;        ///< extradata flag 0x40; enables tilt compensation in calc_input_response()
00158 
00159     int dc_level;                 ///< extradata flag bits 7-10; postfilter() high-pass filters its output when this is > 8
00160 
00161 
00162     int lsps;                     ///< number of LSPs: 16 if extradata flag 0x1000 is set, otherwise 10
00163     int lsp_q_mode;               ///< extradata flag 0x2000
00164     int lsp_def_mode;             ///< extradata flag 0x4000
00165 
00166     int frame_lsp_bitsize;        ///< 34 with 16 LSPs, 24 with 10 LSPs
00167 
00168     int sframe_lsp_bitsize;       ///< 60 with 16 LSPs, 48 with 10 LSPs
00169 
00170 
00171     int min_pitch_val;            ///< derived from the sample rate in wmavoice_decode_init(); at least 1
00172     int max_pitch_val;            ///< derived from the sample rate in wmavoice_decode_init()
00173     int pitch_nbits;              ///< ceil(log2(max_pitch_val - min_pitch_val))
00174 
00175     int block_pitch_nbits;        ///< ceil(log2(block_pitch_range))
00176 
00177     int block_pitch_range;        ///< computed from block_conv_table[] and min_pitch_val in wmavoice_decode_init()
00178     int block_delta_pitch_nbits;  ///< 1 + ceil(log2(block_delta_pitch_hrange))
00179 
00180 
00181 
00182     int block_delta_pitch_hrange; ///< (pitch range >> 3) rounded down to a multiple of 16; always > 0
00183 
00184     uint16_t block_conv_table[4]; ///< [0] = min_pitch_val, [1] / [2] = pitch range * 25/64 and * 44/64, [3] = max_pitch_val - 1
00185 
00186 
00196     int spillover_nbits;          
00197 
00198 
00199 
00200     int has_residual_lsps;        
00201 
00202 
00203 
00204 
00205     int skip_bits_next;           
00206 
00207 
00208 
00209     uint8_t sframe_cache[SFRAME_CACHE_MAXSIZE + FF_INPUT_BUFFER_PADDING_SIZE]; ///< SFRAME_CACHE_MAXSIZE bytes plus input buffer padding
00212     int sframe_cache_size;        
00213 
00214 
00215 
00216 
00217     PutBitContext pb;             
00218 
00228     double prev_lsps[MAX_LSPS];   ///< initialised to the evenly spaced values pi * (n + 1) / (lsps + 1)
00229 
00230     int last_pitch_val;           ///< initialised to 40
00231     int last_acb_type;            ///< initialised to ACB_TYPE_NONE
00232     int pitch_diff_sh16;          
00233 
00234     float silence_gain;           
00235 
00236     int aw_idx_is_ext;            ///< set by aw_parse_coords() when the 6-bit pulse index was >= 54 and 2 more bits were read
00237 
00238     int aw_pulse_range;           ///< 24 if both block pitch values are > 32, otherwise 16 (see aw_parse_coords())
00239 
00240 
00241 
00242 
00243 
00244     int aw_n_pulses[2];           ///< per-block pulse count computed by aw_parse_coords()
00245 
00246 
00247     int aw_first_pulse_off[2];    ///< per-block offset of the first pulse computed by aw_parse_coords()
00248 
00249     int aw_next_pulse_off_cache;  ///< read by aw_pulse_set2() as the pulse offset for block 1
00250 
00251 
00252 
00253 
00254 
00255     int frame_cntr;               
00256 
00257     float gain_pred_err[6];       
00258     float excitation_history[MAX_SIGNAL_HISTORY];
00262     float synth_history[MAX_LSPS]; 
00263 
00272     RDFTContext rdft, irdft;      ///< 7-bit forward (DFT_R2C) and inverse (IDFT_C2R) transforms, only set up when do_apf is set
00273 
00274     DCTContext dct, dst;          ///< 6-bit DCT_I and DST_I transforms, only set up when do_apf is set
00275 
00276     float sin[511], cos[511];     ///< lookup tables built from a 256-point sine window in wmavoice_decode_init()
00277 
00278     float postfilter_agc;         ///< gain memory handed to adaptive_gain_control() by postfilter()
00279 
00280     float dcf_mem[2];             ///< filter state of the DC high-pass applied in postfilter()
00281     float zero_exc_pf[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE];
00284     float denoise_filter_cache[MAX_FRAMESIZE]; ///< filter tail that wiener_denoise() carries over into following calls
00285     int   denoise_filter_cache_size; ///< number of valid entries in denoise_filter_cache[]
00286     DECLARE_ALIGNED(32, float, tilted_lpcs_pf)[0x80]; ///< scratch buffer used by wiener_denoise()
00288     DECLARE_ALIGNED(32, float, denoise_coeffs_pf)[0x80]; ///< filter coefficients computed for wiener_denoise()
00290     DECLARE_ALIGNED(32, float, synth_filter_out_buf)[0x80 + MAX_LSPS_ALIGN16]; ///< postfilter() output, preceded by MAX_LSPS_ALIGN16 floats of history
00293 
00296 } WMAVoiceContext;
00297 
00307 static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25])
00308 {
00309     static const uint8_t bits[] = {
00310          2,  2,  2,  4,  4,  4,
00311          6,  6,  6,  8,  8,  8,
00312         10, 10, 10, 12, 12, 12,
00313         14, 14, 14, 14
00314     };
00315     static const uint16_t codes[] = {
00316           0x0000, 0x0001, 0x0002,        //              00/01/10
00317           0x000c, 0x000d, 0x000e,        //           11+00/01/10
00318           0x003c, 0x003d, 0x003e,        //         1111+00/01/10
00319           0x00fc, 0x00fd, 0x00fe,        //       111111+00/01/10
00320           0x03fc, 0x03fd, 0x03fe,        //     11111111+00/01/10
00321           0x0ffc, 0x0ffd, 0x0ffe,        //   1111111111+00/01/10
00322           0x3ffc, 0x3ffd, 0x3ffe, 0x3fff // 111111111111+xx
00323     };
00324     int cntr[8] = { 0 }, n, res;
00325 
00326     memset(vbm_tree, 0xff, sizeof(vbm_tree[0]) * 25);
00327     for (n = 0; n < 17; n++) {
00328         res = get_bits(gb, 3);
00329         if (cntr[res] > 3) // should be >= 3 + (res == 7))
00330             return -1;
00331         vbm_tree[res * 3 + cntr[res]++] = n;
00332     }
00333     INIT_VLC_STATIC(&frame_type_vlc, VLC_NBITS, sizeof(bits),
00334                     bits, 1, 1, codes, 2, 2, 132);
00335     return 0;
00336 }
00337 
00341 static av_cold int wmavoice_decode_init(AVCodecContext *ctx)
00342 {
00343     int n, flags, pitch_range, lsp16_flag;
00344     WMAVoiceContext *s = ctx->priv_data;
00345 
00354     if (ctx->extradata_size != 46) {
00355         av_log(ctx, AV_LOG_ERROR,
00356                "Invalid extradata size %d (should be 46)\n",
00357                ctx->extradata_size);
00358         return -1;
00359     }
00360     flags                = AV_RL32(ctx->extradata + 18);
00361     s->spillover_bitsize = 3 + av_ceil_log2(ctx->block_align);
00362     s->do_apf            =    flags & 0x1;
00363     if (s->do_apf) {
00364         ff_rdft_init(&s->rdft,  7, DFT_R2C);
00365         ff_rdft_init(&s->irdft, 7, IDFT_C2R);
00366         ff_dct_init(&s->dct,  6, DCT_I);
00367         ff_dct_init(&s->dst,  6, DST_I);
00368 
00369         ff_sine_window_init(s->cos, 256);
00370         memcpy(&s->sin[255], s->cos, 256 * sizeof(s->cos[0]));
00371         for (n = 0; n < 255; n++) {
00372             s->sin[n]       = -s->sin[510 - n];
00373             s->cos[510 - n] =  s->cos[n];
00374         }
00375     }
00376     s->denoise_strength  =   (flags >> 2) & 0xF;
00377     if (s->denoise_strength >= 12) {
00378         av_log(ctx, AV_LOG_ERROR,
00379                "Invalid denoise filter strength %d (max=11)\n",
00380                s->denoise_strength);
00381         return -1;
00382     }
00383     s->denoise_tilt_corr = !!(flags & 0x40);
00384     s->dc_level          =   (flags >> 7) & 0xF;
00385     s->lsp_q_mode        = !!(flags & 0x2000);
00386     s->lsp_def_mode      = !!(flags & 0x4000);
00387     lsp16_flag           =    flags & 0x1000;
00388     if (lsp16_flag) {
00389         s->lsps               = 16;
00390         s->frame_lsp_bitsize  = 34;
00391         s->sframe_lsp_bitsize = 60;
00392     } else {
00393         s->lsps               = 10;
00394         s->frame_lsp_bitsize  = 24;
00395         s->sframe_lsp_bitsize = 48;
00396     }
00397     for (n = 0; n < s->lsps; n++)
00398         s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
00399 
00400     init_get_bits(&s->gb, ctx->extradata + 22, (ctx->extradata_size - 22) << 3);
00401     if (decode_vbmtree(&s->gb, s->vbm_tree) < 0) {
00402         av_log(ctx, AV_LOG_ERROR, "Invalid VBM tree; broken extradata?\n");
00403         return -1;
00404     }
00405 
00406     s->min_pitch_val    = ((ctx->sample_rate << 8)      /  400 + 50) >> 8;
00407     s->max_pitch_val    = ((ctx->sample_rate << 8) * 37 / 2000 + 50) >> 8;
00408     pitch_range         = s->max_pitch_val - s->min_pitch_val;
00409     if (pitch_range <= 0) {
00410         av_log(ctx, AV_LOG_ERROR, "Invalid pitch range; broken extradata?\n");
00411         return -1;
00412     }
00413     s->pitch_nbits      = av_ceil_log2(pitch_range);
00414     s->last_pitch_val   = 40;
00415     s->last_acb_type    = ACB_TYPE_NONE;
00416     s->history_nsamples = s->max_pitch_val + 8;
00417 
00418     if (s->min_pitch_val < 1 || s->history_nsamples > MAX_SIGNAL_HISTORY) {
00419         int min_sr = ((((1 << 8) - 50) * 400) + 0xFF) >> 8,
00420             max_sr = ((((MAX_SIGNAL_HISTORY - 8) << 8) + 205) * 2000 / 37) >> 8;
00421 
00422         av_log(ctx, AV_LOG_ERROR,
00423                "Unsupported samplerate %d (min=%d, max=%d)\n",
00424                ctx->sample_rate, min_sr, max_sr); // 322-22097 Hz
00425 
00426         return -1;
00427     }
00428 
00429     s->block_conv_table[0]      = s->min_pitch_val;
00430     s->block_conv_table[1]      = (pitch_range * 25) >> 6;
00431     s->block_conv_table[2]      = (pitch_range * 44) >> 6;
00432     s->block_conv_table[3]      = s->max_pitch_val - 1;
00433     s->block_delta_pitch_hrange = (pitch_range >> 3) & ~0xF;
00434     if (s->block_delta_pitch_hrange <= 0) {
00435         av_log(ctx, AV_LOG_ERROR, "Invalid delta pitch hrange; broken extradata?\n");
00436         return -1;
00437     }
00438     s->block_delta_pitch_nbits  = 1 + av_ceil_log2(s->block_delta_pitch_hrange);
00439     s->block_pitch_range        = s->block_conv_table[2] +
00440                                   s->block_conv_table[3] + 1 +
00441                                   2 * (s->block_conv_table[1] - 2 * s->min_pitch_val);
00442     s->block_pitch_nbits        = av_ceil_log2(s->block_pitch_range);
00443 
00444     ctx->channels               = 1;
00445     ctx->channel_layout         = AV_CH_LAYOUT_MONO;
00446     ctx->sample_fmt             = AV_SAMPLE_FMT_FLT;
00447 
00448     avcodec_get_frame_defaults(&s->frame);
00449     ctx->coded_frame = &s->frame;
00450 
00451     return 0;
00452 }
00453 
/**
 * Adaptive gain control: scale the postfiltered signal so that its level
 * follows that of the unfiltered synthesized speech.
 *
 * The per-sample gain is a first-order recursion
 * mem = alpha * mem + (1 - alpha) * E(speech) / E(in), where E() is the sum
 * of absolute sample values over the block.
 *
 * If the postfiltered block carries no energy at all, the target gain is
 * taken as zero instead of dividing by zero. Without this, the quotient is
 * Inf or NaN, which ends up in the output and in *gain_mem and then spoils
 * every following block.
 *
 * @param out          output buffer of size samples (may be the same as in)
 * @param in           postfiltered input samples
 * @param speech_synth reference speech samples used for the target level
 * @param size         number of samples in each buffer
 * @param alpha        smoothing factor of the gain recursion
 * @param gain_mem     in/out: gain state carried from one call to the next
 */
static void adaptive_gain_control(float *out, const float *in,
                                  const float *speech_synth,
                                  int size, float alpha, float *gain_mem)
{
    int i;
    float speech_energy = 0.0, postfilter_energy = 0.0, gain_scale_factor;
    float mem = *gain_mem;

    for (i = 0; i < size; i++) {
        speech_energy     += fabsf(speech_synth[i]);
        postfilter_energy += fabsf(in[i]);
    }
    /* a sum of absolute values is zero only if every input sample is zero */
    gain_scale_factor = postfilter_energy == 0.0 ? 0.0 :
                        (1.0 - alpha) * speech_energy / postfilter_energy;

    for (i = 0; i < size; i++) {
        mem = alpha * mem + gain_scale_factor;
        out[i] = in[i] * mem;
    }

    *gain_mem = mem;
}
00496 
00515 static int kalman_smoothen(WMAVoiceContext *s, int pitch,
00516                            const float *in, float *out, int size)
00517 {
00518     int n;
00519     float optimal_gain = 0, dot;
00520     const float *ptr = &in[-FFMAX(s->min_pitch_val, pitch - 3)],
00521                 *end = &in[-FFMIN(s->max_pitch_val, pitch + 3)],
00522                 *best_hist_ptr = NULL;
00523 
00524     /* find best fitting point in history */
00525     do {
00526         dot = ff_scalarproduct_float_c(in, ptr, size);
00527         if (dot > optimal_gain) {
00528             optimal_gain  = dot;
00529             best_hist_ptr = ptr;
00530         }
00531     } while (--ptr >= end);
00532 
00533     if (optimal_gain <= 0)
00534         return -1;
00535     dot = ff_scalarproduct_float_c(best_hist_ptr, best_hist_ptr, size);
00536     if (dot <= 0) // would be 1.0
00537         return -1;
00538 
00539     if (optimal_gain <= dot) {
00540         dot = dot / (dot + 0.6 * optimal_gain); // 0.625-1.000
00541     } else
00542         dot = 0.625;
00543 
00544     /* actual smoothing */
00545     for (n = 0; n < size; n++)
00546         out[n] = best_hist_ptr[n] + dot * (in[n] - best_hist_ptr[n]);
00547 
00548     return 0;
00549 }
00550 
/**
 * Compute the ratio between the lag-1 and lag-0 autocorrelation of the
 * sequence { 1.0, lpcs[0], ..., lpcs[n_lpcs - 1] }.
 *
 * @param lpcs   coefficients, without the implicit leading 1.0
 * @param n_lpcs number of entries in lpcs
 * @return lag-1 autocorrelation divided by lag-0 autocorrelation
 */
static float tilt_factor(const float *lpcs, int n_lpcs)
{
    /* lag 0: the implicit leading 1.0 contributes 1.0 * 1.0 */
    const float autocorr0 = 1.0 +
        ff_scalarproduct_float_c(lpcs, lpcs, n_lpcs);
    /* lag 1: the implicit leading 1.0 contributes 1.0 * lpcs[0] */
    const float autocorr1 = lpcs[0] +
        ff_scalarproduct_float_c(lpcs, lpcs + 1, n_lpcs - 1);

    return autocorr1 / autocorr0;
}
00570 
/**
 * Derive the denoise filter coefficients from a set of (tilted) LPCs.
 *
 * The function works in place on both buffers:
 *  - lpcs is transformed with the forward RDFT, then entries [0..64] are
 *    overwritten first with the log10 power per frequency bin and afterwards
 *    with scaled per-bin gains, which are run through the DCT-I and DST-I;
 *  - coeffs first receives one magnitude per bin in [0..64], is then expanded
 *    into interleaved pairs (the last coefficient is stored in coeffs[1]),
 *    transformed back with the inverse RDFT, zeroed from index remainder up
 *    to 127, optionally tilt compensated and finally normalised.
 *
 * @param s         decoder context; uses the rdft/irdft/dct/dst contexts, the
 *                  sin/cos tables, denoise_strength and denoise_tilt_corr
 * @param lpcs      in/out scratch buffer of the RDFT size (128 floats for the
 *                  7-bit transform set up in wmavoice_decode_init())
 * @param fcb_type  FCB_TYPE_*; FCB_TYPE_HARDCODED selects a different gain
 *                  multiplier
 * @param coeffs    output buffer, 128 floats
 * @param remainder number of leading coefficients kept (the rest is zeroed)
 *
 * NOTE(review): if all bins have the same log power, range is 0 and irange
 * becomes infinite; the table index computed from it is then not meaningful.
 * Confirm whether callers can produce such input.
 */
00574 static void calc_input_response(WMAVoiceContext *s, float *lpcs,
00575                                 int fcb_type, float *coeffs, int remainder)
00576 {
00577     float last_coeff, min = 15.0, max = -15.0;
00578     float irange, angle_mul, gain_mul, range, sq;
00579     int n, idx;
00580 
00581     /* Create frequency power spectrum of speech input (i.e. RDFT of LPCs) */
00582     s->rdft.rdft_calc(&s->rdft, lpcs);
/* log_range(): store log10(assign) in var and track the running min/max */
00583 #define log_range(var, assign) do { \
00584         float tmp = log10f(assign);  var = tmp; \
00585         max       = FFMAX(max, tmp); min = FFMIN(min, tmp); \
00586     } while (0)
    /* lpcs[1] is handled separately from the (re, im) pairs and is parked in
     * last_coeff, because lpcs[1] is overwritten by the loop below */
00587     log_range(last_coeff,  lpcs[1]         * lpcs[1]);
00588     for (n = 1; n < 64; n++)
00589         log_range(lpcs[n], lpcs[n * 2]     * lpcs[n * 2] +
00590                            lpcs[n * 2 + 1] * lpcs[n * 2 + 1]);
00591     log_range(lpcs[0],     lpcs[0]         * lpcs[0]);
00592 #undef log_range
00593     range    = max - min;
00594     lpcs[64] = last_coeff;
00595 
00596     /* Now, use this spectrum to pick out these frequencies with higher
00597      * (relative) power/energy (which we then take to be "not noise"),
00598      * and set up a table (still in lpc[]) of (relative) gains per frequency.
00599      * These frequencies will be maintained, while others ("noise") will be
00600      * decreased in the filter output. */
00601     irange    = 64.0 / range; // so irange*(max-value) is in the range [0, 63]
00602     gain_mul  = range * (fcb_type == FCB_TYPE_HARDCODED ? (5.0 / 13.0) :
00603                                                           (5.0 / 14.7));
00604     angle_mul = gain_mul * (8.0 * M_LN10 / M_PI);
00605     for (n = 0; n <= 64; n++) {
00606         float pwr;
00607 
00608         idx = FFMAX(0, lrint((max - lpcs[n]) * irange) - 1);
00609         pwr = wmavoice_denoise_power_table[s->denoise_strength][idx];
00610         lpcs[n] = angle_mul * pwr;
00611 
00612         /* 70.57 =~ 1/log10(1.0331663) */
00613         idx = (pwr * gain_mul - 0.0295) * 70.570526123;
00614         if (idx > 127) { // fallback if index falls outside table range
00615             coeffs[n] = wmavoice_energy_table[127] *
00616                         powf(1.0331663, idx - 127);
00617         } else
00618             coeffs[n] = wmavoice_energy_table[FFMAX(0, idx)];
00619     }
00620 
00621     /* calculate the Hilbert transform of the gains, which we do (since this
00622      * is a sinus input) by doing a phase shift (in theory, H(sin())=cos()).
00623      * Hilbert_Transform(RDFT(x)) = Laplace_Transform(x), which calculates the
00624      * "moment" of the LPCs in this filter. */
00625     s->dct.dct_calc(&s->dct, lpcs);
00626     s->dst.dct_calc(&s->dst, lpcs);
00627 
00628     /* Split out the coefficient indexes into phase/magnitude pairs */
    /* 255 is the centre of the 511-entry sin/cos tables; the clip keeps the
     * table index inside [0, 510] */
00629     idx = 255 + av_clip(lpcs[64],               -255, 255);
00630     coeffs[0]  = coeffs[0]  * s->cos[idx];
00631     idx = 255 + av_clip(lpcs[64] - 2 * lpcs[63], -255, 255);
00632     last_coeff = coeffs[64] * s->cos[idx];
    /* Expand from the top down so that coeffs[n], which is still needed, is
     * only overwritten once n * 2 has come down to it. Each iteration handles
     * two bins (n is decremented both in the body and in the for statement),
     * alternating the sign applied to lpcs[64]. */
00633     for (n = 63;; n--) {
00634         idx = 255 + av_clip(-lpcs[64] - 2 * lpcs[n - 1], -255, 255);
00635         coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
00636         coeffs[n * 2]     = coeffs[n] * s->cos[idx];
00637 
00638         if (!--n) break;
00639 
00640         idx = 255 + av_clip( lpcs[64] - 2 * lpcs[n - 1], -255, 255);
00641         coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
00642         coeffs[n * 2]     = coeffs[n] * s->cos[idx];
00643     }
00644     coeffs[1] = last_coeff;
00645 
00646     /* move into real domain */
00647     s->irdft.rdft_calc(&s->irdft, coeffs);
00648 
00649     /* tilt correction and normalize scale */
00650     memset(&coeffs[remainder], 0, sizeof(coeffs[0]) * (128 - remainder));
00651     if (s->denoise_tilt_corr) {
00652         float tilt_mem = 0;
00653 
00654         coeffs[remainder - 1] = 0;
00655         ff_tilt_compensation(&tilt_mem,
00656                              -1.8 * tilt_factor(coeffs, remainder - 1),
00657                              coeffs, remainder);
00658     }
    /* scale so that the kept coefficients have an energy of 1 / 64^2 */
00659     sq = (1.0 / 64.0) * sqrtf(1 / ff_scalarproduct_float_c(coeffs, coeffs, remainder));
00660     for (n = 0; n < remainder; n++)
00661         coeffs[n] *= sq;
00662 }
00663 
/**
 * Apply the denoise filter to a block of re-synthesized speech.
 *
 * Unless the block is of type FCB_TYPE_SILENCE, filter coefficients are
 * derived from the LPCs with calc_input_response() and multiplied with the
 * signal in the frequency domain. The part of the filter output that falls
 * beyond the block (at most remainder samples) is kept in
 * s->denoise_filter_cache and added to the start of following blocks.
 *
 * @param s        decoder context
 * @param fcb_type FCB_TYPE_* of the current block
 * @param synth_pf in/out: size input samples; the buffer must hold 128 floats
 *                 because it is zero padded and transformed in place
 * @param size     number of samples in the block
 * @param lpcs     LPC filter coefficients, s->lsps entries
 */
00690 static void wiener_denoise(WMAVoiceContext *s, int fcb_type,
00691                            float *synth_pf, int size,
00692                            const float *lpcs)
00693 {
    /* remainder is only assigned and only read when fcb_type is not
     * FCB_TYPE_SILENCE */
00694     int remainder, lim, n;
00695 
00696     if (fcb_type != FCB_TYPE_SILENCE) {
00697         float *tilted_lpcs = s->tilted_lpcs_pf,
00698               *coeffs = s->denoise_coeffs_pf, tilt_mem = 0;
00699 
        /* build { 1.0, lpcs[0..lsps-1], 0, ... } and tilt compensate it */
00700         tilted_lpcs[0]           = 1.0;
00701         memcpy(&tilted_lpcs[1], lpcs, sizeof(lpcs[0]) * s->lsps);
00702         memset(&tilted_lpcs[s->lsps + 1], 0,
00703                sizeof(tilted_lpcs[0]) * (128 - s->lsps - 1));
00704         ff_tilt_compensation(&tilt_mem, 0.7 * tilt_factor(lpcs, s->lsps),
00705                              tilted_lpcs, s->lsps + 2);
00706 
00707         /* The IRDFT output (127 samples for 7-bit filter) beyond the frame
00708          * size is applied to the next frame. All input beyond this is zero,
00709          * and thus all output beyond this will go towards zero, hence we can
00710          * limit to min(size-1, 127-size) as a performance consideration. */
00711         remainder = FFMIN(127 - size, size - 1);
00712         calc_input_response(s, tilted_lpcs, fcb_type, coeffs, remainder);
00713 
00714         /* apply coefficients (in frequency spectrum domain), i.e. complex
00715          * number multiplication */
00716         memset(&synth_pf[size], 0, sizeof(synth_pf[0]) * (128 - size));
00717         s->rdft.rdft_calc(&s->rdft, synth_pf);
00718         s->rdft.rdft_calc(&s->rdft, coeffs);
        /* entries 0 and 1 are multiplied as plain real values; the remaining
         * entries are treated as (re, im) pairs */
00719         synth_pf[0] *= coeffs[0];
00720         synth_pf[1] *= coeffs[1];
00721         for (n = 1; n < 64; n++) {
00722             float v1 = synth_pf[n * 2], v2 = synth_pf[n * 2 + 1];
00723             synth_pf[n * 2]     = v1 * coeffs[n * 2] - v2 * coeffs[n * 2 + 1];
00724             synth_pf[n * 2 + 1] = v2 * coeffs[n * 2] + v1 * coeffs[n * 2 + 1];
00725         }
00726         s->irdft.rdft_calc(&s->irdft, synth_pf);
00727     }
00728 
00729     /* merge filter output with the history of previous runs */
00730     if (s->denoise_filter_cache_size) {
00731         lim = FFMIN(s->denoise_filter_cache_size, size);
00732         for (n = 0; n < lim; n++)
00733             synth_pf[n] += s->denoise_filter_cache[n];
00734         s->denoise_filter_cache_size -= lim;
        /* whatever is left in the cache (possibly nothing) starts at index
         * size and is moved to the front */
00735         memmove(s->denoise_filter_cache, &s->denoise_filter_cache[size],
00736                 sizeof(s->denoise_filter_cache[0]) * s->denoise_filter_cache_size);
00737     }
00738 
00739     /* move remainder of filter output into a cache for future runs */
00740     if (fcb_type != FCB_TYPE_SILENCE) {
        /* add onto the entries still cached, then append the rest */
00741         lim = FFMIN(remainder, s->denoise_filter_cache_size);
00742         for (n = 0; n < lim; n++)
00743             s->denoise_filter_cache[n] += synth_pf[size + n];
00744         if (lim < remainder) {
00745             memcpy(&s->denoise_filter_cache[lim], &synth_pf[size + lim],
00746                    sizeof(s->denoise_filter_cache[0]) * (remainder - lim));
00747             s->denoise_filter_cache_size = remainder;
00748         }
00749     }
00750 }
00751 
00772 static void postfilter(WMAVoiceContext *s, const float *synth,
00773                        float *samples,    int size,
00774                        const float *lpcs, float *zero_exc_pf,
00775                        int fcb_type,      int pitch)
00776 {
00777     float synth_filter_in_buf[MAX_FRAMESIZE / 2],
00778           *synth_pf = &s->synth_filter_out_buf[MAX_LSPS_ALIGN16],
00779           *synth_filter_in = zero_exc_pf;
00780 
00781     av_assert0(size <= MAX_FRAMESIZE / 2);
00782 
00783     /* generate excitation from input signal */
00784     ff_celp_lp_zero_synthesis_filterf(zero_exc_pf, lpcs, synth, size, s->lsps);
00785 
00786     if (fcb_type >= FCB_TYPE_AW_PULSES &&
00787         !kalman_smoothen(s, pitch, zero_exc_pf, synth_filter_in_buf, size))
00788         synth_filter_in = synth_filter_in_buf;
00789 
00790     /* re-synthesize speech after smoothening, and keep history */
00791     ff_celp_lp_synthesis_filterf(synth_pf, lpcs,
00792                                  synth_filter_in, size, s->lsps);
00793     memcpy(&synth_pf[-s->lsps], &synth_pf[size - s->lsps],
00794            sizeof(synth_pf[0]) * s->lsps);
00795 
00796     wiener_denoise(s, fcb_type, synth_pf, size, lpcs);
00797 
00798     adaptive_gain_control(samples, synth_pf, synth, size, 0.99,
00799                           &s->postfilter_agc);
00800 
00801     if (s->dc_level > 8) {
00802         /* remove ultra-low frequency DC noise / highpass filter;
00803          * coefficients are identical to those used in SIPR decoding,
00804          * and very closely resemble those used in AMR-NB decoding. */
00805         ff_acelp_apply_order_2_transfer_function(samples, samples,
00806             (const float[2]) { -1.99997,      1.0 },
00807             (const float[2]) { -1.9330735188, 0.93589198496 },
00808             0.93980580475, s->dcf_mem, size);
00809     }
00810 }
/**
 * Dequantize LSPs from a multi-stage vector codebook.
 *
 * For each stage, the codebook entry selected by values[stage] is scaled by
 * mul_q[stage], offset by base_q[stage] and accumulated into lsps. The
 * codebooks of the stages are stored back to back in table, each holding
 * sizes[stage] entries of num bytes.
 *
 * @param lsps     output, num values (cleared before accumulation)
 * @param num      number of values per codebook entry
 * @param values   selected entry index per stage
 * @param sizes    number of entries in each stage's codebook
 * @param n_stages number of stages
 * @param table    concatenated codebooks
 * @param mul_q    scale factor per stage
 * @param base_q   offset per stage
 */
static void dequant_lsps(double *lsps, int num,
                         const uint16_t *values,
                         const uint16_t *sizes,
                         int n_stages, const uint8_t *table,
                         const double *mul_q,
                         const double *base_q)
{
    const uint8_t *codebook = table;
    int stage;

    memset(lsps, 0, num * sizeof(*lsps));

    for (stage = 0; stage < n_stages; stage++) {
        const uint8_t *entry  = codebook + values[stage] * num;
        const double   offset = base_q[stage];
        const double   scale  = mul_q[stage];
        int k = 0;

        while (k < num) {
            lsps[k] += offset + scale * entry[k];
            k++;
        }

        /* step over this stage's codebook */
        codebook += sizes[stage] * num;
    }
}
00846 
00858 static void dequant_lsp10i(GetBitContext *gb, double *lsps)
00859 {
00860     static const uint16_t vec_sizes[4] = { 256, 64, 32, 32 };
00861     static const double mul_lsf[4] = {
00862         5.2187144800e-3,    1.4626986422e-3,
00863         9.6179549166e-4,    1.1325736225e-3
00864     };
00865     static const double base_lsf[4] = {
00866         M_PI * -2.15522e-1, M_PI * -6.1646e-2,
00867         M_PI * -3.3486e-2,  M_PI * -5.7408e-2
00868     };
00869     uint16_t v[4];
00870 
00871     v[0] = get_bits(gb, 8);
00872     v[1] = get_bits(gb, 6);
00873     v[2] = get_bits(gb, 5);
00874     v[3] = get_bits(gb, 5);
00875 
00876     dequant_lsps(lsps, 10, v, vec_sizes, 4, wmavoice_dq_lsp10i,
00877                  mul_lsf, base_lsf);
00878 }
00879 
00884 static void dequant_lsp10r(GetBitContext *gb,
00885                            double *i_lsps, const double *old,
00886                            double *a1, double *a2, int q_mode)
00887 {
00888     static const uint16_t vec_sizes[3] = { 128, 64, 64 };
00889     static const double mul_lsf[3] = {
00890         2.5807601174e-3,    1.2354460219e-3,   1.1763821673e-3
00891     };
00892     static const double base_lsf[3] = {
00893         M_PI * -1.07448e-1, M_PI * -5.2706e-2, M_PI * -5.1634e-2
00894     };
00895     const float (*ipol_tab)[2][10] = q_mode ?
00896         wmavoice_lsp10_intercoeff_b : wmavoice_lsp10_intercoeff_a;
00897     uint16_t interpol, v[3];
00898     int n;
00899 
00900     dequant_lsp10i(gb, i_lsps);
00901 
00902     interpol = get_bits(gb, 5);
00903     v[0]     = get_bits(gb, 7);
00904     v[1]     = get_bits(gb, 6);
00905     v[2]     = get_bits(gb, 6);
00906 
00907     for (n = 0; n < 10; n++) {
00908         double delta = old[n] - i_lsps[n];
00909         a1[n]        = ipol_tab[interpol][0][n] * delta + i_lsps[n];
00910         a1[10 + n]   = ipol_tab[interpol][1][n] * delta + i_lsps[n];
00911     }
00912 
00913     dequant_lsps(a2, 20, v, vec_sizes, 3, wmavoice_dq_lsp10r,
00914                  mul_lsf, base_lsf);
00915 }
00916 
00920 static void dequant_lsp16i(GetBitContext *gb, double *lsps)
00921 {
00922     static const uint16_t vec_sizes[5] = { 256, 64, 128, 64, 128 };
00923     static const double mul_lsf[5] = {
00924         3.3439586280e-3,    6.9908173703e-4,
00925         3.3216608306e-3,    1.0334960326e-3,
00926         3.1899104283e-3
00927     };
00928     static const double base_lsf[5] = {
00929         M_PI * -1.27576e-1, M_PI * -2.4292e-2,
00930         M_PI * -1.28094e-1, M_PI * -3.2128e-2,
00931         M_PI * -1.29816e-1
00932     };
00933     uint16_t v[5];
00934 
00935     v[0] = get_bits(gb, 8);
00936     v[1] = get_bits(gb, 6);
00937     v[2] = get_bits(gb, 7);
00938     v[3] = get_bits(gb, 6);
00939     v[4] = get_bits(gb, 7);
00940 
00941     dequant_lsps( lsps,     5,  v,     vec_sizes,    2,
00942                  wmavoice_dq_lsp16i1,  mul_lsf,     base_lsf);
00943     dequant_lsps(&lsps[5],  5, &v[2], &vec_sizes[2], 2,
00944                  wmavoice_dq_lsp16i2, &mul_lsf[2], &base_lsf[2]);
00945     dequant_lsps(&lsps[10], 6, &v[4], &vec_sizes[4], 1,
00946                  wmavoice_dq_lsp16i3, &mul_lsf[4], &base_lsf[4]);
00947 }
00948 
00953 static void dequant_lsp16r(GetBitContext *gb,
00954                            double *i_lsps, const double *old,
00955                            double *a1, double *a2, int q_mode)
00956 {
00957     static const uint16_t vec_sizes[3] = { 128, 128, 128 };
00958     static const double mul_lsf[3] = {
00959         1.2232979501e-3,   1.4062241527e-3,   1.6114744851e-3
00960     };
00961     static const double base_lsf[3] = {
00962         M_PI * -5.5830e-2, M_PI * -5.2908e-2, M_PI * -5.4776e-2
00963     };
00964     const float (*ipol_tab)[2][16] = q_mode ?
00965         wmavoice_lsp16_intercoeff_b : wmavoice_lsp16_intercoeff_a;
00966     uint16_t interpol, v[3];
00967     int n;
00968 
00969     dequant_lsp16i(gb, i_lsps);
00970 
00971     interpol = get_bits(gb, 5);
00972     v[0]     = get_bits(gb, 7);
00973     v[1]     = get_bits(gb, 7);
00974     v[2]     = get_bits(gb, 7);
00975 
00976     for (n = 0; n < 16; n++) {
00977         double delta = old[n] - i_lsps[n];
00978         a1[n]        = ipol_tab[interpol][0][n] * delta + i_lsps[n];
00979         a1[16 + n]   = ipol_tab[interpol][1][n] * delta + i_lsps[n];
00980     }
00981 
00982     dequant_lsps( a2,     10,  v,     vec_sizes,    1,
00983                  wmavoice_dq_lsp16r1,  mul_lsf,     base_lsf);
00984     dequant_lsps(&a2[10], 10, &v[1], &vec_sizes[1], 1,
00985                  wmavoice_dq_lsp16r2, &mul_lsf[1], &base_lsf[1]);
00986     dequant_lsps(&a2[20], 12, &v[2], &vec_sizes[2], 1,
00987                  wmavoice_dq_lsp16r3, &mul_lsf[2], &base_lsf[2]);
00988 }
00989 
/**
 * Parse the position of the first pulse from the bitstream and derive, for
 * both blocks of the frame, how many pulses fall into the block and where
 * the first of them lies.
 *
 * Writes s->aw_idx_is_ext, s->aw_pulse_range, s->aw_n_pulses[] and
 * s->aw_first_pulse_off[].
 *
 * @param s     decoder context
 * @param gb    bit reader
 * @param pitch pitch of block 0 and block 1 (2 entries); pitch[0] is the
 *              step of the loop that moves a negative start offset forward,
 *              so it has to be positive for that loop to end
 */
01003 static void aw_parse_coords(WMAVoiceContext *s, GetBitContext *gb,
01004                             const int *pitch)
01005 {
01006     static const int16_t start_offset[94] = {
01007         -11,  -9,  -7,  -5,  -3,  -1,   1,   3,   5,   7,   9,  11,
01008          13,  15,  18,  17,  19,  20,  21,  22,  23,  24,  25,  26,
01009          27,  28,  29,  30,  31,  32,  33,  35,  37,  39,  41,  43,
01010          45,  47,  49,  51,  53,  55,  57,  59,  61,  63,  65,  67,
01011          69,  71,  73,  75,  77,  79,  81,  83,  85,  87,  89,  91,
01012          93,  95,  97,  99, 101, 103, 105, 107, 109, 111, 113, 115,
01013         117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139,
01014         141, 143, 145, 147, 149, 151, 153, 155, 157, 159
01015     };
01016     int bits, offset;
01017 
01018     /* position of pulse */
    /* 6-bit values below 54 index start_offset[] directly; values 54-63 are
     * extended with 2 more bits to 54 + (bits - 54) * 4 + extra, which is at
     * most 93 and therefore still inside start_offset[94] */
01019     s->aw_idx_is_ext = 0;
01020     if ((bits = get_bits(gb, 6)) >= 54) {
01021         s->aw_idx_is_ext = 1;
01022         bits += (bits - 54) * 3 + get_bits(gb, 2);
01023     }
01024 
01025     /* for a repeated pulse at pulse_off with a pitch_lag of pitch[], count
01026      * the distribution of the pulses in each block contained in this frame. */
01027     s->aw_pulse_range        = FFMIN(pitch[0], pitch[1]) > 32 ? 24 : 16;
    /* advance a negative start offset in steps of pitch[0] until it is >= 0 */
01028     for (offset = start_offset[bits]; offset < 0; offset += pitch[0]) ;
    /* number of pulses from offset up to the end of block 0, rounded up */
01029     s->aw_n_pulses[0]        = (pitch[0] - 1 + MAX_FRAMESIZE / 2 - offset) / pitch[0];
01030     s->aw_first_pulse_off[0] = offset - s->aw_pulse_range / 2;
01031     offset                  += s->aw_n_pulses[0] * pitch[0];
01032     s->aw_n_pulses[1]        = (pitch[1] - 1 + MAX_FRAMESIZE - offset) / pitch[1];
    /* same as for block 0, but relative to the start of block 1
     * (MAX_FRAMESIZE / 2) */
01033     s->aw_first_pulse_off[1] = offset - (MAX_FRAMESIZE + s->aw_pulse_range) / 2;
01034 
01035     /* if continuing from a position before the block, reset position to
01036      * start of block (when corrected for the range over which it can be
01037      * spread in aw_pulse_set1()). */
01038     if (start_offset[bits] < MAX_FRAMESIZE / 2) {
01039         while (s->aw_first_pulse_off[1] - pitch[1] + s->aw_pulse_range > 0)
01040             s->aw_first_pulse_off[1] -= pitch[1];
01041         if (start_offset[bits] < 0)
01042             while (s->aw_first_pulse_off[0] - pitch[0] + s->aw_pulse_range > 0)
01043                 s->aw_first_pulse_off[0] -= pitch[0];
01044     }
01045 }
01046 
01054 static void aw_pulse_set2(WMAVoiceContext *s, GetBitContext *gb,
01055                           int block_idx, AMRFixed *fcb)
01056 {
01057     uint16_t use_mask_mem[9]; // only 5 are used, rest is padding
01058     uint16_t *use_mask = use_mask_mem + 2;
01059     /* in this function, idx is the index in the 80-bit (+ padding) use_mask
01060      * bit-array. Since use_mask consists of 16-bit values, the lower 4 bits
01061      * of idx are the position of the bit within a particular item in the
01062      * array (0 being the most significant bit, and 15 being the least
01063      * significant bit), and the remainder (>> 4) is the index in the
01064      * use_mask[]-array. This is faster and uses less memory than using a
01065      * 80-byte/80-int array. */
01066     int pulse_off = s->aw_first_pulse_off[block_idx],
01067         pulse_start, n, idx, range, aidx, start_off = 0;
01068 
01069     /* set offset of first pulse to within this block */
01070     if (s->aw_n_pulses[block_idx] > 0)
01071         while (pulse_off + s->aw_pulse_range < 1)
01072             pulse_off += fcb->pitch_lag;
01073 
01074     /* find range per pulse */
01075     if (s->aw_n_pulses[0] > 0) {
01076         if (block_idx == 0) {
01077             range = 32;
01078         } else /* block_idx = 1 */ {
01079             range = 8;
01080             if (s->aw_n_pulses[block_idx] > 0)
01081                 pulse_off = s->aw_next_pulse_off_cache;
01082         }
01083     } else
01084         range = 16;
01085     pulse_start = s->aw_n_pulses[block_idx] > 0 ? pulse_off - range / 2 : 0;
01086 
01087     /* aw_pulse_set1() already applies pulses around pulse_off (to be exactly,
01088      * in the range of [pulse_off, pulse_off + s->aw_pulse_range], and thus
01089      * we exclude that range from being pulsed again in this function. */
01090     memset(&use_mask[-2], 0, 2 * sizeof(use_mask[0]));
01091     memset( use_mask,   -1, 5 * sizeof(use_mask[0]));
01092     memset(&use_mask[5], 0, 2 * sizeof(use_mask[0]));
01093     if (s->aw_n_pulses[block_idx] > 0)
01094         for (idx = pulse_off; idx < MAX_FRAMESIZE / 2; idx += fcb->pitch_lag) {
01095             int excl_range         = s->aw_pulse_range; // always 16 or 24
01096             uint16_t *use_mask_ptr = &use_mask[idx >> 4];
01097             int first_sh           = 16 - (idx & 15);
01098             *use_mask_ptr++       &= 0xFFFFu << first_sh;
01099             excl_range            -= first_sh;
01100             if (excl_range >= 16) {
01101                 *use_mask_ptr++    = 0;
01102                 *use_mask_ptr     &= 0xFFFF >> (excl_range - 16);
01103             } else
01104                 *use_mask_ptr     &= 0xFFFF >> excl_range;
01105         }
01106 
01107     /* find the 'aidx'th offset that is not excluded */
01108     aidx = get_bits(gb, s->aw_n_pulses[0] > 0 ? 5 - 2 * block_idx : 4);
01109     for (n = 0; n <= aidx; pulse_start++) {
01110         for (idx = pulse_start; idx < 0; idx += fcb->pitch_lag) ;
01111         if (idx >= MAX_FRAMESIZE / 2) { // find from zero
01112             if (use_mask[0])      idx = 0x0F;
01113             else if (use_mask[1]) idx = 0x1F;
01114             else if (use_mask[2]) idx = 0x2F;
01115             else if (use_mask[3]) idx = 0x3F;
01116             else if (use_mask[4]) idx = 0x4F;
01117             else                  return;
01118             idx -= av_log2_16bit(use_mask[idx >> 4]);
01119         }
01120         if (use_mask[idx >> 4] & (0x8000 >> (idx & 15))) {
01121             use_mask[idx >> 4] &= ~(0x8000 >> (idx & 15));
01122             n++;
01123             start_off = idx;
01124         }
01125     }
01126 
01127     fcb->x[fcb->n] = start_off;
01128     fcb->y[fcb->n] = get_bits1(gb) ? -1.0 : 1.0;
01129     fcb->n++;
01130 
01131     /* set offset for next block, relative to start of that block */
01132     n = (MAX_FRAMESIZE / 2 - start_off) % fcb->pitch_lag;
01133     s->aw_next_pulse_off_cache = n ? fcb->pitch_lag - n : 0;
01134 }
01135 
01143 static void aw_pulse_set1(WMAVoiceContext *s, GetBitContext *gb,
01144                           int block_idx, AMRFixed *fcb)
01145 {
01146     int val = get_bits(gb, 12 - 2 * (s->aw_idx_is_ext && !block_idx));
01147     float v;
01148 
01149     if (s->aw_n_pulses[block_idx] > 0) {
01150         int n, v_mask, i_mask, sh, n_pulses;
01151 
01152         if (s->aw_pulse_range == 24) { // 3 pulses, 1:sign + 3:index each
01153             n_pulses = 3;
01154             v_mask   = 8;
01155             i_mask   = 7;
01156             sh       = 4;
01157         } else { // 4 pulses, 1:sign + 2:index each
01158             n_pulses = 4;
01159             v_mask   = 4;
01160             i_mask   = 3;
01161             sh       = 3;
01162         }
01163 
01164         for (n = n_pulses - 1; n >= 0; n--, val >>= sh) {
01165             fcb->y[fcb->n] = (val & v_mask) ? -1.0 : 1.0;
01166             fcb->x[fcb->n] = (val & i_mask) * n_pulses + n +
01167                                  s->aw_first_pulse_off[block_idx];
01168             while (fcb->x[fcb->n] < 0)
01169                 fcb->x[fcb->n] += fcb->pitch_lag;
01170             if (fcb->x[fcb->n] < MAX_FRAMESIZE / 2)
01171                 fcb->n++;
01172         }
01173     } else {
01174         int num2 = (val & 0x1FF) >> 1, delta, idx;
01175 
01176         if (num2 < 1 * 79)      { delta = 1; idx = num2 + 1; }
01177         else if (num2 < 2 * 78) { delta = 3; idx = num2 + 1 - 1 * 77; }
01178         else if (num2 < 3 * 77) { delta = 5; idx = num2 + 1 - 2 * 76; }
01179         else                    { delta = 7; idx = num2 + 1 - 3 * 75; }
01180         v = (val & 0x200) ? -1.0 : 1.0;
01181 
01182         fcb->no_repeat_mask |= 3 << fcb->n;
01183         fcb->x[fcb->n]       = idx - delta;
01184         fcb->y[fcb->n]       = v;
01185         fcb->x[fcb->n + 1]   = idx;
01186         fcb->y[fcb->n + 1]   = (val & 1) ? -v : v;
01187         fcb->n              += 2;
01188     }
01189 }
01190 
/**
 * Derive a pseudo-random start offset into a 1000-entry table from the
 * frame counter and block number.
 *
 * @param frame_cntr current frame counter
 * @param block_num  block number within the frame
 * @param block_size number of samples that will be read from the offset
 * @return a value in [0, 1000 - block_size)
 */
static int pRNG(int frame_cntr, int block_num, int block_size)
{
    /* What is computed is
     *   y = (x % 9) * 5 + 6;
     *   z = (49995 * x) / y;
     * y can only take 9 values, so the division is replaced by a lookup:
     * for every y the row holds 49995 / y, followed by the FASTDIV-style
     * fixed-point form of (49995 % y) / y, so that
     *   z = x * (49995 / y) + x * ((49995 % y) / y). */
    static const unsigned int quot_frac[9][2] = {
        { 8332,  3 * 715827883U }, // y =  6
        { 4545,  0 * 390451573U }, // y = 11
        { 3124, 11 * 268435456U }, // y = 16
        { 2380, 15 * 204522253U }, // y = 21
        { 1922, 23 * 165191050U }, // y = 26
        { 1612, 23 * 138547333U }, // y = 31
        { 1388, 27 * 119304648U }, // y = 36
        { 1219, 16 * 104755300U }, // y = 41
        { 1086, 39 *  93368855U }  // y = 46
    };
    const unsigned int *row;
    unsigned int seed, rem9, scaled;

    seed = MUL16(block_num, 1877) + frame_cntr;
    /* seed is at most 8*1877+0xFFFE=0x13AA6, so one subtraction is
     * effectively a modulo (%) */
    if (seed >= 0xFFFF)
        seed -= 0xFFFF;

    rem9   = seed - 9 * MULH(477218589, seed); // seed % 9
    row    = quot_frac[rem9];
    // scaled = seed * 49995 / (rem9 * 5 + 6), truncated to 16 bits
    scaled = (uint16_t) (seed * row[0] + UMULH(seed, row[1]));

    return scaled % (1000 - block_size);
}
01235 
01240 static void synth_block_hardcoded(WMAVoiceContext *s, GetBitContext *gb,
01241                                  int block_idx, int size,
01242                                  const struct frame_type_desc *frame_desc,
01243                                  float *excitation)
01244 {
01245     float gain;
01246     int n, r_idx;
01247 
01248     av_assert0(size <= MAX_FRAMESIZE);
01249 
01250     /* Set the offset from which we start reading wmavoice_std_codebook */
01251     if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
01252         r_idx = pRNG(s->frame_cntr, block_idx, size);
01253         gain  = s->silence_gain;
01254     } else /* FCB_TYPE_HARDCODED */ {
01255         r_idx = get_bits(gb, 8);
01256         gain  = wmavoice_gain_universal[get_bits(gb, 6)];
01257     }
01258 
01259     /* Clear gain prediction parameters */
01260     memset(s->gain_pred_err, 0, sizeof(s->gain_pred_err));
01261 
01262     /* Apply gain to hardcoded codebook and use that as excitation signal */
01263     for (n = 0; n < size; n++)
01264         excitation[n] = wmavoice_std_codebook[r_idx + n] * gain;
01265 }
01266 
01271 static void synth_block_fcb_acb(WMAVoiceContext *s, GetBitContext *gb,
01272                                 int block_idx, int size,
01273                                 int block_pitch_sh2,
01274                                 const struct frame_type_desc *frame_desc,
01275                                 float *excitation)
01276 {
01277     static const float gain_coeff[6] = {
01278         0.8169, -0.06545, 0.1726, 0.0185, -0.0359, 0.0458
01279     };
01280     float pulses[MAX_FRAMESIZE / 2], pred_err, acb_gain, fcb_gain;
01281     int n, idx, gain_weight;
01282     AMRFixed fcb;
01283 
01284     av_assert0(size <= MAX_FRAMESIZE / 2);
01285     memset(pulses, 0, sizeof(*pulses) * size);
01286 
01287     fcb.pitch_lag      = block_pitch_sh2 >> 2;
01288     fcb.pitch_fac      = 1.0;
01289     fcb.no_repeat_mask = 0;
01290     fcb.n              = 0;
01291 
01292     /* For the other frame types, this is where we apply the innovation
01293      * (fixed) codebook pulses of the speech signal. */
01294     if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01295         aw_pulse_set1(s, gb, block_idx, &fcb);
01296         aw_pulse_set2(s, gb, block_idx, &fcb);
01297     } else /* FCB_TYPE_EXC_PULSES */ {
01298         int offset_nbits = 5 - frame_desc->log_n_blocks;
01299 
01300         fcb.no_repeat_mask = -1;
01301         /* similar to ff_decode_10_pulses_35bits(), but with single pulses
01302          * (instead of double) for a subset of pulses */
01303         for (n = 0; n < 5; n++) {
01304             float sign;
01305             int pos1, pos2;
01306 
01307             sign           = get_bits1(gb) ? 1.0 : -1.0;
01308             pos1           = get_bits(gb, offset_nbits);
01309             fcb.x[fcb.n]   = n + 5 * pos1;
01310             fcb.y[fcb.n++] = sign;
01311             if (n < frame_desc->dbl_pulses) {
01312                 pos2           = get_bits(gb, offset_nbits);
01313                 fcb.x[fcb.n]   = n + 5 * pos2;
01314                 fcb.y[fcb.n++] = (pos1 < pos2) ? -sign : sign;
01315             }
01316         }
01317     }
01318     ff_set_fixed_vector(pulses, &fcb, 1.0, size);
01319 
01320     /* Calculate gain for adaptive & fixed codebook signal.
01321      * see ff_amr_set_fixed_gain(). */
01322     idx = get_bits(gb, 7);
01323     fcb_gain = expf(ff_scalarproduct_float_c(s->gain_pred_err, gain_coeff, 6) -
01324                     5.2409161640 + wmavoice_gain_codebook_fcb[idx]);
01325     acb_gain = wmavoice_gain_codebook_acb[idx];
01326     pred_err = av_clipf(wmavoice_gain_codebook_fcb[idx],
01327                         -2.9957322736 /* log(0.05) */,
01328                          1.6094379124 /* log(5.0)  */);
01329 
01330     gain_weight = 8 >> frame_desc->log_n_blocks;
01331     memmove(&s->gain_pred_err[gain_weight], s->gain_pred_err,
01332             sizeof(*s->gain_pred_err) * (6 - gain_weight));
01333     for (n = 0; n < gain_weight; n++)
01334         s->gain_pred_err[n] = pred_err;
01335 
01336     /* Calculation of adaptive codebook */
01337     if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
01338         int len;
01339         for (n = 0; n < size; n += len) {
01340             int next_idx_sh16;
01341             int abs_idx    = block_idx * size + n;
01342             int pitch_sh16 = (s->last_pitch_val << 16) +
01343                              s->pitch_diff_sh16 * abs_idx;
01344             int pitch      = (pitch_sh16 + 0x6FFF) >> 16;
01345             int idx_sh16   = ((pitch << 16) - pitch_sh16) * 8 + 0x58000;
01346             idx            = idx_sh16 >> 16;
01347             if (s->pitch_diff_sh16) {
01348                 if (s->pitch_diff_sh16 > 0) {
01349                     next_idx_sh16 = (idx_sh16) &~ 0xFFFF;
01350                 } else
01351                     next_idx_sh16 = (idx_sh16 + 0x10000) &~ 0xFFFF;
01352                 len = av_clip((idx_sh16 - next_idx_sh16) / s->pitch_diff_sh16 / 8,
01353                               1, size - n);
01354             } else
01355                 len = size;
01356 
01357             ff_acelp_interpolatef(&excitation[n], &excitation[n - pitch],
01358                                   wmavoice_ipol1_coeffs, 17,
01359                                   idx, 9, len);
01360         }
01361     } else /* ACB_TYPE_HAMMING */ {
01362         int block_pitch = block_pitch_sh2 >> 2;
01363         idx             = block_pitch_sh2 & 3;
01364         if (idx) {
01365             ff_acelp_interpolatef(excitation, &excitation[-block_pitch],
01366                                   wmavoice_ipol2_coeffs, 4,
01367                                   idx, 8, size);
01368         } else
01369             av_memcpy_backptr((uint8_t *) excitation, sizeof(float) * block_pitch,
01370                               sizeof(float) * size);
01371     }
01372 
01373     /* Interpolate ACB/FCB and use as excitation signal */
01374     ff_weighted_vector_sumf(excitation, excitation, pulses,
01375                             acb_gain, fcb_gain, size);
01376 }
01377 
01394 static void synth_block(WMAVoiceContext *s, GetBitContext *gb,
01395                         int block_idx, int size,
01396                         int block_pitch_sh2,
01397                         const double *lsps, const double *prev_lsps,
01398                         const struct frame_type_desc *frame_desc,
01399                         float *excitation, float *synth)
01400 {
01401     double i_lsps[MAX_LSPS];
01402     float lpcs[MAX_LSPS];
01403     float fac;
01404     int n;
01405 
01406     if (frame_desc->acb_type == ACB_TYPE_NONE)
01407         synth_block_hardcoded(s, gb, block_idx, size, frame_desc, excitation);
01408     else
01409         synth_block_fcb_acb(s, gb, block_idx, size, block_pitch_sh2,
01410                             frame_desc, excitation);
01411 
01412     /* convert interpolated LSPs to LPCs */
01413     fac = (block_idx + 0.5) / frame_desc->n_blocks;
01414     for (n = 0; n < s->lsps; n++) // LSF -> LSP
01415         i_lsps[n] = cos(prev_lsps[n] + fac * (lsps[n] - prev_lsps[n]));
01416     ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01417 
01418     /* Speech synthesis */
01419     ff_celp_lp_synthesis_filterf(synth, lpcs, excitation, size, s->lsps);
01420 }
01421 
01437 static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx,
01438                        float *samples,
01439                        const double *lsps, const double *prev_lsps,
01440                        float *excitation, float *synth)
01441 {
01442     WMAVoiceContext *s = ctx->priv_data;
01443     int n, n_blocks_x2, log_n_blocks_x2, cur_pitch_val;
01444     int pitch[MAX_BLOCKS], last_block_pitch;
01445 
01446     /* Parse frame type ("frame header"), see frame_descs */
01447     int bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)], block_nsamples;
01448 
01449     if (bd_idx < 0) {
01450         av_log(ctx, AV_LOG_ERROR,
01451                "Invalid frame type VLC code, skipping\n");
01452         return -1;
01453     }
01454 
01455     block_nsamples = MAX_FRAMESIZE / frame_descs[bd_idx].n_blocks;
01456 
01457     /* Pitch calculation for ACB_TYPE_ASYMMETRIC ("pitch-per-frame") */
01458     if (frame_descs[bd_idx].acb_type == ACB_TYPE_ASYMMETRIC) {
01459         /* Pitch is provided per frame, which is interpreted as the pitch of
01460          * the last sample of the last block of this frame. We can interpolate
01461          * the pitch of other blocks (and even pitch-per-sample) by gradually
01462          * incrementing/decrementing prev_frame_pitch to cur_pitch_val. */
01463         n_blocks_x2      = frame_descs[bd_idx].n_blocks << 1;
01464         log_n_blocks_x2  = frame_descs[bd_idx].log_n_blocks + 1;
01465         cur_pitch_val    = s->min_pitch_val + get_bits(gb, s->pitch_nbits);
01466         cur_pitch_val    = FFMIN(cur_pitch_val, s->max_pitch_val - 1);
01467         if (s->last_acb_type == ACB_TYPE_NONE ||
01468             20 * abs(cur_pitch_val - s->last_pitch_val) >
01469                 (cur_pitch_val + s->last_pitch_val))
01470             s->last_pitch_val = cur_pitch_val;
01471 
01472         /* pitch per block */
01473         for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
01474             int fac = n * 2 + 1;
01475 
01476             pitch[n] = (MUL16(fac,                 cur_pitch_val) +
01477                         MUL16((n_blocks_x2 - fac), s->last_pitch_val) +
01478                         frame_descs[bd_idx].n_blocks) >> log_n_blocks_x2;
01479         }
01480 
01481         /* "pitch-diff-per-sample" for calculation of pitch per sample */
01482         s->pitch_diff_sh16 =
01483             ((cur_pitch_val - s->last_pitch_val) << 16) / MAX_FRAMESIZE;
01484     }
01485 
01486     /* Global gain (if silence) and pitch-adaptive window coordinates */
01487     switch (frame_descs[bd_idx].fcb_type) {
01488     case FCB_TYPE_SILENCE:
01489         s->silence_gain = wmavoice_gain_silence[get_bits(gb, 8)];
01490         break;
01491     case FCB_TYPE_AW_PULSES:
01492         aw_parse_coords(s, gb, pitch);
01493         break;
01494     }
01495 
01496     for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
01497         int bl_pitch_sh2;
01498 
01499         /* Pitch calculation for ACB_TYPE_HAMMING ("pitch-per-block") */
01500         switch (frame_descs[bd_idx].acb_type) {
01501         case ACB_TYPE_HAMMING: {
01502             /* Pitch is given per block. Per-block pitches are encoded as an
01503              * absolute value for the first block, and then delta values
01504              * relative to this value) for all subsequent blocks. The scale of
01505              * this pitch value is semi-logaritmic compared to its use in the
01506              * decoder, so we convert it to normal scale also. */
01507             int block_pitch,
01508                 t1 = (s->block_conv_table[1] - s->block_conv_table[0]) << 2,
01509                 t2 = (s->block_conv_table[2] - s->block_conv_table[1]) << 1,
01510                 t3 =  s->block_conv_table[3] - s->block_conv_table[2] + 1;
01511 
01512             if (n == 0) {
01513                 block_pitch = get_bits(gb, s->block_pitch_nbits);
01514             } else
01515                 block_pitch = last_block_pitch - s->block_delta_pitch_hrange +
01516                                  get_bits(gb, s->block_delta_pitch_nbits);
01517             /* Convert last_ so that any next delta is within _range */
01518             last_block_pitch = av_clip(block_pitch,
01519                                        s->block_delta_pitch_hrange,
01520                                        s->block_pitch_range -
01521                                            s->block_delta_pitch_hrange);
01522 
01523             /* Convert semi-log-style scale back to normal scale */
01524             if (block_pitch < t1) {
01525                 bl_pitch_sh2 = (s->block_conv_table[0] << 2) + block_pitch;
01526             } else {
01527                 block_pitch -= t1;
01528                 if (block_pitch < t2) {
01529                     bl_pitch_sh2 =
01530                         (s->block_conv_table[1] << 2) + (block_pitch << 1);
01531                 } else {
01532                     block_pitch -= t2;
01533                     if (block_pitch < t3) {
01534                         bl_pitch_sh2 =
01535                             (s->block_conv_table[2] + block_pitch) << 2;
01536                     } else
01537                         bl_pitch_sh2 = s->block_conv_table[3] << 2;
01538                 }
01539             }
01540             pitch[n] = bl_pitch_sh2 >> 2;
01541             break;
01542         }
01543 
01544         case ACB_TYPE_ASYMMETRIC: {
01545             bl_pitch_sh2 = pitch[n] << 2;
01546             break;
01547         }
01548 
01549         default: // ACB_TYPE_NONE has no pitch
01550             bl_pitch_sh2 = 0;
01551             break;
01552         }
01553 
01554         synth_block(s, gb, n, block_nsamples, bl_pitch_sh2,
01555                     lsps, prev_lsps, &frame_descs[bd_idx],
01556                     &excitation[n * block_nsamples],
01557                     &synth[n * block_nsamples]);
01558     }
01559 
01560     /* Averaging projection filter, if applicable. Else, just copy samples
01561      * from synthesis buffer */
01562     if (s->do_apf) {
01563         double i_lsps[MAX_LSPS];
01564         float lpcs[MAX_LSPS];
01565 
01566         for (n = 0; n < s->lsps; n++) // LSF -> LSP
01567             i_lsps[n] = cos(0.5 * (prev_lsps[n] + lsps[n]));
01568         ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01569         postfilter(s, synth, samples, 80, lpcs,
01570                    &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx],
01571                    frame_descs[bd_idx].fcb_type, pitch[0]);
01572 
01573         for (n = 0; n < s->lsps; n++) // LSF -> LSP
01574             i_lsps[n] = cos(lsps[n]);
01575         ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01576         postfilter(s, &synth[80], &samples[80], 80, lpcs,
01577                    &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx + 80],
01578                    frame_descs[bd_idx].fcb_type, pitch[0]);
01579     } else
01580         memcpy(samples, synth, 160 * sizeof(synth[0]));
01581 
01582     /* Cache values for next frame */
01583     s->frame_cntr++;
01584     if (s->frame_cntr >= 0xFFFF) s->frame_cntr -= 0xFFFF; // i.e. modulo (%)
01585     s->last_acb_type = frame_descs[bd_idx].acb_type;
01586     switch (frame_descs[bd_idx].acb_type) {
01587     case ACB_TYPE_NONE:
01588         s->last_pitch_val = 0;
01589         break;
01590     case ACB_TYPE_ASYMMETRIC:
01591         s->last_pitch_val = cur_pitch_val;
01592         break;
01593     case ACB_TYPE_HAMMING:
01594         s->last_pitch_val = pitch[frame_descs[bd_idx].n_blocks - 1];
01595         break;
01596     }
01597 
01598     return 0;
01599 }
01600 
/**
 * Make a set of LSF values usable for filtering: enforce a minimum for the
 * first value, a minimum spacing between neighbours and a maximum for the
 * last value, then make sure the result is in ascending order.
 *
 * @param lsps array of num LSF values, modified in place
 * @param num  number of values in lsps
 */
static void stabilize_lsps(double *lsps, int num)
{
    const double first_min = 0.0015 * M_PI;
    const double min_gap   = 0.0125 * M_PI;
    const double last_max  = 0.9985 * M_PI;
    int i, j, unordered = 0;

    /* Clamp values. Very similar to ff_set_min_dist_lsf(), but in double. */
    if (!(lsps[0] > first_min))
        lsps[0] = first_min;
    for (i = 1; i < num; i++) {
        double lowest = lsps[i - 1] + min_gap;

        if (!(lsps[i] > lowest))
            lsps[i] = lowest;
    }
    if (lsps[num - 1] > last_max)
        lsps[num - 1] = last_max;

    /* Nothing left to do unless some neighbouring pair is out of order */
    for (i = 1; i < num && !unordered; i++)
        unordered = lsps[i] < lsps[i - 1];
    if (!unordered)
        return;

    /* One insertion-sort pass over the whole array. Very similar to
     * ff_sort_nearly_sorted_floats(), but in double. */
    for (i = 1; i < num; i++) {
        double cur = lsps[i];

        for (j = i; j > 0 && !(lsps[j - 1] <= cur); j--)
            lsps[j] = lsps[j - 1];
        lsps[j] = cur;
    }
}
01641 
01651 static int check_bits_for_superframe(GetBitContext *orig_gb,
01652                                      WMAVoiceContext *s)
01653 {
01654     GetBitContext s_gb, *gb = &s_gb;
01655     int n, need_bits, bd_idx;
01656     const struct frame_type_desc *frame_desc;
01657 
01658     /* initialize a copy */
01659     init_get_bits(gb, orig_gb->buffer, orig_gb->size_in_bits);
01660     skip_bits_long(gb, get_bits_count(orig_gb));
01661     av_assert1(get_bits_left(gb) == get_bits_left(orig_gb));
01662 
01663     /* superframe header */
01664     if (get_bits_left(gb) < 14)
01665         return 1;
01666     if (!get_bits1(gb))
01667         return -1;                        // WMAPro-in-WMAVoice superframe
01668     if (get_bits1(gb)) skip_bits(gb, 12); // number of  samples in superframe
01669     if (s->has_residual_lsps) {           // residual LSPs (for all frames)
01670         if (get_bits_left(gb) < s->sframe_lsp_bitsize)
01671             return 1;
01672         skip_bits_long(gb, s->sframe_lsp_bitsize);
01673     }
01674 
01675     /* frames */
01676     for (n = 0; n < MAX_FRAMES; n++) {
01677         int aw_idx_is_ext = 0;
01678 
01679         if (!s->has_residual_lsps) {     // independent LSPs (per-frame)
01680            if (get_bits_left(gb) < s->frame_lsp_bitsize) return 1;
01681            skip_bits_long(gb, s->frame_lsp_bitsize);
01682         }
01683         bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)];
01684         if (bd_idx < 0)
01685             return -1;                   // invalid frame type VLC code
01686         frame_desc = &frame_descs[bd_idx];
01687         if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
01688             if (get_bits_left(gb) < s->pitch_nbits)
01689                 return 1;
01690             skip_bits_long(gb, s->pitch_nbits);
01691         }
01692         if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
01693             skip_bits(gb, 8);
01694         } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01695             int tmp = get_bits(gb, 6);
01696             if (tmp >= 0x36) {
01697                 skip_bits(gb, 2);
01698                 aw_idx_is_ext = 1;
01699             }
01700         }
01701 
01702         /* blocks */
01703         if (frame_desc->acb_type == ACB_TYPE_HAMMING) {
01704             need_bits = s->block_pitch_nbits +
01705                 (frame_desc->n_blocks - 1) * s->block_delta_pitch_nbits;
01706         } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01707             need_bits = 2 * !aw_idx_is_ext;
01708         } else
01709             need_bits = 0;
01710         need_bits += frame_desc->frame_size;
01711         if (get_bits_left(gb) < need_bits)
01712             return 1;
01713         skip_bits_long(gb, need_bits);
01714     }
01715 
01716     return 0;
01717 }
01718 
01736 static int synth_superframe(AVCodecContext *ctx, int *got_frame_ptr)
01737 {
01738     WMAVoiceContext *s = ctx->priv_data;
01739     GetBitContext *gb = &s->gb, s_gb;
01740     int n, res, n_samples = 480;
01741     double lsps[MAX_FRAMES][MAX_LSPS];
01742     const double *mean_lsf = s->lsps == 16 ?
01743         wmavoice_mean_lsf16[s->lsp_def_mode] : wmavoice_mean_lsf10[s->lsp_def_mode];
01744     float excitation[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE + 12];
01745     float synth[MAX_LSPS + MAX_SFRAMESIZE];
01746     float *samples;
01747 
01748     memcpy(synth,      s->synth_history,
01749            s->lsps             * sizeof(*synth));
01750     memcpy(excitation, s->excitation_history,
01751            s->history_nsamples * sizeof(*excitation));
01752 
01753     if (s->sframe_cache_size > 0) {
01754         gb = &s_gb;
01755         init_get_bits(gb, s->sframe_cache, s->sframe_cache_size);
01756         s->sframe_cache_size = 0;
01757     }
01758 
01759     if ((res = check_bits_for_superframe(gb, s)) == 1) {
01760         *got_frame_ptr = 0;
01761         return 1;
01762     }
01763 
01764     /* First bit is speech/music bit, it differentiates between WMAVoice
01765      * speech samples (the actual codec) and WMAVoice music samples, which
01766      * are really WMAPro-in-WMAVoice-superframes. I've never seen those in
01767      * the wild yet. */
01768     if (!get_bits1(gb)) {
01769         av_log_missing_feature(ctx, "WMAPro-in-WMAVoice", 1);
01770         return AVERROR_PATCHWELCOME;
01771     }
01772 
01773     /* (optional) nr. of samples in superframe; always <= 480 and >= 0 */
01774     if (get_bits1(gb)) {
01775         if ((n_samples = get_bits(gb, 12)) > 480) {
01776             av_log(ctx, AV_LOG_ERROR,
01777                    "Superframe encodes >480 samples (%d), not allowed\n",
01778                    n_samples);
01779             return -1;
01780         }
01781     }
01782     /* Parse LSPs, if global for the superframe (can also be per-frame). */
01783     if (s->has_residual_lsps) {
01784         double prev_lsps[MAX_LSPS], a1[MAX_LSPS * 2], a2[MAX_LSPS * 2];
01785 
01786         for (n = 0; n < s->lsps; n++)
01787             prev_lsps[n] = s->prev_lsps[n] - mean_lsf[n];
01788 
01789         if (s->lsps == 10) {
01790             dequant_lsp10r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
01791         } else /* s->lsps == 16 */
01792             dequant_lsp16r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
01793 
01794         for (n = 0; n < s->lsps; n++) {
01795             lsps[0][n]  = mean_lsf[n] + (a1[n]           - a2[n * 2]);
01796             lsps[1][n]  = mean_lsf[n] + (a1[s->lsps + n] - a2[n * 2 + 1]);
01797             lsps[2][n] += mean_lsf[n];
01798         }
01799         for (n = 0; n < 3; n++)
01800             stabilize_lsps(lsps[n], s->lsps);
01801     }
01802 
01803     /* get output buffer */
01804     s->frame.nb_samples = 480;
01805     if ((res = ff_get_buffer(ctx, &s->frame)) < 0) {
01806         av_log(ctx, AV_LOG_ERROR, "get_buffer() failed\n");
01807         return res;
01808     }
01809     s->frame.nb_samples = n_samples;
01810     samples = (float *)s->frame.data[0];
01811 
01812     /* Parse frames, optionally preceded by per-frame (independent) LSPs. */
01813     for (n = 0; n < 3; n++) {
01814         if (!s->has_residual_lsps) {
01815             int m;
01816 
01817             if (s->lsps == 10) {
01818                 dequant_lsp10i(gb, lsps[n]);
01819             } else /* s->lsps == 16 */
01820                 dequant_lsp16i(gb, lsps[n]);
01821 
01822             for (m = 0; m < s->lsps; m++)
01823                 lsps[n][m] += mean_lsf[m];
01824             stabilize_lsps(lsps[n], s->lsps);
01825         }
01826 
01827         if ((res = synth_frame(ctx, gb, n,
01828                                &samples[n * MAX_FRAMESIZE],
01829                                lsps[n], n == 0 ? s->prev_lsps : lsps[n - 1],
01830                                &excitation[s->history_nsamples + n * MAX_FRAMESIZE],
01831                                &synth[s->lsps + n * MAX_FRAMESIZE]))) {
01832             *got_frame_ptr = 0;
01833             return res;
01834         }
01835     }
01836 
01837     /* Statistics? FIXME - we don't check for length, a slight overrun
01838      * will be caught by internal buffer padding, and anything else
01839      * will be skipped, not read. */
01840     if (get_bits1(gb)) {
01841         res = get_bits(gb, 4);
01842         skip_bits(gb, 10 * (res + 1));
01843     }
01844 
01845     *got_frame_ptr = 1;
01846 
01847     /* Update history */
01848     memcpy(s->prev_lsps,           lsps[2],
01849            s->lsps             * sizeof(*s->prev_lsps));
01850     memcpy(s->synth_history,      &synth[MAX_SFRAMESIZE],
01851            s->lsps             * sizeof(*synth));
01852     memcpy(s->excitation_history, &excitation[MAX_SFRAMESIZE],
01853            s->history_nsamples * sizeof(*excitation));
01854     if (s->do_apf)
01855         memmove(s->zero_exc_pf,       &s->zero_exc_pf[MAX_SFRAMESIZE],
01856                 s->history_nsamples * sizeof(*s->zero_exc_pf));
01857 
01858     return 0;
01859 }
01860 
01868 static int parse_packet_header(WMAVoiceContext *s)
01869 {
01870     GetBitContext *gb = &s->gb;
01871     unsigned int res;
01872 
01873     if (get_bits_left(gb) < 11)
01874         return 1;
01875     skip_bits(gb, 4);          // packet sequence number
01876     s->has_residual_lsps = get_bits1(gb);
01877     do {
01878         res = get_bits(gb, 6); // number of superframes per packet
01879                                // (minus first one if there is spillover)
01880         if (get_bits_left(gb) < 6 * (res == 0x3F) + s->spillover_bitsize)
01881             return 1;
01882     } while (res == 0x3F);
01883     s->spillover_nbits   = get_bits(gb, s->spillover_bitsize);
01884 
01885     return 0;
01886 }
01887 
01903 static void copy_bits(PutBitContext *pb,
01904                       const uint8_t *data, int size,
01905                       GetBitContext *gb, int nbits)
01906 {
01907     int rmn_bytes, rmn_bits;
01908 
01909     rmn_bits = rmn_bytes = get_bits_left(gb);
01910     if (rmn_bits < nbits)
01911         return;
01912     if (nbits > pb->size_in_bits - put_bits_count(pb))
01913         return;
01914     rmn_bits &= 7; rmn_bytes >>= 3;
01915     if ((rmn_bits = FFMIN(rmn_bits, nbits)) > 0)
01916         put_bits(pb, rmn_bits, get_bits(gb, rmn_bits));
01917     avpriv_copy_bits(pb, data + size - rmn_bytes,
01918                  FFMIN(nbits - rmn_bits, rmn_bytes << 3));
01919 }
01920 
01932 static int wmavoice_decode_packet(AVCodecContext *ctx, void *data,
01933                                   int *got_frame_ptr, AVPacket *avpkt)
01934 {
01935     WMAVoiceContext *s = ctx->priv_data;
01936     GetBitContext *gb = &s->gb;
01937     int size, res, pos;
01938 
01939     /* Packets are sometimes a multiple of ctx->block_align, with a packet
01940      * header at each ctx->block_align bytes. However, FFmpeg's ASF demuxer
01941      * feeds us ASF packets, which may concatenate multiple "codec" packets
01942      * in a single "muxer" packet, so we artificially emulate that by
01943      * capping the packet size at ctx->block_align. */
01944     for (size = avpkt->size; size > ctx->block_align; size -= ctx->block_align);
01945     if (!size) {
01946         *got_frame_ptr = 0;
01947         return 0;
01948     }
01949     init_get_bits(&s->gb, avpkt->data, size << 3);
01950 
01951     /* size == ctx->block_align is used to indicate whether we are dealing with
01952      * a new packet or a packet of which we already read the packet header
01953      * previously. */
01954     if (size == ctx->block_align) { // new packet header
01955         if ((res = parse_packet_header(s)) < 0)
01956             return res;
01957 
01958         /* If the packet header specifies a s->spillover_nbits, then we want
01959          * to push out all data of the previous packet (+ spillover) before
01960          * continuing to parse new superframes in the current packet. */
01961         if (s->spillover_nbits > 0) {
01962             if (s->sframe_cache_size > 0) {
01963                 int cnt = get_bits_count(gb);
01964                 copy_bits(&s->pb, avpkt->data, size, gb, s->spillover_nbits);
01965                 flush_put_bits(&s->pb);
01966                 s->sframe_cache_size += s->spillover_nbits;
01967                 if ((res = synth_superframe(ctx, got_frame_ptr)) == 0 &&
01968                     *got_frame_ptr) {
01969                     cnt += s->spillover_nbits;
01970                     s->skip_bits_next = cnt & 7;
01971                     *(AVFrame *)data = s->frame;
01972                     return cnt >> 3;
01973                 } else
01974                     skip_bits_long (gb, s->spillover_nbits - cnt +
01975                                     get_bits_count(gb)); // resync
01976             } else
01977                 skip_bits_long(gb, s->spillover_nbits);  // resync
01978         }
01979     } else if (s->skip_bits_next)
01980         skip_bits(gb, s->skip_bits_next);
01981 
01982     /* Try parsing superframes in current packet */
01983     s->sframe_cache_size = 0;
01984     s->skip_bits_next = 0;
01985     pos = get_bits_left(gb);
01986     if ((res = synth_superframe(ctx, got_frame_ptr)) < 0) {
01987         return res;
01988     } else if (*got_frame_ptr) {
01989         int cnt = get_bits_count(gb);
01990         s->skip_bits_next = cnt & 7;
01991         *(AVFrame *)data = s->frame;
01992         return cnt >> 3;
01993     } else if ((s->sframe_cache_size = pos) > 0) {
01994         /* rewind bit reader to start of last (incomplete) superframe... */
01995         init_get_bits(gb, avpkt->data, size << 3);
01996         skip_bits_long(gb, (size << 3) - pos);
01997         av_assert1(get_bits_left(gb) == pos);
01998 
01999         /* ...and cache it for spillover in next packet */
02000         init_put_bits(&s->pb, s->sframe_cache, SFRAME_CACHE_MAXSIZE);
02001         copy_bits(&s->pb, avpkt->data, size, gb, s->sframe_cache_size);
02002         // FIXME bad - just copy bytes as whole and add use the
02003         // skip_bits_next field
02004     }
02005 
02006     return size;
02007 }
02008 
02009 static av_cold int wmavoice_decode_end(AVCodecContext *ctx)
02010 {
02011     WMAVoiceContext *s = ctx->priv_data;
02012 
02013     if (s->do_apf) {
02014         ff_rdft_end(&s->rdft);
02015         ff_rdft_end(&s->irdft);
02016         ff_dct_end(&s->dct);
02017         ff_dct_end(&s->dst);
02018     }
02019 
02020     return 0;
02021 }
02022 
02023 static av_cold void wmavoice_flush(AVCodecContext *ctx)
02024 {
02025     WMAVoiceContext *s = ctx->priv_data;
02026     int n;
02027 
02028     s->postfilter_agc    = 0;
02029     s->sframe_cache_size = 0;
02030     s->skip_bits_next    = 0;
02031     for (n = 0; n < s->lsps; n++)
02032         s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
02033     memset(s->excitation_history, 0,
02034            sizeof(*s->excitation_history) * MAX_SIGNAL_HISTORY);
02035     memset(s->synth_history,      0,
02036            sizeof(*s->synth_history)      * MAX_LSPS);
02037     memset(s->gain_pred_err,      0,
02038            sizeof(s->gain_pred_err));
02039 
02040     if (s->do_apf) {
02041         memset(&s->synth_filter_out_buf[MAX_LSPS_ALIGN16 - s->lsps], 0,
02042                sizeof(*s->synth_filter_out_buf) * s->lsps);
02043         memset(s->dcf_mem,              0,
02044                sizeof(*s->dcf_mem)              * 2);
02045         memset(s->zero_exc_pf,          0,
02046                sizeof(*s->zero_exc_pf)          * s->history_nsamples);
02047         memset(s->denoise_filter_cache, 0, sizeof(s->denoise_filter_cache));
02048     }
02049 }
02050 
02051 AVCodec ff_wmavoice_decoder = {
02052     .name           = "wmavoice",
02053     .type           = AVMEDIA_TYPE_AUDIO,
02054     .id             = AV_CODEC_ID_WMAVOICE,
02055     .priv_data_size = sizeof(WMAVoiceContext),
02056     .init           = wmavoice_decode_init,
02057     .close          = wmavoice_decode_end,
02058     .decode         = wmavoice_decode_packet,
02059     .capabilities   = CODEC_CAP_SUBFRAMES | CODEC_CAP_DR1,
02060     .flush          = wmavoice_flush,
02061     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Audio Voice"),
02062 };