00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
/* Must be defined before get_bits.h is included below.
 * NOTE(review): presumably selects the bitstream reader variant without
 * per-read bounds checks -- confirm against get_bits.h. */
00028 #define UNCHECKED_BITSTREAM_READER 1
00029
00030 #include <math.h>
00031
00032 #include "libavutil/channel_layout.h"
00033 #include "libavutil/mem.h"
00034 #include "dsputil.h"
00035 #include "avcodec.h"
00036 #include "internal.h"
00037 #include "get_bits.h"
00038 #include "put_bits.h"
00039 #include "wmavoice_data.h"
00040 #include "celp_filters.h"
00041 #include "acelp_vectors.h"
00042 #include "acelp_filters.h"
00043 #include "lsp.h"
00044 #include "dct.h"
00045 #include "rdft.h"
00046 #include "sinewin.h"
00047
/* Upper bound on blocks per frame; the largest n_blocks in frame_descs[]. */
00048 #define MAX_BLOCKS 8
/* Upper bound on the number of LSPs; s->lsps is set to either 10 or 16. */
00049 #define MAX_LSPS 16
/* Number of history slots reserved in front of synth_filter_out_buf's payload. */
00050 #define MAX_LSPS_ALIGN16 16
00051
/* Frames per superframe multiplier used to size MAX_SFRAMESIZE. */
00052 #define MAX_FRAMES 3
/* Samples per frame; a block is MAX_FRAMESIZE / n_blocks samples long. */
00053 #define MAX_FRAMESIZE 160
/* Upper bound for history_nsamples (max_pitch_val + 8), enforced at init. */
00054 #define MAX_SIGNAL_HISTORY 416
/* Samples in MAX_FRAMES frames. */
00055 #define MAX_SFRAMESIZE (MAX_FRAMESIZE * MAX_FRAMES)
/* Payload size in bytes of WMAVoiceContext.sframe_cache (padding is added on top). */
00057 #define SFRAME_CACHE_MAXSIZE 256
00058
/* Table lookup width passed to INIT_VLC_STATIC for frame_type_vlc. */
00059 #define VLC_NBITS 6
00060
00061
/* Frame type VLC; initialized in decode_vbmtree(), read in synth_frame(). */
00064 static VLC frame_type_vlc;
00065
/**
 * Adaptive codebook variants referenced by frame_descs[].acb_type.
 * Values are consecutive starting at zero.
 */
enum {
    /* no adaptive codebook: block is synthesized by synth_block_hardcoded() */
    ACB_TYPE_NONE = 0,
    /* per-sample interpolated pitch, filtered with wmavoice_ipol1_coeffs */
    ACB_TYPE_ASYMMETRIC,
    /* one pitch value per block, filtered with wmavoice_ipol2_coeffs */
    ACB_TYPE_HAMMING
};
00080
/**
 * Fixed codebook variants referenced by frame_descs[].fcb_type.
 * The numeric order matters: postfilter() tests
 * fcb_type >= FCB_TYPE_AW_PULSES.
 */
enum {
    /* codebook start index comes from pRNG(), gain from silence_gain */
    FCB_TYPE_SILENCE = 0,
    /* codebook start index and gain index are read from the bitstream */
    FCB_TYPE_HARDCODED,
    /* pulses are produced by aw_pulse_set1() and aw_pulse_set2() */
    FCB_TYPE_AW_PULSES,
    /* five pulse tracks with explicitly coded sign and position */
    FCB_TYPE_EXC_PULSES,
};
00096
/**
 * Per-frame-type decoding parameters. The table is indexed by the frame
 * type looked up through WMAVoiceContext.vbm_tree in synth_frame().
 */
00100 static const struct frame_type_desc {
/* Number of blocks in the frame; block length is MAX_FRAMESIZE / n_blocks. */
00101 uint8_t n_blocks;
00102
/* log2(n_blocks); used as "5 - log_n_blocks" and "8 >> log_n_blocks". */
00103 uint8_t log_n_blocks;
/* Adaptive codebook type (ACB_TYPE_*). */
00104 uint8_t acb_type;
/* Fixed codebook type (FCB_TYPE_*). */
00105 uint8_t fcb_type;
/* Of the five pulse tracks, how many (the first dbl_pulses) carry a
 * second pulse; see synth_block_fcb_acb(). */
00106 uint8_t dbl_pulses;
00107
00108
/* NOTE(review): not referenced in the visible code; presumably the coded
 * frame size in bits -- confirm against the frame parser. */
00109 uint16_t frame_size;
00110
00111 } frame_descs[17] = {
00112 { 1, 0, ACB_TYPE_NONE, FCB_TYPE_SILENCE, 0, 0 },
00113 { 2, 1, ACB_TYPE_NONE, FCB_TYPE_HARDCODED, 0, 28 },
00114 { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_AW_PULSES, 0, 46 },
00115 { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2, 80 },
00116 { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 104 },
00117 { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 0, 108 },
00118 { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2, 132 },
00119 { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 168 },
00120 { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 64 },
00121 { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 80 },
00122 { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 104 },
00123 { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 108 },
00124 { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 132 },
00125 { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 168 },
00126 { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 176 },
00127 { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 208 },
00128 { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 256 }
00129 };
00130
/**
 * Decoder private context (ctx->priv_data). The first group of fields is
 * derived from the extradata / codec parameters in wmavoice_decode_init();
 * the remaining fields hold state carried between blocks and frames.
 */
00134 typedef struct {
/* Frame object published as ctx->coded_frame at init. */
00139 AVFrame frame;
/* Bit reader; at init it is pointed at extradata + 22 to read the VBM tree. */
00140 GetBitContext gb;
00141
00142
00143
/* Filled by decode_vbmtree(): maps a frame_type_vlc symbol to an index
 * into frame_descs[]; unused slots hold -1. */
00144 int8_t vbm_tree[25];
00145
/* 3 + ceil(log2(block_align)). */
00146 int spillover_bitsize;
00147
00148
/* max_pitch_val + 8; init rejects values above MAX_SIGNAL_HISTORY. */
00149 int history_nsamples;
00150
00151
00152
/* Extradata flag bit 0; when set, init sets up the RDFT/DCT contexts and
 * the sin/cos tables below. */
00153 int do_apf;
00154
/* Extradata flag bits 2..5, must be < 12; selects the row of
 * wmavoice_denoise_power_table used in calc_input_response(). */
00155 int denoise_strength;
00156
/* Extradata flag 0x40; enables tilt compensation of the denoise filter
 * coefficients in calc_input_response(). */
00157 int denoise_tilt_corr;
00158
/* Extradata flag bits 7..10; values > 8 enable the order-2 transfer
 * function at the end of postfilter(). */
00159 int dc_level;
00160
00161
/* Number of LSPs: 16 if extradata flag 0x1000 is set, otherwise 10. */
00162 int lsps;
/* Extradata flag 0x2000 (boolean). */
00163 int lsp_q_mode;
/* Extradata flag 0x4000 (boolean). */
00164 int lsp_def_mode;
00165
/* 34 for 16 LSPs, 24 for 10 LSPs. */
00166 int frame_lsp_bitsize;
00167
/* 60 for 16 LSPs, 48 for 10 LSPs. */
00168 int sframe_lsp_bitsize;
00169
00170
/* Pitch bounds derived from the sample rate at init. */
00171 int min_pitch_val;
00172 int max_pitch_val;
/* ceil(log2(max_pitch_val - min_pitch_val)). */
00173 int pitch_nbits;
00174
/* ceil(log2(block_pitch_range)). */
00175 int block_pitch_nbits;
00176
/* Computed at init from block_conv_table[] and min_pitch_val. */
00177 int block_pitch_range;
/* 1 + ceil(log2(block_delta_pitch_hrange)). */
00178 int block_delta_pitch_nbits;
00179
00180
00181
/* (pitch_range >> 3) & ~0xF; init rejects values <= 0. */
00182 int block_delta_pitch_hrange;
00183
/* { min_pitch_val, pitch_range*25/64, pitch_range*44/64, max_pitch_val-1 }. */
00184 uint16_t block_conv_table[4];
00185
00186
/* The following six fields are not touched by the code visible in this
 * part of the file. */
00196 int spillover_nbits;
00197
00198
00199
00200 int has_residual_lsps;
00201
00202
00203
00204
00205 int skip_bits_next;
00206
00207
00208
00209 uint8_t sframe_cache[SFRAME_CACHE_MAXSIZE + FF_INPUT_BUFFER_PADDING_SIZE];
00212 int sframe_cache_size;
00213
00214
00215
00216
00217 PutBitContext pb;
00218
/* LSPs of the previous frame; init seeds entry n with
 * M_PI * (n + 1) / (lsps + 1). */
00228 double prev_lsps[MAX_LSPS];
00229
/* Starting pitch for the per-sample pitch interpolation in
 * synth_block_fcb_acb(); initialized to 40. */
00230 int last_pitch_val;
/* Initialized to ACB_TYPE_NONE. */
00231 int last_acb_type;
/* Per-sample pitch change in 16.16 fixed point; multiplied by the sample
 * index and added to last_pitch_val << 16. */
00232 int pitch_diff_sh16;
00233
/* Gain applied by synth_block_hardcoded() for FCB_TYPE_SILENCE frames. */
00234 float silence_gain;
00235
/* Set by aw_parse_coords() when the 6-bit index was >= 54 and two extra
 * bits were read. */
00236 int aw_idx_is_ext;
00237
/* 24 if both block pitches exceed 32, otherwise 16 (aw_parse_coords()). */
00238 int aw_pulse_range;
00239
00240
00241
00242
00243
/* Per-block pulse counts computed by aw_parse_coords(). */
00244 int aw_n_pulses[2];
00245
00246
/* Per-block offset of the first pulse window (aw_parse_coords()). */
00247 int aw_first_pulse_off[2];
00248
/* Written at the end of aw_pulse_set2(); used as pulse_off for block 1. */
00249 int aw_next_pulse_off_cache;
00250
00251
00252
00253
00254
/* Input to pRNG() for silence frames. */
00255 int frame_cntr;
00256
/* Gain prediction error history; weighted with gain_coeff[] in
 * synth_block_fcb_acb(), zeroed by synth_block_hardcoded(). */
00257 float gain_pred_err[6];
/* Not touched by the code visible in this part of the file. */
00258 float excitation_history[MAX_SIGNAL_HISTORY];
00262 float synth_history[MAX_LSPS];
00263
/* Transforms set up at init when do_apf is set: 128-point (2^7) real
 * FFT forward/inverse ... */
00272 RDFTContext rdft, irdft;
00273
/* ... and DCT-I / DST-I contexts initialized with nbits = 6. */
00274 DCTContext dct, dst;
00275
/* Lookup tables built at init from ff_sine_window_init(); indexed as
 * 255 + clip(x, -255, 255) in calc_input_response(). */
00276 float sin[511], cos[511];
00277
/* Gain memory carried across calls of adaptive_gain_control(). */
00278 float postfilter_agc;
00279
/* Filter state for the order-2 transfer function in postfilter(). */
00280 float dcf_mem[2];
/* Not touched directly by the code visible in this part of the file. */
00281 float zero_exc_pf[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE];
/* Tail of the denoise filter output that spills into following blocks,
 * and the number of valid samples in it (wiener_denoise()). */
00284 float denoise_filter_cache[MAX_FRAMESIZE];
00285 int denoise_filter_cache_size;
/* 128-float work buffers used by wiener_denoise(). */
00286 DECLARE_ALIGNED(32, float, tilted_lpcs_pf)[0x80];
00288 DECLARE_ALIGNED(32, float, denoise_coeffs_pf)[0x80];
/* postfilter() writes its synthesis output starting at index
 * MAX_LSPS_ALIGN16 and keeps the last s->lsps samples just before it. */
00290 DECLARE_ALIGNED(32, float, synth_filter_out_buf)[0x80 + MAX_LSPS_ALIGN16];
00293
00296 } WMAVoiceContext;
00297
00307 static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25])
00308 {
00309 static const uint8_t bits[] = {
00310 2, 2, 2, 4, 4, 4,
00311 6, 6, 6, 8, 8, 8,
00312 10, 10, 10, 12, 12, 12,
00313 14, 14, 14, 14
00314 };
00315 static const uint16_t codes[] = {
00316 0x0000, 0x0001, 0x0002,
00317 0x000c, 0x000d, 0x000e,
00318 0x003c, 0x003d, 0x003e,
00319 0x00fc, 0x00fd, 0x00fe,
00320 0x03fc, 0x03fd, 0x03fe,
00321 0x0ffc, 0x0ffd, 0x0ffe,
00322 0x3ffc, 0x3ffd, 0x3ffe, 0x3fff
00323 };
00324 int cntr[8] = { 0 }, n, res;
00325
00326 memset(vbm_tree, 0xff, sizeof(vbm_tree[0]) * 25);
00327 for (n = 0; n < 17; n++) {
00328 res = get_bits(gb, 3);
00329 if (cntr[res] > 3)
00330 return -1;
00331 vbm_tree[res * 3 + cntr[res]++] = n;
00332 }
00333 INIT_VLC_STATIC(&frame_type_vlc, VLC_NBITS, sizeof(bits),
00334 bits, 1, 1, codes, 2, 2, 132);
00335 return 0;
00336 }
00337
00341 static av_cold int wmavoice_decode_init(AVCodecContext *ctx)
00342 {
00343 int n, flags, pitch_range, lsp16_flag;
00344 WMAVoiceContext *s = ctx->priv_data;
00345
00354 if (ctx->extradata_size != 46) {
00355 av_log(ctx, AV_LOG_ERROR,
00356 "Invalid extradata size %d (should be 46)\n",
00357 ctx->extradata_size);
00358 return -1;
00359 }
00360 flags = AV_RL32(ctx->extradata + 18);
00361 s->spillover_bitsize = 3 + av_ceil_log2(ctx->block_align);
00362 s->do_apf = flags & 0x1;
00363 if (s->do_apf) {
00364 ff_rdft_init(&s->rdft, 7, DFT_R2C);
00365 ff_rdft_init(&s->irdft, 7, IDFT_C2R);
00366 ff_dct_init(&s->dct, 6, DCT_I);
00367 ff_dct_init(&s->dst, 6, DST_I);
00368
00369 ff_sine_window_init(s->cos, 256);
00370 memcpy(&s->sin[255], s->cos, 256 * sizeof(s->cos[0]));
00371 for (n = 0; n < 255; n++) {
00372 s->sin[n] = -s->sin[510 - n];
00373 s->cos[510 - n] = s->cos[n];
00374 }
00375 }
00376 s->denoise_strength = (flags >> 2) & 0xF;
00377 if (s->denoise_strength >= 12) {
00378 av_log(ctx, AV_LOG_ERROR,
00379 "Invalid denoise filter strength %d (max=11)\n",
00380 s->denoise_strength);
00381 return -1;
00382 }
00383 s->denoise_tilt_corr = !!(flags & 0x40);
00384 s->dc_level = (flags >> 7) & 0xF;
00385 s->lsp_q_mode = !!(flags & 0x2000);
00386 s->lsp_def_mode = !!(flags & 0x4000);
00387 lsp16_flag = flags & 0x1000;
00388 if (lsp16_flag) {
00389 s->lsps = 16;
00390 s->frame_lsp_bitsize = 34;
00391 s->sframe_lsp_bitsize = 60;
00392 } else {
00393 s->lsps = 10;
00394 s->frame_lsp_bitsize = 24;
00395 s->sframe_lsp_bitsize = 48;
00396 }
00397 for (n = 0; n < s->lsps; n++)
00398 s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
00399
00400 init_get_bits(&s->gb, ctx->extradata + 22, (ctx->extradata_size - 22) << 3);
00401 if (decode_vbmtree(&s->gb, s->vbm_tree) < 0) {
00402 av_log(ctx, AV_LOG_ERROR, "Invalid VBM tree; broken extradata?\n");
00403 return -1;
00404 }
00405
00406 s->min_pitch_val = ((ctx->sample_rate << 8) / 400 + 50) >> 8;
00407 s->max_pitch_val = ((ctx->sample_rate << 8) * 37 / 2000 + 50) >> 8;
00408 pitch_range = s->max_pitch_val - s->min_pitch_val;
00409 if (pitch_range <= 0) {
00410 av_log(ctx, AV_LOG_ERROR, "Invalid pitch range; broken extradata?\n");
00411 return -1;
00412 }
00413 s->pitch_nbits = av_ceil_log2(pitch_range);
00414 s->last_pitch_val = 40;
00415 s->last_acb_type = ACB_TYPE_NONE;
00416 s->history_nsamples = s->max_pitch_val + 8;
00417
00418 if (s->min_pitch_val < 1 || s->history_nsamples > MAX_SIGNAL_HISTORY) {
00419 int min_sr = ((((1 << 8) - 50) * 400) + 0xFF) >> 8,
00420 max_sr = ((((MAX_SIGNAL_HISTORY - 8) << 8) + 205) * 2000 / 37) >> 8;
00421
00422 av_log(ctx, AV_LOG_ERROR,
00423 "Unsupported samplerate %d (min=%d, max=%d)\n",
00424 ctx->sample_rate, min_sr, max_sr);
00425
00426 return -1;
00427 }
00428
00429 s->block_conv_table[0] = s->min_pitch_val;
00430 s->block_conv_table[1] = (pitch_range * 25) >> 6;
00431 s->block_conv_table[2] = (pitch_range * 44) >> 6;
00432 s->block_conv_table[3] = s->max_pitch_val - 1;
00433 s->block_delta_pitch_hrange = (pitch_range >> 3) & ~0xF;
00434 if (s->block_delta_pitch_hrange <= 0) {
00435 av_log(ctx, AV_LOG_ERROR, "Invalid delta pitch hrange; broken extradata?\n");
00436 return -1;
00437 }
00438 s->block_delta_pitch_nbits = 1 + av_ceil_log2(s->block_delta_pitch_hrange);
00439 s->block_pitch_range = s->block_conv_table[2] +
00440 s->block_conv_table[3] + 1 +
00441 2 * (s->block_conv_table[1] - 2 * s->min_pitch_val);
00442 s->block_pitch_nbits = av_ceil_log2(s->block_pitch_range);
00443
00444 ctx->channels = 1;
00445 ctx->channel_layout = AV_CH_LAYOUT_MONO;
00446 ctx->sample_fmt = AV_SAMPLE_FMT_FLT;
00447
00448 avcodec_get_frame_defaults(&s->frame);
00449 ctx->coded_frame = &s->frame;
00450
00451 return 0;
00452 }
00453
/**
 * Scale the postfiltered signal so that its level follows the level of the
 * unfiltered synthesized speech.
 *
 * The target gain is the ratio of the summed absolute values of
 * speech_synth and in, weighted by (1 - alpha). It is smoothed per sample
 * with the recursion mem = alpha * mem + target, and each output sample is
 * in[i] * mem.
 *
 * When the input carries no energy (all samples zero) the target gain is
 * taken as 0 instead of dividing by zero; otherwise the gain memory would
 * become inf/NaN and corrupt this and all following blocks.
 *
 * @param out          output buffer of size samples
 * @param in           postfiltered input of size samples
 * @param speech_synth reference (unfiltered) speech of size samples
 * @param size         number of samples
 * @param alpha        smoothing factor of the gain recursion
 * @param gain_mem     gain state, read on entry and updated on return
 */
static void adaptive_gain_control(float *out, const float *in,
                                  const float *speech_synth,
                                  int size, float alpha, float *gain_mem)
{
    int i;
    float speech_energy = 0.0, postfilter_energy = 0.0, gain_scale_factor;
    float mem = *gain_mem;

    for (i = 0; i < size; i++) {
        speech_energy     += fabsf(speech_synth[i]);
        postfilter_energy += fabsf(in[i]);
    }
    /* sums of absolute values are never negative, so == 0 covers the
     * silent-input case exactly */
    gain_scale_factor = postfilter_energy == 0.0 ? 0.0 :
                        (1.0 - alpha) * speech_energy / postfilter_energy;

    for (i = 0; i < size; i++) {
        mem    = alpha * mem + gain_scale_factor;
        out[i] = in[i] * mem;
    }

    *gain_mem = mem;
}
00496
00515 static int kalman_smoothen(WMAVoiceContext *s, int pitch,
00516 const float *in, float *out, int size)
00517 {
00518 int n;
00519 float optimal_gain = 0, dot;
00520 const float *ptr = &in[-FFMAX(s->min_pitch_val, pitch - 3)],
00521 *end = &in[-FFMIN(s->max_pitch_val, pitch + 3)],
00522 *best_hist_ptr = NULL;
00523
00524
00525 do {
00526 dot = ff_scalarproduct_float_c(in, ptr, size);
00527 if (dot > optimal_gain) {
00528 optimal_gain = dot;
00529 best_hist_ptr = ptr;
00530 }
00531 } while (--ptr >= end);
00532
00533 if (optimal_gain <= 0)
00534 return -1;
00535 dot = ff_scalarproduct_float_c(best_hist_ptr, best_hist_ptr, size);
00536 if (dot <= 0)
00537 return -1;
00538
00539 if (optimal_gain <= dot) {
00540 dot = dot / (dot + 0.6 * optimal_gain);
00541 } else
00542 dot = 0.625;
00543
00544
00545 for (n = 0; n < size; n++)
00546 out[n] = best_hist_ptr[n] + dot * (in[n] - best_hist_ptr[n]);
00547
00548 return 0;
00549 }
00550
/**
 * Ratio of the lag-1 to the lag-0 autocorrelation of the sequence
 * { 1, lpcs[0], ..., lpcs[n_lpcs - 1] }.
 *
 * @param lpcs   coefficients (the leading 1 is implicit)
 * @param n_lpcs number of coefficients in lpcs
 * @return lag-1 correlation divided by lag-0 correlation
 */
static float tilt_factor(const float *lpcs, int n_lpcs)
{
    const float lag0 = 1.0 + ff_scalarproduct_float_c(lpcs, lpcs, n_lpcs);
    const float lag1 = lpcs[0] +
                       ff_scalarproduct_float_c(lpcs, &lpcs[1], n_lpcs - 1);

    return lag1 / lag0;
}
00570
/**
 * Derive the time-domain denoise filter coefficients from the (tilted)
 * LPC filter.
 *
 * @param s         decoder context (transforms, sin/cos tables, denoise
 *                  settings)
 * @param lpcs      128-float buffer holding the LPC filter on entry; it is
 *                  used as scratch space and overwritten
 * @param fcb_type  fixed codebook type of the frame (selects a gain factor)
 * @param coeffs    128-float output buffer for the filter coefficients
 * @param remainder number of leading coefficients kept; the rest is zeroed
 */
00574 static void calc_input_response(WMAVoiceContext *s, float *lpcs,
00575 int fcb_type, float *coeffs, int remainder)
00576 {
00577 float last_coeff, min = 15.0, max = -15.0;
00578 float irange, angle_mul, gain_mul, range, sq;
00579 int n, idx;
00580
00581
/* Forward real FFT of the LPC filter, in place. */
00582 s->rdft.rdft_calc(&s->rdft, lpcs);
/* log_range stores log10(assign) in var and updates the running min/max. */
00583 #define log_range(var, assign) do { \
00584 float tmp = log10f(assign); var = tmp; \
00585 max = FFMAX(max, tmp); min = FFMIN(min, tmp); \
00586 } while (0)
/* lpcs[1] and lpcs[0] are squared on their own, bins 1..63 as
 * re^2 + im^2. NOTE(review): this presumably matches the packed RDFT
 * layout (DC in [0], Nyquist in [1]) -- confirm against rdft.h.
 * The log of bin n is written to lpcs[n], which has already been read
 * because n <= 2n. */
00587 log_range(last_coeff, lpcs[1] * lpcs[1]);
00588 for (n = 1; n < 64; n++)
00589 log_range(lpcs[n], lpcs[n * 2] * lpcs[n * 2] +
00590 lpcs[n * 2 + 1] * lpcs[n * 2 + 1]);
00591 log_range(lpcs[0], lpcs[0] * lpcs[0]);
00592 #undef log_range
00593 range = max - min;
/* lpcs[0..64] now hold the 65 log-power values. */
00594 lpcs[64] = last_coeff;
00595
00596
00597
00598
00599
00600
/* Map each log-power value onto a table index (distance from the maximum,
 * scaled so the full range spans 64 steps) and look up the denoise power
 * for the configured strength. */
00601 irange = 64.0 / range;
00602 gain_mul = range * (fcb_type == FCB_TYPE_HARDCODED ? (5.0 / 13.0) :
00603 (5.0 / 14.7));
00604 angle_mul = gain_mul * (8.0 * M_LN10 / M_PI);
00605 for (n = 0; n <= 64; n++) {
00606 float pwr;
00607
00608 idx = FFMAX(0, lrint((max - lpcs[n]) * irange) - 1);
00609 pwr = wmavoice_denoise_power_table[s->denoise_strength][idx];
00610 lpcs[n] = angle_mul * pwr;
00611
00612
/* Magnitude of the coefficient: table lookup, extrapolated with a
 * constant ratio per step beyond the last table entry (127). */
00613 idx = (pwr * gain_mul - 0.0295) * 70.570526123;
00614 if (idx > 127) {
00615 coeffs[n] = wmavoice_energy_table[127] *
00616 powf(1.0331663, idx - 127);
00617 } else
00618 coeffs[n] = wmavoice_energy_table[FFMAX(0, idx)];
00619 }
00620
00621
00622
00623
00624
/* DCT-I followed by DST-I on the scaled power values, both in place. */
00625 s->dct.dct_calc(&s->dct, lpcs);
00626 s->dst.dct_calc(&s->dst, lpcs);
00627
00628
/* Turn each magnitude into a (cos, sin) pair using the context tables.
 * The table index is 255 + the clipped angle value. The loop handles two
 * bins per iteration (odd n first, then the even one below it); the sign
 * of lpcs[64] alternates between the two. Bins 0 and 64 are real-only and
 * stored in coeffs[0] and coeffs[1]. */
00629 idx = 255 + av_clip(lpcs[64], -255, 255);
00630 coeffs[0] = coeffs[0] * s->cos[idx];
00631 idx = 255 + av_clip(lpcs[64] - 2 * lpcs[63], -255, 255);
00632 last_coeff = coeffs[64] * s->cos[idx];
00633 for (n = 63;; n--) {
00634 idx = 255 + av_clip(-lpcs[64] - 2 * lpcs[n - 1], -255, 255);
00635 coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
00636 coeffs[n * 2] = coeffs[n] * s->cos[idx];
00637
/* Stop once bin 1 has been written; bin 0 was handled above. */
00638 if (!--n) break;
00639
00640 idx = 255 + av_clip( lpcs[64] - 2 * lpcs[n - 1], -255, 255);
00641 coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
00642 coeffs[n * 2] = coeffs[n] * s->cos[idx];
00643 }
00644 coeffs[1] = last_coeff;
00645
00646
/* Back to the time domain. */
00647 s->irdft.rdft_calc(&s->irdft, coeffs);
00648
00649
/* Keep only the first `remainder` taps, optionally tilt-compensate them,
 * then scale by (1/64) / sqrt(energy of the kept taps). */
00650 memset(&coeffs[remainder], 0, sizeof(coeffs[0]) * (128 - remainder));
00651 if (s->denoise_tilt_corr) {
00652 float tilt_mem = 0;
00653
00654 coeffs[remainder - 1] = 0;
00655 ff_tilt_compensation(&tilt_mem,
00656 -1.8 * tilt_factor(coeffs, remainder - 1),
00657 coeffs, remainder);
00658 }
00659 sq = (1.0 / 64.0) * sqrtf(1 / ff_scalarproduct_float_c(coeffs, coeffs, remainder));
00660 for (n = 0; n < remainder; n++)
00661 coeffs[n] *= sq;
00662 }
00663
/**
 * Apply the denoise filter to one block of synthesized samples by
 * multiplication in the frequency domain, carrying the filter tail over to
 * the following blocks.
 *
 * @param s        decoder context (work buffers, transforms, tail cache)
 * @param fcb_type fixed codebook type; FCB_TYPE_SILENCE blocks are not
 *                 filtered but still receive the cached tail
 * @param synth_pf signal buffer, filtered in place; must have room for 128
 *                 floats
 * @param size     number of valid samples in synth_pf
 * @param lpcs     LPC coefficients of the block (s->lsps entries)
 */
00690 static void wiener_denoise(WMAVoiceContext *s, int fcb_type,
00691 float *synth_pf, int size,
00692 const float *lpcs)
00693 {
/* remainder is only assigned and only read on the non-silence paths. */
00694 int remainder, lim, n;
00695
00696 if (fcb_type != FCB_TYPE_SILENCE) {
00697 float *tilted_lpcs = s->tilted_lpcs_pf,
00698 *coeffs = s->denoise_coeffs_pf, tilt_mem = 0;
00699
/* Build { 1, lpcs..., 0, ... } in a 128-float buffer and apply tilt
 * compensation to its first s->lsps + 2 entries. */
00700 tilted_lpcs[0] = 1.0;
00701 memcpy(&tilted_lpcs[1], lpcs, sizeof(lpcs[0]) * s->lsps);
00702 memset(&tilted_lpcs[s->lsps + 1], 0,
00703 sizeof(tilted_lpcs[0]) * (128 - s->lsps - 1));
00704 ff_tilt_compensation(&tilt_mem, 0.7 * tilt_factor(lpcs, s->lsps),
00705 tilted_lpcs, s->lsps + 2);
00706
00707
00708
00709
00710
/* Filter length: limited so that size + remainder <= 127 and
 * remainder < size. */
00711 remainder = FFMIN(127 - size, size - 1);
00712 calc_input_response(s, tilted_lpcs, fcb_type, coeffs, remainder);
00713
00714
00715
/* Zero-pad the signal to 128 samples, transform signal and filter,
 * multiply them bin by bin (entries 0 and 1 as plain reals, bins 1..63 as
 * complex numbers) and transform back. */
00716 memset(&synth_pf[size], 0, sizeof(synth_pf[0]) * (128 - size));
00717 s->rdft.rdft_calc(&s->rdft, synth_pf);
00718 s->rdft.rdft_calc(&s->rdft, coeffs);
00719 synth_pf[0] *= coeffs[0];
00720 synth_pf[1] *= coeffs[1];
00721 for (n = 1; n < 64; n++) {
00722 float v1 = synth_pf[n * 2], v2 = synth_pf[n * 2 + 1];
00723 synth_pf[n * 2] = v1 * coeffs[n * 2] - v2 * coeffs[n * 2 + 1];
00724 synth_pf[n * 2 + 1] = v2 * coeffs[n * 2] + v1 * coeffs[n * 2 + 1];
00725 }
00726 s->irdft.rdft_calc(&s->irdft, synth_pf);
00727 }
00728
00729
/* Add the tail left over from earlier blocks to the start of this block
 * and drop the consumed part from the cache. */
00730 if (s->denoise_filter_cache_size) {
00731 lim = FFMIN(s->denoise_filter_cache_size, size);
00732 for (n = 0; n < lim; n++)
00733 synth_pf[n] += s->denoise_filter_cache[n];
00734 s->denoise_filter_cache_size -= lim;
/* If fewer than size samples were cached, the new cache size is 0 and
 * nothing is moved. */
00735 memmove(s->denoise_filter_cache, &s->denoise_filter_cache[size],
00736 sizeof(s->denoise_filter_cache[0]) * s->denoise_filter_cache_size);
00737 }
00738
00739
/* Store this block's tail (samples size .. size + remainder - 1): add it
 * onto what is still cached and append the part that extends beyond it. */
00740 if (fcb_type != FCB_TYPE_SILENCE) {
00741 lim = FFMIN(remainder, s->denoise_filter_cache_size);
00742 for (n = 0; n < lim; n++)
00743 s->denoise_filter_cache[n] += synth_pf[size + n];
00744 if (lim < remainder) {
00745 memcpy(&s->denoise_filter_cache[lim], &synth_pf[size + lim],
00746 sizeof(s->denoise_filter_cache[0]) * (remainder - lim));
00747 s->denoise_filter_cache_size = remainder;
00748 }
00749 }
00750 }
00751
00772 static void postfilter(WMAVoiceContext *s, const float *synth,
00773 float *samples, int size,
00774 const float *lpcs, float *zero_exc_pf,
00775 int fcb_type, int pitch)
00776 {
00777 float synth_filter_in_buf[MAX_FRAMESIZE / 2],
00778 *synth_pf = &s->synth_filter_out_buf[MAX_LSPS_ALIGN16],
00779 *synth_filter_in = zero_exc_pf;
00780
00781 av_assert0(size <= MAX_FRAMESIZE / 2);
00782
00783
00784 ff_celp_lp_zero_synthesis_filterf(zero_exc_pf, lpcs, synth, size, s->lsps);
00785
00786 if (fcb_type >= FCB_TYPE_AW_PULSES &&
00787 !kalman_smoothen(s, pitch, zero_exc_pf, synth_filter_in_buf, size))
00788 synth_filter_in = synth_filter_in_buf;
00789
00790
00791 ff_celp_lp_synthesis_filterf(synth_pf, lpcs,
00792 synth_filter_in, size, s->lsps);
00793 memcpy(&synth_pf[-s->lsps], &synth_pf[size - s->lsps],
00794 sizeof(synth_pf[0]) * s->lsps);
00795
00796 wiener_denoise(s, fcb_type, synth_pf, size, lpcs);
00797
00798 adaptive_gain_control(samples, synth_pf, synth, size, 0.99,
00799 &s->postfilter_agc);
00800
00801 if (s->dc_level > 8) {
00802
00803
00804
00805 ff_acelp_apply_order_2_transfer_function(samples, samples,
00806 (const float[2]) { -1.99997, 1.0 },
00807 (const float[2]) { -1.9330735188, 0.93589198496 },
00808 0.93980580475, s->dcf_mem, size);
00809 }
00810 }
/**
 * Multi-stage vector dequantizer.
 *
 * The codebook of every stage is stored back to back in table; stage n
 * holds sizes[n] vectors of num bytes each. For each stage the vector
 * selected by values[n] is scaled by mul_q[n], offset by base_q[n] and
 * accumulated into lsps.
 *
 * @param lsps     output, num entries (overwritten)
 * @param num      vector length
 * @param values   selected vector index per stage
 * @param sizes    number of vectors in each stage's codebook
 * @param n_stages number of stages
 * @param table    concatenated codebooks
 * @param mul_q    scale factor per stage
 * @param base_q   offset per stage
 */
static void dequant_lsps(double *lsps, int num,
                         const uint16_t *values,
                         const uint16_t *sizes,
                         int n_stages, const uint8_t *table,
                         const double *mul_q,
                         const double *base_q)
{
    const uint8_t *codebook = table;
    int stage;

    memset(lsps, 0, num * sizeof(*lsps));

    for (stage = 0; stage < n_stages; stage++) {
        const uint8_t *vec  = codebook + values[stage] * num;
        const double offset = base_q[stage];
        const double scale  = mul_q[stage];
        int k;

        for (k = 0; k < num; k++)
            lsps[k] += offset + scale * vec[k];

        /* skip to the codebook of the next stage */
        codebook += sizes[stage] * num;
    }
}
00846
00858 static void dequant_lsp10i(GetBitContext *gb, double *lsps)
00859 {
00860 static const uint16_t vec_sizes[4] = { 256, 64, 32, 32 };
00861 static const double mul_lsf[4] = {
00862 5.2187144800e-3, 1.4626986422e-3,
00863 9.6179549166e-4, 1.1325736225e-3
00864 };
00865 static const double base_lsf[4] = {
00866 M_PI * -2.15522e-1, M_PI * -6.1646e-2,
00867 M_PI * -3.3486e-2, M_PI * -5.7408e-2
00868 };
00869 uint16_t v[4];
00870
00871 v[0] = get_bits(gb, 8);
00872 v[1] = get_bits(gb, 6);
00873 v[2] = get_bits(gb, 5);
00874 v[3] = get_bits(gb, 5);
00875
00876 dequant_lsps(lsps, 10, v, vec_sizes, 4, wmavoice_dq_lsp10i,
00877 mul_lsf, base_lsf);
00878 }
00879
00884 static void dequant_lsp10r(GetBitContext *gb,
00885 double *i_lsps, const double *old,
00886 double *a1, double *a2, int q_mode)
00887 {
00888 static const uint16_t vec_sizes[3] = { 128, 64, 64 };
00889 static const double mul_lsf[3] = {
00890 2.5807601174e-3, 1.2354460219e-3, 1.1763821673e-3
00891 };
00892 static const double base_lsf[3] = {
00893 M_PI * -1.07448e-1, M_PI * -5.2706e-2, M_PI * -5.1634e-2
00894 };
00895 const float (*ipol_tab)[2][10] = q_mode ?
00896 wmavoice_lsp10_intercoeff_b : wmavoice_lsp10_intercoeff_a;
00897 uint16_t interpol, v[3];
00898 int n;
00899
00900 dequant_lsp10i(gb, i_lsps);
00901
00902 interpol = get_bits(gb, 5);
00903 v[0] = get_bits(gb, 7);
00904 v[1] = get_bits(gb, 6);
00905 v[2] = get_bits(gb, 6);
00906
00907 for (n = 0; n < 10; n++) {
00908 double delta = old[n] - i_lsps[n];
00909 a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];
00910 a1[10 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n];
00911 }
00912
00913 dequant_lsps(a2, 20, v, vec_sizes, 3, wmavoice_dq_lsp10r,
00914 mul_lsf, base_lsf);
00915 }
00916
00920 static void dequant_lsp16i(GetBitContext *gb, double *lsps)
00921 {
00922 static const uint16_t vec_sizes[5] = { 256, 64, 128, 64, 128 };
00923 static const double mul_lsf[5] = {
00924 3.3439586280e-3, 6.9908173703e-4,
00925 3.3216608306e-3, 1.0334960326e-3,
00926 3.1899104283e-3
00927 };
00928 static const double base_lsf[5] = {
00929 M_PI * -1.27576e-1, M_PI * -2.4292e-2,
00930 M_PI * -1.28094e-1, M_PI * -3.2128e-2,
00931 M_PI * -1.29816e-1
00932 };
00933 uint16_t v[5];
00934
00935 v[0] = get_bits(gb, 8);
00936 v[1] = get_bits(gb, 6);
00937 v[2] = get_bits(gb, 7);
00938 v[3] = get_bits(gb, 6);
00939 v[4] = get_bits(gb, 7);
00940
00941 dequant_lsps( lsps, 5, v, vec_sizes, 2,
00942 wmavoice_dq_lsp16i1, mul_lsf, base_lsf);
00943 dequant_lsps(&lsps[5], 5, &v[2], &vec_sizes[2], 2,
00944 wmavoice_dq_lsp16i2, &mul_lsf[2], &base_lsf[2]);
00945 dequant_lsps(&lsps[10], 6, &v[4], &vec_sizes[4], 1,
00946 wmavoice_dq_lsp16i3, &mul_lsf[4], &base_lsf[4]);
00947 }
00948
00953 static void dequant_lsp16r(GetBitContext *gb,
00954 double *i_lsps, const double *old,
00955 double *a1, double *a2, int q_mode)
00956 {
00957 static const uint16_t vec_sizes[3] = { 128, 128, 128 };
00958 static const double mul_lsf[3] = {
00959 1.2232979501e-3, 1.4062241527e-3, 1.6114744851e-3
00960 };
00961 static const double base_lsf[3] = {
00962 M_PI * -5.5830e-2, M_PI * -5.2908e-2, M_PI * -5.4776e-2
00963 };
00964 const float (*ipol_tab)[2][16] = q_mode ?
00965 wmavoice_lsp16_intercoeff_b : wmavoice_lsp16_intercoeff_a;
00966 uint16_t interpol, v[3];
00967 int n;
00968
00969 dequant_lsp16i(gb, i_lsps);
00970
00971 interpol = get_bits(gb, 5);
00972 v[0] = get_bits(gb, 7);
00973 v[1] = get_bits(gb, 7);
00974 v[2] = get_bits(gb, 7);
00975
00976 for (n = 0; n < 16; n++) {
00977 double delta = old[n] - i_lsps[n];
00978 a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];
00979 a1[16 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n];
00980 }
00981
00982 dequant_lsps( a2, 10, v, vec_sizes, 1,
00983 wmavoice_dq_lsp16r1, mul_lsf, base_lsf);
00984 dequant_lsps(&a2[10], 10, &v[1], &vec_sizes[1], 1,
00985 wmavoice_dq_lsp16r2, &mul_lsf[1], &base_lsf[1]);
00986 dequant_lsps(&a2[20], 12, &v[2], &vec_sizes[2], 1,
00987 wmavoice_dq_lsp16r3, &mul_lsf[2], &base_lsf[2]);
00988 }
00989
/**
 * Parse the pulse coordinates of an AW-pulse frame: read the start offset
 * index from the bitstream and derive, for each of the two blocks, the
 * number of pulses and the offset of the first pulse window.
 *
 * Results are stored in s->aw_idx_is_ext, s->aw_pulse_range,
 * s->aw_n_pulses[] and s->aw_first_pulse_off[].
 *
 * @param s     decoder context
 * @param gb    bit reader
 * @param pitch pitch of block 0 and block 1. The code loops and divides by
 *              these values, so both must be positive.
 */
01003 static void aw_parse_coords(WMAVoiceContext *s, GetBitContext *gb,
01004 const int *pitch)
01005 {
/* Start offset per index value; the largest index that can be formed
 * below is 63 + 9 * 3 + 3 = 93. */
01006 static const int16_t start_offset[94] = {
01007 -11, -9, -7, -5, -3, -1, 1, 3, 5, 7, 9, 11,
01008 13, 15, 18, 17, 19, 20, 21, 22, 23, 24, 25, 26,
01009 27, 28, 29, 30, 31, 32, 33, 35, 37, 39, 41, 43,
01010 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67,
01011 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91,
01012 93, 95, 97, 99, 101, 103, 105, 107, 109, 111, 113, 115,
01013 117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139,
01014 141, 143, 145, 147, 149, 151, 153, 155, 157, 159
01015 };
01016 int bits, offset;
01017
01018
/* 6-bit index; values >= 54 are extended with two more bits. */
01019 s->aw_idx_is_ext = 0;
01020 if ((bits = get_bits(gb, 6)) >= 54) {
01021 s->aw_idx_is_ext = 1;
01022 bits += (bits - 54) * 3 + get_bits(gb, 2);
01023 }
01024
01025
01026
/* Window width depends on the smaller of the two pitches. */
01027 s->aw_pulse_range = FFMIN(pitch[0], pitch[1]) > 32 ? 24 : 16;
/* Step a negative start offset forward by whole pitch periods until it is
 * no longer negative. */
01028 for (offset = start_offset[bits]; offset < 0; offset += pitch[0]) ;
/* Pulses spaced pitch[0] apart that fall before sample MAX_FRAMESIZE / 2. */
01029 s->aw_n_pulses[0] = (pitch[0] - 1 + MAX_FRAMESIZE / 2 - offset) / pitch[0];
01030 s->aw_first_pulse_off[0] = offset - s->aw_pulse_range / 2;
/* Continue from the first position past block 0, now with pitch[1]. */
01031 offset += s->aw_n_pulses[0] * pitch[0];
01032 s->aw_n_pulses[1] = (pitch[1] - 1 + MAX_FRAMESIZE - offset) / pitch[1];
/* Offset relative to the start of block 1 (MAX_FRAMESIZE / 2). */
01033 s->aw_first_pulse_off[1] = offset - (MAX_FRAMESIZE + s->aw_pulse_range) / 2;
01034
01035
01036
01037
/* If the coded start lies in block 0 (or before the frame), move the
 * first-window offsets back by whole pitch periods for as long as the
 * loop condition on the window position holds. */
01038 if (start_offset[bits] < MAX_FRAMESIZE / 2) {
01039 while (s->aw_first_pulse_off[1] - pitch[1] + s->aw_pulse_range > 0)
01040 s->aw_first_pulse_off[1] -= pitch[1];
01041 if (start_offset[bits] < 0)
01042 while (s->aw_first_pulse_off[0] - pitch[0] + s->aw_pulse_range > 0)
01043 s->aw_first_pulse_off[0] -= pitch[0];
01044 }
01045 }
01046
/**
 * Add the single pulse of the second AW pulse set to the fixed codebook
 * vector.
 *
 * A bit mask marks which of the block's MAX_FRAMESIZE / 2 samples are
 * still free (not covered by a pulse window of the first set). An index
 * read from the bitstream then selects the (index + 1)-th free sample,
 * counted from a start position derived from the first pulse offset.
 *
 * @param s         decoder context (AW pulse layout from aw_parse_coords())
 * @param gb        bit reader
 * @param block_idx block number within the frame (0 or 1)
 * @param fcb       fixed codebook description; one pulse is appended unless
 *                  the function returns early because no free sample is
 *                  left
 */
01054 static void aw_pulse_set2(WMAVoiceContext *s, GetBitContext *gb,
01055 int block_idx, AMRFixed *fcb)
01056 {
/* Five 16-bit words cover the 80 samples of a block (MSB = lowest sample
 * of each word); two always-zero guard words sit on each side so that
 * window clearing may run past either end. */
01057 uint16_t use_mask_mem[9];
01058 uint16_t *use_mask = use_mask_mem + 2;
01059
01060
01061
01062
01063
01064
01065
01066 int pulse_off = s->aw_first_pulse_off[block_idx],
01067 pulse_start, n, idx, range, aidx, start_off = 0;
01068
01069
/* Move the first window forward by pitch periods until it reaches into
 * the block. */
01070 if (s->aw_n_pulses[block_idx] > 0)
01071 while (pulse_off + s->aw_pulse_range < 1)
01072 pulse_off += fcb->pitch_lag;
01073
01074
/* Search range: 32 / 8 for block 0 / 1 when block 0 has pulses, 16
 * otherwise. Block 1 then continues from the offset cached by the call
 * for block 0. */
01075 if (s->aw_n_pulses[0] > 0) {
01076 if (block_idx == 0) {
01077 range = 32;
01078 } else {
01079 range = 8;
01080 if (s->aw_n_pulses[block_idx] > 0)
01081 pulse_off = s->aw_next_pulse_off_cache;
01082 }
01083 } else
01084 range = 16;
01085 pulse_start = s->aw_n_pulses[block_idx] > 0 ? pulse_off - range / 2 : 0;
01086
01087
01088
01089
/* Guard words cleared, all 80 sample bits set ... */
01090 memset(&use_mask[-2], 0, 2 * sizeof(use_mask[0]));
01091 memset( use_mask, -1, 5 * sizeof(use_mask[0]));
01092 memset(&use_mask[5], 0, 2 * sizeof(use_mask[0]));
/* ... then clear aw_pulse_range bits starting at every pulse position
 * (pulse_off, pulse_off + pitch_lag, ...) inside the block. A window may
 * span up to three consecutive mask words. */
01093 if (s->aw_n_pulses[block_idx] > 0)
01094 for (idx = pulse_off; idx < MAX_FRAMESIZE / 2; idx += fcb->pitch_lag) {
01095 int excl_range = s->aw_pulse_range;
01096 uint16_t *use_mask_ptr = &use_mask[idx >> 4];
01097 int first_sh = 16 - (idx & 15);
01098 *use_mask_ptr++ &= 0xFFFFu << first_sh;
01099 excl_range -= first_sh;
01100 if (excl_range >= 16) {
01101 *use_mask_ptr++ = 0;
01102 *use_mask_ptr &= 0xFFFF >> (excl_range - 16);
01103 } else
01104 *use_mask_ptr &= 0xFFFF >> excl_range;
01105 }
01106
01107
/* Index width: 5 bits (block 0) or 3 bits (block 1) when block 0 has
 * pulses, 4 bits otherwise. */
01108 aidx = get_bits(gb, s->aw_n_pulses[0] > 0 ? 5 - 2 * block_idx : 4);
/* Consume aidx + 1 free samples; the last one consumed is the pulse
 * position. */
01109 for (n = 0; n <= aidx; pulse_start++) {
/* Negative candidates are shifted forward by pitch periods. */
01110 for (idx = pulse_start; idx < 0; idx += fcb->pitch_lag) ;
/* Past the end of the block: fall back to the lowest free sample (highest
 * set bit of the first non-zero mask word); give up if none is left. */
01111 if (idx >= MAX_FRAMESIZE / 2) {
01112 if (use_mask[0]) idx = 0x0F;
01113 else if (use_mask[1]) idx = 0x1F;
01114 else if (use_mask[2]) idx = 0x2F;
01115 else if (use_mask[3]) idx = 0x3F;
01116 else if (use_mask[4]) idx = 0x4F;
01117 else return;
01118 idx -= av_log2_16bit(use_mask[idx >> 4]);
01119 }
01120 if (use_mask[idx >> 4] & (0x8000 >> (idx & 15))) {
01121 use_mask[idx >> 4] &= ~(0x8000 >> (idx & 15));
01122 n++;
01123 start_off = idx;
01124 }
01125 }
01126
/* Append the pulse; its sign is one bit from the stream. */
01127 fcb->x[fcb->n] = start_off;
01128 fcb->y[fcb->n] = get_bits1(gb) ? -1.0 : 1.0;
01129 fcb->n++;
01130
01131
/* Remember where the next pitch-spaced position would fall in the
 * following block. */
01132 n = (MAX_FRAMESIZE / 2 - start_off) % fcb->pitch_lag;
01133 s->aw_next_pulse_off_cache = n ? fcb->pitch_lag - n : 0;
01134 }
01135
01143 static void aw_pulse_set1(WMAVoiceContext *s, GetBitContext *gb,
01144 int block_idx, AMRFixed *fcb)
01145 {
01146 int val = get_bits(gb, 12 - 2 * (s->aw_idx_is_ext && !block_idx));
01147 float v;
01148
01149 if (s->aw_n_pulses[block_idx] > 0) {
01150 int n, v_mask, i_mask, sh, n_pulses;
01151
01152 if (s->aw_pulse_range == 24) {
01153 n_pulses = 3;
01154 v_mask = 8;
01155 i_mask = 7;
01156 sh = 4;
01157 } else {
01158 n_pulses = 4;
01159 v_mask = 4;
01160 i_mask = 3;
01161 sh = 3;
01162 }
01163
01164 for (n = n_pulses - 1; n >= 0; n--, val >>= sh) {
01165 fcb->y[fcb->n] = (val & v_mask) ? -1.0 : 1.0;
01166 fcb->x[fcb->n] = (val & i_mask) * n_pulses + n +
01167 s->aw_first_pulse_off[block_idx];
01168 while (fcb->x[fcb->n] < 0)
01169 fcb->x[fcb->n] += fcb->pitch_lag;
01170 if (fcb->x[fcb->n] < MAX_FRAMESIZE / 2)
01171 fcb->n++;
01172 }
01173 } else {
01174 int num2 = (val & 0x1FF) >> 1, delta, idx;
01175
01176 if (num2 < 1 * 79) { delta = 1; idx = num2 + 1; }
01177 else if (num2 < 2 * 78) { delta = 3; idx = num2 + 1 - 1 * 77; }
01178 else if (num2 < 3 * 77) { delta = 5; idx = num2 + 1 - 2 * 76; }
01179 else { delta = 7; idx = num2 + 1 - 3 * 75; }
01180 v = (val & 0x200) ? -1.0 : 1.0;
01181
01182 fcb->no_repeat_mask |= 3 << fcb->n;
01183 fcb->x[fcb->n] = idx - delta;
01184 fcb->y[fcb->n] = v;
01185 fcb->x[fcb->n + 1] = idx;
01186 fcb->y[fcb->n + 1] = (val & 1) ? -v : v;
01187 fcb->n += 2;
01188 }
01189 }
01190
/**
 * Deterministic pseudo-random number derived from the frame counter and
 * block number; synth_block_hardcoded() uses it as the start index into
 * wmavoice_std_codebook for silence frames.
 *
 * @param frame_cntr frame counter
 * @param block_num  block number within the frame
 * @param block_size number of samples the caller reads from the start index
 * @return a value in [0, 1000 - block_size)
 */
01204 static int pRNG(int frame_cntr, int block_num, int block_size)
01205 {
01206
01207
01208
01209
01210
01211
01212
01213
01214
01215
/* Nine { multiplier, fraction } pairs, selected by y below; they are
 * combined as x * multiplier + UMULH(x, fraction).
 * NOTE(review): the pairs look like fixed-point reciprocals -- confirm the
 * intended formula against the codec reference. */
01216 static const unsigned int div_tbl[9][2] = {
01217 { 8332, 3 * 715827883U },
01218 { 4545, 0 * 390451573U },
01219 { 3124, 11 * 268435456U },
01220 { 2380, 15 * 204522253U },
01221 { 1922, 23 * 165191050U },
01222 { 1612, 23 * 138547333U },
01223 { 1388, 27 * 119304648U },
01224 { 1219, 16 * 104755300U },
01225 { 1086, 39 * 93368855U }
01226 };
/* Seed, reduced once by 0xFFFF if it reaches that value. */
01227 unsigned int z, y, x = MUL16(block_num, 1877) + frame_cntr;
01228 if (x >= 0xFFFF) x -= 0xFFFF;
01229
/* 477218589 is 2^32 / 9 rounded up, so, assuming MULH yields the high 32
 * bits of the 64-bit product, y is x modulo 9 and always a valid row. */
01230 y = x - 9 * MULH(477218589, x);
/* Only the low 16 bits of the sum are kept. */
01231 z = (uint16_t) (x * div_tbl[y][0] + UMULH(x, div_tbl[y][1]));
01232
01233 return z % (1000 - block_size);
01234 }
01235
01240 static void synth_block_hardcoded(WMAVoiceContext *s, GetBitContext *gb,
01241 int block_idx, int size,
01242 const struct frame_type_desc *frame_desc,
01243 float *excitation)
01244 {
01245 float gain;
01246 int n, r_idx;
01247
01248 av_assert0(size <= MAX_FRAMESIZE);
01249
01250
01251 if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
01252 r_idx = pRNG(s->frame_cntr, block_idx, size);
01253 gain = s->silence_gain;
01254 } else {
01255 r_idx = get_bits(gb, 8);
01256 gain = wmavoice_gain_universal[get_bits(gb, 6)];
01257 }
01258
01259
01260 memset(s->gain_pred_err, 0, sizeof(s->gain_pred_err));
01261
01262
01263 for (n = 0; n < size; n++)
01264 excitation[n] = wmavoice_std_codebook[r_idx + n] * gain;
01265 }
01266
/**
 * Generate the excitation of a block from the fixed codebook (pulses) and
 * the adaptive codebook (pitch-delayed copy of earlier excitation).
 *
 * @param s               decoder context
 * @param gb              bit reader
 * @param block_idx       block number within the frame
 * @param size            block size, at most MAX_FRAMESIZE / 2
 * @param block_pitch_sh2 block pitch with two fractional bits
 * @param frame_desc      frame type description
 * @param excitation      output buffer of size samples; earlier excitation
 *                        is read at negative indices
 */
01271 static void synth_block_fcb_acb(WMAVoiceContext *s, GetBitContext *gb,
01272 int block_idx, int size,
01273 int block_pitch_sh2,
01274 const struct frame_type_desc *frame_desc,
01275 float *excitation)
01276 {
/* Weights applied to the gain prediction error history. */
01277 static const float gain_coeff[6] = {
01278 0.8169, -0.06545, 0.1726, 0.0185, -0.0359, 0.0458
01279 };
01280 float pulses[MAX_FRAMESIZE / 2], pred_err, acb_gain, fcb_gain;
01281 int n, idx, gain_weight;
01282 AMRFixed fcb;
01283
01284 av_assert0(size <= MAX_FRAMESIZE / 2);
01285 memset(pulses, 0, sizeof(*pulses) * size);
01286
/* Integer part of the block pitch. */
01287 fcb.pitch_lag = block_pitch_sh2 >> 2;
01288 fcb.pitch_fac = 1.0;
01289 fcb.no_repeat_mask = 0;
01290 fcb.n = 0;
01291
01292
01293
/* Fixed codebook: either the two AW pulse sets, or five tracks with one
 * or two explicitly coded pulses each. */
01294 if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01295 aw_pulse_set1(s, gb, block_idx, &fcb);
01296 aw_pulse_set2(s, gb, block_idx, &fcb);
01297 } else {
/* Position bits per pulse shrink as the blocks get shorter. */
01298 int offset_nbits = 5 - frame_desc->log_n_blocks;
01299
01300 fcb.no_repeat_mask = -1;
01301
01302
/* Track n holds positions n, n + 5, n + 10, ... */
01303 for (n = 0; n < 5; n++) {
01304 float sign;
01305 int pos1, pos2;
01306
01307 sign = get_bits1(gb) ? 1.0 : -1.0;
01308 pos1 = get_bits(gb, offset_nbits);
01309 fcb.x[fcb.n] = n + 5 * pos1;
01310 fcb.y[fcb.n++] = sign;
/* The first dbl_pulses tracks carry a second pulse; its sign is flipped
 * when it lies after the first one. */
01311 if (n < frame_desc->dbl_pulses) {
01312 pos2 = get_bits(gb, offset_nbits);
01313 fcb.x[fcb.n] = n + 5 * pos2;
01314 fcb.y[fcb.n++] = (pos1 < pos2) ? -sign : sign;
01315 }
01316 }
01317 }
01318 ff_set_fixed_vector(pulses, &fcb, 1.0, size);
01319
01320
01321
/* One 7-bit index selects both gains. The fixed codebook gain is
 * predicted from the error history and corrected by the codebook value
 * (in the log domain). */
01322 idx = get_bits(gb, 7);
01323 fcb_gain = expf(ff_scalarproduct_float_c(s->gain_pred_err, gain_coeff, 6) -
01324 5.2409161640 + wmavoice_gain_codebook_fcb[idx]);
01325 acb_gain = wmavoice_gain_codebook_acb[idx];
01326 pred_err = av_clipf(wmavoice_gain_codebook_fcb[idx],
01327 -2.9957322736 ,
01328 1.6094379124 );
01329
/* Shift the history and store the clipped error gain_weight times
 * (4, 2 or 1 entries for 2, 4 or 8 blocks per frame). */
01330 gain_weight = 8 >> frame_desc->log_n_blocks;
01331 memmove(&s->gain_pred_err[gain_weight], s->gain_pred_err,
01332 sizeof(*s->gain_pred_err) * (6 - gain_weight));
01333 for (n = 0; n < gain_weight; n++)
01334 s->gain_pred_err[n] = pred_err;
01335
01336
/* Adaptive codebook. */
01337 if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
/* The pitch changes linearly over the frame (16.16 fixed point). The
 * block is processed in runs of len samples; len is chosen from the
 * distance to the next integer boundary of the filter index. */
01338 int len;
01339 for (n = 0; n < size; n += len) {
01340 int next_idx_sh16;
01341 int abs_idx = block_idx * size + n;
01342 int pitch_sh16 = (s->last_pitch_val << 16) +
01343 s->pitch_diff_sh16 * abs_idx;
01344 int pitch = (pitch_sh16 + 0x6FFF) >> 16;
01345 int idx_sh16 = ((pitch << 16) - pitch_sh16) * 8 + 0x58000;
01346 idx = idx_sh16 >> 16;
01347 if (s->pitch_diff_sh16) {
01348 if (s->pitch_diff_sh16 > 0) {
01349 next_idx_sh16 = (idx_sh16) &~ 0xFFFF;
01350 } else
01351 next_idx_sh16 = (idx_sh16 + 0x10000) &~ 0xFFFF;
01352 len = av_clip((idx_sh16 - next_idx_sh16) / s->pitch_diff_sh16 / 8,
01353 1, size - n);
01354 } else
/* Constant pitch: the whole block in one run. */
01355 len = size;
01356
01357 ff_acelp_interpolatef(&excitation[n], &excitation[n - pitch],
01358 wmavoice_ipol1_coeffs, 17,
01359 idx, 9, len);
01360 }
01361 } else {
/* One pitch per block: interpolate if it has a fractional part,
 * otherwise copy the excitation from block_pitch samples back. */
01362 int block_pitch = block_pitch_sh2 >> 2;
01363 idx = block_pitch_sh2 & 3;
01364 if (idx) {
01365 ff_acelp_interpolatef(excitation, &excitation[-block_pitch],
01366 wmavoice_ipol2_coeffs, 4,
01367 idx, 8, size);
01368 } else
01369 av_memcpy_backptr((uint8_t *) excitation, sizeof(float) * block_pitch,
01370 sizeof(float) * size);
01371 }
01372
01373
/* excitation = acb_gain * adaptive part + fcb_gain * pulses. */
01374 ff_weighted_vector_sumf(excitation, excitation, pulses,
01375 acb_gain, fcb_gain, size);
01376 }
01377
01394 static void synth_block(WMAVoiceContext *s, GetBitContext *gb,
01395 int block_idx, int size,
01396 int block_pitch_sh2,
01397 const double *lsps, const double *prev_lsps,
01398 const struct frame_type_desc *frame_desc,
01399 float *excitation, float *synth)
01400 {
01401 double i_lsps[MAX_LSPS];
01402 float lpcs[MAX_LSPS];
01403 float fac;
01404 int n;
01405
01406 if (frame_desc->acb_type == ACB_TYPE_NONE)
01407 synth_block_hardcoded(s, gb, block_idx, size, frame_desc, excitation);
01408 else
01409 synth_block_fcb_acb(s, gb, block_idx, size, block_pitch_sh2,
01410 frame_desc, excitation);
01411
01412
01413 fac = (block_idx + 0.5) / frame_desc->n_blocks;
01414 for (n = 0; n < s->lsps; n++)
01415 i_lsps[n] = cos(prev_lsps[n] + fac * (lsps[n] - prev_lsps[n]));
01416 ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01417
01418
01419 ff_celp_lp_synthesis_filterf(synth, lpcs, excitation, size, s->lsps);
01420 }
01421
01437 static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx,
01438 float *samples,
01439 const double *lsps, const double *prev_lsps,
01440 float *excitation, float *synth)
01441 {
01442 WMAVoiceContext *s = ctx->priv_data;
01443 int n, n_blocks_x2, log_n_blocks_x2, cur_pitch_val;
01444 int pitch[MAX_BLOCKS], last_block_pitch;
01445
01446
01447 int bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)], block_nsamples;
01448
01449 if (bd_idx < 0) {
01450 av_log(ctx, AV_LOG_ERROR,
01451 "Invalid frame type VLC code, skipping\n");
01452 return -1;
01453 }
01454
01455 block_nsamples = MAX_FRAMESIZE / frame_descs[bd_idx].n_blocks;
01456
01457
01458 if (frame_descs[bd_idx].acb_type == ACB_TYPE_ASYMMETRIC) {
01459
01460
01461
01462
01463 n_blocks_x2 = frame_descs[bd_idx].n_blocks << 1;
01464 log_n_blocks_x2 = frame_descs[bd_idx].log_n_blocks + 1;
01465 cur_pitch_val = s->min_pitch_val + get_bits(gb, s->pitch_nbits);
01466 cur_pitch_val = FFMIN(cur_pitch_val, s->max_pitch_val - 1);
01467 if (s->last_acb_type == ACB_TYPE_NONE ||
01468 20 * abs(cur_pitch_val - s->last_pitch_val) >
01469 (cur_pitch_val + s->last_pitch_val))
01470 s->last_pitch_val = cur_pitch_val;
01471
01472
01473 for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
01474 int fac = n * 2 + 1;
01475
01476 pitch[n] = (MUL16(fac, cur_pitch_val) +
01477 MUL16((n_blocks_x2 - fac), s->last_pitch_val) +
01478 frame_descs[bd_idx].n_blocks) >> log_n_blocks_x2;
01479 }
01480
01481
01482 s->pitch_diff_sh16 =
01483 ((cur_pitch_val - s->last_pitch_val) << 16) / MAX_FRAMESIZE;
01484 }
01485
01486
01487 switch (frame_descs[bd_idx].fcb_type) {
01488 case FCB_TYPE_SILENCE:
01489 s->silence_gain = wmavoice_gain_silence[get_bits(gb, 8)];
01490 break;
01491 case FCB_TYPE_AW_PULSES:
01492 aw_parse_coords(s, gb, pitch);
01493 break;
01494 }
01495
01496 for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
01497 int bl_pitch_sh2;
01498
01499
01500 switch (frame_descs[bd_idx].acb_type) {
01501 case ACB_TYPE_HAMMING: {
01502
01503
01504
01505
01506
01507 int block_pitch,
01508 t1 = (s->block_conv_table[1] - s->block_conv_table[0]) << 2,
01509 t2 = (s->block_conv_table[2] - s->block_conv_table[1]) << 1,
01510 t3 = s->block_conv_table[3] - s->block_conv_table[2] + 1;
01511
01512 if (n == 0) {
01513 block_pitch = get_bits(gb, s->block_pitch_nbits);
01514 } else
01515 block_pitch = last_block_pitch - s->block_delta_pitch_hrange +
01516 get_bits(gb, s->block_delta_pitch_nbits);
01517
01518 last_block_pitch = av_clip(block_pitch,
01519 s->block_delta_pitch_hrange,
01520 s->block_pitch_range -
01521 s->block_delta_pitch_hrange);
01522
01523
01524 if (block_pitch < t1) {
01525 bl_pitch_sh2 = (s->block_conv_table[0] << 2) + block_pitch;
01526 } else {
01527 block_pitch -= t1;
01528 if (block_pitch < t2) {
01529 bl_pitch_sh2 =
01530 (s->block_conv_table[1] << 2) + (block_pitch << 1);
01531 } else {
01532 block_pitch -= t2;
01533 if (block_pitch < t3) {
01534 bl_pitch_sh2 =
01535 (s->block_conv_table[2] + block_pitch) << 2;
01536 } else
01537 bl_pitch_sh2 = s->block_conv_table[3] << 2;
01538 }
01539 }
01540 pitch[n] = bl_pitch_sh2 >> 2;
01541 break;
01542 }
01543
01544 case ACB_TYPE_ASYMMETRIC: {
01545 bl_pitch_sh2 = pitch[n] << 2;
01546 break;
01547 }
01548
01549 default:
01550 bl_pitch_sh2 = 0;
01551 break;
01552 }
01553
01554 synth_block(s, gb, n, block_nsamples, bl_pitch_sh2,
01555 lsps, prev_lsps, &frame_descs[bd_idx],
01556 &excitation[n * block_nsamples],
01557 &synth[n * block_nsamples]);
01558 }
01559
01560
01561
01562 if (s->do_apf) {
01563 double i_lsps[MAX_LSPS];
01564 float lpcs[MAX_LSPS];
01565
01566 for (n = 0; n < s->lsps; n++)
01567 i_lsps[n] = cos(0.5 * (prev_lsps[n] + lsps[n]));
01568 ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01569 postfilter(s, synth, samples, 80, lpcs,
01570 &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx],
01571 frame_descs[bd_idx].fcb_type, pitch[0]);
01572
01573 for (n = 0; n < s->lsps; n++)
01574 i_lsps[n] = cos(lsps[n]);
01575 ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01576 postfilter(s, &synth[80], &samples[80], 80, lpcs,
01577 &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx + 80],
01578 frame_descs[bd_idx].fcb_type, pitch[0]);
01579 } else
01580 memcpy(samples, synth, 160 * sizeof(synth[0]));
01581
01582
01583 s->frame_cntr++;
01584 if (s->frame_cntr >= 0xFFFF) s->frame_cntr -= 0xFFFF;
01585 s->last_acb_type = frame_descs[bd_idx].acb_type;
01586 switch (frame_descs[bd_idx].acb_type) {
01587 case ACB_TYPE_NONE:
01588 s->last_pitch_val = 0;
01589 break;
01590 case ACB_TYPE_ASYMMETRIC:
01591 s->last_pitch_val = cur_pitch_val;
01592 break;
01593 case ACB_TYPE_HAMMING:
01594 s->last_pitch_val = pitch[frame_descs[bd_idx].n_blocks - 1];
01595 break;
01596 }
01597
01598 return 0;
01599 }
01600
01613 static void stabilize_lsps(double *lsps, int num)
01614 {
01615 int n, m, l;
01616
01617
01618
01619
01620 lsps[0] = FFMAX(lsps[0], 0.0015 * M_PI);
01621 for (n = 1; n < num; n++)
01622 lsps[n] = FFMAX(lsps[n], lsps[n - 1] + 0.0125 * M_PI);
01623 lsps[num - 1] = FFMIN(lsps[num - 1], 0.9985 * M_PI);
01624
01625
01626
01627 for (n = 1; n < num; n++) {
01628 if (lsps[n] < lsps[n - 1]) {
01629 for (m = 1; m < num; m++) {
01630 double tmp = lsps[m];
01631 for (l = m - 1; l >= 0; l--) {
01632 if (lsps[l] <= tmp) break;
01633 lsps[l + 1] = lsps[l];
01634 }
01635 lsps[l + 1] = tmp;
01636 }
01637 break;
01638 }
01639 }
01640 }
01641
01651 static int check_bits_for_superframe(GetBitContext *orig_gb,
01652 WMAVoiceContext *s)
01653 {
01654 GetBitContext s_gb, *gb = &s_gb;
01655 int n, need_bits, bd_idx;
01656 const struct frame_type_desc *frame_desc;
01657
01658
01659 init_get_bits(gb, orig_gb->buffer, orig_gb->size_in_bits);
01660 skip_bits_long(gb, get_bits_count(orig_gb));
01661 av_assert1(get_bits_left(gb) == get_bits_left(orig_gb));
01662
01663
01664 if (get_bits_left(gb) < 14)
01665 return 1;
01666 if (!get_bits1(gb))
01667 return -1;
01668 if (get_bits1(gb)) skip_bits(gb, 12);
01669 if (s->has_residual_lsps) {
01670 if (get_bits_left(gb) < s->sframe_lsp_bitsize)
01671 return 1;
01672 skip_bits_long(gb, s->sframe_lsp_bitsize);
01673 }
01674
01675
01676 for (n = 0; n < MAX_FRAMES; n++) {
01677 int aw_idx_is_ext = 0;
01678
01679 if (!s->has_residual_lsps) {
01680 if (get_bits_left(gb) < s->frame_lsp_bitsize) return 1;
01681 skip_bits_long(gb, s->frame_lsp_bitsize);
01682 }
01683 bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)];
01684 if (bd_idx < 0)
01685 return -1;
01686 frame_desc = &frame_descs[bd_idx];
01687 if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
01688 if (get_bits_left(gb) < s->pitch_nbits)
01689 return 1;
01690 skip_bits_long(gb, s->pitch_nbits);
01691 }
01692 if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
01693 skip_bits(gb, 8);
01694 } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01695 int tmp = get_bits(gb, 6);
01696 if (tmp >= 0x36) {
01697 skip_bits(gb, 2);
01698 aw_idx_is_ext = 1;
01699 }
01700 }
01701
01702
01703 if (frame_desc->acb_type == ACB_TYPE_HAMMING) {
01704 need_bits = s->block_pitch_nbits +
01705 (frame_desc->n_blocks - 1) * s->block_delta_pitch_nbits;
01706 } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01707 need_bits = 2 * !aw_idx_is_ext;
01708 } else
01709 need_bits = 0;
01710 need_bits += frame_desc->frame_size;
01711 if (get_bits_left(gb) < need_bits)
01712 return 1;
01713 skip_bits_long(gb, need_bits);
01714 }
01715
01716 return 0;
01717 }
01718
01736 static int synth_superframe(AVCodecContext *ctx, int *got_frame_ptr)
01737 {
01738 WMAVoiceContext *s = ctx->priv_data;
01739 GetBitContext *gb = &s->gb, s_gb;
01740 int n, res, n_samples = 480;
01741 double lsps[MAX_FRAMES][MAX_LSPS];
01742 const double *mean_lsf = s->lsps == 16 ?
01743 wmavoice_mean_lsf16[s->lsp_def_mode] : wmavoice_mean_lsf10[s->lsp_def_mode];
01744 float excitation[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE + 12];
01745 float synth[MAX_LSPS + MAX_SFRAMESIZE];
01746 float *samples;
01747
01748 memcpy(synth, s->synth_history,
01749 s->lsps * sizeof(*synth));
01750 memcpy(excitation, s->excitation_history,
01751 s->history_nsamples * sizeof(*excitation));
01752
01753 if (s->sframe_cache_size > 0) {
01754 gb = &s_gb;
01755 init_get_bits(gb, s->sframe_cache, s->sframe_cache_size);
01756 s->sframe_cache_size = 0;
01757 }
01758
01759 if ((res = check_bits_for_superframe(gb, s)) == 1) {
01760 *got_frame_ptr = 0;
01761 return 1;
01762 }
01763
01764
01765
01766
01767
01768 if (!get_bits1(gb)) {
01769 av_log_missing_feature(ctx, "WMAPro-in-WMAVoice", 1);
01770 return AVERROR_PATCHWELCOME;
01771 }
01772
01773
01774 if (get_bits1(gb)) {
01775 if ((n_samples = get_bits(gb, 12)) > 480) {
01776 av_log(ctx, AV_LOG_ERROR,
01777 "Superframe encodes >480 samples (%d), not allowed\n",
01778 n_samples);
01779 return -1;
01780 }
01781 }
01782
01783 if (s->has_residual_lsps) {
01784 double prev_lsps[MAX_LSPS], a1[MAX_LSPS * 2], a2[MAX_LSPS * 2];
01785
01786 for (n = 0; n < s->lsps; n++)
01787 prev_lsps[n] = s->prev_lsps[n] - mean_lsf[n];
01788
01789 if (s->lsps == 10) {
01790 dequant_lsp10r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
01791 } else
01792 dequant_lsp16r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
01793
01794 for (n = 0; n < s->lsps; n++) {
01795 lsps[0][n] = mean_lsf[n] + (a1[n] - a2[n * 2]);
01796 lsps[1][n] = mean_lsf[n] + (a1[s->lsps + n] - a2[n * 2 + 1]);
01797 lsps[2][n] += mean_lsf[n];
01798 }
01799 for (n = 0; n < 3; n++)
01800 stabilize_lsps(lsps[n], s->lsps);
01801 }
01802
01803
01804 s->frame.nb_samples = 480;
01805 if ((res = ff_get_buffer(ctx, &s->frame)) < 0) {
01806 av_log(ctx, AV_LOG_ERROR, "get_buffer() failed\n");
01807 return res;
01808 }
01809 s->frame.nb_samples = n_samples;
01810 samples = (float *)s->frame.data[0];
01811
01812
01813 for (n = 0; n < 3; n++) {
01814 if (!s->has_residual_lsps) {
01815 int m;
01816
01817 if (s->lsps == 10) {
01818 dequant_lsp10i(gb, lsps[n]);
01819 } else
01820 dequant_lsp16i(gb, lsps[n]);
01821
01822 for (m = 0; m < s->lsps; m++)
01823 lsps[n][m] += mean_lsf[m];
01824 stabilize_lsps(lsps[n], s->lsps);
01825 }
01826
01827 if ((res = synth_frame(ctx, gb, n,
01828 &samples[n * MAX_FRAMESIZE],
01829 lsps[n], n == 0 ? s->prev_lsps : lsps[n - 1],
01830 &excitation[s->history_nsamples + n * MAX_FRAMESIZE],
01831 &synth[s->lsps + n * MAX_FRAMESIZE]))) {
01832 *got_frame_ptr = 0;
01833 return res;
01834 }
01835 }
01836
01837
01838
01839
01840 if (get_bits1(gb)) {
01841 res = get_bits(gb, 4);
01842 skip_bits(gb, 10 * (res + 1));
01843 }
01844
01845 *got_frame_ptr = 1;
01846
01847
01848 memcpy(s->prev_lsps, lsps[2],
01849 s->lsps * sizeof(*s->prev_lsps));
01850 memcpy(s->synth_history, &synth[MAX_SFRAMESIZE],
01851 s->lsps * sizeof(*synth));
01852 memcpy(s->excitation_history, &excitation[MAX_SFRAMESIZE],
01853 s->history_nsamples * sizeof(*excitation));
01854 if (s->do_apf)
01855 memmove(s->zero_exc_pf, &s->zero_exc_pf[MAX_SFRAMESIZE],
01856 s->history_nsamples * sizeof(*s->zero_exc_pf));
01857
01858 return 0;
01859 }
01860
01868 static int parse_packet_header(WMAVoiceContext *s)
01869 {
01870 GetBitContext *gb = &s->gb;
01871 unsigned int res;
01872
01873 if (get_bits_left(gb) < 11)
01874 return 1;
01875 skip_bits(gb, 4);
01876 s->has_residual_lsps = get_bits1(gb);
01877 do {
01878 res = get_bits(gb, 6);
01879
01880 if (get_bits_left(gb) < 6 * (res == 0x3F) + s->spillover_bitsize)
01881 return 1;
01882 } while (res == 0x3F);
01883 s->spillover_nbits = get_bits(gb, s->spillover_bitsize);
01884
01885 return 0;
01886 }
01887
01903 static void copy_bits(PutBitContext *pb,
01904 const uint8_t *data, int size,
01905 GetBitContext *gb, int nbits)
01906 {
01907 int rmn_bytes, rmn_bits;
01908
01909 rmn_bits = rmn_bytes = get_bits_left(gb);
01910 if (rmn_bits < nbits)
01911 return;
01912 if (nbits > pb->size_in_bits - put_bits_count(pb))
01913 return;
01914 rmn_bits &= 7; rmn_bytes >>= 3;
01915 if ((rmn_bits = FFMIN(rmn_bits, nbits)) > 0)
01916 put_bits(pb, rmn_bits, get_bits(gb, rmn_bits));
01917 avpriv_copy_bits(pb, data + size - rmn_bytes,
01918 FFMIN(nbits - rmn_bits, rmn_bytes << 3));
01919 }
01920
01932 static int wmavoice_decode_packet(AVCodecContext *ctx, void *data,
01933 int *got_frame_ptr, AVPacket *avpkt)
01934 {
01935 WMAVoiceContext *s = ctx->priv_data;
01936 GetBitContext *gb = &s->gb;
01937 int size, res, pos;
01938
01939
01940
01941
01942
01943
01944 for (size = avpkt->size; size > ctx->block_align; size -= ctx->block_align);
01945 if (!size) {
01946 *got_frame_ptr = 0;
01947 return 0;
01948 }
01949 init_get_bits(&s->gb, avpkt->data, size << 3);
01950
01951
01952
01953
01954 if (size == ctx->block_align) {
01955 if ((res = parse_packet_header(s)) < 0)
01956 return res;
01957
01958
01959
01960
01961 if (s->spillover_nbits > 0) {
01962 if (s->sframe_cache_size > 0) {
01963 int cnt = get_bits_count(gb);
01964 copy_bits(&s->pb, avpkt->data, size, gb, s->spillover_nbits);
01965 flush_put_bits(&s->pb);
01966 s->sframe_cache_size += s->spillover_nbits;
01967 if ((res = synth_superframe(ctx, got_frame_ptr)) == 0 &&
01968 *got_frame_ptr) {
01969 cnt += s->spillover_nbits;
01970 s->skip_bits_next = cnt & 7;
01971 *(AVFrame *)data = s->frame;
01972 return cnt >> 3;
01973 } else
01974 skip_bits_long (gb, s->spillover_nbits - cnt +
01975 get_bits_count(gb));
01976 } else
01977 skip_bits_long(gb, s->spillover_nbits);
01978 }
01979 } else if (s->skip_bits_next)
01980 skip_bits(gb, s->skip_bits_next);
01981
01982
01983 s->sframe_cache_size = 0;
01984 s->skip_bits_next = 0;
01985 pos = get_bits_left(gb);
01986 if ((res = synth_superframe(ctx, got_frame_ptr)) < 0) {
01987 return res;
01988 } else if (*got_frame_ptr) {
01989 int cnt = get_bits_count(gb);
01990 s->skip_bits_next = cnt & 7;
01991 *(AVFrame *)data = s->frame;
01992 return cnt >> 3;
01993 } else if ((s->sframe_cache_size = pos) > 0) {
01994
01995 init_get_bits(gb, avpkt->data, size << 3);
01996 skip_bits_long(gb, (size << 3) - pos);
01997 av_assert1(get_bits_left(gb) == pos);
01998
01999
02000 init_put_bits(&s->pb, s->sframe_cache, SFRAME_CACHE_MAXSIZE);
02001 copy_bits(&s->pb, avpkt->data, size, gb, s->sframe_cache_size);
02002
02003
02004 }
02005
02006 return size;
02007 }
02008
02009 static av_cold int wmavoice_decode_end(AVCodecContext *ctx)
02010 {
02011 WMAVoiceContext *s = ctx->priv_data;
02012
02013 if (s->do_apf) {
02014 ff_rdft_end(&s->rdft);
02015 ff_rdft_end(&s->irdft);
02016 ff_dct_end(&s->dct);
02017 ff_dct_end(&s->dst);
02018 }
02019
02020 return 0;
02021 }
02022
02023 static av_cold void wmavoice_flush(AVCodecContext *ctx)
02024 {
02025 WMAVoiceContext *s = ctx->priv_data;
02026 int n;
02027
02028 s->postfilter_agc = 0;
02029 s->sframe_cache_size = 0;
02030 s->skip_bits_next = 0;
02031 for (n = 0; n < s->lsps; n++)
02032 s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
02033 memset(s->excitation_history, 0,
02034 sizeof(*s->excitation_history) * MAX_SIGNAL_HISTORY);
02035 memset(s->synth_history, 0,
02036 sizeof(*s->synth_history) * MAX_LSPS);
02037 memset(s->gain_pred_err, 0,
02038 sizeof(s->gain_pred_err));
02039
02040 if (s->do_apf) {
02041 memset(&s->synth_filter_out_buf[MAX_LSPS_ALIGN16 - s->lsps], 0,
02042 sizeof(*s->synth_filter_out_buf) * s->lsps);
02043 memset(s->dcf_mem, 0,
02044 sizeof(*s->dcf_mem) * 2);
02045 memset(s->zero_exc_pf, 0,
02046 sizeof(*s->zero_exc_pf) * s->history_nsamples);
02047 memset(s->denoise_filter_cache, 0, sizeof(s->denoise_filter_cache));
02048 }
02049 }
02050
02051 AVCodec ff_wmavoice_decoder = {
02052 .name = "wmavoice",
02053 .type = AVMEDIA_TYPE_AUDIO,
02054 .id = AV_CODEC_ID_WMAVOICE,
02055 .priv_data_size = sizeof(WMAVoiceContext),
02056 .init = wmavoice_decode_init,
02057 .close = wmavoice_decode_end,
02058 .decode = wmavoice_decode_packet,
02059 .capabilities = CODEC_CAP_SUBFRAMES | CODEC_CAP_DR1,
02060 .flush = wmavoice_flush,
02061 .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio Voice"),
02062 };