FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
wmaenc.c
Go to the documentation of this file.
1 /*
2  * WMA compatible encoder
3  * Copyright (c) 2007 Michael Niedermayer
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #include "libavutil/attributes.h"
23 
24 #include "avcodec.h"
25 #include "internal.h"
26 #include "wma.h"
27 #include "libavutil/avassert.h"
28 
29 
/*
 * Encoder initialisation body.
 *
 * NOTE(review): the function signature (listing line 30) is absent from this
 * listing; presumably this is encode_init(), which the AVCodec tables at the
 * end of the file install as .init -- confirm against the real source.
 *
 * Rejects more than MAX_CHANNELS channels, sample rates above 48000 and
 * bitrates below 24000, builds the codec extradata from flags1/flags2,
 * initialises the shared WMA state and the MDCT contexts, and derives
 * block_align, frame_size and initial_padding.
 *
 * Returns 0 on success, AVERROR(EINVAL) for a rejected parameter and
 * AVERROR(ENOMEM) when the extradata allocation fails.
 */
31 {
32  WMACodecContext *s = avctx->priv_data;
33  int i, flags1, flags2, block_align;
34  uint8_t *extradata;
35 
36  s->avctx = avctx;
37 
38  if (avctx->channels > MAX_CHANNELS) {
39  av_log(avctx, AV_LOG_ERROR,
40  "too many channels: got %i, need %i or fewer\n",
41  avctx->channels, MAX_CHANNELS);
42  return AVERROR(EINVAL);
43  }
44 
45  if (avctx->sample_rate > 48000) {
46  av_log(avctx, AV_LOG_ERROR, "sample rate is too high: %d > 48kHz\n",
47  avctx->sample_rate);
48  return AVERROR(EINVAL);
49  }
50 
51  if (avctx->bit_rate < 24 * 1000) {
52  av_log(avctx, AV_LOG_ERROR,
53  "bitrate too low: got %i, need 24000 or higher\n",
54  avctx->bit_rate);
55  return AVERROR(EINVAL);
56  }
57 
58  /* extract flag infos */
 /* flags2 == 1 enables only bit 0 below: exponent VLC on, bit reservoir
  * and variable block length off. */
59  flags1 = 0;
60  flags2 = 1;
61  if (avctx->codec->id == AV_CODEC_ID_WMAV1) {
 /* WMAv1: 4 bytes, both 16-bit little-endian fields are written, so the
  * non-zeroing allocation leaves no uninitialised byte. */
62  extradata = av_malloc(4);
63  if (!extradata)
64  return AVERROR(ENOMEM);
65  avctx->extradata_size = 4;
66  AV_WL16(extradata, flags1);
67  AV_WL16(extradata + 2, flags2);
68  } else if (avctx->codec->id == AV_CODEC_ID_WMAV2) {
 /* WMAv2: 10 zeroed bytes; only the first 6 are written here (32-bit
  * flags1 followed by 16-bit flags2), the last 4 stay zero. */
69  extradata = av_mallocz(10);
70  if (!extradata)
71  return AVERROR(ENOMEM);
72  avctx->extradata_size = 10;
73  AV_WL32(extradata, flags1);
74  AV_WL16(extradata + 4, flags2);
75  } else {
76  av_assert0(0);
77  }
78  avctx->extradata = extradata;
79  s->use_exp_vlc = flags2 & 0x0001;
80  s->use_bit_reservoir = flags2 & 0x0002;
81  s->use_variable_block_len = flags2 & 0x0004;
82  if (avctx->channels == 2)
83  s->ms_stereo = 1;
84 
 /* NOTE(review): the return value of ff_wma_init() is not checked here. */
85  ff_wma_init(avctx, flags2);
86 
87  /* init MDCT */
 /* NOTE(review): the return value of ff_mdct_init() is not checked here. */
88  for (i = 0; i < s->nb_block_sizes; i++)
89  ff_mdct_init(&s->mdct_ctx[i], s->frame_len_bits - i + 1, 0, 1.0);
90 
 /* Bytes per frame at the requested bitrate, computed in 64 bits and
  * capped at MAX_CODED_SUPERFRAME_SIZE. */
91  block_align = avctx->bit_rate * (int64_t) s->frame_len /
92  (avctx->sample_rate * 8);
93  block_align = FFMIN(block_align, MAX_CODED_SUPERFRAME_SIZE);
94  avctx->block_align = block_align;
95  avctx->frame_size = avctx->initial_padding = s->frame_len;
96 
97  return 0;
98 }
99 
/*
 * Windowing + forward MDCT body.
 *
 * NOTE(review): the function signature (listing line 100) is absent from
 * this listing; presumably this is apply_window_and_mdct(avctx, frame), as
 * called from encode_superframe() -- confirm against the real source.
 *
 * For every channel it fills s->output with two window_len-sized halves
 * (the samples saved in s->frame_out[ch] by the previous call, then the
 * newly windowed input), stores a windowed copy of the new input back in
 * s->frame_out[ch], and writes the MDCT result to s->coefs[ch].
 */
101 {
102  WMACodecContext *s = avctx->priv_data;
103  float **audio = (float **) frame->extended_data;
104  int len = frame->nb_samples;
105  int window_index = s->frame_len_bits - s->block_len_bits;
106  FFTContext *mdct = &s->mdct_ctx[window_index];
107  int ch;
108  const float *win = s->windows[window_index];
109  int window_len = 1 << s->block_len_bits;
 /* Scale factor applied to the input samples before windowing. */
110  float n = 2.0 * 32768.0 / window_len;
111 
112  for (ch = 0; ch < avctx->channels; ch++) {
 /* First half of s->output: data left in frame_out[ch] by the previous
  * call. */
113  memcpy(s->output, s->frame_out[ch], window_len * sizeof(*s->output));
 /* frame_out[ch] = input * n, for len samples. */
114  s->fdsp->vector_fmul_scalar(s->frame_out[ch], audio[ch], n, len);
 /* Second half of s->output: scaled input combined with win by
  * vector_fmul_reverse (presumably the window applied in reverse
  * order -- confirm against the float DSP documentation). */
115  s->fdsp->vector_fmul_reverse(&s->output[window_len], s->frame_out[ch],
116  win, len);
 /* Window the scaled input in place; it is read back by the memcpy
  * above on the next call. */
117  s->fdsp->vector_fmul(s->frame_out[ch], s->frame_out[ch], win, len);
118  mdct->mdct_calc(mdct, s->coefs[ch], s->output);
119  }
120 }
121 
122 // FIXME use for decoding too
123 static void init_exp(WMACodecContext *s, int ch, const int *exp_param)
124 {
125  int n;
126  const uint16_t *ptr;
127  float v, *q, max_scale, *q_end;
128 
129  ptr = s->exponent_bands[s->frame_len_bits - s->block_len_bits];
130  q = s->exponents[ch];
131  q_end = q + s->block_len;
132  max_scale = 0;
133  while (q < q_end) {
134  /* XXX: use a table */
135  v = pow(10, *exp_param++ *(1.0 / 16.0));
136  max_scale = FFMAX(max_scale, v);
137  n = *ptr++;
138  do {
139  *q++ = v;
140  } while (--n);
141  }
142  s->max_exponent[ch] = max_scale;
143 }
144 
/*
 * Write the exponents of channel ch to s->pb as differences between
 * consecutive exponent parameters.
 *
 * For version 1 streams the first exponent is written as a raw 5-bit value
 * (exponent - 10, asserted to lie in 0..31) and the first band is skipped;
 * otherwise the delta chain starts from the fixed value 36.
 *
 * NOTE(review): listing lines 165-166 are absent from this listing, so the
 * statement that actually writes `code` to the bitstream is not visible
 * here -- confirm against the real source.
 */
145 static void encode_exp_vlc(WMACodecContext *s, int ch, const int *exp_param)
146 {
147  int last_exp;
148  const uint16_t *ptr;
149  float *q, *q_end;
150 
151  ptr = s->exponent_bands[s->frame_len_bits - s->block_len_bits];
 /* q is only used as a position counter over the block; the exponent
  * values it points at are never read in the visible code. */
152  q = s->exponents[ch];
153  q_end = q + s->block_len;
154  if (s->version == 1) {
155  last_exp = *exp_param++;
156  av_assert0(last_exp - 10 >= 0 && last_exp - 10 < 32);
157  put_bits(&s->pb, 5, last_exp - 10);
158  q += *ptr++;
159  } else
160  last_exp = 36;
161  while (q < q_end) {
162  int exp = *exp_param++;
 /* Delta to the previous exponent, biased by 60 into the range 0..119. */
163  int code = exp - last_exp + 60;
164  av_assert1(code >= 0 && code < 120);
167  /* XXX: use a table */
 /* Advance by the width of the band just handled. */
168  q += *ptr++;
169  last_exp = exp;
170  }
171 }
172 
/*
 * Quantise the coefficients in src_coefs with the given total_gain and write
 * one block to s->pb.
 *
 * Every band uses the same fixed exponent parameter (20). Coefficients are
 * divided by (exponent * mult) and rounded; the resulting integers are then
 * run/level coded with the per-channel coefficient tables.
 *
 * Returns 0 on success, 1 when no channel is coded, and -1 when a quantised
 * value falls outside [-32768, 32767] or an escape-coded level does not fit
 * in coef_nb_bits bits.
 *
 * NOTE(review): listing lines 192-194 (body of the fixed-block-length
 * branch) and 337 (statement guarded by the final `if`) are absent from
 * this listing -- confirm against the real source.
 */
173 static int encode_block(WMACodecContext *s, float (*src_coefs)[BLOCK_MAX_SIZE],
174  int total_gain)
175 {
176  int v, bsize, ch, coef_nb_bits, parse_exponents;
177  float mdct_norm;
178  int nb_coefs[MAX_CHANNELS];
 /* One exponent parameter per band, all identical. */
179  static const int fixed_exp[25] = {
180  20, 20, 20, 20, 20,
181  20, 20, 20, 20, 20,
182  20, 20, 20, 20, 20,
183  20, 20, 20, 20, 20,
184  20, 20, 20, 20, 20
185  };
186 
187  // FIXME remove duplication relative to decoder
188  if (s->use_variable_block_len) {
189  av_assert0(0); // FIXME not implemented
190  } else {
191  /* fixed block len */
195  }
196 
197  s->block_len = 1 << s->block_len_bits;
198 // av_assert0((s->block_pos + s->block_len) <= s->frame_len);
199  bsize = s->frame_len_bits - s->block_len_bits;
200 
201  // FIXME factor
 /* Every channel starts with the same number of coded coefficients. */
202  v = s->coefs_end[bsize] - s->coefs_start;
203  for (ch = 0; ch < s->avctx->channels; ch++)
204  nb_coefs[ch] = v;
205  {
 /* mdct_norm = 1 / n4, additionally multiplied by sqrt(n4) for
  * version 1. */
206  int n4 = s->block_len / 2;
207  mdct_norm = 1.0 / (float) n4;
208  if (s->version == 1)
209  mdct_norm *= sqrt(n4);
210  }
211 
 /* The ms_stereo flag bit is only written for two-channel streams. */
212  if (s->avctx->channels == 2)
213  put_bits(&s->pb, 1, !!s->ms_stereo);
214 
215  for (ch = 0; ch < s->avctx->channels; ch++) {
216  // FIXME only set channel_coded when needed, instead of always
217  s->channel_coded[ch] = 1;
218  if (s->channel_coded[ch])
219  init_exp(s, ch, fixed_exp);
220  }
221 
 /* Quantisation pass: fills s->coefs1[ch] with rounded integers. */
222  for (ch = 0; ch < s->avctx->channels; ch++) {
223  if (s->channel_coded[ch]) {
224  WMACoef *coefs1;
225  float *coefs, *exponents, mult;
226  int i, n;
227 
228  coefs1 = s->coefs1[ch];
229  exponents = s->exponents[ch];
 /* mult = 10^(total_gain / 20) / max_exponent * mdct_norm */
230  mult = pow(10, total_gain * 0.05) / s->max_exponent[ch];
231  mult *= mdct_norm;
232  coefs = src_coefs[ch];
233  if (s->use_noise_coding && 0) {
234  av_assert0(0); // FIXME not implemented
235  } else {
236  coefs += s->coefs_start;
237  n = nb_coefs[ch];
238  for (i = 0; i < n; i++) {
239  double t = *coefs++ / (exponents[i] * mult);
 /* Out-of-range value: give up on this total_gain. */
240  if (t < -32768 || t > 32767)
241  return -1;
242 
243  coefs1[i] = lrint(t);
244  }
245  }
246  }
247  }
248 
 /* One "channel coded" bit per channel; v records whether any is set. */
249  v = 0;
250  for (ch = 0; ch < s->avctx->channels; ch++) {
251  int a = s->channel_coded[ch];
252  put_bits(&s->pb, 1, a);
253  v |= a;
254  }
255 
256  if (!v)
257  return 1;
258 
 /* total_gain - 1 is written in 7-bit chunks: one chunk of 127 for each
  * full 127 contained in it, followed by the remainder. */
259  for (v = total_gain - 1; v >= 127; v -= 127)
260  put_bits(&s->pb, 7, 127);
261  put_bits(&s->pb, 7, v);
262 
263  coef_nb_bits = ff_wma_total_gain_to_bits(total_gain);
264 
 /* With noise coding enabled, every high band is flagged as not coded
  * (a 0 bit) and nb_coefs is left unchanged (the `if (0)` is dead). */
265  if (s->use_noise_coding) {
266  for (ch = 0; ch < s->avctx->channels; ch++) {
267  if (s->channel_coded[ch]) {
268  int i, n;
269  n = s->exponent_high_sizes[bsize];
270  for (i = 0; i < n; i++) {
271  put_bits(&s->pb, 1, s->high_band_coded[ch][i] = 0);
272  if (0)
273  nb_coefs[ch] -= s->exponent_high_bands[bsize][i];
274  }
275  }
276  }
277  }
278 
 /* Exponents are always sent; the flag bit itself is only written when
  * the block is shorter than the frame. */
279  parse_exponents = 1;
280  if (s->block_len_bits != s->frame_len_bits)
281  put_bits(&s->pb, 1, parse_exponents);
282 
283  if (parse_exponents) {
284  for (ch = 0; ch < s->avctx->channels; ch++) {
285  if (s->channel_coded[ch]) {
286  if (s->use_exp_vlc) {
287  encode_exp_vlc(s, ch, fixed_exp);
288  } else {
289  av_assert0(0); // FIXME not implemented
290 // encode_exp_lsp(s, ch);
291  }
292  }
293  }
294  } else
295  av_assert0(0); // FIXME not implemented
296 
 /* Run/level coding of the quantised coefficients. */
297  for (ch = 0; ch < s->avctx->channels; ch++) {
298  if (s->channel_coded[ch]) {
299  int run, tindex;
300  WMACoef *ptr, *eptr;
 /* Table 1 is used for channel 1 when ms_stereo is set, table 0
  * otherwise. */
301  tindex = (ch == 1 && s->ms_stereo);
302  ptr = &s->coefs1[ch][0];
303  eptr = ptr + nb_coefs[ch];
304 
305  run = 0;
306  for (; ptr < eptr; ptr++) {
307  if (*ptr) {
308  int level = *ptr;
309  int abs_level = FFABS(level);
 /* code stays 0 (escape) unless both the level and the run are
  * covered by the table. */
310  int code = 0;
311  if (abs_level <= s->coef_vlcs[tindex]->max_level)
312  if (run < s->coef_vlcs[tindex]->levels[abs_level - 1])
313  code = run + s->int_table[tindex][abs_level - 1];
314 
315  av_assert2(code < s->coef_vlcs[tindex]->n);
316  put_bits(&s->pb, s->coef_vlcs[tindex]->huffbits[code],
317  s->coef_vlcs[tindex]->huffcodes[code]);
318 
319  if (code == 0) {
 /* Escape: level and run follow as raw fields; fail if the
  * level does not fit in coef_nb_bits bits. */
320  if (1 << coef_nb_bits <= abs_level)
321  return -1;
322 
323  put_bits(&s->pb, coef_nb_bits, abs_level);
324  put_bits(&s->pb, s->frame_len_bits, run);
325  }
326  // FIXME the sign is flipped somewhere
327  put_bits(&s->pb, 1, level < 0);
328  run = 0;
329  } else
330  run++;
331  }
 /* Trailing zeros are not coded individually; table entry 1 is
  * emitted instead (presumably the end-of-block symbol -- confirm). */
332  if (run)
333  put_bits(&s->pb, s->coef_vlcs[tindex]->huffbits[1],
334  s->coef_vlcs[tindex]->huffcodes[1]);
335  }
336  if (s->version == 1 && s->avctx->channels >= 2)
338  }
339  return 0;
340 }
341 
/*
 * Encode one frame into buf with the given total_gain and report how well
 * it fits the target packet size.
 *
 * Re-initialises s->pb on buf, so each call overwrites the result of the
 * previous one.
 *
 * Returns INT_MAX when encode_block() fails; otherwise the number of whole
 * bytes written minus avctx->block_align, i.e. a value <= 0 means the frame
 * fits in block_align bytes.
 *
 * NOTE(review): listing line 352 is absent from this listing; a statement
 * between the encode step and the return is not visible here -- confirm
 * against the real source.
 */
342 static int encode_frame(WMACodecContext *s, float (*src_coefs)[BLOCK_MAX_SIZE],
343  uint8_t *buf, int buf_size, int total_gain)
344 {
345  init_put_bits(&s->pb, buf, buf_size);
346 
347  if (s->use_bit_reservoir)
348  av_assert0(0); // FIXME not implemented
349  else if (encode_block(s, src_coefs, total_gain) < 0)
350  return INT_MAX;
351 
353 
354  return put_bits_count(&s->pb) / 8 - s->avctx->block_align;
355 }
356 
357 static int encode_superframe(AVCodecContext *avctx, AVPacket *avpkt,
358  const AVFrame *frame, int *got_packet_ptr)
359 {
360  WMACodecContext *s = avctx->priv_data;
361  int i, total_gain, ret, error;
362 
363  s->block_len_bits = s->frame_len_bits; // required by non variable block len
364  s->block_len = 1 << s->block_len_bits;
365 
366  apply_window_and_mdct(avctx, frame);
367 
368  if (s->ms_stereo) {
369  float a, b;
370  int i;
371 
372  for (i = 0; i < s->block_len; i++) {
373  a = s->coefs[0][i] * 0.5;
374  b = s->coefs[1][i] * 0.5;
375  s->coefs[0][i] = a + b;
376  s->coefs[1][i] = a - b;
377  }
378  }
379 
380  if ((ret = ff_alloc_packet2(avctx, avpkt, 2 * MAX_CODED_SUPERFRAME_SIZE)) < 0)
381  return ret;
382 
383  total_gain = 128;
384  for (i = 64; i; i >>= 1) {
385  error = encode_frame(s, s->coefs, avpkt->data, avpkt->size,
386  total_gain - i);
387  if (error <= 0)
388  total_gain -= i;
389  }
390 
391  while(total_gain <= 128 && error > 0)
392  error = encode_frame(s, s->coefs, avpkt->data, avpkt->size, total_gain++);
393  if (error > 0) {
394  av_log(avctx, AV_LOG_ERROR, "Invalid input data or requested bitrate too low, cannot encode\n");
395  avpkt->size = 0;
396  return AVERROR(EINVAL);
397  }
398  av_assert0((put_bits_count(&s->pb) & 7) == 0);
399  i= avctx->block_align - (put_bits_count(&s->pb)+7)/8;
400  av_assert0(i>=0);
401  while(i--)
402  put_bits(&s->pb, 8, 'N');
403 
404  flush_put_bits(&s->pb);
405  av_assert0(put_bits_ptr(&s->pb) - s->pb.buf == avctx->block_align);
406 
407  if (frame->pts != AV_NOPTS_VALUE)
408  avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
409 
410  avpkt->size = avctx->block_align;
411  *got_packet_ptr = 1;
412  return 0;
413 }
414 
/*
 * Codec table entry for the WMAv1 encoder, compiled only when
 * CONFIG_WMAV1_ENCODER is set. It shares encode_init, encode_superframe and
 * ff_wma_end with the WMAv2 entry and differs only in name and codec id.
 *
 * NOTE(review): listing line 426 is absent from this listing, so the
 * sample_fmts initializer is visibly truncated here -- confirm against the
 * real source.
 */
415 #if CONFIG_WMAV1_ENCODER
416 AVCodec ff_wmav1_encoder = {
417  .name = "wmav1",
418  .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio 1"),
419  .type = AVMEDIA_TYPE_AUDIO,
420  .id = AV_CODEC_ID_WMAV1,
421  .priv_data_size = sizeof(WMACodecContext),
422  .init = encode_init,
423  .encode2 = encode_superframe,
424  .close = ff_wma_end,
425  .sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
427 };
428 #endif
/*
 * Codec table entry for the WMAv2 encoder, compiled only when
 * CONFIG_WMAV2_ENCODER is set. It shares encode_init, encode_superframe and
 * ff_wma_end with the WMAv1 entry and differs only in name and codec id.
 *
 * NOTE(review): listing line 440 is absent from this listing, so the
 * sample_fmts initializer is visibly truncated here -- confirm against the
 * real source.
 */
429 #if CONFIG_WMAV2_ENCODER
430 AVCodec ff_wmav2_encoder = {
431  .name = "wmav2",
432  .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio 2"),
433  .type = AVMEDIA_TYPE_AUDIO,
434  .id = AV_CODEC_ID_WMAV2,
435  .priv_data_size = sizeof(WMACodecContext),
436  .init = encode_init,
437  .encode2 = encode_superframe,
438  .close = ff_wma_end,
439  .sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
441 };
442 #endif