FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
g722enc.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) CMU 1993 Computer Science, Speech Group
3  * Chengxiang Lu and Alex Hauptmann
4  * Copyright (c) 2005 Steve Underwood <steveu at coppice.org>
5  * Copyright (c) 2009 Kenan Gillet
6  * Copyright (c) 2010 Martin Storsjo
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24 
25 /**
26  * @file
27  * G.722 ADPCM audio encoder
28  */
29 
30 #include "libavutil/avassert.h"
31 #include "avcodec.h"
32 #include "internal.h"
33 #include "g722.h"
34 #include "libavutil/common.h"
35 
36 #define FREEZE_INTERVAL 128
37 
38 /* This is an arbitrary value. Allowing insanely large values leads to strange
39  problems, so we limit it to a reasonable value */
40 #define MAX_FRAME_SIZE 32768
41 
42 /* We clip the value of avctx->trellis to prevent data type overflows and
43  undefined behavior. Using larger values is insanely slow anyway. */
44 #define MIN_TRELLIS 0
45 #define MAX_TRELLIS 16
46 
48 {
49  G722Context *c = avctx->priv_data;
50  int i;
51  for (i = 0; i < 2; i++) {
52  av_freep(&c->paths[i]);
53  av_freep(&c->node_buf[i]);
54  av_freep(&c->nodep_buf[i]);
55  }
56  return 0;
57 }
58 
60 {
61  G722Context *c = avctx->priv_data;
62  int ret;
63 
64  if (avctx->channels != 1) {
65  av_log(avctx, AV_LOG_ERROR, "Only mono tracks are allowed.\n");
66  return AVERROR_INVALIDDATA;
67  }
68 
69  c->band[0].scale_factor = 8;
70  c->band[1].scale_factor = 2;
71  c->prev_samples_pos = 22;
72 
73  if (avctx->trellis) {
74  int frontier = 1 << avctx->trellis;
75  int max_paths = frontier * FREEZE_INTERVAL;
76  int i;
77  for (i = 0; i < 2; i++) {
78  c->paths[i] = av_mallocz(max_paths * sizeof(**c->paths));
79  c->node_buf[i] = av_mallocz(2 * frontier * sizeof(**c->node_buf));
80  c->nodep_buf[i] = av_mallocz(2 * frontier * sizeof(**c->nodep_buf));
81  if (!c->paths[i] || !c->node_buf[i] || !c->nodep_buf[i]) {
82  ret = AVERROR(ENOMEM);
83  goto error;
84  }
85  }
86  }
87 
88  if (avctx->frame_size) {
89  /* validate frame size */
90  if (avctx->frame_size & 1 || avctx->frame_size > MAX_FRAME_SIZE) {
91  int new_frame_size;
92 
93  if (avctx->frame_size == 1)
94  new_frame_size = 2;
95  else if (avctx->frame_size > MAX_FRAME_SIZE)
96  new_frame_size = MAX_FRAME_SIZE;
97  else
98  new_frame_size = avctx->frame_size - 1;
99 
100  av_log(avctx, AV_LOG_WARNING, "Requested frame size is not "
101  "allowed. Using %d instead of %d\n", new_frame_size,
102  avctx->frame_size);
103  avctx->frame_size = new_frame_size;
104  }
105  } else {
106  /* This is arbitrary. We use 320 because it's 20ms @ 16kHz, which is
107  a common packet size for VoIP applications */
108  avctx->frame_size = 320;
109  }
110  avctx->initial_padding = 22;
111 
112  if (avctx->trellis) {
113  /* validate trellis */
114  if (avctx->trellis < MIN_TRELLIS || avctx->trellis > MAX_TRELLIS) {
115  int new_trellis = av_clip(avctx->trellis, MIN_TRELLIS, MAX_TRELLIS);
116  av_log(avctx, AV_LOG_WARNING, "Requested trellis value is not "
117  "allowed. Using %d instead of %d\n", new_trellis,
118  avctx->trellis);
119  avctx->trellis = new_trellis;
120  }
121  }
122 
123  return 0;
124 error:
125  g722_encode_close(avctx);
126  return ret;
127 }
128 
129 static const int16_t low_quant[33] = {
130  35, 72, 110, 150, 190, 233, 276, 323,
131  370, 422, 473, 530, 587, 650, 714, 786,
132  858, 940, 1023, 1121, 1219, 1339, 1458, 1612,
133  1765, 1980, 2195, 2557, 2919
134 };
135 
136 static inline void filter_samples(G722Context *c, const int16_t *samples,
137  int *xlow, int *xhigh)
138 {
139  int xout1, xout2;
140  c->prev_samples[c->prev_samples_pos++] = samples[0];
141  c->prev_samples[c->prev_samples_pos++] = samples[1];
142  ff_g722_apply_qmf(c->prev_samples + c->prev_samples_pos - 24, &xout1, &xout2);
143  *xlow = xout1 + xout2 >> 14;
144  *xhigh = xout1 - xout2 >> 14;
146  memmove(c->prev_samples,
147  c->prev_samples + c->prev_samples_pos - 22,
148  22 * sizeof(c->prev_samples[0]));
149  c->prev_samples_pos = 22;
150  }
151 }
152 
153 static inline int encode_high(const struct G722Band *state, int xhigh)
154 {
155  int diff = av_clip_int16(xhigh - state->s_predictor);
156  int pred = 141 * state->scale_factor >> 8;
157  /* = diff >= 0 ? (diff < pred) + 2 : diff >= -pred */
158  return ((diff ^ (diff >> (sizeof(diff)*8-1))) < pred) + 2*(diff >= 0);
159 }
160 
161 static inline int encode_low(const struct G722Band* state, int xlow)
162 {
163  int diff = av_clip_int16(xlow - state->s_predictor);
164  /* = diff >= 0 ? diff : -(diff + 1) */
165  int limit = diff ^ (diff >> (sizeof(diff)*8-1));
166  int i = 0;
167  limit = limit + 1 << 10;
168  if (limit > low_quant[8] * state->scale_factor)
169  i = 9;
170  while (i < 29 && limit > low_quant[i] * state->scale_factor)
171  i++;
172  return (diff < 0 ? (i < 2 ? 63 : 33) : 61) - i;
173 }
174 
175 static void g722_encode_trellis(G722Context *c, int trellis,
176  uint8_t *dst, int nb_samples,
177  const int16_t *samples)
178 {
179  int i, j, k;
180  int frontier = 1 << trellis;
181  struct TrellisNode **nodes[2];
182  struct TrellisNode **nodes_next[2];
183  int pathn[2] = {0, 0}, froze = -1;
184  struct TrellisPath *p[2];
185 
186  for (i = 0; i < 2; i++) {
187  nodes[i] = c->nodep_buf[i];
188  nodes_next[i] = c->nodep_buf[i] + frontier;
189  memset(c->nodep_buf[i], 0, 2 * frontier * sizeof(*c->nodep_buf[i]));
190  nodes[i][0] = c->node_buf[i] + frontier;
191  nodes[i][0]->ssd = 0;
192  nodes[i][0]->path = 0;
193  nodes[i][0]->state = c->band[i];
194  }
195 
196  for (i = 0; i < nb_samples >> 1; i++) {
197  int xlow, xhigh;
198  struct TrellisNode *next[2];
199  int heap_pos[2] = {0, 0};
200 
201  for (j = 0; j < 2; j++) {
202  next[j] = c->node_buf[j] + frontier*(i & 1);
203  memset(nodes_next[j], 0, frontier * sizeof(**nodes_next));
204  }
205 
206  filter_samples(c, &samples[2*i], &xlow, &xhigh);
207 
208  for (j = 0; j < frontier && nodes[0][j]; j++) {
209  /* Only k >> 2 affects the future adaptive state, therefore testing
210  * small steps that don't change k >> 2 is useless, the original
211  * value from encode_low is better than them. Since we step k
212  * in steps of 4, make sure range is a multiple of 4, so that
213  * we don't miss the original value from encode_low. */
214  int range = j < frontier/2 ? 4 : 0;
215  struct TrellisNode *cur_node = nodes[0][j];
216 
217  int ilow = encode_low(&cur_node->state, xlow);
218 
219  for (k = ilow - range; k <= ilow + range && k <= 63; k += 4) {
220  int decoded, dec_diff, pos;
221  uint32_t ssd;
222  struct TrellisNode* node;
223 
224  if (k < 0)
225  continue;
226 
227  decoded = av_clip((cur_node->state.scale_factor *
228  ff_g722_low_inv_quant6[k] >> 10)
229  + cur_node->state.s_predictor, -16384, 16383);
230  dec_diff = xlow - decoded;
231 
232 #define STORE_NODE(index, UPDATE, VALUE)\
233  ssd = cur_node->ssd + dec_diff*dec_diff;\
234  /* Check for wraparound. Using 64 bit ssd counters would \
235  * be simpler, but is slower on x86 32 bit. */\
236  if (ssd < cur_node->ssd)\
237  continue;\
238  if (heap_pos[index] < frontier) {\
239  pos = heap_pos[index]++;\
240  av_assert2(pathn[index] < FREEZE_INTERVAL * frontier);\
241  node = nodes_next[index][pos] = next[index]++;\
242  node->path = pathn[index]++;\
243  } else {\
244  /* Try to replace one of the leaf nodes with the new \
245  * one, but not always testing the same leaf position */\
246  pos = (frontier>>1) + (heap_pos[index] & ((frontier>>1) - 1));\
247  if (ssd >= nodes_next[index][pos]->ssd)\
248  continue;\
249  heap_pos[index]++;\
250  node = nodes_next[index][pos];\
251  }\
252  node->ssd = ssd;\
253  node->state = cur_node->state;\
254  UPDATE;\
255  c->paths[index][node->path].value = VALUE;\
256  c->paths[index][node->path].prev = cur_node->path;\
257  /* Sift the newly inserted node up in the heap to restore \
258  * the heap property */\
259  while (pos > 0) {\
260  int parent = (pos - 1) >> 1;\
261  if (nodes_next[index][parent]->ssd <= ssd)\
262  break;\
263  FFSWAP(struct TrellisNode*, nodes_next[index][parent],\
264  nodes_next[index][pos]);\
265  pos = parent;\
266  }
267  STORE_NODE(0, ff_g722_update_low_predictor(&node->state, k >> 2), k);
268  }
269  }
270 
271  for (j = 0; j < frontier && nodes[1][j]; j++) {
272  int ihigh;
273  struct TrellisNode *cur_node = nodes[1][j];
274 
275  /* We don't try to get any initial guess for ihigh via
276  * encode_high - since there's only 4 possible values, test
277  * them all. Testing all of these gives a much, much larger
278  * gain than testing a larger range around ilow. */
279  for (ihigh = 0; ihigh < 4; ihigh++) {
280  int dhigh, decoded, dec_diff, pos;
281  uint32_t ssd;
282  struct TrellisNode* node;
283 
284  dhigh = cur_node->state.scale_factor *
285  ff_g722_high_inv_quant[ihigh] >> 10;
286  decoded = av_clip(dhigh + cur_node->state.s_predictor,
287  -16384, 16383);
288  dec_diff = xhigh - decoded;
289 
290  STORE_NODE(1, ff_g722_update_high_predictor(&node->state, dhigh, ihigh), ihigh);
291  }
292  }
293 
294  for (j = 0; j < 2; j++) {
295  FFSWAP(struct TrellisNode**, nodes[j], nodes_next[j]);
296 
297  if (nodes[j][0]->ssd > (1 << 16)) {
298  for (k = 1; k < frontier && nodes[j][k]; k++)
299  nodes[j][k]->ssd -= nodes[j][0]->ssd;
300  nodes[j][0]->ssd = 0;
301  }
302  }
303 
304  if (i == froze + FREEZE_INTERVAL) {
305  p[0] = &c->paths[0][nodes[0][0]->path];
306  p[1] = &c->paths[1][nodes[1][0]->path];
307  for (j = i; j > froze; j--) {
308  dst[j] = p[1]->value << 6 | p[0]->value;
309  p[0] = &c->paths[0][p[0]->prev];
310  p[1] = &c->paths[1][p[1]->prev];
311  }
312  froze = i;
313  pathn[0] = pathn[1] = 0;
314  memset(nodes[0] + 1, 0, (frontier - 1)*sizeof(**nodes));
315  memset(nodes[1] + 1, 0, (frontier - 1)*sizeof(**nodes));
316  }
317  }
318 
319  p[0] = &c->paths[0][nodes[0][0]->path];
320  p[1] = &c->paths[1][nodes[1][0]->path];
321  for (j = i; j > froze; j--) {
322  dst[j] = p[1]->value << 6 | p[0]->value;
323  p[0] = &c->paths[0][p[0]->prev];
324  p[1] = &c->paths[1][p[1]->prev];
325  }
326  c->band[0] = nodes[0][0]->state;
327  c->band[1] = nodes[1][0]->state;
328 }
329 
331  const int16_t *samples)
332 {
333  int xlow, xhigh, ilow, ihigh;
334  filter_samples(c, samples, &xlow, &xhigh);
335  ihigh = encode_high(&c->band[1], xhigh);
336  ilow = encode_low (&c->band[0], xlow);
338  ff_g722_high_inv_quant[ihigh] >> 10, ihigh);
339  ff_g722_update_low_predictor(&c->band[0], ilow >> 2);
340  *dst = ihigh << 6 | ilow;
341 }
342 
344  uint8_t *dst, int nb_samples,
345  const int16_t *samples)
346 {
347  int i;
348  for (i = 0; i < nb_samples; i += 2)
349  encode_byte(c, dst++, &samples[i]);
350 }
351 
352 static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
353  const AVFrame *frame, int *got_packet_ptr)
354 {
355  G722Context *c = avctx->priv_data;
356  const int16_t *samples = (const int16_t *)frame->data[0];
357  int nb_samples, out_size, ret;
358 
359  out_size = (frame->nb_samples + 1) / 2;
360  if ((ret = ff_alloc_packet2(avctx, avpkt, out_size)) < 0)
361  return ret;
362 
363  nb_samples = frame->nb_samples - (frame->nb_samples & 1);
364 
365  if (avctx->trellis)
366  g722_encode_trellis(c, avctx->trellis, avpkt->data, nb_samples, samples);
367  else
368  g722_encode_no_trellis(c, avpkt->data, nb_samples, samples);
369 
370  /* handle last frame with odd frame_size */
371  if (nb_samples < frame->nb_samples) {
372  int16_t last_samples[2] = { samples[nb_samples], samples[nb_samples] };
373  encode_byte(c, &avpkt->data[nb_samples >> 1], last_samples);
374  }
375 
376  if (frame->pts != AV_NOPTS_VALUE)
377  avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
378  *got_packet_ptr = 1;
379  return 0;
380 }
381 
383  .name = "g722",
384  .long_name = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
385  .type = AVMEDIA_TYPE_AUDIO,
387  .priv_data_size = sizeof(G722Context),
390  .encode2 = g722_encode_frame,
391  .capabilities = CODEC_CAP_SMALL_LAST_FRAME,
392  .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
394 };