FFmpeg
g722enc.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) CMU 1993 Computer Science, Speech Group
3  * Chengxiang Lu and Alex Hauptmann
4  * Copyright (c) 2005 Steve Underwood <steveu at coppice.org>
5  * Copyright (c) 2009 Kenan Gillet
6  * Copyright (c) 2010 Martin Storsjo
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24 
25 /**
26  * @file
27  * G.722 ADPCM audio encoder
28  */
29 
30 #include "libavutil/avassert.h"
31 #include "avcodec.h"
32 #include "internal.h"
33 #include "g722.h"
34 #include "libavutil/common.h"
35 
36 #define FREEZE_INTERVAL 128
37 
38 /* This is an arbitrary value. Allowing insanely large values leads to strange
39  problems, so we limit it to a reasonable value */
40 #define MAX_FRAME_SIZE 32768
41 
42 /* We clip the value of avctx->trellis to prevent data type overflows and
43  undefined behavior. Using larger values is insanely slow anyway. */
44 #define MIN_TRELLIS 0
45 #define MAX_TRELLIS 16
46 
48 {
49  G722Context *c = avctx->priv_data;
50  int i;
51  for (i = 0; i < 2; i++) {
52  av_freep(&c->paths[i]);
53  av_freep(&c->node_buf[i]);
54  av_freep(&c->nodep_buf[i]);
55  }
56  return 0;
57 }
58 
60 {
61  G722Context *c = avctx->priv_data;
62 
63  c->band[0].scale_factor = 8;
64  c->band[1].scale_factor = 2;
65  c->prev_samples_pos = 22;
66 
67  if (avctx->frame_size) {
68  /* validate frame size */
69  if (avctx->frame_size & 1 || avctx->frame_size > MAX_FRAME_SIZE) {
70  int new_frame_size;
71 
72  if (avctx->frame_size == 1)
73  new_frame_size = 2;
74  else if (avctx->frame_size > MAX_FRAME_SIZE)
75  new_frame_size = MAX_FRAME_SIZE;
76  else
77  new_frame_size = avctx->frame_size - 1;
78 
79  av_log(avctx, AV_LOG_WARNING, "Requested frame size is not "
80  "allowed. Using %d instead of %d\n", new_frame_size,
81  avctx->frame_size);
82  avctx->frame_size = new_frame_size;
83  }
84  } else {
85  /* This is arbitrary. We use 320 because it's 20ms @ 16kHz, which is
86  a common packet size for VoIP applications */
87  avctx->frame_size = 320;
88  }
89  avctx->initial_padding = 22;
90 
91  if (avctx->trellis) {
92  /* validate trellis */
93  if (avctx->trellis < MIN_TRELLIS || avctx->trellis > MAX_TRELLIS) {
94  int new_trellis = av_clip(avctx->trellis, MIN_TRELLIS, MAX_TRELLIS);
95  av_log(avctx, AV_LOG_WARNING, "Requested trellis value is not "
96  "allowed. Using %d instead of %d\n", new_trellis,
97  avctx->trellis);
98  avctx->trellis = new_trellis;
99  }
100  if (avctx->trellis) {
101  int frontier = 1 << avctx->trellis;
102  int max_paths = frontier * FREEZE_INTERVAL;
103 
104  for (int i = 0; i < 2; i++) {
105  c->paths[i] = av_calloc(max_paths, sizeof(**c->paths));
106  c->node_buf[i] = av_calloc(frontier, 2 * sizeof(**c->node_buf));
107  c->nodep_buf[i] = av_calloc(frontier, 2 * sizeof(**c->nodep_buf));
108  if (!c->paths[i] || !c->node_buf[i] || !c->nodep_buf[i])
109  return AVERROR(ENOMEM);
110  }
111  }
112  }
113 
114  ff_g722dsp_init(&c->dsp);
115 
116  return 0;
117 }
118 
/* Decision thresholds for the low sub-band quantizer, multiplied by the
 * adaptive scale factor in encode_low(). Declared with 33 slots but only
 * 29 values are provided; encode_low() only indexes 0..28 (its search loop
 * stops at i < 29), so the zero-filled tail is never read. */
static const int16_t low_quant[33] = {
      35,   72,  110,  150,  190,  233,  276,  323,
     370,  422,  473,  530,  587,  650,  714,  786,
     858,  940, 1023, 1121, 1219, 1339, 1458, 1612,
    1765, 1980, 2195, 2557, 2919
};
125 
126 static inline void filter_samples(G722Context *c, const int16_t *samples,
127  int *xlow, int *xhigh)
128 {
129  int xout[2];
130  c->prev_samples[c->prev_samples_pos++] = samples[0];
131  c->prev_samples[c->prev_samples_pos++] = samples[1];
132  c->dsp.apply_qmf(c->prev_samples + c->prev_samples_pos - 24, xout);
133  *xlow = xout[0] + xout[1] >> 14;
134  *xhigh = xout[0] - xout[1] >> 14;
136  memmove(c->prev_samples,
137  c->prev_samples + c->prev_samples_pos - 22,
138  22 * sizeof(c->prev_samples[0]));
139  c->prev_samples_pos = 22;
140  }
141 }
142 
143 static inline int encode_high(const struct G722Band *state, int xhigh)
144 {
145  int diff = av_clip_int16(xhigh - state->s_predictor);
146  int pred = 141 * state->scale_factor >> 8;
147  /* = diff >= 0 ? (diff < pred) + 2 : diff >= -pred */
148  return ((diff ^ (diff >> (sizeof(diff)*8-1))) < pred) + 2*(diff >= 0);
149 }
150 
151 static inline int encode_low(const struct G722Band* state, int xlow)
152 {
153  int diff = av_clip_int16(xlow - state->s_predictor);
154  /* = diff >= 0 ? diff : -(diff + 1) */
155  int limit = diff ^ (diff >> (sizeof(diff)*8-1));
156  int i = 0;
157  limit = limit + 1 << 10;
158  if (limit > low_quant[8] * state->scale_factor)
159  i = 9;
160  while (i < 29 && limit > low_quant[i] * state->scale_factor)
161  i++;
162  return (diff < 0 ? (i < 2 ? 63 : 33) : 61) - i;
163 }
164 
/**
 * Trellis encoder: instead of the greedy per-pair choice made by
 * encode_byte(), keep up to 2^trellis candidate encoder states per sub-band
 * and, per FREEZE_INTERVAL sample pairs, commit to the candidate path with
 * the smallest accumulated squared decoding error. Emits one output byte
 * per two input samples.
 *
 * @param c          codec context holding band states and trellis buffers
 * @param trellis    log2 of the candidate-set ("frontier") size
 * @param dst        output buffer
 * @param nb_samples number of input samples to consume (even)
 * @param samples    input PCM samples
 */
static void g722_encode_trellis(G722Context *c, int trellis,
                                uint8_t *dst, int nb_samples,
                                const int16_t *samples)
{
    int i, j, k;
    int frontier = 1 << trellis;
    struct TrellisNode **nodes[2];      /* surviving candidates, per band */
    struct TrellisNode **nodes_next[2]; /* candidates built for next step */
    int pathn[2] = {0, 0}, froze = -1;  /* froze = last flushed pair index */
    struct TrellisPath *p[2];

    /* Seed each band's search from its single real state in c->band[]. */
    for (i = 0; i < 2; i++) {
        nodes[i] = c->nodep_buf[i];
        nodes_next[i] = c->nodep_buf[i] + frontier;
        memset(c->nodep_buf[i], 0, 2 * frontier * sizeof(*c->nodep_buf[i]));
        nodes[i][0] = c->node_buf[i] + frontier;
        nodes[i][0]->ssd = 0;
        nodes[i][0]->path = 0;
        nodes[i][0]->state = c->band[i];
    }

    /* One iteration per pair of input samples (= one output byte). */
    for (i = 0; i < nb_samples >> 1; i++) {
        int xlow, xhigh;
        struct TrellisNode *next[2];
        int heap_pos[2] = {0, 0};

        /* next[] selects the ping-pong half of node_buf used this step */
        for (j = 0; j < 2; j++) {
            next[j] = c->node_buf[j] + frontier*(i & 1);
            memset(nodes_next[j], 0, frontier * sizeof(**nodes_next));
        }

        filter_samples(c, &samples[2*i], &xlow, &xhigh);

        /* Low band: try quantizer indices around the greedy encode_low()
         * choice for each surviving candidate state. */
        for (j = 0; j < frontier && nodes[0][j]; j++) {
            /* Only k >> 2 affects the future adaptive state, therefore testing
             * small steps that don't change k >> 2 is useless, the original
             * value from encode_low is better than them. Since we step k
             * in steps of 4, make sure range is a multiple of 4, so that
             * we don't miss the original value from encode_low. */
            int range = j < frontier/2 ? 4 : 0;
            struct TrellisNode *cur_node = nodes[0][j];

            int ilow = encode_low(&cur_node->state, xlow);

            for (k = ilow - range; k <= ilow + range && k <= 63; k += 4) {
                int decoded, dec_diff, pos;
                uint32_t ssd;
                struct TrellisNode* node;

                if (k < 0)
                    continue;

                /* what the decoder would reconstruct for index k */
                decoded = av_clip_intp2((cur_node->state.scale_factor *
                                        ff_g722_low_inv_quant6[k] >> 10)
                                      + cur_node->state.s_predictor, 14);
                dec_diff = xlow - decoded;

/* Insert the candidate into the nodes_next[index] min-heap: grow the heap
 * while there is room, otherwise replace a rotating leaf when the new ssd
 * is better; then run UPDATE on the copied state and record VALUE on the
 * candidate's backtracking path. Relies on ssd/dec_diff/pos/node/cur_node
 * being in scope at the expansion site. */
#define STORE_NODE(index, UPDATE, VALUE)\
            ssd = cur_node->ssd + dec_diff*dec_diff;\
            /* Check for wraparound. Using 64 bit ssd counters would \
             * be simpler, but is slower on x86 32 bit. */\
            if (ssd < cur_node->ssd)\
                continue;\
            if (heap_pos[index] < frontier) {\
                pos = heap_pos[index]++;\
                av_assert2(pathn[index] < FREEZE_INTERVAL * frontier);\
                node = nodes_next[index][pos] = next[index]++;\
                node->path = pathn[index]++;\
            } else {\
                /* Try to replace one of the leaf nodes with the new \
                 * one, but not always testing the same leaf position */\
                pos = (frontier>>1) + (heap_pos[index] & ((frontier>>1) - 1));\
                if (ssd >= nodes_next[index][pos]->ssd)\
                    continue;\
                heap_pos[index]++;\
                node = nodes_next[index][pos];\
            }\
            node->ssd = ssd;\
            node->state = cur_node->state;\
            UPDATE;\
            c->paths[index][node->path].value = VALUE;\
            c->paths[index][node->path].prev = cur_node->path;\
            /* Sift the newly inserted node up in the heap to restore \
             * the heap property */\
            while (pos > 0) {\
                int parent = (pos - 1) >> 1;\
                if (nodes_next[index][parent]->ssd <= ssd)\
                    break;\
                FFSWAP(struct TrellisNode*, nodes_next[index][parent],\
                                            nodes_next[index][pos]);\
                pos = parent;\
            }
                STORE_NODE(0, ff_g722_update_low_predictor(&node->state, k >> 2), k);
            }
        }

        /* High band: only 4 possible 2-bit indices, so test them all. */
        for (j = 0; j < frontier && nodes[1][j]; j++) {
            int ihigh;
            struct TrellisNode *cur_node = nodes[1][j];

            /* We don't try to get any initial guess for ihigh via
             * encode_high - since there's only 4 possible values, test
             * them all. Testing all of these gives a much, much larger
             * gain than testing a larger range around ilow. */
            for (ihigh = 0; ihigh < 4; ihigh++) {
                int dhigh, decoded, dec_diff, pos;
                uint32_t ssd;
                struct TrellisNode* node;

                dhigh = cur_node->state.scale_factor *
                        ff_g722_high_inv_quant[ihigh] >> 10;
                decoded = av_clip_intp2(dhigh + cur_node->state.s_predictor, 14);
                dec_diff = xhigh - decoded;

                STORE_NODE(1, ff_g722_update_high_predictor(&node->state, dhigh, ihigh), ihigh);
            }
        }

        /* Swap in the new candidate sets and rebase the error counters so
         * the 32-bit ssd accumulators stay far from wrapping. */
        for (j = 0; j < 2; j++) {
            FFSWAP(struct TrellisNode**, nodes[j], nodes_next[j]);

            if (nodes[j][0]->ssd > (1 << 16)) {
                for (k = 1; k < frontier && nodes[j][k]; k++)
                    nodes[j][k]->ssd -= nodes[j][0]->ssd;
                nodes[j][0]->ssd = 0;
            }
        }

        /* Periodically commit ("freeze") the current best path: backtrack
         * it into dst and reset path bookkeeping so c->paths (sized
         * frontier * FREEZE_INTERVAL entries) cannot overflow. */
        if (i == froze + FREEZE_INTERVAL) {
            p[0] = &c->paths[0][nodes[0][0]->path];
            p[1] = &c->paths[1][nodes[1][0]->path];
            for (j = i; j > froze; j--) {
                /* 2 high-band bits over 6 low-band bits per byte */
                dst[j] = p[1]->value << 6 | p[0]->value;
                p[0] = &c->paths[0][p[0]->prev];
                p[1] = &c->paths[1][p[1]->prev];
            }
            froze = i;
            pathn[0] = pathn[1] = 0;
            memset(nodes[0] + 1, 0, (frontier - 1)*sizeof(**nodes));
            memset(nodes[1] + 1, 0, (frontier - 1)*sizeof(**nodes));
        }
    }

    /* Flush whatever remains after the last freeze by backtracking from the
     * best surviving node of each band.
     * NOTE(review): at this point i == nb_samples/2 (one past the last
     * processed pair), while in the in-loop flush above i is the last
     * processed pair, yet both loops start at j = i — confirm the dst
     * indexing/bounds of this final segment against the reference code. */
    p[0] = &c->paths[0][nodes[0][0]->path];
    p[1] = &c->paths[1][nodes[1][0]->path];
    for (j = i; j > froze; j--) {
        dst[j] = p[1]->value << 6 | p[0]->value;
        p[0] = &c->paths[0][p[0]->prev];
        p[1] = &c->paths[1][p[1]->prev];
    }
    /* carry the winning states over as the real encoder state */
    c->band[0] = nodes[0][0]->state;
    c->band[1] = nodes[1][0]->state;
}
318 
320  const int16_t *samples)
321 {
322  int xlow, xhigh, ilow, ihigh;
323  filter_samples(c, samples, &xlow, &xhigh);
324  ihigh = encode_high(&c->band[1], xhigh);
325  ilow = encode_low (&c->band[0], xlow);
327  ff_g722_high_inv_quant[ihigh] >> 10, ihigh);
328  ff_g722_update_low_predictor(&c->band[0], ilow >> 2);
329  *dst = ihigh << 6 | ilow;
330 }
331 
333  uint8_t *dst, int nb_samples,
334  const int16_t *samples)
335 {
336  int i;
337  for (i = 0; i < nb_samples; i += 2)
338  encode_byte(c, dst++, &samples[i]);
339 }
340 
341 static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
342  const AVFrame *frame, int *got_packet_ptr)
343 {
344  G722Context *c = avctx->priv_data;
345  const int16_t *samples = (const int16_t *)frame->data[0];
346  int nb_samples, out_size, ret;
347 
348  out_size = (frame->nb_samples + 1) / 2;
349  if ((ret = ff_alloc_packet2(avctx, avpkt, out_size, 0)) < 0)
350  return ret;
351 
352  nb_samples = frame->nb_samples - (frame->nb_samples & 1);
353 
354  if (avctx->trellis)
355  g722_encode_trellis(c, avctx->trellis, avpkt->data, nb_samples, samples);
356  else
357  g722_encode_no_trellis(c, avpkt->data, nb_samples, samples);
358 
359  /* handle last frame with odd frame_size */
360  if (nb_samples < frame->nb_samples) {
361  int16_t last_samples[2] = { samples[nb_samples], samples[nb_samples] };
362  encode_byte(c, &avpkt->data[nb_samples >> 1], last_samples);
363  }
364 
365  if (frame->pts != AV_NOPTS_VALUE)
366  avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
367  *got_packet_ptr = 1;
368  return 0;
369 }
370 
372  .name = "g722",
373  .long_name = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
374  .type = AVMEDIA_TYPE_AUDIO,
376  .priv_data_size = sizeof(G722Context),
378  .close = g722_encode_close,
379  .encode2 = g722_encode_frame,
380  .capabilities = AV_CODEC_CAP_SMALL_LAST_FRAME,
382  .channel_layouts = (const uint64_t[]){ AV_CH_LAYOUT_MONO, 0 },
383  .caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
384 };
#define FF_CODEC_CAP_INIT_CLEANUP
The codec allows calling the close function for deallocation even if the init function returned a fai...
Definition: internal.h:49
struct G722Context::TrellisNode ** nodep_buf[2]
int path
Definition: adpcmenc.c:47
This structure describes decoded (raw) audio or video data.
Definition: frame.h:318
#define AV_LOG_WARNING
Something somehow does not look correct.
Definition: log.h:200
static av_cold int init(AVCodecContext *avctx)
Definition: avrndec.c:31
#define av_clip_intp2
Definition: common.h:143
struct G722Context::TrellisPath * paths[2]
int out_size
Definition: movenc.c:55
static void filter_samples(G722Context *c, const int16_t *samples, int *xlow, int *xhigh)
Definition: g722enc.c:126
#define MIN_TRELLIS
Definition: g722enc.c:44
AVCodec.
Definition: codec.h:197
static void g722_encode_no_trellis(G722Context *c, uint8_t *dst, int nb_samples, const int16_t *samples)
Definition: g722enc.c:332
void * av_calloc(size_t nmemb, size_t size)
Non-inlined equivalent of av_mallocz_array().
Definition: mem.c:245
void(* apply_qmf)(const int16_t *prev_samples, int xout[2])
Definition: g722dsp.h:27
int ff_alloc_packet2(AVCodecContext *avctx, AVPacket *avpkt, int64_t size, int64_t min_size)
Check AVPacket size and/or allocate data.
Definition: encode.c:33
static int encode_high(const struct G722Band *state, int xhigh)
Definition: g722enc.c:143
uint8_t
#define av_cold
Definition: attributes.h:88
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
#define PREV_SAMPLES_BUF_SIZE
Definition: g722.h:32
int64_t pts
Presentation timestamp in time_base units (time when frame should be shown to user).
Definition: frame.h:411
static av_cold int g722_encode_init(AVCodecContext *avctx)
Definition: g722enc.c:59
#define av_clip
Definition: common.h:122
uint8_t * data
Definition: packet.h:369
static struct @321 state
#define av_log(a,...)
uint32_t ssd
Definition: adpcmenc.c:46
struct G722Context::TrellisNode * node_buf[2]
const int16_t ff_g722_low_inv_quant6[64]
Definition: g722.c:63
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
Definition: internal.h:117
unsigned int pos
Definition: spdifenc.c:412
int initial_padding
Audio only.
Definition: avcodec.h:2062
#define av_clip_int16
Definition: common.h:137
int16_t prev_samples[PREV_SAMPLES_BUF_SIZE]
memory of past decoded samples
Definition: g722.h:37
simple assert() macros that are a bit more flexible than ISO C assert().
AVCodec ff_adpcm_g722_encoder
Definition: g722enc.c:371
const char * name
Name of the codec implementation.
Definition: codec.h:204
#define FREEZE_INTERVAL
Definition: g722enc.c:36
struct G722Context::G722Band band[2]
#define AV_CODEC_CAP_SMALL_LAST_FRAME
Codec can be fed a final frame with a smaller size.
Definition: codec.h:82
static void g722_encode_trellis(G722Context *c, int trellis, uint8_t *dst, int nb_samples, const int16_t *samples)
Definition: g722enc.c:165
#define MAX_FRAME_SIZE
Definition: g722enc.c:40
static av_cold int g722_encode_close(AVCodecContext *avctx)
Definition: g722enc.c:47
static int encode_low(const struct G722Band *state, int xlow)
Definition: g722enc.c:151
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame For filters that do not use the this method is called when a frame is wanted on an output For a it should directly call filter_frame on the corresponding output For a if there are queued frames already one of these frames should be pushed If the filter should request a frame on one of its repeatedly until at least one frame has been pushed Return or at least make progress towards producing a frame
void ff_g722_update_low_predictor(struct G722Band *band, const int ilow)
Definition: g722.c:143
static const float pred[4]
Definition: siprdata.h:259
G722DSPContext dsp
Definition: g722.h:66
int frame_size
Number of samples per channel in an audio frame.
Definition: avcodec.h:1216
static const int16_t low_quant[33]
Definition: g722enc.c:119
Libavcodec external API header.
AVSampleFormat
Audio sample formats.
Definition: samplefmt.h:58
main external API structure.
Definition: avcodec.h:536
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:332
common internal api header.
common internal and external API header
signed 16 bits
Definition: samplefmt.h:61
int prev_samples_pos
the number of values in prev_samples
Definition: g722.h:38
int trellis
trellis RD quantization
Definition: avcodec.h:1487
void * priv_data
Definition: avcodec.h:563
#define STORE_NODE(index, UPDATE, VALUE)
static av_always_inline int diff(const uint32_t a, const uint32_t b)
const int16_t ff_g722_high_inv_quant[4]
Definition: g722.c:51
av_cold void ff_g722dsp_init(G722DSPContext *c)
Definition: g722dsp.c:68
static av_always_inline void encode_byte(G722Context *c, uint8_t *dst, const int16_t *samples)
Definition: g722enc.c:319
static enum AVSampleFormat sample_fmts[]
Definition: adpcmenc.c:941
Filter the word “frame” indicates either a video frame or a group of audio samples
#define av_freep(p)
static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, const AVFrame *frame, int *got_packet_ptr)
Definition: g722enc.c:341
#define av_always_inline
Definition: attributes.h:45
static av_always_inline int64_t ff_samples_to_time_base(AVCodecContext *avctx, int64_t samples)
Rescale from sample rate to AVCodecContext.time_base.
Definition: internal.h:272
#define MAX_TRELLIS
Definition: g722enc.c:45
#define FFSWAP(type, a, b)
Definition: common.h:108
void ff_g722_update_high_predictor(struct G722Band *band, const int dhigh, const int ihigh)
Definition: g722.c:154
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later.That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another.Frame references ownership and permissions
#define AV_CH_LAYOUT_MONO
This structure stores compressed data.
Definition: packet.h:346
int16_t scale_factor
delayed quantizer scale factor
Definition: g722.h:52
int nb_samples
number of audio samples (per channel) described by this frame
Definition: frame.h:384
int64_t pts
Presentation timestamp in AVStream->time_base units; the time at which the decompressed packet will b...
Definition: packet.h:362
int i
Definition: input.c:407
#define AV_NOPTS_VALUE
Undefined timestamp value.
Definition: avutil.h:248