FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
g722enc.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) CMU 1993 Computer Science, Speech Group
3  * Chengxiang Lu and Alex Hauptmann
4  * Copyright (c) 2005 Steve Underwood <steveu at coppice.org>
5  * Copyright (c) 2009 Kenan Gillet
6  * Copyright (c) 2010 Martin Storsjo
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24 
25 /**
26  * @file
27  * G.722 ADPCM audio encoder
28  */
29 
30 #include "libavutil/avassert.h"
31 #include "avcodec.h"
32 #include "internal.h"
33 #include "g722.h"
34 #include "libavutil/common.h"
35 
/* Number of encoded bytes after which the trellis search commits the best
 * path found so far to the output and restarts its path bookkeeping. It
 * also sizes the per-band path buffers (frontier * FREEZE_INTERVAL). */
#define FREEZE_INTERVAL 128

/* Upper bound for avctx->frame_size. The exact number is arbitrary; it only
 * exists because absurdly large frame sizes cause odd problems. */
#define MAX_FRAME_SIZE 32768

/* Allowed range for avctx->trellis. Clipping keeps the derived buffer sizes
 * free of data type overflows and undefined behavior; values near the top
 * of the range are extremely slow anyway. */
#define MIN_TRELLIS 0
#define MAX_TRELLIS 16
46 
48 {
49  G722Context *c = avctx->priv_data;
50  int i;
51  for (i = 0; i < 2; i++) {
52  av_freep(&c->paths[i]);
53  av_freep(&c->node_buf[i]);
54  av_freep(&c->nodep_buf[i]);
55  }
56  return 0;
57 }
58 
60 {
61  G722Context *c = avctx->priv_data;
62  int ret;
63 
64  if (avctx->channels != 1) {
65  av_log(avctx, AV_LOG_ERROR, "Only mono tracks are allowed.\n");
66  return AVERROR_INVALIDDATA;
67  }
68 
69  c->band[0].scale_factor = 8;
70  c->band[1].scale_factor = 2;
71  c->prev_samples_pos = 22;
72 
73  if (avctx->trellis) {
74  int frontier = 1 << avctx->trellis;
75  int max_paths = frontier * FREEZE_INTERVAL;
76  int i;
77  for (i = 0; i < 2; i++) {
78  c->paths[i] = av_mallocz_array(max_paths, sizeof(**c->paths));
79  c->node_buf[i] = av_mallocz_array(frontier, 2 * sizeof(**c->node_buf));
80  c->nodep_buf[i] = av_mallocz_array(frontier, 2 * sizeof(**c->nodep_buf));
81  if (!c->paths[i] || !c->node_buf[i] || !c->nodep_buf[i]) {
82  ret = AVERROR(ENOMEM);
83  goto error;
84  }
85  }
86  }
87 
88  if (avctx->frame_size) {
89  /* validate frame size */
90  if (avctx->frame_size & 1 || avctx->frame_size > MAX_FRAME_SIZE) {
91  int new_frame_size;
92 
93  if (avctx->frame_size == 1)
94  new_frame_size = 2;
95  else if (avctx->frame_size > MAX_FRAME_SIZE)
96  new_frame_size = MAX_FRAME_SIZE;
97  else
98  new_frame_size = avctx->frame_size - 1;
99 
100  av_log(avctx, AV_LOG_WARNING, "Requested frame size is not "
101  "allowed. Using %d instead of %d\n", new_frame_size,
102  avctx->frame_size);
103  avctx->frame_size = new_frame_size;
104  }
105  } else {
106  /* This is arbitrary. We use 320 because it's 20ms @ 16kHz, which is
107  a common packet size for VoIP applications */
108  avctx->frame_size = 320;
109  }
110  avctx->initial_padding = 22;
111 
112  if (avctx->trellis) {
113  /* validate trellis */
114  if (avctx->trellis < MIN_TRELLIS || avctx->trellis > MAX_TRELLIS) {
115  int new_trellis = av_clip(avctx->trellis, MIN_TRELLIS, MAX_TRELLIS);
116  av_log(avctx, AV_LOG_WARNING, "Requested trellis value is not "
117  "allowed. Using %d instead of %d\n", new_trellis,
118  avctx->trellis);
119  avctx->trellis = new_trellis;
120  }
121  }
122 
123  ff_g722dsp_init(&c->dsp);
124 
125  return 0;
126 error:
127  g722_encode_close(avctx);
128  return ret;
129 }
130 
/* Decision thresholds for the low-band quantizer search in encode_low().
 * Only the first 29 entries carry values; the array is declared with 33
 * elements, so the remaining four are zero-initialised. encode_low() never
 * indexes past entry 28. */
static const int16_t low_quant[33] = {
      35,   72,  110,  150,  190,
     233,  276,  323,  370,  422,
     473,  530,  587,  650,  714,
     786,  858,  940, 1023, 1121,
    1219, 1339, 1458, 1612, 1765,
    1980, 2195, 2557, 2919
};
137 
138 static inline void filter_samples(G722Context *c, const int16_t *samples,
139  int *xlow, int *xhigh)
140 {
141  int xout[2];
142  c->prev_samples[c->prev_samples_pos++] = samples[0];
143  c->prev_samples[c->prev_samples_pos++] = samples[1];
144  c->dsp.apply_qmf(c->prev_samples + c->prev_samples_pos - 24, xout);
145  *xlow = xout[0] + xout[1] >> 14;
146  *xhigh = xout[0] - xout[1] >> 14;
148  memmove(c->prev_samples,
149  c->prev_samples + c->prev_samples_pos - 22,
150  22 * sizeof(c->prev_samples[0]));
151  c->prev_samples_pos = 22;
152  }
153 }
154 
155 static inline int encode_high(const struct G722Band *state, int xhigh)
156 {
157  int diff = av_clip_int16(xhigh - state->s_predictor);
158  int pred = 141 * state->scale_factor >> 8;
159  /* = diff >= 0 ? (diff < pred) + 2 : diff >= -pred */
160  return ((diff ^ (diff >> (sizeof(diff)*8-1))) < pred) + 2*(diff >= 0);
161 }
162 
163 static inline int encode_low(const struct G722Band* state, int xlow)
164 {
165  int diff = av_clip_int16(xlow - state->s_predictor);
166  /* = diff >= 0 ? diff : -(diff + 1) */
167  int limit = diff ^ (diff >> (sizeof(diff)*8-1));
168  int i = 0;
169  limit = limit + 1 << 10;
170  if (limit > low_quant[8] * state->scale_factor)
171  i = 9;
172  while (i < 29 && limit > low_quant[i] * state->scale_factor)
173  i++;
174  return (diff < 0 ? (i < 2 ? 63 : 33) : 61) - i;
175 }
176 
177 static void g722_encode_trellis(G722Context *c, int trellis,
178  uint8_t *dst, int nb_samples,
179  const int16_t *samples)
180 {
181  int i, j, k;
182  int frontier = 1 << trellis;
183  struct TrellisNode **nodes[2];
184  struct TrellisNode **nodes_next[2];
185  int pathn[2] = {0, 0}, froze = -1;
186  struct TrellisPath *p[2];
187 
188  for (i = 0; i < 2; i++) {
189  nodes[i] = c->nodep_buf[i];
190  nodes_next[i] = c->nodep_buf[i] + frontier;
191  memset(c->nodep_buf[i], 0, 2 * frontier * sizeof(*c->nodep_buf[i]));
192  nodes[i][0] = c->node_buf[i] + frontier;
193  nodes[i][0]->ssd = 0;
194  nodes[i][0]->path = 0;
195  nodes[i][0]->state = c->band[i];
196  }
197 
198  for (i = 0; i < nb_samples >> 1; i++) {
199  int xlow, xhigh;
200  struct TrellisNode *next[2];
201  int heap_pos[2] = {0, 0};
202 
203  for (j = 0; j < 2; j++) {
204  next[j] = c->node_buf[j] + frontier*(i & 1);
205  memset(nodes_next[j], 0, frontier * sizeof(**nodes_next));
206  }
207 
208  filter_samples(c, &samples[2*i], &xlow, &xhigh);
209 
210  for (j = 0; j < frontier && nodes[0][j]; j++) {
211  /* Only k >> 2 affects the future adaptive state, therefore testing
212  * small steps that don't change k >> 2 is useless, the original
213  * value from encode_low is better than them. Since we step k
214  * in steps of 4, make sure range is a multiple of 4, so that
215  * we don't miss the original value from encode_low. */
216  int range = j < frontier/2 ? 4 : 0;
217  struct TrellisNode *cur_node = nodes[0][j];
218 
219  int ilow = encode_low(&cur_node->state, xlow);
220 
221  for (k = ilow - range; k <= ilow + range && k <= 63; k += 4) {
222  int decoded, dec_diff, pos;
223  uint32_t ssd;
224  struct TrellisNode* node;
225 
226  if (k < 0)
227  continue;
228 
229  decoded = av_clip_intp2((cur_node->state.scale_factor *
230  ff_g722_low_inv_quant6[k] >> 10)
231  + cur_node->state.s_predictor, 14);
232  dec_diff = xlow - decoded;
233 
234 #define STORE_NODE(index, UPDATE, VALUE)\
235  ssd = cur_node->ssd + dec_diff*dec_diff;\
236  /* Check for wraparound. Using 64 bit ssd counters would \
237  * be simpler, but is slower on x86 32 bit. */\
238  if (ssd < cur_node->ssd)\
239  continue;\
240  if (heap_pos[index] < frontier) {\
241  pos = heap_pos[index]++;\
242  av_assert2(pathn[index] < FREEZE_INTERVAL * frontier);\
243  node = nodes_next[index][pos] = next[index]++;\
244  node->path = pathn[index]++;\
245  } else {\
246  /* Try to replace one of the leaf nodes with the new \
247  * one, but not always testing the same leaf position */\
248  pos = (frontier>>1) + (heap_pos[index] & ((frontier>>1) - 1));\
249  if (ssd >= nodes_next[index][pos]->ssd)\
250  continue;\
251  heap_pos[index]++;\
252  node = nodes_next[index][pos];\
253  }\
254  node->ssd = ssd;\
255  node->state = cur_node->state;\
256  UPDATE;\
257  c->paths[index][node->path].value = VALUE;\
258  c->paths[index][node->path].prev = cur_node->path;\
259  /* Sift the newly inserted node up in the heap to restore \
260  * the heap property */\
261  while (pos > 0) {\
262  int parent = (pos - 1) >> 1;\
263  if (nodes_next[index][parent]->ssd <= ssd)\
264  break;\
265  FFSWAP(struct TrellisNode*, nodes_next[index][parent],\
266  nodes_next[index][pos]);\
267  pos = parent;\
268  }
269  STORE_NODE(0, ff_g722_update_low_predictor(&node->state, k >> 2), k);
270  }
271  }
272 
273  for (j = 0; j < frontier && nodes[1][j]; j++) {
274  int ihigh;
275  struct TrellisNode *cur_node = nodes[1][j];
276 
277  /* We don't try to get any initial guess for ihigh via
278  * encode_high - since there's only 4 possible values, test
279  * them all. Testing all of these gives a much, much larger
280  * gain than testing a larger range around ilow. */
281  for (ihigh = 0; ihigh < 4; ihigh++) {
282  int dhigh, decoded, dec_diff, pos;
283  uint32_t ssd;
284  struct TrellisNode* node;
285 
286  dhigh = cur_node->state.scale_factor *
287  ff_g722_high_inv_quant[ihigh] >> 10;
288  decoded = av_clip_intp2(dhigh + cur_node->state.s_predictor, 14);
289  dec_diff = xhigh - decoded;
290 
291  STORE_NODE(1, ff_g722_update_high_predictor(&node->state, dhigh, ihigh), ihigh);
292  }
293  }
294 
295  for (j = 0; j < 2; j++) {
296  FFSWAP(struct TrellisNode**, nodes[j], nodes_next[j]);
297 
298  if (nodes[j][0]->ssd > (1 << 16)) {
299  for (k = 1; k < frontier && nodes[j][k]; k++)
300  nodes[j][k]->ssd -= nodes[j][0]->ssd;
301  nodes[j][0]->ssd = 0;
302  }
303  }
304 
305  if (i == froze + FREEZE_INTERVAL) {
306  p[0] = &c->paths[0][nodes[0][0]->path];
307  p[1] = &c->paths[1][nodes[1][0]->path];
308  for (j = i; j > froze; j--) {
309  dst[j] = p[1]->value << 6 | p[0]->value;
310  p[0] = &c->paths[0][p[0]->prev];
311  p[1] = &c->paths[1][p[1]->prev];
312  }
313  froze = i;
314  pathn[0] = pathn[1] = 0;
315  memset(nodes[0] + 1, 0, (frontier - 1)*sizeof(**nodes));
316  memset(nodes[1] + 1, 0, (frontier - 1)*sizeof(**nodes));
317  }
318  }
319 
320  p[0] = &c->paths[0][nodes[0][0]->path];
321  p[1] = &c->paths[1][nodes[1][0]->path];
322  for (j = i; j > froze; j--) {
323  dst[j] = p[1]->value << 6 | p[0]->value;
324  p[0] = &c->paths[0][p[0]->prev];
325  p[1] = &c->paths[1][p[1]->prev];
326  }
327  c->band[0] = nodes[0][0]->state;
328  c->band[1] = nodes[1][0]->state;
329 }
330 
332  const int16_t *samples)
333 {
334  int xlow, xhigh, ilow, ihigh;
335  filter_samples(c, samples, &xlow, &xhigh);
336  ihigh = encode_high(&c->band[1], xhigh);
337  ilow = encode_low (&c->band[0], xlow);
339  ff_g722_high_inv_quant[ihigh] >> 10, ihigh);
340  ff_g722_update_low_predictor(&c->band[0], ilow >> 2);
341  *dst = ihigh << 6 | ilow;
342 }
343 
345  uint8_t *dst, int nb_samples,
346  const int16_t *samples)
347 {
348  int i;
349  for (i = 0; i < nb_samples; i += 2)
350  encode_byte(c, dst++, &samples[i]);
351 }
352 
353 static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
354  const AVFrame *frame, int *got_packet_ptr)
355 {
356  G722Context *c = avctx->priv_data;
357  const int16_t *samples = (const int16_t *)frame->data[0];
358  int nb_samples, out_size, ret;
359 
360  out_size = (frame->nb_samples + 1) / 2;
361  if ((ret = ff_alloc_packet2(avctx, avpkt, out_size, 0)) < 0)
362  return ret;
363 
364  nb_samples = frame->nb_samples - (frame->nb_samples & 1);
365 
366  if (avctx->trellis)
367  g722_encode_trellis(c, avctx->trellis, avpkt->data, nb_samples, samples);
368  else
369  g722_encode_no_trellis(c, avpkt->data, nb_samples, samples);
370 
371  /* handle last frame with odd frame_size */
372  if (nb_samples < frame->nb_samples) {
373  int16_t last_samples[2] = { samples[nb_samples], samples[nb_samples] };
374  encode_byte(c, &avpkt->data[nb_samples >> 1], last_samples);
375  }
376 
377  if (frame->pts != AV_NOPTS_VALUE)
378  avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
379  *got_packet_ptr = 1;
380  return 0;
381 }
382 
384  .name = "g722",
385  .long_name = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
386  .type = AVMEDIA_TYPE_AUDIO,
388  .priv_data_size = sizeof(G722Context),
390  .close = g722_encode_close,
391  .encode2 = g722_encode_frame,
392  .capabilities = AV_CODEC_CAP_SMALL_LAST_FRAME,
393  .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
395 };
#define AVERROR_INVALIDDATA
Invalid data found when processing input.
Definition: error.h:59
struct G722Context::TrellisNode ** nodep_buf[2]
int path
Definition: adpcmenc.c:45
This structure describes decoded (raw) audio or video data.
Definition: frame.h:181
void(* apply_qmf)(const int16_t *prev_samples, int xout[2])
Definition: g722dsp.h:27
#define AV_LOG_WARNING
Something somehow does not look correct.
Definition: log.h:182
static av_cold int init(AVCodecContext *avctx)
Definition: avrndec.c:35
struct G722Context::TrellisPath * paths[2]
static void filter_samples(G722Context *c, const int16_t *samples, int *xlow, int *xhigh)
Definition: g722enc.c:138
#define MIN_TRELLIS
Definition: g722enc.c:44
AVCodec.
Definition: avcodec.h:3392
static void g722_encode_no_trellis(G722Context *c, uint8_t *dst, int nb_samples, const int16_t *samples)
Definition: g722enc.c:344
static int encode_high(const struct G722Band *state, int xhigh)
Definition: g722enc.c:155
uint8_t
#define av_cold
Definition: attributes.h:82
#define PREV_SAMPLES_BUF_SIZE
Definition: g722.h:32
int64_t pts
Presentation timestamp in time_base units (time when frame should be shown to user).
Definition: frame.h:262
static av_cold int g722_encode_init(AVCodecContext *avctx)
Definition: g722enc.c:59
static AVFrame * frame
uint8_t * data
Definition: avcodec.h:1467
#define av_log(a,...)
uint32_t ssd
Definition: adpcmenc.c:44
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:176
struct G722Context::TrellisNode * node_buf[2]
#define AVERROR(e)
Definition: error.h:43
const int16_t ff_g722_low_inv_quant6[64]
Definition: g722.c:63
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
Definition: internal.h:176
int initial_padding
Audio only.
Definition: avcodec.h:3204
int out_size
Definition: movenc-test.c:55
int16_t prev_samples[PREV_SAMPLES_BUF_SIZE]
memory of past decoded samples
Definition: g722.h:37
simple assert() macros that are a bit more flexible than ISO C assert().
AVCodec ff_adpcm_g722_encoder
Definition: g722enc.c:383
const char * name
Name of the codec implementation.
Definition: avcodec.h:3399
#define FREEZE_INTERVAL
Definition: g722enc.c:36
av_cold void ff_g722dsp_init(G722DSPContext *c)
Definition: g722dsp.c:68
struct G722Context::G722Band band[2]
#define AV_CODEC_CAP_SMALL_LAST_FRAME
Codec can be fed a final frame with a smaller size.
Definition: avcodec.h:886
static void g722_encode_trellis(G722Context *c, int trellis, uint8_t *dst, int nb_samples, const int16_t *samples)
Definition: g722enc.c:177
#define MAX_FRAME_SIZE
Definition: g722enc.c:40
static av_cold int g722_encode_close(AVCodecContext *avctx)
Definition: g722enc.c:47
static int encode_low(const struct G722Band *state, int xlow)
Definition: g722enc.c:163
void ff_g722_update_low_predictor(struct G722Band *band, const int ilow)
Definition: g722.c:143
static const float pred[4]
Definition: siprdata.h:259
G722DSPContext dsp
Definition: g722.h:66
int frame_size
Number of samples per channel in an audio frame.
Definition: avcodec.h:2307
static const int16_t low_quant[33]
Definition: g722enc.c:131
Libavcodec external API header.
AVSampleFormat
Audio sample formats.
Definition: samplefmt.h:59
main external API structure.
Definition: avcodec.h:1532
int ff_alloc_packet2(AVCodecContext *avctx, AVPacket *avpkt, int64_t size, int64_t min_size)
Check AVPacket size and/or allocate data.
Definition: utils.c:1621
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:192
common internal api header.
common internal and external API header
signed 16 bits
Definition: samplefmt.h:62
static double c[64]
int prev_samples_pos
the number of values in prev_samples
Definition: g722.h:38
int trellis
trellis RD quantization
Definition: avcodec.h:2622
void * priv_data
Definition: avcodec.h:1574
#define STORE_NODE(index, UPDATE, VALUE)
static av_always_inline int diff(const uint32_t a, const uint32_t b)
const int16_t ff_g722_high_inv_quant[4]
Definition: g722.c:51
int channels
number of audio channels
Definition: avcodec.h:2288
static av_always_inline void encode_byte(G722Context *c, uint8_t *dst, const int16_t *samples)
Definition: g722enc.c:331
static void * av_mallocz_array(size_t nmemb, size_t size)
Definition: mem.h:229
static enum AVSampleFormat sample_fmts[]
Definition: adpcmenc.c:701
static struct @205 state
#define av_freep(p)
static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, const AVFrame *frame, int *got_packet_ptr)
Definition: g722enc.c:353
#define av_always_inline
Definition: attributes.h:39
static av_always_inline int64_t ff_samples_to_time_base(AVCodecContext *avctx, int64_t samples)
Rescale from sample rate to AVCodecContext.time_base.
Definition: internal.h:235
#define MAX_TRELLIS
Definition: g722enc.c:45
#define FFSWAP(type, a, b)
Definition: common.h:99
void ff_g722_update_high_predictor(struct G722Band *band, const int dhigh, const int ihigh)
Definition: g722.c:154
This structure stores compressed data.
Definition: avcodec.h:1444
int16_t scale_factor
delayed quantizer scale factor
Definition: g722.h:52
int nb_samples
number of audio samples (per channel) described by this frame
Definition: frame.h:235
int64_t pts
Presentation timestamp in AVStream->time_base units; the time at which the decompressed packet will be presented to the user.
Definition: avcodec.h:1460
#define AV_NOPTS_VALUE
Undefined timestamp value.
Definition: avutil.h:240