FFmpeg
aacpsdsp.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License along
15  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17  */
18 
19 #include "libavcodec/aacpsdsp.h"
20 #include "libavutil/intfloat.h"
21 
22 #include "checkasm.h"
23 
/* Row count: sizes the hybrid_analysis filter table and is passed as its `n` argument. */
24 #define N 32
/* Value passed as the `stride` argument of hybrid_analysis. */
25 #define STRIDE 128
/* Element count used to size the generic test buffers (N rows of STRIDE elements). */
26 #define BUF_SIZE (N * STRIDE)
27 
/*
 * Fill the first `len` elements of `buf` with pseudo-random values.
 *
 * Each element is set to rnd() converted to INTFLOAT and divided by
 * UINT_MAX.  `len` is evaluated on every iteration and `buf` once per
 * element, so neither argument should have side effects.
 *
 * `len` is parenthesized so that any expression (e.g. a conditional) can be
 * passed, and the loop counter has a macro-specific name so it cannot shadow
 * an identifier that appears in the caller's arguments.
 */
#define randomize(buf, len) do {                                          \
    int randomize_idx_;                                                   \
    for (randomize_idx_ = 0; randomize_idx_ < (len); randomize_idx_++)    \
        (buf)[randomize_idx_] = (INTFLOAT)rnd() / UINT_MAX;               \
} while (0)
35 
/* Tolerance handed to float_near_abs_eps_array() when comparing reference and candidate outputs. */
36 #define EPS 0.005
37 
38 static void clear_less_significant_bits(INTFLOAT *buf, int len, int bits)
39 {
40  int i;
41  for (i = 0; i < len; i++) {
42  union av_intfloat32 u = { .f = buf[i] };
43  u.i &= (0xffffffff << bits);
44  buf[i] = u.f;
45  }
46 }
47 
48 static void test_add_squares(void)
49 {
53 
54  declare_func(void, INTFLOAT *dst,
55  const INTFLOAT (*src)[2], int n);
56 
57  randomize((INTFLOAT *)src, BUF_SIZE * 2);
58  randomize(dst0, BUF_SIZE);
59  memcpy(dst1, dst0, BUF_SIZE * sizeof(INTFLOAT));
60  call_ref(dst0, src, BUF_SIZE);
61  call_new(dst1, src, BUF_SIZE);
62  if (!float_near_abs_eps_array(dst0, dst1, EPS, BUF_SIZE))
63  fail();
64  bench_new(dst1, src, BUF_SIZE);
65 }
66 
67 static void test_mul_pair_single(void)
68 {
69  LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE], [2]);
70  LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE], [2]);
73 
74  declare_func(void, INTFLOAT (*dst)[2],
75  INTFLOAT (*src0)[2], INTFLOAT *src1, int n);
76 
78  randomize(src1, BUF_SIZE);
79  call_ref(dst0, src0, src1, BUF_SIZE);
80  call_new(dst1, src0, src1, BUF_SIZE);
81  if (!float_near_abs_eps_array((float *)dst0, (float *)dst1, EPS, BUF_SIZE * 2))
82  fail();
83  bench_new(dst1, src0, src1, BUF_SIZE);
84 }
85 
86 static void test_hybrid_analysis(void)
87 {
88  LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE], [2]);
89  LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE], [2]);
90  LOCAL_ALIGNED_16(INTFLOAT, in, [13], [2]);
91  LOCAL_ALIGNED_16(INTFLOAT, filter, [N], [8][2]);
92 
93  declare_func(void, INTFLOAT (*out)[2], INTFLOAT (*in)[2],
94  const INTFLOAT (*filter)[8][2],
95  ptrdiff_t stride, int n);
96 
97  randomize((INTFLOAT *)in, 13 * 2);
98  randomize((INTFLOAT *)filter, N * 8 * 2);
99 
100  randomize((INTFLOAT *)dst0, BUF_SIZE * 2);
101  memcpy(dst1, dst0, BUF_SIZE * 2 * sizeof(INTFLOAT));
102 
103  call_ref(dst0, in, filter, STRIDE, N);
104  call_new(dst1, in, filter, STRIDE, N);
105 
106  if (!float_near_abs_eps_array((float *)dst0, (float *)dst1, EPS, BUF_SIZE * 2))
107  fail();
108  bench_new(dst1, in, filter, STRIDE, N);
109 }
110 
112 {
113  LOCAL_ALIGNED_16(INTFLOAT, in, [2], [38][64]);
114  LOCAL_ALIGNED_16(INTFLOAT, out0, [91], [32][2]);
115  LOCAL_ALIGNED_16(INTFLOAT, out1, [91], [32][2]);
116 
117  declare_func(void, INTFLOAT (*out)[32][2], INTFLOAT L[2][38][64],
118  int i, int len);
119 
120  randomize((INTFLOAT *)out0, 91 * 32 * 2);
121  randomize((INTFLOAT *)in, 2 * 38 * 64);
122  memcpy(out1, out0, 91 * 32 * 2 * sizeof(INTFLOAT));
123 
124  /* len is hardcoded to 32 as that's the only value used in
125  libavcodec. asm functions are likely to be optimized
126  hardcoding this value in their loops and could fail with
127  anything else.
128  i is hardcoded to the two values currently used by the
129  aac decoder because the arm neon implementation is
130  micro-optimized for them and will fail for almost every
131  other value. */
132  call_ref(out0, in, 3, 32);
133  call_new(out1, in, 3, 32);
134 
135  /* the function just moves data around, so memcmp is enough */
136  if (memcmp(out0, out1, 91 * 32 * 2 * sizeof(INTFLOAT)))
137  fail();
138 
139  call_ref(out0, in, 5, 32);
140  call_new(out1, in, 5, 32);
141 
142  if (memcmp(out0, out1, 91 * 32 * 2 * sizeof(INTFLOAT)))
143  fail();
144 
145  bench_new(out1, in, 3, 32);
146 }
147 
149 {
150  LOCAL_ALIGNED_16(INTFLOAT, out0, [2], [38][64]);
151  LOCAL_ALIGNED_16(INTFLOAT, out1, [2], [38][64]);
152  LOCAL_ALIGNED_16(INTFLOAT, in, [91], [32][2]);
153 
154  declare_func(void, INTFLOAT out[2][38][64], INTFLOAT (*in)[32][2],
155  int i, int len);
156 
157  randomize((INTFLOAT *)in, 91 * 32 * 2);
158  randomize((INTFLOAT *)out0, 2 * 38 * 64);
159  memcpy(out1, out0, 2 * 38 * 64 * sizeof(INTFLOAT));
160 
161  /* len is hardcoded to 32 as that's the only value used in
162  libavcodec. asm functions are likely to be optimized
163  hardcoding this value in their loops and could fail with
164  anything else.
165  i is hardcoded to the two values currently used by the
166  aac decoder because the arm neon implementation is
167  micro-optimized for them and will fail for almost every
168  other value. */
169  call_ref(out0, in, 3, 32);
170  call_new(out1, in, 3, 32);
171 
172  /* the function just moves data around, so memcmp is enough */
173  if (memcmp(out0, out1, 2 * 38 * 64 * sizeof(INTFLOAT)))
174  fail();
175 
176  call_ref(out0, in, 5, 32);
177  call_new(out1, in, 5, 32);
178 
179  if (memcmp(out0, out1, 2 * 38 * 64 * sizeof(INTFLOAT)))
180  fail();
181 
182  bench_new(out1, in, 3, 32);
183 }
184 
186 {
187  int i;
188  LOCAL_ALIGNED_16(INTFLOAT, l, [BUF_SIZE], [2]);
190  LOCAL_ALIGNED_16(INTFLOAT, l0, [BUF_SIZE], [2]);
191  LOCAL_ALIGNED_16(INTFLOAT, r0, [BUF_SIZE], [2]);
192  LOCAL_ALIGNED_16(INTFLOAT, l1, [BUF_SIZE], [2]);
193  LOCAL_ALIGNED_16(INTFLOAT, r1, [BUF_SIZE], [2]);
194  LOCAL_ALIGNED_16(INTFLOAT, h, [2], [4]);
195  LOCAL_ALIGNED_16(INTFLOAT, h_step, [2], [4]);
196 
197  declare_func(void, INTFLOAT (*l)[2], INTFLOAT (*r)[2],
198  INTFLOAT h[2][4], INTFLOAT h_step[2][4], int len);
199 
200  randomize((INTFLOAT *)l, BUF_SIZE * 2);
201  randomize((INTFLOAT *)r, BUF_SIZE * 2);
202 
203  for (i = 0; i < 2; i++) {
204  if (check_func(psdsp->stereo_interpolate[i], "ps_stereo_interpolate%s", i ? "_ipdopd" : "")) {
205  memcpy(l0, l, BUF_SIZE * 2 * sizeof(INTFLOAT));
206  memcpy(l1, l, BUF_SIZE * 2 * sizeof(INTFLOAT));
207  memcpy(r0, r, BUF_SIZE * 2 * sizeof(INTFLOAT));
208  memcpy(r1, r, BUF_SIZE * 2 * sizeof(INTFLOAT));
209 
210  randomize((INTFLOAT *)h, 2 * 4);
211  randomize((INTFLOAT *)h_step, 2 * 4);
212  // Clear the least significant 14 bits of h_step, to avoid
213  // divergence when accumulating h_step BUF_SIZE times into
214  // a float variable which may or may not have extra intermediate
215  // precision. Therefore clear roughly log2(BUF_SIZE) less
216  // significant bits, to get the same result regardless of any
217  // extra precision in the accumulator.
218  clear_less_significant_bits((INTFLOAT *)h_step, 2 * 4, 14);
219 
220  call_ref(l0, r0, h, h_step, BUF_SIZE);
221  call_new(l1, r1, h, h_step, BUF_SIZE);
222  if (!float_near_abs_eps_array((float *)l0, (float *)l1, EPS, BUF_SIZE * 2) ||
223  !float_near_abs_eps_array((float *)r0, (float *)r1, EPS, BUF_SIZE * 2))
224  fail();
225 
226  memcpy(l1, l, BUF_SIZE * 2 * sizeof(INTFLOAT));
227  memcpy(r1, r, BUF_SIZE * 2 * sizeof(INTFLOAT));
228  bench_new(l1, r1, h, h_step, BUF_SIZE);
229  }
230  }
231 }
232 
234 {
235  PSDSPContext psdsp;
236 
237  ff_psdsp_init(&psdsp);
238 
239  if (check_func(psdsp.add_squares, "ps_add_squares"))
241  report("add_squares");
242 
243  if (check_func(psdsp.mul_pair_single, "ps_mul_pair_single"))
245  report("mul_pair_single");
246 
247  if (check_func(psdsp.hybrid_analysis, "ps_hybrid_analysis"))
249  report("hybrid_analysis");
250 
251  if (check_func(psdsp.hybrid_analysis_ileave, "ps_hybrid_analysis_ileave"))
253  report("hybrid_analysis_ileave");
254 
255  if (check_func(psdsp.hybrid_synthesis_deint, "ps_hybrid_synthesis_deint"))
257  report("hybrid_synthesis_deint");
258 
259  test_stereo_interpolate(&psdsp);
260  report("stereo_interpolate");
261 }
void AAC_RENAME() ff_psdsp_init(PSDSPContext *s)
void(* mul_pair_single)(INTFLOAT(*dst)[2], INTFLOAT(*src0)[2], INTFLOAT *src1, int n)
Definition: aacpsdsp.h:34
int float_near_abs_eps_array(const float *a, const float *b, float eps, unsigned len)
Definition: checkasm.c:325
#define EPS
Definition: aacpsdsp.c:36
#define N
Definition: aacpsdsp.c:24
void(* hybrid_synthesis_deint)(INTFLOAT out[2][38][64], INTFLOAT(*in)[32][2], int i, int len)
Definition: aacpsdsp.h:41
#define report
Definition: checkasm.h:126
void checkasm_check_aacpsdsp(void)
Definition: aacpsdsp.c:233
float INTFLOAT
Definition: aac_defines.h:86
static void test_hybrid_synthesis_deint(void)
Definition: aacpsdsp.c:148
void(* add_squares)(INTFLOAT *dst, const INTFLOAT(*src)[2], int n)
Definition: aacpsdsp.h:33
#define src
Definition: vp8dsp.c:254
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce then the filter should push the output frames on the output link immediately As an exception to the previous rule if the input frame is enough to produce several output frames then the filter needs output only at least one per link The additional frames can be left buffered in the filter
#define declare_func(ret,...)
Definition: checkasm.h:118
const char * r
Definition: vf_curves.c:114
void(* hybrid_analysis)(INTFLOAT(*out)[2], INTFLOAT(*in)[2], const INTFLOAT(*filter)[8][2], ptrdiff_t stride, int n)
Definition: aacpsdsp.h:36
uint8_t bits
Definition: vp3data.h:202
#define STRIDE
Definition: aacpsdsp.c:25
#define fail()
Definition: checkasm.h:123
static void clear_less_significant_bits(INTFLOAT *buf, int len, int bits)
Definition: aacpsdsp.c:38
void(* stereo_interpolate[2])(INTFLOAT(*l)[2], INTFLOAT(*r)[2], INTFLOAT h[2][4], INTFLOAT h_step[2][4], int len)
Definition: aacpsdsp.h:49
uint32_t i
Definition: intfloat.h:28
#define L(x)
Definition: vp56_arith.h:36
#define call_ref(...)
Definition: checkasm.h:129
static void test_hybrid_analysis_ileave(void)
Definition: aacpsdsp.c:111
static void test_add_squares(void)
Definition: aacpsdsp.c:48
static void test_mul_pair_single(void)
Definition: aacpsdsp.c:67
#define src1
Definition: h264pred.c:139
#define randomize(buf, len)
Definition: aacpsdsp.c:28
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t,*(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t,*(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t,*(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t,*(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31))))#define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac){}void ff_audio_convert_free(AudioConvert **ac){if(!*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);}AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, 
int apply_map){AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method!=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2){ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc){av_free(ac);return NULL;}return ac;}in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar){ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar?ac->channels:1;}else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;}int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in){int use_generic=1;int len=in->nb_samples;int p;if(ac->dc){av_log(ac->avr, AV_LOG_TRACE,"%d samples - audio_convert: %s to %s (dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> in
#define check_func(func,...)
Definition: checkasm.h:114
#define BUF_SIZE
Definition: aacpsdsp.c:26
#define src0
Definition: h264pred.c:138
GLint GLenum GLboolean GLsizei stride
Definition: opengl_enc.c:104
void(* hybrid_analysis_ileave)(INTFLOAT(*out)[32][2], INTFLOAT L[2][38][64], int i, int len)
Definition: aacpsdsp.h:39
static void test_stereo_interpolate(PSDSPContext *psdsp)
Definition: aacpsdsp.c:185
int len
#define bench_new(...)
Definition: checkasm.h:261
static void test_hybrid_analysis(void)
Definition: aacpsdsp.c:86
FILE * out
Definition: movenc.c:54
#define LOCAL_ALIGNED_16(t, v,...)
Definition: internal.h:131
#define call_new(...)
Definition: checkasm.h:201
int i
Definition: input.c:406