FFmpeg
aacpsdsp.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License along
15  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17  */
18 
19 #include "libavcodec/aacpsdsp.h"
20 #include "libavutil/intfloat.h"
21 #include "libavutil/mem_internal.h"
22 
23 #include "checkasm.h"
24 
/* Row count passed as `n` to hybrid_analysis; also the outer dimension of
 * the filter table used by that test. */
#define N 32
/* Stride argument passed to hybrid_analysis. */
#define STRIDE 128
/* Element count of the scratch buffers used by the tests in this file. */
#define BUF_SIZE (N * STRIDE)

/*
 * Fill the first `len` elements of `buf` with rnd() scaled by 1 / UINT_MAX.
 *
 * `len` is parenthesized so that any expression (e.g. a conditional) can be
 * passed, and the loop index and temporary carry macro-specific names so that
 * an argument mentioning the caller's own `i` or `f` is not captured.
 */
#define randomize(buf, len) do {                                            \
    int randomize_idx_;                                                     \
    for (randomize_idx_ = 0; randomize_idx_ < (len); randomize_idx_++) {    \
        const INTFLOAT randomize_val_ = (INTFLOAT)rnd() / UINT_MAX;         \
        (buf)[randomize_idx_] = randomize_val_;                             \
    }                                                                       \
} while (0)

/* Absolute tolerance handed to float_near_abs_eps_array(). */
#define EPS 0.005
38 
39 static void clear_less_significant_bits(INTFLOAT *buf, int len, int bits)
40 {
41  int i;
42  for (i = 0; i < len; i++) {
43  union av_intfloat32 u = { .f = buf[i] };
44  u.i &= (0xffffffff << bits);
45  buf[i] = u.f;
46  }
47 }
48 
49 static void test_add_squares(void)
50 {
54 
55  declare_func(void, INTFLOAT *dst,
56  const INTFLOAT (*src)[2], int n);
57 
58  randomize((INTFLOAT *)src, BUF_SIZE * 2);
59  randomize(dst0, BUF_SIZE);
60  memcpy(dst1, dst0, BUF_SIZE * sizeof(INTFLOAT));
61  call_ref(dst0, src, BUF_SIZE);
62  call_new(dst1, src, BUF_SIZE);
63  if (!float_near_abs_eps_array(dst0, dst1, EPS, BUF_SIZE))
64  fail();
65  bench_new(dst1, src, BUF_SIZE);
66 }
67 
68 static void test_mul_pair_single(void)
69 {
70  LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE], [2]);
71  LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE], [2]);
74 
75  declare_func(void, INTFLOAT (*dst)[2],
76  INTFLOAT (*src0)[2], INTFLOAT *src1, int n);
77 
80  call_ref(dst0, src0, src1, BUF_SIZE);
81  call_new(dst1, src0, src1, BUF_SIZE);
82  if (!float_near_abs_eps_array((float *)dst0, (float *)dst1, EPS, BUF_SIZE * 2))
83  fail();
84  bench_new(dst1, src0, src1, BUF_SIZE);
85 }
86 
87 static void test_hybrid_analysis(void)
88 {
89  LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE], [2]);
90  LOCAL_ALIGNED_16(INTFLOAT, dst1, [BUF_SIZE], [2]);
91  LOCAL_ALIGNED_16(INTFLOAT, in, [13], [2]);
92  LOCAL_ALIGNED_16(INTFLOAT, filter, [N], [8][2]);
93 
94  declare_func(void, INTFLOAT (*out)[2], INTFLOAT (*in)[2],
95  const INTFLOAT (*filter)[8][2],
96  ptrdiff_t stride, int n);
97 
98  randomize((INTFLOAT *)in, 13 * 2);
99  randomize((INTFLOAT *)filter, N * 8 * 2);
100 
101  randomize((INTFLOAT *)dst0, BUF_SIZE * 2);
102  memcpy(dst1, dst0, BUF_SIZE * 2 * sizeof(INTFLOAT));
103 
104  call_ref(dst0, in, filter, STRIDE, N);
105  call_new(dst1, in, filter, STRIDE, N);
106 
107  if (!float_near_abs_eps_array((float *)dst0, (float *)dst1, EPS, BUF_SIZE * 2))
108  fail();
109  bench_new(dst1, in, filter, STRIDE, N);
110 }
111 
113 {
114  LOCAL_ALIGNED_16(INTFLOAT, in, [2], [38][64]);
115  LOCAL_ALIGNED_16(INTFLOAT, out0, [91], [32][2]);
116  LOCAL_ALIGNED_16(INTFLOAT, out1, [91], [32][2]);
117 
118  declare_func(void, INTFLOAT (*out)[32][2], INTFLOAT L[2][38][64],
119  int i, int len);
120 
121  randomize((INTFLOAT *)out0, 91 * 32 * 2);
122  randomize((INTFLOAT *)in, 2 * 38 * 64);
123  memcpy(out1, out0, 91 * 32 * 2 * sizeof(INTFLOAT));
124 
125  /* len is hardcoded to 32 as that's the only value used in
126  libavcodec. asm functions are likely to be optimized
127  hardcoding this value in their loops and could fail with
128  anything else.
129  i is hardcoded to the two values currently used by the
130  aac decoder because the arm neon implementation is
131  micro-optimized for them and will fail for almost every
132  other value. */
133  call_ref(out0, in, 3, 32);
134  call_new(out1, in, 3, 32);
135 
136  /* the function just moves data around, so memcmp is enough */
137  if (memcmp(out0, out1, 91 * 32 * 2 * sizeof(INTFLOAT)))
138  fail();
139 
140  call_ref(out0, in, 5, 32);
141  call_new(out1, in, 5, 32);
142 
143  if (memcmp(out0, out1, 91 * 32 * 2 * sizeof(INTFLOAT)))
144  fail();
145 
146  bench_new(out1, in, 3, 32);
147 }
148 
150 {
151  LOCAL_ALIGNED_16(INTFLOAT, out0, [2], [38][64]);
152  LOCAL_ALIGNED_16(INTFLOAT, out1, [2], [38][64]);
153  LOCAL_ALIGNED_16(INTFLOAT, in, [91], [32][2]);
154 
155  declare_func(void, INTFLOAT out[2][38][64], INTFLOAT (*in)[32][2],
156  int i, int len);
157 
158  randomize((INTFLOAT *)in, 91 * 32 * 2);
159  randomize((INTFLOAT *)out0, 2 * 38 * 64);
160  memcpy(out1, out0, 2 * 38 * 64 * sizeof(INTFLOAT));
161 
162  /* len is hardcoded to 32 as that's the only value used in
163  libavcodec. asm functions are likely to be optimized
164  hardcoding this value in their loops and could fail with
165  anything else.
166  i is hardcoded to the two values currently used by the
167  aac decoder because the arm neon implementation is
168  micro-optimized for them and will fail for almost every
169  other value. */
170  call_ref(out0, in, 3, 32);
171  call_new(out1, in, 3, 32);
172 
173  /* the function just moves data around, so memcmp is enough */
174  if (memcmp(out0, out1, 2 * 38 * 64 * sizeof(INTFLOAT)))
175  fail();
176 
177  call_ref(out0, in, 5, 32);
178  call_new(out1, in, 5, 32);
179 
180  if (memcmp(out0, out1, 2 * 38 * 64 * sizeof(INTFLOAT)))
181  fail();
182 
183  bench_new(out1, in, 3, 32);
184 }
185 
187 {
188  int i;
189  LOCAL_ALIGNED_16(INTFLOAT, l, [BUF_SIZE], [2]);
191  LOCAL_ALIGNED_16(INTFLOAT, l0, [BUF_SIZE], [2]);
192  LOCAL_ALIGNED_16(INTFLOAT, r0, [BUF_SIZE], [2]);
193  LOCAL_ALIGNED_16(INTFLOAT, l1, [BUF_SIZE], [2]);
194  LOCAL_ALIGNED_16(INTFLOAT, r1, [BUF_SIZE], [2]);
195  LOCAL_ALIGNED_16(INTFLOAT, h, [2], [4]);
196  LOCAL_ALIGNED_16(INTFLOAT, h_step, [2], [4]);
197 
198  declare_func(void, INTFLOAT (*l)[2], INTFLOAT (*r)[2],
199  INTFLOAT h[2][4], INTFLOAT h_step[2][4], int len);
200 
201  randomize((INTFLOAT *)l, BUF_SIZE * 2);
202  randomize((INTFLOAT *)r, BUF_SIZE * 2);
203 
204  for (i = 0; i < 2; i++) {
205  if (check_func(psdsp->stereo_interpolate[i], "ps_stereo_interpolate%s", i ? "_ipdopd" : "")) {
206  memcpy(l0, l, BUF_SIZE * 2 * sizeof(INTFLOAT));
207  memcpy(l1, l, BUF_SIZE * 2 * sizeof(INTFLOAT));
208  memcpy(r0, r, BUF_SIZE * 2 * sizeof(INTFLOAT));
209  memcpy(r1, r, BUF_SIZE * 2 * sizeof(INTFLOAT));
210 
211  randomize((INTFLOAT *)h, 2 * 4);
212  randomize((INTFLOAT *)h_step, 2 * 4);
213  // Clear the least significant 14 bits of h_step, to avoid
214  // divergence when accumulating h_step BUF_SIZE times into
215  // a float variable which may or may not have extra intermediate
216  // precision. Therefore clear roughly log2(BUF_SIZE) less
217  // significant bits, to get the same result regardless of any
218  // extra precision in the accumulator.
219  clear_less_significant_bits((INTFLOAT *)h_step, 2 * 4, 14);
220 
221  call_ref(l0, r0, h, h_step, BUF_SIZE);
222  call_new(l1, r1, h, h_step, BUF_SIZE);
223  if (!float_near_abs_eps_array((float *)l0, (float *)l1, EPS, BUF_SIZE * 2) ||
224  !float_near_abs_eps_array((float *)r0, (float *)r1, EPS, BUF_SIZE * 2))
225  fail();
226 
227  memcpy(l1, l, BUF_SIZE * 2 * sizeof(INTFLOAT));
228  memcpy(r1, r, BUF_SIZE * 2 * sizeof(INTFLOAT));
229  bench_new(l1, r1, h, h_step, BUF_SIZE);
230  }
231  }
232 }
233 
235 {
236  PSDSPContext psdsp;
237 
238  ff_psdsp_init(&psdsp);
239 
240  if (check_func(psdsp.add_squares, "ps_add_squares"))
242  report("add_squares");
243 
244  if (check_func(psdsp.mul_pair_single, "ps_mul_pair_single"))
246  report("mul_pair_single");
247 
248  if (check_func(psdsp.hybrid_analysis, "ps_hybrid_analysis"))
250  report("hybrid_analysis");
251 
252  if (check_func(psdsp.hybrid_analysis_ileave, "ps_hybrid_analysis_ileave"))
254  report("hybrid_analysis_ileave");
255 
256  if (check_func(psdsp.hybrid_synthesis_deint, "ps_hybrid_synthesis_deint"))
258  report("hybrid_synthesis_deint");
259 
260  test_stereo_interpolate(&psdsp);
261  report("stereo_interpolate");
262 }
stride
int stride
Definition: mace.c:144
test_hybrid_analysis
static void test_hybrid_analysis(void)
Definition: aacpsdsp.c:87
r
const char * r
Definition: vf_curves.c:116
N
#define N
Definition: aacpsdsp.c:25
mem_internal.h
test_mul_pair_single
static void test_mul_pair_single(void)
Definition: aacpsdsp.c:68
out
FILE * out
Definition: movenc.c:54
u
#define u(width, name, range_min, range_max)
Definition: cbs_h2645.c:264
float_near_abs_eps_array
int float_near_abs_eps_array(const float *a, const float *b, float eps, unsigned len)
Definition: checkasm.c:339
randomize
#define randomize(buf, len)
Definition: aacpsdsp.c:29
test_add_squares
static void test_add_squares(void)
Definition: aacpsdsp.c:49
check_func
#define check_func(func,...)
Definition: checkasm.h:125
checkasm_check_aacpsdsp
void checkasm_check_aacpsdsp(void)
Definition: aacpsdsp.c:234
BUF_SIZE
#define BUF_SIZE
Definition: aacpsdsp.c:27
filter
filter_frame For filters that do not use the activate() callback, this method is called when a frame is pushed to the filter's input. It can be called at any time except in a reentrant way. If the input frame is enough to produce output, then the filter should push the output frames on the output link immediately. As an exception to the previous rule, if the input frame is enough to produce several output frames, then the filter needs to output only at least one per link. The additional frames can be left buffered in the filter.
Definition: filter_design.txt:228
call_ref
#define call_ref(...)
Definition: checkasm.h:140
intfloat.h
PSDSPContext::add_squares
void(* add_squares)(INTFLOAT *dst, const INTFLOAT(*src)[2], int n)
Definition: aacpsdsp.h:33
fail
#define fail()
Definition: checkasm.h:134
checkasm.h
aacpsdsp.h
clear_less_significant_bits
static void clear_less_significant_bits(INTFLOAT *buf, int len, int bits)
Definition: aacpsdsp.c:39
bits
uint8_t bits
Definition: vp3data.h:141
LOCAL_ALIGNED_16
#define LOCAL_ALIGNED_16(t, v,...)
Definition: mem_internal.h:130
call_new
#define call_new(...)
Definition: checkasm.h:212
src
#define src
Definition: vp8dsp.c:255
PSDSPContext::hybrid_synthesis_deint
void(* hybrid_synthesis_deint)(INTFLOAT out[2][38][64], INTFLOAT(*in)[32][2], int i, int len)
Definition: aacpsdsp.h:41
av_intfloat32
Definition: intfloat.h:27
PSDSPContext::mul_pair_single
void(* mul_pair_single)(INTFLOAT(*dst)[2], INTFLOAT(*src0)[2], INTFLOAT *src1, int n)
Definition: aacpsdsp.h:34
PSDSPContext::hybrid_analysis_ileave
void(* hybrid_analysis_ileave)(INTFLOAT(*out)[32][2], INTFLOAT L[2][38][64], int i, int len)
Definition: aacpsdsp.h:39
src0
#define src0
Definition: h264pred.c:139
src1
#define src1
Definition: h264pred.c:140
report
#define report
Definition: checkasm.h:137
i
int i
Definition: input.c:407
bench_new
#define bench_new(...)
Definition: checkasm.h:272
test_hybrid_analysis_ileave
static void test_hybrid_analysis_ileave(void)
Definition: aacpsdsp.c:112
len
int len
Definition: vorbis_enc_data.h:426
test_hybrid_synthesis_deint
static void test_hybrid_synthesis_deint(void)
Definition: aacpsdsp.c:149
test_stereo_interpolate
static void test_stereo_interpolate(PSDSPContext *psdsp)
Definition: aacpsdsp.c:186
L
#define L(x)
Definition: vp56_arith.h:36
PSDSPContext::stereo_interpolate
void(* stereo_interpolate[2])(INTFLOAT(*l)[2], INTFLOAT(*r)[2], INTFLOAT h[2][4], INTFLOAT h_step[2][4], int len)
Definition: aacpsdsp.h:49
ff_psdsp_init
void AAC_RENAME() ff_psdsp_init(PSDSPContext *s)
Definition: aacpsdsp_template.c:212
PSDSPContext
Definition: aacpsdsp.h:32
declare_func
#define declare_func(ret,...)
Definition: checkasm.h:129
h
h
Definition: vp9dsp_template.c:2038
PSDSPContext::hybrid_analysis
void(* hybrid_analysis)(INTFLOAT(*out)[2], INTFLOAT(*in)[2], const INTFLOAT(*filter)[8][2], ptrdiff_t stride, int n)
Definition: aacpsdsp.h:36
STRIDE
#define STRIDE
Definition: aacpsdsp.c:26
INTFLOAT
float INTFLOAT
Definition: aac_defines.h:88
EPS
#define EPS
Definition: aacpsdsp.c:37