FFmpeg
tx_template.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) Lynne
3  *
4  * Power of two FFT:
5  * Copyright (c) Lynne
6  * Copyright (c) 2008 Loren Merritt
7  * Copyright (c) 2002 Fabrice Bellard
8  * Partly based on libdjbfft by D. J. Bernstein
9  *
10  * This file is part of FFmpeg.
11  *
12  * FFmpeg is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public
14  * License as published by the Free Software Foundation; either
15  * version 2.1 of the License, or (at your option) any later version.
16  *
17  * FFmpeg is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with FFmpeg; if not, write to the Free Software
24  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25  */
26 
27 /* All costabs for a type are defined here */
28 COSTABLE(16);
29 COSTABLE(32);
30 COSTABLE(64);
31 COSTABLE(128);
32 COSTABLE(256);
33 COSTABLE(512);
34 COSTABLE(1024);
35 COSTABLE(2048);
36 COSTABLE(4096);
37 COSTABLE(8192);
38 COSTABLE(16384);
39 COSTABLE(32768);
40 COSTABLE(65536);
41 COSTABLE(131072);
42 DECLARE_ALIGNED(32, FFTComplex, TX_NAME(ff_cos_53))[4];
43 DECLARE_ALIGNED(32, FFTComplex, TX_NAME(ff_cos_7))[3];
44 DECLARE_ALIGNED(32, FFTComplex, TX_NAME(ff_cos_9))[4];
45 
46 static FFTSample * const cos_tabs[18] = {
47  NULL,
48  NULL,
49  NULL,
50  NULL,
51  TX_NAME(ff_cos_16),
52  TX_NAME(ff_cos_32),
53  TX_NAME(ff_cos_64),
54  TX_NAME(ff_cos_128),
55  TX_NAME(ff_cos_256),
56  TX_NAME(ff_cos_512),
57  TX_NAME(ff_cos_1024),
58  TX_NAME(ff_cos_2048),
59  TX_NAME(ff_cos_4096),
60  TX_NAME(ff_cos_8192),
61  TX_NAME(ff_cos_16384),
62  TX_NAME(ff_cos_32768),
63  TX_NAME(ff_cos_65536),
64  TX_NAME(ff_cos_131072),
65 };
66 
68 {
69  int m = 1 << index;
70  double freq = 2*M_PI/m;
72 
73  for (int i = 0; i < m/4; i++)
74  *tab++ = RESCALE(cos(i*freq));
75 
76  *tab = 0;
77 }
78 
79 #define INIT_FF_COS_TABS_FUNC(index, size) \
80 static av_cold void init_cos_tabs_ ## size (void) \
81 { \
82  init_cos_tabs_idx(index); \
83 }
84 
91 INIT_FF_COS_TABS_FUNC(10, 1024)
92 INIT_FF_COS_TABS_FUNC(11, 2048)
93 INIT_FF_COS_TABS_FUNC(12, 4096)
94 INIT_FF_COS_TABS_FUNC(13, 8192)
95 INIT_FF_COS_TABS_FUNC(14, 16384)
96 INIT_FF_COS_TABS_FUNC(15, 32768)
97 INIT_FF_COS_TABS_FUNC(16, 65536)
98 INIT_FF_COS_TABS_FUNC(17, 131072)
99 
100 static av_cold void ff_init_53_tabs(void)
101 {
102  TX_NAME(ff_cos_53)[0] = (FFTComplex){ RESCALE(cos(2 * M_PI / 12)), RESCALE(cos(2 * M_PI / 12)) };
103  TX_NAME(ff_cos_53)[1] = (FFTComplex){ RESCALE(cos(2 * M_PI / 6)), RESCALE(cos(2 * M_PI / 6)) };
104  TX_NAME(ff_cos_53)[2] = (FFTComplex){ RESCALE(cos(2 * M_PI / 5)), RESCALE(sin(2 * M_PI / 5)) };
105  TX_NAME(ff_cos_53)[3] = (FFTComplex){ RESCALE(cos(2 * M_PI / 10)), RESCALE(sin(2 * M_PI / 10)) };
106 }
107 
108 static av_cold void ff_init_7_tabs(void)
109 {
110  TX_NAME(ff_cos_7)[0] = (FFTComplex){ RESCALE(cos(2 * M_PI / 7)), RESCALE(sin(2 * M_PI / 7)) };
111  TX_NAME(ff_cos_7)[1] = (FFTComplex){ RESCALE(sin(2 * M_PI / 28)), RESCALE(cos(2 * M_PI / 28)) };
112  TX_NAME(ff_cos_7)[2] = (FFTComplex){ RESCALE(cos(2 * M_PI / 14)), RESCALE(sin(2 * M_PI / 14)) };
113 }
114 
115 static av_cold void ff_init_9_tabs(void)
116 {
117  TX_NAME(ff_cos_9)[0] = (FFTComplex){ RESCALE(cos(2 * M_PI / 3)), RESCALE( sin(2 * M_PI / 3)) };
118  TX_NAME(ff_cos_9)[1] = (FFTComplex){ RESCALE(cos(2 * M_PI / 9)), RESCALE( sin(2 * M_PI / 9)) };
119  TX_NAME(ff_cos_9)[2] = (FFTComplex){ RESCALE(cos(2 * M_PI / 36)), RESCALE( sin(2 * M_PI / 36)) };
120  TX_NAME(ff_cos_9)[3] = (FFTComplex){ TX_NAME(ff_cos_9)[1].re + TX_NAME(ff_cos_9)[2].im,
121  TX_NAME(ff_cos_9)[1].im - TX_NAME(ff_cos_9)[2].re };
122 }
123 
128  { NULL },
129  { init_cos_tabs_16, AV_ONCE_INIT },
130  { init_cos_tabs_32, AV_ONCE_INIT },
131  { init_cos_tabs_64, AV_ONCE_INIT },
132  { init_cos_tabs_128, AV_ONCE_INIT },
133  { init_cos_tabs_256, AV_ONCE_INIT },
134  { init_cos_tabs_512, AV_ONCE_INIT },
135  { init_cos_tabs_1024, AV_ONCE_INIT },
136  { init_cos_tabs_2048, AV_ONCE_INIT },
137  { init_cos_tabs_4096, AV_ONCE_INIT },
138  { init_cos_tabs_8192, AV_ONCE_INIT },
139  { init_cos_tabs_16384, AV_ONCE_INIT },
140  { init_cos_tabs_32768, AV_ONCE_INIT },
141  { init_cos_tabs_65536, AV_ONCE_INIT },
142  { init_cos_tabs_131072, AV_ONCE_INIT },
143 };
144 
145 static av_cold void init_cos_tabs(int index)
146 {
149 }
150 
152  ptrdiff_t stride)
153 {
154  FFTComplex tmp[2];
155 #ifdef TX_INT32
156  int64_t mtmp[4];
157 #endif
158 
159  BF(tmp[0].re, tmp[1].im, in[1].im, in[2].im);
160  BF(tmp[0].im, tmp[1].re, in[1].re, in[2].re);
161 
162  out[0*stride].re = in[0].re + tmp[1].re;
163  out[0*stride].im = in[0].im + tmp[1].im;
164 
165 #ifdef TX_INT32
166  mtmp[0] = (int64_t)TX_NAME(ff_cos_53)[0].re * tmp[0].re;
167  mtmp[1] = (int64_t)TX_NAME(ff_cos_53)[0].im * tmp[0].im;
168  mtmp[2] = (int64_t)TX_NAME(ff_cos_53)[1].re * tmp[1].re;
169  mtmp[3] = (int64_t)TX_NAME(ff_cos_53)[1].re * tmp[1].im;
170  out[1*stride].re = in[0].re - (mtmp[2] + mtmp[0] + 0x40000000 >> 31);
171  out[1*stride].im = in[0].im - (mtmp[3] - mtmp[1] + 0x40000000 >> 31);
172  out[2*stride].re = in[0].re - (mtmp[2] - mtmp[0] + 0x40000000 >> 31);
173  out[2*stride].im = in[0].im - (mtmp[3] + mtmp[1] + 0x40000000 >> 31);
174 #else
175  tmp[0].re = TX_NAME(ff_cos_53)[0].re * tmp[0].re;
176  tmp[0].im = TX_NAME(ff_cos_53)[0].im * tmp[0].im;
177  tmp[1].re = TX_NAME(ff_cos_53)[1].re * tmp[1].re;
178  tmp[1].im = TX_NAME(ff_cos_53)[1].re * tmp[1].im;
179  out[1*stride].re = in[0].re - tmp[1].re + tmp[0].re;
180  out[1*stride].im = in[0].im - tmp[1].im - tmp[0].im;
181  out[2*stride].re = in[0].re - tmp[1].re - tmp[0].re;
182  out[2*stride].im = in[0].im - tmp[1].im + tmp[0].im;
183 #endif
184 }
185 
186 #define DECL_FFT5(NAME, D0, D1, D2, D3, D4) \
187 static av_always_inline void NAME(FFTComplex *out, FFTComplex *in, \
188  ptrdiff_t stride) \
189 { \
190  FFTComplex z0[4], t[6]; \
191  \
192  BF(t[1].im, t[0].re, in[1].re, in[4].re); \
193  BF(t[1].re, t[0].im, in[1].im, in[4].im); \
194  BF(t[3].im, t[2].re, in[2].re, in[3].re); \
195  BF(t[3].re, t[2].im, in[2].im, in[3].im); \
196  \
197  out[D0*stride].re = in[0].re + t[0].re + t[2].re; \
198  out[D0*stride].im = in[0].im + t[0].im + t[2].im; \
199  \
200  SMUL(t[4].re, t[0].re, TX_NAME(ff_cos_53)[2].re, TX_NAME(ff_cos_53)[3].re, t[2].re, t[0].re); \
201  SMUL(t[4].im, t[0].im, TX_NAME(ff_cos_53)[2].re, TX_NAME(ff_cos_53)[3].re, t[2].im, t[0].im); \
202  CMUL(t[5].re, t[1].re, TX_NAME(ff_cos_53)[2].im, TX_NAME(ff_cos_53)[3].im, t[3].re, t[1].re); \
203  CMUL(t[5].im, t[1].im, TX_NAME(ff_cos_53)[2].im, TX_NAME(ff_cos_53)[3].im, t[3].im, t[1].im); \
204  \
205  BF(z0[0].re, z0[3].re, t[0].re, t[1].re); \
206  BF(z0[0].im, z0[3].im, t[0].im, t[1].im); \
207  BF(z0[2].re, z0[1].re, t[4].re, t[5].re); \
208  BF(z0[2].im, z0[1].im, t[4].im, t[5].im); \
209  \
210  out[D1*stride].re = in[0].re + z0[3].re; \
211  out[D1*stride].im = in[0].im + z0[0].im; \
212  out[D2*stride].re = in[0].re + z0[2].re; \
213  out[D2*stride].im = in[0].im + z0[1].im; \
214  out[D3*stride].re = in[0].re + z0[1].re; \
215  out[D3*stride].im = in[0].im + z0[2].im; \
216  out[D4*stride].re = in[0].re + z0[0].re; \
217  out[D4*stride].im = in[0].im + z0[3].im; \
218 }
219 
220 DECL_FFT5(fft5, 0, 1, 2, 3, 4)
221 DECL_FFT5(fft5_m1, 0, 6, 12, 3, 9)
222 DECL_FFT5(fft5_m2, 10, 1, 7, 13, 4)
223 DECL_FFT5(fft5_m3, 5, 11, 2, 8, 14)
224 
226  ptrdiff_t stride)
227 {
228  FFTComplex t[6], z[3];
229  const FFTComplex *tab = TX_NAME(ff_cos_7);
230 #ifdef TX_INT32
231  int64_t mtmp[12];
232 #endif
233 
234  BF(t[1].re, t[0].re, in[1].re, in[6].re);
235  BF(t[1].im, t[0].im, in[1].im, in[6].im);
236  BF(t[3].re, t[2].re, in[2].re, in[5].re);
237  BF(t[3].im, t[2].im, in[2].im, in[5].im);
238  BF(t[5].re, t[4].re, in[3].re, in[4].re);
239  BF(t[5].im, t[4].im, in[3].im, in[4].im);
240 
241  out[0*stride].re = in[0].re + t[0].re + t[2].re + t[4].re;
242  out[0*stride].im = in[0].im + t[0].im + t[2].im + t[4].im;
243 
244 #ifdef TX_INT32 /* NOTE: it's possible to do this with 16 mults but 72 adds */
245  mtmp[ 0] = ((int64_t)tab[0].re)*t[0].re - ((int64_t)tab[2].re)*t[4].re;
246  mtmp[ 1] = ((int64_t)tab[0].re)*t[4].re - ((int64_t)tab[1].re)*t[0].re;
247  mtmp[ 2] = ((int64_t)tab[0].re)*t[2].re - ((int64_t)tab[2].re)*t[0].re;
248  mtmp[ 3] = ((int64_t)tab[0].re)*t[0].im - ((int64_t)tab[1].re)*t[2].im;
249  mtmp[ 4] = ((int64_t)tab[0].re)*t[4].im - ((int64_t)tab[1].re)*t[0].im;
250  mtmp[ 5] = ((int64_t)tab[0].re)*t[2].im - ((int64_t)tab[2].re)*t[0].im;
251 
252  mtmp[ 6] = ((int64_t)tab[2].im)*t[1].im + ((int64_t)tab[1].im)*t[5].im;
253  mtmp[ 7] = ((int64_t)tab[0].im)*t[5].im + ((int64_t)tab[2].im)*t[3].im;
254  mtmp[ 8] = ((int64_t)tab[2].im)*t[5].im + ((int64_t)tab[1].im)*t[3].im;
255  mtmp[ 9] = ((int64_t)tab[0].im)*t[1].re + ((int64_t)tab[1].im)*t[3].re;
256  mtmp[10] = ((int64_t)tab[2].im)*t[3].re + ((int64_t)tab[0].im)*t[5].re;
257  mtmp[11] = ((int64_t)tab[2].im)*t[1].re + ((int64_t)tab[1].im)*t[5].re;
258 
259  z[0].re = (int32_t)(mtmp[ 0] - ((int64_t)tab[1].re)*t[2].re + 0x40000000 >> 31);
260  z[1].re = (int32_t)(mtmp[ 1] - ((int64_t)tab[2].re)*t[2].re + 0x40000000 >> 31);
261  z[2].re = (int32_t)(mtmp[ 2] - ((int64_t)tab[1].re)*t[4].re + 0x40000000 >> 31);
262  z[0].im = (int32_t)(mtmp[ 3] - ((int64_t)tab[2].re)*t[4].im + 0x40000000 >> 31);
263  z[1].im = (int32_t)(mtmp[ 4] - ((int64_t)tab[2].re)*t[2].im + 0x40000000 >> 31);
264  z[2].im = (int32_t)(mtmp[ 5] - ((int64_t)tab[1].re)*t[4].im + 0x40000000 >> 31);
265 
266  t[0].re = (int32_t)(mtmp[ 6] - ((int64_t)tab[0].im)*t[3].im + 0x40000000 >> 31);
267  t[2].re = (int32_t)(mtmp[ 7] - ((int64_t)tab[1].im)*t[1].im + 0x40000000 >> 31);
268  t[4].re = (int32_t)(mtmp[ 8] + ((int64_t)tab[0].im)*t[1].im + 0x40000000 >> 31);
269  t[0].im = (int32_t)(mtmp[ 9] + ((int64_t)tab[2].im)*t[5].re + 0x40000000 >> 31);
270  t[2].im = (int32_t)(mtmp[10] - ((int64_t)tab[1].im)*t[1].re + 0x40000000 >> 31);
271  t[4].im = (int32_t)(mtmp[11] - ((int64_t)tab[0].im)*t[3].re + 0x40000000 >> 31);
272 #else
273  z[0].re = tab[0].re*t[0].re - tab[2].re*t[4].re - tab[1].re*t[2].re;
274  z[1].re = tab[0].re*t[4].re - tab[1].re*t[0].re - tab[2].re*t[2].re;
275  z[2].re = tab[0].re*t[2].re - tab[2].re*t[0].re - tab[1].re*t[4].re;
276  z[0].im = tab[0].re*t[0].im - tab[1].re*t[2].im - tab[2].re*t[4].im;
277  z[1].im = tab[0].re*t[4].im - tab[1].re*t[0].im - tab[2].re*t[2].im;
278  z[2].im = tab[0].re*t[2].im - tab[2].re*t[0].im - tab[1].re*t[4].im;
279 
280  /* It's possible to do t[4].re and t[0].im with 2 multiplies only by
281  * multiplying the sum of all with the average of the twiddles */
282 
283  t[0].re = tab[2].im*t[1].im + tab[1].im*t[5].im - tab[0].im*t[3].im;
284  t[2].re = tab[0].im*t[5].im + tab[2].im*t[3].im - tab[1].im*t[1].im;
285  t[4].re = tab[2].im*t[5].im + tab[1].im*t[3].im + tab[0].im*t[1].im;
286  t[0].im = tab[0].im*t[1].re + tab[1].im*t[3].re + tab[2].im*t[5].re;
287  t[2].im = tab[2].im*t[3].re + tab[0].im*t[5].re - tab[1].im*t[1].re;
288  t[4].im = tab[2].im*t[1].re + tab[1].im*t[5].re - tab[0].im*t[3].re;
289 #endif
290 
291  BF(t[1].re, z[0].re, z[0].re, t[4].re);
292  BF(t[3].re, z[1].re, z[1].re, t[2].re);
293  BF(t[5].re, z[2].re, z[2].re, t[0].re);
294  BF(t[1].im, z[0].im, z[0].im, t[0].im);
295  BF(t[3].im, z[1].im, z[1].im, t[2].im);
296  BF(t[5].im, z[2].im, z[2].im, t[4].im);
297 
298  out[1*stride].re = in[0].re + z[0].re;
299  out[1*stride].im = in[0].im + t[1].im;
300  out[2*stride].re = in[0].re + t[3].re;
301  out[2*stride].im = in[0].im + z[1].im;
302  out[3*stride].re = in[0].re + z[2].re;
303  out[3*stride].im = in[0].im + t[5].im;
304  out[4*stride].re = in[0].re + t[5].re;
305  out[4*stride].im = in[0].im + z[2].im;
306  out[5*stride].re = in[0].re + z[1].re;
307  out[5*stride].im = in[0].im + t[3].im;
308  out[6*stride].re = in[0].re + t[1].re;
309  out[6*stride].im = in[0].im + z[0].im;
310 }
311 
313  ptrdiff_t stride)
314 {
315  const FFTComplex *tab = TX_NAME(ff_cos_9);
316  FFTComplex t[16], w[4], x[5], y[5], z[2];
317 #ifdef TX_INT32
318  int64_t mtmp[12];
319 #endif
320 
321  BF(t[1].re, t[0].re, in[1].re, in[8].re);
322  BF(t[1].im, t[0].im, in[1].im, in[8].im);
323  BF(t[3].re, t[2].re, in[2].re, in[7].re);
324  BF(t[3].im, t[2].im, in[2].im, in[7].im);
325  BF(t[5].re, t[4].re, in[3].re, in[6].re);
326  BF(t[5].im, t[4].im, in[3].im, in[6].im);
327  BF(t[7].re, t[6].re, in[4].re, in[5].re);
328  BF(t[7].im, t[6].im, in[4].im, in[5].im);
329 
330  w[0].re = t[0].re - t[6].re;
331  w[0].im = t[0].im - t[6].im;
332  w[1].re = t[2].re - t[6].re;
333  w[1].im = t[2].im - t[6].im;
334  w[2].re = t[1].re - t[7].re;
335  w[2].im = t[1].im - t[7].im;
336  w[3].re = t[3].re + t[7].re;
337  w[3].im = t[3].im + t[7].im;
338 
339  z[0].re = in[0].re + t[4].re;
340  z[0].im = in[0].im + t[4].im;
341 
342  z[1].re = t[0].re + t[2].re + t[6].re;
343  z[1].im = t[0].im + t[2].im + t[6].im;
344 
345  out[0*stride].re = z[0].re + z[1].re;
346  out[0*stride].im = z[0].im + z[1].im;
347 
348 #ifdef TX_INT32
349  mtmp[0] = t[1].re - t[3].re + t[7].re;
350  mtmp[1] = t[1].im - t[3].im + t[7].im;
351 
352  y[3].re = (int32_t)(((int64_t)tab[0].im)*mtmp[0] + 0x40000000 >> 31);
353  y[3].im = (int32_t)(((int64_t)tab[0].im)*mtmp[1] + 0x40000000 >> 31);
354 
355  mtmp[0] = (int32_t)(((int64_t)tab[0].re)*z[1].re + 0x40000000 >> 31);
356  mtmp[1] = (int32_t)(((int64_t)tab[0].re)*z[1].im + 0x40000000 >> 31);
357  mtmp[2] = (int32_t)(((int64_t)tab[0].re)*t[4].re + 0x40000000 >> 31);
358  mtmp[3] = (int32_t)(((int64_t)tab[0].re)*t[4].im + 0x40000000 >> 31);
359 
360  x[3].re = z[0].re + (int32_t)mtmp[0];
361  x[3].im = z[0].im + (int32_t)mtmp[1];
362  z[0].re = in[0].re + (int32_t)mtmp[2];
363  z[0].im = in[0].im + (int32_t)mtmp[3];
364 
365  mtmp[0] = ((int64_t)tab[1].re)*w[0].re;
366  mtmp[1] = ((int64_t)tab[1].re)*w[0].im;
367  mtmp[2] = ((int64_t)tab[2].im)*w[0].re;
368  mtmp[3] = ((int64_t)tab[2].im)*w[0].im;
369  mtmp[4] = ((int64_t)tab[1].im)*w[2].re;
370  mtmp[5] = ((int64_t)tab[1].im)*w[2].im;
371  mtmp[6] = ((int64_t)tab[2].re)*w[2].re;
372  mtmp[7] = ((int64_t)tab[2].re)*w[2].im;
373 
374  x[1].re = (int32_t)(mtmp[0] + ((int64_t)tab[2].im)*w[1].re + 0x40000000 >> 31);
375  x[1].im = (int32_t)(mtmp[1] + ((int64_t)tab[2].im)*w[1].im + 0x40000000 >> 31);
376  x[2].re = (int32_t)(mtmp[2] - ((int64_t)tab[3].re)*w[1].re + 0x40000000 >> 31);
377  x[2].im = (int32_t)(mtmp[3] - ((int64_t)tab[3].re)*w[1].im + 0x40000000 >> 31);
378  y[1].re = (int32_t)(mtmp[4] + ((int64_t)tab[2].re)*w[3].re + 0x40000000 >> 31);
379  y[1].im = (int32_t)(mtmp[5] + ((int64_t)tab[2].re)*w[3].im + 0x40000000 >> 31);
380  y[2].re = (int32_t)(mtmp[6] - ((int64_t)tab[3].im)*w[3].re + 0x40000000 >> 31);
381  y[2].im = (int32_t)(mtmp[7] - ((int64_t)tab[3].im)*w[3].im + 0x40000000 >> 31);
382 
383  y[0].re = (int32_t)(((int64_t)tab[0].im)*t[5].re + 0x40000000 >> 31);
384  y[0].im = (int32_t)(((int64_t)tab[0].im)*t[5].im + 0x40000000 >> 31);
385 
386 #else
387  y[3].re = tab[0].im*(t[1].re - t[3].re + t[7].re);
388  y[3].im = tab[0].im*(t[1].im - t[3].im + t[7].im);
389 
390  x[3].re = z[0].re + tab[0].re*z[1].re;
391  x[3].im = z[0].im + tab[0].re*z[1].im;
392  z[0].re = in[0].re + tab[0].re*t[4].re;
393  z[0].im = in[0].im + tab[0].re*t[4].im;
394 
395  x[1].re = tab[1].re*w[0].re + tab[2].im*w[1].re;
396  x[1].im = tab[1].re*w[0].im + tab[2].im*w[1].im;
397  x[2].re = tab[2].im*w[0].re - tab[3].re*w[1].re;
398  x[2].im = tab[2].im*w[0].im - tab[3].re*w[1].im;
399  y[1].re = tab[1].im*w[2].re + tab[2].re*w[3].re;
400  y[1].im = tab[1].im*w[2].im + tab[2].re*w[3].im;
401  y[2].re = tab[2].re*w[2].re - tab[3].im*w[3].re;
402  y[2].im = tab[2].re*w[2].im - tab[3].im*w[3].im;
403 
404  y[0].re = tab[0].im*t[5].re;
405  y[0].im = tab[0].im*t[5].im;
406 #endif
407 
408  x[4].re = x[1].re + x[2].re;
409  x[4].im = x[1].im + x[2].im;
410 
411  y[4].re = y[1].re - y[2].re;
412  y[4].im = y[1].im - y[2].im;
413  x[1].re = z[0].re + x[1].re;
414  x[1].im = z[0].im + x[1].im;
415  y[1].re = y[0].re + y[1].re;
416  y[1].im = y[0].im + y[1].im;
417  x[2].re = z[0].re + x[2].re;
418  x[2].im = z[0].im + x[2].im;
419  y[2].re = y[2].re - y[0].re;
420  y[2].im = y[2].im - y[0].im;
421  x[4].re = z[0].re - x[4].re;
422  x[4].im = z[0].im - x[4].im;
423  y[4].re = y[0].re - y[4].re;
424  y[4].im = y[0].im - y[4].im;
425 
426  out[1*stride] = (FFTComplex){ x[1].re + y[1].im, x[1].im - y[1].re };
427  out[2*stride] = (FFTComplex){ x[2].re + y[2].im, x[2].im - y[2].re };
428  out[3*stride] = (FFTComplex){ x[3].re + y[3].im, x[3].im - y[3].re };
429  out[4*stride] = (FFTComplex){ x[4].re + y[4].im, x[4].im - y[4].re };
430  out[5*stride] = (FFTComplex){ x[4].re - y[4].im, x[4].im + y[4].re };
431  out[6*stride] = (FFTComplex){ x[3].re - y[3].im, x[3].im + y[3].re };
432  out[7*stride] = (FFTComplex){ x[2].re - y[2].im, x[2].im + y[2].re };
433  out[8*stride] = (FFTComplex){ x[1].re - y[1].im, x[1].im + y[1].re };
434 }
435 
437  ptrdiff_t stride)
438 {
439  FFTComplex tmp[15];
440 
441  for (int i = 0; i < 5; i++)
442  fft3(tmp + i, in + i*3, 5);
443 
444  fft5_m1(out, tmp + 0, stride);
445  fft5_m2(out, tmp + 5, stride);
446  fft5_m3(out, tmp + 10, stride);
447 }
448 
449 #define BUTTERFLIES(a0,a1,a2,a3) \
450  do { \
451  r0=a0.re; \
452  i0=a0.im; \
453  r1=a1.re; \
454  i1=a1.im; \
455  BF(t3, t5, t5, t1); \
456  BF(a2.re, a0.re, r0, t5); \
457  BF(a3.im, a1.im, i1, t3); \
458  BF(t4, t6, t2, t6); \
459  BF(a3.re, a1.re, r1, t4); \
460  BF(a2.im, a0.im, i0, t6); \
461  } while (0)
462 
463 #define TRANSFORM(a0,a1,a2,a3,wre,wim) \
464  do { \
465  CMUL(t1, t2, a2.re, a2.im, wre, -wim); \
466  CMUL(t5, t6, a3.re, a3.im, wre, wim); \
467  BUTTERFLIES(a0, a1, a2, a3); \
468  } while (0)
469 
470 /* z[0...8n-1], w[1...2n-1] */
471 static void split_radix_combine(FFTComplex *z, const FFTSample *cos, int n)
472 {
473  int o1 = 2*n;
474  int o2 = 4*n;
475  int o3 = 6*n;
476  const FFTSample *wim = cos + o1 - 7;
477  FFTSample t1, t2, t3, t4, t5, t6, r0, i0, r1, i1;
478 
479  for (int i = 0; i < n; i += 4) {
480  TRANSFORM(z[0], z[o1 + 0], z[o2 + 0], z[o3 + 0], cos[0], wim[7]);
481  TRANSFORM(z[2], z[o1 + 2], z[o2 + 2], z[o3 + 2], cos[2], wim[5]);
482  TRANSFORM(z[4], z[o1 + 4], z[o2 + 4], z[o3 + 4], cos[4], wim[3]);
483  TRANSFORM(z[6], z[o1 + 6], z[o2 + 6], z[o3 + 6], cos[6], wim[1]);
484 
485  TRANSFORM(z[1], z[o1 + 1], z[o2 + 1], z[o3 + 1], cos[1], wim[6]);
486  TRANSFORM(z[3], z[o1 + 3], z[o2 + 3], z[o3 + 3], cos[3], wim[4]);
487  TRANSFORM(z[5], z[o1 + 5], z[o2 + 5], z[o3 + 5], cos[5], wim[2]);
488  TRANSFORM(z[7], z[o1 + 7], z[o2 + 7], z[o3 + 7], cos[7], wim[0]);
489 
490  z += 2*4;
491  cos += 2*4;
492  wim -= 2*4;
493  }
494 }
495 
496 #define DECL_FFT(n, n2, n4) \
497 static void fft##n(FFTComplex *z) \
498 { \
499  fft##n2(z); \
500  fft##n4(z + n4*2); \
501  fft##n4(z + n4*3); \
502  split_radix_combine(z, TX_NAME(ff_cos_##n), n4/2); \
503 }
504 
505 static void fft2(FFTComplex *z)
506 {
507  FFTComplex tmp;
508  BF(tmp.re, z[0].re, z[0].re, z[1].re);
509  BF(tmp.im, z[0].im, z[0].im, z[1].im);
510  z[1] = tmp;
511 }
512 
513 static void fft4(FFTComplex *z)
514 {
515  FFTSample t1, t2, t3, t4, t5, t6, t7, t8;
516 
517  BF(t3, t1, z[0].re, z[1].re);
518  BF(t8, t6, z[3].re, z[2].re);
519  BF(z[2].re, z[0].re, t1, t6);
520  BF(t4, t2, z[0].im, z[1].im);
521  BF(t7, t5, z[2].im, z[3].im);
522  BF(z[3].im, z[1].im, t4, t8);
523  BF(z[3].re, z[1].re, t3, t7);
524  BF(z[2].im, z[0].im, t2, t5);
525 }
526 
527 static void fft8(FFTComplex *z)
528 {
529  FFTSample t1, t2, t3, t4, t5, t6, r0, i0, r1, i1;
530 
531  fft4(z);
532 
533  BF(t1, z[5].re, z[4].re, -z[5].re);
534  BF(t2, z[5].im, z[4].im, -z[5].im);
535  BF(t5, z[7].re, z[6].re, -z[7].re);
536  BF(t6, z[7].im, z[6].im, -z[7].im);
537 
538  BUTTERFLIES(z[0], z[2], z[4], z[6]);
539  TRANSFORM(z[1], z[3], z[5], z[7], RESCALE(M_SQRT1_2), RESCALE(M_SQRT1_2));
540 }
541 
542 static void fft16(FFTComplex *z)
543 {
544  FFTSample t1, t2, t3, t4, t5, t6, r0, i0, r1, i1;
545  FFTSample cos_16_1 = TX_NAME(ff_cos_16)[1];
546  FFTSample cos_16_2 = TX_NAME(ff_cos_16)[2];
547  FFTSample cos_16_3 = TX_NAME(ff_cos_16)[3];
548 
549  fft8(z + 0);
550  fft4(z + 8);
551  fft4(z + 12);
552 
553  t1 = z[ 8].re;
554  t2 = z[ 8].im;
555  t5 = z[12].re;
556  t6 = z[12].im;
557  BUTTERFLIES(z[0], z[4], z[8], z[12]);
558 
559  TRANSFORM(z[ 2], z[ 6], z[10], z[14], cos_16_2, cos_16_2);
560  TRANSFORM(z[ 1], z[ 5], z[ 9], z[13], cos_16_1, cos_16_3);
561  TRANSFORM(z[ 3], z[ 7], z[11], z[15], cos_16_3, cos_16_1);
562 }
563 
564 DECL_FFT(32,16,8)
565 DECL_FFT(64,32,16)
566 DECL_FFT(128,64,32)
567 DECL_FFT(256,128,64)
568 DECL_FFT(512,256,128)
569 DECL_FFT(1024,512,256)
570 DECL_FFT(2048,1024,512)
571 DECL_FFT(4096,2048,1024)
572 DECL_FFT(8192,4096,2048)
573 DECL_FFT(16384,8192,4096)
574 DECL_FFT(32768,16384,8192)
575 DECL_FFT(65536,32768,16384)
576 DECL_FFT(131072,65536,32768)
577 
578 static void (* const fft_dispatch[])(FFTComplex*) = {
579  NULL, fft2, fft4, fft8, fft16, fft32, fft64, fft128, fft256, fft512,
580  fft1024, fft2048, fft4096, fft8192, fft16384, fft32768, fft65536, fft131072
581 };
582 
583 #define DECL_COMP_FFT(N) \
584 static void compound_fft_##N##xM(AVTXContext *s, void *_out, \
585  void *_in, ptrdiff_t stride) \
586 { \
587  const int m = s->m, *in_map = s->pfatab, *out_map = in_map + N*m; \
588  FFTComplex *in = _in; \
589  FFTComplex *out = _out; \
590  FFTComplex fft##N##in[N]; \
591  void (*fftp)(FFTComplex *z) = fft_dispatch[av_log2(m)]; \
592  \
593  for (int i = 0; i < m; i++) { \
594  for (int j = 0; j < N; j++) \
595  fft##N##in[j] = in[in_map[i*N + j]]; \
596  fft##N(s->tmp + s->revtab_c[i], fft##N##in, m); \
597  } \
598  \
599  for (int i = 0; i < N; i++) \
600  fftp(s->tmp + m*i); \
601  \
602  for (int i = 0; i < N*m; i++) \
603  out[i] = s->tmp[out_map[i]]; \
604 }
605 
606 DECL_COMP_FFT(3)
607 DECL_COMP_FFT(5)
608 DECL_COMP_FFT(7)
609 DECL_COMP_FFT(9)
610 DECL_COMP_FFT(15)
611 
612 static void split_radix_fft(AVTXContext *s, void *_out, void *_in,
613  ptrdiff_t stride)
614 {
615  FFTComplex *in = _in;
616  FFTComplex *out = _out;
617  int m = s->m, mb = av_log2(m);
618 
619  if (s->flags & AV_TX_INPLACE) {
620  FFTComplex tmp;
621  int src, dst, *inplace_idx = s->inplace_idx;
622 
623  src = *inplace_idx++;
624 
625  do {
626  tmp = out[src];
627  dst = s->revtab_c[src];
628  do {
629  FFSWAP(FFTComplex, tmp, out[dst]);
630  dst = s->revtab_c[dst];
631  } while (dst != src); /* Can be > as well, but is less predictable */
632  out[dst] = tmp;
633  } while ((src = *inplace_idx++));
634  } else {
635  for (int i = 0; i < m; i++)
636  out[i] = in[s->revtab_c[i]];
637  }
638 
639  fft_dispatch[mb](out);
640 }
641 
642 static void naive_fft(AVTXContext *s, void *_out, void *_in,
643  ptrdiff_t stride)
644 {
645  FFTComplex *in = _in;
646  FFTComplex *out = _out;
647  const int n = s->n;
648  double phase = s->inv ? 2.0*M_PI/n : -2.0*M_PI/n;
649 
650  for(int i = 0; i < n; i++) {
651  FFTComplex tmp = { 0 };
652  for(int j = 0; j < n; j++) {
653  const double factor = phase*i*j;
654  const FFTComplex mult = {
655  RESCALE(cos(factor)),
656  RESCALE(sin(factor)),
657  };
658  FFTComplex res;
659  CMUL3(res, in[j], mult);
660  tmp.re += res.re;
661  tmp.im += res.im;
662  }
663  out[i] = tmp;
664  }
665 }
666 
667 #define DECL_COMP_IMDCT(N) \
668 static void compound_imdct_##N##xM(AVTXContext *s, void *_dst, void *_src, \
669  ptrdiff_t stride) \
670 { \
671  FFTComplex fft##N##in[N]; \
672  FFTComplex *z = _dst, *exp = s->exptab; \
673  const int m = s->m, len8 = N*m >> 1; \
674  const int *in_map = s->pfatab, *out_map = in_map + N*m; \
675  const FFTSample *src = _src, *in1, *in2; \
676  void (*fftp)(FFTComplex *) = fft_dispatch[av_log2(m)]; \
677  \
678  stride /= sizeof(*src); /* To convert it from bytes */ \
679  in1 = src; \
680  in2 = src + ((N*m*2) - 1) * stride; \
681  \
682  for (int i = 0; i < m; i++) { \
683  for (int j = 0; j < N; j++) { \
684  const int k = in_map[i*N + j]; \
685  FFTComplex tmp = { in2[-k*stride], in1[k*stride] }; \
686  CMUL3(fft##N##in[j], tmp, exp[k >> 1]); \
687  } \
688  fft##N(s->tmp + s->revtab_c[i], fft##N##in, m); \
689  } \
690  \
691  for (int i = 0; i < N; i++) \
692  fftp(s->tmp + m*i); \
693  \
694  for (int i = 0; i < len8; i++) { \
695  const int i0 = len8 + i, i1 = len8 - i - 1; \
696  const int s0 = out_map[i0], s1 = out_map[i1]; \
697  FFTComplex src1 = { s->tmp[s1].im, s->tmp[s1].re }; \
698  FFTComplex src0 = { s->tmp[s0].im, s->tmp[s0].re }; \
699  \
700  CMUL(z[i1].re, z[i0].im, src1.re, src1.im, exp[i1].im, exp[i1].re); \
701  CMUL(z[i0].re, z[i1].im, src0.re, src0.im, exp[i0].im, exp[i0].re); \
702  } \
703 }
704 
709 DECL_COMP_IMDCT(15)
710 
711 #define DECL_COMP_MDCT(N) \
712 static void compound_mdct_##N##xM(AVTXContext *s, void *_dst, void *_src, \
713  ptrdiff_t stride) \
714 { \
715  FFTSample *src = _src, *dst = _dst; \
716  FFTComplex *exp = s->exptab, tmp, fft##N##in[N]; \
717  const int m = s->m, len4 = N*m, len3 = len4 * 3, len8 = len4 >> 1; \
718  const int *in_map = s->pfatab, *out_map = in_map + N*m; \
719  void (*fftp)(FFTComplex *) = fft_dispatch[av_log2(m)]; \
720  \
721  stride /= sizeof(*dst); \
722  \
723  for (int i = 0; i < m; i++) { /* Folding and pre-reindexing */ \
724  for (int j = 0; j < N; j++) { \
725  const int k = in_map[i*N + j]; \
726  if (k < len4) { \
727  tmp.re = FOLD(-src[ len4 + k], src[1*len4 - 1 - k]); \
728  tmp.im = FOLD(-src[ len3 + k], -src[1*len3 - 1 - k]); \
729  } else { \
730  tmp.re = FOLD(-src[ len4 + k], -src[5*len4 - 1 - k]); \
731  tmp.im = FOLD( src[-len4 + k], -src[1*len3 - 1 - k]); \
732  } \
733  CMUL(fft##N##in[j].im, fft##N##in[j].re, tmp.re, tmp.im, \
734  exp[k >> 1].re, exp[k >> 1].im); \
735  } \
736  fft##N(s->tmp + s->revtab_c[i], fft##N##in, m); \
737  } \
738  \
739  for (int i = 0; i < N; i++) \
740  fftp(s->tmp + m*i); \
741  \
742  for (int i = 0; i < len8; i++) { \
743  const int i0 = len8 + i, i1 = len8 - i - 1; \
744  const int s0 = out_map[i0], s1 = out_map[i1]; \
745  FFTComplex src1 = { s->tmp[s1].re, s->tmp[s1].im }; \
746  FFTComplex src0 = { s->tmp[s0].re, s->tmp[s0].im }; \
747  \
748  CMUL(dst[2*i1*stride + stride], dst[2*i0*stride], src0.re, src0.im, \
749  exp[i0].im, exp[i0].re); \
750  CMUL(dst[2*i0*stride + stride], dst[2*i1*stride], src1.re, src1.im, \
751  exp[i1].im, exp[i1].re); \
752  } \
753 }
754 
759 DECL_COMP_MDCT(15)
760 
761 static void monolithic_imdct(AVTXContext *s, void *_dst, void *_src,
762  ptrdiff_t stride)
763 {
764  FFTComplex *z = _dst, *exp = s->exptab;
765  const int m = s->m, len8 = m >> 1;
766  const FFTSample *src = _src, *in1, *in2;
767  void (*fftp)(FFTComplex *) = fft_dispatch[av_log2(m)];
768 
769  stride /= sizeof(*src);
770  in1 = src;
771  in2 = src + ((m*2) - 1) * stride;
772 
773  for (int i = 0; i < m; i++) {
774  FFTComplex tmp = { in2[-2*i*stride], in1[2*i*stride] };
775  CMUL3(z[s->revtab_c[i]], tmp, exp[i]);
776  }
777 
778  fftp(z);
779 
780  for (int i = 0; i < len8; i++) {
781  const int i0 = len8 + i, i1 = len8 - i - 1;
782  FFTComplex src1 = { z[i1].im, z[i1].re };
783  FFTComplex src0 = { z[i0].im, z[i0].re };
784 
785  CMUL(z[i1].re, z[i0].im, src1.re, src1.im, exp[i1].im, exp[i1].re);
786  CMUL(z[i0].re, z[i1].im, src0.re, src0.im, exp[i0].im, exp[i0].re);
787  }
788 }
789 
790 static void monolithic_mdct(AVTXContext *s, void *_dst, void *_src,
791  ptrdiff_t stride)
792 {
793  FFTSample *src = _src, *dst = _dst;
794  FFTComplex *exp = s->exptab, tmp, *z = _dst;
795  const int m = s->m, len4 = m, len3 = len4 * 3, len8 = len4 >> 1;
796  void (*fftp)(FFTComplex *) = fft_dispatch[av_log2(m)];
797 
798  stride /= sizeof(*dst);
799 
800  for (int i = 0; i < m; i++) { /* Folding and pre-reindexing */
801  const int k = 2*i;
802  if (k < len4) {
803  tmp.re = FOLD(-src[ len4 + k], src[1*len4 - 1 - k]);
804  tmp.im = FOLD(-src[ len3 + k], -src[1*len3 - 1 - k]);
805  } else {
806  tmp.re = FOLD(-src[ len4 + k], -src[5*len4 - 1 - k]);
807  tmp.im = FOLD( src[-len4 + k], -src[1*len3 - 1 - k]);
808  }
809  CMUL(z[s->revtab_c[i]].im, z[s->revtab_c[i]].re, tmp.re, tmp.im,
810  exp[i].re, exp[i].im);
811  }
812 
813  fftp(z);
814 
815  for (int i = 0; i < len8; i++) {
816  const int i0 = len8 + i, i1 = len8 - i - 1;
817  FFTComplex src1 = { z[i1].re, z[i1].im };
818  FFTComplex src0 = { z[i0].re, z[i0].im };
819 
820  CMUL(dst[2*i1*stride + stride], dst[2*i0*stride], src0.re, src0.im,
821  exp[i0].im, exp[i0].re);
822  CMUL(dst[2*i0*stride + stride], dst[2*i1*stride], src1.re, src1.im,
823  exp[i1].im, exp[i1].re);
824  }
825 }
826 
827 static void naive_imdct(AVTXContext *s, void *_dst, void *_src,
828  ptrdiff_t stride)
829 {
830  int len = s->n;
831  int len2 = len*2;
832  FFTSample *src = _src;
833  FFTSample *dst = _dst;
834  double scale = s->scale;
835  const double phase = M_PI/(4.0*len2);
836 
837  stride /= sizeof(*src);
838 
839  for (int i = 0; i < len; i++) {
840  double sum_d = 0.0;
841  double sum_u = 0.0;
842  double i_d = phase * (4*len - 2*i - 1);
843  double i_u = phase * (3*len2 + 2*i + 1);
844  for (int j = 0; j < len2; j++) {
845  double a = (2 * j + 1);
846  double a_d = cos(a * i_d);
847  double a_u = cos(a * i_u);
848  double val = UNSCALE(src[j*stride]);
849  sum_d += a_d * val;
850  sum_u += a_u * val;
851  }
852  dst[i + 0] = RESCALE( sum_d*scale);
853  dst[i + len] = RESCALE(-sum_u*scale);
854  }
855 }
856 
857 static void naive_mdct(AVTXContext *s, void *_dst, void *_src,
858  ptrdiff_t stride)
859 {
860  int len = s->n*2;
861  FFTSample *src = _src;
862  FFTSample *dst = _dst;
863  double scale = s->scale;
864  const double phase = M_PI/(4.0*len);
865 
866  stride /= sizeof(*dst);
867 
868  for (int i = 0; i < len; i++) {
869  double sum = 0.0;
870  for (int j = 0; j < len*2; j++) {
871  int a = (2*j + 1 + len) * (2*i + 1);
872  sum += UNSCALE(src[j]) * cos(a * phase);
873  }
874  dst[i*stride] = RESCALE(sum*scale);
875  }
876 }
877 
878 static void full_imdct_wrapper_fn(AVTXContext *s, void *_dst, void *_src,
879  ptrdiff_t stride)
880 {
881  int len = s->m*s->n*4;
882  int len2 = len >> 1;
883  int len4 = len >> 2;
884  FFTSample *dst = _dst;
885 
886  s->top_tx(s, dst + len4, _src, stride);
887 
888  stride /= sizeof(*dst);
889 
890  for (int i = 0; i < len4; i++) {
891  dst[ i*stride] = -dst[(len2 - i - 1)*stride];
892  dst[(len - i - 1)*stride] = dst[(len2 + i + 0)*stride];
893  }
894 }
895 
896 static int gen_mdct_exptab(AVTXContext *s, int len4, double scale)
897 {
898  const double theta = (scale < 0 ? len4 : 0) + 1.0/8.0;
899 
900  if (!(s->exptab = av_malloc_array(len4, sizeof(*s->exptab))))
901  return AVERROR(ENOMEM);
902 
903  scale = sqrt(fabs(scale));
904  for (int i = 0; i < len4; i++) {
905  const double alpha = M_PI_2 * (i + theta) / len4;
906  s->exptab[i].re = RESCALE(cos(alpha) * scale);
907  s->exptab[i].im = RESCALE(sin(alpha) * scale);
908  }
909 
910  return 0;
911 }
912 
914  enum AVTXType type, int inv, int len,
915  const void *scale, uint64_t flags)
916 {
917  const int is_mdct = ff_tx_type_is_mdct(type);
918  int err, l, n = 1, m = 1, max_ptwo = 1 << (FF_ARRAY_ELEMS(fft_dispatch) - 1);
919 
920  if (is_mdct)
921  len >>= 1;
922 
923  l = len;
924 
925 #define CHECK_FACTOR(DST, FACTOR, SRC) \
926  if (DST == 1 && !(SRC % FACTOR)) { \
927  DST = FACTOR; \
928  SRC /= FACTOR; \
929  }
930  CHECK_FACTOR(n, 15, len)
931  CHECK_FACTOR(n, 9, len)
932  CHECK_FACTOR(n, 7, len)
933  CHECK_FACTOR(n, 5, len)
934  CHECK_FACTOR(n, 3, len)
935 #undef CHECK_FACTOR
936 
937  /* len must be a power of two now */
938  if (!(len & (len - 1)) && len >= 2 && len <= max_ptwo) {
939  m = len;
940  len = 1;
941  }
942 
943  s->n = n;
944  s->m = m;
945  s->inv = inv;
946  s->type = type;
947  s->flags = flags;
948 
949  /* If we weren't able to split the length into factors we can handle,
950  * resort to using the naive and slow FT. This also filters out
951  * direct 3, 5 and 15 transforms as they're too niche. */
952  if (len > 1 || m == 1) {
953  if (is_mdct && (l & 1)) /* Odd (i)MDCTs are not supported yet */
954  return AVERROR(ENOSYS);
955  if (flags & AV_TX_INPLACE) /* Neither are in-place naive transforms */
956  return AVERROR(ENOSYS);
957  s->n = l;
958  s->m = 1;
959  *tx = naive_fft;
960  if (is_mdct) {
961  s->scale = *((SCALE_TYPE *)scale);
962  *tx = inv ? naive_imdct : naive_mdct;
963  if (inv && (flags & AV_TX_FULL_IMDCT)) {
964  s->top_tx = *tx;
965  *tx = full_imdct_wrapper_fn;
966  }
967  }
968  return 0;
969  }
970 
971  if (n > 1 && m > 1) { /* 2D transform case */
972  if ((err = ff_tx_gen_compound_mapping(s)))
973  return err;
974  if (!(s->tmp = av_malloc(n*m*sizeof(*s->tmp))))
975  return AVERROR(ENOMEM);
976  if (!(m & (m - 1))) {
977  *tx = n == 3 ? compound_fft_3xM :
978  n == 5 ? compound_fft_5xM :
979  n == 7 ? compound_fft_7xM :
980  n == 9 ? compound_fft_9xM :
981  compound_fft_15xM;
982  if (is_mdct)
983  *tx = n == 3 ? inv ? compound_imdct_3xM : compound_mdct_3xM :
984  n == 5 ? inv ? compound_imdct_5xM : compound_mdct_5xM :
985  n == 7 ? inv ? compound_imdct_7xM : compound_mdct_7xM :
986  n == 9 ? inv ? compound_imdct_9xM : compound_mdct_9xM :
987  inv ? compound_imdct_15xM : compound_mdct_15xM;
988  }
989  } else { /* Direct transform case */
990  *tx = split_radix_fft;
991  if (is_mdct)
992  *tx = inv ? monolithic_imdct : monolithic_mdct;
993  }
994 
995  if (n == 3 || n == 5 || n == 15)
996  init_cos_tabs(0);
997  else if (n == 7)
998  init_cos_tabs(1);
999  else if (n == 9)
1000  init_cos_tabs(2);
1001 
1002  if (m != 1 && !(m & (m - 1))) {
1003  if ((err = ff_tx_gen_ptwo_revtab(s, n == 1 && !is_mdct && !(flags & AV_TX_INPLACE))))
1004  return err;
1005  if (flags & AV_TX_INPLACE) {
1006  if (is_mdct) /* In-place MDCTs are not supported yet */
1007  return AVERROR(ENOSYS);
1008  if ((err = ff_tx_gen_ptwo_inplace_revtab_idx(s, s->revtab_c)))
1009  return err;
1010  }
1011  for (int i = 4; i <= av_log2(m); i++)
1012  init_cos_tabs(i);
1013  }
1014 
1015  if (is_mdct) {
1016  if (inv && (flags & AV_TX_FULL_IMDCT)) {
1017  s->top_tx = *tx;
1018  *tx = full_imdct_wrapper_fn;
1019  }
1020  return gen_mdct_exptab(s, n*m, *((SCALE_TYPE *)scale));
1021  }
1022 
1023  return 0;
1024 }
func
int(* func)(AVBPrint *dst, const char *in, const char *arg)
Definition: jacosubdec.c:68
stride
int stride
Definition: mace.c:144
monolithic_imdct
static void monolithic_imdct(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
Definition: tx_template.c:761
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
fft16
static void fft16(FFTComplex *z)
Definition: tx_template.c:542
out
FILE * out
Definition: movenc.c:54
split_radix_fft
static void split_radix_fft(AVTXContext *s, void *_out, void *_in, ptrdiff_t stride)
Definition: tx_template.c:612
TRANSFORM
#define TRANSFORM(a0, a1, a2, a3, wre, wim)
Definition: tx_template.c:463
AVTXContext
Definition: tx_priv.h:110
fft2
static void fft2(FFTComplex *z)
Definition: tx_template.c:505
im
float im
Definition: fft.c:78
tmp
static uint8_t tmp[11]
Definition: aes_ctr.c:26
index
fg index
Definition: ffmpeg_filter.c:167
w
uint8_t w
Definition: llviddspenc.c:38
M_PI_2
#define M_PI_2
Definition: mathematics.h:55
CMUL3
#define CMUL3(c, a, b)
Definition: mdct15.c:41
t1
#define t1
Definition: regdef.h:29
fft5
static void fft5(FFTComplex *out, FFTComplex *in, FFTComplex exptab[2])
Definition: mdct15.c:92
av_malloc
#define av_malloc(s)
Definition: tableprint_vlc.h:31
fft4
static void fft4(FFTComplex *z)
Definition: tx_template.c:513
DECL_FFT5
#define DECL_FFT5(NAME, D0, D1, D2, D3, D4)
Definition: tx_template.c:186
cos_tabs_init_once
static CosTabsInitOnce cos_tabs_init_once[]
Definition: tx_template.c:124
ff_init_9_tabs
static av_cold void ff_init_9_tabs(void)
Definition: tx_template.c:115
tab
static const struct twinvq_data tab
Definition: twinvq_data.h:10345
sum_d
static void sum_d(const int *input, int *output, int len)
Definition: dcadct.c:51
val
static double val(void *priv, double ch)
Definition: aeval.c:76
type
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf type
Definition: writing_filters.txt:86
scale
static av_always_inline float scale(float x, float s)
Definition: vf_v360.c:1388
mult
static int16_t mult(Float11 *f1, Float11 *f2)
Definition: g726.c:56
ff_thread_once
static int ff_thread_once(char *control, void(*routine)(void))
Definition: thread.h:175
FF_ARRAY_ELEMS
#define FF_ARRAY_ELEMS(a)
Definition: sinewin_tablegen.c:29
av_cold
#define av_cold
Definition: attributes.h:90
av_tx_fn
void(* av_tx_fn)(AVTXContext *s, void *out, void *in, ptrdiff_t stride)
Function pointer to a function to perform the transform.
Definition: tx.h:102
fft8
static void fft8(FFTComplex *z)
Definition: tx_template.c:527
s
#define s(width, name)
Definition: cbs_vp9.c:257
ff_tx_gen_compound_mapping
int ff_tx_gen_compound_mapping(AVTXContext *s)
Definition: tx.c:45
t7
#define t7
Definition: regdef.h:35
gen_mdct_exptab
static int gen_mdct_exptab(AVTXContext *s, int len4, double scale)
Definition: tx_template.c:896
ff_init_53_tabs
static av_cold void ff_init_53_tabs(void)
Definition: tx_template.c:100
AV_TX_FULL_IMDCT
@ AV_TX_FULL_IMDCT
Performs a full inverse MDCT rather than leaving out samples that can be derived through symmetry.
Definition: tx.h:127
AV_ONCE_INIT
#define AV_ONCE_INIT
Definition: thread.h:173
fabs
static __device__ float fabs(float a)
Definition: cuda_runtime.h:182
monolithic_mdct
static void monolithic_mdct(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
Definition: tx_template.c:790
NULL
#define NULL
Definition: coverity.c:32
t5
#define t5
Definition: regdef.h:33
ff_tx_gen_ptwo_inplace_revtab_idx
int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s, int *revtab)
Definition: tx.c:127
t6
#define t6
Definition: regdef.h:34
AV_TX_INPLACE
@ AV_TX_INPLACE
Performs an in-place transformation on the input.
Definition: tx.h:113
src
#define src
Definition: vp8dsp.c:255
FFTSample
float FFTSample
Definition: avfft.h:35
exp
int8_t exp
Definition: eval.c:72
DECL_COMP_MDCT
#define DECL_COMP_MDCT(N)
Definition: tx_template.c:711
FFTComplex
void FFTComplex
Definition: tx_priv.h:43
fft9
static av_always_inline void fft9(FFTComplex *out, FFTComplex *in, ptrdiff_t stride)
Definition: tx_template.c:312
AVTXType
AVTXType
Definition: tx.h:39
CosTabsInitOnce
Definition: fft_template.c:69
CHECK_FACTOR
#define CHECK_FACTOR(DST, FACTOR, SRC)
FFTComplex::im
FFTSample im
Definition: avfft.h:38
FFTComplex::re
FFTSample re
Definition: avfft.h:38
t8
#define t8
Definition: regdef.h:53
BF
#define BF(a, b, c, s)
Definition: dct32_template.c:90
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
ff_tx_type_is_mdct
int ff_tx_type_is_mdct(enum AVTXType type)
Definition: tx.c:21
ff_tx_init_mdct_fft
int TX_NAME() ff_tx_init_mdct_fft(AVTXContext *s, av_tx_fn *tx, enum AVTXType type, int inv, int len, const void *scale, uint64_t flags)
Definition: tx_template.c:913
mb
#define mb
Definition: vf_colormatrix.c:101
M_PI
#define M_PI
Definition: mathematics.h:52
src0
#define src0
Definition: h264pred.c:139
DECLARE_ALIGNED
#define DECLARE_ALIGNED(n, t, v)
Definition: mem.h:116
DECL_FFT
#define DECL_FFT(n, n2, n4)
Definition: tx_template.c:496
src1
#define src1
Definition: h264pred.c:140
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:271
t4
#define t4
Definition: regdef.h:32
t3
#define t3
Definition: regdef.h:31
av_malloc_array
#define av_malloc_array(a, b)
Definition: tableprint_vlc.h:32
fft7
static av_always_inline void fft7(FFTComplex *out, FFTComplex *in, ptrdiff_t stride)
Definition: tx_template.c:225
ff_tx_gen_ptwo_revtab
int ff_tx_gen_ptwo_revtab(AVTXContext *s, int invert_lookup)
Definition: tx.c:106
split_radix_combine
static void split_radix_combine(FFTComplex *z, const FFTSample *cos, int n)
Definition: tx_template.c:471
av_always_inline
#define av_always_inline
Definition: attributes.h:49
DECL_COMP_IMDCT
#define DECL_COMP_IMDCT(N)
Definition: tx_template.c:667
COSTABLE
COSTABLE(16)
len
int len
Definition: vorbis_enc_data.h:426
init_cos_tabs
static av_cold void init_cos_tabs(int index)
Definition: tx_template.c:145
fft15
static av_always_inline void fft15(FFTComplex *out, FFTComplex *in, ptrdiff_t stride)
Definition: tx_template.c:436
full_imdct_wrapper_fn
static void full_imdct_wrapper_fn(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
Definition: tx_template.c:878
fft_dispatch
static void(*const fft_dispatch[])(FFTComplex *)
Definition: tx_template.c:578
FFSWAP
#define FFSWAP(type, a, b)
Definition: macros.h:52
ff_init_7_tabs
static av_cold void ff_init_7_tabs(void)
Definition: tx_template.c:108
M_SQRT1_2
#define M_SQRT1_2
Definition: mathematics.h:58
INIT_FF_COS_TABS_FUNC
#define INIT_FF_COS_TABS_FUNC(index, size)
Definition: tx_template.c:79
t2
#define t2
Definition: regdef.h:30
cos_tabs
static FFTSample *const cos_tabs[18]
Definition: tx_template.c:46
BUTTERFLIES
#define BUTTERFLIES(a0, a1, a2, a3)
Definition: tx_template.c:449
factor
static const int factor[16]
Definition: vf_pp7.c:76
TX_NAME
FFTComplex TX_NAME(ff_cos_53)[4]
init_cos_tabs_idx
static av_always_inline void init_cos_tabs_idx(int index)
Definition: tx_template.c:67
alpha
static const int16_t alpha[]
Definition: ilbcdata.h:55
naive_fft
static void naive_fft(AVTXContext *s, void *_out, void *_in, ptrdiff_t stride)
Definition: tx_template.c:642
naive_mdct
static void naive_mdct(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
Definition: tx_template.c:857
int32_t
int32_t
Definition: audioconvert.c:56
flags
#define flags(name, subs,...)
Definition: cbs_av1.c:561
DECL_COMP_FFT
#define DECL_COMP_FFT(N)
Definition: tx_template.c:583
naive_imdct
static void naive_imdct(AVTXContext *s, void *_dst, void *_src, ptrdiff_t stride)
Definition: tx_template.c:827
av_log2
int av_log2(unsigned v)
Definition: intmath.c:26
CMUL
#define CMUL(dre, dim, are, aim, bre, bim)
Definition: fft-internal.h:42
fft3
static av_always_inline void fft3(FFTComplex *out, FFTComplex *in, ptrdiff_t stride)
Definition: tx_template.c:151
FFTComplex
Definition: avfft.h:37
re
float re
Definition: fft.c:78