FFmpeg
fdctdsp.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2003 James Klicman <james@klicman.org>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
#include "config.h"

#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/ppc/cpu.h"
#include "libavutil/ppc/util_altivec.h"

#include "libavcodec/fdctdsp.h"

#include "fdct.h"
31 
32 #if HAVE_ALTIVEC
33 
/*
 * Shorthand casts between AltiVec vector types. They only change the static
 * type the compiler sees for the operand, so the same register contents can
 * be fed to intrinsics that expect a different element layout.
 */
#define vs16(v) ((vector signed short) (v))
#define vs32(v) ((vector signed int) (v))
#define vu8(v)  ((vector unsigned char) (v))
#define vu16(v) ((vector unsigned short) (v))
#define vu32(v) ((vector unsigned int) (v))
39 
/* Ck = cos(k * PI / 16) */
#define C1 0.98078528040323044912618224 /* cos(1 * PI / 16) */
#define C2 0.92387953251128675612818319 /* cos(2 * PI / 16) */
#define C3 0.83146961230254523707878838 /* cos(3 * PI / 16) */
#define C4 0.70710678118654752440084436 /* cos(4 * PI / 16) */
#define C5 0.55557023301960222474283081 /* cos(5 * PI / 16) */
#define C6 0.38268343236508977172845998 /* cos(6 * PI / 16) */
#define C7 0.19509032201612826784828487 /* cos(7 * PI / 16) */

/*
 * Multipliers consumed by the FDCT butterflies, expressed in terms of the
 * cosines above. They are packed four at a time into fdctconsts[].
 * Every expansion is fully parenthesized so that a W* name can be dropped
 * into any expression without precedence surprises.
 */
#define W0 (-(2 * C2))
#define W1 (2 * C6)
#define W2 (M_SQRT2 * C6)
#define W3 (M_SQRT2 * C3)
#define W4 (M_SQRT2 * (-C1 + C3 + C5 - C7))
#define W5 (M_SQRT2 * (C1 + C3 - C5 + C7))
#define W6 (M_SQRT2 * (C1 + C3 + C5 - C7))
#define W7 (M_SQRT2 * (C1 + C3 - C5 - C7))
#define W8 (M_SQRT2 * (C7 - C3))
#define W9 (M_SQRT2 * (-C1 - C3))
#define WA (M_SQRT2 * (-C3 - C5))
#define WB (M_SQRT2 * (C5 - C3))
60 
61 static const vector float fdctconsts[3] = {
62  { W0, W1, W2, W3 },
63  { W4, W5, W6, W7 },
64  { W8, W9, WA, WB }
65 };
66 
/*
 * LD_Wn: replicate multiplier Wn across all lanes of a vector.
 * cnsts0, cnsts1 and cnsts2 must be in scope at the point of use; they hold
 * fdctconsts[0], [1] and [2] respectively, so lane i of cnstsK is W(4K + i).
 */
#define LD_W0 vec_splat(cnsts0, 0)
#define LD_W1 vec_splat(cnsts0, 1)
#define LD_W2 vec_splat(cnsts0, 2)
#define LD_W3 vec_splat(cnsts0, 3)
#define LD_W4 vec_splat(cnsts1, 0)
#define LD_W5 vec_splat(cnsts1, 1)
#define LD_W6 vec_splat(cnsts1, 2)
#define LD_W7 vec_splat(cnsts1, 3)
#define LD_W8 vec_splat(cnsts2, 0)
#define LD_W9 vec_splat(cnsts2, 1)
#define LD_WA vec_splat(cnsts2, 2)
#define LD_WB vec_splat(cnsts2, 3)
79 
/*
 * FDCTROW: 8-point forward DCT butterfly network, computed in place.
 *
 * b0..b7 are read as the eight inputs and overwritten with the eight
 * outputs (b0 ends up holding the plain sum of all inputs). Each operand is
 * named several times in the expansion, so pass simple variables only.
 *
 * The expansion site must provide the scratch variables x0..x8 and cnst,
 * the additive identity mzero (-0.0), and cnsts0..cnsts2 for the LD_W*
 * macros.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement; invoke it with a trailing semicolon.
 *
 * Note: nothing in the visible part of this file expands FDCTROW; the row
 * pass of ff_fdct_altivec() is written out by hand.
 */
#define FDCTROW(b0, b1, b2, b3, b4, b5, b6, b7) /* {{{ */               \
    do {                                                                \
        x0 = vec_add(b0, b7);           /* x0 = b0 + b7; */             \
        x7 = vec_sub(b0, b7);           /* x7 = b0 - b7; */             \
        x1 = vec_add(b1, b6);           /* x1 = b1 + b6; */             \
        x6 = vec_sub(b1, b6);           /* x6 = b1 - b6; */             \
        x2 = vec_add(b2, b5);           /* x2 = b2 + b5; */             \
        x5 = vec_sub(b2, b5);           /* x5 = b2 - b5; */             \
        x3 = vec_add(b3, b4);           /* x3 = b3 + b4; */             \
        x4 = vec_sub(b3, b4);           /* x4 = b3 - b4; */             \
                                                                        \
        b7 = vec_add(x0, x3);           /* b7 = x0 + x3; */             \
        b1 = vec_add(x1, x2);           /* b1 = x1 + x2; */             \
        b0 = vec_add(b7, b1);           /* b0 = b7 + b1; */             \
        b4 = vec_sub(b7, b1);           /* b4 = b7 - b1; */             \
                                                                        \
        b2   = vec_sub(x0, x3);         /* b2 = x0 - x3; */             \
        b6   = vec_sub(x1, x2);         /* b6 = x1 - x2; */             \
        b5   = vec_add(b6, b2);         /* b5 = b6 + b2; */             \
        cnst = LD_W2;                                                   \
        b5   = vec_madd(cnst, b5, mzero); /* b5 = b5 * W2; */           \
        cnst = LD_W1;                                                   \
        b2   = vec_madd(cnst, b2, b5);  /* b2 = b5 + b2 * W1; */        \
        cnst = LD_W0;                                                   \
        b6   = vec_madd(cnst, b6, b5);  /* b6 = b5 + b6 * W0; */        \
                                                                        \
        x0   = vec_add(x4, x7);         /* x0 = x4 + x7; */             \
        x1   = vec_add(x5, x6);         /* x1 = x5 + x6; */             \
        x2   = vec_add(x4, x6);         /* x2 = x4 + x6; */             \
        x3   = vec_add(x5, x7);         /* x3 = x5 + x7; */             \
        x8   = vec_add(x2, x3);         /* x8 = x2 + x3; */             \
        cnst = LD_W3;                                                   \
        x8   = vec_madd(cnst, x8, mzero); /* x8 = x8 * W3; */           \
                                                                        \
        cnst = LD_W8;                                                   \
        x0   = vec_madd(cnst, x0, mzero); /* x0 *= W8; */               \
        cnst = LD_W9;                                                   \
        x1   = vec_madd(cnst, x1, mzero); /* x1 *= W9; */               \
        cnst = LD_WA;                                                   \
        x2   = vec_madd(cnst, x2, x8);  /* x2 = x2 * WA + x8; */        \
        cnst = LD_WB;                                                   \
        x3   = vec_madd(cnst, x3, x8);  /* x3 = x3 * WB + x8; */        \
                                                                        \
        cnst = LD_W4;                                                   \
        b7   = vec_madd(cnst, x4, x0);  /* b7 = x4 * W4 + x0; */        \
        cnst = LD_W5;                                                   \
        b5   = vec_madd(cnst, x5, x1);  /* b5 = x5 * W5 + x1; */        \
        cnst = LD_W6;                                                   \
        b3   = vec_madd(cnst, x6, x1);  /* b3 = x6 * W6 + x1; */        \
        cnst = LD_W7;                                                   \
        b1   = vec_madd(cnst, x7, x0);  /* b1 = x7 * W7 + x0; */        \
                                                                        \
        b7 = vec_add(b7, x2);           /* b7 = b7 + x2; */             \
        b5 = vec_add(b5, x3);           /* b5 = b5 + x3; */             \
        b3 = vec_add(b3, x2);           /* b3 = b3 + x2; */             \
        b1 = vec_add(b1, x3);           /* b1 = b1 + x3; */             \
    } while (0)                                                         \
    /* }}} */
136 
/*
 * FDCTCOL: 8-point forward DCT butterfly network, computed in place.
 * ff_fdct_altivec() expands it twice for the second (column) pass, once per
 * half of the block.
 *
 * b0..b7 are read as the eight inputs and overwritten with the eight
 * outputs (b0 ends up holding the plain sum of all inputs). Each operand is
 * named several times in the expansion, so pass simple variables only.
 *
 * The expansion site must provide the scratch variables x0..x8 and cnst,
 * the additive identity mzero (-0.0), and cnsts0..cnsts2 for the LD_W*
 * macros.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement; invoke it with a trailing semicolon.
 */
#define FDCTCOL(b0, b1, b2, b3, b4, b5, b6, b7) /* {{{ */               \
    do {                                                                \
        x0 = vec_add(b0, b7);           /* x0 = b0 + b7; */             \
        x7 = vec_sub(b0, b7);           /* x7 = b0 - b7; */             \
        x1 = vec_add(b1, b6);           /* x1 = b1 + b6; */             \
        x6 = vec_sub(b1, b6);           /* x6 = b1 - b6; */             \
        x2 = vec_add(b2, b5);           /* x2 = b2 + b5; */             \
        x5 = vec_sub(b2, b5);           /* x5 = b2 - b5; */             \
        x3 = vec_add(b3, b4);           /* x3 = b3 + b4; */             \
        x4 = vec_sub(b3, b4);           /* x4 = b3 - b4; */             \
                                                                        \
        b7 = vec_add(x0, x3);           /* b7 = x0 + x3; */             \
        b1 = vec_add(x1, x2);           /* b1 = x1 + x2; */             \
        b0 = vec_add(b7, b1);           /* b0 = b7 + b1; */             \
        b4 = vec_sub(b7, b1);           /* b4 = b7 - b1; */             \
                                                                        \
        b2   = vec_sub(x0, x3);         /* b2 = x0 - x3; */             \
        b6   = vec_sub(x1, x2);         /* b6 = x1 - x2; */             \
        b5   = vec_add(b6, b2);         /* b5 = b6 + b2; */             \
        cnst = LD_W2;                                                   \
        b5   = vec_madd(cnst, b5, mzero); /* b5 = b5 * W2; */           \
        cnst = LD_W1;                                                   \
        b2   = vec_madd(cnst, b2, b5);  /* b2 = b5 + b2 * W1; */        \
        cnst = LD_W0;                                                   \
        b6   = vec_madd(cnst, b6, b5);  /* b6 = b5 + b6 * W0; */        \
                                                                        \
        x0   = vec_add(x4, x7);         /* x0 = x4 + x7; */             \
        x1   = vec_add(x5, x6);         /* x1 = x5 + x6; */             \
        x2   = vec_add(x4, x6);         /* x2 = x4 + x6; */             \
        x3   = vec_add(x5, x7);         /* x3 = x5 + x7; */             \
        x8   = vec_add(x2, x3);         /* x8 = x2 + x3; */             \
        cnst = LD_W3;                                                   \
        x8   = vec_madd(cnst, x8, mzero); /* x8 = x8 * W3; */           \
                                                                        \
        cnst = LD_W8;                                                   \
        x0   = vec_madd(cnst, x0, mzero); /* x0 *= W8; */               \
        cnst = LD_W9;                                                   \
        x1   = vec_madd(cnst, x1, mzero); /* x1 *= W9; */               \
        cnst = LD_WA;                                                   \
        x2   = vec_madd(cnst, x2, x8);  /* x2 = x2 * WA + x8; */        \
        cnst = LD_WB;                                                   \
        x3   = vec_madd(cnst, x3, x8);  /* x3 = x3 * WB + x8; */        \
                                                                        \
        cnst = LD_W4;                                                   \
        b7   = vec_madd(cnst, x4, x0);  /* b7 = x4 * W4 + x0; */        \
        cnst = LD_W5;                                                   \
        b5   = vec_madd(cnst, x5, x1);  /* b5 = x5 * W5 + x1; */        \
        cnst = LD_W6;                                                   \
        b3   = vec_madd(cnst, x6, x1);  /* b3 = x6 * W6 + x1; */        \
        cnst = LD_W7;                                                   \
        b1   = vec_madd(cnst, x7, x0);  /* b1 = x7 * W7 + x0; */        \
                                                                        \
        b7 = vec_add(b7, x2);           /* b7 += x2; */                 \
        b5 = vec_add(b5, x3);           /* b5 += x3; */                 \
        b3 = vec_add(b3, x2);           /* b3 += x2; */                 \
        b1 = vec_add(b1, x3);           /* b1 += x3; */                 \
    } while (0)                                                         \
    /* }}} */
193 
194 /* two dimensional discrete cosine transform */
195 void ff_fdct_altivec(int16_t *block)
196 {
197  vector signed short *bp;
198  const vector float *cp = fdctconsts;
199  vector float b00, b10, b20, b30, b40, b50, b60, b70;
200  vector float b01, b11, b21, b31, b41, b51, b61, b71;
201  vector float mzero, cnst, cnsts0, cnsts1, cnsts2;
202  vector float x0, x1, x2, x3, x4, x5, x6, x7, x8;
203 
204  /* setup constants {{{ */
205  /* mzero = -0.0 */
206  mzero = ((vector float) vec_splat_u32(-1));
207  mzero = ((vector float) vec_sl(vu32(mzero), vu32(mzero)));
208  cnsts0 = vec_ld(0, cp);
209  cp++;
210  cnsts1 = vec_ld(0, cp);
211  cp++;
212  cnsts2 = vec_ld(0, cp);
213  /* }}} */
214 
215  /* 8x8 matrix transpose (vector short[8]) {{{ */
216 #define MERGE_S16(hl, a, b) vec_merge ## hl(vs16(a), vs16(b))
217 
218  bp = (vector signed short *) block;
219  b00 = ((vector float) vec_ld(0, bp));
220  b40 = ((vector float) vec_ld(16 * 4, bp));
221  b01 = ((vector float) MERGE_S16(h, b00, b40));
222  b11 = ((vector float) MERGE_S16(l, b00, b40));
223  bp++;
224  b10 = ((vector float) vec_ld(0, bp));
225  b50 = ((vector float) vec_ld(16 * 4, bp));
226  b21 = ((vector float) MERGE_S16(h, b10, b50));
227  b31 = ((vector float) MERGE_S16(l, b10, b50));
228  bp++;
229  b20 = ((vector float) vec_ld(0, bp));
230  b60 = ((vector float) vec_ld(16 * 4, bp));
231  b41 = ((vector float) MERGE_S16(h, b20, b60));
232  b51 = ((vector float) MERGE_S16(l, b20, b60));
233  bp++;
234  b30 = ((vector float) vec_ld(0, bp));
235  b70 = ((vector float) vec_ld(16 * 4, bp));
236  b61 = ((vector float) MERGE_S16(h, b30, b70));
237  b71 = ((vector float) MERGE_S16(l, b30, b70));
238 
239  x0 = ((vector float) MERGE_S16(h, b01, b41));
240  x1 = ((vector float) MERGE_S16(l, b01, b41));
241  x2 = ((vector float) MERGE_S16(h, b11, b51));
242  x3 = ((vector float) MERGE_S16(l, b11, b51));
243  x4 = ((vector float) MERGE_S16(h, b21, b61));
244  x5 = ((vector float) MERGE_S16(l, b21, b61));
245  x6 = ((vector float) MERGE_S16(h, b31, b71));
246  x7 = ((vector float) MERGE_S16(l, b31, b71));
247 
248  b00 = ((vector float) MERGE_S16(h, x0, x4));
249  b10 = ((vector float) MERGE_S16(l, x0, x4));
250  b20 = ((vector float) MERGE_S16(h, x1, x5));
251  b30 = ((vector float) MERGE_S16(l, x1, x5));
252  b40 = ((vector float) MERGE_S16(h, x2, x6));
253  b50 = ((vector float) MERGE_S16(l, x2, x6));
254  b60 = ((vector float) MERGE_S16(h, x3, x7));
255  b70 = ((vector float) MERGE_S16(l, x3, x7));
256 
257 #undef MERGE_S16
258  /* }}} */
259 
260  /* Some of the initial calculations can be done as vector short
261  * before conversion to vector float. The following code section
262  * takes advantage of this. */
263 
264  /* fdct rows {{{ */
265  x0 = ((vector float) vec_add(vs16(b00), vs16(b70)));
266  x7 = ((vector float) vec_sub(vs16(b00), vs16(b70)));
267  x1 = ((vector float) vec_add(vs16(b10), vs16(b60)));
268  x6 = ((vector float) vec_sub(vs16(b10), vs16(b60)));
269  x2 = ((vector float) vec_add(vs16(b20), vs16(b50)));
270  x5 = ((vector float) vec_sub(vs16(b20), vs16(b50)));
271  x3 = ((vector float) vec_add(vs16(b30), vs16(b40)));
272  x4 = ((vector float) vec_sub(vs16(b30), vs16(b40)));
273 
274  b70 = ((vector float) vec_add(vs16(x0), vs16(x3)));
275  b10 = ((vector float) vec_add(vs16(x1), vs16(x2)));
276 
277  b00 = ((vector float) vec_add(vs16(b70), vs16(b10)));
278  b40 = ((vector float) vec_sub(vs16(b70), vs16(b10)));
279 
280 #define CTF0(n) \
281  b ## n ## 1 = ((vector float) vec_unpackl(vs16(b ## n ## 0))); \
282  b ## n ## 0 = ((vector float) vec_unpackh(vs16(b ## n ## 0))); \
283  b ## n ## 1 = vec_ctf(vs32(b ## n ## 1), 0); \
284  b ## n ## 0 = vec_ctf(vs32(b ## n ## 0), 0)
285 
286  CTF0(0);
287  CTF0(4);
288 
289  b20 = ((vector float) vec_sub(vs16(x0), vs16(x3)));
290  b60 = ((vector float) vec_sub(vs16(x1), vs16(x2)));
291 
292  CTF0(2);
293  CTF0(6);
294 
295 #undef CTF0
296 
297  x0 = vec_add(b60, b20);
298  x1 = vec_add(b61, b21);
299 
300  cnst = LD_W2;
301  x0 = vec_madd(cnst, x0, mzero);
302  x1 = vec_madd(cnst, x1, mzero);
303  cnst = LD_W1;
304  b20 = vec_madd(cnst, b20, x0);
305  b21 = vec_madd(cnst, b21, x1);
306  cnst = LD_W0;
307  b60 = vec_madd(cnst, b60, x0);
308  b61 = vec_madd(cnst, b61, x1);
309 
310 #define CTFX(x, b) \
311  b ## 0 = ((vector float) vec_unpackh(vs16(x))); \
312  b ## 1 = ((vector float) vec_unpackl(vs16(x))); \
313  b ## 0 = vec_ctf(vs32(b ## 0), 0); \
314  b ## 1 = vec_ctf(vs32(b ## 1), 0)
315 
316  CTFX(x4, b7);
317  CTFX(x5, b5);
318  CTFX(x6, b3);
319  CTFX(x7, b1);
320 
321 #undef CTFX
322 
323  x0 = vec_add(b70, b10);
324  x1 = vec_add(b50, b30);
325  x2 = vec_add(b70, b30);
326  x3 = vec_add(b50, b10);
327  x8 = vec_add(x2, x3);
328  cnst = LD_W3;
329  x8 = vec_madd(cnst, x8, mzero);
330 
331  cnst = LD_W8;
332  x0 = vec_madd(cnst, x0, mzero);
333  cnst = LD_W9;
334  x1 = vec_madd(cnst, x1, mzero);
335  cnst = LD_WA;
336  x2 = vec_madd(cnst, x2, x8);
337  cnst = LD_WB;
338  x3 = vec_madd(cnst, x3, x8);
339 
340  cnst = LD_W4;
341  b70 = vec_madd(cnst, b70, x0);
342  cnst = LD_W5;
343  b50 = vec_madd(cnst, b50, x1);
344  cnst = LD_W6;
345  b30 = vec_madd(cnst, b30, x1);
346  cnst = LD_W7;
347  b10 = vec_madd(cnst, b10, x0);
348 
349  b70 = vec_add(b70, x2);
350  b50 = vec_add(b50, x3);
351  b30 = vec_add(b30, x2);
352  b10 = vec_add(b10, x3);
353 
354  x0 = vec_add(b71, b11);
355  x1 = vec_add(b51, b31);
356  x2 = vec_add(b71, b31);
357  x3 = vec_add(b51, b11);
358  x8 = vec_add(x2, x3);
359  cnst = LD_W3;
360  x8 = vec_madd(cnst, x8, mzero);
361 
362  cnst = LD_W8;
363  x0 = vec_madd(cnst, x0, mzero);
364  cnst = LD_W9;
365  x1 = vec_madd(cnst, x1, mzero);
366  cnst = LD_WA;
367  x2 = vec_madd(cnst, x2, x8);
368  cnst = LD_WB;
369  x3 = vec_madd(cnst, x3, x8);
370 
371  cnst = LD_W4;
372  b71 = vec_madd(cnst, b71, x0);
373  cnst = LD_W5;
374  b51 = vec_madd(cnst, b51, x1);
375  cnst = LD_W6;
376  b31 = vec_madd(cnst, b31, x1);
377  cnst = LD_W7;
378  b11 = vec_madd(cnst, b11, x0);
379 
380  b71 = vec_add(b71, x2);
381  b51 = vec_add(b51, x3);
382  b31 = vec_add(b31, x2);
383  b11 = vec_add(b11, x3);
384  /* }}} */
385 
386  /* 8x8 matrix transpose (vector float[8][2]) {{{ */
387  x0 = vec_mergel(b00, b20);
388  x1 = vec_mergeh(b00, b20);
389  x2 = vec_mergel(b10, b30);
390  x3 = vec_mergeh(b10, b30);
391 
392  b00 = vec_mergeh(x1, x3);
393  b10 = vec_mergel(x1, x3);
394  b20 = vec_mergeh(x0, x2);
395  b30 = vec_mergel(x0, x2);
396 
397  x4 = vec_mergel(b41, b61);
398  x5 = vec_mergeh(b41, b61);
399  x6 = vec_mergel(b51, b71);
400  x7 = vec_mergeh(b51, b71);
401 
402  b41 = vec_mergeh(x5, x7);
403  b51 = vec_mergel(x5, x7);
404  b61 = vec_mergeh(x4, x6);
405  b71 = vec_mergel(x4, x6);
406 
407  x0 = vec_mergel(b01, b21);
408  x1 = vec_mergeh(b01, b21);
409  x2 = vec_mergel(b11, b31);
410  x3 = vec_mergeh(b11, b31);
411 
412  x4 = vec_mergel(b40, b60);
413  x5 = vec_mergeh(b40, b60);
414  x6 = vec_mergel(b50, b70);
415  x7 = vec_mergeh(b50, b70);
416 
417  b40 = vec_mergeh(x1, x3);
418  b50 = vec_mergel(x1, x3);
419  b60 = vec_mergeh(x0, x2);
420  b70 = vec_mergel(x0, x2);
421 
422  b01 = vec_mergeh(x5, x7);
423  b11 = vec_mergel(x5, x7);
424  b21 = vec_mergeh(x4, x6);
425  b31 = vec_mergel(x4, x6);
426  /* }}} */
427 
428  FDCTCOL(b00, b10, b20, b30, b40, b50, b60, b70);
429  FDCTCOL(b01, b11, b21, b31, b41, b51, b61, b71);
430 
431  /* round, convert back to short {{{ */
432 #define CTS(n) \
433  b ## n ## 0 = vec_round(b ## n ## 0); \
434  b ## n ## 1 = vec_round(b ## n ## 1); \
435  b ## n ## 0 = ((vector float) vec_cts(b ## n ## 0, 0)); \
436  b ## n ## 1 = ((vector float) vec_cts(b ## n ## 1, 0)); \
437  b ## n ## 0 = ((vector float) vec_pack(vs32(b ## n ## 0), \
438  vs32(b ## n ## 1))); \
439  vec_st(vs16(b ## n ## 0), 0, bp)
440 
441  bp = (vector signed short *) block;
442  CTS(0);
443  bp++;
444  CTS(1);
445  bp++;
446  CTS(2);
447  bp++;
448  CTS(3);
449  bp++;
450  CTS(4);
451  bp++;
452  CTS(5);
453  bp++;
454  CTS(6);
455  bp++;
456  CTS(7);
457 
458 #undef CTS
459  /* }}} */
460 }
461 
462 #endif /* HAVE_ALTIVEC */
463 
465  unsigned high_bit_depth)
466 {
467 #if HAVE_ALTIVEC
469  return;
470 
471  if (!high_bit_depth) {
472  if (avctx->dct_algo == FF_DCT_AUTO ||
473  avctx->dct_algo == FF_DCT_ALTIVEC) {
474  c->fdct = ff_fdct_altivec;
475  }
476  }
477 #endif /* HAVE_ALTIVEC */
478 }
WA
static const int WA[80]
Definition: ripemd.c:73
W5
#define W5
Definition: simple_idct_alpha.c:39
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:95
W1
#define W1
Definition: simple_idct_alpha.c:35
W6
#define W6
Definition: simple_idct_alpha.c:40
FDCTDSPContext
Definition: fdctdsp.h:26
b1
static double b1(void *priv, double x, double y)
Definition: vf_xfade.c:1665
W7
#define W7
Definition: simple_idct_alpha.c:41
AVCodecContext::dct_algo
int dct_algo
DCT algorithm, see FF_DCT_* below.
Definition: avcodec.h:1706
av_cold
#define av_cold
Definition: attributes.h:90
W2
#define W2
Definition: simple_idct_alpha.c:36
b3
static double b3(void *priv, double x, double y)
Definition: vf_xfade.c:1667
W4
#define W4
Definition: simple_idct_alpha.c:38
WB
static const int WB[80]
Definition: ripemd.c:81
ff_fdctdsp_init_ppc
av_cold void ff_fdctdsp_init_ppc(FDCTDSPContext *c, AVCodecContext *avctx, unsigned high_bit_depth)
Definition: fdctdsp.c:464
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
FF_DCT_ALTIVEC
#define FF_DCT_ALTIVEC
Definition: avcodec.h:1711
PPC_ALTIVEC
#define PPC_ALTIVEC(flags)
Definition: cpu.h:25
W3
#define W3
Definition: simple_idct_alpha.c:37
cpu.h
attributes.h
fdctdsp.h
fdct.h
AVCodecContext
main external API structure.
Definition: avcodec.h:536
FF_DCT_AUTO
#define FF_DCT_AUTO
Definition: avcodec.h:1707
util_altivec.h
cpu.h
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
h
h
Definition: vp9dsp_template.c:2038
ff_fdct_altivec
void ff_fdct_altivec(int16_t *block)
W0
#define W0
Definition: wmv2dsp_mmi.c:28