#define vs16(v) ((vector signed short) (v))
#define vs32(v) ((vector signed int) (v))
#define vu8(v)  ((vector unsigned char) (v))
#define vu16(v) ((vector unsigned short) (v))
#define vu32(v) ((vector unsigned int) (v))

#define C1 0.98078528040323044912618224  /* cos(1 * PI / 16) */
#define C2 0.92387953251128675612818319  /* cos(2 * PI / 16) */
#define C3 0.83146961230254523707878838  /* cos(3 * PI / 16) */
#define C4 0.70710678118654752440084436  /* cos(4 * PI / 16) */
#define C5 0.55557023301960222474283081  /* cos(5 * PI / 16) */
#define C6 0.38268343236508977172845998  /* cos(6 * PI / 16) */
#define C7 0.19509032201612826784828487  /* cos(7 * PI / 16) */

#define W0 -(2 * C2)
#define W1  (2 * C6)
#define W2 (M_SQRT2 * C6)
#define W3 (M_SQRT2 * C3)
#define W4 (M_SQRT2 * (-C1 + C3 + C5 - C7))
#define W5 (M_SQRT2 * (C1 + C3 - C5 + C7))
#define W6 (M_SQRT2 * (C1 + C3 + C5 - C7))
#define W7 (M_SQRT2 * (C1 + C3 - C5 - C7))
#define W8 (M_SQRT2 * (C7 - C3))
#define W9 (M_SQRT2 * (-C1 - C3))
#define WA (M_SQRT2 * (-C3 - C5))
#define WB (M_SQRT2 * (C5 - C3))

static const vector float fdctconsts[3] = {
    { W0, W1, W2, W3 },
    { W4, W5, W6, W7 },
    { W8, W9, WA, WB }
};
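/* Standalone sanity check (illustrative sketch, not part of the original
 * file): the Ck values above are cos(k*PI/16). Compile this separately with
 * the C1..C7 macros in scope (e.g. appended to a copy of the block above);
 * M_PI comes from <math.h> on POSIX systems; link with -lm. */
#include <assert.h>
#include <math.h>

int main(void)
{
    const double c[7] = { C1, C2, C3, C4, C5, C6, C7 };
    for (int k = 1; k <= 7; k++)
        assert(fabs(c[k - 1] - cos(k * M_PI / 16.0)) < 1e-15);
    return 0;
}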
#define LD_W0 vec_splat(cnsts0, 0)
#define LD_W1 vec_splat(cnsts0, 1)
#define LD_W2 vec_splat(cnsts0, 2)
#define LD_W3 vec_splat(cnsts0, 3)
#define LD_W4 vec_splat(cnsts1, 0)
#define LD_W5 vec_splat(cnsts1, 1)
#define LD_W6 vec_splat(cnsts1, 2)
#define LD_W7 vec_splat(cnsts1, 3)
#define LD_W8 vec_splat(cnsts2, 0)
#define LD_W9 vec_splat(cnsts2, 1)
#define LD_WA vec_splat(cnsts2, 2)
#define LD_WB vec_splat(cnsts2, 3)

#define FDCTROW(b0, b1, b2, b3, b4, b5, b6, b7) \
    x0 = vec_add(b0, b7);                       \
    x7 = vec_sub(b0, b7);                       \
    x1 = vec_add(b1, b6);                       \
    x6 = vec_sub(b1, b6);                       \
    x2 = vec_add(b2, b5);                       \
    x5 = vec_sub(b2, b5);                       \
    x3 = vec_add(b3, b4);                       \
    x4 = vec_sub(b3, b4);                       \
                                                \
    b7 = vec_add(x0, x3);                       \
    b1 = vec_add(x1, x2);                       \
    b0 = vec_add(b7, b1);                       \
    b4 = vec_sub(b7, b1);                       \
                                                \
    b2   = vec_sub(x0, x3);                     \
    b6   = vec_sub(x1, x2);                     \
    b5   = vec_add(b6, b2);                     \
    cnst = LD_W2;                               \
    b5   = vec_madd(cnst, b5, mzero);           \
    cnst = LD_W1;                               \
    b2   = vec_madd(cnst, b2, b5);              \
    cnst = LD_W0;                               \
    b6   = vec_madd(cnst, b6, b5);              \
                                                \
    x0   = vec_add(x4, x7);                     \
    x1   = vec_add(x5, x6);                     \
    x2   = vec_add(x4, x6);                     \
    x3   = vec_add(x5, x7);                     \
    x8   = vec_add(x2, x3);                     \
    cnst = LD_W3;                               \
    x8   = vec_madd(cnst, x8, mzero);           \
                                                \
    cnst = LD_W8;                               \
    x0   = vec_madd(cnst, x0, mzero);           \
    cnst = LD_W9;                               \
    x1   = vec_madd(cnst, x1, mzero);           \
    cnst = LD_WA;                               \
    x2   = vec_madd(cnst, x2, x8);              \
    cnst = LD_WB;                               \
    x3   = vec_madd(cnst, x3, x8);              \
                                                \
    cnst = LD_W4;                               \
    b7   = vec_madd(cnst, x4, x0);              \
    cnst = LD_W5;                               \
    b5   = vec_madd(cnst, x5, x1);              \
    cnst = LD_W6;                               \
    b3   = vec_madd(cnst, x6, x1);              \
    cnst = LD_W7;                               \
    b1   = vec_madd(cnst, x7, x0);              \
                                                \
    b7 = vec_add(b7, x2);                       \
    b5 = vec_add(b5, x3);                       \
    b3 = vec_add(b3, x2);                       \
    b1 = vec_add(b1, x3)

#define FDCTCOL(b0, b1, b2, b3, b4, b5, b6, b7) \
    x0 = vec_add(b0, b7);                       \
    x7 = vec_sub(b0, b7);                       \
    x1 = vec_add(b1, b6);                       \
    x6 = vec_sub(b1, b6);                       \
    x2 = vec_add(b2, b5);                       \
    x5 = vec_sub(b2, b5);                       \
    x3 = vec_add(b3, b4);                       \
    x4 = vec_sub(b3, b4);                       \
                                                \
    b7 = vec_add(x0, x3);                       \
    b1 = vec_add(x1, x2);                       \
    b0 = vec_add(b7, b1);                       \
    b4 = vec_sub(b7, b1);                       \
                                                \
    b2   = vec_sub(x0, x3);                     \
    b6   = vec_sub(x1, x2);                     \
    b5   = vec_add(b6, b2);                     \
    cnst = LD_W2;                               \
    b5   = vec_madd(cnst, b5, mzero);           \
    cnst = LD_W1;                               \
    b2   = vec_madd(cnst, b2, b5);              \
    cnst = LD_W0;                               \
    b6   = vec_madd(cnst, b6, b5);              \
                                                \
    x0   = vec_add(x4, x7);                     \
    x1   = vec_add(x5, x6);                     \
    x2   = vec_add(x4, x6);                     \
    x3   = vec_add(x5, x7);                     \
    x8   = vec_add(x2, x3);                     \
    cnst = LD_W3;                               \
    x8   = vec_madd(cnst, x8, mzero);           \
                                                \
    cnst = LD_W8;                               \
    x0   = vec_madd(cnst, x0, mzero);           \
    cnst = LD_W9;                               \
    x1   = vec_madd(cnst, x1, mzero);           \
    cnst = LD_WA;                               \
    x2   = vec_madd(cnst, x2, x8);              \
    cnst = LD_WB;                               \
    x3   = vec_madd(cnst, x3, x8);              \
                                                \
    cnst = LD_W4;                               \
    b7   = vec_madd(cnst, x4, x0);              \
    cnst = LD_W5;                               \
    b5   = vec_madd(cnst, x5, x1);              \
    cnst = LD_W6;                               \
    b3   = vec_madd(cnst, x6, x1);              \
    cnst = LD_W7;                               \
    b1   = vec_madd(cnst, x7, x0);              \
                                                \
    b7 = vec_add(b7, x2);                       \
    b5 = vec_add(b5, x3);                       \
    b3 = vec_add(b3, x2);                       \
    b1 = vec_add(b1, x3)

void ff_fdct_altivec(int16_t *block)
{
    vector signed short *bp;
    const vector float *cp = fdctconsts;
    vector float b00, b10, b20, b30, b40, b50, b60, b70;
    vector float b01, b11, b21, b31, b41, b51, b61, b71;
    vector float mzero, cnst, cnsts0, cnsts1, cnsts2;
    vector float x0, x1, x2, x3, x4, x5, x6, x7, x8;
    /* setup constants; mzero = -0.0 in every lane */
    mzero  = ((vector float) vec_splat_u32(-1));
    mzero  = ((vector float) vec_sl(vu32(mzero), vu32(mzero)));
    cnsts0 = vec_ld(0, cp);
    cp++;
    cnsts1 = vec_ld(0, cp);
    cp++;
    cnsts2 = vec_ld(0, cp);
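    /* The two mzero lines above synthesize -0.0f in every lane without a
     * memory load: vec_splat_u32(-1) sets all 32 bits of each element, and
     * vec_sl shifts each element left by (0xFFFFFFFF mod 32) == 31 bits,
     * leaving only the sign bit, i.e. the bit pattern 0x80000000 == -0.0f.
     * -0.0f (rather than +0.0f) is used because it is the additive identity
     * for IEEE floats (x + -0.0f == x for every x, including x == -0.0f),
     * which makes vec_madd(a, b, mzero) a pure multiply that preserves the
     * sign of zero products. */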
#define MERGE_S16(hl, a, b) vec_merge ## hl(vs16(a), vs16(b))

    /* 8x8 matrix transpose (vector shorts) */
    bp  = (vector signed short *) block;
    b00 = ((vector float) vec_ld(0, bp));
    b40 = ((vector float) vec_ld(16 * 4, bp));
    b01 = ((vector float) MERGE_S16(h, b00, b40));
    b11 = ((vector float) MERGE_S16(l, b00, b40));
    bp++;
    b10 = ((vector float) vec_ld(0, bp));
    b50 = ((vector float) vec_ld(16 * 4, bp));
    b21 = ((vector float) MERGE_S16(h, b10, b50));
    b31 = ((vector float) MERGE_S16(l, b10, b50));
    bp++;
    b20 = ((vector float) vec_ld(0, bp));
    b60 = ((vector float) vec_ld(16 * 4, bp));
    b41 = ((vector float) MERGE_S16(h, b20, b60));
    b51 = ((vector float) MERGE_S16(l, b20, b60));
    bp++;
    b30 = ((vector float) vec_ld(0, bp));
    b70 = ((vector float) vec_ld(16 * 4, bp));
    b61 = ((vector float) MERGE_S16(h, b30, b70));
    b71 = ((vector float) MERGE_S16(l, b30, b70));

    x0 = ((vector float) MERGE_S16(h, b01, b41));
    x1 = ((vector float) MERGE_S16(l, b01, b41));
    x2 = ((vector float) MERGE_S16(h, b11, b51));
    x3 = ((vector float) MERGE_S16(l, b11, b51));
    x4 = ((vector float) MERGE_S16(h, b21, b61));
    x5 = ((vector float) MERGE_S16(l, b21, b61));
    x6 = ((vector float) MERGE_S16(h, b31, b71));
    x7 = ((vector float) MERGE_S16(l, b31, b71));

    b00 = ((vector float) MERGE_S16(h, x0, x4));
    b10 = ((vector float) MERGE_S16(l, x0, x4));
    b20 = ((vector float) MERGE_S16(h, x1, x5));
    b30 = ((vector float) MERGE_S16(l, x1, x5));
    b40 = ((vector float) MERGE_S16(h, x2, x6));
    b50 = ((vector float) MERGE_S16(l, x2, x6));
    b60 = ((vector float) MERGE_S16(h, x3, x7));
    b70 = ((vector float) MERGE_S16(l, x3, x7));

#undef MERGE_S16
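    /* What MERGE_S16 does above, in scalar terms (AltiVec lanes are
     * big-endian, so "high" means the first four elements):
     *   vec_mergeh(a, b) = { a0, b0, a1, b1, a2, b2, a3, b3 }
     *   vec_mergel(a, b) = { a4, b4, a5, b5, a6, b6, a7, b7 }
     * Three rounds of merging row pairs (i, i+4) — the loads, then the
     * x0..x7 round, then the b00..b70 round — produce the full 8x8
     * transpose. Tracking row 0 of a block m through the rounds:
     *   round 1: { m00, m40, m01, m41, m02, m42, m03, m43 }
     *   round 2: { m00, m20, m40, m60, m01, m21, m41, m61 }
     *   round 3: { m00, m10, m20, m30, m40, m50, m60, m70 }
     * i.e. column 0 of the original block. */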
    /* Some of the initial row-FDCT calculations can be done directly on
     * the 16-bit integers, before conversion to vector float. */
    x0 = ((vector float) vec_add(vs16(b00), vs16(b70)));
    x7 = ((vector float) vec_sub(vs16(b00), vs16(b70)));
    x1 = ((vector float) vec_add(vs16(b10), vs16(b60)));
    x6 = ((vector float) vec_sub(vs16(b10), vs16(b60)));
    x2 = ((vector float) vec_add(vs16(b20), vs16(b50)));
    x5 = ((vector float) vec_sub(vs16(b20), vs16(b50)));
    x3 = ((vector float) vec_add(vs16(b30), vs16(b40)));
    x4 = ((vector float) vec_sub(vs16(b30), vs16(b40)));

    b70 = ((vector float) vec_add(vs16(x0), vs16(x3)));
    b10 = ((vector float) vec_add(vs16(x1), vs16(x2)));

    b00 = ((vector float) vec_add(vs16(b70), vs16(b10)));
    b40 = ((vector float) vec_sub(vs16(b70), vs16(b10)));

#define CTF0(n)                                                    \
    b ## n ## 1 = ((vector float) vec_unpackl(vs16(b ## n ## 0))); \
    b ## n ## 0 = ((vector float) vec_unpackh(vs16(b ## n ## 0))); \
    b ## n ## 1 = vec_ctf(vs32(b ## n ## 1), 0);                   \
    b ## n ## 0 = vec_ctf(vs32(b ## n ## 0), 0)

    CTF0(0);
    CTF0(4);

    b20 = ((vector float) vec_sub(vs16(x0), vs16(x3)));
    b60 = ((vector float) vec_sub(vs16(x1), vs16(x2)));

    CTF0(2);
    CTF0(6);

#undef CTF0
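    /* CTF0(n) converts one row of 16-bit integers to two float vectors:
     * vec_unpackh/vec_unpackl sign-extend the first/last four int16
     * elements to int32, and vec_ctf(v, 0) converts int32 to float with no
     * fractional scaling (the second argument is a power-of-two divisor
     * exponent). */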
    x0 = vec_add(b60, b20);
    x1 = vec_add(b61, b21);

    cnst = LD_W2;
    x0   = vec_madd(cnst, x0, mzero);
    x1   = vec_madd(cnst, x1, mzero);
    cnst = LD_W1;
    b20  = vec_madd(cnst, b20, x0);
    b21  = vec_madd(cnst, b21, x1);
    cnst = LD_W0;
    b60  = vec_madd(cnst, b60, x0);
    b61  = vec_madd(cnst, b61, x1);

#define CTFX(x, b)                                  \
    b ## 0 = ((vector float) vec_unpackh(vs16(x))); \
    b ## 1 = ((vector float) vec_unpackl(vs16(x))); \
    b ## 0 = vec_ctf(vs32(b ## 0), 0);              \
    b ## 1 = vec_ctf(vs32(b ## 1), 0)

    CTFX(x4, b7);
    CTFX(x5, b5);
    CTFX(x6, b3);
    CTFX(x7, b1);

#undef CTFX

    x0 = vec_add(b70, b10);
    x1 = vec_add(b50, b30);
    x2 = vec_add(b70, b30);
    x3 = vec_add(b50, b10);
    x8 = vec_add(x2, x3);
    cnst = LD_W3;
    x8   = vec_madd(cnst, x8, mzero);

    cnst = LD_W8;
    x0   = vec_madd(cnst, x0, mzero);
    cnst = LD_W9;
    x1   = vec_madd(cnst, x1, mzero);
    cnst = LD_WA;
    x2   = vec_madd(cnst, x2, x8);
    cnst = LD_WB;
    x3   = vec_madd(cnst, x3, x8);

    cnst = LD_W4;
    b70  = vec_madd(cnst, b70, x0);
    cnst = LD_W5;
    b50  = vec_madd(cnst, b50, x1);
    cnst = LD_W6;
    b30  = vec_madd(cnst, b30, x1);
    cnst = LD_W7;
    b10  = vec_madd(cnst, b10, x0);

    b70 = vec_add(b70, x2);
    b50 = vec_add(b50, x3);
    b30 = vec_add(b30, x2);
    b10 = vec_add(b10, x3);
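    /* Scalar view of the odd-part block above, with b70/b50/b30/b10
     * holding the odd inputs x4..x7 of the row transform (via CTFX):
     *   x8 = ((x4 + x6) + (x5 + x7)) * W3;
     *   b7 = x4 * W4 + (x4 + x7) * W8 + ((x4 + x6) * WA + x8);
     *   b5 = x5 * W5 + (x5 + x6) * W9 + ((x5 + x7) * WB + x8);
     *   b3 = x6 * W6 + (x5 + x6) * W9 + ((x4 + x6) * WA + x8);
     *   b1 = x7 * W7 + (x4 + x7) * W8 + ((x5 + x7) * WB + x8);
     * Each vec_madd fuses one multiply with one of these additions. */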
    x0 = vec_add(b71, b11);
    x1 = vec_add(b51, b31);
    x2 = vec_add(b71, b31);
    x3 = vec_add(b51, b11);
    x8 = vec_add(x2, x3);
    cnst = LD_W3;
    x8   = vec_madd(cnst, x8, mzero);

    cnst = LD_W8;
    x0   = vec_madd(cnst, x0, mzero);
    cnst = LD_W9;
    x1   = vec_madd(cnst, x1, mzero);
    cnst = LD_WA;
    x2   = vec_madd(cnst, x2, x8);
    cnst = LD_WB;
    x3   = vec_madd(cnst, x3, x8);

    cnst = LD_W4;
    b71  = vec_madd(cnst, b71, x0);
    cnst = LD_W5;
    b51  = vec_madd(cnst, b51, x1);
    cnst = LD_W6;
    b31  = vec_madd(cnst, b31, x1);
    cnst = LD_W7;
    b11  = vec_madd(cnst, b11, x0);

    b71 = vec_add(b71, x2);
    b51 = vec_add(b51, x3);
    b31 = vec_add(b31, x2);
    b11 = vec_add(b11, x3);
    /* 8x8 matrix transpose (vector floats) */
    x0 = vec_mergel(b00, b20);
    x1 = vec_mergeh(b00, b20);
    x2 = vec_mergel(b10, b30);
    x3 = vec_mergeh(b10, b30);

    b00 = vec_mergeh(x1, x3);
    b10 = vec_mergel(x1, x3);
    b20 = vec_mergeh(x0, x2);
    b30 = vec_mergel(x0, x2);

    x4 = vec_mergel(b41, b61);
    x5 = vec_mergeh(b41, b61);
    x6 = vec_mergel(b51, b71);
    x7 = vec_mergeh(b51, b71);

    b41 = vec_mergeh(x5, x7);
    b51 = vec_mergel(x5, x7);
    b61 = vec_mergeh(x4, x6);
    b71 = vec_mergel(x4, x6);

    x0 = vec_mergel(b01, b21);
    x1 = vec_mergeh(b01, b21);
    x2 = vec_mergel(b11, b31);
    x3 = vec_mergeh(b11, b31);

    x4 = vec_mergel(b40, b60);
    x5 = vec_mergeh(b40, b60);
    x6 = vec_mergel(b50, b70);
    x7 = vec_mergeh(b50, b70);

    b40 = vec_mergeh(x1, x3);
    b50 = vec_mergel(x1, x3);
    b60 = vec_mergeh(x0, x2);
    b70 = vec_mergel(x0, x2);

    b01 = vec_mergeh(x5, x7);
    b11 = vec_mergel(x5, x7);
    b21 = vec_mergeh(x4, x6);
    b31 = vec_mergel(x4, x6);
    FDCTCOL(b00, b10, b20, b30, b40, b50, b60, b70);
    FDCTCOL(b01, b11, b21, b31, b41, b51, b61, b71);
    /* round, convert back to short, and store */
#define CTS(n)                                                  \
    b ## n ## 0 = vec_round(b ## n ## 0);                       \
    b ## n ## 1 = vec_round(b ## n ## 1);                       \
    b ## n ## 0 = ((vector float) vec_cts(b ## n ## 0, 0));     \
    b ## n ## 1 = ((vector float) vec_cts(b ## n ## 1, 0));     \
    b ## n ## 0 = ((vector float) vec_pack(vs32(b ## n ## 0),   \
                                           vs32(b ## n ## 1))); \
    vec_st(vs16(b ## n ## 0), 0, bp)

    bp = (vector signed short *) block;
    CTS(0);
    bp++;
    CTS(1);
    bp++;
    CTS(2);
    bp++;
    CTS(3);
    bp++;
    CTS(4);
    bp++;
    CTS(5);
    bp++;
    CTS(6);
    bp++;
    CTS(7);

#undef CTS
}
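/* Scalar model of one CTS() lane (an illustrative sketch, not from the
 * original file): vec_round rounds to nearest (ties to even), vec_cts(v, 0)
 * converts float to int32 with saturation and no power-of-two scaling, and
 * vec_pack narrows int32 to int16 modulo 2^16 -- safe here because the 8x8
 * FDCT output of pixel-range data fits in 16 bits. rintf matches vec_round
 * under the default rounding mode. */
#include <math.h>
#include <stdint.h>

static int16_t cts_scalar(float v)
{
    float r = rintf(v);         /* vec_round: round to nearest even  */
    int32_t i = (int32_t) r;    /* vec_cts(..., 0): plain conversion */
    return (int16_t) i;         /* vec_pack: keep the low 16 bits    */
}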
av_cold void ff_fdctdsp_init_ppc(FDCTDSPContext *c, AVCodecContext *avctx,
                                 unsigned high_bit_depth)
{
#if HAVE_ALTIVEC
    if (!PPC_ALTIVEC(av_get_cpu_flags()))
        return;

    if (!high_bit_depth) {
        if (avctx->dct_algo == FF_DCT_AUTO ||
            avctx->dct_algo == FF_DCT_ALTIVEC)
            c->fdct = ff_fdct_altivec;
    }
#endif /* HAVE_ALTIVEC */
}
Undefined behavior: in the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, and accessing outside allocated space. Undefined behavior must not occur in a C program; it is not safe even if the output of the undefined operations is unused. This may seem like nit-picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs, and optimizing code based on wrong assumptions can and has in some cases led to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c…
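As a minimal illustration (a hypothetical example, not from the FFmpeg sources): because signed overflow is undefined, an optimizing compiler may assume that `x + 1 > x` always holds for signed `x` and fold the test to a constant, even though two's-complement wrap-around would make it false at `INT_MAX`.

#include <limits.h>
#include <stdio.h>

int wraps(int x)
{
    return x + 1 > x;   /* UB when x == INT_MAX; may be folded to 1 */
}

int main(void)
{
    /* With optimization enabled, many compilers print 1 here, although
     * wrap-around arithmetic would give INT_MIN > INT_MAX == 0. */
    printf("%d\n", wraps(INT_MAX));
    return 0;
}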