// VP3 IDCT cosine constants in 16.16 fixed point: 65536 * cos(n*pi/16)
static const vec_s16 constants =
    {0, 64277, 60547, 54491, 46341, 36410, 25080, 12785};
#if HAVE_BIGENDIAN
static const vec_u8 interleave_high =
    {0, 1, 16, 17, 4, 5, 20, 21, 8, 9, 24, 25, 12, 13, 28, 29};
#else
static const vec_u8 interleave_high =
    {2, 3, 18, 19, 6, 7, 22, 23, 10, 11, 26, 27, 14, 15, 30, 31};
#endif
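/*
 * Note (added for clarity, not in the original file): vec_perm indices
 * 0..15 select bytes from its first operand (the even-lane products of
 * vec_mule below) and 16..31 from its second (the odd-lane products of
 * vec_mulo). Within each 32-bit product the high 16 bits sit at byte
 * offsets {0,1} on big-endian and {2,3} on little-endian, so both tables
 * gather the high halves of the eight products back into the original
 * 16-bit lane order.
 */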
#define IDCT_START \
    vec_s16 A, B, C, D, Ad, Bd, Cd, Dd, E, F, G, H;\
    vec_s16 Ed, Gd, Add, Bdd, Fd, Hd;\
    vec_s16 eight = vec_splat_s16(8);\
    vec_u16 four = vec_splat_u16(4);\
\
    vec_s16 C1 = vec_splat(constants, 1);\
    vec_s16 C2 = vec_splat(constants, 2);\
    vec_s16 C3 = vec_splat(constants, 3);\
    vec_s16 C4 = vec_splat(constants, 4);\
    vec_s16 C5 = vec_splat(constants, 5);\
    vec_s16 C6 = vec_splat(constants, 6);\
    vec_s16 C7 = vec_splat(constants, 7);\
\
    vec_s16 b0 = vec_ld(0x00, block);\
    vec_s16 b1 = vec_ld(0x10, block);\
    vec_s16 b2 = vec_ld(0x20, block);\
    vec_s16 b3 = vec_ld(0x30, block);\
    vec_s16 b4 = vec_ld(0x40, block);\
    vec_s16 b5 = vec_ld(0x50, block);\
    vec_s16 b6 = vec_ld(0x60, block);\
    vec_s16 b7 = vec_ld(0x70, block);

// Both helpers compute (a * C) >> 16, where a is signed but the constant C
// is an unsigned 16-bit value stored in a signed vector.

// M15: C fits in 15 bits, so the signed multiply already yields the true
// product; the permute gathers the high halves of the even/odd products.
static inline vec_s16 M15(vec_s16 a, vec_s16 C)
{
    return (vec_s16)vec_perm(vec_mule(a, C), vec_mulo(a, C), interleave_high);
}

// M16: C uses all 16 bits and is read as C - 0x10000 by the signed
// multiply, leaving the result low by exactly a; add a back to correct it.
static inline vec_s16 M16(vec_s16 a, vec_s16 C)
{
    return vec_add(a, M15(a, C));
}
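/*
 * Illustration (added, not part of the original file): a scalar model of
 * the M15/M16 trick. vec_mule/vec_mulo multiply signed 16-bit lanes, so a
 * constant C >= 0x8000 is effectively read as C - 0x10000 and the raw high
 * half comes out low by exactly a. m15_model/m16_model are hypothetical
 * scalar stand-ins.
 */
static int32_t m15_model(int16_t a, uint16_t C)
{
    /* exact when C < 0x8000; an arithmetic right shift is assumed */
    return ((int32_t)a * (int16_t)C) >> 16;
}

static int32_t m16_model(int16_t a, uint16_t C)
{
    /* a * (C - 0x10000) >> 16 == ((a * C) >> 16) - a, so add a back.
     * e.g. a = 1000, C = 64277: m15_model() gives -20, plus 1000 is 980,
     * which matches (1000 * 64277) >> 16. */
    return a + m15_model(a, C);
}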
// one 1-D pass of the VP3 IDCT butterfly; the ADD/SHIFT hooks inject the
// rounding bias and the >> 4 only on the second pass
#define IDCT_1D(ADD, SHIFT)\
    A = vec_add(M16(b1, C1), M15(b7, C7));\
    B = vec_sub(M15(b1, C7), M16(b7, C1));\
    C = vec_add(M16(b3, C3), M16(b5, C5));\
    D = vec_sub(M16(b5, C3), M16(b3, C5));\
\
    Ad = M16(vec_sub(A, C), C4);\
    Bd = M16(vec_sub(B, D), C4);\
\
    Cd = vec_add(A, C);\
    Dd = vec_add(B, D);\
\
    E = ADD(M16(vec_add(b0, b4), C4));\
    F = ADD(M16(vec_sub(b0, b4), C4));\
\
    G = vec_add(M16(b2, C2), M15(b6, C6));\
    H = vec_sub(M15(b2, C6), M16(b6, C2));\
\
    Ed = vec_sub(E, G);\
    Gd = vec_add(E, G);\
\
    Add = vec_add(F, Ad);\
    Bdd = vec_sub(Bd, H);\
\
    Fd = vec_sub(F, Ad);\
    Hd = vec_add(Bd, H);\
\
    b0 = SHIFT(vec_add(Gd, Cd));\
    b7 = SHIFT(vec_sub(Gd, Cd));\
\
    b1 = SHIFT(vec_add(Add, Hd));\
    b2 = SHIFT(vec_sub(Add, Hd));\
\
    b3 = SHIFT(vec_add(Ed, Dd));\
    b4 = SHIFT(vec_sub(Ed, Dd));\
\
    b5 = SHIFT(vec_add(Fd, Bdd));\
    b6 = SHIFT(vec_sub(Fd, Bdd));

#define NOP(a) a
#define ADD8(a) vec_add(a, eight)
#define SHIFT4(a) vec_sra(a, four)

static void vp3_idct_put_altivec(uint8_t *dst, ptrdiff_t stride,
                                 int16_t block[64])
{
    vec_u8 t;

    IDCT_START

    // the output pixels are unsigned, so bias the rounding constant by
    // 2048: after the final >> 4 this adds the +128 that recenters the
    // signed IDCT result into the 0..255 range
    vec_s16 v2048 = vec_sl(vec_splat_s16(1), vec_splat_u16(11));
    eight = vec_add(eight, v2048);

    IDCT_1D(NOP, NOP)
    // transpose so the second pass works on columns
    TRANSPOSE8(b0, b1, b2, b3, b4, b5, b6, b7);
    IDCT_1D(ADD8, SHIFT4)

// saturate one row of results to unsigned 8 bits and store the 8 pixels as
// two 4-byte elements
#define PUT(a)\
    t = vec_packsu(a, a);\
    vec_ste((vec_u32)t, 0, (unsigned int *)dst);\
    vec_ste((vec_u32)t, 4, (unsigned int *)dst);

    PUT(b0)     dst += stride;
    PUT(b1)     dst += stride;
    PUT(b2)     dst += stride;
    PUT(b3)     dst += stride;
    PUT(b4)     dst += stride;
    PUT(b5)     dst += stride;
    PUT(b6)     dst += stride;
    PUT(b7)

    memset(block, 0, sizeof(*block) * 64);
}
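/*
 * Illustration (added, not part of the original file): a scalar reference
 * of one IDCT_1D pass, with M15/M16 folded into a single exact fixed-point
 * multiply and the ADD/SHIFT hooks omitted. It mirrors the dataflow of the
 * macro above; idct_1d_model is a hypothetical name.
 */
static int32_t mul16(int32_t c, int32_t x)
{
    return (int32_t)(((int64_t)c * x) >> 16); /* (c * x) >> 16, exact */
}

static void idct_1d_model(int32_t y[8])
{
    const int32_t C1 = 64277, C2 = 60547, C3 = 54491, C4 = 46341,
                  C5 = 36410, C6 = 25080, C7 = 12785;
    int32_t A   = mul16(C1, y[1]) + mul16(C7, y[7]);
    int32_t B   = mul16(C7, y[1]) - mul16(C1, y[7]);
    int32_t C   = mul16(C3, y[3]) + mul16(C5, y[5]);
    int32_t D   = mul16(C3, y[5]) - mul16(C5, y[3]);
    int32_t Ad  = mul16(C4, A - C);
    int32_t Bd  = mul16(C4, B - D);
    int32_t Cd  = A + C, Dd = B + D;
    int32_t E   = mul16(C4, y[0] + y[4]);
    int32_t F   = mul16(C4, y[0] - y[4]);
    int32_t G   = mul16(C2, y[2]) + mul16(C6, y[6]);
    int32_t H   = mul16(C6, y[2]) - mul16(C2, y[6]);
    int32_t Ed  = E - G,  Gd  = E + G;
    int32_t Add = F + Ad, Bdd = Bd - H;
    int32_t Fd  = F - Ad, Hd  = Bd + H;

    y[0] = Gd + Cd;   y[7] = Gd - Cd;
    y[1] = Add + Hd;  y[2] = Add - Hd;
    y[3] = Ed + Dd;   y[4] = Ed - Dd;
    y[5] = Fd + Bdd;  y[6] = Fd - Bdd;
}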
static void vp3_idct_add_altivec(uint8_t *dst, ptrdiff_t stride,
                                 int16_t block[64])
{
    LOAD_ZERO;
    vec_u8 t, vdst;
    vec_s16 vdst_16;
    // big-endian permute mask: pairs a zero byte (the 0xff indices select
    // from zero_u8v) with each destination byte located via vec_lvsl, which
    // also absorbs any misalignment of dst; widens 8 pixels to 16 bits
    vec_u8 vdst_mask = vec_mergeh(vec_splat_u8(-1), vec_lvsl(0, dst));

    IDCT_START

    IDCT_1D(NOP, NOP)
    TRANSPOSE8(b0, b1, b2, b3, b4, b5, b6, b7);
    IDCT_1D(ADD8, SHIFT4)

#if HAVE_BIGENDIAN
// load 8 destination pixels and widen them to 16 bits via the permute mask
#define GET_VDST16\
    vdst = vec_ld(0, dst);\
    vdst_16 = (vec_s16)vec_perm(vdst, zero_u8v, vdst_mask);
#else
// little-endian: VSX unaligned load, then merge with zero bytes to widen
#define GET_VDST16\
    vdst = vec_vsx_ld(0, dst);\
    vdst_16 = (vec_s16)vec_mergeh(vdst, zero_u8v);
#endif

// add one row of residuals to the destination with saturation and store
// the 8 pixels as two 4-byte elements
#define ADD(a)\
    GET_VDST16;\
    vdst_16 = vec_adds(a, vdst_16);\
    t = vec_packsu(vdst_16, vdst_16);\
    vec_ste((vec_u32)t, 0, (unsigned int *)dst);\
    vec_ste((vec_u32)t, 4, (unsigned int *)dst);

    ADD(b0)     dst += stride;
    ADD(b1)     dst += stride;
    ADD(b2)     dst += stride;
    ADD(b3)     dst += stride;
    ADD(b4)     dst += stride;
    ADD(b5)     dst += stride;
    ADD(b6)     dst += stride;
    ADD(b7)

    memset(block, 0, sizeof(*block) * 64);
}
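/*
 * Illustration (added, not part of the original file): scalar model of the
 * ADD(a) epilogue above. vec_adds performs a saturating 16-bit add and
 * vec_packsu packs to unsigned 8 bits with saturation, so for in-range
 * inputs each byte is simply "add residual, clip to 0..255".
 */
static void idct_add_row_model(uint8_t *dst, const int16_t residual[8])
{
    for (int i = 0; i < 8; i++) {
        int v = dst[i] + residual[i];
        dst[i] = v < 0 ? 0 : v > 255 ? 255 : v;
    }
}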
av_cold void ff_vp3dsp_init_ppc(VP3DSPContext *c, int flags)
{
#if HAVE_ALTIVEC
    if (!PPC_ALTIVEC(av_get_cpu_flags()))
        return;

    c->idct_put = vp3_idct_put_altivec;
    c->idct_add = vp3_idct_add_altivec;
#endif
}
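/*
 * Usage sketch (hypothetical, added for context): the decoder reaches these
 * routines only through the VP3DSPContext function pointers set above. Both
 * routines zero block[] on return, so the caller can reuse the coefficient
 * buffer. render_block_sketch is not part of this file.
 */
static void render_block_sketch(VP3DSPContext *dsp, uint8_t *dest,
                                ptrdiff_t stride, int16_t block[64],
                                int overwrite)
{
    if (overwrite)
        dsp->idct_put(dest, stride, block); /* write IDCT + 128 bias */
    else
        dsp->idct_add(dest, stride, block); /* add residual to dest */
}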