34 int stride,
int h,
int x16,
int y16,
int rounder)
39 (16 - x16) * (16 - y16),
45 register const vector
unsigned char vczero =
46 (
const vector
unsigned char) vec_splat_u8(0);
47 register const vector
unsigned short vcsr8 =
48 (
const vector
unsigned short) vec_splat_u16(8);
49 register vector
unsigned char dstv, dstv2, srcvB, srcvC, srcvD;
50 register vector
unsigned short tempB, tempC, tempD;
51 unsigned long dst_odd = (
unsigned long) dst & 0x0000000F;
52 unsigned long src_really_odd = (
unsigned long) src & 0x0000000F;
53 register vector
unsigned short tempA =
54 vec_ld(0, (
const unsigned short *) ABCD);
55 register vector
unsigned short Av = vec_splat(tempA, 0);
56 register vector
unsigned short Bv = vec_splat(tempA, 1);
57 register vector
unsigned short Cv = vec_splat(tempA, 2);
58 register vector
unsigned short Dv = vec_splat(tempA, 3);
59 register vector
unsigned short rounderV =
60 vec_splat((
vec_u16) vec_lde(0, &rounder_a), 0);
65 register vector
unsigned char src_0 = vec_ld(0, src);
66 register vector
unsigned char src_1 = vec_ld(16, src);
67 register vector
unsigned char srcvA = vec_perm(src_0, src_1,
70 if (src_really_odd != 0x0000000F)
73 srcvB = vec_perm(src_0, src_1, vec_lvsl(1, src));
76 srcvA = vec_mergeh(vczero, srcvA);
77 srcvB = vec_mergeh(vczero, srcvB);
79 for (i = 0; i <
h; i++) {
80 dst_odd = (
unsigned long) dst & 0x0000000F;
81 src_really_odd = (((
unsigned long) src) +
stride) & 0x0000000F;
83 dstv = vec_ld(0, dst);
88 src_0 = vec_ld(stride + 0, src);
89 src_1 = vec_ld(stride + 16, src);
90 srcvC = vec_perm(src_0, src_1, vec_lvsl(stride + 0, src));
92 if (src_really_odd != 0x0000000F)
95 srcvD = vec_perm(src_0, src_1, vec_lvsl(stride + 1, src));
99 srcvC = vec_mergeh(vczero, srcvC);
100 srcvD = vec_mergeh(vczero, srcvD);
105 tempA = vec_mladd((vector
unsigned short) srcvA, Av, rounderV);
106 tempB = vec_mladd((vector
unsigned short) srcvB, Bv, tempA);
107 tempC = vec_mladd((vector
unsigned short) srcvC, Cv, tempB);
108 tempD = vec_mladd((vector
unsigned short) srcvD, Dv, tempC);
113 tempD = vec_sr(tempD, vcsr8);
115 dstv2 = vec_pack(tempD, (vector
unsigned short) vczero);
118 dstv2 = vec_perm(dstv, dstv2, vcprm(0, 1,
s0,
s1));
120 dstv2 = vec_perm(dstv, dstv2, vcprm(
s0,
s1, 2, 3));
122 vec_st(dstv2, 0, dst);
136 c->
gmc1 = gmc1_altivec;
void (*gmc1)(uint8_t *dst, uint8_t *src, int srcStride, int h, int x16, int y16, int rounder)
Translational global motion compensation.
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, or accessing outside allocated space. Undefined Behavior must not occur in a C program; it is not safe even if the output of the undefined operations is unused. The unsafety may seem like nit-picking, but optimizing compilers have in fact optimized code on the assumption that no Undefined Behavior occurs. Optimizing code based on wrong assumptions can, and in some cases has, led to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c[...text truncated in extraction...]
#define DECLARE_ALIGNED(n, t, v)
Declare a variable that is aligned in memory.
#define PPC_ALTIVEC(flags)
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Contains misc utility macros and inline functions.
GLint GLenum GLboolean GLsizei stride
it's the only field you need to keep, assuming you have a context. There is some magic you don't need to care about around this — just let it be. vf default, minimum, maximum, flags; name is the option — keep it simple and lowercase; descriptions are short.
av_cold void ff_mpegvideodsp_init_ppc(MpegVideoDSPContext *c)