Go to the documentation of this file.
/* SPLATW(reg): expands to inline-asm template text that broadcasts the low
 * 16-bit word of XMM register `reg` into all eight word lanes of that same
 * register. "pshuflw $0" replicates word 0 across the low quadword, then
 * "punpcklqdq reg, reg" copies that low quadword into the high quadword.
 * `reg` is stringified with #reg, so pass a bare register name (e.g. xmm0). */
30 #define SPLATW(reg) "pshuflw $0, %%" #reg ", %%" #reg "\n\t" \
31 "punpcklqdq %%" #reg ", %%" #reg "\n\t"
39 static void gmc_ssse3(uint8_t *
dst,
const uint8_t *
src,
40 int stride,
int h,
int ox,
int oy,
41 int dxx,
int dxy,
int dyx,
int dyy,
50 const int ix = ox >> (16 +
shift);
51 const int iy = oy >> (16 +
shift);
52 const int ox2 = ox & (1 << (16 +
shift)) - 1;
53 const int oy2 = oy & (1 << (16 +
shift)) - 1;
54 const int oxs = ox2 >> 4;
55 const int oys = oy2 >> 4;
56 const int dxx2 = dxx - (1 << (16 +
shift));
57 const int dyy2 = dyy - (1 << (16 +
shift));
58 const int dxxs = dxx2 >> 4;
59 const int dxys = dxy >> 4;
60 const int dyxs = dyx >> 4;
61 const int dyys = dyy2 >> 4;
62 uint8_t edge_buf[(MAX_H + 1) * EDGE_EMU_STRIDE];
64 const int dxw = dxx2 * (
w - 1);
65 const int dyh = dyy2 * (
h - 1);
66 const int dxh = dxy * (
h - 1);
67 const int dyw = dyx * (
w - 1);
68 int need_emu = (unsigned) ix >=
width -
w ||
width <
w ||
73 ((ox2 + dxw) | (ox2 + dxh) | (ox2 + dxw + dxh) |
74 (oy2 + dyw) | (oy2 + dyh) | (oy2 + dyw + dyh)) >> (16 +
shift) ||
76 (dxx | dxy | dyx | dyy) & 15) {
77 ff_gmc_c(
dst,
src,
stride,
h, ox, oy, dxx, dxy, dyx, dyy,
83 const ptrdiff_t dst_stride =
stride;
84 ptrdiff_t src_stride =
stride;
89 src_stride = EDGE_EMU_STRIDE;
93 xmm_u16 dxy8, dyy8, r8;
98 "movd %[dxxs], %%xmm2 \n\t"
99 "movd %[dyxs], %%xmm3 \n\t"
100 "movd %[oxs], %%xmm1 \n\t"
102 "movd %[oys], %%xmm7 \n\t"
104 "pmullw "MANGLE(pw_0to7)
", %%xmm2 \n\t"
106 "movd %[s], %%xmm6 \n\t"
107 "pmullw "MANGLE(pw_0to7)
", %%xmm3 \n\t"
108 "movq (%[src]), %%xmm5 \n\t"
111 "movd %[dxys], %%xmm0 \n\t"
113 "movd %[dxys], %%xmm11 \n\t"
115 "paddw %%xmm2, %%xmm1 \n\t"
116 "movq 1(%[src]), %%xmm2 \n\t"
119 "movd %[dyys], %%xmm4 \n\t"
121 "movd %[dyys], %%xmm9 \n\t"
123 "paddw %%xmm3, %%xmm7 \n\t"
124 "punpcklbw %%xmm2, %%xmm5 \n\t"
127 "movd %[r], %%xmm2 \n\t"
129 "movdqa %%xmm0, %[dxy8] \n\t"
131 "movdqa %%xmm4, %[dyy8] \n\t"
132 "movdqa %%xmm2, %[r8] \n\t"
135 "movd %[r], %%xmm8 \n\t"
138 "movd %[shift2], %%xmm12 \n\t"
142 "add %[src_stride], %[src] \n\t"
143 "movq (%[src]), %%xmm3 \n\t"
144 "movq 1(%[src]), %%xmm0 \n\t"
145 "movdqa %%xmm1, %%xmm4 \n\t"
146 "psrlw $12, %%xmm4 \n\t"
147 "movdqa %%xmm6, %%xmm2 \n\t"
148 "psubw %%xmm4, %%xmm2 \n\t"
149 "psllw $8, %%xmm4 \n\t"
150 "por %%xmm4, %%xmm2 \n\t"
151 "pmaddubsw %%xmm2, %%xmm5 \n\t"
152 "punpcklbw %%xmm0, %%xmm3 \n\t"
153 "movdqa %%xmm3, %%xmm0 \n\t"
154 "pmaddubsw %%xmm2, %%xmm3 \n\t"
156 "paddw %[dxy8], %%xmm1 \n\t"
158 "paddw %%xmm11, %%xmm1 \n\t"
160 "movdqa %%xmm7, %%xmm4 \n\t"
161 "movdqa %%xmm6, %%xmm2 \n\t"
162 "psrlw $12, %%xmm4 \n\t"
163 "psubw %%xmm4, %%xmm2 \n\t"
164 "pmullw %%xmm5, %%xmm2 \n\t"
166 "paddw %[dyy8], %%xmm7 \n\t"
168 "paddw %%xmm9, %%xmm7 \n\t"
170 "pmullw %%xmm3, %%xmm4 \n\t"
173 "paddw %[r8], %%xmm2 \n\t"
175 "paddw %%xmm8, %%xmm2 \n\t"
177 "paddw %%xmm2, %%xmm4 \n\t"
180 "psrlw %[shift2], %%xmm4 \n\t"
182 "psrlw %%xmm12, %%xmm4 \n\t"
184 "packuswb %%xmm4, %%xmm4 \n\t"
185 "movq %%xmm4, (%[dst]) \n\t"
186 "movdqa %%xmm0, %%xmm5 \n\t"
187 "add %[dst_stride], %[dst] \n\t"
192 #if HAVE_6REGS || HAVE_INLINE_ASM_DIRECT_SYMBOL_REFS
198 , [dxy8]
"=m" (dxy8), [dyy8]
"=m" (dyy8), [r8]
"=m" (r8)
200 : [dst_stride]
"r"(dst_stride), [src_stride]
"r"(src_stride),
207 [oxs]
"g"(oxs), [oys]
"g"(oys), [dxxs]
"g"(dxxs), [dyxs]
"g"(dyxs),
209 :
XMM_CLOBBERS(
"xmm0",
"xmm1",
"xmm2",
"xmm3",
"xmm4",
"xmm5",
"xmm6",
"xmm7",)
211 XMM_CLOBBERS(
"xmm8",
"xmm9",
"xmm10",
"xmm11",
"xmm12",)
220 #if HAVE_SSSE3_INLINE
av_cold void ff_mpeg4videodsp_init_x86(Mpeg4VideoDSPContext *c)
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
static atomic_int cpu_flags
void ff_emulated_edge_mc_sse2(uint8_t *buf, const uint8_t *src, ptrdiff_t buf_stride, ptrdiff_t src_stride, int block_w, int block_h, int src_x, int src_y, int w, int h)
#define NAMED_CONSTRAINTS_ADD(...)
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit-picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can, and has in some cases, led to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c…
static int shift(int a, int b)
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
DECLARE_ASM_CONST(16, double, pd_1)[2]
static const uint8_t shift2[6]
__asm__(".macro parse_r var r\n\t" "\\var = -1\n\t" _IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31) ".iflt \\var\n\t" ".error \"Unable to parse register name \\r\"\n\t" ".endif\n\t" ".endm")
#define XMM_CLOBBERS(...)
#define INLINE_SSSE3(flags)
void ff_gmc_c(uint8_t *dst, const uint8_t *src, int stride, int h, int ox, int oy, int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
#define DECLARE_ALIGNED_16(t, v)