/* NOTE(review): these are the parameter tails of the six
 * ff_{put,avg}_pixels{4,8,16}_l2_{mmxext,...} prototypes; the lines that
 * carried the return type and function names were lost in extraction
 * (only the original line numbers 34..44 remain fused into the text).
 * The full signatures — grounded by the intact copies visible later in
 * this file — are of the form:
 *   void ff_put_pixels8_l2_mmxext(uint8_t *dst, const uint8_t *src1,
 *                                 const uint8_t *src2, int dstStride,
 *                                 int src1Stride, int h);
 * Recover the exact declarations from upstream before editing. */
34 int dstStride,
int src1Stride,
int h);
36 int dstStride,
int src1Stride,
int h);
38 int dstStride,
int src1Stride,
int h);
40 int dstStride,
int src1Stride,
int h);
42 int dstStride,
int src1Stride,
int h);
44 int dstStride,
int src1Stride,
int h);
45 #define ff_put_pixels8_l2_sse2 ff_put_pixels8_l2_mmxext 46 #define ff_avg_pixels8_l2_sse2 ff_avg_pixels8_l2_mmxext 47 #define ff_put_pixels16_l2_sse2 ff_put_pixels16_l2_mmxext 48 #define ff_avg_pixels16_l2_sse2 ff_avg_pixels16_l2_mmxext 49 #define ff_put_pixels16_mmxext ff_put_pixels16_mmx 50 #define ff_put_pixels8_mmxext ff_put_pixels8_mmx 51 #define ff_put_pixels4_mmxext ff_put_pixels4_mmx 53 #define DEF_QPEL(OPNAME)\ 54 void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride);\ 55 void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride);\ 56 void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_ssse3(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride);\ 57 void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_l2_mmxext(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride);\ 58 void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_l2_mmxext(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride);\ 59 void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_l2_ssse3(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride);\ 60 void ff_ ## OPNAME ## _h264_qpel4_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride);\ 61 void ff_ ## OPNAME ## _h264_qpel8or16_v_lowpass_op_mmxext(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h);\ 62 void ff_ ## OPNAME ## _h264_qpel8or16_v_lowpass_sse2(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h);\ 63 void ff_ ## OPNAME ## _h264_qpel4_hv_lowpass_v_mmxext(const uint8_t *src, int16_t *tmp, int srcStride);\ 64 void ff_ ## OPNAME ## _h264_qpel4_hv_lowpass_h_mmxext(int16_t *tmp, uint8_t *dst, int dstStride);\ 65 void ff_ ## OPNAME ## _h264_qpel8or16_hv1_lowpass_op_mmxext(const uint8_t *src, int16_t *tmp, int srcStride, int size);\ 66 void ff_ ## OPNAME ## 
_h264_qpel8or16_hv1_lowpass_op_sse2(const uint8_t *src, int16_t *tmp, int srcStride, int size);\ 67 void ff_ ## OPNAME ## _h264_qpel8or16_hv2_lowpass_op_mmxext(uint8_t *dst, int16_t *tmp, int dstStride, int unused, int h);\ 68 void ff_ ## OPNAME ## _h264_qpel8or16_hv2_lowpass_ssse3(uint8_t *dst, int16_t *tmp, int dstStride, int tmpStride, int size);\ 69 void ff_ ## OPNAME ## _pixels4_l2_shift5_mmxext(uint8_t *dst, const int16_t *src16, const uint8_t *src8, int dstStride, int src8Stride, int h);\ 70 void ff_ ## OPNAME ## _pixels8_l2_shift5_mmxext(uint8_t *dst, const int16_t *src16, const uint8_t *src8, int dstStride, int src8Stride, int h); 77 int w = (size + 8) >> 2;
/* NOTE(review): body fragment of the mmxext hv1 lowpass wrapper — the
 * enclosing function header and surrounding lines were lost in extraction.
 * The source pointer is rewound 2 rows and 2 columns, presumably so the
 * 6-tap filter window is centred on the target sample (TODO confirm
 * against upstream), before the asm op is invoked. */
78 src -= 2 * srcStride + 2;
80 ff_put_h264_qpel8or16_hv1_lowpass_op_mmxext(src, tmp, srcStride, size);
86 #define QPEL_H264(OPNAME, OP, MMX)\ 87 static av_always_inline void ff_ ## OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, int dstStride, int tmpStride, int srcStride){\ 89 src -= 2*srcStride+2;\ 91 ff_ ## OPNAME ## h264_qpel4_hv_lowpass_v_mmxext(src, tmp, srcStride);\ 96 ff_ ## OPNAME ## h264_qpel4_hv_lowpass_h_mmxext(tmp, dst, dstStride);\ 99 static av_always_inline void ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h){\ 101 ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_op_mmxext(dst, src, dstStride, srcStride, h);\ 104 ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_op_mmxext(dst, src, dstStride, srcStride, h);\ 106 static av_always_inline void ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, int dstStride, int tmpStride, int size){\ 109 ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_op_mmxext(dst, tmp, dstStride, 0, size);\ 115 static av_always_inline void ff_ ## OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\ 116 ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 8);\ 118 static av_always_inline void ff_ ## OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\ 119 ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 16);\ 120 ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\ 123 static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\ 124 ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\ 125 ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\ 128 ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\ 129 ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## 
MMX(dst+8, src+8, dstStride, srcStride);\ 132 static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride){\ 133 ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride);\ 134 ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\ 137 src2 += 8*src2Stride;\ 138 ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride);\ 139 ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\ 142 static av_always_inline void ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, int dstStride, int tmpStride, int srcStride, int size){\ 143 ff_put_h264_qpel8or16_hv1_lowpass_ ## MMX(tmp, src, tmpStride, srcStride, size);\ 144 ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(dst, tmp, dstStride, tmpStride, size);\ 146 static av_always_inline void ff_ ## OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, int dstStride, int tmpStride, int srcStride){\ 147 ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst , tmp , src , dstStride, tmpStride, srcStride, 8);\ 150 static av_always_inline void ff_ ## OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, int dstStride, int tmpStride, int srcStride){\ 151 ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst , tmp , src , dstStride, tmpStride, srcStride, 16);\ 154 static av_always_inline void ff_ ## OPNAME ## pixels16_l2_shift5_ ## MMX(uint8_t *dst, const int16_t *src16, const uint8_t *src8, int dstStride, int src8Stride, int h)\ 156 ff_ ## OPNAME ## pixels8_l2_shift5_ ## MMX(dst , src16 , src8 , dstStride, src8Stride, h);\ 157 ff_ ## OPNAME ## pixels8_l2_shift5_ ## MMX(dst+8, src16+8, src8+8, dstStride, src8Stride, h);\ 162 #define QPEL_H264_H16_XMM(OPNAME, OP, MMX)\ 164 
/* Prototype for the SSSE3 16-wide horizontal lowpass + second-reference
 * ("l2") averaging kernel (implemented in asm elsewhere).
 * NOTE(review): per the naming convention used throughout this file,
 * this filters src horizontally and combines the result with src2 into
 * dst using the avg op — confirm against the asm implementation. */
void ff_avg_h264_qpel16_h_lowpass_l2_ssse3(
uint8_t *dst,
const uint8_t *src,
const uint8_t *src2,
int dstStride,
int src2Stride);
/* Prototype for the SSSE3 16-wide horizontal lowpass + second-reference
 * ("l2") put kernel — the put_ counterpart of the avg_ prototype above.
 * NOTE(review): the leading "165" is an original line number left over
 * from extraction, not code. */
165 void ff_put_h264_qpel16_h_lowpass_l2_ssse3(
uint8_t *dst,
const uint8_t *src,
const uint8_t *src2,
int dstStride,
int src2Stride);
168 #define QPEL_H264_H16_XMM(OPNAME, OP, MMX)\ 169 static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, const uint8_t *src, const uint8_t *src2, int dstStride, int src2Stride){\ 170 ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride);\ 171 ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\ 174 src2 += 8*src2Stride;\ 175 ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride);\ 176 ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\ 178 #endif // ARCH_X86_64 180 #define QPEL_H264_H_XMM(OPNAME, OP, MMX)\ 181 QPEL_H264_H16_XMM(OPNAME, OP, MMX)\ 182 static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\ 183 ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\ 184 ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\ 187 ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\ 188 ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\ 191 #define QPEL_H264_V_XMM(OPNAME, OP, MMX)\ 192 static av_always_inline void ff_ ## OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\ 193 ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 8);\ 195 static av_always_inline void ff_ ## OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\ 196 ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 16);\ 197 ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\ 200 static av_always_inline void put_h264_qpel8or16_hv1_lowpass_sse2(int16_t *tmp,
/* NOTE(review): body fragment of put_h264_qpel8or16_hv1_lowpass_sse2
 * (header visible at the end of the fused line above; interior lines
 * dropped in extraction). Mirrors the mmxext variant: rewind src by
 * 2 rows and 2 columns, then run the sse2 asm op. */
207 src -= 2*srcStride+2;
209 ff_put_h264_qpel8or16_hv1_lowpass_op_sse2(src, tmp, srcStride, size);
215 #define QPEL_H264_HV_XMM(OPNAME, OP, MMX)\ 216 static av_always_inline void ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, int dstStride, int tmpStride, int srcStride, int size){\ 217 put_h264_qpel8or16_hv1_lowpass_sse2(tmp, src, tmpStride, srcStride, size);\ 218 ff_ ## OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(dst, tmp, dstStride, tmpStride, size);\ 220 static av_always_inline void ff_ ## OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, int dstStride, int tmpStride, int srcStride){\ 221 ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst, tmp, src, dstStride, tmpStride, srcStride, 8);\ 223 static av_always_inline void ff_ ## OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, int dstStride, int tmpStride, int srcStride){\ 224 ff_ ## OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst, tmp, src, dstStride, tmpStride, srcStride, 16);\ 227 #define ff_put_h264_qpel8_h_lowpass_l2_sse2 ff_put_h264_qpel8_h_lowpass_l2_mmxext 228 #define ff_avg_h264_qpel8_h_lowpass_l2_sse2 ff_avg_h264_qpel8_h_lowpass_l2_mmxext 229 #define ff_put_h264_qpel16_h_lowpass_l2_sse2 ff_put_h264_qpel16_h_lowpass_l2_mmxext 230 #define ff_avg_h264_qpel16_h_lowpass_l2_sse2 ff_avg_h264_qpel16_h_lowpass_l2_mmxext 232 #define ff_put_h264_qpel8_v_lowpass_ssse3 ff_put_h264_qpel8_v_lowpass_sse2 233 #define ff_avg_h264_qpel8_v_lowpass_ssse3 ff_avg_h264_qpel8_v_lowpass_sse2 234 #define ff_put_h264_qpel16_v_lowpass_ssse3 ff_put_h264_qpel16_v_lowpass_sse2 235 #define ff_avg_h264_qpel16_v_lowpass_ssse3 ff_avg_h264_qpel16_v_lowpass_sse2 237 #define ff_put_h264_qpel8or16_hv2_lowpass_sse2 ff_put_h264_qpel8or16_hv2_lowpass_mmxext 238 #define ff_avg_h264_qpel8or16_hv2_lowpass_sse2 ff_avg_h264_qpel8or16_hv2_lowpass_mmxext 240 #define H264_MC(OPNAME, SIZE, MMX, ALIGN) \ 241 H264_MC_C(OPNAME, SIZE, MMX, ALIGN)\ 242 H264_MC_V(OPNAME, SIZE, MMX, ALIGN)\ 243 H264_MC_H(OPNAME, SIZE, MMX, 
ALIGN)\ 244 H264_MC_HV(OPNAME, SIZE, MMX, ALIGN)\ 246 static void put_h264_qpel16_mc00_sse2 (
/* NOTE(review): parameter fragments of put_h264_qpel16_mc00_sse2 (opened
 * on the fused line above) and avg_h264_qpel16_mc00_sse2; their bodies
 * were lost in extraction. mc00 is the full-pel (no interpolation) copy /
 * average case — TODO recover the bodies from upstream. */
uint8_t *dst,
const uint8_t *src,
251 static void avg_h264_qpel16_mc00_sse2 (
uint8_t *dst,
const uint8_t *src,
256 #define put_h264_qpel8_mc00_sse2 put_h264_qpel8_mc00_mmxext 257 #define avg_h264_qpel8_mc00_sse2 avg_h264_qpel8_mc00_mmxext 259 #define H264_MC_C(OPNAME, SIZE, MMX, ALIGN) \ 260 static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## MMX (uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ 262 ff_ ## OPNAME ## pixels ## SIZE ## _ ## MMX(dst, src, stride, SIZE);\ 265 #define H264_MC_H(OPNAME, SIZE, MMX, ALIGN) \ 266 static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ 268 ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src, stride, stride);\ 271 static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ 273 ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## MMX(dst, src, stride, stride);\ 276 static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ 278 ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src+1, stride, stride);\ 281 #define H264_MC_V(OPNAME, SIZE, MMX, ALIGN) \ 282 static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ 284 LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\ 285 ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\ 286 ff_ ## OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src, temp, stride, stride, SIZE);\ 289 static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ 291 ff_ ## OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## MMX(dst, src, stride, stride);\ 294 static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ 296 LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\ 297 ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\ 298 ff_ ## OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src+stride, temp, stride, stride, SIZE);\ 301 #define 
H264_MC_HV(OPNAME, SIZE, MMX, ALIGN) \ 302 static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ 304 LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\ 305 ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\ 306 ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\ 309 static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ 311 LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\ 312 ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\ 313 ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\ 316 static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ 318 LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\ 319 ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\ 320 ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\ 323 static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ 325 LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\ 326 ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\ 327 ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\ 330 static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ 332 LOCAL_ALIGNED(ALIGN, uint16_t, temp, [SIZE*(SIZE<8?12:24)]);\ 333 ff_ ## OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(dst, temp, src, stride, SIZE, stride);\ 336 static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ 338 LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\ 339 uint8_t * const halfHV= temp;\ 340 int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\ 
341 av_assert2(((int)temp & 7) == 0);\ 342 ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\ 343 ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, halfHV, stride, SIZE);\ 346 static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ 348 LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\ 349 uint8_t * const halfHV= temp;\ 350 int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\ 351 av_assert2(((int)temp & 7) == 0);\ 352 ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\ 353 ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, halfHV, stride, SIZE);\ 356 static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ 358 LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\ 359 uint8_t * const halfHV= temp;\ 360 int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\ 361 av_assert2(((int)temp & 7) == 0);\ 362 ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\ 363 ff_ ## OPNAME ## pixels ## SIZE ## _l2_shift5_mmxext(dst, halfV+2, halfHV, stride, SIZE, SIZE);\ 366 static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ 368 LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\ 369 uint8_t * const halfHV= temp;\ 370 int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\ 371 av_assert2(((int)temp & 7) == 0);\ 372 ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\ 373 ff_ ## OPNAME ## pixels ## SIZE ## _l2_shift5_mmxext(dst, halfV+3, halfHV, stride, SIZE, SIZE);\ 376 #define H264_MC_4816(MMX)\ 377 H264_MC(put_, 4, MMX, 8)\ 378 H264_MC(put_, 8, MMX, 8)\ 379 H264_MC(put_, 16,MMX, 8)\ 380 H264_MC(avg_, 4, MMX, 8)\ 381 H264_MC(avg_, 8, MMX, 8)\ 382 H264_MC(avg_, 
16,MMX, 8)\ 384 #define H264_MC_816(QPEL, XMM)\ 385 QPEL(put_, 8, XMM, 16)\ 386 QPEL(put_, 16,XMM, 16)\ 387 QPEL(avg_, 8, XMM, 16)\ 388 QPEL(avg_, 16,XMM, 16)\ 390 QPEL_H264(put_, PUT_OP, mmxext)
/* Instantiations of the generator macros defined earlier in this file:
 * QPEL_H264 emits the mmxext helper set, QPEL_H264_{V,HV,H}_XMM emit the
 * sse2/ssse3 vertical, horizontal-vertical and horizontal helper sets,
 * and H264_MC_816 stamps out the size-8/16 motion-compensation entry
 * points for each SIMD level.
 * NOTE(review): the leading integers on each line are original line
 * numbers left over from extraction, not code; the put_/mmxext
 * QPEL_H264 instantiation sits fused at the end of the previous line. */
391 QPEL_H264(avg_, AVG_MMXEXT_OP, mmxext)
392 QPEL_H264_V_XMM(put_, PUT_OP, sse2)
393 QPEL_H264_V_XMM(avg_,AVG_MMXEXT_OP, sse2)
394 QPEL_H264_HV_XMM(put_, PUT_OP, sse2)
395 QPEL_H264_HV_XMM(avg_,AVG_MMXEXT_OP, sse2)
396 QPEL_H264_H_XMM(put_, PUT_OP, ssse3)
397 QPEL_H264_H_XMM(avg_,AVG_MMXEXT_OP, ssse3)
398 QPEL_H264_HV_XMM(put_, PUT_OP, ssse3)
399 QPEL_H264_HV_XMM(avg_,AVG_MMXEXT_OP, ssse3)
402 H264_MC_816(H264_MC_V, sse2)
403 H264_MC_816(H264_MC_HV, sse2)
404 H264_MC_816(H264_MC_H, ssse3)
405 H264_MC_816(H264_MC_HV, ssse3)
409 #define LUMA_MC_OP(OP, NUM, DEPTH, TYPE, OPT) \ 410 void ff_ ## OP ## _h264_qpel ## NUM ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT \ 411 (uint8_t *dst, const uint8_t *src, ptrdiff_t stride); 413 #define LUMA_MC_ALL(DEPTH, TYPE, OPT) \ 414 LUMA_MC_OP(put, 4, DEPTH, TYPE, OPT) \ 415 LUMA_MC_OP(avg, 4, DEPTH, TYPE, OPT) \ 416 LUMA_MC_OP(put, 8, DEPTH, TYPE, OPT) \ 417 LUMA_MC_OP(avg, 8, DEPTH, TYPE, OPT) \ 418 LUMA_MC_OP(put, 16, DEPTH, TYPE, OPT) \ 419 LUMA_MC_OP(avg, 16, DEPTH, TYPE, OPT) 421 #define LUMA_MC_816(DEPTH, TYPE, OPT) \ 422 LUMA_MC_OP(put, 8, DEPTH, TYPE, OPT) \ 423 LUMA_MC_OP(avg, 8, DEPTH, TYPE, OPT) \ 424 LUMA_MC_OP(put, 16, DEPTH, TYPE, OPT) \ 425 LUMA_MC_OP(avg, 16, DEPTH, TYPE, OPT) 427 LUMA_MC_ALL(10, mc00, mmxext)
/* 10-bit luma MC prototype instantiations. Per the LUMA_MC_OP definition
 * on the fused line above, each expands to extern prototypes of the form
 *   void ff_{put,avg}_h264_qpel{N}_{mcXY}_10_{OPT}(uint8_t *dst,
 *       const uint8_t *src, ptrdiff_t stride);
 * LUMA_MC_ALL covers sizes 4/8/16 for every quarter-pel position
 * (mc00..mc33) at the mmxext level; LUMA_MC_816 covers sizes 8/16 for the
 * sse2 / sse2_cache64 / ssse3_cache64 variants.
 * NOTE(review): the leading integers are extraction artifacts, and the
 * first LUMA_MC_ALL(10, mc00, mmxext) line is fused onto the previous
 * line. */
428 LUMA_MC_ALL(10, mc10, mmxext)
429 LUMA_MC_ALL(10, mc20, mmxext)
430 LUMA_MC_ALL(10, mc30, mmxext)
431 LUMA_MC_ALL(10, mc01, mmxext)
432 LUMA_MC_ALL(10, mc11, mmxext)
433 LUMA_MC_ALL(10, mc21, mmxext)
434 LUMA_MC_ALL(10, mc31, mmxext)
435 LUMA_MC_ALL(10, mc02, mmxext)
436 LUMA_MC_ALL(10, mc12, mmxext)
437 LUMA_MC_ALL(10, mc22, mmxext)
438 LUMA_MC_ALL(10, mc32, mmxext)
439 LUMA_MC_ALL(10, mc03, mmxext)
440 LUMA_MC_ALL(10, mc13, mmxext)
441 LUMA_MC_ALL(10, mc23, mmxext)
442 LUMA_MC_ALL(10, mc33, mmxext)
444 LUMA_MC_816(10, mc00, sse2)
445 LUMA_MC_816(10, mc10, sse2)
446 LUMA_MC_816(10, mc10, sse2_cache64)
447 LUMA_MC_816(10, mc10, ssse3_cache64)
448 LUMA_MC_816(10, mc20, sse2)
449 LUMA_MC_816(10, mc20, sse2_cache64)
450 LUMA_MC_816(10, mc20, ssse3_cache64)
451 LUMA_MC_816(10, mc30, sse2)
452 LUMA_MC_816(10, mc30, sse2_cache64)
453 LUMA_MC_816(10, mc30, ssse3_cache64)
454 LUMA_MC_816(10, mc01, sse2)
455 LUMA_MC_816(10, mc11, sse2)
456 LUMA_MC_816(10, mc21, sse2)
457 LUMA_MC_816(10, mc31, sse2)
458 LUMA_MC_816(10, mc02, sse2)
459 LUMA_MC_816(10, mc12, sse2)
460 LUMA_MC_816(10, mc22, sse2)
461 LUMA_MC_816(10, mc32, sse2)
462 LUMA_MC_816(10, mc03, sse2)
463 LUMA_MC_816(10, mc13, sse2)
464 LUMA_MC_816(10, mc23, sse2)
465 LUMA_MC_816(10, mc33, sse2)
467 #define QPEL16_OPMC(OP, MC, MMX)\ 468 void ff_ ## OP ## _h264_qpel16_ ## MC ## _10_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride){\ 469 ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst , src , stride);\ 470 ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst+16, src+16, stride);\ 473 ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst , src , stride);\ 474 ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst+16, src+16, stride);\ 477 #define QPEL16_OP(MC, MMX)\ 478 QPEL16_OPMC(put, MC, MMX)\ 479 QPEL16_OPMC(avg, MC, MMX) 482 QPEL16_OP(mc00, MMX)\ 483 QPEL16_OP(mc01, MMX)\ 484 QPEL16_OP(mc02, MMX)\ 485 QPEL16_OP(mc03, MMX)\ 486 QPEL16_OP(mc10, MMX)\ 487 QPEL16_OP(mc11, MMX)\ 488 QPEL16_OP(mc12, MMX)\ 489 QPEL16_OP(mc13, MMX)\ 490 QPEL16_OP(mc20, MMX)\ 491 QPEL16_OP(mc21, MMX)\ 492 QPEL16_OP(mc22, MMX)\ 493 QPEL16_OP(mc23, MMX)\ 494 QPEL16_OP(mc30, MMX)\ 495 QPEL16_OP(mc31, MMX)\ 496 QPEL16_OP(mc32, MMX)\ 499 #if ARCH_X86_32 // ARCH_X86_64 implies SSE2+ 505 #define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX) \ 507 c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \ 508 c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \ 509 c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \ 510 c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \ 511 c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \ 512 c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \ 513 c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \ 514 c->PFX ## _pixels_tab[IDX][ 7] = PREFIX ## PFX ## SIZE ## _mc31_ ## CPU; \ 515 c->PFX ## _pixels_tab[IDX][ 8] = PREFIX ## PFX ## SIZE ## _mc02_ ## CPU; \ 516 c->PFX ## _pixels_tab[IDX][ 9] = PREFIX ## PFX ## SIZE ## _mc12_ ## CPU; \ 517 c->PFX ## _pixels_tab[IDX][10] = PREFIX ## PFX ## SIZE ## _mc22_ ## CPU; \ 518 c->PFX ## _pixels_tab[IDX][11] = PREFIX ## PFX ## SIZE ## _mc32_ ## CPU; \ 519 c->PFX 
## _pixels_tab[IDX][12] = PREFIX ## PFX ## SIZE ## _mc03_ ## CPU; \ 520 c->PFX ## _pixels_tab[IDX][13] = PREFIX ## PFX ## SIZE ## _mc13_ ## CPU; \ 521 c->PFX ## _pixels_tab[IDX][14] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU; \ 522 c->PFX ## _pixels_tab[IDX][15] = PREFIX ## PFX ## SIZE ## _mc33_ ## CPU; \ 525 #define H264_QPEL_FUNCS(x, y, CPU) \ 527 c->put_h264_qpel_pixels_tab[0][x + y * 4] = put_h264_qpel16_mc ## x ## y ## _ ## CPU; \ 528 c->put_h264_qpel_pixels_tab[1][x + y * 4] = put_h264_qpel8_mc ## x ## y ## _ ## CPU; \ 529 c->avg_h264_qpel_pixels_tab[0][x + y * 4] = avg_h264_qpel16_mc ## x ## y ## _ ## CPU; \ 530 c->avg_h264_qpel_pixels_tab[1][x + y * 4] = avg_h264_qpel8_mc ## x ## y ## _ ## CPU; \ 533 #define H264_QPEL_FUNCS_10(x, y, CPU) \ 535 c->put_h264_qpel_pixels_tab[0][x + y * 4] = ff_put_h264_qpel16_mc ## x ## y ## _10_ ## CPU; \ 536 c->put_h264_qpel_pixels_tab[1][x + y * 4] = ff_put_h264_qpel8_mc ## x ## y ## _10_ ## CPU; \ 537 c->avg_h264_qpel_pixels_tab[0][x + y * 4] = ff_avg_h264_qpel16_mc ## x ## y ## _10_ ## CPU; \ 538 c->avg_h264_qpel_pixels_tab[1][x + y * 4] = ff_avg_h264_qpel8_mc ## x ## y ## _10_ ## CPU; \ 544 int high_bit_depth = bit_depth > 8;
/* NOTE(review): branch skeletons from the body of
 * ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth) — the function
 * pointer assignments inside each branch were lost in extraction.
 * `high_bit_depth` is `bit_depth > 8` (declared on the fused line above);
 * presumably each branch installs the 8-bit or 10-bit SIMD function
 * tables for successive CPU feature levels (mmxext/sse2/ssse3/avx) —
 * TODO confirm against upstream before editing. */
548 if (!high_bit_depth) {
555 }
else if (bit_depth == 10) {
568 if (!high_bit_depth) {
583 if (bit_depth == 10) {
595 if (!high_bit_depth) {
601 if (!high_bit_depth) {
616 if (bit_depth == 10) {
628 if (bit_depth == 10) {
void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
static atomic_int cpu_flags
Macro definitions for various function/variable attributes.
#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX)
Undefined Behavior: In the C language, some operations are undefined — such as signed integer overflow, dereferencing freed pointers, or accessing outside allocated space. Undefined Behavior must not occur in a C program; it is not safe even if the output of the undefined operations is unused. The unsafety may seem like nit-picking, but optimizing compilers have in fact optimized code on the assumption that no Undefined Behavior occurs. Optimizing code based on wrong assumptions can, and in some cases has, led to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c… [remainder of this paragraph truncated in extraction]
#define EXTERNAL_SSE2_FAST(flags)
#define EXTERNAL_SSE2(flags)
void ff_put_pixels8_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dstStride, int src1Stride, int h)
static void bit_depth(AudioStatsContext *s, uint64_t mask, uint64_t imask, AVRational *depth)
H.264 / AVC / MPEG-4 part10 codec.
void ff_avg_pixels8_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dstStride, int src1Stride, int h)
av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth)
void ff_put_pixels16_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dstStride, int src1Stride, int h)
#define EXTERNAL_SSSE3(flags)
#define H264_QPEL_FUNCS_10(x, y, CPU)
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
#define EXTERNAL_MMXEXT(flags)
GLint GLenum GLboolean GLsizei stride
void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
#define H264_QPEL_FUNCS(x, y, CPU)
#define EXTERNAL_AVX(flags)
void ff_avg_pixels16_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dstStride, int src1Stride, int h)