/*
 * Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt
 * Copyright (c) 2011 Daniel Kang
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <stddef.h>

#include "libavutil/attributes.h"
#include "libavutil/avassert.h"
#include "libavutil/cpu.h"
#include "libavutil/mem_internal.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/h264qpel.h"
#include "fpel.h"
#include "qpel.h"

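/* Assembly helpers: a 4x4 averaging store and "_l2" variants that write the
 * rounded average of two source blocks, used below to combine full-pel
 * pixels with half-pel filter output. */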
void ff_avg_pixels4_mmxext(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
void ff_put_pixels4x4_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
                                ptrdiff_t stride);
void ff_avg_pixels4x4_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
                                ptrdiff_t stride);
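/* Wrappers so the 4x4 l2 helpers can be called with the generic five-argument
 * (dst, src1, src2, dststride, src1stride) pattern used by the H264_MC_*
 * macros below: the assembly takes a single stride, and every call site
 * passes src1stride == dststride, so the extra argument is simply dropped. */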
#define ff_put_pixels4x4_l2_mmxext(dst, src1, src2, dststride, src1stride) \
    ff_put_pixels4x4_l2_mmxext((dst), (src1), (src2), (dststride))
#define ff_avg_pixels4x4_l2_mmxext(dst, src1, src2, dststride, src1stride) \
    ff_avg_pixels4x4_l2_mmxext((dst), (src1), (src2), (dststride))
#define ff_put_pixels8x8_l2_sse2 ff_put_pixels8x8_l2_mmxext
#define ff_avg_pixels8x8_l2_sse2 ff_avg_pixels8x8_l2_mmxext

#define DEF_QPEL(OPNAME)\
void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride);\
void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_ssse3(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride);\
void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_l2_mmxext(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride);\
void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_l2_sse2(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride);\
void ff_ ## OPNAME ## _h264_qpel16_h_lowpass_l2_sse2(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride);\
void ff_ ## OPNAME ## _h264_qpel8_h_lowpass_l2_ssse3(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride);\
void ff_ ## OPNAME ## _h264_qpel4_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride);\
void ff_ ## OPNAME ## _h264_qpel8or16_v_lowpass_sse2(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, int h);\
void ff_ ## OPNAME ## _h264_qpel4_hv_lowpass_h_mmxext(int16_t *tmp, uint8_t *dst, ptrdiff_t dstStride);\
void ff_ ## OPNAME ## _h264_qpel8or16_hv1_lowpass_op_sse2(const uint8_t *src, int16_t *tmp, ptrdiff_t srcStride, int size);\
void ff_ ## OPNAME ## _h264_qpel8_hv2_lowpass_sse2(uint8_t *dst, int16_t *tmp, ptrdiff_t dstStride);\
void ff_ ## OPNAME ## _h264_qpel16_hv2_lowpass_sse2(uint8_t *dst, int16_t *tmp, ptrdiff_t dstStride);\
void ff_ ## OPNAME ## _h264_qpel8_hv2_lowpass_ssse3(uint8_t *dst, int16_t *tmp, ptrdiff_t dstStride);\
void ff_ ## OPNAME ## _h264_qpel16_hv2_lowpass_ssse3(uint8_t *dst, int16_t *tmp, ptrdiff_t dstStride);\
void ff_ ## OPNAME ## _pixels4_l2_shift5_mmxext(uint8_t *dst, const int16_t *src16, const uint8_t *src8, ptrdiff_t dstStride);\
void ff_ ## OPNAME ## _pixels8_l2_shift5_sse2(uint8_t *dst, const int16_t *src16, const uint8_t *src8, ptrdiff_t dstStride);\
void ff_ ## OPNAME ## _pixels16_l2_shift5_sse2(uint8_t *dst, const int16_t *src16, const uint8_t *src8, ptrdiff_t dstStride);\

void ff_put_h264_qpel4_hv_lowpass_v_mmxext(const uint8_t *src, int16_t *tmp, ptrdiff_t srcStride);

DEF_QPEL(avg)
DEF_QPEL(put)

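/* Build the 4x4 centre (hv) low-pass filter from two passes: a vertical
 * 6-tap pass into a 16-bit temporary, then an OPNAME-specific horizontal
 * pass from the temporary into dst (put or avg). */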
#define QPEL_H264(OPNAME, MMX)\
static av_always_inline void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{\
    src -= 2*srcStride+2;\
    ff_put_h264_qpel4_hv_lowpass_v_mmxext(src, tmp, srcStride);\
    ff_ ## OPNAME ## h264_qpel4_hv_lowpass_h_mmxext(tmp, dst, dstStride);\
}\

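/* 16-wide horizontal l2 filter tiled from four 8-wide calls (left/right
 * halves, then the lower eight rows).  On x86_64 the 16-wide SSSE3 version
 * exists directly in assembly, so QPEL_H264_H16_XMM expands to nothing and
 * only the assembly prototypes are declared. */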
#define QPEL_H264_H16(OPNAME, EXT) \
static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_l2_ ## EXT(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)\
{\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## EXT(dst  , src  , src2  , dstStride, src2Stride);\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## EXT(dst+8, src+8, src2+8, dstStride, src2Stride);\
    src  += 8*dstStride;\
    dst  += 8*dstStride;\
    src2 += 8*src2Stride;\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## EXT(dst  , src  , src2  , dstStride, src2Stride);\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_l2_ ## EXT(dst+8, src+8, src2+8, dstStride, src2Stride);\
}\


#if ARCH_X86_64
#define QPEL_H264_H16_XMM(OPNAME, MMX)\

void ff_avg_h264_qpel16_h_lowpass_l2_ssse3(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride);
void ff_put_h264_qpel16_h_lowpass_l2_ssse3(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride);

#else // ARCH_X86_64
#define QPEL_H264_H16_XMM(OPNAME, EXT) QPEL_H264_H16(OPNAME, EXT)
#endif // ARCH_X86_64

#define QPEL_H264_H_XMM(OPNAME, MMX)\
QPEL_H264_H16_XMM(OPNAME, MMX)\
static av_always_inline void ff_ ## OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst  , src  , dstStride, srcStride);\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst  , src  , dstStride, srcStride);\
    ff_ ## OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
}\

#define QPEL_H264_V_XMM(OPNAME, XMM, XMM2)\
static av_always_inline void ff_ ## OPNAME ## h264_qpel8_v_lowpass_ ## XMM(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{\
    ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## XMM2(dst  , src  , dstStride, srcStride,  8);\
}\
static av_always_inline void ff_ ## OPNAME ## h264_qpel16_v_lowpass_ ## XMM(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{\
    ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## XMM2(dst  , src  , dstStride, srcStride, 16);\
    ff_ ## OPNAME ## h264_qpel8or16_v_lowpass_ ## XMM2(dst+8, src+8, dstStride, srcStride, 16);\
}

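/* First (vertical) stage of the 8/16 hv filter: the assembly op handles one
 * 8-column strip per call, so the wrapper walks (size+8)>>3 strips to cover
 * the extra columns the later horizontal (_hv2_) stage needs. */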
static av_always_inline void put_h264_qpel8or16_hv1_lowpass_sse2(int16_t *tmp,
                                                                 const uint8_t *src,
                                                                 ptrdiff_t srcStride,
                                                                 int size)
{
    int w = (size+8)>>3;
    src -= 2*srcStride+2;
    while (w--) {
        ff_put_h264_qpel8or16_hv1_lowpass_op_sse2(src, tmp, srcStride, size);
        tmp += 8;
        src += 8;
    }
}

#define QPEL_H264_HV_XMM(OPNAME, MMX)\
static av_always_inline void OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{\
    put_h264_qpel8or16_hv1_lowpass_sse2(tmp, src, srcStride, 8);\
    ff_ ## OPNAME ## h264_qpel8_hv2_lowpass_ ## MMX(dst, tmp, dstStride);\
}\
static av_always_inline void OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{\
    put_h264_qpel8or16_hv1_lowpass_sse2(tmp, src, srcStride, 16);\
    ff_ ## OPNAME ## h264_qpel16_hv2_lowpass_ ## MMX(dst, tmp, dstStride);\
}\

#define H264_MC_V_H_HV(OPNAME, SIZE, MMX, ALIGN, SHIFT5_EXT) \
H264_MC_V(OPNAME, SIZE, MMX, ALIGN, SHIFT5_EXT)\
H264_MC_H(OPNAME, SIZE, MMX, ALIGN, SHIFT5_EXT)\
H264_MC_HV(OPNAME, SIZE, MMX, ALIGN, SHIFT5_EXT)\

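/* The mcXY wrappers below implement the 16 luma quarter-pel positions: X is
 * the horizontal quarter-pel offset (0..3) and Y the vertical one, so mc20
 * is the horizontal half-pel, mc02 the vertical half-pel and mc22 the
 * centre position. */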
#define H264_MC_H(OPNAME, SIZE, MMX, ALIGN, UNUSED) \
static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## MMX(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src+1, stride, stride);\
}\

#define H264_MC_V(OPNAME, SIZE, MMX, ALIGN, UNUSED) \
static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\
    ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
    ff_ ## OPNAME ## pixels ## SIZE ## x ## SIZE ## _l2_ ## MMX(dst, src, temp, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## MMX(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\
    ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
    ff_ ## OPNAME ## pixels ## SIZE ## x ## SIZE ## _l2_ ## MMX(dst, src+stride, temp, stride, stride);\
}\

#define H264_MC_HV(OPNAME, SIZE, MMX, ALIGN, SHIFT5_EXT) \
static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\
    ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\
    ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\
    ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*SIZE]);\
    ff_put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    LOCAL_ALIGNED(ALIGN, uint16_t, temp, [SIZE*(SIZE<8?12:24)]);\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(dst, temp, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\
    uint8_t * const halfHV = temp;\
    int16_t * const halfV  = (int16_t*)(temp + SIZE*SIZE);\
    av_assert2(((uintptr_t)temp & 7) == 0);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, stride);\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, halfHV, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\
    uint8_t * const halfHV = temp;\
    int16_t * const halfV  = (int16_t*)(temp + SIZE*SIZE);\
    av_assert2(((uintptr_t)temp & 7) == 0);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, stride);\
    ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, halfHV, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\
    uint8_t * const halfHV = temp;\
    int16_t * const halfV  = (int16_t*)(temp + SIZE*SIZE);\
    av_assert2(((uintptr_t)temp & 7) == 0);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, stride);\
    ff_ ## OPNAME ## pixels ## SIZE ## _l2_shift5_ ## SHIFT5_EXT(dst, halfV+2, halfHV, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    LOCAL_ALIGNED(ALIGN, uint8_t, temp, [SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\
    uint8_t * const halfHV = temp;\
    int16_t * const halfV  = (int16_t*)(temp + SIZE*SIZE);\
    av_assert2(((uintptr_t)temp & 7) == 0);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, stride);\
    ff_ ## OPNAME ## pixels ## SIZE ## _l2_shift5_ ## SHIFT5_EXT(dst, halfV+3, halfHV, stride);\
}\

#define H264_MC(QPEL, SIZE, MMX, ALIGN, SHIFT5_EXT)\
QPEL(put_, SIZE, MMX, ALIGN, SHIFT5_EXT) \
QPEL(avg_, SIZE, MMX, ALIGN, SHIFT5_EXT) \

#define H264_MC_816(QPEL, XMM, SHIFT5_EXT)\
QPEL(put_,  8, XMM, 16, SHIFT5_EXT)\
QPEL(put_, 16, XMM, 16, SHIFT5_EXT)\
QPEL(avg_,  8, XMM, 16, SHIFT5_EXT)\
QPEL(avg_, 16, XMM, 16, SHIFT5_EXT)\

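/* Instantiate the wrappers: the 4x4 versions use MMXEXT, while the 8x8 and
 * 16x16 versions combine SSE2/SSSE3 assembly with the C glue defined above. */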
QPEL_H264(put_, mmxext)
QPEL_H264(avg_, mmxext)
QPEL_H264_V_XMM(put_, sse2, sse2)
QPEL_H264_V_XMM(avg_, sse2, sse2)
QPEL_H264_HV_XMM(put_, sse2)
QPEL_H264_HV_XMM(avg_, sse2)
QPEL_H264_H_XMM(put_, ssse3)
QPEL_H264_H_XMM(avg_, ssse3)
QPEL_H264_V_XMM(put_, ssse3, sse2)
QPEL_H264_HV_XMM(put_, ssse3)
QPEL_H264_HV_XMM(avg_, ssse3)

H264_MC(H264_MC_V_H_HV, 4, mmxext, 8, mmxext)
H264_MC_816(H264_MC_V, sse2, sse2)
H264_MC_816(H264_MC_HV, sse2, sse2)
H264_MC_816(H264_MC_H, ssse3, sse2)
H264_MC_816(H264_MC_HV, ssse3, sse2)


// 10-bit depth functions
#define LUMA_MC_OP(OP, NUM, DEPTH, TYPE, OPT) \
void ff_ ## OP ## _h264_qpel ## NUM ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT \
    (uint8_t *dst, const uint8_t *src, ptrdiff_t stride);

#define LUMA_MC_4(DEPTH, TYPE, OPT) \
    LUMA_MC_OP(put,  4, DEPTH, TYPE, OPT) \
    LUMA_MC_OP(avg,  4, DEPTH, TYPE, OPT)

#define LUMA_MC_816(DEPTH, TYPE, OPT) \
    LUMA_MC_OP(put,  8, DEPTH, TYPE, OPT) \
    LUMA_MC_OP(avg,  8, DEPTH, TYPE, OPT) \
    LUMA_MC_OP(put, 16, DEPTH, TYPE, OPT) \
    LUMA_MC_OP(avg, 16, DEPTH, TYPE, OPT)

LUMA_MC_4(10, mc00, mmxext)
LUMA_MC_4(10, mc10, mmxext)
LUMA_MC_4(10, mc20, mmxext)
LUMA_MC_4(10, mc30, mmxext)
LUMA_MC_4(10, mc01, mmxext)
LUMA_MC_4(10, mc11, mmxext)
LUMA_MC_4(10, mc21, mmxext)
LUMA_MC_4(10, mc31, mmxext)
LUMA_MC_4(10, mc02, mmxext)
LUMA_MC_4(10, mc12, mmxext)
LUMA_MC_4(10, mc22, mmxext)
LUMA_MC_4(10, mc32, mmxext)
LUMA_MC_4(10, mc03, mmxext)
LUMA_MC_4(10, mc13, mmxext)
LUMA_MC_4(10, mc23, mmxext)
LUMA_MC_4(10, mc33, mmxext)

LUMA_MC_816(10, mc00, sse2)
LUMA_MC_816(10, mc10, sse2)
LUMA_MC_816(10, mc10, ssse3_cache64)
LUMA_MC_816(10, mc20, sse2)
LUMA_MC_816(10, mc20, ssse3_cache64)
LUMA_MC_816(10, mc30, sse2)
LUMA_MC_816(10, mc30, ssse3_cache64)
LUMA_MC_816(10, mc01, sse2)
LUMA_MC_816(10, mc11, sse2)
LUMA_MC_816(10, mc21, sse2)
LUMA_MC_816(10, mc31, sse2)
LUMA_MC_816(10, mc02, sse2)
LUMA_MC_816(10, mc12, sse2)
LUMA_MC_816(10, mc22, sse2)
LUMA_MC_816(10, mc32, sse2)
LUMA_MC_816(10, mc03, sse2)
LUMA_MC_816(10, mc13, sse2)
LUMA_MC_816(10, mc23, sse2)
LUMA_MC_816(10, mc33, sse2)

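/* SET_QPEL_FUNCS fills one row of a pixels_tab dispatch table; the entry
 * index is X + 4*Y for the mcXY quarter-pel position.  The _1PP variant
 * leaves entry 0 (mc00, the full-pel copy) untouched so it can be set, or
 * kept, separately. */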
#define SET_QPEL_FUNCS_1PP(PFX, IDX, SIZE, CPU, PREFIX)                          \
    do {                                                                         \
        c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][ 7] = PREFIX ## PFX ## SIZE ## _mc31_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][ 8] = PREFIX ## PFX ## SIZE ## _mc02_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][ 9] = PREFIX ## PFX ## SIZE ## _mc12_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][10] = PREFIX ## PFX ## SIZE ## _mc22_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][11] = PREFIX ## PFX ## SIZE ## _mc32_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][12] = PREFIX ## PFX ## SIZE ## _mc03_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][13] = PREFIX ## PFX ## SIZE ## _mc13_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][14] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU; \
        c->PFX ## _pixels_tab[IDX][15] = PREFIX ## PFX ## SIZE ## _mc33_ ## CPU; \
    } while (0)
#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX)                              \
    do {                                                                         \
        c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \
        SET_QPEL_FUNCS_1PP(PFX, IDX, SIZE, CPU, PREFIX);                         \
    } while (0)

#define H264_QPEL_FUNCS(x, y, CPU)                                                            \
    do {                                                                                      \
        c->put_h264_qpel_pixels_tab[0][x + y * 4] = put_h264_qpel16_mc ## x ## y ## _ ## CPU; \
        c->put_h264_qpel_pixels_tab[1][x + y * 4] = put_h264_qpel8_mc  ## x ## y ## _ ## CPU; \
        c->avg_h264_qpel_pixels_tab[0][x + y * 4] = avg_h264_qpel16_mc ## x ## y ## _ ## CPU; \
        c->avg_h264_qpel_pixels_tab[1][x + y * 4] = avg_h264_qpel8_mc  ## x ## y ## _ ## CPU; \
    } while (0)

#define H264_QPEL_FUNCS_10(x, y, CPU)                                                               \
    do {                                                                                            \
        c->put_h264_qpel_pixels_tab[0][x + y * 4] = ff_put_h264_qpel16_mc ## x ## y ## _10_ ## CPU; \
        c->put_h264_qpel_pixels_tab[1][x + y * 4] = ff_put_h264_qpel8_mc  ## x ## y ## _10_ ## CPU; \
        c->avg_h264_qpel_pixels_tab[0][x + y * 4] = ff_avg_h264_qpel16_mc ## x ## y ## _10_ ## CPU; \
        c->avg_h264_qpel_pixels_tab[1][x + y * 4] = ff_avg_h264_qpel8_mc  ## x ## y ## _10_ ## CPU; \
    } while (0)

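/*
 * Runtime dispatcher: fills the H264QpelContext function tables with the
 * best available SIMD versions for the detected CPU flags and the requested
 * bit depth.  Reached from the bit-depth-independent init, roughly:
 *
 *     H264QpelContext qpel;
 *     ff_h264qpel_init(&qpel, 8);   // generic init; calls into this file
 *                                   // on x86 (illustrative sketch only)
 */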
av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth)
{
    int high_bit_depth = bit_depth > 8;
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_MMXEXT(cpu_flags)) {
        if (!high_bit_depth) {
            SET_QPEL_FUNCS_1PP(put_h264_qpel, 2, 4, mmxext, );
            c->avg_h264_qpel_pixels_tab[1][0] = ff_avg_pixels8x8_mmxext;
            SET_QPEL_FUNCS_1PP(avg_h264_qpel, 2, 4, mmxext, );
            c->avg_h264_qpel_pixels_tab[2][0] = ff_avg_pixels4_mmxext;
        } else if (bit_depth == 10) {
            SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 10_mmxext, ff_);
            SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 10_mmxext, ff_);
        }
    }

    if (EXTERNAL_SSE2(cpu_flags)) {
        if (!high_bit_depth) {
            H264_QPEL_FUNCS(0, 1, sse2);
            H264_QPEL_FUNCS(0, 2, sse2);
            H264_QPEL_FUNCS(0, 3, sse2);
            H264_QPEL_FUNCS(1, 1, sse2);
            H264_QPEL_FUNCS(1, 2, sse2);
            H264_QPEL_FUNCS(1, 3, sse2);
            H264_QPEL_FUNCS(2, 1, sse2);
            H264_QPEL_FUNCS(2, 2, sse2);
            H264_QPEL_FUNCS(2, 3, sse2);
            H264_QPEL_FUNCS(3, 1, sse2);
            H264_QPEL_FUNCS(3, 2, sse2);
            H264_QPEL_FUNCS(3, 3, sse2);
            c->put_h264_qpel_pixels_tab[0][0] = ff_put_pixels16x16_sse2;
            c->avg_h264_qpel_pixels_tab[0][0] = ff_avg_pixels16x16_sse2;
        }

        if (bit_depth == 10) {
            SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_sse2, ff_);
            SET_QPEL_FUNCS(put_h264_qpel, 1,  8, 10_sse2, ff_);
            SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_sse2, ff_);
            SET_QPEL_FUNCS(avg_h264_qpel, 1,  8, 10_sse2, ff_);
            H264_QPEL_FUNCS_10(1, 0, sse2);
            H264_QPEL_FUNCS_10(2, 0, sse2);
            H264_QPEL_FUNCS_10(3, 0, sse2);
        }
    }

    if (EXTERNAL_SSSE3(cpu_flags)) {
        if (!high_bit_depth) {
            H264_QPEL_FUNCS(1, 0, ssse3);
            H264_QPEL_FUNCS(1, 1, ssse3);
            H264_QPEL_FUNCS(1, 2, ssse3);
            H264_QPEL_FUNCS(1, 3, ssse3);
            H264_QPEL_FUNCS(2, 0, ssse3);
            H264_QPEL_FUNCS(2, 1, ssse3);
            H264_QPEL_FUNCS(2, 2, ssse3);
            H264_QPEL_FUNCS(2, 3, ssse3);
            H264_QPEL_FUNCS(3, 0, ssse3);
            H264_QPEL_FUNCS(3, 1, ssse3);
            H264_QPEL_FUNCS(3, 2, ssse3);
            H264_QPEL_FUNCS(3, 3, ssse3);
        }

        if (bit_depth == 10) {
            H264_QPEL_FUNCS_10(1, 0, ssse3_cache64);
            H264_QPEL_FUNCS_10(2, 0, ssse3_cache64);
            H264_QPEL_FUNCS_10(3, 0, ssse3_cache64);
        }
    }
}