FFmpeg
hevcdsp_init.c
Go to the documentation of this file.
/*
 * Copyright (c) 2013 Seppo Tomperi
 * Copyright (c) 2013 - 2014 Pierre-Edouard Lepere
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
21 
22 #include "config.h"
23 
24 #include "libavutil/cpu.h"
25 #include "libavutil/mem_internal.h"
26 #include "libavutil/x86/asm.h"
27 #include "libavutil/x86/cpu.h"
28 #include "libavcodec/get_bits.h" /* required for hevcdsp.h GetBitContext */
29 #include "libavcodec/hevcdsp.h"
30 #include "libavcodec/x86/hevcdsp.h"
31 
/*
 * Prototypes for the deblocking (loop filter) kernels.
 *
 * LFC_FUNC declares one chroma filter, LFL_FUNC one luma filter; the luma
 * variant takes an additional 'beta' argument. DIR is pasted into the symbol
 * name ('h' or 'v'), DEPTH is the bit depth and OPT the instruction-set
 * suffix. Only declarations are produced here; the definitions are not in
 * this file.
 */
#define LFC_FUNC(DIR, DEPTH, OPT) \
void ff_hevc_ ## DIR ## _loop_filter_chroma_ ## DEPTH ## _ ## OPT(uint8_t *pix, ptrdiff_t stride, int *tc, uint8_t *no_p, uint8_t *no_q);

#define LFL_FUNC(DIR, DEPTH, OPT) \
void ff_hevc_ ## DIR ## _loop_filter_luma_ ## DEPTH ## _ ## OPT(uint8_t *pix, ptrdiff_t stride, int beta, int *tc, uint8_t *no_p, uint8_t *no_q);

/*
 * Declare both the horizontal and the vertical variant in one go.
 * The 'type' argument is accepted but not used by the expansion.
 */
#define LFC_FUNCS(type, depth, opt) \
    LFC_FUNC(h, depth, opt)         \
    LFC_FUNC(v, depth, opt)

#define LFL_FUNCS(type, depth, opt) \
    LFL_FUNC(h, depth, opt)         \
    LFL_FUNC(v, depth, opt)

LFC_FUNCS(uint8_t,  8, sse2)
LFC_FUNCS(uint8_t, 10, sse2)
LFC_FUNCS(uint8_t, 12, sse2)
LFC_FUNCS(uint8_t,  8, avx)
LFC_FUNCS(uint8_t, 10, avx)
LFC_FUNCS(uint8_t, 12, avx)
LFL_FUNCS(uint8_t,  8, sse2)
LFL_FUNCS(uint8_t, 10, sse2)
LFL_FUNCS(uint8_t, 12, sse2)
LFL_FUNCS(uint8_t,  8, ssse3)
LFL_FUNCS(uint8_t, 10, ssse3)
LFL_FUNCS(uint8_t, 12, ssse3)
LFL_FUNCS(uint8_t,  8, avx)
LFL_FUNCS(uint8_t, 10, avx)
LFL_FUNCS(uint8_t, 12, avx)
61 
/*
 * Declare the DC-only inverse transform kernels for block size W (pasted
 * verbatim into the symbol, e.g. 4x4) at 8, 10 and 12 bits for one
 * instruction-set suffix. The macro deliberately omits the final ';' so each
 * use reads like an ordinary declaration.
 */
#define IDCT_DC_FUNCS(W, opt) \
void ff_hevc_idct_ ## W ## _dc_8_ ## opt(int16_t *coeffs); \
void ff_hevc_idct_ ## W ## _dc_10_ ## opt(int16_t *coeffs); \
void ff_hevc_idct_ ## W ## _dc_12_ ## opt(int16_t *coeffs)

IDCT_DC_FUNCS(4x4,   mmxext);
IDCT_DC_FUNCS(8x8,   mmxext);
IDCT_DC_FUNCS(8x8,   sse2);
IDCT_DC_FUNCS(16x16, sse2);
IDCT_DC_FUNCS(32x32, sse2);
IDCT_DC_FUNCS(16x16, avx2);
IDCT_DC_FUNCS(32x32, avx2);
74 
/*
 * Declare the full inverse transform kernels (4x4 through 32x32, 8 and 10
 * bits) for one instruction-set suffix. Unlike IDCT_DC_FUNCS, the expansion
 * already ends in ';', so it is used without a trailing semicolon.
 */
#define IDCT_FUNCS(opt) \
void ff_hevc_idct_4x4_8_ ## opt(int16_t *coeffs, int col_limit); \
void ff_hevc_idct_4x4_10_ ## opt(int16_t *coeffs, int col_limit); \
void ff_hevc_idct_8x8_8_ ## opt(int16_t *coeffs, int col_limit); \
void ff_hevc_idct_8x8_10_ ## opt(int16_t *coeffs, int col_limit); \
void ff_hevc_idct_16x16_8_ ## opt(int16_t *coeffs, int col_limit); \
void ff_hevc_idct_16x16_10_ ## opt(int16_t *coeffs, int col_limit); \
void ff_hevc_idct_32x32_8_ ## opt(int16_t *coeffs, int col_limit); \
void ff_hevc_idct_32x32_10_ ## opt(int16_t *coeffs, int col_limit);

IDCT_FUNCS(sse2)
IDCT_FUNCS(avx)
87 
/*
 * Define ff_hevc_put_hevc_<name><W>_<bitd>_<opt>() by repeating the
 * <step>-wide kernel of the same name across a W-wide block.
 *
 * For each column offset i = 0, step, 2*step, ... < W the kernel is called
 * with the source advanced by i samples (i * ((bitd + 7) / 8) bytes) and the
 * int16_t destination advanced by i elements. All other arguments, including
 * 'width', are forwarded unchanged.
 */
#define mc_rep_func(name, bitd, step, W, opt) \
void ff_hevc_put_hevc_##name##W##_##bitd##_##opt(int16_t *_dst, \
                                                uint8_t *_src, ptrdiff_t _srcstride, int height, \
                                                intptr_t mx, intptr_t my, int width) \
{ \
    int i; \
    for (i = 0; i < (W); i += (step)) { \
        uint8_t *src = _src + (i * (((bitd) + 7) / 8)); \
        int16_t *dst = _dst + i; \
        ff_hevc_put_hevc_##name##step##_##bitd##_##opt(dst, src, _srcstride, height, mx, my, width); \
    } \
}
/*
 * Define ff_hevc_put_hevc_uni_<name><W>_<bitd>_<opt>() by repeating the
 * <step>-wide "uni" kernel across a W-wide block.
 *
 * Both source and destination are byte pointers here, so each is advanced by
 * i * ((bitd + 7) / 8) bytes for column offset i. The remaining arguments
 * are forwarded unchanged.
 */
#define mc_rep_uni_func(name, bitd, step, W, opt) \
void ff_hevc_put_hevc_uni_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, \
                                                    uint8_t *_src, ptrdiff_t _srcstride, int height, \
                                                    intptr_t mx, intptr_t my, int width) \
{ \
    int i; \
    for (i = 0; i < (W); i += (step)) { \
        uint8_t *src = _src + (i * (((bitd) + 7) / 8)); \
        uint8_t *dst = _dst + (i * (((bitd) + 7) / 8)); \
        ff_hevc_put_hevc_uni_##name##step##_##bitd##_##opt(dst, dststride, src, _srcstride, \
                                                          height, mx, my, width); \
    } \
}
/*
 * Define ff_hevc_put_hevc_bi_<name><W>_<bitd>_<opt>() by repeating the
 * <step>-wide "bi" kernel across a W-wide block.
 *
 * For column offset i the byte pointers (source and destination) advance by
 * i * ((bitd + 7) / 8) bytes, while the int16_t second source advances by i
 * elements. The remaining arguments are forwarded unchanged.
 */
#define mc_rep_bi_func(name, bitd, step, W, opt) \
void ff_hevc_put_hevc_bi_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, uint8_t *_src, \
                                                   ptrdiff_t _srcstride, int16_t *_src2, \
                                                   int height, intptr_t mx, intptr_t my, int width) \
{ \
    int i; \
    for (i = 0; i < (W); i += (step)) { \
        uint8_t *src  = _src + (i * (((bitd) + 7) / 8)); \
        uint8_t *dst  = _dst + (i * (((bitd) + 7) / 8)); \
        int16_t *src2 = _src2 + i; \
        ff_hevc_put_hevc_bi_##name##step##_##bitd##_##opt(dst, dststride, src, _srcstride, src2, \
                                                         height, mx, my, width); \
    } \
}
134 
/* Emit the plain, "uni" and "bi" repeat wrappers for one (name, bitd, step, W, opt) tuple. */
#define mc_rep_funcs(name, bitd, step, W, opt) \
    mc_rep_func(name, bitd, step, W, opt) \
    mc_rep_uni_func(name, bitd, step, W, opt) \
    mc_rep_bi_func(name, bitd, step, W, opt)
139 
/*
 * Define ff_hevc_put_hevc_<name><W>_<bitd>_<opt>() as two kernel calls of
 * different widths: the <step1>-wide kernel on the left part, then the
 * <step2>-wide kernel starting step1 samples further right (step1 int16_t
 * elements in dst, step1 * ((bitd + 7) / 8) bytes in src).
 */
#define mc_rep_func2(name, bitd, step1, step2, W, opt) \
void ff_hevc_put_hevc_##name##W##_##bitd##_##opt(int16_t *dst, \
                                                uint8_t *src, ptrdiff_t _srcstride, int height, \
                                                intptr_t mx, intptr_t my, int width) \
{ \
    ff_hevc_put_hevc_##name##step1##_##bitd##_##opt(dst, src, _srcstride, height, mx, my, width); \
    ff_hevc_put_hevc_##name##step2##_##bitd##_##opt(dst + (step1), src + ((step1) * (((bitd) + 7) / 8)), \
                                                   _srcstride, height, mx, my, width); \
}
/*
 * "uni" counterpart of mc_rep_func2: the <step1>-wide kernel handles the
 * left part, the <step2>-wide kernel the part beginning step1 samples to the
 * right. Both dst and src are byte pointers and are advanced by
 * step1 * ((bitd + 7) / 8) bytes for the second call.
 */
#define mc_rep_uni_func2(name, bitd, step1, step2, W, opt) \
void ff_hevc_put_hevc_uni_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, \
                                                    uint8_t *src, ptrdiff_t _srcstride, int height, \
                                                    intptr_t mx, intptr_t my, int width) \
{ \
    ff_hevc_put_hevc_uni_##name##step1##_##bitd##_##opt(dst, dststride, src, _srcstride, height, mx, my, width); \
    ff_hevc_put_hevc_uni_##name##step2##_##bitd##_##opt(dst + ((step1) * (((bitd) + 7) / 8)), dststride, \
                                                       src + ((step1) * (((bitd) + 7) / 8)), _srcstride, \
                                                       height, mx, my, width); \
}
/*
 * "bi" counterpart of mc_rep_func2: the <step1>-wide kernel handles the left
 * part, the <step2>-wide kernel the part beginning step1 samples to the
 * right. dst and src advance by step1 * ((bitd + 7) / 8) bytes, the int16_t
 * src2 by step1 elements.
 */
#define mc_rep_bi_func2(name, bitd, step1, step2, W, opt) \
void ff_hevc_put_hevc_bi_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, \
                                                   ptrdiff_t _srcstride, int16_t *src2, \
                                                   int height, intptr_t mx, intptr_t my, int width) \
{ \
    ff_hevc_put_hevc_bi_##name##step1##_##bitd##_##opt(dst, dststride, src, _srcstride, src2, height, mx, my, width); \
    ff_hevc_put_hevc_bi_##name##step2##_##bitd##_##opt(dst + ((step1) * (((bitd) + 7) / 8)), dststride, \
                                                      src + ((step1) * (((bitd) + 7) / 8)), _srcstride, \
                                                      src2 + (step1), height, mx, my, width); \
}
169 
/* Emit the plain, "uni" and "bi" two-kernel wrappers for one parameter tuple. */
#define mc_rep_funcs2(name, bitd, step1, step2, W, opt) \
    mc_rep_func2(name, bitd, step1, step2, W, opt) \
    mc_rep_uni_func2(name, bitd, step1, step2, W, opt) \
    mc_rep_bi_func2(name, bitd, step1, step2, W, opt)
174 
175 #if ARCH_X86_64 && HAVE_SSE4_EXTERNAL
176 
/*
 * Define the 10-bit, <width1>-wide function for instruction set <opt1> out
 * of two kernels from different instruction sets: the <width2>-wide <opt1>
 * kernel for the left part and the <width3>-wide <opt2> kernel for the rest.
 *
 * The second call offsets the int16_t destination by width2 elements and the
 * byte source by width4 bytes; width4 is therefore the byte size of width2
 * samples and must be supplied consistently by the user of the macro.
 */
#define mc_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \
void ff_hevc_put_hevc_##name##width1##_10_##opt1(int16_t *dst, uint8_t *src, ptrdiff_t _srcstride, \
                                                int height, intptr_t mx, intptr_t my, int width) \
{ \
    ff_hevc_put_hevc_##name##width2##_10_##opt1(dst, src, _srcstride, height, mx, my, width); \
    ff_hevc_put_hevc_##name##width3##_10_##opt2(dst + (width2), src + (width4), _srcstride, height, mx, my, width); \
}
185 
/*
 * "bi" variant of the 10-bit mixed-instruction-set wrapper: <width2>-wide
 * <opt1> kernel on the left, <width3>-wide <opt2> kernel on the rest.
 * For the second call the byte pointers dst and src advance by width4 bytes
 * and the int16_t src2 by width2 elements.
 */
#define mc_bi_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \
void ff_hevc_put_hevc_bi_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, \
                                                   ptrdiff_t _srcstride, int16_t *src2, \
                                                   int height, intptr_t mx, intptr_t my, int width) \
{ \
    ff_hevc_put_hevc_bi_##name##width2##_10_##opt1(dst, dststride, src, _srcstride, src2, \
                                                  height, mx, my, width); \
    ff_hevc_put_hevc_bi_##name##width3##_10_##opt2(dst + (width4), dststride, src + (width4), _srcstride, src2 + (width2), \
                                                  height, mx, my, width); \
}
196 
/*
 * "uni" variant of the 10-bit mixed-instruction-set wrapper: <width2>-wide
 * <opt1> kernel on the left, <width3>-wide <opt2> kernel on the rest.
 * For the second call both byte pointers (dst and src) advance by width4
 * bytes.
 */
#define mc_uni_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \
void ff_hevc_put_hevc_uni_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride, \
                                                    uint8_t *src, ptrdiff_t _srcstride, int height, \
                                                    intptr_t mx, intptr_t my, int width) \
{ \
    ff_hevc_put_hevc_uni_##name##width2##_10_##opt1(dst, dststride, src, _srcstride, \
                                                   height, mx, my, width); \
    ff_hevc_put_hevc_uni_##name##width3##_10_##opt2(dst + (width4), dststride, src + (width4), _srcstride, \
                                                   height, mx, my, width); \
}
207 
/* Emit the plain, "bi" and "uni" 10-bit mixed-instruction-set wrappers for one parameter tuple. */
#define mc_rep_mixs_10(name, width1, width2, width3, opt1, opt2, width4) \
mc_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \
mc_bi_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \
mc_uni_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4)
212 
/*
 * Define the 8-bit, <width1>-wide function for instruction set <opt1> out of
 * two kernels from different instruction sets: the <width2>-wide <opt1>
 * kernel for the left part and the <width3>-wide <opt2> kernel for the rest.
 * The second call offsets both dst (int16_t elements) and src (bytes) by
 * width2.
 */
#define mc_rep_mix_8(name, width1, width2, width3, opt1, opt2) \
void ff_hevc_put_hevc_##name##width1##_8_##opt1(int16_t *dst, uint8_t *src, ptrdiff_t _srcstride, \
                                               int height, intptr_t mx, intptr_t my, int width) \
{ \
    ff_hevc_put_hevc_##name##width2##_8_##opt1(dst, src, _srcstride, height, mx, my, width); \
    ff_hevc_put_hevc_##name##width3##_8_##opt2(dst + (width2), src + (width2), _srcstride, height, mx, my, width); \
}
221 
/*
 * "bi" variant of the 8-bit mixed-instruction-set wrapper: <width2>-wide
 * <opt1> kernel on the left, <width3>-wide <opt2> kernel on the rest.
 * For the second call dst, src and src2 are each advanced by width2.
 */
#define mc_bi_rep_mix_8(name, width1, width2, width3, opt1, opt2) \
void ff_hevc_put_hevc_bi_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, \
                                                  ptrdiff_t _srcstride, int16_t *src2, \
                                                  int height, intptr_t mx, intptr_t my, int width) \
{ \
    ff_hevc_put_hevc_bi_##name##width2##_8_##opt1(dst, dststride, src, _srcstride, \
                                                 src2, height, mx, my, width); \
    ff_hevc_put_hevc_bi_##name##width3##_8_##opt2(dst + (width2), dststride, src + (width2), _srcstride, \
                                                 src2 + (width2), height, mx, my, width); \
}
232 
/*
 * "uni" variant of the 8-bit mixed-instruction-set wrapper: <width2>-wide
 * <opt1> kernel on the left, <width3>-wide <opt2> kernel on the rest.
 * For the second call dst and src are each advanced by width2 bytes.
 */
#define mc_uni_rep_mix_8(name, width1, width2, width3, opt1, opt2) \
void ff_hevc_put_hevc_uni_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride, \
                                                   uint8_t *src, ptrdiff_t _srcstride, int height, \
                                                   intptr_t mx, intptr_t my, int width) \
{ \
    ff_hevc_put_hevc_uni_##name##width2##_8_##opt1(dst, dststride, src, _srcstride, \
                                                  height, mx, my, width); \
    ff_hevc_put_hevc_uni_##name##width3##_8_##opt2(dst + (width2), dststride, src + (width2), _srcstride, \
                                                  height, mx, my, width); \
}
243 
/* Emit the plain, "bi" and "uni" 8-bit mixed-instruction-set wrappers for one parameter tuple. */
#define mc_rep_mixs_8(name, width1, width2, width3, opt1, opt2) \
mc_rep_mix_8(name, width1, width2, width3, opt1, opt2) \
mc_bi_rep_mix_8(name, width1, width2, width3, opt1, opt2) \
mc_uni_rep_mix_8(name, width1, width2, width3, opt1, opt2)
248 
#if HAVE_AVX2_EXTERNAL

/*
 * AVX2 wrapper instantiations.
 *
 * Widths that have no native AVX2 kernel are assembled from narrower ones.
 * The *_mix_* forms pair an AVX2 kernel with an SSE4 kernel for the remaining
 * columns; the mc_rep_* forms repeat a single AVX2 kernel.
 * The order matters where a wrapper is built on top of an earlier wrapper
 * (e.g. a 64-wide function stepping by the 32-wide one).
 */

/* 8-bit, 48 wide = 32 (avx2) + 16 (sse4) */
mc_rep_mixs_8(pel_pixels, 48, 32, 16, avx2, sse4)
mc_rep_mixs_8(epel_hv,    48, 32, 16, avx2, sse4)
mc_rep_mixs_8(epel_h,     48, 32, 16, avx2, sse4)
mc_rep_mixs_8(epel_v,     48, 32, 16, avx2, sse4)

/* 10-bit, 24 wide = 16 (avx2) + 8 (sse4); 32 is the byte offset of sample 16 */
mc_rep_mix_10(pel_pixels,    24, 16, 8, avx2, sse4, 32)
mc_bi_rep_mix_10(pel_pixels, 24, 16, 8, avx2, sse4, 32)
mc_rep_mixs_10(epel_hv,      24, 16, 8, avx2, sse4, 32)
mc_rep_mixs_10(epel_h,       24, 16, 8, avx2, sse4, 32)
mc_rep_mixs_10(epel_v,       24, 16, 8, avx2, sse4, 32)


mc_rep_mixs_10(qpel_h,       24, 16, 8, avx2, sse4, 32)
mc_rep_mixs_10(qpel_v,       24, 16, 8, avx2, sse4, 32)
mc_rep_mixs_10(qpel_hv,      24, 16, 8, avx2, sse4, 32)


mc_rep_uni_func(pel_pixels, 8, 64, 128, avx2)//used for 10bit
mc_rep_uni_func(pel_pixels, 8, 32, 96, avx2) //used for 10bit

mc_rep_funcs(pel_pixels, 8, 32, 64, avx2)

mc_rep_func(pel_pixels, 10, 16, 32, avx2)
mc_rep_func(pel_pixels, 10, 16, 48, avx2)
mc_rep_func(pel_pixels, 10, 32, 64, avx2)

mc_rep_bi_func(pel_pixels, 10, 16, 32, avx2)
mc_rep_bi_func(pel_pixels, 10, 16, 48, avx2)
mc_rep_bi_func(pel_pixels, 10, 32, 64, avx2)

mc_rep_funcs(epel_h, 8, 32, 64, avx2)

mc_rep_funcs(epel_v, 8, 32, 64, avx2)

mc_rep_funcs(epel_h, 10, 16, 32, avx2)
mc_rep_funcs(epel_h, 10, 16, 48, avx2)
mc_rep_funcs(epel_h, 10, 32, 64, avx2)

mc_rep_funcs(epel_v, 10, 16, 32, avx2)
mc_rep_funcs(epel_v, 10, 16, 48, avx2)
mc_rep_funcs(epel_v, 10, 32, 64, avx2)


mc_rep_funcs(epel_hv, 8, 32, 64, avx2)

mc_rep_funcs(epel_hv, 10, 16, 32, avx2)
mc_rep_funcs(epel_hv, 10, 16, 48, avx2)
mc_rep_funcs(epel_hv, 10, 32, 64, avx2)

mc_rep_funcs(qpel_h, 8, 32, 64, avx2)
mc_rep_mixs_8(qpel_h, 48, 32, 16, avx2, sse4)

mc_rep_funcs(qpel_v, 8, 32, 64, avx2)
mc_rep_mixs_8(qpel_v, 48, 32, 16, avx2, sse4)

mc_rep_funcs(qpel_h, 10, 16, 32, avx2)
mc_rep_funcs(qpel_h, 10, 16, 48, avx2)
mc_rep_funcs(qpel_h, 10, 32, 64, avx2)

mc_rep_funcs(qpel_v, 10, 16, 32, avx2)
mc_rep_funcs(qpel_v, 10, 16, 48, avx2)
mc_rep_funcs(qpel_v, 10, 32, 64, avx2)

mc_rep_funcs(qpel_hv, 10, 16, 32, avx2)
mc_rep_funcs(qpel_hv, 10, 16, 48, avx2)
mc_rep_funcs(qpel_hv, 10, 32, 64, avx2)

#endif //AVX2
319 
320 mc_rep_funcs(pel_pixels, 8, 16, 64, sse4)
321 mc_rep_funcs(pel_pixels, 8, 16, 48, sse4)
322 mc_rep_funcs(pel_pixels, 8, 16, 32, sse4)
323 mc_rep_funcs(pel_pixels, 8, 8, 24, sse4)
324 mc_rep_funcs(pel_pixels,10, 8, 64, sse4)
325 mc_rep_funcs(pel_pixels,10, 8, 48, sse4)
326 mc_rep_funcs(pel_pixels,10, 8, 32, sse4)
327 mc_rep_funcs(pel_pixels,10, 8, 24, sse4)
328 mc_rep_funcs(pel_pixels,10, 8, 16, sse4)
329 mc_rep_funcs(pel_pixels,10, 4, 12, sse4)
330 mc_rep_funcs(pel_pixels,12, 8, 64, sse4)
331 mc_rep_funcs(pel_pixels,12, 8, 48, sse4)
332 mc_rep_funcs(pel_pixels,12, 8, 32, sse4)
333 mc_rep_funcs(pel_pixels,12, 8, 24, sse4)
334 mc_rep_funcs(pel_pixels,12, 8, 16, sse4)
335 mc_rep_funcs(pel_pixels,12, 4, 12, sse4)
336 
337 mc_rep_funcs(epel_h, 8, 16, 64, sse4)
338 mc_rep_funcs(epel_h, 8, 16, 48, sse4)
339 mc_rep_funcs(epel_h, 8, 16, 32, sse4)
340 mc_rep_funcs(epel_h, 8, 8, 24, sse4)
341 mc_rep_funcs(epel_h,10, 8, 64, sse4)
342 mc_rep_funcs(epel_h,10, 8, 48, sse4)
343 mc_rep_funcs(epel_h,10, 8, 32, sse4)
344 mc_rep_funcs(epel_h,10, 8, 24, sse4)
345 mc_rep_funcs(epel_h,10, 8, 16, sse4)
346 mc_rep_funcs(epel_h,10, 4, 12, sse4)
347 mc_rep_funcs(epel_h,12, 8, 64, sse4)
348 mc_rep_funcs(epel_h,12, 8, 48, sse4)
349 mc_rep_funcs(epel_h,12, 8, 32, sse4)
350 mc_rep_funcs(epel_h,12, 8, 24, sse4)
351 mc_rep_funcs(epel_h,12, 8, 16, sse4)
352 mc_rep_funcs(epel_h,12, 4, 12, sse4)
353 mc_rep_funcs(epel_v, 8, 16, 64, sse4)
354 mc_rep_funcs(epel_v, 8, 16, 48, sse4)
355 mc_rep_funcs(epel_v, 8, 16, 32, sse4)
356 mc_rep_funcs(epel_v, 8, 8, 24, sse4)
357 mc_rep_funcs(epel_v,10, 8, 64, sse4)
358 mc_rep_funcs(epel_v,10, 8, 48, sse4)
359 mc_rep_funcs(epel_v,10, 8, 32, sse4)
360 mc_rep_funcs(epel_v,10, 8, 24, sse4)
361 mc_rep_funcs(epel_v,10, 8, 16, sse4)
362 mc_rep_funcs(epel_v,10, 4, 12, sse4)
363 mc_rep_funcs(epel_v,12, 8, 64, sse4)
364 mc_rep_funcs(epel_v,12, 8, 48, sse4)
365 mc_rep_funcs(epel_v,12, 8, 32, sse4)
366 mc_rep_funcs(epel_v,12, 8, 24, sse4)
367 mc_rep_funcs(epel_v,12, 8, 16, sse4)
368 mc_rep_funcs(epel_v,12, 4, 12, sse4)
369 mc_rep_funcs(epel_hv, 8, 16, 64, sse4)
370 mc_rep_funcs(epel_hv, 8, 16, 48, sse4)
371 mc_rep_funcs(epel_hv, 8, 16, 32, sse4)
372 mc_rep_funcs(epel_hv, 8, 8, 24, sse4)
373 mc_rep_funcs2(epel_hv,8, 8, 4, 12, sse4)
374 mc_rep_funcs(epel_hv,10, 8, 64, sse4)
375 mc_rep_funcs(epel_hv,10, 8, 48, sse4)
376 mc_rep_funcs(epel_hv,10, 8, 32, sse4)
377 mc_rep_funcs(epel_hv,10, 8, 24, sse4)
378 mc_rep_funcs(epel_hv,10, 8, 16, sse4)
379 mc_rep_funcs(epel_hv,10, 4, 12, sse4)
380 mc_rep_funcs(epel_hv,12, 8, 64, sse4)
381 mc_rep_funcs(epel_hv,12, 8, 48, sse4)
382 mc_rep_funcs(epel_hv,12, 8, 32, sse4)
383 mc_rep_funcs(epel_hv,12, 8, 24, sse4)
384 mc_rep_funcs(epel_hv,12, 8, 16, sse4)
385 mc_rep_funcs(epel_hv,12, 4, 12, sse4)
386 
387 mc_rep_funcs(qpel_h, 8, 16, 64, sse4)
388 mc_rep_funcs(qpel_h, 8, 16, 48, sse4)
389 mc_rep_funcs(qpel_h, 8, 16, 32, sse4)
390 mc_rep_funcs(qpel_h, 8, 8, 24, sse4)
391 mc_rep_funcs(qpel_h,10, 8, 64, sse4)
392 mc_rep_funcs(qpel_h,10, 8, 48, sse4)
393 mc_rep_funcs(qpel_h,10, 8, 32, sse4)
394 mc_rep_funcs(qpel_h,10, 8, 24, sse4)
395 mc_rep_funcs(qpel_h,10, 8, 16, sse4)
396 mc_rep_funcs(qpel_h,10, 4, 12, sse4)
397 mc_rep_funcs(qpel_h,12, 8, 64, sse4)
398 mc_rep_funcs(qpel_h,12, 8, 48, sse4)
399 mc_rep_funcs(qpel_h,12, 8, 32, sse4)
400 mc_rep_funcs(qpel_h,12, 8, 24, sse4)
401 mc_rep_funcs(qpel_h,12, 8, 16, sse4)
402 mc_rep_funcs(qpel_h,12, 4, 12, sse4)
403 mc_rep_funcs(qpel_v, 8, 16, 64, sse4)
404 mc_rep_funcs(qpel_v, 8, 16, 48, sse4)
405 mc_rep_funcs(qpel_v, 8, 16, 32, sse4)
406 mc_rep_funcs(qpel_v, 8, 8, 24, sse4)
407 mc_rep_funcs(qpel_v,10, 8, 64, sse4)
408 mc_rep_funcs(qpel_v,10, 8, 48, sse4)
409 mc_rep_funcs(qpel_v,10, 8, 32, sse4)
410 mc_rep_funcs(qpel_v,10, 8, 24, sse4)
411 mc_rep_funcs(qpel_v,10, 8, 16, sse4)
412 mc_rep_funcs(qpel_v,10, 4, 12, sse4)
413 mc_rep_funcs(qpel_v,12, 8, 64, sse4)
414 mc_rep_funcs(qpel_v,12, 8, 48, sse4)
415 mc_rep_funcs(qpel_v,12, 8, 32, sse4)
416 mc_rep_funcs(qpel_v,12, 8, 24, sse4)
417 mc_rep_funcs(qpel_v,12, 8, 16, sse4)
418 mc_rep_funcs(qpel_v,12, 4, 12, sse4)
419 mc_rep_funcs(qpel_hv, 8, 8, 64, sse4)
420 mc_rep_funcs(qpel_hv, 8, 8, 48, sse4)
421 mc_rep_funcs(qpel_hv, 8, 8, 32, sse4)
422 mc_rep_funcs(qpel_hv, 8, 8, 24, sse4)
423 mc_rep_funcs(qpel_hv, 8, 8, 16, sse4)
424 mc_rep_funcs2(qpel_hv,8, 8, 4, 12, sse4)
425 mc_rep_funcs(qpel_hv,10, 8, 64, sse4)
426 mc_rep_funcs(qpel_hv,10, 8, 48, sse4)
427 mc_rep_funcs(qpel_hv,10, 8, 32, sse4)
428 mc_rep_funcs(qpel_hv,10, 8, 24, sse4)
429 mc_rep_funcs(qpel_hv,10, 8, 16, sse4)
430 mc_rep_funcs(qpel_hv,10, 4, 12, sse4)
431 mc_rep_funcs(qpel_hv,12, 8, 64, sse4)
432 mc_rep_funcs(qpel_hv,12, 8, 48, sse4)
433 mc_rep_funcs(qpel_hv,12, 8, 32, sse4)
434 mc_rep_funcs(qpel_hv,12, 8, 24, sse4)
435 mc_rep_funcs(qpel_hv,12, 8, 16, sse4)
436 mc_rep_funcs(qpel_hv,12, 4, 12, sse4)
437 
/*
 * Define ff_hevc_put_hevc_uni_w<W>_<bitd>_<opt>() by repeating the
 * <step>-wide uni_w kernel across a W-wide block.
 *
 * The int16_t source advances by i elements and the byte destination by
 * i * ((bitd + 7) / 8) bytes for column offset i; height, denom, _wx and _ox
 * are forwarded unchanged.
 */
#define mc_rep_uni_w(bitd, step, W, opt) \
void ff_hevc_put_hevc_uni_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, int16_t *_src, \
                                               int height, int denom, int _wx, int _ox) \
{ \
    int i; \
    for (i = 0; i < (W); i += (step)) { \
        int16_t *src = _src + i; \
        uint8_t *dst = _dst + (i * (((bitd) + 7) / 8)); \
        ff_hevc_put_hevc_uni_w##step##_##bitd##_##opt(dst, dststride, src, \
                                                     height, denom, _wx, _ox); \
    } \
}

mc_rep_uni_w(8, 6, 12, sse4)
mc_rep_uni_w(8, 8, 16, sse4)
mc_rep_uni_w(8, 8, 24, sse4)
mc_rep_uni_w(8, 8, 32, sse4)
mc_rep_uni_w(8, 8, 48, sse4)
mc_rep_uni_w(8, 8, 64, sse4)

mc_rep_uni_w(10, 6, 12, sse4)
mc_rep_uni_w(10, 8, 16, sse4)
mc_rep_uni_w(10, 8, 24, sse4)
mc_rep_uni_w(10, 8, 32, sse4)
mc_rep_uni_w(10, 8, 48, sse4)
mc_rep_uni_w(10, 8, 64, sse4)

mc_rep_uni_w(12, 6, 12, sse4)
mc_rep_uni_w(12, 8, 16, sse4)
mc_rep_uni_w(12, 8, 24, sse4)
mc_rep_uni_w(12, 8, 32, sse4)
mc_rep_uni_w(12, 8, 48, sse4)
mc_rep_uni_w(12, 8, 64, sse4)
473 
/*
 * Define ff_hevc_put_hevc_bi_w<W>_<bitd>_<opt>() by repeating the
 * <step>-wide bi_w kernel across a W-wide block.
 *
 * Both int16_t sources advance by i elements and the byte destination by
 * i * ((bitd + 7) / 8) bytes for column offset i; height, denom and the
 * weight/offset pairs are forwarded unchanged.
 */
#define mc_rep_bi_w(bitd, step, W, opt) \
void ff_hevc_put_hevc_bi_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, int16_t *_src, \
                                              int16_t *_src2, int height, \
                                              int denom, int _wx0, int _wx1, int _ox0, int _ox1) \
{ \
    int i; \
    for (i = 0; i < (W); i += (step)) { \
        int16_t *src  = _src + i; \
        int16_t *src2 = _src2 + i; \
        uint8_t *dst  = _dst + (i * (((bitd) + 7) / 8)); \
        ff_hevc_put_hevc_bi_w##step##_##bitd##_##opt(dst, dststride, src, src2, \
                                                    height, denom, _wx0, _wx1, _ox0, _ox1); \
    } \
}

mc_rep_bi_w(8, 6, 12, sse4)
mc_rep_bi_w(8, 8, 16, sse4)
mc_rep_bi_w(8, 8, 24, sse4)
mc_rep_bi_w(8, 8, 32, sse4)
mc_rep_bi_w(8, 8, 48, sse4)
mc_rep_bi_w(8, 8, 64, sse4)

mc_rep_bi_w(10, 6, 12, sse4)
mc_rep_bi_w(10, 8, 16, sse4)
mc_rep_bi_w(10, 8, 24, sse4)
mc_rep_bi_w(10, 8, 32, sse4)
mc_rep_bi_w(10, 8, 48, sse4)
mc_rep_bi_w(10, 8, 64, sse4)

mc_rep_bi_w(12, 6, 12, sse4)
mc_rep_bi_w(12, 8, 16, sse4)
mc_rep_bi_w(12, 8, 24, sse4)
mc_rep_bi_w(12, 8, 32, sse4)
mc_rep_bi_w(12, 8, 48, sse4)
mc_rep_bi_w(12, 8, 64, sse4)
512 
513 #define mc_uni_w_func(name, bitd, W, opt) \
514 void ff_hevc_put_hevc_uni_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride, \
515  uint8_t *_src, ptrdiff_t _srcstride, \
516  int height, int denom, \
517  int _wx, int _ox, \
518  intptr_t mx, intptr_t my, int width) \
519 { \
520  LOCAL_ALIGNED_16(int16_t, temp, [71 * MAX_PB_SIZE]); \
521  ff_hevc_put_hevc_##name##W##_##bitd##_##opt(temp, _src, _srcstride, height, mx, my, width); \
522  ff_hevc_put_hevc_uni_w##W##_##bitd##_##opt(_dst, _dststride, temp, height, denom, _wx, _ox);\
523 }
524 
525 #define mc_uni_w_funcs(name, bitd, opt) \
526  mc_uni_w_func(name, bitd, 4, opt) \
527  mc_uni_w_func(name, bitd, 8, opt) \
528  mc_uni_w_func(name, bitd, 12, opt) \
529  mc_uni_w_func(name, bitd, 16, opt) \
530  mc_uni_w_func(name, bitd, 24, opt) \
531  mc_uni_w_func(name, bitd, 32, opt) \
532  mc_uni_w_func(name, bitd, 48, opt) \
533  mc_uni_w_func(name, bitd, 64, opt)
534 
535 mc_uni_w_funcs(pel_pixels, 8, sse4)
536 mc_uni_w_func(pel_pixels, 8, 6, sse4)
537 mc_uni_w_funcs(epel_h, 8, sse4)
538 mc_uni_w_func(epel_h, 8, 6, sse4)
539 mc_uni_w_funcs(epel_v, 8, sse4)
540 mc_uni_w_func(epel_v, 8, 6, sse4)
541 mc_uni_w_funcs(epel_hv, 8, sse4)
542 mc_uni_w_func(epel_hv, 8, 6, sse4)
543 mc_uni_w_funcs(qpel_h, 8, sse4)
544 mc_uni_w_funcs(qpel_v, 8, sse4)
545 mc_uni_w_funcs(qpel_hv, 8, sse4)
546 
547 mc_uni_w_funcs(pel_pixels, 10, sse4)
548 mc_uni_w_func(pel_pixels, 10, 6, sse4)
549 mc_uni_w_funcs(epel_h, 10, sse4)
550 mc_uni_w_func(epel_h, 10, 6, sse4)
551 mc_uni_w_funcs(epel_v, 10, sse4)
552 mc_uni_w_func(epel_v, 10, 6, sse4)
553 mc_uni_w_funcs(epel_hv, 10, sse4)
554 mc_uni_w_func(epel_hv, 10, 6, sse4)
555 mc_uni_w_funcs(qpel_h, 10, sse4)
556 mc_uni_w_funcs(qpel_v, 10, sse4)
557 mc_uni_w_funcs(qpel_hv, 10, sse4)
558 
559 mc_uni_w_funcs(pel_pixels, 12, sse4)
560 mc_uni_w_func(pel_pixels, 12, 6, sse4)
561 mc_uni_w_funcs(epel_h, 12, sse4)
562 mc_uni_w_func(epel_h, 12, 6, sse4)
563 mc_uni_w_funcs(epel_v, 12, sse4)
564 mc_uni_w_func(epel_v, 12, 6, sse4)
565 mc_uni_w_funcs(epel_hv, 12, sse4)
566 mc_uni_w_func(epel_hv, 12, 6, sse4)
567 mc_uni_w_funcs(qpel_h, 12, sse4)
568 mc_uni_w_funcs(qpel_v, 12, sse4)
569 mc_uni_w_funcs(qpel_hv, 12, sse4)
570 
571 #define mc_bi_w_func(name, bitd, W, opt) \
572 void ff_hevc_put_hevc_bi_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride, \
573  uint8_t *_src, ptrdiff_t _srcstride, \
574  int16_t *_src2, \
575  int height, int denom, \
576  int _wx0, int _wx1, int _ox0, int _ox1, \
577  intptr_t mx, intptr_t my, int width) \
578 { \
579  LOCAL_ALIGNED_16(int16_t, temp, [71 * MAX_PB_SIZE]); \
580  ff_hevc_put_hevc_##name##W##_##bitd##_##opt(temp, _src, _srcstride, height, mx, my, width); \
581  ff_hevc_put_hevc_bi_w##W##_##bitd##_##opt(_dst, _dststride, temp, _src2, \
582  height, denom, _wx0, _wx1, _ox0, _ox1); \
583 }
584 
585 #define mc_bi_w_funcs(name, bitd, opt) \
586  mc_bi_w_func(name, bitd, 4, opt) \
587  mc_bi_w_func(name, bitd, 8, opt) \
588  mc_bi_w_func(name, bitd, 12, opt) \
589  mc_bi_w_func(name, bitd, 16, opt) \
590  mc_bi_w_func(name, bitd, 24, opt) \
591  mc_bi_w_func(name, bitd, 32, opt) \
592  mc_bi_w_func(name, bitd, 48, opt) \
593  mc_bi_w_func(name, bitd, 64, opt)
594 
595 mc_bi_w_funcs(pel_pixels, 8, sse4)
596 mc_bi_w_func(pel_pixels, 8, 6, sse4)
597 mc_bi_w_funcs(epel_h, 8, sse4)
598 mc_bi_w_func(epel_h, 8, 6, sse4)
599 mc_bi_w_funcs(epel_v, 8, sse4)
600 mc_bi_w_func(epel_v, 8, 6, sse4)
601 mc_bi_w_funcs(epel_hv, 8, sse4)
602 mc_bi_w_func(epel_hv, 8, 6, sse4)
603 mc_bi_w_funcs(qpel_h, 8, sse4)
604 mc_bi_w_funcs(qpel_v, 8, sse4)
605 mc_bi_w_funcs(qpel_hv, 8, sse4)
606 
607 mc_bi_w_funcs(pel_pixels, 10, sse4)
608 mc_bi_w_func(pel_pixels, 10, 6, sse4)
609 mc_bi_w_funcs(epel_h, 10, sse4)
610 mc_bi_w_func(epel_h, 10, 6, sse4)
611 mc_bi_w_funcs(epel_v, 10, sse4)
612 mc_bi_w_func(epel_v, 10, 6, sse4)
613 mc_bi_w_funcs(epel_hv, 10, sse4)
614 mc_bi_w_func(epel_hv, 10, 6, sse4)
615 mc_bi_w_funcs(qpel_h, 10, sse4)
616 mc_bi_w_funcs(qpel_v, 10, sse4)
617 mc_bi_w_funcs(qpel_hv, 10, sse4)
618 
619 mc_bi_w_funcs(pel_pixels, 12, sse4)
620 mc_bi_w_func(pel_pixels, 12, 6, sse4)
621 mc_bi_w_funcs(epel_h, 12, sse4)
622 mc_bi_w_func(epel_h, 12, 6, sse4)
623 mc_bi_w_funcs(epel_v, 12, sse4)
624 mc_bi_w_func(epel_v, 12, 6, sse4)
625 mc_bi_w_funcs(epel_hv, 12, sse4)
626 mc_bi_w_func(epel_hv, 12, 6, sse4)
627 mc_bi_w_funcs(qpel_h, 12, sse4)
628 mc_bi_w_funcs(qpel_v, 12, sse4)
629 mc_bi_w_funcs(qpel_hv, 12, sse4)
630 #endif //ARCH_X86_64 && HAVE_SSE4_EXTERNAL
631 
/*
 * Declare the SAO band filter kernels for one bit depth and instruction-set
 * suffix. Five symbols are produced, distinguished by the number pasted
 * after "sao_band_filter_" (8, 16, 32, 48, 64); all share one signature.
 */
#define SAO_BAND_FILTER_FUNCS(bitd, opt) \
void ff_hevc_sao_band_filter_8_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
                                             int16_t *sao_offset_val, int sao_left_class, int width, int height); \
void ff_hevc_sao_band_filter_16_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
                                              int16_t *sao_offset_val, int sao_left_class, int width, int height); \
void ff_hevc_sao_band_filter_32_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
                                              int16_t *sao_offset_val, int sao_left_class, int width, int height); \
void ff_hevc_sao_band_filter_48_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
                                              int16_t *sao_offset_val, int sao_left_class, int width, int height); \
void ff_hevc_sao_band_filter_64_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
                                              int16_t *sao_offset_val, int sao_left_class, int width, int height);

SAO_BAND_FILTER_FUNCS(8,  sse2)
SAO_BAND_FILTER_FUNCS(10, sse2)
SAO_BAND_FILTER_FUNCS(12, sse2)
SAO_BAND_FILTER_FUNCS(8,   avx)
SAO_BAND_FILTER_FUNCS(10,  avx)
SAO_BAND_FILTER_FUNCS(12,  avx)
SAO_BAND_FILTER_FUNCS(8,  avx2)
SAO_BAND_FILTER_FUNCS(10, avx2)
SAO_BAND_FILTER_FUNCS(12, avx2)
653 
/*
 * Point c->sao_band_filter[0..4] at the 8/16/32/48/64 band filter kernels
 * for the given bit depth and instruction-set suffix. Expects a variable
 * named 'c' with a sao_band_filter array to be in scope at the point of use.
 */
#define SAO_BAND_INIT(bitd, opt) do { \
    c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_##bitd##_##opt; \
    c->sao_band_filter[1] = ff_hevc_sao_band_filter_16_##bitd##_##opt; \
    c->sao_band_filter[2] = ff_hevc_sao_band_filter_32_##bitd##_##opt; \
    c->sao_band_filter[3] = ff_hevc_sao_band_filter_48_##bitd##_##opt; \
    c->sao_band_filter[4] = ff_hevc_sao_band_filter_64_##bitd##_##opt; \
} while (0)
661 
/*
 * Declare the SAO edge filter kernels for one bit depth and instruction-set
 * suffix. Five symbols are produced, distinguished by the number pasted
 * after "sao_edge_filter_" (8, 16, 32, 48, 64); all share one signature.
 *
 * The last line intentionally carries no line continuation, so the macro
 * ends here regardless of what follows it in the file.
 */
#define SAO_EDGE_FILTER_FUNCS(bitd, opt) \
void ff_hevc_sao_edge_filter_8_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val, \
                                             int eo, int width, int height); \
void ff_hevc_sao_edge_filter_16_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val, \
                                              int eo, int width, int height); \
void ff_hevc_sao_edge_filter_32_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val, \
                                              int eo, int width, int height); \
void ff_hevc_sao_edge_filter_48_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val, \
                                              int eo, int width, int height); \
void ff_hevc_sao_edge_filter_64_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val, \
                                              int eo, int width, int height);

SAO_EDGE_FILTER_FUNCS(8,  ssse3)
SAO_EDGE_FILTER_FUNCS(8,  avx2)
SAO_EDGE_FILTER_FUNCS(10, sse2)
SAO_EDGE_FILTER_FUNCS(10, avx2)
SAO_EDGE_FILTER_FUNCS(12, sse2)
SAO_EDGE_FILTER_FUNCS(12, avx2)
680 
/*
 * Point c->sao_edge_filter[0..4] at the 8/16/32/48/64 edge filter kernels
 * for the given bit depth and instruction-set suffix. Expects a variable
 * named 'c' with a sao_edge_filter array to be in scope at the point of use.
 */
#define SAO_EDGE_INIT(bitd, opt) do { \
    c->sao_edge_filter[0] = ff_hevc_sao_edge_filter_8_##bitd##_##opt; \
    c->sao_edge_filter[1] = ff_hevc_sao_edge_filter_16_##bitd##_##opt; \
    c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_##bitd##_##opt; \
    c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_48_##bitd##_##opt; \
    c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_64_##bitd##_##opt; \
} while (0)
688 
/*
 * Apply PEL_LINK once per block width for table indices 1..9, pairing each
 * index with fname suffixed by 4, 6, 8, 12, 16, 24, 32, 48 and 64.
 * PEL_LINK itself is not defined in this part of the file; pointer, my, mx,
 * bitd and opt are passed through to it untouched. The expansion has no
 * trailing ';' so the use site supplies it.
 */
#define EPEL_LINKS(pointer, my, mx, fname, bitd, opt) \
    PEL_LINK(pointer, 1, my, mx, fname##4,  bitd, opt); \
    PEL_LINK(pointer, 2, my, mx, fname##6,  bitd, opt); \
    PEL_LINK(pointer, 3, my, mx, fname##8,  bitd, opt); \
    PEL_LINK(pointer, 4, my, mx, fname##12, bitd, opt); \
    PEL_LINK(pointer, 5, my, mx, fname##16, bitd, opt); \
    PEL_LINK(pointer, 6, my, mx, fname##24, bitd, opt); \
    PEL_LINK(pointer, 7, my, mx, fname##32, bitd, opt); \
    PEL_LINK(pointer, 8, my, mx, fname##48, bitd, opt); \
    PEL_LINK(pointer, 9, my, mx, fname##64, bitd, opt)
/*
 * Same as EPEL_LINKS but without the width-6 entry: PEL_LINK is applied for
 * table indices 1 and 3..9 (widths 4, 8, 12, 16, 24, 32, 48, 64); index 2 is
 * left untouched. PEL_LINK is not defined in this part of the file. The
 * expansion has no trailing ';' so the use site supplies it.
 */
#define QPEL_LINKS(pointer, my, mx, fname, bitd, opt) \
    PEL_LINK(pointer, 1, my, mx, fname##4,  bitd, opt); \
    PEL_LINK(pointer, 3, my, mx, fname##8,  bitd, opt); \
    PEL_LINK(pointer, 4, my, mx, fname##12, bitd, opt); \
    PEL_LINK(pointer, 5, my, mx, fname##16, bitd, opt); \
    PEL_LINK(pointer, 6, my, mx, fname##24, bitd, opt); \
    PEL_LINK(pointer, 7, my, mx, fname##32, bitd, opt); \
    PEL_LINK(pointer, 8, my, mx, fname##48, bitd, opt); \
    PEL_LINK(pointer, 9, my, mx, fname##64, bitd, opt)
708 
710 {
711  int cpu_flags = av_get_cpu_flags();
712 
713  if (bit_depth == 8) {
714  if (EXTERNAL_MMXEXT(cpu_flags)) {
715  c->idct_dc[0] = ff_hevc_idct_4x4_dc_8_mmxext;
716  c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_mmxext;
717 
718  c->add_residual[0] = ff_hevc_add_residual_4_8_mmxext;
719  }
720  if (EXTERNAL_SSE2(cpu_flags)) {
721  c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2;
722  c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2;
723  if (ARCH_X86_64) {
724  c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_sse2;
725  c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_sse2;
726 
727  c->idct[2] = ff_hevc_idct_16x16_8_sse2;
728  c->idct[3] = ff_hevc_idct_32x32_8_sse2;
729  }
730  SAO_BAND_INIT(8, sse2);
731 
732  c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_sse2;
733  c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_sse2;
734  c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_sse2;
735 
736  c->idct[0] = ff_hevc_idct_4x4_8_sse2;
737  c->idct[1] = ff_hevc_idct_8x8_8_sse2;
738 
739  c->add_residual[1] = ff_hevc_add_residual_8_8_sse2;
740  c->add_residual[2] = ff_hevc_add_residual_16_8_sse2;
741  c->add_residual[3] = ff_hevc_add_residual_32_8_sse2;
742  }
743  if (EXTERNAL_SSSE3(cpu_flags)) {
744  if(ARCH_X86_64) {
745  c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3;
746  c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3;
747  }
748  SAO_EDGE_INIT(8, ssse3);
749  }
750  if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) {
751 
752  EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 8, sse4);
753  EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 8, sse4);
754  EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 8, sse4);
755  EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv, 8, sse4);
756 
757  QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 8, sse4);
758  QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h, 8, sse4);
759  QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 8, sse4);
760  QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 8, sse4);
761  }
762  if (EXTERNAL_AVX(cpu_flags)) {
763  c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_avx;
764  c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_avx;
765  if (ARCH_X86_64) {
766  c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_avx;
767  c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_avx;
768 
769  c->idct[2] = ff_hevc_idct_16x16_8_avx;
770  c->idct[3] = ff_hevc_idct_32x32_8_avx;
771  }
772  SAO_BAND_INIT(8, avx);
773 
774  c->idct[0] = ff_hevc_idct_4x4_8_avx;
775  c->idct[1] = ff_hevc_idct_8x8_8_avx;
776 
777  c->add_residual[1] = ff_hevc_add_residual_8_8_avx;
778  c->add_residual[2] = ff_hevc_add_residual_16_8_avx;
779  c->add_residual[3] = ff_hevc_add_residual_32_8_avx;
780  }
781  if (EXTERNAL_AVX2(cpu_flags)) {
782  c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_8_avx2;
783  c->sao_band_filter[1] = ff_hevc_sao_band_filter_16_8_avx2;
784  }
786  c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_avx2;
787  c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_avx2;
788  if (ARCH_X86_64) {
789  c->put_hevc_epel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_8_avx2;
790  c->put_hevc_epel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_8_avx2;
791  c->put_hevc_epel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_8_avx2;
792 
793  c->put_hevc_qpel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_8_avx2;
794  c->put_hevc_qpel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_8_avx2;
795  c->put_hevc_qpel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_8_avx2;
796 
797  c->put_hevc_epel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2;
798  c->put_hevc_epel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2;
799  c->put_hevc_epel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2;
800 
801  c->put_hevc_qpel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2;
802  c->put_hevc_qpel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2;
803  c->put_hevc_qpel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2;
804 
805  c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_8_avx2;
806  c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_8_avx2;
807  c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_8_avx2;
808 
809  c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_8_avx2;
810  c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_8_avx2;
811  c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_8_avx2;
812 
813  c->put_hevc_epel[7][0][1] = ff_hevc_put_hevc_epel_h32_8_avx2;
814  c->put_hevc_epel[8][0][1] = ff_hevc_put_hevc_epel_h48_8_avx2;
815  c->put_hevc_epel[9][0][1] = ff_hevc_put_hevc_epel_h64_8_avx2;
816 
817  c->put_hevc_epel_uni[7][0][1] = ff_hevc_put_hevc_uni_epel_h32_8_avx2;
818  c->put_hevc_epel_uni[8][0][1] = ff_hevc_put_hevc_uni_epel_h48_8_avx2;
819  c->put_hevc_epel_uni[9][0][1] = ff_hevc_put_hevc_uni_epel_h64_8_avx2;
820 
821  c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_hevc_bi_epel_h32_8_avx2;
822  c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_hevc_bi_epel_h48_8_avx2;
823  c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_hevc_bi_epel_h64_8_avx2;
824 
825  c->put_hevc_epel[7][1][0] = ff_hevc_put_hevc_epel_v32_8_avx2;
826  c->put_hevc_epel[8][1][0] = ff_hevc_put_hevc_epel_v48_8_avx2;
827  c->put_hevc_epel[9][1][0] = ff_hevc_put_hevc_epel_v64_8_avx2;
828 
829  c->put_hevc_epel_uni[7][1][0] = ff_hevc_put_hevc_uni_epel_v32_8_avx2;
830  c->put_hevc_epel_uni[8][1][0] = ff_hevc_put_hevc_uni_epel_v48_8_avx2;
831  c->put_hevc_epel_uni[9][1][0] = ff_hevc_put_hevc_uni_epel_v64_8_avx2;
832 
833  c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_hevc_bi_epel_v32_8_avx2;
834  c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_hevc_bi_epel_v48_8_avx2;
835  c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_hevc_bi_epel_v64_8_avx2;
836 
837  c->put_hevc_epel[7][1][1] = ff_hevc_put_hevc_epel_hv32_8_avx2;
838  c->put_hevc_epel[8][1][1] = ff_hevc_put_hevc_epel_hv48_8_avx2;
839  c->put_hevc_epel[9][1][1] = ff_hevc_put_hevc_epel_hv64_8_avx2;
840 
841  c->put_hevc_epel_uni[7][1][1] = ff_hevc_put_hevc_uni_epel_hv32_8_avx2;
842  c->put_hevc_epel_uni[8][1][1] = ff_hevc_put_hevc_uni_epel_hv48_8_avx2;
843  c->put_hevc_epel_uni[9][1][1] = ff_hevc_put_hevc_uni_epel_hv64_8_avx2;
844 
845  c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_hevc_bi_epel_hv32_8_avx2;
846  c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_hevc_bi_epel_hv48_8_avx2;
847  c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_hevc_bi_epel_hv64_8_avx2;
848 
849  c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_8_avx2;
850  c->put_hevc_qpel[8][0][1] = ff_hevc_put_hevc_qpel_h48_8_avx2;
851  c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_8_avx2;
852 
853  c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_8_avx2;
854  c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_8_avx2;
855  c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_8_avx2;
856 
857  c->put_hevc_qpel_uni[7][0][1] = ff_hevc_put_hevc_uni_qpel_h32_8_avx2;
858  c->put_hevc_qpel_uni[8][0][1] = ff_hevc_put_hevc_uni_qpel_h48_8_avx2;
859  c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_hevc_uni_qpel_h64_8_avx2;
860 
861  c->put_hevc_qpel_uni[7][1][0] = ff_hevc_put_hevc_uni_qpel_v32_8_avx2;
862  c->put_hevc_qpel_uni[8][1][0] = ff_hevc_put_hevc_uni_qpel_v48_8_avx2;
863  c->put_hevc_qpel_uni[9][1][0] = ff_hevc_put_hevc_uni_qpel_v64_8_avx2;
864 
865  c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_hevc_bi_qpel_h32_8_avx2;
866  c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_hevc_bi_qpel_h48_8_avx2;
867  c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_hevc_bi_qpel_h64_8_avx2;
868 
869  c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_hevc_bi_qpel_v32_8_avx2;
870  c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_hevc_bi_qpel_v48_8_avx2;
871  c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_hevc_bi_qpel_v64_8_avx2;
872  }
873  SAO_BAND_INIT(8, avx2);
874 
875  c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_8_avx2;
876  c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_48_8_avx2;
877  c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_64_8_avx2;
878 
879  c->add_residual[3] = ff_hevc_add_residual_32_8_avx2;
880  }
881  } else if (bit_depth == 10) {
882  if (EXTERNAL_MMXEXT(cpu_flags)) {
883  c->add_residual[0] = ff_hevc_add_residual_4_10_mmxext;
884  c->idct_dc[0] = ff_hevc_idct_4x4_dc_10_mmxext;
885  c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_mmxext;
886  }
887  if (EXTERNAL_SSE2(cpu_flags)) {
888  c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2;
889  c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_sse2;
890  if (ARCH_X86_64) {
891  c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_sse2;
892  c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_sse2;
893 
894  c->idct[2] = ff_hevc_idct_16x16_10_sse2;
895  c->idct[3] = ff_hevc_idct_32x32_10_sse2;
896  }
897  SAO_BAND_INIT(10, sse2);
898  SAO_EDGE_INIT(10, sse2);
899 
900  c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_sse2;
901  c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_sse2;
902  c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_sse2;
903 
904  c->idct[0] = ff_hevc_idct_4x4_10_sse2;
905  c->idct[1] = ff_hevc_idct_8x8_10_sse2;
906 
907  c->add_residual[1] = ff_hevc_add_residual_8_10_sse2;
908  c->add_residual[2] = ff_hevc_add_residual_16_10_sse2;
909  c->add_residual[3] = ff_hevc_add_residual_32_10_sse2;
910  }
911  if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) {
912  c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3;
913  c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_ssse3;
914  }
915  if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) {
916  EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 10, sse4);
917  EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 10, sse4);
918  EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 10, sse4);
919  EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv, 10, sse4);
920 
921  QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 10, sse4);
922  QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h, 10, sse4);
923  QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 10, sse4);
924  QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 10, sse4);
925  }
926  if (EXTERNAL_AVX(cpu_flags)) {
927  c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_avx;
928  c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_avx;
929  if (ARCH_X86_64) {
930  c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_avx;
931  c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_avx;
932 
933  c->idct[2] = ff_hevc_idct_16x16_10_avx;
934  c->idct[3] = ff_hevc_idct_32x32_10_avx;
935  }
936 
937  c->idct[0] = ff_hevc_idct_4x4_10_avx;
938  c->idct[1] = ff_hevc_idct_8x8_10_avx;
939 
940  SAO_BAND_INIT(10, avx);
941  }
942  if (EXTERNAL_AVX2(cpu_flags)) {
943  c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_10_avx2;
944  }
946  c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_avx2;
947  c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_avx2;
948  if (ARCH_X86_64) {
949  c->put_hevc_epel[5][0][0] = ff_hevc_put_hevc_pel_pixels16_10_avx2;
950  c->put_hevc_epel[6][0][0] = ff_hevc_put_hevc_pel_pixels24_10_avx2;
951  c->put_hevc_epel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_10_avx2;
952  c->put_hevc_epel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_10_avx2;
953  c->put_hevc_epel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_10_avx2;
954 
955  c->put_hevc_qpel[5][0][0] = ff_hevc_put_hevc_pel_pixels16_10_avx2;
956  c->put_hevc_qpel[6][0][0] = ff_hevc_put_hevc_pel_pixels24_10_avx2;
957  c->put_hevc_qpel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_10_avx2;
958  c->put_hevc_qpel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_10_avx2;
959  c->put_hevc_qpel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_10_avx2;
960 
961  c->put_hevc_epel_uni[5][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2;
962  c->put_hevc_epel_uni[6][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2;
963  c->put_hevc_epel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2;
964  c->put_hevc_epel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels96_8_avx2;
965  c->put_hevc_epel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels128_8_avx2;
966 
967  c->put_hevc_qpel_uni[5][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2;
968  c->put_hevc_qpel_uni[6][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2;
969  c->put_hevc_qpel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2;
970  c->put_hevc_qpel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels96_8_avx2;
971  c->put_hevc_qpel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels128_8_avx2;
972 
973  c->put_hevc_epel_bi[5][0][0] = ff_hevc_put_hevc_bi_pel_pixels16_10_avx2;
974  c->put_hevc_epel_bi[6][0][0] = ff_hevc_put_hevc_bi_pel_pixels24_10_avx2;
975  c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_10_avx2;
976  c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_10_avx2;
977  c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_10_avx2;
978  c->put_hevc_qpel_bi[5][0][0] = ff_hevc_put_hevc_bi_pel_pixels16_10_avx2;
979  c->put_hevc_qpel_bi[6][0][0] = ff_hevc_put_hevc_bi_pel_pixels24_10_avx2;
980  c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_10_avx2;
981  c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_10_avx2;
982  c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_10_avx2;
983 
984  c->put_hevc_epel[5][0][1] = ff_hevc_put_hevc_epel_h16_10_avx2;
985  c->put_hevc_epel[6][0][1] = ff_hevc_put_hevc_epel_h24_10_avx2;
986  c->put_hevc_epel[7][0][1] = ff_hevc_put_hevc_epel_h32_10_avx2;
987  c->put_hevc_epel[8][0][1] = ff_hevc_put_hevc_epel_h48_10_avx2;
988  c->put_hevc_epel[9][0][1] = ff_hevc_put_hevc_epel_h64_10_avx2;
989 
990  c->put_hevc_epel_uni[5][0][1] = ff_hevc_put_hevc_uni_epel_h16_10_avx2;
991  c->put_hevc_epel_uni[6][0][1] = ff_hevc_put_hevc_uni_epel_h24_10_avx2;
992  c->put_hevc_epel_uni[7][0][1] = ff_hevc_put_hevc_uni_epel_h32_10_avx2;
993  c->put_hevc_epel_uni[8][0][1] = ff_hevc_put_hevc_uni_epel_h48_10_avx2;
994  c->put_hevc_epel_uni[9][0][1] = ff_hevc_put_hevc_uni_epel_h64_10_avx2;
995 
996  c->put_hevc_epel_bi[5][0][1] = ff_hevc_put_hevc_bi_epel_h16_10_avx2;
997  c->put_hevc_epel_bi[6][0][1] = ff_hevc_put_hevc_bi_epel_h24_10_avx2;
998  c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_hevc_bi_epel_h32_10_avx2;
999  c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_hevc_bi_epel_h48_10_avx2;
1000  c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_hevc_bi_epel_h64_10_avx2;
1001 
1002  c->put_hevc_epel[5][1][0] = ff_hevc_put_hevc_epel_v16_10_avx2;
1003  c->put_hevc_epel[6][1][0] = ff_hevc_put_hevc_epel_v24_10_avx2;
1004  c->put_hevc_epel[7][1][0] = ff_hevc_put_hevc_epel_v32_10_avx2;
1005  c->put_hevc_epel[8][1][0] = ff_hevc_put_hevc_epel_v48_10_avx2;
1006  c->put_hevc_epel[9][1][0] = ff_hevc_put_hevc_epel_v64_10_avx2;
1007 
1008  c->put_hevc_epel_uni[5][1][0] = ff_hevc_put_hevc_uni_epel_v16_10_avx2;
1009  c->put_hevc_epel_uni[6][1][0] = ff_hevc_put_hevc_uni_epel_v24_10_avx2;
1010  c->put_hevc_epel_uni[7][1][0] = ff_hevc_put_hevc_uni_epel_v32_10_avx2;
1011  c->put_hevc_epel_uni[8][1][0] = ff_hevc_put_hevc_uni_epel_v48_10_avx2;
1012  c->put_hevc_epel_uni[9][1][0] = ff_hevc_put_hevc_uni_epel_v64_10_avx2;
1013 
1014  c->put_hevc_epel_bi[5][1][0] = ff_hevc_put_hevc_bi_epel_v16_10_avx2;
1015  c->put_hevc_epel_bi[6][1][0] = ff_hevc_put_hevc_bi_epel_v24_10_avx2;
1016  c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_hevc_bi_epel_v32_10_avx2;
1017  c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_hevc_bi_epel_v48_10_avx2;
1018  c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_hevc_bi_epel_v64_10_avx2;
1019 
1020  c->put_hevc_epel[5][1][1] = ff_hevc_put_hevc_epel_hv16_10_avx2;
1021  c->put_hevc_epel[6][1][1] = ff_hevc_put_hevc_epel_hv24_10_avx2;
1022  c->put_hevc_epel[7][1][1] = ff_hevc_put_hevc_epel_hv32_10_avx2;
1023  c->put_hevc_epel[8][1][1] = ff_hevc_put_hevc_epel_hv48_10_avx2;
1024  c->put_hevc_epel[9][1][1] = ff_hevc_put_hevc_epel_hv64_10_avx2;
1025 
1026  c->put_hevc_epel_uni[5][1][1] = ff_hevc_put_hevc_uni_epel_hv16_10_avx2;
1027  c->put_hevc_epel_uni[6][1][1] = ff_hevc_put_hevc_uni_epel_hv24_10_avx2;
1028  c->put_hevc_epel_uni[7][1][1] = ff_hevc_put_hevc_uni_epel_hv32_10_avx2;
1029  c->put_hevc_epel_uni[8][1][1] = ff_hevc_put_hevc_uni_epel_hv48_10_avx2;
1030  c->put_hevc_epel_uni[9][1][1] = ff_hevc_put_hevc_uni_epel_hv64_10_avx2;
1031 
1032  c->put_hevc_epel_bi[5][1][1] = ff_hevc_put_hevc_bi_epel_hv16_10_avx2;
1033  c->put_hevc_epel_bi[6][1][1] = ff_hevc_put_hevc_bi_epel_hv24_10_avx2;
1034  c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_hevc_bi_epel_hv32_10_avx2;
1035  c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_hevc_bi_epel_hv48_10_avx2;
1036  c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_hevc_bi_epel_hv64_10_avx2;
1037 
1038  c->put_hevc_qpel[5][0][1] = ff_hevc_put_hevc_qpel_h16_10_avx2;
1039  c->put_hevc_qpel[6][0][1] = ff_hevc_put_hevc_qpel_h24_10_avx2;
1040  c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_10_avx2;
1041  c->put_hevc_qpel[8][0][1] = ff_hevc_put_hevc_qpel_h48_10_avx2;
1042  c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_10_avx2;
1043 
1044  c->put_hevc_qpel_uni[5][0][1] = ff_hevc_put_hevc_uni_qpel_h16_10_avx2;
1045  c->put_hevc_qpel_uni[6][0][1] = ff_hevc_put_hevc_uni_qpel_h24_10_avx2;
1046  c->put_hevc_qpel_uni[7][0][1] = ff_hevc_put_hevc_uni_qpel_h32_10_avx2;
1047  c->put_hevc_qpel_uni[8][0][1] = ff_hevc_put_hevc_uni_qpel_h48_10_avx2;
1048  c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_hevc_uni_qpel_h64_10_avx2;
1049 
1050  c->put_hevc_qpel_bi[5][0][1] = ff_hevc_put_hevc_bi_qpel_h16_10_avx2;
1051  c->put_hevc_qpel_bi[6][0][1] = ff_hevc_put_hevc_bi_qpel_h24_10_avx2;
1052  c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_hevc_bi_qpel_h32_10_avx2;
1053  c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_hevc_bi_qpel_h48_10_avx2;
1054  c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_hevc_bi_qpel_h64_10_avx2;
1055 
1056  c->put_hevc_qpel[5][1][0] = ff_hevc_put_hevc_qpel_v16_10_avx2;
1057  c->put_hevc_qpel[6][1][0] = ff_hevc_put_hevc_qpel_v24_10_avx2;
1058  c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_10_avx2;
1059  c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_10_avx2;
1060  c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_10_avx2;
1061 
1062  c->put_hevc_qpel_uni[5][1][0] = ff_hevc_put_hevc_uni_qpel_v16_10_avx2;
1063  c->put_hevc_qpel_uni[6][1][0] = ff_hevc_put_hevc_uni_qpel_v24_10_avx2;
1064  c->put_hevc_qpel_uni[7][1][0] = ff_hevc_put_hevc_uni_qpel_v32_10_avx2;
1065  c->put_hevc_qpel_uni[8][1][0] = ff_hevc_put_hevc_uni_qpel_v48_10_avx2;
1066  c->put_hevc_qpel_uni[9][1][0] = ff_hevc_put_hevc_uni_qpel_v64_10_avx2;
1067 
1068  c->put_hevc_qpel_bi[5][1][0] = ff_hevc_put_hevc_bi_qpel_v16_10_avx2;
1069  c->put_hevc_qpel_bi[6][1][0] = ff_hevc_put_hevc_bi_qpel_v24_10_avx2;
1070  c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_hevc_bi_qpel_v32_10_avx2;
1071  c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_hevc_bi_qpel_v48_10_avx2;
1072  c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_hevc_bi_qpel_v64_10_avx2;
1073 
1074  c->put_hevc_qpel[5][1][1] = ff_hevc_put_hevc_qpel_hv16_10_avx2;
1075  c->put_hevc_qpel[6][1][1] = ff_hevc_put_hevc_qpel_hv24_10_avx2;
1076  c->put_hevc_qpel[7][1][1] = ff_hevc_put_hevc_qpel_hv32_10_avx2;
1077  c->put_hevc_qpel[8][1][1] = ff_hevc_put_hevc_qpel_hv48_10_avx2;
1078  c->put_hevc_qpel[9][1][1] = ff_hevc_put_hevc_qpel_hv64_10_avx2;
1079 
1080  c->put_hevc_qpel_uni[5][1][1] = ff_hevc_put_hevc_uni_qpel_hv16_10_avx2;
1081  c->put_hevc_qpel_uni[6][1][1] = ff_hevc_put_hevc_uni_qpel_hv24_10_avx2;
1082  c->put_hevc_qpel_uni[7][1][1] = ff_hevc_put_hevc_uni_qpel_hv32_10_avx2;
1083  c->put_hevc_qpel_uni[8][1][1] = ff_hevc_put_hevc_uni_qpel_hv48_10_avx2;
1084  c->put_hevc_qpel_uni[9][1][1] = ff_hevc_put_hevc_uni_qpel_hv64_10_avx2;
1085 
1086  c->put_hevc_qpel_bi[5][1][1] = ff_hevc_put_hevc_bi_qpel_hv16_10_avx2;
1087  c->put_hevc_qpel_bi[6][1][1] = ff_hevc_put_hevc_bi_qpel_hv24_10_avx2;
1088  c->put_hevc_qpel_bi[7][1][1] = ff_hevc_put_hevc_bi_qpel_hv32_10_avx2;
1089  c->put_hevc_qpel_bi[8][1][1] = ff_hevc_put_hevc_bi_qpel_hv48_10_avx2;
1090  c->put_hevc_qpel_bi[9][1][1] = ff_hevc_put_hevc_bi_qpel_hv64_10_avx2;
1091  }
1092  SAO_BAND_INIT(10, avx2);
1093  SAO_EDGE_INIT(10, avx2);
1094 
1095  c->add_residual[2] = ff_hevc_add_residual_16_10_avx2;
1096  c->add_residual[3] = ff_hevc_add_residual_32_10_avx2;
1097  }
1098  } else if (bit_depth == 12) {
1099  if (EXTERNAL_MMXEXT(cpu_flags)) {
1100  c->idct_dc[0] = ff_hevc_idct_4x4_dc_12_mmxext;
1101  c->idct_dc[1] = ff_hevc_idct_8x8_dc_12_mmxext;
1102  }
1103  if (EXTERNAL_SSE2(cpu_flags)) {
1104  c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_sse2;
1105  c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_sse2;
1106  if (ARCH_X86_64) {
1107  c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_sse2;
1108  c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_sse2;
1109  }
1110  SAO_BAND_INIT(12, sse2);
1111  SAO_EDGE_INIT(12, sse2);
1112 
1113  c->idct_dc[1] = ff_hevc_idct_8x8_dc_12_sse2;
1114  c->idct_dc[2] = ff_hevc_idct_16x16_dc_12_sse2;
1115  c->idct_dc[3] = ff_hevc_idct_32x32_dc_12_sse2;
1116  }
1117  if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) {
1118  c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_ssse3;
1119  c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_ssse3;
1120  }
1121  if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) {
1122  EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 12, sse4);
1123  EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 12, sse4);
1124  EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v, 12, sse4);
1125  EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv, 12, sse4);
1126 
1127  QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 12, sse4);
1128  QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h, 12, sse4);
1129  QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 12, sse4);
1130  QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 12, sse4);
1131  }
1132  if (EXTERNAL_AVX(cpu_flags)) {
1133  c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_avx;
1134  c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_avx;
1135  if (ARCH_X86_64) {
1136  c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_avx;
1137  c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_avx;
1138  }
1139  SAO_BAND_INIT(12, avx);
1140  }
1141  if (EXTERNAL_AVX2(cpu_flags)) {
1142  c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_12_avx2;
1143  }
1145  c->idct_dc[2] = ff_hevc_idct_16x16_dc_12_avx2;
1146  c->idct_dc[3] = ff_hevc_idct_32x32_dc_12_avx2;
1147 
1148  SAO_BAND_INIT(12, avx2);
1149  SAO_EDGE_INIT(12, avx2);
1150  }
1151  }
1152 }
bit_depth
static void bit_depth(AudioStatsContext *s, uint64_t mask, uint64_t imask, AVRational *depth)
Definition: af_astats.c:254
cpu.h
LFL_FUNCS
#define LFL_FUNCS(type, depth, opt)
Definition: hevcdsp_init.c:42
mem_internal.h
ff_hevc_add_residual_16_8_sse2
void ff_hevc_add_residual_16_8_sse2(uint8_t *dst, int16_t *res, ptrdiff_t stride)
ff_hevc_put_hevc_uni_pel_pixels48_8_avx2
void ff_hevc_put_hevc_uni_pel_pixels48_8_avx2(uint8_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_add_residual_32_8_sse2
void ff_hevc_add_residual_32_8_sse2(uint8_t *dst, int16_t *res, ptrdiff_t stride)
ff_hevc_put_hevc_bi_pel_pixels16_10_avx2
void ff_hevc_put_hevc_bi_pel_pixels16_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_add_residual_32_8_avx
void ff_hevc_add_residual_32_8_avx(uint8_t *dst, int16_t *res, ptrdiff_t stride)
ff_hevc_add_residual_16_10_avx2
void ff_hevc_add_residual_16_10_avx2(uint8_t *dst, int16_t *res, ptrdiff_t stride)
mc_rep_funcs2
#define mc_rep_funcs2(name, bitd, step1, step2, W, opt)
Definition: hevcdsp_init.c:170
EXTERNAL_AVX2_FAST
#define EXTERNAL_AVX2_FAST(flags)
Definition: cpu.h:79
mc_rep_func
#define mc_rep_func(name, bitd, step, W, opt)
Definition: hevcdsp_init.c:88
ff_hevc_add_residual_32_10_sse2
void ff_hevc_add_residual_32_10_sse2(uint8_t *dst, int16_t *res, ptrdiff_t stride)
ff_hevc_add_residual_8_8_sse2
void ff_hevc_add_residual_8_8_sse2(uint8_t *dst, int16_t *res, ptrdiff_t stride)
ff_hevc_put_hevc_uni_pel_pixels96_8_avx2
void ff_hevc_put_hevc_uni_pel_pixels96_8_avx2(uint8_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:95
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:50
ff_hevc_put_hevc_bi_pel_pixels64_10_avx2
void ff_hevc_put_hevc_bi_pel_pixels64_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
QPEL_LINKS
#define QPEL_LINKS(pointer, my, mx, fname, bitd, opt)
Definition: hevcdsp_init.c:699
IDCT_FUNCS
#define IDCT_FUNCS(opt)
Definition: hevcdsp_init.c:75
ff_hevc_add_residual_4_8_mmxext
void ff_hevc_add_residual_4_8_mmxext(uint8_t *dst, int16_t *res, ptrdiff_t stride)
ff_hevc_add_residual_32_10_avx2
void ff_hevc_add_residual_32_10_avx2(uint8_t *dst, int16_t *res, ptrdiff_t stride)
ff_hevc_put_hevc_bi_pel_pixels64_8_avx2
void ff_hevc_put_hevc_bi_pel_pixels64_8_avx2(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_add_residual_32_8_avx2
void ff_hevc_add_residual_32_8_avx2(uint8_t *dst, int16_t *res, ptrdiff_t stride)
EPEL_LINKS
#define EPEL_LINKS(pointer, my, mx, fname, bitd, opt)
Definition: hevcdsp_init.c:689
EXTERNAL_AVX2
#define EXTERNAL_AVX2(flags)
Definition: cpu.h:78
ff_hevc_add_residual_16_10_sse2
void ff_hevc_add_residual_16_10_sse2(uint8_t *dst, int16_t *res, ptrdiff_t stride)
LFC_FUNCS
#define LFC_FUNCS(type, depth, opt)
Definition: hevcdsp_init.c:38
get_bits.h
ff_hevc_put_hevc_bi_pel_pixels48_10_avx2
void ff_hevc_put_hevc_bi_pel_pixels48_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
hevcdsp.h
ff_hevc_put_hevc_uni_pel_pixels64_8_avx2
void ff_hevc_put_hevc_uni_pel_pixels64_8_avx2(uint8_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_dsp_init_x86
void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
Definition: hevcdsp_init.c:709
hevcdsp.h
ff_hevc_put_hevc_pel_pixels16_10_avx2
void ff_hevc_put_hevc_pel_pixels16_10_avx2(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
c
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can and has in some cases led to effects beyond the output of computations. The signed integer overflow problem in speed critical code: Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not [...]
Definition: undefined.txt:32
ff_hevc_add_residual_8_10_sse2
void ff_hevc_add_residual_8_10_sse2(uint8_t *dst, int16_t *res, ptrdiff_t stride)
ff_hevc_put_hevc_uni_pel_pixels128_8_avx2
void ff_hevc_put_hevc_uni_pel_pixels128_8_avx2(uint8_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
cpu.h
ff_hevc_put_hevc_pel_pixels48_10_avx2
void ff_hevc_put_hevc_pel_pixels48_10_avx2(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
asm.h
HEVCDSPContext
Definition: hevcdsp.h:47
SAO_BAND_FILTER_FUNCS
#define SAO_BAND_FILTER_FUNCS(bitd, opt)
Definition: hevcdsp_init.c:632
EXTERNAL_SSE2
#define EXTERNAL_SSE2(flags)
Definition: cpu.h:59
SAO_EDGE_FILTER_FUNCS
#define SAO_EDGE_FILTER_FUNCS(bitd, opt)
Definition: hevcdsp_init.c:662
SAO_EDGE_INIT
#define SAO_EDGE_INIT(bitd, opt)
Definition: hevcdsp_init.c:681
ff_hevc_put_hevc_bi_pel_pixels24_10_avx2
void ff_hevc_put_hevc_bi_pel_pixels24_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_put_hevc_pel_pixels64_10_avx2
void ff_hevc_put_hevc_pel_pixels64_10_avx2(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_put_hevc_pel_pixels48_8_avx2
void ff_hevc_put_hevc_pel_pixels48_8_avx2(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
IDCT_DC_FUNCS
#define IDCT_DC_FUNCS(W, opt)
Definition: hevcdsp_init.c:62
mc_rep_funcs
#define mc_rep_funcs(name, bitd, step, W, opt)
Definition: hevcdsp_init.c:135
ff_hevc_put_hevc_bi_pel_pixels48_8_avx2
void ff_hevc_put_hevc_bi_pel_pixels48_8_avx2(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_put_hevc_uni_pel_pixels32_8_avx2
void ff_hevc_put_hevc_uni_pel_pixels32_8_avx2(uint8_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_put_hevc_pel_pixels64_8_avx2
void ff_hevc_put_hevc_pel_pixels64_8_avx2(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
mc_rep_bi_func
#define mc_rep_bi_func(name, bitd, step, W, opt)
Definition: hevcdsp_init.c:117
EXTERNAL_AVX
#define EXTERNAL_AVX(flags)
Definition: cpu.h:70
EXTERNAL_SSE4
#define EXTERNAL_SSE4(flags)
Definition: cpu.h:68
ff_hevc_add_residual_8_8_avx
void ff_hevc_add_residual_8_8_avx(uint8_t *dst, int16_t *res, ptrdiff_t stride)
mc_rep_uni_func
#define mc_rep_uni_func(name, bitd, step, W, opt)
Definition: hevcdsp_init.c:102
SAO_BAND_INIT
#define SAO_BAND_INIT(bitd, opt)
Definition: hevcdsp_init.c:654
ff_hevc_add_residual_16_8_avx
void ff_hevc_add_residual_16_8_avx(uint8_t *dst, int16_t *res, ptrdiff_t stride)
ff_hevc_add_residual_4_10_mmxext
void ff_hevc_add_residual_4_10_mmxext(uint8_t *dst, int16_t *res, ptrdiff_t stride)
ff_hevc_put_hevc_bi_pel_pixels32_8_avx2
void ff_hevc_put_hevc_bi_pel_pixels32_8_avx2(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
EXTERNAL_SSSE3
#define EXTERNAL_SSSE3(flags)
Definition: cpu.h:65
ff_hevc_put_hevc_pel_pixels32_10_avx2
void ff_hevc_put_hevc_pel_pixels32_10_avx2(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_put_hevc_pel_pixels32_8_avx2
void ff_hevc_put_hevc_pel_pixels32_8_avx2(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
EXTERNAL_MMXEXT
#define EXTERNAL_MMXEXT(flags)
Definition: cpu.h:57
ff_hevc_put_hevc_bi_pel_pixels32_10_avx2
void ff_hevc_put_hevc_bi_pel_pixels32_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_put_hevc_pel_pixels24_10_avx2
void ff_hevc_put_hevc_pel_pixels24_10_avx2(int16_t *dst, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)