Go to the documentation of this file.
/*
 * Declare the put function ff_vvc_put_<name>_<depth>_<opt>.
 *
 * name, depth and opt are pasted into the symbol name.  The expansion is a
 * complete declaration including its trailing semicolon, so invocations are
 * written without one.
 */
#define PUT_PROTOTYPE(name, depth, opt) \
void ff_vvc_put_ ## name ## _ ## depth ## _##opt(int16_t *dst, \
        const uint8_t *src, ptrdiff_t srcstride, int height, \
        const int8_t *hf, const int8_t *vf, int width);
/*
 * Emit one PUT_PROTOTYPE() for each of the suffixes 2, 4, 8, 12, 16, 24, 32,
 * 48, 64 and 128 appended to name.  bitd and opt are forwarded unchanged.
 */
#define PUT_PROTOTYPES(name, bitd, opt) \
    PUT_PROTOTYPE(name##2,   bitd, opt) \
    PUT_PROTOTYPE(name##4,   bitd, opt) \
    PUT_PROTOTYPE(name##8,   bitd, opt) \
    PUT_PROTOTYPE(name##12,  bitd, opt) \
    PUT_PROTOTYPE(name##16,  bitd, opt) \
    PUT_PROTOTYPE(name##24,  bitd, opt) \
    PUT_PROTOTYPE(name##32,  bitd, opt) \
    PUT_PROTOTYPE(name##48,  bitd, opt) \
    PUT_PROTOTYPE(name##64,  bitd, opt) \
    PUT_PROTOTYPE(name##128, bitd, opt)
/*
 * Emit PUT_PROTOTYPES() for name three times, with 8, 10 and 12 as the
 * bitd argument.
 */
#define PUT_BPC_PROTOTYPES(name, opt) \
    PUT_PROTOTYPES(name, 8,  opt) \
    PUT_PROTOTYPES(name, 10, opt) \
    PUT_PROTOTYPES(name, 12, opt)
/*
 * Emit PUT_BPC_PROTOTYPES() for the <n>tap_h, <n>tap_v and <n>tap_hv name
 * stems, where n is pasted in front of "tap".
 */
#define PUT_TAP_PROTOTYPES(n, opt) \
    PUT_BPC_PROTOTYPES(n##tap_h,  opt) \
    PUT_BPC_PROTOTYPES(n##tap_v,  opt) \
    PUT_BPC_PROTOTYPES(n##tap_hv, opt)
/*
 * Symbol-name builders:
 *   bf(fn, bd, opt)  -> fn_<bd>_<opt>       e.g. bf(f, 10, avx2) is f_10_avx2
 *   BF(fn, bpc, opt) -> fn_<bpc>bpc_<opt>   e.g. BF(f, 16, avx2) is f_16bpc_avx2
 */
#define bf(fn, bd, opt) fn##_##bd##_##opt
#define BF(fn, bpc, opt) fn##_##bpc##bpc_##opt
/*
 * Declare the two BF()-named averaging functions for one bpc/opt pair:
 * ff_vvc_avg_<bpc>bpc_<opt> and ff_vvc_w_avg_<bpc>bpc_<opt>.
 *
 * All integer arguments of these declarations are intptr_t, and both take
 * pixel_max as their last parameter.  The expansion ends with a semicolon.
 */
#define AVG_BPC_PROTOTYPES(bpc, opt) \
void BF(ff_vvc_avg, bpc, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
    const int16_t *src0, const int16_t *src1, intptr_t width, \
    intptr_t height, intptr_t pixel_max); \
void BF(ff_vvc_w_avg, bpc, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
    const int16_t *src0, const int16_t *src1, intptr_t width, \
    intptr_t height, intptr_t denom, intptr_t w0, intptr_t w1, \
    intptr_t o0, intptr_t o1, intptr_t pixel_max);
/*
 * Declare the two bf()-named averaging functions for one bd/opt pair:
 * ff_vvc_avg_<bd>_<opt> and ff_vvc_w_avg_<bd>_<opt>.
 *
 * These declarations use plain int arguments and have no pixel_max
 * parameter.  The expansion ends with a semicolon.
 */
#define AVG_PROTOTYPES(bd, opt) \
void bf(ff_vvc_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
    const int16_t *src0, const int16_t *src1, int width, int height); \
void bf(ff_vvc_w_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
    const int16_t *src0, const int16_t *src1, int width, int height, \
    int denom, int w0, int w1, int o0, int o1);
91 #if HAVE_SSE4_EXTERNAL
/*
 * Define ff_vvc_put_<name>_<depth>_<opt> as a forwarder to
 * ff_h2656_put_<name>_<depth>_<opt>.
 *
 * The forwarder passes its own arguments through and inserts
 * 2 * MAX_PB_SIZE as the second argument of the h2656 function, directly
 * after dst.  The expansion is a complete function definition, so
 * invocations are written at file scope without a trailing semicolon.
 */
#define FW_PUT(name, depth, opt) \
void ff_vvc_put_ ## name ## _ ## depth ## _##opt(int16_t *dst, \
        const uint8_t *src, ptrdiff_t srcstride, int height, \
        const int8_t *hf, const int8_t *vf, int width) \
{ \
    ff_h2656_put_## name ## _ ## depth ## _##opt(dst, 2 * MAX_PB_SIZE, src, \
            srcstride, height, hf, vf, width); \
}
/*
 * Emit FW_PUT() for fname with each of the suffixes 4, 8, 16, 32, 64 and 128.
 * The last line deliberately has no continuation backslash so the macro ends
 * here regardless of what follows it.
 */
#define FW_PUT_TAP(fname, bitd, opt) \
    FW_PUT(fname##4,   bitd, opt) \
    FW_PUT(fname##8,   bitd, opt) \
    FW_PUT(fname##16,  bitd, opt) \
    FW_PUT(fname##32,  bitd, opt) \
    FW_PUT(fname##64,  bitd, opt) \
    FW_PUT(fname##128, bitd, opt)
/*
 * Like FW_PUT_TAP(), but first emits the additional FW_PUT() for the
 * suffix 2 (fname##2).
 */
#define FW_PUT_4TAP(fname, bitd, opt) \
    FW_PUT(fname ## 2, bitd, opt) \
    FW_PUT_TAP(fname, bitd, opt)
/*
 * Emit the SSE4 FW_PUT_4TAP() forwarders for the pixels, 4tap_h, 4tap_v and
 * 4tap_hv name stems at the given bitd.
 */
#define FW_PUT_4TAP_SSE4(bitd) \
    FW_PUT_4TAP(pixels,  bitd, sse4) \
    FW_PUT_4TAP(4tap_h,  bitd, sse4) \
    FW_PUT_4TAP(4tap_v,  bitd, sse4) \
    FW_PUT_4TAP(4tap_hv, bitd, sse4)
/*
 * Emit the SSE4 FW_PUT_TAP() forwarders for the 8tap_h, 8tap_v and 8tap_hv
 * name stems at the given bitd.
 */
#define FW_PUT_8TAP_SSE4(bitd) \
    FW_PUT_TAP(8tap_h,  bitd, sse4) \
    FW_PUT_TAP(8tap_v,  bitd, sse4) \
    FW_PUT_TAP(8tap_hv, bitd, sse4)
/* Emit both the 4-tap and the 8-tap SSE4 forwarder sets for one bitd. */
#define FW_PUT_SSE4(bitd) \
    FW_PUT_4TAP_SSE4(bitd) \
    FW_PUT_8TAP_SSE4(bitd)
131 #if HAVE_AVX2_EXTERNAL
/*
 * Emit the AVX2 FW_PUT() forwarders for <n>tap_h and <n>tap_v with the
 * suffixes 32, 64 and 128.  No hv variants are emitted by this macro.
 */
#define FW_PUT_TAP_AVX2(n, bitd) \
    FW_PUT(n ## tap_h32,  bitd, avx2) \
    FW_PUT(n ## tap_h64,  bitd, avx2) \
    FW_PUT(n ## tap_h128, bitd, avx2) \
    FW_PUT(n ## tap_v32,  bitd, avx2) \
    FW_PUT(n ## tap_v64,  bitd, avx2) \
    FW_PUT(n ## tap_v128, bitd, avx2)
/*
 * Emit the AVX2 forwarders for pixels32/64/128 plus the FW_PUT_TAP_AVX2()
 * sets for n = 4 and n = 8.  The last line deliberately has no continuation
 * backslash so the macro ends here regardless of what follows it.
 */
#define FW_PUT_AVX2(bitd) \
    FW_PUT(pixels32,  bitd, avx2) \
    FW_PUT(pixels64,  bitd, avx2) \
    FW_PUT(pixels128, bitd, avx2) \
    FW_PUT_TAP_AVX2(4, bitd) \
    FW_PUT_TAP_AVX2(8, bitd)
/*
 * Emit the AVX2 FW_PUT() forwarders for <n>tap_h16, <n>tap_v16 and for
 * <n>tap_hv with the suffixes 16, 32, 64 and 128.
 */
#define FW_PUT_TAP_16BPC_AVX2(n, bitd) \
    FW_PUT(n ## tap_h16,   bitd, avx2) \
    FW_PUT(n ## tap_v16,   bitd, avx2) \
    FW_PUT(n ## tap_hv16,  bitd, avx2) \
    FW_PUT(n ## tap_hv32,  bitd, avx2) \
    FW_PUT(n ## tap_hv64,  bitd, avx2) \
    FW_PUT(n ## tap_hv128, bitd, avx2)
/*
 * Emit the AVX2 forwarder for pixels16 plus the FW_PUT_TAP_16BPC_AVX2() sets
 * for n = 4 and n = 8 at the given bitd.
 */
#define FW_PUT_16BPC_AVX2(bitd) \
    FW_PUT(pixels16, bitd, avx2) \
    FW_PUT_TAP_16BPC_AVX2(4, bitd) \
    FW_PUT_TAP_16BPC_AVX2(8, bitd)
164 FW_PUT_16BPC_AVX2(10)
165 FW_PUT_16BPC_AVX2(12)
/*
 * Define the bf()-named wrappers ff_vvc_avg_<bd>_<opt> and
 * ff_vvc_w_avg_<bd>_<opt> around the BF()-named functions
 * ff_vvc_avg_<bpc>bpc_<opt> and ff_vvc_w_avg_<bpc>bpc_<opt>.
 *
 * Each wrapper forwards its arguments unchanged and appends
 * (1 << bd) - 1 as the final argument of the BF() function.  The expansion
 * is two complete function definitions, so invocations are written at file
 * scope without a trailing semicolon.
 */
#define AVG_FUNCS(bpc, bd, opt) \
void bf(ff_vvc_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
    const int16_t *src0, const int16_t *src1, int width, int height) \
{ \
    BF(ff_vvc_avg, bpc, opt)(dst, dst_stride, src0, src1, width, height, \
                             (1 << bd) - 1); \
} \
void bf(ff_vvc_w_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
    const int16_t *src0, const int16_t *src1, int width, int height, \
    int denom, int w0, int w1, int o0, int o1) \
{ \
    BF(ff_vvc_w_avg, bpc, opt)(dst, dst_stride, src0, src1, width, height, \
                               denom, w0, w1, o0, o1, (1 << bd) - 1); \
}
181 AVG_FUNCS(8, 8, avx2)
182 AVG_FUNCS(16, 10, avx2)
183 AVG_FUNCS(16, 12, avx2)
/*
 * Store ff_vvc_put_<name>_<D>_<opt> in dst[C][W][idx1][idx2] and
 * ff_h2656_put_uni_<name>_<D>_<opt> in the same slot of the table named
 * dst##_uni ("_uni" is pasted onto the last token of dst).
 *
 * The expansion is two complete statements, each with its own semicolon, and
 * is not wrapped in do { } while (0): callers in this file use it both with
 * and without a trailing semicolon, so it must only be used inside a braced
 * block.  The last line deliberately has no continuation backslash.
 */
#define PEL_LINK(dst, C, W, idx1, idx2, name, D, opt) \
    dst[C][W][idx1][idx2] = ff_vvc_put_## name ## _ ## D ## _##opt; \
    dst ## _uni[C][W][idx1][idx2] = ff_h2656_put_uni_ ## name ## _ ## D ## _##opt;
/*
 * Emit PEL_LINK() for W = 1..6, pairing each index with fname plus the
 * suffix 4, 8, 16, 32, 64 and 128 respectively.  pointer, C, my, mx, bitd
 * and opt are forwarded unchanged.
 */
#define MC_TAP_LINKS(pointer, C, my, mx, fname, bitd, opt) \
    PEL_LINK(pointer, C, 1, my, mx, fname##4,   bitd, opt); \
    PEL_LINK(pointer, C, 2, my, mx, fname##8,   bitd, opt); \
    PEL_LINK(pointer, C, 3, my, mx, fname##16,  bitd, opt); \
    PEL_LINK(pointer, C, 4, my, mx, fname##32,  bitd, opt); \
    PEL_LINK(pointer, C, 5, my, mx, fname##64,  bitd, opt); \
    PEL_LINK(pointer, C, 6, my, mx, fname##128, bitd, opt);
/* MC_TAP_LINKS() with the C index fixed to LUMA. */
#define MC_8TAP_LINKS(pointer, my, mx, fname, bitd, opt) \
    MC_TAP_LINKS(pointer, LUMA, my, mx, fname, bitd, opt)
/*
 * Fill c->inter.put through MC_8TAP_LINKS() with the SSE4 functions for all
 * four (my, mx) combinations: (0,0) pixels, (0,1) 8tap_h, (1,0) 8tap_v and
 * (1,1) 8tap_hv.  Expects a variable named c to be in scope.
 */
#define MC_8TAP_LINKS_SSE4(bd) \
    MC_8TAP_LINKS(c->inter.put, 0, 0, pixels,  bd, sse4); \
    MC_8TAP_LINKS(c->inter.put, 0, 1, 8tap_h,  bd, sse4); \
    MC_8TAP_LINKS(c->inter.put, 1, 0, 8tap_v,  bd, sse4); \
    MC_8TAP_LINKS(c->inter.put, 1, 1, 8tap_hv, bd, sse4)
/*
 * CHROMA counterpart of MC_8TAP_LINKS(): first links W = 0 to fname##2, then
 * emits MC_TAP_LINKS() with the C index fixed to CHROMA.  The last line
 * deliberately has no continuation backslash so the macro ends here
 * regardless of what follows it.
 */
#define MC_4TAP_LINKS(pointer, my, mx, fname, bitd, opt) \
    PEL_LINK(pointer, CHROMA, 0, my, mx, fname##2, bitd, opt); \
    MC_TAP_LINKS(pointer, CHROMA, my, mx, fname, bitd, opt)
/*
 * Fill c->inter.put through MC_4TAP_LINKS() with the SSE4 functions for all
 * four (my, mx) combinations: (0,0) pixels, (0,1) 4tap_h, (1,0) 4tap_v and
 * (1,1) 4tap_hv.  Expects a variable named c to be in scope.
 */
#define MC_4TAP_LINKS_SSE4(bd) \
    MC_4TAP_LINKS(c->inter.put, 0, 0, pixels,  bd, sse4); \
    MC_4TAP_LINKS(c->inter.put, 0, 1, 4tap_h,  bd, sse4); \
    MC_4TAP_LINKS(c->inter.put, 1, 0, 4tap_v,  bd, sse4); \
    MC_4TAP_LINKS(c->inter.put, 1, 1, 4tap_hv, bd, sse4)
/*
 * Emit the 4-tap links followed by the 8-tap links for one bd.
 *
 * There is no semicolon between the two invocations: with the macros as
 * defined in this file, MC_4TAP_LINKS_SSE4() ultimately ends in a PEL_LINK()
 * expansion, which already terminates its own statements.
 */
#define MC_LINK_SSE4(bd) \
    MC_4TAP_LINKS_SSE4(bd) \
    MC_8TAP_LINKS_SSE4(bd)
/*
 * Link the AVX2 put functions into c->inter.put for table C:
 *   W = 4, 5, 6 at [0][0] -> pixels32 / pixels64 / pixels128
 *   W = 4, 5, 6 at [0][1] -> <tap>tap_h32 / h64 / h128
 *   W = 4, 5, 6 at [1][0] -> <tap>tap_v32 / v64 / v128
 *
 * Wrapped in do { } while (0) so an invocation followed by a semicolon is a
 * single statement.  PEL_LINK() supplies its own semicolons, hence none
 * inside the body.  Expects a variable named c to be in scope.
 */
#define MC_TAP_LINKS_AVX2(C,tap,bd) do { \
        PEL_LINK(c->inter.put, C, 4, 0, 0, pixels32,      bd, avx2) \
        PEL_LINK(c->inter.put, C, 5, 0, 0, pixels64,      bd, avx2) \
        PEL_LINK(c->inter.put, C, 6, 0, 0, pixels128,     bd, avx2) \
        PEL_LINK(c->inter.put, C, 4, 0, 1, tap##tap_h32,  bd, avx2) \
        PEL_LINK(c->inter.put, C, 5, 0, 1, tap##tap_h64,  bd, avx2) \
        PEL_LINK(c->inter.put, C, 6, 0, 1, tap##tap_h128, bd, avx2) \
        PEL_LINK(c->inter.put, C, 4, 1, 0, tap##tap_v32,  bd, avx2) \
        PEL_LINK(c->inter.put, C, 5, 1, 0, tap##tap_v64,  bd, avx2) \
        PEL_LINK(c->inter.put, C, 6, 1, 0, tap##tap_v128, bd, avx2) \
    } while (0)
/*
 * Emit MC_TAP_LINKS_AVX2() for (LUMA, 8) and (CHROMA, 4) at the given bd.
 * The expansion is two statements that already carry their semicolons, so
 * it must only be used inside a braced block.
 */
#define MC_LINKS_AVX2(bd) \
    MC_TAP_LINKS_AVX2(LUMA,   8, bd); \
    MC_TAP_LINKS_AVX2(CHROMA, 4, bd);
/*
 * Link the additional AVX2 put functions into c->inter.put for table C:
 *   W = 3 at [0][0], [0][1], [1][0], [1][1]
 *         -> pixels16, <tap>tap_h16, <tap>tap_v16, <tap>tap_hv16
 *   W = 4, 5, 6 at [1][1] -> <tap>tap_hv32 / hv64 / hv128
 *
 * Wrapped in do { } while (0) so an invocation followed by a semicolon is a
 * single statement.  PEL_LINK() supplies its own semicolons, hence none
 * inside the body.  Expects a variable named c to be in scope.
 */
#define MC_TAP_LINKS_16BPC_AVX2(C, tap, bd) do { \
        PEL_LINK(c->inter.put, C, 3, 0, 0, pixels16,       bd, avx2) \
        PEL_LINK(c->inter.put, C, 3, 0, 1, tap##tap_h16,   bd, avx2) \
        PEL_LINK(c->inter.put, C, 3, 1, 0, tap##tap_v16,   bd, avx2) \
        PEL_LINK(c->inter.put, C, 3, 1, 1, tap##tap_hv16,  bd, avx2) \
        PEL_LINK(c->inter.put, C, 4, 1, 1, tap##tap_hv32,  bd, avx2) \
        PEL_LINK(c->inter.put, C, 5, 1, 1, tap##tap_hv64,  bd, avx2) \
        PEL_LINK(c->inter.put, C, 6, 1, 1, tap##tap_hv128, bd, avx2) \
    } while (0)
/*
 * Emit MC_TAP_LINKS_16BPC_AVX2() for (LUMA, 8) and (CHROMA, 4) at the given
 * bd.  The expansion is two statements that already carry their semicolons,
 * so it must only be used inside a braced block.
 */
#define MC_LINKS_16BPC_AVX2(bd) \
    MC_TAP_LINKS_16BPC_AVX2(LUMA,   8, bd); \
    MC_TAP_LINKS_16BPC_AVX2(CHROMA, 4, bd);
/*
 * Point c->inter.avg and c->inter.w_avg at the bf()-named functions
 * ff_vvc_avg_<bd>_<opt> and ff_vvc_w_avg_<bd>_<opt>.
 *
 * Wrapped in do { } while (0) so an invocation followed by a semicolon is a
 * single statement.  Expects a variable named c to be in scope.
 */
#define AVG_INIT(bd, opt) do { \
    c->inter.avg   = bf(ff_vvc_avg, bd, opt); \
    c->inter.w_avg = bf(ff_vvc_w_avg, bd, opt); \
} while (0)
269 }
else if (bd == 10) {
275 MC_LINKS_16BPC_AVX2(10);
277 }
else if (bd == 12) {
283 MC_LINKS_16BPC_AVX2(12);
void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd)
#define EXTERNAL_AVX2_FAST(flags)
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
static atomic_int cpu_flags
#define EXTERNAL_AVX2(flags)
#define AVG_PROTOTYPES(bd, opt)
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, or accessing outside allocated space. Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit-picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can lead, and in some cases has led, to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c[…]
#define PUT_BPC_PROTOTYPES(name, opt)
#define EXTERNAL_SSE4(flags)
#define AVG_BPC_PROTOTYPES(bpc, opt)
#define PUT_TAP_PROTOTYPES(n, opt)