swscale.c
/*
 * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <inttypes.h>
#include "config.h"
#include "libswscale/swscale.h"
#include "libswscale/swscale_internal.h"
#include "libavutil/attributes.h"
#include "libavutil/avassert.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/x86/cpu.h"
#include "libavutil/cpu.h"
#include "libavutil/mem_internal.h"
#include "libavutil/pixdesc.h"

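/* Ordered-dither rows for the inline-asm RGB output paths;
 * ff_updateMMXDitherTables() below picks one of the two entries per
 * destination line depending on its parity. */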
const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
    0x0103010301030103LL,
    0x0200020002000200LL,};

const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
    0x0602060206020602LL,
    0x0004000400040004LL,};

#if HAVE_INLINE_ASM

#define DITHER1XBPP

DECLARE_ASM_CONST(8, uint64_t, bF8)= 0xF8F8F8F8F8F8F8F8LL;
DECLARE_ASM_CONST(8, uint64_t, bFC)= 0xFCFCFCFCFCFCFCFCLL;

DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24A)         = 0x00FF0000FF0000FFLL;
DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24B)         = 0xFF0000FF0000FF00LL;
DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24C)         = 0x0000FF0000FF0000LL;

DECLARE_ASM_ALIGNED(8, const uint64_t, ff_bgr2YOffset)  = 0x1010101010101010ULL;
DECLARE_ASM_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
DECLARE_ASM_ALIGNED(8, const uint64_t, ff_w1111)        = 0x0001000100010001ULL;

//MMX versions
#if HAVE_MMX_INLINE
#undef RENAME
#define COMPILE_TEMPLATE_MMXEXT 0
#define RENAME(a) a ## _mmx
#include "swscale_template.c"
#endif

// MMXEXT versions
#if HAVE_MMXEXT_INLINE
#undef RENAME
#undef COMPILE_TEMPLATE_MMXEXT
#define COMPILE_TEMPLATE_MMXEXT 1
#define RENAME(a) a ## _mmxext
#include "swscale_template.c"
#endif

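/* Prepare per-line state for the inline-asm vertical scaler: select the dither
 * constants for this output line and pack source-line pointers together with
 * the vertical filter coefficients into the lumMmxFilter/chrMmxFilter/
 * alpMmxFilter blocks, duplicating edge lines where the filter window falls
 * outside the current slice. */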
void ff_updateMMXDitherTables(SwsContext *c, int dstY)
{
    const int dstH= c->dstH;
    const int flags= c->flags;

    SwsPlane *lumPlane = &c->slice[c->numSlice-2].plane[0];
    SwsPlane *chrUPlane = &c->slice[c->numSlice-2].plane[1];
    SwsPlane *alpPlane = &c->slice[c->numSlice-2].plane[3];

    int hasAlpha = c->needAlpha;
    int32_t *vLumFilterPos= c->vLumFilterPos;
    int32_t *vChrFilterPos= c->vChrFilterPos;
    int16_t *vLumFilter= c->vLumFilter;
    int16_t *vChrFilter= c->vChrFilter;
    int32_t *lumMmxFilter= c->lumMmxFilter;
    int32_t *chrMmxFilter= c->chrMmxFilter;
    int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
    const int vLumFilterSize= c->vLumFilterSize;
    const int vChrFilterSize= c->vChrFilterSize;
    const int chrDstY= dstY>>c->chrDstVSubSample;
    const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
    const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input

    c->blueDither= ff_dither8[dstY&1];
    if (c->dstFormat == AV_PIX_FMT_RGB555 || c->dstFormat == AV_PIX_FMT_BGR555)
        c->greenDither= ff_dither8[dstY&1];
    else
        c->greenDither= ff_dither4[dstY&1];
    c->redDither= ff_dither8[(dstY+1)&1];
    if (dstY < dstH - 2) {
        const int16_t **lumSrcPtr  = (const int16_t **)(void*) lumPlane->line + firstLumSrcY - lumPlane->sliceY;
        const int16_t **chrUSrcPtr = (const int16_t **)(void*) chrUPlane->line + firstChrSrcY - chrUPlane->sliceY;
        const int16_t **alpSrcPtr  = (CONFIG_SWSCALE_ALPHA && hasAlpha) ? (const int16_t **)(void*) alpPlane->line + firstLumSrcY - alpPlane->sliceY : NULL;

        int i;
        if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) {
            const int16_t **tmpY = (const int16_t **) lumPlane->tmp;

            int neg = -firstLumSrcY, i, end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize);
            for (i = 0; i < neg; i++)
                tmpY[i] = lumSrcPtr[neg];
            for ( ; i < end; i++)
                tmpY[i] = lumSrcPtr[i];
            for ( ; i < vLumFilterSize; i++)
                tmpY[i] = tmpY[i-1];
            lumSrcPtr = tmpY;

            if (alpSrcPtr) {
                const int16_t **tmpA = (const int16_t **) alpPlane->tmp;
                for (i = 0; i < neg; i++)
                    tmpA[i] = alpSrcPtr[neg];
                for ( ; i < end; i++)
                    tmpA[i] = alpSrcPtr[i];
                for ( ; i < vLumFilterSize; i++)
                    tmpA[i] = tmpA[i - 1];
                alpSrcPtr = tmpA;
            }
        }
        if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) {
            const int16_t **tmpU = (const int16_t **) chrUPlane->tmp;
            int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
            for (i = 0; i < neg; i++) {
                tmpU[i] = chrUSrcPtr[neg];
            }
            for ( ; i < end; i++) {
                tmpU[i] = chrUSrcPtr[i];
            }
            for ( ; i < vChrFilterSize; i++) {
                tmpU[i] = tmpU[i - 1];
            }
            chrUSrcPtr = tmpU;
        }

        if (flags & SWS_ACCURATE_RND) {
            int s= APCK_SIZE / 8;
            for (i=0; i<vLumFilterSize; i+=2) {
                *(const void**)&lumMmxFilter[s*i              ]= lumSrcPtr[i  ];
                *(const void**)&lumMmxFilter[s*i+APCK_PTR2/4  ]= lumSrcPtr[i+(vLumFilterSize>1)];
                lumMmxFilter[s*i+APCK_COEF/4  ]=
                lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i]
                    + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1] * (1 << 16) : 0);
                if (CONFIG_SWSCALE_ALPHA && hasAlpha) {
                    *(const void**)&alpMmxFilter[s*i              ]= alpSrcPtr[i  ];
                    *(const void**)&alpMmxFilter[s*i+APCK_PTR2/4  ]= alpSrcPtr[i+(vLumFilterSize>1)];
                    alpMmxFilter[s*i+APCK_COEF/4  ]=
                    alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4  ];
                }
            }
            for (i=0; i<vChrFilterSize; i+=2) {
                *(const void**)&chrMmxFilter[s*i              ]= chrUSrcPtr[i  ];
                *(const void**)&chrMmxFilter[s*i+APCK_PTR2/4  ]= chrUSrcPtr[i+(vChrFilterSize>1)];
                chrMmxFilter[s*i+APCK_COEF/4  ]=
                chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i]
                    + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1] * (1 << 16) : 0);
            }
        } else {
            for (i=0; i<vLumFilterSize; i++) {
                *(const void**)&lumMmxFilter[4*i+0]= lumSrcPtr[i];
                lumMmxFilter[4*i+2]=
                lumMmxFilter[4*i+3]=
                    ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001U;
                if (CONFIG_SWSCALE_ALPHA && hasAlpha) {
                    *(const void**)&alpMmxFilter[4*i+0]= alpSrcPtr[i];
                    alpMmxFilter[4*i+2]=
                    alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
                }
            }
            for (i=0; i<vChrFilterSize; i++) {
                *(const void**)&chrMmxFilter[4*i+0]= chrUSrcPtr[i];
                chrMmxFilter[4*i+2]=
                chrMmxFilter[4*i+3]=
                    ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001U;
            }
        }
    }
}
#endif /* HAVE_INLINE_ASM */

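/* Wrappers that adapt the external ff_yuv2yuvX_<opt> kernels to the yuv2planeX
 * callback signature. The SSE3/AVX2 variants process step-sized pixel groups
 * and fall back to the MMX path for unaligned destinations, with any leftover
 * tail pixels finished by ff_yuv2yuvX_mmx. */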
#define YUV2YUVX_FUNC_MMX(opt, step)  \
void ff_yuv2yuvX_ ##opt(const int16_t *filter, int filterSize, int srcOffset, \
                        uint8_t *dest, int dstW,  \
                        const uint8_t *dither, int offset); \
static void yuv2yuvX_ ##opt(const int16_t *filter, int filterSize, \
                            const int16_t **src, uint8_t *dest, int dstW, \
                            const uint8_t *dither, int offset) \
{ \
    if(dstW > 0) \
        ff_yuv2yuvX_ ##opt(filter, filterSize - 1, 0, dest - offset, dstW + offset, dither, offset); \
    return; \
}

#define YUV2YUVX_FUNC(opt, step)  \
void ff_yuv2yuvX_ ##opt(const int16_t *filter, int filterSize, int srcOffset, \
                        uint8_t *dest, int dstW,  \
                        const uint8_t *dither, int offset); \
static void yuv2yuvX_ ##opt(const int16_t *filter, int filterSize, \
                            const int16_t **src, uint8_t *dest, int dstW, \
                            const uint8_t *dither, int offset) \
{ \
    int remainder = (dstW % step); \
    int pixelsProcessed = dstW - remainder; \
    if(((uintptr_t)dest) & 15){ \
        yuv2yuvX_mmx(filter, filterSize, src, dest, dstW, dither, offset); \
        return; \
    } \
    if(pixelsProcessed > 0) \
        ff_yuv2yuvX_ ##opt(filter, filterSize - 1, 0, dest - offset, pixelsProcessed + offset, dither, offset); \
    if(remainder > 0){ \
        ff_yuv2yuvX_mmx(filter, filterSize - 1, pixelsProcessed, dest - offset, pixelsProcessed + remainder + offset, dither, offset); \
    } \
    return; \
}

#if HAVE_MMX_EXTERNAL
YUV2YUVX_FUNC_MMX(mmx, 16)
#endif
#if HAVE_MMXEXT_EXTERNAL
YUV2YUVX_FUNC_MMX(mmxext, 16)
#endif
#if HAVE_SSE3_EXTERNAL
YUV2YUVX_FUNC(sse3, 32)
#endif
#if HAVE_AVX2_EXTERNAL
YUV2YUVX_FUNC(avx2, 64)
#endif

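/* Prototypes for the external horizontal scalers: one entry point per
 * combination of filter size, input bit depth, output bit depth and SIMD level. */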
#define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
                                                SwsContext *c, int16_t *data, \
                                                int dstW, const uint8_t *src, \
                                                const int16_t *filter, \
                                                const int32_t *filterPos, int filterSize)

#define SCALE_FUNCS(filter_n, opt) \
    SCALE_FUNC(filter_n,  8, 15, opt); \
    SCALE_FUNC(filter_n,  9, 15, opt); \
    SCALE_FUNC(filter_n, 10, 15, opt); \
    SCALE_FUNC(filter_n, 12, 15, opt); \
    SCALE_FUNC(filter_n, 14, 15, opt); \
    SCALE_FUNC(filter_n, 16, 15, opt); \
    SCALE_FUNC(filter_n,  8, 19, opt); \
    SCALE_FUNC(filter_n,  9, 19, opt); \
    SCALE_FUNC(filter_n, 10, 19, opt); \
    SCALE_FUNC(filter_n, 12, 19, opt); \
    SCALE_FUNC(filter_n, 14, 19, opt); \
    SCALE_FUNC(filter_n, 16, 19, opt)

#define SCALE_FUNCS_MMX(opt) \
    SCALE_FUNCS(4, opt); \
    SCALE_FUNCS(8, opt); \
    SCALE_FUNCS(X, opt)

#define SCALE_FUNCS_SSE(opt) \
    SCALE_FUNCS(4, opt); \
    SCALE_FUNCS(8, opt); \
    SCALE_FUNCS(X4, opt); \
    SCALE_FUNCS(X8, opt)

#if ARCH_X86_32
SCALE_FUNCS_MMX(mmx);
#endif
SCALE_FUNCS_SSE(sse2);
SCALE_FUNCS_SSE(ssse3);
SCALE_FUNCS_SSE(sse4);

SCALE_FUNC(4, 8, 15, avx2);
SCALE_FUNC(X4, 8, 15, avx2);

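/* Prototypes for the external vertical scalers: multi-tap yuv2planeX for each
 * output depth and SIMD level, plus single-tap yuv2plane1 variants. */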
#define VSCALEX_FUNC(size, opt) \
void ff_yuv2planeX_ ## size ## _ ## opt(const int16_t *filter, int filterSize, \
                                        const int16_t **src, uint8_t *dest, int dstW, \
                                        const uint8_t *dither, int offset)
#define VSCALEX_FUNCS(opt) \
    VSCALEX_FUNC(8,  opt); \
    VSCALEX_FUNC(9,  opt); \
    VSCALEX_FUNC(10, opt)

#if ARCH_X86_32
VSCALEX_FUNCS(mmxext);
#endif
VSCALEX_FUNCS(sse2);
VSCALEX_FUNCS(sse4);
VSCALEX_FUNC(16, sse4);
VSCALEX_FUNCS(avx);

#define VSCALE_FUNC(size, opt) \
void ff_yuv2plane1_ ## size ## _ ## opt(const int16_t *src, uint8_t *dst, int dstW, \
                                        const uint8_t *dither, int offset)
#define VSCALE_FUNCS(opt1, opt2) \
    VSCALE_FUNC(8,  opt1); \
    VSCALE_FUNC(9,  opt2); \
    VSCALE_FUNC(10, opt2); \
    VSCALE_FUNC(16, opt1)

#if ARCH_X86_32
VSCALE_FUNCS(mmx, mmxext);
#endif
VSCALE_FUNCS(sse2, sse2);
VSCALE_FUNC(16, sse4);
VSCALE_FUNCS(avx, avx);

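/* Prototypes for the packed-input unpackers (YUYV/UYVY/NV12/NV21 and 24/32-bit
 * RGB orderings) that convert source pixels to the planar intermediate format. */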
#define INPUT_Y_FUNC(fmt, opt) \
void ff_ ## fmt ## ToY_  ## opt(uint8_t *dst, const uint8_t *src, \
                                const uint8_t *unused1, const uint8_t *unused2, \
                                int w, uint32_t *unused)
#define INPUT_UV_FUNC(fmt, opt) \
void ff_ ## fmt ## ToUV_ ## opt(uint8_t *dstU, uint8_t *dstV, \
                                const uint8_t *unused0, \
                                const uint8_t *src1, \
                                const uint8_t *src2, \
                                int w, uint32_t *unused)
#define INPUT_FUNC(fmt, opt) \
    INPUT_Y_FUNC(fmt, opt); \
    INPUT_UV_FUNC(fmt, opt)
#define INPUT_FUNCS(opt) \
    INPUT_FUNC(uyvy, opt); \
    INPUT_FUNC(yuyv, opt); \
    INPUT_UV_FUNC(nv12, opt); \
    INPUT_UV_FUNC(nv21, opt); \
    INPUT_FUNC(rgba, opt); \
    INPUT_FUNC(bgra, opt); \
    INPUT_FUNC(argb, opt); \
    INPUT_FUNC(abgr, opt); \
    INPUT_FUNC(rgb24, opt); \
    INPUT_FUNC(bgr24, opt)

#if ARCH_X86_32
INPUT_FUNCS(mmx);
#endif
INPUT_FUNCS(sse2);
INPUT_FUNCS(ssse3);
INPUT_FUNCS(avx);

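/* x86-64 only external kernels: yuv2nv12/nv21 chroma writers, full-chroma
 * GBR(A)P output functions, and planar-RGB input readers. */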
#if ARCH_X86_64
#define YUV2NV_DECL(fmt, opt) \
void ff_yuv2 ## fmt ## cX_ ## opt(enum AVPixelFormat format, const uint8_t *dither, \
                                  const int16_t *filter, int filterSize, \
                                  const int16_t **u, const int16_t **v, \
                                  uint8_t *dst, int dstWidth)

YUV2NV_DECL(nv12, avx2);
YUV2NV_DECL(nv21, avx2);

#define YUV2GBRP_FN_DECL(fmt, opt) \
void ff_yuv2##fmt##_full_X_ ##opt(SwsContext *c, const int16_t *lumFilter, \
                                  const int16_t **lumSrcx, int lumFilterSize, \
                                  const int16_t *chrFilter, const int16_t **chrUSrcx, \
                                  const int16_t **chrVSrcx, int chrFilterSize, \
                                  const int16_t **alpSrcx, uint8_t **dest, \
                                  int dstW, int y)

#define YUV2GBRP_DECL(opt) \
YUV2GBRP_FN_DECL(gbrp,       opt); \
YUV2GBRP_FN_DECL(gbrap,      opt); \
YUV2GBRP_FN_DECL(gbrp9le,    opt); \
YUV2GBRP_FN_DECL(gbrp10le,   opt); \
YUV2GBRP_FN_DECL(gbrap10le,  opt); \
YUV2GBRP_FN_DECL(gbrp12le,   opt); \
YUV2GBRP_FN_DECL(gbrap12le,  opt); \
YUV2GBRP_FN_DECL(gbrp14le,   opt); \
YUV2GBRP_FN_DECL(gbrp16le,   opt); \
YUV2GBRP_FN_DECL(gbrap16le,  opt); \
YUV2GBRP_FN_DECL(gbrpf32le,  opt); \
YUV2GBRP_FN_DECL(gbrapf32le, opt); \
YUV2GBRP_FN_DECL(gbrp9be,    opt); \
YUV2GBRP_FN_DECL(gbrp10be,   opt); \
YUV2GBRP_FN_DECL(gbrap10be,  opt); \
YUV2GBRP_FN_DECL(gbrp12be,   opt); \
YUV2GBRP_FN_DECL(gbrap12be,  opt); \
YUV2GBRP_FN_DECL(gbrp14be,   opt); \
YUV2GBRP_FN_DECL(gbrp16be,   opt); \
YUV2GBRP_FN_DECL(gbrap16be,  opt); \
YUV2GBRP_FN_DECL(gbrpf32be,  opt); \
YUV2GBRP_FN_DECL(gbrapf32be, opt)

YUV2GBRP_DECL(sse2);
YUV2GBRP_DECL(sse4);
YUV2GBRP_DECL(avx2);

#define INPUT_PLANAR_RGB_Y_FN_DECL(fmt, opt) \
void ff_planar_##fmt##_to_y_##opt(uint8_t *dst, \
                                  const uint8_t *src[4], int w, int32_t *rgb2yuv)

#define INPUT_PLANAR_RGB_UV_FN_DECL(fmt, opt) \
void ff_planar_##fmt##_to_uv_##opt(uint8_t *dstU, uint8_t *dstV, \
                                   const uint8_t *src[4], int w, int32_t *rgb2yuv)

#define INPUT_PLANAR_RGB_A_FN_DECL(fmt, opt) \
void ff_planar_##fmt##_to_a_##opt(uint8_t *dst, \
                                  const uint8_t *src[4], int w, int32_t *rgb2yuv)


#define INPUT_PLANAR_RGBXX_A_DECL(fmt, opt) \
INPUT_PLANAR_RGB_A_FN_DECL(fmt##le, opt); \
INPUT_PLANAR_RGB_A_FN_DECL(fmt##be, opt)

#define INPUT_PLANAR_RGBXX_Y_DECL(fmt, opt) \
INPUT_PLANAR_RGB_Y_FN_DECL(fmt##le, opt); \
INPUT_PLANAR_RGB_Y_FN_DECL(fmt##be, opt)

#define INPUT_PLANAR_RGBXX_UV_DECL(fmt, opt) \
INPUT_PLANAR_RGB_UV_FN_DECL(fmt##le, opt); \
INPUT_PLANAR_RGB_UV_FN_DECL(fmt##be, opt)

#define INPUT_PLANAR_RGBXX_YUVA_DECL(fmt, opt) \
INPUT_PLANAR_RGBXX_Y_DECL(fmt, opt); \
INPUT_PLANAR_RGBXX_UV_DECL(fmt, opt); \
INPUT_PLANAR_RGBXX_A_DECL(fmt, opt)

#define INPUT_PLANAR_RGBXX_YUV_DECL(fmt, opt) \
INPUT_PLANAR_RGBXX_Y_DECL(fmt, opt); \
INPUT_PLANAR_RGBXX_UV_DECL(fmt, opt)

#define INPUT_PLANAR_RGBXX_UVA_DECL(fmt, opt) \
INPUT_PLANAR_RGBXX_UV_DECL(fmt, opt); \
INPUT_PLANAR_RGBXX_A_DECL(fmt, opt)

#define INPUT_PLANAR_RGB_A_ALL_DECL(opt) \
INPUT_PLANAR_RGB_A_FN_DECL(rgb,     opt); \
INPUT_PLANAR_RGBXX_A_DECL(rgb10,    opt); \
INPUT_PLANAR_RGBXX_A_DECL(rgb12,    opt); \
INPUT_PLANAR_RGBXX_A_DECL(rgb16,    opt); \
INPUT_PLANAR_RGBXX_A_DECL(rgbf32,   opt)

#define INPUT_PLANAR_RGB_Y_ALL_DECL(opt) \
INPUT_PLANAR_RGB_Y_FN_DECL(rgb,     opt); \
INPUT_PLANAR_RGBXX_Y_DECL(rgb9,     opt); \
INPUT_PLANAR_RGBXX_Y_DECL(rgb10,    opt); \
INPUT_PLANAR_RGBXX_Y_DECL(rgb12,    opt); \
INPUT_PLANAR_RGBXX_Y_DECL(rgb14,    opt); \
INPUT_PLANAR_RGBXX_Y_DECL(rgb16,    opt); \
INPUT_PLANAR_RGBXX_Y_DECL(rgbf32,   opt)

#define INPUT_PLANAR_RGB_UV_ALL_DECL(opt) \
INPUT_PLANAR_RGB_UV_FN_DECL(rgb,    opt); \
INPUT_PLANAR_RGBXX_UV_DECL(rgb9,    opt); \
INPUT_PLANAR_RGBXX_UV_DECL(rgb10,   opt); \
INPUT_PLANAR_RGBXX_UV_DECL(rgb12,   opt); \
INPUT_PLANAR_RGBXX_UV_DECL(rgb14,   opt); \
INPUT_PLANAR_RGBXX_UV_DECL(rgb16,   opt); \
INPUT_PLANAR_RGBXX_UV_DECL(rgbf32,  opt)

INPUT_PLANAR_RGBXX_Y_DECL(rgbf32, sse2);
INPUT_PLANAR_RGB_UV_ALL_DECL(sse2);
INPUT_PLANAR_RGB_A_ALL_DECL(sse2);

INPUT_PLANAR_RGB_Y_ALL_DECL(sse4);
INPUT_PLANAR_RGB_UV_ALL_DECL(sse4);
INPUT_PLANAR_RGBXX_A_DECL(rgbf32, sse4);

INPUT_PLANAR_RGB_Y_ALL_DECL(avx2);
INPUT_PLANAR_RGB_UV_ALL_DECL(avx2);
INPUT_PLANAR_RGB_A_ALL_DECL(avx2);
#endif

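/* Runtime dispatch entry point: inspect av_get_cpu_flags() and install the
 * fastest available x86 scaler and conversion function pointers into the
 * SwsContext. */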
av_cold void ff_sws_init_swscale_x86(SwsContext *c)
{
    int cpu_flags = av_get_cpu_flags();

#if HAVE_MMX_INLINE
    if (INLINE_MMX(cpu_flags))
        sws_init_swscale_mmx(c);
#endif
#if HAVE_MMXEXT_INLINE
    if (INLINE_MMXEXT(cpu_flags))
        sws_init_swscale_mmxext(c);
#endif
    if(c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND)) {
#if HAVE_MMX_EXTERNAL
        if (EXTERNAL_MMX(cpu_flags))
            c->yuv2planeX = yuv2yuvX_mmx;
#endif
#if HAVE_MMXEXT_EXTERNAL
        if (EXTERNAL_MMXEXT(cpu_flags))
            c->yuv2planeX = yuv2yuvX_mmxext;
#endif
#if HAVE_SSE3_EXTERNAL
        if (EXTERNAL_SSE3(cpu_flags))
            c->yuv2planeX = yuv2yuvX_sse3;
#endif
#if HAVE_AVX2_EXTERNAL
        if (EXTERNAL_AVX2_FAST(cpu_flags))
            c->yuv2planeX = yuv2yuvX_avx2;
#endif
    }

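/* Helper macros that select the horizontal/vertical scaler variant matching
 * the source and destination bit depth, the filter size and the SIMD level. */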
#define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \
    if (c->srcBpc == 8) { \
        hscalefn = c->dstBpc <= 14 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \
                                     ff_hscale8to19_ ## filtersize ## _ ## opt1; \
    } else if (c->srcBpc == 9) { \
        hscalefn = c->dstBpc <= 14 ? ff_hscale9to15_ ## filtersize ## _ ## opt2 : \
                                     ff_hscale9to19_ ## filtersize ## _ ## opt1; \
    } else if (c->srcBpc == 10) { \
        hscalefn = c->dstBpc <= 14 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \
                                     ff_hscale10to19_ ## filtersize ## _ ## opt1; \
    } else if (c->srcBpc == 12) { \
        hscalefn = c->dstBpc <= 14 ? ff_hscale12to15_ ## filtersize ## _ ## opt2 : \
                                     ff_hscale12to19_ ## filtersize ## _ ## opt1; \
    } else if (c->srcBpc == 14 || ((c->srcFormat==AV_PIX_FMT_PAL8||isAnyRGB(c->srcFormat)) && av_pix_fmt_desc_get(c->srcFormat)->comp[0].depth<16)) { \
        hscalefn = c->dstBpc <= 14 ? ff_hscale14to15_ ## filtersize ## _ ## opt2 : \
                                     ff_hscale14to19_ ## filtersize ## _ ## opt1; \
    } else { /* c->srcBpc == 16 */ \
        av_assert0(c->srcBpc == 16);\
        hscalefn = c->dstBpc <= 14 ? ff_hscale16to15_ ## filtersize ## _ ## opt2 : \
                                     ff_hscale16to19_ ## filtersize ## _ ## opt1; \
    } \
} while (0)
#define ASSIGN_MMX_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
    switch (filtersize) { \
    case 4:  ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
    case 8:  ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
    default: ASSIGN_SCALE_FUNC2(hscalefn, X, opt1, opt2); break; \
    }
#define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit) \
switch(c->dstBpc){ \
    case 16: do_16_case; break; \
    case 10: if (!isBE(c->dstFormat) && !isSemiPlanarYUV(c->dstFormat)) vscalefn = ff_yuv2planeX_10_ ## opt; break; \
    case 9:  if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_9_ ## opt; break; \
    case 8:  if ((condition_8bit) && !c->use_mmx_vfilter) vscalefn = ff_yuv2planeX_8_ ## opt; break; \
    }
#define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk) \
    switch(c->dstBpc){ \
    case 16: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2plane1_16_ ## opt1; break; \
    case 10: if (!isBE(c->dstFormat) && !isSemiPlanarYUV(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_10_ ## opt2; break; \
    case 9:  if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_9_ ## opt2; break; \
    case 8:  vscalefn = ff_yuv2plane1_8_ ## opt1; break; \
    default: av_assert0(c->dstBpc>8); \
    }
#define case_rgb(x, X, opt) \
        case AV_PIX_FMT_ ## X: \
            c->lumToYV12 = ff_ ## x ## ToY_ ## opt; \
            if (!c->chrSrcHSubSample) \
                c->chrToYV12 = ff_ ## x ## ToUV_ ## opt; \
            break
#if ARCH_X86_32
    if (EXTERNAL_MMX(cpu_flags)) {
        ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx);
        ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx);
        ASSIGN_VSCALE_FUNC(c->yuv2plane1, mmx, mmxext, cpu_flags & AV_CPU_FLAG_MMXEXT);

        switch (c->srcFormat) {
        case AV_PIX_FMT_YA8:
            c->lumToYV12 = ff_yuyvToY_mmx;
            if (c->needAlpha)
                c->alpToYV12 = ff_uyvyToY_mmx;
            break;
        case AV_PIX_FMT_YUYV422:
            c->lumToYV12 = ff_yuyvToY_mmx;
            c->chrToYV12 = ff_yuyvToUV_mmx;
            break;
        case AV_PIX_FMT_UYVY422:
            c->lumToYV12 = ff_uyvyToY_mmx;
            c->chrToYV12 = ff_uyvyToUV_mmx;
            break;
        case AV_PIX_FMT_NV12:
            c->chrToYV12 = ff_nv12ToUV_mmx;
            break;
        case AV_PIX_FMT_NV21:
            c->chrToYV12 = ff_nv21ToUV_mmx;
            break;
        case_rgb(rgb24, RGB24, mmx);
        case_rgb(bgr24, BGR24, mmx);
        case_rgb(bgra,  BGRA,  mmx);
        case_rgb(rgba,  RGBA,  mmx);
        case_rgb(abgr,  ABGR,  mmx);
        case_rgb(argb,  ARGB,  mmx);
        default:
            break;
        }
    }
    if (EXTERNAL_MMXEXT(cpu_flags)) {
        ASSIGN_VSCALEX_FUNC(c->yuv2planeX, mmxext, , 1);
    }
#endif /* ARCH_X86_32 */
#define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
    switch (filtersize) { \
    case 4:  ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
    case 8:  ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
    default: if (filtersize & 4) ASSIGN_SCALE_FUNC2(hscalefn, X4, opt1, opt2); \
             else                ASSIGN_SCALE_FUNC2(hscalefn, X8, opt1, opt2); \
             break; \
    }
    if (EXTERNAL_SSE2(cpu_flags)) {
        ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse2, sse2);
        ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2);
        ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2, ,
                            HAVE_ALIGNED_STACK || ARCH_X86_64);
        ASSIGN_VSCALE_FUNC(c->yuv2plane1, sse2, sse2, 1);

        switch (c->srcFormat) {
        case AV_PIX_FMT_YA8:
            c->lumToYV12 = ff_yuyvToY_sse2;
            if (c->needAlpha)
                c->alpToYV12 = ff_uyvyToY_sse2;
            break;
        case AV_PIX_FMT_YUYV422:
            c->lumToYV12 = ff_yuyvToY_sse2;
            c->chrToYV12 = ff_yuyvToUV_sse2;
            break;
        case AV_PIX_FMT_UYVY422:
            c->lumToYV12 = ff_uyvyToY_sse2;
            c->chrToYV12 = ff_uyvyToUV_sse2;
            break;
        case AV_PIX_FMT_NV12:
            c->chrToYV12 = ff_nv12ToUV_sse2;
            break;
        case AV_PIX_FMT_NV21:
            c->chrToYV12 = ff_nv21ToUV_sse2;
            break;
        case_rgb(rgb24, RGB24, sse2);
        case_rgb(bgr24, BGR24, sse2);
        case_rgb(bgra,  BGRA,  sse2);
        case_rgb(rgba,  RGBA,  sse2);
        case_rgb(abgr,  ABGR,  sse2);
        case_rgb(argb,  ARGB,  sse2);
        default:
            break;
        }
    }
    if (EXTERNAL_SSSE3(cpu_flags)) {
        ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3);
        ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, ssse3, ssse3);
        switch (c->srcFormat) {
        case_rgb(rgb24, RGB24, ssse3);
        case_rgb(bgr24, BGR24, ssse3);
        default:
            break;
        }
    }
    if (EXTERNAL_SSE4(cpu_flags)) {
        /* Xto15 don't need special sse4 functions */
        ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse4, ssse3);
        ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse4, ssse3);
        ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse4,
                            if (!isBE(c->dstFormat)) c->yuv2planeX = ff_yuv2planeX_16_sse4,
                            HAVE_ALIGNED_STACK || ARCH_X86_64);
        if (c->dstBpc == 16 && !isBE(c->dstFormat))
            c->yuv2plane1 = ff_yuv2plane1_16_sse4;
    }

    if (EXTERNAL_AVX(cpu_flags)) {
        ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx, ,
                            HAVE_ALIGNED_STACK || ARCH_X86_64);
        ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx, avx, 1);

        switch (c->srcFormat) {
        case AV_PIX_FMT_YUYV422:
            c->chrToYV12 = ff_yuyvToUV_avx;
            break;
        case AV_PIX_FMT_UYVY422:
            c->chrToYV12 = ff_uyvyToUV_avx;
            break;
        case AV_PIX_FMT_NV12:
            c->chrToYV12 = ff_nv12ToUV_avx;
            break;
        case AV_PIX_FMT_NV21:
            c->chrToYV12 = ff_nv21ToUV_avx;
            break;
        case_rgb(rgb24, RGB24, avx);
        case_rgb(bgr24, BGR24, avx);
        case_rgb(bgra,  BGRA,  avx);
        case_rgb(rgba,  RGBA,  avx);
        case_rgb(abgr,  ABGR,  avx);
        case_rgb(argb,  ARGB,  avx);
        default:
            break;
        }
    }

#if ARCH_X86_64
#define ASSIGN_AVX2_SCALE_FUNC(hscalefn, filtersize) \
    switch (filtersize) { \
    case 4:  hscalefn = ff_hscale8to15_4_avx2; break; \
    default: hscalefn = ff_hscale8to15_X4_avx2; break; \
             break; \
    }

    if (EXTERNAL_AVX2_FAST(cpu_flags) && !(cpu_flags & AV_CPU_FLAG_SLOW_GATHER)) {
        if ((c->srcBpc == 8) && (c->dstBpc <= 14)) {
            if (c->chrDstW % 16 == 0)
                ASSIGN_AVX2_SCALE_FUNC(c->hcScale, c->hChrFilterSize);
            if (c->dstW % 16 == 0)
                ASSIGN_AVX2_SCALE_FUNC(c->hyScale, c->hLumFilterSize);
        }
    }

    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
        switch (c->dstFormat) {
        case AV_PIX_FMT_NV12:
        case AV_PIX_FMT_NV24:
            c->yuv2nv12cX = ff_yuv2nv12cX_avx2;
            break;
        case AV_PIX_FMT_NV21:
        case AV_PIX_FMT_NV42:
            c->yuv2nv12cX = ff_yuv2nv21cX_avx2;
            break;
        default:
            break;
        }
    }

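/* Case tables mapping the planar GBR(A)P source formats to the planar-RGB
 * reader functions declared above. */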
#define INPUT_PLANER_RGB_A_FUNC_CASE(fmt, name, opt) \
        case fmt: \
            c->readAlpPlanar = ff_planar_##name##_to_a_##opt;

#define INPUT_PLANER_RGBA_YUV_FUNC_CASE(rgb_fmt, rgba_fmt, name, opt) \
        case rgba_fmt: \
        case rgb_fmt: \
            c->readLumPlanar = ff_planar_##name##_to_y_##opt; \
            c->readChrPlanar = ff_planar_##name##_to_uv_##opt; \
            break;

#define INPUT_PLANER_RGB_YUV_FUNC_CASE(fmt, name, opt) \
        case fmt: \
            c->readLumPlanar = ff_planar_##name##_to_y_##opt; \
            c->readChrPlanar = ff_planar_##name##_to_uv_##opt; \
            break;

#define INPUT_PLANER_RGB_UV_FUNC_CASE(fmt, name, opt) \
        case fmt: \
            c->readChrPlanar = ff_planar_##name##_to_uv_##opt; \
            break;

#define INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(rgb_fmt, rgba_fmt, name, opt) \
    INPUT_PLANER_RGB_A_FUNC_CASE(rgba_fmt##LE, name##le, opt) \
    INPUT_PLANER_RGB_YUV_FUNC_CASE(rgb_fmt##LE, name##le, opt) \
    INPUT_PLANER_RGB_A_FUNC_CASE(rgba_fmt##BE, name##be, opt) \
    INPUT_PLANER_RGB_YUV_FUNC_CASE(rgb_fmt##BE, name##be, opt)

#define INPUT_PLANER_RGBAXX_UVA_FUNC_CASE(rgb_fmt, rgba_fmt, name, opt) \
    INPUT_PLANER_RGB_A_FUNC_CASE(rgba_fmt##LE, name##le, opt) \
    INPUT_PLANER_RGB_UV_FUNC_CASE(rgb_fmt##LE, name##le, opt) \
    INPUT_PLANER_RGB_A_FUNC_CASE(rgba_fmt##BE, name##be, opt) \
    INPUT_PLANER_RGB_UV_FUNC_CASE(rgb_fmt##BE, name##be, opt)

#define INPUT_PLANER_RGBAXX_YUV_FUNC_CASE(rgb_fmt, rgba_fmt, name, opt) \
    INPUT_PLANER_RGBA_YUV_FUNC_CASE(rgb_fmt##LE, rgba_fmt##LE, name##le, opt) \
    INPUT_PLANER_RGBA_YUV_FUNC_CASE(rgb_fmt##BE, rgba_fmt##BE, name##be, opt)

#define INPUT_PLANER_RGBXX_YUV_FUNC_CASE(rgb_fmt, name, opt) \
    INPUT_PLANER_RGB_YUV_FUNC_CASE(rgb_fmt##LE, name##le, opt) \
    INPUT_PLANER_RGB_YUV_FUNC_CASE(rgb_fmt##BE, name##be, opt)

#define INPUT_PLANER_RGBXX_UV_FUNC_CASE(rgb_fmt, name, opt) \
    INPUT_PLANER_RGB_UV_FUNC_CASE(rgb_fmt##LE, name##le, opt) \
    INPUT_PLANER_RGB_UV_FUNC_CASE(rgb_fmt##BE, name##be, opt)

#define INPUT_PLANER_RGB_YUVA_ALL_CASES(opt) \
    INPUT_PLANER_RGB_A_FUNC_CASE(      AV_PIX_FMT_GBRAP,                        rgb,    opt) \
    INPUT_PLANER_RGB_YUV_FUNC_CASE(    AV_PIX_FMT_GBRP,                         rgb,    opt) \
    INPUT_PLANER_RGBXX_YUV_FUNC_CASE(  AV_PIX_FMT_GBRP9,                        rgb9,   opt) \
    INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRP10,  AV_PIX_FMT_GBRAP10,  rgb10,  opt) \
    INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRP12,  AV_PIX_FMT_GBRAP12,  rgb12,  opt) \
    INPUT_PLANER_RGBXX_YUV_FUNC_CASE(  AV_PIX_FMT_GBRP14,                       rgb14,  opt) \
    INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRP16,  AV_PIX_FMT_GBRAP16,  rgb16,  opt) \
    INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRPF32, AV_PIX_FMT_GBRAPF32, rgbf32, opt)

    if (EXTERNAL_SSE2(cpu_flags)) {
        switch (c->srcFormat) {
        INPUT_PLANER_RGB_A_FUNC_CASE(      AV_PIX_FMT_GBRAP,                        rgb,    sse2);
        INPUT_PLANER_RGB_UV_FUNC_CASE(     AV_PIX_FMT_GBRP,                         rgb,    sse2);
        INPUT_PLANER_RGBXX_UV_FUNC_CASE(   AV_PIX_FMT_GBRP9,                        rgb9,   sse2);
        INPUT_PLANER_RGBAXX_UVA_FUNC_CASE( AV_PIX_FMT_GBRP10,  AV_PIX_FMT_GBRAP10,  rgb10,  sse2);
        INPUT_PLANER_RGBAXX_UVA_FUNC_CASE( AV_PIX_FMT_GBRP12,  AV_PIX_FMT_GBRAP12,  rgb12,  sse2);
        INPUT_PLANER_RGBXX_UV_FUNC_CASE(   AV_PIX_FMT_GBRP14,                       rgb14,  sse2);
        INPUT_PLANER_RGBAXX_UVA_FUNC_CASE( AV_PIX_FMT_GBRP16,  AV_PIX_FMT_GBRAP16,  rgb16,  sse2);
        INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRPF32, AV_PIX_FMT_GBRAPF32, rgbf32, sse2);
        default:
            break;
        }
    }

    if (EXTERNAL_SSE4(cpu_flags)) {
        switch (c->srcFormat) {
        case AV_PIX_FMT_GBRAP:
        INPUT_PLANER_RGB_YUV_FUNC_CASE(    AV_PIX_FMT_GBRP,                         rgb,    sse4);
        INPUT_PLANER_RGBXX_YUV_FUNC_CASE(  AV_PIX_FMT_GBRP9,                        rgb9,   sse4);
        INPUT_PLANER_RGBAXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP10,  AV_PIX_FMT_GBRAP10,  rgb10,  sse4);
        INPUT_PLANER_RGBAXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP12,  AV_PIX_FMT_GBRAP12,  rgb12,  sse4);
        INPUT_PLANER_RGBXX_YUV_FUNC_CASE(  AV_PIX_FMT_GBRP14,                       rgb14,  sse4);
        INPUT_PLANER_RGBAXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP16,  AV_PIX_FMT_GBRAP16,  rgb16,  sse4);
        INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRPF32, AV_PIX_FMT_GBRAPF32, rgbf32, sse4);
        default:
            break;
        }
    }

    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
        switch (c->srcFormat) {
        INPUT_PLANER_RGB_YUVA_ALL_CASES(avx2)
        default:
            break;
        }
    }

    if(c->flags & SWS_FULL_CHR_H_INT) {

        /* yuv2gbrp uses the SwsContext for yuv coefficients
           if struct offsets change the asm needs to be updated too */
        av_assert0(offsetof(SwsContext, yuv2rgb_y_offset) == 40292);

#define YUV2ANYX_FUNC_CASE(fmt, name, opt) \
        case fmt: \
            c->yuv2anyX = ff_yuv2##name##_full_X_##opt; \
            break;

#define YUV2ANYX_GBRAP_CASES(opt) \
        YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP,       gbrp,       opt) \
        YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP,      gbrap,      opt) \
        YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP9LE,    gbrp9le,    opt) \
        YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP10LE,   gbrp10le,   opt) \
        YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP10LE,  gbrap10le,  opt) \
        YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP12LE,   gbrp12le,   opt) \
        YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP12LE,  gbrap12le,  opt) \
        YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP14LE,   gbrp14le,   opt) \
        YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP16LE,   gbrp16le,   opt) \
        YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP16LE,  gbrap16le,  opt) \
        YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRPF32LE,  gbrpf32le,  opt) \
        YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAPF32LE, gbrapf32le, opt) \
        YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP9BE,    gbrp9be,    opt) \
        YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP10BE,   gbrp10be,   opt) \
        YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP10BE,  gbrap10be,  opt) \
        YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP12BE,   gbrp12be,   opt) \
        YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP12BE,  gbrap12be,  opt) \
        YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP14BE,   gbrp14be,   opt) \
        YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRP16BE,   gbrp16be,   opt) \
        YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAP16BE,  gbrap16be,  opt) \
        YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRPF32BE,  gbrpf32be,  opt) \
        YUV2ANYX_FUNC_CASE(AV_PIX_FMT_GBRAPF32BE, gbrapf32be, opt)

        if (EXTERNAL_SSE2(cpu_flags)) {
            switch (c->dstFormat) {
            YUV2ANYX_GBRAP_CASES(sse2)
            default:
                break;
            }
        }

        if (EXTERNAL_SSE4(cpu_flags)) {
            switch (c->dstFormat) {
            YUV2ANYX_GBRAP_CASES(sse4)
            default:
                break;
            }
        }

        if (EXTERNAL_AVX2_FAST(cpu_flags)) {
            switch (c->dstFormat) {
            YUV2ANYX_GBRAP_CASES(avx2)
            default:
                break;
            }
        }
    }

#endif
}
Definition: swscale.c:33