FFmpeg
swscale.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include <inttypes.h>
22 #include "config.h"
23 #include "libswscale/swscale.h"
25 #include "libavutil/attributes.h"
26 #include "libavutil/avassert.h"
27 #include "libavutil/intreadwrite.h"
28 #include "libavutil/x86/cpu.h"
29 #include "libavutil/cpu.h"
30 #include "libavutil/pixdesc.h"
31 
/* Two 64-bit dither words declared through DECLARE_ALIGNED(8, ...).
 * In this file entry [dstY & 1] is copied into c->greenDither for every
 * destination format other than AV_PIX_FMT_RGB555 / AV_PIX_FMT_BGR555. */
32 const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
33  0x0103010301030103LL,
34  0x0200020002000200LL,};
35 
/* Two 64-bit dither words declared through DECLARE_ALIGNED(8, ...).
 * In this file entry [dstY & 1] feeds c->blueDither (and c->greenDither for
 * AV_PIX_FMT_RGB555 / AV_PIX_FMT_BGR555), while entry [(dstY + 1) & 1] feeds
 * c->redDither. */
36 const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
37  0x0602060206020602LL,
38  0x0004000400040004LL,};
39 
40 #if HAVE_INLINE_ASM
41 
/* 64-bit constants for the inline-asm code paths.
 * NOTE(review): none of these names (nor DITHER1XBPP) is referenced in the
 * visible part of this file; presumably they are consumed by the included
 * swscale_template.c and/or external assembly -- confirm before touching. */
42 #define DITHER1XBPP
43 
/* Per-byte 0xF8 / 0xFC patterns; per-16-bit-word values 16 (w10) and 2 (w02). */
44 DECLARE_ASM_CONST(8, uint64_t, bF8)= 0xF8F8F8F8F8F8F8F8LL;
45 DECLARE_ASM_CONST(8, uint64_t, bFC)= 0xFCFCFCFCFCFCFCFCLL;
46 DECLARE_ASM_CONST(8, uint64_t, w10)= 0x0010001000100010LL;
47 DECLARE_ASM_CONST(8, uint64_t, w02)= 0x0002000200020002LL;
48 
/* Bit-field masks repeated in each 16-bit word:
 *   *16Mask: bits 0-4, 5-10, 11-15;   *15Mask: bits 0-4, 5-9, 10-14. */
49 DECLARE_ASM_CONST(8, uint64_t, b16Mask)= 0x001F001F001F001FLL;
50 DECLARE_ASM_CONST(8, uint64_t, g16Mask)= 0x07E007E007E007E0LL;
51 DECLARE_ASM_CONST(8, uint64_t, r16Mask)= 0xF800F800F800F800LL;
52 DECLARE_ASM_CONST(8, uint64_t, b15Mask)= 0x001F001F001F001FLL;
53 DECLARE_ASM_CONST(8, uint64_t, g15Mask)= 0x03E003E003E003E0LL;
54 DECLARE_ASM_CONST(8, uint64_t, r15Mask)= 0x7C007C007C007C00LL;
55 
/* Byte masks selecting every third byte of a 64-bit word:
 *   ff_M24A -> bytes 0,3,6;  ff_M24B -> bytes 1,4,7;  ff_M24C -> bytes 2,5. */
56 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24A) = 0x00FF0000FF0000FFLL;
57 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24B) = 0xFF0000FF0000FF00LL;
58 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_M24C) = 0x0000FF0000FF0000LL;
59 
/* 0x10 (16) in every byte, 0x80 (128) in every byte, and 1 in every 16-bit word. */
60 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_bgr2YOffset) = 0x1010101010101010ULL;
61 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
62 DECLARE_ASM_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL;
63 
64 
/* swscale_template.c is included up to twice, each time with a different
 * RENAME() suffix and COMPILE_TEMPLATE_MMXEXT value:
 *   HAVE_MMX_INLINE    -> suffix _mmx,    COMPILE_TEMPLATE_MMXEXT 0
 *   HAVE_MMXEXT_INLINE -> suffix _mmxext, COMPILE_TEMPLATE_MMXEXT 1
 * NOTE(review): the _mmx/_mmxext symbols used later in this file
 * (sws_init_swscale_mmx, sws_init_swscale_mmxext, yuv2yuvX_mmxext) are
 * presumably produced by these inclusions -- the template is not shown here. */
65 //MMX versions
66 #if HAVE_MMX_INLINE
67 #undef RENAME
68 #define COMPILE_TEMPLATE_MMXEXT 0
69 #define RENAME(a) a ## _mmx
70 #include "swscale_template.c"
71 #endif
72 
73 // MMXEXT versions
74 #if HAVE_MMXEXT_INLINE
75 #undef RENAME
76 #undef COMPILE_TEMPLATE_MMXEXT
77 #define COMPILE_TEMPLATE_MMXEXT 1
78 #define RENAME(a) a ## _mmxext
79 #include "swscale_template.c"
80 #endif
81 
81 
/*
 * NOTE(review): the signature that belongs on listing line 82 is missing from
 * this extract. The cross-reference index at the end of the page lists
 * "void ff_updateMMXDitherTables(SwsContext *c, int dstY)", which matches the
 * identifiers this body uses (c, dstY) -- confirm against the real file.
 *
 * For destination line dstY this body
 *  - selects the blue/green/red dither words stored in the context, and
 *  - when dstY < dstH - 2, fills c->lumMmxFilter / c->chrMmxFilter (plus
 *    c->alpMmxFilter when alpha is in use) with source-line pointers and the
 *    vertical filter coefficients for that line.
 */
83 {
84  const int dstH= c->dstH;
85  const int flags= c->flags;
86 
/* Planes are taken from slice index numSlice-2: plane[0] luma, plane[1] the
 * chroma plane used here, plane[3] alpha. */
87  SwsPlane *lumPlane = &c->slice[c->numSlice-2].plane[0];
88  SwsPlane *chrUPlane = &c->slice[c->numSlice-2].plane[1];
89  SwsPlane *alpPlane = &c->slice[c->numSlice-2].plane[3];
90 
91  int hasAlpha = c->needAlpha;
92  int32_t *vLumFilterPos= c->vLumFilterPos;
93  int32_t *vChrFilterPos= c->vChrFilterPos;
94  int16_t *vLumFilter= c->vLumFilter;
95  int16_t *vChrFilter= c->vChrFilter;
96  int32_t *lumMmxFilter= c->lumMmxFilter;
97  int32_t *chrMmxFilter= c->chrMmxFilter;
98  int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
99  const int vLumFilterSize= c->vLumFilterSize;
100  const int vChrFilterSize= c->vChrFilterSize;
/* Chroma destination line: dstY scaled down by the vertical chroma subsampling shift. */
101  const int chrDstY= dstY>>c->chrDstVSubSample;
102  const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
103  const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
104 
/* Dither selection alternates with line parity; red uses the opposite parity
 * from blue. Green uses ff_dither8 only for the 555 formats, ff_dither4 otherwise. */
105  c->blueDither= ff_dither8[dstY&1];
106  if (c->dstFormat == AV_PIX_FMT_RGB555 || c->dstFormat == AV_PIX_FMT_BGR555)
107  c->greenDither= ff_dither8[dstY&1];
108  else
109  c->greenDither= ff_dither4[dstY&1];
110  c->redDither= ff_dither8[(dstY+1)&1];
/* The filter tables are only rebuilt for lines before the last two destination lines. */
111  if (dstY < dstH - 2) {
/* Pointers to the first input line of each vertical filter window, relative
 * to the first line (sliceY) held by the plane's line table. */
112  const int16_t **lumSrcPtr = (const int16_t **)(void*) lumPlane->line + firstLumSrcY - lumPlane->sliceY;
113  const int16_t **chrUSrcPtr = (const int16_t **)(void*) chrUPlane->line + firstChrSrcY - chrUPlane->sliceY;
114  const int16_t **alpSrcPtr = (CONFIG_SWSCALE_ALPHA && hasAlpha) ? (const int16_t **)(void*) alpPlane->line + firstLumSrcY - alpPlane->sliceY : NULL;
115 
116  int i;
/* Luma window reaches above line 0 or below srcH: build a clamped pointer
 * table in the plane's tmp array. Entries before line 0 reuse the pointer at
 * index neg, in-range entries are copied, and the tail repeats the last entry.
 * Note the inner declaration of i shadows the outer one. */
117  if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) {
118  const int16_t **tmpY = (const int16_t **) lumPlane->tmp;
119 
120  int neg = -firstLumSrcY, i, end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize);
121  for (i = 0; i < neg; i++)
122  tmpY[i] = lumSrcPtr[neg];
123  for ( ; i < end; i++)
124  tmpY[i] = lumSrcPtr[i];
125  for ( ; i < vLumFilterSize; i++)
126  tmpY[i] = tmpY[i-1];
127  lumSrcPtr = tmpY;
128 
/* Alpha shares the luma window, so it gets the same clamping. */
129  if (alpSrcPtr) {
130  const int16_t **tmpA = (const int16_t **) alpPlane->tmp;
131  for (i = 0; i < neg; i++)
132  tmpA[i] = alpSrcPtr[neg];
133  for ( ; i < end; i++)
134  tmpA[i] = alpSrcPtr[i];
135  for ( ; i < vLumFilterSize; i++)
136  tmpA[i] = tmpA[i - 1];
137  alpSrcPtr = tmpA;
138  }
139  }
/* Same clamping for the chroma window, bounded by chrSrcH. */
140  if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) {
141  const int16_t **tmpU = (const int16_t **) chrUPlane->tmp;
142  int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
143  for (i = 0; i < neg; i++) {
144  tmpU[i] = chrUSrcPtr[neg];
145  }
146  for ( ; i < end; i++) {
147  tmpU[i] = chrUSrcPtr[i];
148  }
149  for ( ; i < vChrFilterSize; i++) {
150  tmpU[i] = tmpU[i - 1];
151  }
152  chrUSrcPtr = tmpU;
153  }
154 
/* SWS_ACCURATE_RND layout: taps are handled two at a time. The pointer for
 * tap i goes to index s*i and the pointer for tap i+1 (or tap i again when
 * the filter has a single tap) to index s*i + APCK_PTR2/4.
 * NOTE(review): listing lines 160-161, 166-167 and 173-174 are absent from
 * this extract. As shown, the "+ (...)" lines below are stand-alone expression
 * statements whose value is discarded, so no coefficient store is visible.
 * APCK_COEF appears in this page's index but in no visible line; presumably
 * the lost lines stored the packed coefficient pair using it -- confirm
 * against the real file before relying on this listing. */
155  if (flags & SWS_ACCURATE_RND) {
156  int s= APCK_SIZE / 8;
157  for (i=0; i<vLumFilterSize; i+=2) {
158  *(const void**)&lumMmxFilter[s*i ]= lumSrcPtr[i ];
159  *(const void**)&lumMmxFilter[s*i+APCK_PTR2/4 ]= lumSrcPtr[i+(vLumFilterSize>1)];
162  + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1] * (1 << 16) : 0);
163  if (CONFIG_SWSCALE_ALPHA && hasAlpha) {
164  *(const void**)&alpMmxFilter[s*i ]= alpSrcPtr[i ];
165  *(const void**)&alpMmxFilter[s*i+APCK_PTR2/4 ]= alpSrcPtr[i+(vLumFilterSize>1)];
168  }
169  }
170  for (i=0; i<vChrFilterSize; i+=2) {
171  *(const void**)&chrMmxFilter[s*i ]= chrUSrcPtr[i ];
172  *(const void**)&chrMmxFilter[s*i+APCK_PTR2/4 ]= chrUSrcPtr[i+(vChrFilterSize>1)];
175  + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1] * (1 << 16) : 0);
176  }
177  } else {
/* Default layout: four int32 slots per tap. The source-line pointer is
 * written at slot 4*i+0 (through a void* store), and slots 4*i+2 and 4*i+3
 * both receive the 16-bit coefficient replicated into the low and high
 * halves of a 32-bit word (multiplication by 0x10001). */
178  for (i=0; i<vLumFilterSize; i++) {
179  *(const void**)&lumMmxFilter[4*i+0]= lumSrcPtr[i];
180  lumMmxFilter[4*i+2]=
181  lumMmxFilter[4*i+3]=
182  ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001U;
/* Alpha reuses the luma coefficient just stored. */
183  if (CONFIG_SWSCALE_ALPHA && hasAlpha) {
184  *(const void**)&alpMmxFilter[4*i+0]= alpSrcPtr[i];
185  alpMmxFilter[4*i+2]=
186  alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
187  }
188  }
189  for (i=0; i<vChrFilterSize; i++) {
190  *(const void**)&chrMmxFilter[4*i+0]= chrUSrcPtr[i];
191  chrMmxFilter[4*i+2]=
192  chrMmxFilter[4*i+3]=
193  ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001U;
194  }
195  }
196  }
197 }
198 
199 #if HAVE_MMXEXT
/*
 * Vertical multi-tap filter producing 8-bit output, 16 pixels per iteration,
 * written as GCC inline assembly using XMM registers.
 *
 * filter     -- read by the asm as a table of 16-byte entries: a source-line
 *               pointer at byte 0 and the coefficient at byte 8; iteration
 *               over taps stops at an entry whose pointer is NULL.
 *               NOTE(review): who writes that NULL terminator is not visible
 *               in this file -- confirm against the table builder.
 * filterSize -- number of taps; decremented once and then only used as part
 *               of the initial accumulator value.
 * src        -- not referenced by the SSE path; only forwarded to the
 *               fallback.
 * dest       -- output bytes; must be 16-byte aligned for this path, else the
 *               call is forwarded to yuv2yuvX_mmxext().
 * dstW       -- output width; the loop advances in steps of 16 bytes while
 *               the index is below dstW + offset, so whole 16-byte stores are
 *               issued (presumably callers guarantee padding -- TODO confirm).
 * dither     -- first 8 bytes are loaded as the dither pattern.
 * offset     -- start index; when non-zero the 8 dither bytes are rotated
 *               right by 3 bytes (psrlq 24 | psllq 40) before use.
 */
200 static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
201  const int16_t **src, uint8_t *dest, int dstW,
202  const uint8_t *dither, int offset)
203 {
/* movntdq below needs a 16-byte aligned destination; fall back otherwise. */
204  if(((uintptr_t)dest) & 15){
205  yuv2yuvX_mmxext(filter, filterSize, src, dest, dstW, dither, offset);
206  return;
207  }
208  filterSize--;
/*
 * MAIN_FUNCTION, operand map: %0 filter, %1 dest-offset, %2 dstW+offset,
 * %3 offset, %4 filterSize (already decremented), %5 dither[0..7].
 *  - Setup: dither bytes in xmm3 are widened to words, the low 16 bits of
 *    filterSize are broadcast to all words, shifted left by 3 and added, and
 *    the sum is shifted right by 4. The result is the start value of both
 *    accumulators (xmm3/xmm4), with a copy kept in xmm7.
 *  - Inner loop (label 1, "jnz 1b"): for each table entry, load the
 *    coefficient and 16 source samples (aligned loads), multiply keeping the
 *    high 16 bits (pmulhw) and accumulate; continue until the next entry's
 *    pointer is NULL.
 *  - Per 16 pixels: shift right by 3, pack to unsigned bytes with saturation,
 *    store with a non-temporal store, reset the accumulators from xmm7 and
 *    rewind to the first table entry ("jb 1b" re-enters the same label).
 */
209 #define MAIN_FUNCTION \
210  "pxor %%xmm0, %%xmm0 \n\t" \
211  "punpcklbw %%xmm0, %%xmm3 \n\t" \
212  "movd %4, %%xmm1 \n\t" \
213  "punpcklwd %%xmm1, %%xmm1 \n\t" \
214  "punpckldq %%xmm1, %%xmm1 \n\t" \
215  "punpcklqdq %%xmm1, %%xmm1 \n\t" \
216  "psllw $3, %%xmm1 \n\t" \
217  "paddw %%xmm1, %%xmm3 \n\t" \
218  "psraw $4, %%xmm3 \n\t" \
219  "movdqa %%xmm3, %%xmm4 \n\t" \
220  "movdqa %%xmm3, %%xmm7 \n\t" \
221  "movl %3, %%ecx \n\t" \
222  "mov %0, %%"FF_REG_d" \n\t"\
223  "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
224  ".p2align 4 \n\t" /* FIXME Unroll? */\
225  "1: \n\t"\
226  "movddup 8(%%"FF_REG_d"), %%xmm0 \n\t" /* filterCoeff */\
227  "movdqa (%%"FF_REG_S", %%"FF_REG_c", 2), %%xmm2 \n\t" /* srcData */\
228  "movdqa 16(%%"FF_REG_S", %%"FF_REG_c", 2), %%xmm5 \n\t" /* srcData */\
229  "add $16, %%"FF_REG_d" \n\t"\
230  "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
231  "test %%"FF_REG_S", %%"FF_REG_S" \n\t"\
232  "pmulhw %%xmm0, %%xmm2 \n\t"\
233  "pmulhw %%xmm0, %%xmm5 \n\t"\
234  "paddw %%xmm2, %%xmm3 \n\t"\
235  "paddw %%xmm5, %%xmm4 \n\t"\
236  " jnz 1b \n\t"\
237  "psraw $3, %%xmm3 \n\t"\
238  "psraw $3, %%xmm4 \n\t"\
239  "packuswb %%xmm4, %%xmm3 \n\t"\
240  "movntdq %%xmm3, (%1, %%"FF_REG_c") \n\t"\
241  "add $16, %%"FF_REG_c" \n\t"\
242  "cmp %2, %%"FF_REG_c" \n\t"\
243  "movdqa %%xmm7, %%xmm3 \n\t" \
244  "movdqa %%xmm7, %%xmm4 \n\t" \
245  "mov %0, %%"FF_REG_d" \n\t"\
246  "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
247  "jb 1b \n\t"
248 
/* Non-zero offset: rotate the 64-bit dither pattern right by 24 bits first. */
249  if (offset) {
250  __asm__ volatile(
251  "movq %5, %%xmm3 \n\t"
252  "movdqa %%xmm3, %%xmm4 \n\t"
253  "psrlq $24, %%xmm3 \n\t"
254  "psllq $40, %%xmm4 \n\t"
255  "por %%xmm4, %%xmm3 \n\t"
256  MAIN_FUNCTION
257  :: "g" (filter),
258  "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset),
259  "m"(filterSize), "m"(((uint64_t *) dither)[0])
260  : XMM_CLOBBERS("%xmm0" , "%xmm1" , "%xmm2" , "%xmm3" , "%xmm4" , "%xmm5" , "%xmm7" ,)
261  "%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_c
262  );
263  } else {
/* Zero offset: use the dither pattern as loaded. */
264  __asm__ volatile(
265  "movq %5, %%xmm3 \n\t"
266  MAIN_FUNCTION
267  :: "g" (filter),
268  "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset),
269  "m"(filterSize), "m"(((uint64_t *) dither)[0])
270  : XMM_CLOBBERS("%xmm0" , "%xmm1" , "%xmm2" , "%xmm3" , "%xmm4" , "%xmm5" , "%xmm7" ,)
271  "%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_c
272  );
273  }
274 }
275 #endif
276 
277 #endif /* HAVE_INLINE_ASM */
278 
/* Prototype generators for the horizontal scalers. SCALE_FUNC expands to the
 * declaration of ff_hscale<from_bpc>to<to_bpc>_<filter_n>_<opt>(); no bodies
 * are defined in this file (presumably they live in external assembly --
 * confirm). */
279 #define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
280 void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
281  SwsContext *c, int16_t *data, \
282  int dstW, const uint8_t *src, \
283  const int16_t *filter, \
284  const int32_t *filterPos, int filterSize)
285 
/* All input depths (8, 9, 10, 12, 14, 16) crossed with both output depths (15, 19). */
286 #define SCALE_FUNCS(filter_n, opt) \
287  SCALE_FUNC(filter_n, 8, 15, opt); \
288  SCALE_FUNC(filter_n, 9, 15, opt); \
289  SCALE_FUNC(filter_n, 10, 15, opt); \
290  SCALE_FUNC(filter_n, 12, 15, opt); \
291  SCALE_FUNC(filter_n, 14, 15, opt); \
292  SCALE_FUNC(filter_n, 16, 15, opt); \
293  SCALE_FUNC(filter_n, 8, 19, opt); \
294  SCALE_FUNC(filter_n, 9, 19, opt); \
295  SCALE_FUNC(filter_n, 10, 19, opt); \
296  SCALE_FUNC(filter_n, 12, 19, opt); \
297  SCALE_FUNC(filter_n, 14, 19, opt); \
298  SCALE_FUNC(filter_n, 16, 19, opt)
299 
/* MMX flavour: filter-size variants 4, 8 and X. */
300 #define SCALE_FUNCS_MMX(opt) \
301  SCALE_FUNCS(4, opt); \
302  SCALE_FUNCS(8, opt); \
303  SCALE_FUNCS(X, opt)
304 
/* SSE flavours: filter-size variants 4, 8, X4 and X8. */
305 #define SCALE_FUNCS_SSE(opt) \
306  SCALE_FUNCS(4, opt); \
307  SCALE_FUNCS(8, opt); \
308  SCALE_FUNCS(X4, opt); \
309  SCALE_FUNCS(X8, opt)
310 
/* The mmx prototypes are only declared on 32-bit x86 builds. */
311 #if ARCH_X86_32
312 SCALE_FUNCS_MMX(mmx);
313 #endif
314 SCALE_FUNCS_SSE(sse2);
315 SCALE_FUNCS_SSE(ssse3);
316 SCALE_FUNCS_SSE(sse4);
317 
/* Prototype generators for the multi-tap vertical scalers
 * ff_yuv2planeX_<size>_<opt>(); the parameter list matches yuv2yuvX_sse3()
 * defined in this file. Only declarations are emitted here. */
318 #define VSCALEX_FUNC(size, opt) \
319 void ff_yuv2planeX_ ## size ## _ ## opt(const int16_t *filter, int filterSize, \
320  const int16_t **src, uint8_t *dest, int dstW, \
321  const uint8_t *dither, int offset)
/* Sizes 8, 9 and 10 for one instruction-set suffix. */
322 #define VSCALEX_FUNCS(opt) \
323  VSCALEX_FUNC(8, opt); \
324  VSCALEX_FUNC(9, opt); \
325  VSCALEX_FUNC(10, opt)
326 
/* mmxext only on 32-bit x86; the 16-bit variant is declared for sse4 only. */
327 #if ARCH_X86_32
328 VSCALEX_FUNCS(mmxext);
329 #endif
330 VSCALEX_FUNCS(sse2);
331 VSCALEX_FUNCS(sse4);
332 VSCALEX_FUNC(16, sse4);
333 VSCALEX_FUNCS(avx);
334 
/* Prototype generators for the single-line vertical output functions
 * ff_yuv2plane1_<size>_<opt>(). Only declarations are emitted here. */
335 #define VSCALE_FUNC(size, opt) \
336 void ff_yuv2plane1_ ## size ## _ ## opt(const int16_t *src, uint8_t *dst, int dstW, \
337  const uint8_t *dither, int offset)
/* Sizes 8 and 16 take suffix opt1; sizes 9 and 10 take suffix opt2. */
338 #define VSCALE_FUNCS(opt1, opt2) \
339  VSCALE_FUNC(8, opt1); \
340  VSCALE_FUNC(9, opt2); \
341  VSCALE_FUNC(10, opt2); \
342  VSCALE_FUNC(16, opt1)
343 
/* The mmx/mmxext pair is only declared on 32-bit x86; an extra 16-bit sse4
 * variant is declared on top of the sse2 and avx sets. */
344 #if ARCH_X86_32
345 VSCALE_FUNCS(mmx, mmxext);
346 #endif
347 VSCALE_FUNCS(sse2, sse2);
348 VSCALE_FUNC(16, sse4);
349 VSCALE_FUNCS(avx, avx);
350 
/* Prototype generators for the input unpackers. INPUT_Y_FUNC declares
 * ff_<fmt>ToY_<opt>() (one destination, one used source pointer) and
 * INPUT_UV_FUNC declares ff_<fmt>ToUV_<opt>() (two destinations, two used
 * source pointers). Parameters named unused* are part of the signature only.
 * Only declarations are emitted here. */
351 #define INPUT_Y_FUNC(fmt, opt) \
352 void ff_ ## fmt ## ToY_ ## opt(uint8_t *dst, const uint8_t *src, \
353  const uint8_t *unused1, const uint8_t *unused2, \
354  int w, uint32_t *unused)
355 #define INPUT_UV_FUNC(fmt, opt) \
356 void ff_ ## fmt ## ToUV_ ## opt(uint8_t *dstU, uint8_t *dstV, \
357  const uint8_t *unused0, \
358  const uint8_t *src1, \
359  const uint8_t *src2, \
360  int w, uint32_t *unused)
/* Both the Y and the UV prototype for one format. */
361 #define INPUT_FUNC(fmt, opt) \
362  INPUT_Y_FUNC(fmt, opt); \
363  INPUT_UV_FUNC(fmt, opt)
/* Full set for one suffix; nv12/nv21 get a UV prototype only. */
364 #define INPUT_FUNCS(opt) \
365  INPUT_FUNC(uyvy, opt); \
366  INPUT_FUNC(yuyv, opt); \
367  INPUT_UV_FUNC(nv12, opt); \
368  INPUT_UV_FUNC(nv21, opt); \
369  INPUT_FUNC(rgba, opt); \
370  INPUT_FUNC(bgra, opt); \
371  INPUT_FUNC(argb, opt); \
372  INPUT_FUNC(abgr, opt); \
373  INPUT_FUNC(rgb24, opt); \
374  INPUT_FUNC(bgr24, opt)
375 
/* The mmx set is only declared on 32-bit x86. */
376 #if ARCH_X86_32
377 INPUT_FUNCS(mmx);
378 #endif
379 INPUT_FUNCS(sse2);
380 INPUT_FUNCS(ssse3);
381 INPUT_FUNCS(avx);
382 
382 
/*
 * NOTE(review): the signature that belongs on listing line 383 is missing
 * from this extract. The cross-reference index at the end of the page lists
 * "av_cold void ff_sws_init_swscale_x86(SwsContext *c)" -- confirm against the
 * real file.
 *
 * Installs x86-optimised function pointers into the context according to the
 * CPU flags returned by av_get_cpu_flags(). The feature blocks run in order
 * (MMX, MMXEXT, SSE2, SSSE3, SSE4, AVX) and each one overwrites pointers set
 * by an earlier block, so the last block whose check succeeds wins for any
 * given pointer.
 */
384 {
385  int cpu_flags = av_get_cpu_flags();
386 
387 #if HAVE_MMX_INLINE
388  if (INLINE_MMX(cpu_flags))
389  sws_init_swscale_mmx(c);
390 #endif
/* NOTE(review): listing lines 392 and 394 are absent from this extract, which
 * leaves the closing brace on listing line 397 unmatched as shown. The index
 * lists INLINE_MMXEXT and AV_CPU_FLAG_SSE3, neither of which occurs in a
 * visible line; presumably the lost lines were the conditions guarding the
 * two statements below -- confirm against the real file. */
391 #if HAVE_MMXEXT_INLINE
393  sws_init_swscale_mmxext(c);
395  if(c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND))
396  c->yuv2planeX = yuv2yuvX_sse3;
397  }
398 #endif
399 
/* Select a horizontal scaler by source bit depth. The ...to15 variant (suffix
 * opt2) is used when dstBpc <= 14, otherwise the ...to19 variant (suffix
 * opt1). The 14-bit branch also takes PAL8 / any-RGB sources whose first
 * component is less than 16 bits deep; anything left must be 16-bit
 * (asserted). */
400 #define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \
401  if (c->srcBpc == 8) { \
402  hscalefn = c->dstBpc <= 14 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \
403  ff_hscale8to19_ ## filtersize ## _ ## opt1; \
404  } else if (c->srcBpc == 9) { \
405  hscalefn = c->dstBpc <= 14 ? ff_hscale9to15_ ## filtersize ## _ ## opt2 : \
406  ff_hscale9to19_ ## filtersize ## _ ## opt1; \
407  } else if (c->srcBpc == 10) { \
408  hscalefn = c->dstBpc <= 14 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \
409  ff_hscale10to19_ ## filtersize ## _ ## opt1; \
410  } else if (c->srcBpc == 12) { \
411  hscalefn = c->dstBpc <= 14 ? ff_hscale12to15_ ## filtersize ## _ ## opt2 : \
412  ff_hscale12to19_ ## filtersize ## _ ## opt1; \
413  } else if (c->srcBpc == 14 || ((c->srcFormat==AV_PIX_FMT_PAL8||isAnyRGB(c->srcFormat)) && av_pix_fmt_desc_get(c->srcFormat)->comp[0].depth<16)) { \
414  hscalefn = c->dstBpc <= 14 ? ff_hscale14to15_ ## filtersize ## _ ## opt2 : \
415  ff_hscale14to19_ ## filtersize ## _ ## opt1; \
416  } else { /* c->srcBpc == 16 */ \
417  av_assert0(c->srcBpc == 16);\
418  hscalefn = c->dstBpc <= 14 ? ff_hscale16to15_ ## filtersize ## _ ## opt2 : \
419  ff_hscale16to19_ ## filtersize ## _ ## opt1; \
420  } \
421 } while (0)
/* MMX dispatch on filter size: dedicated 4- and 8-tap variants, X otherwise. */
422 #define ASSIGN_MMX_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
423  switch (filtersize) { \
424  case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
425  case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
426  default: ASSIGN_SCALE_FUNC2(hscalefn, X, opt1, opt2); break; \
427  }
/* Multi-tap vertical scaler by destination depth. 16-bit runs the
 * caller-supplied statement; 10-bit needs a non-BE format other than P010LE;
 * 9-bit needs a non-BE format; 8-bit needs condition_8bit and
 * !c->use_mmx_vfilter. Other depths leave the pointer untouched. */
428 #define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit) \
429 switch(c->dstBpc){ \
430  case 16: do_16_case; break; \
431  case 10: if (!isBE(c->dstFormat) && c->dstFormat != AV_PIX_FMT_P010LE) vscalefn = ff_yuv2planeX_10_ ## opt; break; \
432  case 9: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_9_ ## opt; break; \
433  case 8: if ((condition_8bit) && !c->use_mmx_vfilter) vscalefn = ff_yuv2planeX_8_ ## opt; break; \
434  }
/* Single-line vertical output by destination depth. 8-bit is assigned
 * unconditionally; 9/10-bit additionally require opt2chk; any other depth
 * must be greater than 8 (asserted). */
435 #define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk) \
436  switch(c->dstBpc){ \
437  case 16: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2plane1_16_ ## opt1; break; \
438  case 10: if (!isBE(c->dstFormat) && c->dstFormat != AV_PIX_FMT_P010LE && opt2chk) vscalefn = ff_yuv2plane1_10_ ## opt2; break; \
439  case 9: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_9_ ## opt2; break; \
440  case 8: vscalefn = ff_yuv2plane1_8_ ## opt1; break; \
441  default: av_assert0(c->dstBpc>8); \
442  }
/* switch case for a packed RGB source: always sets the luma unpacker, and the
 * chroma unpacker only when the source chroma is not horizontally subsampled. */
443 #define case_rgb(x, X, opt) \
444  case AV_PIX_FMT_ ## X: \
445  c->lumToYV12 = ff_ ## x ## ToY_ ## opt; \
446  if (!c->chrSrcHSubSample) \
447  c->chrToYV12 = ff_ ## x ## ToUV_ ## opt; \
448  break
449 #if ARCH_X86_32
450  if (EXTERNAL_MMX(cpu_flags)) {
451  ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx);
452  ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx);
453  ASSIGN_VSCALE_FUNC(c->yuv2plane1, mmx, mmxext, cpu_flags & AV_CPU_FLAG_MMXEXT);
454 
455  switch (c->srcFormat) {
/* NOTE(review): YA8 reuses the packed-YUV luma extractors -- yuyvToY for
 * luma and uyvyToY for alpha; presumably because the byte interleaving
 * matches. Confirm. */
456  case AV_PIX_FMT_YA8:
457  c->lumToYV12 = ff_yuyvToY_mmx;
458  if (c->needAlpha)
459  c->alpToYV12 = ff_uyvyToY_mmx;
460  break;
461  case AV_PIX_FMT_YUYV422:
462  c->lumToYV12 = ff_yuyvToY_mmx;
463  c->chrToYV12 = ff_yuyvToUV_mmx;
464  break;
465  case AV_PIX_FMT_UYVY422:
466  c->lumToYV12 = ff_uyvyToY_mmx;
467  c->chrToYV12 = ff_uyvyToUV_mmx;
468  break;
469  case AV_PIX_FMT_NV12:
470  c->chrToYV12 = ff_nv12ToUV_mmx;
471  break;
472  case AV_PIX_FMT_NV21:
473  c->chrToYV12 = ff_nv21ToUV_mmx;
474  break;
475  case_rgb(rgb24, RGB24, mmx);
476  case_rgb(bgr24, BGR24, mmx);
477  case_rgb(bgra, BGRA, mmx);
478  case_rgb(rgba, RGBA, mmx);
479  case_rgb(abgr, ABGR, mmx);
480  case_rgb(argb, ARGB, mmx);
481  default:
482  break;
483  }
484  }
/* Empty do_16_case: the 16-bit pointer is left as it was. */
485  if (EXTERNAL_MMXEXT(cpu_flags)) {
486  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, mmxext, , 1);
487  }
488 #endif /* ARCH_X86_32 */
/* SSE dispatch on filter size: dedicated 4- and 8-tap variants; other sizes
 * use X4 when bit 2 of the size is set, X8 otherwise. */
489 #define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
490  switch (filtersize) { \
491  case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
492  case 8: ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
493  default: if (filtersize & 4) ASSIGN_SCALE_FUNC2(hscalefn, X4, opt1, opt2); \
494  else ASSIGN_SCALE_FUNC2(hscalefn, X8, opt1, opt2); \
495  break; \
496  }
497  if (EXTERNAL_SSE2(cpu_flags)) {
498  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse2, sse2);
499  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2);
/* The 8-bit planeX variant is only installed when the stack is aligned or
 * the build targets x86-64. */
500  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2, ,
501  HAVE_ALIGNED_STACK || ARCH_X86_64);
502  ASSIGN_VSCALE_FUNC(c->yuv2plane1, sse2, sse2, 1);
503 
504  switch (c->srcFormat) {
505  case AV_PIX_FMT_YA8:
506  c->lumToYV12 = ff_yuyvToY_sse2;
507  if (c->needAlpha)
508  c->alpToYV12 = ff_uyvyToY_sse2;
509  break;
510  case AV_PIX_FMT_YUYV422:
511  c->lumToYV12 = ff_yuyvToY_sse2;
512  c->chrToYV12 = ff_yuyvToUV_sse2;
513  break;
514  case AV_PIX_FMT_UYVY422:
515  c->lumToYV12 = ff_uyvyToY_sse2;
516  c->chrToYV12 = ff_uyvyToUV_sse2;
517  break;
518  case AV_PIX_FMT_NV12:
519  c->chrToYV12 = ff_nv12ToUV_sse2;
520  break;
521  case AV_PIX_FMT_NV21:
522  c->chrToYV12 = ff_nv21ToUV_sse2;
523  break;
524  case_rgb(rgb24, RGB24, sse2);
525  case_rgb(bgr24, BGR24, sse2);
526  case_rgb(bgra, BGRA, sse2);
527  case_rgb(rgba, RGBA, sse2);
528  case_rgb(abgr, ABGR, sse2);
529  case_rgb(argb, ARGB, sse2);
530  default:
531  break;
532  }
533  }
/* SSSE3: horizontal scalers plus the 24-bit RGB/BGR unpackers only. */
534  if (EXTERNAL_SSSE3(cpu_flags)) {
535  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3);
536  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, ssse3, ssse3);
537  switch (c->srcFormat) {
538  case_rgb(rgb24, RGB24, ssse3);
539  case_rgb(bgr24, BGR24, ssse3);
540  default:
541  break;
542  }
543  }
/* SSE4: sse4 is passed as opt1 (the ...to19 scalers) and ssse3 as opt2 (the
 * ...to15 scalers); 16-bit little-endian output also gets sse4 vertical
 * functions. */
544  if (EXTERNAL_SSE4(cpu_flags)) {
545  /* Xto15 don't need special sse4 functions */
546  ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse4, ssse3);
547  ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse4, ssse3);
548  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse4,
549  if (!isBE(c->dstFormat)) c->yuv2planeX = ff_yuv2planeX_16_sse4,
550  HAVE_ALIGNED_STACK || ARCH_X86_64);
551  if (c->dstBpc == 16 && !isBE(c->dstFormat))
552  c->yuv2plane1 = ff_yuv2plane1_16_sse4;
553  }
554 
/* AVX: vertical functions and unpackers. No YA8 case, and for YUYV/UYVY only
 * the chroma unpacker is replaced. */
555  if (EXTERNAL_AVX(cpu_flags)) {
556  ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx, ,
557  HAVE_ALIGNED_STACK || ARCH_X86_64);
558  ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx, avx, 1);
559 
560  switch (c->srcFormat) {
561  case AV_PIX_FMT_YUYV422:
562  c->chrToYV12 = ff_yuyvToUV_avx;
563  break;
564  case AV_PIX_FMT_UYVY422:
565  c->chrToYV12 = ff_uyvyToUV_avx;
566  break;
567  case AV_PIX_FMT_NV12:
568  c->chrToYV12 = ff_nv12ToUV_avx;
569  break;
570  case AV_PIX_FMT_NV21:
571  c->chrToYV12 = ff_nv21ToUV_avx;
572  break;
573  case_rgb(rgb24, RGB24, avx);
574  case_rgb(bgr24, BGR24, avx);
575  case_rgb(bgra, BGRA, avx);
576  case_rgb(rgba, RGBA, avx);
577  case_rgb(abgr, ABGR, avx);
578  case_rgb(argb, ARGB, avx);
579  default:
580  break;
581  }
582  }
583 }
INLINE_MMX
#define INLINE_MMX(flags)
Definition: cpu.h:86
SwsContext::vLumFilterSize
int vLumFilterSize
Vertical filter size for luma/alpha pixels.
Definition: swscale_internal.h:382
ASSIGN_MMX_SCALE_FUNC
#define ASSIGN_MMX_SCALE_FUNC(hscalefn, filtersize, opt1, opt2)
AV_CPU_FLAG_SSE3
#define AV_CPU_FLAG_SSE3
Prescott SSE3 functions.
Definition: cpu.h:40
APCK_PTR2
#define APCK_PTR2
Definition: swscale_internal.h:56
cpu.h
SwsPlane::line
uint8_t ** line
line buffer
Definition: swscale_internal.h:967
AV_PIX_FMT_YA8
@ AV_PIX_FMT_YA8
8 bits gray, 8 bits alpha
Definition: pixfmt.h:143
SwsContext::dstW
int dstW
Width of destination luma/alpha planes.
Definition: swscale_internal.h:478
DECLARE_ASM_CONST
#define DECLARE_ASM_CONST(n, t, v)
Definition: mem.h:114
SwsContext::dstY
int dstY
Last destination vertical line output from last slice.
Definition: swscale_internal.h:394
av_unused
#define av_unused
Definition: attributes.h:131
end
static av_cold int end(AVCodecContext *avctx)
Definition: avrndec.c:92
pixdesc.h
SwsContext::vChrFilter
int16_t * vChrFilter
Array of vertical filter coefficients for chroma planes.
Definition: swscale_internal.h:375
SwsContext::lumMmxFilter
int32_t lumMmxFilter[4 *MAX_FILTER_SIZE]
Definition: swscale_internal.h:476
SwsContext::vLumFilter
int16_t * vLumFilter
Array of vertical filter coefficients for luma/alpha planes.
Definition: swscale_internal.h:374
filter
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce then the filter should push the output frames on the output link immediately As an exception to the previous rule if the input frame is enough to produce several output frames then the filter needs output only at least one per link The additional frames can be left buffered in the filter
Definition: filter_design.txt:228
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:93
DECLARE_ASM_ALIGNED
#define DECLARE_ASM_ALIGNED(n, t, v)
Definition: mem.h:113
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:50
INPUT_FUNCS
#define INPUT_FUNCS(opt)
Definition: swscale.c:364
U
#define U(x)
Definition: vp56_arith.h:37
avassert.h
av_cold
#define av_cold
Definition: attributes.h:90
intreadwrite.h
s
#define s(width, name)
Definition: cbs_vp9.c:257
APCK_COEF
#define APCK_COEF
Definition: swscale_internal.h:57
SwsPlane::tmp
uint8_t ** tmp
Tmp line buffer used by mmx code.
Definition: swscale_internal.h:968
VSCALE_FUNCS
#define VSCALE_FUNCS(opt1, opt2)
Definition: swscale.c:338
XMM_CLOBBERS
#define XMM_CLOBBERS(...)
Definition: asm.h:98
SwsContext::vLumFilterPos
int32_t * vLumFilterPos
Array of vertical filter starting positions for each dst[i] for luma/alpha planes.
Definition: swscale_internal.h:378
ASSIGN_VSCALE_FUNC
#define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk)
int32_t
int32_t
Definition: audio_convert.c:194
if
if(ret)
Definition: filter_design.txt:179
VSCALEX_FUNC
#define VSCALEX_FUNC(size, opt)
Definition: swscale.c:318
NULL
#define NULL
Definition: coverity.c:32
ASSIGN_SSE_SCALE_FUNC
#define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2)
ff_sws_init_swscale_x86
av_cold void ff_sws_init_swscale_x86(SwsContext *c)
Definition: swscale.c:383
AV_PIX_FMT_YUYV422
@ AV_PIX_FMT_YUYV422
packed YUV 4:2:2, 16bpp, Y0 Cb Y1 Cr
Definition: pixfmt.h:67
src
#define src
Definition: vp8dsp.c:254
SwsPlane
Slice plane.
Definition: swscale_internal.h:962
ASSIGN_VSCALEX_FUNC
#define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit)
SwsContext::alpMmxFilter
int32_t alpMmxFilter[4 *MAX_FILTER_SIZE]
Definition: swscale_internal.h:484
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
for
for(j=16;j >0;--j)
Definition: h264pred_template.c:469
SwsContext::vChrFilterPos
int32_t * vChrFilterPos
Array of vertical filter starting positions for each dst[i] for chroma planes.
Definition: swscale_internal.h:379
isBE
static av_always_inline int isBE(enum AVPixelFormat pix_fmt)
Definition: swscale_internal.h:664
cpu.h
AV_PIX_FMT_BGR555
#define AV_PIX_FMT_BGR555
Definition: pixfmt.h:390
FFMIN
#define FFMIN(a, b)
Definition: common.h:96
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
attributes.h
SwsContext::vChrFilterSize
int vChrFilterSize
Vertical filter size for chroma pixels.
Definition: swscale_internal.h:383
EXTERNAL_SSE2
#define EXTERNAL_SSE2(flags)
Definition: cpu.h:59
SWS_ACCURATE_RND
#define SWS_ACCURATE_RND
Definition: swscale.h:83
DECLARE_ALIGNED
#define DECLARE_ALIGNED(n, t, v)
Definition: mem.h:112
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:269
AV_PIX_FMT_RGB555
#define AV_PIX_FMT_RGB555
Definition: pixfmt.h:385
swscale_internal.h
uint8_t
uint8_t
Definition: audio_convert.c:194
AV_PIX_FMT_NV21
@ AV_PIX_FMT_NV21
as above, but U and V bytes are swapped
Definition: pixfmt.h:90
swscale_template.c
ff_dither8
const uint64_t ff_dither8[2]
Definition: swscale.c:36
AV_PIX_FMT_NV12
@ AV_PIX_FMT_NV12
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (firs...
Definition: pixfmt.h:89
EXTERNAL_AVX
#define EXTERNAL_AVX(flags)
Definition: cpu.h:70
AV_PIX_FMT_UYVY422
@ AV_PIX_FMT_UYVY422
packed YUV 4:2:2, 16bpp, Cb Y0 Cr Y1
Definition: pixfmt.h:81
EXTERNAL_SSE4
#define EXTERNAL_SSE4(flags)
Definition: cpu.h:68
AV_CPU_FLAG_MMXEXT
#define AV_CPU_FLAG_MMXEXT
SSE integer functions or AMD MMX ext.
Definition: cpu.h:32
INLINE_MMXEXT
#define INLINE_MMXEXT(flags)
Definition: cpu.h:87
SwsContext::chrMmxFilter
int32_t chrMmxFilter[4 *MAX_FILTER_SIZE]
Definition: swscale_internal.h:477
SwsPlane::sliceY
int sliceY
index of first line
Definition: swscale_internal.h:965
VSCALE_FUNC
#define VSCALE_FUNC(size, opt)
Definition: swscale.c:335
x86_reg
int x86_reg
Definition: asm.h:72
case_rgb
#define case_rgb(x, X, opt)
RGBA
#define RGBA(r, g, b, a)
Definition: dvbsubdec.c:39
flags
#define flags(name, subs,...)
Definition: cbs_av1.c:565
SCALE_FUNCS_MMX
#define SCALE_FUNCS_MMX(opt)
Definition: swscale.c:300
ff_updateMMXDitherTables
void ff_updateMMXDitherTables(SwsContext *c, int dstY)
EXTERNAL_SSSE3
#define EXTERNAL_SSSE3(flags)
Definition: cpu.h:65
SwsContext
Definition: swscale_internal.h:280
EXTERNAL_MMX
#define EXTERNAL_MMX(flags)
Definition: cpu.h:56
SCALE_FUNCS_SSE
#define SCALE_FUNCS_SSE(opt)
Definition: swscale.c:305
SwsContext::dstH
int dstH
Height of destination luma/alpha planes.
Definition: swscale_internal.h:293
APCK_SIZE
#define APCK_SIZE
Definition: swscale_internal.h:58
VSCALEX_FUNCS
#define VSCALEX_FUNCS(opt)
Definition: swscale.c:322
EXTERNAL_MMXEXT
#define EXTERNAL_MMXEXT(flags)
Definition: cpu.h:57
swscale.h
ff_dither4
const uint64_t ff_dither4[2]
Definition: swscale.c:32
dither
static const uint8_t dither[8][8]
Definition: vf_fspp.c:57