/* FFmpeg: swscale_template.c -- Doxygen source listing of this file; each line below begins with the listing's own line number. */
1 /*
2  * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include <stdint.h>
22 
23 #include "libavutil/x86/asm.h"
25 
/*
 * Drop any earlier definitions of the store/prefetch helper macros so the
 * definitions below take effect. PREFETCH is only undefined in this part of
 * the file, not redefined.
 */
26 #undef REAL_MOVNTQ
27 #undef MOVNTQ
28 #undef MOVNTQ2
29 #undef PREFETCH
30 
31 
/*
 * REAL_MOVNTQ stringifies its two arguments into one "movntq a, b" assembly
 * line. MOVNTQ forwards to it so that macro arguments are expanded before
 * they are stringified. MOVNTQ2 is the bare mnemonic.
 */
32 #define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
33 #define MOVNTQ2 "movntq "
34 #define MOVNTQ(a,b) REAL_MOVNTQ(a,b)
35 
/*
 * Opens an __asm__ volatile statement (it is closed by YSCALEYUV2PACKEDX_END)
 * and emits the chroma half of the vertical filter.
 *
 * Zeroes the index register FF_REG_a and defines label 1, the target of the
 * "jb 1b" emitted by the WRITE* macros. Inner loop 2 walks the filter list at
 * CHR_MMX_FILTER_OFFSET(%0): every 16-byte entry holds a source pointer at
 * offset 0 and the coefficient at offset 8, and a null pointer ends the list
 * (the first entry is used before any null test). U is read at
 * [ptr + index], V at [ptr + %6 + index]. Each is multiplied by the
 * coefficient with pmulhw and added onto the VROUNDER_OFFSET(%0) value.
 *
 * Result: mm3 = U sum, mm4 = V sum. Uses mm0, mm2, mm5, FF_REG_d, FF_REG_S.
 */
36 #define YSCALEYUV2PACKEDX_UV \
37  __asm__ volatile(\
38  "xor %%"FF_REG_a", %%"FF_REG_a" \n\t"\
39  ".p2align 4 \n\t"\
40  "nop \n\t"\
41  "1: \n\t"\
42  "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"FF_REG_d" \n\t"\
43  "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
44  "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
45  "movq %%mm3, %%mm4 \n\t"\
46  ".p2align 4 \n\t"\
47  "2: \n\t"\
48  "movq 8(%%"FF_REG_d"), %%mm0 \n\t" /* filterCoeff */\
49  "movq (%%"FF_REG_S", %%"FF_REG_a"), %%mm2 \n\t" /* UsrcData */\
50  "add %6, %%"FF_REG_S" \n\t" \
51  "movq (%%"FF_REG_S", %%"FF_REG_a"), %%mm5 \n\t" /* VsrcData */\
52  "add $16, %%"FF_REG_d" \n\t"\
53  "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
54  "pmulhw %%mm0, %%mm2 \n\t"\
55  "pmulhw %%mm0, %%mm5 \n\t"\
56  "paddw %%mm2, %%mm3 \n\t"\
57  "paddw %%mm5, %%mm4 \n\t"\
58  "test %%"FF_REG_S", %%"FF_REG_S" \n\t"\
59  " jnz 2b \n\t"\
60 
/*
 * Vertical filter for one full-resolution plane (used for luma and for alpha).
 *
 *   offset      string constant: offset of the filter list relative to %0
 *   coeff       MMX register that receives the current coefficient
 *   src1, src2  MMX scratch registers for the two sample quadwords
 *   dst1, dst2  MMX accumulators, both started from VROUNDER_OFFSET(%0)
 *
 * The list layout is the one walked by the chroma macro: pointer at offset 0,
 * coefficient at offset 8, 16 bytes per entry, null pointer terminates.
 * Samples are read at [ptr + 2*index] and [ptr + 2*index + 8], so one pass
 * covers twice as many samples as the chroma pass. Reuses local label 2.
 */
61 #define YSCALEYUV2PACKEDX_YA(offset,coeff,src1,src2,dst1,dst2) \
62  "lea "offset"(%0), %%"FF_REG_d" \n\t"\
63  "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
64  "movq "VROUNDER_OFFSET"(%0), "#dst1" \n\t"\
65  "movq "#dst1", "#dst2" \n\t"\
66  ".p2align 4 \n\t"\
67  "2: \n\t"\
68  "movq 8(%%"FF_REG_d"), "#coeff" \n\t" /* filterCoeff */\
69  "movq (%%"FF_REG_S", %%"FF_REG_a", 2), "#src1" \n\t" /* Y1srcData */\
70  "movq 8(%%"FF_REG_S", %%"FF_REG_a", 2), "#src2" \n\t" /* Y2srcData */\
71  "add $16, %%"FF_REG_d" \n\t"\
72  "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
73  "pmulhw "#coeff", "#src1" \n\t"\
74  "pmulhw "#coeff", "#src2" \n\t"\
75  "paddw "#src1", "#dst1" \n\t"\
76  "paddw "#src2", "#dst2" \n\t"\
77  "test %%"FF_REG_S", %%"FF_REG_S" \n\t"\
78  " jnz 2b \n\t"\
79 
/*
 * Chroma pass followed by the luma pass over LUM_MMX_FILTER_OFFSET.
 * Leaves mm3 = U, mm4 = V, mm1 = first luma quadword, mm7 = second luma
 * quadword; mm0, mm2 and mm5 are scratch.
 */
80 #define YSCALEYUV2PACKEDX \
81  YSCALEYUV2PACKEDX_UV \
82  YSCALEYUV2PACKEDX_YA(LUM_MMX_FILTER_OFFSET,%%mm0,%%mm2,%%mm5,%%mm1,%%mm7) \
83 
/*
 * Closes the asm statement opened by the *_UV macros: no outputs, then the
 * input operands and clobbers. Operand numbering as used by the asm text:
 *   %0      &c->redDither, the base all *_OFFSET / *_TEMP / *_COEFF
 *           displacements are applied to
 *   %1-%3   placeholder memory operands (dummy)
 *   %4      dest
 *   %5      dstW_reg (memory), the loop bound compared against the index
 *   %6      uv_off (memory), added to the U pointer to reach V
 * bF8 and bFC are passed through NAMED_CONSTRAINTS_ADD for the MANGLE()
 * references in the RGB15/16 writers. The enclosing function must declare
 * c, dummy, dest, dstW_reg and uv_off.
 */
84 #define YSCALEYUV2PACKEDX_END \
85  :: "r" (&c->redDither), \
86  "m" (dummy), "m" (dummy), "m" (dummy),\
87  "r" (dest), "m" (dstW_reg), "m"(uv_off) \
88  NAMED_CONSTRAINTS_ADD(bF8,bFC) \
89  : "%"FF_REG_a, "%"FF_REG_d, "%"FF_REG_S \
90  );
91 
/*
 * Higher-precision chroma pass; opens the asm statement like
 * YSCALEYUV2PACKEDX_UV (zeroes FF_REG_a, defines label 1).
 *
 * Each iteration of loop 2 consumes two filter taps: samples from the pointer
 * at offset 0 and from the pointer at APCK_PTR2 are interleaved word-wise
 * (punpcklwd/punpckhwd) and multiplied with the coefficient quadword at
 * APCK_COEF using pmaddwd, giving 32-bit partial sums. U accumulates in
 * mm4/mm5, V in mm6/mm7. The entry stride is APCK_SIZE; the pointer of the
 * next entry is fetched before FF_REG_d is advanced and a null pointer ends
 * the loop (the flags of "test" survive the MMX instructions up to "jnz").
 *
 * Afterwards the sums are shifted right by 16, packed to words with signed
 * saturation, VROUNDER_OFFSET(%0) is added, and the results are stored to
 * U_TEMP(%0) and V_TEMP(%0) because the following luma pass needs every MMX
 * register.
 */
92 #define YSCALEYUV2PACKEDX_ACCURATE_UV \
93  __asm__ volatile(\
94  "xor %%"FF_REG_a", %%"FF_REG_a" \n\t"\
95  ".p2align 4 \n\t"\
96  "nop \n\t"\
97  "1: \n\t"\
98  "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"FF_REG_d" \n\t"\
99  "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
100  "pxor %%mm4, %%mm4 \n\t"\
101  "pxor %%mm5, %%mm5 \n\t"\
102  "pxor %%mm6, %%mm6 \n\t"\
103  "pxor %%mm7, %%mm7 \n\t"\
104  ".p2align 4 \n\t"\
105  "2: \n\t"\
106  "movq (%%"FF_REG_S", %%"FF_REG_a"), %%mm0 \n\t" /* UsrcData */\
107  "add %6, %%"FF_REG_S" \n\t" \
108  "movq (%%"FF_REG_S", %%"FF_REG_a"), %%mm2 \n\t" /* VsrcData */\
109  "mov "STR(APCK_PTR2)"(%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
110  "movq (%%"FF_REG_S", %%"FF_REG_a"), %%mm1 \n\t" /* UsrcData */\
111  "movq %%mm0, %%mm3 \n\t"\
112  "punpcklwd %%mm1, %%mm0 \n\t"\
113  "punpckhwd %%mm1, %%mm3 \n\t"\
114  "movq "STR(APCK_COEF)"(%%"FF_REG_d"),%%mm1 \n\t" /* filterCoeff */\
115  "pmaddwd %%mm1, %%mm0 \n\t"\
116  "pmaddwd %%mm1, %%mm3 \n\t"\
117  "paddd %%mm0, %%mm4 \n\t"\
118  "paddd %%mm3, %%mm5 \n\t"\
119  "add %6, %%"FF_REG_S" \n\t" \
120  "movq (%%"FF_REG_S", %%"FF_REG_a"), %%mm3 \n\t" /* VsrcData */\
121  "mov "STR(APCK_SIZE)"(%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
122  "add $"STR(APCK_SIZE)", %%"FF_REG_d" \n\t"\
123  "test %%"FF_REG_S", %%"FF_REG_S" \n\t"\
124  "movq %%mm2, %%mm0 \n\t"\
125  "punpcklwd %%mm3, %%mm2 \n\t"\
126  "punpckhwd %%mm3, %%mm0 \n\t"\
127  "pmaddwd %%mm1, %%mm2 \n\t"\
128  "pmaddwd %%mm1, %%mm0 \n\t"\
129  "paddd %%mm2, %%mm6 \n\t"\
130  "paddd %%mm0, %%mm7 \n\t"\
131  " jnz 2b \n\t"\
132  "psrad $16, %%mm4 \n\t"\
133  "psrad $16, %%mm5 \n\t"\
134  "psrad $16, %%mm6 \n\t"\
135  "psrad $16, %%mm7 \n\t"\
136  "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
137  "packssdw %%mm5, %%mm4 \n\t"\
138  "packssdw %%mm7, %%mm6 \n\t"\
139  "paddw %%mm0, %%mm4 \n\t"\
140  "paddw %%mm0, %%mm6 \n\t"\
141  "movq %%mm4, "U_TEMP"(%0) \n\t"\
142  "movq %%mm6, "V_TEMP"(%0) \n\t"\
143 
/*
 * Higher-precision vertical filter for one full-resolution plane; "offset" is
 * the string constant locating the filter list relative to %0.
 *
 * Same two-taps-per-iteration pmaddwd scheme as the accurate chroma pass, but
 * samples are read at [ptr + 2*index] and [ptr + 2*index + 8]. 32-bit sums
 * live in mm1/mm5 (first quadword) and mm7/mm6 (second quadword). After the
 * loop they are shifted right by 16, packed with signed saturation and
 * VROUNDER_OFFSET(%0) is added.
 *
 * On exit: mm1 and mm7 hold the filtered samples, and mm3 / mm4 are reloaded
 * from U_TEMP(%0) / V_TEMP(%0). All eight MMX registers are overwritten.
 */
144 #define YSCALEYUV2PACKEDX_ACCURATE_YA(offset) \
145  "lea "offset"(%0), %%"FF_REG_d" \n\t"\
146  "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
147  "pxor %%mm1, %%mm1 \n\t"\
148  "pxor %%mm5, %%mm5 \n\t"\
149  "pxor %%mm7, %%mm7 \n\t"\
150  "pxor %%mm6, %%mm6 \n\t"\
151  ".p2align 4 \n\t"\
152  "2: \n\t"\
153  "movq (%%"FF_REG_S", %%"FF_REG_a", 2), %%mm0 \n\t" /* Y1srcData */\
154  "movq 8(%%"FF_REG_S", %%"FF_REG_a", 2), %%mm2 \n\t" /* Y2srcData */\
155  "mov "STR(APCK_PTR2)"(%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
156  "movq (%%"FF_REG_S", %%"FF_REG_a", 2), %%mm4 \n\t" /* Y1srcData */\
157  "movq %%mm0, %%mm3 \n\t"\
158  "punpcklwd %%mm4, %%mm0 \n\t"\
159  "punpckhwd %%mm4, %%mm3 \n\t"\
160  "movq "STR(APCK_COEF)"(%%"FF_REG_d"), %%mm4 \n\t" /* filterCoeff */\
161  "pmaddwd %%mm4, %%mm0 \n\t"\
162  "pmaddwd %%mm4, %%mm3 \n\t"\
163  "paddd %%mm0, %%mm1 \n\t"\
164  "paddd %%mm3, %%mm5 \n\t"\
165  "movq 8(%%"FF_REG_S", %%"FF_REG_a", 2), %%mm3 \n\t" /* Y2srcData */\
166  "mov "STR(APCK_SIZE)"(%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
167  "add $"STR(APCK_SIZE)", %%"FF_REG_d" \n\t"\
168  "test %%"FF_REG_S", %%"FF_REG_S" \n\t"\
169  "movq %%mm2, %%mm0 \n\t"\
170  "punpcklwd %%mm3, %%mm2 \n\t"\
171  "punpckhwd %%mm3, %%mm0 \n\t"\
172  "pmaddwd %%mm4, %%mm2 \n\t"\
173  "pmaddwd %%mm4, %%mm0 \n\t"\
174  "paddd %%mm2, %%mm7 \n\t"\
175  "paddd %%mm0, %%mm6 \n\t"\
176  " jnz 2b \n\t"\
177  "psrad $16, %%mm1 \n\t"\
178  "psrad $16, %%mm5 \n\t"\
179  "psrad $16, %%mm7 \n\t"\
180  "psrad $16, %%mm6 \n\t"\
181  "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
182  "packssdw %%mm5, %%mm1 \n\t"\
183  "packssdw %%mm6, %%mm7 \n\t"\
184  "paddw %%mm0, %%mm1 \n\t"\
185  "paddw %%mm0, %%mm7 \n\t"\
186  "movq "U_TEMP"(%0), %%mm3 \n\t"\
187  "movq "V_TEMP"(%0), %%mm4 \n\t"\
188 
/*
 * Accurate chroma pass followed by the accurate luma pass. Ends with the same
 * register layout as YSCALEYUV2PACKEDX: mm3 = U, mm4 = V, mm1 / mm7 = luma.
 */
189 #define YSCALEYUV2PACKEDX_ACCURATE \
190  YSCALEYUV2PACKEDX_ACCURATE_UV \
191  YSCALEYUV2PACKEDX_ACCURATE_YA(LUM_MMX_FILTER_OFFSET)
192 
/*
 * YUV -> RGB conversion on the register layout left by the PACKEDX macros
 * (mm3 = U, mm4 = V, mm1 / mm7 = two luma quadwords).
 *
 * Offsets and coefficients are read from the context relative to %0
 * (U_OFFSET, V_OFFSET, Y_OFFSET, UG/VG/UB/VR/Y_COEFF). Each chroma term is
 * duplicated for two neighbouring pixels (punpck?wd of a register with
 * itself) before the luma is added; packuswb then clamps to unsigned bytes.
 *
 * Result: mm2 = B, mm4 = G, mm5 = R, eight bytes each. mm0, mm1, mm3, mm6 and
 * mm7 are overwritten.
 */
193 #define YSCALEYUV2RGBX \
194  "psubw "U_OFFSET"(%0), %%mm3 \n\t" /* (U-128)8*/\
195  "psubw "V_OFFSET"(%0), %%mm4 \n\t" /* (V-128)8*/\
196  "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
197  "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
198  "pmulhw "UG_COEFF"(%0), %%mm3 \n\t"\
199  "pmulhw "VG_COEFF"(%0), %%mm4 \n\t"\
200  /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
201  "pmulhw "UB_COEFF"(%0), %%mm2 \n\t"\
202  "pmulhw "VR_COEFF"(%0), %%mm5 \n\t"\
203  "psubw "Y_OFFSET"(%0), %%mm1 \n\t" /* 8(Y-16)*/\
204  "psubw "Y_OFFSET"(%0), %%mm7 \n\t" /* 8(Y-16)*/\
205  "pmulhw "Y_COEFF"(%0), %%mm1 \n\t"\
206  "pmulhw "Y_COEFF"(%0), %%mm7 \n\t"\
207  /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
208  "paddw %%mm3, %%mm4 \n\t"\
209  "movq %%mm2, %%mm0 \n\t"\
210  "movq %%mm5, %%mm6 \n\t"\
211  "movq %%mm4, %%mm3 \n\t"\
212  "punpcklwd %%mm2, %%mm2 \n\t"\
213  "punpcklwd %%mm5, %%mm5 \n\t"\
214  "punpcklwd %%mm4, %%mm4 \n\t"\
215  "paddw %%mm1, %%mm2 \n\t"\
216  "paddw %%mm1, %%mm5 \n\t"\
217  "paddw %%mm1, %%mm4 \n\t"\
218  "punpckhwd %%mm0, %%mm0 \n\t"\
219  "punpckhwd %%mm6, %%mm6 \n\t"\
220  "punpckhwd %%mm3, %%mm3 \n\t"\
221  "paddw %%mm7, %%mm0 \n\t"\
222  "paddw %%mm7, %%mm6 \n\t"\
223  "paddw %%mm7, %%mm3 \n\t"\
224  /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
225  "packuswb %%mm0, %%mm2 \n\t"\
226  "packuswb %%mm6, %%mm5 \n\t"\
227  "packuswb %%mm3, %%mm4 \n\t"\
228 
/*
 * Interleave four byte planes into eight 4-byte pixels and store them.
 *
 *   dst, index   destination base and pixel index; stores go to
 *                dst + index*4 (+0, +8, +16, +24)
 *   dstw         string operand holding the loop bound
 *   b, g, r, a   MMX registers with eight bytes per channel; in memory each
 *                pixel is laid out b, g, r, a. b and r are overwritten.
 *   q0, q2, q3, t  MMX scratch registers
 *
 * After the four MOVNTQ stores the index advances by 8 and control returns
 * to label 1 while index < dstw (unsigned compare). Label 1 must have been
 * defined earlier in the same asm statement.
 * WRITEBGR32 forwards to REAL_WRITEBGR32 so arguments are macro-expanded
 * before stringification.
 */
229 #define REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) \
230  "movq "#b", "#q2" \n\t" /* B */\
231  "movq "#r", "#t" \n\t" /* R */\
232  "punpcklbw "#g", "#b" \n\t" /* GBGBGBGB 0 */\
233  "punpcklbw "#a", "#r" \n\t" /* ARARARAR 0 */\
234  "punpckhbw "#g", "#q2" \n\t" /* GBGBGBGB 2 */\
235  "punpckhbw "#a", "#t" \n\t" /* ARARARAR 2 */\
236  "movq "#b", "#q0" \n\t" /* GBGBGBGB 0 */\
237  "movq "#q2", "#q3" \n\t" /* GBGBGBGB 2 */\
238  "punpcklwd "#r", "#q0" \n\t" /* ARGBARGB 0 */\
239  "punpckhwd "#r", "#b" \n\t" /* ARGBARGB 1 */\
240  "punpcklwd "#t", "#q2" \n\t" /* ARGBARGB 2 */\
241  "punpckhwd "#t", "#q3" \n\t" /* ARGBARGB 3 */\
242 \
243  MOVNTQ( q0, (dst, index, 4))\
244  MOVNTQ( b, 8(dst, index, 4))\
245  MOVNTQ( q2, 16(dst, index, 4))\
246  MOVNTQ( q3, 24(dst, index, 4))\
247 \
248  "add $8, "#index" \n\t"\
249  "cmp "dstw", "#index" \n\t"\
250  " jb 1b \n\t"
251 #define WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)
252 
/**
 * Vertically filter planar YUV(A) and write packed 32-bit pixels, using the
 * YSCALEYUV2PACKEDX_ACCURATE (pmaddwd, 32-bit accumulation) filter path.
 *
 * The filter and source-pointer arguments are not read here; the asm walks
 * the filter lists stored in the context, addressed relative to
 * &c->redDither (operand %0). dest is operand %4, dstW widened to x86_reg is
 * operand %5 and c->uv_offx2 is operand %6.
 *
 * With alpha enabled the B/G/R bytes are parked in U_TEMP/V_TEMP/Y_TEMP while
 * the alpha plane is filtered, because that pass overwrites every MMX
 * register; it reloads U_TEMP/V_TEMP into mm3/mm4 itself and Y_TEMP is
 * reloaded into mm5 right after. Without alpha the alpha byte is all ones.
 *
 * YSCALEYUV2PACKEDX_ACCURATE opens the __asm__ statement and
 * YSCALEYUV2PACKEDX_END closes it; the string literals in between are only
 * valid as part of that statement.
 */
253 static void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter,
254  const int16_t **lumSrc, int lumFilterSize,
255  const int16_t *chrFilter, const int16_t **chrUSrc,
256  const int16_t **chrVSrc,
257  int chrFilterSize, const int16_t **alpSrc,
258  uint8_t *dest, int dstW, int dstY)
259 {
260  x86_reg dummy=0;
261  x86_reg dstW_reg = dstW;
262  x86_reg uv_off = c->uv_offx2;
263 
264  if (CONFIG_SWSCALE_ALPHA && c->needAlpha) {
265  YSCALEYUV2PACKEDX_ACCURATE
266  YSCALEYUV2RGBX
267  "movq %%mm2, "U_TEMP"(%0) \n\t"
268  "movq %%mm4, "V_TEMP"(%0) \n\t"
269  "movq %%mm5, "Y_TEMP"(%0) \n\t"
270  YSCALEYUV2PACKEDX_ACCURATE_YA(ALP_MMX_FILTER_OFFSET)
271  "movq "Y_TEMP"(%0), %%mm5 \n\t"
272  "psraw $3, %%mm1 \n\t"
273  "psraw $3, %%mm7 \n\t"
274  "packuswb %%mm7, %%mm1 \n\t"
275  WRITEBGR32(%4, "%5", %%FF_REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6)
276  YSCALEYUV2PACKEDX_END
277  } else {
278  YSCALEYUV2PACKEDX_ACCURATE
279  YSCALEYUV2RGBX
280  "pcmpeqd %%mm7, %%mm7 \n\t"
281  WRITEBGR32(%4, "%5", %%FF_REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
282  YSCALEYUV2PACKEDX_END
283  }
284 }
285 
/**
 * Vertically filter planar YUV(A) and write packed 32-bit pixels, using the
 * pmulhw-based YSCALEYUV2PACKEDX filter path.
 *
 * The filter and source-pointer arguments are not read here; the asm walks
 * the filter lists stored in the context, addressed relative to
 * &c->redDither (operand %0). dest is operand %4, dstW widened to x86_reg is
 * operand %5 and c->uv_offx2 is operand %6.
 *
 * With alpha enabled the alpha plane is filtered into mm1/mm7 with scratch
 * registers mm0, mm3, mm6, which leaves the B/G/R bytes in mm2/mm4/mm5
 * untouched. Without alpha the alpha byte is all ones (pcmpeqd).
 *
 * YSCALEYUV2PACKEDX opens the __asm__ statement and YSCALEYUV2PACKEDX_END
 * closes it; the string literals in between are only valid as part of that
 * statement.
 */
286 static void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
287  const int16_t **lumSrc, int lumFilterSize,
288  const int16_t *chrFilter, const int16_t **chrUSrc,
289  const int16_t **chrVSrc,
290  int chrFilterSize, const int16_t **alpSrc,
291  uint8_t *dest, int dstW, int dstY)
292 {
293  x86_reg dummy=0;
294  x86_reg dstW_reg = dstW;
295  x86_reg uv_off = c->uv_offx2;
296 
297  if (CONFIG_SWSCALE_ALPHA && c->needAlpha) {
298  YSCALEYUV2PACKEDX
299  YSCALEYUV2RGBX
300  YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7)
301  "psraw $3, %%mm1 \n\t"
302  "psraw $3, %%mm7 \n\t"
303  "packuswb %%mm7, %%mm1 \n\t"
304  WRITEBGR32(%4, "%5", %%FF_REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
305  YSCALEYUV2PACKEDX_END
306  } else {
307  YSCALEYUV2PACKEDX
308  YSCALEYUV2RGBX
309  "pcmpeqd %%mm7, %%mm7 \n\t"
310  WRITEBGR32(%4, "%5", %%FF_REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
311  YSCALEYUV2PACKEDX_END
312  }
313 }
314 
/**
 * Same as the rgb32 pmulhw-path writer, but with the first and third colour
 * bytes exchanged: mm5 is passed as the first channel and mm2 as the third
 * when the pixels are interleaved.
 *
 * The filter and source-pointer arguments are not read here; the asm walks
 * the filter lists stored in the context, addressed relative to
 * &c->redDither (operand %0). dest is operand %4, dstW widened to x86_reg is
 * operand %5 and c->uv_offx2 is operand %6.
 *
 * YSCALEYUV2PACKEDX opens the __asm__ statement and YSCALEYUV2PACKEDX_END
 * closes it; the string literals in between are only valid as part of that
 * statement.
 */
315 static void RENAME(yuv2bgr32_X)(SwsContext *c, const int16_t *lumFilter,
316  const int16_t **lumSrc, int lumFilterSize,
317  const int16_t *chrFilter, const int16_t **chrUSrc,
318  const int16_t **chrVSrc,
319  int chrFilterSize, const int16_t **alpSrc,
320  uint8_t *dest, int dstW, int dstY)
321 {
322  x86_reg dummy=0;
323  x86_reg dstW_reg = dstW;
324  x86_reg uv_off = c->uv_offx2;
325 
326  if (CONFIG_SWSCALE_ALPHA && c->needAlpha) {
327  YSCALEYUV2PACKEDX
328  YSCALEYUV2RGBX
329  YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7)
330  "psraw $3, %%mm1 \n\t"
331  "psraw $3, %%mm7 \n\t"
332  "packuswb %%mm7, %%mm1 \n\t"
333  WRITEBGR32(%4, "%5", %%FF_REGa, %%mm5, %%mm4, %%mm2, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
334  YSCALEYUV2PACKEDX_END
335  } else {
336  YSCALEYUV2PACKEDX
337  YSCALEYUV2RGBX
338  "pcmpeqd %%mm7, %%mm7 \n\t"
339  WRITEBGR32(%4, "%5", %%FF_REGa, %%mm5, %%mm4, %%mm2, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
340  YSCALEYUV2PACKEDX_END
341  }
342 }
343 
/*
 * Pack eight pixels into 16 bits each and store them at dst + index*2.
 *
 * Expects mm2 = B, mm4 = G, mm5 = R (eight bytes each) and mm7 = 0.
 * B and R are masked with bF8 and G with bFC (constants defined elsewhere;
 * the names suggest 0xF8 / 0xFC byte masks -- confirm). B is shifted down by
 * 3, B is interleaved with R, G is widened to words with mm7 and shifted up
 * by 3, and the two are OR-ed together. mm1 and mm3 are scratch.
 *
 * Two MOVNTQ stores write 16 bytes, the index advances by 8 and control
 * returns to label 1 while index < dstw (unsigned compare).
 */
344 #define REAL_WRITERGB16(dst, dstw, index) \
345  "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\
346  "pand "MANGLE(bFC)", %%mm4 \n\t" /* G */\
347  "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\
348  "psrlq $3, %%mm2 \n\t"\
349 \
350  "movq %%mm2, %%mm1 \n\t"\
351  "movq %%mm4, %%mm3 \n\t"\
352 \
353  "punpcklbw %%mm7, %%mm3 \n\t"\
354  "punpcklbw %%mm5, %%mm2 \n\t"\
355  "punpckhbw %%mm7, %%mm4 \n\t"\
356  "punpckhbw %%mm5, %%mm1 \n\t"\
357 \
358  "psllq $3, %%mm3 \n\t"\
359  "psllq $3, %%mm4 \n\t"\
360 \
361  "por %%mm3, %%mm2 \n\t"\
362  "por %%mm4, %%mm1 \n\t"\
363 \
364  MOVNTQ(%%mm2, (dst, index, 2))\
365  MOVNTQ(%%mm1, 8(dst, index, 2))\
366 \
367  "add $8, "#index" \n\t"\
368  "cmp "dstw", "#index" \n\t"\
369  " jb 1b \n\t"
370 #define WRITERGB16(dst, dstw, index) REAL_WRITERGB16(dst, dstw, index)
371 
/**
 * Vertically filter planar YUV and write 16-bit pixels through WRITERGB16,
 * using the YSCALEYUV2PACKEDX_ACCURATE filter path.
 *
 * The filter and source-pointer arguments are not read here; the asm walks
 * the filter lists stored in the context, addressed relative to
 * &c->redDither (operand %0). dest is operand %4, dstW widened to x86_reg is
 * operand %5 and c->uv_offx2 is operand %6. When DITHER1XBPP is defined the
 * per-channel dither values from the context are added with unsigned
 * saturation before packing.
 *
 * YSCALEYUV2PACKEDX_ACCURATE opens the __asm__ statement and
 * YSCALEYUV2PACKEDX_END closes it; the string literals in between are only
 * valid as part of that statement.
 */
372 static void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter,
373  const int16_t **lumSrc, int lumFilterSize,
374  const int16_t *chrFilter, const int16_t **chrUSrc,
375  const int16_t **chrVSrc,
376  int chrFilterSize, const int16_t **alpSrc,
377  uint8_t *dest, int dstW, int dstY)
378 {
379  x86_reg dummy=0;
380  x86_reg dstW_reg = dstW;
381  x86_reg uv_off = c->uv_offx2;
382 
383  YSCALEYUV2PACKEDX_ACCURATE
384  YSCALEYUV2RGBX
385  "pxor %%mm7, %%mm7 \n\t"
386  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
387 #ifdef DITHER1XBPP
388  "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
389  "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
390  "paddusb "RED_DITHER"(%0), %%mm5\n\t"
391 #endif
392  WRITERGB16(%4, "%5", %%FF_REGa)
393  YSCALEYUV2PACKEDX_END
394 }
395 
/**
 * Vertically filter planar YUV and write 16-bit pixels through WRITERGB16,
 * using the pmulhw-based YSCALEYUV2PACKEDX filter path.
 *
 * The filter and source-pointer arguments are not read here; the asm walks
 * the filter lists stored in the context, addressed relative to
 * &c->redDither (operand %0). dest is operand %4, dstW widened to x86_reg is
 * operand %5 and c->uv_offx2 is operand %6. When DITHER1XBPP is defined the
 * per-channel dither values from the context are added with unsigned
 * saturation before packing.
 *
 * YSCALEYUV2PACKEDX opens the __asm__ statement and YSCALEYUV2PACKEDX_END
 * closes it; the string literals in between are only valid as part of that
 * statement.
 */
396 static void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter,
397  const int16_t **lumSrc, int lumFilterSize,
398  const int16_t *chrFilter, const int16_t **chrUSrc,
399  const int16_t **chrVSrc,
400  int chrFilterSize, const int16_t **alpSrc,
401  uint8_t *dest, int dstW, int dstY)
402 {
403  x86_reg dummy=0;
404  x86_reg dstW_reg = dstW;
405  x86_reg uv_off = c->uv_offx2;
406 
407  YSCALEYUV2PACKEDX
408  YSCALEYUV2RGBX
409  "pxor %%mm7, %%mm7 \n\t"
410  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
411 #ifdef DITHER1XBPP
412  "paddusb "BLUE_DITHER"(%0), %%mm2 \n\t"
413  "paddusb "GREEN_DITHER"(%0), %%mm4 \n\t"
414  "paddusb "RED_DITHER"(%0), %%mm5 \n\t"
415 #endif
416  WRITERGB16(%4, "%5", %%FF_REGa)
417  YSCALEYUV2PACKEDX_END
418 }
419 
/*
 * Pack eight pixels into 15 significant bits each (stored as 16-bit words)
 * at dst + index*2.
 *
 * Expects mm2 = B, mm4 = G, mm5 = R (eight bytes each) and mm7 = 0.
 * All three channels are masked with bF8 (constant defined elsewhere; the
 * name suggests a 0xF8 byte mask -- confirm). B is shifted down by 3 and R by
 * 1, B is interleaved with R, G is widened to words with mm7 and shifted up
 * by 2, and the two are OR-ed together. mm1 and mm3 are scratch.
 *
 * Two MOVNTQ stores write 16 bytes, the index advances by 8 and control
 * returns to label 1 while index < dstw (unsigned compare).
 */
420 #define REAL_WRITERGB15(dst, dstw, index) \
421  "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\
422  "pand "MANGLE(bF8)", %%mm4 \n\t" /* G */\
423  "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\
424  "psrlq $3, %%mm2 \n\t"\
425  "psrlq $1, %%mm5 \n\t"\
426 \
427  "movq %%mm2, %%mm1 \n\t"\
428  "movq %%mm4, %%mm3 \n\t"\
429 \
430  "punpcklbw %%mm7, %%mm3 \n\t"\
431  "punpcklbw %%mm5, %%mm2 \n\t"\
432  "punpckhbw %%mm7, %%mm4 \n\t"\
433  "punpckhbw %%mm5, %%mm1 \n\t"\
434 \
435  "psllq $2, %%mm3 \n\t"\
436  "psllq $2, %%mm4 \n\t"\
437 \
438  "por %%mm3, %%mm2 \n\t"\
439  "por %%mm4, %%mm1 \n\t"\
440 \
441  MOVNTQ(%%mm2, (dst, index, 2))\
442  MOVNTQ(%%mm1, 8(dst, index, 2))\
443 \
444  "add $8, "#index" \n\t"\
445  "cmp "dstw", "#index" \n\t"\
446  " jb 1b \n\t"
447 #define WRITERGB15(dst, dstw, index) REAL_WRITERGB15(dst, dstw, index)
448 
/**
 * Vertically filter planar YUV and write 15-bit pixels through WRITERGB15,
 * using the YSCALEYUV2PACKEDX_ACCURATE filter path.
 *
 * The filter and source-pointer arguments are not read here; the asm walks
 * the filter lists stored in the context, addressed relative to
 * &c->redDither (operand %0). dest is operand %4, dstW widened to x86_reg is
 * operand %5 and c->uv_offx2 is operand %6. When DITHER1XBPP is defined the
 * per-channel dither values from the context are added with unsigned
 * saturation before packing.
 *
 * YSCALEYUV2PACKEDX_ACCURATE opens the __asm__ statement and
 * YSCALEYUV2PACKEDX_END closes it; the string literals in between are only
 * valid as part of that statement.
 */
449 static void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter,
450  const int16_t **lumSrc, int lumFilterSize,
451  const int16_t *chrFilter, const int16_t **chrUSrc,
452  const int16_t **chrVSrc,
453  int chrFilterSize, const int16_t **alpSrc,
454  uint8_t *dest, int dstW, int dstY)
455 {
456  x86_reg dummy=0;
457  x86_reg dstW_reg = dstW;
458  x86_reg uv_off = c->uv_offx2;
459 
460  YSCALEYUV2PACKEDX_ACCURATE
461  YSCALEYUV2RGBX
462  "pxor %%mm7, %%mm7 \n\t"
463  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
464 #ifdef DITHER1XBPP
465  "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
466  "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
467  "paddusb "RED_DITHER"(%0), %%mm5\n\t"
468 #endif
469  WRITERGB15(%4, "%5", %%FF_REGa)
470  YSCALEYUV2PACKEDX_END
471 }
472 
/**
 * Vertically filter planar YUV and write 15-bit pixels through WRITERGB15,
 * using the pmulhw-based YSCALEYUV2PACKEDX filter path.
 *
 * The filter and source-pointer arguments are not read here; the asm walks
 * the filter lists stored in the context, addressed relative to
 * &c->redDither (operand %0). dest is operand %4, dstW widened to x86_reg is
 * operand %5 and c->uv_offx2 is operand %6. When DITHER1XBPP is defined the
 * per-channel dither values from the context are added with unsigned
 * saturation before packing.
 *
 * YSCALEYUV2PACKEDX opens the __asm__ statement and YSCALEYUV2PACKEDX_END
 * closes it; the string literals in between are only valid as part of that
 * statement.
 */
473 static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
474  const int16_t **lumSrc, int lumFilterSize,
475  const int16_t *chrFilter, const int16_t **chrUSrc,
476  const int16_t **chrVSrc,
477  int chrFilterSize, const int16_t **alpSrc,
478  uint8_t *dest, int dstW, int dstY)
479 {
480  x86_reg dummy=0;
481  x86_reg dstW_reg = dstW;
482  x86_reg uv_off = c->uv_offx2;
483 
484  YSCALEYUV2PACKEDX
485  YSCALEYUV2RGBX
486  "pxor %%mm7, %%mm7 \n\t"
487  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
488 #ifdef DITHER1XBPP
489  "paddusb "BLUE_DITHER"(%0), %%mm2 \n\t"
490  "paddusb "GREEN_DITHER"(%0), %%mm4 \n\t"
491  "paddusb "RED_DITHER"(%0), %%mm5 \n\t"
492 #endif
493  WRITERGB15(%4, "%5", %%FF_REGa)
494  YSCALEYUV2PACKEDX_END
495 }
496 
/*
 * Plain-MMX writer for eight 3-byte pixels (24 bytes per iteration).
 *
 * Expects mm2 = B, mm4 = G, mm5 = R and mm7 = 0. The channels are first
 * interleaved into four quadwords of two 4-byte pixels each (with a zero
 * filler byte), then the filler bytes are squeezed out with shifts and ORs
 * so that three quadwords remain, stored at dst, 8(dst) and 16(dst).
 * All eight MMX registers are overwritten.
 *
 * Unlike the indexed writers, dst itself is advanced by 24; the index
 * advances by 8 and control returns to label 1 while index < dstw
 * (unsigned compare).
 */
497 #define WRITEBGR24MMX(dst, dstw, index) \
498  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
499  "movq %%mm2, %%mm1 \n\t" /* B */\
500  "movq %%mm5, %%mm6 \n\t" /* R */\
501  "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\
502  "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\
503  "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\
504  "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\
505  "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\
506  "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\
507  "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\
508  "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\
509  "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\
510  "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\
511 \
512  "movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\
513  "movq %%mm2, %%mm6 \n\t" /* 0RGB0RGB 1 */\
514  "movq %%mm1, %%mm5 \n\t" /* 0RGB0RGB 2 */\
515  "movq %%mm3, %%mm7 \n\t" /* 0RGB0RGB 3 */\
516 \
517  "psllq $40, %%mm0 \n\t" /* RGB00000 0 */\
518  "psllq $40, %%mm2 \n\t" /* RGB00000 1 */\
519  "psllq $40, %%mm1 \n\t" /* RGB00000 2 */\
520  "psllq $40, %%mm3 \n\t" /* RGB00000 3 */\
521 \
522  "punpckhdq %%mm4, %%mm0 \n\t" /* 0RGBRGB0 0 */\
523  "punpckhdq %%mm6, %%mm2 \n\t" /* 0RGBRGB0 1 */\
524  "punpckhdq %%mm5, %%mm1 \n\t" /* 0RGBRGB0 2 */\
525  "punpckhdq %%mm7, %%mm3 \n\t" /* 0RGBRGB0 3 */\
526 \
527  "psrlq $8, %%mm0 \n\t" /* 00RGBRGB 0 */\
528  "movq %%mm2, %%mm6 \n\t" /* 0RGBRGB0 1 */\
529  "psllq $40, %%mm2 \n\t" /* GB000000 1 */\
530  "por %%mm2, %%mm0 \n\t" /* GBRGBRGB 0 */\
531  MOVNTQ(%%mm0, (dst))\
532 \
533  "psrlq $24, %%mm6 \n\t" /* 0000RGBR 1 */\
534  "movq %%mm1, %%mm5 \n\t" /* 0RGBRGB0 2 */\
535  "psllq $24, %%mm1 \n\t" /* BRGB0000 2 */\
536  "por %%mm1, %%mm6 \n\t" /* BRGBRGBR 1 */\
537  MOVNTQ(%%mm6, 8(dst))\
538 \
539  "psrlq $40, %%mm5 \n\t" /* 000000RG 2 */\
540  "psllq $8, %%mm3 \n\t" /* RGBRGB00 3 */\
541  "por %%mm3, %%mm5 \n\t" /* RGBRGBRG 2 */\
542  MOVNTQ(%%mm5, 16(dst))\
543 \
544  "add $24, "#dst" \n\t"\
545 \
546  "add $8, "#index" \n\t"\
547  "cmp "dstw", "#index" \n\t"\
548  " jb 1b \n\t"
549 
/*
 * pshufw-based writer for eight 3-byte pixels (24 bytes per iteration).
 *
 * Expects mm2 = B, mm4 = G, mm5 = R. mm0 and mm7 are loaded with the
 * ff_M24A and ff_M24C masks first, so whatever the caller left in mm7 is
 * overwritten; ff_M24B is applied straight from memory. For each of the
 * three output quadwords the channel bytes are replicated with pshufw,
 * masked so each byte position keeps one channel, and OR-ed together.
 * G is shifted right by one byte after the first quadword, so mm4 is
 * modified; mm1, mm3 and mm6 are scratch.
 *
 * dst itself is advanced by 24, the index advances by 8 and control returns
 * to label 1 while index < dstw (unsigned compare). The enclosing asm must
 * make ff_M24A/ff_M24B/ff_M24C available to MANGLE().
 */
550 #define WRITEBGR24MMXEXT(dst, dstw, index) \
551  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
552  "movq "MANGLE(ff_M24A)", %%mm0 \n\t"\
553  "movq "MANGLE(ff_M24C)", %%mm7 \n\t"\
554  "pshufw $0x50, %%mm2, %%mm1 \n\t" /* B3 B2 B3 B2 B1 B0 B1 B0 */\
555  "pshufw $0x50, %%mm4, %%mm3 \n\t" /* G3 G2 G3 G2 G1 G0 G1 G0 */\
556  "pshufw $0x00, %%mm5, %%mm6 \n\t" /* R1 R0 R1 R0 R1 R0 R1 R0 */\
557 \
558  "pand %%mm0, %%mm1 \n\t" /* B2 B1 B0 */\
559  "pand %%mm0, %%mm3 \n\t" /* G2 G1 G0 */\
560  "pand %%mm7, %%mm6 \n\t" /* R1 R0 */\
561 \
562  "psllq $8, %%mm3 \n\t" /* G2 G1 G0 */\
563  "por %%mm1, %%mm6 \n\t"\
564  "por %%mm3, %%mm6 \n\t"\
565  MOVNTQ(%%mm6, (dst))\
566 \
567  "psrlq $8, %%mm4 \n\t" /* 00 G7 G6 G5 G4 G3 G2 G1 */\
568  "pshufw $0xA5, %%mm2, %%mm1 \n\t" /* B5 B4 B5 B4 B3 B2 B3 B2 */\
569  "pshufw $0x55, %%mm4, %%mm3 \n\t" /* G4 G3 G4 G3 G4 G3 G4 G3 */\
570  "pshufw $0xA5, %%mm5, %%mm6 \n\t" /* R5 R4 R5 R4 R3 R2 R3 R2 */\
571 \
572  "pand "MANGLE(ff_M24B)", %%mm1 \n\t" /* B5 B4 B3 */\
573  "pand %%mm7, %%mm3 \n\t" /* G4 G3 */\
574  "pand %%mm0, %%mm6 \n\t" /* R4 R3 R2 */\
575 \
576  "por %%mm1, %%mm3 \n\t" /* B5 G4 B4 G3 B3 */\
577  "por %%mm3, %%mm6 \n\t"\
578  MOVNTQ(%%mm6, 8(dst))\
579 \
580  "pshufw $0xFF, %%mm2, %%mm1 \n\t" /* B7 B6 B7 B6 B7 B6 B6 B7 */\
581  "pshufw $0xFA, %%mm4, %%mm3 \n\t" /* 00 G7 00 G7 G6 G5 G6 G5 */\
582  "pshufw $0xFA, %%mm5, %%mm6 \n\t" /* R7 R6 R7 R6 R5 R4 R5 R4 */\
583 \
584  "pand %%mm7, %%mm1 \n\t" /* B7 B6 */\
585  "pand %%mm0, %%mm3 \n\t" /* G7 G6 G5 */\
586  "pand "MANGLE(ff_M24B)", %%mm6 \n\t" /* R7 R6 R5 */\
587 \
588  "por %%mm1, %%mm3 \n\t"\
589  "por %%mm3, %%mm6 \n\t"\
590  MOVNTQ(%%mm6, 16(dst))\
591 \
592  "add $24, "#dst" \n\t"\
593 \
594  "add $8, "#index" \n\t"\
595  "cmp "dstw", "#index" \n\t"\
596  " jb 1b \n\t"
597 
/* Select the pshufw-based implementation as the 24-bit writer used below. */
598 #undef WRITEBGR24
599 #define WRITEBGR24(dst, dstw, index) WRITEBGR24MMXEXT(dst, dstw, index)
600 
601 #if HAVE_6REGS
/**
 * Vertically filter planar YUV and write packed 24-bit pixels, using the
 * YSCALEYUV2PACKEDX_ACCURATE filter path.
 *
 * The filter and source-pointer arguments are not read here; the asm walks
 * the filter lists stored in the context, addressed relative to
 * &c->redDither (operand %0). On every pass FF_REG_c is recomputed as
 * dest + 3*index and handed to WRITEBGR24 as the store address.
 *
 * This function spells out its own operand list instead of using
 * YSCALEYUV2PACKEDX_END because it needs the ff_M24* constants and clobbers
 * FF_REG_c in addition to the usual registers.
 *
 * YSCALEYUV2PACKEDX_ACCURATE opens the __asm__ statement; the string literals
 * that follow are only valid as part of that statement.
 */
602 static void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter,
603  const int16_t **lumSrc, int lumFilterSize,
604  const int16_t *chrFilter, const int16_t **chrUSrc,
605  const int16_t **chrVSrc,
606  int chrFilterSize, const int16_t **alpSrc,
607  uint8_t *dest, int dstW, int dstY)
608 {
609  x86_reg dummy=0;
610  x86_reg dstW_reg = dstW;
611  x86_reg uv_off = c->uv_offx2;
612 
613  YSCALEYUV2PACKEDX_ACCURATE
614  YSCALEYUV2RGBX
615  "pxor %%mm7, %%mm7 \n\t"
616  "lea (%%"FF_REG_a", %%"FF_REG_a", 2), %%"FF_REG_c"\n\t" //FIXME optimize
617  "add %4, %%"FF_REG_c" \n\t"
618  WRITEBGR24(%%FF_REGc, "%5", %%FF_REGa)
619  :: "r" (&c->redDither),
620  "m" (dummy), "m" (dummy), "m" (dummy),
621  "r" (dest), "m" (dstW_reg), "m"(uv_off)
622  NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
623  : "%"FF_REG_a, "%"FF_REG_c, "%"FF_REG_d, "%"FF_REG_S
624  );
625 }
626 
/**
 * Vertically filter planar YUV and write packed 24-bit pixels, using the
 * pmulhw-based YSCALEYUV2PACKEDX filter path.
 *
 * The filter and source-pointer arguments are not read here; the asm walks
 * the filter lists stored in the context, addressed relative to
 * &c->redDither (operand %0). On every pass FF_REG_c is recomputed as
 * dest + 3*index and handed to WRITEBGR24 as the store address.
 *
 * This function spells out its own operand list instead of using
 * YSCALEYUV2PACKEDX_END because it needs the ff_M24* constants and clobbers
 * FF_REG_c in addition to the usual registers.
 *
 * YSCALEYUV2PACKEDX opens the __asm__ statement; the string literals that
 * follow are only valid as part of that statement.
 */
627 static void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
628  const int16_t **lumSrc, int lumFilterSize,
629  const int16_t *chrFilter, const int16_t **chrUSrc,
630  const int16_t **chrVSrc,
631  int chrFilterSize, const int16_t **alpSrc,
632  uint8_t *dest, int dstW, int dstY)
633 {
634  x86_reg dummy=0;
635  x86_reg dstW_reg = dstW;
636  x86_reg uv_off = c->uv_offx2;
637 
638  YSCALEYUV2PACKEDX
639  YSCALEYUV2RGBX
640  "pxor %%mm7, %%mm7 \n\t"
641  "lea (%%"FF_REG_a", %%"FF_REG_a", 2), %%"FF_REG_c" \n\t" //FIXME optimize
642  "add %4, %%"FF_REG_c" \n\t"
643  WRITEBGR24(%%FF_REGc, "%5", %%FF_REGa)
644  :: "r" (&c->redDither),
645  "m" (dummy), "m" (dummy), "m" (dummy),
646  "r" (dest), "m" (dstW_reg), "m"(uv_off)
647  NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
648  : "%"FF_REG_a, "%"FF_REG_c, "%"FF_REG_d, "%"FF_REG_S
649  );
650 }
651 #endif /* HAVE_6REGS */
652 
/*
 * Interleave filtered luma and chroma into packed 4:2:2 and store 16 bytes
 * (eight pixels) at dst + index*2.
 *
 * Expects word values in mm3 (U), mm4 (V), mm1 and mm7 (two luma quadwords).
 * packuswb clamps each to unsigned bytes; U and V are interleaved into
 * U V U V ..., then luma is interleaved with that, giving Y U Y V byte order.
 * mm1, mm3, mm4 and mm7 are overwritten.
 *
 * The index advances by 8 and control returns to label 1 while
 * index < dstw (unsigned compare).
 */
653 #define REAL_WRITEYUY2(dst, dstw, index) \
654  "packuswb %%mm3, %%mm3 \n\t"\
655  "packuswb %%mm4, %%mm4 \n\t"\
656  "packuswb %%mm7, %%mm1 \n\t"\
657  "punpcklbw %%mm4, %%mm3 \n\t"\
658  "movq %%mm1, %%mm7 \n\t"\
659  "punpcklbw %%mm3, %%mm1 \n\t"\
660  "punpckhbw %%mm3, %%mm7 \n\t"\
661 \
662  MOVNTQ(%%mm1, (dst, index, 2))\
663  MOVNTQ(%%mm7, 8(dst, index, 2))\
664 \
665  "add $8, "#index" \n\t"\
666  "cmp "dstw", "#index" \n\t"\
667  " jb 1b \n\t"
668 #define WRITEYUY2(dst, dstw, index) REAL_WRITEYUY2(dst, dstw, index)
669 
/**
 * Vertically filter planar YUV and write packed YUYV 4:2:2, using the
 * YSCALEYUV2PACKEDX_ACCURATE filter path. No colour conversion is done; the
 * filtered samples are scaled down by 3 bits and interleaved by WRITEYUY2.
 *
 * The filter and source-pointer arguments are not read here; the asm walks
 * the filter lists stored in the context, addressed relative to
 * &c->redDither (operand %0). dest is operand %4, dstW widened to x86_reg is
 * operand %5 and c->uv_offx2 is operand %6.
 *
 * YSCALEYUV2PACKEDX_ACCURATE opens the __asm__ statement and
 * YSCALEYUV2PACKEDX_END closes it; the string literals in between are only
 * valid as part of that statement.
 */
670 static void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter,
671  const int16_t **lumSrc, int lumFilterSize,
672  const int16_t *chrFilter, const int16_t **chrUSrc,
673  const int16_t **chrVSrc,
674  int chrFilterSize, const int16_t **alpSrc,
675  uint8_t *dest, int dstW, int dstY)
676 {
677  x86_reg dummy=0;
678  x86_reg dstW_reg = dstW;
679  x86_reg uv_off = c->uv_offx2;
680 
681  YSCALEYUV2PACKEDX_ACCURATE
682  /* mm3=U, %%mm4=V, %%mm1=Y1, %%mm7=Y2 */
683  "psraw $3, %%mm3 \n\t"
684  "psraw $3, %%mm4 \n\t"
685  "psraw $3, %%mm1 \n\t"
686  "psraw $3, %%mm7 \n\t"
687  WRITEYUY2(%4, "%5", %%FF_REGa)
688  YSCALEYUV2PACKEDX_END
689 }
690 
/**
 * Vertically filter planar YUV and write packed YUYV 4:2:2, using the
 * pmulhw-based YSCALEYUV2PACKEDX filter path. No colour conversion is done;
 * the filtered samples are scaled down by 3 bits and interleaved by
 * WRITEYUY2.
 *
 * The filter and source-pointer arguments are not read here; the asm walks
 * the filter lists stored in the context, addressed relative to
 * &c->redDither (operand %0). dest is operand %4, dstW widened to x86_reg is
 * operand %5 and c->uv_offx2 is operand %6.
 *
 * YSCALEYUV2PACKEDX opens the __asm__ statement and YSCALEYUV2PACKEDX_END
 * closes it; the string literals in between are only valid as part of that
 * statement.
 */
691 static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter,
692  const int16_t **lumSrc, int lumFilterSize,
693  const int16_t *chrFilter, const int16_t **chrUSrc,
694  const int16_t **chrVSrc,
695  int chrFilterSize, const int16_t **alpSrc,
696  uint8_t *dest, int dstW, int dstY)
697 {
698  x86_reg dummy=0;
699  x86_reg dstW_reg = dstW;
700  x86_reg uv_off = c->uv_offx2;
701 
702  YSCALEYUV2PACKEDX
703  /* mm3=U, %%mm4=V, %%mm1=Y1, %%mm7=Y2 */
704  "psraw $3, %%mm3 \n\t"
705  "psraw $3, %%mm4 \n\t"
706  "psraw $3, %%mm1 \n\t"
707  "psraw $3, %%mm7 \n\t"
708  WRITEYUY2(%4, "%5", %%FF_REGa)
709  YSCALEYUV2PACKEDX_END
710 }
711 
/*
 * Chroma part of the two-line (bilinear) vertical blend.
 *
 *   index  register used as the sample index; zeroed here
 *   c      register holding the base the context offsets are applied to
 *
 * Defines label 1, the loop head the WRITE* macros branch back to. Operands
 * %2 and %3 are the two chroma source lines. U is read at [line + index];
 * V is read from the same lines after temporarily adding UV_OFF_BYTE(c) to
 * the index. For each component the code computes
 *   (line0 - line1) * weight (pmulhw) + (line1 >> 4)
 * with the weight taken from CHR_MMX_FILTER_OFFSET+8(c), then subtracts
 * U_OFFSET / V_OFFSET and starts the green terms.
 *
 * On exit: mm2 and mm5 hold the offset-corrected U and V, mm3 and mm4 the
 * UG_COEFF / VG_COEFF products.
 */
712 #define REAL_YSCALEYUV2RGB_UV(index, c) \
713  "xor "#index", "#index" \n\t"\
714  ".p2align 4 \n\t"\
715  "1: \n\t"\
716  "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
717  "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
718  "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
719  "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
720  "movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
721  "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
722  "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
723  "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
724  "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
725  "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
726  "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
727  "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
728  "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
729  "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
730  "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
731  "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\
732  "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\
733  "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
734  "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
735  "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
736  "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
737  /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
738 
/*
 * Two-line vertical blend of one full-resolution plane (luma, or alpha when
 * called with the alpha lines).
 *
 *   index   sample index register; samples are read at [line + 2*index]
 *   c       register holding the base the context offsets are applied to
 *   b1, b2  registers pointing at the two source lines
 *
 * Computes (b1 - b2) * weight (pmulhw) + (b2 >> 4) for two quadwords, with
 * the weight taken from LUM_MMX_FILTER_OFFSET+8(c) -- the luma weight is used
 * regardless of which plane is passed in.
 *
 * On exit: mm1 and mm7 hold the blended samples; mm0 and mm6 are scratch.
 */
739 #define REAL_YSCALEYUV2RGB_YA(index, c, b1, b2) \
740  "movq ("#b1", "#index", 2), %%mm0 \n\t" /*buf0[eax]*/\
741  "movq ("#b2", "#index", 2), %%mm1 \n\t" /*buf1[eax]*/\
742  "movq 8("#b1", "#index", 2), %%mm6 \n\t" /*buf0[eax]*/\
743  "movq 8("#b2", "#index", 2), %%mm7 \n\t" /*buf1[eax]*/\
744  "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\
745  "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\
746  "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
747  "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
748  "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
749  "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
750  "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
751  "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
752 
/*
 * Final YUV -> RGB arithmetic for the two-line path; "c" is the register
 * holding the base the context offsets are applied to.
 *
 * Expects mm2 / mm5 = offset-corrected U / V, mm3 / mm4 = green products and
 * mm1 / mm7 = blended luma. Applies UB_COEFF, VR_COEFF, Y_OFFSET and Y_COEFF,
 * duplicates each chroma term for two neighbouring pixels, adds luma and
 * clamps to unsigned bytes with packuswb.
 *
 * Result: mm2 = B, mm4 = G, mm5 = R, eight bytes each. mm0, mm1, mm3, mm6 and
 * mm7 are overwritten.
 */
753 #define REAL_YSCALEYUV2RGB_COEFF(c) \
754  "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
755  "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
756  "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\
757  "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\
758  "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
759  "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
760  /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
761  "paddw %%mm3, %%mm4 \n\t"\
762  "movq %%mm2, %%mm0 \n\t"\
763  "movq %%mm5, %%mm6 \n\t"\
764  "movq %%mm4, %%mm3 \n\t"\
765  "punpcklwd %%mm2, %%mm2 \n\t"\
766  "punpcklwd %%mm5, %%mm5 \n\t"\
767  "punpcklwd %%mm4, %%mm4 \n\t"\
768  "paddw %%mm1, %%mm2 \n\t"\
769  "paddw %%mm1, %%mm5 \n\t"\
770  "paddw %%mm1, %%mm4 \n\t"\
771  "punpckhwd %%mm0, %%mm0 \n\t"\
772  "punpckhwd %%mm6, %%mm6 \n\t"\
773  "punpckhwd %%mm3, %%mm3 \n\t"\
774  "paddw %%mm7, %%mm0 \n\t"\
775  "paddw %%mm7, %%mm6 \n\t"\
776  "paddw %%mm7, %%mm3 \n\t"\
777  /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
778  "packuswb %%mm0, %%mm2 \n\t"\
779  "packuswb %%mm6, %%mm5 \n\t"\
780  "packuswb %%mm3, %%mm4 \n\t"\
781 
/*
 * YSCALEYUV2RGB_YA: expanding wrapper, so arguments are macro-expanded
 * before the REAL_ macro stringifies them.
 */
782 #define YSCALEYUV2RGB_YA(index, c, b1, b2) REAL_YSCALEYUV2RGB_YA(index, c, b1, b2)
783 
/*
 * YSCALEYUV2RGB: complete two-line blend and colour conversion. Chroma comes
 * from operands %2 / %3, luma from operands %0 / %1. Leaves mm2 = B,
 * mm4 = G, mm5 = R.
 */
784 #define YSCALEYUV2RGB(index, c) \
785  REAL_YSCALEYUV2RGB_UV(index, c) \
786  REAL_YSCALEYUV2RGB_YA(index, c, %0, %1) \
787  REAL_YSCALEYUV2RGB_COEFF(c)
788 
789 /**
790  * vertical bilinear scale YV12 to RGB
 *
 * Blends the two luma lines buf[0]/buf[1] and the two chroma lines
 * ubuf[0]/ubuf[1], converts to RGB and writes 32-bit pixels to dest.
 *
 * vbuf, dstW, yalpha, uvalpha and y are not referenced in this body: V is
 * read from the ubuf lines at the UV_OFF_BYTE offset stored in the context,
 * the loop bound is read from DSTW_OFFSET in the context, and the blend
 * weights are read from the context filter slots.
 *
 * Operands: %0 buf0 (c), %1 buf1 (d), %2 ubuf0 (S), %3 ubuf1 (D), %4 dest,
 * %5 &c->redDither (a).
791  */
792 static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2],
793  const int16_t *ubuf[2], const int16_t *vbuf[2],
794  const int16_t *abuf[2], uint8_t *dest,
795  int dstW, int yalpha, int uvalpha, int y)
796 {
797  const int16_t *buf0 = buf[0], *buf1 = buf[1],
798  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
799 
800  if (CONFIG_SWSCALE_ALPHA && c->needAlpha) {
801  const int16_t *abuf0 = abuf[0], *abuf1 = abuf[1];
802 #if ARCH_X86_64
  /* 64-bit: r8 is the index and the alpha lines get registers of their own (%6, %7). */
803  __asm__ volatile(
804  YSCALEYUV2RGB(%%r8, %5)
805  YSCALEYUV2RGB_YA(%%r8, %5, %6, %7)
806  "psraw $3, %%mm1 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
807  "psraw $3, %%mm7 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
808  "packuswb %%mm7, %%mm1 \n\t"
809  WRITEBGR32(%4, DSTW_OFFSET"(%5)", %%r8, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
810  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "r" (dest),
811  "a" (&c->redDither),
812  "r" (abuf0), "r" (abuf1)
813  : "%r8"
814  );
815 #else
  /*
   * No spare registers here: the alpha line pointers are passed through
   * c->u_temp / c->v_temp. The b register is saved to ESP_OFFSET in the
   * context and reused for dest, the frame-pointer register is pushed and
   * reused as the index, and %0 / %1 are pushed, pointed at the alpha
   * lines for the alpha blend, then popped again.
   */
816  c->u_temp=(intptr_t)abuf0;
817  c->v_temp=(intptr_t)abuf1;
818  __asm__ volatile(
819  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
820  "mov %4, %%"FF_REG_b" \n\t"
821  "push %%"FF_REG_BP" \n\t"
822  YSCALEYUV2RGB(%%FF_REGBP, %5)
823  "push %0 \n\t"
824  "push %1 \n\t"
825  "mov "U_TEMP"(%5), %0 \n\t"
826  "mov "V_TEMP"(%5), %1 \n\t"
827  YSCALEYUV2RGB_YA(%%FF_REGBP, %5, %0, %1)
828  "psraw $3, %%mm1 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
829  "psraw $3, %%mm7 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
830  "packuswb %%mm7, %%mm1 \n\t"
831  "pop %1 \n\t"
832  "pop %0 \n\t"
833  WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
834  "pop %%"FF_REG_BP" \n\t"
835  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
836  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
837  "a" (&c->redDither)
838  );
839 #endif
840  } else {
  /* No alpha plane: pcmpeqd sets mm7 to all ones and it is written as the alpha byte. */
841  __asm__ volatile(
842  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
843  "mov %4, %%"FF_REG_b" \n\t"
844  "push %%"FF_REG_BP" \n\t"
845  YSCALEYUV2RGB(%%FF_REGBP, %5)
846  "pcmpeqd %%mm7, %%mm7 \n\t"
847  WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
848  "pop %%"FF_REG_BP" \n\t"
849  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
850  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
851  "a" (&c->redDither)
852  );
853  }
854 }
855 
/**
 * Two-line vertical blend of planar YUV, written as packed 24-bit pixels.
 *
 * vbuf, abuf, dstW, yalpha, uvalpha and y are not referenced in this body:
 * V is read from the ubuf lines at the UV_OFF_BYTE offset stored in the
 * context, and loop bound and blend weights are read from the context.
 *
 * The b register is saved to ESP_OFFSET in the context and reused as the
 * running destination pointer (WRITEBGR24 advances it); the frame-pointer
 * register is pushed and reused as the index. Both are restored before the
 * asm statement ends.
 */
856 static void RENAME(yuv2bgr24_2)(SwsContext *c, const int16_t *buf[2],
857  const int16_t *ubuf[2], const int16_t *vbuf[2],
858  const int16_t *abuf[2], uint8_t *dest,
859  int dstW, int yalpha, int uvalpha, int y)
860 {
861  const int16_t *buf0 = buf[0], *buf1 = buf[1],
862  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
863 
864  __asm__ volatile(
865  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
866  "mov %4, %%"FF_REG_b" \n\t"
867  "push %%"FF_REG_BP" \n\t"
868  YSCALEYUV2RGB(%%FF_REGBP, %5)
869  "pxor %%mm7, %%mm7 \n\t"
870  WRITEBGR24(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
871  "pop %%"FF_REG_BP" \n\t"
872  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
873  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
874  "a" (&c->redDither)
875  NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
876  );
877 }
878 
/**
 * Two-line vertical blend of planar YUV, written as 15-bit pixels through
 * WRITERGB15.
 *
 * vbuf, abuf, dstW, yalpha, uvalpha and y are not referenced in this body:
 * V is read from the ubuf lines at the UV_OFF_BYTE offset stored in the
 * context, and loop bound and blend weights are read from the context.
 *
 * The b register is saved to ESP_OFFSET in the context and reused for dest;
 * the frame-pointer register is pushed and reused as the index. Both are
 * restored before the asm statement ends.
 *
 * WRITERGB15 references bF8 through MANGLE(), so the constant is passed in
 * with NAMED_CONSTRAINTS_ADD, as the sibling 16-bit and 24-bit writers do
 * for their constants.
 */
879 static void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2],
880  const int16_t *ubuf[2], const int16_t *vbuf[2],
881  const int16_t *abuf[2], uint8_t *dest,
882  int dstW, int yalpha, int uvalpha, int y)
883 {
884  const int16_t *buf0 = buf[0], *buf1 = buf[1],
885  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
886 
887  __asm__ volatile(
888  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
889  "mov %4, %%"FF_REG_b" \n\t"
890  "push %%"FF_REG_BP" \n\t"
891  YSCALEYUV2RGB(%%FF_REGBP, %5)
892  "pxor %%mm7, %%mm7 \n\t"
893  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
894 #ifdef DITHER1XBPP
895  "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
896  "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
897  "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
898 #endif
899  WRITERGB15(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
900  "pop %%"FF_REG_BP" \n\t"
901  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
902  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
903  "a" (&c->redDither)
904  NAMED_CONSTRAINTS_ADD(bF8)
905  );
906 }
907 
/**
 * Packed RGB565 output from two luma lines and two chroma lines (MMX inline asm).
 *
 * Only buf[0], buf[1], ubuf[0] and ubuf[1] are read in C; vbuf, abuf, dstW,
 * yalpha, uvalpha and y are not referenced directly by this function. The asm
 * receives &c->redDither as operand %5 and addresses context fields relative
 * to it (ESP_OFFSET, DSTW_OFFSET and, with DITHER1XBPP, the dither values).
 *
 * NOTE(review): YSCALEYUV2RGB and WRITERGB16 are defined outside this chunk;
 * presumably they blend the two lines and store the pixels -- confirm there.
 */
908  static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2],
909  const int16_t *ubuf[2], const int16_t *vbuf[2],
910  const int16_t *abuf[2], uint8_t *dest,
911  int dstW, int yalpha, int uvalpha, int y)
912 {
913  const int16_t *buf0 = buf[0], *buf1 = buf[1],
914  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
915 
 /* Operands: %0=buf0, %1=buf1, %2=ubuf0, %3=ubuf1, %4=dest (memory),
  * %5=&c->redDither. The b register is parked in the context at ESP_OFFSET
  * and reloaded with dest; the BP register is pushed, handed to the macros
  * as the index register, then popped before b is restored.
  * With DITHER1XBPP the three dither values are added to B/G/R with
  * unsigned byte saturation (paddusb) before the pixels are written. */
916  __asm__ volatile(
917  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
918  "mov %4, %%"FF_REG_b" \n\t"
919  "push %%"FF_REG_BP" \n\t"
920  YSCALEYUV2RGB(%%FF_REGBP, %5)
921  "pxor %%mm7, %%mm7 \n\t"
922  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
923 #ifdef DITHER1XBPP
924  "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
925  "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
926  "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
927 #endif
928  WRITERGB16(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
929  "pop %%"FF_REG_BP" \n\t"
930  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
931  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
932  "a" (&c->redDither)
933  NAMED_CONSTRAINTS_ADD(bF8,bFC)
934  );
935 }
936 
/*
 * YSCALEYUV2PACKED(index, c): asm text that opens the per-pixel loop of the
 * two-line packed-YUV path. "index" is the register used as loop index and
 * "c" the register holding the context-relative base pointer.
 *
 * Asm operands expected from the enclosing statement (see yuv2yuyv422_2):
 * %0/%1 = the two luma lines, %2/%3 = the two chroma lines.
 *
 * Side effect: before the loop, the values stored at CHR_MMX_FILTER_OFFSET+8
 * and LUM_MMX_FILTER_OFFSET+8 in the context are shifted right by 3 and
 * written back in place; the loop then uses them as blend coefficients.
 *
 * On exit from the macro text: mm3 = blended U, mm4 = blended V,
 * mm1 = blended luma (first 4 samples), mm7 = blended luma (next 4 samples).
 * The label "1:" is only opened here; the branch back to it is not part of
 * this macro (presumably emitted by the WRITE* macro that follows).
 */
937 #define REAL_YSCALEYUV2PACKED(index, c) \
938  "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
939  "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1 \n\t"\
940  "psraw $3, %%mm0 \n\t"\
941  "psraw $3, %%mm1 \n\t"\
942  "movq %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c") \n\t"\
943  "movq %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c") \n\t"\
944  "xor "#index", "#index" \n\t"\
945  ".p2align 4 \n\t"\
946  "1: \n\t"\
947  "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
948  "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
949  "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
950  "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
951  "movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
952  "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
953  "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
954  "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
955  "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
956  "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
957  "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
958  "psraw $7, %%mm3 \n\t" /* uvbuf1[eax] >>7*/\
959  "psraw $7, %%mm4 \n\t" /* uvbuf1[eax+2048] >>7*/\
960  "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
961  "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
962  "movq (%0, "#index", 2), %%mm0 \n\t" /*buf0[eax]*/\
963  "movq (%1, "#index", 2), %%mm1 \n\t" /*buf1[eax]*/\
964  "movq 8(%0, "#index", 2), %%mm6 \n\t" /*buf0[eax]*/\
965  "movq 8(%1, "#index", 2), %%mm7 \n\t" /*buf1[eax]*/\
966  "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\
967  "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\
968  "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
969  "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
970  "psraw $7, %%mm1 \n\t" /* buf1[eax] >>7*/\
971  "psraw $7, %%mm7 \n\t" /* buf1[eax] >>7*/\
972  "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
973  "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
974 
/* Extra expansion level so that macro arguments are expanded before being stringified. */
975 #define YSCALEYUV2PACKED(index, c) REAL_YSCALEYUV2PACKED(index, c)
976 
/**
 * Packed YUYV422 output from two luma lines and two chroma lines (MMX inline asm).
 *
 * Only buf[0], buf[1], ubuf[0] and ubuf[1] are read in C; vbuf, abuf, dstW,
 * yalpha, uvalpha and y are not referenced directly by this function. The asm
 * receives &c->redDither as operand %5 and addresses context fields relative
 * to it (ESP_OFFSET, DSTW_OFFSET, and the offsets used by YSCALEYUV2PACKED).
 *
 * Note that YSCALEYUV2PACKED rewrites two context values in place on every
 * call (it stores them back shifted right by 3).
 * NOTE(review): WRITEYUY2 is defined outside this chunk; presumably it stores
 * the pixels and closes the loop opened by YSCALEYUV2PACKED -- confirm there.
 */
977  static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2],
978  const int16_t *ubuf[2], const int16_t *vbuf[2],
979  const int16_t *abuf[2], uint8_t *dest,
980  int dstW, int yalpha, int uvalpha, int y)
981 {
982  const int16_t *buf0 = buf[0], *buf1 = buf[1],
983  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
984 
 /* Operands: %0=buf0, %1=buf1, %2=ubuf0, %3=ubuf1, %4=dest (memory),
  * %5=&c->redDither. The b register is parked in the context at ESP_OFFSET
  * and reloaded with dest; the BP register is pushed, handed to the macros
  * as the index register, then popped before b is restored. */
985  __asm__ volatile(
986  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
987  "mov %4, %%"FF_REG_b" \n\t"
988  "push %%"FF_REG_BP" \n\t"
989  YSCALEYUV2PACKED(%%FF_REGBP, %5)
990  WRITEYUY2(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
991  "pop %%"FF_REG_BP" \n\t"
992  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
993  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
994  "a" (&c->redDither)
995  );
996 }
997 
/*
 * YSCALEYUV2RGB1(index, c): asm text that opens the per-pixel loop of the
 * single-line YUV -> RGB path. "index" is the register used as loop index and
 * "c" the register holding the context-relative base pointer.
 *
 * Only asm operand %0 (luma line) and %2 (chroma line) are read; the second
 * chroma sample set is fetched from %2 at index + UV_OFF_BYTE(c).
 * Samples are shifted right by 4, offset by U_OFFSET/V_OFFSET/Y_OFFSET and
 * multiplied (pmulhw) by the coefficients stored in the context.
 *
 * On exit from the macro text the packed bytes are in mm2 (B), mm4 (G) and
 * mm5 (R); mm0, mm1, mm3, mm6 and mm7 are clobbered. The label "1:" is only
 * opened here; the branch back to it is not part of this macro (presumably
 * emitted by the WRITE* macro that follows).
 */
998 #define REAL_YSCALEYUV2RGB1(index, c) \
999  "xor "#index", "#index" \n\t"\
1000  ".p2align 4 \n\t"\
1001  "1: \n\t"\
1002  "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\
1003  "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
1004  "movq (%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
1005  "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
1006  "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] >>4*/\
1007  "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] >>4*/\
1008  "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\
1009  "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\
1010  "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
1011  "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
1012  "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
1013  "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
1014  /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
1015  "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
1016  "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\
1017  "psraw $4, %%mm1 \n\t" /* buf0[eax] >>4*/\
1018  "psraw $4, %%mm7 \n\t" /* buf0[eax] >>4*/\
1019  "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
1020  "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
1021  "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\
1022  "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\
1023  "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
1024  "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
1025  /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
1026  "paddw %%mm3, %%mm4 \n\t"\
1027  "movq %%mm2, %%mm0 \n\t"\
1028  "movq %%mm5, %%mm6 \n\t"\
1029  "movq %%mm4, %%mm3 \n\t"\
1030  "punpcklwd %%mm2, %%mm2 \n\t"\
1031  "punpcklwd %%mm5, %%mm5 \n\t"\
1032  "punpcklwd %%mm4, %%mm4 \n\t"\
1033  "paddw %%mm1, %%mm2 \n\t"\
1034  "paddw %%mm1, %%mm5 \n\t"\
1035  "paddw %%mm1, %%mm4 \n\t"\
1036  "punpckhwd %%mm0, %%mm0 \n\t"\
1037  "punpckhwd %%mm6, %%mm6 \n\t"\
1038  "punpckhwd %%mm3, %%mm3 \n\t"\
1039  "paddw %%mm7, %%mm0 \n\t"\
1040  "paddw %%mm7, %%mm6 \n\t"\
1041  "paddw %%mm7, %%mm3 \n\t"\
1042  /* mm2=B1, mm0=B2, mm4=G1, mm3=G2, mm5=R1, mm6=R2 */\
1043  "packuswb %%mm0, %%mm2 \n\t"\
1044  "packuswb %%mm6, %%mm5 \n\t"\
1045  "packuswb %%mm3, %%mm4 \n\t"\
1046 
/* Extra expansion level so that macro arguments are expanded before being stringified. */
1047 #define YSCALEYUV2RGB1(index, c) REAL_YSCALEYUV2RGB1(index, c)
1048 
/*
 * YSCALEYUV2RGB1b(index, c): same as YSCALEYUV2RGB1, except that chroma is
 * taken from both chroma lines: asm operands %2 and %3 are added and the sum
 * is shifted right by 5 with a logical shift (psrlw; see the FIXME about
 * overflow). Luma is still read from operand %0 only.
 *
 * On exit from the macro text the packed bytes are in mm2 (B), mm4 (G) and
 * mm5 (R); mm0, mm1, mm3, mm6 and mm7 are clobbered. The label "1:" is only
 * opened here; the branch back to it is not part of this macro.
 */
1049 // do vertical chrominance interpolation
1050 #define REAL_YSCALEYUV2RGB1b(index, c) \
1051  "xor "#index", "#index" \n\t"\
1052  ".p2align 4 \n\t"\
1053  "1: \n\t"\
1054  "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
1055  "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
1056  "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
1057  "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
1058  "movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
1059  "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
1060  "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
1061  "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
1062  "psrlw $5, %%mm3 \n\t" /*FIXME might overflow*/\
1063  "psrlw $5, %%mm4 \n\t" /*FIXME might overflow*/\
1064  "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\
1065  "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\
1066  "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
1067  "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
1068  "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
1069  "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
1070  /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
1071  "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
1072  "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\
1073  "psraw $4, %%mm1 \n\t" /* buf0[eax] >>4*/\
1074  "psraw $4, %%mm7 \n\t" /* buf0[eax] >>4*/\
1075  "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
1076  "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
1077  "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\
1078  "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\
1079  "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
1080  "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
1081  /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
1082  "paddw %%mm3, %%mm4 \n\t"\
1083  "movq %%mm2, %%mm0 \n\t"\
1084  "movq %%mm5, %%mm6 \n\t"\
1085  "movq %%mm4, %%mm3 \n\t"\
1086  "punpcklwd %%mm2, %%mm2 \n\t"\
1087  "punpcklwd %%mm5, %%mm5 \n\t"\
1088  "punpcklwd %%mm4, %%mm4 \n\t"\
1089  "paddw %%mm1, %%mm2 \n\t"\
1090  "paddw %%mm1, %%mm5 \n\t"\
1091  "paddw %%mm1, %%mm4 \n\t"\
1092  "punpckhwd %%mm0, %%mm0 \n\t"\
1093  "punpckhwd %%mm6, %%mm6 \n\t"\
1094  "punpckhwd %%mm3, %%mm3 \n\t"\
1095  "paddw %%mm7, %%mm0 \n\t"\
1096  "paddw %%mm7, %%mm6 \n\t"\
1097  "paddw %%mm7, %%mm3 \n\t"\
1098  /* mm2=B1, mm0=B2, mm4=G1, mm3=G2, mm5=R1, mm6=R2 */\
1099  "packuswb %%mm0, %%mm2 \n\t"\
1100  "packuswb %%mm6, %%mm5 \n\t"\
1101  "packuswb %%mm3, %%mm4 \n\t"\
1102 
/* Extra expansion level so that macro arguments are expanded before being stringified. */
1103 #define YSCALEYUV2RGB1b(index, c) REAL_YSCALEYUV2RGB1b(index, c)
1104 
/*
 * YSCALEYUV2RGB1_ALPHA(index): asm text that loads eight 16-bit alpha samples
 * from asm operand %1 at the given index register, shifts each right by 7
 * (arithmetic) and packs them with unsigned saturation into the bytes of mm7.
 * mm1 is clobbered. The callers in this file pass abuf0 as operand %1 when
 * they use this macro.
 */
1105 #define REAL_YSCALEYUV2RGB1_ALPHA(index) \
1106  "movq (%1, "#index", 2), %%mm7 \n\t" /* abuf0[index ] */\
1107  "movq 8(%1, "#index", 2), %%mm1 \n\t" /* abuf0[index+4] */\
1108  "psraw $7, %%mm7 \n\t" /* abuf0[index ] >>7 */\
1109  "psraw $7, %%mm1 \n\t" /* abuf0[index+4] >>7 */\
1110  "packuswb %%mm1, %%mm7 \n\t"
/* Extra expansion level so that macro arguments are expanded before being stringified. */
1111 #define YSCALEYUV2RGB1_ALPHA(index) REAL_YSCALEYUV2RGB1_ALPHA(index)
1112 
1113 /**
1114  * YV12 to RGB without scaling or interpolating
 *
 * Single-luma-line RGB32 output (MMX inline asm). Four asm variants exist:
 *  - uvalpha < 2048: chroma is taken from ubuf[0] only (YSCALEYUV2RGB1);
 *    otherwise ubuf[0] and ubuf[1] are combined (YSCALEYUV2RGB1b).
 *  - CONFIG_SWSCALE_ALPHA && c->needAlpha: alpha is loaded from abuf0, which
 *    is then passed as operand %1 instead of buf1; otherwise mm7 is set to
 *    all ones (pcmpeqd of the register with itself) and passed to WRITEBGR32
 *    in its alpha argument position.
 *
 * vbuf, dstW and y are not referenced directly by this function; the asm
 * addresses context fields relative to &c->redDither (operand %5).
 * NOTE(review): WRITEBGR32 is defined outside this chunk; presumably it
 * stores the pixels and closes the loop opened by the YSCALE* macro.
1115  */
1116  static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0,
1117  const int16_t *ubuf[2], const int16_t *vbuf[2],
1118  const int16_t *abuf0, uint8_t *dest,
1119  int dstW, int uvalpha, int y)
1120 {
1121  const int16_t *ubuf0 = ubuf[0];
1122  const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1123 
1124  if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
1125  const int16_t *ubuf1 = ubuf[0];
1126  if (CONFIG_SWSCALE_ALPHA && c->needAlpha) {
1127  __asm__ volatile(
1128  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1129  "mov %4, %%"FF_REG_b" \n\t"
1130  "push %%"FF_REG_BP" \n\t"
1131  YSCALEYUV2RGB1(%%FF_REGBP, %5)
1132  YSCALEYUV2RGB1_ALPHA(%%FF_REGBP)
1133  WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1134  "pop %%"FF_REG_BP" \n\t"
1135  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
1136  :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1137  "a" (&c->redDither)
1138  );
1139  } else {
1140  __asm__ volatile(
1141  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1142  "mov %4, %%"FF_REG_b" \n\t"
1143  "push %%"FF_REG_BP" \n\t"
1144  YSCALEYUV2RGB1(%%FF_REGBP, %5)
1145  "pcmpeqd %%mm7, %%mm7 \n\t"
1146  WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1147  "pop %%"FF_REG_BP" \n\t"
1148  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
1149  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1150  "a" (&c->redDither)
1151  );
1152  }
1153  } else {
1154  const int16_t *ubuf1 = ubuf[1];
1155  if (CONFIG_SWSCALE_ALPHA && c->needAlpha) {
1156  __asm__ volatile(
1157  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1158  "mov %4, %%"FF_REG_b" \n\t"
1159  "push %%"FF_REG_BP" \n\t"
1160  YSCALEYUV2RGB1b(%%FF_REGBP, %5)
1161  YSCALEYUV2RGB1_ALPHA(%%FF_REGBP)
1162  WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1163  "pop %%"FF_REG_BP" \n\t"
1164  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
1165  :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1166  "a" (&c->redDither)
1167  );
1168  } else {
1169  __asm__ volatile(
1170  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1171  "mov %4, %%"FF_REG_b" \n\t"
1172  "push %%"FF_REG_BP" \n\t"
1173  YSCALEYUV2RGB1b(%%FF_REGBP, %5)
1174  "pcmpeqd %%mm7, %%mm7 \n\t"
1175  WRITEBGR32(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1176  "pop %%"FF_REG_BP" \n\t"
1177  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
1178  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1179  "a" (&c->redDither)
1180  );
1181  }
1182  }
1183 }
1184 
/**
 * Single-luma-line packed BGR24 output (MMX inline asm).
 *
 * uvalpha < 2048 takes chroma from ubuf[0] only (YSCALEYUV2RGB1); otherwise
 * ubuf[0] and ubuf[1] are combined (YSCALEYUV2RGB1b). buf1 is an alias of
 * buf0 that only fills asm operand %1. vbuf, abuf0, dstW and y are not
 * referenced directly by this function; the asm addresses context fields
 * relative to &c->redDither (operand %5).
 *
 * NOTE(review): WRITEBGR24 is defined outside this chunk; presumably it
 * stores the pixels and closes the loop opened by the YSCALE* macro.
 */
1185  static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0,
1186  const int16_t *ubuf[2], const int16_t *vbuf[2],
1187  const int16_t *abuf0, uint8_t *dest,
1188  int dstW, int uvalpha, int y)
1189 {
1190  const int16_t *ubuf0 = ubuf[0];
1191  const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1192 
1193  if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
1194  const int16_t *ubuf1 = ubuf[0];
1195  __asm__ volatile(
1196  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1197  "mov %4, %%"FF_REG_b" \n\t"
1198  "push %%"FF_REG_BP" \n\t"
1199  YSCALEYUV2RGB1(%%FF_REGBP, %5)
1200  "pxor %%mm7, %%mm7 \n\t"
1201  WRITEBGR24(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
1202  "pop %%"FF_REG_BP" \n\t"
1203  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
1204  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1205  "a" (&c->redDither)
1206  NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
1207  );
1208  } else {
1209  const int16_t *ubuf1 = ubuf[1];
1210  __asm__ volatile(
1211  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1212  "mov %4, %%"FF_REG_b" \n\t"
1213  "push %%"FF_REG_BP" \n\t"
1214  YSCALEYUV2RGB1b(%%FF_REGBP, %5)
1215  "pxor %%mm7, %%mm7 \n\t"
1216  WRITEBGR24(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
1217  "pop %%"FF_REG_BP" \n\t"
1218  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
1219  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1220  "a" (&c->redDither)
1221  NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
1222  );
1223  }
1224 }
1225 
/**
 * Single-luma-line packed RGB555 output (MMX inline asm).
 *
 * uvalpha < 2048 takes chroma from ubuf[0] only (YSCALEYUV2RGB1); otherwise
 * ubuf[0] and ubuf[1] are combined (YSCALEYUV2RGB1b). buf1 is an alias of
 * buf0 that only fills asm operand %1. vbuf, abuf0, dstW and y are not
 * referenced directly by this function; the asm addresses context fields
 * relative to &c->redDither (operand %5).
 *
 * Both asm statements declare bF8 through NAMED_CONSTRAINTS_ADD, as the
 * sibling functions do for their constants, so the mask resolves when direct
 * symbol references are unavailable.
 * NOTE(review): WRITERGB15 is defined outside this chunk -- confirm that bF8
 * is the only constant it references.
 */
1226  static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
1227  const int16_t *ubuf[2], const int16_t *vbuf[2],
1228  const int16_t *abuf0, uint8_t *dest,
1229  int dstW, int uvalpha, int y)
1230 {
1231  const int16_t *ubuf0 = ubuf[0];
1232  const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1233 
1234  if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
1235  const int16_t *ubuf1 = ubuf[0];
1236  __asm__ volatile(
1237  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1238  "mov %4, %%"FF_REG_b" \n\t"
1239  "push %%"FF_REG_BP" \n\t"
1240  YSCALEYUV2RGB1(%%FF_REGBP, %5)
1241  "pxor %%mm7, %%mm7 \n\t"
1242  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1243 #ifdef DITHER1XBPP
1244  "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
1245  "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
1246  "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
1247 #endif
1248  WRITERGB15(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
1249  "pop %%"FF_REG_BP" \n\t"
1250  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
1251  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1252  "a" (&c->redDither)
1253  NAMED_CONSTRAINTS_ADD(bF8)
1254  );
1255  } else {
1256  const int16_t *ubuf1 = ubuf[1];
1257  __asm__ volatile(
1258  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1259  "mov %4, %%"FF_REG_b" \n\t"
1260  "push %%"FF_REG_BP" \n\t"
1261  YSCALEYUV2RGB1b(%%FF_REGBP, %5)
1262  "pxor %%mm7, %%mm7 \n\t"
1263  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1264 #ifdef DITHER1XBPP
1265  "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
1266  "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
1267  "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
1268 #endif
1269  WRITERGB15(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
1270  "pop %%"FF_REG_BP" \n\t"
1271  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
1272  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1273  "a" (&c->redDither)
1274  NAMED_CONSTRAINTS_ADD(bF8)
1275  );
1276  }
1277 }
1278 
/**
 * Single-luma-line packed RGB565 output (MMX inline asm).
 *
 * uvalpha < 2048 takes chroma from ubuf[0] only (YSCALEYUV2RGB1); otherwise
 * ubuf[0] and ubuf[1] are combined (YSCALEYUV2RGB1b). buf1 is an alias of
 * buf0 that only fills asm operand %1. vbuf, abuf0, dstW and y are not
 * referenced directly by this function; the asm addresses context fields
 * relative to &c->redDither (operand %5). With DITHER1XBPP the three dither
 * values are added to B/G/R with unsigned byte saturation (paddusb).
 *
 * NOTE(review): WRITERGB16 is defined outside this chunk; presumably it
 * stores the pixels and closes the loop opened by the YSCALE* macro.
 */
1279  static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
1280  const int16_t *ubuf[2], const int16_t *vbuf[2],
1281  const int16_t *abuf0, uint8_t *dest,
1282  int dstW, int uvalpha, int y)
1283 {
1284  const int16_t *ubuf0 = ubuf[0];
1285  const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1286 
1287  if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
1288  const int16_t *ubuf1 = ubuf[0];
1289  __asm__ volatile(
1290  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1291  "mov %4, %%"FF_REG_b" \n\t"
1292  "push %%"FF_REG_BP" \n\t"
1293  YSCALEYUV2RGB1(%%FF_REGBP, %5)
1294  "pxor %%mm7, %%mm7 \n\t"
1295  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1296 #ifdef DITHER1XBPP
1297  "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
1298  "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
1299  "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
1300 #endif
1301  WRITERGB16(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
1302  "pop %%"FF_REG_BP" \n\t"
1303  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
1304  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1305  "a" (&c->redDither)
1306  NAMED_CONSTRAINTS_ADD(bF8,bFC)
1307  );
1308  } else {
1309  const int16_t *ubuf1 = ubuf[1];
1310  __asm__ volatile(
1311  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1312  "mov %4, %%"FF_REG_b" \n\t"
1313  "push %%"FF_REG_BP" \n\t"
1314  YSCALEYUV2RGB1b(%%FF_REGBP, %5)
1315  "pxor %%mm7, %%mm7 \n\t"
1316  /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1317 #ifdef DITHER1XBPP
1318  "paddusb "BLUE_DITHER"(%5), %%mm2 \n\t"
1319  "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
1320  "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
1321 #endif
1322  WRITERGB16(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
1323  "pop %%"FF_REG_BP" \n\t"
1324  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
1325  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1326  "a" (&c->redDither)
1327  NAMED_CONSTRAINTS_ADD(bF8,bFC)
1328  );
1329  }
1330 }
1331 
/*
 * YSCALEYUV2PACKED1(index, c): asm text that opens the per-pixel loop of the
 * single-line packed-YUV path. "index" is the register used as loop index and
 * "c" the register holding the context-relative base pointer.
 *
 * Only asm operand %0 (luma line) and %2 (chroma line) are read; the second
 * chroma sample set is fetched from %2 at index + UV_OFF_BYTE(c). Every
 * sample is shifted right by 7 (arithmetic).
 *
 * On exit from the macro text: mm3 and mm4 hold the two chroma sample sets,
 * mm1 the first 4 luma samples and mm7 the next 4. The label "1:" is only
 * opened here; the branch back to it is not part of this macro.
 */
1332 #define REAL_YSCALEYUV2PACKED1(index, c) \
1333  "xor "#index", "#index" \n\t"\
1334  ".p2align 4 \n\t"\
1335  "1: \n\t"\
1336  "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\
1337  "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
1338  "movq (%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
1339  "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
1340  "psraw $7, %%mm3 \n\t" \
1341  "psraw $7, %%mm4 \n\t" \
1342  "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
1343  "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\
1344  "psraw $7, %%mm1 \n\t" \
1345  "psraw $7, %%mm7 \n\t" \
1346 
/* Extra expansion level so that macro arguments are expanded before being stringified. */
1347 #define YSCALEYUV2PACKED1(index, c) REAL_YSCALEYUV2PACKED1(index, c)
1348 
/*
 * YSCALEYUV2PACKED1b(index, c): same as YSCALEYUV2PACKED1, except that chroma
 * is taken from both chroma lines: asm operands %2 and %3 are added and the
 * sum is shifted right by 8 with a logical shift (psrlw). Luma is still read
 * from operand %0 only and shifted right by 7 (arithmetic).
 *
 * On exit from the macro text: mm3 and mm4 hold the two chroma sample sets,
 * mm1 the first 4 luma samples and mm7 the next 4; mm2 and mm5 are clobbered.
 * The label "1:" is only opened here; the branch back to it is not part of
 * this macro.
 */
1349 #define REAL_YSCALEYUV2PACKED1b(index, c) \
1350  "xor "#index", "#index" \n\t"\
1351  ".p2align 4 \n\t"\
1352  "1: \n\t"\
1353  "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
1354  "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
1355  "add "UV_OFF_BYTE"("#c"), "#index" \n\t" \
1356  "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
1357  "movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
1358  "sub "UV_OFF_BYTE"("#c"), "#index" \n\t" \
1359  "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
1360  "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
1361  "psrlw $8, %%mm3 \n\t" \
1362  "psrlw $8, %%mm4 \n\t" \
1363  "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
1364  "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\
1365  "psraw $7, %%mm1 \n\t" \
1366  "psraw $7, %%mm7 \n\t"
/* Extra expansion level so that macro arguments are expanded before being stringified. */
1367 #define YSCALEYUV2PACKED1b(index, c) REAL_YSCALEYUV2PACKED1b(index, c)
1368 
/**
 * Single-luma-line packed YUYV422 output (MMX inline asm).
 *
 * uvalpha < 2048 takes chroma from ubuf[0] only (YSCALEYUV2PACKED1);
 * otherwise ubuf[0] and ubuf[1] are combined (YSCALEYUV2PACKED1b). buf1 is an
 * alias of buf0 that only fills asm operand %1. vbuf, abuf0, dstW and y are
 * not referenced directly by this function; the asm addresses context fields
 * relative to &c->redDither (operand %5).
 *
 * NOTE(review): WRITEYUY2 is defined outside this chunk; presumably it stores
 * the pixels and closes the loop opened by the YSCALE* macro.
 */
1369  static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0,
1370  const int16_t *ubuf[2], const int16_t *vbuf[2],
1371  const int16_t *abuf0, uint8_t *dest,
1372  int dstW, int uvalpha, int y)
1373 {
1374  const int16_t *ubuf0 = ubuf[0];
1375  const int16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1376 
1377  if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
1378  const int16_t *ubuf1 = ubuf[0];
1379  __asm__ volatile(
1380  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1381  "mov %4, %%"FF_REG_b" \n\t"
1382  "push %%"FF_REG_BP" \n\t"
1383  YSCALEYUV2PACKED1(%%FF_REGBP, %5)
1384  WRITEYUY2(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
1385  "pop %%"FF_REG_BP" \n\t"
1386  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
1387  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1388  "a" (&c->redDither)
1389  );
1390  } else {
1391  const int16_t *ubuf1 = ubuf[1];
1392  __asm__ volatile(
1393  "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
1394  "mov %4, %%"FF_REG_b" \n\t"
1395  "push %%"FF_REG_BP" \n\t"
1396  YSCALEYUV2PACKED1b(%%FF_REGBP, %5)
1397  WRITEYUY2(%%FF_REGb, DSTW_OFFSET"(%5)", %%FF_REGBP)
1398  "pop %%"FF_REG_BP" \n\t"
1399  "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
1400  :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
1401  "a" (&c->redDither)
1402  );
1403  }
1404 }
/**
 * Install the MMX/MMXEXT output and fast-horizontal-scale function pointers
 * of this template into the context.
 *
 * - use_mmx_vfilter is reset to 0 and only set to 1 on the non-ACCURATE_RND
 *   path below.
 * - The packed-output hooks are only touched when the destination format is
 *   not 16BPS, not NBPS, not semi-planar YUV, not GRAYF32 (BE/LE) and
 *   SWS_BITEXACT is not set. Within that, all of them additionally require
 *   SWS_FULL_CHR_H_INT to be unset.
 * - With SWS_ACCURATE_RND the *_X_ar variants are chosen for yuv2packedX,
 *   otherwise the *_X variants; BGR24 is only offered when HAVE_6REGS.
 * - For 8-bit sources with dstBpc <= 14 the fast horizontal scalers are set
 *   to the MMXEXT versions when SWS_FAST_BILINEAR is requested and
 *   canMMXEXTBeUsed is set, and to NULL otherwise.
 *
 * Formats without a case keep whatever function pointer was set before.
 */
1405  static av_cold void RENAME(sws_init_swscale)(SwsContext *c)
1406 {
1407  enum AVPixelFormat dstFormat = c->dstFormat;
1408 
1409  c->use_mmx_vfilter= 0;
1410  if (!is16BPS(dstFormat) && !isNBPS(dstFormat) && !isSemiPlanarYUV(dstFormat)
1411  && dstFormat != AV_PIX_FMT_GRAYF32BE && dstFormat != AV_PIX_FMT_GRAYF32LE
1412  && !(c->flags & SWS_BITEXACT)) {
1413  if (c->flags & SWS_ACCURATE_RND) {
1414  if (!(c->flags & SWS_FULL_CHR_H_INT)) {
1415  switch (c->dstFormat) {
1416  case AV_PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X_ar); break;
1417 #if HAVE_6REGS
1418  case AV_PIX_FMT_BGR24: c->yuv2packedX = RENAME(yuv2bgr24_X_ar); break;
1419 #endif
1420  case AV_PIX_FMT_RGB555: c->yuv2packedX = RENAME(yuv2rgb555_X_ar); break;
1421  case AV_PIX_FMT_RGB565: c->yuv2packedX = RENAME(yuv2rgb565_X_ar); break;
1422  case AV_PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X_ar); break;
1423  default: break;
1424  }
1425  }
1426  } else {
1427  c->use_mmx_vfilter= 1;
1428  if (!(c->flags & SWS_FULL_CHR_H_INT)) {
1429  switch (c->dstFormat) {
1430  case AV_PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X); break;
1431  case AV_PIX_FMT_BGR32: c->yuv2packedX = RENAME(yuv2bgr32_X); break;
1432 #if HAVE_6REGS
1433  case AV_PIX_FMT_BGR24: c->yuv2packedX = RENAME(yuv2bgr24_X); break;
1434 #endif
1435  case AV_PIX_FMT_RGB555: c->yuv2packedX = RENAME(yuv2rgb555_X); break;
1436  case AV_PIX_FMT_RGB565: c->yuv2packedX = RENAME(yuv2rgb565_X); break;
1437  case AV_PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X); break;
1438  default: break;
1439  }
1440  }
1441  }
 /* One-line and two-line packed writers, shared by both rounding modes. */
1442  if (!(c->flags & SWS_FULL_CHR_H_INT)) {
1443  switch (c->dstFormat) {
1444  case AV_PIX_FMT_RGB32:
1445  c->yuv2packed1 = RENAME(yuv2rgb32_1);
1446  c->yuv2packed2 = RENAME(yuv2rgb32_2);
1447  break;
1448  case AV_PIX_FMT_BGR24:
1449  c->yuv2packed1 = RENAME(yuv2bgr24_1);
1450  c->yuv2packed2 = RENAME(yuv2bgr24_2);
1451  break;
1452  case AV_PIX_FMT_RGB555:
1453  c->yuv2packed1 = RENAME(yuv2rgb555_1);
1454  c->yuv2packed2 = RENAME(yuv2rgb555_2);
1455  break;
1456  case AV_PIX_FMT_RGB565:
1457  c->yuv2packed1 = RENAME(yuv2rgb565_1);
1458  c->yuv2packed2 = RENAME(yuv2rgb565_2);
1459  break;
1460  case AV_PIX_FMT_YUYV422:
1461  c->yuv2packed1 = RENAME(yuv2yuyv422_1);
1462  c->yuv2packed2 = RENAME(yuv2yuyv422_2);
1463  break;
1464  default:
1465  break;
1466  }
1467  }
1468  }
1469 
1470  if (c->srcBpc == 8 && c->dstBpc <= 14) {
1471  // Use the new MMX scaler if the MMXEXT one can't be used (it is faster than the x86 ASM one).
1472  if (c->flags & SWS_FAST_BILINEAR && c->canMMXEXTBeUsed) {
1473  c->hyscale_fast = ff_hyscale_fast_mmxext;
1474  c->hcscale_fast = ff_hcscale_fast_mmxext;
1475  } else {
1476  c->hyscale_fast = NULL;
1477  c->hcscale_fast = NULL;
1478  }
1479  }
1480 }
WRITEBGR32
#define WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)
Definition: swscale_template.c:251
AVPixelFormat
AVPixelFormat
Pixel format.
Definition: pixfmt.h:71
YSCALEYUV2PACKEDX_ACCURATE
#define YSCALEYUV2PACKEDX_ACCURATE
Definition: swscale_template.c:189
ALP_MMX_FILTER_OFFSET
#define ALP_MMX_FILTER_OFFSET
Definition: swscale_internal.h:493
YSCALEYUV2RGB1
#define YSCALEYUV2RGB1(index, c)
Definition: swscale_template.c:1047
YSCALEYUV2PACKEDX_YA
#define YSCALEYUV2PACKEDX_YA(offset, coeff, src1, src2, dst1, dst2)
Definition: swscale_template.c:61
AV_PIX_FMT_BGR32
#define AV_PIX_FMT_BGR32
Definition: pixfmt.h:453
NAMED_CONSTRAINTS_ADD
#define NAMED_CONSTRAINTS_ADD(...)
Definition: asm.h:145
YSCALEYUV2RGB
#define YSCALEYUV2RGB(index, c)
Definition: swscale_template.c:784
AV_PIX_FMT_BGR24
@ AV_PIX_FMT_BGR24
packed RGB 8:8:8, 24bpp, BGRBGR...
Definition: pixfmt.h:76
AV_PIX_FMT_GRAYF32LE
@ AV_PIX_FMT_GRAYF32LE
IEEE-754 single precision Y, 32bpp, little-endian.
Definition: pixfmt.h:364
SWS_FAST_BILINEAR
#define SWS_FAST_BILINEAR
Definition: swscale.h:65
is16BPS
static av_always_inline int is16BPS(enum AVPixelFormat pix_fmt)
Definition: swscale_internal.h:703
SWS_BITEXACT
#define SWS_BITEXACT
Definition: swscale.h:91
DSTW_OFFSET
#define DSTW_OFFSET
Definition: swscale_internal.h:487
dummy
int dummy
Definition: motion.c:66
isNBPS
static av_always_inline int isNBPS(enum AVPixelFormat pix_fmt)
Definition: swscale_internal.h:717
ff_hcscale_fast_mmxext
void ff_hcscale_fast_mmxext(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth, const uint8_t *src1, const uint8_t *src2, int srcW, int xInc)
av_cold
#define av_cold
Definition: attributes.h:90
sws_init_swscale
static av_cold void sws_init_swscale(SwsContext *c)
Definition: swscale.c:558
BLUE_DITHER
#define BLUE_DITHER
Definition: swscale_internal.h:476
YSCALEYUV2RGB1b
#define YSCALEYUV2RGB1b(index, c)
Definition: swscale_template.c:1103
WRITERGB15
#define WRITERGB15(dst, dstw, index)
Definition: swscale_template.c:447
WRITEBGR24
#define WRITEBGR24(dst, dstw, index)
Definition: swscale_template.c:599
isSemiPlanarYUV
static av_always_inline int isSemiPlanarYUV(enum AVPixelFormat pix_fmt)
Definition: swscale_internal.h:749
NULL
#define NULL
Definition: coverity.c:32
YSCALEYUV2PACKEDX
#define YSCALEYUV2PACKEDX
Definition: swscale_template.c:80
AV_PIX_FMT_YUYV422
@ AV_PIX_FMT_YUYV422
packed YUV 4:2:2, 16bpp, Y0 Cb Y1 Cr
Definition: pixfmt.h:74
U_TEMP
#define U_TEMP
Definition: swscale_internal.h:490
GREEN_DITHER
#define GREEN_DITHER
Definition: swscale_internal.h:475
YSCALEYUV2RGB1_ALPHA
#define YSCALEYUV2RGB1_ALPHA(index)
Definition: swscale_template.c:1111
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
SWS_FULL_CHR_H_INT
#define SWS_FULL_CHR_H_INT
Definition: swscale.h:86
asm.h
RED_DITHER
#define RED_DITHER
Definition: swscale_internal.h:474
AV_PIX_FMT_RGB32
#define AV_PIX_FMT_RGB32
Definition: pixfmt.h:451
SWS_ACCURATE_RND
#define SWS_ACCURATE_RND
Definition: swscale.h:90
YSCALEYUV2RGB_YA
#define YSCALEYUV2RGB_YA(index, c, b1, b2)
Definition: swscale_template.c:782
AV_PIX_FMT_RGB555
#define AV_PIX_FMT_RGB555
Definition: pixfmt.h:466
swscale_internal.h
YSCALEYUV2PACKED
#define YSCALEYUV2PACKED(index, c)
Definition: swscale_template.c:975
AV_PIX_FMT_RGB565
#define AV_PIX_FMT_RGB565
Definition: pixfmt.h:465
V_TEMP
#define V_TEMP
Definition: swscale_internal.h:491
__asm__
__asm__(".macro parse_r var r\n\t" "\\var = -1\n\t" _IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31) ".iflt \\var\n\t" ".error \"Unable to parse register name \\r\"\n\t" ".endif\n\t" ".endm")
AV_PIX_FMT_GRAYF32BE
@ AV_PIX_FMT_GRAYF32BE
IEEE-754 single precision Y, 32bpp, big-endian.
Definition: pixfmt.h:363
WRITEYUY2
#define WRITEYUY2(dst, dstw, index)
Definition: swscale_template.c:668
Y_TEMP
#define Y_TEMP
Definition: swscale_internal.h:492
YSCALEYUV2PACKEDX_ACCURATE_YA
#define YSCALEYUV2PACKEDX_ACCURATE_YA(offset)
Definition: swscale_template.c:144
YSCALEYUV2RGBX
#define YSCALEYUV2RGBX
Definition: swscale_template.c:193
ff_hyscale_fast_mmxext
void ff_hyscale_fast_mmxext(SwsContext *c, int16_t *dst, int dstWidth, const uint8_t *src, int srcW, int xInc)
ESP_OFFSET
#define ESP_OFFSET
Definition: swscale_internal.h:488
x86_reg
int x86_reg
Definition: asm.h:72
YSCALEYUV2PACKEDX_END
#define YSCALEYUV2PACKEDX_END
Definition: swscale_template.c:84
WRITERGB16
#define WRITERGB16(dst, dstw, index)
Definition: swscale_template.c:370
YSCALEYUV2PACKED1
#define YSCALEYUV2PACKED1(index, c)
Definition: swscale_template.c:1347
SwsContext
Definition: swscale_internal.h:299
YSCALEYUV2PACKED1b
#define YSCALEYUV2PACKED1b(index, c)
Definition: swscale_template.c:1367
RENAME
#define RENAME(name)
Definition: ffv1dec.c:117