FFmpeg
yuv2rgb.c
Go to the documentation of this file.
1 /*
2  * software YUV to RGB converter
3  *
4  * Copyright (C) 2001-2007 Michael Niedermayer
5  * Copyright (C) 2009-2010 Konstantin Shishkov
6  *
7  * MMX/MMXEXT template stuff (needed for fast movntq support),
8  * 1,4,8bpp support and context / deglobalize stuff
9  * by Michael Niedermayer (michaelni@gmx.at)
10  *
11  * This file is part of FFmpeg.
12  *
13  * FFmpeg is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Lesser General Public
15  * License as published by the Free Software Foundation; either
16  * version 2.1 of the License, or (at your option) any later version.
17  *
18  * FFmpeg is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21  * Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public
24  * License along with FFmpeg; if not, write to the Free Software
25  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26  */
27 
#include <stdio.h>
#include <stdlib.h>
#include <inttypes.h>

#include "config.h"
#include "libswscale/rgb2rgb.h"
#include "libswscale/swscale.h"
#include "libswscale/swscale_internal.h"
#include "libavutil/attributes.h"
#include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavutil/cpu.h"
40 
41 #if HAVE_X86ASM
42 
/*
 * Shared prologue of the slice converters below.
 *
 * Expects these names in the expanding scope: c, src, srcStride, srcSliceY,
 * srcSliceH, dst, dstStride, plus int variables y, h_size and vshift.
 *
 * What it does:
 *  - h_size is c->dstW rounded up to a multiple of 8; it is reduced by 8 when
 *    h_size * depth would exceed |dstStride[0]|.
 *  - vshift is 0 for AV_PIX_FMT_YUV422P input and 1 otherwise, so the chroma
 *    row is y or y >> 1 respectively.
 *  - it OPENS a for loop over the slice rows and declares the per-row locals
 *    image, py, pu, pv and index (= -h_size / 2).
 *
 * The user of the macro supplies the loop body and the closing brace.
 * depth is parenthesized so that an expression argument is multiplied as a
 * whole.
 */
#define YUV2RGB_LOOP(depth)                                                   \
    h_size = (c->dstW + 7) & ~7;                                              \
    if (h_size * (depth) > FFABS(dstStride[0]))                               \
        h_size -= 8;                                                          \
                                                                              \
    vshift = c->srcFormat != AV_PIX_FMT_YUV422P;                              \
                                                                              \
    for (y = 0; y < srcSliceH; y++) {                                         \
        uint8_t *image    = dst[0] + (y + srcSliceY) * dstStride[0];          \
        const uint8_t *py = src[0] +               y * srcStride[0];          \
        const uint8_t *pu = src[1] +   (y >> vshift) * srcStride[1];          \
        const uint8_t *pv = src[2] +   (y >> vshift) * srcStride[2];          \
        x86_reg index     = -h_size / 2;

57 extern void ff_yuv_420_rgb24_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index,
58  const uint8_t *pv_index, const uint64_t *pointer_c_dither,
59  const uint8_t *py_2index);
60 extern void ff_yuv_420_bgr24_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index,
61  const uint8_t *pv_index, const uint64_t *pointer_c_dither,
62  const uint8_t *py_2index);
63 
64 extern void ff_yuv_420_rgb15_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index,
65  const uint8_t *pv_index, const uint64_t *pointer_c_dither,
66  const uint8_t *py_2index);
67 extern void ff_yuv_420_rgb16_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index,
68  const uint8_t *pv_index, const uint64_t *pointer_c_dither,
69  const uint8_t *py_2index);
70 extern void ff_yuv_420_rgb32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index,
71  const uint8_t *pv_index, const uint64_t *pointer_c_dither,
72  const uint8_t *py_2index);
73 extern void ff_yuv_420_bgr32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index,
74  const uint8_t *pv_index, const uint64_t *pointer_c_dither,
75  const uint8_t *py_2index);
76 extern void ff_yuva_420_rgb32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index,
77  const uint8_t *pv_index, const uint64_t *pointer_c_dither,
78  const uint8_t *py_2index, const uint8_t *pa_2index);
79 extern void ff_yuva_420_bgr32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index,
80  const uint8_t *pv_index, const uint64_t *pointer_c_dither,
81  const uint8_t *py_2index, const uint8_t *pa_2index);
82 #if ARCH_X86_64
83 extern void ff_yuv_420_gbrp24_ssse3(x86_reg index, uint8_t *image, uint8_t *dst_b, uint8_t *dst_r,
84  const uint8_t *pu_index, const uint8_t *pv_index,
85  const uint64_t *pointer_c_dither,
86  const uint8_t *py_2index);
87 #endif
88 
89 static inline int yuv420_rgb15_ssse3(SwsContext *c, const uint8_t *const src[],
90  const int srcStride[],
91  int srcSliceY, int srcSliceH,
92  uint8_t *const dst[], const int dstStride[])
93 {
94  int y, h_size, vshift;
95 
96  YUV2RGB_LOOP(2)
97 
98  c->blueDither = ff_dither8[y & 1];
99  c->greenDither = ff_dither8[y & 1];
100  c->redDither = ff_dither8[(y + 1) & 1];
101 
102  ff_yuv_420_rgb15_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
103  }
104  return srcSliceH;
105 }
106 
107 static inline int yuv420_rgb16_ssse3(SwsContext *c, const uint8_t *const src[],
108  const int srcStride[],
109  int srcSliceY, int srcSliceH,
110  uint8_t *const dst[], const int dstStride[])
111 {
112  int y, h_size, vshift;
113 
114  YUV2RGB_LOOP(2)
115 
116  c->blueDither = ff_dither8[y & 1];
117  c->greenDither = ff_dither4[y & 1];
118  c->redDither = ff_dither8[(y + 1) & 1];
119 
120  ff_yuv_420_rgb16_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
121  }
122  return srcSliceH;
123 }
124 
125 static inline int yuv420_rgb32_ssse3(SwsContext *c, const uint8_t *const src[],
126  const int srcStride[],
127  int srcSliceY, int srcSliceH,
128  uint8_t *const dst[], const int dstStride[])
129 {
130  int y, h_size, vshift;
131 
132  YUV2RGB_LOOP(4)
133 
134  ff_yuv_420_rgb32_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
135  }
136  return srcSliceH;
137 }
138 
139 static inline int yuv420_bgr32_ssse3(SwsContext *c, const uint8_t *const src[],
140  const int srcStride[],
141  int srcSliceY, int srcSliceH,
142  uint8_t *const dst[], const int dstStride[])
143 {
144  int y, h_size, vshift;
145 
146  YUV2RGB_LOOP(4)
147 
148  ff_yuv_420_bgr32_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
149  }
150  return srcSliceH;
151 }
152 
153 static inline int yuva420_rgb32_ssse3(SwsContext *c, const uint8_t *const src[],
154  const int srcStride[],
155  int srcSliceY, int srcSliceH,
156  uint8_t *const dst[], const int dstStride[])
157 {
158  int y, h_size, vshift;
159  YUV2RGB_LOOP(4)
160 
161  const uint8_t *pa = src[3] + y * srcStride[3];
162  ff_yuva_420_rgb32_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index, pa - 2 * index);
163  }
164  return srcSliceH;
165 }
166 
167 static inline int yuva420_bgr32_ssse3(SwsContext *c, const uint8_t *const src[],
168  const int srcStride[],
169  int srcSliceY, int srcSliceH,
170  uint8_t *const dst[], const int dstStride[])
171 {
172  int y, h_size, vshift;
173 
174  YUV2RGB_LOOP(4)
175 
176  const uint8_t *pa = src[3] + y * srcStride[3];
177  ff_yuva_420_bgr32_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index, pa - 2 * index);
178  }
179  return srcSliceH;
180 }
181 
182 static inline int yuv420_rgb24_ssse3(SwsContext *c, const uint8_t *const src[],
183  const int srcStride[],
184  int srcSliceY, int srcSliceH,
185  uint8_t *const dst[], const int dstStride[])
186 {
187  int y, h_size, vshift;
188 
189  YUV2RGB_LOOP(3)
190 
191  ff_yuv_420_rgb24_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
192  }
193  return srcSliceH;
194 }
195 
196 static inline int yuv420_bgr24_ssse3(SwsContext *c, const uint8_t *const src[],
197  const int srcStride[],
198  int srcSliceY, int srcSliceH,
199  uint8_t *const dst[], const int dstStride[])
200 {
201  int y, h_size, vshift;
202 
203  YUV2RGB_LOOP(3)
204 
205  ff_yuv_420_bgr24_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
206  }
207  return srcSliceH;
208 }
209 
#if ARCH_X86_64
/**
 * Convert a slice of planar YUV rows to three separate destination planes
 * using ff_yuv_420_gbrp24_ssse3, one call per row. dst[0], dst[1] and dst[2]
 * are passed to the kernel as its image, dst_b and dst_r arguments.
 *
 * NOTE(review): the width clamp compares h_size * 3 against |dstStride[0]|,
 * i.e. it uses 3 bytes per pixel although the output is planar. This is kept
 * exactly as before; confirm whether 3 is intended here.
 *
 * @return srcSliceH
 */
static inline int yuv420_gbrp_ssse3(SwsContext *c, const uint8_t *const src[],
                                    const int srcStride[],
                                    int srcSliceY, int srcSliceH,
                                    uint8_t *const dst[], const int dstStride[])
{
    int y, h_size, vshift;

    /* The macro's "image" is the dst[0] row, used here as the first plane. */
    YUV2RGB_LOOP(3)
        const int out_row = y + srcSliceY;
        uint8_t *dst_b    = dst[1] + out_row * dstStride[1];
        uint8_t *dst_r    = dst[2] + out_row * dstStride[2];

        ff_yuv_420_gbrp24_ssse3(index, image, dst_b, dst_r,
                                pu - index, pv - index,
                                &c->redDither, py - 2 * index);
    } /* closes the loop opened by YUV2RGB_LOOP */

    return srcSliceH;
}
#endif
238 
239 #endif /* HAVE_X86ASM */
240 
242 {
243 #if HAVE_X86ASM
244  int cpu_flags = av_get_cpu_flags();
245 
246  if (EXTERNAL_SSSE3(cpu_flags)) {
247  switch (c->dstFormat) {
248  case AV_PIX_FMT_RGB32:
249  if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
250 #if CONFIG_SWSCALE_ALPHA
251  return yuva420_rgb32_ssse3;
252 #endif
253  break;
254  } else
255  return yuv420_rgb32_ssse3;
256  case AV_PIX_FMT_BGR32:
257  if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
258 #if CONFIG_SWSCALE_ALPHA
259  return yuva420_bgr32_ssse3;
260 #endif
261  break;
262  } else
263  return yuv420_bgr32_ssse3;
264  case AV_PIX_FMT_RGB24:
265  return yuv420_rgb24_ssse3;
266  case AV_PIX_FMT_BGR24:
267  return yuv420_bgr24_ssse3;
268  case AV_PIX_FMT_RGB565:
269  return yuv420_rgb16_ssse3;
270  case AV_PIX_FMT_RGB555:
271  return yuv420_rgb15_ssse3;
272 #if ARCH_X86_64
273  case AV_PIX_FMT_GBRP:
274  return yuv420_gbrp_ssse3;
275 #endif
276  }
277  }
278 
279 #endif /* HAVE_X86ASM */
280  return NULL;
281 }
cpu.h
AV_PIX_FMT_BGR32
#define AV_PIX_FMT_BGR32
Definition: pixfmt.h:462
x86_reg
int x86_reg
Definition: asm.h:72
AV_PIX_FMT_BGR24
@ AV_PIX_FMT_BGR24
packed RGB 8:8:8, 24bpp, BGRBGR...
Definition: pixfmt.h:76
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:107
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:56
return
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame For filters that do not use the this method is called when a frame is wanted on an output For a it should directly call filter_frame on the corresponding output For a if there are queued frames already one of these frames should be pushed If the filter should request a frame on one of its repeatedly until at least one frame has been pushed Return or at least make progress towards producing a it should return
Definition: filter_design.txt:264
ff_yuv2rgb_init_x86
av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c)
Definition: yuv2rgb.c:241
ff_dither4
const uint64_t ff_dither4[2]
Definition: swscale.c:33
av_cold
#define av_cold
Definition: attributes.h:90
AV_PIX_FMT_YUVA420P
@ AV_PIX_FMT_YUVA420P
planar YUV 4:2:0, 20bpp, (1 Cr & Cb sample per 2x2 Y & A samples)
Definition: pixfmt.h:108
FFABS
#define FFABS(a)
Absolute value, Note, INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
Definition: common.h:74
NULL
#define NULL
Definition: coverity.c:32
asm.h
SwsFunc
int(* SwsFunc)(SwsContext *c, const uint8_t *const src[], const int srcStride[], int srcSliceY, int srcSliceH, uint8_t *const dst[], const int dstStride[])
Definition: swscale_internal.h:99
index
int index
Definition: gxfenc.c:90
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
AV_PIX_FMT_RGB24
@ AV_PIX_FMT_RGB24
packed RGB 8:8:8, 24bpp, RGBRGB...
Definition: pixfmt.h:75
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:83
cpu.h
AV_PIX_FMT_RGB32
#define AV_PIX_FMT_RGB32
Definition: pixfmt.h:460
attributes.h
AV_PIX_FMT_RGB555
#define AV_PIX_FMT_RGB555
Definition: pixfmt.h:475
swscale_internal.h
AV_PIX_FMT_RGB565
#define AV_PIX_FMT_RGB565
Definition: pixfmt.h:474
ff_dither8
const uint64_t ff_dither8[2]
Definition: swscale.c:37
AV_PIX_FMT_GBRP
@ AV_PIX_FMT_GBRP
planar GBR 4:4:4 24bpp
Definition: pixfmt.h:165
AV_PIX_FMT_YUV422P
@ AV_PIX_FMT_YUV422P
planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
Definition: pixfmt.h:77
EXTERNAL_SSSE3
#define EXTERNAL_SSSE3(flags)
Definition: cpu.h:65
SwsContext
Definition: swscale_internal.h:324
rgb2rgb.h
src
#define src
Definition: vp8dsp.c:248
swscale.h