FFmpeg
swscale_unscaled.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with FFmpeg; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
19 #include "config.h"
20 #include "libswscale/swscale.h"
22 #include "libavutil/aarch64/cpu.h"
23 
/* Initializer fragment for the coefficient table passed to the NEON routines:
 * the four chroma coefficients of the context `c` (which must be in scope at
 * the point of expansion), in the order v2r, u2g, v2g, u2b. The trailing
 * comma is part of the expansion. */
#define YUV_TO_RGB_TABLE                                                                    \
        c->yuv2rgb_v2r_coeff, c->yuv2rgb_u2g_coeff,                                         \
        c->yuv2rgb_v2g_coeff, c->yuv2rgb_u2b_coeff,
/*
 * For one (ifmt, ofmt) pair, declare the external routine
 * ff_<ifmt>_to_<ofmt>_neon() and define the static wrapper
 * <ifmt>_to_<ofmt>_neon_wrapper() for a single-plane destination.
 *
 * The wrapper builds the coefficient table from the context, points the
 * output at row srcSliceY of dst[0], and forwards c->opts.src_w as width and
 * srcSliceH as height together with c->yuv2rgb_y_offset >> 6 and
 * c->yuv2rgb_y_coeff. It returns whatever the NEON routine returns.
 */
#define DECLARE_FF_YUVX_TO_PACKED_RGB_FUNCS(ifmt, ofmt)                                     \
int ff_##ifmt##_to_##ofmt##_neon(int w, int h,                                              \
                                 int y_offset,                                              \
                                 int y_coeff,                                               \
                                 const int16_t *table,                                      \
                                 const uint8_t *const src[], const int srcStride[],         \
                                 uint8_t *dst, int linesize);                               \
                                                                                            \
static int ifmt##_to_##ofmt##_neon_wrapper(SwsInternal *c, const uint8_t *const src[],      \
                                           const int srcStride[], int srcSliceY,            \
                                           int srcSliceH, uint8_t *const dst[],             \
                                           const int dstStride[])                           \
{                                                                                           \
    const int16_t coeffs[] = { YUV_TO_RGB_TABLE };                                          \
    const int out_stride   = dstStride[0];                                                  \
    uint8_t *const out     = dst[0] + srcSliceY * out_stride;                               \
                                                                                            \
    return ff_##ifmt##_to_##ofmt##_neon(c->opts.src_w, srcSliceH,                           \
                                        c->yuv2rgb_y_offset >> 6,                           \
                                        c->yuv2rgb_y_coeff,                                 \
                                        coeffs,                                             \
                                        src, srcStride,                                     \
                                        out, out_stride);                                   \
}
/*
 * Three-plane counterpart of DECLARE_FF_YUVX_TO_PACKED_RGB_FUNCS: declares
 * ff_<ifmt>_to_<ofmt>_neon() taking three destination pointer/linesize pairs
 * and defines the static wrapper <ifmt>_to_<ofmt>_neon_wrapper().
 *
 * The wrapper passes dst[0], dst[1] and dst[2], each advanced to row
 * srcSliceY using its own stride, and returns the NEON routine's result.
 */
#define DECLARE_FF_YUVX_TO_PLANAR_RGB_FUNCS(ifmt, ofmt)                                     \
int ff_##ifmt##_to_##ofmt##_neon(int w, int h,                                              \
                                 int y_offset,                                              \
                                 int y_coeff,                                               \
                                 const int16_t *table,                                      \
                                 const uint8_t *const src[], const int srcStride[],         \
                                 uint8_t *dst0, int linesize0,                              \
                                 uint8_t *dst1, int linesize1,                              \
                                 uint8_t *dst2, int linesize2);                             \
                                                                                            \
static int ifmt##_to_##ofmt##_neon_wrapper(SwsInternal *c, const uint8_t *const src[],      \
                                           const int srcStride[], int srcSliceY,            \
                                           int srcSliceH, uint8_t *const dst[],             \
                                           const int dstStride[])                           \
{                                                                                           \
    const int16_t coeffs[] = { YUV_TO_RGB_TABLE };                                          \
    uint8_t *const plane0  = dst[0] + srcSliceY * dstStride[0];                             \
    uint8_t *const plane1  = dst[1] + srcSliceY * dstStride[1];                             \
    uint8_t *const plane2  = dst[2] + srcSliceY * dstStride[2];                             \
                                                                                            \
    return ff_##ifmt##_to_##ofmt##_neon(c->opts.src_w, srcSliceH,                           \
                                        c->yuv2rgb_y_offset >> 6,                           \
                                        c->yuv2rgb_y_coeff,                                 \
                                        coeffs,                                             \
                                        src, srcStride,                                     \
                                        plane0, dstStride[0],                               \
                                        plane1, dstStride[1],                               \
                                        plane2, dstStride[2]);                              \
}
/* Instantiate, for the input format `yuvx`, the prototype/wrapper pairs for
 * the six single-plane outputs (argb, rgba, abgr, bgra, rgb24, bgr24) and for
 * the three-plane gbrp output. */
#define DECLARE_FF_YUVX_TO_ALL_RGBX_FUNCS(yuvx)                                             \
    DECLARE_FF_YUVX_TO_PACKED_RGB_FUNCS(yuvx, argb)                                         \
    DECLARE_FF_YUVX_TO_PACKED_RGB_FUNCS(yuvx, rgba)                                         \
    DECLARE_FF_YUVX_TO_PACKED_RGB_FUNCS(yuvx, abgr)                                         \
    DECLARE_FF_YUVX_TO_PACKED_RGB_FUNCS(yuvx, bgra)                                         \
    DECLARE_FF_YUVX_TO_PACKED_RGB_FUNCS(yuvx, rgb24)                                        \
    DECLARE_FF_YUVX_TO_PACKED_RGB_FUNCS(yuvx, bgr24)                                        \
    DECLARE_FF_YUVX_TO_PLANAR_RGB_FUNCS(yuvx, gbrp)
92 
/* Instantiate, for the input format `yuvx`, the prototype/wrapper pairs for
 * the four little-endian 16-bit outputs: rgb565le, bgr565le, rgb555le and
 * bgr555le. */
#define DECLARE_FF_YUVX_TO_ALL_RGB16_FUNCS(yuvx)                                            \
    DECLARE_FF_YUVX_TO_PACKED_RGB_FUNCS(yuvx, rgb565le)                                     \
    DECLARE_FF_YUVX_TO_PACKED_RGB_FUNCS(yuvx, bgr565le)                                     \
    DECLARE_FF_YUVX_TO_PACKED_RGB_FUNCS(yuvx, rgb555le)                                     \
    DECLARE_FF_YUVX_TO_PACKED_RGB_FUNCS(yuvx, bgr555le)
103 
108 
/* Chroma routine used by nv24_to_yuv420p_neon_wrapper(); only declared here,
 * its definition is not part of this file. It takes two destination planes
 * with their strides, one source plane with its stride, and a width/height
 * pair. */
void ff_nv24_to_yuv420p_chroma_neon(uint8_t *dst1, int dstStride1,
                                    uint8_t *dst2, int dstStride2,
                                    const uint8_t *src, int srcStride,
                                    int w, int h);
113 
114 static int nv24_to_yuv420p_neon_wrapper(SwsInternal *c, const uint8_t *const src[],
115  const int srcStride[], int srcSliceY, int srcSliceH,
116  uint8_t *const dst[], const int dstStride[])
117 {
118  uint8_t *dst1 = dst[1] + dstStride[1] * srcSliceY / 2;
119  uint8_t *dst2 = dst[2] + dstStride[2] * srcSliceY / 2;
120 
121  ff_copyPlane(src[0], srcStride[0], srcSliceY, srcSliceH, c->opts.src_w,
122  dst[0], dstStride[0]);
123 
124  if (c->opts.src_format == AV_PIX_FMT_NV24)
125  ff_nv24_to_yuv420p_chroma_neon(dst1, dstStride[1], dst2, dstStride[2],
126  src[1], srcStride[1], c->opts.src_w / 2,
127  srcSliceH);
128  else
129  ff_nv24_to_yuv420p_chroma_neon(dst2, dstStride[2], dst1, dstStride[1],
130  src[1], srcStride[1], c->opts.src_w / 2,
131  srcSliceH);
132 
133  return srcSliceH;
134 }
135 
/*
 * Install <ifmt>_to_<ofmt>_neon_wrapper in c->convert_unscaled when the
 * context `c` (which must be in scope) converts AV_PIX_FMT_<IFMT> to
 * AV_PIX_FMT_<OFMT>, the source height is even, the source width is a
 * multiple of 16 and `accurate_rnd` evaluates to zero. Otherwise
 * c->convert_unscaled is left untouched.
 *
 * We need a 16 pixel width alignment. This constraint can easily be removed
 * for input reading but for the output which is 4-bytes per pixel (RGBA) the
 * assembly might be writing as much as 4*15=60 extra bytes at the end of the
 * line, which won't fit the 32-bytes buffer alignment.
 *
 * `accurate_rnd` is parenthesized in the expansion so that an expression
 * argument (e.g. `flags & SWS_ACCURATE_RND`) is negated as a whole.
 */
#define SET_FF_YUVX_TO_RGBX_FUNC(ifmt, IFMT, ofmt, OFMT, accurate_rnd) do {                 \
    if (c->opts.src_format == AV_PIX_FMT_##IFMT                                             \
        && c->opts.dst_format == AV_PIX_FMT_##OFMT                                          \
        && !(c->opts.src_h & 1)                                                             \
        && !(c->opts.src_w & 15)                                                            \
        && !(accurate_rnd))                                                                 \
        c->convert_unscaled = ifmt##_to_##ofmt##_neon_wrapper;                              \
} while (0)
148 
/* Try SET_FF_YUVX_TO_RGBX_FUNC for the input format `yuvx`/`YUVX` against
 * each of the outputs argb, rgba, abgr, bgra, rgb24, bgr24 and gbrp. Each
 * entry tests a different destination format, so at most one can match. */
#define SET_FF_YUVX_TO_ALL_RGBX_FUNC(yuvx, YUVX, accurate_rnd) do {                         \
    /* 32-bit outputs */                                                                    \
    SET_FF_YUVX_TO_RGBX_FUNC(yuvx, YUVX, argb,  ARGB,  accurate_rnd);                       \
    SET_FF_YUVX_TO_RGBX_FUNC(yuvx, YUVX, rgba,  RGBA,  accurate_rnd);                       \
    SET_FF_YUVX_TO_RGBX_FUNC(yuvx, YUVX, abgr,  ABGR,  accurate_rnd);                       \
    SET_FF_YUVX_TO_RGBX_FUNC(yuvx, YUVX, bgra,  BGRA,  accurate_rnd);                       \
    /* 24-bit outputs */                                                                    \
    SET_FF_YUVX_TO_RGBX_FUNC(yuvx, YUVX, rgb24, RGB24, accurate_rnd);                       \
    SET_FF_YUVX_TO_RGBX_FUNC(yuvx, YUVX, bgr24, BGR24, accurate_rnd);                       \
    /* three-plane output */                                                                \
    SET_FF_YUVX_TO_RGBX_FUNC(yuvx, YUVX, gbrp,  GBRP,  accurate_rnd);                       \
} while (0)
158 
/* Try SET_FF_YUVX_TO_RGBX_FUNC for the input format `yuvx`/`YUVX` against
 * each of the four little-endian 16-bit outputs. */
#define SET_FF_YUVX_TO_ALL_RGB16_FUNC(yuvx, YUVX, accurate_rnd) do {                        \
    /* 5:6:5 */                                                                             \
    SET_FF_YUVX_TO_RGBX_FUNC(yuvx, YUVX, rgb565le, RGB565LE, accurate_rnd);                 \
    SET_FF_YUVX_TO_RGBX_FUNC(yuvx, YUVX, bgr565le, BGR565LE, accurate_rnd);                 \
    /* 5:5:5 */                                                                             \
    SET_FF_YUVX_TO_RGBX_FUNC(yuvx, YUVX, rgb555le, RGB555LE, accurate_rnd);                 \
    SET_FF_YUVX_TO_RGBX_FUNC(yuvx, YUVX, bgr555le, BGR555LE, accurate_rnd);                 \
} while (0)
165 
/* NOTE(review): the signature line (listing line 166) is missing from this
 * extract; the cross-reference index at the end of the listing gives it as
 * `static void get_unscaled_swscale_neon(SwsInternal *c)`.
 *
 * Installs a NEON wrapper in c->convert_unscaled when the source/destination
 * format pair is one of those listed below. Every entry additionally requires
 * an even source height, a source width that is a multiple of 16, and
 * SWS_ACCURATE_RND not being set in c->opts.flags; if nothing matches,
 * c->convert_unscaled is left untouched. */
167  int accurate_rnd = c->opts.flags & SWS_ACCURATE_RND;
168 
169  SET_FF_YUVX_TO_ALL_RGBX_FUNC(nv12, NV12, accurate_rnd);
170  SET_FF_YUVX_TO_ALL_RGBX_FUNC(nv21, NV21, accurate_rnd);
171  SET_FF_YUVX_TO_ALL_RGBX_FUNC(yuv420p, YUV420P, accurate_rnd);
172  SET_FF_YUVX_TO_ALL_RGBX_FUNC(yuv422p, YUV422P, accurate_rnd);
173  SET_FF_YUVX_TO_ALL_RGB16_FUNC(nv12, NV12, accurate_rnd);
174  SET_FF_YUVX_TO_ALL_RGB16_FUNC(nv21, NV21, accurate_rnd);
175  SET_FF_YUVX_TO_ALL_RGB16_FUNC(yuv420p, YUV420P, accurate_rnd);
176  SET_FF_YUVX_TO_ALL_RGB16_FUNC(yuv422p, YUV422P, accurate_rnd);
/* YUVA420P sources: ARGB/RGBA/ABGR/BGRA use the yuva420p wrappers, every
 * other destination below reuses the yuv420p wrappers.
 * NOTE(review): unlike ff_yuv2rgb_init_aarch64(), the yuva420p wrapper
 * entries here are not guarded by CONFIG_SWSCALE_ALPHA -- confirm that this
 * is intended. */
177  SET_FF_YUVX_TO_RGBX_FUNC(yuva420p, YUVA420P, argb, ARGB, accurate_rnd);
178  SET_FF_YUVX_TO_RGBX_FUNC(yuva420p, YUVA420P, rgba, RGBA, accurate_rnd);
179  SET_FF_YUVX_TO_RGBX_FUNC(yuva420p, YUVA420P, abgr, ABGR, accurate_rnd);
180  SET_FF_YUVX_TO_RGBX_FUNC(yuva420p, YUVA420P, bgra, BGRA, accurate_rnd);
181  SET_FF_YUVX_TO_RGBX_FUNC(yuv420p, YUVA420P, rgb24, RGB24, accurate_rnd);
182  SET_FF_YUVX_TO_RGBX_FUNC(yuv420p, YUVA420P, bgr24, BGR24, accurate_rnd);
183  SET_FF_YUVX_TO_RGBX_FUNC(yuv420p, YUVA420P, gbrp, GBRP, accurate_rnd);
184  /* yuva420p -> 16bpp: alpha is dropped, route through yuv420p NEON path */
185  SET_FF_YUVX_TO_RGBX_FUNC(yuv420p, YUVA420P, rgb565le, RGB565LE, accurate_rnd);
186  SET_FF_YUVX_TO_RGBX_FUNC(yuv420p, YUVA420P, bgr565le, BGR565LE, accurate_rnd);
187  SET_FF_YUVX_TO_RGBX_FUNC(yuv420p, YUVA420P, rgb555le, RGB555LE, accurate_rnd);
188  SET_FF_YUVX_TO_RGBX_FUNC(yuv420p, YUVA420P, bgr555le, BGR555LE, accurate_rnd);
189 
/* NV24 / NV42 -> YUV420P, under the same height, width and rounding
 * constraints as the entries above. */
190  if (c->opts.dst_format == AV_PIX_FMT_YUV420P &&
191  (c->opts.src_format == AV_PIX_FMT_NV24 || c->opts.src_format == AV_PIX_FMT_NV42) &&
192  !(c->opts.src_h & 1) && !(c->opts.src_w & 15) && !accurate_rnd)
193  c->convert_unscaled = nv24_to_yuv420p_neon_wrapper;
194 }
195 
/* NOTE(review): two lines are missing from this extract: the signature
 * (listing line 196; the cross-reference index at the end of the listing
 * gives `void ff_get_unscaled_swscale_aarch64(SwsInternal *c)`) and the
 * statement controlled by the `if` (listing line 200; presumably the call
 * to get_unscaled_swscale_neon(c) -- confirm against the original file). */
197 {
198  int cpu_flags = av_get_cpu_flags();
/* Only act when the reported CPU flags include NEON. */
199  if (have_neon(cpu_flags))
201 }
202 
/* NOTE(review): the signature line (listing line 203) is missing from this
 * extract; the cross-reference index at the end of the listing gives it as
 * `av_cold SwsFunc ff_yuv2rgb_init_aarch64(SwsInternal *c)`.
 *
 * Returns the NEON wrapper for the c->opts.src_format / c->opts.dst_format
 * pair, or NULL when NEON is not reported by the CPU flags, the source height
 * is odd, the source width is not a multiple of 16, SWS_ACCURATE_RND is set,
 * or the pair is not listed below. */
204 {
205  int cpu_flags = av_get_cpu_flags();
/* Common preconditions shared by every wrapper returned below. */
206  if (!have_neon(cpu_flags) ||
207  (c->opts.src_h & 1) || (c->opts.src_w & 15) ||
208  (c->opts.flags & SWS_ACCURATE_RND))
209  return NULL;
210 
/* Unlisted destination formats leave the switch and reach the final
 * `return NULL`. */
211  if (c->opts.src_format == AV_PIX_FMT_YUV420P) {
212  switch (c->opts.dst_format) {
213  case AV_PIX_FMT_ARGB: return yuv420p_to_argb_neon_wrapper;
214  case AV_PIX_FMT_RGBA: return yuv420p_to_rgba_neon_wrapper;
215  case AV_PIX_FMT_ABGR: return yuv420p_to_abgr_neon_wrapper;
216  case AV_PIX_FMT_BGRA: return yuv420p_to_bgra_neon_wrapper;
217  case AV_PIX_FMT_RGB24: return yuv420p_to_rgb24_neon_wrapper;
218  case AV_PIX_FMT_BGR24: return yuv420p_to_bgr24_neon_wrapper;
219  case AV_PIX_FMT_GBRP: return yuv420p_to_gbrp_neon_wrapper;
220  case AV_PIX_FMT_RGB565LE: return yuv420p_to_rgb565le_neon_wrapper;
221  case AV_PIX_FMT_BGR565LE: return yuv420p_to_bgr565le_neon_wrapper;
222  case AV_PIX_FMT_RGB555LE: return yuv420p_to_rgb555le_neon_wrapper;
223  case AV_PIX_FMT_BGR555LE: return yuv420p_to_bgr555le_neon_wrapper;
224  }
225  } else if (c->opts.src_format == AV_PIX_FMT_YUVA420P) {
226  switch (c->opts.dst_format) {
/* The yuva420p-specific wrappers are offered only when CONFIG_SWSCALE_ALPHA
 * is non-zero; with it disabled these four destinations are not handled and
 * the function returns NULL for them. */
227 #if CONFIG_SWSCALE_ALPHA
228  case AV_PIX_FMT_ARGB: return yuva420p_to_argb_neon_wrapper;
229  case AV_PIX_FMT_RGBA: return yuva420p_to_rgba_neon_wrapper;
230  case AV_PIX_FMT_ABGR: return yuva420p_to_abgr_neon_wrapper;
231  case AV_PIX_FMT_BGRA: return yuva420p_to_bgra_neon_wrapper;
232 #endif
/* The remaining destinations reuse the yuv420p wrappers. */
233  case AV_PIX_FMT_RGB24: return yuv420p_to_rgb24_neon_wrapper;
234  case AV_PIX_FMT_BGR24: return yuv420p_to_bgr24_neon_wrapper;
235  case AV_PIX_FMT_GBRP: return yuv420p_to_gbrp_neon_wrapper;
236  /* 16bpp targets drop alpha, share yuv420p path */
237  case AV_PIX_FMT_RGB565LE: return yuv420p_to_rgb565le_neon_wrapper;
238  case AV_PIX_FMT_BGR565LE: return yuv420p_to_bgr565le_neon_wrapper;
239  case AV_PIX_FMT_RGB555LE: return yuv420p_to_rgb555le_neon_wrapper;
240  case AV_PIX_FMT_BGR555LE: return yuv420p_to_bgr555le_neon_wrapper;
241  }
242  } else if (c->opts.src_format == AV_PIX_FMT_YUV422P) {
243  switch (c->opts.dst_format) {
244  case AV_PIX_FMT_ARGB: return yuv422p_to_argb_neon_wrapper;
245  case AV_PIX_FMT_RGBA: return yuv422p_to_rgba_neon_wrapper;
246  case AV_PIX_FMT_ABGR: return yuv422p_to_abgr_neon_wrapper;
247  case AV_PIX_FMT_BGRA: return yuv422p_to_bgra_neon_wrapper;
248  case AV_PIX_FMT_RGB24: return yuv422p_to_rgb24_neon_wrapper;
249  case AV_PIX_FMT_BGR24: return yuv422p_to_bgr24_neon_wrapper;
250  case AV_PIX_FMT_GBRP: return yuv422p_to_gbrp_neon_wrapper;
251  case AV_PIX_FMT_RGB565LE: return yuv422p_to_rgb565le_neon_wrapper;
252  case AV_PIX_FMT_BGR565LE: return yuv422p_to_bgr565le_neon_wrapper;
253  case AV_PIX_FMT_RGB555LE: return yuv422p_to_rgb555le_neon_wrapper;
254  case AV_PIX_FMT_BGR555LE: return yuv422p_to_bgr555le_neon_wrapper;
255  }
256  }
/* No NEON wrapper for this source format or format pair. */
257  return NULL;
258 }
av_cold
#define av_cold
Definition: attributes.h:119
get_unscaled_swscale_neon
static void get_unscaled_swscale_neon(SwsInternal *c)
Definition: swscale_unscaled.c:166
SET_FF_YUVX_TO_RGBX_FUNC
#define SET_FF_YUVX_TO_RGBX_FUNC(ifmt, IFMT, ofmt, OFMT, accurate_rnd)
Definition: swscale_unscaled.c:140
DECLARE_FF_YUVX_TO_PACKED_RGB_FUNCS
#define DECLARE_FF_YUVX_TO_PACKED_RGB_FUNCS(ifmt, ofmt)
Definition: swscale_unscaled.c:30
AV_PIX_FMT_BGR24
@ AV_PIX_FMT_BGR24
packed RGB 8:8:8, 24bpp, BGRBGR...
Definition: pixfmt.h:76
AV_PIX_FMT_BGRA
@ AV_PIX_FMT_BGRA
packed BGRA 8:8:8:8, 32bpp, BGRABGRA...
Definition: pixfmt.h:102
DECLARE_FF_YUVX_TO_ALL_RGBX_FUNCS
#define DECLARE_FF_YUVX_TO_ALL_RGBX_FUNCS(yuvx)
Definition: swscale_unscaled.c:79
AV_PIX_FMT_YUVA420P
@ AV_PIX_FMT_YUVA420P
planar YUV 4:2:0, 20bpp, (1 Cr & Cb sample per 2x2 Y & A samples)
Definition: pixfmt.h:108
cpu.h
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:109
AV_PIX_FMT_YUV420P
@ AV_PIX_FMT_YUV420P
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:73
AV_PIX_FMT_RGBA
@ AV_PIX_FMT_RGBA
packed RGBA 8:8:8:8, 32bpp, RGBARGBA...
Definition: pixfmt.h:100
AV_PIX_FMT_RGB565LE
@ AV_PIX_FMT_RGB565LE
packed RGB 5:6:5, 16bpp, (msb) 5R 6G 5B(lsb), little-endian
Definition: pixfmt.h:113
NULL
#define NULL
Definition: coverity.c:32
ff_get_unscaled_swscale_aarch64
void ff_get_unscaled_swscale_aarch64(SwsInternal *c)
Definition: swscale_unscaled.c:196
AV_PIX_FMT_BGR565LE
@ AV_PIX_FMT_BGR565LE
packed BGR 5:6:5, 16bpp, (msb) 5B 6G 5R(lsb), little-endian
Definition: pixfmt.h:118
have_neon
#define have_neon(flags)
Definition: cpu.h:26
AV_PIX_FMT_ABGR
@ AV_PIX_FMT_ABGR
packed ABGR 8:8:8:8, 32bpp, ABGRABGR...
Definition: pixfmt.h:101
c
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit picking, but optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs. Optimizing code based on wrong assumptions can and has in some cases led to effects beyond the output of computations. The signed integer overflow problem in speed critical code: Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
SET_FF_YUVX_TO_ALL_RGB16_FUNC
#define SET_FF_YUVX_TO_ALL_RGB16_FUNC(yuvx, YUVX, accurate_rnd)
Definition: swscale_unscaled.c:159
AV_PIX_FMT_RGB24
@ AV_PIX_FMT_RGB24
packed RGB 8:8:8, 24bpp, RGBRGB...
Definition: pixfmt.h:75
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:87
DECLARE_FF_YUVX_TO_ALL_RGB16_FUNCS
#define DECLARE_FF_YUVX_TO_ALL_RGB16_FUNCS(yuvx)
Definition: swscale_unscaled.c:93
cpu_flags
CheckasmCpu cpu_flags
Definition: checkasm.c:84
AV_PIX_FMT_ARGB
@ AV_PIX_FMT_ARGB
packed ARGB 8:8:8:8, 32bpp, ARGBARGB...
Definition: pixfmt.h:99
ff_yuv2rgb_init_aarch64
av_cold SwsFunc ff_yuv2rgb_init_aarch64(SwsInternal *c)
Definition: swscale_unscaled.c:203
AV_PIX_FMT_RGB555LE
@ AV_PIX_FMT_RGB555LE
packed RGB 5:5:5, 16bpp, (msb)1X 5R 5G 5B(lsb), little-endian, X=unused/undefined
Definition: pixfmt.h:115
ff_copyPlane
void ff_copyPlane(const uint8_t *src, int srcStride, int srcSliceY, int srcSliceH, int width, uint8_t *dst, int dstStride)
Definition: swscale_unscaled.c:126
AV_PIX_FMT_NV24
@ AV_PIX_FMT_NV24
planar YUV 4:4:4, 24bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (firs...
Definition: pixfmt.h:371
swscale_internal.h
ff_nv24_to_yuv420p_chroma_neon
void ff_nv24_to_yuv420p_chroma_neon(uint8_t *dst1, int dstStride1, uint8_t *dst2, int dstStride2, const uint8_t *src, int srcStride, int w, int h)
AV_PIX_FMT_NV42
@ AV_PIX_FMT_NV42
as above, but U and V bytes are swapped
Definition: pixfmt.h:372
SET_FF_YUVX_TO_ALL_RGBX_FUNC
#define SET_FF_YUVX_TO_ALL_RGBX_FUNC(yuvx, YUVX, accurate_rnd)
Definition: swscale_unscaled.c:149
SwsInternal
Definition: swscale_internal.h:337
AV_PIX_FMT_GBRP
@ AV_PIX_FMT_GBRP
planar GBR 4:4:4 24bpp
Definition: pixfmt.h:165
AV_PIX_FMT_YUV422P
@ AV_PIX_FMT_YUV422P
planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
Definition: pixfmt.h:77
w
uint8_t w
Definition: llvidencdsp.c:39
AV_PIX_FMT_BGR555LE
@ AV_PIX_FMT_BGR555LE
packed BGR 5:5:5, 16bpp, (msb)1X 5B 5G 5R(lsb), little-endian, X=unused/undefined
Definition: pixfmt.h:120
SwsFunc
int(* SwsFunc)(SwsInternal *c, const uint8_t *const src[], const int srcStride[], int srcSliceY, int srcSliceH, uint8_t *const dst[], const int dstStride[])
Definition: swscale_internal.h:99
RGBA
#define RGBA(r, g, b, a)
Definition: dvbsubdec.c:42
nv24_to_yuv420p_neon_wrapper
static int nv24_to_yuv420p_neon_wrapper(SwsInternal *c, const uint8_t *const src[], const int srcStride[], int srcSliceY, int srcSliceH, uint8_t *const dst[], const int dstStride[])
Definition: swscale_unscaled.c:114
SWS_ACCURATE_RND
@ SWS_ACCURATE_RND
Force bit-exact output.
Definition: swscale.h:179
h
h
Definition: vp9dsp_template.c:2070
src
#define src
Definition: vp8dsp.c:248
swscale.h