FFmpeg
swscale_unscaled.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2013 Xiaolei Yu <dreifachstein@gmail.com>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
#include "config.h"
#include "libswscale/swscale.h"
#include "libswscale/swscale_internal.h"
#include "libavutil/arm/cpu.h"
25 
26 #if HAVE_AS_DN_DIRECTIVE
/*
 * Packed-RGB to NV12 conversion routines. They are only declared here; their
 * definitions live outside this file.
 *
 * Both take the packed source rows, separate luma and chroma destination
 * pointers, the picture width and height, the three strides, and a table of
 * nine 32-bit coefficients.
 *
 * get_unscaled_swscale_neon() selects the _32 variant when SWS_ACCURATE_RND is
 * requested and the _16 variant otherwise.
 * NOTE(review): presumably the suffix is the width of the intermediate
 * arithmetic -- confirm against the assembly implementation.
 */
extern void rgbx_to_nv12_neon_32(const uint8_t *src, uint8_t *y, uint8_t *chroma,
                                 int width, int height,
                                 int y_stride, int c_stride, int src_stride,
                                 int32_t coeff_tbl[9]);

extern void rgbx_to_nv12_neon_16(const uint8_t *src, uint8_t *y, uint8_t *chroma,
                                 int width, int height,
                                 int y_stride, int c_stride, int src_stride,
                                 int32_t coeff_tbl[9]);
36 
37 static int rgbx_to_nv12_neon_32_wrapper(SwsContext *context, const uint8_t *src[],
38  int srcStride[], int srcSliceY, int srcSliceH,
39  uint8_t *dst[], int dstStride[]) {
40 
41  rgbx_to_nv12_neon_32(src[0] + srcSliceY * srcStride[0],
42  dst[0] + srcSliceY * dstStride[0],
43  dst[1] + (srcSliceY / 2) * dstStride[1],
44  context->srcW, srcSliceH,
45  dstStride[0], dstStride[1], srcStride[0],
46  context->input_rgb2yuv_table);
47 
48  return 0;
49 }
50 
51 static int rgbx_to_nv12_neon_16_wrapper(SwsContext *context, const uint8_t *src[],
52  int srcStride[], int srcSliceY, int srcSliceH,
53  uint8_t *dst[], int dstStride[]) {
54 
55  rgbx_to_nv12_neon_16(src[0] + srcSliceY * srcStride[0],
56  dst[0] + srcSliceY * dstStride[0],
57  dst[1] + (srcSliceY / 2) * dstStride[1],
58  context->srcW, srcSliceH,
59  dstStride[0], dstStride[1], srcStride[0],
60  context->input_rgb2yuv_table);
61 
62  return 0;
63 }
64 
/*
 * Initializer list for the coefficient table handed to the YUV-to-RGB NEON
 * routines: the v2r, u2g, v2g and u2b coefficients, in that order, read from
 * a context pointer that must be named `c` at the point of expansion.
 * The list ends with a trailing comma, so it is only usable inside braces.
 */
#define YUV_TO_RGB_TABLE \
        c->yuv2rgb_v2r_coeff, \
        c->yuv2rgb_u2g_coeff, \
        c->yuv2rgb_v2g_coeff, \
        c->yuv2rgb_u2b_coeff,

/*
 * For one (input format, output format) pair with three source planes:
 *  - declares the prototype of ff_<ifmt>_to_<ofmt>_neon(), defined elsewhere;
 *  - defines the static callback <ifmt>_to_<ofmt>_neon_wrapper().
 *
 * The wrapper builds a local coefficient table from YUV_TO_RGB_TABLE, advances
 * only the destination pointer by srcSliceY rows (src[0], src[1] and src[2]
 * are passed through unchanged), and passes yuv2rgb_y_offset >> 6 and
 * yuv2rgb_y_coeff. The int result of the NEON routine is discarded and the
 * wrapper always returns 0.
 *
 * NOTE(review): confirm that callers do not expect the number of processed
 * rows as the return value.
 */
#define DECLARE_FF_YUVX_TO_RGBX_FUNCS(ifmt, ofmt) \
int ff_##ifmt##_to_##ofmt##_neon(int w, int h, \
                                 uint8_t *dst, int linesize, \
                                 const uint8_t *srcY, int linesizeY, \
                                 const uint8_t *srcU, int linesizeU, \
                                 const uint8_t *srcV, int linesizeV, \
                                 const int16_t *table, \
                                 int y_offset, \
                                 int y_coeff); \
 \
static int ifmt##_to_##ofmt##_neon_wrapper(SwsContext *c, const uint8_t *src[], \
                                           int srcStride[], int srcSliceY, int srcSliceH, \
                                           uint8_t *dst[], int dstStride[]) { \
    const int16_t yuv2rgb_table[] = { YUV_TO_RGB_TABLE }; \
 \
    ff_##ifmt##_to_##ofmt##_neon(c->srcW, srcSliceH, \
                                 dst[0] + srcSliceY * dstStride[0], dstStride[0], \
                                 src[0], srcStride[0], \
                                 src[1], srcStride[1], \
                                 src[2], srcStride[2], \
                                 yuv2rgb_table, \
                                 c->yuv2rgb_y_offset >> 6, \
                                 c->yuv2rgb_y_coeff); \
 \
    return 0; \
}

98 #define DECLARE_FF_YUVX_TO_ALL_RGBX_FUNCS(yuvx) \
99 DECLARE_FF_YUVX_TO_RGBX_FUNCS(yuvx, argb) \
100 DECLARE_FF_YUVX_TO_RGBX_FUNCS(yuvx, rgba) \
101 DECLARE_FF_YUVX_TO_RGBX_FUNCS(yuvx, abgr) \
102 DECLARE_FF_YUVX_TO_RGBX_FUNCS(yuvx, bgra) \
103 
106 
/*
 * For one (input format, output format) pair with two source planes:
 *  - declares the prototype of ff_<ifmt>_to_<ofmt>_neon(), defined elsewhere;
 *  - defines the static callback <ifmt>_to_<ofmt>_neon_wrapper().
 *
 * The wrapper builds a local coefficient table from YUV_TO_RGB_TABLE, advances
 * only the destination pointer by srcSliceY rows (src[0] and src[1] are passed
 * through unchanged), and passes yuv2rgb_y_offset >> 6 and yuv2rgb_y_coeff.
 * The int result of the NEON routine is discarded and the wrapper always
 * returns 0.
 *
 * NOTE(review): confirm that callers do not expect the number of processed
 * rows as the return value.
 */
#define DECLARE_FF_NVX_TO_RGBX_FUNCS(ifmt, ofmt) \
int ff_##ifmt##_to_##ofmt##_neon(int w, int h, \
                                 uint8_t *dst, int linesize, \
                                 const uint8_t *srcY, int linesizeY, \
                                 const uint8_t *srcC, int linesizeC, \
                                 const int16_t *table, \
                                 int y_offset, \
                                 int y_coeff); \
 \
static int ifmt##_to_##ofmt##_neon_wrapper(SwsContext *c, const uint8_t *src[], \
                                           int srcStride[], int srcSliceY, int srcSliceH, \
                                           uint8_t *dst[], int dstStride[]) { \
    const int16_t yuv2rgb_table[] = { YUV_TO_RGB_TABLE }; \
 \
    ff_##ifmt##_to_##ofmt##_neon(c->srcW, srcSliceH, \
                                 dst[0] + srcSliceY * dstStride[0], dstStride[0], \
                                 src[0], srcStride[0], src[1], srcStride[1], \
                                 yuv2rgb_table, \
                                 c->yuv2rgb_y_offset >> 6, \
                                 c->yuv2rgb_y_coeff); \
 \
    return 0; \
}

131 #define DECLARE_FF_NVX_TO_ALL_RGBX_FUNCS(nvx) \
132 DECLARE_FF_NVX_TO_RGBX_FUNCS(nvx, argb) \
133 DECLARE_FF_NVX_TO_RGBX_FUNCS(nvx, rgba) \
134 DECLARE_FF_NVX_TO_RGBX_FUNCS(nvx, abgr) \
135 DECLARE_FF_NVX_TO_RGBX_FUNCS(nvx, bgra) \
136 
139 
/*
 * Installs <ifmt>_to_<ofmt>_neon_wrapper as c->convert_unscaled when the
 * context `c` converts AV_PIX_FMT_<IFMT> to AV_PIX_FMT_<OFMT>, the source
 * height is even, the source width is a multiple of 16 and accurate_rnd is
 * zero. Otherwise c->convert_unscaled is left untouched.
 *
 * We need a 16-pixel width alignment. This constraint could easily be removed
 * for input reading, but for the output, which is 4 bytes per pixel (RGBA),
 * the assembly might write as much as 4*15=60 extra bytes at the end of the
 * line, which won't fit the 32-byte buffer alignment.
 */
#define SET_FF_NVX_TO_RGBX_FUNC(ifmt, IFMT, ofmt, OFMT, accurate_rnd) do { \
    if (c->srcFormat == AV_PIX_FMT_##IFMT \
        && c->dstFormat == AV_PIX_FMT_##OFMT \
        && !(c->srcH & 1) \
        && !(c->srcW & 15) \
        && !(accurate_rnd)) { \
        c->convert_unscaled = ifmt##_to_##ofmt##_neon_wrapper; \
    } \
} while (0)
153 
/*
 * Applies SET_FF_NVX_TO_RGBX_FUNC for the given input format against each of
 * the four packed outputs: ARGB, RGBA, ABGR and BGRA.
 */
#define SET_FF_NVX_TO_ALL_RGBX_FUNC(nvx, NVX, accurate_rnd) do { \
    SET_FF_NVX_TO_RGBX_FUNC(nvx, NVX, argb, ARGB, accurate_rnd); \
    SET_FF_NVX_TO_RGBX_FUNC(nvx, NVX, rgba, RGBA, accurate_rnd); \
    SET_FF_NVX_TO_RGBX_FUNC(nvx, NVX, abgr, ABGR, accurate_rnd); \
    SET_FF_NVX_TO_RGBX_FUNC(nvx, NVX, bgra, BGRA, accurate_rnd); \
} while (0)
160 
161 static void get_unscaled_swscale_neon(SwsContext *c) {
162  int accurate_rnd = c->flags & SWS_ACCURATE_RND;
163  if (c->srcFormat == AV_PIX_FMT_RGBA
164  && c->dstFormat == AV_PIX_FMT_NV12
165  && (c->srcW >= 16)) {
166  c->convert_unscaled = accurate_rnd ? rgbx_to_nv12_neon_32_wrapper
167  : rgbx_to_nv12_neon_16_wrapper;
168  }
169 
170  SET_FF_NVX_TO_ALL_RGBX_FUNC(nv12, NV12, accurate_rnd);
171  SET_FF_NVX_TO_ALL_RGBX_FUNC(nv21, NV21, accurate_rnd);
172  SET_FF_NVX_TO_ALL_RGBX_FUNC(yuv420p, YUV420P, accurate_rnd);
173  SET_FF_NVX_TO_ALL_RGBX_FUNC(yuv422p, YUV422P, accurate_rnd);
174 }
175 
177 {
178  int cpu_flags = av_get_cpu_flags();
179  if (have_neon(cpu_flags))
181 }
182 #else
184 {
185 }
186 #endif
SET_FF_NVX_TO_ALL_RGBX_FUNC
#define SET_FF_NVX_TO_ALL_RGBX_FUNC(nvx, NVX, accurate_rnd)
Definition: swscale_unscaled.c:190
chroma
static av_always_inline void chroma(WaveformContext *s, AVFrame *in, AVFrame *out, int component, int intensity, int offset_y, int offset_x, int column, int mirror, int jobnr, int nb_jobs)
Definition: vf_waveform.c:1639
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:107
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:56
DECLARE_FF_YUVX_TO_ALL_RGBX_FUNCS
#define DECLARE_FF_YUVX_TO_ALL_RGBX_FUNCS(yuvx)
Definition: swscale_unscaled.c:84
get_unscaled_swscale_neon
static void get_unscaled_swscale_neon(SwsContext *c)
Definition: swscale_unscaled.c:198
width
#define width
AV_PIX_FMT_RGBA
@ AV_PIX_FMT_RGBA
packed RGBA 8:8:8:8, 32bpp, RGBARGBA...
Definition: pixfmt.h:100
context
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf default minimum maximum flags name is the option keep it simple and lowercase description are in without and describe what they for example set the foo of the bar offset is the offset of the field in your context
Definition: writing_filters.txt:91
cpu.h
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
have_neon
#define have_neon(flags)
Definition: cpu.h:26
height
#define height
SWS_ACCURATE_RND
#define SWS_ACCURATE_RND
Definition: swscale.h:114
swscale_internal.h
AV_PIX_FMT_NV12
@ AV_PIX_FMT_NV12
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (firs...
Definition: pixfmt.h:96
DECLARE_FF_NVX_TO_ALL_RGBX_FUNCS
#define DECLARE_FF_NVX_TO_ALL_RGBX_FUNCS(nvx)
Definition: swscale_unscaled.c:167
ff_get_unscaled_swscale_arm
void ff_get_unscaled_swscale_arm(SwsContext *c)
Definition: swscale_unscaled.c:183
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
int32_t
int32_t
Definition: audioconvert.c:56
SwsContext
Definition: swscale_internal.h:299
swscale.h