Go to the documentation of this file.
26 #define SIZEOF_MEMBER(type, member) \
27 sizeof(((type*)0)->member)
29 static_assert(offsetof(
SwsLuts, in) ==
SL_IN,
"struct layout mismatch");
33 "struct layout mismatch");
35 "struct layout mismatch");
40 "struct layout mismatch");
43 int dst_stride,
const uint8_t *
src,
44 int src_stride,
int w,
int h);
47 int dst_stride,
const uint8_t *
src,
48 int src_stride,
int w,
int h)
56 const int32_t *filterPos,
int filterSize);
59 const int32_t *filterPos,
int filterSize);
62 const int32_t *filterPos,
int filterSize);
65 const int32_t *filterPos,
int filterSize);
68 const int32_t *filterPos,
int filterSize);
71 const int32_t *filterPos,
int filterSize);
75 const int32_t *filterPos,
int filterSize)
78 int sh =
desc->comp[0].depth - 1;
91 const int32_t *filterPos,
int filterSize)
94 int sh =
desc->comp[0].depth - 1;
107 const int32_t *filterPos,
int filterSize)
110 int sh =
desc->comp[0].depth - 1;
122 const int32_t *filterPos,
int filterSize)
140 const int32_t *filterPos,
int filterSize)
158 const int32_t *filterPos,
int filterSize)
174 #define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
175 void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
176 SwsInternal *c, int16_t *data, \
177 int dstW, const uint8_t *src, \
178 const int16_t *filter, \
179 const int32_t *filterPos, int filterSize)
180 #define SCALE_FUNCS(filter_n, opt) \
181 SCALE_FUNC(filter_n, 8, 15, opt); \
182 SCALE_FUNC(filter_n, 8, 19, opt);
183 #define ALL_SCALE_FUNCS(opt) \
184 SCALE_FUNCS(4, opt); \
185 SCALE_FUNCS(X8, opt); \
191 const int16_t **
src, uint16_t *dest,
int dstW,
192 int big_endian,
int output_bits);
194 #define yuv2NBPS(bits, BE_LE, is_be, template_size, typeX_t) \
195 static void yuv2planeX_ ## bits ## BE_LE ## _neon(const int16_t *filter, int filterSize, \
196 const int16_t **src, uint8_t *dest, int dstW, \
197 const uint8_t *dither, int offset) \
199 ff_yuv2planeX_## template_size ## _neon(filter, \
200 filterSize, (const typeX_t **) src, \
201 (uint16_t *) dest, dstW, is_be, bits); \
214 const int16_t **
src, uint8_t *dest,
int dstW,
216 void ff_yuv2plane1_8_neon(
223 void ff_yuv2nv12cX_neon_asm(
int isSwapped,
const uint8_t *chrDither,
224 const int16_t *chrFilter,
int chrFilterSize,
225 const int16_t **chrUSrc,
const int16_t **chrVSrc,
226 uint8_t *dest,
int chrDstW);
228 static void ff_yuv2nv12cX_neon(
enum AVPixelFormat dstFormat,
const uint8_t *chrDither,
229 const int16_t *chrFilter,
int chrFilterSize,
230 const int16_t **chrUSrc,
const int16_t **chrVSrc,
231 uint8_t *dest,
int chrDstW)
234 ff_yuv2nv12cX_neon_asm(1, chrDither, chrFilter, chrFilterSize,
235 chrUSrc, chrVSrc, dest, chrDstW);
237 ff_yuv2nv12cX_neon_asm(0, chrDither, chrFilter, chrFilterSize,
238 chrUSrc, chrVSrc, dest, chrDstW);
242 #define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt) do { \
243 if (c->srcBpc == 8) { \
244 if(c->dstBpc <= 14) { \
246 ff_hscale8to15_ ## filtersize ## _ ## opt; \
249 ff_hscale8to19_ ## filtersize ## _ ## opt; \
251 if (c->dstBpc <= 14) \
253 ff_hscale16to15_ ## filtersize ## _ ## opt; \
256 ff_hscale16to19_ ## filtersize ## _ ## opt; \
260 #define ASSIGN_SCALE_FUNC(hscalefn, filtersize, opt) do { \
261 if (filtersize == 4) \
262 ASSIGN_SCALE_FUNC2(hscalefn, 4, opt); \
263 else if (filtersize % 8 == 0) \
264 ASSIGN_SCALE_FUNC2(hscalefn, X8, opt); \
265 else if (filtersize % 4 == 0 && filtersize % 8 != 0) \
266 ASSIGN_SCALE_FUNC2(hscalefn, X4, opt); \
269 #define ASSIGN_VSCALE_FUNC(vscalefn, opt) \
270 switch (c->dstBpc) { \
271 case 8: vscalefn = ff_yuv2plane1_8_ ## opt; break; \
275 #define NEON_INPUT(name) \
276 void ff_##name##ToY_neon(uint8_t *dst, const uint8_t *src, const uint8_t *, \
277 const uint8_t *, int w, uint32_t *coeffs, void *); \
278 void ff_##name##ToUV_neon(uint8_t *, uint8_t *, const uint8_t *, \
279 const uint8_t *, const uint8_t *, int w, \
280 uint32_t *coeffs, void *); \
281 void ff_##name##ToUV_half_neon(uint8_t *, uint8_t *, const uint8_t *, \
282 const uint8_t *, const uint8_t *, int w, \
283 uint32_t *coeffs, void *)
284 #define NEON_INPUT_DOTPROD(name) \
285 void ff_##name##ToY_neon_dotprod(uint8_t *dst, const uint8_t *src, const uint8_t *, \
286 const uint8_t *, int w, uint32_t *coeffs, void *);
319 if (
c->dstBpc <= 14) {
320 if (
c->opts.src_range) {
328 if (
c->opts.src_range) {
344 if (!
isBE(
c->opts.src_format)) {
360 if (
c->dstBpc == 8) {
363 c->yuv2nv12cX = ff_yuv2nv12cX_neon;
367 if (
desc->comp[0].depth == 9) {
368 c->yuv2planeX =
isBE(dstFormat) ? yuv2planeX_9BE_neon : yuv2planeX_9LE_neon;
369 }
else if (
desc->comp[0].depth == 10) {
370 c->yuv2planeX =
isBE(dstFormat) ? yuv2planeX_10BE_neon : yuv2planeX_10LE_neon;
371 }
else if (
desc->comp[0].depth == 12) {
372 c->yuv2planeX =
isBE(dstFormat) ? yuv2planeX_12BE_neon : yuv2planeX_12LE_neon;
373 }
else if (
desc->comp[0].depth == 14) {
374 c->yuv2planeX =
isBE(dstFormat) ? yuv2planeX_14BE_neon : yuv2planeX_14LE_neon;
378 switch (
c->opts.src_format) {
380 c->lumToYV12 = ff_abgr32ToY_neon;
381 if (
c->chrSrcHSubSample)
382 c->chrToYV12 = ff_abgr32ToUV_half_neon;
384 c->chrToYV12 = ff_abgr32ToUV_neon;
388 c->lumToYV12 = ff_argb32ToY_neon;
389 if (
c->chrSrcHSubSample)
390 c->chrToYV12 = ff_argb32ToUV_half_neon;
392 c->chrToYV12 = ff_argb32ToUV_neon;
395 c->lumToYV12 = ff_bgr24ToY_neon;
396 if (
c->chrSrcHSubSample)
397 c->chrToYV12 = ff_bgr24ToUV_half_neon;
399 c->chrToYV12 = ff_bgr24ToUV_neon;
402 c->lumToYV12 = ff_bgra32ToY_neon;
405 c->lumToYV12 = ff_bgra32ToY_neon_dotprod;
408 if (
c->chrSrcHSubSample)
409 c->chrToYV12 = ff_bgra32ToUV_half_neon;
411 c->chrToYV12 = ff_bgra32ToUV_neon;
414 c->lumToYV12 = ff_rgb24ToY_neon;
415 if (
c->chrSrcHSubSample)
416 c->chrToYV12 = ff_rgb24ToUV_half_neon;
418 c->chrToYV12 = ff_rgb24ToUV_neon;
421 c->lumToYV12 = ff_rgba32ToY_neon;
424 c->lumToYV12 = ff_rgba32ToY_neon_dotprod;
427 if (
c->chrSrcHSubSample)
428 c->chrToYV12 = ff_rgba32ToUV_half_neon;
430 c->chrToYV12 = ff_rgba32ToUV_neon;
void ff_lumRangeFromJpeg16_neon(int16_t *dst, int width, uint32_t coeff, int64_t offset)
AVPixelFormat
Pixel format.
void ff_hscale16to19_X4_neon_asm(int shift, int16_t *_dst, int dstW, const uint8_t *_src, const int16_t *filter, const int32_t *filterPos, int filterSize)
av_cold void ff_sws_init_range_convert_aarch64(SwsInternal *c)
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
#define AV_PIX_FMT_FLAG_FLOAT
The pixel format contains IEEE-754 floating point values.
void(* filter)(uint8_t *src, int stride, int qscale)
@ AV_PIX_FMT_BGR24
packed RGB 8:8:8, 24bpp, BGRBGR...
@ AV_PIX_FMT_BGRA
packed BGRA 8:8:8:8, 32bpp, BGRABGRA...
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
static atomic_int cpu_flags
void ff_lumRangeToJpeg16_neon(int16_t *dst, int width, uint32_t coeff, int64_t offset)
uint8_t ptrdiff_t const uint8_t * _src
void ff_hscale16to15_X4_neon_asm(int shift, int16_t *_dst, int dstW, const uint8_t *_src, const int16_t *filter, const int32_t *filterPos, int filterSize)
static void ff_hscale16to15_4_neon(SwsInternal *c, int16_t *_dst, int dstW, const uint8_t *_src, const int16_t *filter, const int32_t *filterPos, int filterSize)
void ff_chrRangeFromJpeg8_neon(int16_t *dstU, int16_t *dstV, int width, uint32_t coeff, int64_t offset)
av_cold void ff_sws_init_xyzdsp_aarch64(SwsInternal *c)
static av_always_inline int isNBPS(enum AVPixelFormat pix_fmt)
void ff_hscale16to15_X8_neon_asm(int shift, int16_t *_dst, int dstW, const uint8_t *_src, const int16_t *filter, const int32_t *filterPos, int filterSize)
static void ff_hscale16to19_X4_neon(SwsInternal *c, int16_t *_dst, int dstW, const uint8_t *_src, const int16_t *filter, const int32_t *filterPos, int filterSize)
static const uint16_t dither[8][8]
void ff_lumRangeToJpeg8_neon(int16_t *dst, int width, uint32_t coeff, int64_t offset)
#define av_assert0(cond)
assert() equivalent, that is always enabled.
void ff_yuv2planeX_8_neon(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW, const uint8_t *dither, int offset)
@ AV_PIX_FMT_RGBA
packed RGBA 8:8:8:8, 32bpp, RGBARGBA...
static void ff_hscale16to15_X4_neon(SwsInternal *c, int16_t *_dst, int dstW, const uint8_t *_src, const int16_t *filter, const int32_t *filterPos, int filterSize)
static av_always_inline int isSemiPlanarYUV(enum AVPixelFormat pix_fmt)
void ff_hscale16to19_X8_neon_asm(int shift, int16_t *_dst, int dstW, const uint8_t *_src, const int16_t *filter, const int32_t *filterPos, int filterSize)
#define SIZEOF_MEMBER(type, member)
@ AV_PIX_FMT_ABGR
packed ABGR 8:8:8:8, 32bpp, ABGRABGR...
void ff_chrRangeToJpeg16_neon(int16_t *dstU, int16_t *dstV, int width, uint32_t coeff, int64_t offset)
Undefined Behavior: In the C language, some operations are undefined — like signed integer overflow, dereferencing freed pointers, or accessing outside allocated space. Undefined Behavior must not occur in a C program; it is not safe even if the output of the undefined operations is unused. The unsafety may seem like nit-picking, but: optimizing compilers have in fact optimized code on the assumption that no Undefined Behavior occurs. Optimizing code based on wrong assumptions can, and has in some cases, led to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c…
static av_always_inline int isDataInHighBits(enum AVPixelFormat pix_fmt)
void ff_yuv2planeX_10_neon(const int16_t *filter, int filterSize, const int16_t **src, uint16_t *dest, int dstW, int big_endian, int output_bits)
static av_always_inline int isBE(enum AVPixelFormat pix_fmt)
@ AV_PIX_FMT_RGB24
packed RGB 8:8:8, 24bpp, RGBRGB...
void ff_lumRangeFromJpeg8_neon(int16_t *dst, int width, uint32_t coeff, int64_t offset)
static int shift(int a, int b)
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
static av_always_inline int isAnyRGB(enum AVPixelFormat pix_fmt)
static void ff_hscale16to19_4_neon(SwsInternal *c, int16_t *_dst, int dstW, const uint8_t *_src, const int16_t *filter, const int32_t *filterPos, int filterSize)
static av_always_inline int isSwappedChroma(enum AVPixelFormat pix_fmt)
it's the only field you need to keep, assuming you have a context. There is some magic you don't need to care about around this — just let it be. (vf offset)
void ff_hscale16to19_4_neon_asm(int shift, int16_t *_dst, int dstW, const uint8_t *_src, const int16_t *filter, const int32_t *filterPos, int filterSize)
@ AV_PIX_FMT_ARGB
packed ARGB 8:8:8:8, 32bpp, ARGBARGB...
#define ASSIGN_VSCALE_FUNC(vscalefn, opt)
#define yuv2NBPS(bits, BE_LE, is_be, template_size, typeX_t)
void ff_xyz12Torgb48le_neon_asm(const SwsColorXform *c, uint8_t *dst, int dst_stride, const uint8_t *src, int src_stride, int w, int h)
static void ff_hscale16to15_X8_neon(SwsInternal *c, int16_t *_dst, int dstW, const uint8_t *_src, const int16_t *filter, const int32_t *filterPos, int filterSize)
@ AV_PIX_FMT_PAL8
8 bits with AV_PIX_FMT_RGB32 palette
#define ASSIGN_SCALE_FUNC(hscalefn, filtersize, opt)
void ff_chrRangeFromJpeg16_neon(int16_t *dstU, int16_t *dstV, int width, uint32_t coeff, int64_t offset)
#define have_dotprod(flags)
void ff_hscale16to15_4_neon_asm(int shift, int16_t *_dst, int dstW, const uint8_t *_src, const int16_t *filter, const int32_t *filterPos, int filterSize)
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes...
static void ff_hscale16to19_X8_neon(SwsInternal *c, int16_t *_dst, int dstW, const uint8_t *_src, const int16_t *filter, const int32_t *filterPos, int filterSize)
av_cold void ff_sws_init_swscale_aarch64(SwsInternal *c)
static const double coeff[2][5]
#define NEON_INPUT_DOTPROD(name)
#define ALL_SCALE_FUNCS(opt)
static void xyz12Torgb48le_neon(const SwsInternal *c, uint8_t *dst, int dst_stride, const uint8_t *src, int src_stride, int w, int h)
void ff_chrRangeToJpeg8_neon(int16_t *dstU, int16_t *dstV, int width, uint32_t coeff, int64_t offset)