FFmpeg
ops.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2026 Ramiro Polla
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "../ops_chain.h"
22 
23 #include "libavutil/avassert.h"
24 #include "libavutil/avstring.h"
25 #include "libavutil/tree.h"
26 
27 #include "ops_lookup.h"
28 
29 #include "ops_impl_conv.c"
30 
31 /*********************************************************************/
/*
 * State carried through one aarch64_compile() call. The code in this file
 * stores the owning SwsContext in `sws` and the block size selected by
 * aarch64_optimize() in `block_size`.
 * NOTE(review): the member declarations and the closing line of this
 * typedef (listing lines 33-35) are missing from this copy of the file.
 */
32 typedef struct SwsAArch64BackendContext {
36 
37 /*********************************************************************/
39  const SwsOp *op, SwsImplResult *res)
40 {
 /*
  * Body of aarch64_setup_linear(): builds the private data for a linear
  * op as a heap-allocated array of floats. On success the array is handed
  * over through res->priv.ptr with ff_op_priv_free as its release
  * callback and 0 is returned; AVERROR(ENOMEM) is returned if the
  * allocation fails.
  * NOTE(review): the line carrying the function name and the parameter
  * `p` (listing line 38) is missing from this copy of the file.
  */
41  /**
42  * Compute number of full vector registers needed to pack all non-zero
43  * coefficients.
44  */
45  const int num_vregs = linear_num_vregs(p);
46  av_assert0(num_vregs <= 4);
 /* Four float slots are reserved for each counted vector register. */
47  float *coeffs = av_malloc(num_vregs * 4 * sizeof(float));
48  if (!coeffs)
49  return AVERROR(ENOMEM);
50 
51  /**
52  * Copy non-zero coefficients, reordered to match SwsAArch64LinearOpMask.
53  * The coefficients are packed in sequential order. The same order must
54  * be followed in asmgen_op_linear().
55  */
56  int i_coeff = 0;
57  LOOP_LINEAR_MASK(p, i, j) {
 /* Translate the mask column index into the column index used by op->lin.m. */
58  const int jj = linear_index_to_sws_op(j);
 /* The cast binds to the numerator, so the division is done in floating point. */
59  coeffs[i_coeff++] = (float) op->lin.m[i][jj].num / op->lin.m[i][jj].den;
60  }
61 
 /* Hand the buffer to the caller together with the callback that releases it. */
62  res->priv.ptr = coeffs;
63  res->free = ff_op_priv_free;
64 
65  return 0;
66 }
67 
68 /*********************************************************************/
70  const SwsOp *op, SwsImplResult *res)
71 {
 /*
  * Body of aarch64_setup_dither(): converts the rational dither matrix of
  * `op` into a heap-allocated float matrix with extra trailing rows, and
  * hands it over through res->priv.ptr with ff_op_priv_free as its release
  * callback. Returns 0 on success or AVERROR(ENOMEM) if the allocation
  * fails.
  * NOTE(review): the line carrying the function name and the parameter
  * `p` (listing line 69) is missing from this copy of the file; `p` is
  * not referenced in the body below.
  */
72  /**
73  * The input dither matrix is (1 << size_log2)² pixels large. It is
74  * periodic, so the x and y offsets should be masked to fit inside
75  * (1 << size_log2).
76  * The width of the matrix is assumed to be at least 8, which matches
77  * the maximum block_size for aarch64 asmgen when f32 operations
78  * (i.e., dithering) are used. This guarantees that the x offset is
79  * aligned and that reading block_size elements does not extend past
80  * the end of the row. The x offset doesn't change between components,
81  * so it is only required to be masked once.
82  * The y offset, on the other hand, may change per component, and
83  * would therefore need to be masked for every y_offset value. To
84  * simplify the execution, we over-allocate the number of rows of
85  * the output dither matrix by the largest y_offset value. This way,
86  * we only need to mask y offset once, and can safely increment the
87  * dither matrix pointer by fixed offsets for every y_offset change.
88  */
89 
90  /* Find the largest y_offset value. */
91  const int size = 1 << op->dither.size_log2;
92  const int8_t *off = op->dither.y_offset;
93  int max_offset = 0;
94  for (int i = 0; i < 4; i++) {
 /* Negative entries are skipped; size is a power of two, so (size - 1) wraps the offset into [0, size). */
95  if (off[i] >= 0)
96  max_offset = FFMAX(max_offset, off[i] & (size - 1));
97  }
98 
99  /* Allocate (size + max_offset) rows to allow over-reading the matrix. */
 /* stride is the length of one matrix row in bytes. */
100  const int stride = size * sizeof(float);
101  const int num_rows = size + max_offset;
102  float *matrix = av_malloc(num_rows * stride);
103  if (!matrix)
104  return AVERROR(ENOMEM);
105 
 /* Convert every rational entry to float; the cast binds to the numerator. */
106  for (int i = 0; i < size * size; i++)
107  matrix[i] = (float) op->dither.matrix[i].num / op->dither.matrix[i].den;
108 
 /*
  * Repeat the first max_offset rows after the last row. max_offset is
  * always below size, so source and destination never overlap.
  */
109  memcpy(&matrix[size * size], matrix, max_offset * stride);
110 
 /* Hand the buffer to the caller together with the callback that releases it. */
111  res->priv.ptr = matrix;
112  res->free = ff_op_priv_free;
113 
114  return 0;
115 }
116 
117 /*********************************************************************/
118 static int aarch64_setup(SwsOpList *ops, int block_size, int n,
120 {
121  SwsOp *op = &ops->ops[n];
122  switch (op->op) {
123  case SWS_OP_READ:
124  /* Negative shift values to perform right shift using ushl. */
125  if (op->rw.frac == 3) {
126  out->priv = (SwsOpPriv) {
127  .u8 = {
128  -7, -6, -5, -4, -3, -2, -1, 0,
129  -7, -6, -5, -4, -3, -2, -1, 0,
130  }
131  };
132  }
133  break;
134  case SWS_OP_WRITE:
135  /* Shift values for ushl. */
136  if (op->rw.frac == 3) {
137  out->priv = (SwsOpPriv) {
138  .u8 = {
139  7, 6, 5, 4, 3, 2, 1, 0,
140  7, 6, 5, 4, 3, 2, 1, 0,
141  }
142  };
143  }
144  break;
145  case SWS_OP_CLEAR:
146  ff_sws_setup_clear(&(const SwsImplParams) { .op = op }, out);
147  break;
148  case SWS_OP_MIN:
149  case SWS_OP_MAX:
150  ff_sws_setup_clamp(&(const SwsImplParams) { .op = op }, out);
151  break;
152  case SWS_OP_SCALE:
153  ff_sws_setup_scale(&(const SwsImplParams) { .op = op }, out);
154  break;
155  case SWS_OP_LINEAR:
156  return aarch64_setup_linear(p, op, out);
157  case SWS_OP_DITHER:
158  return aarch64_setup_dither(p, op, out);
159  }
160  return 0;
161 }
162 
163 /*********************************************************************/
165 {
 /*
  * Body of aarch64_optimize(): the op list is left unchanged and only
  * bctx->block_size is selected. Always returns 0.
  * NOTE(review): the signature line (listing line 164) is missing from
  * this copy of the file.
  */
166  /* Currently, no optimization is performed. This is just a placeholder. */
167 
168  /* Use at most two full vregs during the widest precision section */
 /*
  * A block is 8 pixels when the largest pixel type in the list has size 4
  * and 16 pixels otherwise.
  * NOTE(review): presumably the size is in bytes, so 8 x 4 bytes fills two
  * 128-bit registers - confirm.
  */
169  bctx->block_size = (ff_sws_op_list_max_size(ops) == 4) ? 8 : 16;
170 
171  return 0;
172 }
173 
174 /*********************************************************************/
176 {
 /*
  * Body of aarch64_compile(): builds an SwsOpChain of NEON kernels for
  * `ops` and describes it in `*out`. Returns 0 on success,
  * AVERROR(ENOTSUP) when NEON is unavailable or a kernel cannot be found,
  * AVERROR(ENOMEM) when the chain is NULL, or the error of a failing
  * helper.
  * NOTE(review): listing lines 175, 177, 191, 199, 209 and 228 are missing
  * from this copy of the file; they evidently hold the signature and the
  * declarations of `bctx`, `chain`, `func` and `mask`, plus one
  * initializer of `*out`.
  */
178  int ret;
179 
 /* The backend is only usable when the running CPU reports NEON. */
180  const int cpu_flags = av_get_cpu_flags();
181  if (!(cpu_flags & AV_CPU_FLAG_NEON))
182  return AVERROR(ENOTSUP);
183 
184  /* Make on-stack copy of `ops` to iterate over */
 /* This is a struct copy: `rest` shares the ops array pointed to by `ops`. */
185  SwsOpList rest = *ops;
186  bctx.sws = ctx;
187  ret = aarch64_optimize(&bctx, &rest);
188  if (ret < 0)
189  return ret;
190 
192  if (!chain)
193  return AVERROR(ENOMEM);
194  chain->cpu_flags = AV_CPU_FLAG_NEON;
195 
 /*
  * NOTE(review): *out is filled in before the steps that can still fail.
  * On those error paths the chain is freed below while out->priv keeps
  * pointing at it - confirm that callers ignore *out when an error is
  * returned.
  */
196  *out = (SwsCompiledOp) {
197  .priv = chain,
198  .slice_align = 1,
200  .block_size = bctx.block_size,
201  };
202 
203  /* Look up kernel functions. */
204  for (int i = 0; i < rest.num_ops; i++) {
205  SwsAArch64OpImplParams params = { 0 };
206  ret = convert_to_aarch64_impl(ctx, &rest, i, bctx.block_size, &params);
207  if (ret < 0)
208  goto error;
 /* A missing kernel means this op list is not supported by the backend. */
210  if (!func) {
211  ret = AVERROR(ENOTSUP);
212  goto error;
213  }
214  SwsImplResult res = { 0 };
215  ret = aarch64_setup(&rest, bctx.block_size, i, &params, &res);
216  if (ret < 0)
217  goto error;
 /*
  * NOTE(review): if the append below fails, res.priv (heap-allocated for
  * linear and dither ops) is not released on this path - confirm whether
  * ff_sws_op_chain_append() takes ownership even when it returns an error.
  */
218  ret = ff_sws_op_chain_append(chain, func, res.free, &res.priv);
219  if (ret < 0)
220  goto error;
221  }
222 
223  /* Look up process/process_return functions. */
224  const SwsOp *read = ff_sws_op_list_input(&rest);
225  const SwsOp *write = ff_sws_op_list_output(&rest);
 /* A packed read or write counts as one plane; otherwise one plane per element. */
226  const int read_planes = read ? (read->rw.packed ? 1 : read->rw.elems) : 0;
 /*
  * NOTE(review): `read` is NULL-checked above but `write` is dereferenced
  * unconditionally - confirm that every op list reaching this point has an
  * output operation.
  */
227  const int write_planes = write->rw.packed ? 1 : write->rw.elems;
 /* Mark every plane index below the larger of the two plane counts. */
229  for (int i = 0; i < FFMAX(read_planes, write_planes); i++)
230  MASK_SET(mask, i, 1);
231 
232  SwsAArch64OpImplParams process_params = { .op = AARCH64_SWS_OP_PROCESS, .mask = mask };
233  SwsAArch64OpImplParams return_params = { .op = AARCH64_SWS_OP_PROCESS_RETURN, .mask = mask };
234  SwsFuncPtr process_func = ff_sws_aarch64_lookup(&process_params);
235  SwsFuncPtr return_func = ff_sws_aarch64_lookup(&return_params);
236  if (!process_func || !return_func) {
237  ret = AVERROR(ENOTSUP);
238  goto error;
239  }
240 
 /* The return kernel closes the chain; it carries no private data and no free callback. */
241  ret = ff_sws_op_chain_append(chain, return_func, NULL, &(SwsOpPriv) { 0 });
242  if (ret < 0)
243  goto error;
244 
 /* The process kernel is the entry point stored in the compiled op. */
245  out->func = (SwsOpFunc) process_func;
246  out->cpu_flags = chain->cpu_flags;
247 
 /* Also reached on success; the chain is released only when ret is negative. */
248 error:
249  if (ret < 0)
250  ff_sws_op_chain_free(chain);
251  return ret;
252 }
253 
254 /*********************************************************************/
 /*
  * Initializer of the backend_aarch64 descriptor: it is named "aarch64"
  * and uses aarch64_compile() as its compile callback.
  * NOTE(review): the declaration line (listing line 255) is missing from
  * this copy of the file; hw_format = AV_PIX_FMT_NONE presumably marks the
  * backend as not bound to a hardware pixel format - confirm.
  */
256  .name = "aarch64",
257  .compile = aarch64_compile,
258  .hw_format = AV_PIX_FMT_NONE,
259 };
SWS_OP_READ
@ SWS_OP_READ
Definition: ops.h:50
error
static void error(const char *err)
Definition: target_bsf_fuzzer.c:32
func
int(* func)(AVBPrint *dst, const char *in, const char *arg)
Definition: jacosubdec.c:66
SwsAArch64BackendContext
Definition: ops.c:32
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
linear_index_to_sws_op
static int linear_index_to_sws_op(int idx)
Definition: ops_impl.h:146
ff_sws_setup_clear
int ff_sws_setup_clear(const SwsImplParams *params, SwsImplResult *out)
Definition: ops_chain.c:306
out
static FILE * out
Definition: movenc.c:55
ff_sws_op_list_input
const SwsOp * ff_sws_op_list_input(const SwsOpList *ops)
Returns the input operation for a given op list, or NULL if there is none (e.g.
Definition: ops.c:634
SWS_OP_CLEAR
@ SWS_OP_CLEAR
Definition: ops.h:62
ff_sws_op_list_max_size
int ff_sws_op_list_max_size(const SwsOpList *ops)
Returns the size of the largest pixel type used in ops.
Definition: ops.c:711
matrix
Definition: vc1dsp.c:43
ff_sws_setup_scale
int ff_sws_setup_scale(const SwsImplParams *params, SwsImplResult *out)
Definition: ops_chain.c:274
mask
int mask
Definition: mediacodecdec_common.c:154
SwsOp::rw
SwsReadWriteOp rw
Definition: ops.h:223
SWS_OP_DITHER
@ SWS_OP_DITHER
Definition: ops.h:70
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:109
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:56
SwsOpBackend::name
const char * name
Definition: ops_internal.h:56
SwsOpChain::cpu_flags
int cpu_flags
Definition: ops_chain.h:89
SwsFuncPtr
void(* SwsFuncPtr)(void)
Per-kernel execution context.
Definition: ops_chain.h:70
SwsOpList::num_ops
int num_ops
Definition: ops.h:265
SwsOpFunc
void(* SwsOpFunc)(const SwsOpExec *exec, const void *priv, int bx_start, int y_start, int bx_end, int y_end)
Process a given range of pixel blocks.
Definition: ops_dispatch.h:94
SWS_OP_SCALE
@ SWS_OP_SCALE
Definition: ops.h:66
SwsOpChain::free
void(* free[SWS_MAX_OPS+1])(SwsOpPriv *)
Definition: ops_chain.h:87
avassert.h
backend_aarch64
const SwsOpBackend backend_aarch64
Definition: ops.c:255
ff_sws_aarch64_lookup
SwsFuncPtr ff_sws_aarch64_lookup(const SwsAArch64OpImplParams *p)
float
float
Definition: af_crystalizer.c:122
SwsAArch64OpMask
uint16_t SwsAArch64OpMask
Definition: ops_impl.h:68
ff_sws_op_chain_alloc
SwsOpChain * ff_sws_op_chain_alloc(void)
Definition: ops_chain.c:29
op
static int op(uint8_t **dst, const uint8_t *dst_end, GetByteContext *gb, int pixel, int count, int *x, int width, int linesize)
Perform decode operation.
Definition: anm.c:76
SwsAArch64BackendContext::sws
SwsContext * sws
Definition: ops.c:33
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:42
SWS_OP_MIN
@ SWS_OP_MIN
Definition: ops.h:64
ctx
static AVFormatContext * ctx
Definition: movenc.c:49
SWS_OP_LINEAR
@ SWS_OP_LINEAR
Definition: ops.h:69
ff_sws_op_list_output
const SwsOp * ff_sws_op_list_output(const SwsOpList *ops)
Returns the output operation for a given op list, or NULL if there is none.
Definition: ops.c:643
AARCH64_SWS_OP_PROCESS
@ AARCH64_SWS_OP_PROCESS
Definition: ops_impl.h:40
SwsOpBackend
Definition: ops_internal.h:55
SwsOpPriv::ptr
void * ptr
Definition: ops_chain.h:49
SwsOpChain
Compiled "chain" of operations, which can be dispatched efficiently.
Definition: ops_chain.h:84
NULL
#define NULL
Definition: coverity.c:32
SwsAArch64BackendContext::block_size
int block_size
Definition: ops.c:34
aarch64_optimize
static int aarch64_optimize(SwsAArch64BackendContext *bctx, SwsOpList *ops)
Definition: ops.c:164
aarch64_compile
static int aarch64_compile(SwsContext *ctx, SwsOpList *ops, SwsCompiledOp *out)
Definition: ops.c:175
SwsImplParams
Definition: ops_chain.h:105
ff_sws_setup_clamp
int ff_sws_setup_clamp(const SwsImplParams *params, SwsImplResult *out)
Definition: ops_chain.c:289
i
#define i(width, name, range_min, range_max)
Definition: cbs_h264.c:63
AV_CPU_FLAG_NEON
#define AV_CPU_FLAG_NEON
Definition: cpu.h:73
size
int size
Definition: twinvq_data.h:10344
SwsAArch64OpImplParams::op
SwsAArch64OpType op
Definition: ops_impl.h:95
SWS_OP_WRITE
@ SWS_OP_WRITE
Definition: ops.h:51
tree.h
ops_lookup.h
aarch64_setup
static int aarch64_setup(SwsOpList *ops, int block_size, int n, const SwsAArch64OpImplParams *p, SwsImplResult *out)
Definition: ops.c:118
ff_sws_op_chain_free_cb
void ff_sws_op_chain_free_cb(void *ptr)
Definition: ops_chain.c:34
aarch64_setup_dither
static int aarch64_setup_dither(const SwsAArch64OpImplParams *p, const SwsOp *op, SwsImplResult *res)
Definition: ops.c:69
ops_impl_conv.c
av_malloc
#define av_malloc(s)
Definition: ops_asmgen.c:44
ff_sws_op_chain_free
static void ff_sws_op_chain_free(SwsOpChain *chain)
Definition: ops_chain.h:96
SwsOpList::ops
SwsOp * ops
Definition: ops.h:264
aarch64_setup_linear
static int aarch64_setup_linear(const SwsAArch64OpImplParams *p, const SwsOp *op, SwsImplResult *res)
Definition: ops.c:38
SwsImplResult::free
void(* free)(SwsOpPriv *priv)
Definition: ops_chain.h:114
SwsOp
Definition: ops.h:218
ff_op_priv_free
static void ff_op_priv_free(SwsOpPriv *priv)
Definition: ops_chain.h:149
ret
ret
Definition: filter_design.txt:187
MASK_SET
#define MASK_SET(mask, idx, val)
Definition: ops_impl.h:112
SWS_OP_MAX
@ SWS_OP_MAX
Definition: ops.h:65
SwsCompiledOp
Definition: ops_dispatch.h:100
convert_to_aarch64_impl
static int convert_to_aarch64_impl(SwsContext *ctx, const SwsOpList *ops, int n, int block_size, SwsAArch64OpImplParams *out)
Convert SwsOp to a SwsAArch64OpImplParams.
Definition: ops_impl_conv.c:59
SwsImplResult::priv
SwsOpPriv priv
Definition: ops_chain.h:113
SwsReadWriteOp::packed
bool packed
Definition: ops.h:110
AV_PIX_FMT_NONE
@ AV_PIX_FMT_NONE
Definition: pixfmt.h:72
Windows::Graphics::DirectX::Direct3D11::p
IDirect3DDxgiInterfaceAccess _COM_Outptr_ void ** p
Definition: vsrc_gfxcapture_winrt.hpp:53
LOOP_LINEAR_MASK
#define LOOP_LINEAR_MASK(p, idx, jdx)
Definition: ops_impl.h:132
SwsReadWriteOp::elems
uint8_t elems
Examples: rgba = 4x u8 packed yuv444p = 3x u8 rgb565 = 1x u16 <- use SWS_OP_UNPACK to unpack monow = ...
Definition: ops.h:108
SwsAArch64OpImplParams
SwsAArch64OpImplParams describes the parameters for an SwsAArch64OpType operation.
Definition: ops_impl.h:94
AARCH64_SWS_OP_PROCESS_RETURN
@ AARCH64_SWS_OP_PROCESS_RETURN
Definition: ops_impl.h:41
ff_sws_op_chain_append
int ff_sws_op_chain_append(SwsOpChain *chain, SwsFuncPtr func, void(*free)(SwsOpPriv *), const SwsOpPriv *priv)
Definition: ops_chain.c:48
linear_num_vregs
static int linear_num_vregs(const SwsAArch64OpImplParams *params)
Definition: ops_impl.h:138
stride
#define stride
Definition: h264pred_template.c:536
avstring.h
SwsOpList
Helper struct for representing a list of operations.
Definition: ops.h:263
SwsContext
Main external API structure.
Definition: swscale.h:206
SwsOpPriv
Private data for each kernel.
Definition: ops_chain.h:45
SwsImplResult
Definition: ops_chain.h:111
read
static uint32_t BS_FUNC() read(BSCTX *bc, unsigned int n)
Return n bits from the buffer, n has to be in the 0-32 range.
Definition: bitstream_template.h:239