FFmpeg
ops_tmpl_common.c
Go to the documentation of this file.
1 /**
2  * Copyright (C) 2025 Niklas Haas
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "ops_backend.h"
22 
23 #ifndef BIT_DEPTH
24 # error Should only be included from ops_tmpl_*.c!
25 #endif
26 
/*
 * WRAP_CONVERT_UINT(N): declare the "convert_uint<N>" pattern together with
 * its table entries.
 *
 * The generated body copies every enabled component (guarded by X/Y/Z/W)
 * element by element into a u<N>block_t, so the value conversion is the one
 * performed by plain C assignment, and then forwards the converted blocks
 * through CONTINUE(). WRAP_COMMON_PATTERNS() is invoked with
 * .op = SWS_OP_CONVERT and .convert.to = SWS_PIXEL_U<N>.
 *
 * NOTE(review): x/y/z/w and the X/Y/Z/W flags are not declared here; they
 * are presumably provided by DECL_PATTERN (ops_backend.h) - confirm there.
 */
27 #define WRAP_CONVERT_UINT(N) \
28 DECL_PATTERN(convert_uint##N) \
29 { \
30  u##N##block_t xu, yu, zu, wu; \
31  \
32  SWS_LOOP \
33  for (int i = 0; i < SWS_BLOCK_SIZE; i++) { \
34  if (X) \
35  xu[i] = x[i]; \
36  if (Y) \
37  yu[i] = y[i]; \
38  if (Z) \
39  zu[i] = z[i]; \
40  if (W) \
41  wu[i] = w[i]; \
42  } \
43  \
44  CONTINUE(xu, yu, zu, wu); \
45 } \
46  \
47 WRAP_COMMON_PATTERNS(convert_uint##N, \
48  .op = SWS_OP_CONVERT, \
49  .convert.to = SWS_PIXEL_U##N, \
50 );
51 
52 #if BIT_DEPTH != 8
54 #endif
55 
56 #if BIT_DEPTH != 16
58 #endif
59 
60 #if BIT_DEPTH != 32 || defined(IS_FLOAT)
62 #endif
63 
/*
 * Body of the "clear" pattern: every component whose X/Y/Z/W flag is set is
 * overwritten, for all SWS_BLOCK_SIZE elements, with the constant held in
 * impl->priv.px[0..3] (index 0 for x, 1 for y, 2 for z, 3 for w). Components
 * whose flag is clear are passed through untouched.
 *
 * NOTE(review): the declaration line (source line 64) is missing from this
 * listing; the cross-reference index names it as DECL_PATTERN(clear).
 */
65 {
66  SWS_LOOP
67  for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
68  if (X)
69  x[i] = impl->priv.px[0];
70  if (Y)
71  y[i] = impl->priv.px[1];
72  if (Z)
73  z[i] = impl->priv.px[2];
74  if (W)
75  w[i] = impl->priv.px[3];
76  }
77 
    /* Hand the (partially overwritten) blocks to the rest of the chain. */
78  CONTINUE(x, y, z, w);
79 }
80 
/*
 * WRAP_CLEAR(X, Y, Z, W): instantiate the clear pattern for one fixed
 * component mask.
 *
 * DECL_IMPL() is invoked with the pattern name "clear", the specialised name
 * clear_<X><Y><Z><W> and the four flags; DECL_ENTRY() then declares the
 * matching entry with .op = SWS_OP_CLEAR, .setup = ff_sws_setup_clear and
 * .clear.mask = SWS_COMP_MASK(X, Y, Z, W).
 *
 * NOTE(review): what DECL_IMPL/DECL_ENTRY expand to is not visible in this
 * file; see ops_backend.h.
 */
81 #define WRAP_CLEAR(X, Y, Z, W) \
82 DECL_IMPL(clear, clear##_##X##Y##Z##W, X, Y, Z, W) \
83  \
84 DECL_ENTRY(clear##_##X##Y##Z##W, SWS_COMP_ALL, \
85  .setup = ff_sws_setup_clear, \
86  .op = SWS_OP_CLEAR, \
87  .clear.mask = SWS_COMP_MASK(X, Y, Z, W), \
88 );
89 
/*
 * Only the masks listed below are instantiated. A 1 marks a component that
 * gets overwritten; the trailing comment on each line names the case the
 * mask is meant for.
 */
90 WRAP_CLEAR(0, 0, 0, 1) /* rgba alpha */
91 WRAP_CLEAR(1, 0, 0, 0) /* argb alpha */
92 WRAP_CLEAR(0, 1, 0, 0) /* ya alpha */
93 
94 WRAP_CLEAR(1, 1, 0, 0) /* vuya chroma */
95 WRAP_CLEAR(0, 1, 1, 0) /* yuva chroma */
96 WRAP_CLEAR(0, 0, 1, 1) /* ayuv chroma */
97 WRAP_CLEAR(1, 0, 1, 0) /* uyva chroma */
98 WRAP_CLEAR(0, 1, 0, 1) /* xvyu chroma */
99 
100 WRAP_CLEAR(0, 1, 1, 1) /* gray -> yuva */
101 WRAP_CLEAR(1, 0, 1, 1) /* gray -> ayuv */
102 WRAP_CLEAR(1, 1, 0, 1) /* gray -> vuya */
103 
/*
 * Upper clamp: each enabled component (X/Y/Z/W) is replaced, element by
 * element, with FFMIN(value, impl->priv.px[k]), where k is 0..3 for x..w.
 * The result is forwarded through CONTINUE().
 *
 * NOTE(review): the declaration line (source line 104) is missing from this
 * listing; presumably DECL_PATTERN(min), matching the
 * WRAP_COMMON_PATTERNS(min, ...) entry shown in the cross-reference index.
 */
105 {
106  SWS_LOOP
107  for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
108  if (X)
109  x[i] = FFMIN(x[i], impl->priv.px[0]);
110  if (Y)
111  y[i] = FFMIN(y[i], impl->priv.px[1]);
112  if (Z)
113  z[i] = FFMIN(z[i], impl->priv.px[2]);
114  if (W)
115  w[i] = FFMIN(w[i], impl->priv.px[3]);
116  }
117 
118  CONTINUE(x, y, z, w);
119 }
120 
/*
 * Lower clamp: each enabled component (X/Y/Z/W) is replaced, element by
 * element, with FFMAX(value, impl->priv.px[k]), where k is 0..3 for x..w.
 * The result is forwarded through CONTINUE().
 *
 * NOTE(review): the declaration line (source line 121) is missing from this
 * listing; presumably DECL_PATTERN(max) - confirm against the original file.
 */
122 {
123  SWS_LOOP
124  for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
125  if (X)
126  x[i] = FFMAX(x[i], impl->priv.px[0]);
127  if (Y)
128  y[i] = FFMAX(y[i], impl->priv.px[1]);
129  if (Z)
130  z[i] = FFMAX(z[i], impl->priv.px[2]);
131  if (W)
132  w[i] = FFMAX(w[i], impl->priv.px[3]);
133  }
134 
135  CONTINUE(x, y, z, w);
136 }
137 
/*
 * NOTE(review): the opening line of this invocation (source line 138) is
 * missing from this listing; the cross-reference index gives it as
 * WRAP_COMMON_PATTERNS(min, ...). The visible arguments select SWS_OP_MIN,
 * use ff_sws_setup_clamp as setup hook and set .flexible to true.
 */
139  .op = SWS_OP_MIN,
140  .setup = ff_sws_setup_clamp,
141  .flexible = true,
142 );
143 
/*
 * NOTE(review): the opening line of this invocation (source line 144) is
 * missing from this listing; presumably WRAP_COMMON_PATTERNS(max, by analogy
 * with the min entry - confirm. The visible arguments select SWS_OP_MAX,
 * use ff_sws_setup_clamp as setup hook and set .flexible to true.
 */
145  .op = SWS_OP_MAX,
146  .setup = ff_sws_setup_clamp,
147  .flexible = true,
148 );
149 
/*
 * Uniform scale: multiplies every enabled component (X/Y/Z/W) in place by a
 * single factor read from impl->priv.px[0]; the same factor is used for all
 * four components. The result is forwarded through CONTINUE().
 *
 * NOTE(review): the declaration line (source line 150) is missing from this
 * listing; presumably DECL_PATTERN(scale) - confirm against the original.
 */
151 {
    /* Read the factor once, outside the per-element loop. */
152  const pixel_t scale = impl->priv.px[0];
153 
154  SWS_LOOP
155  for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
156  if (X)
157  x[i] *= scale;
158  if (Y)
159  y[i] *= scale;
160  if (Z)
161  z[i] *= scale;
162  if (W)
163  w[i] *= scale;
164  }
165 
166  CONTINUE(x, y, z, w);
167 }
168 
/*
 * NOTE(review): the opening line of this invocation (source line 169) is
 * missing from this listing; presumably WRAP_COMMON_PATTERNS(scale, by
 * analogy with the min entry - confirm. The visible arguments select
 * SWS_OP_SCALE, use ff_sws_setup_scale as setup hook and set .flexible.
 */
170  .op = SWS_OP_SCALE,
171  .setup = ff_sws_setup_scale,
172  .flexible = true,
173 );
174 
/*
 * Setup hook for the vertical filter read.
 *
 * Allocates a float array of filter->num_weights entries, fills it with the
 * kernel's weights divided by SWS_FILTER_SCALE, and publishes it through
 * out->priv.ptr. The tap count (filter->filter_size) goes to
 * out->priv.i32[2], and ff_op_priv_free is installed as out->free.
 *
 * Returns 0 on success, AVERROR(ENOMEM) if the allocation fails.
 *
 * NOTE(review): the declaration line (source line 175) is missing from this
 * listing; the cross-reference index names it as
 * DECL_SETUP(setup_filter_v, params, out).
 */
176 {
177  const SwsFilterWeights *filter = params->op->rw.kernel;
    /*
     * The pointer must fit within the first two int32 slots, since i32[2] is
     * written below. NOTE(review): this presumably relies on priv.ptr and
     * priv.i32[] sharing storage - confirm in the SwsOpPriv definition.
     */
178  static_assert(sizeof(out->priv.ptr) <= sizeof(int32_t[2]),
179  ">8 byte pointers not supported");
180 
181  /* Pre-convert weights to float */
182  float *weights = av_calloc(filter->num_weights, sizeof(float));
183  if (!weights)
184  return AVERROR(ENOMEM);
185 
186  for (int i = 0; i < filter->num_weights; i++)
187  weights[i] = (float) filter->weights[i] / SWS_FILTER_SCALE;
188 
189  out->priv.ptr = weights;
190  out->priv.i32[2] = filter->filter_size;
191  out->free = ff_op_priv_free;
192  return 0;
193 }
194 
/*
 * Vertical filter read for `elems` planes (1..4).
 *
 * Reads the float weights from impl->priv.ptr and the tap count from
 * impl->priv.i32[2]. For the current line (iter->y) it accumulates
 * filter_size weighted input rows into float blocks and forwards them
 * through CONTINUE(). Accumulators for planes beyond `elems` are left
 * uninitialised and are still passed on.
 *
 * NOTE(review): in0..in3 are not declared here; presumably DECL_READ
 * provides them as typed pointers to the current input position - confirm.
 */
195 /* Fully general vertical planar filter case */
196 DECL_READ(filter_v, const int elems)
197 {
198  const SwsOpExec *exec = iter->exec;
199  const float *restrict weights = impl->priv.ptr;
200  const int filter_size = impl->priv.i32[2];
    /* Skip to the filter_size weights that belong to line iter->y. */
201  weights += filter_size * iter->y;
202 
    /* Zero only the accumulators of the planes that are actually read. */
203  f32block_t xs, ys, zs, ws;
204  memset(xs, 0, sizeof(xs));
205  if (elems > 1)
206  memset(ys, 0, sizeof(ys));
207  if (elems > 2)
208  memset(zs, 0, sizeof(zs));
209  if (elems > 3)
210  memset(ws, 0, sizeof(ws));
211 
    /* One iteration per tap: add one weighted input row to the sums. */
212  for (int j = 0; j < filter_size; j++) {
213  const float weight = weights[j];
214 
215  SWS_LOOP
216  for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
217  xs[i] += weight * in0[i];
218  if (elems > 1)
219  ys[i] += weight * in1[i];
220  if (elems > 2)
221  zs[i] += weight * in2[i];
222  if (elems > 3)
223  ws[i] += weight * in3[i];
224  }
225 
    /* Step each used plane pointer by its stride to reach the next row. */
226  in0 = bump_ptr(in0, exec->in_stride[0]);
227  if (elems > 1)
228  in1 = bump_ptr(in1, exec->in_stride[1]);
229  if (elems > 2)
230  in2 = bump_ptr(in2, exec->in_stride[2]);
231  if (elems > 3)
232  in3 = bump_ptr(in3, exec->in_stride[3]);
233  }
234 
    /* Advance the stored input positions of the used planes by one block. */
235  for (int i = 0; i < elems; i++)
236  iter->in[i] += sizeof(block_t);
237 
238  CONTINUE(xs, ys, zs, ws);
239 }
240 
/*
 * Setup hook for the horizontal filter read.
 *
 * No conversion is done here: the hook takes a new reference to the kernel's
 * weight array via av_refstruct_ref() and stores it in out->priv.ptr. The
 * tap count (filter->filter_size) goes to out->priv.i32[2], and
 * ff_op_priv_unref is installed as out->free. Always returns 0.
 *
 * NOTE(review): the declaration line (source line 241) is missing from this
 * listing; the cross-reference index names it as setup_filter_h, presumably
 * declared as DECL_SETUP(setup_filter_h, params, out).
 */
242 {
243  SwsFilterWeights *filter = params->op->rw.kernel;
244  out->priv.ptr = av_refstruct_ref(filter->weights);
245  out->priv.i32[2] = filter->filter_size;
246  out->free = ff_op_priv_unref;
247  return 0;
248 }
249 
/*
 * Horizontal filter read for `elems` planes (1..4).
 *
 * Reads the integer weights from impl->priv.ptr and the tap count from
 * impl->priv.i32[2]. For each of the SWS_BLOCK_SIZE output positions it
 * looks up the input offset in exec->in_offset_x[], sums filter_size
 * weighted input samples in inter_t, and converts each sum to float by
 * multiplying with 1 / SWS_FILTER_SCALE. The float blocks are forwarded
 * through CONTINUE().
 *
 * This function does not modify iter->in. Outputs for planes beyond `elems`
 * are left uninitialised and are still passed on.
 *
 * NOTE(review): in0..in3 are not declared here; presumably DECL_READ
 * provides them as typed pointers to the current input position - confirm.
 */
250 /* Fully general horizontal planar filter case */
251 DECL_READ(filter_h, const int elems)
252 {
253  const SwsOpExec *exec = iter->exec;
254  const int *restrict weights = impl->priv.ptr;
255  const int filter_size = impl->priv.i32[2];
256  const float scale = 1.0f / SWS_FILTER_SCALE;
257  const int xpos = iter->x;
    /* Skip to the weights of the first output position in this block. */
258  weights += filter_size * iter->x;
259 
260  f32block_t xs, ys, zs, ws;
261  for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
    /*
     * Offset for this output position, applied with bump_ptr(). All four
     * start pointers are formed unconditionally, but start1..start3 are
     * only dereferenced when `elems` is large enough.
     */
262  const int offset = exec->in_offset_x[xpos + i];
263  pixel_t *start0 = bump_ptr(in0, offset);
264  pixel_t *start1 = bump_ptr(in1, offset);
265  pixel_t *start2 = bump_ptr(in2, offset);
266  pixel_t *start3 = bump_ptr(in3, offset);
267 
268  inter_t sx = 0, sy = 0, sz = 0, sw = 0;
269  for (int j = 0; j < filter_size; j++) {
270  const int weight = weights[j];
271  sx += weight * start0[j];
272  if (elems > 1)
273  sy += weight * start1[j];
274  if (elems > 2)
275  sz += weight * start2[j];
276  if (elems > 3)
277  sw += weight * start3[j];
278  }
279 
    /* Undo the fixed-point weight scaling while converting to float. */
280  xs[i] = (float) sx * scale;
281  if (elems > 1)
282  ys[i] = (float) sy * scale;
283  if (elems > 2)
284  zs[i] = (float) sz * scale;
285  if (elems > 3)
286  ws[i] = (float) sw * scale;
287 
    /* Each output position has its own run of filter_size weights. */
288  weights += filter_size;
289  }
290 
291  CONTINUE(xs, ys, zs, ws);
292 }
293 
/*
 * WRAP_FILTER(FUNC, DIR, ELEMS, SUFFIX): declare one filtered-read entry.
 *
 * Defines a static av_flatten function named fn(FUNC<ELEMS><SUFFIX>) whose
 * body is CALL_READ(FUNC<SUFFIX>, ELEMS), i.e. it invokes the generic
 * reader with a fixed element count. DECL_ENTRY() then declares the entry
 * with .op = SWS_OP_READ, .setup = fn(setup_filter<SUFFIX>),
 * .rw.elems = ELEMS and .rw.filter = SWS_OP_FILTER_<DIR>.
 *
 * NOTE(review): what CALL_READ/DECL_ENTRY expand to is not visible in this
 * file; see ops_backend.h.
 */
294 #define WRAP_FILTER(FUNC, DIR, ELEMS, SUFFIX) \
295 static av_flatten void fn(FUNC##ELEMS##SUFFIX)(SwsOpIter *restrict iter, \
296  const SwsOpImpl *restrict impl, \
297  void *restrict x, void *restrict y,\
298  void *restrict z, void *restrict w)\
299 { \
300  CALL_READ(FUNC##SUFFIX, ELEMS); \
301 } \
302  \
303 DECL_ENTRY(FUNC##ELEMS##SUFFIX, SWS_COMP_ELEMS(ELEMS), \
304  .op = SWS_OP_READ, \
305  .setup = fn(setup_filter##SUFFIX), \
306  .rw.elems = ELEMS, \
307  .rw.filter = SWS_OP_FILTER_##DIR, \
308 );
309 
/* Vertical variants for 1 to 4 elements. */
310 WRAP_FILTER(filter, V, 1, _v)
311 WRAP_FILTER(filter, V, 2, _v)
312 WRAP_FILTER(filter, V, 3, _v)
313 WRAP_FILTER(filter, V, 4, _v)
314 
/* Horizontal variants for 1 to 4 elements. */
315 WRAP_FILTER(filter, H, 1, _h)
316 WRAP_FILTER(filter, H, 2, _h)
317 WRAP_FILTER(filter, H, 3, _h)
318 WRAP_FILTER(filter, H, 4, _h)
319 
/*
 * Run the op chain over a rectangle of blocks.
 *
 * exec      execution parameters: plane pointers, strides and per-line bumps
 * priv      the SwsOpChain to execute (passed as an untyped pointer)
 * bx_start  first block index to process on each line (inclusive)
 * y_start   first line to process (inclusive)
 * bx_end    end block index (exclusive)
 * y_end     end line (exclusive)
 *
 * For each line, and each block within it, iter->x is set to the block's
 * first pixel (block * SWS_BLOCK_SIZE) and CONTINUE() is invoked once.
 *
 * NOTE(review): CONTINUE() presumably dispatches to the chain using the
 * local names `impl` and `iter` - confirm in ops_backend.h.
 */
320 static void fn(process)(const SwsOpExec *exec, const void *priv,
321  const int bx_start, const int y_start,
322  int bx_end, int y_end)
323 {
324  const SwsOpChain *chain = priv;
325  const SwsOpImpl *impl = chain->impl;
326  u32block_t x, y, z, w; /* allocate enough space for any intermediate */
327 
328  SwsOpIter iterdata;
329  SwsOpIter *iter = &iterdata; /* for CONTINUE() macro to work */
330  iter->exec = exec;
    /* Start from the plane base pointers, kept as integer addresses. */
331  for (int i = 0; i < 4; i++) {
332  iter->in[i] = (uintptr_t) exec->in[i];
333  iter->out[i] = (uintptr_t) exec->out[i];
334  }
335 
336  for (iter->y = y_start; iter->y < y_end; iter->y++) {
337  for (int block = bx_start; block < bx_end; block++) {
338  iter->x = block * SWS_BLOCK_SIZE;
339  CONTINUE(x, y, z, w);
340  }
341 
    /*
     * End of line: apply the per-plane bumps. If exec->in_bump_y is set,
     * its entry for this line additionally moves each input plane by that
     * many strides; otherwise the extra term is zero.
     */
342  const int y_bump = exec->in_bump_y ? exec->in_bump_y[iter->y] : 0;
343  for (int i = 0; i < 4; i++) {
344  iter->in[i] += exec->in_bump[i] + y_bump * exec->in_stride[i];
345  iter->out[i] += exec->out_bump[i];
346  }
347  }
348 }
WRAP_CLEAR
#define WRAP_CLEAR(X, Y, Z, W)
Definition: ops_tmpl_common.c:81
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
ops_backend.h
f32block_t
float f32block_t[SWS_BLOCK_SIZE]
Definition: ops_backend.c:48
out
static FILE * out
Definition: movenc.c:55
SwsOpIter::exec
const SwsOpExec * exec
Definition: ops_backend.h:52
block_t
#define block_t
Definition: ops_tmpl_float.c:34
ff_sws_setup_scale
int ff_sws_setup_scale(const SwsImplParams *params, SwsImplResult *out)
Definition: ops_chain.c:250
SwsFilterWeights
Represents a computed filter kernel.
Definition: filters.h:64
CONTINUE
#define CONTINUE(X, Y, Z, W)
Definition: ops_backend.h:115
filter
void(* filter)(uint8_t *src, int stride, int qscale)
Definition: h263dsp.c:29
SwsOpIter
Copyright (C) 2025 Niklas Haas.
Definition: ops_backend.h:46
max
#define max(a, b)
Definition: cuda_runtime.h:33
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
SwsOpExec::in_stride
ptrdiff_t in_stride[4]
Definition: ops_dispatch.h:41
ff_op_priv_unref
static void ff_op_priv_unref(SwsOpPriv *priv)
Definition: ops_chain.h:154
SwsOpIter::x
int x
Definition: ops_backend.h:49
WRAP_CONVERT_UINT
#define WRAP_CONVERT_UINT(N)
Copyright (C) 2025 Niklas Haas.
Definition: ops_tmpl_common.c:27
DECL_PATTERN
DECL_PATTERN(clear)
Definition: ops_tmpl_common.c:64
weight
const h264_weight_func weight
Definition: h264dsp_init.c:33
fn
Definition: ops_tmpl_float.c:123
SWS_OP_SCALE
@ SWS_OP_SCALE
Definition: ops.h:66
float
float
Definition: af_crystalizer.c:122
W
#define W(a, i, v)
Definition: jpegls.h:119
op
static int op(uint8_t **dst, const uint8_t *dst_end, GetByteContext *gb, int pixel, int count, int *x, int width, int linesize)
Perform decode operation.
Definition: anm.c:76
SWS_LOOP
#define SWS_LOOP
Definition: ops_backend.h:58
SwsOpImpl
Definition: ops_chain.h:71
SWS_OP_MIN
@ SWS_OP_MIN
Definition: ops.h:64
DECL_READ
DECL_READ(filter_v, const int elems)
Definition: ops_tmpl_common.c:196
u32block_t
uint32_t u32block_t[SWS_BLOCK_SIZE]
Definition: ops_backend.c:47
SwsOpChain::impl
SwsOpImpl impl[SWS_MAX_OPS+1]
Definition: ops_chain.h:86
SWS_BLOCK_SIZE
#define SWS_BLOCK_SIZE
Copyright (C) 2025 Niklas Haas.
Definition: ops_backend.c:42
SwsOpIter::out
uintptr_t out[4]
Definition: ops_backend.h:48
SwsOpExec
Copyright (C) 2026 Niklas Haas.
Definition: ops_dispatch.h:35
bump_ptr
#define bump_ptr(ptr, bump)
Definition: ops_backend.h:71
xs
#define xs(width, name, var, subs,...)
Definition: cbs_vp9.c:305
SwsOpChain
Compiled "chain" of operations, which can be dispatched efficiently.
Definition: ops_chain.h:84
V
#define V
Definition: avdct.c:32
SWS_FILTER_SCALE
@ SWS_FILTER_SCALE
14-bit coefficients are picked to fit comfortably within int16_t for efficient SIMD processing (e....
Definition: filters.h:40
ff_sws_setup_clamp
int ff_sws_setup_clamp(const SwsImplParams *params, SwsImplResult *out)
Definition: ops_chain.c:265
SwsOpIter::in
uintptr_t in[4]
Definition: ops_backend.h:47
i
#define i(width, name, range_min, range_max)
Definition: cbs_h264.c:63
WRAP_FILTER
#define WRAP_FILTER(FUNC, DIR, ELEMS, SUFFIX)
Definition: ops_tmpl_common.c:294
process
static void fn() process(const SwsOpExec *exec, const void *priv, const int bx_start, const int y_start, int bx_end, int y_end)
Definition: ops_tmpl_common.c:320
pixel_t
#define pixel_t
Definition: ops_tmpl_float.c:32
av_refstruct_ref
void * av_refstruct_ref(void *obj)
Create a new reference to an object managed via this API, i.e.
Definition: refstruct.c:140
H
#define H
Definition: pixlet.c:39
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
Y
#define Y
Definition: boxblur.h:37
DECL_SETUP
DECL_SETUP(setup_filter_v, params, out)
Definition: ops_tmpl_common.c:175
SwsOpExec::in_offset_x
int32_t * in_offset_x
Pixel offset map; for horizontal scaling, in bytes.
Definition: ops_dispatch.h:80
weights
static const int weights[]
Definition: hevc_pel.c:32
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
av_calloc
void * av_calloc(size_t nmemb, size_t size)
Definition: mem.c:264
ff_op_priv_free
static void ff_op_priv_free(SwsOpPriv *priv)
Definition: ops_chain.h:149
SWS_OP_MAX
@ SWS_OP_MAX
Definition: ops.h:65
inter_t
#define inter_t
Definition: ops_tmpl_float.c:33
w
uint8_t w
Definition: llvidencdsp.c:39
scale
static void scale(int *out, const int *in, const int w, const int h, const int shift)
Definition: intra.c:278
setup_filter_v
static int setup_filter_v(const SwsImplParams *params, SwsImplResult *out)
Definition: ops.c:318
X
@ X
Definition: vf_addroi.c:27
int32_t
int32_t
Definition: audioconvert.c:56
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
WRAP_COMMON_PATTERNS
WRAP_COMMON_PATTERNS(min,.op=SWS_OP_MIN,.setup=ff_sws_setup_clamp,.flexible=true,)
SwsOpIter::y
int y
Definition: ops_backend.h:49
min
float min
Definition: vorbis_enc_data.h:429
setup_filter_h
static int setup_filter_h(const SwsImplParams *params, SwsImplResult *out)
Definition: ops.c:348