FFmpeg
ops_chain.c
Go to the documentation of this file.
1 /**
2  * Copyright (C) 2025 Niklas Haas
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/avassert.h"
22 #include "libavutil/mem.h"
23 #include "libavutil/rational.h"
24 
25 #include "ops_chain.h"
26 
/* Build the AVRational N/1 from the integer expression N. */
#define Q(N) ((AVRational) { (N), 1 })
28 
30 {
31  return av_mallocz(sizeof(SwsOpChain));
32 }
33 
34 void ff_sws_op_chain_free_cb(void *ptr)
35 {
36  if (!ptr)
37  return;
38 
39  SwsOpChain *chain = ptr;
40  for (int i = 0; i < chain->num_impl + 1; i++) {
41  if (chain->free[i])
42  chain->free[i](&chain->impl[i].priv);
43  }
44 
45  av_free(chain);
46 }
47 
49  void (*free)(SwsOpPriv *), const SwsOpPriv *priv)
50 {
51  const int idx = chain->num_impl;
52  if (idx == SWS_MAX_OPS)
53  return AVERROR(EINVAL);
54 
56  chain->impl[idx].cont = func;
57  chain->impl[idx + 1].priv = *priv;
58  chain->free[idx + 1] = free;
59  chain->num_impl++;
60  return 0;
61 }
62 
63 /**
64  * Match an operation against a reference operation. Returns a score for how
65  * well the reference matches the operation, or 0 if there is no match.
66  *
67  * For unfiltered SWS_OP_READ/SWS_OP_WRITE, SWS_OP_SWAP_BYTES and
68  * SWS_OP_SWIZZLE, the exact type is not checked, just the size.
69  *
70  * Components marked SWS_COMP_GARBAGE are ignored when matching. If `flexible`
71  * is true, the op body is ignored - only the operation, pixel type, and
72  * component masks are checked.
73  */
74 static int op_match(const SwsOp *op, const SwsOpEntry *entry)
75 {
76  int score = 10;
77  if (op->op != entry->op)
78  return 0;
79 
80  switch (op->op) {
81  case SWS_OP_READ:
82  case SWS_OP_WRITE:
83  if (op->rw.filter && op->type != entry->type)
84  return 0;
86  case SWS_OP_SWAP_BYTES:
87  case SWS_OP_SWIZZLE:
88  /* Only the size matters for these operations */
90  return 0;
91  break;
92  default:
93  if (op->type != entry->type)
94  return 0;
95  break;
96  }
97 
99  if (needed & ~entry->mask)
100  return 0; /* Entry doesn't compute all needed components */
101 
102  /* Otherwise, operating on fewer components is better */
103  score += av_popcount(SWS_COMP_INV(entry->mask));
104 
105  /* Flexible variants always match, but lower the score to prioritize more
106  * specific implementations if they exist */
107  if (entry->flexible)
108  return score - 5;
109 
110  switch (op->op) {
111  case SWS_OP_INVALID:
112  return 0;
113  case SWS_OP_READ:
114  case SWS_OP_WRITE:
115  if (op->rw.elems != entry->rw.elems ||
116  op->rw.frac != entry->rw.frac ||
117  op->rw.filter != entry->rw.filter ||
118  (op->rw.elems > 1 && op->rw.packed != entry->rw.packed))
119  return 0;
120  return score;
121  case SWS_OP_SWAP_BYTES:
122  return score;
123  case SWS_OP_PACK:
124  case SWS_OP_UNPACK:
125  for (int i = 0; i < 4 && op->pack.pattern[i]; i++) {
126  if (op->pack.pattern[i] != entry->pack.pattern[i])
127  return 0;
128  }
129  return score;
130  case SWS_OP_CLEAR:
131  /* Clear mask must match exactly */
132  if (op->clear.mask != entry->clear.mask)
133  return 0;
134  for (int i = 0; i < 4; i++) {
135  if (!SWS_COMP_TEST(op->clear.mask, i) || !SWS_OP_NEEDED(op, i))
136  continue;
137  else if (!entry->clear.value[i].den)
138  continue; /* Any clear value supported */
139  else if (av_cmp_q(op->clear.value[i], entry->clear.value[i]))
140  return 0;
141  }
142  return score;
143  case SWS_OP_LSHIFT:
144  case SWS_OP_RSHIFT:
145  av_assert1(entry->flexible);
146  break;
147  case SWS_OP_SWIZZLE:
148  for (int i = 0; i < 4; i++) {
149  if (SWS_OP_NEEDED(op, i) && op->swizzle.in[i] != entry->swizzle.in[i])
150  return 0;
151  }
152  return score;
153  case SWS_OP_CONVERT:
154  if (op->convert.to != entry->convert.to ||
155  op->convert.expand != entry->convert.expand)
156  return 0;
157  return score;
158  case SWS_OP_DITHER:
159  return op->dither.size_log2 == entry->dither_size ? score : 0;
160  case SWS_OP_MIN:
161  case SWS_OP_MAX:
162  return score;
163  case SWS_OP_LINEAR:
164  if (op->lin.mask != entry->linear_mask)
165  return 0;
166  return score;
167  case SWS_OP_SCALE:
168  return av_cmp_q(op->scale.factor, entry->scale) ? 0 : score;
169  case SWS_OP_FILTER_H:
170  case SWS_OP_FILTER_V:
171  return score;
172  case SWS_OP_TYPE_NB:
173  break;
174  }
175 
176  av_unreachable("Invalid operation type!");
177  return 0;
178 }
179 
181  int num_tables, const SwsOp *op,
182  const int block_size, SwsOpChain *chain)
183 {
184  const unsigned cpu_flags = av_get_cpu_flags();
185  const SwsOpEntry *best = NULL;
186  const SwsOpTable *best_table = NULL;
187  int ret, best_score = 0;
188 
189  SwsImplParams params = {
190  .ctx = ctx,
191  .op = op
192  };
193 
194  for (int n = 0; n < num_tables; n++) {
195  const SwsOpTable *table = tables[n];
196  if (table->block_size && table->block_size != block_size ||
197  table->cpu_flags & ~cpu_flags)
198  continue;
199 
200  params.table = table;
201  for (int i = 0; table->entries[i]; i++) {
202  const SwsOpEntry *entry = table->entries[i];
203  int score = op_match(op, entry);
204  if (score <= best_score)
205  continue;
206  if (entry->check && !entry->check(&params))
207  continue;
208  best_score = score;
209  best_table = table;
210  best = entry;
211  }
212  }
213 
214  if (!best)
215  return AVERROR(ENOTSUP);
216 
217  params.table = best_table;
218 
219  SwsImplResult res = {0};
220  if (best->setup) {
221  ret = best->setup(&params, &res);
222  if (ret < 0)
223  return ret;
224  }
225 
226  ret = ff_sws_op_chain_append(chain, res.func ? res.func : best->func,
227  res.free, &res.priv);
228  if (ret < 0) {
229  if (res.free)
230  res.free(&res.priv);
231  return ret;
232  }
233 
234  chain->cpu_flags |= best_table->cpu_flags;
235  chain->over_read = FFMAX(chain->over_read, res.over_read);
236  chain->over_write = FFMAX(chain->over_write, res.over_write);
237  return 0;
238 }
239 
/*
 * Convert the rational `q` to a value of the C type `type`; a zero
 * denominator yields 0. The numerator is cast to `type` before the division,
 * so integer types divide as integers and the numerator is truncated to
 * `type` first. `q` is evaluated more than once.
 */
#define q2pixel(type, q) ((q).den ? ((type) (q).num) / (q).den : 0)
241 
243 {
244  out->priv.u8[0] = params->op->shift.amount;
245  return 0;
246 }
247 
249 {
250  const SwsOp *op = params->op;
251  const AVRational factor = op->scale.factor;
252  switch (op->type) {
253  case SWS_PIXEL_U8: out->priv.u8[0] = q2pixel(uint8_t, factor); break;
254  case SWS_PIXEL_U16: out->priv.u16[0] = q2pixel(uint16_t, factor); break;
255  case SWS_PIXEL_U32: out->priv.u32[0] = q2pixel(uint32_t, factor); break;
256  case SWS_PIXEL_F32: out->priv.f32[0] = q2pixel(float, factor); break;
257  default: return AVERROR(EINVAL);
258  }
259 
260  return 0;
261 }
262 
264 {
265  const SwsOp *op = params->op;
266  for (int i = 0; i < 4; i++) {
267  const AVRational limit = op->clamp.limit[i];
268  switch (op->type) {
269  case SWS_PIXEL_U8: out->priv.u8[i] = q2pixel(uint8_t, limit); break;
270  case SWS_PIXEL_U16: out->priv.u16[i] = q2pixel(uint16_t, limit); break;
271  case SWS_PIXEL_U32: out->priv.u32[i] = q2pixel(uint32_t, limit); break;
272  case SWS_PIXEL_F32: out->priv.f32[i] = q2pixel(float, limit); break;
273  default: return AVERROR(EINVAL);
274  }
275  }
276 
277  return 0;
278 }
279 
281 {
282  const SwsOp *op = params->op;
283  for (int i = 0; i < 4; i++) {
284  const AVRational value = op->clear.value[i];
285  if (!value.den)
286  continue;
287  switch (op->type) {
288  case SWS_PIXEL_U8: out->priv.u8[i] = q2pixel(uint8_t, value); break;
289  case SWS_PIXEL_U16: out->priv.u16[i] = q2pixel(uint16_t, value); break;
290  case SWS_PIXEL_U32: out->priv.u32[i] = q2pixel(uint32_t, value); break;
291  case SWS_PIXEL_F32: out->priv.f32[i] = q2pixel(float, value); break;
292  default: return AVERROR(EINVAL);
293  }
294  }
295 
296  return 0;
297 }
SWS_OP_READ
@ SWS_OP_READ
Definition: ops.h:50
SwsOpTable
Copyright (C) 2025 Niklas Haas.
Definition: ops_chain.h:159
func
int(* func)(AVBPrint *dst, const char *in, const char *arg)
Definition: jacosubdec.c:66
factor
static const int factor[16]
Definition: vf_pp7.c:98
SWS_PIXEL_U16
@ SWS_PIXEL_U16
Definition: ops.h:36
entry
#define entry
Definition: aom_film_grain_template.c:66
SWS_OP_SWIZZLE
@ SWS_OP_SWIZZLE
Definition: ops.h:53
AVERROR
Filter: the word "frame" indicates either a video frame or a group of audio samples, as stored in an AVFrame structure. Format: for each input and each output, the list of supported formats. For video that means pixel format. For audio that means channel layout and sample format; they are references to shared objects. When the negotiation mechanism computes the intersection of the formats supported at each end of a link, all references to both lists are replaced with a reference to the intersection. And when a single format is eventually chosen for a link amongst the remaining list, all references to the list are updated. That means that if a filter requires that its input and output have the same format amongst a supported list, all it has to do is use a reference to the same list of formats. query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions.
SwsImplResult::func
SwsFuncPtr func
Definition: ops_chain.h:112
SWS_OP_LSHIFT
@ SWS_OP_LSHIFT
Definition: ops.h:58
SWS_OP_UNPACK
@ SWS_OP_UNPACK
Definition: ops.h:56
ff_sws_setup_clear
int ff_sws_setup_clear(const SwsImplParams *params, SwsImplResult *out)
Definition: ops_chain.c:280
SWS_MAX_OPS
#define SWS_MAX_OPS
Definition: ops_chain.h:85
out
static FILE * out
Definition: movenc.c:55
SWS_OP_CLEAR
@ SWS_OP_CLEAR
Definition: ops.h:62
rational.h
ff_sws_setup_scale
int ff_sws_setup_scale(const SwsImplParams *params, SwsImplResult *out)
Definition: ops_chain.c:248
ff_sws_setup_shift
int ff_sws_setup_shift(const SwsImplParams *params, SwsImplResult *out)
Definition: ops_chain.c:242
ff_sws_op_compile_tables
int ff_sws_op_compile_tables(SwsContext *ctx, const SwsOpTable *const tables[], int num_tables, const SwsOp *op, const int block_size, SwsOpChain *chain)
"Compile" a single op by looking it up in a list of fixed size op tables.
Definition: ops_chain.c:180
SwsOpImpl::cont
SwsFuncPtr cont
Definition: ops_chain.h:72
SWS_OP_DITHER
@ SWS_OP_DITHER
Definition: ops.h:70
table
static const uint16_t table[]
Definition: prosumer.c:203
SwsCompMask
uint8_t SwsCompMask
Bit-mask of components.
Definition: ops.h:84
SWS_PIXEL_U32
@ SWS_PIXEL_U32
Definition: ops.h:37
SWS_OP_TYPE_NB
@ SWS_OP_TYPE_NB
Definition: ops.h:76
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
SwsOpEntry::setup
int(* setup)(const SwsImplParams *params, SwsImplResult *out)
Definition: ops_chain.h:139
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:109
av_popcount
#define av_popcount
Definition: common.h:154
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:56
ff_sws_pixel_type_size
int ff_sws_pixel_type_size(SwsPixelType type)
Definition: ops.c:77
SwsOpChain::cpu_flags
int cpu_flags
Definition: ops_chain.h:89
ff_sws_comp_mask_needed
SwsCompMask ff_sws_comp_mask_needed(const SwsOp *op)
Definition: ops.c:159
SWS_PIXEL_F32
@ SWS_PIXEL_F32
Definition: ops.h:38
SwsFuncPtr
void(* SwsFuncPtr)(void)
Per-kernel execution context.
Definition: ops_chain.h:70
tables
Writing a table generator. This documentation is preliminary. Parts of the API are not good and should be changed. Basic concepts: a table generator consists of two files, *_tablegen.c and *_tablegen.h. The .h file will provide the variable declarations and initialization code for the tables.
Definition: tablegen.txt:10
SWS_PIXEL_U8
@ SWS_PIXEL_U8
Definition: ops.h:35
SWS_COMP_TEST
#define SWS_COMP_TEST(mask, X)
Definition: ops.h:89
SwsOpChain::over_read
int over_read
Definition: ops_chain.h:90
SWS_OP_SCALE
@ SWS_OP_SCALE
Definition: ops.h:66
ops_chain.h
SwsOpChain::free
void(* free[SWS_MAX_OPS+1])(SwsOpPriv *)
Definition: ops_chain.h:87
avassert.h
SWS_OP_NEEDED
#define SWS_OP_NEEDED(op, idx)
Definition: ops.h:265
ff_sws_op_chain_alloc
SwsOpChain * ff_sws_op_chain_alloc(void)
Definition: ops_chain.c:29
op
static int op(uint8_t **dst, const uint8_t *dst_end, GetByteContext *gb, int pixel, int count, int *x, int width, int linesize)
Perform decode operation.
Definition: anm.c:76
SWS_OP_MIN
@ SWS_OP_MIN
Definition: ops.h:64
ctx
static AVFormatContext * ctx
Definition: movenc.c:49
SWS_OP_LINEAR
@ SWS_OP_LINEAR
Definition: ops.h:69
SWS_OP_FILTER_H
@ SWS_OP_FILTER_H
Definition: ops.h:73
SwsOpChain::impl
SwsOpImpl impl[SWS_MAX_OPS+1]
Definition: ops_chain.h:86
av_mallocz
#define av_mallocz(s)
Definition: tableprint_vlc.h:31
av_fallthrough
#define av_fallthrough
Definition: attributes.h:67
SWS_OP_PACK
@ SWS_OP_PACK
Definition: ops.h:57
SwsOpChain
Compiled "chain" of operations, which can be dispatched efficiently.
Definition: ops_chain.h:84
NULL
#define NULL
Definition: coverity.c:32
AVRational
Rational number (pair of numerator and denominator).
Definition: rational.h:58
av_unreachable
#define av_unreachable(msg)
Asserts that are used as compiler optimization hints depending upon ASSERT_LEVEL and NBDEBUG.
Definition: avassert.h:116
SwsImplParams::op
const SwsOp * op
Definition: ops_chain.h:107
SWS_OP_FILTER_V
@ SWS_OP_FILTER_V
Definition: ops.h:74
SwsImplResult::over_read
int over_read
Definition: ops_chain.h:115
SwsImplResult::over_write
int over_write
Definition: ops_chain.h:116
SwsImplParams
Definition: ops_chain.h:105
ff_sws_setup_clamp
int ff_sws_setup_clamp(const SwsImplParams *params, SwsImplResult *out)
Definition: ops_chain.c:263
SwsOpEntry::func
SwsFuncPtr func
Definition: ops_chain.h:138
i
#define i(width, name, range_min, range_max)
Definition: cbs_h264.c:63
SWS_OP_RSHIFT
@ SWS_OP_RSHIFT
Definition: ops.h:59
SWS_OP_INVALID
@ SWS_OP_INVALID
Definition: ops.h:47
SwsShiftOp::amount
uint8_t amount
Definition: ops.h:165
SWS_OP_WRITE
@ SWS_OP_WRITE
Definition: ops.h:51
SwsOpChain::num_impl
int num_impl
Definition: ops_chain.h:88
SwsOpEntry
Definition: ops_chain.h:119
ff_sws_op_chain_free_cb
void ff_sws_op_chain_free_cb(void *ptr)
Definition: ops_chain.c:34
SwsImplParams::ctx
SwsContext * ctx
Definition: ops_chain.h:108
SwsOpTable::cpu_flags
unsigned cpu_flags
Definition: ops_chain.h:160
av_assert1
#define av_assert1(cond)
assert() equivalent, that does not lie in speed critical code.
Definition: avassert.h:58
needed
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is needed
Definition: filter_design.txt:212
value
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf default value
Definition: writing_filters.txt:86
SwsImplResult::free
void(* free)(SwsOpPriv *priv)
Definition: ops_chain.h:114
SwsOp
Definition: ops.h:238
limit
static double limit(double x)
Definition: vf_pseudocolor.c:142
av_cmp_q
static int av_cmp_q(AVRational a, AVRational b)
Compare two rationals.
Definition: rational.h:89
ret
ret
Definition: filter_design.txt:187
op_match
static int op_match(const SwsOp *op, const SwsOpEntry *entry)
Match an operation against a reference operation.
Definition: ops_chain.c:74
SWS_OP_MAX
@ SWS_OP_MAX
Definition: ops.h:65
SwsOpImpl::priv
SwsOpPriv priv
Definition: ops_chain.h:73
SwsImplResult::priv
SwsOpPriv priv
Definition: ops_chain.h:113
SWS_OP_SWAP_BYTES
@ SWS_OP_SWAP_BYTES
Definition: ops.h:52
SWS_COMP_INV
#define SWS_COMP_INV(mask)
Definition: ops.h:90
SwsOp::shift
SwsShiftOp shift
Definition: ops.h:246
mem.h
av_free
#define av_free(p)
Definition: tableprint_vlc.h:34
SWS_OP_CONVERT
@ SWS_OP_CONVERT
Definition: ops.h:63
ff_sws_op_chain_append
int ff_sws_op_chain_append(SwsOpChain *chain, SwsFuncPtr func, void(*free)(SwsOpPriv *), const SwsOpPriv *priv)
Definition: ops_chain.c:48
q2pixel
#define q2pixel(type, q)
Definition: ops_chain.c:240
SwsContext
Main external API structure.
Definition: swscale.h:206
SwsOpPriv
Private data for each kernel.
Definition: ops_chain.h:45
SwsImplResult
Definition: ops_chain.h:111
SwsImplParams::table
const SwsOpTable * table
Definition: ops_chain.h:106
SwsOpChain::over_write
int over_write
Definition: ops_chain.h:91