FFmpeg
uops.c
Go to the documentation of this file.
1 /**
2  * Copyright (C) 2026 Niklas Haas
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include <stdbool.h>
22 
23 #include "libavutil/avassert.h"
24 #include "libavutil/mem.h"
25 #include "libavutil/refstruct.h"
26 #include "libavutil/tree.h"
27 
28 #include "ops.h"
29 #include "ops_internal.h"
30 #include "uops.h"
31 
32 int ff_sws_uop_cmp(const SwsUOp *a, const SwsUOp *b)
33 {
34  if (a->type != b->type)
35  return (int) a->type - b->type;
36  if (a->uop != b->uop)
37  return (int) a->uop - b->uop;
38  if (a->mask != b->mask)
39  return (int) a->mask - b->mask;
40  return memcmp(&a->par, &b->par, sizeof(a->par));
41 }
42 
43 static const struct {
44  char full[32];
45  char abbr[32];
46  char macro[32];
48 #define UOP_NAME(OP, ABBR) [SWS_UOP_##OP] = { "SWS_UOP_" #OP, ABBR, #OP }
49  UOP_NAME(INVALID, "invalid"),
50  UOP_NAME(READ_PLANAR, "read_planar"),
51  UOP_NAME(READ_PLANAR_FH, "read_planar_fh"),
52  UOP_NAME(READ_PLANAR_FV, "read_planar_fv"),
53  UOP_NAME(READ_PLANAR_FV_FMA,"read_planar_fv_fma"),
54  UOP_NAME(READ_PACKED, "read_packed"),
55  UOP_NAME(READ_NIBBLE, "read_nibble"),
56  UOP_NAME(READ_BIT, "read_bit"),
57  UOP_NAME(WRITE_PLANAR, "write_planar"),
58  UOP_NAME(WRITE_PACKED, "write_packed"),
59  UOP_NAME(WRITE_NIBBLE, "write_nibble"),
60  UOP_NAME(WRITE_BIT, "write_bit"),
61  UOP_NAME(PERMUTE, "permute"),
62  UOP_NAME(COPY, "copy"),
63  UOP_NAME(MOVE, "move"),
64  UOP_NAME(SWAP_BYTES, "swap_bytes"),
65  UOP_NAME(EXPAND_BIT, "expand_bit"),
66  UOP_NAME(EXPAND_PAIR, "expand_pair"),
67  UOP_NAME(EXPAND_QUAD, "expand_quad"),
68  UOP_NAME(TO_U8, "to_u8"),
69  UOP_NAME(TO_U16, "to_u16"),
70  UOP_NAME(TO_U32, "to_u32"),
71  UOP_NAME(TO_F32, "to_f32"),
72  UOP_NAME(SCALE, "scale"),
73  UOP_NAME(LINEAR, "linear"),
74  UOP_NAME(LINEAR_FMA, "linear_fma"),
75  UOP_NAME(ADD, "add"),
76  UOP_NAME(MIN, "min"),
77  UOP_NAME(MAX, "max"),
78  UOP_NAME(UNPACK, "unpack"),
79  UOP_NAME(PACK, "pack"),
80  UOP_NAME(LSHIFT, "lshift"),
81  UOP_NAME(RSHIFT, "rshift"),
82  UOP_NAME(CLEAR, "clear"),
83  UOP_NAME(DITHER, "dither"),
84 #undef UOP_NAME
85 };
86 
87 static const struct {
88  char full[16];
89  char prefix[8];
91  [SWS_PIXEL_NONE] = { "SWS_PIXEL_NONE", "" },
92  [SWS_PIXEL_U8] = { "SWS_PIXEL_U8", "U8_" },
93  [SWS_PIXEL_U16] = { "SWS_PIXEL_U16", "U16_" },
94  [SWS_PIXEL_U32] = { "SWS_PIXEL_U32", "U32_" },
95  [SWS_PIXEL_F32] = { "SWS_PIXEL_F32", "F32_" },
96 };
97 
99 {
100  av_assert1(val.den != 0);
101  switch (type) {
102  case SWS_PIXEL_U8: return (SwsPixel) { .u8 = val.num / val.den };
103  case SWS_PIXEL_U16: return (SwsPixel) { .u16 = val.num / val.den };
104  case SWS_PIXEL_U32: return (SwsPixel) { .u32 = val.num / val.den };
105  case SWS_PIXEL_F32: return (SwsPixel) { .f32 = (float) val.num / val.den };
106  case SWS_PIXEL_NONE:
107  case SWS_PIXEL_TYPE_NB: break;
108  }
109 
110  av_unreachable("Invalid pixel type!");
111  return (SwsPixel) {0};
112 }
113 
/* Shorthand for pixel_from_q() using the pixel type of the variable named
 * `op` in the enclosing scope; only usable where such a variable exists. */
#define Q2PIXEL(q) pixel_from_q(op->type, (q))
115 
117 {
118  switch (ff_sws_pixel_type_size(type)) {
119  case 1: return val.u8 == UINT8_MAX;
120  case 2: return val.u16 == UINT16_MAX;
121  case 4: return val.u32 == UINT32_MAX;
122  default: break;
123  }
124 
125  av_unreachable("Invalid pixel type!");
126  return false;
127 }
128 
129 void ff_sws_uop_name(const SwsUOp *op, char buf[SWS_UOP_NAME_MAX])
130 {
131  AVBPrint bp;
133 
134  if (op->type != SWS_PIXEL_NONE)
135  av_bprintf(&bp, "%s_", ff_sws_pixel_type_name(op->type));
136  av_bprintf(&bp, "%s", uop_names[op->uop].abbr);
137 
138  if (op->mask) {
139  av_bprint_chars(&bp, '_', 1);
140  for (int i = 0; i < 4; i++) {
141  if (SWS_COMP_TEST(op->mask, i))
142  av_bprint_chars(&bp, "xyzw"[i], 1);
143  }
144  }
145 
146  const SwsUOpParams *par = &op->par;
147  switch (op->uop) {
151  av_bprintf(&bp, "_%s", ff_sws_pixel_type_name(par->filter.type));
152  break;
153  case SWS_UOP_LSHIFT:
154  case SWS_UOP_RSHIFT:
155  av_bprintf(&bp, "_%u", par->shift.amount);
156  break;
157  case SWS_UOP_PERMUTE:
158  case SWS_UOP_COPY:
159  av_bprint_chars(&bp, '_', 1);
160  for (int i = 0; i < 4; i++) {
161  if (SWS_COMP_TEST(op->mask, i))
162  av_bprint_chars(&bp, "xyzw"[par->swizzle.in[i]], 1);
163  }
164  break;
165  case SWS_UOP_MOVE:
166  av_bprint_chars(&bp, '_', 1);
167  for (int i = 0; i < par->move.num_moves; i++)
168  av_bprint_chars(&bp, "txyzw"[par->move.dst[i] + 1], 1);
169  av_bprint_chars(&bp, '_', 1);
170  for (int i = 0; i < par->move.num_moves; i++)
171  av_bprint_chars(&bp, "txyzw"[par->move.src[i] + 1], 1);
172  break;
173  case SWS_UOP_PACK:
174  case SWS_UOP_UNPACK:
175  av_bprint_chars(&bp, '_', 1);
176  for (int i = 0; i < 4 && par->pack.pattern[i]; i++)
177  av_bprintf(&bp, "%x", par->pack.pattern[i]);
178  break;
179  case SWS_UOP_CLEAR:
180  av_bprint_chars(&bp, '_', 1);
181  for (int i = 0; i < 4; i++) {
182  if (!SWS_COMP_TEST(op->mask, i))
183  continue;
184  else if (SWS_COMP_TEST(par->clear.one, i))
185  av_bprint_chars(&bp, '1', 1);
186  else if (SWS_COMP_TEST(par->clear.zero, i))
187  av_bprint_chars(&bp, '0', 1);
188  else
189  av_bprint_chars(&bp, 'x', 1);
190  }
191  break;
192  case SWS_UOP_LINEAR:
193  case SWS_UOP_LINEAR_FMA:
194  for (int i = 0; i < 4; i++) {
195  if (!SWS_COMP_TEST(op->mask, i))
196  continue;
197  av_bprint_chars(&bp, '_', 1);
198  for (int j = 0; j < 5; j++) {
199  if (par->lin.one & SWS_MASK(i, j))
200  av_bprint_chars(&bp, '1', 1);
201  else if (par->lin.zero & SWS_MASK(i, j))
202  av_bprint_chars(&bp, '0', 1);
203  else if (par->lin.exact & SWS_MASK(i, j))
204  av_bprint_chars(&bp, 'X', 1);
205  else
206  av_bprint_chars(&bp, 'x', 1);
207  }
208  }
209  break;
210  case SWS_UOP_DITHER:
211  for (int i = 0; i < 4; i++) {
212  if (SWS_COMP_TEST(op->mask, i))
213  av_bprintf(&bp, "_%d", par->dither.y_offset[i]);
214  }
215  const unsigned size = 1u << par->dither.size_log2;
216  av_bprintf(&bp, "_%ux%u", size, size);
217  break;
218  }
219 
221 }
222 
223 static int generate_entry_struct(void *opaque, void *key)
224 {
225  const SwsUOp *ref = opaque;
226  const SwsUOp *uop = key;
227  AVBPrint *bp = ref->data.opaque;
228  char name[SWS_UOP_NAME_MAX];
229  ff_sws_uop_name(uop, name);
230  av_bprintf(bp, " \\\n MACRO(__VA_ARGS__, %-40s", name);
231  av_bprintf(bp, ", .type = %-13s, .uop = %-24s, .mask = 0x%x",
232  pixel_types[uop->type].full, uop_names[uop->uop].full, uop->mask);
233 
234  const SwsUOpParams *par = &uop->par;
235  switch (uop->uop) {
239  av_bprintf(bp, ", .par.filter.type = %s", pixel_types[par->filter.type].full);
240  break;
241  case SWS_UOP_LSHIFT:
242  case SWS_UOP_RSHIFT:
243  av_bprintf(bp, ", .par.shift.amount = %u", par->shift.amount);
244  break;
245  case SWS_UOP_PERMUTE:
246  case SWS_UOP_COPY:
247  av_bprintf(bp, ", .par.swizzle.in = {%d, %d, %d, %d}",
248  par->swizzle.in[0], par->swizzle.in[1],
249  par->swizzle.in[2], par->swizzle.in[3]);
250  break;
251  case SWS_UOP_MOVE:
252  av_bprintf(bp, ", .par.move.num_moves = %d", par->move.num_moves);
253  av_bprintf(bp, ", .par.move.dst = {%d, %d, %d, %d, %d, %d}",
254  par->move.dst[0], par->move.dst[1], par->move.dst[2],
255  par->move.dst[3], par->move.dst[4], par->move.dst[5]);
256  av_bprintf(bp, ", .par.move.src = {%d, %d, %d, %d, %d, %d}",
257  par->move.src[0], par->move.src[1], par->move.src[2],
258  par->move.src[3], par->move.src[4], par->move.src[5]);
259  break;
260  case SWS_UOP_PACK:
261  case SWS_UOP_UNPACK:
262  av_bprintf(bp, ", .par.pack.pattern = {%d, %d, %d, %d}",
263  par->pack.pattern[0], par->pack.pattern[1],
264  par->pack.pattern[2], par->pack.pattern[3]);
265  break;
266  case SWS_UOP_CLEAR:
267  av_bprintf(bp, ", .par.clear.one = 0x%x, .par.clear.zero = 0x%x",
268  par->clear.one, par->clear.zero);
269  break;
270  case SWS_UOP_LINEAR:
271  case SWS_UOP_LINEAR_FMA:
272  av_bprintf(bp, ", .par.lin.one = 0x%x, .par.lin.zero = 0x%x",
273  par->lin.one, par->lin.zero);
274  if (uop->uop == SWS_UOP_LINEAR_FMA)
275  av_bprintf(bp, ", .par.lin.exact = 0x%x", par->lin.exact);
276  break;
277  case SWS_UOP_DITHER:
278  av_bprintf(bp, ", .par.dither = { .y_offset = {%u, %u, %u, %u}, .size_log2 = %u }",
279  par->dither.y_offset[0], par->dither.y_offset[1],
280  par->dither.y_offset[2], par->dither.y_offset[3],
281  par->dither.size_log2);
282  break;
283  }
284 
285  av_bprintf(bp, ")");
286  return 0;
287 }
288 
289 static int generate_entry_args(void *opaque, void *key)
290 {
291  const SwsUOp *ref = opaque;
292  const SwsUOp *uop = key;
293  AVBPrint *bp = ref->data.opaque;
294  char name[SWS_UOP_NAME_MAX];
295  ff_sws_uop_name(uop, name);
296  av_bprintf(bp, " \\\n MACRO(__VA_ARGS__, %-40s, %-13s, %-24s, 0x%x",
297  name, pixel_types[uop->type].full, uop_names[uop->uop].full, uop->mask);
298 
299  const SwsUOpParams *par = &uop->par;
300  switch (uop->uop) {
304  av_bprintf(bp, ", %s", pixel_types[par->filter.type].full);
305  break;
306  case SWS_UOP_LSHIFT:
307  case SWS_UOP_RSHIFT:
308  av_bprintf(bp, ", %u", par->shift.amount);
309  break;
310  case SWS_UOP_PERMUTE:
311  case SWS_UOP_COPY:
312  av_bprintf(bp, ", %d, %d, %d, %d",
313  par->swizzle.in[0], par->swizzle.in[1],
314  par->swizzle.in[2], par->swizzle.in[3]);
315  break;
316  case SWS_UOP_MOVE:
317  av_bprintf(bp, ", %d", par->move.num_moves);
318  av_bprintf(bp, ", %d, %d, %d, %d, %d, %d",
319  par->move.dst[0], par->move.dst[1], par->move.dst[2],
320  par->move.dst[3], par->move.dst[4], par->move.dst[5]);
321  av_bprintf(bp, ", %d, %d, %d, %d, %d, %d",
322  par->move.src[0], par->move.src[1], par->move.src[2],
323  par->move.src[3], par->move.src[4], par->move.src[5]);
324  break;
325  case SWS_UOP_PACK:
326  case SWS_UOP_UNPACK:
327  av_bprintf(bp, ", %d, %d, %d, %d",
328  par->pack.pattern[0], par->pack.pattern[1],
329  par->pack.pattern[2], par->pack.pattern[3]);
330  break;
331  case SWS_UOP_CLEAR:
332  av_bprintf(bp, ", 0x%05x, 0x%05x", par->clear.one, par->clear.zero);
333  break;
334  case SWS_UOP_LINEAR:
335  case SWS_UOP_LINEAR_FMA:
336  av_bprintf(bp, ", 0x%05x, 0x%05x", par->lin.one, par->lin.zero);
337  if (uop->uop == SWS_UOP_LINEAR_FMA)
338  av_bprintf(bp, ", 0x%05x", par->lin.exact);
339  break;
340  case SWS_UOP_DITHER:
341  av_bprintf(bp, ", %u, %u, %u, %u, %u",
342  par->dither.y_offset[0], par->dither.y_offset[1],
343  par->dither.y_offset[2], par->dither.y_offset[3],
344  par->dither.size_log2);
345  break;
346  }
347 
348  av_bprintf(bp, ")");
349  return 0;
350 }
351 
352 static void uop_uninit(SwsUOp *uop)
353 {
354  switch (uop->uop) {
355  case SWS_UOP_DITHER:
356  av_refstruct_unref(&uop->data.ptr);
357  break;
362  break;
363  }
364 
365  *uop = (SwsUOp) {0};
366 }
367 
369 {
370  SwsUOpList *ops = *p_ops;
371  if (!ops)
372  return;
373 
374  for (int i = 0; i < ops->num_ops; i++)
375  uop_uninit(&ops->ops[i]);
376 
377  av_freep(&ops->ops);
378  av_free(ops);
379  *p_ops = NULL;
380 }
381 
383 {
384  return av_mallocz(sizeof(SwsUOpList));
385 }
386 
388 {
389  if (!av_dynarray2_add((void **) &uops->ops, &uops->num_ops,
390  sizeof(*uop), (uint8_t *) uop))
391  {
392  uop_uninit(uop);
393  return AVERROR(ENOMEM);
394  }
395 
396  *uop = (SwsUOp) {0};
397  return 0;
398 }
399 
401 {
402  int max_offset = 0;
403  for (int i = 0; i < 4; i++)
404  max_offset = FFMAX(max_offset, dither->y_offset[i]);
405  return (1 << dither->size_log2) + max_offset;
406 }
407 
409 {
410  switch (ff_sws_pixel_type_size(type)) {
411  case 1: return SWS_PIXEL_U8;
412  case 2: return SWS_PIXEL_U16;
413  case 4: return SWS_PIXEL_U32;
414  default: break;
415  }
416 
417  av_unreachable("Invalid pixel type!");
418  return SWS_PIXEL_NONE;
419 }
420 
/**
 * Test whether the single-precision product a * b is exactly representable,
 * i.e. whether rounding the product to float loses no information.
 *
 * The product of two floats always fits losslessly into a double (two 24-bit
 * significands need at most 48 bits, and the exponent range of double is far
 * wider), so the rounded float product is simply compared against the
 * product computed in double precision. Unlike a multiply-then-divide round
 * trip, this cannot be fooled by two rounding errors cancelling each other
 * (e.g. a = (float) 1/3, b = 3, where a * b rounds to 1.0f and 1.0f / 3
 * rounds back to a).
 *
 * @param a first factor
 * @param b second factor; a value of zero is always reported as exact
 * @return true if a * b incurs no rounding error (overflow, underflow with
 *         loss of bits and NaN results all count as inexact)
 */
static bool exact_product_f32(float a, float b)
{
    if (!b)
        return true; /* multiplication by zero is treated as exact */

    /* volatile forces the product to actually be rounded to float, even if
     * the compiler evaluates float expressions with excess precision */
    volatile float prod = a * b;
    const double exact = (double) a * (double) b;
    return (double) prod == exact;
}
427 
429  const SwsComps *comps, int idx)
430 {
431  const AVRational minq = comps->min[idx];
432  const AVRational maxq = comps->max[idx];
434  return true;
435  else if (!minq.den || !maxq.den)
436  return false; /* unknown bounds */
437 
438  const SwsPixel min = pixel_from_q(type, minq);
439  const SwsPixel max = pixel_from_q(type, maxq);
440  switch (type) {
441  case SWS_PIXEL_F32:
442  return exact_product_f32(coef.f32, min.f32) &&
443  exact_product_f32(coef.f32, max.f32);
444  }
445 
446  av_unreachable("Invalid pixel type!");
447  return false;
448 }
449 
451 {
452  if (!(flags & SWS_UOP_FLAG_FMA))
453  return false;
454  if (!(ctx->flags & SWS_BITEXACT))
455  return true;
456  if (!ff_sws_pixel_type_is_int(op->type))
457  return false;
458 
459  const int bits = ff_sws_pixel_type_size(op->type) * 8;
460  const uint64_t max_val = UINT64_MAX >> (64 - bits);
461 
462  /* Maximum value representable losslessly as float. Note that this is
463  * currently true only for U8, but that may change if we ever update the
464  * value of SWS_FILTER_SCALE. */
465  return max_val * SWS_FILTER_SCALE <= (1 << 22);
466 }
467 
469  const SwsOp *op)
470 {
471  SwsUOp uop = {
472  .type = op->type,
473  .mask = SWS_COMP_MASK(op->rw.elems > 0, op->rw.elems > 1,
474  op->rw.elems > 2, op->rw.elems > 3),
475  };
476 
477  /* Non-filtered reads don't care about the exact pixel contents */
478  if (!op->rw.filter.op)
479  uop.type = pixel_type_to_int(op->type);
480 
481  const bool is_read = op->op == SWS_OP_READ;
482  if (op->rw.filter.op) {
483  if (op->op == SWS_OP_WRITE || op->rw.frac || op->rw.mode != SWS_RW_PLANAR)
484  return AVERROR(ENOTSUP);
485  uop.par.filter.type = op->rw.filter.type;
486  uop.data.kernel = av_refstruct_ref(op->rw.filter.kernel);
487  if (op->rw.filter.op == SWS_OP_FILTER_H) {
489  } else if (check_filter_fma(ctx, flags, op)) {
491  } else {
493  }
494  } else if (op->rw.mode == SWS_RW_PACKED && op->rw.elems > 1) {
495  if (op->rw.frac)
496  return AVERROR(ENOTSUP);
497  uop.uop = is_read ? SWS_UOP_READ_PACKED : SWS_UOP_WRITE_PACKED;
498  } else if (op->rw.frac == 3) {
499  uop.uop = is_read ? SWS_UOP_READ_BIT : SWS_UOP_WRITE_BIT;
500  } else if (op->rw.frac == 1) {
501  uop.uop = is_read ? SWS_UOP_READ_NIBBLE : SWS_UOP_WRITE_NIBBLE;
502  } else {
503  av_assert0(!op->rw.frac);
504  uop.uop = is_read ? SWS_UOP_READ_PLANAR : SWS_UOP_WRITE_PLANAR;
505  }
506 
507  return ff_sws_uop_list_append(ops, &uop);
508 }
509 
/**
 * Count the occurrences of a value in an integer array.
 *
 * @param arr  array to scan; only dereferenced when size is non-zero
 * @param size number of elements in arr
 * @param val  value to look for
 * @return how many of the first `size` elements of arr equal val
 */
static int count_idx(const int *arr, size_t size, int val)
{
    int hits = 0;

    while (size--)
        hits += (*arr++ == val);

    return hits;
}
520 
521 static int translate_move(SwsUOpList *ops, const SwsOp *op)
522 {
523  SwsUOp uop = {
524  .uop = SWS_UOP_MOVE,
525  .type = pixel_type_to_int(op->type),
526  };
527  SwsMoveUOp *par = &uop.par.move;
528 
529  /* Mask of components that are not yet satisfied */
531  for (int i = 0; i < 4; i++) {
532  if (op->swizzle.in[i] == i)
533  todo &= ~SWS_COMP(i);
534  }
535 
536  /* Mask of components whose value is required for the final output */
537  SwsCompMask needed = 0;
538  for (int i = 0; i < 4; i++) {
539  if (SWS_OP_NEEDED(op, i))
540  needed |= SWS_COMP(op->swizzle.in[i]);
541  }
542 
543  /* Current mapping of registers to components */
544  int idx[4 + 1] = { 0, 1, 2, 3, -1 }; /* +1 for tmp */
545 
546  /* Decompose the swizzle mask into a series of register-register moves */
547  while (todo) {
548  int dst = -1, src = -1;
549 
550  /* Find next unsatisfied dst <- src move that doesn't clobber a value */
551  for (dst = 0; dst < 4; dst++) {
552  if (!SWS_COMP_TEST(todo, dst))
553  continue; /* already satisfied */
554  const int cur = idx[dst];
555  if (count_idx(idx, FF_ARRAY_ELEMS(idx), cur) == 1 && SWS_COMP_TEST(needed, cur))
556  continue; /* clobbers last remaining, still-needed value */
557  for (src = 0; src < FF_ARRAY_ELEMS(idx); src++) {
558  if (idx[src] == op->swizzle.in[dst]) {
559  /* Prevent read-after-write dependency. */
560  if (par->num_moves > 0 && src == par->dst[par->num_moves - 1])
561  src = par->src[par->num_moves - 1];
562  break;
563  }
564  }
565  av_assert1(src < FF_ARRAY_ELEMS(idx));
566  todo &= ~SWS_COMP(dst);
567  break;
568  }
569 
570  if (dst == 4) {
571  /* Stuck in a cycle, break it by saving to the scratch register */
572  dst = 4;
573  for (src = 0; src < 4; src++) {
574  if (SWS_COMP_TEST(todo, src)) {
575  needed &= ~SWS_COMP(idx[src]);
576  break;
577  }
578  }
579  av_assert1(src < 4);
580  }
581 
583  par->dst[par->num_moves] = dst > 3 ? -1 : dst;
584  par->src[par->num_moves] = src > 3 ? -1 : src;
585  par->num_moves++;
586  idx[dst] = idx[src];
587  }
588 
589  return ff_sws_uop_list_append(ops, &uop);
590 }
591 
593 {
594  if (flags & SWS_UOP_FLAG_MOVE)
595  return translate_move(ops, op);
596 
597  SwsUOp uop = {
598  .type = pixel_type_to_int(op->type),
599  .uop = SWS_UOP_PERMUTE,
600  .mask = ff_sws_comp_mask_needed(op),
601  .par.swizzle.in = {0, 1, 2, 3},
602  };
603 
604  SwsCompMask seen = 0;
605  for (int i = 0; i < 4; i++) {
606  if (!SWS_COMP_TEST(uop.mask, i))
607  continue;
608  const int src = op->swizzle.in[i];
609  if (SWS_COMP_TEST(seen, src))
610  uop.uop = SWS_UOP_COPY; /* Swizzle mask contains duplicates */
611  seen |= SWS_COMP(src);
612  uop.par.swizzle.in[i] = src;
613  }
614 
615  if (uop.uop == SWS_UOP_PERMUTE) {
616  /* Prevent overlap by moving unused components to unseen indices */
617  for (int i = 0; i < 4; i++) {
618  if (SWS_COMP_TEST(uop.mask, i))
619  continue;
620 
621  /* Prefer identity mapping if possible */
622  int unused = i;
623  if (SWS_COMP_TEST(seen, i)) {
624  for (int j = 0; j < 4; j++) {
625  if (!SWS_COMP_TEST(seen, j)) {
626  unused = j;
627  break;
628  }
629  }
630  }
631 
632  uop.par.swizzle.in[i] = unused;
633  seen |= SWS_COMP(unused);
634  }
635  }
636 
637  /* Remove remaining trivial / identity components from the mask */
638  for (int i = 0; i < 4; i++) {
639  if (uop.par.swizzle.in[i] == i)
640  uop.mask &= ~SWS_COMP(i);
641  }
642 
643  return ff_sws_uop_list_append(ops, &uop);
644 }
645 
646 static int translate_dither_op(SwsUOpList *ops, const SwsOp *op)
647 {
648  SwsUOp uop = {
649  .type = op->type,
650  .uop = SWS_UOP_DITHER,
651  .par.dither.size_log2 = op->dither.size_log2,
652  };
653 
654  if (op->dither.size_log2 == 0) {
655  /* Constant offset */
656  const SwsPixel val = Q2PIXEL(op->dither.matrix[0]);
657  uop.uop = SWS_UOP_ADD;
658  for (int i = 0; i < 4; i++) {
659  if (!SWS_OP_NEEDED(op, i) || op->dither.y_offset[i] < 0)
660  continue;
661  uop.mask |= SWS_COMP(i);
662  uop.data.vec4[i] = val;
663  }
664 
665  return ff_sws_uop_list_append(ops, &uop);
666  }
667 
668  const int size = 1 << op->dither.size_log2;
669  for (int i = 0; i < 4; i++) {
670  if (!SWS_OP_NEEDED(op, i) || op->dither.y_offset[i] < 0)
671  continue;
672  const uint8_t off = op->dither.y_offset[i] & (size - 1);
673  uop.mask |= SWS_COMP(i);
674  uop.par.dither.y_offset[i] = off;
675  }
676 
677  /* Allocate extra rows to allow over-reading for row offsets. Note that
678  * y_offset is currently never larger than 5, so the extra space needed
679  * for this over-allocation is bounded by 5 * size * sizeof(float),
680  * typically 320 bytes for a 16x16 dither matrix. */
681  const int stride = size * sizeof(SwsPixel);
682  const int num_rows = ff_sws_dither_height(&uop.par.dither);
683  SwsPixel *matrix = uop.data.ptr = av_refstruct_allocz(num_rows * stride);
684  if (!matrix)
685  return AVERROR(ENOMEM);
686 
687  for (int i = 0; i < size * size; i++)
688  matrix[i] = Q2PIXEL(op->dither.matrix[i]);
689  memcpy(&matrix[size * size], matrix, (num_rows - size) * stride);
690 
691  return ff_sws_uop_list_append(ops, &uop);
692 }
693 
695  SwsUOpFlags flags, const SwsOp *op,
696  const SwsComps *input)
697 {
698  SwsUOp uop = {
699  .type = op->type,
700  .uop = SWS_UOP_LINEAR,
701  };
702 
703  const bool bitexact = ctx->flags & SWS_BITEXACT;
704  uint32_t exact = 0;
705 
706  for (int i = 0; i < 4; i++) {
707  if (SWS_OP_NEEDED(op, i) && (op->lin.mask & SWS_MASK_ROW(i)))
708  uop.mask |= SWS_COMP(i);
709  for (int j = 0; j < 5; j++) {
710  const AVRational k = op->lin.m[i][j];
711  const SwsPixel px = Q2PIXEL(k);
712  uop.data.mat4[i][j] = px;
713  if (k.num == 0)
714  uop.par.lin.zero |= SWS_MASK(i, j);
715  else if (k.num == k.den)
716  uop.par.lin.one |= SWS_MASK(i, j);
717  else if (j < 4 && (!bitexact || exact_prod(uop.type, px, input, j)))
718  exact |= SWS_MASK(i, j);
719  }
720  }
721 
722  if (flags & SWS_UOP_FLAG_FMA) {
723  /* multiplication by 1 and 0 are always exact by definition */
724  uop.uop = SWS_UOP_LINEAR_FMA;
725  uop.par.lin.exact = exact | uop.par.lin.zero | uop.par.lin.one;
726  }
727 
728  return ff_sws_uop_list_append(ops, &uop);
729 }
730 
732 {
733  if (factor.den != 1)
734  return false;
735 
736  switch (type) {
737  case SWS_PIXEL_U8: return factor.num == UINT8_MAX;
738  case SWS_PIXEL_U16: return factor.num == UINT16_MAX;
739  case SWS_PIXEL_U32: return factor.num == UINT32_MAX;
740  case SWS_PIXEL_F32: return false;
741  case SWS_PIXEL_NONE:
742  case SWS_PIXEL_TYPE_NB: break;
743  }
744 
745  av_unreachable("Invalid pixel type!");
746  return false;
747 }
748 
750  const SwsOp *op, const SwsComps *input)
751 {
752  switch (op->op) {
753  case SWS_OP_FILTER_H:
754  case SWS_OP_FILTER_V:
755  return AVERROR(ENOTSUP); /* always handled by subpass splitting */
756  case SWS_OP_READ:
757  case SWS_OP_WRITE:
758  return translate_rw_op(ctx, uops, flags, op);
759  case SWS_OP_SWIZZLE:
760  return translate_swizzle(uops, flags, op);
761  case SWS_OP_DITHER:
762  return translate_dither_op(uops, op);
763  case SWS_OP_LINEAR:
764  return translate_linear_op(ctx, uops, flags, op, input);
765  default:
766  break;
767  }
768 
769  /* Default handling for "simple" ops */
770  SwsUOp uop = {
771  .type = op->type,
772  .uop = SWS_UOP_INVALID,
773  .mask = ff_sws_comp_mask_needed(op),
774  };
775 
776  switch (op->op) {
777  case SWS_OP_CONVERT:
778  if (op->convert.expand) {
779  av_assert0(op->type == SWS_PIXEL_U8);
780  switch (op->convert.to) {
781  case SWS_PIXEL_U16: uop.uop = SWS_UOP_EXPAND_PAIR; break;
782  case SWS_PIXEL_U32: uop.uop = SWS_UOP_EXPAND_QUAD; break;
783  }
784  } else {
785  switch (op->convert.to) {
786  case SWS_PIXEL_U8: uop.uop = SWS_UOP_TO_U8; break;
787  case SWS_PIXEL_U16: uop.uop = SWS_UOP_TO_U16; break;
788  case SWS_PIXEL_U32: uop.uop = SWS_UOP_TO_U32; break;
789  case SWS_PIXEL_F32: uop.uop = SWS_UOP_TO_F32; break;
790  }
791  }
792  break;
793  case SWS_OP_UNPACK:
794  case SWS_OP_PACK:
795  uop.uop = op->op == SWS_OP_PACK ? SWS_UOP_PACK : SWS_UOP_UNPACK;
796  uop.mask = 0;
797  for (int i = 0; i < 4 && op->pack.pattern[i]; i++) {
798  uop.par.pack.pattern[i] = op->pack.pattern[i];
799  uop.mask |= SWS_COMP(i);
800  }
801  break;
802  case SWS_OP_LSHIFT:
803  case SWS_OP_RSHIFT:
805  uop.par.shift.amount = op->shift.amount;
806  break;
807  case SWS_OP_CLEAR:
808  uop.uop = SWS_UOP_CLEAR;
809  uop.type = pixel_type_to_int(op->type);
810  uop.mask &= op->clear.mask;
811  for (int i = 0; i < 4; i++) {
812  if (!SWS_COMP_TEST(op->clear.mask, i))
813  continue;
814  const AVRational v = op->clear.value[i];
815  const SwsPixel px = Q2PIXEL(op->clear.value[i]);
816  uop.data.vec4[i] = px;
817  if (v.num == 0)
818  uop.par.clear.zero |= SWS_COMP(i);
819  else if (pixel_is_1s(op->type, px))
820  uop.par.clear.one |= SWS_COMP(i);
821  }
822  break;
823  case SWS_OP_SCALE:
824  if (is_expand_bit(op->type, op->scale.factor)) {
825  uop.uop = SWS_UOP_EXPAND_BIT;
826  } else {
827  uop.uop = SWS_UOP_SCALE;
828  uop.data.scalar = Q2PIXEL(op->scale.factor);
829  }
830  break;
831  case SWS_OP_MIN:
832  case SWS_OP_MAX:
833  uop.uop = op->op == SWS_OP_MIN ? SWS_UOP_MIN : SWS_UOP_MAX;
834  uop.mask &= ff_sws_comp_mask_q4(op->clamp.limit);
835  for (int i = 0; i < 4; i++) {
836  if (SWS_COMP_TEST(uop.mask, i))
837  uop.data.vec4[i] = Q2PIXEL(op->clamp.limit[i]);
838  }
839  break;
840  case SWS_OP_SWAP_BYTES:
841  uop.uop = SWS_UOP_SWAP_BYTES;
842  uop.type = pixel_type_to_int(op->type);
843  break;
844  default:
845  return AVERROR(ENOTSUP);
846  }
847 
849  return ff_sws_uop_list_append(uops, &uop);
850 }
851 
854 {
855  SwsComps input = ops->comps_src;
856  for (int i = 0; i < ops->num_ops; i++) {
857  int ret = translate_op(ctx, uops, flags, &ops->ops[i], &input);
858  if (ret < 0)
859  return ret;
860  input = ops->ops[i].comps;
861  }
862  return 0;
863 }
864 
865 static int register_uop(struct AVTreeNode **root, const SwsUOp *uop)
866 {
867  SwsUOp *key = av_memdup(uop, sizeof(*uop));
868  if (!key)
869  return AVERROR(ENOMEM);
870  memset(&key->data, 0, sizeof(key->data));
871 
872  struct AVTreeNode *node = av_tree_node_alloc();
873  if (!node) {
874  av_free(key);
875  return AVERROR(ENOMEM);
876  }
877 
878  av_tree_insert(root, key, ff_sws_uop_cmp_v, &node);
879  if (node) {
880  av_free(node);
881  av_free(key);
882  }
883  return 0;
884 }
885 
887 {
889  if (!uops)
890  return AVERROR(ENOMEM);
891 
892  int ret = ff_sws_ops_translate(ctx, ops, flags, uops);
893  if (ret < 0)
894  goto fail;
895 
896  struct AVTreeNode **root = ctx->opaque;
897  for (int i = 0; i < uops->num_ops; i++) {
898  ret = register_uop(root, &uops->ops[i]);
899  if (ret < 0)
900  goto fail;
901  }
902 
903 fail:
904  ff_sws_uop_list_free(&uops);
905  return ret;
906 }
907 
908 static const SwsUOpFlags uop_flags[] = {
909  0,
910  SWS_UOP_FLAG_FMA | SWS_UOP_FLAG_MOVE, /* x86 backend */
911 };
912 
913 static int register_uops(SwsContext *ctx, const SwsOpList *ops,
915 {
916  for (int i = 0; i < FF_ARRAY_ELEMS(uop_flags); i++) {
917  int ret = register_flags(ctx, ops, uop_flags[i]);
918  if (ret < 0)
919  return ret;
920  }
921 
922  *out = (SwsCompiledOp) {0}; /* dummy value, will be immediately freed */
923  return 0;
924 }
925 
926 /* Dummy backend that just registers all seen uops */
927 static const SwsOpBackend backend_uops = {
928  .name = "uops_gen",
929  .compile = register_uops,
930 };
931 
932 static int register_all_uops(SwsContext *ctx, void *graph, SwsOpList *ops)
933 {
934  /* ff_sws_compile_pass() takes over ownership of `ops` */
936  if (!copy)
937  return AVERROR(ENOMEM);
938 
939  return ff_sws_compile_pass(graph, &backend_uops, &copy, 0, NULL, NULL);
940 }
941 
942 static const SwsFlags flags[] = {
943  0,
944  SWS_ACCURATE_RND, /* may insert extra 1x1 dither ops (for accurate rounding) */
945  SWS_BITEXACT, /* prevents some FMA optimizations */
947 };
948 
949 /* Limit the range of av_tree_enumerate() to only matching uop and type */
950 static int enum_type(void *opaque, void *elem)
951 {
952  const SwsUOp *a = opaque, *b = elem;
953  if (a->type != b->type)
954  return (int) b->type - a->type;
955  if (a->uop != b->uop)
956  return (int) b->uop - a->uop;
957  return 0;
958 }
959 
/* Callback that releases a single key with av_free(); `opaque` is ignored.
 * Always returns 0. Presumably used while tearing down the uop tree. */
static int free_uop_key(void *opaque, void *key)
{
    (void) opaque;
    av_free(key);
    return 0;
}
965 
966 int ff_sws_uops_macros_gen(char **out_str)
967 {
968  int ret;
969  struct AVTreeNode *root = NULL;
970 
971  AVBPrint bprint, *const bp = &bprint;
973 
974  /* Allocate dummy graph and context for ff_sws_compile_pass() */
975  SwsGraph *graph = ff_sws_graph_alloc();
976  if (!graph)
977  return AVERROR(ENOMEM);
978 
979  SwsContext *ctx = graph->ctx = sws_alloc_context();
980  if (!ctx) {
981  ret = AVERROR(ENOMEM);
982  goto fail;
983  }
984 
985  /* Use this to plumb the tree state through all the layers of abstraction */
986  ctx->opaque = &root;
987  ctx->scaler = SWS_SCALE_BILINEAR; /* cheaper to generate filter kernels */
988 
989  /* Register all unique uops over every relevant combination of flags */
990  for (int i = 0; i < FF_ARRAY_ELEMS(flags); i++) {
991  ctx->flags = flags[i];
994  if (ret < 0)
995  goto fail;
996  }
997 
998  /**
999  * Additionally make sure planar reads/writes are always available for all
1000  * formats, because checkasm depends on them to be able to verify the
1001  * input/output of any other operations.
1002  */
1005  continue;
1006  for (int elems = 1; elems <= 4; elems++) {
1007  for (int rw = 0; rw < 2; rw++) {
1008  SwsUOp uop = {
1009  .type = type,
1011  .mask = SWS_COMP_ELEMS(elems),
1012  };
1013 
1014  ret = register_uop(&root, &uop);
1015  if (ret < 0)
1016  goto fail;
1017  }
1018  }
1019  }
1020 
1021  #define BPRINT_STR(str) av_bprint_append_data(bp, str, strlen(str))
1022  BPRINT_STR(
1023 "/**\n"
1024 " * This file is automatically generated. Do not edit manually.\n"
1025 " * To regenerate, run: make fate-sws-uops-macros GEN=1\n"
1026 " */\n"
1027 "\n"
1028 "#ifndef SWSCALE_UOPS_MACROS_H\n"
1029 "#define SWSCALE_UOPS_MACROS_H\n"
1030 "\n"
1031 "/**\n"
1032 " * Boilerplate helper macros, for template-based backends. These will be\n"
1033 " * instantiated like this, with parameters in struct order:\n"
1034 " * MACRO(__VA_ARGS__, NAME, UOP, TYPE, MASK, [PARAMS,])\n"
1035 " * The _STRUCT variants pass all arguments in C struct syntax, while the\n"
1036 " * plain variants give them as separate C values (e.g. for use in calls)\n"
1037 " */\n"
1038 "#define SWS_GLUE3(x, y, z) x ## _ ## y ## _ ## z\n"
1039 "#define SWS_FOR(TYPE, UOP, MACRO, ...) \\\n"
1040 " SWS_GLUE3(SWS_FOR, TYPE, UOP)(MACRO, __VA_ARGS__)\n"
1041 "#define SWS_FOR_STRUCT(TYPE, UOP, MACRO, ...) \\\n"
1042 " SWS_GLUE3(SWS_FOR_STRUCT, TYPE, UOP)(MACRO, __VA_ARGS__)\n"
1043 "\n");
1044 
1045  SwsUOp key = { .data.opaque = bp };
1046  for (key.type = SWS_PIXEL_NONE + 1; key.type < SWS_PIXEL_TYPE_NB; key.type++) {
1047  for (key.uop = SWS_UOP_INVALID + 1; key.uop < SWS_UOP_TYPE_NB; key.uop++) {
1048  const char *macro = uop_names[key.uop].macro;
1049  const char *prefix = pixel_types[key.type].prefix;
1050  av_bprintf(bp, "#define SWS_FOR_%s%s(MACRO, ...)", prefix, macro);
1052  av_bprintf(bp, "\n");
1053  av_bprintf(bp, "#define SWS_FOR_STRUCT_%s%s(MACRO, ...)", prefix, macro);
1055  av_bprintf(bp, "\n");
1056  }
1057  }
1058 
1059  BPRINT_STR("\n#endif /* SWSCALE_UOPS_MACROS_H */");
1060  ret = av_bprint_finalize(bp, out_str);
1061 
1062 fail:
1063  av_bprint_finalize(bp, NULL);
1065  av_tree_destroy(root);
1066  ff_sws_graph_free(&graph);
1068  return ret;
1069 }
SWS_OP_READ
@ SWS_OP_READ
Definition: ops.h:38
pixel_from_q
static SwsPixel pixel_from_q(SwsPixelType type, AVRational val)
Definition: uops.c:98
factor
static const int factor[16]
Definition: vf_pp7.c:98
AV_BPRINT_SIZE_UNLIMITED
#define AV_BPRINT_SIZE_UNLIMITED
SWS_UOP_SCALE
@ SWS_UOP_SCALE
Definition: uops.h:121
name
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf default minimum maximum flags name is the option name
Definition: writing_filters.txt:88
SWS_OP_SWIZZLE
@ SWS_OP_SWIZZLE
Definition: ops.h:41
SwsGraph::ctx
SwsContext * ctx
Definition: graph.h:123
Q2PIXEL
#define Q2PIXEL(val)
Definition: uops.c:114
av_bprint_is_complete
static int av_bprint_is_complete(const AVBPrint *buf)
Test if the print buffer is complete (not truncated).
Definition: bprint.h:218
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
MAX
#define MAX
Definition: blend_modes.c:46
SwsUOpParams::move
SwsMoveUOp move
Definition: uops.h:194
SWS_OP_LSHIFT
@ SWS_OP_LSHIFT
Definition: ops.h:46
SWS_OP_UNPACK
@ SWS_OP_UNPACK
Definition: ops.h:44
ff_sws_op_list_duplicate
SwsOpList * ff_sws_op_list_duplicate(const SwsOpList *ops)
Returns a duplicate of ops, or NULL on OOM.
Definition: ops.c:651
SWS_RW_PLANAR
@ SWS_RW_PLANAR
Note: 1-component reads are either SWS_RW_PLANAR or SWS_RW_PACKED, depending on the underlying interp...
Definition: ops.h:97
free_uop_key
static int free_uop_key(void *opaque, void *key)
Definition: uops.c:960
out
static FILE * out
Definition: movenc.c:55
av_bprint_init
void av_bprint_init(AVBPrint *buf, unsigned size_init, unsigned size_max)
Definition: bprint.c:69
SwsOpList::comps_src
SwsComps comps_src
Source component metadata associated with pixel values from each corresponding component (in plane/me...
Definition: ops.h:300
ff_sws_uop_cmp_v
static int ff_sws_uop_cmp_v(const void *a, const void *b)
Definition: uops.h:224
SWS_UOP_RSHIFT
@ SWS_UOP_RSHIFT
Definition: uops.h:130
SWS_PIXEL_NONE
@ SWS_PIXEL_NONE
Definition: uops.h:39
av_tree_insert
void * av_tree_insert(AVTreeNode **tp, void *key, int(*cmp)(const void *key, const void *b), AVTreeNode **next)
Insert or remove an element.
Definition: tree.c:59
SWS_OP_CLEAR
@ SWS_OP_CLEAR
Definition: ops.h:50
SWS_SCALE_BILINEAR
@ SWS_SCALE_BILINEAR
bilinear filtering
Definition: swscale.h:98
SwsClearUOp::zero
SwsCompMask zero
Definition: uops.h:168
matrix
Definition: vc1dsp.c:43
full
char full[32]
Definition: uops.c:44
ops.h
u
#define u(width, name, range_min, range_max)
Definition: cbs_apv.c:68
AVTreeNode::elem
void * elem
Definition: tree.c:28
SWS_OP_DITHER
@ SWS_OP_DITHER
Definition: ops.h:58
SWS_BITEXACT
@ SWS_BITEXACT
Definition: swscale.h:180
av_dynarray2_add
void * av_dynarray2_add(void **tab_ptr, int *nb_ptr, size_t elem_size, const uint8_t *elem_data)
Add an element of size elem_size to a dynamic array.
Definition: mem.c:343
b
#define b
Definition: input.c:43
SWS_UOP_MOVE_MAX
#define SWS_UOP_MOVE_MAX
Definition: uops.h:154
SWS_UOP_LINEAR_FMA
@ SWS_UOP_LINEAR_FMA
Definition: uops.h:133
SWS_UOP_MAX
@ SWS_UOP_MAX
Definition: uops.h:124
translate_rw_op
static int translate_rw_op(SwsContext *ctx, SwsUOpList *ops, SwsUOpFlags flags, const SwsOp *op)
Definition: uops.c:468
ff_sws_uop_cmp
int ff_sws_uop_cmp(const SwsUOp *a, const SwsUOp *b)
Compare two micro-ops by type, uop, mask and finally their parameter bytes.
Definition: uops.c:32
max
#define max(a, b)
Definition: cuda_runtime.h:33
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
SWS_COMP_MASK
#define SWS_COMP_MASK(X, Y, Z, W)
Definition: uops.h:74
av_tree_node_alloc
struct AVTreeNode * av_tree_node_alloc(void)
Allocate an AVTreeNode.
Definition: tree.c:34
SwsUOpParams::swizzle
SwsSwizzleUOp swizzle
Definition: uops.h:193
SWS_UOP_LSHIFT
@ SWS_UOP_LSHIFT
Definition: uops.h:129
SwsLinearUOp::one
uint32_t one
Definition: uops.h:172
SWS_UOP_TYPE_NB
@ SWS_UOP_TYPE_NB
Definition: uops.h:137
SwsOpBackend::name
const char * name
Definition: ops_dispatch.h:134
SWS_UOP_NAME_MAX
#define SWS_UOP_NAME_MAX
Generate a unique name for a SwsUOp.
Definition: uops.h:232
ff_sws_pixel_type_size
int ff_sws_pixel_type_size(SwsPixelType type)
Definition: ops.c:77
av_tree_enumerate
void av_tree_enumerate(AVTreeNode *t, void *opaque, int(*cmp)(void *opaque, void *elem), int(*enu)(void *opaque, void *elem))
Apply enu(opaque, &elem) to all the elements in the tree in a given range.
Definition: tree.c:155
check_filter_fma
static bool check_filter_fma(SwsContext *ctx, SwsUOpFlags flags, const SwsOp *op)
Definition: uops.c:450
ff_sws_graph_alloc
SwsGraph * ff_sws_graph_alloc(void)
Allocate an empty SwsGraph.
Definition: graph.c:827
ff_sws_comp_mask_needed
SwsCompMask ff_sws_comp_mask_needed(const SwsOp *op)
Definition: ops.c:159
SWS_MASK_ROW
#define SWS_MASK_ROW(I)
Definition: ops.h:199
SwsMoveUOp::num_moves
int num_moves
Definition: uops.h:155
av_memdup
void * av_memdup(const void *p, size_t size)
Duplicate a buffer with av_malloc().
Definition: mem.c:304
enum_type
static int enum_type(void *opaque, void *elem)
Definition: uops.c:950
SwsComps::max
AVRational max[4]
Definition: ops.h:84
SwsMoveUOp
Definition: uops.h:152
SWS_COMP_TEST
#define SWS_COMP_TEST(mask, X)
Definition: uops.h:71
av_bprint_init_for_buffer
void av_bprint_init_for_buffer(AVBPrint *buf, char *buffer, unsigned size)
Init a print buffer using a pre-existing buffer.
Definition: bprint.c:85
SWS_UOP_TO_U16
@ SWS_UOP_TO_U16
Definition: uops.h:116
SwsOpList::num_ops
int num_ops
Definition: ops.h:283
SWS_UOP_PACK
@ SWS_UOP_PACK
Definition: uops.h:128
SwsShiftUOp::amount
uint8_t amount
Definition: uops.h:145
SWS_UOP_PERMUTE
@ SWS_UOP_PERMUTE
Definition: uops.h:106
SwsUOpParams::pack
SwsPackUOp pack
Definition: uops.h:195
SWS_UOP_EXPAND_BIT
@ SWS_UOP_EXPAND_BIT
Definition: uops.h:112
translate_move
static int translate_move(SwsUOpList *ops, const SwsOp *op)
Definition: uops.c:521
UOP_NAME
#define UOP_NAME(OP, ABBR)
ff_sws_pixel_type_is_int
bool ff_sws_pixel_type_is_int(SwsPixelType type)
Definition: ops.c:92
val
static double val(void *priv, double ch)
Definition: aeval.c:77
type
it's the only field you need to keep assuming you have a context There is some magic you don't need to care about around this just let it vf type
Definition: writing_filters.txt:86
SwsUOpParams
Definition: uops.h:190
SWS_COMP_ELEMS
#define SWS_COMP_ELEMS(N)
Definition: uops.h:73
AVRational::num
int num
Numerator.
Definition: rational.h:59
SwsFilterUOp::type
SwsPixelType type
Definition: uops.h:141
refstruct.h
av_refstruct_allocz
static void * av_refstruct_allocz(size_t size)
Equivalent to av_refstruct_alloc_ext(size, 0, NULL, NULL)
Definition: refstruct.h:105
SWS_UOP_COPY
@ SWS_UOP_COPY
Definition: uops.h:107
SWS_UOP_INVALID
@ SWS_UOP_INVALID
Definition: uops.h:89
SWS_RW_PACKED
@ SWS_RW_PACKED
Definition: ops.h:98
SWS_OP_SCALE
@ SWS_OP_SCALE
Definition: ops.h:54
avassert.h
FF_ARRAY_ELEMS
#define FF_ARRAY_ELEMS(a)
Definition: sinewin_tablegen.c:29
SWS_UOP_WRITE_NIBBLE
@ SWS_UOP_WRITE_NIBBLE
Definition: uops.h:102
uop_flags
static const SwsUOpFlags uop_flags[]
Definition: uops.c:908
SWS_OP_NEEDED
#define SWS_OP_NEEDED(op, idx)
Definition: ops.h:253
SwsUOp::kernel
SwsFilterWeights * kernel
Definition: uops.h:210
float
float
Definition: af_crystalizer.c:122
SWS_UOP_MOVE
@ SWS_UOP_MOVE
Definition: uops.h:108
SwsFlags
SwsFlags
Definition: swscale.h:133
dither
static const uint16_t dither[8][8]
Definition: vf_gradfun.c:46
SwsUOp::uop
SwsUOpType uop
Definition: uops.h:204
AVFormatContext::flags
int flags
Flags modifying the (de)muxer behaviour.
Definition: avformat.h:1465
backend_uops
static const SwsOpBackend backend_uops
Definition: uops.c:927
SwsComps::min
AVRational min[4]
Definition: ops.h:84
SWS_UOP_WRITE_PLANAR
@ SWS_UOP_WRITE_PLANAR
Definition: uops.h:100
op
static int op(uint8_t **dst, const uint8_t *dst_end, GetByteContext *gb, int pixel, int count, int *x, int width, int linesize)
Perform decode operation.
Definition: anm.c:76
SCALE
#define SCALE(c)
Definition: dcadata.c:7338
bits
uint8_t bits
Definition: vp3data.h:128
SWS_UOP_TO_F32
@ SWS_UOP_TO_F32
Definition: uops.h:118
LINEAR
#define LINEAR
Definition: vf_perspective.c:36
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:42
SWS_UOP_MIN
@ SWS_UOP_MIN
Definition: uops.h:123
SWS_OP_MIN
@ SWS_OP_MIN
Definition: ops.h:52
ctx
static AVFormatContext * ctx
Definition: movenc.c:49
SwsCompMask
uint8_t SwsCompMask
Bit-mask of components.
Definition: uops.h:61
SWS_UOP_READ_PACKED
@ SWS_UOP_READ_PACKED
Definition: uops.h:96
SWS_OP_LINEAR
@ SWS_OP_LINEAR
Definition: ops.h:57
count_idx
static int count_idx(const int *arr, size_t size, int val)
Definition: uops.c:510
SWS_OP_FILTER_H
@ SWS_OP_FILTER_H
Definition: ops.h:61
COPY
#define COPY(src, name)
SwsPixel::f32
float f32
Definition: uops.h:57
AVFormatContext::opaque
void * opaque
User data.
Definition: avformat.h:1878
key
const char * key
Definition: hwcontext_opencl.c:189
av_mallocz
#define av_mallocz(s)
Definition: tableprint_vlc.h:31
RSHIFT
#define RSHIFT(a, b)
Definition: common.h:56
SwsOpBackend
Definition: ops_dispatch.h:133
SWS_OP_PACK
@ SWS_OP_PACK
Definition: ops.h:45
prefix
char prefix[8]
Definition: uops.c:89
register_flags
static int register_flags(SwsContext *ctx, const SwsOpList *ops, SwsUOpFlags flags)
Definition: uops.c:886
fail
#define fail
Definition: test.h:478
result
and forward the result(frame or status change) to the corresponding input. If nothing is possible
NULL
#define NULL
Definition: coverity.c:32
SwsUOp::mat4
SwsPixel mat4[4][5]
Definition: uops.h:214
ADD
#define ADD(a, b)
Definition: dct32_template.c:123
SWS_PIXEL_TYPE_NB
@ SWS_PIXEL_TYPE_NB
Definition: uops.h:44
SwsUOpParams::shift
SwsShiftUOp shift
Definition: uops.h:192
flags
static const SwsFlags flags[]
Definition: uops.c:942
translate_swizzle
static int translate_swizzle(SwsUOpList *ops, SwsUOpFlags flags, const SwsOp *op)
Definition: uops.c:592
AVRational
Rational number (pair of numerator and denominator).
Definition: rational.h:58
av_unreachable
#define av_unreachable(msg)
Asserts that are used as compiler optimization hints depending upon ASSERT_LEVEL and NDEBUG.
Definition: avassert.h:116
SwsMoveUOp::dst
int8_t dst[SWS_UOP_MOVE_MAX]
Definition: uops.h:158
AVTreeNode
Definition: tree.c:26
SwsClearUOp::one
SwsCompMask one
Definition: uops.h:167
SWS_UOP_FLAG_MOVE
@ SWS_UOP_FLAG_MOVE
Definition: uops.h:85
SWS_OP_FILTER_V
@ SWS_OP_FILTER_V
Definition: ops.h:62
SWS_UOP_READ_NIBBLE
@ SWS_UOP_READ_NIBBLE
Definition: uops.h:97
MOVE
#define MOVE
Definition: rasc.c:45
SWS_UOP_ADD
@ SWS_UOP_ADD
Definition: uops.h:122
generate_entry_struct
static int generate_entry_struct(void *opaque, void *key)
Definition: uops.c:223
translate_dither_op
static int translate_dither_op(SwsUOpList *ops, const SwsOp *op)
Definition: uops.c:646
av_tree_destroy
void av_tree_destroy(AVTreeNode *t)
Definition: tree.c:146
SWS_MASK
#define SWS_MASK(I, J)
Definition: ops.h:197
SwsPixelType
SwsPixelType
Definition: uops.h:38
pixel_is_1s
static bool pixel_is_1s(SwsPixelType type, SwsPixel val)
Definition: uops.c:116
SwsUOp::par
SwsUOpParams par
Definition: uops.h:206
SWS_UOP_TO_U32
@ SWS_UOP_TO_U32
Definition: uops.h:117
exact_prod
static bool exact_prod(SwsPixelType type, SwsPixel coef, const SwsComps *comps, int idx)
Definition: uops.c:428
SwsUOp::data
union SwsUOp::@586 data
ff_sws_graph_free
void ff_sws_graph_free(SwsGraph **pgraph)
Uninitialize any state associated with this filter graph and free it.
Definition: graph.c:916
SwsUOp
Definition: uops.h:201
SWS_UOP_WRITE_BIT
@ SWS_UOP_WRITE_BIT
Definition: uops.h:103
uop_uninit
static void uop_uninit(SwsUOp *uop)
Definition: uops.c:352
copy
static void copy(const float *p1, float *p2, const int length)
Definition: vf_vaguedenoiser.c:186
av_bprint_finalize
int av_bprint_finalize(AVBPrint *buf, char **ret_str)
Finalize a print buffer.
Definition: bprint.c:235
sws_alloc_context
SwsContext * sws_alloc_context(void)
Allocate an empty SwsContext and set its fields to default values.
Definition: utils.c:1043
SWS_UOP_READ_PLANAR_FV_FMA
@ SWS_UOP_READ_PLANAR_FV_FMA
Definition: uops.h:95
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:87
i
#define i(width, name, range_min, range_max)
Definition: cbs_h264.c:63
SwsLinearUOp::zero
uint32_t zero
Definition: uops.h:173
SwsUOp::mask
SwsCompMask mask
Definition: uops.h:205
SwsDitherUOp::size_log2
uint8_t size_log2
Definition: uops.h:181
size
int size
Definition: twinvq_data.h:10344
SWS_OP_RSHIFT
@ SWS_OP_RSHIFT
Definition: ops.h:47
uop_names
static const struct @583 uop_names[SWS_UOP_TYPE_NB]
SWS_OP_WRITE
@ SWS_OP_WRITE
Definition: ops.h:39
SWS_UOP_UNPACK
@ SWS_UOP_UNPACK
Definition: uops.h:127
SWS_COMP
#define SWS_COMP(X)
Definition: uops.h:70
tree.h
SWS_PIXEL_U32
@ SWS_PIXEL_U32
Definition: uops.h:42
av_refstruct_ref
void * av_refstruct_ref(void *obj)
Create a new reference to an object managed via this API, i.e.
Definition: refstruct.c:140
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
SwsPixel
Definition: uops.h:51
SwsOp::comps
SwsComps comps
Metadata about the operation's input/output components.
Definition: ops.h:250
register_uop
static int register_uop(struct AVTreeNode **root, const SwsUOp *uop)
Definition: uops.c:865
ff_sws_uops_macros_gen
int ff_sws_uops_macros_gen(char **out_str)
Generate a set of boilerplate C preprocessor macros for describing and programmatically iterating ove...
Definition: uops.c:966
input
and forward the test the status of outputs and forward it to the corresponding return FFERROR_NOT_READY If the filters stores internally one or a few frame for some input
Definition: filter_design.txt:172
ff_sws_uop_list_alloc
SwsUOpList * ff_sws_uop_list_alloc(void)
Definition: uops.c:382
av_refstruct_unref
void av_refstruct_unref(void *objp)
Decrement the reference count of the underlying object and automatically free the object if there are...
Definition: refstruct.c:120
ff_sws_comp_mask_q4
SwsCompMask ff_sws_comp_mask_q4(const AVRational q[4])
Definition: ops.c:137
is_expand_bit
static bool is_expand_bit(SwsPixelType type, AVRational factor)
Definition: uops.c:731
register_all_uops
static int register_all_uops(SwsContext *ctx, void *graph, SwsOpList *ops)
Definition: uops.c:932
SWS_UOP_TO_U8
@ SWS_UOP_TO_U8
Definition: uops.h:115
exact_product_f32
static bool exact_product_f32(float a, float b)
Definition: uops.c:421
SWS_UOP_READ_PLANAR
@ SWS_UOP_READ_PLANAR
Definition: uops.h:92
pixel_types
static const struct @584 pixel_types[SWS_PIXEL_TYPE_NB]
SwsOpList::ops
SwsOp * ops
Definition: ops.h:282
av_assert1
#define av_assert1(cond)
assert() equivalent, for use in code that is not speed critical.
Definition: avassert.h:58
SWS_PIXEL_U8
@ SWS_PIXEL_U8
Definition: uops.h:40
needed
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is needed
Definition: filter_design.txt:212
SWS_UOP_SWAP_BYTES
@ SWS_UOP_SWAP_BYTES
Definition: uops.h:111
SwsUOp::scalar
SwsPixel scalar
Definition: uops.h:212
ops_internal.h
MIN
#define MIN(a, b)
Definition: qt-faststart.c:45
SWS_UOP_LINEAR
@ SWS_UOP_LINEAR
Definition: uops.h:132
ff_sws_enum_op_lists
int ff_sws_enum_op_lists(SwsContext *ctx, void *opaque, enum AVPixelFormat src_fmt, enum AVPixelFormat dst_fmt, int(*cb)(SwsContext *ctx, void *opaque, SwsOpList *ops))
Helper function to enumerate over all possible (optimized) operation lists, under the current set of ...
Definition: ops.c:1081
SwsOp
Definition: ops.h:226
SwsUOpParams::lin
SwsLinearUOp lin
Definition: uops.h:197
SwsPackUOp::pattern
uint8_t pattern[4]
Definition: uops.h:163
abbr
char abbr[32]
Definition: uops.c:45
SwsUOp::type
SwsPixelType type
Definition: uops.h:203
pixel_type_to_int
static SwsPixelType pixel_type_to_int(const SwsPixelType type)
Definition: uops.c:408
ff_sws_ops_translate
int ff_sws_ops_translate(SwsContext *ctx, const SwsOpList *ops, SwsUOpFlags flags, SwsUOpList *uops)
Translate a list of operations down to micro-ops, which can be further optimized and then directly ex...
Definition: uops.c:852
ret
ret
Definition: filter_design.txt:187
BPRINT_STR
#define BPRINT_STR(str)
SwsUOpList::num_ops
int num_ops
Definition: uops.h:237
SWS_OP_MAX
@ SWS_OP_MAX
Definition: ops.h:53
SwsCompiledOp
Definition: ops_dispatch.h:100
ff_sws_uop_list_free
void ff_sws_uop_list_free(SwsUOpList **p_ops)
Definition: uops.c:368
av_bprintf
void av_bprintf(AVBPrint *buf, const char *fmt,...)
Definition: bprint.c:122
SwsUOp::ptr
SwsPixel * ptr
Definition: uops.h:211
macro
char macro[32]
Definition: uops.c:46
SwsComps
Definition: ops.h:79
AVRational::den
int den
Denominator.
Definition: rational.h:60
AV_PIX_FMT_NONE
@ AV_PIX_FMT_NONE
Definition: pixfmt.h:72
ff_sws_pixel_type_name
const char * ff_sws_pixel_type_name(SwsPixelType type)
Definition: ops.c:62
SwsLinearUOp::exact
uint32_t exact
Definition: uops.h:176
SWS_OP_SWAP_BYTES
@ SWS_OP_SWAP_BYTES
Definition: ops.h:40
ff_sws_uop_name
void ff_sws_uop_name(const SwsUOp *op, char buf[SWS_UOP_NAME_MAX])
Definition: uops.c:129
CLEAR
#define CLEAR(destin)
Definition: wavpackenc.c:50
SwsDitherUOp::y_offset
uint8_t y_offset[4]
Definition: uops.h:180
ref
static int ref[MAX_W *MAX_W]
Definition: jpeg2000dwt.c:117
px
#define px
Definition: uops_tmpl.c:54
SwsUOpList
Definition: uops.h:235
SwsUOp::vec4
SwsPixel vec4[4]
Definition: uops.h:213
ff_sws_uop_list_append
int ff_sws_uop_list_append(SwsUOpList *uops, SwsUOp *uop)
Definition: uops.c:387
ff_sws_compile_pass
int ff_sws_compile_pass(SwsGraph *graph, const SwsOpBackend *backend, SwsOpList **pops, int flags, SwsPass *input, SwsPass **output)
Resolves an operation list to a graph pass.
Definition: ops_dispatch.c:634
generate_entry_args
static int generate_entry_args(void *opaque, void *key)
Definition: uops.c:289
SWS_FILTER_SCALE
@ SWS_FILTER_SCALE
14-bit coefficients are picked to fit comfortably within int16_t for efficient SIMD processing (e....
Definition: filters.h:40
SWS_UOP_DITHER
@ SWS_UOP_DITHER
Definition: uops.h:134
SWS_UOP_WRITE_PACKED
@ SWS_UOP_WRITE_PACKED
Definition: uops.h:101
SwsDitherUOp
Definition: uops.h:179
SwsUOpParams::dither
SwsDitherUOp dither
Definition: uops.h:198
mem.h
SwsGraph
Filter graph, which represents a 'baked' pixel format conversion.
Definition: graph.h:122
SWS_PIXEL_F32
@ SWS_PIXEL_F32
Definition: uops.h:43
SWS_UOP_READ_PLANAR_FV
@ SWS_UOP_READ_PLANAR_FV
Definition: uops.h:94
av_free
#define av_free(p)
Definition: tableprint_vlc.h:34
uops.h
SWS_UOP_EXPAND_QUAD
@ SWS_UOP_EXPAND_QUAD
Definition: uops.h:114
SwsUOpFlags
uint32_t SwsUOpFlags
Definition: uops.h:81
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:35
SWS_OP_CONVERT
@ SWS_OP_CONVERT
Definition: ops.h:51
SWS_UOP_READ_PLANAR_FH
@ SWS_UOP_READ_PLANAR_FH
Definition: uops.h:93
sws_free_context
void sws_free_context(SwsContext **ctx)
Free the context and everything associated with it, and write NULL to the provided pointer.
Definition: utils.c:2381
SwsMoveUOp::src
int8_t src[SWS_UOP_MOVE_MAX]
Definition: uops.h:159
SwsUOpParams::filter
SwsFilterUOp filter
Definition: uops.h:191
translate_linear_op
static int translate_linear_op(SwsContext *ctx, SwsUOpList *ops, SwsUOpFlags flags, const SwsOp *op, const SwsComps *input)
Definition: uops.c:694
SWS_UOP_FLAG_FMA
@ SWS_UOP_FLAG_FMA
Definition: uops.h:84
ff_sws_dither_height
int ff_sws_dither_height(const SwsDitherUOp *dither)
Computes (1 << size_log2) + MAX(y_offset).
Definition: uops.c:400
translate_op
static int translate_op(SwsContext *ctx, SwsUOpList *uops, SwsUOpFlags flags, const SwsOp *op, const SwsComps *input)
Definition: uops.c:749
SWS_ACCURATE_RND
@ SWS_ACCURATE_RND
Force bit-exact output.
Definition: swscale.h:179
av_bprint_chars
void av_bprint_chars(AVBPrint *buf, char c, unsigned n)
Append char c n times to a print buffer.
Definition: bprint.c:130
SWS_UOP_READ_BIT
@ SWS_UOP_READ_BIT
Definition: uops.h:98
stride
#define stride
Definition: h264pred_template.c:536
SWS_UOP_CLEAR
@ SWS_UOP_CLEAR
Definition: uops.h:131
SwsOpList
Helper struct for representing a list of operations.
Definition: ops.h:281
SwsContext
Main external API structure.
Definition: swscale.h:229
SWS_PIXEL_U16
@ SWS_PIXEL_U16
Definition: uops.h:41
SwsSwizzleUOp::in
uint8_t in[4]
Definition: uops.h:149
SwsUOpParams::clear
SwsClearUOp clear
Definition: uops.h:196
SwsUOpList::ops
SwsUOp * ops
Definition: uops.h:236
src
#define src
Definition: vp8dsp.c:248
SWS_UOP_EXPAND_PAIR
@ SWS_UOP_EXPAND_PAIR
Definition: uops.h:113
register_uops
static int register_uops(SwsContext *ctx, const SwsOpList *ops, SwsCompiledOp *out)
Definition: uops.c:913
min
float min
Definition: vorbis_enc_data.h:429