FFmpeg
sw_ops.c
Go to the documentation of this file.
1 /**
2  * Copyright (C) 2025 Niklas Haas
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License along
17  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
18  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19  */
20 
#include <string.h>

#include "libavutil/avassert.h"
#include "libavutil/mem_internal.h"
#include "libavutil/refstruct.h"

#include "libswscale/ops.h"
#include "libswscale/ops_internal.h"

#include "checkasm.h"
31 
/* Dimensions of the scratch buffers used by every test */
enum {
    LINES     = 2,  /* rows per plane */
    NB_PLANES = 4,  /* planes per buffer */
    PIXELS    = 64, /* pixels per row */
};
37 
38 enum {
43 };
44 
/**
 * printf-style formatting into a caller-supplied buffer. The buffer itself
 * is returned so that the call can be used directly as an expression.
 */
static const char *tprintf(char buf[], size_t size, const char *fmt, ...)
{
    va_list args;

    va_start(args, fmt);
    vsnprintf(buf, size, fmt, args);
    va_end(args);

    return buf;
}

/* Format into a temporary 256-byte compound literal and yield the string */
#define FMT(fmt, ...) tprintf((char[256]) {0}, 256, fmt, __VA_ARGS__)
54 
55 static int rw_pixel_bits(const SwsOp *op)
56 {
57  const int elems = op->rw.packed ? op->rw.elems : 1;
58  const int size = ff_sws_pixel_type_size(op->type);
59  const int bits = 8 >> op->rw.frac;
60  av_assert1(bits >= 1);
61  return elems * size * bits;
62 }
63 
/* Draw random 32-bit patterns until one of them decodes to a normal float */
static float rndf(void)
{
    union { uint32_t u; float f; } pun;

    for (;;) {
        pun.u = rnd();
        if (isnormal(pun.f))
            return pun.f;
    }
}
72 
/**
 * Fill `num` floats with random data. With a non-zero `range`, each value is
 * a random word scaled by range / UINT32_MAX; with a zero `range`, each value
 * is an arbitrary normal float.
 */
static void fill32f(float *line, int num, unsigned range)
{
    const float step = (float) range / UINT32_MAX;

    for (int idx = 0; idx < num; idx++) {
        if (range)
            line[idx] = step * rnd();
        else
            line[idx] = rndf();
    }
}
79 
/**
 * Fill `num` 32-bit words with random data. Values are limited to
 * [0, range] unless `range` is 0 or UINT_MAX, in which case the random
 * word is stored unmodified.
 */
static void fill32(uint32_t *line, int num, unsigned range)
{
    /* range + 1 would wrap to 0 for UINT_MAX, so that case is unrestricted */
    const int limited = range && range < UINT_MAX;

    for (int idx = 0; idx < num; idx++) {
        if (limited)
            line[idx] = rnd() % (range + 1);
        else
            line[idx] = rnd();
    }
}
85 
/**
 * Fill `num` 16-bit samples with random data.
 *
 * A `range` of 0, or one larger than the type can hold, requests unrestricted
 * values; otherwise every sample lies in [0, range].
 *
 * The unrestricted path stores two samples per random word through a
 * uint32_t pointer, so `line` is expected to be 32-bit aligned.
 */
static void fill16(uint16_t *line, int num, unsigned range)
{
    if (num <= 0)
        return;

    if (!range || range > UINT16_MAX) {
        /* Whole words first, then the odd trailing sample (if any) on its
         * own so that nothing is written past line[num - 1] */
        fill32((uint32_t *) line, num >> 1, 0);
        if (num & 1)
            line[num - 1] = rnd();
    } else {
        /* range <= UINT16_MAX here, so range + 1 can never wrap to zero */
        for (int i = 0; i < num; i++)
            line[i] = rnd() % (range + 1);
    }
}
95 
/**
 * Fill `num` bytes with random data.
 *
 * A `range` of 0, or one larger than the type can hold, requests unrestricted
 * values; otherwise every byte lies in [0, range].
 *
 * The unrestricted path stores four bytes per random word through a
 * uint32_t pointer, so `line` is expected to be 32-bit aligned.
 */
static void fill8(uint8_t *line, int num, unsigned range)
{
    if (num <= 0)
        return;

    if (!range || range > UINT8_MAX) {
        /* Whole words first, then up to three trailing bytes one at a time
         * so that nothing is written past line[num - 1] */
        const int words = num >> 2;
        fill32((uint32_t *) line, words, 0);
        for (int i = words << 2; i < num; i++)
            line[i] = rnd();
    } else {
        /* range <= UINT8_MAX here, so range + 1 can never wrap to zero */
        for (int i = 0; i < num; i++)
            line[i] = rnd() % (range + 1);
    }
}
105 
106 static void check_ops(const char *report, const unsigned ranges[NB_PLANES],
107  const SwsOp *ops)
108 {
110  SwsCompiledOp comp_ref = {0}, comp_new = {0};
111  const SwsOpBackend *backend_new = NULL;
112  SwsOpList oplist = { .ops = (SwsOp *) ops };
113  const SwsOp *read_op, *write_op;
114  static const unsigned def_ranges[4] = {0};
115  if (!ranges)
116  ranges = def_ranges;
117 
118  declare_func(void, const SwsOpExec *, const void *, int bx, int y, int bx_end, int y_end);
119 
120  DECLARE_ALIGNED_64(char, src0)[NB_PLANES][LINES][PIXELS * sizeof(uint32_t[4])];
121  DECLARE_ALIGNED_64(char, src1)[NB_PLANES][LINES][PIXELS * sizeof(uint32_t[4])];
122  DECLARE_ALIGNED_64(char, dst0)[NB_PLANES][LINES][PIXELS * sizeof(uint32_t[4])];
123  DECLARE_ALIGNED_64(char, dst1)[NB_PLANES][LINES][PIXELS * sizeof(uint32_t[4])];
124 
125  if (!ctx)
126  return;
128 
129  read_op = &ops[0];
130  for (oplist.num_ops = 0; ops[oplist.num_ops].op; oplist.num_ops++)
131  write_op = &ops[oplist.num_ops];
132 
133  const int read_size = PIXELS * rw_pixel_bits(read_op) >> 3;
134  const int write_size = PIXELS * rw_pixel_bits(write_op) >> 3;
135 
136  for (int p = 0; p < NB_PLANES; p++) {
137  void *plane = src0[p];
138  switch (read_op->type) {
139  case U8: fill8(plane, sizeof(src0[p]) / sizeof(uint8_t), ranges[p]); break;
140  case U16: fill16(plane, sizeof(src0[p]) / sizeof(uint16_t), ranges[p]); break;
141  case U32: fill32(plane, sizeof(src0[p]) / sizeof(uint32_t), ranges[p]); break;
142  case F32: fill32f(plane, sizeof(src0[p]) / sizeof(uint32_t), ranges[p]); break;
143  }
144  }
145 
146  memcpy(src1, src0, sizeof(src0));
147  memset(dst0, 0, sizeof(dst0));
148  memset(dst1, 0, sizeof(dst1));
149 
150  /* Compile `ops` using both the asm and c backends */
151  for (int n = 0; ff_sws_op_backends[n]; n++) {
152  const SwsOpBackend *backend = ff_sws_op_backends[n];
153  const bool is_ref = !strcmp(backend->name, "c");
154  if (is_ref || !comp_new.func) {
156  int ret = ff_sws_ops_compile_backend(ctx, backend, &oplist, &comp);
157  if (ret == AVERROR(ENOTSUP))
158  continue;
159  else if (ret < 0)
160  fail();
161  else if (PIXELS % comp.block_size != 0)
162  fail();
163 
164  if (is_ref)
165  comp_ref = comp;
166  if (!comp_new.func) {
167  comp_new = comp;
168  backend_new = backend;
169  }
170  }
171  }
172 
173  av_assert0(comp_ref.func && comp_new.func);
174 
175  SwsOpExec exec = {0};
176  exec.width = PIXELS;
177  exec.height = exec.slice_h = 1;
178  for (int i = 0; i < NB_PLANES; i++) {
179  exec.in_stride[i] = sizeof(src0[i][0]);
180  exec.out_stride[i] = sizeof(dst0[i][0]);
181  exec.in_bump[i] = exec.in_stride[i] - read_size;
182  exec.out_bump[i] = exec.out_stride[i] - write_size;
183  }
184 
185  /**
186  * Don't use check_func() because the actual function pointer may be a
187  * wrapper shared by multiple implementations. Instead, take a hash of both
188  * the backend pointer and the active CPU flags.
189  */
190  uintptr_t id = (uintptr_t) backend_new;
191  id ^= (id << 6) + (id >> 2) + 0x9e3779b97f4a7c15 + comp_new.cpu_flags;
192 
193  if (check_key((void*) id, "%s", report)) {
194  exec.block_size_in = comp_ref.block_size * rw_pixel_bits(read_op) >> 3;
195  exec.block_size_out = comp_ref.block_size * rw_pixel_bits(write_op) >> 3;
196  for (int i = 0; i < NB_PLANES; i++) {
197  exec.in[i] = (void *) src0[i];
198  exec.out[i] = (void *) dst0[i];
199  }
200  checkasm_call(comp_ref.func, &exec, comp_ref.priv, 0, 0, PIXELS / comp_ref.block_size, LINES);
201 
202  exec.block_size_in = comp_new.block_size * rw_pixel_bits(read_op) >> 3;
203  exec.block_size_out = comp_new.block_size * rw_pixel_bits(write_op) >> 3;
204  for (int i = 0; i < NB_PLANES; i++) {
205  exec.in[i] = (void *) src1[i];
206  exec.out[i] = (void *) dst1[i];
207  }
208  checkasm_call_checked(comp_new.func, &exec, comp_new.priv, 0, 0, PIXELS / comp_new.block_size, LINES);
209 
210  for (int i = 0; i < NB_PLANES; i++) {
211  const char *name = FMT("%s[%d]", report, i);
212  const int stride = sizeof(dst0[i][0]);
213 
214  switch (write_op->type) {
215  case U8:
216  checkasm_check(uint8_t, (void *) dst0[i], stride,
217  (void *) dst1[i], stride,
218  write_size, LINES, name);
219  break;
220  case U16:
221  checkasm_check(uint16_t, (void *) dst0[i], stride,
222  (void *) dst1[i], stride,
223  write_size >> 1, LINES, name);
224  break;
225  case U32:
226  checkasm_check(uint32_t, (void *) dst0[i], stride,
227  (void *) dst1[i], stride,
228  write_size >> 2, LINES, name);
229  break;
230  case F32:
231  checkasm_check(float_ulp, (void *) dst0[i], stride,
232  (void *) dst1[i], stride,
233  write_size >> 2, LINES, name, 0);
234  break;
235  }
236 
237  if (write_op->rw.packed)
238  break;
239  }
240 
241  bench(comp_new.func, &exec, comp_new.priv, 0, 0, PIXELS / comp_new.block_size, LINES);
242  }
243 
244  if (comp_new.func != comp_ref.func && comp_new.free)
245  comp_new.free(comp_new.priv);
246  if (comp_ref.free)
247  comp_ref.free(comp_ref.priv);
249 }
250 
/*
 * Run check_ops() on the op list READ(IN, N_IN), <ops given as variadic
 * arguments>, WRITE(OUT, N_OUT), using the per-plane input ranges RANGES.
 */
#define CHECK_RANGES(NAME, RANGES, N_IN, N_OUT, IN, OUT, ...)                   \
  do {                                                                          \
      check_ops(NAME, RANGES, (SwsOp[]) {                                       \
        {                                                                       \
            .op = SWS_OP_READ,                                                  \
            .type = IN,                                                         \
            .rw.elems = N_IN,                                                   \
        },                                                                      \
        __VA_ARGS__,                                                            \
        {                                                                       \
            .op = SWS_OP_WRITE,                                                 \
            .type = OUT,                                                        \
            .rw.elems = N_OUT,                                                  \
        }, {0}                                                                  \
    });                                                                         \
  } while (0)

/* Same range R for all four planes */
#define MK_RANGES(R) ((const unsigned[]) { R, R, R, R })

/* CHECK_RANGES() with one common input range */
#define CHECK_RANGE(NAME, RANGE, N_IN, N_OUT, IN, OUT, ...)                     \
    CHECK_RANGES(NAME, MK_RANGES(RANGE), N_IN, N_OUT, IN, OUT, __VA_ARGS__)

/*
 * Run the given ops with 1->1, 3->3 and 4->4 components, plus a 4->2 variant
 * that appends a swizzle. The variadic arguments are expanded once per
 * variant. Wrapped in do/while (0) so that the expansion is a single
 * statement and stays correct under an unbraced if/else.
 */
#define CHECK_COMMON_RANGE(NAME, RANGE, IN, OUT, ...)                           \
  do {                                                                          \
      CHECK_RANGE(FMT("%s_p1000", NAME), RANGE, 1, 1, IN, OUT, __VA_ARGS__);    \
      CHECK_RANGE(FMT("%s_p1110", NAME), RANGE, 3, 3, IN, OUT, __VA_ARGS__);    \
      CHECK_RANGE(FMT("%s_p1111", NAME), RANGE, 4, 4, IN, OUT, __VA_ARGS__);    \
      CHECK_RANGE(FMT("%s_p1001", NAME), RANGE, 4, 2, IN, OUT, __VA_ARGS__, {   \
          .op = SWS_OP_SWIZZLE,                                                 \
          .type = OUT,                                                          \
          .swizzle = SWS_SWIZZLE(0, 3, 1, 2),                                   \
      });                                                                       \
  } while (0)

/* CHECK_RANGE() with unrestricted input */
#define CHECK(NAME, N_IN, N_OUT, IN, OUT, ...)                                  \
    CHECK_RANGE(NAME, 0, N_IN, N_OUT, IN, OUT, __VA_ARGS__)

/* CHECK_COMMON_RANGE() with unrestricted input */
#define CHECK_COMMON(NAME, IN, OUT, ...)                                        \
    CHECK_COMMON_RANGE(NAME, 0, IN, OUT, __VA_ARGS__)
287 
288 static void check_read_write(void)
289 {
290  for (SwsPixelType t = U8; t < SWS_PIXEL_TYPE_NB; t++) {
291  const char *type = ff_sws_pixel_type_name(t);
292  for (int i = 1; i <= 4; i++) {
293  /* Test N->N planar read/write */
294  for (int o = 1; o <= i; o++) {
295  check_ops(FMT("rw_%d_%d_%s", i, o, type), NULL, (SwsOp[]) {
296  {
297  .op = SWS_OP_READ,
298  .type = t,
299  .rw.elems = i,
300  }, {
301  .op = SWS_OP_WRITE,
302  .type = t,
303  .rw.elems = o,
304  }, {0}
305  });
306  }
307 
308  /* Test packed read/write */
309  if (i == 1)
310  continue;
311 
312  check_ops(FMT("read_packed%d_%s", i, type), NULL, (SwsOp[]) {
313  {
314  .op = SWS_OP_READ,
315  .type = t,
316  .rw.elems = i,
317  .rw.packed = true,
318  }, {
319  .op = SWS_OP_WRITE,
320  .type = t,
321  .rw.elems = i,
322  }, {0}
323  });
324 
325  check_ops(FMT("write_packed%d_%s", i, type), NULL, (SwsOp[]) {
326  {
327  .op = SWS_OP_READ,
328  .type = t,
329  .rw.elems = i,
330  }, {
331  .op = SWS_OP_WRITE,
332  .type = t,
333  .rw.elems = i,
334  .rw.packed = true,
335  }, {0}
336  });
337  }
338  }
339 
340  /* Test fractional reads/writes */
341  for (int frac = 1; frac <= 3; frac++) {
342  const int bits = 8 >> frac;
343  const int range = (1 << bits) - 1;
344  if (bits == 2)
345  continue; /* no 2 bit packed formats currently exist */
346 
347  check_ops(FMT("read_frac%d", frac), NULL, (SwsOp[]) {
348  {
349  .op = SWS_OP_READ,
350  .type = U8,
351  .rw.elems = 1,
352  .rw.frac = frac,
353  }, {
354  .op = SWS_OP_WRITE,
355  .type = U8,
356  .rw.elems = 1,
357  }, {0}
358  });
359 
360  check_ops(FMT("write_frac%d", frac), MK_RANGES(range), (SwsOp[]) {
361  {
362  .op = SWS_OP_READ,
363  .type = U8,
364  .rw.elems = 1,
365  }, {
366  .op = SWS_OP_WRITE,
367  .type = U8,
368  .rw.elems = 1,
369  .rw.frac = frac,
370  }, {0}
371  });
372  }
373 }
374 
375 static void check_swap_bytes(void)
376 {
377  CHECK_COMMON("swap_bytes_16", U16, U16, {
378  .op = SWS_OP_SWAP_BYTES,
379  .type = U16,
380  });
381 
382  CHECK_COMMON("swap_bytes_32", U32, U32, {
383  .op = SWS_OP_SWAP_BYTES,
384  .type = U32,
385  });
386 }
387 
388 static void check_pack_unpack(void)
389 {
390  const struct {
392  SwsPackOp op;
393  } patterns[] = {
394  { U8, {{ 3, 3, 2 }}},
395  { U8, {{ 2, 3, 3 }}},
396  { U8, {{ 1, 2, 1 }}},
397  {U16, {{ 5, 6, 5 }}},
398  {U16, {{ 5, 5, 5 }}},
399  {U16, {{ 4, 4, 4 }}},
400  {U32, {{ 2, 10, 10, 10 }}},
401  {U32, {{10, 10, 10, 2 }}},
402  };
403 
404  for (int i = 0; i < FF_ARRAY_ELEMS(patterns); i++) {
405  const SwsPixelType type = patterns[i].type;
406  const SwsPackOp pack = patterns[i].op;
407  const int num = pack.pattern[3] ? 4 : 3;
408  const char *pat = FMT("%d%d%d%d", pack.pattern[0], pack.pattern[1],
409  pack.pattern[2], pack.pattern[3]);
410  const int total = pack.pattern[0] + pack.pattern[1] +
411  pack.pattern[2] + pack.pattern[3];
412  const unsigned ranges[4] = {
413  (1 << pack.pattern[0]) - 1,
414  (1 << pack.pattern[1]) - 1,
415  (1 << pack.pattern[2]) - 1,
416  (1 << pack.pattern[3]) - 1,
417  };
418 
419  CHECK_RANGES(FMT("pack_%s", pat), ranges, num, 1, type, type, {
420  .op = SWS_OP_PACK,
421  .type = type,
422  .pack = pack,
423  });
424 
425  CHECK_RANGE(FMT("unpack_%s", pat), UINT32_MAX >> (32 - total), 1, num, type, type, {
426  .op = SWS_OP_UNPACK,
427  .type = type,
428  .pack = pack,
429  });
430  }
431 }
432 
434 {
435  const unsigned num = rnd();
436  if (ff_sws_pixel_type_is_int(t)) {
437  const unsigned mask = UINT_MAX >> (32 - ff_sws_pixel_type_size(t) * 8);
438  return (AVRational) { num & mask, 1 };
439  } else {
440  const unsigned den = rnd();
441  return (AVRational) { num, den ? den : 1 };
442  }
443 }
444 
445 static void check_clear(void)
446 {
447  for (SwsPixelType t = U8; t < SWS_PIXEL_TYPE_NB; t++) {
448  const char *type = ff_sws_pixel_type_name(t);
449  const int bits = ff_sws_pixel_type_size(t) * 8;
450 
451  /* TODO: AVRational can't fit 32 bit constants */
452  if (bits < 32) {
453  const AVRational chroma = (AVRational) { 1 << (bits - 1), 1};
454  const AVRational alpha = (AVRational) { (1 << bits) - 1, 1};
455  const AVRational zero = (AVRational) { 0, 1};
456  const AVRational none = {0};
457 
458  const SwsConst patterns[] = {
459  /* Zero only */
460  {.q4 = { none, none, none, zero }},
461  {.q4 = { zero, none, none, none }},
462  /* Alpha only */
463  {.q4 = { none, none, none, alpha }},
464  {.q4 = { alpha, none, none, none }},
465  /* Chroma only */
466  {.q4 = { chroma, chroma, none, none }},
467  {.q4 = { none, chroma, chroma, none }},
468  {.q4 = { none, none, chroma, chroma }},
469  {.q4 = { chroma, none, chroma, none }},
470  {.q4 = { none, chroma, none, chroma }},
471  /* Alpha+chroma */
472  {.q4 = { chroma, chroma, none, alpha }},
473  {.q4 = { none, chroma, chroma, alpha }},
474  {.q4 = { alpha, none, chroma, chroma }},
475  {.q4 = { chroma, none, chroma, alpha }},
476  {.q4 = { alpha, chroma, none, chroma }},
477  /* Random values */
478  {.q4 = { none, rndq(t), rndq(t), rndq(t) }},
479  {.q4 = { none, rndq(t), rndq(t), rndq(t) }},
480  {.q4 = { none, rndq(t), rndq(t), rndq(t) }},
481  {.q4 = { none, rndq(t), rndq(t), rndq(t) }},
482  };
483 
484  for (int i = 0; i < FF_ARRAY_ELEMS(patterns); i++) {
485  CHECK(FMT("clear_pattern_%s[%d]", type, i), 4, 4, t, t, {
486  .op = SWS_OP_CLEAR,
487  .type = t,
488  .c = patterns[i],
489  });
490  }
491  } else if (!ff_sws_pixel_type_is_int(t)) {
492  /* Floating point YUV doesn't exist, only alpha needs to be cleared */
493  CHECK(FMT("clear_alpha_%s", type), 4, 4, t, t, {
494  .op = SWS_OP_CLEAR,
495  .type = t,
496  .c.q4[3] = { 0, 1 },
497  });
498  }
499  }
500 }
501 
502 static void check_shift(void)
503 {
504  for (SwsPixelType t = U16; t < SWS_PIXEL_TYPE_NB; t++) {
505  const char *type = ff_sws_pixel_type_name(t);
506  if (!ff_sws_pixel_type_is_int(t))
507  continue;
508 
509  for (int shift = 1; shift <= 8; shift++) {
510  CHECK_COMMON(FMT("lshift%d_%s", shift, type), t, t, {
511  .op = SWS_OP_LSHIFT,
512  .type = t,
513  .c.u = shift,
514  });
515 
516  CHECK_COMMON(FMT("rshift%d_%s", shift, type), t, t, {
517  .op = SWS_OP_RSHIFT,
518  .type = t,
519  .c.u = shift,
520  });
521  }
522  }
523 }
524 
525 static void check_swizzle(void)
526 {
527  for (SwsPixelType t = U8; t < SWS_PIXEL_TYPE_NB; t++) {
528  const char *type = ff_sws_pixel_type_name(t);
529  static const int patterns[][4] = {
530  /* Pure swizzle */
531  {3, 0, 1, 2},
532  {3, 0, 2, 1},
533  {2, 1, 0, 3},
534  {3, 2, 1, 0},
535  {3, 1, 0, 2},
536  {3, 2, 0, 1},
537  {1, 2, 0, 3},
538  {1, 0, 2, 3},
539  {2, 0, 1, 3},
540  {2, 3, 1, 0},
541  {2, 1, 3, 0},
542  {1, 2, 3, 0},
543  {1, 3, 2, 0},
544  {0, 2, 1, 3},
545  {0, 2, 3, 1},
546  {0, 3, 1, 2},
547  {3, 1, 2, 0},
548  {0, 3, 2, 1},
549  /* Luma expansion */
550  {0, 0, 0, 3},
551  {3, 0, 0, 0},
552  {0, 0, 0, 1},
553  {1, 0, 0, 0},
554  };
555 
556  for (int i = 0; i < FF_ARRAY_ELEMS(patterns); i++) {
557  const int x = patterns[i][0], y = patterns[i][1],
558  z = patterns[i][2], w = patterns[i][3];
559  CHECK(FMT("swizzle_%d%d%d%d_%s", x, y, z, w, type), 4, 4, t, t, {
560  .op = SWS_OP_SWIZZLE,
561  .type = t,
562  .swizzle = SWS_SWIZZLE(x, y, z, w),
563  });
564  }
565  }
566 }
567 
568 static void check_convert(void)
569 {
570  for (SwsPixelType i = U8; i < SWS_PIXEL_TYPE_NB; i++) {
571  const char *itype = ff_sws_pixel_type_name(i);
572  const int isize = ff_sws_pixel_type_size(i);
573  for (SwsPixelType o = U8; o < SWS_PIXEL_TYPE_NB; o++) {
574  const char *otype = ff_sws_pixel_type_name(o);
575  const int osize = ff_sws_pixel_type_size(o);
576  const char *name = FMT("convert_%s_%s", itype, otype);
577  if (i == o)
578  continue;
579 
580  if (isize < osize || !ff_sws_pixel_type_is_int(o)) {
581  CHECK_COMMON(name, i, o, {
582  .op = SWS_OP_CONVERT,
583  .type = i,
584  .convert.to = o,
585  });
586  } else if (isize > osize || !ff_sws_pixel_type_is_int(i)) {
587  uint32_t range = UINT32_MAX >> (32 - osize * 8);
589  .op = SWS_OP_CONVERT,
590  .type = i,
591  .convert.to = o,
592  });
593  }
594  }
595  }
596 
597  /* Check expanding conversions */
598  CHECK_COMMON("expand16", U8, U16, {
599  .op = SWS_OP_CONVERT,
600  .type = U8,
601  .convert.to = U16,
602  .convert.expand = true,
603  });
604 
605  CHECK_COMMON("expand32", U8, U32, {
606  .op = SWS_OP_CONVERT,
607  .type = U8,
608  .convert.to = U32,
609  .convert.expand = true,
610  });
611 }
612 
613 static void check_dither(void)
614 {
615  for (SwsPixelType t = F32; t < SWS_PIXEL_TYPE_NB; t++) {
616  const char *type = ff_sws_pixel_type_name(t);
618  continue;
619 
620  /* Test all sizes up to 256x256 */
621  for (int size_log2 = 0; size_log2 <= 8; size_log2++) {
622  const int size = 1 << size_log2;
623  const int mask = size - 1;
625  if (!matrix) {
626  fail();
627  return;
628  }
629 
630  if (size == 1) {
631  matrix[0] = (AVRational) { 1, 2 };
632  } else {
633  for (int i = 0; i < size * size; i++)
634  matrix[i] = rndq(t);
635  }
636 
637  CHECK_COMMON(FMT("dither_%dx%d_%s", size, size, type), t, t, {
638  .op = SWS_OP_DITHER,
639  .type = t,
640  .dither.size_log2 = size_log2,
641  .dither.matrix = matrix,
642  .dither.y_offset = {0, 3 & mask, 2 & mask, 5 & mask},
643  });
644 
646  }
647  }
648 }
649 
650 static void check_min_max(void)
651 {
652  for (SwsPixelType t = U8; t < SWS_PIXEL_TYPE_NB; t++) {
653  const char *type = ff_sws_pixel_type_name(t);
654  CHECK_COMMON(FMT("min_%s", type), t, t, {
655  .op = SWS_OP_MIN,
656  .type = t,
657  .c.q4 = { rndq(t), rndq(t), rndq(t), rndq(t) },
658  });
659 
660  CHECK_COMMON(FMT("max_%s", type), t, t, {
661  .op = SWS_OP_MAX,
662  .type = t,
663  .c.q4 = { rndq(t), rndq(t), rndq(t), rndq(t) },
664  });
665  }
666 }
667 
668 static void check_linear(void)
669 {
670  static const struct {
671  const char *name;
672  uint32_t mask;
673  } patterns[] = {
674  { "noop", 0 },
675  { "luma", SWS_MASK_LUMA },
676  { "alpha", SWS_MASK_ALPHA },
677  { "luma+alpha", SWS_MASK_LUMA | SWS_MASK_ALPHA },
678  { "dot3", 0x7 },
679  { "dot4", 0xF },
680  { "row0", SWS_MASK_ROW(0) },
681  { "row0+alpha", SWS_MASK_ROW(0) | SWS_MASK_ALPHA },
682  { "off3", SWS_MASK_OFF3 },
683  { "off3+alpha", SWS_MASK_OFF3 | SWS_MASK_ALPHA },
684  { "diag3", SWS_MASK_DIAG3 },
685  { "diag4", SWS_MASK_DIAG4 },
686  { "diag3+alpha", SWS_MASK_DIAG3 | SWS_MASK_ALPHA },
687  { "diag3+off3", SWS_MASK_DIAG3 | SWS_MASK_OFF3 },
688  { "diag3+off3+alpha", SWS_MASK_DIAG3 | SWS_MASK_OFF3 | SWS_MASK_ALPHA },
689  { "diag4+off4", SWS_MASK_DIAG4 | SWS_MASK_OFF4 },
690  { "matrix3", SWS_MASK_MAT3 },
691  { "matrix3+off3", SWS_MASK_MAT3 | SWS_MASK_OFF3 },
692  { "matrix3+off3+alpha", SWS_MASK_MAT3 | SWS_MASK_OFF3 | SWS_MASK_ALPHA },
693  { "matrix4", SWS_MASK_MAT4 },
694  { "matrix4+off4", SWS_MASK_MAT4 | SWS_MASK_OFF4 },
695  };
696 
697  for (SwsPixelType t = F32; t < SWS_PIXEL_TYPE_NB; t++) {
698  const char *type = ff_sws_pixel_type_name(t);
700  continue;
701 
702  for (int p = 0; p < FF_ARRAY_ELEMS(patterns); p++) {
703  const uint32_t mask = patterns[p].mask;
704  SwsLinearOp lin = { .mask = mask };
705 
706  for (int i = 0; i < 4; i++) {
707  for (int j = 0; j < 5; j++) {
708  if (mask & SWS_MASK(i, j)) {
709  lin.m[i][j] = rndq(t);
710  } else {
711  lin.m[i][j] = (AVRational) { i == j, 1 };
712  }
713  }
714  }
715 
716  CHECK(FMT("linear_%s_%s", patterns[p].name, type), 4, 4, t, t, {
717  .op = SWS_OP_LINEAR,
718  .type = t,
719  .lin = lin,
720  });
721  }
722  }
723 }
724 
725 static void check_scale(void)
726 {
727  for (SwsPixelType t = F32; t < SWS_PIXEL_TYPE_NB; t++) {
728  const char *type = ff_sws_pixel_type_name(t);
729  const int bits = ff_sws_pixel_type_size(t) * 8;
730  if (ff_sws_pixel_type_is_int(t)) {
731  /* Ensure the result won't exceed the value range */
732  const unsigned max = (1 << bits) - 1;
733  const unsigned scale = rnd() & max;
734  const unsigned range = max / (scale ? scale : 1);
735  CHECK_COMMON_RANGE(FMT("scale_%s", type), range, t, t, {
736  .op = SWS_OP_SCALE,
737  .type = t,
738  .c.q = { scale, 1 },
739  });
740  } else {
741  CHECK_COMMON(FMT("scale_%s", type), t, t, {
742  .op = SWS_OP_SCALE,
743  .type = t,
744  .c.q = rndq(t),
745  });
746  }
747  }
748 }
749 
/* checkasm entry point: run every operation test group and report on each */
void checkasm_check_sw_ops(void)
{
    check_read_write();
    report("read_write");
    check_swap_bytes();
    report("swap_bytes");
    check_pack_unpack();
    report("pack_unpack");
    check_clear();
    report("clear");
    check_shift();
    report("shift");
    check_swizzle();
    report("swizzle");
    check_convert();
    report("convert");
    check_dither();
    report("dither");
    check_min_max();
    report("min_max");
    check_linear();
    report("linear");
    check_scale();
    report("scale");
}
SWS_OP_READ
@ SWS_OP_READ
Definition: ops.h:47
SWS_PIXEL_U16
@ SWS_PIXEL_U16
Definition: ops.h:33
name
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf default minimum maximum flags name is the option name
Definition: writing_filters.txt:88
SWS_OP_SWIZZLE
@ SWS_OP_SWIZZLE
Definition: ops.h:50
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
SwsCompiledOp::func
SwsOpFunc func
Definition: ops_internal.h:105
SWS_OP_LSHIFT
@ SWS_OP_LSHIFT
Definition: ops.h:55
SWS_OP_UNPACK
@ SWS_OP_UNPACK
Definition: ops.h:53
mem_internal.h
comp
static void comp(unsigned char *dst, ptrdiff_t dst_stride, unsigned char *src, ptrdiff_t src_stride, int add)
Definition: eamad.c:79
SwsOpExec::in_bump
ptrdiff_t in_bump[4]
Definition: ops_internal.h:70
SwsConst
Definition: ops.h:79
SWS_OP_CLEAR
@ SWS_OP_CLEAR
Definition: ops.h:59
CHECK_COMMON
#define CHECK_COMMON(NAME, IN, OUT,...)
Definition: sw_ops.c:285
SwsOpExec::in
const uint8_t * in[4]
Definition: ops_internal.h:62
SwsOpExec::out_stride
ptrdiff_t out_stride[4]
Definition: ops_internal.h:67
SwsLinearOp::m
AVRational m[4][5]
Generalized 5x5 affine transformation: [ Out.x ] = [ A B C D E ] [ Out.y ] = [ F G H I J ] * [ x y z ...
Definition: ops.h:158
matrix
Definition: vc1dsp.c:43
src1
const pixel * src1
Definition: h264pred_template.c:420
mask
int mask
Definition: mediacodecdec_common.c:154
SwsOp::rw
SwsReadWriteOp rw
Definition: ops.h:191
check_min_max
static void check_min_max(void)
Definition: sw_ops.c:650
ops.h
u
#define u(width, name, range_min, range_max)
Definition: cbs_apv.c:68
SWS_OP_DITHER
@ SWS_OP_DITHER
Definition: ops.h:67
checkasm_check_sw_ops
void checkasm_check_sw_ops(void)
Definition: sw_ops.c:750
SWS_BITEXACT
@ SWS_BITEXACT
Definition: swscale.h:158
SwsOpExec::block_size_in
int32_t block_size_in
Definition: ops_internal.h:76
check_convert
static void check_convert(void)
Definition: sw_ops.c:568
chroma
static av_always_inline void chroma(WaveformContext *s, AVFrame *in, AVFrame *out, int component, int intensity, int offset_y, int offset_x, int column, int mirror, int jobnr, int nb_jobs)
Definition: vf_waveform.c:1639
SWS_MASK_MAT3
@ SWS_MASK_MAT3
Definition: ops.h:174
SWS_MASK_OFF3
@ SWS_MASK_OFF3
Definition: ops.h:173
check_swap_bytes
static void check_swap_bytes(void)
Definition: sw_ops.c:375
CHECK_COMMON_RANGE
#define CHECK_COMMON_RANGE(NAME, RANGE, IN, OUT,...)
Definition: sw_ops.c:272
check_read_write
static void check_read_write(void)
Definition: sw_ops.c:288
max
#define max(a, b)
Definition: cuda_runtime.h:33
SWS_PIXEL_U32
@ SWS_PIXEL_U32
Definition: ops.h:34
SwsOpExec::in_stride
ptrdiff_t in_stride[4]
Definition: ops_internal.h:66
check_linear
static void check_linear(void)
Definition: sw_ops.c:668
SwsOpBackend::name
const char * name
Definition: ops_internal.h:119
ff_sws_pixel_type_size
int ff_sws_pixel_type_size(SwsPixelType type)
Definition: ops.c:69
rndf
static float rndf(void)
Definition: sw_ops.c:64
SWS_MASK_ROW
#define SWS_MASK_ROW(I)
Definition: ops.h:164
DECLARE_ALIGNED_64
#define DECLARE_ALIGNED_64(t, v)
Definition: mem_internal.h:114
check_clear
static void check_clear(void)
Definition: sw_ops.c:445
SwsPixelType
SwsPixelType
Copyright (C) 2025 Niklas Haas.
Definition: ops.h:30
CHECK_RANGE
#define CHECK_RANGE(NAME, RANGE, N_IN, N_OUT, IN, OUT,...)
Definition: sw_ops.c:269
SWS_PIXEL_F32
@ SWS_PIXEL_F32
Definition: ops.h:35
ff_sws_op_backends
const SwsOpBackend *const ff_sws_op_backends[]
Definition: ops.c:36
check_scale
static void check_scale(void)
Definition: sw_ops.c:725
fail
#define fail()
Definition: checkasm.h:218
SwsOpList::num_ops
int num_ops
Definition: ops.h:224
checkasm.h
SWS_PIXEL_U8
@ SWS_PIXEL_U8
Definition: ops.h:32
ff_sws_pixel_type_is_int
bool ff_sws_pixel_type_is_int(SwsPixelType type)
Definition: ops.c:84
type
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf type
Definition: writing_filters.txt:86
NB_PLANES
@ NB_PLANES
Definition: sw_ops.c:34
ff_sws_ops_compile_backend
int ff_sws_ops_compile_backend(SwsContext *ctx, const SwsOpBackend *backend, const SwsOpList *ops, SwsCompiledOp *out)
Attempt to compile a list of operations using a specific backend.
refstruct.h
SwsLinearOp::mask
uint32_t mask
Definition: ops.h:159
av_refstruct_allocz
static void * av_refstruct_allocz(size_t size)
Equivalent to av_refstruct_alloc_ext(size, 0, NULL, NULL)
Definition: refstruct.h:105
SwsOp::op
SwsOpType op
Definition: ops.h:187
SWS_OP_SCALE
@ SWS_OP_SCALE
Definition: ops.h:63
avassert.h
SWS_MASK_ALPHA
@ SWS_MASK_ALPHA
Definition: ops.h:170
rnd
#define rnd()
Definition: checkasm.h:201
FF_ARRAY_ELEMS
#define FF_ARRAY_ELEMS(a)
Definition: sinewin_tablegen.c:29
float
float
Definition: af_crystalizer.c:122
U8
@ U8
Definition: sw_ops.c:39
AV_CEIL_RSHIFT
#define AV_CEIL_RSHIFT(a, b)
Definition: common.h:60
SWS_SWIZZLE
#define SWS_SWIZZLE(X, Y, Z, W)
Definition: ops.h:132
AVFormatContext::flags
int flags
Flags modifying the (de)muxer behaviour.
Definition: avformat.h:1415
op
static int op(uint8_t **dst, const uint8_t *dst_end, GetByteContext *gb, int pixel, int count, int *x, int width, int linesize)
Perform decode operation.
Definition: anm.c:76
bits
uint8_t bits
Definition: vp3data.h:128
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:42
SWS_OP_MIN
@ SWS_OP_MIN
Definition: ops.h:61
fill32
static void fill32(uint32_t *line, int num, unsigned range)
Definition: sw_ops.c:80
MK_RANGES
#define MK_RANGES(R)
Definition: sw_ops.c:268
ctx
static AVFormatContext * ctx
Definition: movenc.c:49
FMT
#define FMT(fmt,...)
Definition: sw_ops.c:45
SWS_OP_LINEAR
@ SWS_OP_LINEAR
Definition: ops.h:66
SwsOpBackend
Definition: ops_internal.h:118
SWS_OP_PACK
@ SWS_OP_PACK
Definition: ops.h:54
SwsOpExec::height
int32_t height
Definition: ops_internal.h:74
SwsOpExec
Global execution context for all compiled functions.
Definition: ops_internal.h:60
fill16
static void fill16(uint16_t *line, int num, unsigned range)
Definition: sw_ops.c:86
SWS_MASK_LUMA
@ SWS_MASK_LUMA
Definition: ops.h:169
rw_pixel_bits
static int rw_pixel_bits(const SwsOp *op)
Definition: sw_ops.c:55
NULL
#define NULL
Definition: coverity.c:32
U16
@ U16
Definition: sw_ops.c:40
tprintf
static const char * tprintf(char buf[], size_t size, const char *fmt,...)
Definition: sw_ops.c:46
AVRational
Rational number (pair of numerator and denominator).
Definition: rational.h:58
SwsOpExec::slice_h
int32_t slice_h
Definition: ops_internal.h:75
F32
@ F32
Definition: sw_ops.c:42
SWS_MASK
#define SWS_MASK(I, J)
Definition: ops.h:162
LINES
@ LINES
Definition: sw_ops.c:33
SwsPackOp::pattern
uint8_t pattern[4]
Packed bits are assumed to be LSB-aligned within the underlying integer type; i.e.
Definition: ops.h:117
f
f
Definition: af_crystalizer.c:122
SwsOpExec::block_size_out
int32_t block_size_out
Definition: ops_internal.h:77
sws_alloc_context
SwsContext * sws_alloc_context(void)
Allocate an empty SwsContext and set its fields to default values.
Definition: utils.c:1032
shift
static int shift(int a, int b)
Definition: bonk.c:261
i
#define i(width, name, range_min, range_max)
Definition: cbs_h264.c:63
SwsOp::type
SwsPixelType type
Definition: ops.h:188
check_ops
static void check_ops(const char *report, const unsigned ranges[NB_PLANES], const SwsOp *ops)
Definition: sw_ops.c:106
size
int size
Definition: twinvq_data.h:10344
SWS_OP_RSHIFT
@ SWS_OP_RSHIFT
Definition: ops.h:56
range
enum AVColorRange range
Definition: mediacodec_wrapper.c:2594
SWS_OP_WRITE
@ SWS_OP_WRITE
Definition: ops.h:48
line
Definition: graph2dot.c:48
SwsLinearOp
Definition: ops.h:145
zero
static int zero(InterplayACMContext *s, unsigned ind, unsigned col)
Definition: interplayacm.c:121
av_refstruct_unref
void av_refstruct_unref(void *objp)
Decrement the reference count of the underlying object and automatically free the object if there are...
Definition: refstruct.c:120
fill32f
static void fill32f(float *line, int num, unsigned range)
Definition: sw_ops.c:73
SwsOpExec::out
uint8_t * out[4]
Definition: ops_internal.h:63
report
#define report
Definition: checkasm.h:221
PIXELS
@ PIXELS
Definition: sw_ops.c:35
SwsOpList::ops
SwsOp * ops
Definition: ops.h:223
SwsPackOp
Definition: ops.h:112
vsnprintf
#define vsnprintf
Definition: snprintf.h:36
av_assert1
#define av_assert1(cond)
assert() equivalent, that does not lie in speed critical code.
Definition: avassert.h:58
SwsConst::q4
AVRational q4[4]
Definition: ops.h:81
ops_internal.h
checkasm_call_checked
#define checkasm_call_checked(func,...)
Definition: checkasm.h:333
SwsOp
Definition: ops.h:186
SwsOpExec::width
int32_t width
Definition: ops_internal.h:74
SwsCompiledOp::priv
void * priv
Definition: ops_internal.h:114
SwsCompiledOp::block_size
int block_size
Definition: ops_internal.h:108
ret
ret
Definition: filter_design.txt:187
check_shift
static void check_shift(void)
Definition: sw_ops.c:502
SWS_OP_MAX
@ SWS_OP_MAX
Definition: ops.h:62
SwsCompiledOp
Definition: ops_internal.h:104
bench
#define bench(func,...)
Definition: checkasm.h:418
id
enum AVCodecID id
Definition: dts2pts.c:549
check_swizzle
static void check_swizzle(void)
Definition: sw_ops.c:525
SWS_PIXEL_TYPE_NB
@ SWS_PIXEL_TYPE_NB
Definition: ops.h:36
SwsReadWriteOp::packed
bool packed
Definition: ops.h:101
ff_sws_pixel_type_name
const char * ff_sws_pixel_type_name(SwsPixelType type)
Definition: ops.c:54
SWS_OP_SWAP_BYTES
@ SWS_OP_SWAP_BYTES
Definition: ops.h:49
Windows::Graphics::DirectX::Direct3D11::p
IDirect3DDxgiInterfaceAccess _COM_Outptr_ void ** p
Definition: vsrc_gfxcapture_winrt.hpp:53
SWS_MASK_DIAG3
@ SWS_MASK_DIAG3
Definition: ops.h:172
src0
const pixel *const src0
Definition: h264pred_template.c:419
rndq
static AVRational rndq(SwsPixelType t)
Definition: sw_ops.c:433
check_key
#define check_key(key,...)
Definition: checkasm.h:209
w
uint8_t w
Definition: llvidencdsp.c:39
declare_func
#define declare_func(ret,...)
Definition: checkasm.h:213
scale
static void scale(int *out, const int *in, const int w, const int h, const int shift)
Definition: intra.c:278
SWS_MASK_DIAG4
@ SWS_MASK_DIAG4
Definition: ops.h:178
alpha
static const int16_t alpha[]
Definition: ilbcdata.h:55
SWS_MASK_MAT4
@ SWS_MASK_MAT4
Definition: ops.h:180
SWS_OP_CONVERT
@ SWS_OP_CONVERT
Definition: ops.h:60
fill8
static void fill8(uint8_t *line, int num, unsigned range)
Definition: sw_ops.c:96
sws_free_context
void sws_free_context(SwsContext **ctx)
Free the context and everything associated with it, and write NULL to the provided pointer.
Definition: utils.c:2329
checkasm_call
#define checkasm_call(func,...)
Definition: checkasm.h:227
check_pack_unpack
static void check_pack_unpack(void)
Definition: sw_ops.c:388
stride
#define stride
Definition: h264pred_template.c:536
checkasm_check
#define checkasm_check(prefix,...)
Definition: checkasm.h:467
SwsOpList
Helper struct for representing a list of operations.
Definition: ops.h:222
SWS_MASK_OFF4
@ SWS_MASK_OFF4
Definition: ops.h:179
SwsContext
Main external API structure.
Definition: swscale.h:191
CHECK
#define CHECK(NAME, N_IN, N_OUT, IN, OUT,...)
Definition: sw_ops.c:282
CHECK_RANGES
#define CHECK_RANGES(NAME, RANGES, N_IN, N_OUT, IN, OUT,...)
Definition: sw_ops.c:251
SwsOpExec::out_bump
ptrdiff_t out_bump[4]
Definition: ops_internal.h:71
check_dither
static void check_dither(void)
Definition: sw_ops.c:613
U32
@ U32
Definition: sw_ops.c:41
SwsCompiledOp::free
void(* free)(void *priv)
Definition: ops_internal.h:115