FFmpeg
ops_tmpl_float.c
Go to the documentation of this file.
1 /**
2  * Copyright (C) 2025 Niklas Haas
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/avassert.h"
22 
23 #include "ops_backend.h"
24 
25 #ifndef BIT_DEPTH
26 # define BIT_DEPTH 32
27 #endif
28 
29 #if BIT_DEPTH == 32
30 # define PIXEL_TYPE SWS_PIXEL_F32
31 # define PIXEL_MAX FLT_MAX
32 # define pixel_t float
33 # define inter_t float
34 # define block_t f32block_t
35 # define px f32
36 #else
37 # error Invalid BIT_DEPTH
38 #endif
39 
40 #define IS_FLOAT 1
41 #define FMT_CHAR f
42 #include "ops_tmpl_common.c"
43 
45 {
46  const SwsOp *op = params->op;
47  const int size = 1 << op->dither.size_log2;
48  if (size == 1) {
49  /* We special case this value */
50  av_assert1(!av_cmp_q(op->dither.matrix[0], av_make_q(1, 2)));
51  out->priv.ptr = NULL;
52  return 0;
53  }
54 
55  const int width = FFMAX(size, SWS_BLOCK_SIZE);
56  pixel_t *matrix = out->priv.ptr = av_malloc(sizeof(pixel_t) * size * width);
57  if (!matrix)
58  return AVERROR(ENOMEM);
59  out->free = ff_op_priv_free;
60 
61  static_assert(sizeof(out->priv.ptr) <= sizeof(uint8_t[8]),
62  ">8 byte pointers not supported");
63 
64  int8_t *offset = &out->priv.i8[8];
65  for (int i = 0; i < 4; i++)
66  offset[i] = op->dither.y_offset[i];
67 
68  for (int y = 0; y < size; y++) {
69  for (int x = 0; x < size; x++)
70  matrix[y * width + x] = av_q2pixel(op->dither.matrix[y * size + x]);
71  for (int x = size; x < width; x++) /* pad to block size */
72  matrix[y * width + x] = matrix[y * width + (x % size)];
73  }
74 
75  return 0;
76 }
77 
78 DECL_FUNC(dither, const int size_log2)
79 {
80  const pixel_t *restrict matrix = impl->priv.ptr;
81  const int8_t *restrict offset = &impl->priv.i8[8];
82  const int mask = (1 << size_log2) - 1;
83  const int y_line = iter->y;
84  const int size = 1 << size_log2;
85  const int width = FFMAX(size, SWS_BLOCK_SIZE);
86  const int base = iter->x & ~(SWS_BLOCK_SIZE - 1) & (size - 1);
87 
88 #define DITHER_COMP(VAR, IDX) \
89  if (offset[IDX] >= 0) { \
90  const int row = (y_line + offset[IDX]) & mask; \
91  SWS_LOOP \
92  for (int i = 0; i < SWS_BLOCK_SIZE; i++) \
93  VAR[i] += size_log2 ? matrix[row * width + base + i] : (pixel_t) 0.5; \
94  }
95 
96  DITHER_COMP(x, 0)
97  DITHER_COMP(y, 1)
98  DITHER_COMP(z, 2)
99  DITHER_COMP(w, 3)
100 
101  CONTINUE(x, y, z, w);
102 }
103 
104 #define WRAP_DITHER(N) \
105 DECL_IMPL(dither, dither##N, N) \
106  \
107 DECL_ENTRY(dither##N, SWS_COMP_ALL, \
108  .op = SWS_OP_DITHER, \
109  .dither_size = N, \
110  .setup = fn(setup_dither), \
111 );
112 
113 WRAP_DITHER(0)
114 WRAP_DITHER(1)
115 WRAP_DITHER(2)
116 WRAP_DITHER(3)
117 WRAP_DITHER(4)
118 WRAP_DITHER(5)
119 WRAP_DITHER(6)
120 WRAP_DITHER(7)
121 WRAP_DITHER(8)
122 
123 typedef struct {
124  /* Stored in split form for convenience */
125  pixel_t m[4][4];
126  pixel_t k[4];
127 } fn(LinCoeffs);
128 
130 {
131  const SwsOp *op = params->op;
132  fn(LinCoeffs) c;
133 
134  for (int i = 0; i < 4; i++) {
135  for (int j = 0; j < 4; j++)
136  c.m[i][j] = av_q2pixel(op->lin.m[i][j]);
137  c.k[i] = av_q2pixel(op->lin.m[i][4]);
138  }
139 
140  return SETUP_MEMDUP(c, out);
141 }
142 
143 /**
144  * Fully general case for a 5x5 linear affine transformation. Should never be
145  * called without constant `mask`. This function will compile down to the
146  * appropriately optimized version for the required subset of operations when
147  * called with a constant mask.
148  */
149 DECL_FUNC(linear_mask, const uint32_t mask)
150 {
151  const fn(LinCoeffs) c = *(const fn(LinCoeffs) *) impl->priv.ptr;
152 
153  SWS_LOOP
154  for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
155  const pixel_t xx = x[i];
156  const pixel_t yy = y[i];
157  const pixel_t zz = z[i];
158  const pixel_t ww = w[i];
159 
160  x[i] = (mask & SWS_MASK_OFF(0)) ? c.k[0] : 0;
161  x[i] += (mask & SWS_MASK(0, 0)) ? c.m[0][0] * xx : xx;
162  x[i] += (mask & SWS_MASK(0, 1)) ? c.m[0][1] * yy : 0;
163  x[i] += (mask & SWS_MASK(0, 2)) ? c.m[0][2] * zz : 0;
164  x[i] += (mask & SWS_MASK(0, 3)) ? c.m[0][3] * ww : 0;
165 
166  y[i] = (mask & SWS_MASK_OFF(1)) ? c.k[1] : 0;
167  y[i] += (mask & SWS_MASK(1, 0)) ? c.m[1][0] * xx : 0;
168  y[i] += (mask & SWS_MASK(1, 1)) ? c.m[1][1] * yy : yy;
169  y[i] += (mask & SWS_MASK(1, 2)) ? c.m[1][2] * zz : 0;
170  y[i] += (mask & SWS_MASK(1, 3)) ? c.m[1][3] * ww : 0;
171 
172  z[i] = (mask & SWS_MASK_OFF(2)) ? c.k[2] : 0;
173  z[i] += (mask & SWS_MASK(2, 0)) ? c.m[2][0] * xx : 0;
174  z[i] += (mask & SWS_MASK(2, 1)) ? c.m[2][1] * yy : 0;
175  z[i] += (mask & SWS_MASK(2, 2)) ? c.m[2][2] * zz : zz;
176  z[i] += (mask & SWS_MASK(2, 3)) ? c.m[2][3] * ww : 0;
177 
178  w[i] = (mask & SWS_MASK_OFF(3)) ? c.k[3] : 0;
179  w[i] += (mask & SWS_MASK(3, 0)) ? c.m[3][0] * xx : 0;
180  w[i] += (mask & SWS_MASK(3, 1)) ? c.m[3][1] * yy : 0;
181  w[i] += (mask & SWS_MASK(3, 2)) ? c.m[3][2] * zz : 0;
182  w[i] += (mask & SWS_MASK(3, 3)) ? c.m[3][3] * ww : ww;
183  }
184 
185  CONTINUE(x, y, z, w);
186 }
187 
188 #define WRAP_LINEAR(NAME, MASK) \
189 DECL_IMPL(linear_mask, linear_##NAME, MASK) \
190  \
191 DECL_ENTRY(linear_##NAME, SWS_COMP_ALL, \
192  .op = SWS_OP_LINEAR, \
193  .setup = fn(setup_linear), \
194  .linear_mask = (MASK), \
195 );
196 
200 WRAP_LINEAR(yalpha, SWS_MASK(1, 1)) /* ya alpha */
201 WRAP_LINEAR(dot3, 0x7)
202 WRAP_LINEAR(dot3a, 0x7 | SWS_MASK_ALPHA)
203 WRAP_LINEAR(row0, SWS_MASK_ROW(0) ^ SWS_MASK(0, 3)) /* row0 sans alpha */
209 WRAP_LINEAR(affine3x, SWS_MASK_MAT3 ^ SWS_MASK(0, 1) | SWS_MASK_OFF3)
211 WRAP_LINEAR(affine3xy, SWS_MASK_MAT3 ^ SWS_MASK(0, 0) ^ SWS_MASK(0, 1) | SWS_MASK_OFF3)
213 
214 static const SwsOpTable fn(op_table_float) = {
215  .block_size = SWS_BLOCK_SIZE,
216  .entries = {
217  REF_COMMON_PATTERNS(convert_uint8),
218  REF_COMMON_PATTERNS(convert_uint16),
219  REF_COMMON_PATTERNS(convert_uint32),
220 
221  &fn(op_clear_0001),
225 
226  &fn(op_dither0),
227  &fn(op_dither1),
228  &fn(op_dither2),
229  &fn(op_dither3),
230  &fn(op_dither4),
231  &fn(op_dither5),
232  &fn(op_dither6),
233  &fn(op_dither7),
234  &fn(op_dither8),
235 
236  &fn(op_clear_0001),
237  &fn(op_clear_1000),
238  &fn(op_clear_1100),
239 
240  &fn(op_linear_luma),
241  &fn(op_linear_alpha),
242  &fn(op_linear_lumalpha),
243  &fn(op_linear_yalpha),
244  &fn(op_linear_dot3),
245  &fn(op_linear_dot3a),
246  &fn(op_linear_row0),
247  &fn(op_linear_diag3),
248  &fn(op_linear_diag4),
249  &fn(op_linear_diagoff3),
250  &fn(op_linear_affine3),
251  &fn(op_linear_affine3uv),
252  &fn(op_linear_affine3x),
253  &fn(op_linear_affine3xa),
254  &fn(op_linear_affine3xy),
255  &fn(op_linear_affine3a),
256 
257  &fn(op_filter1_v),
258  &fn(op_filter2_v),
259  &fn(op_filter3_v),
260  &fn(op_filter4_v),
261 
262  &fn(op_filter1_h),
263  &fn(op_filter2_h),
264  &fn(op_filter3_h),
265  &fn(op_filter4_h),
266 
267  NULL
268  },
269 };
270 
271 #undef PIXEL_TYPE
272 #undef PIXEL_MAX
273 #undef pixel_t
274 #undef inter_t
275 #undef block_t
276 #undef px
277 
278 #undef FMT_CHAR
279 #undef IS_FLOAT
SwsOpTable
Copyright (C) 2025 Niklas Haas.
Definition: ops_chain.h:159
DITHER_COMP
#define DITHER_COMP(VAR, IDX)
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
ops_backend.h
out
static FILE * out
Definition: movenc.c:55
matrix
Definition: vc1dsp.c:43
mask
int mask
Definition: mediacodecdec_common.c:154
CONTINUE
#define CONTINUE(X, Y, Z, W)
Definition: ops_backend.h:115
base
uint8_t base
Definition: vp3data.h:128
max
#define max(a, b)
Definition: cuda_runtime.h:33
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
SETUP_MEMDUP
#define SETUP_MEMDUP(c, out)
Definition: ops_backend.h:125
setup_linear
static int setup_linear(const SwsImplParams *params, SwsImplResult *out)
Definition: ops.c:269
SWS_MASK_ROW
#define SWS_MASK_ROW(I)
Definition: ops.h:212
setup_dither
static int setup_dither(const SwsImplParams *params, SwsImplResult *out)
Definition: ops.c:211
SWS_MASK_LUMA
@ SWS_MASK_LUMA
Definition: ops.h:217
av_q2pixel
#define av_q2pixel(q)
Definition: ops_backend.h:70
DECL_SETUP
DECL_SETUP(setup_dither, params, out)
Definition: ops_tmpl_float.c:44
SWS_MASK_ALPHA
@ SWS_MASK_ALPHA
Definition: ops.h:218
fn
Definition: ops_tmpl_float.c:123
avassert.h
dither
static const uint16_t dither[8][8]
Definition: vf_gradfun.c:46
op
static int op(uint8_t **dst, const uint8_t *dst_end, GetByteContext *gb, int pixel, int count, int *x, int width, int linesize)
Perform decode operation.
Definition: anm.c:76
SWS_LOOP
#define SWS_LOOP
Definition: ops_backend.h:58
SWS_MASK_OFF3
@ SWS_MASK_OFF3
Definition: ops.h:221
SWS_BLOCK_SIZE
#define SWS_BLOCK_SIZE
Copyright (C) 2025 Niklas Haas.
Definition: ops_backend.c:42
NULL
#define NULL
Definition: coverity.c:32
SWS_MASK_DIAG4
@ SWS_MASK_DIAG4
Definition: ops.h:226
SWS_MASK
#define SWS_MASK(I, J)
Definition: ops.h:210
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
DECL_FUNC
DECL_FUNC(dither, const int size_log2)
Definition: ops_tmpl_float.c:78
i
#define i(width, name, range_min, range_max)
Definition: cbs_h264.c:63
for
for(k=2;k<=8;++k)
Definition: h264pred_template.c:424
size
int size
Definition: twinvq_data.h:10344
av_make_q
static AVRational av_make_q(int num, int den)
Create an AVRational.
Definition: rational.h:71
fn
#define fn(a)
Definition: aap_template.c:37
pixel_t
#define pixel_t
Definition: ops_tmpl_float.c:32
WRAP_LINEAR
#define WRAP_LINEAR(NAME, MASK)
Definition: ops_tmpl_float.c:188
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
av_malloc
#define av_malloc(s)
Definition: ops_asmgen.c:44
REF_COMMON_PATTERNS
#define REF_COMMON_PATTERNS(NAME)
Definition: ops_backend.h:156
av_assert1
#define av_assert1(cond)
assert() equivalent, that does not lie in speed critical code.
Definition: avassert.h:58
SWS_MASK_MAT3
@ SWS_MASK_MAT3
Definition: ops.h:222
SwsOp
Definition: ops.h:238
ff_op_priv_free
static void ff_op_priv_free(SwsOpPriv *priv)
Definition: ops_chain.h:149
av_cmp_q
static int av_cmp_q(AVRational a, AVRational b)
Compare two rationals.
Definition: rational.h:89
SWS_MASK_OFF
#define SWS_MASK_OFF(I)
Definition: ops.h:211
WRAP_DITHER
#define WRAP_DITHER(N)
Definition: ops_tmpl_float.c:104
ops_tmpl_common.c
w
uint8_t w
Definition: llvidencdsp.c:39
scale
static void scale(int *out, const int *in, const int w, const int h, const int shift)
Definition: intra.c:278
alpha
static const int16_t alpha[]
Definition: ilbcdata.h:55
SWS_MASK_DIAG3
@ SWS_MASK_DIAG3
Definition: ops.h:220
width
#define width
Definition: dsp.h:89
min
float min
Definition: vorbis_enc_data.h:429