/*
 * vf_nlmeans_vulkan.c — Vulkan compute implementation of the non-local means
 * denoising filter.
 */
1 /*
2  * Copyright (c) Lynne
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/mem.h"
22 #include "libavutil/random_seed.h"
23 #include "libavutil/opt.h"
24 #include "vulkan_filter.h"
25 #include "vulkan_spirv.h"
26 #include "internal.h"
27 #include "video.h"
28 
/* The weights shader processes TYPE_ELEMS candidate offsets at a time, packed
 * into one GLSL vector of type TYPE_NAME; TYPE_SIZE is its size in bytes and
 * is used as the buffer-reference alignment for the integral image. */
#define TYPE_NAME "vec4"
#define TYPE_ELEMS 4
#define TYPE_SIZE (TYPE_ELEMS*4)
32 
33 typedef struct NLMeansVulkanContext {
35 
39  VkSampler sampler;
40 
43 
45 
49 
52 
53  int *xoffsets;
54  int *yoffsets;
56  float strength[4];
57  int patch[4];
58 
59  struct nlmeans_opts {
60  int r;
61  double s;
62  double sc[4];
63  int p;
64  int pc[4];
65  int t;
66  } opts;
68 
69 extern const char *ff_source_prefix_sum_comp;
70 
71 static void insert_first(FFVkSPIRVShader *shd, int r, const char *off, int horiz, int plane, int comp)
72 {
73  GLSLF(4, s1 = texture(input_img[%i], pos + ivec2(%i + %s, %i + %s))[%i];
74  ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? off : "0", comp);
75 
76  GLSLF(4, s2[0] = texture(input_img[%i], pos + offs[0] + ivec2(%i + %s, %i + %s))[%i];
77  ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? off : "0", comp);
78  GLSLF(4, s2[1] = texture(input_img[%i], pos + offs[1] + ivec2(%i + %s, %i + %s))[%i];
79  ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? off : "0", comp);
80  GLSLF(4, s2[2] = texture(input_img[%i], pos + offs[2] + ivec2(%i + %s, %i + %s))[%i];
81  ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? off : "0", comp);
82  GLSLF(4, s2[3] = texture(input_img[%i], pos + offs[3] + ivec2(%i + %s, %i + %s))[%i];
83  ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? off : "0", comp);
84 
85  GLSLC(4, s2 = (s1 - s2) * (s1 - s2); );
86 }
87 
/* Emit the GLSL for a horizontal integral-image pass: each invocation walks
 * nb_rows rows, accumulating a running prefix sum along x into the integral
 * buffer. On the first pass the values are freshly sampled squared
 * differences (insert_first); on later passes the buffer is summed in place.
 * A barrier separates this pass from the preceding one when !first. */
static void insert_horizontal_pass(FFVkSPIRVShader *shd, int nb_rows, int first, int plane, int comp)
{
    GLSLF(1,  pos.y = int(gl_GlobalInvocationID.x) * %i;                   ,nb_rows);
    if (!first)
        GLSLC(1, barrier();                                                );
    GLSLC(0,                                                               );
    GLSLF(1,  if (pos.y < height[%i]) {                                    ,plane);
    GLSLC(2,      #pragma unroll(1)                                        );
    GLSLF(2,      for (r = 0; r < %i; r++) {                               ,nb_rows);
    GLSLC(3,          prefix_sum = DTYPE(0);                               );
    GLSLC(3,          offset = int_stride * uint64_t(pos.y + r);           );
    GLSLC(3,          dst = DataBuffer(uint64_t(integral_data) + offset);  );
    GLSLC(0,                                                               );
    GLSLF(3,          for (pos.x = 0; pos.x < width[%i]; pos.x++) {        ,plane);
    if (first)
        insert_first(shd, 0, "r", 0, plane, comp);
    else
        GLSLC(4,          s2 = dst.v[pos.x];                               );
    GLSLC(4,              dst.v[pos.x] = s2 + prefix_sum;                  );
    GLSLC(4,              prefix_sum += s2;                                );
    GLSLC(3,          }                                                    );
    GLSLC(2,      }                                                        );
    GLSLC(1,  }                                                            );
    GLSLC(0,                                                               );
}
113 
/* Emit the GLSL for a vertical integral-image pass: the mirror of
 * insert_horizontal_pass. Each invocation owns nb_rows columns and keeps one
 * running sum per column in psum[], accumulating along y into the integral
 * buffer. A barrier separates this pass from the preceding one when !first. */
static void insert_vertical_pass(FFVkSPIRVShader *shd, int nb_rows, int first, int plane, int comp)
{
    GLSLF(1,  pos.x = int(gl_GlobalInvocationID.x) * %i;                   ,nb_rows);
    GLSLC(1,  #pragma unroll(1)                                            );
    GLSLF(1,  for (r = 0; r < %i; r++)                                     ,nb_rows);
    GLSLC(2,      psum[r] = DTYPE(0);                                      );
    GLSLC(0,                                                               );
    if (!first)
        GLSLC(1, barrier();                                                );
    GLSLC(0,                                                               );
    GLSLF(1,  if (pos.x < width[%i]) {                                     ,plane);
    GLSLF(2,      for (pos.y = 0; pos.y < height[%i]; pos.y++) {           ,plane);
    GLSLC(3,          offset = int_stride * uint64_t(pos.y);               );
    GLSLC(3,          dst = DataBuffer(uint64_t(integral_data) + offset);  );
    GLSLC(0,                                                               );
    GLSLC(3,          #pragma unroll(1)                                    );
    GLSLF(3,          for (r = 0; r < %i; r++) {                           ,nb_rows);
    if (first)
        insert_first(shd, 0, "r", 1, plane, comp);
    else
        GLSLC(4,          s2 = dst.v[pos.x + r];                           );
    GLSLC(4,              dst.v[pos.x + r] = s2 + psum[r];                 );
    GLSLC(4,              psum[r] += s2;                                   );
    GLSLC(3,          }                                                    );
    GLSLC(2,      }                                                        );
    GLSLC(1,  }                                                            );
    GLSLC(0,                                                               );
}
142 
/* Emit the GLSL that turns the finished integral image into NLM weights.
 * For every pixel, the summed-area-table corners (a, b, c, d) around the
 * patch give the per-offset patch distance d + a - b - c; the weight is
 * exp(distance * strength). Weighted sample sums and weight sums are
 * accumulated into the per-component weights_/sums_ buffers — atomically
 * when t > 1 dispatches may run concurrently. The `vert` flag selects which
 * axis the invocation strides over, matching the direction of the final
 * integral pass. */
static void insert_weights_pass(FFVkSPIRVShader *shd, int nb_rows, int vert,
                                int t, int dst_comp, int plane, int comp)
{
    GLSLF(1,  p = patch_size[%i];                                          ,dst_comp);
    GLSLC(0,                                                               );
    GLSLC(1,  barrier();                                                   );
    GLSLC(0,                                                               );
    if (!vert) {
        GLSLF(1, for (pos.y = 0; pos.y < height[%i]; pos.y++) {            ,plane);
        GLSLF(2,     if (gl_GlobalInvocationID.x*%i >= width[%i])          ,nb_rows, plane);
        GLSLC(3,         break;                                            );
        GLSLF(2,     for (r = 0; r < %i; r++) {                            ,nb_rows);
        GLSLF(3,         pos.x = int(gl_GlobalInvocationID.x) * %i + r;    ,nb_rows);
    } else {
        GLSLF(1, for (pos.x = 0; pos.x < width[%i]; pos.x++) {             ,plane);
        GLSLF(2,     if (gl_GlobalInvocationID.x*%i >= height[%i])         ,nb_rows, plane);
        GLSLC(3,         break;                                            );
        GLSLF(2,     for (r = 0; r < %i; r++) {                            ,nb_rows);
        GLSLF(3,         pos.y = int(gl_GlobalInvocationID.x) * %i + r;    ,nb_rows);
    }
    GLSLC(0,                                                               );
    GLSLC(3,          a = DTYPE(0);                                        );
    GLSLC(3,          b = DTYPE(0);                                        );
    GLSLC(3,          c = DTYPE(0);                                        );
    GLSLC(3,          d = DTYPE(0);                                        );
    GLSLC(0,                                                               );
    /* lt: patch would read outside the top/left edge; corners stay zero */
    GLSLC(3,          lt = ((pos.x - p) < 0) || ((pos.y - p) < 0);         );
    GLSLC(0,                                                               );
    GLSLF(3,          src[0] = texture(input_img[%i], pos + offs[0])[%i];  ,plane, comp);
    GLSLF(3,          src[1] = texture(input_img[%i], pos + offs[1])[%i];  ,plane, comp);
    GLSLF(3,          src[2] = texture(input_img[%i], pos + offs[2])[%i];  ,plane, comp);
    GLSLF(3,          src[3] = texture(input_img[%i], pos + offs[3])[%i];  ,plane, comp);
    GLSLC(0,                                                               );
    GLSLC(3,          if (lt == false) {                                   );
    GLSLC(3,              offset = int_stride * uint64_t(pos.y - p);       );
    GLSLC(3,              dst = DataBuffer(uint64_t(integral_data) + offset); );
    GLSLC(4,              a = dst.v[pos.x - p];                            );
    GLSLC(4,              c = dst.v[pos.x + p];                            );
    GLSLC(3,              offset = int_stride * uint64_t(pos.y + p);       );
    GLSLC(3,              dst = DataBuffer(uint64_t(integral_data) + offset); );
    GLSLC(4,              b = dst.v[pos.x - p];                            );
    GLSLC(4,              d = dst.v[pos.x + p];                            );
    GLSLC(3,          }                                                    );
    GLSLC(0,                                                               );
    GLSLC(3,          patch_diff = d + a - b - c;                          );
    GLSLF(3,          w = exp(patch_diff * strength[%i]);                  ,dst_comp);
    GLSLC(3,          w_sum = w[0] + w[1] + w[2] + w[3];                   );
    GLSLC(3,          sum = dot(w, src*255);                               );
    GLSLC(0,                                                               );
    if (t > 1) {
        GLSLF(3,      atomicAdd(weights_%i[pos.y*ws_stride[%i] + pos.x], w_sum); ,dst_comp, dst_comp);
        GLSLF(3,      atomicAdd(sums_%i[pos.y*ws_stride[%i] + pos.x], sum);      ,dst_comp, dst_comp);
    } else {
        GLSLF(3,      weights_%i[pos.y*ws_stride[%i] + pos.x] += w_sum;    ,dst_comp, dst_comp);
        GLSLF(3,      sums_%i[pos.y*ws_stride[%i] + pos.x] += sum;         ,dst_comp, dst_comp);
    }
    GLSLC(2,      }                                                        );
    GLSLC(1,  }                                                            );
}
202 
/* Push-constant block for the weights pipeline. Field order and types must
 * match the std430 pushConstants layout emitted in init_weights_pipeline. */
typedef struct HorizontalPushData {
    uint32_t width[4];             /* per-plane widths */
    uint32_t height[4];            /* per-plane heights */
    uint32_t ws_stride[4];         /* per-component weights/sums row strides */
    int32_t  patch_size[4];        /* per-component patch half-sizes */
    float    strength[4];          /* per-component weight scale factors */
    VkDeviceAddress integral_base; /* base device address of the integral images */
    uint64_t integral_size;        /* bytes per dispatch's integral image */
    uint64_t int_stride;           /* bytes per integral-image row */
    uint32_t xyoffs_start;         /* first (x, y) offset pair for this dispatch */
} HorizontalPushData;
214 
215 static av_cold int init_weights_pipeline(FFVulkanContext *vkctx, FFVkExecPool *exec,
217  VkSampler sampler, FFVkSPIRVCompiler *spv,
218  int width, int height, int t,
219  const AVPixFmtDescriptor *desc,
220  int planes, int *nb_rows)
221 {
222  int err;
223  uint8_t *spv_data;
224  size_t spv_len;
225  void *spv_opaque = NULL;
227  int max_dim = FFMAX(width, height);
228  uint32_t max_wg = vkctx->props.properties.limits.maxComputeWorkGroupSize[0];
229  int wg_size, wg_rows;
230 
231  /* Round the max workgroup size to the previous power of two */
232  wg_size = max_wg;
233  wg_rows = 1;
234 
235  if (max_wg > max_dim) {
236  wg_size = max_dim;
237  } else if (max_wg < max_dim) {
238  /* Make it fit */
239  while (wg_size*wg_rows < max_dim)
240  wg_rows++;
241  }
242 
243  RET(ff_vk_shader_init(pl, shd, "nlmeans_weights", VK_SHADER_STAGE_COMPUTE_BIT, 0));
244  ff_vk_shader_set_compute_sizes(shd, wg_size, 1, 1);
245  *nb_rows = wg_rows;
246 
247  if (t > 1)
248  GLSLC(0, #extension GL_EXT_shader_atomic_float : require );
249  GLSLC(0, #extension GL_ARB_gpu_shader_int64 : require );
250  GLSLC(0, );
251  GLSLF(0, #define DTYPE %s ,TYPE_NAME);
252  GLSLF(0, #define T_ALIGN %i ,TYPE_SIZE);
253  GLSLC(0, );
254  GLSLC(0, layout(buffer_reference, buffer_reference_align = T_ALIGN) buffer DataBuffer { );
255  GLSLC(1, DTYPE v[]; );
256  GLSLC(0, }; );
257  GLSLC(0, );
258  GLSLC(0, layout(push_constant, std430) uniform pushConstants { );
259  GLSLC(1, uvec4 width; );
260  GLSLC(1, uvec4 height; );
261  GLSLC(1, uvec4 ws_stride; );
262  GLSLC(1, ivec4 patch_size; );
263  GLSLC(1, vec4 strength; );
264  GLSLC(1, DataBuffer integral_base; );
265  GLSLC(1, uint64_t integral_size; );
266  GLSLC(1, uint64_t int_stride; );
267  GLSLC(1, uint xyoffs_start; );
268  GLSLC(0, }; );
269  GLSLC(0, );
270 
271  ff_vk_add_push_constant(pl, 0, sizeof(HorizontalPushData), VK_SHADER_STAGE_COMPUTE_BIT);
272 
273  desc_set = (FFVulkanDescriptorSetBinding []) {
274  {
275  .name = "input_img",
276  .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
277  .dimensions = 2,
278  .elems = planes,
279  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
280  .samplers = DUP_SAMPLER(sampler),
281  },
282  {
283  .name = "weights_buffer_0",
284  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
285  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
286  .buf_content = "float weights_0[];",
287  },
288  {
289  .name = "sums_buffer_0",
290  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
291  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
292  .buf_content = "float sums_0[];",
293  },
294  {
295  .name = "weights_buffer_1",
296  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
297  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
298  .buf_content = "float weights_1[];",
299  },
300  {
301  .name = "sums_buffer_1",
302  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
303  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
304  .buf_content = "float sums_1[];",
305  },
306  {
307  .name = "weights_buffer_2",
308  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
309  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
310  .buf_content = "float weights_2[];",
311  },
312  {
313  .name = "sums_buffer_2",
314  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
315  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
316  .buf_content = "float sums_2[];",
317  },
318  {
319  .name = "weights_buffer_3",
320  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
321  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
322  .buf_content = "float weights_3[];",
323  },
324  {
325  .name = "sums_buffer_3",
326  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
327  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
328  .buf_content = "float sums_3[];",
329  },
330  };
331  RET(ff_vk_pipeline_descriptor_set_add(vkctx, pl, shd, desc_set, 1 + 2*desc->nb_components, 0, 0));
332 
333  desc_set = (FFVulkanDescriptorSetBinding []) {
334  {
335  .name = "xyoffsets_buffer",
336  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
337  .mem_quali = "readonly",
338  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
339  .buf_content = "ivec2 xyoffsets[];",
340  },
341  };
342  RET(ff_vk_pipeline_descriptor_set_add(vkctx, pl, shd, desc_set, 1, 1, 0));
343 
344  GLSLC(0, );
345  GLSLC(0, void main() );
346  GLSLC(0, { );
347  GLSLC(1, uint64_t offset; );
348  GLSLC(1, DataBuffer dst; );
349  GLSLC(1, float s1; );
350  GLSLC(1, DTYPE s2; );
351  GLSLC(1, DTYPE prefix_sum; );
352  GLSLF(1, DTYPE psum[%i]; ,*nb_rows);
353  GLSLC(1, int r; );
354  GLSLC(1, ivec2 pos; );
355  GLSLC(1, int p; );
356  GLSLC(0, );
357  GLSLC(1, DataBuffer integral_data; );
358  GLSLF(1, ivec2 offs[%i]; ,TYPE_ELEMS);
359  GLSLC(0, );
360  GLSLC(1, int invoc_idx = int(gl_WorkGroupID.z); );
361  GLSLC(0, );
362  GLSLC(1, offset = integral_size * invoc_idx; );
363  GLSLC(1, integral_data = DataBuffer(uint64_t(integral_base) + offset); );
364  for (int i = 0; i < TYPE_ELEMS; i++)
365  GLSLF(1, offs[%i] = xyoffsets[xyoffs_start + %i*invoc_idx + %i]; ,i,TYPE_ELEMS,i);
366  GLSLC(0, );
367  GLSLC(1, DTYPE a; );
368  GLSLC(1, DTYPE b; );
369  GLSLC(1, DTYPE c; );
370  GLSLC(1, DTYPE d; );
371  GLSLC(0, );
372  GLSLC(1, DTYPE patch_diff; );
373  if (TYPE_ELEMS == 4) {
374  GLSLC(1, vec4 src; );
375  GLSLC(1, vec4 w; );
376  } else {
377  GLSLC(1, vec4 src[4]; );
378  GLSLC(1, vec4 w[4]; );
379  }
380  GLSLC(1, float w_sum; );
381  GLSLC(1, float sum; );
382  GLSLC(0, );
383  GLSLC(1, bool lt; );
384  GLSLC(1, bool gt; );
385  GLSLC(0, );
386 
387  for (int i = 0; i < desc->nb_components; i++) {
388  int off = desc->comp[i].offset / (FFALIGN(desc->comp[i].depth, 8)/8);
389  if (width >= height) {
390  insert_horizontal_pass(shd, *nb_rows, 1, desc->comp[i].plane, off);
391  insert_vertical_pass(shd, *nb_rows, 0, desc->comp[i].plane, off);
392  insert_weights_pass(shd, *nb_rows, 0, t, i, desc->comp[i].plane, off);
393  } else {
394  insert_vertical_pass(shd, *nb_rows, 1, desc->comp[i].plane, off);
395  insert_horizontal_pass(shd, *nb_rows, 0, desc->comp[i].plane, off);
396  insert_weights_pass(shd, *nb_rows, 1, t, i, desc->comp[i].plane, off);
397  }
398  }
399 
400  GLSLC(0, } );
401 
402  RET(spv->compile_shader(spv, vkctx, shd, &spv_data, &spv_len, "main", &spv_opaque));
403  RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main"));
404 
405  RET(ff_vk_init_compute_pipeline(vkctx, pl, shd));
406  RET(ff_vk_exec_pipeline_register(vkctx, exec, pl));
407 
408 fail:
409  if (spv_opaque)
410  spv->free_shader(spv, &spv_opaque);
411 
412  return err;
413 }
414 
/* Push-constant block for the denoise pipeline; must match the std430
 * pushConstants layout emitted in init_denoise_pipeline. */
typedef struct DenoisePushData {
    uint32_t ws_stride[4]; /* per-component weights/sums row strides */
} DenoisePushData;
418 
419 static av_cold int init_denoise_pipeline(FFVulkanContext *vkctx, FFVkExecPool *exec,
421  VkSampler sampler, FFVkSPIRVCompiler *spv,
422  const AVPixFmtDescriptor *desc, int planes)
423 {
424  int err;
425  uint8_t *spv_data;
426  size_t spv_len;
427  void *spv_opaque = NULL;
429 
430  RET(ff_vk_shader_init(pl, shd, "nlmeans_denoise",
431  VK_SHADER_STAGE_COMPUTE_BIT, 0));
432 
433  ff_vk_shader_set_compute_sizes(shd, 32, 32, 1);
434 
435  GLSLC(0, layout(push_constant, std430) uniform pushConstants { );
436  GLSLC(1, uvec4 ws_stride; );
437  GLSLC(0, }; );
438 
439  ff_vk_add_push_constant(pl, 0, sizeof(DenoisePushData), VK_SHADER_STAGE_COMPUTE_BIT);
440 
441  desc_set = (FFVulkanDescriptorSetBinding []) {
442  {
443  .name = "input_img",
444  .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
445  .dimensions = 2,
446  .elems = planes,
447  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
448  .samplers = DUP_SAMPLER(sampler),
449  },
450  {
451  .name = "output_img",
452  .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
453  .mem_layout = ff_vk_shader_rep_fmt(vkctx->output_format),
454  .mem_quali = "writeonly",
455  .dimensions = 2,
456  .elems = planes,
457  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
458  },
459  {
460  .name = "weights_buffer_0",
461  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
462  .mem_quali = "readonly",
463  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
464  .buf_content = "float weights_0[];",
465  },
466  {
467  .name = "sums_buffer_0",
468  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
469  .mem_quali = "readonly",
470  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
471  .buf_content = "float sums_0[];",
472  },
473  {
474  .name = "weights_buffer_1",
475  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
476  .mem_quali = "readonly",
477  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
478  .buf_content = "float weights_1[];",
479  },
480  {
481  .name = "sums_buffer_1",
482  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
483  .mem_quali = "readonly",
484  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
485  .buf_content = "float sums_1[];",
486  },
487  {
488  .name = "weights_buffer_2",
489  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
490  .mem_quali = "readonly",
491  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
492  .buf_content = "float weights_2[];",
493  },
494  {
495  .name = "sums_buffer_2",
496  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
497  .mem_quali = "readonly",
498  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
499  .buf_content = "float sums_2[];",
500  },
501  {
502  .name = "weights_buffer_3",
503  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
504  .mem_quali = "readonly",
505  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
506  .buf_content = "float weights_3[];",
507  },
508  {
509  .name = "sums_buffer_3",
510  .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
511  .mem_quali = "readonly",
512  .stages = VK_SHADER_STAGE_COMPUTE_BIT,
513  .buf_content = "float sums_3[];",
514  },
515  };
516  RET(ff_vk_pipeline_descriptor_set_add(vkctx, pl, shd, desc_set, 2 + 2*desc->nb_components, 0, 0));
517 
518  GLSLC(0, void main() );
519  GLSLC(0, { );
520  GLSLC(1, ivec2 size; );
521  GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
522  GLSLC(1, const uint plane = uint(gl_WorkGroupID.z); );
523  GLSLC(0, );
524  GLSLC(1, float w_sum; );
525  GLSLC(1, float sum; );
526  GLSLC(1, vec4 src; );
527  GLSLC(1, vec4 r; );
528  GLSLC(0, );
529  GLSLC(1, size = imageSize(output_img[plane]); );
530  GLSLC(1, if (!IS_WITHIN(pos, size)) );
531  GLSLC(2, return; );
532  GLSLC(0, );
533  GLSLC(1, src = texture(input_img[plane], pos); );
534  GLSLC(0, );
535  for (int c = 0; c < desc->nb_components; c++) {
536  int off = desc->comp[c].offset / (FFALIGN(desc->comp[c].depth, 8)/8);
537  GLSLF(1, if (plane == %i) { ,desc->comp[c].plane);
538  GLSLF(2, w_sum = weights_%i[pos.y*ws_stride[%i] + pos.x]; ,c, c);
539  GLSLF(2, sum = sums_%i[pos.y*ws_stride[%i] + pos.x]; ,c, c);
540  GLSLF(2, r[%i] = (sum + src[%i]*255) / (1.0 + w_sum) / 255; ,off, off);
541  GLSLC(1, } );
542  GLSLC(0, );
543  }
544  GLSLC(1, imageStore(output_img[plane], pos, r); );
545  GLSLC(0, } );
546 
547  RET(spv->compile_shader(spv, vkctx, shd, &spv_data, &spv_len, "main", &spv_opaque));
548  RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main"));
549 
550  RET(ff_vk_init_compute_pipeline(vkctx, pl, shd));
551  RET(ff_vk_exec_pipeline_register(vkctx, exec, pl));
552 
553 fail:
554  if (spv_opaque)
555  spv->free_shader(spv, &spv_opaque);
556 
557  return err;
558 }
559 
561 {
562  int rad, err;
563  int xcnt = 0, ycnt = 0;
564  NLMeansVulkanContext *s = ctx->priv;
565  FFVulkanContext *vkctx = &s->vkctx;
566  const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
567  FFVkSPIRVCompiler *spv;
568  int *offsets_buf;
569  int offsets_dispatched = 0, nb_dispatches = 0;
570 
571  const AVPixFmtDescriptor *desc;
573  if (!desc)
574  return AVERROR(EINVAL);
575 
576  if (!(s->opts.r & 1)) {
577  s->opts.r |= 1;
578  av_log(ctx, AV_LOG_WARNING, "Research size should be odd, setting to %i",
579  s->opts.r);
580  }
581 
582  if (!(s->opts.p & 1)) {
583  s->opts.p |= 1;
584  av_log(ctx, AV_LOG_WARNING, "Patch size should be odd, setting to %i",
585  s->opts.p);
586  }
587 
588  for (int i = 0; i < 4; i++) {
589  double str = (s->opts.sc[i] > 1.0) ? s->opts.sc[i] : s->opts.s;
590  int ps = (s->opts.pc[i] ? s->opts.pc[i] : s->opts.p);
591  str = 10.0f*str;
592  str *= -str;
593  str = 255.0*255.0 / str;
594  s->strength[i] = str;
595  if (!(ps & 1)) {
596  ps |= 1;
597  av_log(ctx, AV_LOG_WARNING, "Patch size should be odd, setting to %i",
598  ps);
599  }
600  s->patch[i] = ps / 2;
601  }
602 
603  rad = s->opts.r/2;
604  s->nb_offsets = (2*rad + 1)*(2*rad + 1) - 1;
605  s->xoffsets = av_malloc(s->nb_offsets*sizeof(*s->xoffsets));
606  s->yoffsets = av_malloc(s->nb_offsets*sizeof(*s->yoffsets));
607  s->nb_offsets = 0;
608 
609  for (int x = -rad; x <= rad; x++) {
610  for (int y = -rad; y <= rad; y++) {
611  if (!x && !y)
612  continue;
613 
614  s->xoffsets[xcnt++] = x;
615  s->yoffsets[ycnt++] = y;
616  s->nb_offsets++;
617  }
618  }
619 
620  RET(ff_vk_create_buf(&s->vkctx, &s->xyoffsets_buf, 2*s->nb_offsets*sizeof(int32_t), NULL, NULL,
621  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
622  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
623  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
624  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT));
625  RET(ff_vk_map_buffer(&s->vkctx, &s->xyoffsets_buf, (uint8_t **)&offsets_buf, 0));
626 
627  for (int i = 0; i < 2*s->nb_offsets; i += 2) {
628  offsets_buf[i + 0] = s->xoffsets[i >> 1];
629  offsets_buf[i + 1] = s->yoffsets[i >> 1];
630  }
631 
632  RET(ff_vk_unmap_buffer(&s->vkctx, &s->xyoffsets_buf, 1));
633 
634  s->opts.t = FFMIN(s->opts.t, (FFALIGN(s->nb_offsets, TYPE_ELEMS) / TYPE_ELEMS));
635  if (!vkctx->atomic_float_feats.shaderBufferFloat32AtomicAdd) {
636  av_log(ctx, AV_LOG_WARNING, "Device doesn't support atomic float adds, "
637  "disabling dispatch parallelism\n");
638  s->opts.t = 1;
639  }
640 
641  spv = ff_vk_spirv_init();
642  if (!spv) {
643  av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
644  return AVERROR_EXTERNAL;
645  }
646 
647  ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT);
648  RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, 1, 0, 0, 0, NULL));
649  RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_NEAREST));
650 
651  RET(init_weights_pipeline(vkctx, &s->e, &s->pl_weights, &s->shd_weights, s->sampler,
652  spv, s->vkctx.output_width, s->vkctx.output_height,
653  s->opts.t, desc, planes, &s->pl_weights_rows));
654 
655  RET(init_denoise_pipeline(vkctx, &s->e, &s->pl_denoise, &s->shd_denoise, s->sampler,
656  spv, desc, planes));
657 
658  RET(ff_vk_set_descriptor_buffer(&s->vkctx, &s->pl_weights, NULL, 1, 0, 0,
659  s->xyoffsets_buf.address, s->xyoffsets_buf.size,
660  VK_FORMAT_UNDEFINED));
661 
662  do {
663  int wg_invoc = FFMIN((s->nb_offsets - offsets_dispatched)/TYPE_ELEMS, s->opts.t);
664  wg_invoc = FFMIN(wg_invoc, vkctx->props.properties.limits.maxComputeWorkGroupCount[2]);
665  offsets_dispatched += wg_invoc * TYPE_ELEMS;
666  nb_dispatches++;
667  } while (offsets_dispatched < s->nb_offsets);
668 
669  av_log(ctx, AV_LOG_VERBOSE, "Filter initialized, %i x/y offsets, %i dispatches\n",
670  s->nb_offsets, nb_dispatches);
671 
672  s->initialized = 1;
673 
674 fail:
675  if (spv)
676  spv->uninit(&spv);
677 
678  return err;
679 }
680 
681 static int denoise_pass(NLMeansVulkanContext *s, FFVkExecContext *exec,
682  FFVkBuffer *ws_vk, uint32_t ws_stride[4])
683 {
684  FFVulkanContext *vkctx = &s->vkctx;
685  FFVulkanFunctions *vk = &vkctx->vkfn;
686  VkBufferMemoryBarrier2 buf_bar[8];
687  int nb_buf_bar = 0;
688 
689  /* Denoise pass pipeline */
690  ff_vk_exec_bind_pipeline(vkctx, exec, &s->pl_denoise);
691 
692  /* Push data */
693  ff_vk_update_push_exec(vkctx, exec, &s->pl_denoise, VK_SHADER_STAGE_COMPUTE_BIT,
694  0, sizeof(DenoisePushData), &(DenoisePushData) {
695  { ws_stride[0], ws_stride[1], ws_stride[2], ws_stride[3] },
696  });
697 
698  buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
699  .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
700  .srcStageMask = ws_vk->stage,
701  .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
702  .srcAccessMask = ws_vk->access,
703  .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT,
704  .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
705  .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
706  .buffer = ws_vk->buf,
707  .size = ws_vk->size,
708  .offset = 0,
709  };
710 
711  vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
712  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
713  .pBufferMemoryBarriers = buf_bar,
714  .bufferMemoryBarrierCount = nb_buf_bar,
715  });
716  ws_vk->stage = buf_bar[0].dstStageMask;
717  ws_vk->access = buf_bar[0].dstAccessMask;
718 
719  /* End of denoise pass */
720  vk->CmdDispatch(exec->buf,
721  FFALIGN(vkctx->output_width, s->pl_denoise.wg_size[0])/s->pl_denoise.wg_size[0],
722  FFALIGN(vkctx->output_height, s->pl_denoise.wg_size[1])/s->pl_denoise.wg_size[1],
723  av_pix_fmt_count_planes(s->vkctx.output_format));
724 
725  return 0;
726 }
727 
728 static int nlmeans_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
729 {
730  int err;
731  AVFrame *out = NULL;
732  AVFilterContext *ctx = link->dst;
733  NLMeansVulkanContext *s = ctx->priv;
734  AVFilterLink *outlink = ctx->outputs[0];
735  FFVulkanContext *vkctx = &s->vkctx;
736  FFVulkanFunctions *vk = &vkctx->vkfn;
737 
738  const AVPixFmtDescriptor *desc;
739  int plane_widths[4];
740  int plane_heights[4];
741 
742  int offsets_dispatched = 0;
743 
744  /* Integral */
745  AVBufferRef *integral_buf = NULL;
746  FFVkBuffer *integral_vk;
747  size_t int_stride;
748  size_t int_size;
749 
750  /* Weights/sums */
751  AVBufferRef *ws_buf = NULL;
752  FFVkBuffer *ws_vk;
753  VkDeviceAddress weights_addr[4];
754  VkDeviceAddress sums_addr[4];
755  uint32_t ws_stride[4];
756  size_t ws_size[4];
757  size_t ws_total_size = 0;
758 
759  FFVkExecContext *exec;
760  VkImageView in_views[AV_NUM_DATA_POINTERS];
761  VkImageView out_views[AV_NUM_DATA_POINTERS];
762  VkImageMemoryBarrier2 img_bar[8];
763  int nb_img_bar = 0;
764  VkBufferMemoryBarrier2 buf_bar[8];
765  int nb_buf_bar = 0;
766 
767  if (!s->initialized)
768  RET(init_filter(ctx));
769 
771  if (!desc)
772  return AVERROR(EINVAL);
773 
774  /* Integral image */
775  int_stride = s->pl_weights.wg_size[0]*s->pl_weights_rows*TYPE_SIZE;
776  int_size = s->pl_weights.wg_size[0]*s->pl_weights_rows*int_stride;
777 
778  /* Plane dimensions */
779  for (int i = 0; i < desc->nb_components; i++) {
780  plane_widths[i] = !i || (i == 3) ? vkctx->output_width : AV_CEIL_RSHIFT(vkctx->output_width, desc->log2_chroma_w);
781  plane_heights[i] = !i || (i == 3) ? vkctx->output_height : AV_CEIL_RSHIFT(vkctx->output_height, desc->log2_chroma_w);
782  plane_widths[i] = FFALIGN(plane_widths[i], s->pl_denoise.wg_size[0]);
783  plane_heights[i] = FFALIGN(plane_heights[i], s->pl_denoise.wg_size[1]);
784 
785  ws_stride[i] = plane_widths[i];
786  ws_size[i] = ws_stride[i] * plane_heights[i] * sizeof(float);
787  ws_total_size += ws_size[i];
788  }
789 
790  /* Buffers */
791  err = ff_vk_get_pooled_buffer(&s->vkctx, &s->integral_buf_pool, &integral_buf,
792  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
793  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
794  NULL,
795  s->opts.t * int_size,
796  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
797  if (err < 0)
798  return err;
799  integral_vk = (FFVkBuffer *)integral_buf->data;
800 
801  err = ff_vk_get_pooled_buffer(&s->vkctx, &s->ws_buf_pool, &ws_buf,
802  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
803  VK_BUFFER_USAGE_TRANSFER_DST_BIT |
804  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
805  NULL,
806  ws_total_size * 2,
807  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
808  if (err < 0)
809  return err;
810  ws_vk = (FFVkBuffer *)ws_buf->data;
811 
812  weights_addr[0] = ws_vk->address;
813  sums_addr[0] = ws_vk->address + ws_total_size;
814  for (int i = 1; i < desc->nb_components; i++) {
815  weights_addr[i] = weights_addr[i - 1] + ws_size[i - 1];
816  sums_addr[i] = sums_addr[i - 1] + ws_size[i - 1];
817  }
818 
819  /* Output frame */
820  out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
821  if (!out) {
822  err = AVERROR(ENOMEM);
823  goto fail;
824  }
825 
826  /* Execution context */
827  exec = ff_vk_exec_get(&s->e);
828  ff_vk_exec_start(vkctx, exec);
829 
830  /* Dependencies */
831  RET(ff_vk_exec_add_dep_frame(vkctx, exec, in,
832  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
833  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
834  RET(ff_vk_exec_add_dep_frame(vkctx, exec, out,
835  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
836  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
837 
838  RET(ff_vk_exec_add_dep_buf(vkctx, exec, &integral_buf, 1, 0));
839  integral_buf = NULL;
840 
841  RET(ff_vk_exec_add_dep_buf(vkctx, exec, &ws_buf, 1, 0));
842  ws_buf = NULL;
843 
844  /* Input frame prep */
845  RET(ff_vk_create_imageviews(vkctx, exec, in_views, in));
846  ff_vk_update_descriptor_img_array(vkctx, &s->pl_weights, exec, in, in_views, 0, 0,
847  VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
848  s->sampler);
849  ff_vk_frame_barrier(vkctx, exec, in, img_bar, &nb_img_bar,
850  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
851  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
852  VK_ACCESS_SHADER_READ_BIT,
853  VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
854  VK_QUEUE_FAMILY_IGNORED);
855 
856  /* Output frame prep */
857  RET(ff_vk_create_imageviews(vkctx, exec, out_views, out));
858  ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar,
859  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
860  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
861  VK_ACCESS_SHADER_WRITE_BIT,
862  VK_IMAGE_LAYOUT_GENERAL,
863  VK_QUEUE_FAMILY_IGNORED);
864 
865  nb_buf_bar = 0;
866  buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
867  .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
868  .srcStageMask = ws_vk->stage,
869  .dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT,
870  .srcAccessMask = ws_vk->access,
871  .dstAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT,
872  .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
873  .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
874  .buffer = ws_vk->buf,
875  .size = ws_vk->size,
876  .offset = 0,
877  };
878  buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
879  .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
880  .srcStageMask = integral_vk->stage,
881  .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
882  .srcAccessMask = integral_vk->access,
883  .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
884  VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
885  .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
886  .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
887  .buffer = integral_vk->buf,
888  .size = integral_vk->size,
889  .offset = 0,
890  };
891 
892  vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
893  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
894  .pImageMemoryBarriers = img_bar,
895  .imageMemoryBarrierCount = nb_img_bar,
896  .pBufferMemoryBarriers = buf_bar,
897  .bufferMemoryBarrierCount = nb_buf_bar,
898  });
899  ws_vk->stage = buf_bar[0].dstStageMask;
900  ws_vk->access = buf_bar[0].dstAccessMask;
901  integral_vk->stage = buf_bar[1].dstStageMask;
902  integral_vk->access = buf_bar[1].dstAccessMask;
903 
904  /* Buffer zeroing */
905  vk->CmdFillBuffer(exec->buf, ws_vk->buf, 0, ws_vk->size, 0x0);
906 
907  nb_buf_bar = 0;
908  buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
909  .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
910  .srcStageMask = ws_vk->stage,
911  .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
912  .srcAccessMask = ws_vk->access,
913  .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
914  VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
915  .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
916  .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
917  .buffer = ws_vk->buf,
918  .size = ws_vk->size,
919  .offset = 0,
920  };
921 
922  vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
923  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
924  .pBufferMemoryBarriers = buf_bar,
925  .bufferMemoryBarrierCount = nb_buf_bar,
926  });
927  ws_vk->stage = buf_bar[0].dstStageMask;
928  ws_vk->access = buf_bar[0].dstAccessMask;
929 
930  /* Update weights descriptors */
931  ff_vk_update_descriptor_img_array(vkctx, &s->pl_weights, exec, in, in_views, 0, 0,
932  VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
933  s->sampler);
934  for (int i = 0; i < desc->nb_components; i++) {
935  RET(ff_vk_set_descriptor_buffer(&s->vkctx, &s->pl_weights, exec, 0, 1 + i*2 + 0, 0,
936  weights_addr[i], ws_size[i],
937  VK_FORMAT_UNDEFINED));
938  RET(ff_vk_set_descriptor_buffer(&s->vkctx, &s->pl_weights, exec, 0, 1 + i*2 + 1, 0,
939  sums_addr[i], ws_size[i],
940  VK_FORMAT_UNDEFINED));
941  }
942 
943  /* Update denoise descriptors */
944  ff_vk_update_descriptor_img_array(vkctx, &s->pl_denoise, exec, in, in_views, 0, 0,
945  VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
946  s->sampler);
947  ff_vk_update_descriptor_img_array(vkctx, &s->pl_denoise, exec, out, out_views, 0, 1,
948  VK_IMAGE_LAYOUT_GENERAL, s->sampler);
949  for (int i = 0; i < desc->nb_components; i++) {
950  RET(ff_vk_set_descriptor_buffer(&s->vkctx, &s->pl_denoise, exec, 0, 2 + i*2 + 0, 0,
951  weights_addr[i], ws_size[i],
952  VK_FORMAT_UNDEFINED));
953  RET(ff_vk_set_descriptor_buffer(&s->vkctx, &s->pl_denoise, exec, 0, 2 + i*2 + 1, 0,
954  sums_addr[i], ws_size[i],
955  VK_FORMAT_UNDEFINED));
956  }
957 
958  /* Weights pipeline */
959  ff_vk_exec_bind_pipeline(vkctx, exec, &s->pl_weights);
960 
961  do {
962  int wg_invoc;
963  HorizontalPushData pd = {
964  { plane_widths[0], plane_widths[1], plane_widths[2], plane_widths[3] },
965  { plane_heights[0], plane_heights[1], plane_heights[2], plane_heights[3] },
966  { ws_stride[0], ws_stride[1], ws_stride[2], ws_stride[3] },
967  { s->patch[0], s->patch[1], s->patch[2], s->patch[3] },
968  { s->strength[0], s->strength[1], s->strength[2], s->strength[2], },
969  integral_vk->address,
970  (uint64_t)int_size,
971  (uint64_t)int_stride,
972  offsets_dispatched,
973  };
974 
975  if (offsets_dispatched) {
976  nb_buf_bar = 0;
977  buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
978  .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
979  .srcStageMask = integral_vk->stage,
980  .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
981  .srcAccessMask = integral_vk->access,
982  .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
983  VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
984  .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
985  .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
986  .buffer = integral_vk->buf,
987  .size = integral_vk->size,
988  .offset = 0,
989  };
990 
991  vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
992  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
993  .pBufferMemoryBarriers = buf_bar,
994  .bufferMemoryBarrierCount = nb_buf_bar,
995  });
996  integral_vk->stage = buf_bar[1].dstStageMask;
997  integral_vk->access = buf_bar[1].dstAccessMask;
998  }
999 
1000  /* Push data */
1001  ff_vk_update_push_exec(vkctx, exec, &s->pl_weights, VK_SHADER_STAGE_COMPUTE_BIT,
1002  0, sizeof(pd), &pd);
1003 
1004  wg_invoc = FFMIN((s->nb_offsets - offsets_dispatched)/TYPE_ELEMS, s->opts.t);
1005  wg_invoc = FFMIN(wg_invoc, vkctx->props.properties.limits.maxComputeWorkGroupCount[2]);
1006 
1007  /* End of horizontal pass */
1008  vk->CmdDispatch(exec->buf, 1, 1, wg_invoc);
1009 
1010  offsets_dispatched += wg_invoc * TYPE_ELEMS;
1011  } while (offsets_dispatched < s->nb_offsets);
1012 
1013  RET(denoise_pass(s, exec, ws_vk, ws_stride));
1014 
1015  err = ff_vk_exec_submit(vkctx, exec);
1016  if (err < 0)
1017  return err;
1018 
1019  err = av_frame_copy_props(out, in);
1020  if (err < 0)
1021  goto fail;
1022 
1023  av_frame_free(&in);
1024 
1025  return ff_filter_frame(outlink, out);
1026 
1027 fail:
1028  av_buffer_unref(&integral_buf);
1029  av_buffer_unref(&ws_buf);
1030  av_frame_free(&in);
1031  av_frame_free(&out);
1032  return err;
1033 }
1034 
/**
 * Filter uninit callback: releases every GPU and host resource owned by the
 * filter context.  Order matters here: the exec pool is freed before the
 * pipelines/shaders it recorded with, and the sampler is destroyed before
 * ff_vk_uninit() tears down the device context it belongs to.
 */
static void nlmeans_vulkan_uninit(AVFilterContext *avctx)
{
    NLMeansVulkanContext *s = avctx->priv;
    FFVulkanContext *vkctx = &s->vkctx;
    FFVulkanFunctions *vk = &vkctx->vkfn;

    /* Free execution contexts first; they hold references to the pipelines */
    ff_vk_exec_pool_free(vkctx, &s->e);
    ff_vk_pipeline_free(vkctx, &s->pl_weights);
    ff_vk_shader_free(vkctx, &s->shd_weights);
    ff_vk_pipeline_free(vkctx, &s->pl_denoise);
    ff_vk_shader_free(vkctx, &s->shd_denoise);

    /* Release pooled integral/weights buffers */
    av_buffer_pool_uninit(&s->integral_buf_pool);
    av_buffer_pool_uninit(&s->ws_buf_pool);

    /* Sampler is created lazily, so it may still be VK_NULL_HANDLE */
    if (s->sampler)
        vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler,
                           vkctx->hwctx->alloc);

    ff_vk_uninit(&s->vkctx);

    /* Host-side offset tables built at init time */
    av_freep(&s->xoffsets);
    av_freep(&s->yoffsets);

    /* Allow re-initialization if the filter is reconfigured */
    s->initialized = 0;
}
1061 
#define OFFSET(x) offsetof(NLMeansVulkanContext, x)
#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
/*
 * Filter options: global denoising strength/patch size/research radius,
 * parallelism, plus optional per-component overrides (s1..s4, p1..p4).
 * A per-component value of 0 (pc) / unset falls back to the global one.
 */
static const AVOption nlmeans_vulkan_options[] = {
    { "s",  "denoising strength for all components", OFFSET(opts.s), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS },
    { "p",  "patch size for all components", OFFSET(opts.p), AV_OPT_TYPE_INT, { .i64 = 3*2+1 }, 0, 99, FLAGS },
    { "r",  "research window radius", OFFSET(opts.r), AV_OPT_TYPE_INT, { .i64 = 7*2+1 }, 0, 99, FLAGS },
    { "t",  "parallelism", OFFSET(opts.t), AV_OPT_TYPE_INT, { .i64 = 36 }, 1, 168, FLAGS },

    /* Per-component denoising strength overrides */
    { "s1", "denoising strength for component 1", OFFSET(opts.sc[0]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS },
    { "s2", "denoising strength for component 2", OFFSET(opts.sc[1]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS },
    { "s3", "denoising strength for component 3", OFFSET(opts.sc[2]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS },
    { "s4", "denoising strength for component 4", OFFSET(opts.sc[3]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS },

    /* Per-component patch size overrides (0 = use global "p") */
    { "p1", "patch size for component 1", OFFSET(opts.pc[0]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },
    { "p2", "patch size for component 2", OFFSET(opts.pc[1]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },
    { "p3", "patch size for component 3", OFFSET(opts.pc[2]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },
    { "p4", "patch size for component 4", OFFSET(opts.pc[3]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },

    { NULL }
};
1082 
1083 AVFILTER_DEFINE_CLASS(nlmeans_vulkan);
1084 
/* Single video input: frames are filtered by nlmeans_vulkan_filter_frame(),
 * input link configured by the common Vulkan filter helper. */
static const AVFilterPad nlmeans_vulkan_inputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_VIDEO,
        .filter_frame = &nlmeans_vulkan_filter_frame,
        .config_props = &ff_vk_filter_config_input,
    },
};
1093 
/* Single video output; output link properties come from the common
 * Vulkan filter helper. */
static const AVFilterPad nlmeans_vulkan_outputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_VIDEO,
        .config_props = &ff_vk_filter_config_output,
    },
};
1101 
1103  .name = "nlmeans_vulkan",
1104  .description = NULL_IF_CONFIG_SMALL("Non-local means denoiser (Vulkan)"),
1105  .priv_size = sizeof(NLMeansVulkanContext),
1106  .init = &ff_vk_filter_init,
1107  .uninit = &nlmeans_vulkan_uninit,
1108  FILTER_INPUTS(nlmeans_vulkan_inputs),
1109  FILTER_OUTPUTS(nlmeans_vulkan_outputs),
1111  .priv_class = &nlmeans_vulkan_class,
1112  .flags = AVFILTER_FLAG_HWDEVICE,
1113  .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
1114 };
ff_get_video_buffer
AVFrame * ff_get_video_buffer(AVFilterLink *link, int w, int h)
Request a picture buffer with a specific set of permissions.
Definition: video.c:112
NLMeansVulkanContext::nlmeans_opts::p
int p
Definition: vf_nlmeans_vulkan.c:63
ff_vk_create_buf
int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext, void *alloc_pNext, VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
Definition: vulkan.c:848
AV_LOG_WARNING
#define AV_LOG_WARNING
Something somehow does not look correct.
Definition: log.h:186
ff_vk_exec_get
FFVkExecContext * ff_vk_exec_get(FFVkExecPool *pool)
Retrieve an execution pool.
Definition: vulkan.c:497
ff_vk_update_descriptor_img_array
void ff_vk_update_descriptor_img_array(FFVulkanContext *s, FFVulkanPipeline *pl, FFVkExecContext *e, AVFrame *f, VkImageView *views, int set, int binding, VkImageLayout layout, VkSampler sampler)
Definition: vulkan.c:1727
ff_vk_pipeline_free
void ff_vk_pipeline_free(FFVulkanContext *s, FFVulkanPipeline *pl)
Definition: vulkan.c:1844
FFVulkanContext::output_height
int output_height
Definition: vulkan.h:265
r
const char * r
Definition: vf_curves.c:127
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
opt.h
NLMeansVulkanContext::opts
struct NLMeansVulkanContext::nlmeans_opts opts
NLMeansVulkanContext::sampler
VkSampler sampler
Definition: vf_nlmeans_vulkan.c:39
out
FILE * out
Definition: movenc.c:55
FF_FILTER_FLAG_HWFRAME_AWARE
#define FF_FILTER_FLAG_HWFRAME_AWARE
The filter is aware of hardware frames, and any hardware frame context should not be automatically pr...
Definition: internal.h:351
comp
static void comp(unsigned char *dst, ptrdiff_t dst_stride, unsigned char *src, ptrdiff_t src_stride, int add)
Definition: eamad.c:81
AVBufferPool
The buffer pool.
Definition: buffer_internal.h:88
ff_filter_frame
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1015
av_pix_fmt_desc_get
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:2965
AVBufferRef::data
uint8_t * data
The data buffer.
Definition: buffer.h:90
FFVkBuffer::access
VkAccessFlags2 access
Definition: vulkan.h:104
ff_vk_qf_init
int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, VkQueueFlagBits dev_family)
Chooses a QF and loads it into a context.
Definition: vulkan.c:225
FFVkBuffer::stage
VkPipelineStageFlags2 stage
Definition: vulkan.h:103
av_frame_free
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:160
NLMeansVulkanContext::integral_buf_pool
AVBufferPool * integral_buf_pool
Definition: vf_nlmeans_vulkan.c:41
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:375
ff_vk_filter_init
int ff_vk_filter_init(AVFilterContext *avctx)
General lavfi IO functions.
Definition: vulkan_filter.c:221
w
uint8_t w
Definition: llviddspenc.c:38
NLMeansVulkanContext::xoffsets
int * xoffsets
Definition: vf_nlmeans_vulkan.c:53
ff_vk_shader_create
int ff_vk_shader_create(FFVulkanContext *s, FFVkSPIRVShader *shd, uint8_t *spirv, size_t spirv_size, const char *entrypoint)
Definition: vulkan.c:1415
AVOption
AVOption.
Definition: opt.h:346
b
#define b
Definition: input.c:41
AV_LOG_VERBOSE
#define AV_LOG_VERBOSE
Detailed information.
Definition: log.h:196
FFVkBuffer::address
VkDeviceAddress address
Definition: vulkan.h:100
NLMeansVulkanContext::strength
float strength[4]
Definition: vf_nlmeans_vulkan.c:56
ff_vk_uninit
void ff_vk_uninit(FFVulkanContext *s)
Frees main context.
Definition: vulkan.c:1873
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
FFVkSPIRVCompiler::uninit
void(* uninit)(struct FFVkSPIRVCompiler **ctx)
Definition: vulkan_spirv.h:33
AVFilter::name
const char * name
Filter name.
Definition: avfilter.h:170
NLMeansVulkanContext::initialized
int initialized
Definition: vf_nlmeans_vulkan.c:36
ff_vk_pipeline_descriptor_set_add
int ff_vk_pipeline_descriptor_set_add(FFVulkanContext *s, FFVulkanPipeline *pl, FFVkSPIRVShader *shd, FFVulkanDescriptorSetBinding *desc, int nb, int read_only, int print_to_shader_only)
Add descriptor to a pipeline.
Definition: vulkan.c:1465
ff_vk_shader_set_compute_sizes
void ff_vk_shader_set_compute_sizes(FFVkSPIRVShader *shd, int x, int y, int z)
Definition: vulkan.c:1373
video.h
AV_PIX_FMT_VULKAN
@ AV_PIX_FMT_VULKAN
Vulkan hardware images.
Definition: pixfmt.h:379
ff_vk_exec_add_dep_frame
int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f, VkPipelineStageFlagBits2 wait_stage, VkPipelineStageFlagBits2 signal_stage)
Definition: vulkan.c:599
FFVkBuffer::buf
VkBuffer buf
Definition: vulkan.h:96
av_malloc
#define av_malloc(s)
Definition: tableprint_vlc.h:30
NLMeansVulkanContext::yoffsets
int * yoffsets
Definition: vf_nlmeans_vulkan.c:54
av_pix_fmt_count_planes
int av_pix_fmt_count_planes(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:3005
AVVulkanDeviceContext::alloc
const VkAllocationCallbacks * alloc
Custom memory allocator, else NULL.
Definition: hwcontext_vulkan.h:49
ff_vk_add_push_constant
int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size, VkShaderStageFlagBits stage)
Add/update push constants for execution.
Definition: vulkan.c:1143
AVFilterContext::priv
void * priv
private data for use by the filter
Definition: avfilter.h:422
fail
#define fail()
Definition: checkasm.h:179
insert_weights_pass
static void insert_weights_pass(FFVkSPIRVShader *shd, int nb_rows, int vert, int t, int dst_comp, int plane, int comp)
Definition: vf_nlmeans_vulkan.c:143
vulkan_filter.h
ff_source_prefix_sum_comp
const char * ff_source_prefix_sum_comp
FFVulkanContext::atomic_float_feats
VkPhysicalDeviceShaderAtomicFloatFeaturesEXT atomic_float_feats
Definition: vulkan.h:248
AVFilterPad
A filter pad used for either input or output.
Definition: internal.h:33
first
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But first
Definition: rate_distortion.txt:12
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:180
av_cold
#define av_cold
Definition: attributes.h:90
float
float
Definition: af_crystalizer.c:121
FFVulkanContext::output_width
int output_width
Definition: vulkan.h:264
NLMeansVulkanContext::ws_buf_pool
AVBufferPool * ws_buf_pool
Definition: vf_nlmeans_vulkan.c:42
width
#define width
s
#define s(width, name)
Definition: cbs_vp9.c:198
AV_CEIL_RSHIFT
#define AV_CEIL_RSHIFT(a, b)
Definition: common.h:59
AV_OPT_TYPE_DOUBLE
@ AV_OPT_TYPE_DOUBLE
Definition: opt.h:237
s1
#define s1
Definition: regdef.h:38
FLAGS
#define FLAGS
ctx
AVFormatContext * ctx
Definition: movenc.c:49
ff_vf_nlmeans_vulkan
const AVFilter ff_vf_nlmeans_vulkan
ff_vk_exec_add_dep_buf
int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e, AVBufferRef **deps, int nb_deps, int ref)
Execution dependency management.
Definition: vulkan.c:575
ff_vk_unmap_buffer
static int ff_vk_unmap_buffer(FFVulkanContext *s, FFVkBuffer *buf, int flush)
Definition: vulkan.h:417
ff_vk_exec_bind_pipeline
void ff_vk_exec_bind_pipeline(FFVulkanContext *s, FFVkExecContext *e, FFVulkanPipeline *pl)
Definition: vulkan.c:1822
FFVkSPIRVCompiler::compile_shader
int(* compile_shader)(struct FFVkSPIRVCompiler *ctx, void *avctx, struct FFVkSPIRVShader *shd, uint8_t **data, size_t *size, const char *entrypoint, void **opaque)
Definition: vulkan_spirv.h:29
ff_vk_exec_pool_free
void ff_vk_exec_pool_free(FFVulkanContext *s, FFVkExecPool *pool)
Definition: vulkan.c:256
FILTER_INPUTS
#define FILTER_INPUTS(array)
Definition: internal.h:182
link
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a link
Definition: filter_design.txt:23
opts
AVDictionary * opts
Definition: movenc.c:51
NLMeansVulkanContext::pl_denoise
FFVulkanPipeline pl_denoise
Definition: vf_nlmeans_vulkan.c:50
NULL
#define NULL
Definition: coverity.c:32
av_frame_copy_props
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
Definition: frame.c:709
av_buffer_unref
void av_buffer_unref(AVBufferRef **buf)
Free a given reference and automatically free the buffer if there are no more references to it.
Definition: buffer.c:139
av_buffer_pool_uninit
void av_buffer_pool_uninit(AVBufferPool **ppool)
Mark the pool as being available for freeing.
Definition: buffer.c:322
ff_vk_filter_config_output
int ff_vk_filter_config_output(AVFilterLink *outlink)
Definition: vulkan_filter.c:198
FFVkBuffer::size
size_t size
Definition: vulkan.h:99
ff_vk_init_compute_pipeline
int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl, FFVkSPIRVShader *shd)
Definition: vulkan.c:1785
NLMeansVulkanContext::nlmeans_opts
Definition: vf_nlmeans_vulkan.c:59
ff_vk_exec_pool_init
int ff_vk_exec_pool_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, FFVkExecPool *pool, int nb_contexts, int nb_queries, VkQueryType query_type, int query_64bit, const void *query_create_pnext)
Allocates/frees an execution pool.
Definition: vulkan.c:296
FFVulkanContext
Definition: vulkan.h:228
FFVulkanPipeline
Definition: vulkan.h:131
insert_vertical_pass
static void insert_vertical_pass(FFVkSPIRVShader *shd, int nb_rows, int first, int plane, int comp)
Definition: vf_nlmeans_vulkan.c:114
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
NLMeansVulkanContext::qf
FFVkQueueFamilyCtx qf
Definition: vf_nlmeans_vulkan.c:38
NLMeansVulkanContext::patch
int patch[4]
Definition: vf_nlmeans_vulkan.c:57
ff_vk_shader_init
int ff_vk_shader_init(FFVulkanPipeline *pl, FFVkSPIRVShader *shd, const char *name, VkShaderStageFlags stage, uint32_t required_subgroup_size)
Shader management.
Definition: vulkan.c:1347
main
int main(int argc, char **argv)
Definition: avio_http_serve_files.c:99
s2
#define s2
Definition: regdef.h:39
init
int(* init)(AVBSFContext *ctx)
Definition: dts2pts.c:366
NLMeansVulkanContext
Definition: vf_nlmeans_vulkan.c:33
NLMeansVulkanContext::xyoffsets_buf
FFVkBuffer xyoffsets_buf
Definition: vf_nlmeans_vulkan.c:44
FFVulkanDescriptorSetBinding
Definition: vulkan.h:83
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:94
for
for(k=2;k<=8;++k)
Definition: h264pred_template.c:425
AVFILTER_FLAG_HWDEVICE
#define AVFILTER_FLAG_HWDEVICE
The filter can create hardware frames using AVFilterContext.hw_device_ctx.
Definition: avfilter.h:138
NLMeansVulkanContext::nlmeans_opts::t
int t
Definition: vf_nlmeans_vulkan.c:65
size
int size
Definition: twinvq_data.h:10344
AV_NUM_DATA_POINTERS
#define AV_NUM_DATA_POINTERS
Definition: frame.h:376
FFVkQueueFamilyCtx
Definition: vulkan.h:110
FFVulkanContext::output_format
enum AVPixelFormat output_format
Definition: vulkan.h:266
height
#define height
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
AVERROR_EXTERNAL
#define AVERROR_EXTERNAL
Generic error in an external library.
Definition: error.h:59
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
FFVkExecContext
Definition: vulkan.h:152
FFVulkanDescriptorSetBinding::name
const char * name
Definition: vulkan.h:84
ff_vk_update_push_exec
void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e, FFVulkanPipeline *pl, VkShaderStageFlagBits stage, int offset, size_t size, void *src)
Definition: vulkan.c:1740
TYPE_SIZE
#define TYPE_SIZE
Definition: vf_nlmeans_vulkan.c:31
internal.h
NLMeansVulkanContext::pl_weights
FFVulkanPipeline pl_weights
Definition: vf_nlmeans_vulkan.c:47
NLMeansVulkanContext::shd_denoise
FFVkSPIRVShader shd_denoise
Definition: vf_nlmeans_vulkan.c:51
AVFILTER_DEFINE_CLASS
#define AVFILTER_DEFINE_CLASS(fname)
Definition: internal.h:323
FFVkSPIRVCompiler
Definition: vulkan_spirv.h:27
layout
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel layout
Definition: filter_design.txt:18
FILTER_SINGLE_PIXFMT
#define FILTER_SINGLE_PIXFMT(pix_fmt_)
Definition: internal.h:172
uninit
static void uninit(AVBSFContext *ctx)
Definition: pcm_rechunk.c:68
NLMeansVulkanContext::pl_weights_rows
int pl_weights_rows
Definition: vf_nlmeans_vulkan.c:46
planes
static const struct @396 planes[]
ff_vk_exec_start
int ff_vk_exec_start(FFVulkanContext *s, FFVkExecContext *e)
Start/submit/wait an execution.
Definition: vulkan.c:513
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
init_filter
static int init_filter(FilteringContext *fctx, AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, const char *filter_spec)
Definition: transcode.c:245
ff_vk_frame_barrier
void ff_vk_frame_barrier(FFVulkanContext *s, FFVkExecContext *e, AVFrame *pic, VkImageMemoryBarrier2 *bar, int *nb_bar, VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage, VkAccessFlagBits new_access, VkImageLayout new_layout, uint32_t new_qf)
Definition: vulkan.c:1304
DUP_SAMPLER
#define DUP_SAMPLER(x)
Definition: vulkan.h:73
ff_vk_shader_rep_fmt
const char * ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt)
Returns the format to use for images in shaders.
Definition: vulkan.c:1207
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
vulkan_spirv.h
NLMeansVulkanContext::shd_weights
FFVkSPIRVShader shd_weights
Definition: vf_nlmeans_vulkan.c:48
FFVulkanContext::props
VkPhysicalDeviceProperties2 props
Definition: vulkan.h:233
AVFilterPad::name
const char * name
Pad name.
Definition: internal.h:39
GLSLF
#define GLSLF(N, S,...)
Definition: vulkan.h:54
FFVkSPIRVCompiler::free_shader
void(* free_shader)(struct FFVkSPIRVCompiler *ctx, void **opaque)
Definition: vulkan_spirv.h:32
NLMeansVulkanContext::nb_offsets
int nb_offsets
Definition: vf_nlmeans_vulkan.c:55
AVFilter
Filter definition.
Definition: avfilter.h:166
insert_horizontal_pass
static void insert_horizontal_pass(FFVkSPIRVShader *shd, int nb_rows, int first, int plane, int comp)
Definition: vf_nlmeans_vulkan.c:88
TYPE_NAME
#define TYPE_NAME
Definition: vf_nlmeans_vulkan.c:29
ff_vk_map_buffer
static int ff_vk_map_buffer(FFVulkanContext *s, FFVkBuffer *buf, uint8_t **mem, int invalidate)
Definition: vulkan.h:410
FFVulkanContext::vkfn
FFVulkanFunctions vkfn
Definition: vulkan.h:231
FFVkExecPool
Definition: vulkan.h:210
pos
unsigned int pos
Definition: spdifenc.c:414
OFFSET
#define OFFSET(x)
FFVkExecContext::buf
VkCommandBuffer buf
Definition: vulkan.h:164
NLMeansVulkanContext::nlmeans_opts::s
double s
Definition: vf_nlmeans_vulkan.c:61
NLMeansVulkanContext::nlmeans_opts::pc
int pc[4]
Definition: vf_nlmeans_vulkan.c:64
random_seed.h
FFVkSPIRVShader
Definition: vulkan.h:75
buffer
the frame and frame reference mechanism is intended to as much as expensive copies of that data while still allowing the filters to produce correct results The data is stored in buffers represented by AVFrame structures Several references can point to the same frame buffer
Definition: filter_design.txt:49
AV_OPT_TYPE_INT
@ AV_OPT_TYPE_INT
Definition: opt.h:235
insert_first
static void insert_first(FFVkSPIRVShader *shd, int r, const char *off, int horiz, int plane, int comp)
Definition: vf_nlmeans_vulkan.c:71
AVFilterContext
An instance of a filter.
Definition: avfilter.h:407
NLMeansVulkanContext::nlmeans_opts::sc
double sc[4]
Definition: vf_nlmeans_vulkan.c:62
desc
const char * desc
Definition: libsvtav1.c:75
GLSLC
#define GLSLC(N, S)
Definition: vulkan.h:44
ff_vk_filter_config_input
int ff_vk_filter_config_input(AVFilterLink *inlink)
Definition: vulkan_filter.c:166
AVMEDIA_TYPE_VIDEO
@ AVMEDIA_TYPE_VIDEO
Definition: avutil.h:201
FFVulkanContext::hwctx
AVVulkanDeviceContext * hwctx
Definition: vulkan.h:253
ff_vk_set_descriptor_buffer
int ff_vk_set_descriptor_buffer(FFVulkanContext *s, FFVulkanPipeline *pl, FFVkExecContext *e, int set, int bind, int offs, VkDeviceAddress addr, VkDeviceSize len, VkFormat fmt)
Definition: vulkan.c:1682
mem.h
AVBufferRef
A reference to a data buffer.
Definition: buffer.h:82
NLMeansVulkanContext::e
FFVkExecPool e
Definition: vf_nlmeans_vulkan.c:37
AVPixFmtDescriptor
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes...
Definition: pixdesc.h:69
AVVulkanDeviceContext::act_dev
VkDevice act_dev
Active device.
Definition: hwcontext_vulkan.h:71
FFALIGN
#define FFALIGN(x, a)
Definition: macros.h:78
FILTER_OUTPUTS
#define FILTER_OUTPUTS(array)
Definition: internal.h:183
ff_vk_init_sampler
int ff_vk_init_sampler(FFVulkanContext *s, VkSampler *sampler, int unnorm_coords, VkFilter filt)
Create a sampler.
Definition: vulkan.c:1163
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:34
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
TYPE_ELEMS
#define TYPE_ELEMS
Definition: vf_nlmeans_vulkan.c:30
FFVkBuffer
Definition: vulkan.h:95
ff_vk_exec_pipeline_register
int ff_vk_exec_pipeline_register(FFVulkanContext *s, FFVkExecPool *pool, FFVulkanPipeline *pl)
Register a pipeline with an exec pool.
Definition: vulkan.c:1579
d
d
Definition: ffmpeg_filter.c:424
int32_t
int32_t
Definition: audioconvert.c:56
ff_vk_exec_submit
int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext *e)
Definition: vulkan.c:724
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
ff_vk_create_imageviews
int ff_vk_create_imageviews(FFVulkanContext *s, FFVkExecContext *e, VkImageView views[AV_NUM_DATA_POINTERS], AVFrame *f)
Create an imageview and add it as a dependency to an execution.
Definition: vulkan.c:1231
ff_vk_shader_free
void ff_vk_shader_free(FFVulkanContext *s, FFVkSPIRVShader *shd)
Definition: vulkan.c:1406
NLMeansVulkanContext::nlmeans_opts::r
int r
Definition: vf_nlmeans_vulkan.c:60
RET
#define RET(x)
Definition: vulkan.h:67
FFVulkanFunctions
Definition: vulkan_functions.h:226
ff_vk_get_pooled_buffer
int ff_vk_get_pooled_buffer(FFVulkanContext *ctx, AVBufferPool **buf_pool, AVBufferRef **buf, VkBufferUsageFlags usage, void *create_pNext, size_t size, VkMemoryPropertyFlagBits mem_props)
Initialize a pool and create AVBufferRefs containing FFVkBuffer.
Definition: vulkan.c:1091
NLMeansVulkanContext::vkctx
FFVulkanContext vkctx
Definition: vf_nlmeans_vulkan.c:34