FFmpeg
vf_nlmeans_vulkan.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) Lynne
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/random_seed.h"
22 #include "libavutil/opt.h"
23 #include "vulkan_filter.h"
24 #include "vulkan_spirv.h"
25 #include "internal.h"
26 #include "video.h"
27 
28 #define TYPE_NAME "vec4"
29 #define TYPE_ELEMS 4
30 #define TYPE_SIZE (TYPE_ELEMS*4)
31 
typedef struct NLMeansVulkanContext {
    /* NOTE(review): this listing is a doxygen extraction that dropped several
     * member lines here and below (the embedded numbering jumps). Members used
     * elsewhere in this file but not visible in this span include: vkctx, qf,
     * e (exec pool), sampler-adjacent state, initialized, nb_offsets,
     * pl_weights/shd_weights, pl_denoise/shd_denoise, pl_weights_rows and the
     * integral/state/ws buffer pools. Restore from upstream before editing. */

    VkSampler sampler;          /* nearest-filter sampler shared by both pipelines (created in init_filter) */

    int *xoffsets;              /* research-window x offsets, one per candidate (filled in init_filter) */
    int *yoffsets;              /* matching research-window y offsets */
    /* NOTE(review): "int nb_offsets;" is referenced by init_filter but its
     * declaration line was dropped from this listing. */
    float strength[4];          /* per-component exponent scale: 255^2 / -(10*s)^2 (init_filter) */
    int patch[4];               /* per-component patch half-size (ps / 2) */

    struct nlmeans_opts {
        int r;                  /* research window size; forced odd in init_filter */
        double s;               /* global denoising strength */
        double sc[4];           /* per-component strength overrides (used when > 1.0) */
        int p;                  /* patch size; forced odd in init_filter */
        int pc[4];              /* per-component patch size overrides (used when nonzero) */
        int t;                  /* number of parallel dispatches (clamped in init_filter) */
    } opts;
66 
67 extern const char *ff_source_prefix_sum_comp;
68 
/**
 * Emit GLSL that loads the reference sample s1 at (x, y) — shifted by r along
 * the pass direction (horiz selects whether r is added to x or to y) — plus
 * the candidate samples s2 at the per-lane research offsets (xoffs, yoffs),
 * then replaces s2 with the elementwise squared difference (s1 - s2)^2,
 * i.e. the integrand of the integral image built by the prefix-sum passes.
 *
 * plane selects the input image, comp the channel within it.
 */
static void insert_first(FFVkSPIRVShader *shd, int r, int horiz, int plane, int comp)
{
    GLSLF(2, s1 = texture(input_img[%i], ivec2(x + %i, y + %i))[%i];
             ,plane, horiz ? r : 0, !horiz ? r : 0, comp);

    if (TYPE_ELEMS == 4) {
        /* DTYPE is a vec4: one fetch per research offset lane */
        GLSLF(2, s2[0] = texture(input_img[%i], ivec2(x + %i + xoffs[0], y + %i + yoffs[0]))[%i];
                 ,plane, horiz ? r : 0, !horiz ? r : 0, comp);
        GLSLF(2, s2[1] = texture(input_img[%i], ivec2(x + %i + xoffs[1], y + %i + yoffs[1]))[%i];
                 ,plane, horiz ? r : 0, !horiz ? r : 0, comp);
        GLSLF(2, s2[2] = texture(input_img[%i], ivec2(x + %i + xoffs[2], y + %i + yoffs[2]))[%i];
                 ,plane, horiz ? r : 0, !horiz ? r : 0, comp);
        GLSLF(2, s2[3] = texture(input_img[%i], ivec2(x + %i + xoffs[3], y + %i + yoffs[3]))[%i];
                 ,plane, horiz ? r : 0, !horiz ? r : 0, comp);
    } else {
        /* Wider DTYPE: 16 offsets addressed as a 4x4 of vec4 components */
        for (int i = 0; i < 16; i++) {
            GLSLF(2, s2[%i][%i] = texture(input_img[%i], ivec2(x + %i + xoffs[%i], y + %i + yoffs[%i]))[%i];
                     ,i / 4, i % 4, plane, horiz ? r : 0, i, !horiz ? r : 0, i, comp);
        }
    }

    GLSLC(2, s2 = (s1 - s2) * (s1 - s2);                                      );
}
92 
/**
 * Emit the horizontal integral pass: each invocation owns nb_rows consecutive
 * columns (x = invocation index * nb_rows) and, for every image row y, writes
 * the squared-difference integrands (when this is the first pass) and then
 * runs prefix_sum() along the row of integral_data.
 *
 * When this is not the first pass, a workgroup-scope controlBarrier with
 * buffer acquire/release semantics orders this pass after the previous one's
 * buffer writes (Vulkan memory model).
 */
static void insert_horizontal_pass(FFVkSPIRVShader *shd, int nb_rows, int first, int plane, int comp)
{
    GLSLF(1, x = int(gl_GlobalInvocationID.x) * %i;                           ,nb_rows);
    if (!first) {
        GLSLC(1,    controlBarrier(gl_ScopeWorkgroup, gl_ScopeWorkgroup,
                                   gl_StorageSemanticsBuffer,
                                   gl_SemanticsAcquireRelease |
                                   gl_SemanticsMakeAvailable |
                                   gl_SemanticsMakeVisible);                  );
    }
    GLSLC(1, for (y = 0; y < height[0]; y++) {                                );
    GLSLC(2,     offset = uint64_t(int_stride)*y*T_ALIGN;                     );
    GLSLC(2,     dst = DataBuffer(uint64_t(integral_data) + offset);          );
    GLSLC(0,                                                                  );
    if (first) {
        /* First pass also materializes the integrands before summing */
        for (int r = 0; r < nb_rows; r++) {
            insert_first(shd, r, 1, plane, comp);
            GLSLF(2, dst.v[x + %i] = s2;                                      ,r);
            GLSLC(0,                                                          );
        }
    }
    GLSLC(2,     barrier();                                                   );
    GLSLC(2,     prefix_sum(dst, 1, dst, 1);                                  );
    GLSLC(1, }                                                                );
    GLSLC(0,                                                                  );
}
119 
/**
 * Emit the vertical integral pass: each invocation owns nb_rows consecutive
 * rows (y = invocation index * nb_rows) and, for every image column x, writes
 * the squared-difference integrands (first pass only) and then runs
 * prefix_sum() down the column (stride int_stride) of integral_data.
 *
 * As in insert_horizontal_pass(), a non-first pass is preceded by a
 * workgroup-scope controlBarrier so it observes the previous pass's writes.
 */
static void insert_vertical_pass(FFVkSPIRVShader *shd, int nb_rows, int first, int plane, int comp)
{
    GLSLF(1, y = int(gl_GlobalInvocationID.x) * %i;                           ,nb_rows);
    if (!first) {
        GLSLC(1,    controlBarrier(gl_ScopeWorkgroup, gl_ScopeWorkgroup,
                                   gl_StorageSemanticsBuffer,
                                   gl_SemanticsAcquireRelease |
                                   gl_SemanticsMakeAvailable |
                                   gl_SemanticsMakeVisible);                  );
    }
    GLSLC(1, for (x = 0; x < width[0]; x++) {                                 );
    GLSLC(2,     dst = DataBuffer(uint64_t(integral_data) + x*T_ALIGN);       );

    for (int r = 0; r < nb_rows; r++) {
        if (first) {
            /* First pass also materializes the integrands before summing */
            insert_first(shd, r, 0, plane, comp);
            GLSLF(2, integral_data.v[(y + %i)*int_stride + x] = s2;           ,r);
            GLSLC(0,                                                          );
        }
    }

    GLSLC(2,     barrier();                                                   );
    GLSLC(2,     prefix_sum(dst, int_stride, dst, int_stride);                );
    GLSLC(1, }                                                                );
    GLSLC(0,                                                                  );
}
146 
/**
 * Emit the weights pass for one (plane, comp) into destination component
 * dst_comp: for every pixel, read the four corners (a, b, c, d) of the
 * patch-sized rectangle from the integral image, form the patch SSD
 * patch_diff = d + a - b - c, turn it into NLMeans weights
 * w = exp(patch_diff * strength[dst_comp]), and accumulate the weight sum and
 * the weighted source samples into the weights_/sums_ buffers.
 *
 * vert selects the scan direction (invocations walk rows when vert, columns
 * otherwise — matching whichever prefix pass ran last). When t > 1 several
 * dispatches may run concurrently, so accumulation uses float atomicAdd.
 */
static void insert_weights_pass(FFVkSPIRVShader *shd, int nb_rows, int vert,
                                int t, int dst_comp, int plane, int comp)
{
    GLSLF(1, p = patch_size[%i];                                              ,dst_comp);
    GLSLC(0,                                                                  );
    /* Order all prior integral-image writes before the reads below */
    GLSLC(1,    controlBarrier(gl_ScopeWorkgroup, gl_ScopeWorkgroup,
                               gl_StorageSemanticsBuffer,
                               gl_SemanticsAcquireRelease |
                               gl_SemanticsMakeAvailable |
                               gl_SemanticsMakeVisible);                      );
    GLSLC(1, barrier();                                                       );
    if (!vert) {
        GLSLC(1, for (y = 0; y < height[0]; y++) {                            );
        GLSLF(2,     if (gl_GlobalInvocationID.x*%i >= width[%i])             ,nb_rows, plane);
        GLSLC(3,         break;                                               );
        GLSLF(2,     for (r = 0; r < %i; r++) {                               ,nb_rows);
        GLSLF(3,         x = int(gl_GlobalInvocationID.x) * %i + r;           ,nb_rows);
    } else {
        GLSLC(1, for (x = 0; x < width[0]; x++) {                             );
        GLSLF(2,     if (gl_GlobalInvocationID.x*%i >= height[%i])            ,nb_rows, plane);
        GLSLC(3,         break;                                               );
        GLSLF(2,     for (r = 0; r < %i; r++) {                               ,nb_rows);
        GLSLF(3,         y = int(gl_GlobalInvocationID.x) * %i + r;           ,nb_rows);
    }
    GLSLC(0,                                                                  );
    GLSLC(3,         a = DTYPE(0);                                            );
    GLSLC(3,         b = DTYPE(0);                                            );
    GLSLC(3,         c = DTYPE(0);                                            );
    GLSLC(3,         d = DTYPE(0);                                            );
    GLSLC(0,                                                                  );
    /* lt: patch rectangle would fall off the top/left edge — leave corners 0 */
    GLSLC(3,         lt = ((x - p) < 0) || ((y - p) < 0);                     );
    GLSLC(0,                                                                  );
    if (TYPE_ELEMS == 4) {
        GLSLF(3, src[0] = texture(input_img[%i], ivec2(x + xoffs[0], y + yoffs[0]))[%i]; ,plane, comp);
        GLSLF(3, src[1] = texture(input_img[%i], ivec2(x + xoffs[1], y + yoffs[1]))[%i]; ,plane, comp);
        GLSLF(3, src[2] = texture(input_img[%i], ivec2(x + xoffs[2], y + yoffs[2]))[%i]; ,plane, comp);
        GLSLF(3, src[3] = texture(input_img[%i], ivec2(x + xoffs[3], y + yoffs[3]))[%i]; ,plane, comp);
    } else {
        for (int i = 0; i < 16; i++)
            GLSLF(3, src[%i][%i] = texture(input_img[%i], ivec2(x + xoffs[%i], y + yoffs[%i]))[%i];
                     ,i / 4, i % 4, plane, i, i, comp);

    }
    GLSLC(0,                                                                  );
    GLSLC(3,         if (lt == false) {                                       );
    GLSLC(4,             a = integral_data.v[(y - p)*int_stride + x - p];     );
    GLSLC(4,             c = integral_data.v[(y - p)*int_stride + x + p];     );
    GLSLC(4,             b = integral_data.v[(y + p)*int_stride + x - p];     );
    GLSLC(4,             d = integral_data.v[(y + p)*int_stride + x + p];     );
    GLSLC(3,         }                                                        );
    GLSLC(0,                                                                  );
    GLSLC(3,         patch_diff = d + a - b - c;                              );
    if (TYPE_ELEMS == 4) {
        /* strength is negative, so larger patch SSD -> smaller weight */
        GLSLF(3,     w = exp(patch_diff * strength[%i]);                      ,dst_comp);
        GLSLC(3,     w_sum = w[0] + w[1] + w[2] + w[3];                       );
        GLSLC(3,     sum = dot(w, src*255);                                   );
    } else {
        for (int i = 0; i < 4; i++)
            GLSLF(3, w[%i] = exp(patch_diff[%i] * strength[%i]);              ,i,i,dst_comp);
        for (int i = 0; i < 4; i++)
            GLSLF(3, w_sum %s w[%i][0] + w[%i][1] + w[%i][2] + w[%i][3];
                     ,!i ? "=" : "+=", i, i, i, i);
        for (int i = 0; i < 4; i++)
            GLSLF(3, sum %s dot(w[%i], src[%i]*255);
                     ,!i ? "=" : "+=", i, i);
    }
    GLSLC(0,                                                                  );
    if (t > 1) {
        /* Parallel dispatches share the buffers — accumulate atomically */
        GLSLF(3,     atomicAdd(weights_%i[y*ws_stride[%i] + x], w_sum);       ,dst_comp, dst_comp);
        GLSLF(3,     atomicAdd(sums_%i[y*ws_stride[%i] + x], sum);            ,dst_comp, dst_comp);
    } else {
        GLSLF(3,     weights_%i[y*ws_stride[%i] + x] += w_sum;                ,dst_comp, dst_comp);
        GLSLF(3,     sums_%i[y*ws_stride[%i] + x] += sum;                     ,dst_comp, dst_comp);
    }
    GLSLC(2,     }                                                            );
    GLSLC(1, }                                                                );
}
224 
/**
 * Push constants for the weights pipeline. The member order, types and sizes
 * must match, field for field, the std430 pushConstants block emitted in
 * init_weights_pipeline().
 */
typedef struct HorizontalPushData {
    VkDeviceAddress integral_data;  /* device address of the integral-image buffer */
    VkDeviceAddress state_data;     /* device address of the prefix-sum state buffer */
    int32_t xoffs[TYPE_ELEMS];      /* research x offsets for this dispatch
                                     * (GLSL side declares uint — relies on
                                     *  two's-complement reinterpretation) */
    int32_t yoffs[TYPE_ELEMS];      /* research y offsets for this dispatch */
    uint32_t width[4];              /* per-plane (padded) widths */
    uint32_t height[4];             /* per-plane (padded) heights */
    uint32_t ws_stride[4];          /* per-component weights/sums row strides */
    int32_t patch_size[4];          /* per-component patch half-sizes */
    float strength[4];              /* per-component weight exponent scales */
    uint32_t int_stride;            /* integral-image row stride, in DTYPE elements */
} HorizontalPushData;
237 
/**
 * Build the "weights" compute pipeline: one shader that, per pixel component,
 * computes an integral image of squared pixel differences (a horizontal and a
 * vertical prefix-sum pass, order chosen by aspect ratio) and then derives
 * the NLMeans weights and weighted sums from it (see the insert_*_pass()
 * helpers above, which emit the GLSL).
 *
 * Returns 0 on success, a negative error code otherwise (via RET/fail).
 *
 * NOTE(review): this listing came from a doxygen extraction that dropped a
 * few source lines (flagged inline below); restore them from the original
 * file before compiling.
 */
static av_cold int init_weights_pipeline(FFVulkanContext *vkctx, FFVkExecPool *exec,
                                         /* NOTE(review): a parameter line is missing from this listing here;
                                          * the body uses pl and shd, presumably
                                          * "FFVulkanPipeline *pl, FFVkSPIRVShader *shd," — confirm upstream. */
                                         VkSampler sampler, FFVkSPIRVCompiler *spv,
                                         int width, int height, int t,
                                         const AVPixFmtDescriptor *desc,
                                         int planes, int *nb_rows)
{
    int err;
    uint8_t *spv_data;
    size_t spv_len;
    void *spv_opaque = NULL;
    /* NOTE(review): a declaration line is missing from this listing here;
     * desc_set is assigned below, presumably
     * "FFVulkanDescriptorSetBinding *desc_set;" — confirm upstream. */
    int max_dim = FFMAX(width, height);
    uint32_t max_wg = vkctx->props.properties.limits.maxComputeWorkGroupSize[0];
    int max_shm = vkctx->props.properties.limits.maxComputeSharedMemorySize;
    int wg_size, wg_rows;

    /* Round the max workgroup size to the previous power of two */
    max_wg = 1 << (31 - ff_clz(max_wg));
    wg_size = max_wg;
    wg_rows = 1;

    if (max_wg > max_dim) {
        wg_size = max_wg / (max_wg / max_dim);
    } else if (max_wg < max_dim) {
        /* First, make it fit */
        while (wg_size*wg_rows < max_dim)
            wg_rows++;

        /* Second, make sure there's enough shared memory */
        while ((wg_size * TYPE_SIZE + TYPE_SIZE + 2*4) > max_shm) {
            wg_size >>= 1;
            wg_rows++;
        }
    }

    RET(ff_vk_shader_init(pl, shd, "nlmeans_weights", VK_SHADER_STAGE_COMPUTE_BIT, 0));
    ff_vk_shader_set_compute_sizes(shd, wg_size, 1, 1);
    *nb_rows = wg_rows;  /* each invocation covers wg_rows rows/columns */

    /* Shader prelude: extensions, tunables, buffer + push-constant layout */
    if (t > 1)
        GLSLC(0, #extension GL_EXT_shader_atomic_float : require              );
    GLSLC(0, #extension GL_ARB_gpu_shader_int64 : require                     );
    GLSLC(0, #pragma use_vulkan_memory_model                                  );
    GLSLC(0, #extension GL_KHR_memory_scope_semantics : enable                );
    GLSLC(0,                                                                  );
    GLSLF(0, #define N_ROWS %i                                                ,*nb_rows);
    GLSLC(0, #define WG_SIZE (gl_WorkGroupSize.x)                             );
    GLSLF(0, #define LG_WG_SIZE %i                                            ,ff_log2(shd->local_size[0]));
    GLSLC(0, #define PARTITION_SIZE (N_ROWS*WG_SIZE)                          );
    GLSLF(0, #define DTYPE %s                                                 ,TYPE_NAME);
    GLSLF(0, #define T_ALIGN %i                                               ,TYPE_SIZE);
    GLSLC(0,                                                                  );
    GLSLC(0, layout(buffer_reference, buffer_reference_align = T_ALIGN) coherent buffer DataBuffer {  );
    GLSLC(1,     DTYPE v[];                                                   );
    GLSLC(0, };                                                               );
    GLSLC(0,                                                                  );
    GLSLC(0, layout(buffer_reference) buffer StateData;                       );
    GLSLC(0,                                                                  );
    /* Must mirror HorizontalPushData exactly (std430) */
    GLSLC(0, layout(push_constant, std430) uniform pushConstants {            );
    GLSLC(1,     coherent DataBuffer integral_data;                           );
    GLSLC(1,     StateData state;                                             );
    GLSLF(1,     uint xoffs[%i];                                              ,TYPE_ELEMS);
    GLSLF(1,     uint yoffs[%i];                                              ,TYPE_ELEMS);
    GLSLC(1,     uvec4 width;                                                 );
    GLSLC(1,     uvec4 height;                                                );
    GLSLC(1,     uvec4 ws_stride;                                             );
    GLSLC(1,     ivec4 patch_size;                                            );
    GLSLC(1,     vec4 strength;                                               );
    GLSLC(1,     uint int_stride;                                             );
    GLSLC(0, };                                                               );
    GLSLC(0,                                                                  );

    ff_vk_add_push_constant(pl, 0, sizeof(HorizontalPushData), VK_SHADER_STAGE_COMPUTE_BIT);

    /* Binding 0: sampled input planes; bindings 1..8: per-component
     * weights/sums storage buffers, interleaved */
    desc_set = (FFVulkanDescriptorSetBinding []) {
        {
            .name        = "input_img",
            .type        = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
            .dimensions  = 2,
            .elems       = planes,
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .samplers    = DUP_SAMPLER(sampler),
        },
        {
            .name        = "weights_buffer_0",
            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .buf_content = "float weights_0[];",
        },
        {
            .name        = "sums_buffer_0",
            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .buf_content = "float sums_0[];",
        },
        {
            .name        = "weights_buffer_1",
            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .buf_content = "float weights_1[];",
        },
        {
            .name        = "sums_buffer_1",
            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .buf_content = "float sums_1[];",
        },
        {
            .name        = "weights_buffer_2",
            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .buf_content = "float weights_2[];",
        },
        {
            .name        = "sums_buffer_2",
            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .buf_content = "float sums_2[];",
        },
        {
            .name        = "weights_buffer_3",
            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .buf_content = "float weights_3[];",
        },
        {
            .name        = "sums_buffer_3",
            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .buf_content = "float sums_3[];",
        },
    };
    RET(ff_vk_pipeline_descriptor_set_add(vkctx, pl, shd, desc_set, 1 + 2*desc->nb_components, 0, 0));

    /* NOTE(review): one or two lines are missing from this listing here —
     * presumably the inclusion of the shared prefix-sum GLSL source
     * (ff_source_prefix_sum_comp, declared above) which defines prefix_sum()
     * used by the passes. Confirm upstream. */
    GLSLC(0,                                                                  );
    GLSLC(0, void main()                                                      );
    GLSLC(0, {                                                                );
    GLSLC(1,     uint64_t offset;                                             );
    GLSLC(1,     DataBuffer dst;                                              );
    GLSLC(1,     float s1;                                                    );
    GLSLC(1,     DTYPE s2;                                                    );
    GLSLC(1,     int r;                                                       );
    GLSLC(1,     int x;                                                       );
    GLSLC(1,     int y;                                                       );
    GLSLC(1,     int p;                                                       );
    GLSLC(0,                                                                  );
    GLSLC(1,     DTYPE a;                                                     );
    GLSLC(1,     DTYPE b;                                                     );
    GLSLC(1,     DTYPE c;                                                     );
    GLSLC(1,     DTYPE d;                                                     );
    GLSLC(0,                                                                  );
    GLSLC(1,     DTYPE patch_diff;                                            );
    if (TYPE_ELEMS == 4) {
        GLSLC(1, vec4 src;                                                    );
        GLSLC(1, vec4 w;                                                      );
    } else {
        GLSLC(1, vec4 src[4];                                                 );
        GLSLC(1, vec4 w[4];                                                   );
    }
    GLSLC(1,     float w_sum;                                                 );
    GLSLC(1,     float sum;                                                   );
    GLSLC(0,                                                                  );
    GLSLC(1,     bool lt;                                                     );
    GLSLC(1,     bool gt;                                                     );
    GLSLC(0,                                                                  );

    /* Per component: run the longer dimension's prefix pass first so the
     * first (integrand-writing) pass parallelizes over the larger extent */
    for (int i = 0; i < desc->nb_components; i++) {
        int off = desc->comp[i].offset / (FFALIGN(desc->comp[i].depth, 8)/8);
        if (width > height) {
            insert_horizontal_pass(shd, *nb_rows, 1, desc->comp[i].plane, off);
            insert_vertical_pass(shd, *nb_rows, 0, desc->comp[i].plane, off);
            insert_weights_pass(shd, *nb_rows, 0, t, i, desc->comp[i].plane, off);
        } else {
            insert_vertical_pass(shd, *nb_rows, 1, desc->comp[i].plane, off);
            insert_horizontal_pass(shd, *nb_rows, 0, desc->comp[i].plane, off);
            insert_weights_pass(shd, *nb_rows, 1, t, i, desc->comp[i].plane, off);
        }
    }

    GLSLC(0, }                                                                );

    RET(spv->compile_shader(spv, vkctx, shd, &spv_data, &spv_len, "main", &spv_opaque));
    RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main"));

    RET(ff_vk_init_compute_pipeline(vkctx, pl, shd));
    RET(ff_vk_exec_pipeline_register(vkctx, exec, pl));

fail:
    if (spv_opaque)
        spv->free_shader(spv, &spv_opaque);

    return err;
}
433 
/**
 * Push constants for the denoise pipeline; must match the std430
 * pushConstants block emitted in init_denoise_pipeline() (one stride per
 * component of the weights/sums buffers).
 */
typedef struct DenoisePushData {
    uint32_t ws_stride[4];
} DenoisePushData;
437 
/**
 * Build the "denoise" compute pipeline: a simple per-pixel shader that reads
 * the accumulated weights/sums produced by the weights pipeline and writes
 * the normalized result, (sum + src*255) / (1 + w_sum) / 255, to the output
 * image.
 *
 * Returns 0 on success, a negative error code otherwise (via RET/fail).
 *
 * NOTE(review): this listing came from a doxygen extraction that dropped a
 * few source lines (flagged inline below); restore them from the original
 * file before compiling.
 */
static av_cold int init_denoise_pipeline(FFVulkanContext *vkctx, FFVkExecPool *exec,
                                         /* NOTE(review): a parameter line is missing from this listing here;
                                          * the body uses pl and shd, presumably
                                          * "FFVulkanPipeline *pl, FFVkSPIRVShader *shd," — confirm upstream. */
                                         VkSampler sampler, FFVkSPIRVCompiler *spv,
                                         const AVPixFmtDescriptor *desc, int planes)
{
    int err;
    uint8_t *spv_data;
    size_t spv_len;
    void *spv_opaque = NULL;
    /* NOTE(review): a declaration line is missing from this listing here;
     * desc_set is assigned below, presumably
     * "FFVulkanDescriptorSetBinding *desc_set;" — confirm upstream. */

    RET(ff_vk_shader_init(pl, shd, "nlmeans_denoise",
                          VK_SHADER_STAGE_COMPUTE_BIT, 0));

    /* One invocation per pixel, 32x32 workgroups */
    ff_vk_shader_set_compute_sizes(shd, 32, 32, 1);

    /* Must mirror DenoisePushData exactly (std430) */
    GLSLC(0, layout(push_constant, std430) uniform pushConstants {        );
    GLSLC(1,     uvec4 ws_stride;                                         );
    GLSLC(0, };                                                           );

    ff_vk_add_push_constant(pl, 0, sizeof(DenoisePushData), VK_SHADER_STAGE_COMPUTE_BIT);

    /* Bindings 0/1: input and output planes; bindings 2..9: the read-only
     * per-component weights/sums buffers filled by the weights pipeline */
    desc_set = (FFVulkanDescriptorSetBinding []) {
        {
            .name        = "input_img",
            .type        = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
            .dimensions  = 2,
            .elems       = planes,
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .samplers    = DUP_SAMPLER(sampler),
        },
        {
            .name        = "output_img",
            .type        = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
            .mem_layout  = ff_vk_shader_rep_fmt(vkctx->output_format),
            .mem_quali   = "writeonly",
            .dimensions  = 2,
            .elems       = planes,
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
        },
        {
            .name        = "weights_buffer_0",
            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .mem_quali   = "readonly",
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .buf_content = "float weights_0[];",
        },
        {
            .name        = "sums_buffer_0",
            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .mem_quali   = "readonly",
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .buf_content = "float sums_0[];",
        },
        {
            .name        = "weights_buffer_1",
            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .mem_quali   = "readonly",
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .buf_content = "float weights_1[];",
        },
        {
            .name        = "sums_buffer_1",
            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .mem_quali   = "readonly",
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .buf_content = "float sums_1[];",
        },
        {
            .name        = "weights_buffer_2",
            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .mem_quali   = "readonly",
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .buf_content = "float weights_2[];",
        },
        {
            .name        = "sums_buffer_2",
            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .mem_quali   = "readonly",
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .buf_content = "float sums_2[];",
        },
        {
            .name        = "weights_buffer_3",
            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .mem_quali   = "readonly",
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .buf_content = "float weights_3[];",
        },
        {
            .name        = "sums_buffer_3",
            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .mem_quali   = "readonly",
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .buf_content = "float sums_3[];",
        },
    };
    RET(ff_vk_pipeline_descriptor_set_add(vkctx, pl, shd, desc_set, 2 + 2*desc->nb_components, 0, 0));

    GLSLC(0, void main()                                                  );
    GLSLC(0, {                                                            );
    GLSLC(1,     ivec2 size;                                              );
    GLSLC(1,     const ivec2 pos = ivec2(gl_GlobalInvocationID.xy);       );
    GLSLC(0,                                                              );
    GLSLC(1,     float w_sum;                                             );
    GLSLC(1,     float sum;                                               );
    GLSLC(1,     vec4 src;                                                );
    GLSLC(1,     vec4 r;                                                  );
    GLSLC(0,                                                              );

    /* For each plane, normalize every component stored in it */
    for (int i = 0; i < planes; i++) {
        GLSLF(1, src = texture(input_img[%i], pos);                       ,i);
        for (int c = 0; c < desc->nb_components; c++) {
            if (desc->comp[c].plane == i) {
                int off = desc->comp[c].offset / (FFALIGN(desc->comp[c].depth, 8)/8);
                GLSLF(1, w_sum = weights_%i[pos.y*ws_stride[%i] + pos.x]; ,c, c);
                GLSLF(1, sum = sums_%i[pos.y*ws_stride[%i] + pos.x];      ,c, c);
                GLSLF(1, r[%i] = (sum + src[%i]*255) / (1.0 + w_sum) / 255; ,off, off);
                GLSLC(0,                                                  );
            }
        }
        GLSLF(1, imageStore(output_img[%i], pos, r);                      ,i);
        GLSLC(0,                                                          );
    }

    GLSLC(0, }                                                            );

    RET(spv->compile_shader(spv, vkctx, shd, &spv_data, &spv_len, "main", &spv_opaque));
    RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main"));

    RET(ff_vk_init_compute_pipeline(vkctx, pl, shd));
    RET(ff_vk_exec_pipeline_register(vkctx, exec, pl));

fail:
    if (spv_opaque)
        spv->free_shader(spv, &spv_opaque);

    return err;
}
577 
579 {
580  int rad, err;
581  int xcnt = 0, ycnt = 0;
582  NLMeansVulkanContext *s = ctx->priv;
583  FFVulkanContext *vkctx = &s->vkctx;
584  const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
585  FFVkSPIRVCompiler *spv;
586 
587  const AVPixFmtDescriptor *desc;
589  if (!desc)
590  return AVERROR(EINVAL);
591 
592  if (!(s->opts.r & 1)) {
593  s->opts.r |= 1;
594  av_log(ctx, AV_LOG_WARNING, "Research size should be odd, setting to %i",
595  s->opts.r);
596  }
597 
598  if (!(s->opts.p & 1)) {
599  s->opts.p |= 1;
600  av_log(ctx, AV_LOG_WARNING, "Patch size should be odd, setting to %i",
601  s->opts.p);
602  }
603 
604  for (int i = 0; i < 4; i++) {
605  double str = (s->opts.sc[i] > 1.0) ? s->opts.sc[i] : s->opts.s;
606  int ps = (s->opts.pc[i] ? s->opts.pc[i] : s->opts.p);
607  str = 10.0f*str;
608  str *= -str;
609  str = 255.0*255.0 / str;
610  s->strength[i] = str;
611  if (!(ps & 1)) {
612  ps |= 1;
613  av_log(ctx, AV_LOG_WARNING, "Patch size should be odd, setting to %i",
614  ps);
615  }
616  s->patch[i] = ps / 2;
617  }
618 
619  rad = s->opts.r/2;
620  s->nb_offsets = (2*rad + 1)*(2*rad + 1) - 1;
621  s->xoffsets = av_malloc(s->nb_offsets*sizeof(*s->xoffsets));
622  s->yoffsets = av_malloc(s->nb_offsets*sizeof(*s->yoffsets));
623  s->nb_offsets = 0;
624 
625  for (int x = -rad; x <= rad; x++) {
626  for (int y = -rad; y <= rad; y++) {
627  if (!x && !y)
628  continue;
629 
630  s->xoffsets[xcnt++] = x;
631  s->yoffsets[ycnt++] = y;
632  s->nb_offsets++;
633  }
634  }
635 
636  s->opts.t = FFMIN(s->opts.t, (FFALIGN(s->nb_offsets, TYPE_ELEMS) / TYPE_ELEMS));
637  if (!vkctx->atomic_float_feats.shaderBufferFloat32AtomicAdd) {
638  av_log(ctx, AV_LOG_WARNING, "Device doesn't support atomic float adds, "
639  "disabling dispatch parallelism\n");
640  s->opts.t = 1;
641  }
642 
643  if (!vkctx->feats_12.vulkanMemoryModel) {
644  av_log(ctx, AV_LOG_ERROR, "Device doesn't support the Vulkan memory model!");
645  return AVERROR(EINVAL);;
646  }
647 
648  spv = ff_vk_spirv_init();
649  if (!spv) {
650  av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
651  return AVERROR_EXTERNAL;
652  }
653 
654  ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT);
655  RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, 1, 0, 0, 0, NULL));
656  RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_NEAREST));
657 
658  RET(init_weights_pipeline(vkctx, &s->e, &s->pl_weights, &s->shd_weights, s->sampler,
659  spv, s->vkctx.output_width, s->vkctx.output_height,
660  s->opts.t, desc, planes, &s->pl_weights_rows));
661 
662  RET(init_denoise_pipeline(vkctx, &s->e, &s->pl_denoise, &s->shd_denoise, s->sampler,
663  spv, desc, planes));
664 
665  av_log(ctx, AV_LOG_VERBOSE, "Filter initialized, %i x/y offsets, %i dispatches, %i parallel\n",
666  s->nb_offsets, (FFALIGN(s->nb_offsets, TYPE_ELEMS) / TYPE_ELEMS) + 1, s->opts.t);
667 
668  s->initialized = 1;
669 
670 fail:
671  if (spv)
672  spv->uninit(&spv);
673 
674  return err;
675 }
676 
677 static int denoise_pass(NLMeansVulkanContext *s, FFVkExecContext *exec,
678  FFVkBuffer *ws_vk, uint32_t ws_stride[4])
679 {
680  FFVulkanContext *vkctx = &s->vkctx;
681  FFVulkanFunctions *vk = &vkctx->vkfn;
682  VkBufferMemoryBarrier2 buf_bar[8];
683  int nb_buf_bar = 0;
684 
685  /* Denoise pass pipeline */
686  ff_vk_exec_bind_pipeline(vkctx, exec, &s->pl_denoise);
687 
688  /* Push data */
689  ff_vk_update_push_exec(vkctx, exec, &s->pl_denoise, VK_SHADER_STAGE_COMPUTE_BIT,
690  0, sizeof(DenoisePushData), &(DenoisePushData) {
691  { ws_stride[0], ws_stride[1], ws_stride[2], ws_stride[3] },
692  });
693 
694  buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
695  .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
696  .srcStageMask = ws_vk->stage,
697  .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
698  .srcAccessMask = ws_vk->access,
699  .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT,
700  .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
701  .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
702  .buffer = ws_vk->buf,
703  .size = ws_vk->size,
704  .offset = 0,
705  };
706 
707  vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
708  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
709  .pBufferMemoryBarriers = buf_bar,
710  .bufferMemoryBarrierCount = nb_buf_bar,
711  });
712  ws_vk->stage = buf_bar[0].dstStageMask;
713  ws_vk->access = buf_bar[0].dstAccessMask;
714 
715  /* End of denoise pass */
716  vk->CmdDispatch(exec->buf,
717  FFALIGN(vkctx->output_width, s->pl_denoise.wg_size[0])/s->pl_denoise.wg_size[0],
718  FFALIGN(vkctx->output_height, s->pl_denoise.wg_size[1])/s->pl_denoise.wg_size[1],
719  1);
720 
721  return 0;
722 }
723 
724 static int nlmeans_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
725 {
726  int err;
727  AVFrame *out = NULL;
728  AVFilterContext *ctx = link->dst;
729  NLMeansVulkanContext *s = ctx->priv;
730  AVFilterLink *outlink = ctx->outputs[0];
731  FFVulkanContext *vkctx = &s->vkctx;
732  FFVulkanFunctions *vk = &vkctx->vkfn;
733 
734  const AVPixFmtDescriptor *desc;
735  int plane_widths[4];
736  int plane_heights[4];
737 
738  /* Integral */
739  AVBufferRef *state_buf;
740  FFVkBuffer *state_vk;
741  AVBufferRef *integral_buf;
742  FFVkBuffer *integral_vk;
743  uint32_t int_stride;
744  size_t int_size;
745  size_t state_size;
746  int t_offset = 0;
747 
748  /* Weights/sums */
749  AVBufferRef *ws_buf;
750  FFVkBuffer *ws_vk;
751  VkDeviceAddress weights_addr[4];
752  VkDeviceAddress sums_addr[4];
753  uint32_t ws_stride[4];
754  size_t ws_size[4];
755  size_t ws_total_size = 0;
756 
757  FFVkExecContext *exec;
758  VkImageView in_views[AV_NUM_DATA_POINTERS];
759  VkImageView out_views[AV_NUM_DATA_POINTERS];
760  VkImageMemoryBarrier2 img_bar[8];
761  int nb_img_bar = 0;
762  VkBufferMemoryBarrier2 buf_bar[8];
763  int nb_buf_bar = 0;
764 
765  if (!s->initialized)
766  RET(init_filter(ctx));
767 
769  if (!desc)
770  return AVERROR(EINVAL);
771 
772  /* Integral image */
773  int_stride = s->pl_weights.wg_size[0]*s->pl_weights_rows;
774  int_size = int_stride * int_stride * TYPE_SIZE;
775  state_size = int_stride * 3 *TYPE_SIZE;
776 
777  /* Plane dimensions */
778  for (int i = 0; i < desc->nb_components; i++) {
779  plane_widths[i] = !i || (i == 3) ? vkctx->output_width : AV_CEIL_RSHIFT(vkctx->output_width, desc->log2_chroma_w);
780  plane_heights[i] = !i || (i == 3) ? vkctx->output_height : AV_CEIL_RSHIFT(vkctx->output_height, desc->log2_chroma_w);
781  plane_widths[i] = FFALIGN(plane_widths[i], s->pl_denoise.wg_size[0]);
782  plane_heights[i] = FFALIGN(plane_heights[i], s->pl_denoise.wg_size[1]);
783 
784  ws_stride[i] = plane_widths[i];
785  ws_size[i] = ws_stride[i] * plane_heights[i] * sizeof(float);
786  ws_total_size += ws_size[i];
787  }
788 
789  /* Buffers */
790  err = ff_vk_get_pooled_buffer(&s->vkctx, &s->integral_buf_pool, &integral_buf,
791  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
792  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
793  NULL,
794  s->opts.t * int_size,
795  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
796  if (err < 0)
797  return err;
798  integral_vk = (FFVkBuffer *)integral_buf->data;
799 
800  err = ff_vk_get_pooled_buffer(&s->vkctx, &s->state_buf_pool, &state_buf,
801  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
802  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
803  NULL,
804  s->opts.t * state_size,
805  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
806  if (err < 0)
807  return err;
808  state_vk = (FFVkBuffer *)state_buf->data;
809 
810  err = ff_vk_get_pooled_buffer(&s->vkctx, &s->ws_buf_pool, &ws_buf,
811  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
812  VK_BUFFER_USAGE_TRANSFER_DST_BIT |
813  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
814  NULL,
815  ws_total_size * 2,
816  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
817  if (err < 0)
818  return err;
819  ws_vk = (FFVkBuffer *)ws_buf->data;
820 
821  weights_addr[0] = ws_vk->address;
822  sums_addr[0] = ws_vk->address + ws_total_size;
823  for (int i = 1; i < desc->nb_components; i++) {
824  weights_addr[i] = weights_addr[i - 1] + ws_size[i - 1];
825  sums_addr[i] = sums_addr[i - 1] + ws_size[i - 1];
826  }
827 
828  /* Output frame */
829  out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
830  if (!out) {
831  err = AVERROR(ENOMEM);
832  goto fail;
833  }
834 
835  /* Execution context */
836  exec = ff_vk_exec_get(&s->e);
837  ff_vk_exec_start(vkctx, exec);
838 
839  /* Dependencies */
840  RET(ff_vk_exec_add_dep_frame(vkctx, exec, in,
841  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
842  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
843  RET(ff_vk_exec_add_dep_frame(vkctx, exec, out,
844  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
845  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
846  RET(ff_vk_exec_add_dep_buf(vkctx, exec, &integral_buf, 1, 0));
847  RET(ff_vk_exec_add_dep_buf(vkctx, exec, &state_buf, 1, 0));
848  RET(ff_vk_exec_add_dep_buf(vkctx, exec, &ws_buf, 1, 0));
849 
850  /* Input frame prep */
851  RET(ff_vk_create_imageviews(vkctx, exec, in_views, in));
852  ff_vk_update_descriptor_img_array(vkctx, &s->pl_weights, exec, in, in_views, 0, 0,
853  VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
854  s->sampler);
855  ff_vk_frame_barrier(vkctx, exec, in, img_bar, &nb_img_bar,
856  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
857  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
858  VK_ACCESS_SHADER_READ_BIT,
859  VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
860  VK_QUEUE_FAMILY_IGNORED);
861 
862  /* Output frame prep */
863  RET(ff_vk_create_imageviews(vkctx, exec, out_views, out));
864  ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar,
865  VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
866  VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
867  VK_ACCESS_SHADER_WRITE_BIT,
868  VK_IMAGE_LAYOUT_GENERAL,
869  VK_QUEUE_FAMILY_IGNORED);
870 
871  buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
872  .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
873  .srcStageMask = ws_vk->stage,
874  .dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT,
875  .srcAccessMask = ws_vk->access,
876  .dstAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT,
877  .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
878  .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
879  .buffer = ws_vk->buf,
880  .size = ws_vk->size,
881  .offset = 0,
882  };
883 
884  vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
885  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
886  .pImageMemoryBarriers = img_bar,
887  .imageMemoryBarrierCount = nb_img_bar,
888  .pBufferMemoryBarriers = buf_bar,
889  .bufferMemoryBarrierCount = nb_buf_bar,
890  });
891  ws_vk->stage = buf_bar[0].dstStageMask;
892  ws_vk->access = buf_bar[0].dstAccessMask;
893 
894  /* Weights/sums buffer zeroing */
895  vk->CmdFillBuffer(exec->buf, ws_vk->buf, 0, ws_vk->size, 0x0);
896 
897  buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
898  .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
899  .srcStageMask = ws_vk->stage,
900  .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
901  .srcAccessMask = ws_vk->access,
902  .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
903  VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
904  .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
905  .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
906  .buffer = ws_vk->buf,
907  .size = ws_vk->size,
908  .offset = 0,
909  };
910 
911  vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
912  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
913  .pBufferMemoryBarriers = buf_bar,
914  .bufferMemoryBarrierCount = nb_buf_bar,
915  });
916  ws_vk->stage = buf_bar[0].dstStageMask;
917  ws_vk->access = buf_bar[0].dstAccessMask;
918 
919  /* Update weights descriptors */
920  ff_vk_update_descriptor_img_array(vkctx, &s->pl_weights, exec, in, in_views, 0, 0,
921  VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
922  s->sampler);
923  for (int i = 0; i < desc->nb_components; i++) {
924  RET(ff_vk_set_descriptor_buffer(&s->vkctx, &s->pl_weights, exec, 0, 1 + i*2 + 0, 0,
925  weights_addr[i], ws_size[i],
926  VK_FORMAT_UNDEFINED));
927  RET(ff_vk_set_descriptor_buffer(&s->vkctx, &s->pl_weights, exec, 0, 1 + i*2 + 1, 0,
928  sums_addr[i], ws_size[i],
929  VK_FORMAT_UNDEFINED));
930  }
931 
932  /* Update denoise descriptors */
933  ff_vk_update_descriptor_img_array(vkctx, &s->pl_denoise, exec, in, in_views, 0, 0,
934  VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
935  s->sampler);
936  ff_vk_update_descriptor_img_array(vkctx, &s->pl_denoise, exec, out, out_views, 0, 1,
937  VK_IMAGE_LAYOUT_GENERAL, s->sampler);
938  for (int i = 0; i < desc->nb_components; i++) {
939  RET(ff_vk_set_descriptor_buffer(&s->vkctx, &s->pl_denoise, exec, 0, 2 + i*2 + 0, 0,
940  weights_addr[i], ws_size[i],
941  VK_FORMAT_UNDEFINED));
942  RET(ff_vk_set_descriptor_buffer(&s->vkctx, &s->pl_denoise, exec, 0, 2 + i*2 + 1, 0,
943  sums_addr[i], ws_size[i],
944  VK_FORMAT_UNDEFINED));
945  }
946 
947  /* Weights pipeline */
948  ff_vk_exec_bind_pipeline(vkctx, exec, &s->pl_weights);
949 
950  for (int i = 0; i < s->nb_offsets; i += TYPE_ELEMS) {
951  int *xoffs = s->xoffsets + i;
952  int *yoffs = s->yoffsets + i;
953  HorizontalPushData pd = {
954  integral_vk->address + t_offset*int_size,
955  state_vk->address + t_offset*state_size,
956  { 0 },
957  { 0 },
958  { plane_widths[0], plane_widths[1], plane_widths[2], plane_widths[3] },
959  { plane_heights[0], plane_heights[1], plane_heights[2], plane_heights[3] },
960  { ws_stride[0], ws_stride[1], ws_stride[2], ws_stride[3] },
961  { s->patch[0], s->patch[1], s->patch[2], s->patch[3] },
962  { s->strength[0], s->strength[1], s->strength[2], s->strength[2], },
963  int_stride,
964  };
965 
966  memcpy(pd.xoffs, xoffs, sizeof(pd.xoffs));
967  memcpy(pd.yoffs, yoffs, sizeof(pd.yoffs));
968 
969  /* Put a barrier once we run out of parallelism buffers */
970  if (!t_offset) {
971  nb_buf_bar = 0;
972  /* Buffer prep/sync */
973  buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
974  .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
975  .srcStageMask = integral_vk->stage,
976  .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
977  .srcAccessMask = integral_vk->access,
978  .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
979  VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
980  .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
981  .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
982  .buffer = integral_vk->buf,
983  .size = integral_vk->size,
984  .offset = 0,
985  };
986  buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
987  .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
988  .srcStageMask = state_vk->stage,
989  .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
990  .srcAccessMask = state_vk->access,
991  .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
992  VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
993  .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
994  .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
995  .buffer = state_vk->buf,
996  .size = state_vk->size,
997  .offset = 0,
998  };
999 
1000  vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
1001  .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
1002  .pBufferMemoryBarriers = buf_bar,
1003  .bufferMemoryBarrierCount = nb_buf_bar,
1004  });
1005  integral_vk->stage = buf_bar[0].dstStageMask;
1006  integral_vk->access = buf_bar[0].dstAccessMask;
1007  state_vk->stage = buf_bar[1].dstStageMask;
1008  state_vk->access = buf_bar[1].dstAccessMask;
1009  }
1010  t_offset = (t_offset + 1) % s->opts.t;
1011 
1012  /* Push data */
1013  ff_vk_update_push_exec(vkctx, exec, &s->pl_weights, VK_SHADER_STAGE_COMPUTE_BIT,
1014  0, sizeof(pd), &pd);
1015 
1016  /* End of horizontal pass */
1017  vk->CmdDispatch(exec->buf, 1, 1, 1);
1018  }
1019 
1020  RET(denoise_pass(s, exec, ws_vk, ws_stride));
1021 
1022  err = ff_vk_exec_submit(vkctx, exec);
1023  if (err < 0)
1024  return err;
1025 
1026  err = av_frame_copy_props(out, in);
1027  if (err < 0)
1028  goto fail;
1029 
1030  av_frame_free(&in);
1031 
1032  return ff_filter_frame(outlink, out);
1033 
1034 fail:
1035  av_frame_free(&in);
1036  av_frame_free(&out);
1037  return err;
1038 }
1039 
1040 static void nlmeans_vulkan_uninit(AVFilterContext *avctx)
1041 {
1042  NLMeansVulkanContext *s = avctx->priv;
1043  FFVulkanContext *vkctx = &s->vkctx;
1044  FFVulkanFunctions *vk = &vkctx->vkfn;
1045 
1046  ff_vk_exec_pool_free(vkctx, &s->e);
1047  ff_vk_pipeline_free(vkctx, &s->pl_weights);
1048  ff_vk_shader_free(vkctx, &s->shd_weights);
1049  ff_vk_pipeline_free(vkctx, &s->pl_denoise);
1050  ff_vk_shader_free(vkctx, &s->shd_denoise);
1051 
1052  av_buffer_pool_uninit(&s->integral_buf_pool);
1053  av_buffer_pool_uninit(&s->state_buf_pool);
1054  av_buffer_pool_uninit(&s->ws_buf_pool);
1055 
1056  if (s->sampler)
1057  vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler,
1058  vkctx->hwctx->alloc);
1059 
1060  ff_vk_uninit(&s->vkctx);
1061 
1062  av_freep(&s->xoffsets);
1063  av_freep(&s->yoffsets);
1064 
1065  s->initialized = 0;
1066 }
1067 
#define OFFSET(x) offsetof(NLMeansVulkanContext, x)
#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
/* User-visible options. "s"/"p" apply to all components; "sN"/"pN" override
 * strength/patch size for an individual component (0 = use the global value).
 * Defaults: patch 3*2+1 = 7, research radius 7*2+1 = 15. */
static const AVOption nlmeans_vulkan_options[] = {
    { "s",  "denoising strength for all components", OFFSET(opts.s), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS },
    { "p",  "patch size for all components", OFFSET(opts.p), AV_OPT_TYPE_INT, { .i64 = 3*2+1 }, 0, 99, FLAGS },
    { "r",  "research window radius", OFFSET(opts.r), AV_OPT_TYPE_INT, { .i64 = 7*2+1 }, 0, 99, FLAGS },
    /* Number of in-flight integral/state buffer slots used before a barrier */
    { "t",  "parallelism", OFFSET(opts.t), AV_OPT_TYPE_INT, { .i64 = 36 }, 1, 168, FLAGS },

    { "s1", "denoising strength for component 1", OFFSET(opts.sc[0]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS },
    { "s2", "denoising strength for component 2", OFFSET(opts.sc[1]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS },
    { "s3", "denoising strength for component 3", OFFSET(opts.sc[2]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS },
    { "s4", "denoising strength for component 4", OFFSET(opts.sc[3]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS },

    { "p1", "patch size for component 1", OFFSET(opts.pc[0]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },
    { "p2", "patch size for component 2", OFFSET(opts.pc[1]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },
    { "p3", "patch size for component 3", OFFSET(opts.pc[2]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },
    { "p4", "patch size for component 4", OFFSET(opts.pc[3]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS },

    { NULL }
};
1088 
AVFILTER_DEFINE_CLASS(nlmeans_vulkan);

/* Single video input pad; frames arrive as Vulkan hardware frames. */
static const AVFilterPad nlmeans_vulkan_inputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_VIDEO,
        .filter_frame = &nlmeans_vulkan_filter_frame,
        .config_props = &ff_vk_filter_config_input,
    },
};
1099 
/* Single video output pad; output frame context is configured by the
 * common Vulkan filter helper. */
static const AVFilterPad nlmeans_vulkan_outputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_VIDEO,
        .config_props = &ff_vk_filter_config_output,
    },
};
1107 
1109  .name = "nlmeans_vulkan",
1110  .description = NULL_IF_CONFIG_SMALL("Non-local means denoiser (Vulkan)"),
1111  .priv_size = sizeof(NLMeansVulkanContext),
1112  .init = &ff_vk_filter_init,
1113  .uninit = &nlmeans_vulkan_uninit,
1114  FILTER_INPUTS(nlmeans_vulkan_inputs),
1115  FILTER_OUTPUTS(nlmeans_vulkan_outputs),
1117  .priv_class = &nlmeans_vulkan_class,
1118  .flags = AVFILTER_FLAG_HWDEVICE,
1119  .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
1120 };
ff_get_video_buffer
AVFrame * ff_get_video_buffer(AVFilterLink *link, int w, int h)
Request a picture buffer with a specific set of permissions.
Definition: video.c:108
NLMeansVulkanContext::nlmeans_opts::p
int p
Definition: vf_nlmeans_vulkan.c:61
AV_LOG_WARNING
#define AV_LOG_WARNING
Something somehow does not look correct.
Definition: log.h:186
ff_vk_exec_get
FFVkExecContext * ff_vk_exec_get(FFVkExecPool *pool)
Retrieve an execution pool.
Definition: vulkan.c:494
ff_vk_update_descriptor_img_array
void ff_vk_update_descriptor_img_array(FFVulkanContext *s, FFVulkanPipeline *pl, FFVkExecContext *e, AVFrame *f, VkImageView *views, int set, int binding, VkImageLayout layout, VkSampler sampler)
Definition: vulkan.c:1746
ff_vk_pipeline_free
void ff_vk_pipeline_free(FFVulkanContext *s, FFVulkanPipeline *pl)
Definition: vulkan.c:1863
FFVulkanContext::output_height
int output_height
Definition: vulkan.h:265
r
const char * r
Definition: vf_curves.c:126
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
opt.h
NLMeansVulkanContext::opts
struct NLMeansVulkanContext::nlmeans_opts opts
NLMeansVulkanContext::sampler
VkSampler sampler
Definition: vf_nlmeans_vulkan.c:38
out
FILE * out
Definition: movenc.c:54
FF_FILTER_FLAG_HWFRAME_AWARE
#define FF_FILTER_FLAG_HWFRAME_AWARE
The filter is aware of hardware frames, and any hardware frame context should not be automatically pr...
Definition: internal.h:364
comp
static void comp(unsigned char *dst, ptrdiff_t dst_stride, unsigned char *src, ptrdiff_t src_stride, int add)
Definition: eamad.c:80
AVBufferPool
The buffer pool.
Definition: buffer_internal.h:88
ff_filter_frame
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:978
av_pix_fmt_desc_get
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:2936
AVBufferRef::data
uint8_t * data
The data buffer.
Definition: buffer.h:90
FFVkBuffer::access
VkAccessFlags2 access
Definition: vulkan.h:105
ff_vk_qf_init
int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, VkQueueFlagBits dev_family)
Chooses a QF and loads it into a context.
Definition: vulkan.c:225
FFVkBuffer::stage
VkPipelineStageFlags2 stage
Definition: vulkan.h:104
ff_clz
#define ff_clz
Definition: intmath.h:143
av_frame_free
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:100
NLMeansVulkanContext::integral_buf_pool
AVBufferPool * integral_buf_pool
Definition: vf_nlmeans_vulkan.c:40
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:340
ff_vk_filter_init
int ff_vk_filter_init(AVFilterContext *avctx)
General lavfi IO functions.
Definition: vulkan_filter.c:214
w
uint8_t w
Definition: llviddspenc.c:38
NLMeansVulkanContext::xoffsets
int * xoffsets
Definition: vf_nlmeans_vulkan.c:51
ff_vk_shader_create
int ff_vk_shader_create(FFVulkanContext *s, FFVkSPIRVShader *shd, uint8_t *spirv, size_t spirv_size, const char *entrypoint)
Definition: vulkan.c:1408
AVOption
AVOption.
Definition: opt.h:251
b
#define b
Definition: input.c:41
AV_LOG_VERBOSE
#define AV_LOG_VERBOSE
Detailed information.
Definition: log.h:196
FFVkBuffer::address
VkDeviceAddress address
Definition: vulkan.h:101
NLMeansVulkanContext::strength
float strength[4]
Definition: vf_nlmeans_vulkan.c:54
ff_vk_uninit
void ff_vk_uninit(FFVulkanContext *s)
Frees main context.
Definition: vulkan.c:1892
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
FFVkSPIRVCompiler::uninit
void(* uninit)(struct FFVkSPIRVCompiler **ctx)
Definition: vulkan_spirv.h:33
AVFilter::name
const char * name
Filter name.
Definition: avfilter.h:170
NLMeansVulkanContext::initialized
int initialized
Definition: vf_nlmeans_vulkan.c:35
ff_vk_pipeline_descriptor_set_add
int ff_vk_pipeline_descriptor_set_add(FFVulkanContext *s, FFVulkanPipeline *pl, FFVkSPIRVShader *shd, FFVulkanDescriptorSetBinding *desc, int nb, int read_only, int print_to_shader_only)
Add descriptor to a pipeline.
Definition: vulkan.c:1458
ff_vk_shader_set_compute_sizes
void ff_vk_shader_set_compute_sizes(FFVkSPIRVShader *shd, int x, int y, int z)
Definition: vulkan.c:1366
video.h
AV_PIX_FMT_VULKAN
@ AV_PIX_FMT_VULKAN
Vulkan hardware images.
Definition: pixfmt.h:376
ff_vk_exec_add_dep_frame
int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f, VkPipelineStageFlagBits2 wait_stage, VkPipelineStageFlagBits2 signal_stage)
Definition: vulkan.c:596
FFVkBuffer::buf
VkBuffer buf
Definition: vulkan.h:97
av_malloc
#define av_malloc(s)
Definition: tableprint_vlc.h:30
NLMeansVulkanContext::yoffsets
int * yoffsets
Definition: vf_nlmeans_vulkan.c:52
av_pix_fmt_count_planes
int av_pix_fmt_count_planes(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:2976
AVVulkanDeviceContext::alloc
const VkAllocationCallbacks * alloc
Custom memory allocator, else NULL.
Definition: hwcontext_vulkan.h:48
ff_vk_add_push_constant
int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size, VkShaderStageFlagBits stage)
Add/update push constants for execution.
Definition: vulkan.c:1138
AVFilterContext::priv
void * priv
private data for use by the filter
Definition: avfilter.h:412
fail
#define fail()
Definition: checkasm.h:138
insert_weights_pass
static void insert_weights_pass(FFVkSPIRVShader *shd, int nb_rows, int vert, int t, int dst_comp, int plane, int comp)
Definition: vf_nlmeans_vulkan.c:147
vulkan_filter.h
ff_source_prefix_sum_comp
const char * ff_source_prefix_sum_comp
FFVulkanContext::atomic_float_feats
VkPhysicalDeviceShaderAtomicFloatFeaturesEXT atomic_float_feats
Definition: vulkan.h:248
AVFilterPad
A filter pad used for either input or output.
Definition: internal.h:47
first
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But first
Definition: rate_distortion.txt:12
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:180
av_cold
#define av_cold
Definition: attributes.h:90
float
float
Definition: af_crystalizer.c:121
FFVulkanContext::output_width
int output_width
Definition: vulkan.h:264
NLMeansVulkanContext::ws_buf_pool
AVBufferPool * ws_buf_pool
Definition: vf_nlmeans_vulkan.c:42
width
#define width
s
#define s(width, name)
Definition: cbs_vp9.c:198
AV_CEIL_RSHIFT
#define AV_CEIL_RSHIFT(a, b)
Definition: common.h:51
AV_OPT_TYPE_DOUBLE
@ AV_OPT_TYPE_DOUBLE
Definition: opt.h:227
s1
#define s1
Definition: regdef.h:38
init
int(* init)(AVBSFContext *ctx)
Definition: dts2pts_bsf.c:365
FLAGS
#define FLAGS
ctx
AVFormatContext * ctx
Definition: movenc.c:48
ff_vf_nlmeans_vulkan
const AVFilter ff_vf_nlmeans_vulkan
ff_vk_exec_add_dep_buf
int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e, AVBufferRef **deps, int nb_deps, int ref)
Execution dependency management.
Definition: vulkan.c:572
ff_vk_exec_bind_pipeline
void ff_vk_exec_bind_pipeline(FFVulkanContext *s, FFVkExecContext *e, FFVulkanPipeline *pl)
Definition: vulkan.c:1841
FFVkSPIRVCompiler::compile_shader
int(* compile_shader)(struct FFVkSPIRVCompiler *ctx, void *avctx, struct FFVkSPIRVShader *shd, uint8_t **data, size_t *size, const char *entrypoint, void **opaque)
Definition: vulkan_spirv.h:29
ff_vk_exec_pool_free
void ff_vk_exec_pool_free(FFVulkanContext *s, FFVkExecPool *pool)
Definition: vulkan.c:256
FILTER_INPUTS
#define FILTER_INPUTS(array)
Definition: internal.h:192
link
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a link
Definition: filter_design.txt:23
opts
AVDictionary * opts
Definition: movenc.c:50
NLMeansVulkanContext::pl_denoise
FFVulkanPipeline pl_denoise
Definition: vf_nlmeans_vulkan.c:48
NULL
#define NULL
Definition: coverity.c:32
av_frame_copy_props
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
Definition: frame.c:736
GLSLD
#define GLSLD(D)
Definition: vulkan.h:60
av_buffer_pool_uninit
void av_buffer_pool_uninit(AVBufferPool **ppool)
Mark the pool as being available for freeing.
Definition: buffer.c:322
ff_vk_filter_config_output
int ff_vk_filter_config_output(AVFilterLink *outlink)
Definition: vulkan_filter.c:191
FFVkBuffer::size
size_t size
Definition: vulkan.h:100
ff_log2
#define ff_log2
Definition: intmath.h:51
ff_vk_init_compute_pipeline
int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl, FFVkSPIRVShader *shd)
Definition: vulkan.c:1804
NLMeansVulkanContext::nlmeans_opts
Definition: vf_nlmeans_vulkan.c:57
ff_vk_exec_pool_init
int ff_vk_exec_pool_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, FFVkExecPool *pool, int nb_contexts, int nb_queries, VkQueryType query_type, int query_64bit, const void *query_create_pnext)
Allocates/frees an execution pool.
Definition: vulkan.c:296
FFVulkanContext
Definition: vulkan.h:228
exp
int8_t exp
Definition: eval.c:72
FFVulkanPipeline
Definition: vulkan.h:132
insert_vertical_pass
static void insert_vertical_pass(FFVkSPIRVShader *shd, int nb_rows, int first, int plane, int comp)
Definition: vf_nlmeans_vulkan.c:120
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
NLMeansVulkanContext::qf
FFVkQueueFamilyCtx qf
Definition: vf_nlmeans_vulkan.c:37
NLMeansVulkanContext::patch
int patch[4]
Definition: vf_nlmeans_vulkan.c:55
ff_vk_shader_init
int ff_vk_shader_init(FFVulkanPipeline *pl, FFVkSPIRVShader *shd, const char *name, VkShaderStageFlags stage, uint32_t required_subgroup_size)
Shader management.
Definition: vulkan.c:1340
main
int main(int argc, char **argv)
Definition: avio_http_serve_files.c:99
s2
#define s2
Definition: regdef.h:39
NLMeansVulkanContext
Definition: vf_nlmeans_vulkan.c:32
FFVulkanDescriptorSetBinding
Definition: vulkan.h:84
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:106
for
for(k=2;k<=8;++k)
Definition: h264pred_template.c:425
AVFILTER_FLAG_HWDEVICE
#define AVFILTER_FLAG_HWDEVICE
The filter can create hardware frames using AVFilterContext.hw_device_ctx.
Definition: avfilter.h:138
NLMeansVulkanContext::nlmeans_opts::t
int t
Definition: vf_nlmeans_vulkan.c:63
size
int size
Definition: twinvq_data.h:10344
AV_NUM_DATA_POINTERS
#define AV_NUM_DATA_POINTERS
Definition: frame.h:341
FFVkQueueFamilyCtx
Definition: vulkan.h:111
planes
static const struct @362 planes[]
FFVulkanContext::output_format
enum AVPixelFormat output_format
Definition: vulkan.h:266
height
#define height
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
AVERROR_EXTERNAL
#define AVERROR_EXTERNAL
Generic error in an external library.
Definition: error.h:59
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
FFVkExecContext
Definition: vulkan.h:153
FFVulkanDescriptorSetBinding::name
const char * name
Definition: vulkan.h:85
ff_vk_update_push_exec
void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e, FFVulkanPipeline *pl, VkShaderStageFlagBits stage, int offset, size_t size, void *src)
Definition: vulkan.c:1759
TYPE_SIZE
#define TYPE_SIZE
Definition: vf_nlmeans_vulkan.c:30
internal.h
NLMeansVulkanContext::pl_weights
FFVulkanPipeline pl_weights
Definition: vf_nlmeans_vulkan.c:45
NLMeansVulkanContext::shd_denoise
FFVkSPIRVShader shd_denoise
Definition: vf_nlmeans_vulkan.c:49
AVFILTER_DEFINE_CLASS
#define AVFILTER_DEFINE_CLASS(fname)
Definition: internal.h:319
FFVkSPIRVCompiler
Definition: vulkan_spirv.h:27
layout
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel layout
Definition: filter_design.txt:18
FILTER_SINGLE_PIXFMT
#define FILTER_SINGLE_PIXFMT(pix_fmt_)
Definition: internal.h:182
NLMeansVulkanContext::pl_weights_rows
int pl_weights_rows
Definition: vf_nlmeans_vulkan.c:44
ff_vk_exec_start
int ff_vk_exec_start(FFVulkanContext *s, FFVkExecContext *e)
Start/submit/wait an execution.
Definition: vulkan.c:510
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:244
insert_first
static void insert_first(FFVkSPIRVShader *shd, int r, int horiz, int plane, int comp)
Definition: vf_nlmeans_vulkan.c:69
init_filter
static int init_filter(FilteringContext *fctx, AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, const char *filter_spec)
Definition: transcode.c:244
ff_vk_frame_barrier
void ff_vk_frame_barrier(FFVulkanContext *s, FFVkExecContext *e, AVFrame *pic, VkImageMemoryBarrier2 *bar, int *nb_bar, VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage, VkAccessFlagBits new_access, VkImageLayout new_layout, uint32_t new_qf)
Definition: vulkan.c:1299
DUP_SAMPLER
#define DUP_SAMPLER(x)
Definition: vulkan.h:74
ff_vk_shader_rep_fmt
const char * ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt)
Returns the format to use for images in shaders.
Definition: vulkan.c:1202
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
vulkan_spirv.h
NLMeansVulkanContext::shd_weights
FFVkSPIRVShader shd_weights
Definition: vf_nlmeans_vulkan.c:46
FFVulkanContext::props
VkPhysicalDeviceProperties2 props
Definition: vulkan.h:233
AVFilterPad::name
const char * name
Pad name.
Definition: internal.h:53
GLSLF
#define GLSLF(N, S,...)
Definition: vulkan.h:55
FFVkSPIRVCompiler::free_shader
void(* free_shader)(struct FFVkSPIRVCompiler *ctx, void **opaque)
Definition: vulkan_spirv.h:32
NLMeansVulkanContext::nb_offsets
int nb_offsets
Definition: vf_nlmeans_vulkan.c:53
AVFilter
Filter definition.
Definition: avfilter.h:166
insert_horizontal_pass
static void insert_horizontal_pass(FFVkSPIRVShader *shd, int nb_rows, int first, int plane, int comp)
Definition: vf_nlmeans_vulkan.c:93
NLMeansVulkanContext::state_buf_pool
AVBufferPool * state_buf_pool
Definition: vf_nlmeans_vulkan.c:41
TYPE_NAME
#define TYPE_NAME
Definition: vf_nlmeans_vulkan.c:28
FFVulkanContext::vkfn
FFVulkanFunctions vkfn
Definition: vulkan.h:231
FFVkExecPool
Definition: vulkan.h:210
pos
unsigned int pos
Definition: spdifenc.c:413
OFFSET
#define OFFSET(x)
FFVkExecContext::buf
VkCommandBuffer buf
Definition: vulkan.h:164
NLMeansVulkanContext::nlmeans_opts::s
double s
Definition: vf_nlmeans_vulkan.c:59
NLMeansVulkanContext::nlmeans_opts::pc
int pc[4]
Definition: vf_nlmeans_vulkan.c:62
random_seed.h
FFVkSPIRVShader
Definition: vulkan.h:76
buffer
the frame and frame reference mechanism is intended to as much as expensive copies of that data while still allowing the filters to produce correct results The data is stored in buffers represented by AVFrame structures Several references can point to the same frame buffer
Definition: filter_design.txt:49
AV_OPT_TYPE_INT
@ AV_OPT_TYPE_INT
Definition: opt.h:225
state
static struct @361 state
AVFilterContext
An instance of a filter.
Definition: avfilter.h:397
NLMeansVulkanContext::nlmeans_opts::sc
double sc[4]
Definition: vf_nlmeans_vulkan.c:60
desc
const char * desc
Definition: libsvtav1.c:83
GLSLC
#define GLSLC(N, S)
Definition: vulkan.h:45
ff_vk_filter_config_input
int ff_vk_filter_config_input(AVFilterLink *inlink)
Definition: vulkan_filter.c:160
AVMEDIA_TYPE_VIDEO
@ AVMEDIA_TYPE_VIDEO
Definition: avutil.h:201
FFVulkanContext::hwctx
AVVulkanDeviceContext * hwctx
Definition: vulkan.h:253
ff_vk_set_descriptor_buffer
int ff_vk_set_descriptor_buffer(FFVulkanContext *s, FFVulkanPipeline *pl, FFVkExecContext *e, int set, int bind, int offs, VkDeviceAddress addr, VkDeviceSize len, VkFormat fmt)
Definition: vulkan.c:1701
AVBufferRef
A reference to a data buffer.
Definition: buffer.h:82
NLMeansVulkanContext::e
FFVkExecPool e
Definition: vf_nlmeans_vulkan.c:36
FFVulkanContext::feats_12
VkPhysicalDeviceVulkan12Features feats_12
Definition: vulkan.h:249
AVPixFmtDescriptor
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes...
Definition: pixdesc.h:69
atomicAdd
#define atomicAdd(a, b)
Definition: cuda_runtime.h:37
AVVulkanDeviceContext::act_dev
VkDevice act_dev
Active device.
Definition: hwcontext_vulkan.h:70
FFVkSPIRVShader::local_size
int local_size[3]
Definition: vulkan.h:79
FFALIGN
#define FFALIGN(x, a)
Definition: macros.h:78
FILTER_OUTPUTS
#define FILTER_OUTPUTS(array)
Definition: internal.h:193
ff_vk_init_sampler
int ff_vk_init_sampler(FFVulkanContext *s, VkSampler *sampler, int unnorm_coords, VkFilter filt)
Create a sampler.
Definition: vulkan.c:1158
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:34
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
TYPE_ELEMS
#define TYPE_ELEMS
Definition: vf_nlmeans_vulkan.c:29
FFVkBuffer
Definition: vulkan.h:96
ff_vk_exec_pipeline_register
int ff_vk_exec_pipeline_register(FFVulkanContext *s, FFVkExecPool *pool, FFVulkanPipeline *pl)
Register a pipeline with an exec pool.
Definition: vulkan.c:1572
d
d
Definition: ffmpeg_filter.c:331
int32_t
int32_t
Definition: audioconvert.c:56
ff_vk_exec_submit
int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext *e)
Definition: vulkan.c:721
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
ff_vk_create_imageviews
int ff_vk_create_imageviews(FFVulkanContext *s, FFVkExecContext *e, VkImageView views[AV_NUM_DATA_POINTERS], AVFrame *f)
Create an imageview and add it as a dependency to an execution.
Definition: vulkan.c:1226
uninit
static av_cold int uninit(AVCodecContext *avctx)
Definition: crystalhd.c:285
ff_vk_shader_free
void ff_vk_shader_free(FFVulkanContext *s, FFVkSPIRVShader *shd)
Definition: vulkan.c:1399
NLMeansVulkanContext::nlmeans_opts::r
int r
Definition: vf_nlmeans_vulkan.c:58
RET
#define RET(x)
Definition: vulkan.h:68
FFVulkanFunctions
Definition: vulkan_functions.h:226
ff_vk_get_pooled_buffer
int ff_vk_get_pooled_buffer(FFVulkanContext *ctx, AVBufferPool **buf_pool, AVBufferRef **buf, VkBufferUsageFlags usage, void *create_pNext, size_t size, VkMemoryPropertyFlagBits mem_props)
Initialize a pool and create AVBufferRefs containing FFVkBuffer.
Definition: vulkan.c:1086
NLMeansVulkanContext::vkctx
FFVulkanContext vkctx
Definition: vf_nlmeans_vulkan.c:33