24 #include "../ops_internal.h"
25 #include "../swscale_internal.h"
29 #if HAVE_SPIRV_HEADERS_SPIRV_H || HAVE_SPIRV_UNIFIED1_SPIRV_H
37 #if CONFIG_LIBSHADERC || CONFIG_LIBGLSLANG
39 s->spvc->uninit(&
s->spvc);
57 if (
s->vkctx.device_ref &&
s->vkctx.device_ref->data != dev_ref->
data) {
60 }
else if (
s->vkctx.device_ref &&
s->vkctx.device_ref->data == dev_ref->
data) {
74 #if CONFIG_LIBSHADERC || CONFIG_LIBGLSLANG
76 s->spvc = ff_vk_spirv_init();
/* Compile-time limit of 4 related to dither buffers.
 * NOTE(review): presumably the capacity that p->nb_dither_buf /
 * p->dither_buf[] must stay within; the uses of this macro are not
 * visible here - confirm against the array declaration. */
85 #define MAX_DITHER_BUFS 4
108 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
109 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT);
111 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
112 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT);
120 0, 0, VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE);
122 0, 1, VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE);
125 VkImageMemoryBarrier2 img_bar[8];
127 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
128 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
129 VK_ACCESS_SHADER_READ_BIT,
130 VK_IMAGE_LAYOUT_GENERAL,
131 VK_QUEUE_FAMILY_IGNORED);
133 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
134 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
135 VK_ACCESS_SHADER_WRITE_BIT,
136 VK_IMAGE_LAYOUT_GENERAL,
137 VK_QUEUE_FAMILY_IGNORED);
138 vk->CmdPipelineBarrier2(ec->buf, &(VkDependencyInfo) {
139 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
140 .pImageMemoryBarriers = img_bar,
141 .imageMemoryBarrierCount = nb_img_bar,
146 vk->CmdDispatch(ec->buf,
160 for (
int i = 0;
i <
p->nb_dither_buf;
i++)
169 p->nb_dither_buf = 0;
170 for (
int n = 0; n < ops->
num_ops; n++) {
176 int size = (1 <<
op->dither.size_log2);
177 int idx =
p->nb_dither_buf;
180 VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
181 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
182 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
189 (uint8_t **)&dither_data, 0);
193 for (
int i = 0;
i <
size;
i++) {
194 for (
int j = 0; j <
size; j++) {
206 for (
int i = 0;
i <
p->nb_dither_buf;
i++)
211 #if HAVE_SPIRV_HEADERS_SPIRV_H || HAVE_SPIRV_UNIFIED1_SPIRV_H
222 typedef struct SPIRVIDs {
255 int linear_deco_off[16];
256 int linear_deco_ops[16];
260 int dither_ptr_elem_id;
264 int out_img_array_id;
280 SPICtx *spi, SPIRVIDs *
id)
299 id->in_vars, 3 +
id->nb_dither_bufs);
305 SpvBuiltInGlobalInvocationId);
317 for (
int i = 0;
i <
id->nb_dither_bufs;
i++) {
321 id->dither[
i].size*
sizeof(
float));
329 for (
int n = 0; n < ops->
num_ops; n++) {
336 for (
int j = 0; j < 4; j++) {
337 nb_ops += !!
op->lin.m[j][0].num;
338 nb_ops +=
op->lin.m[j][0].num &&
op->lin.m[j][4].num;
339 for (
int i = 1;
i < 4;
i++) {
340 nb_ops += !!
op->lin.m[j][
i].num;
341 nb_ops +=
op->lin.m[j][
i].num &&
342 (
op->lin.m[j][0].num ||
op->lin.m[j][4].num);
346 id->linear_deco_off[
id->nb_linear_ops] =
spi_reserve(spi, nb_ops*4*3);
347 id->linear_deco_ops[
id->nb_linear_ops] = nb_ops;
353 static void define_shader_consts(
SwsOpList *ops,
SPICtx *spi, SPIRVIDs *
id)
359 id->u32_type = spi_OpTypeInt(spi, 32, 0);
360 id->i32_type = spi_OpTypeInt(spi, 32, 1);
362 id->f32_type = spi_OpTypeFloat(spi, 32);
366 id->bvec2_type = spi_OpTypeVector(spi,
id->b_type, 2);
367 id->u32vec2_type = spi_OpTypeVector(spi, u32_type, 2);
368 id->i32vec2_type = spi_OpTypeVector(spi,
id->i32_type, 2);
370 id->u32vec3_type = spi_OpTypeVector(spi, u32_type, 3);
372 id->u32vec4_type = spi_OpTypeVector(spi, u32_type, 4);
373 id->f32vec4_type = spi_OpTypeVector(spi, f32_type, 4);
379 for (
int i = 0;
i < 5;
i++)
383 id->nb_const_ids = 0;
384 for (
int n = 0; n < ops->
num_ops; n++) {
396 id->const_ids[
id->nb_const_ids++] =
tmp;
400 for (
int i = 0;
i < 4;
i++) {
406 id->const_ids[
id->nb_const_ids++] =
410 id->const_ids[
id->nb_const_ids++] =
420 id->const_ids[
id->nb_const_ids++] =
tmp;
426 float q =
op->scale.factor.num/(
float)
op->scale.factor.den;
436 id->const_ids[
id->nb_const_ids++] =
tmp;
441 for (
int i = 0;
i < 4;
i++) {
444 if (!
op->clamp.limit[
i].den) {
453 id->const_ids[
id->nb_const_ids++] =
tmp;
457 for (
int i = 0;
i < 4;
i++) {
458 if (
op->dither.y_offset[
i] < 0)
461 id->const_ids[
id->nb_const_ids++] =
tmp;
465 for (
int i = 0;
i < 4;
i++) {
467 if (
op->lin.m[
i][0].num) {
469 id->const_ids[
id->nb_const_ids++] =
472 if (
op->lin.m[
i][4].num) {
474 id->const_ids[
id->nb_const_ids++] =
477 for (
int j = 1; j < 4; j++) {
478 if (!
op->lin.m[
i][j].num)
481 id->const_ids[
id->nb_const_ids++] =
494 static void define_shader_bindings(
SwsOpList *ops,
SPICtx *spi, SPIRVIDs *
id,
495 int in_img_count,
int out_img_count)
500 struct DitherData *
dither =
id->dither;
501 for (
int i = 0;
i <
id->nb_dither_bufs;
i++) {
510 SpvStorageClassUniform, 0);
519 id->f32_type :
id->u32_type,
520 2, 0, 0, 0, 2, SpvImageFormatUnknown);
522 id->u32_cid[out_img_count]);
525 id->in_img_array_id = 0;
531 id->in_img_type = match ?
id->out_img_type :
534 id->f32_type :
id->u32_type,
535 2, 0, 0, 0, 2, SpvImageFormatUnknown);
537 id->u32_cid[in_img_count]);
544 id->out_img_array_id);
552 id->in_img_array_id);
559 SpvStorageClassInput, 0);
562 SpvStorageClassUniformConstant, 0);
565 SpvStorageClassUniformConstant, 0);
571 uint8_t spvbuf[1024*16];
572 SPICtx spi_context = { 0 }, *spi = &spi_context;
573 SPIRVIDs spid_data = { 0 }, *
id = &spid_data;
574 spi_init(spi, spvbuf,
sizeof(spvbuf));
581 (uint32_t []) { 32, 32, 1 }, 0);
596 .
type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
597 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
601 .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
602 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
619 id->nb_dither_bufs = 0;
620 for (
int n = 0; n < ops->
num_ops; n++) {
625 id->
dither[
id->nb_dither_bufs].size = 1 <<
op->dither.size_log2;
626 id->dither[
id->nb_dither_bufs].arr_1d_id =
spi_get_id(spi);
627 id->dither[
id->nb_dither_bufs].arr_2d_id =
spi_get_id(spi);
628 id->dither[
id->nb_dither_bufs].struct_id =
spi_get_id(spi);
629 id->dither[
id->nb_dither_bufs].id =
spi_get_id(spi);
630 id->in_vars[3 +
id->nb_dither_bufs] =
id->dither[
id->nb_dither_bufs].id;
633 .
type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
634 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
637 if (
id->nb_dither_bufs)
639 id->nb_dither_bufs, 1, 0);
642 define_shader_header(shd, ops, spi,
id);
643 define_shader_consts(ops, spi,
id);
644 define_shader_bindings(ops, spi,
id, in_img_count, out_img_count);
651 int in_img[4] = { 0 };
652 for (
int i = 0;
i < in_img_count;
i++) {
655 id->in_vars[1],
id->u32_cid[
i]);
657 SpvMemoryAccessMaskNone, 0);
662 for (
int i = 0;
i < out_img_count;
i++) {
664 id->in_vars[2],
id->u32_cid[
i]);
666 SpvMemoryAccessMaskNone, 0);
671 SpvMemoryAccessMaskNone, 0);
675 int gi2 = spi_OpBitcast(spi,
id->i32vec2_type, gid);
678 int img1_s = spi_OpImageQuerySize(spi,
id->i32vec2_type, out_img[0]);
679 int scmp = spi_OpSGreaterThanEqual(spi,
id->bvec2_type, gi2, img1_s);
680 scmp = spi_OpAny(spi,
id->b_type, scmp);
695 id->f32_p,
id->f32_p,
696 id->f32_p,
id->f32_p);
699 id->u32_p,
id->u32_p,
700 id->u32_p,
id->u32_p);
703 int nb_const_ids = 0;
704 int nb_dither_bufs = 0;
705 int nb_linear_ops = 0;
708 for (
int n = 0; n < ops->
num_ops; n++) {
711 op->convert.to :
op->type;
713 id->f32vec4_type :
id->u32vec4_type;
715 id->f32_type :
id->u32_type;
717 id->f32_p :
id->u32_p;
721 if (
op->rw.frac ||
op->rw.filter) {
723 }
else if (
op->rw.packed) {
725 gid, SpvImageOperandsMaskNone);
728 for (
int i = 0;
i <
op->rw.elems;
i++) {
731 SpvImageOperandsMaskNone);
739 if (
op->rw.frac ||
op->rw.filter) {
741 }
else if (
op->rw.packed) {
743 SpvImageOperandsMaskNone);
745 for (
int i = 0;
i <
op->rw.elems;
i++) {
749 SpvImageOperandsMaskNone);
754 for (
int i = 0;
i < 4;
i++) {
755 if (!
op->clear.value[
i].den)
758 id->const_ids[nb_const_ids++],
771 data = spi_OpIMul(spi, type_v,
data,
id->const_ids[nb_const_ids++]);
773 data = spi_OpConvertFToU(spi, type_v,
data);
775 data = spi_OpConvertUToF(spi, type_v,
data);
778 data = spi_OpShiftLeftLogical(spi, type_v,
data,
779 id->const_ids[nb_const_ids++]);
782 data = spi_OpShiftRightLogical(spi, type_v,
data,
783 id->const_ids[nb_const_ids++]);
787 data = spi_OpFMul(spi, type_v,
data,
788 id->const_ids[nb_const_ids++]);
790 data = spi_OpIMul(spi, type_v,
data,
791 id->const_ids[nb_const_ids++]);
796 op->op ==
SWS_OP_MIN ? GLSLstd450FMin : GLSLstd450FMax :
798 for (
int i = 0;
i < 4;
i++) {
799 if (!
op->clamp.limit[
i].den)
803 tmp,
id->const_ids[nb_const_ids++]);
809 int did = nb_dither_bufs++;
812 x_id = spi_OpBitwiseAnd(spi,
id->u32_type, x_id,
813 id->dither[did].mask_id);
814 for (
int i = 0;
i < 4;
i++) {
815 if (
op->dither.y_offset[
i] < 0)
818 int y_id = spi_OpIAdd(spi,
id->u32_type, y_pos,
819 id->const_ids[nb_const_ids++]);
820 y_id = spi_OpBitwiseAnd(spi,
id->u32_type, y_id,
821 id->dither[did].mask_id);
824 id->dither[did].id,
id->u32_cid[0],
827 SpvMemoryAccessMaskNone, 0);
843 spi->
off =
id->linear_deco_off[nb_linear_ops];
844 for (
int i = 0;
i <
id->linear_deco_ops[nb_linear_ops];
i++)
849 for (
int j = 0; j < 4; j++) {
851 if (
op->lin.m[j][0].num)
852 res[j] = spi_OpFMul(spi, type_s,
tmp[0],
853 id->const_ids[nb_const_ids++]);
855 if (
op->lin.m[j][0].num &&
op->lin.m[j][4].num)
856 res[j] = spi_OpFAdd(spi, type_s,
857 id->const_ids[nb_const_ids++], res[j]);
858 else if (
op->lin.m[j][4].num)
859 res[j] =
id->const_ids[nb_const_ids++];
861 for (
int i = 1;
i < 4;
i++) {
862 if (!
op->lin.m[j][
i].num)
865 int v = spi_OpFMul(spi, type_s,
tmp[
i],
866 id->const_ids[nb_const_ids++]);
867 if (
op->lin.m[j][0].num ||
op->lin.m[j][4].num)
868 res[j] = spi_OpFAdd(spi, type_s, res[j], v);
874 res[0], res[1], res[2], res[3]);
895 #if CONFIG_LIBSHADERC || CONFIG_LIBGLSLANG
907 .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
908 .mem_layout = img_type,
912 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
/* QSTR is a printf-style format fragment that prints a num/den pair as a
 * parenthesised fraction followed by an optional string suffix. */
918 #define QSTR "(%i/%i%s)"
/* QTYPE(Q) expands to the three arguments consumed by QSTR: Q's .num,
 * Q's .den, and the suffix ".0f" when cur_type == SWS_PIXEL_F32 (empty
 * string otherwise), so a value prints as e.g. "(1/255.0f)" or "(1/255)".
 * It reads a variable named cur_type that must be in scope wherever the
 * macro is expanded; it is not a macro parameter. */
919 #define QTYPE(Q) (Q).num, (Q).den, cur_type == SWS_PIXEL_F32 ? ".0f" : ""
927 void *spv_opaque =
NULL;
934 VK_SHADER_STAGE_COMPUTE_BIT,
935 NULL, 0, 32, 32, 1, 0);
945 add_desc_read_write(&
buf_desc[nb_desc++], &
p->src_rep,
read);
946 add_desc_read_write(&
buf_desc[nb_desc++], &
p->dst_rep, write);
956 for (
int n = 0; n < ops->
num_ops; n++) {
960 int size = (1 <<
op->dither.size_log2);
962 snprintf(dither_buf_name[nb_desc], 64,
"dither_buf%i", n);
963 snprintf(dither_mat_name[nb_desc], 64,
"float dither_mat%i[%i][%i];",
966 .name = dither_buf_name[nb_desc],
967 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
968 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
969 .mem_layout =
"scalar",
970 .buf_content = dither_mat_name[nb_desc],
980 GLSLC(1, ivec2
pos = ivec2(gl_GlobalInvocationID.xy); );
981 GLSLC(1, ivec2
size = imageSize(src_img[0]); );
985 GLSLC(1, u8vec4 u8; );
986 GLSLC(1, u16vec4 u16; );
987 GLSLC(1, u32vec4 u32; );
988 GLSLC(1, precise f32vec4 f32; );
992 for (
int n = 0; n < ops->
num_ops; n++) {
1007 if (
op->rw.frac ||
op->rw.filter) {
1009 }
else if (
op->rw.packed) {
1013 for (
int i = 0;
i <
op->rw.elems;
i++)
1020 if (
op->rw.frac ||
op->rw.filter) {
1022 }
else if (
op->rw.packed) {
1026 for (
int i = 0;
i <
op->rw.elems;
i++)
1034 for (
int i = 0;
i < 4;
i++)
1040 for (
int i = 0;
i < 4;
i++) {
1044 "xyzw"[
i], type_s, QTYPE(
op->clear.value[
i]));
1050 type_name, type_name, QTYPE(
op->scale.factor));
1054 for (
int i = 0;
i < 4;
i++) {
1055 if (!
op->clamp.limit[
i].den)
1058 type_name,
"xyzw"[
i],
1060 type_name,
"xyzw"[
i], QTYPE(
op->clamp.limit[
i]));
1080 int size = (1 <<
op->dither.size_log2);
1081 for (
int i = 0;
i < 4;
i++) {
1082 if (
op->dither.y_offset[
i] < 0)
1084 av_bprintf(&shd->
src,
" %s.%c += dither_mat%i[(pos.y + %i) & %i]"
1086 type_name,
"xyzw"[
i], n,
1087 op->dither.y_offset[
i],
size - 1,
1093 for (
int i = 0;
i < 4;
i++) {
1094 if (
op->lin.m[
i][4].num)
1096 QTYPE(
op->lin.m[
i][4]));
1099 for (
int j = 0; j < 4; j++) {
1100 if (!
op->lin.m[
i][j].num)
1103 "xyzw"[
i],
"xyzw"[j], QTYPE(
op->lin.m[
i][j]));
1115 err =
s->spvc->compile_shader(&
s->vkctx,
s->spvc, shd,
1116 &spv_data, &spv_len,
"main",
1124 s->spvc->free_shader(
s->spvc, &spv_opaque);
1155 VkFormatProperties2 prop = {
1156 .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
1159 vk->GetPhysicalDeviceFormatProperties2(
s->vkctx.hwctx->phys_dev,
1160 VK_FORMAT_B8G8R8A8_UNORM,
1162 if (!(prop.formatProperties.optimalTilingFeatures &
1163 VK_FORMAT_FEATURE_2_STORAGE_WRITE_WITHOUT_FORMAT_BIT)) {
1171 #if CONFIG_LIBSHADERC || CONFIG_LIBGLSLANG
1172 err = add_ops_glsl(
p,
s, ops, &
p->shd);
1176 #if HAVE_SPIRV_HEADERS_SPIRV_H || HAVE_SPIRV_UNIFIED1_SPIRV_H
1177 err = add_ops_spirv(
p,
s, ops, &
p->shd);
1187 for (
int i = 0;
i <
p->nb_dither_buf;
i++)
1189 1,
i, 0, &
p->dither_buf[
i],
1190 0, VK_WHOLE_SIZE, VK_FORMAT_UNDEFINED);
1206 #if HAVE_SPIRV_HEADERS_SPIRV_H || HAVE_SPIRV_UNIFIED1_SPIRV_H
1214 .compile = compile_spirv,
1219 #if CONFIG_LIBSHADERC || CONFIG_LIBGLSLANG
1227 .compile = compile_glsl,