29 #define RGB_LINECACHE 2
42 .queue_flags = VK_QUEUE_COMPUTE_BIT,
109 GLSLC(0,
layout(push_constant, scalar) uniform pushConstants { );
110 GLSLC(1, u8buf slice_data; );
111 GLSLC(1, u8buf slice_state; );
113 GLSLC(1, ivec4 fmt_lut; );
114 GLSLC(1, uvec2 img_size; );
115 GLSLC(1, uvec2 chroma_shift; );
117 GLSLC(1, uint plane_state_size; );
118 GLSLC(1, uint32_t crcref; );
119 GLSLC(1,
int rct_offset; );
121 GLSLC(1, uint8_t extend_lookup[8]; );
122 GLSLC(1, uint8_t bits_per_raw_sample; );
123 GLSLC(1, uint8_t quant_table_count; );
125 GLSLC(1, uint8_t micro_version; );
126 GLSLC(1, uint8_t key_frame; );
128 GLSLC(1, uint8_t codec_planes; );
129 GLSLC(1, uint8_t color_planes; );
130 GLSLC(1, uint8_t transparency; );
131 GLSLC(1, uint8_t planar_rgb; );
132 GLSLC(1, uint8_t colorspace; );
133 GLSLC(1, uint8_t ec; );
134 GLSLC(1, uint8_t golomb; );
135 GLSLC(1, uint8_t check_crc; );
136 GLSLC(1, uint8_t padding[3]; );
139 VK_SHADER_STAGE_COMPUTE_BIT);
166 for (
int i = 0;
i <
f->quant_table_count;
i++)
167 max_contexts =
FFMAX(
f->context_count[
i], max_contexts);
188 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
189 VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
195 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
196 VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
198 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
211 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
212 VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
213 NULL, 2*
f->slice_count*
sizeof(uint32_t),
214 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
215 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
222 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
223 VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
224 NULL, 2*
f->slice_count*
sizeof(uint32_t),
225 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
226 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
263 if (slices_buf && slices_buf->host_ref) {
265 data - slices_buf->mapped_mem);
301 int bits =
f->avctx->bits_per_raw_sample > 0 ?
f->avctx->bits_per_raw_sample : 8;
320 VkImageView *decode_dst_view = is_rgb ? rct_image_views : vp->
view.
out;
322 VkImageMemoryBarrier2 img_bar[37];
324 VkBufferMemoryBarrier2 buf_bar[8];
332 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
333 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
344 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
345 VK_PIPELINE_STAGE_2_CLEAR_BIT));
347 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
348 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
349 VK_ACCESS_2_TRANSFER_WRITE_BIT,
350 VK_IMAGE_LAYOUT_GENERAL,
351 VK_QUEUE_FAMILY_IGNORED);
360 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT));
371 buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
372 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
373 .srcStageMask = slice_state->stage,
374 .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
375 .srcAccessMask = slice_state->access,
376 .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
377 VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
378 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
379 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
380 .buffer = slice_state->buf,
385 vk->CmdPipelineBarrier2(exec->
buf, &(VkDependencyInfo) {
386 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
387 .pImageMemoryBarriers = img_bar,
388 .imageMemoryBarrierCount = nb_img_bar,
389 .pBufferMemoryBarriers = buf_bar,
390 .bufferMemoryBarrierCount = nb_buf_bar,
392 slice_state->stage = buf_bar[0].dstStageMask;
393 slice_state->access = buf_bar[0].dstAccessMask;
402 VK_FORMAT_UNDEFINED);
406 0, 2*
f->slice_count*
sizeof(uint32_t),
407 VK_FORMAT_UNDEFINED);
411 0, 2*
f->slice_count*
sizeof(uint32_t),
412 VK_FORMAT_UNDEFINED);
419 .img_size[0] =
f->picture.f->width,
420 .img_size[1] =
f->picture.f->height,
421 .chroma_shift[0] =
f->chroma_h_shift,
422 .chroma_shift[1] =
f->chroma_v_shift,
426 .rct_offset = 1 <<
bits,
428 .bits_per_raw_sample =
bits,
429 .quant_table_count =
f->quant_table_count,
430 .version =
f->version,
431 .micro_version =
f->micro_version,
434 .codec_planes =
f->plane_count,
435 .color_planes = color_planes,
436 .transparency =
f->transparency,
439 .colorspace =
f->colorspace,
444 for (
int i = 0;
i <
f->quant_table_count;
i++)
446 (
f->quant_tables[
i][4][127] != 0);
451 memcpy(pd.
fmt_lut, (
int [4]) { 2, 1, 0, 3 }, 4*
sizeof(
int));
453 memcpy(pd.
fmt_lut, (
int [4]) { 0, 2, 1, 3 }, 4*
sizeof(
int));
458 VK_SHADER_STAGE_COMPUTE_BIT,
461 vk->CmdDispatch(exec->
buf,
f->num_h_slices,
f->num_v_slices, 1);
465 for (
int i = 0;
i < color_planes;
i++)
466 vk->CmdClearColorImage(exec->
buf, vkf->
img[
i], VK_IMAGE_LAYOUT_GENERAL,
467 &((VkClearColorValue) { 0 }),
468 1, &((VkImageSubresourceRange) {
469 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
481 VK_FORMAT_UNDEFINED);
488 .codec_planes =
f->plane_count,
490 .version =
f->version,
491 .micro_version =
f->micro_version,
493 for (
int i = 0;
i <
f->quant_table_count;
i++)
497 VK_SHADER_STAGE_COMPUTE_BIT,
498 0,
sizeof(pd_reset), &pd_reset);
501 buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
502 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
503 .srcStageMask = slice_state->stage,
504 .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
505 .srcAccessMask = slice_state->access,
506 .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT,
507 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
508 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
509 .buffer = slice_state->buf,
513 vk->CmdPipelineBarrier2(exec->
buf, &(VkDependencyInfo) {
514 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
515 .pImageMemoryBarriers = img_bar,
516 .imageMemoryBarrierCount = nb_img_bar,
517 .pBufferMemoryBarriers = buf_bar,
518 .bufferMemoryBarrierCount = nb_buf_bar,
520 slice_state->stage = buf_bar[0].dstStageMask;
521 slice_state->access = buf_bar[0].dstAccessMask;
525 vk->CmdDispatch(exec->
buf,
f->num_h_slices,
f->num_v_slices,
534 VK_FORMAT_UNDEFINED);
536 decode_dst, decode_dst_view,
538 VK_IMAGE_LAYOUT_GENERAL,
543 0, 2*
f->slice_count*
sizeof(uint32_t),
544 VK_FORMAT_UNDEFINED);
549 VK_IMAGE_LAYOUT_GENERAL,
554 VK_SHADER_STAGE_COMPUTE_BIT,
558 buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
559 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
560 .srcStageMask = slice_state->stage,
561 .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
562 .srcAccessMask = slice_state->access,
563 .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
564 VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
565 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
566 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
567 .buffer = slice_state->buf,
574 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
575 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
576 VK_ACCESS_SHADER_WRITE_BIT |
577 (!is_rgb ? VK_ACCESS_SHADER_READ_BIT : 0),
578 VK_IMAGE_LAYOUT_GENERAL,
579 VK_QUEUE_FAMILY_IGNORED);
582 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
583 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
584 VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
585 VK_IMAGE_LAYOUT_GENERAL,
586 VK_QUEUE_FAMILY_IGNORED);
588 vk->CmdPipelineBarrier2(exec->
buf, &(VkDependencyInfo) {
589 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
590 .pImageMemoryBarriers = img_bar,
591 .imageMemoryBarrierCount = nb_img_bar,
592 .pBufferMemoryBarriers = buf_bar,
593 .bufferMemoryBarrierCount = nb_buf_bar,
595 slice_state->stage = buf_bar[0].dstStageMask;
596 slice_state->access = buf_bar[0].dstAccessMask;
600 vk->CmdDispatch(exec->
buf,
f->num_h_slices,
f->num_v_slices, 1);
615 int smp_bits = use32bit ? 32 : 16;
623 GLSLF(0, #define
TYPE int%i_t ,smp_bits);
624 GLSLF(0, #define VTYPE2
i%ivec2 ,smp_bits);
625 GLSLF(0, #define VTYPE3
i%ivec3 ,smp_bits);
639 void *spv_opaque =
NULL;
642 VK_SHADER_STAGE_COMPUTE_BIT,
643 (
const char *[]) {
"GL_EXT_buffer_reference",
644 "GL_EXT_buffer_reference2" }, 2,
659 .
name =
"rangecoder_static_buf",
660 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
661 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
662 .mem_layout =
"scalar",
663 .buf_content =
"uint8_t zero_one_state[512];",
666 .name =
"crc_ieee_buf",
667 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
668 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
669 .mem_layout =
"scalar",
670 .buf_content =
"uint32_t crc_ieee[256];",
674 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
675 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
676 .mem_layout =
"scalar",
677 .buf_content =
"int16_t quant_table[MAX_QUANT_TABLES]"
678 "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
688 .
name =
"slice_data_buf",
689 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
690 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
691 .buf_content =
"SliceContext slice_ctx",
692 .buf_elems =
f->max_slice_count,
695 .name =
"slice_offsets_buf",
696 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
697 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
698 .mem_quali =
"readonly",
699 .buf_content =
"uint32_t slice_offsets",
700 .buf_elems = 2*
f->max_slice_count,
703 .name =
"slice_status_buf",
704 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
705 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
706 .mem_quali =
"writeonly",
707 .buf_content =
"uint32_t slice_status",
708 .buf_elems = 2*
f->max_slice_count,
737 void *spv_opaque =
NULL;
738 int wg_dim =
FFMIN(
s->props.properties.limits.maxComputeWorkGroupSize[0], 1024);
741 VK_SHADER_STAGE_COMPUTE_BIT,
742 (
const char *[]) {
"GL_EXT_buffer_reference",
743 "GL_EXT_buffer_reference2" }, 2,
753 GLSLC(0,
layout(push_constant, scalar) uniform pushConstants { );
755 GLSLC(1, u8buf slice_state; );
756 GLSLC(1, uint plane_state_size; );
757 GLSLC(1, uint8_t codec_planes; );
758 GLSLC(1, uint8_t key_frame; );
760 GLSLC(1, uint8_t micro_version; );
761 GLSLC(1, uint8_t padding[1]; );
764 VK_SHADER_STAGE_COMPUTE_BIT);
772 .
name =
"rangecoder_static_buf",
773 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
774 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
775 .mem_layout =
"scalar",
776 .buf_content =
"uint8_t zero_one_state[512];",
780 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
781 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
782 .mem_layout =
"scalar",
783 .buf_content =
"int16_t quant_table[MAX_QUANT_TABLES]"
784 "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
795 .
name =
"slice_data_buf",
796 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
797 .mem_quali =
"readonly",
798 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
799 .buf_content =
"SliceContext slice_ctx",
800 .buf_elems =
f->max_slice_count,
825 int use32bit,
int ac,
int rgb)
832 void *spv_opaque =
NULL;
834 s->driver_props.driverID == VK_DRIVER_ID_MESA_RADV;
837 VK_SHADER_STAGE_COMPUTE_BIT,
838 (
const char *[]) {
"GL_EXT_buffer_reference",
839 "GL_EXT_buffer_reference2" }, 2,
849 if (use_cached_reader)
863 .
name =
"rangecoder_static_buf",
864 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
865 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
866 .mem_layout =
"scalar",
867 .buf_content =
"uint8_t zero_one_state[512];",
871 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
872 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
873 .mem_layout =
"scalar",
874 .buf_content =
"int16_t quant_table[MAX_QUANT_TABLES]"
875 "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
887 .
name =
"slice_data_buf",
888 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
889 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
890 .buf_content =
"SliceContext slice_ctx",
891 .buf_elems =
f->max_slice_count,
895 .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
900 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
903 .name =
"slice_status_buf",
904 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
905 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
906 .mem_quali =
"writeonly",
907 .buf_content =
"uint32_t slice_status",
908 .buf_elems = 2*
f->max_slice_count,
912 .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
916 .mem_quali =
"writeonly",
918 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
953 frames_ctx->
width =
s->frames->width;
956 vk_frames = frames_ctx->
hwctx;
957 vk_frames->
tiling = VK_IMAGE_TILING_OPTIMAL;
958 vk_frames->
img_flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
959 vk_frames->
usage = VK_IMAGE_USAGE_STORAGE_BIT |
960 VK_IMAGE_USAGE_TRANSFER_DST_BIT;
979 for (
int i = 0;
i < 2;
i++)
982 for (
int i = 0;
i < 2;
i++)
985 for (
int i = 0;
i < 2;
i++)
986 for (
int j = 0; j < 2; j++)
987 for (
int k = 0; k < 2; k++)
1010 if (
f->version < 3 ||
1011 (
f->version == 4 &&
f->micro_version > 3))
1014 spv = ff_vk_spirv_init();
1034 for (
int i = 0;
i < 2;
i++) {
1043 for (
int i = 0;
i < 2;
i++) {
1049 for (
int i = 0;
i < 2;
i++) {
1050 for (
int j = 0; j < 2; j++) {
1051 for (
int k = 0; k < 2; k++) {
1083 &fv->
setup, 0, 0, 0,
1086 VK_FORMAT_UNDEFINED));
1088 &fv->
setup, 0, 1, 0,
1091 VK_FORMAT_UNDEFINED));
1094 for (
int i = 0;
i < 2;
i++) {
1095 for (
int j = 0; j < 2; j++) {
1096 for (
int k = 0; k < 2; k++) {
1098 &fv->
decode[
i][j][k], 0, 0, 0,
1101 VK_FORMAT_UNDEFINED));
1103 &fv->
decode[
i][j][k], 0, 1, 0,
1106 VK_FORMAT_UNDEFINED));
1129 if (!(slice_status->
flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
1130 VkMappedMemoryRange invalidate_data = {
1131 .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
1132 .memory = slice_status->
mem,
1134 .size = 2*fp->
slice_num*
sizeof(uint32_t),
1137 1, &invalidate_data);
1141 uint32_t crc_res = 0;
1157 .
p.
name =
"ffv1_vulkan",