29 #define RGB_LINECACHE 2
42 .queue_flags = VK_QUEUE_COMPUTE_BIT,
109 GLSLC(0,
layout(push_constant, scalar) uniform pushConstants { );
110 GLSLC(1, u8buf slice_data; );
111 GLSLC(1, u8buf slice_state; );
113 GLSLC(1, ivec4 fmt_lut; );
114 GLSLC(1, uvec2 img_size; );
115 GLSLC(1, uvec2 chroma_shift; );
117 GLSLC(1, uint plane_state_size; );
118 GLSLC(1, uint32_t crcref; );
119 GLSLC(1,
int rct_offset; );
121 GLSLC(1, uint8_t extend_lookup[8]; );
122 GLSLC(1, uint8_t bits_per_raw_sample; );
123 GLSLC(1, uint8_t quant_table_count; );
125 GLSLC(1, uint8_t micro_version; );
126 GLSLC(1, uint8_t key_frame; );
128 GLSLC(1, uint8_t codec_planes; );
129 GLSLC(1, uint8_t color_planes; );
130 GLSLC(1, uint8_t transparency; );
131 GLSLC(1, uint8_t planar_rgb; );
132 GLSLC(1, uint8_t colorspace; );
133 GLSLC(1, uint8_t ec; );
134 GLSLC(1, uint8_t golomb; );
135 GLSLC(1, uint8_t check_crc; );
136 GLSLC(1, uint8_t padding[3]; );
139 VK_SHADER_STAGE_COMPUTE_BIT);
166 for (
int i = 0;
i <
f->quant_table_count;
i++)
167 max_contexts =
FFMAX(
f->context_count[
i], max_contexts);
188 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
189 VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
195 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
196 VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
198 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
211 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
212 VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
213 NULL, 2*
f->slice_count*
sizeof(uint32_t),
214 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
215 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
222 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
223 VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
224 NULL, 2*
f->slice_count*
sizeof(uint32_t),
225 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
226 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
263 if (slices_buf && slices_buf->host_ref) {
265 data - slices_buf->mapped_mem);
301 int bits =
f->avctx->bits_per_raw_sample > 0 ?
f->avctx->bits_per_raw_sample : 8;
320 VkImageView *decode_dst_view = is_rgb ? rct_image_views : vp->
view.
out;
322 VkImageMemoryBarrier2 img_bar[37];
324 VkBufferMemoryBarrier2 buf_bar[8];
332 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
333 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
344 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
345 VK_PIPELINE_STAGE_2_CLEAR_BIT));
347 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
348 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
349 VK_ACCESS_2_TRANSFER_WRITE_BIT,
350 VK_IMAGE_LAYOUT_GENERAL,
351 VK_QUEUE_FAMILY_IGNORED);
360 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT));
371 buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
372 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
373 .srcStageMask = slice_state->stage,
374 .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
375 .srcAccessMask = slice_state->access,
376 .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
377 VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
378 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
379 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
380 .buffer = slice_state->buf,
385 vk->CmdPipelineBarrier2(exec->
buf, &(VkDependencyInfo) {
386 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
387 .pImageMemoryBarriers = img_bar,
388 .imageMemoryBarrierCount = nb_img_bar,
389 .pBufferMemoryBarriers = buf_bar,
390 .bufferMemoryBarrierCount = nb_buf_bar,
392 slice_state->stage = buf_bar[0].dstStageMask;
393 slice_state->access = buf_bar[0].dstAccessMask;
402 VK_FORMAT_UNDEFINED);
406 0, 2*
f->slice_count*
sizeof(uint32_t),
407 VK_FORMAT_UNDEFINED);
411 0, 2*
f->slice_count*
sizeof(uint32_t),
412 VK_FORMAT_UNDEFINED);
419 .img_size[0] =
f->picture.f->width,
420 .img_size[1] =
f->picture.f->height,
421 .chroma_shift[0] =
f->chroma_h_shift,
422 .chroma_shift[1] =
f->chroma_v_shift,
426 .rct_offset = 1 <<
bits,
428 .bits_per_raw_sample =
bits,
429 .quant_table_count =
f->quant_table_count,
430 .version =
f->version,
431 .micro_version =
f->micro_version,
434 .codec_planes =
f->plane_count,
435 .color_planes = color_planes,
436 .transparency =
f->transparency,
439 .colorspace =
f->colorspace,
444 for (
int i = 0;
i <
f->quant_table_count;
i++)
446 (
f->quant_tables[
i][4][127] != 0);
452 memcpy(pd.
fmt_lut, (
int [4]) { 2, 1, 0, 3 }, 4*
sizeof(
int));
454 memcpy(pd.
fmt_lut, (
int [4]) { 0, 2, 1, 3 }, 4*
sizeof(
int));
459 VK_SHADER_STAGE_COMPUTE_BIT,
462 vk->CmdDispatch(exec->
buf,
f->num_h_slices,
f->num_v_slices, 1);
466 for (
int i = 0;
i < color_planes;
i++)
467 vk->CmdClearColorImage(exec->
buf, vkf->
img[
i], VK_IMAGE_LAYOUT_GENERAL,
468 &((VkClearColorValue) { 0 }),
469 1, &((VkImageSubresourceRange) {
470 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
482 VK_FORMAT_UNDEFINED);
489 .codec_planes =
f->plane_count,
491 .version =
f->version,
492 .micro_version =
f->micro_version,
494 for (
int i = 0;
i <
f->quant_table_count;
i++)
498 VK_SHADER_STAGE_COMPUTE_BIT,
499 0,
sizeof(pd_reset), &pd_reset);
502 buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
503 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
504 .srcStageMask = slice_state->stage,
505 .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
506 .srcAccessMask = slice_state->access,
507 .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT,
508 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
509 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
510 .buffer = slice_state->buf,
514 vk->CmdPipelineBarrier2(exec->
buf, &(VkDependencyInfo) {
515 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
516 .pImageMemoryBarriers = img_bar,
517 .imageMemoryBarrierCount = nb_img_bar,
518 .pBufferMemoryBarriers = buf_bar,
519 .bufferMemoryBarrierCount = nb_buf_bar,
521 slice_state->stage = buf_bar[0].dstStageMask;
522 slice_state->access = buf_bar[0].dstAccessMask;
526 vk->CmdDispatch(exec->
buf,
f->num_h_slices,
f->num_v_slices,
535 VK_FORMAT_UNDEFINED);
537 decode_dst, decode_dst_view,
539 VK_IMAGE_LAYOUT_GENERAL,
544 0, 2*
f->slice_count*
sizeof(uint32_t),
545 VK_FORMAT_UNDEFINED);
550 VK_IMAGE_LAYOUT_GENERAL,
555 VK_SHADER_STAGE_COMPUTE_BIT,
559 buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
560 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
561 .srcStageMask = slice_state->stage,
562 .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
563 .srcAccessMask = slice_state->access,
564 .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
565 VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
566 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
567 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
568 .buffer = slice_state->buf,
575 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
576 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
577 VK_ACCESS_SHADER_WRITE_BIT |
578 (!is_rgb ? VK_ACCESS_SHADER_READ_BIT : 0),
579 VK_IMAGE_LAYOUT_GENERAL,
580 VK_QUEUE_FAMILY_IGNORED);
583 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
584 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
585 VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
586 VK_IMAGE_LAYOUT_GENERAL,
587 VK_QUEUE_FAMILY_IGNORED);
589 vk->CmdPipelineBarrier2(exec->
buf, &(VkDependencyInfo) {
590 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
591 .pImageMemoryBarriers = img_bar,
592 .imageMemoryBarrierCount = nb_img_bar,
593 .pBufferMemoryBarriers = buf_bar,
594 .bufferMemoryBarrierCount = nb_buf_bar,
596 slice_state->stage = buf_bar[0].dstStageMask;
597 slice_state->access = buf_bar[0].dstAccessMask;
601 vk->CmdDispatch(exec->
buf,
f->num_h_slices,
f->num_v_slices, 1);
616 int smp_bits = use32bit ? 32 : 16;
624 GLSLF(0, #define
TYPE int%i_t ,smp_bits);
625 GLSLF(0, #define VTYPE2
i%ivec2 ,smp_bits);
626 GLSLF(0, #define VTYPE3
i%ivec3 ,smp_bits);
640 void *spv_opaque =
NULL;
643 VK_SHADER_STAGE_COMPUTE_BIT,
644 (
const char *[]) {
"GL_EXT_buffer_reference",
645 "GL_EXT_buffer_reference2" }, 2,
660 .
name =
"rangecoder_static_buf",
661 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
662 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
663 .mem_layout =
"scalar",
664 .buf_content =
"uint8_t zero_one_state[512];",
667 .name =
"crc_ieee_buf",
668 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
669 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
670 .mem_layout =
"scalar",
671 .buf_content =
"uint32_t crc_ieee[256];",
675 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
676 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
677 .mem_layout =
"scalar",
678 .buf_content =
"int16_t quant_table[MAX_QUANT_TABLES]"
679 "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
689 .
name =
"slice_data_buf",
690 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
691 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
692 .buf_content =
"SliceContext slice_ctx",
693 .buf_elems =
f->max_slice_count,
696 .name =
"slice_offsets_buf",
697 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
698 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
699 .mem_quali =
"readonly",
700 .buf_content =
"uint32_t slice_offsets",
701 .buf_elems = 2*
f->max_slice_count,
704 .name =
"slice_status_buf",
705 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
706 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
707 .mem_quali =
"writeonly",
708 .buf_content =
"uint32_t slice_status",
709 .buf_elems = 2*
f->max_slice_count,
738 void *spv_opaque =
NULL;
739 int wg_dim =
FFMIN(
s->props.properties.limits.maxComputeWorkGroupSize[0], 1024);
742 VK_SHADER_STAGE_COMPUTE_BIT,
743 (
const char *[]) {
"GL_EXT_buffer_reference",
744 "GL_EXT_buffer_reference2" }, 2,
754 GLSLC(0,
layout(push_constant, scalar) uniform pushConstants { );
756 GLSLC(1, u8buf slice_state; );
757 GLSLC(1, uint plane_state_size; );
758 GLSLC(1, uint8_t codec_planes; );
759 GLSLC(1, uint8_t key_frame; );
761 GLSLC(1, uint8_t micro_version; );
762 GLSLC(1, uint8_t padding[1]; );
765 VK_SHADER_STAGE_COMPUTE_BIT);
773 .
name =
"rangecoder_static_buf",
774 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
775 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
776 .mem_layout =
"scalar",
777 .buf_content =
"uint8_t zero_one_state[512];",
781 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
782 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
783 .mem_layout =
"scalar",
784 .buf_content =
"int16_t quant_table[MAX_QUANT_TABLES]"
785 "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
796 .
name =
"slice_data_buf",
797 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
798 .mem_quali =
"readonly",
799 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
800 .buf_content =
"SliceContext slice_ctx",
801 .buf_elems =
f->max_slice_count,
826 int use32bit,
int ac,
int rgb)
833 void *spv_opaque =
NULL;
835 s->driver_props.driverID == VK_DRIVER_ID_MESA_RADV;
838 VK_SHADER_STAGE_COMPUTE_BIT,
839 (
const char *[]) {
"GL_EXT_buffer_reference",
840 "GL_EXT_buffer_reference2" }, 2,
850 if (use_cached_reader)
864 .
name =
"rangecoder_static_buf",
865 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
866 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
867 .mem_layout =
"scalar",
868 .buf_content =
"uint8_t zero_one_state[512];",
872 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
873 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
874 .mem_layout =
"scalar",
875 .buf_content =
"int16_t quant_table[MAX_QUANT_TABLES]"
876 "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
888 .
name =
"slice_data_buf",
889 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
890 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
891 .buf_content =
"SliceContext slice_ctx",
892 .buf_elems =
f->max_slice_count,
896 .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
901 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
904 .name =
"slice_status_buf",
905 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
906 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
907 .mem_quali =
"writeonly",
908 .buf_content =
"uint32_t slice_status",
909 .buf_elems = 2*
f->max_slice_count,
913 .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
917 .mem_quali =
"writeonly",
919 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
954 frames_ctx->
width =
s->frames->width;
957 vk_frames = frames_ctx->
hwctx;
958 vk_frames->
tiling = VK_IMAGE_TILING_OPTIMAL;
959 vk_frames->
img_flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
960 vk_frames->
usage = VK_IMAGE_USAGE_STORAGE_BIT |
961 VK_IMAGE_USAGE_TRANSFER_DST_BIT;
980 for (
int i = 0;
i < 2;
i++)
983 for (
int i = 0;
i < 2;
i++)
986 for (
int i = 0;
i < 2;
i++)
987 for (
int j = 0; j < 2; j++)
988 for (
int k = 0; k < 2; k++)
1009 if (
f->version < 3 ||
1010 (
f->version == 4 &&
f->micro_version > 3))
1013 spv = ff_vk_spirv_init();
1033 for (
int i = 0;
i < 2;
i++) {
1046 for (
int i = 0;
i < 2;
i++) {
1054 for (
int i = 0;
i < 2;
i++) {
1055 for (
int j = 0; j < 2; j++) {
1056 for (
int k = 0; k < 2; k++) {
1096 &fv->
setup, 0, 0, 0,
1099 VK_FORMAT_UNDEFINED));
1101 &fv->
setup, 0, 1, 0,
1104 VK_FORMAT_UNDEFINED));
1107 for (
int i = 0;
i < 2;
i++) {
1108 for (
int j = 0; j < 2; j++) {
1109 for (
int k = 0; k < 2; k++) {
1111 &fv->
decode[
i][j][k], 0, 0, 0,
1114 VK_FORMAT_UNDEFINED));
1116 &fv->
decode[
i][j][k], 0, 1, 0,
1119 VK_FORMAT_UNDEFINED));
1140 if (!(slice_status->
flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
1141 VkMappedMemoryRange invalidate_data = {
1142 .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
1143 .memory = slice_status->
mem,
1145 .size = 2*fp->
slice_num*
sizeof(uint32_t),
1148 1, &invalidate_data);
1152 uint32_t crc_res = 0;
1168 .
p.
name =
"ffv1_vulkan",