40 #define RGB_LINECACHE 4
160 AVFrame *enc_in, VkImageView *enc_in_views,
161 FFVkBuffer *slice_data_buf, uint32_t slice_data_size,
172 0, slice_data_size*
f->slice_count,
173 VK_FORMAT_UNDEFINED);
175 enc_in, enc_in_views,
177 VK_IMAGE_LAYOUT_GENERAL,
182 VK_SHADER_STAGE_COMPUTE_BIT,
191 AVFrame *enc_in, VkImageView *enc_in_views,
201 enc_in, enc_in_views,
203 VK_IMAGE_LAYOUT_GENERAL,
208 0, fltmap_size*
f->slice_count,
209 VK_FORMAT_UNDEFINED);
213 VK_SHADER_STAGE_COMPUTE_BIT,
222 AVFrame *enc_in, VkImageView *enc_in_views,
232 enc_in, enc_in_views,
234 VK_IMAGE_LAYOUT_GENERAL,
239 0, units_size*
f->slice_count,
240 VK_FORMAT_UNDEFINED);
244 VK_SHADER_STAGE_COMPUTE_BIT,
266 uint32_t plane_state_size;
267 uint32_t slice_state_size;
268 uint32_t slice_data_size;
273 uint32_t remap_data_size = 0;
279 int has_inter = avctx->
gop_size > 1;
280 uint32_t context_count =
f->context_count[
f->context_model];
282 VkImageMemoryBarrier2 img_bar[37];
284 VkBufferMemoryBarrier2 buf_bar[8];
288 f->cur_enc_frame = pict;
297 f->slice_count =
f->max_slice_count;
301 plane_state_size = 8;
305 plane_state_size *= context_count;
306 slice_state_size = plane_state_size*
f->plane_count;
308 slice_data_size = 256;
309 slice_state_size += slice_data_size;
310 slice_state_size =
FFALIGN(slice_state_size, 8);
314 if (!slice_data_ref) {
317 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
318 NULL, slice_state_size*
f->slice_count,
319 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT));
333 remap_data_size = 4*(1 <<
desc->comp[0].depth)*
sizeof(uint32_t);
338 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
339 NULL, remap_data_size*
f->slice_count,
340 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT));
349 VkMemoryPropertyFlagBits out_buf_flags;
350 if (maxsize < fv->max_heap_size) {
351 out_buf_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
354 out_buf_flags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
356 out_buf_flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
362 VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
363 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
364 VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
365 NULL, maxsize, out_buf_flags));
391 .plane_state_size = plane_state_size,
392 .key_frame =
f->key_frame,
394 .micro_version =
f->micro_version,
400 .slice_size_max = out_data_buf->
size /
f->slice_count,
404 for (
int i = 0;
i <
f->quant_table_count;
i++) {
407 f->quant_tables[
i][4][127];
414 memcpy(pd.
fmt_lut, (
int [4]) { 2, 1, 0, 3 }, 4*
sizeof(
int));
433 remap_data_ref =
NULL;
437 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
438 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
441 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
442 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
447 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
448 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
449 VK_ACCESS_SHADER_READ_BIT,
450 VK_IMAGE_LAYOUT_GENERAL,
451 VK_QUEUE_FAMILY_IGNORED);
453 vk->CmdPipelineBarrier2(exec->
buf, &(VkDependencyInfo) {
454 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
455 .pImageMemoryBarriers = img_bar,
456 .imageMemoryBarrierCount = nb_img_bar,
464 slice_data_buf, slice_data_size, &pd));
468 COMPUTE_SHADER_BIT, SHADER_READ_BIT, SHADER_WRITE_BIT,
469 COMPUTE_SHADER_BIT, SHADER_READ_BIT, SHADER_WRITE_BIT,
470 0, slice_data_size*
f->slice_count);
476 remap_data_buf, remap_data_size, &pd));
479 remap_data_buf, remap_data_size, &pd));
484 COMPUTE_SHADER_BIT, SHADER_READ_BIT, SHADER_WRITE_BIT,
485 COMPUTE_SHADER_BIT, SHADER_READ_BIT, SHADER_WRITE_BIT,
486 0, remap_data_size*
f->slice_count);
490 vk->CmdPipelineBarrier2(exec->
buf, &(VkDependencyInfo) {
491 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
492 .pBufferMemoryBarriers = buf_bar,
493 .bufferMemoryBarrierCount = nb_buf_bar,
502 0, slice_data_size*
f->slice_count,
503 VK_FORMAT_UNDEFINED);
508 0, remap_data_size*
f->slice_count,
509 VK_FORMAT_UNDEFINED);
513 VK_SHADER_STAGE_COMPUTE_BIT,
521 vkf->
layout[0] = VK_IMAGE_LAYOUT_UNDEFINED;
522 vkf->
access[0] = VK_ACCESS_2_NONE;
529 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
530 VK_PIPELINE_STAGE_2_CLEAR_BIT,
531 VK_ACCESS_2_TRANSFER_WRITE_BIT,
532 VK_IMAGE_LAYOUT_GENERAL,
533 VK_QUEUE_FAMILY_IGNORED);
534 vk->CmdPipelineBarrier2(exec->
buf, &(VkDependencyInfo) {
535 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
536 .pImageMemoryBarriers = img_bar,
537 .imageMemoryBarrierCount = nb_img_bar,
541 vk->CmdClearColorImage(exec->
buf, vkf->
img[0], VK_IMAGE_LAYOUT_GENERAL,
542 &((VkClearColorValue) { 0 }),
543 1, &((VkImageSubresourceRange) {
544 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
555 0, slice_data_size*
f->slice_count,
556 VK_FORMAT_UNDEFINED);
562 VK_FORMAT_UNDEFINED);
566 VK_SHADER_STAGE_COMPUTE_BIT,
575 COMPUTE_SHADER_BIT, SHADER_WRITE_BIT, NONE_KHR,
576 COMPUTE_SHADER_BIT, SHADER_READ_BIT, SHADER_WRITE_BIT,
577 0, slice_data_size*
f->slice_count);
583 COMPUTE_SHADER_BIT, SHADER_READ_BIT, SHADER_WRITE_BIT,
584 COMPUTE_SHADER_BIT, SHADER_READ_BIT, NONE_KHR,
585 0, remap_data_size*
f->slice_count);
588 COMPUTE_SHADER_BIT, SHADER_WRITE_BIT, NONE_KHR,
589 COMPUTE_SHADER_BIT, SHADER_READ_BIT, SHADER_WRITE_BIT,
590 slice_data_size*
f->slice_count, VK_WHOLE_SIZE);
593 COMPUTE_SHADER_BIT, SHADER_READ_BIT, SHADER_WRITE_BIT,
594 COMPUTE_SHADER_BIT, SHADER_READ_BIT, SHADER_WRITE_BIT,
595 slice_data_size*
f->slice_count, VK_WHOLE_SIZE);
598 fv->
optimize_rct ? VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT :
599 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
600 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
601 VK_ACCESS_SHADER_READ_BIT,
602 VK_IMAGE_LAYOUT_GENERAL,
603 VK_QUEUE_FAMILY_IGNORED);
607 VK_PIPELINE_STAGE_2_CLEAR_BIT,
608 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
609 VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
610 VK_IMAGE_LAYOUT_GENERAL,
611 VK_QUEUE_FAMILY_IGNORED);
613 vk->CmdPipelineBarrier2(exec->
buf, &(VkDependencyInfo) {
614 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
615 .pImageMemoryBarriers = img_bar,
616 .imageMemoryBarrierCount = nb_img_bar,
617 .pBufferMemoryBarriers = buf_bar,
618 .bufferMemoryBarrierCount = nb_buf_bar,
627 0, slice_data_size*
f->slice_count,
628 VK_FORMAT_UNDEFINED);
632 fd->
idx*
f->max_slice_count*
sizeof(uint32_t),
633 f->slice_count*
sizeof(uint32_t),
634 VK_FORMAT_UNDEFINED);
640 VK_FORMAT_UNDEFINED);
644 VK_IMAGE_LAYOUT_GENERAL,
650 VK_IMAGE_LAYOUT_GENERAL,
656 0, remap_data_size*
f->slice_count,
657 VK_FORMAT_UNDEFINED);
661 VK_SHADER_STAGE_COMPUTE_BIT,
685 VkBufferCopy *buf_regions,
int nb_regions,
699 VkBufferMemoryBarrier2 buf_bar[8];
703 VK_BUFFER_USAGE_TRANSFER_DST_BIT);
721 COMPUTE_SHADER_BIT, SHADER_WRITE_BIT, NONE_KHR,
722 TRANSFER_BIT, TRANSFER_READ_BIT, NONE_KHR,
724 vk->CmdPipelineBarrier2(exec->
buf, &(VkDependencyInfo) {
725 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
726 .pBufferMemoryBarriers = buf_bar,
727 .bufferMemoryBarrierCount = nb_buf_bar,
731 for (
int i = 0;
i < nb_regions;
i++)
732 buf_regions[
i].dstOffset += mapped_buf->virtual_offset;
734 vk->CmdCopyBuffer(exec->
buf,
735 out_data_buf->
buf, mapped_buf->buf,
736 nb_regions, buf_regions);
759 uint32_t slice_size_max = out_data_buf->
size /
f->slice_count;
765 uint32_t rb_off = fd->
idx*
f->max_slice_count*
sizeof(uint32_t);
767 VkMappedMemoryRange invalidate_data = {
768 .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
771 .size =
f->slice_count*
sizeof(uint32_t),
774 1, &invalidate_data);
780 for (
int i = 0;
i <
f->slice_count;
i++) {
781 uint32_t sl_len =
AV_RN32(rb +
i*4);
785 .srcOffset =
i*slice_size_max,
817 if (!(out_data_buf->
flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
818 VkMappedMemoryRange invalidate_data = {
819 .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
820 .memory = out_data_buf->
mem,
822 .size = VK_WHOLE_SIZE,
825 1, &invalidate_data);
829 for (
int i = 0;
i <
f->slice_count;
i++) {
831 memcpy(
pkt->
data + region->dstOffset,
913 vk_frames = frames_ctx->
hwctx;
914 vk_frames->
tiling = VK_IMAGE_TILING_OPTIMAL;
915 vk_frames->
usage = VK_IMAGE_USAGE_STORAGE_BIT |
916 VK_IMAGE_USAGE_TRANSFER_DST_BIT;
917 vk_frames->
img_flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
937 (uint32_t []) { 32, 32, 1 }, 0);
940 VK_SHADER_STAGE_COMPUTE_BIT);
944 .
type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
945 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
952 .
type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
953 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
956 .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
957 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
981 (uint32_t []) { wg_x, 1, 1 }, 0);
984 VK_SHADER_STAGE_COMPUTE_BIT);
988 .
type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
989 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
996 .
type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
997 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1000 .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1001 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1005 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
1006 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1028 (uint32_t []) { 32, 32, 1 }, 0);
1031 VK_SHADER_STAGE_COMPUTE_BIT);
1035 .
type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
1036 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1043 .
type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
1044 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1047 .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1048 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1052 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
1053 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1075 (uint32_t []) { 1, 1, 1 }, 0);
1078 VK_SHADER_STAGE_COMPUTE_BIT);
1082 .
type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
1083 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1090 .
type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
1091 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1094 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
1095 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1116 int wg_dim =
FFMIN(fv->
s.
props.properties.limits.maxComputeWorkGroupSize[0], 1024);
1119 (uint32_t []) { wg_dim, 1, 1 }, 0);
1122 VK_SHADER_STAGE_COMPUTE_BIT);
1126 .
type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
1127 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1134 .
type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
1135 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1138 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
1139 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1168 (uint32_t []) { wg_x, 1, 1 }, 0);
1171 VK_SHADER_STAGE_COMPUTE_BIT);
1175 .
type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
1176 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1179 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
1180 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1183 .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
1184 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1191 .
type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
1192 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1195 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
1196 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1199 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
1200 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1203 .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1204 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1208 .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1209 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1212 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
1213 .stages = VK_SHADER_STAGE_COMPUTE_BIT,
1217 4 + fv->
is_rgb + !!
f->remap_mode, 0, 0);
1228 }
else if (
f->remap_mode) {
1266 size_t maxsize, max_heap_size, max_host_size;
1282 f->version =
f->bayer ? 4 : 3;
1289 if (
f->bits_per_raw_sample > (
f->version > 3 ? 16 : 8)) {
1292 "forcing range coder\n");
1297 if (
f->version < 4 && avctx->
gop_size > 1) {
1308 if (
f->version == 4)
1309 f->micro_version =
f->bayer ? 10 : (
f->remap_mode ? 9 : 3);
1314 if (
f->num_h_slices <= 0 &&
f->num_v_slices <= 0) {
1320 f->num_h_slices = 32;
1321 f->num_v_slices = 32;
1323 }
else if (
f->num_h_slices &&
f->num_v_slices <= 0) {
1325 }
else if (
f->num_v_slices &&
f->num_h_slices <= 0) {
1329 f->num_h_slices =
FFMIN(
f->num_h_slices, avctx->
width);
1334 "by the standard is %i\n",
1339 f->max_slice_count =
f->num_h_slices *
f->num_v_slices;
1344 if (
f->version < 4) {
1345 if (((
f->chroma_h_shift > 0) && (avctx->
width % (64 <<
f->chroma_h_shift))) ||
1346 ((
f->chroma_v_shift > 0) && (avctx->
height % (64 <<
f->chroma_v_shift)))) {
1348 "dimensions is only supported in version 4 (-level 4)\n");
1354 if (
f->version < 4) {
1377 for (
int i = 0;
i < fv->
s.
mprops.memoryHeapCount;
i++) {
1378 if (fv->
s.
mprops.memoryHeaps[
i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT)
1381 if (!(fv->
s.
mprops.memoryHeaps[
i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT))
1382 max_host_size =
FFMAX(max_host_size,
1388 if (maxsize > fv->
s.
props_11.maxMemoryAllocationSize) {
1390 "than maximum device allocation (%zu), clipping\n",
1391 maxsize, fv->
s.
props_11.maxMemoryAllocationSize);
1392 maxsize = fv->
s.
props_11.maxMemoryAllocationSize;
1395 if (max_heap_size < maxsize) {
1397 "using host memory (slower)\n",
1401 max_heap_size = max_host_size - (max_host_size >> 1);
1404 max_heap_size = max_heap_size - (max_heap_size >> 3);
1407 av_log(avctx,
AV_LOG_INFO,
"Async buffers: %zuMiB per context, %zuMiB total, depth: %i\n",
1408 maxsize / (1024*1024),
1442 uint32_t mw = (avctx->
width +
f->num_h_slices - 1) /
f->num_h_slices;
1443 uint32_t
mh = (avctx->
height +
f->num_v_slices - 1) /
f->num_v_slices;
1446 uint32_t pn = mw*
mh;
1474 if (
f->remap_mode) {
1509 &fv->
setup, 0, 0, 0,
1511 256*
sizeof(uint32_t), 512*
sizeof(uint8_t),
1512 VK_FORMAT_UNDEFINED));
1518 256*
sizeof(uint32_t), 512*
sizeof(uint8_t),
1519 VK_FORMAT_UNDEFINED));
1523 256*
sizeof(uint32_t) + 512*
sizeof(uint8_t),
1525 VK_FORMAT_UNDEFINED));
1529 0, 256*
sizeof(uint32_t),
1530 VK_FORMAT_UNDEFINED));
1553 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
1554 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
1555 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
1604 #define OFFSET(x) offsetof(VulkanEncodeFFv1Context, x)
1605 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
1608 { .i64 = -1 }, -1, 2,
VE },
1610 { .i64 = 0 }, 0, 1,
VE },
1620 { .i64 = -1 }, -1, 2,
VE , .unit =
"qtable"},
1624 { .i64 =
QTABLE_8BIT }, INT_MIN, INT_MAX,
VE, .unit =
"qtable" },
1634 { .i64 = 0 }, 0, 1,
VE },
1636 {
"rct_search",
"Run a search for RCT parameters (level 4 only)",
OFFSET(optimize_rct),
AV_OPT_TYPE_BOOL,
1637 { .i64 = 1 }, 0, 1,
VE },
1640 { .i64 = 1 }, 1, INT_MAX,
VE },
1643 { .i64 = -1 }, -1, 2,
VE, .unit =
"remap_mode" },
1645 { .i64 = -1 }, INT_MIN, INT_MAX,
VE, .unit =
"remap_mode" },
1647 { .i64 = 0 }, INT_MIN, INT_MAX,
VE, .unit =
"remap_mode" },
1649 { .i64 = 1 }, INT_MIN, INT_MAX,
VE, .unit =
"remap_mode" },
1651 { .i64 = 2 }, INT_MIN, INT_MAX,
VE, .unit =
"remap_mode" },
1674 .
p.
name =
"ffv1_vulkan",
1692 .p.wrapper_name =
"vulkan",