From 478e49dd73753125911724e52733ad220f07754c Mon Sep 17 00:00:00 2001 From: Seungha Yang Date: Sat, 27 Apr 2024 17:54:38 +0900 Subject: [PATCH] d3d12: Update copy_texture_region() method Pass external fence value if any and allow passing fence data so that dependent resources can be released once copy is done Part-of: --- .../gst/d3d12/gstd3d12device-private.h | 7 +++ .../gst-libs/gst/d3d12/gstd3d12device.cpp | 43 ++++++++++++++++--- .../gst-libs/gst/d3d12/gstd3d12frame.cpp | 16 +++++-- .../gst-libs/gst/d3d12/gstd3d12memory.cpp | 36 +++++++++++++--- .../sys/d3d12/gstd3d12decoder.cpp | 2 +- .../sys/d3d12/gstd3d12encoder.cpp | 3 +- .../sys/d3d12/gstd3d12ipcclient.cpp | 3 +- .../sys/dwrite/gstdwriterender_d3d12.cpp | 14 +++--- 8 files changed, 97 insertions(+), 27 deletions(-) diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3d12/gstd3d12device-private.h b/subprojects/gst-plugins-bad/gst-libs/gst/d3d12/gstd3d12device-private.h index 8b021aa2f1..d7e176463a 100644 --- a/subprojects/gst-plugins-bad/gst-libs/gst/d3d12/gstd3d12device-private.h +++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3d12/gstd3d12device-private.h @@ -39,9 +39,16 @@ GST_D3D12_API gboolean gst_d3d12_device_copy_texture_region (GstD3D12Device * device, guint num_args, const GstD3D12CopyTextureRegionArgs * args, + GstD3D12FenceData * fence_data, + ID3D12Fence * fence_to_wait, + guint64 fence_value_to_wait, D3D12_COMMAND_LIST_TYPE command_type, guint64 * fence_value); +GST_D3D12_API +gboolean gst_d3d12_device_acquire_fence_data (GstD3D12Device * device, + GstD3D12FenceData ** fence_data); + GST_D3D12_API void gst_d3d12_device_clear_yuv_texture (GstD3D12Device * device, GstMemory * mem); diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3d12/gstd3d12device.cpp b/subprojects/gst-plugins-bad/gst-libs/gst/d3d12/gstd3d12device.cpp index e1fccb935f..2b82e490c7 100644 --- a/subprojects/gst-plugins-bad/gst-libs/gst/d3d12/gstd3d12device.cpp +++ 
b/subprojects/gst-plugins-bad/gst-libs/gst/d3d12/gstd3d12device.cpp @@ -120,6 +120,8 @@ struct DeviceInner gst_clear_object (&copy_ca_pool); gst_clear_object (&copy_cl_pool); + gst_clear_object (&fence_data_pool); + factory = nullptr; adapter = nullptr; @@ -222,6 +224,8 @@ struct DeviceInner GstD3D12CommandListPool *copy_cl_pool = nullptr; GstD3D12CommandAllocatorPool *copy_ca_pool = nullptr; + GstD3D12FenceDataPool *fence_data_pool = nullptr; + guint rtv_inc_size; guint adapter_index = 0; @@ -910,6 +914,8 @@ gst_d3d12_device_new_internal (const GstD3D12DeviceConstructData * data) priv->rtv_inc_size = device->GetDescriptorHandleIncrementSize (D3D12_DESCRIPTOR_HEAP_TYPE_RTV); + priv->fence_data_pool = gst_d3d12_fence_data_pool_new (); + GST_OBJECT_FLAG_SET (priv->direct_queue, GST_OBJECT_FLAG_MAY_BE_LEAKED); GST_OBJECT_FLAG_SET (priv->direct_cl_pool, GST_OBJECT_FLAG_MAY_BE_LEAKED); GST_OBJECT_FLAG_SET (priv->direct_ca_pool, GST_OBJECT_FLAG_MAY_BE_LEAKED); @@ -918,6 +924,8 @@ gst_d3d12_device_new_internal (const GstD3D12DeviceConstructData * data) GST_OBJECT_FLAG_SET (priv->copy_cl_pool, GST_OBJECT_FLAG_MAY_BE_LEAKED); GST_OBJECT_FLAG_SET (priv->copy_ca_pool, GST_OBJECT_FLAG_MAY_BE_LEAKED); + GST_OBJECT_FLAG_SET (priv->fence_data_pool, GST_OBJECT_FLAG_MAY_BE_LEAKED); + hr = device->CreateFence (0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS (&priv->dev_removed_fence)); if (FAILED (hr)) { @@ -1270,6 +1278,8 @@ gst_d3d12_device_fence_wait (GstD3D12Device * device, gboolean gst_d3d12_device_copy_texture_region (GstD3D12Device * device, guint num_args, const GstD3D12CopyTextureRegionArgs * args, + GstD3D12FenceData * fence_data, + ID3D12Fence * fence_to_wait, guint64 fence_value_to_wait, D3D12_COMMAND_LIST_TYPE command_type, guint64 * fence_value) { g_return_val_if_fail (GST_IS_D3D12_DEVICE (device), FALSE); @@ -1285,6 +1295,9 @@ gst_d3d12_device_copy_texture_region (GstD3D12Device * device, GstD3D12CommandQueue *queue = nullptr; guint64 fence_val = 0; + if (!fence_data) + 
gst_d3d12_fence_data_pool_acquire (priv->fence_data_pool, &fence_data); + switch (command_type) { case D3D12_COMMAND_LIST_TYPE_DIRECT: queue = priv->direct_queue; @@ -1299,21 +1312,25 @@ gst_d3d12_device_copy_texture_region (GstD3D12Device * device, default: GST_ERROR_OBJECT (device, "Not supported command list type %d", command_type); + gst_d3d12_fence_data_unref (fence_data); return FALSE; } gst_d3d12_command_allocator_pool_acquire (ca_pool, &gst_ca); if (!gst_ca) { GST_ERROR_OBJECT (device, "Couldn't acquire command allocator"); + gst_d3d12_fence_data_unref (fence_data); return FALSE; } + gst_d3d12_fence_data_add_notify_mini_object (fence_data, gst_ca); + auto ca = gst_d3d12_command_allocator_get_handle (gst_ca); gst_d3d12_command_list_pool_acquire (cl_pool, ca, &gst_cl); if (!gst_cl) { GST_ERROR_OBJECT (device, "Couldn't acquire command list"); - gst_clear_d3d12_command_allocator (&gst_ca); + gst_d3d12_fence_data_unref (fence_data); return FALSE; } @@ -1333,23 +1350,23 @@ gst_d3d12_device_copy_texture_region (GstD3D12Device * device, if (!gst_d3d12_result (hr, device)) { GST_ERROR_OBJECT (device, "Couldn't close command list"); gst_clear_d3d12_command_list (&gst_cl); - gst_clear_d3d12_command_allocator (&gst_ca); + gst_d3d12_fence_data_unref (fence_data); return FALSE; } ID3D12CommandList *cmd_list[] = { cl.Get () }; - hr = gst_d3d12_command_queue_execute_command_lists (queue, - 1, cmd_list, &fence_val); + hr = gst_d3d12_command_queue_execute_wait_and_command_lists (queue, + fence_to_wait, fence_value_to_wait, 1, cmd_list, &fence_val); auto ret = gst_d3d12_result (hr, device); /* We can release command list since command list pool will hold it */ gst_d3d12_command_list_unref (gst_cl); if (ret) { - gst_d3d12_command_queue_set_notify (queue, fence_val, gst_ca, - (GDestroyNotify) gst_d3d12_command_allocator_unref); + gst_d3d12_command_queue_set_notify (queue, fence_val, fence_data, + (GDestroyNotify) gst_d3d12_fence_data_unref); } else { - 
gst_d3d12_command_allocator_unref (gst_ca); + gst_d3d12_fence_data_unref (fence_data); } if (fence_value) @@ -1358,6 +1375,18 @@ gst_d3d12_device_copy_texture_region (GstD3D12Device * device, return ret; } +gboolean +gst_d3d12_device_acquire_fence_data (GstD3D12Device * device, + GstD3D12FenceData ** fence_data) +{ + g_return_val_if_fail (GST_IS_D3D12_DEVICE (device), FALSE); + g_return_val_if_fail (fence_data, FALSE); + + auto priv = device->priv->inner; + + return gst_d3d12_fence_data_pool_acquire (priv->fence_data_pool, fence_data); +} + static inline GstDebugLevel d3d12_message_severity_to_gst (D3D12_MESSAGE_SEVERITY level) { diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3d12/gstd3d12frame.cpp b/subprojects/gst-plugins-bad/gst-libs/gst/d3d12/gstd3d12frame.cpp index 1eecd0358f..4922897f37 100644 --- a/subprojects/gst-plugins-bad/gst-libs/gst/d3d12/gstd3d12frame.cpp +++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3d12/gstd3d12frame.cpp @@ -332,9 +332,14 @@ gst_d3d12_frame_copy (GstD3D12Frame * dest, const GstD3D12Frame * src, args[i].src_box = &src_box[i]; } + GstD3D12FenceData *fence_data; + gst_d3d12_device_acquire_fence_data (dest->device, &fence_data); + gst_d3d12_fence_data_add_notify_mini_object (fence_data, + gst_buffer_ref (src->buffer)); + return gst_d3d12_device_copy_texture_region (dest->device, - GST_VIDEO_INFO_N_PLANES (&dest->info), args, - D3D12_COMMAND_LIST_TYPE_DIRECT, fence_value); + GST_VIDEO_INFO_N_PLANES (&dest->info), args, fence_data, + nullptr, 0, D3D12_COMMAND_LIST_TYPE_DIRECT, fence_value); } /** @@ -373,6 +378,11 @@ gst_d3d12_frame_copy_plane (GstD3D12Frame * dest, const GstD3D12Frame * src, gst_d3d12_frame_build_copy_args (dest, src, plane, &args, &src_box); args.src_box = &src_box; + GstD3D12FenceData *fence_data; + gst_d3d12_device_acquire_fence_data (dest->device, &fence_data); + gst_d3d12_fence_data_add_notify_mini_object (fence_data, + gst_buffer_ref (src->buffer)); + return gst_d3d12_device_copy_texture_region 
(dest->device, 1, &args, - D3D12_COMMAND_LIST_TYPE_DIRECT, fence_value); + fence_data, nullptr, 0, D3D12_COMMAND_LIST_TYPE_DIRECT, fence_value); } diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3d12/gstd3d12memory.cpp b/subprojects/gst-plugins-bad/gst-libs/gst/d3d12/gstd3d12memory.cpp index d0feece58e..f929a999d9 100644 --- a/subprojects/gst-plugins-bad/gst-libs/gst/d3d12/gstd3d12memory.cpp +++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3d12/gstd3d12memory.cpp @@ -479,13 +479,22 @@ gst_d3d12_memory_download (GstD3D12Memory * dmem) copy_args.push_back (args); } - gst_d3d12_memory_wait_gpu (dmem, D3D12_COMMAND_LIST_TYPE_DIRECT, - dmem->fence_value); + if (priv->external_fence) { + auto cq = gst_d3d12_device_get_command_queue (dmem->device, + D3D12_COMMAND_LIST_TYPE_COPY); + gst_d3d12_command_queue_execute_wait (cq, priv->external_fence.Get (), + priv->external_fence_val); + } + + auto cq = gst_d3d12_device_get_command_queue (dmem->device, + D3D12_COMMAND_LIST_TYPE_DIRECT); + auto direct_fence = gst_d3d12_command_queue_get_fence_handle (cq); guint64 fence_val = 0; /* Use async copy queue when downloading */ if (!gst_d3d12_device_copy_texture_region (dmem->device, copy_args.size (), - copy_args.data (), D3D12_COMMAND_LIST_TYPE_COPY, &fence_val)) { + copy_args.data (), nullptr, direct_fence, dmem->fence_value, + D3D12_COMMAND_LIST_TYPE_COPY, &fence_val)) { GST_ERROR_OBJECT (dmem->device, "Couldn't download texture to staging"); return FALSE; } @@ -521,7 +530,8 @@ gst_d3d12_memory_upload (GstD3D12Memory * dmem) } if (!gst_d3d12_device_copy_texture_region (dmem->device, copy_args.size (), - copy_args.data (), D3D12_COMMAND_LIST_TYPE_DIRECT, + copy_args.data (), nullptr, priv->external_fence.Get (), + priv->external_fence_val, D3D12_COMMAND_LIST_TYPE_DIRECT, &dmem->fence_value)) { GST_ERROR_OBJECT (dmem->device, "Couldn't upload texture"); return FALSE; @@ -1166,11 +1176,25 @@ gst_d3d12_memory_copy (GstMemory * mem, gssize offset, gssize size) 
mem_priv->subresource_index[i]); copy_args.push_back (args); } + gst_memory_unmap (mem, &info); + + ComPtr < ID3D12Fence > fence_to_wait; + guint64 fence_value_to_wait; + { + std::lock_guard < std::mutex > lk (mem_priv->lock); + fence_to_wait = mem_priv->external_fence; + fence_value_to_wait = mem_priv->external_fence_val; + } + + GstD3D12FenceData *fence_data; + gst_d3d12_device_acquire_fence_data (dmem->device, &fence_data); + gst_d3d12_fence_data_add_notify_mini_object (fence_data, + gst_memory_ref (mem)); gst_d3d12_device_copy_texture_region (dmem->device, - copy_args.size (), copy_args.data (), D3D12_COMMAND_LIST_TYPE_DIRECT, + copy_args.size (), copy_args.data (), fence_data, fence_to_wait.Get (), + fence_value_to_wait, D3D12_COMMAND_LIST_TYPE_DIRECT, &dst_dmem->fence_value); - gst_memory_unmap (mem, &info); GST_MINI_OBJECT_FLAG_SET (dst, GST_D3D12_MEMORY_TRANSFER_NEED_DOWNLOAD); diff --git a/subprojects/gst-plugins-bad/sys/d3d12/gstd3d12decoder.cpp b/subprojects/gst-plugins-bad/sys/d3d12/gstd3d12decoder.cpp index caf82ceb94..7ad41da61c 100644 --- a/subprojects/gst-plugins-bad/sys/d3d12/gstd3d12decoder.cpp +++ b/subprojects/gst-plugins-bad/sys/d3d12/gstd3d12decoder.cpp @@ -1523,7 +1523,7 @@ gst_d3d12_decoder_process_output (GstD3D12Decoder * self, if (out_resource) queue_type = D3D12_COMMAND_LIST_TYPE_DIRECT; gst_d3d12_device_copy_texture_region (self->device, copy_args.size (), - copy_args.data (), queue_type, &copy_fence_val); + copy_args.data (), nullptr, nullptr, 0, queue_type, &copy_fence_val); gst_d3d12_device_fence_wait (self->device, queue_type, copy_fence_val, priv->copy_event_handle); diff --git a/subprojects/gst-plugins-bad/sys/d3d12/gstd3d12encoder.cpp b/subprojects/gst-plugins-bad/sys/d3d12/gstd3d12encoder.cpp index ee93596194..160b2ccd30 100644 --- a/subprojects/gst-plugins-bad/sys/d3d12/gstd3d12encoder.cpp +++ b/subprojects/gst-plugins-bad/sys/d3d12/gstd3d12encoder.cpp @@ -771,7 +771,8 @@ gst_d3d12_encoder_upload_frame (GstD3D12Encoder * self, 
GstBuffer * buffer) guint64 fence_val = 0; gst_d3d12_device_copy_texture_region (self->device, copy_args.size (), - copy_args.data (), D3D12_COMMAND_LIST_TYPE_DIRECT, &fence_val); + copy_args.data (), nullptr, nullptr, 0, D3D12_COMMAND_LIST_TYPE_DIRECT, + &fence_val); gst_d3d12_buffer_after_write (upload, fence_val); } else { GstVideoFrame src_frame, dst_frame; diff --git a/subprojects/gst-plugins-bad/sys/d3d12/gstd3d12ipcclient.cpp b/subprojects/gst-plugins-bad/sys/d3d12/gstd3d12ipcclient.cpp index e0fc737769..d5efe97cf5 100644 --- a/subprojects/gst-plugins-bad/sys/d3d12/gstd3d12ipcclient.cpp +++ b/subprojects/gst-plugins-bad/sys/d3d12/gstd3d12ipcclient.cpp @@ -513,7 +513,8 @@ gst_d3d12_ipc_client_have_data (GstD3D12IpcClient * self) guint64 copy_fence_val; gst_d3d12_device_copy_texture_region (priv->device, copy_args.size (), - copy_args.data (), D3D12_COMMAND_LIST_TYPE_DIRECT, &copy_fence_val); + copy_args.data (), nullptr, nullptr, 0, D3D12_COMMAND_LIST_TYPE_DIRECT, + &copy_fence_val); auto data = new GstD3D12IpcReleaseData (); data->self = (GstD3D12IpcClient *) gst_object_ref (self); diff --git a/subprojects/gst-plugins-bad/sys/dwrite/gstdwriterender_d3d12.cpp b/subprojects/gst-plugins-bad/sys/dwrite/gstdwriterender_d3d12.cpp index b793559e7e..04bc33f54d 100644 --- a/subprojects/gst-plugins-bad/sys/dwrite/gstdwriterender_d3d12.cpp +++ b/subprojects/gst-plugins-bad/sys/dwrite/gstdwriterender_d3d12.cpp @@ -363,12 +363,6 @@ gst_dwrite_d3d12_render_draw_layout (GstDWriteRender * render, args.dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; args.dst.pResource = texture; - gst_d3d12_device_copy_texture_region (priv->device, - 1, &args, D3D12_COMMAND_LIST_TYPE_DIRECT, &priv->fence_val); - - priv->scheduled.push (priv->fence_val); - dmem->fence_value = priv->fence_val; - GstD3D12FenceData *fence_data; gst_d3d12_fence_data_pool_acquire (priv->fence_data_pool, &fence_data); auto resource_clone = priv->layout_resource; @@ -377,8 +371,12 @@ 
gst_dwrite_d3d12_render_draw_layout (GstDWriteRender * render, gst_d3d12_fence_data_add_notify_com (fence_data, resource_clone.Detach ()); gst_d3d12_fence_data_add_notify_com (fence_data, wrapped_clone.Detach ()); - gst_d3d12_device_set_fence_notify (priv->device, - D3D12_COMMAND_LIST_TYPE_DIRECT, dmem->fence_value, fence_data); + gst_d3d12_device_copy_texture_region (priv->device, + 1, &args, fence_data, nullptr, 0, D3D12_COMMAND_LIST_TYPE_DIRECT, + &priv->fence_val); + + priv->scheduled.push (priv->fence_val); + dmem->fence_value = priv->fence_val; GST_MINI_OBJECT_FLAG_SET (dmem, GST_D3D12_MEMORY_TRANSFER_NEED_DOWNLOAD); GST_MINI_OBJECT_FLAG_UNSET (dmem, GST_D3D12_MEMORY_TRANSFER_NEED_UPLOAD);