From b7844ef307e25b4f3a21f1895fe93dd549a2210a Mon Sep 17 00:00:00 2001 From: Seungha Yang Date: Sat, 27 Apr 2024 23:34:35 +0900 Subject: [PATCH] d3d12decoder: Remove CPU-side waiting Sets decoder command queue's fence to memory instead of waiting from decoder's output thread. CPU-side waiting will happen only if download is required. Part-of: --- .../sys/d3d12/gstd3d12decoder.cpp | 39 +++++++++++++------ 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/subprojects/gst-plugins-bad/sys/d3d12/gstd3d12decoder.cpp b/subprojects/gst-plugins-bad/sys/d3d12/gstd3d12decoder.cpp index 7ad41da61c..fe55884dc2 100644 --- a/subprojects/gst-plugins-bad/sys/d3d12/gstd3d12decoder.cpp +++ b/subprojects/gst-plugins-bad/sys/d3d12/gstd3d12decoder.cpp @@ -445,7 +445,7 @@ gst_d3d12_decoder_open (GstD3D12Decoder * decoder, GstElement * element) desc.Type = D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE; desc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE; cmd->queue = gst_d3d12_command_queue_new (cmd->device.Get (), &desc, - D3D12_FENCE_FLAG_NONE, 4); + D3D12_FENCE_FLAG_NONE, ASYNC_DEPTH * 2); if (!cmd->queue) { GST_ERROR_OBJECT (element, "Couldn't create command queue"); return FALSE; @@ -1299,6 +1299,17 @@ gst_d3d12_decoder_end_picture (GstD3D12Decoder * decoder, } decoder_pic->fence_val = priv->cmd->fence_val; + auto fence_handle = + gst_d3d12_command_queue_get_fence_handle (priv->cmd->queue); + dmem = (GstD3D12Memory *) gst_buffer_peek_memory (decoder_pic->buffer, 0); + gst_d3d12_memory_set_external_fence (dmem, + fence_handle, priv->cmd->fence_val); + if (decoder_pic->output_buffer) { + dmem = (GstD3D12Memory *) + gst_buffer_peek_memory (decoder_pic->output_buffer, 0); + gst_d3d12_memory_set_external_fence (dmem, + fence_handle, priv->cmd->fence_val); + } GstD3D12FenceData *fence_data; gst_d3d12_fence_data_pool_acquire (priv->fence_data_pool, &fence_data); @@ -1519,18 +1530,28 @@ gst_d3d12_decoder_process_output (GstD3D12Decoder * self, } guint64 copy_fence_val = 0; + GstD3D12FenceData *fence_data = nullptr; D3D12_COMMAND_LIST_TYPE queue_type = D3D12_COMMAND_LIST_TYPE_COPY; - if (out_resource) + if (out_resource) { queue_type = D3D12_COMMAND_LIST_TYPE_DIRECT; + gst_d3d12_fence_data_pool_acquire (priv->fence_data_pool, &fence_data); + gst_d3d12_fence_data_add_notify_mini_object (fence_data, + gst_buffer_ref (buffer)); + } + + auto fence_handle = + gst_d3d12_command_queue_get_fence_handle (priv->cmd->queue); gst_d3d12_device_copy_texture_region (self->device, copy_args.size (), - copy_args.data (), nullptr, nullptr, 0, queue_type, ©_fence_val); - gst_d3d12_device_fence_wait (self->device, queue_type, - copy_fence_val, priv->copy_event_handle); + copy_args.data (), fence_data, fence_handle, decoder_pic->fence_val, + queue_type, ©_fence_val); if (!out_resource) { guint8 *map_data; GstVideoFrame vframe; + gst_d3d12_device_fence_wait (self->device, queue_type, + copy_fence_val, priv->copy_event_handle); + hr = priv->session->staging->Map (0, nullptr, (void **) &map_data); if (!gst_d3d12_result (hr, self->device)) { ret = GST_FLOW_ERROR; @@ -1555,6 +1576,8 @@ gst_d3d12_decoder_process_output (GstD3D12Decoder * self, priv->session->staging->Unmap (0, nullptr); gst_video_frame_unmap (&vframe); + } else { + gst_d3d12_buffer_after_write (frame->output_buffer, copy_fence_val); } } @@ -1592,8 +1615,6 @@ gst_d3d12_decoder_output_loop (GstD3D12Decoder * self) GST_DEBUG_OBJECT (self, "Entering output thread"); - HANDLE event_handle = CreateEventEx (nullptr, nullptr, 0, EVENT_ALL_ACCESS); - while (true) { DecoderOutputData output_data; { @@ -1613,8 +1634,6 @@ gst_d3d12_decoder_output_loop (GstD3D12Decoder * self) auto decoder_pic = get_decoder_picture (output_data.picture); g_assert (decoder_pic); - gst_d3d12_command_queue_fence_wait (priv->cmd->queue, - decoder_pic->fence_val, event_handle); if (priv->flushing) { GST_DEBUG_OBJECT (self, "Drop framem, we are flushing"); @@ -1640,8 +1659,6 @@ gst_d3d12_decoder_output_loop (GstD3D12Decoder * self) GST_DEBUG_OBJECT (self, "Leaving output thread"); - CloseHandle (event_handle); - return nullptr; }