d3d12decoder: Use flexible task queue

Instead of using a fixed-size command allocator array, make it
resizable.

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/5870>
This commit is contained in:
Seungha Yang 2023-12-24 21:05:06 +09:00 committed by GStreamer Marge Bot
parent efc023e76e
commit e6bdb0458c

View file

@ -167,6 +167,15 @@ private:
bool flushing = false; bool flushing = false;
}; };
/* Per-decode-task resources: one command allocator plus the buffer the
 * compressed bitstream is copied into before the decode command is
 * recorded. */
struct DecoderTaskData
{
/* Command allocator created with D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE;
 * it is Reset() and then handed to the command list's Reset() each time
 * the task is used for a picture. */
ComPtr <ID3D12CommandAllocator> ca;
/* Resource that receives the compressed data via Map()/memcpy()/Unmap().
 * It is dropped and reallocated when the incoming data is larger than
 * bitstream_size. */
ComPtr <ID3D12Resource> bitstream;
/* Allocated size of `bitstream` in bytes (the rounded-up allocation size,
 * not the size of the data last written). No default initializer; it is
 * only read when `bitstream` is non-null. */
gsize bitstream_size;
};
/* Shared handle to a task: the same pointer is stored in the command data's
 * task queue and in the decoder picture that is currently using it. */
typedef std::shared_ptr<DecoderTaskData> DecoderTaskDataPtr;
struct GstD3D12DecoderPicture : public GstMiniObject struct GstD3D12DecoderPicture : public GstMiniObject
{ {
GstD3D12DecoderPicture (GstBuffer * dpb_buf, GstBuffer * out_buf, GstD3D12DecoderPicture (GstBuffer * dpb_buf, GstBuffer * out_buf,
@ -192,6 +201,7 @@ struct GstD3D12DecoderPicture : public GstMiniObject
ComPtr<ID3D12VideoDecoderHeap> heap; ComPtr<ID3D12VideoDecoderHeap> heap;
std::weak_ptr<GstD3D12Dpb> dpb; std::weak_ptr<GstD3D12Dpb> dpb;
guint64 fence_val = 0; guint64 fence_val = 0;
DecoderTaskDataPtr task_data;
guint8 view_id; guint8 view_id;
}; };
@ -211,13 +221,6 @@ DEFINE_ENUM_FLAG_OPERATORS (GstD3D12DecoderOutputType);
constexpr UINT64 ASYNC_DEPTH = 4; constexpr UINT64 ASYNC_DEPTH = 4;
/* Per-decode-task resources: one command allocator plus the buffer the
 * compressed bitstream is copied into before the decode command is
 * recorded. */
struct DecoderTaskData
{
/* Command allocator created with D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE;
 * it is Reset() and then handed to the command list's Reset() each time
 * the task is used for a picture. */
ComPtr <ID3D12CommandAllocator> ca;
/* Resource that receives the compressed data via Map()/memcpy()/Unmap().
 * It is dropped and reallocated when the incoming data is larger than
 * bitstream_size. */
ComPtr <ID3D12Resource> bitstream;
/* Allocated size of `bitstream` in bytes (the rounded-up allocation size,
 * not the size of the data last written). No default initializer; it is
 * only read when `bitstream` is non-null. */
gsize bitstream_size;
};
struct DecoderCmdData struct DecoderCmdData
{ {
DecoderCmdData () DecoderCmdData ()
@ -249,7 +252,9 @@ struct DecoderCmdData
HANDLE event_handle; HANDLE event_handle;
UINT64 fence_val = 0; UINT64 fence_val = 0;
std::vector<DecoderTaskData> task_data; std::mutex task_data_queue_lock;
std::queue<DecoderTaskDataPtr> task_data_queue;
guint num_allocated_tasks = 0;
}; };
struct DecoderOutputData struct DecoderOutputData
@ -471,8 +476,9 @@ gst_d3d12_decoder_open (GstD3D12Decoder * decoder, GstElement * element)
return FALSE; return FALSE;
} }
cmd->task_data.resize (ASYNC_DEPTH); /* Preallocate command allocators, but we can allocate additional command
for (size_t i = 0; i < cmd->task_data.size (); i++) { * allocators later */
for (size_t i = 0; i < ASYNC_DEPTH; i++) {
ComPtr < ID3D12CommandAllocator > ca; ComPtr < ID3D12CommandAllocator > ca;
hr = cmd->device->CreateCommandAllocator hr = cmd->device->CreateCommandAllocator
(D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE, IID_PPV_ARGS (&ca)); (D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE, IID_PPV_ARGS (&ca));
@ -481,7 +487,10 @@ gst_d3d12_decoder_open (GstD3D12Decoder * decoder, GstElement * element)
return FALSE; return FALSE;
} }
cmd->task_data[i].ca = ca; auto task_data = std::make_shared < DecoderTaskData > ();
task_data->ca = ca;
cmd->task_data_queue.push (task_data);
cmd->num_allocated_tasks++;
} }
priv->cmd = std::move (cmd); priv->cmd = std::move (cmd);
@ -1021,16 +1030,15 @@ gst_d3d12_decoder_start_picture (GstD3D12Decoder * decoder,
static gboolean static gboolean
gst_d3d12_decoder_upload_bitstream (GstD3D12Decoder * self, gpointer data, gst_d3d12_decoder_upload_bitstream (GstD3D12Decoder * self, gpointer data,
gsize size, DecoderTaskData & task) gsize size, DecoderTaskDataPtr task)
{ {
auto priv = self->priv; auto priv = self->priv;
HRESULT hr; HRESULT hr;
D3D12_RANGE range = { 0, size };
if (task.bitstream && task.bitstream_size < size) if (task->bitstream && task->bitstream_size < size)
task.bitstream = nullptr; task->bitstream = nullptr;
if (!task.bitstream) { if (!task->bitstream) {
ComPtr < ID3D12Resource > bitstream; ComPtr < ID3D12Resource > bitstream;
size_t alloc_size = GST_ROUND_UP_128 (size) + 1024; size_t alloc_size = GST_ROUND_UP_128 (size) + 1024;
@ -1048,19 +1056,22 @@ gst_d3d12_decoder_upload_bitstream (GstD3D12Decoder * self, gpointer data,
GST_LOG_OBJECT (self, "Allocated new bitstream buffer with size %" GST_LOG_OBJECT (self, "Allocated new bitstream buffer with size %"
G_GSIZE_FORMAT, size); G_GSIZE_FORMAT, size);
task.bitstream = bitstream; task->bitstream = bitstream;
task.bitstream_size = alloc_size; task->bitstream_size = alloc_size;
} }
gpointer map_data; gpointer map_data;
hr = task.bitstream->Map (0, &range, &map_data); D3D12_RANGE zero_range = { 0, 0 };
hr = task->bitstream->Map (0, &zero_range, &map_data);
if (!gst_d3d12_result (hr, self->device)) { if (!gst_d3d12_result (hr, self->device)) {
GST_ERROR_OBJECT (self, "Couldn't map bitstream buffer"); GST_ERROR_OBJECT (self, "Couldn't map bitstream buffer");
return FALSE; return FALSE;
} }
memcpy (map_data, data, size); memcpy (map_data, data, size);
task.bitstream->Unmap (0, &range);
D3D12_RANGE range = { 0, size };
task->bitstream->Unmap (0, &range);
return TRUE; return TRUE;
} }
@ -1102,25 +1113,49 @@ gst_d3d12_decoder_end_picture (GstD3D12Decoder * decoder,
(GThreadFunc) gst_d3d12_decoder_output_loop, decoder); (GThreadFunc) gst_d3d12_decoder_output_loop, decoder);
} }
auto task_slot_idx = priv->cmd->fence_val % ASYNC_DEPTH; DecoderTaskDataPtr task_data;
GST_LOG_OBJECT (decoder, "Using task slot %" G_GUINT64_FORMAT, task_slot_idx); size_t free_tasks_in_queue = 0;
{
std::lock_guard < std::mutex > lk (priv->cmd->task_data_queue_lock);
if (priv->cmd->task_data_queue.empty ()) {
ComPtr < ID3D12CommandAllocator > ca;
hr = priv->cmd->device->CreateCommandAllocator
(D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE, IID_PPV_ARGS (&ca));
if (!gst_d3d12_result (hr, decoder->device)) {
GST_ERROR_OBJECT (decoder, "Couldn't create command allocator");
return GST_FLOW_ERROR;
}
task_data = std::make_shared < DecoderTaskData > ();
task_data->ca = ca;
priv->cmd->num_allocated_tasks++;
GST_TRACE_OBJECT (decoder,
"Allocating new task, total allocated tasks %u",
priv->cmd->num_allocated_tasks);
} else {
free_tasks_in_queue = priv->cmd->task_data_queue.size ();
task_data = priv->cmd->task_data_queue.front ();
priv->cmd->task_data_queue.pop ();
GST_TRACE_OBJECT (decoder, "Reusing task, total allocated tasks %u",
priv->cmd->num_allocated_tasks);
}
}
auto & task_slot = priv->cmd->task_data[task_slot_idx];
if (!gst_d3d12_decoder_upload_bitstream (decoder, args->bitstream, if (!gst_d3d12_decoder_upload_bitstream (decoder, args->bitstream,
args->bitstream_size, task_slot)) { args->bitstream_size, task_data)) {
return GST_FLOW_ERROR; return GST_FLOW_ERROR;
} }
memset (&in_args, 0, sizeof (D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS)); memset (&in_args, 0, sizeof (D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS));
memset (&out_args, 0, sizeof (D3D12_VIDEO_DECODE_OUTPUT_STREAM_ARGUMENTS)); memset (&out_args, 0, sizeof (D3D12_VIDEO_DECODE_OUTPUT_STREAM_ARGUMENTS));
hr = task_slot.ca->Reset (); hr = task_data->ca->Reset ();
if (!gst_d3d12_result (hr, decoder->device)) { if (!gst_d3d12_result (hr, decoder->device)) {
GST_ERROR_OBJECT (decoder, "Couldn't reset command allocator"); GST_ERROR_OBJECT (decoder, "Couldn't reset command allocator");
return GST_FLOW_ERROR; return GST_FLOW_ERROR;
} }
hr = priv->cmd->cl->Reset (task_slot.ca.Get ()); hr = priv->cmd->cl->Reset (task_data->ca.Get ());
if (!gst_d3d12_result (hr, decoder->device)) { if (!gst_d3d12_result (hr, decoder->device)) {
GST_ERROR_OBJECT (decoder, "Couldn't reset command list"); GST_ERROR_OBJECT (decoder, "Couldn't reset command list");
return GST_FLOW_ERROR; return GST_FLOW_ERROR;
@ -1239,7 +1274,7 @@ gst_d3d12_decoder_end_picture (GstD3D12Decoder * decoder,
in_args.NumFrameArguments++; in_args.NumFrameArguments++;
} }
in_args.CompressedBitstream.pBuffer = task_slot.bitstream.Get (); in_args.CompressedBitstream.pBuffer = task_data->bitstream.Get ();
in_args.CompressedBitstream.Offset = 0; in_args.CompressedBitstream.Offset = 0;
in_args.CompressedBitstream.Size = args->bitstream_size; in_args.CompressedBitstream.Size = args->bitstream_size;
in_args.pHeap = decoder_pic->heap.Get (); in_args.pHeap = decoder_pic->heap.Get ();
@ -1268,6 +1303,7 @@ gst_d3d12_decoder_end_picture (GstD3D12Decoder * decoder,
} }
decoder_pic->fence_val = priv->cmd->fence_val; decoder_pic->fence_val = priv->cmd->fence_val;
decoder_pic->task_data = task_data;
return GST_FLOW_OK; return GST_FLOW_OK;
} }
@ -1577,6 +1613,12 @@ gst_d3d12_decoder_output_loop (GstD3D12Decoder * self)
WaitForSingleObjectEx (event_handle, INFINITE, FALSE); WaitForSingleObjectEx (event_handle, INFINITE, FALSE);
} }
{
std::lock_guard < std::mutex > lk (priv->cmd->task_data_queue_lock);
priv->cmd->task_data_queue.push (decoder_pic->task_data);
decoder_pic->task_data = nullptr;
}
if (priv->flushing) { if (priv->flushing) {
GST_DEBUG_OBJECT (self, "Drop framem, we are flushing"); GST_DEBUG_OBJECT (self, "Drop framem, we are flushing");
gst_codec_picture_unref (output_data.picture); gst_codec_picture_unref (output_data.picture);
@ -1639,7 +1681,7 @@ gst_d3d12_decoder_output_picture (GstD3D12Decoder * decoder,
gst_queue_array_push_tail_struct (priv->session->output_queue, &output_data); gst_queue_array_push_tail_struct (priv->session->output_queue, &output_data);
priv->session->queue_cond.notify_one (); priv->session->queue_cond.notify_one ();
return GST_FLOW_OK; return priv->last_flow;
} }
gboolean gboolean