d3d12: Rework command scheduling

* Use single fence object per queue and remove GstD3D12Fence
  implementation
* Add a helper method for texture copy
* Run background thread and release unused resource from the thread

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/5870>
This commit is contained in:
Seungha Yang 2023-12-25 16:14:39 +09:00 committed by GStreamer Marge Bot
parent 6d7d9291c3
commit efc023e76e
12 changed files with 957 additions and 541 deletions

View file

@ -35,9 +35,6 @@ typedef struct _GstD3D12Device GstD3D12Device;
typedef struct _GstD3D12DeviceClass GstD3D12DeviceClass;
typedef struct _GstD3D12DevicePrivate GstD3D12DevicePrivate;
typedef struct _GstD3D12Fence GstD3D12Fence;
typedef struct _GstD3D12FencePrivate GstD3D12FencePrivate;
typedef struct _GstD3D12Memory GstD3D12Memory;
typedef struct _GstD3D12MemoryPrivate GstD3D12MemoryPrivate;

View file

@ -0,0 +1,409 @@
/* GStreamer
* Copyright (C) 2023 Seungha Yang <seungha@centricular.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "gstd3d12commandqueue.h"
#include "gstd3d12device.h"
#include "gstd3d12utils.h"
#include <wrl.h>
#include <queue>
#include <mutex>
#include <memory>
#include <condition_variable>
/* Debug category of this file; it is registered in the class init function
 * and made the default category for the GST_* logging macros used below */
GST_DEBUG_CATEGORY_STATIC (gst_d3d12_command_queue_debug);
#define GST_CAT_DEFAULT gst_d3d12_command_queue_debug
/* *INDENT-OFF* */
using namespace Microsoft::WRL;
/* Deferred-release record. Associates an opaque payload and its destroy
 * callback with the fence value that has to be reached before the payload
 * may be released. The callback, when set, runs from the destructor. */
struct GCData
{
  GCData (gpointer user_data, GDestroyNotify destroy_notify,
      guint64 fence_value)
    : data (user_data), notify (destroy_notify), fence_val (fence_value)
  {
  }

  ~GCData ()
  {
    if (!notify)
      return;

    notify (data);
  }

  /* Payload handed to notify() */
  gpointer data = nullptr;
  /* Destroy callback, may be null */
  GDestroyNotify notify = nullptr;
  /* Fence value guarding the payload */
  guint64 fence_val = 0;
};

/* Shared handle so that a record can outlive its slot in the GC list */
using GCDataPtr = std::shared_ptr<GCData>;
/* Private state of GstD3D12CommandQueue: the native queue, the single fence
 * signalled by it, and the bookkeeping of the background GC thread which
 * releases user data once the corresponding fence value is reached. */
struct GstD3D12CommandQueuePrivate
{
  GstD3D12CommandQueuePrivate ()
  {
    event_handle = CreateEventEx (nullptr, nullptr, 0, EVENT_ALL_ACCESS);
  }

  ~GstD3D12CommandQueuePrivate ()
  {
    /* Ask the GC thread to leave its loop and wake it up in case it is
     * sleeping on the condition variable */
    {
      std::lock_guard <std::mutex> lk (lock);
      shutdown = true;
      cond.notify_one ();
    }

    g_clear_pointer (&gc_thread, g_thread_join);

    /* Drain the queue before tearing down. The event must be armed with the
     * last signalled value (fence_val); arming it with the already completed
     * value would fire immediately and not wait for pending GPU work.
     * The fence is null if the object was never fully set up. */
    if (fence) {
      auto completed = fence->GetCompletedValue ();
      if (fence_val > completed) {
        auto hr = fence->SetEventOnCompletion (fence_val, event_handle);
        if (SUCCEEDED (hr))
          WaitForSingleObjectEx (event_handle, INFINITE, FALSE);
      }
    }

    CloseHandle (event_handle);
  }

  /* "Greater than" comparator: turns std::priority_queue into a min-heap so
   * that the entry with the smallest fence value is on top */
  struct gc_cmp {
    bool operator()(const GCDataPtr &a, const GCDataPtr &b) const
    {
      return a->fence_val > b->fence_val;
    }
  };

  D3D12_COMMAND_QUEUE_DESC desc = { };
  ComPtr<ID3D12Device> device;
  ComPtr<ID3D12CommandQueue> cq;
  ComPtr<ID3D12Fence> fence;
  /* Event used for blocking waits done on behalf of this object */
  HANDLE event_handle;
  /* Last value passed to ID3D12CommandQueue::Signal() for our fence */
  guint64 fence_val = 0;

  GThread *gc_thread = nullptr;
  std::priority_queue<GCDataPtr, std::vector<GCDataPtr>, gc_cmp> gc_list;
  /* Taken while fence_val is bumped and the fence is signalled */
  std::mutex execute_lock;
  /* Taken when gc_list, gc_thread or shutdown is accessed */
  std::mutex lock;
  std::condition_variable cond;
  bool shutdown = false;
  /* Throttling depth for command submission, 0 disables throttling */
  size_t queue_size = 0;
};
/* *INDENT-ON* */
/* Instance structure: a GstObject whose actual state lives in the C++
 * private struct */
struct _GstD3D12CommandQueue
{
GstObject parent;
/* Allocated with new in the instance init function, deleted in finalize */
GstD3D12CommandQueuePrivate *priv;
};
static void gst_d3d12_command_queue_finalize (GObject * object);
/* G_DEFINE_TYPE names the parent class pointer after the type prefix;
 * alias it to the conventional parent_class used in finalize */
#define gst_d3d12_command_queue_parent_class parent_class
G_DEFINE_TYPE (GstD3D12CommandQueue, gst_d3d12_command_queue, GST_TYPE_OBJECT);
/* Class initializer: registers the debug category of this file and installs
 * the finalize handler */
static void
gst_d3d12_command_queue_class_init (GstD3D12CommandQueueClass * klass)
{
  GObjectClass *gobject_class = G_OBJECT_CLASS (klass);

  GST_DEBUG_CATEGORY_INIT (gst_d3d12_command_queue_debug,
      "d3d12commandqueue", 0, "d3d12commandqueue");

  gobject_class->finalize = gst_d3d12_command_queue_finalize;
}
/* Instance initializer: allocates the C++ private data */
static void
gst_d3d12_command_queue_init (GstD3D12CommandQueue * self)
{
  auto priv = new GstD3D12CommandQueuePrivate ();

  self->priv = priv;
}
/* Finalizer: destroys the C++ private data and chains up to the parent */
static void
gst_d3d12_command_queue_finalize (GObject * object)
{
  GstD3D12CommandQueue *self = GST_D3D12_COMMAND_QUEUE (object);
  GObjectClass *parent = G_OBJECT_CLASS (parent_class);

  delete self->priv;

  parent->finalize (object);
}
/**
 * gst_d3d12_command_queue_new:
 * @device: a #GstD3D12Device
 * @desc: description of the native command queue to create
 * @queue_size: throttling depth used by
 *   gst_d3d12_command_queue_execute_command_lists(), 0 to disable throttling
 *
 * Creates a native command queue and a fence (initial value 0) on @device
 * and wraps both in a new #GstD3D12CommandQueue.
 *
 * Returns: the new object with a sunk reference, or %NULL if the command
 * queue or the fence could not be created
 */
GstD3D12CommandQueue *
gst_d3d12_command_queue_new (GstD3D12Device * device,
    const D3D12_COMMAND_QUEUE_DESC * desc, guint queue_size)
{
  g_return_val_if_fail (GST_IS_D3D12_DEVICE (device), nullptr);
  g_return_val_if_fail (desc, nullptr);

  auto device_handle = gst_d3d12_device_get_device_handle (device);

  ComPtr < ID3D12CommandQueue > cq;
  auto hr = device_handle->CreateCommandQueue (desc, IID_PPV_ARGS (&cq));
  if (!gst_d3d12_result (hr, device)) {
    GST_ERROR_OBJECT (device, "Couldn't create command queue");
    return nullptr;
  }

  ComPtr < ID3D12Fence > fence;
  hr = device_handle->CreateFence (0,
      D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS (&fence));
  if (!gst_d3d12_result (hr, device)) {
    GST_ERROR_OBJECT (device, "Couldn't create fence");
    return nullptr;
  }

  auto self = (GstD3D12CommandQueue *)
      g_object_new (GST_TYPE_D3D12_COMMAND_QUEUE, nullptr);
  gst_object_ref_sink (self);

  auto priv = self->priv;
  /* Remember the description the queue was created with, the member would
   * otherwise never be populated */
  priv->desc = *desc;
  /* Reuse the handle fetched above instead of querying the device again */
  priv->device = device_handle;
  priv->cq = cq;
  priv->fence = fence;
  priv->queue_size = queue_size;

  return self;
}
/**
 * gst_d3d12_command_queue_get_handle:
 * @queue: a #GstD3D12CommandQueue
 * @handle: (out): location to store the native command queue
 *
 * Stores the native command queue in @handle after adding a reference to it.
 *
 * Returns: %TRUE on success
 */
gboolean
gst_d3d12_command_queue_get_handle (GstD3D12CommandQueue * queue,
    ID3D12CommandQueue ** handle)
{
  g_return_val_if_fail (GST_IS_D3D12_COMMAND_QUEUE (queue), FALSE);
  g_return_val_if_fail (handle, FALSE);

  ID3D12CommandQueue *native = queue->priv->cq.Get ();

  *handle = native;
  native->AddRef ();

  return TRUE;
}
/**
 * gst_d3d12_command_queue_get_fence:
 * @queue: a #GstD3D12CommandQueue
 * @handle: (out): location to store the fence
 *
 * Stores the fence owned by @queue in @handle after adding a reference to it.
 *
 * Returns: %TRUE on success
 */
gboolean
gst_d3d12_command_queue_get_fence (GstD3D12CommandQueue * queue,
    ID3D12Fence ** handle)
{
  g_return_val_if_fail (GST_IS_D3D12_COMMAND_QUEUE (queue), FALSE);
  g_return_val_if_fail (handle, FALSE);

  ID3D12Fence *native = queue->priv->fence.Get ();

  *handle = native;
  native->AddRef ();

  return TRUE;
}
/**
 * gst_d3d12_command_queue_execute_command_lists:
 * @queue: a #GstD3D12CommandQueue
 * @num_command_lists: number of entries in @command_lists
 * @command_lists: command lists to submit
 * @fence_value: (out) (optional): location to store the fence value signalled
 *   for this submission
 *
 * Submits @command_lists and signals the fence of @queue with the next fence
 * value. If @queue was created with a non-zero queue size and more than that
 * many submissions are still in flight, this call blocks until the oldest
 * excess submission has completed.
 *
 * Returns: a failure HRESULT if signalling the fence or arming the throttling
 * event failed, a success code otherwise. @fence_value is only written when
 * the fence was signalled successfully.
 */
HRESULT
gst_d3d12_command_queue_execute_command_lists (GstD3D12CommandQueue * queue,
    guint num_command_lists, ID3D12CommandList ** command_lists,
    guint64 * fence_value)
{
  g_return_val_if_fail (GST_IS_D3D12_COMMAND_QUEUE (queue), E_INVALIDARG);

  auto priv = queue->priv;

  /* Submission and fence value bookkeeping must be atomic as a pair */
  std::lock_guard < std::mutex > lk (priv->execute_lock);

  priv->fence_val++;
  priv->cq->ExecuteCommandLists (num_command_lists, command_lists);
  auto hr = priv->cq->Signal (priv->fence.Get (), priv->fence_val);
  if (FAILED (hr)) {
    GST_ERROR_OBJECT (queue, "Signal failed");
    priv->fence_val--;
    /* Report the failure right away. Falling through to the throttling code
     * could overwrite hr with a success code and hide the error while
     * fence_value was never written */
    return hr;
  }

  if (fence_value)
    *fence_value = priv->fence_val;

  if (priv->queue_size > 0) {
    auto completed = priv->fence->GetCompletedValue ();
    if (completed + priv->queue_size < priv->fence_val) {
      /* Too many submissions in flight, wait for the oldest excess one */
      hr = priv->fence->SetEventOnCompletion (priv->fence_val -
          priv->queue_size, priv->event_handle);
      if (FAILED (hr)) {
        GST_ERROR_OBJECT (queue, "SetEventOnCompletion failed");
        return hr;
      }

      WaitForSingleObjectEx (priv->event_handle, INFINITE, FALSE);
    }
  }

  return hr;
}
/**
 * gst_d3d12_command_queue_execute_wait:
 * @queue: a #GstD3D12CommandQueue
 * @fence: the fence to wait for
 * @fence_value: the value @fence has to reach
 *
 * Forwards to ID3D12CommandQueue::Wait() of the native queue.
 *
 * Returns: the HRESULT of the native call
 */
HRESULT
gst_d3d12_command_queue_execute_wait (GstD3D12CommandQueue * queue,
    ID3D12Fence * fence, guint64 fence_value)
{
  g_return_val_if_fail (GST_IS_D3D12_COMMAND_QUEUE (queue), E_INVALIDARG);
  g_return_val_if_fail (fence, E_INVALIDARG);

  ID3D12CommandQueue *native = queue->priv->cq.Get ();
  HRESULT hr = native->Wait (fence, fence_value);

  return hr;
}
/**
 * gst_d3d12_command_queue_get_completed_value:
 * @queue: a #GstD3D12CommandQueue
 *
 * Returns: the current completed value of the fence owned by @queue,
 * or %G_MAXUINT64 if @queue is invalid
 */
guint64
gst_d3d12_command_queue_get_completed_value (GstD3D12CommandQueue * queue)
{
  g_return_val_if_fail (GST_IS_D3D12_COMMAND_QUEUE (queue), G_MAXUINT64);

  auto priv = queue->priv;
  guint64 completed = priv->fence->GetCompletedValue ();

  return completed;
}
/**
 * gst_d3d12_command_queue_fence_wait:
 * @queue: a #GstD3D12CommandQueue
 * @fence_value: the fence value to wait for, or %G_MAXUINT64 to signal a
 *   new fence value and wait for that one
 * @event_handle: (nullable): event used for the blocking wait; a temporary
 *   event is created and closed when this is null
 *
 * Blocks the calling thread until the fence of @queue reaches the requested
 * value. Returns immediately if the value was already reached.
 *
 * Returns: S_OK on success, otherwise the failing HRESULT
 */
HRESULT
gst_d3d12_command_queue_fence_wait (GstD3D12CommandQueue * queue,
    guint64 fence_value, HANDLE event_handle)
{
  g_return_val_if_fail (GST_IS_D3D12_COMMAND_QUEUE (queue), E_INVALIDARG);

  auto priv = queue->priv;
  guint64 target = fence_value;

  if (fence_value == G_MAXUINT64) {
    /* Flush request: signal a fresh value which covers everything that was
     * submitted so far */
    std::lock_guard < std::mutex > guard (priv->execute_lock);

    priv->fence_val++;
    HRESULT signal_hr = priv->cq->Signal (priv->fence.Get (), priv->fence_val);
    if (FAILED (signal_hr)) {
      GST_ERROR_OBJECT (queue, "Signal failed");
      priv->fence_val--;
      return signal_hr;
    }

    target = priv->fence_val;
  }

  if (priv->fence->GetCompletedValue () >= target)
    return S_OK;

  /* Only close the event when it was created here */
  const bool own_handle = !event_handle;
  if (own_handle)
    event_handle = CreateEventEx (nullptr, nullptr, 0, EVENT_ALL_ACCESS);

  HRESULT hr = priv->fence->SetEventOnCompletion (target, event_handle);
  if (SUCCEEDED (hr)) {
    WaitForSingleObjectEx (event_handle, INFINITE, FALSE);
    hr = S_OK;
  } else {
    GST_ERROR_OBJECT (queue, "SetEventOnCompletion failed");
  }

  if (own_handle)
    CloseHandle (event_handle);

  return hr;
}
/* Background thread body. Sleeps until GC data is queued or shutdown is
 * requested. On wake-up it drops every queued entry whose fence value is
 * already completed (under the lock), then takes the first entry which is
 * still pending and blocks on the fence for it outside of the lock. The
 * pending entry is released when the loop iteration ends. */
static gpointer
gst_d3d12_command_queue_gc_thread (GstD3D12CommandQueue * self)
{
  auto priv = self->priv;

  GST_INFO_OBJECT (self, "Entering GC thread");

  HANDLE wait_event = CreateEventEx (nullptr, nullptr, 0, EVENT_ALL_ACCESS);

  for (;;) {
    GCDataPtr pending;

    {
      std::unique_lock < std::mutex > guard (priv->lock);
      priv->cond.wait (guard,
          [priv] { return priv->shutdown || !priv->gc_list.empty (); });

      if (priv->shutdown)
        break;

      const auto completed = priv->fence->GetCompletedValue ();
      while (!priv->gc_list.empty ()) {
        /* Hold a reference of our own; popping alone never destroys it */
        auto head = priv->gc_list.top ();
        priv->gc_list.pop ();

        if (head->fence_val > completed) {
          /* Smallest queued value is not reached yet, wait for it below */
          pending = head;
          break;
        }

        GST_LOG_OBJECT (self, "Releasing fence data, completed %"
            G_GUINT64_FORMAT ", fence value %" G_GUINT64_FORMAT,
            completed, head->fence_val);
      }
    }

    if (!pending)
      continue;

    GST_LOG_OBJECT (self, "Waiting for fence data %" G_GUINT64_FORMAT,
        pending->fence_val);
    auto hr =
        priv->fence->SetEventOnCompletion (pending->fence_val, wait_event);
    if (SUCCEEDED (hr)) {
      WaitForSingleObjectEx (wait_event, INFINITE, FALSE);
      GST_LOG_OBJECT (self, "Waiting done, %" G_GUINT64_FORMAT,
          pending->fence_val);
    } else {
      GST_ERROR_OBJECT (self, "SetEventOnCompletion failed");
    }
  }

  GST_INFO_OBJECT (self, "Leaving GC thread");

  CloseHandle (wait_event);

  return nullptr;
}
/**
 * gst_d3d12_command_queue_set_notify:
 * @queue: a #GstD3D12CommandQueue
 * @fence_value: the fence value guarding @fence_data
 * @fence_data: user data
 * @notify: (nullable): called with @fence_data when it can be released
 *
 * Schedules @notify to be called once the fence of @queue has reached
 * @fence_value. If the value is already completed, @notify is called
 * directly from this function. Otherwise the data is queued for the GC
 * thread, which is started on first use.
 */
void
gst_d3d12_command_queue_set_notify (GstD3D12CommandQueue * queue,
    guint64 fence_value, gpointer fence_data, GDestroyNotify notify)
{
  g_return_if_fail (GST_IS_D3D12_COMMAND_QUEUE (queue));

  auto priv = queue->priv;

  if (priv->fence->GetCompletedValue () >= fence_value) {
    GST_DEBUG_OBJECT (queue, "Already completed fence value %"
        G_GUINT64_FORMAT, fence_value);
    if (notify)
      notify (fence_data);

    return;
  }

  GCDataPtr entry =
      std::make_shared < GCData > (fence_data, notify, fence_value);

  std::lock_guard < std::mutex > guard (priv->lock);

  /* Lazily spawn the GC thread, queues without notifications never need it */
  if (priv->gc_thread == nullptr) {
    priv->gc_thread = g_thread_new ("GstD3D12Gc",
        (GThreadFunc) gst_d3d12_command_queue_gc_thread, queue);
  }

  GST_LOG_OBJECT (queue, "Pushing GC data %" G_GUINT64_FORMAT, fence_value);

  priv->gc_list.push (entry);
  priv->cond.notify_one ();
}

View file

@ -0,0 +1,63 @@
/* GStreamer
* Copyright (C) 2023 Seungha Yang <seungha@centricular.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#pragma once
#include <gst/gst.h>
#include <gst/video/video.h>
#include "gstd3d12_fwd.h"
G_BEGIN_DECLS
#define GST_TYPE_D3D12_COMMAND_QUEUE (gst_d3d12_command_queue_get_type())
G_DECLARE_FINAL_TYPE (GstD3D12CommandQueue,
gst_d3d12_command_queue, GST, D3D12_COMMAND_QUEUE, GstObject);
/* Creates a native command queue described by desc together with its fence.
 * A non-zero queue_size makes command submission block while more than
 * queue_size submissions are in flight. Returns NULL on failure. */
GstD3D12CommandQueue * gst_d3d12_command_queue_new (GstD3D12Device * device,
const D3D12_COMMAND_QUEUE_DESC * desc,
guint queue_size);
/* Stores the native command queue in handle, with a reference added */
gboolean gst_d3d12_command_queue_get_handle (GstD3D12CommandQueue * queue,
ID3D12CommandQueue ** handle);
/* Stores the fence signalled by this queue in handle, with a reference added */
gboolean gst_d3d12_command_queue_get_fence (GstD3D12CommandQueue * queue,
ID3D12Fence ** handle);
/* Submits the command lists and signals the queue's fence with the next
 * fence value, which is stored in fence_value when that is non-NULL */
HRESULT gst_d3d12_command_queue_execute_command_lists (GstD3D12CommandQueue * queue,
guint num_command_lists,
ID3D12CommandList ** command_lists,
guint64 * fence_value);
/* Calls ID3D12CommandQueue::Wait() on the native queue with the given fence
 * and fence value */
HRESULT gst_d3d12_command_queue_execute_wait (GstD3D12CommandQueue * queue,
ID3D12Fence * fence,
guint64 fence_value);
/* Returns the completed value of the queue's fence */
guint64 gst_d3d12_command_queue_get_completed_value (GstD3D12CommandQueue * queue);
/* Blocks until the queue's fence reaches fence_value. G_MAXUINT64 signals a
 * new fence value first and waits for that one. event_handle may be NULL, in
 * which case a temporary event is used. */
HRESULT gst_d3d12_command_queue_fence_wait (GstD3D12CommandQueue * queue,
guint64 fence_value,
HANDLE event_handle);
/* Calls notify (fence_data) once the queue's fence has reached fence_value:
 * directly if it is already completed, otherwise via a background thread */
void gst_d3d12_command_queue_set_notify (GstD3D12CommandQueue * queue,
guint64 fence_value,
gpointer fence_data,
GDestroyNotify notify);
G_END_DECLS

View file

@ -28,7 +28,6 @@
#include "gstd3d12format.h"
#include "gstd3d12memory.h"
#include "gstd3d12bufferpool.h"
#include "gstd3d12fence.h"
#include <gst/base/gstqueuearray.h>
#include <wrl.h>
#include <string.h>
@ -174,14 +173,7 @@ struct GstD3D12DecoderPicture : public GstMiniObject
std::shared_ptr<GstD3D12Dpb> d3d12_dpb,
ID3D12VideoDecoderHeap * decoder_heap, guint8 dxva_id)
: buffer(dpb_buf), output_buffer(out_buf)
, heap(decoder_heap), dpb(d3d12_dpb), view_id(dxva_id)
{
GstBuffer *fence_buf = output_buffer ? output_buffer : buffer;
if (fence_buf) {
auto dmem = (GstD3D12Memory *) gst_buffer_peek_memory (fence_buf, 0);
fence = gst_d3d12_fence_ref (dmem->fence);
}
}
, heap(decoder_heap), dpb(d3d12_dpb), view_id(dxva_id) {}
~GstD3D12DecoderPicture ()
{
@ -189,8 +181,6 @@ struct GstD3D12DecoderPicture : public GstMiniObject
if (d3d12_dpb)
d3d12_dpb->Release (view_id);
if (fence)
gst_d3d12_fence_unref (fence);
if (buffer)
gst_buffer_unref (buffer);
if (output_buffer)
@ -201,7 +191,7 @@ struct GstD3D12DecoderPicture : public GstMiniObject
GstBuffer *output_buffer;
ComPtr<ID3D12VideoDecoderHeap> heap;
std::weak_ptr<GstD3D12Dpb> dpb;
GstD3D12Fence *fence = nullptr;
guint64 fence_val = 0;
guint8 view_id;
};
@ -246,9 +236,6 @@ struct DecoderCmdData
}
CloseHandle (event_handle);
if (copy_fence)
gst_d3d12_fence_unref (copy_fence);
}
ComPtr<ID3D12Device4> device;
@ -262,13 +249,6 @@ struct DecoderCmdData
HANDLE event_handle;
UINT64 fence_val = 0;
/* Used for download decoded picture to staging */
ComPtr<ID3D12CommandAllocator> copy_ca;
ComPtr<ID3D12GraphicsCommandList> copy_cl;
/* Fence to wait texture copy */
GstD3D12Fence *copy_fence = nullptr;
std::vector<DecoderTaskData> task_data;
};
@ -361,6 +341,16 @@ struct DecoderSessionData
struct GstD3D12DecoderPrivate
{
GstD3D12DecoderPrivate ()
{
copy_event_handle = CreateEventEx (nullptr, nullptr, 0, EVENT_ALL_ACCESS);
}
~GstD3D12DecoderPrivate ()
{
CloseHandle (copy_event_handle);
}
std::mutex lock;
std::recursive_mutex context_lock;
@ -369,6 +359,8 @@ struct GstD3D12DecoderPrivate
GThread *output_thread = nullptr;
std::atomic<bool> flushing;
std::atomic<GstFlowReturn> last_flow;
HANDLE copy_event_handle;
};
/* *INDENT-ON* */
@ -456,20 +448,6 @@ gst_d3d12_decoder_open (GstD3D12Decoder * decoder, GstElement * element)
return FALSE;
}
hr = cmd->device->CreateCommandAllocator (D3D12_COMMAND_LIST_TYPE_COPY,
IID_PPV_ARGS (&cmd->copy_ca));
if (!gst_d3d12_result (hr, decoder->device)) {
GST_ERROR_OBJECT (element, "Couldn't create copy command allocator");
return FALSE;
}
hr = cmd->device->CreateCommandList1 (0, D3D12_COMMAND_LIST_TYPE_COPY,
D3D12_COMMAND_LIST_FLAG_NONE, IID_PPV_ARGS (&cmd->copy_cl));
if (!gst_d3d12_result (hr, decoder->device)) {
GST_ERROR_OBJECT (element, "Couldn't create copy command list");
return FALSE;
}
hr = cmd->device->CreateCommandList1 (0, D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE,
D3D12_COMMAND_LIST_FLAG_NONE, IID_PPV_ARGS (&cmd->cl));
if (!gst_d3d12_result (hr, decoder->device)) {
@ -493,12 +471,6 @@ gst_d3d12_decoder_open (GstD3D12Decoder * decoder, GstElement * element)
return FALSE;
}
cmd->copy_fence = gst_d3d12_fence_new (decoder->device);
if (!cmd->copy_fence) {
GST_ERROR_OBJECT (element, "Couldn't create fence");
return FALSE;
}
cmd->task_data.resize (ASYNC_DEPTH);
for (size_t i = 0; i < cmd->task_data.size (); i++) {
ComPtr < ID3D12CommandAllocator > ca;
@ -1295,16 +1267,7 @@ gst_d3d12_decoder_end_picture (GstD3D12Decoder * decoder,
return GST_FLOW_ERROR;
}
/* And signal to session fence */
hr = priv->cmd->cq->Signal (gst_d3d12_fence_get_handle (decoder_pic->fence),
priv->cmd->fence_val);
if (!gst_d3d12_result (hr, decoder->device)) {
GST_ERROR_OBJECT (decoder, "Signal failed");
return GST_FLOW_ERROR;
}
gst_d3d12_fence_set_event_on_completion_value (decoder_pic->fence,
priv->cmd->fence_val);
decoder_pic->fence_val = priv->cmd->fence_val;
return GST_FLOW_OK;
}
@ -1380,7 +1343,6 @@ gst_d3d12_decoder_process_output (GstD3D12Decoder * self,
gint display_width, gint display_height)
{
GstFlowReturn ret = GST_FLOW_ERROR;
UINT64 fence_value;
GstBuffer *buffer;
GstD3D12Memory *dmem;
ID3D12Resource *resource;
@ -1389,9 +1351,6 @@ gst_d3d12_decoder_process_output (GstD3D12Decoder * self,
auto priv = self->priv;
auto copy_queue = gst_d3d12_device_get_copy_queue (self->device);
g_assert (copy_queue);
auto decoder_pic = get_decoder_picture (picture);
g_assert (decoder_pic);
@ -1455,7 +1414,6 @@ gst_d3d12_decoder_process_output (GstD3D12Decoder * self,
GstMemory *mem;
ID3D12Resource *out_resource = nullptr;
UINT out_subresource[2];
GstD3D12Fence *out_fence = priv->cmd->copy_fence;
ret = gst_video_decoder_allocate_output_frame (videodec, frame);
if (ret != GST_FLOW_OK) {
@ -1470,7 +1428,6 @@ gst_d3d12_decoder_process_output (GstD3D12Decoder * self,
out_resource = gst_d3d12_memory_get_resource_handle (dmem);
gst_d3d12_memory_get_subresource_index (dmem, 0, &out_subresource[0]);
gst_d3d12_memory_get_subresource_index (dmem, 1, &out_subresource[1]);
out_fence = dmem->fence;
GST_MINI_OBJECT_FLAG_SET (dmem,
GST_D3D12_MEMORY_TRANSFER_NEED_DOWNLOAD);
@ -1491,81 +1448,58 @@ gst_d3d12_decoder_process_output (GstD3D12Decoder * self,
gst_d3d12_memory_get_subresource_index (dmem, 1, &subresource[1]);
/* Copy texture to staging */
hr = priv->cmd->copy_ca->Reset ();
if (!gst_d3d12_result (hr, self->device)) {
ret = GST_FLOW_ERROR;
goto error;
}
D3D12_BOX src_box[2];
std::vector < GstD3D12CopyTextureRegionArgs > copy_args;
hr = priv->cmd->copy_cl->Reset (priv->cmd->copy_ca.Get (), nullptr);
if (!gst_d3d12_result (hr, self->device)) {
ret = GST_FLOW_ERROR;
goto error;
}
/* simultaneous access must be enabled already, so a barrier is not needed */
for (guint i = 0; i < 2; i++) {
D3D12_TEXTURE_COPY_LOCATION src =
CD3DX12_TEXTURE_COPY_LOCATION (resource, subresource[i]);
D3D12_TEXTURE_COPY_LOCATION dst;
D3D12_BOX src_box = { 0, };
GstD3D12CopyTextureRegionArgs args;
memset (&args, 0, sizeof (args));
args.src = CD3DX12_TEXTURE_COPY_LOCATION (resource, subresource[i]);
if (out_resource) {
dst = CD3DX12_TEXTURE_COPY_LOCATION (out_resource, out_subresource[i]);
args.dst =
CD3DX12_TEXTURE_COPY_LOCATION (out_resource, out_subresource[i]);
} else {
dst =
args.dst =
CD3DX12_TEXTURE_COPY_LOCATION (priv->session->staging.Get (),
priv->session->layout[i]);
}
/* FIXME: only 4:2:0 */
if (i == 0) {
src_box.left = GST_ROUND_UP_2 (priv->session->crop_x);
src_box.top = GST_ROUND_UP_2 (priv->session->crop_y);
src_box.right = GST_ROUND_UP_2 (priv->session->crop_x +
src_box[i].left = GST_ROUND_UP_2 (priv->session->crop_x);
src_box[i].top = GST_ROUND_UP_2 (priv->session->crop_y);
src_box[i].right = GST_ROUND_UP_2 (priv->session->crop_x +
priv->session->output_info.width);
src_box.bottom =
src_box[i].bottom =
GST_ROUND_UP_2 (priv->session->crop_y +
priv->session->output_info.height);
} else {
src_box.left = GST_ROUND_UP_2 (priv->session->crop_x) / 2;
src_box.top = GST_ROUND_UP_2 (priv->session->crop_y) / 2;
src_box.right =
src_box[i].left = GST_ROUND_UP_2 (priv->session->crop_x) / 2;
src_box[i].top = GST_ROUND_UP_2 (priv->session->crop_y) / 2;
src_box[i].right =
GST_ROUND_UP_2 (priv->session->crop_x +
priv->session->output_info.width) / 2;
src_box.bottom =
src_box[i].bottom =
GST_ROUND_UP_2 (priv->session->crop_y +
priv->session->output_info.height) / 2;
}
src_box.front = 0;
src_box.back = 1;
src_box[i].front = 0;
src_box[i].back = 1;
priv->cmd->copy_cl->CopyTextureRegion (&dst, 0, 0, 0, &src, &src_box);
args.src_box = &src_box[i];
copy_args.push_back (args);
}
hr = priv->cmd->copy_cl->Close ();
if (!gst_d3d12_result (hr, self->device)) {
GST_ERROR_OBJECT (videodec, "Couldn't record copy command");
ret = GST_FLOW_ERROR;
goto error;
}
ID3D12CommandList *copy_cl[] = { priv->cmd->copy_cl.Get () };
copy_queue->ExecuteCommandLists (1, copy_cl);
fence_value = gst_d3d12_device_get_fence_value (self->device);
hr = copy_queue->Signal (gst_d3d12_fence_get_handle (out_fence),
fence_value);
if (!gst_d3d12_result (hr, self->device)) {
ret = GST_FLOW_ERROR;
goto error;
}
gst_d3d12_fence_set_event_on_completion_value (out_fence, fence_value);
guint64 copy_fence_val = 0;
gst_d3d12_device_copy_texture_region (self->device, copy_args.size (),
copy_args.data (), D3D12_COMMAND_LIST_TYPE_COPY, &copy_fence_val);
gst_d3d12_device_fence_wait (self->device, D3D12_COMMAND_LIST_TYPE_COPY,
copy_fence_val, priv->copy_event_handle);
if (!out_resource) {
gst_d3d12_fence_wait (out_fence);
guint8 *map_data;
GstVideoFrame vframe;
@ -1615,6 +1549,8 @@ gst_d3d12_decoder_output_loop (GstD3D12Decoder * self)
GST_DEBUG_OBJECT (self, "Entering output thread");
HANDLE event_handle = CreateEventEx (nullptr, nullptr, 0, EVENT_ALL_ACCESS);
while (true) {
DecoderOutputData output_data;
{
@ -1634,7 +1570,12 @@ gst_d3d12_decoder_output_loop (GstD3D12Decoder * self)
auto decoder_pic = get_decoder_picture (output_data.picture);
g_assert (decoder_pic);
gst_d3d12_fence_wait (decoder_pic->fence);
auto completed = priv->cmd->fence->GetCompletedValue ();
if (completed < decoder_pic->fence_val) {
priv->cmd->fence->SetEventOnCompletion (decoder_pic->fence_val,
event_handle);
WaitForSingleObjectEx (event_handle, INFINITE, FALSE);
}
if (priv->flushing) {
GST_DEBUG_OBJECT (self, "Drop framem, we are flushing");
@ -1659,6 +1600,8 @@ gst_d3d12_decoder_output_loop (GstD3D12Decoder * self)
GST_DEBUG_OBJECT (self, "Leaving output thread");
CloseHandle (event_handle);
return nullptr;
}

View file

@ -24,16 +24,21 @@
#include "gstd3d12device.h"
#include "gstd3d12utils.h"
#include "gstd3d12format.h"
#include "gstd3d12commandlistpool.h"
#include "gstd3d12commandallocatorpool.h"
#include "gstd3d12commandqueue.h"
#include <wrl.h>
#include <vector>
#include <string.h>
#include <mutex>
#include <atomic>
#include <condition_variable>
#include <string>
#include <locale>
#include <codecvt>
#include <algorithm>
#include <d3d12sdklayers.h>
#include <memory>
#include <queue>
GST_DEBUG_CATEGORY_STATIC (gst_d3d12_device_debug);
GST_DEBUG_CATEGORY_STATIC (gst_d3d12_sdk_debug);
@ -154,23 +159,45 @@ using namespace Microsoft::WRL;
struct _GstD3D12DevicePrivate
{
_GstD3D12DevicePrivate ()
~_GstD3D12DevicePrivate ()
{
fence_value = 1;
auto hr = device->GetDeviceRemovedReason ();
/* If device were not removed, make sure no pending command in queue */
if (hr == S_OK) {
if (direct_queue)
gst_d3d12_command_queue_fence_wait (direct_queue, G_MAXUINT64, nullptr);
if (copy_queue)
gst_d3d12_command_queue_fence_wait (copy_queue, G_MAXUINT64, nullptr);
}
gst_clear_object (&direct_queue);
gst_clear_object (&copy_queue);
gst_clear_object (&direct_ca_pool);
gst_clear_object (&direct_cl_pool);
gst_clear_object (&copy_ca_pool);
gst_clear_object (&copy_cl_pool);
}
ComPtr<ID3D12Device> device;
ComPtr<IDXGIAdapter1> adapter;
ComPtr<IDXGIFactory2> factory;
std::vector < GstD3D12Format> formats;
std::mutex lock;
std::recursive_mutex extern_lock;
std::atomic<guint64> fence_value;
ComPtr<ID3D12CommandQueue> copy_queue;
ComPtr<ID3D12InfoQueue> info_queue;
GstD3D12CommandQueue *direct_queue = nullptr;
GstD3D12CommandQueue *copy_queue = nullptr;
GstD3D12CommandListPool *direct_cl_pool = nullptr;
GstD3D12CommandAllocatorPool *direct_ca_pool = nullptr;
GstD3D12CommandListPool *copy_cl_pool = nullptr;
GstD3D12CommandAllocatorPool *copy_ca_pool = nullptr;
guint adapter_index = 0;
guint device_id = 0;
guint vendor_id = 0;
@ -281,7 +308,7 @@ gst_d3d12_device_init (GstD3D12Device * self)
static void
gst_d3d12_device_finalize (GObject * object)
{
GstD3D12Device *self = GST_D3D12_DEVICE (object);
auto self = GST_D3D12_DEVICE (object);
GST_DEBUG_OBJECT (self, "Finalize");
@ -580,6 +607,22 @@ gst_d3d12_device_new_internal (const GstD3D12DeviceConstructData * data)
return nullptr;
}
ComPtr < ID3D12Fence > copy_fence;
hr = device->CreateFence (0,
D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS (&copy_fence));
if (FAILED (hr)) {
GST_ERROR ("Couldn't create copy fence");
return nullptr;
}
ComPtr < ID3D12Fence > direct_fence;
hr = device->CreateFence (0,
D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS (&direct_fence));
if (FAILED (hr)) {
GST_ERROR ("Couldn't create fence");
return nullptr;
}
GstD3D12Device *self = (GstD3D12Device *)
g_object_new (GST_TYPE_D3D12_DEVICE, nullptr);
GstD3D12DevicePrivate *priv = self->priv;
@ -608,7 +651,45 @@ gst_d3d12_device_new_internal (const GstD3D12DeviceConstructData * data)
priv->info_queue = info_queue;
}
D3D12_COMMAND_QUEUE_DESC queue_desc = { };
queue_desc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT;
queue_desc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
priv->direct_queue = gst_d3d12_command_queue_new (self, &queue_desc, 0);
if (!priv->direct_queue)
goto error;
priv->direct_cl_pool = gst_d3d12_command_list_pool_new (self,
D3D12_COMMAND_LIST_TYPE_DIRECT);
if (!priv->direct_cl_pool)
goto error;
priv->direct_ca_pool = gst_d3d12_command_allocator_pool_new (self,
D3D12_COMMAND_LIST_TYPE_DIRECT);
if (!priv->direct_ca_pool)
goto error;
queue_desc.Type = D3D12_COMMAND_LIST_TYPE_COPY;
priv->copy_queue = gst_d3d12_command_queue_new (self, &queue_desc, 0);
if (!priv->copy_queue)
goto error;
priv->copy_cl_pool = gst_d3d12_command_list_pool_new (self,
D3D12_COMMAND_LIST_TYPE_COPY);
if (!priv->copy_cl_pool)
goto error;
priv->copy_ca_pool = gst_d3d12_command_allocator_pool_new (self,
D3D12_COMMAND_LIST_TYPE_COPY);
if (!priv->copy_ca_pool)
goto error;
return self;
error:
gst_object_unref (self);
return nullptr;
}
GstD3D12Device *
@ -725,37 +806,226 @@ gst_d3d12_device_get_format (GstD3D12Device * device,
return FALSE;
}
guint64
gst_d3d12_device_get_fence_value (GstD3D12Device * device)
{
g_return_val_if_fail (GST_IS_D3D12_DEVICE (device), G_MAXUINT64);
return device->priv->fence_value.fetch_add (1);
}
ID3D12CommandQueue *
gst_d3d12_device_get_copy_queue (GstD3D12Device * device)
GstD3D12CommandQueue *
gst_d3d12_device_get_command_queue (GstD3D12Device * device,
D3D12_COMMAND_LIST_TYPE queue_type)
{
g_return_val_if_fail (GST_IS_D3D12_DEVICE (device), nullptr);
auto priv = device->priv;
std::lock_guard < std::mutex > lk (priv->lock);
if (!priv->copy_queue) {
HRESULT hr;
D3D12_COMMAND_QUEUE_DESC desc = { };
ComPtr < ID3D12CommandQueue > queue;
desc.Type = D3D12_COMMAND_LIST_TYPE_COPY;
desc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
hr = priv->device->CreateCommandQueue (&desc, IID_PPV_ARGS (&queue));
if (!gst_d3d12_result (hr, device))
return nullptr;
priv->copy_queue = queue;
switch (queue_type) {
case D3D12_COMMAND_LIST_TYPE_DIRECT:
return priv->direct_queue;
case D3D12_COMMAND_LIST_TYPE_COPY:
return priv->copy_queue;
default:
break;
}
return priv->copy_queue.Get ();
GST_ERROR_OBJECT (device, "Not supported queue type %d", queue_type);
return nullptr;
}
/**
 * gst_d3d12_device_execute_command_lists:
 * @device: a #GstD3D12Device
 * @queue_type: selects the direct or the copy queue of @device
 * @num_command_lists: number of entries in @command_lists
 * @command_lists: command lists to submit
 * @fence_value: (out) (optional): location to store the signalled fence value
 *
 * Submits @command_lists to the device queue selected by @queue_type.
 *
 * Returns: %TRUE on success, %FALSE for an unsupported queue type or if the
 * submission failed
 */
gboolean
gst_d3d12_device_execute_command_lists (GstD3D12Device * device,
    D3D12_COMMAND_LIST_TYPE queue_type, guint num_command_lists,
    ID3D12CommandList ** command_lists, guint64 * fence_value)
{
  g_return_val_if_fail (GST_IS_D3D12_DEVICE (device), FALSE);

  GstD3D12DevicePrivate *priv = device->priv;
  GstD3D12CommandQueue *target;

  if (queue_type == D3D12_COMMAND_LIST_TYPE_DIRECT) {
    target = priv->direct_queue;
  } else if (queue_type == D3D12_COMMAND_LIST_TYPE_COPY) {
    target = priv->copy_queue;
  } else {
    GST_ERROR_OBJECT (device, "Not supported queue type %d", queue_type);
    return FALSE;
  }

  HRESULT hr = gst_d3d12_command_queue_execute_command_lists (target,
      num_command_lists, command_lists, fence_value);

  return gst_d3d12_result (hr, device);
}
/**
 * gst_d3d12_device_get_completed_value:
 * @device: a #GstD3D12Device
 * @queue_type: selects the direct or the copy queue of @device
 *
 * Returns: the completed fence value of the selected queue, or
 * %G_MAXUINT64 for an unsupported queue type
 */
guint64
gst_d3d12_device_get_completed_value (GstD3D12Device * device,
    D3D12_COMMAND_LIST_TYPE queue_type)
{
  g_return_val_if_fail (GST_IS_D3D12_DEVICE (device), G_MAXUINT64);

  GstD3D12DevicePrivate *priv = device->priv;

  if (queue_type == D3D12_COMMAND_LIST_TYPE_DIRECT)
    return gst_d3d12_command_queue_get_completed_value (priv->direct_queue);

  if (queue_type == D3D12_COMMAND_LIST_TYPE_COPY)
    return gst_d3d12_command_queue_get_completed_value (priv->copy_queue);

  GST_ERROR_OBJECT (device, "Not supported queue type %d", queue_type);

  return G_MAXUINT64;
}
/**
 * gst_d3d12_device_set_fence_notify:
 * @device: a #GstD3D12Device
 * @queue_type: selects the direct or the copy queue of @device
 * @fence_value: the fence value guarding @fence_data
 * @fence_data: user data
 * @notify: called with @fence_data once @fence_value is reached
 *
 * Registers @fence_data with the selected queue, see
 * gst_d3d12_command_queue_set_notify().
 *
 * Returns: %TRUE if the data was handed over, %FALSE for an unsupported
 * queue type
 */
gboolean
gst_d3d12_device_set_fence_notify (GstD3D12Device * device,
    D3D12_COMMAND_LIST_TYPE queue_type, guint64 fence_value,
    gpointer fence_data, GDestroyNotify notify)
{
  g_return_val_if_fail (GST_IS_D3D12_DEVICE (device), FALSE);

  GstD3D12DevicePrivate *priv = device->priv;
  GstD3D12CommandQueue *target;

  if (queue_type == D3D12_COMMAND_LIST_TYPE_DIRECT) {
    target = priv->direct_queue;
  } else if (queue_type == D3D12_COMMAND_LIST_TYPE_COPY) {
    target = priv->copy_queue;
  } else {
    GST_ERROR_OBJECT (device, "Not supported queue type %d", queue_type);
    return FALSE;
  }

  gst_d3d12_command_queue_set_notify (target, fence_value, fence_data, notify);

  return TRUE;
}
/**
 * gst_d3d12_device_fence_wait:
 * @device: a #GstD3D12Device
 * @queue_type: selects the direct or the copy queue of @device
 * @fence_value: the fence value to wait for
 * @event_handle: event handle forwarded to the queue for the wait
 *
 * Waits on the fence of the selected queue, see
 * gst_d3d12_command_queue_fence_wait().
 *
 * Returns: %TRUE on success, %FALSE for an unsupported queue type or if the
 * wait failed
 */
gboolean
gst_d3d12_device_fence_wait (GstD3D12Device * device,
    D3D12_COMMAND_LIST_TYPE queue_type, guint64 fence_value,
    HANDLE event_handle)
{
  g_return_val_if_fail (GST_IS_D3D12_DEVICE (device), FALSE);

  GstD3D12DevicePrivate *priv = device->priv;
  GstD3D12CommandQueue *target;

  if (queue_type == D3D12_COMMAND_LIST_TYPE_DIRECT) {
    target = priv->direct_queue;
  } else if (queue_type == D3D12_COMMAND_LIST_TYPE_COPY) {
    target = priv->copy_queue;
  } else {
    GST_ERROR_OBJECT (device, "Not supported queue type %d", queue_type);
    return FALSE;
  }

  HRESULT hr = gst_d3d12_command_queue_fence_wait (target,
      fence_value, event_handle);

  return gst_d3d12_result (hr, device);
}
/**
 * gst_d3d12_device_copy_texture_region:
 * @device: a #GstD3D12Device
 * @num_args: number of entries in @args, must be greater than zero
 * @args: copy descriptions, one CopyTextureRegion() call is recorded per entry
 * @command_type: selects the direct or the copy queue (and matching pools)
 * @fence_value: (out) (optional): location to store the fence value signalled
 *   for the submission (0 if the submission failed)
 *
 * Records all copies into a command list acquired from the device pools,
 * submits it to the selected queue and keeps the command allocator alive
 * until the GPU has reached the signalled fence value.
 *
 * Returns: %TRUE if the commands were submitted
 */
gboolean
gst_d3d12_device_copy_texture_region (GstD3D12Device * device,
    guint num_args, const GstD3D12CopyTextureRegionArgs * args,
    D3D12_COMMAND_LIST_TYPE command_type, guint64 * fence_value)
{
  g_return_val_if_fail (GST_IS_D3D12_DEVICE (device), FALSE);
  g_return_val_if_fail (num_args > 0, FALSE);
  g_return_val_if_fail (args, FALSE);

  HRESULT hr;
  auto priv = device->priv;
  GstD3D12CommandAllocatorPool *ca_pool;
  GstD3D12CommandAllocator *gst_ca = nullptr;
  GstD3D12CommandListPool *cl_pool;
  GstD3D12CommandList *gst_cl = nullptr;
  GstD3D12CommandQueue *queue = nullptr;
  guint64 fence_val = 0;

  switch (command_type) {
    case D3D12_COMMAND_LIST_TYPE_DIRECT:
      queue = priv->direct_queue;
      ca_pool = priv->direct_ca_pool;
      cl_pool = priv->direct_cl_pool;
      break;
    case D3D12_COMMAND_LIST_TYPE_COPY:
      queue = priv->copy_queue;
      ca_pool = priv->copy_ca_pool;
      cl_pool = priv->copy_cl_pool;
      break;
    default:
      GST_ERROR_OBJECT (device, "Not supported command list type %d",
          command_type);
      return FALSE;
  }

  gst_d3d12_command_allocator_pool_acquire (ca_pool, &gst_ca);
  if (!gst_ca) {
    GST_ERROR_OBJECT (device, "Couldn't acquire command allocator");
    return FALSE;
  }

  ComPtr < ID3D12CommandAllocator > ca;
  gst_d3d12_command_allocator_get_handle (gst_ca, &ca);

  gst_d3d12_command_list_pool_acquire (cl_pool, ca.Get (), &gst_cl);
  if (!gst_cl) {
    GST_ERROR_OBJECT (device, "Couldn't acquire command list");
    gst_clear_d3d12_command_allocator (&gst_ca);
    return FALSE;
  }

  ComPtr < ID3D12CommandList > cl_base;
  ComPtr < ID3D12GraphicsCommandList > cl;

  gst_d3d12_command_list_get_handle (gst_cl, &cl_base);
  /* Don't record through a null interface if the query ever fails */
  hr = cl_base.As (&cl);
  if (!gst_d3d12_result (hr, device)) {
    GST_ERROR_OBJECT (device, "Couldn't get graphics command list interface");
    gst_clear_d3d12_command_list (&gst_cl);
    gst_clear_d3d12_command_allocator (&gst_ca);
    return FALSE;
  }

  for (guint i = 0; i < num_args; i++) {
    /* Bind by reference, the argument struct is not cheap to copy */
    const auto & arg = args[i];
    cl->CopyTextureRegion (&arg.dst, arg.dst_x, arg.dst_y, arg.dst_z,
        &arg.src, arg.src_box);
  }

  hr = cl->Close ();
  if (!gst_d3d12_result (hr, device)) {
    GST_ERROR_OBJECT (device, "Couldn't close command list");
    gst_clear_d3d12_command_list (&gst_cl);
    gst_clear_d3d12_command_allocator (&gst_ca);
    return FALSE;
  }

  ID3D12CommandList *cmd_list[] = { cl.Get () };

  hr = gst_d3d12_command_queue_execute_command_lists (queue,
      1, cmd_list, &fence_val);
  auto ret = gst_d3d12_result (hr, device);

  /* We can release command list since command list pool will hold it */
  gst_d3d12_command_list_unref (gst_cl);

  if (ret) {
    /* The allocator backs the submitted commands, so hand our reference to
     * the queue, which drops it once fence_val has been reached */
    gst_d3d12_command_queue_set_notify (queue, fence_val, gst_ca,
        (GDestroyNotify) gst_d3d12_command_allocator_unref);
  } else {
    gst_d3d12_command_allocator_unref (gst_ca);
  }

  if (fence_value)
    *fence_value = fence_val;

  return ret;
}
static inline GstDebugLevel

View file

@ -22,6 +22,7 @@
#include <gst/gst.h>
#include <gst/video/video.h>
#include "gstd3d12_fwd.h"
#include "gstd3d12commandqueue.h"
G_BEGIN_DECLS
@ -52,6 +53,16 @@ struct _GstD3D12DeviceClass
gpointer _gst_reserved[GST_PADDING];
};
/**
 * GstD3D12CopyTextureRegionArgs:
 *
 * Describes a single texture copy. gst_d3d12_device_copy_texture_region()
 * takes an array of these and records one
 * ID3D12GraphicsCommandList::CopyTextureRegion() call per element, passing
 * the fields through in the order (dst, dst_x, dst_y, dst_z, src, src_box).
 */
typedef struct _GstD3D12CopyTextureRegionArgs
{
/* Destination copy location */
D3D12_TEXTURE_COPY_LOCATION dst;
/* Offsets forwarded as the DstX / DstY / DstZ arguments of the copy */
guint dst_x;
guint dst_y;
guint dst_z;
/* Source copy location */
D3D12_TEXTURE_COPY_LOCATION src;
/* Optional source box, forwarded as-is (a NULL pointer is passed through
 * unchanged). Only the pointer is stored, so the box must stay valid
 * while the copy is being recorded */
const D3D12_BOX * src_box;
} GstD3D12CopyTextureRegionArgs;
GType gst_d3d12_device_get_type (void);
GstD3D12Device * gst_d3d12_device_new (guint adapter_index);
@ -68,9 +79,34 @@ gboolean gst_d3d12_device_get_format (GstD3D12Devic
GstVideoFormat format,
GstD3D12Format * device_format);
guint64 gst_d3d12_device_get_fence_value (GstD3D12Device * device);
GstD3D12CommandQueue * gst_d3d12_device_get_command_queue (GstD3D12Device * device,
D3D12_COMMAND_LIST_TYPE queue_type);
ID3D12CommandQueue * gst_d3d12_device_get_copy_queue (GstD3D12Device * device);
gboolean gst_d3d12_device_execute_command_lists (GstD3D12Device * device,
D3D12_COMMAND_LIST_TYPE queue_type,
guint num_command_lists,
ID3D12CommandList ** command_lists,
guint64 * fence_value);
guint64 gst_d3d12_device_get_completed_value (GstD3D12Device * device,
D3D12_COMMAND_LIST_TYPE queue_type);
gboolean gst_d3d12_device_set_fence_notify (GstD3D12Device * device,
D3D12_COMMAND_LIST_TYPE queue_type,
guint64 fence_value,
gpointer fence_data,
GDestroyNotify notify);
gboolean gst_d3d12_device_fence_wait (GstD3D12Device * device,
D3D12_COMMAND_LIST_TYPE queue_type,
guint64 fence_value,
HANDLE event_handle);
gboolean gst_d3d12_device_copy_texture_region (GstD3D12Device * device,
guint num_args,
const GstD3D12CopyTextureRegionArgs * args,
D3D12_COMMAND_LIST_TYPE command_type,
guint64 * fence_value);
void gst_d3d12_device_d3d12_debug (GstD3D12Device * device,
const gchar * file,

View file

@ -1,181 +0,0 @@
/* GStreamer
* Copyright (C) 2023 Seungha Yang <seungha@centricular.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "gstd3d12fence.h"
#include "gstd3d12device.h"
#include "gstd3d12utils.h"
#include <wrl.h>
#include <mutex>
GST_DEBUG_CATEGORY_EXTERN (gst_d3d12_fence_debug);
#define GST_CAT_DEFAULT gst_d3d12_fence_debug
GST_DEFINE_MINI_OBJECT_TYPE (GstD3D12Fence, gst_d3d12_fence);
/* *INDENT-OFF* */
using namespace Microsoft::WRL;
/* Private state of a GstD3D12Fence.
 *
 * Owns the Win32 event used to block the CPU until the D3D12 fence reaches
 * the pending target value. The event is created together with the object
 * and closed again on destruction. */
struct _GstD3D12FencePrivate
{
  _GstD3D12FencePrivate()
  {
    /* flags == 0: neither CREATE_EVENT_MANUAL_RESET nor
     * CREATE_EVENT_INITIAL_SET is requested. The call returns NULL on
     * failure, in which case event_handle stays null */
    event_handle = CreateEventEx (nullptr, nullptr, 0, EVENT_ALL_ACCESS);
  }

  ~_GstD3D12FencePrivate()
  {
    /* Event creation may have failed; do not hand a null handle to
     * CloseHandle() */
    if (event_handle)
      CloseHandle (event_handle);
  }

  /* Underlying D3D12 fence, assigned by gst_d3d12_fence_new() */
  ComPtr<ID3D12Fence> fence;
  /* Event armed via SetEventOnCompletion() while waiting */
  HANDLE event_handle = nullptr;
  /* Serializes access to "value" and the wait itself */
  std::mutex lock;
  /* Pending fence value to wait for; 0 means there is nothing to wait for */
  guint64 value = 0;
};
/* *INDENT-ON* */
/* GstMiniObject free function for GstD3D12Fence.
 *
 * Drops the device reference, destroys the C++ private data and finally
 * releases the fence structure itself. A null pointer is ignored. */
static void
gst_d3d12_fence_free (GstD3D12Fence * self)
{
  if (self != nullptr) {
    GST_TRACE ("Freeing fence %p", self);

    gst_clear_object (&self->device);

    /* priv was allocated with new, the outer struct with g_new0() */
    delete self->priv;
    g_free (self);
  }
}
/**
 * gst_d3d12_fence_new:
 * @device: a #GstD3D12Device
 *
 * Creates a D3D12 fence with an initial value of 0 on @device and wraps it
 * into a new #GstD3D12Fence mini object which holds a reference to @device.
 *
 * Returns: the new fence, or %NULL if @device is invalid or the D3D12 fence
 * could not be created
 */
GstD3D12Fence *
gst_d3d12_fence_new (GstD3D12Device * device)
{
  g_return_val_if_fail (GST_IS_D3D12_DEVICE (device), nullptr);

  auto d3d12_device = gst_d3d12_device_get_device_handle (device);

  ComPtr < ID3D12Fence > new_fence;
  HRESULT hr = d3d12_device->CreateFence (0, D3D12_FENCE_FLAG_NONE,
      IID_PPV_ARGS (&new_fence));
  if (!gst_d3d12_result (hr, device)) {
    GST_ERROR_OBJECT (device, "Failed to create fence, hr: 0x%x", (guint) hr);
    return nullptr;
  }

  /* Private data is a C++ object and therefore allocated with new */
  auto fence_priv = new GstD3D12FencePrivate ();
  fence_priv->fence = new_fence;

  auto self = g_new0 (GstD3D12Fence, 1);
  GST_TRACE_OBJECT (device, "Creating fence %p", self);

  self->device = (GstD3D12Device *) gst_object_ref (device);
  self->priv = fence_priv;

  /* Register gst_d3d12_fence_free() as the mini object free function */
  gst_mini_object_init (GST_MINI_OBJECT_CAST (self), 0, GST_TYPE_D3D12_FENCE,
      nullptr, nullptr, (GstMiniObjectFreeFunction) gst_d3d12_fence_free);

  return self;
}
/**
 * gst_d3d12_fence_set_event_on_completion_value:
 * @fence: a #GstD3D12Fence
 * @value: fence value a later wait should block on
 *
 * Records @value as the pending wait target, unless the fence has already
 * reached it. This function does not arm the event itself; that happens in
 * gst_d3d12_fence_wait_for().
 *
 * Returns: %TRUE, or %FALSE if @fence is %NULL
 */
gboolean
gst_d3d12_fence_set_event_on_completion_value (GstD3D12Fence * fence,
    guint64 value)
{
  g_return_val_if_fail (fence != nullptr, FALSE);

  auto priv = fence->priv;
  std::lock_guard < std::mutex > guard (priv->lock);

  /* Only remember targets which the fence has not reached yet */
  const guint64 completed = priv->fence->GetCompletedValue ();
  if (completed < value)
    priv->value = value;

  return TRUE;
}
/**
 * gst_d3d12_fence_get_handle:
 * @fence: a #GstD3D12Fence
 *
 * Returns: the ID3D12Fence held by @fence as obtained from ComPtr::Get(),
 * or %NULL if @fence is %NULL
 */
ID3D12Fence *
gst_d3d12_fence_get_handle (GstD3D12Fence * fence)
{
  g_return_val_if_fail (fence != nullptr, nullptr);

  auto priv = fence->priv;

  return priv->fence.Get ();
}
/**
 * gst_d3d12_fence_wait_for:
 * @fence: a #GstD3D12Fence
 * @timeout_ms: maximum time to block, in milliseconds
 *
 * Blocks until the fence reaches the target recorded by
 * gst_d3d12_fence_set_event_on_completion_value(), or until @timeout_ms
 * elapses. Returns immediately when no target is pending. Afterwards the
 * pending target is cleared and the fence is signalled back to 0, except
 * when arming the completion event failed, in which case the pending target
 * is kept untouched.
 */
void
gst_d3d12_fence_wait_for (GstD3D12Fence * fence, guint timeout_ms)
{
  g_return_if_fail (fence != nullptr);

  GstD3D12FencePrivate *priv = fence->priv;
  std::lock_guard < std::mutex > lk (priv->lock);

  /* Nothing was scheduled on this fence */
  if (!priv->value)
    return;

  auto current = priv->fence->GetCompletedValue ();
  if (current < priv->value) {
    GST_TRACE ("Waiting for fence to be signalled with value %" G_GUINT64_FORMAT
        ", current: %" G_GUINT64_FORMAT, priv->value, current);

    HRESULT hr = priv->fence->SetEventOnCompletion (priv->value,
        priv->event_handle);
    if (!gst_d3d12_result (hr, fence->device)) {
      GST_ERROR_OBJECT (fence->device, "Failed to set completion event");
      return;
    }

    /* Report the real outcome instead of unconditionally claiming that the
     * fence was signalled: the wait may time out or fail */
    DWORD wait_ret = WaitForSingleObjectEx (priv->event_handle, timeout_ms,
        FALSE);
    if (wait_ret == WAIT_OBJECT_0) {
      GST_TRACE ("Signalled with value %" G_GUINT64_FORMAT, priv->value);
    } else {
      GST_WARNING ("Wait for value %" G_GUINT64_FORMAT
          " did not complete, ret: 0x%x", priv->value, (guint) wait_ret);
    }
  } else {
    GST_TRACE ("target %" G_GUINT64_FORMAT " <= current: %" G_GUINT64_FORMAT,
        priv->value, current);
  }

  /* NOTE(review): state is reset even if the wait above did not complete,
   * which matches the previous behavior - confirm this is intended */
  priv->value = 0;
  priv->fence->Signal (0);
}
/**
 * gst_d3d12_fence_wait:
 * @fence: a #GstD3D12Fence
 *
 * Same as gst_d3d12_fence_wait_for() with an INFINITE timeout.
 */
void
gst_d3d12_fence_wait (GstD3D12Fence * fence)
{
  const guint no_timeout = INFINITE;

  gst_d3d12_fence_wait_for (fence, no_timeout);
}
/**
 * gst_d3d12_fence_ref:
 * @fence: a #GstD3D12Fence
 *
 * Increments the reference count of @fence via gst_mini_object_ref().
 *
 * Returns: @fence
 */
GstD3D12Fence *
gst_d3d12_fence_ref (GstD3D12Fence * fence)
{
  GstMiniObject *obj = gst_mini_object_ref (GST_MINI_OBJECT_CAST (fence));

  return (GstD3D12Fence *) obj;
}
/**
 * gst_d3d12_fence_unref:
 * @fence: a #GstD3D12Fence
 *
 * Decrements the reference count of @fence via gst_mini_object_unref().
 */
void
gst_d3d12_fence_unref (GstD3D12Fence * fence)
{
  GstMiniObject *obj = GST_MINI_OBJECT_CAST (fence);

  gst_mini_object_unref (obj);
}

View file

@ -1,60 +0,0 @@
/* GStreamer
* Copyright (C) 2023 Seungha Yang <seungha@centricular.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#pragma once
#include <gst/gst.h>
#include "gstd3d12_fwd.h"
G_BEGIN_DECLS
#define GST_TYPE_D3D12_FENCE (gst_d3d12_fence_get_type())
#define GST_D3D12_FENCE_CAST(f) ((GstD3D12Fence *)f)
/**
 * GstD3D12Fence:
 *
 * Mini object wrapping an ID3D12Fence. Instances are created with
 * gst_d3d12_fence_new() and reference counted through
 * gst_d3d12_fence_ref() / gst_d3d12_fence_unref().
 */
struct _GstD3D12Fence
{
/* Mini object base; the struct pointer is cast to GstMiniObject */
GstMiniObject parent;
/* Device the fence was created on; a reference is taken at creation and
 * dropped when the fence is freed */
GstD3D12Device *device;
/*< private >*/
/* Implementation details (fence handle, event, lock, pending value) */
GstD3D12FencePrivate *priv;
gpointer _gst_reserved[GST_PADDING];
};
GType gst_d3d12_fence_get_type (void);
GstD3D12Fence * gst_d3d12_fence_new (GstD3D12Device * device);
ID3D12Fence * gst_d3d12_fence_get_handle (GstD3D12Fence * fence);
gboolean gst_d3d12_fence_set_event_on_completion_value (GstD3D12Fence * fence,
guint64 value);
void gst_d3d12_fence_wait_for (GstD3D12Fence * fence,
guint timeout_ms);
void gst_d3d12_fence_wait (GstD3D12Fence * fence);
GstD3D12Fence * gst_d3d12_fence_ref (GstD3D12Fence * fence);
void gst_d3d12_fence_unref (GstD3D12Fence * fence);
G_END_DECLS

View file

@ -27,13 +27,13 @@
#include "gstd3d12device.h"
#include "gstd3d12utils.h"
#include "gstd3d12format.h"
#include "gstd3d12fence.h"
#include <string.h>
#include <wrl.h>
#include <mutex>
#include <condition_variable>
#include <atomic>
#include <queue>
#include <vector>
/* *INDENT-OFF* */
using namespace Microsoft::WRL;
@ -181,16 +181,18 @@ gst_d3d12_allocation_params_set_array_size (GstD3D12AllocationParams * params,
/* *INDENT-OFF* */
struct _GstD3D12MemoryPrivate
{
~_GstD3D12MemoryPrivate ()
{
if (event_handle)
CloseHandle (event_handle);
}
ComPtr<ID3D12Resource> resource;
ComPtr<ID3D12Resource> staging;
ComPtr<ID3D12DescriptorHeap> srv_heap;
ComPtr<ID3D12DescriptorHeap> rtv_heap;
ComPtr<ID3D12CommandAllocator> copy_ca;
ComPtr<ID3D12GraphicsCommandList> copy_cl;
guint cpu_map_count = 0;
gpointer staging_ptr = nullptr;
D3D12_RESOURCE_DESC desc;
@ -215,7 +217,7 @@ GST_DEFINE_MINI_OBJECT_TYPE (GstD3D12Memory, gst_d3d12_memory);
static gboolean
gst_d3d12_memory_ensure_staging_resource (GstD3D12Memory * dmem)
{
GstD3D12MemoryPrivate *priv = dmem->priv;
auto priv = dmem->priv;
if (priv->staging)
return TRUE;
@ -226,16 +228,16 @@ gst_d3d12_memory_ensure_staging_resource (GstD3D12Memory * dmem)
}
HRESULT hr;
ID3D12Device *device = gst_d3d12_device_get_device_handle (dmem->device);
auto device = gst_d3d12_device_get_device_handle (dmem->device);
D3D12_HEAP_PROPERTIES prop =
CD3DX12_HEAP_PROPERTIES (D3D12_HEAP_TYPE_READBACK);
CD3DX12_HEAP_PROPERTIES (D3D12_CPU_PAGE_PROPERTY_WRITE_BACK,
D3D12_MEMORY_POOL_L0);
D3D12_RESOURCE_DESC desc = CD3DX12_RESOURCE_DESC::Buffer (priv->size);
ComPtr < ID3D12Resource > staging;
hr = device->CreateCommittedResource (&prop, D3D12_HEAP_FLAG_NONE,
&desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS (&staging));
&desc, D3D12_RESOURCE_STATE_COMMON, nullptr, IID_PPV_ARGS (&staging));
if (!gst_d3d12_result (hr, dmem->device)) {
GST_ERROR_OBJECT (dmem->device, "Couldn't create readback resource");
GST_ERROR_OBJECT (dmem->device, "Couldn't create staging resource");
return FALSE;
}
@ -246,82 +248,66 @@ gst_d3d12_memory_ensure_staging_resource (GstD3D12Memory * dmem)
return FALSE;
}
ComPtr < ID3D12CommandAllocator > copy_ca;
ComPtr < ID3D12GraphicsCommandList > copy_cl;
hr = device->CreateCommandAllocator (D3D12_COMMAND_LIST_TYPE_COPY,
IID_PPV_ARGS (&copy_ca));
if (!gst_d3d12_result (hr, dmem->device))
return FALSE;
hr = device->CreateCommandList (0, D3D12_COMMAND_LIST_TYPE_COPY,
copy_ca.Get (), nullptr, IID_PPV_ARGS (&copy_cl));
if (!gst_d3d12_result (hr, dmem->device))
return FALSE;
hr = copy_cl->Close ();
if (!gst_d3d12_result (hr, dmem->device))
return FALSE;
priv->staging = staging;
priv->copy_ca = copy_ca;
priv->copy_cl = copy_cl;
GST_MINI_OBJECT_FLAG_SET (dmem, GST_D3D12_MEMORY_TRANSFER_NEED_DOWNLOAD);
return TRUE;
}
/* Blocks until the queue of type @command_type on the memory's device has
 * completed @fence_value.
 *
 * Returns right away if the device already reports a completed value that
 * is at least @fence_value. Otherwise the per-memory event handle is created
 * on first use and handed to gst_d3d12_device_fence_wait(). Failures cannot
 * be reported to the caller (void return), so they are logged instead of
 * being dropped silently. */
static void
gst_d3d12_memory_wait_gpu (GstD3D12Memory * dmem,
    D3D12_COMMAND_LIST_TYPE command_type, guint64 fence_value)
{
  auto priv = dmem->priv;

  auto completed = gst_d3d12_device_get_completed_value (dmem->device,
      command_type);
  if (completed >= fence_value)
    return;

  /* Lazily create the event; it is reused by later waits */
  if (!priv->event_handle) {
    priv->event_handle =
        CreateEventEx (nullptr, nullptr, 0, EVENT_ALL_ACCESS);
    if (!priv->event_handle)
      GST_WARNING_OBJECT (dmem->device, "Couldn't create event handle");
  }

  /* The wait is still attempted when event creation failed, as before */
  if (!gst_d3d12_device_fence_wait (dmem->device, command_type,
          fence_value, priv->event_handle)) {
    GST_ERROR_OBJECT (dmem->device,
        "Couldn't wait for fence value %" G_GUINT64_FORMAT, fence_value);
  }
}
static gboolean
gst_d3d12_memory_download (GstD3D12Memory * dmem)
{
GstD3D12MemoryPrivate *priv = dmem->priv;
HRESULT hr;
ID3D12CommandQueue *queue;
auto priv = dmem->priv;
if (!priv->staging ||
!GST_MEMORY_FLAG_IS_SET (dmem, GST_D3D12_MEMORY_TRANSFER_NEED_DOWNLOAD)) {
return TRUE;
}
gst_d3d12_fence_wait (dmem->fence);
queue = gst_d3d12_device_get_copy_queue (dmem->device);
if (!queue)
return FALSE;
hr = priv->copy_ca->Reset ();
if (!gst_d3d12_result (hr, dmem->device))
return FALSE;
hr = priv->copy_cl->Reset (priv->copy_ca.Get (), nullptr);
if (!gst_d3d12_result (hr, dmem->device))
return FALSE;
std::vector < GstD3D12CopyTextureRegionArgs > copy_args;
for (guint i = 0; i < priv->num_subresources; i++) {
D3D12_TEXTURE_COPY_LOCATION src =
CD3DX12_TEXTURE_COPY_LOCATION (priv->resource.Get (),
priv->subresource_index[i]);
D3D12_TEXTURE_COPY_LOCATION dst =
CD3DX12_TEXTURE_COPY_LOCATION (priv->staging.Get (), priv->layout[i]);
GstD3D12CopyTextureRegionArgs args;
memset (&args, 0, sizeof (args));
priv->copy_cl->CopyTextureRegion (&dst, 0, 0, 0, &src, nullptr);
args.dst = CD3DX12_TEXTURE_COPY_LOCATION (priv->staging.Get (),
priv->layout[i]);
args.src = CD3DX12_TEXTURE_COPY_LOCATION (priv->resource.Get (),
priv->subresource_index[i]);
copy_args.push_back (args);
}
hr = priv->copy_cl->Close ();
if (!gst_d3d12_result (hr, dmem->device))
gst_d3d12_memory_wait_gpu (dmem, D3D12_COMMAND_LIST_TYPE_DIRECT,
dmem->fence_value);
guint64 fence_val = 0;
/* Use async copy queue when downloading */
if (!gst_d3d12_device_copy_texture_region (dmem->device, copy_args.size (),
copy_args.data (), D3D12_COMMAND_LIST_TYPE_COPY, &fence_val)) {
GST_ERROR_OBJECT (dmem->device, "Couldn't download texture to staging");
return FALSE;
}
ID3D12CommandList *list[] = { priv->copy_cl.Get () };
queue->ExecuteCommandLists (1, list);
guint64 fence_value = gst_d3d12_device_get_fence_value (dmem->device);
hr = queue->Signal (gst_d3d12_fence_get_handle (dmem->fence), fence_value);
if (!gst_d3d12_result (hr, dmem->device))
return FALSE;
gst_d3d12_fence_set_event_on_completion_value (dmem->fence, fence_value);
gst_d3d12_fence_wait (dmem->fence);
gst_d3d12_memory_wait_gpu (dmem, D3D12_COMMAND_LIST_TYPE_COPY, fence_val);
GST_MEMORY_FLAG_UNSET (dmem, GST_D3D12_MEMORY_TRANSFER_NEED_DOWNLOAD);
@ -331,51 +317,32 @@ gst_d3d12_memory_download (GstD3D12Memory * dmem)
static gboolean
gst_d3d12_memory_upload (GstD3D12Memory * dmem)
{
GstD3D12MemoryPrivate *priv = dmem->priv;
HRESULT hr;
ID3D12CommandQueue *queue;
auto priv = dmem->priv;
if (!priv->staging ||
!GST_MEMORY_FLAG_IS_SET (dmem, GST_D3D12_MEMORY_TRANSFER_NEED_UPLOAD)) {
return TRUE;
}
queue = gst_d3d12_device_get_copy_queue (dmem->device);
if (!queue)
return FALSE;
hr = priv->copy_ca->Reset ();
if (!gst_d3d12_result (hr, dmem->device))
return FALSE;
hr = priv->copy_cl->Reset (priv->copy_ca.Get (), nullptr);
if (!gst_d3d12_result (hr, dmem->device))
return FALSE;
std::vector < GstD3D12CopyTextureRegionArgs > copy_args;
for (guint i = 0; i < priv->num_subresources; i++) {
D3D12_TEXTURE_COPY_LOCATION src =
CD3DX12_TEXTURE_COPY_LOCATION (priv->staging.Get (), priv->layout[i]);
D3D12_TEXTURE_COPY_LOCATION dst =
CD3DX12_TEXTURE_COPY_LOCATION (priv->resource.Get (),
priv->subresource_index[i]);
GstD3D12CopyTextureRegionArgs args;
memset (&args, 0, sizeof (args));
priv->copy_cl->CopyTextureRegion (&dst, 0, 0, 0, &src, nullptr);
args.dst = CD3DX12_TEXTURE_COPY_LOCATION (priv->resource.Get (),
priv->subresource_index[i]);
args.src = CD3DX12_TEXTURE_COPY_LOCATION (priv->staging.Get (),
priv->layout[i]);
copy_args.push_back (args);
}
hr = priv->copy_cl->Close ();
if (!gst_d3d12_result (hr, dmem->device))
if (!gst_d3d12_device_copy_texture_region (dmem->device, copy_args.size (),
copy_args.data (), D3D12_COMMAND_LIST_TYPE_DIRECT,
&dmem->fence_value)) {
GST_ERROR_OBJECT (dmem->device, "Couldn't upload texture");
return FALSE;
ID3D12CommandList *list[] = { priv->copy_cl.Get () };
queue->ExecuteCommandLists (1, list);
guint64 fence_value = gst_d3d12_device_get_fence_value (dmem->device);
hr = queue->Signal (gst_d3d12_fence_get_handle (dmem->fence), fence_value);
if (!gst_d3d12_result (hr, dmem->device))
return FALSE;
gst_d3d12_fence_set_event_on_completion_value (dmem->fence, fence_value);
gst_d3d12_fence_wait (dmem->fence);
}
GST_MEMORY_FLAG_UNSET (dmem, GST_D3D12_MEMORY_TRANSFER_NEED_UPLOAD);
@ -385,40 +352,33 @@ gst_d3d12_memory_upload (GstD3D12Memory * dmem)
static gpointer
gst_d3d12_memory_map_full (GstMemory * mem, GstMapInfo * info, gsize maxsize)
{
GstD3D12Memory *dmem = GST_D3D12_MEMORY_CAST (mem);
GstD3D12MemoryPrivate *priv = dmem->priv;
auto dmem = GST_D3D12_MEMORY_CAST (mem);
auto priv = dmem->priv;
GstMapFlags flags = info->flags;
std::lock_guard < std::mutex > lk (priv->lock);
if ((flags & GST_MAP_D3D12) == GST_MAP_D3D12) {
if (!gst_d3d12_memory_upload (dmem)) {
GST_ERROR_OBJECT (mem->allocator, "Upload failed");
return nullptr;
}
if ((flags & GST_MAP_D3D12) != 0) {
gst_d3d12_memory_upload (dmem);
if ((flags & GST_MAP_WRITE) == GST_MAP_WRITE) {
if ((flags & GST_MAP_WRITE) != 0)
GST_MINI_OBJECT_FLAG_SET (dmem, GST_D3D12_MEMORY_TRANSFER_NEED_DOWNLOAD);
}
return priv->resource.Get ();
}
if (priv->cpu_map_count == 0) {
if (!gst_d3d12_memory_ensure_staging_resource (dmem)) {
GST_ERROR_OBJECT (mem->allocator, "Couldn't create staging resource");
return nullptr;
}
if (!gst_d3d12_memory_download (dmem)) {
GST_ERROR_OBJECT (mem->allocator, "Couldn't download resource");
return nullptr;
}
if ((flags & GST_MAP_WRITE) == GST_MAP_WRITE)
GST_MINI_OBJECT_FLAG_SET (mem, GST_D3D12_MEMORY_TRANSFER_NEED_UPLOAD);
if (!gst_d3d12_memory_ensure_staging_resource (dmem)) {
GST_ERROR_OBJECT (mem->allocator,
"Couldn't create readback_staging resource");
return nullptr;
}
priv->cpu_map_count++;
if (!gst_d3d12_memory_download (dmem)) {
GST_ERROR_OBJECT (mem->allocator, "Couldn't download resource");
return nullptr;
}
if ((flags & GST_MAP_WRITE) != 0)
GST_MINI_OBJECT_FLAG_SET (mem, GST_D3D12_MEMORY_TRANSFER_NEED_UPLOAD);
return priv->staging_ptr;
}
@ -426,20 +386,7 @@ gst_d3d12_memory_map_full (GstMemory * mem, GstMapInfo * info, gsize maxsize)
static void
gst_d3d12_memory_unmap_full (GstMemory * mem, GstMapInfo * info)
{
GstD3D12Memory *dmem = GST_D3D12_MEMORY_CAST (mem);
GstD3D12MemoryPrivate *priv = dmem->priv;
std::lock_guard < std::mutex > lk (priv->lock);
if ((info->flags & GST_MAP_D3D12) == GST_MAP_D3D12) {
if ((info->flags & GST_MAP_WRITE) == GST_MAP_WRITE)
GST_MINI_OBJECT_FLAG_SET (mem, GST_D3D12_MEMORY_TRANSFER_NEED_DOWNLOAD);
return;
}
if ((info->flags & GST_MAP_WRITE) == GST_MAP_WRITE)
GST_MINI_OBJECT_FLAG_SET (mem, GST_D3D12_MEMORY_TRANSFER_NEED_UPLOAD);
priv->cpu_map_count--;
/* Nothing to do here */
}
static GstMemory *
@ -509,11 +456,9 @@ gboolean
gst_d3d12_memory_get_plane_size (GstD3D12Memory * mem, guint plane,
gint * width, gint * height, gint * stride, gsize * offset)
{
GstD3D12MemoryPrivate *priv;
g_return_val_if_fail (gst_is_d3d12_memory (GST_MEMORY_CAST (mem)), FALSE);
priv = mem->priv;
auto priv = mem->priv;
if (plane >= priv->num_subresources) {
GST_WARNING_OBJECT (GST_MEMORY_CAST (mem)->allocator, "Invalid plane %d",
@ -755,14 +700,12 @@ gst_d3d12_allocator_dummy_alloc (GstAllocator * allocator, gsize size,
static void
gst_d3d12_allocator_free (GstAllocator * allocator, GstMemory * mem)
{
GstD3D12Memory *dmem = GST_D3D12_MEMORY_CAST (mem);
auto dmem = GST_D3D12_MEMORY_CAST (mem);
GST_LOG_OBJECT (allocator, "Free memory %p", mem);
if (dmem->fence) {
gst_d3d12_fence_wait (dmem->fence);
gst_d3d12_fence_unref (dmem->fence);
}
gst_d3d12_memory_wait_gpu (dmem, D3D12_COMMAND_LIST_TYPE_DIRECT,
dmem->fence_value);
delete dmem->priv;
@ -777,9 +720,7 @@ gst_d3d12_allocator_alloc_wrapped (GstD3D12Allocator * self,
D3D12_RESOURCE_STATES initial_state, ID3D12Resource * resource,
guint array_slice)
{
GstD3D12Memory *mem;
GstD3D12MemoryPrivate *priv;
ID3D12Device *device_handle = gst_d3d12_device_get_device_handle (device);
auto device_handle = gst_d3d12_device_get_device_handle (device);
guint8 num_subresources =
D3D12GetFormatPlaneCount (device_handle, desc->Format);
@ -788,8 +729,10 @@ gst_d3d12_allocator_alloc_wrapped (GstD3D12Allocator * self,
return nullptr;
}
mem = g_new0 (GstD3D12Memory, 1);
mem->priv = priv = new GstD3D12MemoryPrivate ();
auto mem = g_new0 (GstD3D12Memory, 1);
mem->priv = new GstD3D12MemoryPrivate ();
auto priv = mem->priv;
priv->desc = *desc;
priv->num_subresources = num_subresources;
@ -804,7 +747,6 @@ gst_d3d12_allocator_alloc_wrapped (GstD3D12Allocator * self,
(D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
mem->device = (GstD3D12Device *) gst_object_ref (device);
mem->fence = gst_d3d12_fence_new (device);
mem->priv->size = 0;
for (guint i = 0; i < mem->priv->num_subresources; i++) {

View file

@ -110,7 +110,7 @@ struct _GstD3D12Memory
/*< public >*/
GstD3D12Device *device;
GstD3D12Fence *fence;
guint64 fence_value;
/*< private >*/
GstD3D12MemoryPrivate *priv;

View file

@ -4,11 +4,11 @@ d3d12_sources = [
'gstd3d12bufferpool.cpp',
'gstd3d12commandallocatorpool.cpp',
'gstd3d12commandlistpool.cpp',
'gstd3d12commandqueue.cpp',
'gstd3d12decoder.cpp',
'gstd3d12descriptorpool.cpp',
'gstd3d12device.cpp',
'gstd3d12download.cpp',
'gstd3d12fence.cpp',
'gstd3d12format.cpp',
'gstd3d12h264dec.cpp',
'gstd3d12h265dec.cpp',

View file

@ -44,7 +44,6 @@ using namespace Microsoft::WRL;
GST_DEBUG_CATEGORY (gst_d3d12_debug);
GST_DEBUG_CATEGORY (gst_d3d12_allocator_debug);
GST_DEBUG_CATEGORY (gst_d3d12_decoder_debug);
GST_DEBUG_CATEGORY (gst_d3d12_fence_debug);
GST_DEBUG_CATEGORY (gst_d3d12_format_debug);
GST_DEBUG_CATEGORY (gst_d3d12_utils_debug);
@ -58,8 +57,6 @@ plugin_init (GstPlugin * plugin)
"d3d12allocator");
GST_DEBUG_CATEGORY_INIT (gst_d3d12_decoder_debug, "d3d12decoder", 0,
"d3d12decoder");
GST_DEBUG_CATEGORY_INIT (gst_d3d12_fence_debug, "d3d12fence", 0,
"d3d12fence");
GST_DEBUG_CATEGORY_INIT (gst_d3d12_format_debug, "d3d12format", 0,
"d3d12format");
GST_DEBUG_CATEGORY_INIT (gst_d3d12_utils_debug,