From 6d28f3b2c670e21659985c5912426c1d81fa9388 Mon Sep 17 00:00:00 2001
From: Seungha Yang
Date: Sun, 8 Sep 2024 01:00:12 +0900
Subject: [PATCH] nvcodec: Add a helper object for d3d12 interop

Adding new helper object for d3d12 -> cuda memory copy

Part-of:
---
 .../sys/nvcodec/gstcudainterop_d3d12.cpp      | 706 ++++++++++++++++++
 .../sys/nvcodec/gstcudainterop_d3d12.h        |  45 ++
 .../gst-plugins-bad/sys/nvcodec/meson.build   |  10 +
 3 files changed, 761 insertions(+)
 create mode 100644 subprojects/gst-plugins-bad/sys/nvcodec/gstcudainterop_d3d12.cpp
 create mode 100644 subprojects/gst-plugins-bad/sys/nvcodec/gstcudainterop_d3d12.h

diff --git a/subprojects/gst-plugins-bad/sys/nvcodec/gstcudainterop_d3d12.cpp b/subprojects/gst-plugins-bad/sys/nvcodec/gstcudainterop_d3d12.cpp
new file mode 100644
index 0000000000..262b213f20
--- /dev/null
+++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstcudainterop_d3d12.cpp
@@ -0,0 +1,706 @@
+/* GStreamer
+ * Copyright (C) 2024 Seungha Yang
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include
+#endif
+
+#include "gstcudainterop_d3d12.h"
+#include
+#include
+#include
+#include
+#include
+#include
+
+/* *INDENT-OFF* */
+using namespace Microsoft::WRL;
+
+/* Staging buffer shared between D3D12 and CUDA. "resource" is the D3D12
+ * buffer, "ext_mem" the CUDA import of it and "devptr" the CUDA mapping.
+ * "interop" is set while the resource is handed out so that dispose can
+ * return it to the owning pool. */
+struct GstCudaD3D12InteropResource : public GstMiniObject
+{
+  GstCudaD3D12InteropResource() = default;
+
+  ~GstCudaD3D12InteropResource()
+  {
+    if (context) {
+      if (gst_cuda_context_push (context)) {
+        if (devptr)
+          CuMemFree (devptr);
+
+        if (ext_mem)
+          CuDestroyExternalMemory (ext_mem);
+
+        /* Balance the push above, as ~GstCudaD3D12InteropPrivate does */
+        gst_cuda_context_pop (nullptr);
+      }
+
+      gst_object_unref (context);
+    }
+  }
+
+  GstCudaD3D12Interop *interop = nullptr;
+
+  ComPtr<ID3D12Resource> resource;
+  GstCudaContext *context = nullptr;
+  CUdeviceptr devptr = 0;
+  CUexternalMemory ext_mem = nullptr;
+};
+/* *INDENT-ON* */
+
+GST_DEFINE_MINI_OBJECT_TYPE (GstCudaD3D12InteropResource,
+    gst_cuda_d3d12_interop_resource);
+
+/* Maximum number of fence values wait_async() lets the GPU fall behind
+ * before it blocks the caller */
+#define ASYNC_FENCE_WAIT_DEPTH 16
+
+/* Resource to release once "fence_value" is signalled */
+struct FenceWaitData
+{
+  UINT64 fence_value = 0;
+  GstCudaD3D12InteropResource *resource = nullptr;
+};
+
+static gpointer gst_cuda_d3d12_interop_fence_wait_thread (gpointer data);
+
+/* Owns a worker thread that waits for queued fence values on "fence" and
+ * drops the reference on the associated resource afterwards */
+struct FenceAsyncWaiter
+{
+  FenceAsyncWaiter (ID3D12Fence * fence)
+  {
+    fence_ = fence;
+    queue_ = gst_vec_deque_new_for_struct (sizeof (FenceWaitData),
+        ASYNC_FENCE_WAIT_DEPTH);
+    thread_ = g_thread_new ("GstCudaD3D12Interop",
+        gst_cuda_d3d12_interop_fence_wait_thread, this);
+  }
+
+  ~FenceAsyncWaiter ()
+  {
+    {
+      std::lock_guard < std::mutex > lk (lock_);
+      shutdown_ = true;
+      cond_.notify_one ();
+    }
+    g_thread_join (thread_);
+
+    while (!gst_vec_deque_is_empty (queue_)) {
+      auto fence_data = *((FenceWaitData *)
+          gst_vec_deque_pop_head_struct (queue_));
+      auto completed = fence_->GetCompletedValue ();
+      if (completed < fence_data.fence_value)
+        fence_->SetEventOnCompletion (fence_data.fence_value, nullptr);
+      gst_mini_object_unref (fence_data.resource);
+    }
+
+    gst_vec_deque_free (queue_);
+  }
+
+  void wait_async (UINT64 fence_value, GstCudaD3D12InteropResource * resource)
+  {
+    auto completed = fence_->GetCompletedValue ();
+    if (completed + ASYNC_FENCE_WAIT_DEPTH < fence_value) {
+      fence_->SetEventOnCompletion (fence_value - ASYNC_FENCE_WAIT_DEPTH,
+          nullptr);
+    }
+
+    FenceWaitData data;
+    data.fence_value = fence_value;
+    data.resource = resource;
+
+    std::lock_guard < std::mutex > lk (lock_);
+    gst_vec_deque_push_tail_struct (queue_, &data);
+    cond_.notify_one ();
+  }
+
+  ComPtr < ID3D12Fence > fence_;
+  GThread *thread_;
+  std::mutex lock_;
+  std::condition_variable cond_;
+  GstVecDeque *queue_;
+  bool shutdown_ = false;
+};
+
+/* Worker loop of FenceAsyncWaiter; leftovers are drained by the destructor */
+static gpointer
+gst_cuda_d3d12_interop_fence_wait_thread (gpointer data)
+{
+  auto self = (FenceAsyncWaiter *) data;
+
+  while (true) {
+    FenceWaitData fence_data;
+
+    {
+      std::unique_lock < std::mutex > lk (self->lock_);
+      while (gst_vec_deque_is_empty (self->queue_) && !self->shutdown_)
+        self->cond_.wait (lk);
+
+      if (self->shutdown_)
+        return nullptr;
+
+      fence_data = *((FenceWaitData *)
+          gst_vec_deque_pop_head_struct (self->queue_));
+    }
+
+    auto completed = self->fence_->GetCompletedValue ();
+    if (completed < fence_data.fence_value) {
+      GST_TRACE ("Waiting for fence value %" G_GUINT64_FORMAT,
+          fence_data.fence_value);
+      self->fence_->SetEventOnCompletion (fence_data.fence_value, nullptr);
+      GST_TRACE ("Fence completed with value %" G_GUINT64_FORMAT,
+          fence_data.fence_value);
+    } else {
+      GST_TRACE ("Fence was completed already, fence value: %" G_GUINT64_FORMAT
+          ", completed: %" G_GUINT64_FORMAT, fence_data.fence_value, completed);
+    }
+
+    gst_mini_object_unref (fence_data.resource);
+  }
+
+  return nullptr;
+}
+
+/* Instance state of GstCudaD3D12Interop */
+struct GstCudaD3D12InteropPrivate
+{
+  GstCudaD3D12InteropPrivate ()
+  {
+    fence_data_pool = gst_d3d12_fence_data_pool_new ();
+  }
+
+  ~GstCudaD3D12InteropPrivate ()
+  {
+    fence_waiter = nullptr;
+
+    while (!resource_pool.empty ()) {
+      auto resource = resource_pool.front ();
+      resource_pool.pop ();
+      gst_mini_object_unref (resource);
+    }
+
+    if (gst_cuda_context_push (context)) {
+      if (in_sem)
+        CuDestroyExternalSemaphore (in_sem);
+
+      if (out_sem)
+        CuDestroyExternalSemaphore (out_sem);
+
+      gst_cuda_context_pop (nullptr);
+    }
+
+    in_fence = nullptr;
+    out_fence = nullptr;
+
+    gst_clear_object (&fence_data_pool);
+    gst_clear_object (&context);
+    gst_clear_object (&device);
+  }
+
+  GstVideoInfo info;
+
+  D3D12_RESOURCE_DESC desc;
+  D3D12_HEAP_PROPERTIES heap_prop;
+  D3D12_RESOURCE_ALLOCATION_INFO alloc_info;
+
+  ComPtr < ID3D12Fence > in_fence;
+  ComPtr < ID3D12Fence > out_fence;
+  guint64 fence_val = 0;
+  CUexternalSemaphore in_sem = nullptr;
+  CUexternalSemaphore out_sem = nullptr;
+  D3D12_PLACED_SUBRESOURCE_FOOTPRINT layout[GST_VIDEO_MAX_PLANES];
+
+  GstD3D12FenceDataPool *fence_data_pool;
+
+  std::shared_ptr < FenceAsyncWaiter > fence_waiter;
+
+  std::mutex lock;
+  std::queue < GstCudaD3D12InteropResource * >resource_pool;
+
+  GstCudaContext *context = nullptr;
+  GstD3D12Device *device = nullptr;
+};
+
+struct _GstCudaD3D12Interop
+{
+  GstObject parent;
+
+  GstCudaD3D12InteropPrivate *priv;
+};
+
+#define gst_cuda_d3d12_interop_parent_class parent_class
+G_DEFINE_TYPE (GstCudaD3D12Interop, gst_cuda_d3d12_interop, GST_TYPE_OBJECT);
+
+static void gst_cuda_d3d12_interop_finalize (GObject * object);
+
+static void
+gst_cuda_d3d12_interop_class_init (GstCudaD3D12InteropClass * klass)
+{
+  auto object_class = G_OBJECT_CLASS (klass);
+
+  object_class->finalize = gst_cuda_d3d12_interop_finalize;
+}
+
+static void
+gst_cuda_d3d12_interop_finalize (GObject * object)
+{
+  auto self = GST_CUDA_D3D12_INTEROP (object);
+
+  delete self->priv;
+
+  G_OBJECT_CLASS (parent_class)->finalize (object);
+}
+
+static void
+gst_cuda_d3d12_interop_init (GstCudaD3D12Interop * self)
+{
+  self->priv = new GstCudaD3D12InteropPrivate ();
+}
+
+/* Creates an interop helper for @context and @device, or returns nullptr
+ * when they are on different adapters or a shared object can't be set up */
+GstCudaD3D12Interop *
+gst_cuda_d3d12_interop_new (GstCudaContext * context, GstD3D12Device * device,
+    const GstVideoInfo * info)
+{
+  gint64 cuda_luid = 0;
+  gint64 d3d_luid = 0;
+
+  g_object_get (context, "dxgi-adapter-luid", &cuda_luid, nullptr);
+  g_object_get (device, "adapter-luid", &d3d_luid, nullptr);
+
+  if (cuda_luid != d3d_luid)
+    return nullptr;
+
+  auto self = (GstCudaD3D12Interop *)
+      g_object_new (GST_TYPE_CUDA_D3D12_INTEROP, nullptr);
+  gst_object_ref_sink (self);
+
+  auto priv = self->priv;
+
+  priv->context = (GstCudaContext *) gst_object_ref (context);
+  priv->device = (GstD3D12Device *) gst_object_ref (device);
+
+  guint64 size;
+  if (!gst_d3d12_get_copyable_footprints (device, info, priv->layout, &size)) {
+    gst_object_unref (self);
+    return nullptr;
+  }
+
+  priv->info = *info;
+
+  D3D12_RESOURCE_DESC desc = { };
+  desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
+  desc.Alignment = 0;
+  desc.Width = size;
+  desc.Height = 1;
+  desc.DepthOrArraySize = 1;
+  desc.MipLevels = 1;
+  desc.Format = DXGI_FORMAT_UNKNOWN;
+  desc.SampleDesc.Count = 1;
+  desc.SampleDesc.Quality = 0;
+  desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
+  desc.Flags = D3D12_RESOURCE_FLAG_NONE;
+
+  priv->desc = desc;
+
+  D3D12_HEAP_PROPERTIES heap_prop = { };
+  heap_prop.Type = D3D12_HEAP_TYPE_DEFAULT;
+  heap_prop.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
+  heap_prop.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
+  heap_prop.CreationNodeMask = 1;
+  heap_prop.VisibleNodeMask = 1;
+
+  priv->heap_prop = heap_prop;
+
+  auto device_handle = gst_d3d12_device_get_device_handle (device);
+  priv->alloc_info = device_handle->GetResourceAllocationInfo (0, 1, &desc);
+
+  priv->in_fence = gst_d3d12_device_get_fence_handle (device,
+      D3D12_COMMAND_LIST_TYPE_COMPUTE);
+
+  auto hr = device_handle->CreateFence (0, D3D12_FENCE_FLAG_SHARED,
+      IID_PPV_ARGS (&priv->out_fence));
+  if (!gst_d3d12_result (hr, device)) {
+    gst_object_unref (self);
+    return nullptr;
+  }
+
+  HANDLE nt_handle;
+  hr = device_handle->CreateSharedHandle (priv->in_fence.Get (),
+      nullptr, GENERIC_ALL, nullptr, &nt_handle);
+  if (!gst_d3d12_result (hr, device)) {
+    gst_object_unref (self);
+    return nullptr;
+  }
+
+  if (!gst_cuda_context_push (context)) {
+    GST_ERROR_OBJECT (self, "Couldn't push context");
+    CloseHandle (nt_handle);
+    gst_object_unref (self);
+    return nullptr;
+  }
+
+  CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC sem_desc = { };
+  sem_desc.type = CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE;
+  sem_desc.handle.win32.handle = nt_handle;
+
+  auto cuda_ret = CuImportExternalSemaphore (&priv->in_sem, &sem_desc);
+  CloseHandle (nt_handle);
+
+  if (!gst_cuda_result (cuda_ret)) {
+    gst_cuda_context_pop (nullptr);
+    gst_object_unref (self);
+    return nullptr;
+  }
+
+  hr = device_handle->CreateSharedHandle (priv->out_fence.Get (), nullptr,
+      GENERIC_ALL, nullptr, &nt_handle);
+  if (!gst_d3d12_result (hr, device)) {
+    gst_cuda_context_pop (nullptr);
+    gst_object_unref (self);
+    return nullptr;
+  }
+
+  sem_desc.handle.win32.handle = nt_handle;
+  cuda_ret = CuImportExternalSemaphore (&priv->out_sem, &sem_desc);
+  CloseHandle (nt_handle);
+  gst_cuda_context_pop (nullptr);
+
+  if (!gst_cuda_result (cuda_ret)) {
+    gst_object_unref (self);
+    return nullptr;
+  }
+
+  priv->fence_waiter =
+      std::make_shared < FenceAsyncWaiter > (priv->out_fence.Get ());
+
+  return self;
+}
+
+static void
+gst_cuda_d3d12_interop_resource_free (GstCudaD3D12InteropResource * resource)
+{
+  delete resource;
+}
+
+/* Puts @resource back into the pool and drops its reference on @interop */
+static void
+gst_cuda_d3d12_interop_resource_release (GstCudaD3D12Interop * interop,
+    GstCudaD3D12InteropResource * resource)
+{
+  auto priv = interop->priv;
+  {
+    std::lock_guard < std::mutex > lk (priv->lock);
+    resource->dispose = nullptr;
+    resource->interop = nullptr;
+    priv->resource_pool.push (resource);
+  }
+
+  gst_object_unref (interop);
+}
+
+/* GstMiniObject dispose hook: revives the resource and returns it to the
+ * pool while it still belongs to an interop; otherwise lets it be freed */
+static gboolean
+gst_cuda_d3d12_interop_resource_dispose (GstCudaD3D12InteropResource * resource)
+{
+  if (!resource->interop)
+    return TRUE;
+
+  gst_mini_object_ref (resource);
+  gst_cuda_d3d12_interop_resource_release (resource->interop, resource);
+
+  return FALSE;
+}
+
+/* Pops a pooled resource or creates and imports a new shared buffer */
+static gboolean
+gst_cuda_d3d12_interop_acquire_resource (GstCudaD3D12Interop * self,
+    GstCudaD3D12InteropResource ** resource)
+{
+  auto priv = self->priv;
+
+  *resource = nullptr;
+
+  GstCudaD3D12InteropResource *ret = nullptr;
+
+  {
+    std::lock_guard < std::mutex > lk (priv->lock);
+    if (!priv->resource_pool.empty ()) {
+      ret = priv->resource_pool.front ();
+      priv->resource_pool.pop ();
+    }
+  }
+
+  if (!ret) {
+    auto device = gst_d3d12_device_get_device_handle (priv->device);
+    ComPtr < ID3D12Resource > resource_12;
+    auto hr = device->CreateCommittedResource (&priv->heap_prop,
+        D3D12_HEAP_FLAG_SHARED, &priv->desc, D3D12_RESOURCE_STATE_COMMON,
+        nullptr, IID_PPV_ARGS (&resource_12));
+    if (!gst_d3d12_result (hr, priv->device)) {
+      GST_ERROR_OBJECT (self, "Couldn't allocate resource");
+      return FALSE;
+    }
+
+    HANDLE nt_handle;
+    hr = device->CreateSharedHandle (resource_12.Get (), nullptr,
+        GENERIC_ALL, nullptr, &nt_handle);
+    if (!gst_d3d12_result (hr, priv->device)) {
+      GST_ERROR_OBJECT (self, "Couldn't create shared handle");
+      return FALSE;
+    }
+
+    if (!gst_cuda_context_push (priv->context)) {
+      GST_ERROR_OBJECT (self, "Couldn't push context");
+      CloseHandle (nt_handle);
+      return FALSE;
+    }
+
+    CUDA_EXTERNAL_MEMORY_HANDLE_DESC mem_desc = { };
+    mem_desc.type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE;
+    mem_desc.handle.win32.handle = nt_handle;
+    mem_desc.size = priv->alloc_info.SizeInBytes;
+    /* CUDA_EXTERNAL_MEMORY_DEDICATED = 0x1 */
+    mem_desc.flags = 0x1;
+
+    CUexternalMemory ext_mem;
+    auto cuda_ret = CuImportExternalMemory (&ext_mem, &mem_desc);
+    CloseHandle (nt_handle);
+    if (!gst_cuda_result (cuda_ret)) {
+      GST_ERROR_OBJECT (self, "Couldn't import NT handle");
+      gst_cuda_context_pop (nullptr);
+      return FALSE;
+    }
+
+    CUDA_EXTERNAL_MEMORY_BUFFER_DESC buf_desc = { };
+    buf_desc.size = priv->desc.Width;
+
+    CUdeviceptr devptr;
+    cuda_ret = CuExternalMemoryGetMappedBuffer (&devptr, ext_mem, &buf_desc);
+    if (!gst_cuda_result (cuda_ret)) {
+      GST_ERROR_OBJECT (self, "Couldn't get mapped buffer");
+      CuDestroyExternalMemory (ext_mem);
+      gst_cuda_context_pop (nullptr);
+      return FALSE;
+    }
+
+    gst_cuda_context_pop (nullptr);
+
+    ret = new GstCudaD3D12InteropResource ();
+    gst_mini_object_init (ret, 0, gst_cuda_d3d12_interop_resource_get_type (),
+        nullptr, nullptr,
+        (GstMiniObjectFreeFunction) gst_cuda_d3d12_interop_resource_free);
+
+    ret->context = (GstCudaContext *) gst_object_ref (priv->context);
+    ret->resource = resource_12;
+    ret->ext_mem = ext_mem;
+    ret->devptr = devptr;
+  }
+
+  ret->interop = (GstCudaD3D12Interop *) gst_object_ref (self);
+  ret->dispose =
+      (GstMiniObjectDisposeFunction) gst_cuda_d3d12_interop_resource_dispose;
+
+  *resource = ret;
+
+  return TRUE;
+}
+
+/* Schedules a copy of @src_d3d12 into @dst_cuda on @stream through a shared
+ * staging buffer. Returns once the work is queued, not when it is done. */
+gboolean
+gst_cuda_d3d12_interop_upload_async (GstCudaD3D12Interop * interop,
+    GstBuffer * dst_cuda, GstBuffer * src_d3d12, GstCudaStream * stream)
+{
+  GstD3D12Frame frame_12;
+  GstVideoFrame frame_cuda;
+
+  auto priv = interop->priv;
+
+  if (!gst_d3d12_frame_map (&frame_12, &priv->info,
+          src_d3d12, GST_MAP_READ_D3D12, GST_D3D12_FRAME_MAP_FLAG_NONE)) {
+    GST_ERROR_OBJECT (interop, "Couldn't map d3d12 buffer");
+    return FALSE;
+  }
+
+  if (!gst_d3d12_device_is_equal (priv->device, frame_12.device)) {
+    GST_WARNING_OBJECT (interop, "Different d3d12 device");
+    gst_d3d12_frame_unmap (&frame_12);
+    return FALSE;
+  }
+
+  if (!gst_video_frame_map (&frame_cuda, &priv->info, dst_cuda,
+          (GstMapFlags) (GST_MAP_WRITE | GST_MAP_CUDA))) {
+    GST_ERROR_OBJECT (interop, "Couldn't map cuda buffer");
+    gst_d3d12_frame_unmap (&frame_12);
+    return FALSE;
+  }
+
+  GstCudaD3D12InteropResource *resource;
+  if (!gst_cuda_d3d12_interop_acquire_resource (interop, &resource)) {
+    GST_ERROR_OBJECT (interop, "Couldn't acquire resource");
+    gst_d3d12_frame_unmap (&frame_12);
+    gst_video_frame_unmap (&frame_cuda);
+    return FALSE;
+  }
+
+  GstD3D12FenceData *fence_data;
+  gst_d3d12_fence_data_pool_acquire (priv->fence_data_pool, &fence_data);
+
+  gst_d3d12_fence_data_push (fence_data,
+      FENCE_NOTIFY_MINI_OBJECT (gst_buffer_ref (src_d3d12)));
+  gst_d3d12_fence_data_push (fence_data,
+      FENCE_NOTIFY_MINI_OBJECT (gst_mini_object_ref (resource)));
+
+  GstD3D12CopyTextureRegionArgs args[GST_VIDEO_MAX_PLANES] = { };
+  D3D12_BOX src_box[GST_VIDEO_MAX_PLANES] = { };
+  std::vector < ID3D12Fence * >fences_to_wait;
+  std::vector < guint64 > fence_values_to_wait;
+
+  for (guint i = 0; i < GST_VIDEO_INFO_N_PLANES (&priv->info); i++) {
+    auto fence = frame_12.fence[i].fence;
+    auto fence_val = frame_12.fence[i].fence_value;
+
+    if (fence) {
+      auto completed = fence->GetCompletedValue ();
+      if (completed < fence_val) {
+        fences_to_wait.push_back (fence);
+        fence_values_to_wait.push_back (fence_val);
+      }
+    }
+
+    src_box[i].left = 0;
+    src_box[i].top = 0;
+    src_box[i].right = MIN (frame_12.plane_rect[i].right,
+        priv->layout[i].Footprint.Width);
+    src_box[i].bottom = MIN (frame_12.plane_rect[i].bottom,
+        priv->layout[i].Footprint.Height);
+    src_box[i].front = 0;
+    src_box[i].back = 1;
+
+    /* NOTE(review): src_box[i] is filled in above but never handed to
+     * args[i] - confirm whether the copy args should reference it */
+    args[i].src.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
+    args[i].src.pResource = frame_12.data[i];
+    args[i].src.SubresourceIndex = frame_12.subresource_index[i];
+
+    args[i].dst.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
+    args[i].dst.pResource = resource->resource.Get ();
+    args[i].dst.PlacedFootprint = priv->layout[i];
+  }
+
+  guint64 fence_val;
+  auto ret = gst_d3d12_device_copy_texture_region (priv->device,
+      GST_VIDEO_INFO_N_PLANES (&priv->info), args, fence_data,
+      fences_to_wait.size (), fences_to_wait.data (),
+      fence_values_to_wait.data (),
+      D3D12_COMMAND_LIST_TYPE_COMPUTE, &fence_val);
+  gst_d3d12_frame_unmap (&frame_12);
+
+  if (!ret) {
+    GST_ERROR_OBJECT (interop, "Couldn't execute d3d12 copy");
+    gst_video_frame_unmap (&frame_cuda);
+    gst_mini_object_unref (resource);
+    return FALSE;
+  }
+
+  if (!gst_cuda_context_push (priv->context)) {
+    GST_ERROR_OBJECT (interop, "Couldn't push context");
+    gst_video_frame_unmap (&frame_cuda);
+    gst_mini_object_unref (resource);
+    return FALSE;
+  }
+
+  CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS wait_params = { };
+  wait_params.params.fence.value = fence_val;
+
+  auto stream_handle = gst_cuda_stream_get_handle (stream);
+  auto cuda_ret = CuWaitExternalSemaphoresAsync (&priv->in_sem, &wait_params,
+      1, stream_handle);
+  if (!gst_cuda_result (cuda_ret)) {
+    GST_ERROR_OBJECT (interop, "CuWaitExternalSemaphoresAsync failed");
+    gst_video_frame_unmap (&frame_cuda);
+    gst_mini_object_unref (resource);
+
+    gst_cuda_context_pop (nullptr);
+    priv->in_fence->SetEventOnCompletion (fence_val, nullptr);
+
+    return FALSE;
+  }
+
+  for (guint i = 0; i < GST_VIDEO_FRAME_N_PLANES (&frame_cuda); i++) {
+    CUDA_MEMCPY2D copy_params = { };
+    guint8 *src_data = (guint8 *) resource->devptr;
+
+    src_data += priv->layout[i].Offset;
+
+    copy_params.srcMemoryType = CU_MEMORYTYPE_DEVICE;
+    copy_params.srcDevice = (CUdeviceptr) src_data;
+    copy_params.srcPitch = priv->layout[i].Footprint.RowPitch;
+
+    copy_params.dstMemoryType = CU_MEMORYTYPE_DEVICE;
+    copy_params.dstDevice = (CUdeviceptr)
+        GST_VIDEO_FRAME_PLANE_DATA (&frame_cuda, i);
+    copy_params.dstPitch = GST_VIDEO_FRAME_PLANE_STRIDE (&frame_cuda, i);
+
+    copy_params.WidthInBytes = GST_VIDEO_FRAME_COMP_WIDTH (&frame_cuda, i) *
+        GST_VIDEO_FRAME_COMP_PSTRIDE (&frame_cuda, i);
+    copy_params.Height = GST_VIDEO_FRAME_COMP_HEIGHT (&frame_cuda, i);
+
+    cuda_ret = CuMemcpy2DAsync (&copy_params, stream_handle);
+    if (!gst_cuda_result (cuda_ret)) {
+      GST_ERROR_OBJECT (interop, "CuMemcpy2DAsync failed");
+      gst_video_frame_unmap (&frame_cuda);
+      gst_mini_object_unref (resource);
+
+      gst_cuda_context_pop (nullptr);
+      priv->in_fence->SetEventOnCompletion (fence_val, nullptr);
+
+      return FALSE;
+    }
+  }
+
+  priv->fence_val++;
+
+  CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS signal_params = { };
+  signal_params.params.fence.value = priv->fence_val;
+
+  cuda_ret = CuSignalExternalSemaphoresAsync (&priv->out_sem, &signal_params,
+      1, stream_handle);
+  gst_cuda_context_pop (nullptr);
+  gst_video_frame_unmap (&frame_cuda);
+
+  if (!gst_cuda_result (cuda_ret)) {
+    GST_ERROR_OBJECT (interop, "CuSignalExternalSemaphoresAsync failed");
+    gst_mini_object_unref (resource);
+    priv->fence_val--;
+
+    return FALSE;
+  }
+
+  priv->fence_waiter->wait_async (priv->fence_val, resource);
+
+  return TRUE;
+}
diff --git a/subprojects/gst-plugins-bad/sys/nvcodec/gstcudainterop_d3d12.h b/subprojects/gst-plugins-bad/sys/nvcodec/gstcudainterop_d3d12.h
new file mode 100644
index 0000000000..21d5324985
--- /dev/null
+++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstcudainterop_d3d12.h
@@ -0,0 +1,45 @@
+/* GStreamer
+ * Copyright (C) 2024 Seungha Yang
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#pragma once
+
+#include
+#include
+#include
+#include
+
+G_BEGIN_DECLS
+
+#define GST_TYPE_CUDA_D3D12_INTEROP (gst_cuda_d3d12_interop_get_type())
+G_DECLARE_FINAL_TYPE (GstCudaD3D12Interop, gst_cuda_d3d12_interop,
+    GST, CUDA_D3D12_INTEROP, GstObject)
+
+GType gst_cuda_d3d12_interop_resource_get_type (void);
+
+GstCudaD3D12Interop * gst_cuda_d3d12_interop_new (GstCudaContext * context,
+                                                  GstD3D12Device * device,
+                                                  const GstVideoInfo * info);
+
+gboolean gst_cuda_d3d12_interop_upload_async (GstCudaD3D12Interop * interop,
+                                              GstBuffer * dst_cuda,
+                                              GstBuffer * src_d3d12,
+                                              GstCudaStream * stream);
+
+G_END_DECLS
+
diff --git a/subprojects/gst-plugins-bad/sys/nvcodec/meson.build b/subprojects/gst-plugins-bad/sys/nvcodec/meson.build
index d5720d443e..dbc4b833a4 100644
--- a/subprojects/gst-plugins-bad/sys/nvcodec/meson.build
+++ b/subprojects/gst-plugins-bad/sys/nvcodec/meson.build
@@ -40,6 +40,10 @@ nvcodec_unix_sources = [
   'gstcudaipcserver_unix.cpp',
 ]
 
+nvcodec_d3d12_sources = [
+  'gstcudainterop_d3d12.cpp',
+]
+
 if get_option('nvcodec').disabled()
   subdir_done()
 endif
@@ -79,6 +83,12 @@ if not nvbuf_dep.found()
   nvcodec_sources += nvcodec_dgpu_sources
 endif
 
+if gstd3d12_dep.found()
+  extra_args += ['-DHAVE_GST_D3D12']
+  extra_deps += [gstd3d12_dep]
+  nvcodec_sources += nvcodec_d3d12_sources
+endif
+
 if cc.get_id() != 'msvc'
   if host_system == 'windows'
     # MinGW 32bits compiler seems to be complaining about redundant-decls