cuda: Add support for application cuda memory pool

Add a gst_cuda_register_allocator_need_pool_callback() method
to support memory allocation from an application's CUmemoryPool

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/7427>
Author: Seungha Yang, 2024-08-29 23:52:08 +09:00 (committed by GStreamer Marge Bot)
parent 3c3b8e79c2
commit ad02fae416
2 changed files with 81 additions and 6 deletions
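
From the application's point of view, the new API is a single global registration: the callback is invoked the first time a stream-ordered GstCudaPoolAllocator starts, and it may hand back a GstCudaMemoryPool wrapping the application's CUmemoryPool. The sketch below is illustrative only; gst_cuda_memory_pool_new (context, props) is assumed from the companion gstcudamemorypool.h API (not part of this diff), and app_need_pool_cb / app_register_cuda_pool_callback are made-up names.

#include <gst/cuda/gstcudamemory.h>

/* Hypothetical application-side callback. The return value is
 * (transfer full): the allocator takes ownership and releases the pool
 * when it is finalized. Returning NULL keeps the default
 * cuMemAllocAsync path. */
static GstCudaMemoryPool *
app_need_pool_cb (GstCudaAllocator * allocator, GstCudaContext * context,
    gpointer user_data)
{
  /* gst_cuda_memory_pool_new() is assumed from gstcudamemorypool.h;
   * NULL props means driver defaults */
  return gst_cuda_memory_pool_new (context, NULL);
}

static void
app_register_cuda_pool_callback (void)
{
  /* Register once, before any stream-ordered pool allocator starts */
  gst_cuda_register_allocator_need_pool_callback (app_need_pool_cb,
      NULL, NULL);
}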


@@ -28,6 +28,7 @@
 #include <string.h>
 #include <map>
 #include <memory>
+#include <mutex>
 
 #ifdef G_OS_WIN32
 #include <windows.h>
@@ -40,6 +41,13 @@ GST_DEBUG_CATEGORY_STATIC (cuda_allocator_debug);
 static GstAllocator *_gst_cuda_allocator = nullptr;
 
+/* *INDENT-OFF* */
+static std::recursive_mutex _callback_lock;
+/* *INDENT-ON* */
+static GstCudaMemoryAllocatorNeedPoolCallback _need_pool_callback = nullptr;
+static gpointer _alloc_callback_user_data = nullptr;
+static GDestroyNotify _alloc_callback_notify = nullptr;
+
 GType
 gst_cuda_memory_alloc_method_get_type (void)
 {
@@ -292,7 +300,8 @@ do_align (size_t value, size_t align)
 static GstMemory *
 gst_cuda_allocator_alloc_internal (GstCudaAllocator * self,
     GstCudaContext * context, GstCudaStream * stream, const GstVideoInfo * info,
-    guint width_in_bytes, guint alloc_height, gboolean stream_ordered)
+    guint width_in_bytes, guint alloc_height, gboolean stream_ordered,
+    GstCudaMemoryPool * pool)
 {
   GstCudaMemoryPrivate *priv;
   GstCudaMemory *mem;
@@ -310,8 +319,14 @@ gst_cuda_allocator_alloc_internal (GstCudaAllocator * self,
     g_assert (stream_handle);
 
     pitch = do_align (width_in_bytes, texture_align);
-    ret = gst_cuda_result (CuMemAllocAsync (&data, pitch * alloc_height,
-            stream_handle));
+    if (pool) {
+      ret = gst_cuda_result (CuMemAllocFromPoolAsync (&data,
+              pitch * alloc_height, gst_cuda_memory_pool_get_handle (pool),
+              stream_handle));
+    } else {
+      ret = gst_cuda_result (CuMemAllocAsync (&data, pitch * alloc_height,
+              stream_handle));
+    }
 
     if (ret)
       ret = gst_cuda_result (CuStreamSynchronize (stream_handle));
@@ -605,7 +620,7 @@ cuda_mem_copy (GstMemory * mem, gssize offset, gssize size)
   if (!copy) {
     copy = gst_cuda_allocator_alloc_internal (self, context, stream,
         &src_mem->info, src_mem->priv->width_in_bytes, src_mem->priv->height,
-        FALSE);
+        FALSE, nullptr);
   }
 
   if (!copy) {
@@ -1148,7 +1163,7 @@ gst_cuda_allocator_alloc (GstCudaAllocator * allocator,
   alloc_height = gst_cuda_allocator_calculate_alloc_height (info);
 
   return gst_cuda_allocator_alloc_internal (allocator, context, stream,
-      info, info->stride[0], alloc_height, FALSE);
+      info, info->stride[0], alloc_height, FALSE, nullptr);
 }
 
 /**
@@ -1441,6 +1456,8 @@ struct _GstCudaPoolAllocatorPrivate
   guint cur_mems;
   gboolean flushing;
   gboolean stream_ordered_alloc;
+
+  GstCudaMemoryPool *mem_pool;
 };
 
 static void gst_cuda_pool_allocator_finalize (GObject * object);
@@ -1503,6 +1520,7 @@ gst_cuda_pool_allocator_finalize (GObject * object)
   gst_atomic_queue_unref (priv->queue);
   gst_poll_free (priv->poll);
   g_rec_mutex_clear (&priv->lock);
+  gst_clear_cuda_memory_pool (&priv->mem_pool);
   gst_clear_cuda_stream (&self->stream);
   gst_clear_object (&self->context);
@@ -1517,6 +1535,14 @@ gst_cuda_pool_allocator_start (GstCudaPoolAllocator * self)
   priv->started = TRUE;
 
+  if (priv->stream_ordered_alloc && !priv->mem_pool) {
+    std::lock_guard < std::recursive_mutex > lk (_callback_lock);
+    if (_need_pool_callback) {
+      priv->mem_pool = _need_pool_callback (GST_CUDA_ALLOCATOR (self),
+          self->context, _alloc_callback_user_data);
+    }
+  }
+
   return TRUE;
 }
@@ -1744,7 +1770,8 @@ gst_cuda_pool_allocator_alloc (GstCudaPoolAllocator * self, GstMemory ** mem)
     auto allocator = (GstCudaAllocator *) _gst_cuda_allocator;
     new_mem = gst_cuda_allocator_alloc_internal (allocator,
         self->context, self->stream, &self->info, self->info.stride[0],
-        gst_cuda_allocator_calculate_alloc_height (&self->info), TRUE);
+        gst_cuda_allocator_calculate_alloc_height (&self->info), TRUE,
+        priv->mem_pool);
   } else {
     new_mem = gst_cuda_allocator_alloc (nullptr,
         self->context, self->stream, &self->info);
@@ -1991,3 +2018,27 @@ gst_cuda_pool_allocator_acquire_memory (GstCudaPoolAllocator * allocator,
 
   return result;
 }
+
+/**
+ * gst_cuda_register_allocator_need_pool_callback:
+ * @callback: the callback
+ * @user_data: a user_data argument for the callback
+ * @notify: a destroy notify function
+ *
+ * Sets the global need-pool callback function
+ *
+ * Since: 1.26
+ */
+void gst_cuda_register_allocator_need_pool_callback
+    (GstCudaMemoryAllocatorNeedPoolCallback callback, gpointer user_data,
+    GDestroyNotify notify)
+{
+  std::lock_guard < std::recursive_mutex > lk (_callback_lock);
+  if (_alloc_callback_notify)
+    _alloc_callback_notify (_alloc_callback_user_data);
+
+  _need_pool_callback = callback;
+  _alloc_callback_user_data = user_data;
+  _alloc_callback_notify = notify;
+}


@@ -25,6 +25,7 @@
 #include <gst/cuda/cuda-prelude.h>
 #include <gst/cuda/gstcudacontext.h>
 #include <gst/cuda/gstcudastream.h>
+#include <gst/cuda/gstcudamemorypool.h>
 
 G_BEGIN_DECLS
@@ -183,6 +184,24 @@ GType gst_cuda_memory_alloc_method_get_type (void);
  */
 #define GST_CUDA_ALLOCATOR_OPT_STREAM_ORDERED "GstCudaAllocator.stream-ordered"
 
+/**
+ * GstCudaMemoryAllocatorNeedPoolCallback: (skip)
+ * @allocator: a #GstCudaAllocator
+ * @context: a #GstCudaContext
+ * @user_data: the user data
+ *
+ * Called to request a CUDA memory pool object. If the callee returns a memory
+ * pool, @allocator will allocate memory via cuMemAllocFromPoolAsync.
+ * Otherwise, the device's default memory pool will be used via cuMemAllocAsync.
+ *
+ * Returns: (transfer full) (nullable): a configured #GstCudaMemoryPool object
+ *
+ * Since: 1.26
+ */
+typedef GstCudaMemoryPool * (*GstCudaMemoryAllocatorNeedPoolCallback) (GstCudaAllocator * allocator,
+                                                                       GstCudaContext * context,
+                                                                       gpointer user_data);
+
 /**
  * GstCudaMemory:
  *
@@ -356,5 +375,10 @@ GST_CUDA_API
 GstFlowReturn gst_cuda_pool_allocator_acquire_memory (GstCudaPoolAllocator * allocator,
                                                       GstMemory ** memory);
 
+GST_CUDA_API
+void gst_cuda_register_allocator_need_pool_callback (GstCudaMemoryAllocatorNeedPoolCallback callback,
+                                                     gpointer user_data,
+                                                     GDestroyNotify notify);
+
 G_END_DECLS
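
The pool itself is created by the application with the CUDA driver API; this commit only defines how it reaches the allocator. A minimal sketch of creating such a CUmemoryPool follows: standard cuMemPoolCreate / cuMemPoolSetAttribute usage, where the 256 MiB release threshold is an arbitrary example value and wrapping the resulting handle into a GstCudaMemoryPool is left to the gstcudamemorypool.h API outside this diff.

#include <cuda.h>
#include <string.h>

/* Create a device-local memory pool and let it cache freed allocations up
 * to an example threshold instead of returning them to the OS right away. */
static CUresult
app_create_mem_pool (int device_ordinal, CUmemoryPool * out_pool)
{
  CUmemPoolProps props;
  cuuint64_t threshold = 256 * 1024 * 1024;
  CUresult ret;

  memset (&props, 0, sizeof (props));
  props.allocType = CU_MEM_ALLOCATION_TYPE_PINNED;
  props.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
  props.location.id = device_ordinal;

  ret = cuMemPoolCreate (out_pool, &props);
  if (ret != CUDA_SUCCESS)
    return ret;

  return cuMemPoolSetAttribute (*out_pool, CU_MEMPOOL_ATTR_RELEASE_THRESHOLD,
      &threshold);
}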