mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2025-01-23 15:48:23 +00:00
cuda/context: add gpu stack size property
Allows reducing the initial stack size of GPU threads. CUDA should automatically increase this value if a kernel requires a larger stack. This can save roughly 40 MB of GPU memory for a single nvh264enc instance. Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/8158>
This commit is contained in:
parent
d6563016ca
commit
dbf4915abd
2 changed files with 54 additions and 0 deletions
|
@ -509,6 +509,10 @@ so all CUDA functions that operate on the current context are affected.</doc>
|
|||
<property name="cuda-device-id" writable="1" construct-only="1" transfer-ownership="none" default-value="0">
|
||||
<type name="guint" c:type="guint"/>
|
||||
</property>
|
||||
<property name="default-gpu-stack-size" version="1.26" writable="1" transfer-ownership="none" default-value="1024">
|
||||
<doc xml:space="preserve" filename="../subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudacontext.cpp">The default stack size for each GPU thread.</doc>
|
||||
<type name="guint" c:type="guint"/>
|
||||
</property>
|
||||
<property name="external-resource-interop" version="1.26" transfer-ownership="none" default-value="FALSE">
|
||||
<doc xml:space="preserve" filename="../subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudacontext.cpp">External resource interop API support</doc>
|
||||
<type name="gboolean" c:type="gboolean"/>
|
||||
|
|
|
@ -57,6 +57,7 @@ enum
|
|||
PROP_STREAM_ORDERED_ALLOC,
|
||||
PROP_PREFER_STREAM_ORDERED_ALLLOC,
|
||||
PROP_EXT_INTEROP,
|
||||
PROP_DEFAULT_GPU_STACK_SIZE,
|
||||
};
|
||||
|
||||
struct _GstCudaContextPrivate
|
||||
|
@ -70,6 +71,7 @@ struct _GstCudaContextPrivate
|
|||
gboolean stream_ordered_alloc_supported;
|
||||
gboolean prefer_stream_ordered_alloc;
|
||||
gboolean ext_interop_supported;
|
||||
guint default_gpu_stack_size;
|
||||
|
||||
gint tex_align;
|
||||
|
||||
|
@ -182,6 +184,19 @@ gst_cuda_context_class_init (GstCudaContextClass * klass)
|
|||
"External resource interop API support", FALSE,
|
||||
(GParamFlags) (G_PARAM_READABLE | G_PARAM_STATIC_STRINGS)));
|
||||
|
||||
/**
|
||||
* GstCudaContext:default-gpu-stack-size:
|
||||
*
|
||||
* The default stack size, in bytes, for each GPU thread.
|
||||
*
|
||||
* Since: 1.26
|
||||
*/
|
||||
g_object_class_install_property (gobject_class, PROP_DEFAULT_GPU_STACK_SIZE,
|
||||
g_param_spec_uint ("default-gpu-stack-size",
|
||||
"Default GPU stack size",
|
||||
"The initial stack size for GPU threads", 0, G_MAXUINT, 1024,
|
||||
(GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));
|
||||
|
||||
gst_cuda_memory_init_once ();
|
||||
}
|
||||
|
||||
|
@ -214,6 +229,27 @@ gst_cuda_context_set_property (GObject * object, guint prop_id,
|
|||
priv->prefer_stream_ordered_alloc = g_value_get_boolean (value);
|
||||
g_mutex_unlock (&priv->lock);
|
||||
break;
|
||||
case PROP_DEFAULT_GPU_STACK_SIZE:{
|
||||
guint new_stack_limit = g_value_get_uint (value);
|
||||
|
||||
g_mutex_lock (&priv->lock);
|
||||
if (new_stack_limit != priv->default_gpu_stack_size) {
|
||||
size_t set_value = 0;
|
||||
gst_cuda_context_push (context);
|
||||
if (CuCtxSetLimit (CU_LIMIT_STACK_SIZE,
|
||||
(size_t) new_stack_limit) == CUDA_SUCCESS) {
|
||||
if (CuCtxGetLimit (&set_value, CU_LIMIT_STACK_SIZE) == CUDA_SUCCESS) {
|
||||
priv->default_gpu_stack_size = (guint) set_value;
|
||||
GST_INFO_OBJECT (context,
|
||||
"set default stack size to %" G_GUINT64_FORMAT,
|
||||
(guint64) set_value);
|
||||
}
|
||||
}
|
||||
gst_cuda_context_pop (nullptr);
|
||||
}
|
||||
g_mutex_unlock (&priv->lock);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
|
||||
break;
|
||||
|
@ -251,6 +287,9 @@ gst_cuda_context_get_property (GObject * object, guint prop_id,
|
|||
case PROP_EXT_INTEROP:
|
||||
g_value_set_boolean (value, priv->ext_interop_supported);
|
||||
break;
|
||||
case PROP_DEFAULT_GPU_STACK_SIZE:
|
||||
g_value_set_uint (value, priv->default_gpu_stack_size);
|
||||
break;
|
||||
default:
|
||||
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
|
||||
break;
|
||||
|
@ -635,6 +674,7 @@ gst_cuda_context_new_wrapped (CUcontext handler, CUdevice device)
|
|||
GList *iter;
|
||||
gint tex_align = 0;
|
||||
GstCudaContext *self;
|
||||
size_t default_gpu_stack_size;
|
||||
|
||||
g_return_val_if_fail (handler, nullptr);
|
||||
g_return_val_if_fail (device >= 0, nullptr);
|
||||
|
@ -654,6 +694,16 @@ gst_cuda_context_new_wrapped (CUcontext handler, CUdevice device)
|
|||
self->priv->context = handler;
|
||||
self->priv->device = device;
|
||||
self->priv->tex_align = tex_align;
|
||||
|
||||
gst_cuda_context_push (self);
|
||||
if (CuCtxGetLimit (&default_gpu_stack_size,
|
||||
CU_LIMIT_STACK_SIZE) == CUDA_SUCCESS) {
|
||||
self->priv->default_gpu_stack_size = (guint) default_gpu_stack_size;
|
||||
GST_DEBUG ("cuda default stack size %" G_GUINT64_FORMAT,
|
||||
(guint64) default_gpu_stack_size);
|
||||
}
|
||||
gst_cuda_context_pop (nullptr);
|
||||
|
||||
gst_object_ref_sink (self);
|
||||
|
||||
#ifdef G_OS_WIN32
|
||||
|
|
Loading…
Reference in a new issue