From d16a9237c33fdc6fc7f9a6c6a2f8add9d5793a8d Mon Sep 17 00:00:00 2001 From: Seungha Yang Date: Fri, 30 Aug 2019 17:19:44 +0900 Subject: [PATCH] cudacontext: Enable direct CUDA memory access over multiple GPUs If device contexts can access each other, enable peer access for better interoperability. Part-of: --- sys/nvcodec/gstcudacontext.c | 134 +++++++++++++++++++++++++++++++++++ sys/nvcodec/gstcudacontext.h | 4 ++ sys/nvcodec/gstcudaloader.c | 32 +++++++++ sys/nvcodec/gstcudaloader.h | 12 ++++ 4 files changed, 182 insertions(+) diff --git a/sys/nvcodec/gstcudacontext.c b/sys/nvcodec/gstcudacontext.c index b6406f6873..8962ff3281 100644 --- a/sys/nvcodec/gstcudacontext.c +++ b/sys/nvcodec/gstcudacontext.c @@ -28,6 +28,10 @@ GST_DEBUG_CATEGORY_STATIC (gst_cuda_context_debug); #define GST_CAT_DEFAULT gst_cuda_context_debug +/* store all context objects with weak refs */ +static GList *context_list = NULL; +G_LOCK_DEFINE_STATIC (list_lock); + enum { PROP_0, @@ -41,6 +45,8 @@ struct _GstCudaContextPrivate CUcontext context; CUdevice device; gint device_id; + + GHashTable *accessible_peer; }; #define gst_cuda_context_parent_class parent_class @@ -52,6 +58,10 @@ static void gst_cuda_context_get_property (GObject * object, guint prop_id, GValue * value, GParamSpec * pspec); static void gst_cuda_context_constructed (GObject * object); static void gst_cuda_context_finalize (GObject * object); +static void gst_cuda_context_weak_ref_notify (gpointer data, + GstCudaContext * context); +static void gst_cuda_context_enable_peer_access (GstCudaContext * context, + GstCudaContext * peer); static void gst_cuda_context_class_init (GstCudaContextClass * klass) @@ -80,6 +90,7 @@ gst_cuda_context_init (GstCudaContext * context) priv->context = NULL; priv->device_id = DEFAULT_DEVICE_ID; + priv->accessible_peer = g_hash_table_new (g_direct_hash, g_direct_equal); context->priv = priv; } @@ -131,6 +142,7 @@ gst_cuda_context_constructed (GObject * object) gchar name[256]; gint min = 0, 
maj = 0; gint i; + GList *iter; if (g_once_init_enter (&once)) { if (CuInit (0) != CUDA_SUCCESS) { @@ -183,6 +195,101 @@ gst_cuda_context_constructed (GObject * object) priv->context = cuda_ctx; priv->device = cuda_dev; + + G_LOCK (list_lock); + g_object_weak_ref (G_OBJECT (object), + (GWeakNotify) gst_cuda_context_weak_ref_notify, NULL); + for (iter = context_list; iter; iter = g_list_next (iter)) { + GstCudaContext *peer = (GstCudaContext *) iter->data; + + /* EnablePeerAccess is unidirectional */ + gst_cuda_context_enable_peer_access (context, peer); + gst_cuda_context_enable_peer_access (peer, context); + } + + context_list = g_list_append (context_list, context); + G_UNLOCK (list_lock); +} + +/* enable context -> peer access and remember it; must be called with list_lock taken */ +static void +gst_cuda_context_enable_peer_access (GstCudaContext * context, + GstCudaContext * peer) +{ + GstCudaContextPrivate *priv = context->priv; + GstCudaContextPrivate *peer_priv = peer->priv; + CUdevice device = priv->device; + CUdevice other_dev = peer_priv->device; + CUresult cuda_ret; + gint can_access = 0; + + cuda_ret = CuDeviceCanAccessPeer (&can_access, device, other_dev); + + if (!gst_cuda_result (cuda_ret) || !can_access) { + GST_DEBUG_OBJECT (context, + "Peer access to %" GST_PTR_FORMAT " is not allowed", peer); + return; + } + + if (!gst_cuda_context_push (context)) + return; /* nothing was pushed: nothing to enable or pop */ + if (gst_cuda_result (CuCtxEnablePeerAccess (peer_priv->context, 0))) { + GST_DEBUG_OBJECT (context, "Enable peer access to %" GST_PTR_FORMAT, peer); + g_hash_table_add (priv->accessible_peer, peer); + } + gst_cuda_context_pop (NULL); +} + +static void +gst_cuda_context_weak_ref_notify (gpointer data, GstCudaContext * context) +{ + GList *iter; + + G_LOCK (list_lock); + context_list = g_list_remove (context_list, context); + + /* disable self -> peer access */ + if (context->priv->accessible_peer) { + GHashTableIter peer_iter; + gpointer key; + g_hash_table_iter_init (&peer_iter, context->priv->accessible_peer); + if (gst_cuda_context_push (context)) { + while 
(g_hash_table_iter_next (&peer_iter, &key, NULL)) { + GstCudaContext *peer = GST_CUDA_CONTEXT (key); + CUcontext peer_handle = gst_cuda_context_get_handle (peer); + GST_DEBUG_OBJECT (context, + "Disable peer access to %" GST_PTR_FORMAT, peer); + gst_cuda_result (CuCtxDisablePeerAccess (peer_handle)); + } + gst_cuda_context_pop (NULL); + } + + g_hash_table_destroy (context->priv->accessible_peer); + context->priv->accessible_peer = NULL; + } + + /* disable peer -> self access */ + for (iter = context_list; iter; iter = g_list_next (iter)) { + GstCudaContext *other = (GstCudaContext *) iter->data; + GstCudaContextPrivate *other_priv = other->priv; + CUcontext self_handle; + + if (!other_priv->accessible_peer) + continue; + + if (g_hash_table_lookup (other_priv->accessible_peer, context)) { + if (gst_cuda_context_push (other)) { + self_handle = gst_cuda_context_get_handle (context); + GST_DEBUG_OBJECT (other, + "Disable peer access to %" GST_PTR_FORMAT, context); + gst_cuda_result (CuCtxDisablePeerAccess (self_handle)); + gst_cuda_context_pop (NULL); + } + + g_hash_table_remove (other_priv->accessible_peer, context); + } + } + G_UNLOCK (list_lock); } static void @@ -274,3 +381,30 @@ gst_cuda_context_get_handle (GstCudaContext * ctx) return ctx->priv->context; } + +/** + * gst_cuda_context_can_access_peer: + * @ctx: a #GstCudaContext + * @peer: a #GstCudaContext + * + * Query whether @ctx can access any memory which belongs to @peer directly. 
+ * + * Returns: %TRUE if @ctx can access @peer directly + */ +gboolean +gst_cuda_context_can_access_peer (GstCudaContext * ctx, GstCudaContext * peer) +{ + gboolean ret = FALSE; + + g_return_val_if_fail (GST_IS_CUDA_CONTEXT (ctx), FALSE); + g_return_val_if_fail (GST_IS_CUDA_CONTEXT (peer), FALSE); + + G_LOCK (list_lock); + if (ctx->priv->accessible_peer && + g_hash_table_lookup (ctx->priv->accessible_peer, peer)) { + ret = TRUE; + } + G_UNLOCK (list_lock); + + return ret; +} diff --git a/sys/nvcodec/gstcudacontext.h b/sys/nvcodec/gstcudacontext.h index ca6b3d7079..5b5d640a76 100644 --- a/sys/nvcodec/gstcudacontext.h +++ b/sys/nvcodec/gstcudacontext.h @@ -72,6 +72,10 @@ gboolean gst_cuda_context_pop (CUcontext * cuda_ctx); G_GNUC_INTERNAL gpointer gst_cuda_context_get_handle (GstCudaContext * ctx); +G_GNUC_INTERNAL +gboolean gst_cuda_context_can_access_peer (GstCudaContext * ctx, + GstCudaContext * peer); + G_END_DECLS #endif /* __GST_CUDA_CONTEXT_H__ */ diff --git a/sys/nvcodec/gstcudaloader.c b/sys/nvcodec/gstcudaloader.c index 01dc0d78d2..116e68117d 100644 --- a/sys/nvcodec/gstcudaloader.c +++ b/sys/nvcodec/gstcudaloader.c @@ -54,6 +54,9 @@ typedef struct _GstNvCodecCudaVTable CUresult (CUDAAPI * CuCtxPopCurrent) (CUcontext * pctx); CUresult (CUDAAPI * CuCtxPushCurrent) (CUcontext ctx); + CUresult (CUDAAPI * CuCtxEnablePeerAccess) (CUcontext peerContext, + unsigned int Flags); + CUresult (CUDAAPI * CuCtxDisablePeerAccess) (CUcontext peerContext); CUresult (CUDAAPI * CuGraphicsMapResources) (unsigned int count, CUgraphicsResource * resources, CUstream hStream); CUresult (CUDAAPI * CuGraphicsUnmapResources) (unsigned int count, @@ -87,6 +90,8 @@ typedef struct _GstNvCodecCudaVTable CUresult (CUDAAPI * CuDeviceGetName) (char *name, int len, CUdevice dev); CUresult (CUDAAPI * CuDeviceGetAttribute) (int *pi, CUdevice_attribute attrib, CUdevice dev); + CUresult (CUDAAPI * CuDeviceCanAccessPeer) (int *canAccessPeer, + CUdevice dev, CUdevice peerDev); CUresult (CUDAAPI * 
CuGraphicsGLRegisterImage) (CUgraphicsResource * pCudaResource, unsigned int image, unsigned int target, @@ -129,6 +134,8 @@ gst_cuda_load_library (void) LOAD_SYMBOL (cuCtxDestroy, CuCtxDestroy); LOAD_SYMBOL (cuCtxPopCurrent, CuCtxPopCurrent); LOAD_SYMBOL (cuCtxPushCurrent, CuCtxPushCurrent); + LOAD_SYMBOL (cuCtxEnablePeerAccess, CuCtxEnablePeerAccess); + LOAD_SYMBOL (cuCtxDisablePeerAccess, CuCtxDisablePeerAccess); LOAD_SYMBOL (cuGraphicsMapResources, CuGraphicsMapResources); LOAD_SYMBOL (cuGraphicsUnmapResources, CuGraphicsUnmapResources); @@ -155,6 +162,7 @@ gst_cuda_load_library (void) LOAD_SYMBOL (cuDeviceGetCount, CuDeviceGetCount); LOAD_SYMBOL (cuDeviceGetName, CuDeviceGetName); LOAD_SYMBOL (cuDeviceGetAttribute, CuDeviceGetAttribute); + LOAD_SYMBOL (cuDeviceCanAccessPeer, CuDeviceCanAccessPeer); /* cudaGL.h */ LOAD_SYMBOL (cuGraphicsGLRegisterImage, CuGraphicsGLRegisterImage); @@ -228,6 +236,22 @@ CuCtxPushCurrent (CUcontext ctx) return gst_cuda_vtable.CuCtxPushCurrent (ctx); } +CUresult CUDAAPI +CuCtxEnablePeerAccess (CUcontext peerContext, unsigned int Flags) +{ + g_assert (gst_cuda_vtable.CuCtxEnablePeerAccess != NULL); + + return gst_cuda_vtable.CuCtxEnablePeerAccess (peerContext, Flags); +} + +CUresult CUDAAPI +CuCtxDisablePeerAccess (CUcontext peerContext) +{ + g_assert (gst_cuda_vtable.CuCtxDisablePeerAccess != NULL); + + return gst_cuda_vtable.CuCtxDisablePeerAccess (peerContext); +} + CUresult CUDAAPI CuGraphicsMapResources (unsigned int count, CUgraphicsResource * resources, CUstream hStream) @@ -388,6 +412,14 @@ CuDeviceGetAttribute (int *pi, CUdevice_attribute attrib, CUdevice dev) return gst_cuda_vtable.CuDeviceGetAttribute (pi, attrib, dev); } +CUresult CUDAAPI +CuDeviceCanAccessPeer (int *canAccessPeer, CUdevice dev, CUdevice peerDev) +{ + g_assert (gst_cuda_vtable.CuDeviceCanAccessPeer != NULL); + + return gst_cuda_vtable.CuDeviceCanAccessPeer (canAccessPeer, dev, peerDev); +} + /* cudaGL.h */ CUresult CUDAAPI CuGraphicsGLRegisterImage 
(CUgraphicsResource * pCudaResource, diff --git a/sys/nvcodec/gstcudaloader.h b/sys/nvcodec/gstcudaloader.h index ba0e37279e..64e8831e72 100644 --- a/sys/nvcodec/gstcudaloader.h +++ b/sys/nvcodec/gstcudaloader.h @@ -55,6 +55,13 @@ CUresult CUDAAPI CuCtxPopCurrent (CUcontext * pctx); G_GNUC_INTERNAL CUresult CUDAAPI CuCtxPushCurrent (CUcontext ctx); +G_GNUC_INTERNAL +CUresult CUDAAPI CuCtxEnablePeerAccess (CUcontext peerContext, + unsigned int Flags); + +G_GNUC_INTERNAL +CUresult CUDAAPI CuCtxDisablePeerAccess (CUcontext peerContext); + G_GNUC_INTERNAL CUresult CUDAAPI CuGraphicsMapResources (unsigned int count, CUgraphicsResource * resources, @@ -133,6 +140,11 @@ CUresult CUDAAPI CuDeviceGetAttribute (int *pi, CUdevice_attribute attrib, CUdevice dev); +G_GNUC_INTERNAL +CUresult CUDAAPI CuDeviceCanAccessPeer (int *canAccessPeer, + CUdevice dev, + CUdevice peerDev); + /* cudaGL.h */ G_GNUC_INTERNAL CUresult CUDAAPI CuGraphicsGLRegisterImage (CUgraphicsResource * pCudaResource,