mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2025-01-11 18:05:37 +00:00
nvdec: Don't use default CUDA stream
Asynchronous CUDA operations on the default stream (NULL CUstream) are not much more beneficial than blocking operations, since all CUDA operations that belong to the CUDA context are synchronized with the default stream's operations. Note that a CUDA stream shares all resources of its corresponding CUDA context, but it can enable parallel operation, similar to the relationship between a thread and a process.
This commit is contained in:
parent
20d8f54e63
commit
5615e9258f
5 changed files with 60 additions and 7 deletions
|
@ -68,6 +68,8 @@ typedef struct _GstNvCodecCudaVTable
|
|||
CUresult (*CuMemcpy2D) (const CUDA_MEMCPY2D * pCopy);
|
||||
CUresult (*CuMemcpy2DAsync) (const CUDA_MEMCPY2D * pCopy, CUstream hStream);
|
||||
CUresult (*CuMemFree) (CUdeviceptr dptr);
|
||||
CUresult (*CuStreamCreate) (CUstream * phStream, unsigned int Flags);
|
||||
CUresult (*CuStreamDestroy) (CUstream hStream);
|
||||
CUresult (*CuStreamSynchronize) (CUstream hStream);
|
||||
|
||||
CUresult (*CuDeviceGet) (CUdevice * device, int ordinal);
|
||||
|
@ -125,6 +127,8 @@ gst_cuda_load_library (void)
|
|||
LOAD_SYMBOL (cuMemcpy2DAsync, CuMemcpy2DAsync);
|
||||
LOAD_SYMBOL (cuMemFree, CuMemFree);
|
||||
|
||||
LOAD_SYMBOL (cuStreamCreate, CuStreamCreate);
|
||||
LOAD_SYMBOL (cuStreamDestroy, CuStreamDestroy);
|
||||
LOAD_SYMBOL (cuStreamSynchronize, CuStreamSynchronize);
|
||||
|
||||
LOAD_SYMBOL (cuDeviceGet, CuDeviceGet);
|
||||
|
@ -290,6 +294,22 @@ CuMemFree (CUdeviceptr dptr)
|
|||
return gst_cuda_vtable.CuMemFree (dptr);
|
||||
}
|
||||
|
||||
CUresult
|
||||
CuStreamCreate (CUstream * phStream, unsigned int Flags)
|
||||
{
|
||||
g_assert (gst_cuda_vtable.CuStreamCreate != NULL);
|
||||
|
||||
return gst_cuda_vtable.CuStreamCreate (phStream, Flags);
|
||||
}
|
||||
|
||||
CUresult
|
||||
CuStreamDestroy (CUstream hStream)
|
||||
{
|
||||
g_assert (gst_cuda_vtable.CuStreamDestroy != NULL);
|
||||
|
||||
return gst_cuda_vtable.CuStreamDestroy (hStream);
|
||||
}
|
||||
|
||||
CUresult
|
||||
CuStreamSynchronize (CUstream hStream)
|
||||
{
|
||||
|
|
|
@ -98,6 +98,13 @@ CUresult CuMemcpy2DAsync (const CUDA_MEMCPY2D *pCopy, CUstream hStream);
|
|||
G_GNUC_INTERNAL
|
||||
CUresult CuMemFree (CUdeviceptr dptr);
|
||||
|
||||
G_GNUC_INTERNAL
|
||||
CUresult CuStreamCreate (CUstream *phStream,
|
||||
unsigned int Flags);
|
||||
|
||||
G_GNUC_INTERNAL
|
||||
CUresult CuStreamDestroy (CUstream hStream);
|
||||
|
||||
G_GNUC_INTERNAL
|
||||
CUresult CuStreamSynchronize (CUstream hStream);
|
||||
|
||||
|
|
|
@ -616,6 +616,7 @@ gst_nvdec_open (GstVideoDecoder * decoder)
|
|||
{
|
||||
GstNvDec *nvdec = GST_NVDEC (decoder);
|
||||
GstNvDecClass *klass = GST_NVDEC_GET_CLASS (nvdec);
|
||||
CUresult cuda_ret;
|
||||
|
||||
GST_DEBUG_OBJECT (nvdec, "creating CUDA context");
|
||||
|
||||
|
@ -624,6 +625,16 @@ gst_nvdec_open (GstVideoDecoder * decoder)
|
|||
GST_ERROR_OBJECT (nvdec, "failed to create CUDA context");
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
if (gst_cuda_context_push (nvdec->cuda_ctx)) {
|
||||
cuda_ret = CuStreamCreate (&nvdec->cuda_stream, CU_STREAM_NON_BLOCKING);
|
||||
if (!gst_cuda_result (cuda_ret)) {
|
||||
GST_WARNING_OBJECT (nvdec,
|
||||
"Could not create cuda stream, will use default stream");
|
||||
nvdec->cuda_stream = NULL;
|
||||
}
|
||||
gst_cuda_context_pop (NULL);
|
||||
}
|
||||
#if HAVE_NVCODEC_GST_GL
|
||||
gst_gl_ensure_element_data (GST_ELEMENT (nvdec),
|
||||
&nvdec->gl_display, &nvdec->other_gl_context);
|
||||
|
@ -723,7 +734,15 @@ gst_nvdec_close (GstVideoDecoder * decoder)
|
|||
{
|
||||
GstNvDec *nvdec = GST_NVDEC (decoder);
|
||||
|
||||
if (nvdec->cuda_ctx && nvdec->cuda_stream) {
|
||||
if (gst_cuda_context_push (nvdec->cuda_ctx)) {
|
||||
gst_cuda_result (CuStreamDestroy (nvdec->cuda_stream));
|
||||
gst_cuda_context_pop (NULL);
|
||||
}
|
||||
}
|
||||
|
||||
gst_clear_object (&nvdec->cuda_ctx);
|
||||
nvdec->cuda_stream = NULL;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
@ -819,7 +838,7 @@ copy_video_frame_to_gl_textures (GstGLContext * context,
|
|||
}
|
||||
|
||||
if (!gst_cuda_result (CuGraphicsMapResources (num_resources, resources,
|
||||
NULL))) {
|
||||
nvdec->cuda_stream))) {
|
||||
GST_WARNING_OBJECT (nvdec, "failed to map CUDA resources");
|
||||
data->ret = FALSE;
|
||||
goto unmap_video_frame;
|
||||
|
@ -844,18 +863,18 @@ copy_video_frame_to_gl_textures (GstGLContext * context,
|
|||
mcpy2d.dstArray = array;
|
||||
mcpy2d.Height = GST_VIDEO_INFO_COMP_HEIGHT (info, i);
|
||||
|
||||
if (!gst_cuda_result (CuMemcpy2DAsync (&mcpy2d, 0))) {
|
||||
if (!gst_cuda_result (CuMemcpy2DAsync (&mcpy2d, nvdec->cuda_stream))) {
|
||||
GST_WARNING_OBJECT (nvdec, "memcpy to mapped array failed");
|
||||
data->ret = FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
gst_cuda_result (CuStreamSynchronize (0));
|
||||
|
||||
if (!gst_cuda_result (CuGraphicsUnmapResources (num_resources, resources,
|
||||
NULL)))
|
||||
nvdec->cuda_stream)))
|
||||
GST_WARNING_OBJECT (nvdec, "failed to unmap CUDA resources");
|
||||
|
||||
gst_cuda_result (CuStreamSynchronize (nvdec->cuda_stream));
|
||||
|
||||
unmap_video_frame:
|
||||
if (!gst_cuda_result (CuvidUnmapVideoFrame (nvdec->decoder, dptr)))
|
||||
GST_WARNING_OBJECT (nvdec, "failed to unmap CUDA video frame");
|
||||
|
@ -943,7 +962,7 @@ gst_nvdec_copy_device_to_system (GstNvDec * nvdec,
|
|||
copy_params.dstPitch = GST_VIDEO_FRAME_PLANE_STRIDE (&video_frame, i);
|
||||
copy_params.Height = GST_VIDEO_FRAME_COMP_HEIGHT (&video_frame, i);
|
||||
|
||||
if (!gst_cuda_result (CuMemcpy2DAsync (&copy_params, 0))) {
|
||||
if (!gst_cuda_result (CuMemcpy2DAsync (&copy_params, nvdec->cuda_stream))) {
|
||||
GST_ERROR_OBJECT (nvdec, "failed to copy %dth plane", i);
|
||||
CuvidUnmapVideoFrame (nvdec->decoder, dptr);
|
||||
gst_video_frame_unmap (&video_frame);
|
||||
|
@ -952,7 +971,7 @@ gst_nvdec_copy_device_to_system (GstNvDec * nvdec,
|
|||
}
|
||||
}
|
||||
|
||||
gst_cuda_result (CuStreamSynchronize (0));
|
||||
gst_cuda_result (CuStreamSynchronize (nvdec->cuda_stream));
|
||||
|
||||
gst_video_frame_unmap (&video_frame);
|
||||
|
||||
|
|
|
@ -77,6 +77,7 @@ struct _GstNvDec
|
|||
CUvideoparser parser;
|
||||
CUvideodecoder decoder;
|
||||
GstCudaContext *cuda_ctx;
|
||||
CUstream cuda_stream;
|
||||
|
||||
guint width;
|
||||
guint height;
|
||||
|
|
|
@ -56,6 +56,12 @@ typedef enum
|
|||
CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD = 2
|
||||
} CUgraphicsRegisterFlags;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
CU_STREAM_DEFAULT = 0x0,
|
||||
CU_STREAM_NON_BLOCKING = 0x1
|
||||
} CUstream_flags;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
gsize srcXInBytes;
|
||||
|
|
Loading…
Reference in a new issue