nvdec: Don't use default CUDA stream

NVDEC launches a CUDA kernel function (ConvertNV12BLtoNV12 or so)
when CuvidMapVideoFrame() is called. This seems to be
NVDEC's internal post-processing kernel function, maybe
to convert tiled YUV to linear YUV format or something similar.

The problem with not passing a CUDA stream to the CuvidMapVideoFrame()
call is that NVDEC's internal kernel function will use the default CUDA stream.
As a result, many other CUDA API calls will be blocked/serialized.

To avoid this unnecessary blocking, we should pass our own
CUDA stream object to the CuvidMapVideoFrame() call.

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/3605>
This commit is contained in:
Seungha Yang 2022-12-19 18:41:46 +09:00 committed by GStreamer Marge Bot
parent 74a503c197
commit 9914ff9b4c
2 changed files with 3 additions and 0 deletions

View file

@ -1167,6 +1167,7 @@ copy_video_frame_to_gl_textures (GstGLContext * context,
proc_params.progressive_frame = dispinfo->progressive_frame; proc_params.progressive_frame = dispinfo->progressive_frame;
proc_params.top_field_first = dispinfo->top_field_first; proc_params.top_field_first = dispinfo->top_field_first;
proc_params.unpaired_field = dispinfo->repeat_first_field == -1; proc_params.unpaired_field = dispinfo->repeat_first_field == -1;
proc_params.output_stream = nvdec->cuda_stream;
data->ret = TRUE; data->ret = TRUE;
@ -1309,6 +1310,7 @@ gst_nvdec_copy_device_to_memory (GstNvDec * nvdec,
params.second_field = dispinfo->repeat_first_field + 1; params.second_field = dispinfo->repeat_first_field + 1;
params.top_field_first = dispinfo->top_field_first; params.top_field_first = dispinfo->top_field_first;
params.unpaired_field = dispinfo->repeat_first_field < 0; params.unpaired_field = dispinfo->repeat_first_field < 0;
params.output_stream = nvdec->cuda_stream;
if (!gst_cuda_result (CuvidMapVideoFrame (nvdec->decoder, if (!gst_cuda_result (CuvidMapVideoFrame (nvdec->decoder,
dispinfo->picture_index, &dptr, &pitch, &params))) { dispinfo->picture_index, &dptr, &pitch, &params))) {

View file

@ -397,6 +397,7 @@ gst_nv_decoder_frame_map (GstNvDecoderFrame * frame)
/* TODO: check interlaced */ /* TODO: check interlaced */
params.progressive_frame = 1; params.progressive_frame = 1;
params.output_stream = self->cuda_stream;
if (frame->mapped) { if (frame->mapped) {
GST_WARNING_OBJECT (self, "Frame %p is mapped already", frame); GST_WARNING_OBJECT (self, "Frame %p is mapped already", frame);