nvdec: Don't use default CUDA stream

NVDEC launches a CUDA kernel function (ConvertNV12BLtoNV12 or so) when CuvidMapVideoFrame() is called. It seems to be NVDEC's internal post-processing kernel function, maybe to convert tiled YUV to linear YUV format or something similar. The problem, if we don't pass a CUDA stream to the CuvidMapVideoFrame() call, is that NVDEC's internal kernel function will use the default CUDA stream, and then many other CUDA API calls will be blocked/serialized. To avoid the unnecessary blocking, we should pass our own CUDA stream object to the CuvidMapVideoFrame() call. Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/3605>
2025-02-04 05:22:30 +00:00 · 2022-12-19 18:41:46 +09:00 · 2022-12-19 18:41:46 +09:00 · 9914ff9b4c
commit 9914ff9b4c
parent 74a503c197
2 changed files with 3 additions and 0 deletions
--- a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdec.c
+++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdec.c
@@ -1167,6 +1167,7 @@ copy_video_frame_to_gl_textures (GstGLContext * context,
  proc_params.progressive_frame = dispinfo->progressive_frame;
  proc_params.top_field_first = dispinfo->top_field_first;
  proc_params.unpaired_field = dispinfo->repeat_first_field == -1;
+  proc_params.output_stream = nvdec->cuda_stream;

  data->ret = TRUE;

@@ -1309,6 +1310,7 @@ gst_nvdec_copy_device_to_memory (GstNvDec * nvdec,
  params.second_field = dispinfo->repeat_first_field + 1;
  params.top_field_first = dispinfo->top_field_first;
  params.unpaired_field = dispinfo->repeat_first_field < 0;
+  params.output_stream = nvdec->cuda_stream;

  if (!gst_cuda_result (CuvidMapVideoFrame (nvdec->decoder,
              dispinfo->picture_index, &dptr, &pitch, &params))) {
--- a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdecoder.c
+++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdecoder.c
@@ -397,6 +397,7 @@ gst_nv_decoder_frame_map (GstNvDecoderFrame * frame)

  /* TODO: check interlaced */
  params.progressive_frame = 1;
+  params.output_stream = self->cuda_stream;

  if (frame->mapped) {
    GST_WARNING_OBJECT (self, "Frame %p is mapped already", frame);