nvcodec: Port to GstCudaStream

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/3629>
This commit is contained in:
Seungha Yang 2022-12-21 23:59:19 +09:00 committed by GStreamer Marge Bot
parent a7c54ebc06
commit 0a81c8deb3
10 changed files with 68 additions and 121 deletions

View file

@ -170,7 +170,6 @@ static gboolean
gst_cuda_base_transform_start (GstBaseTransform * trans) gst_cuda_base_transform_start (GstBaseTransform * trans)
{ {
GstCudaBaseTransform *filter = GST_CUDA_BASE_TRANSFORM (trans); GstCudaBaseTransform *filter = GST_CUDA_BASE_TRANSFORM (trans);
CUresult cuda_ret;
if (!gst_cuda_ensure_element_context (GST_ELEMENT_CAST (filter), if (!gst_cuda_ensure_element_context (GST_ELEMENT_CAST (filter),
filter->device_id, &filter->context)) { filter->device_id, &filter->context)) {
@ -178,14 +177,10 @@ gst_cuda_base_transform_start (GstBaseTransform * trans)
return FALSE; return FALSE;
} }
if (gst_cuda_context_push (filter->context)) { filter->stream = gst_cuda_stream_new (filter->context);
cuda_ret = CuStreamCreate (&filter->cuda_stream, CU_STREAM_DEFAULT); if (!filter->stream) {
if (!gst_cuda_result (cuda_ret)) { GST_WARNING_OBJECT (filter,
GST_WARNING_OBJECT (filter, "Could not create cuda stream, will use default stream");
"Could not create cuda stream, will use default stream");
filter->cuda_stream = NULL;
}
gst_cuda_context_pop (NULL);
} }
return TRUE; return TRUE;
@ -196,15 +191,8 @@ gst_cuda_base_transform_stop (GstBaseTransform * trans)
{ {
GstCudaBaseTransform *filter = GST_CUDA_BASE_TRANSFORM (trans); GstCudaBaseTransform *filter = GST_CUDA_BASE_TRANSFORM (trans);
if (filter->context && filter->cuda_stream) { gst_clear_cuda_stream (&filter->stream);
if (gst_cuda_context_push (filter->context)) {
gst_cuda_result (CuStreamDestroy (filter->cuda_stream));
gst_cuda_context_pop (NULL);
}
}
gst_clear_object (&filter->context); gst_clear_object (&filter->context);
filter->cuda_stream = NULL;
return TRUE; return TRUE;
} }
@ -338,19 +326,11 @@ gst_cuda_base_transform_before_transform (GstBaseTransform * trans,
GST_INFO_OBJECT (self, "Updating device %" GST_PTR_FORMAT " -> %" GST_INFO_OBJECT (self, "Updating device %" GST_PTR_FORMAT " -> %"
GST_PTR_FORMAT, self->context, cmem->context); GST_PTR_FORMAT, self->context, cmem->context);
if (self->cuda_stream) { gst_clear_cuda_stream (&self->stream);
gst_cuda_context_push (self->context);
CuStreamDestroy (self->cuda_stream);
gst_cuda_context_pop (NULL);
self->cuda_stream = NULL;
}
gst_object_unref (self->context); gst_object_unref (self->context);
self->context = gst_object_ref (cmem->context); self->context = gst_object_ref (cmem->context);
gst_cuda_context_push (self->context); self->stream = gst_cuda_stream_new (self->context);
CuStreamCreate (&self->cuda_stream, CU_STREAM_DEFAULT);
gst_cuda_context_pop (NULL);
/* subclass will update internal object. /* subclass will update internal object.
* Note that gst_base_transform_reconfigure() might not trigger this * Note that gst_base_transform_reconfigure() might not trigger this

View file

@ -42,7 +42,7 @@ struct _GstCudaBaseTransform
GstBaseTransform parent; GstBaseTransform parent;
GstCudaContext *context; GstCudaContext *context;
CUstream cuda_stream; GstCudaStream *stream;
GstVideoInfo in_info; GstVideoInfo in_info;
GstVideoInfo out_info; GstVideoInfo out_info;

View file

@ -1380,7 +1380,7 @@ gst_cuda_base_convert_transform (GstBaseTransform * trans,
} }
if (!gst_cuda_converter_convert_frame (self->converter, &in_frame, &out_frame, if (!gst_cuda_converter_convert_frame (self->converter, &in_frame, &out_frame,
btrans->cuda_stream)) { gst_cuda_stream_get_handle (btrans->stream))) {
GST_ERROR_OBJECT (self, "Failed to convert frame"); GST_ERROR_OBJECT (self, "Failed to convert frame");
ret = GST_FLOW_ERROR; ret = GST_FLOW_ERROR;
} }

View file

@ -951,7 +951,7 @@ gst_cuda_memory_copy_transform (GstBaseTransform * trans, GstBuffer * inbuf,
GST_TRACE_OBJECT (self, "Both in/out buffers are not CUDA"); GST_TRACE_OBJECT (self, "Both in/out buffers are not CUDA");
if (!gst_cuda_buffer_copy (outbuf, GST_CUDA_BUFFER_COPY_SYSTEM, out_info, if (!gst_cuda_buffer_copy (outbuf, GST_CUDA_BUFFER_COPY_SYSTEM, out_info,
inbuf, GST_CUDA_BUFFER_COPY_SYSTEM, in_info, ctrans->context, inbuf, GST_CUDA_BUFFER_COPY_SYSTEM, in_info, ctrans->context,
ctrans->cuda_stream)) { gst_cuda_stream_get_handle (ctrans->stream))) {
return GST_FLOW_ERROR; return GST_FLOW_ERROR;
} }
@ -959,7 +959,7 @@ gst_cuda_memory_copy_transform (GstBaseTransform * trans, GstBuffer * inbuf,
} }
ret = gst_cuda_buffer_copy (outbuf, out_type, out_info, inbuf, in_type, ret = gst_cuda_buffer_copy (outbuf, out_type, out_info, inbuf, in_type,
in_info, ctrans->context, ctrans->cuda_stream); in_info, ctrans->context, gst_cuda_stream_get_handle (ctrans->stream));
/* system memory <-> CUDA copy fallback if possible */ /* system memory <-> CUDA copy fallback if possible */
if (!ret) { if (!ret) {
@ -1002,7 +1002,8 @@ gst_cuda_memory_copy_transform (GstBaseTransform * trans, GstBuffer * inbuf,
gst_cuda_buffer_copy_type_to_string (fallback_out_type)); gst_cuda_buffer_copy_type_to_string (fallback_out_type));
ret = gst_cuda_buffer_copy (outbuf, fallback_out_type, out_info, inbuf, ret = gst_cuda_buffer_copy (outbuf, fallback_out_type, out_info, inbuf,
fallback_in_type, in_info, ctrans->context, ctrans->cuda_stream); fallback_in_type, in_info, ctrans->context,
gst_cuda_stream_get_handle (ctrans->stream));
} }
if (ret) if (ret)
@ -1017,7 +1018,7 @@ gst_cuda_memory_copy_transform (GstBaseTransform * trans, GstBuffer * inbuf,
/* final fallback using system memory */ /* final fallback using system memory */
ret = gst_cuda_buffer_copy (outbuf, GST_CUDA_BUFFER_COPY_SYSTEM, out_info, ret = gst_cuda_buffer_copy (outbuf, GST_CUDA_BUFFER_COPY_SYSTEM, out_info,
inbuf, GST_CUDA_BUFFER_COPY_SYSTEM, in_info, ctrans->context, inbuf, GST_CUDA_BUFFER_COPY_SYSTEM, in_info, ctrans->context,
ctrans->cuda_stream); gst_cuda_stream_get_handle (ctrans->stream));
if (ret) if (ret)
return GST_FLOW_OK; return GST_FLOW_OK;

View file

@ -469,7 +469,6 @@ gst_nv_base_enc_open (GstVideoEncoder * enc)
GstNvBaseEnc *nvenc = GST_NV_BASE_ENC (enc); GstNvBaseEnc *nvenc = GST_NV_BASE_ENC (enc);
GstNvBaseEncClass *klass = GST_NV_BASE_ENC_GET_CLASS (enc); GstNvBaseEncClass *klass = GST_NV_BASE_ENC_GET_CLASS (enc);
GValue *formats = NULL; GValue *formats = NULL;
CUresult cuda_ret;
if (!gst_cuda_ensure_element_context (GST_ELEMENT_CAST (enc), if (!gst_cuda_ensure_element_context (GST_ELEMENT_CAST (enc),
klass->cuda_device_id, &nvenc->cuda_ctx)) { klass->cuda_device_id, &nvenc->cuda_ctx)) {
@ -477,14 +476,10 @@ gst_nv_base_enc_open (GstVideoEncoder * enc)
return FALSE; return FALSE;
} }
if (gst_cuda_context_push (nvenc->cuda_ctx)) { nvenc->stream = gst_cuda_stream_new (nvenc->cuda_ctx);
cuda_ret = CuStreamCreate (&nvenc->cuda_stream, CU_STREAM_DEFAULT); if (!nvenc->stream) {
if (!gst_cuda_result (cuda_ret)) { GST_WARNING_OBJECT (nvenc,
GST_WARNING_OBJECT (nvenc, "Could not create cuda stream, will use default stream");
"Could not create cuda stream, will use default stream");
nvenc->cuda_stream = NULL;
}
gst_cuda_context_pop (NULL);
} }
if (!gst_nv_base_enc_open_encode_session (nvenc)) { if (!gst_nv_base_enc_open_encode_session (nvenc)) {
@ -998,15 +993,8 @@ gst_nv_base_enc_close (GstVideoEncoder * enc)
nvenc->encoder = NULL; nvenc->encoder = NULL;
} }
if (nvenc->cuda_ctx && nvenc->cuda_stream) { gst_clear_cuda_stream (&nvenc->stream);
if (gst_cuda_context_push (nvenc->cuda_ctx)) {
gst_cuda_result (CuStreamDestroy (nvenc->cuda_stream));
gst_cuda_context_pop (NULL);
}
}
gst_clear_object (&nvenc->cuda_ctx); gst_clear_object (&nvenc->cuda_ctx);
nvenc->cuda_stream = NULL;
GST_OBJECT_LOCK (nvenc); GST_OBJECT_LOCK (nvenc);
if (nvenc->input_formats) if (nvenc->input_formats)
@ -2131,6 +2119,7 @@ _map_gl_input_buffer (GstGLContext * context, GstNvEncGLMapData * data)
CUDA_MEMCPY2D param; CUDA_MEMCPY2D param;
GstCudaGraphicsResource **resources; GstCudaGraphicsResource **resources;
guint num_resources; guint num_resources;
CUstream stream = gst_cuda_stream_get_handle (nvenc->stream);
data->ret = FALSE; data->ret = FALSE;
@ -2172,7 +2161,7 @@ _map_gl_input_buffer (GstGLContext * context, GstNvEncGLMapData * data)
gl_mem->mem.tex_id); gl_mem->mem.tex_id);
cuda_resource = cuda_resource =
gst_cuda_graphics_resource_map (resources[i], nvenc->cuda_stream, gst_cuda_graphics_resource_map (resources[i], stream,
CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY); CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY);
if (!cuda_resource) { if (!cuda_resource) {
@ -2210,18 +2199,18 @@ _map_gl_input_buffer (GstGLContext * context, GstNvEncGLMapData * data)
param.WidthInBytes = _get_plane_width (data->info, i); param.WidthInBytes = _get_plane_width (data->info, i);
param.Height = _get_plane_height (data->info, i); param.Height = _get_plane_height (data->info, i);
cuda_ret = CuMemcpy2DAsync (&param, nvenc->cuda_stream); cuda_ret = CuMemcpy2DAsync (&param, stream);
if (!gst_cuda_result (cuda_ret)) { if (!gst_cuda_result (cuda_ret)) {
GST_ERROR_OBJECT (data->nvenc, "failed to copy GL texture %u into cuda " GST_ERROR_OBJECT (data->nvenc, "failed to copy GL texture %u into cuda "
"ret :%d", gl_mem->mem.tex_id, cuda_ret); "ret :%d", gl_mem->mem.tex_id, cuda_ret);
g_assert_not_reached (); g_assert_not_reached ();
} }
gst_cuda_graphics_resource_unmap (resources[i], nvenc->cuda_stream); gst_cuda_graphics_resource_unmap (resources[i], stream);
data_pointer += dest_stride * _get_plane_height (&nvenc->input_info, i); data_pointer += dest_stride * _get_plane_height (&nvenc->input_info, i);
} }
gst_cuda_result (CuStreamSynchronize (nvenc->cuda_stream)); gst_cuda_result (CuStreamSynchronize (stream));
gst_cuda_context_pop (NULL); gst_cuda_context_pop (NULL);
data->ret = TRUE; data->ret = TRUE;
@ -2236,6 +2225,7 @@ gst_nv_base_enc_upload_frame (GstNvBaseEnc * nvenc, GstVideoFrame * frame,
CUdeviceptr dst = resource->cuda_pointer; CUdeviceptr dst = resource->cuda_pointer;
GstVideoInfo *info = &frame->info; GstVideoInfo *info = &frame->info;
CUresult cuda_ret; CUresult cuda_ret;
CUstream stream = gst_cuda_stream_get_handle (nvenc->stream);
if (!gst_cuda_context_push (nvenc->cuda_ctx)) { if (!gst_cuda_context_push (nvenc->cuda_ctx)) {
GST_ERROR_OBJECT (nvenc, "cannot push context"); GST_ERROR_OBJECT (nvenc, "cannot push context");
@ -2262,7 +2252,7 @@ gst_nv_base_enc_upload_frame (GstNvBaseEnc * nvenc, GstVideoFrame * frame,
param.WidthInBytes = _get_plane_width (info, i); param.WidthInBytes = _get_plane_width (info, i);
param.Height = _get_plane_height (info, i); param.Height = _get_plane_height (info, i);
cuda_ret = CuMemcpy2DAsync (&param, nvenc->cuda_stream); cuda_ret = CuMemcpy2DAsync (&param, stream);
if (!gst_cuda_result (cuda_ret)) { if (!gst_cuda_result (cuda_ret)) {
GST_ERROR_OBJECT (nvenc, "cannot copy %dth plane, ret %d", i, cuda_ret); GST_ERROR_OBJECT (nvenc, "cannot copy %dth plane, ret %d", i, cuda_ret);
gst_cuda_context_pop (NULL); gst_cuda_context_pop (NULL);
@ -2273,7 +2263,7 @@ gst_nv_base_enc_upload_frame (GstNvBaseEnc * nvenc, GstVideoFrame * frame,
dst += dest_stride * _get_plane_height (&nvenc->input_info, i); dst += dest_stride * _get_plane_height (&nvenc->input_info, i);
} }
gst_cuda_result (CuStreamSynchronize (nvenc->cuda_stream)); gst_cuda_result (CuStreamSynchronize (stream));
gst_cuda_context_pop (NULL); gst_cuda_context_pop (NULL);
return TRUE; return TRUE;

View file

@ -110,7 +110,7 @@ typedef struct {
gboolean i_adapt; gboolean i_adapt;
GstCudaContext * cuda_ctx; GstCudaContext * cuda_ctx;
CUstream cuda_stream; GstCudaStream * stream;
void * encoder; void * encoder;
NV_ENC_INITIALIZE_PARAMS init_params; NV_ENC_INITIALIZE_PARAMS init_params;
NV_ENC_CONFIG config; NV_ENC_CONFIG config;

View file

@ -896,7 +896,6 @@ gst_nvdec_open (GstVideoDecoder * decoder)
{ {
GstNvDec *nvdec = GST_NVDEC (decoder); GstNvDec *nvdec = GST_NVDEC (decoder);
GstNvDecClass *klass = GST_NVDEC_GET_CLASS (nvdec); GstNvDecClass *klass = GST_NVDEC_GET_CLASS (nvdec);
CUresult cuda_ret;
GST_DEBUG_OBJECT (nvdec, "creating CUDA context"); GST_DEBUG_OBJECT (nvdec, "creating CUDA context");
@ -906,14 +905,10 @@ gst_nvdec_open (GstVideoDecoder * decoder)
return FALSE; return FALSE;
} }
if (gst_cuda_context_push (nvdec->cuda_ctx)) { nvdec->stream = gst_cuda_stream_new (nvdec->cuda_ctx);
cuda_ret = CuStreamCreate (&nvdec->cuda_stream, CU_STREAM_DEFAULT); if (!nvdec->stream) {
if (!gst_cuda_result (cuda_ret)) { GST_WARNING_OBJECT (nvdec,
GST_WARNING_OBJECT (nvdec, "Could not create CUDA stream, will use default stream");
"Could not create CUDA stream, will use default stream");
nvdec->cuda_stream = NULL;
}
gst_cuda_context_pop (NULL);
} }
#if HAVE_NVCODEC_GST_GL #if HAVE_NVCODEC_GST_GL
gst_gl_ensure_element_data (GST_ELEMENT (nvdec), gst_gl_ensure_element_data (GST_ELEMENT (nvdec),
@ -1039,15 +1034,8 @@ gst_nvdec_close (GstVideoDecoder * decoder)
{ {
GstNvDec *nvdec = GST_NVDEC (decoder); GstNvDec *nvdec = GST_NVDEC (decoder);
if (nvdec->cuda_ctx && nvdec->cuda_stream) { gst_clear_cuda_stream (&nvdec->stream);
if (gst_cuda_context_push (nvdec->cuda_ctx)) {
gst_cuda_result (CuStreamDestroy (nvdec->cuda_stream));
gst_cuda_context_pop (NULL);
}
}
gst_clear_object (&nvdec->cuda_ctx); gst_clear_object (&nvdec->cuda_ctx);
nvdec->cuda_stream = NULL;
return TRUE; return TRUE;
} }
@ -1158,13 +1146,14 @@ copy_video_frame_to_gl_textures (GstGLContext * context,
guint pitch, i; guint pitch, i;
CUDA_MEMCPY2D mcpy2d = { 0, }; CUDA_MEMCPY2D mcpy2d = { 0, };
GstVideoInfo *info = &nvdec->output_state->info; GstVideoInfo *info = &nvdec->output_state->info;
CUstream stream = gst_cuda_stream_get_handle (nvdec->stream);
GST_LOG_OBJECT (nvdec, "picture index: %u", dispinfo->picture_index); GST_LOG_OBJECT (nvdec, "picture index: %u", dispinfo->picture_index);
proc_params.progressive_frame = dispinfo->progressive_frame; proc_params.progressive_frame = dispinfo->progressive_frame;
proc_params.top_field_first = dispinfo->top_field_first; proc_params.top_field_first = dispinfo->top_field_first;
proc_params.unpaired_field = dispinfo->repeat_first_field == -1; proc_params.unpaired_field = dispinfo->repeat_first_field == -1;
proc_params.output_stream = nvdec->cuda_stream; proc_params.output_stream = stream;
data->ret = TRUE; data->ret = TRUE;
@ -1208,7 +1197,7 @@ copy_video_frame_to_gl_textures (GstGLContext * context,
CUdeviceptr cuda_ptr; CUdeviceptr cuda_ptr;
gsize size; gsize size;
CUgraphicsResource cuda_resource = CUgraphicsResource cuda_resource =
gst_cuda_graphics_resource_map (resources[i], nvdec->cuda_stream, gst_cuda_graphics_resource_map (resources[i], stream,
CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD); CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD);
if (!cuda_resource) { if (!cuda_resource) {
@ -1232,17 +1221,17 @@ copy_video_frame_to_gl_textures (GstGLContext * context,
mcpy2d.dstDevice = cuda_ptr; mcpy2d.dstDevice = cuda_ptr;
mcpy2d.Height = GST_VIDEO_INFO_COMP_HEIGHT (info, i); mcpy2d.Height = GST_VIDEO_INFO_COMP_HEIGHT (info, i);
if (!gst_cuda_result (CuMemcpy2DAsync (&mcpy2d, nvdec->cuda_stream))) { if (!gst_cuda_result (CuMemcpy2DAsync (&mcpy2d, stream))) {
GST_WARNING_OBJECT (nvdec, "memcpy to mapped array failed"); GST_WARNING_OBJECT (nvdec, "memcpy to mapped array failed");
data->ret = FALSE; data->ret = FALSE;
} }
} }
gst_cuda_result (CuStreamSynchronize (nvdec->cuda_stream)); gst_cuda_result (CuStreamSynchronize (stream));
unmap_video_frame: unmap_video_frame:
for (i = 0; i < num_resources; i++) { for (i = 0; i < num_resources; i++) {
gst_cuda_graphics_resource_unmap (resources[i], nvdec->cuda_stream); gst_cuda_graphics_resource_unmap (resources[i], stream);
} }
if (!gst_cuda_result (CuvidUnmapVideoFrame (nvdec->decoder, dptr))) if (!gst_cuda_result (CuvidUnmapVideoFrame (nvdec->decoder, dptr)))
@ -1284,6 +1273,7 @@ gst_nvdec_copy_device_to_memory (GstNvDec * nvdec,
GstMemory *mem; GstMemory *mem;
gboolean use_device_copy = FALSE; gboolean use_device_copy = FALSE;
GstMapFlags map_flags = GST_MAP_WRITE; GstMapFlags map_flags = GST_MAP_WRITE;
CUstream stream = gst_cuda_stream_get_handle (nvdec->stream);
if (nvdec->mem_type == GST_NVDEC_MEM_TYPE_CUDA && if (nvdec->mem_type == GST_NVDEC_MEM_TYPE_CUDA &&
(mem = gst_buffer_peek_memory (output_buffer, 0)) && (mem = gst_buffer_peek_memory (output_buffer, 0)) &&
@ -1307,7 +1297,7 @@ gst_nvdec_copy_device_to_memory (GstNvDec * nvdec,
params.second_field = dispinfo->repeat_first_field + 1; params.second_field = dispinfo->repeat_first_field + 1;
params.top_field_first = dispinfo->top_field_first; params.top_field_first = dispinfo->top_field_first;
params.unpaired_field = dispinfo->repeat_first_field < 0; params.unpaired_field = dispinfo->repeat_first_field < 0;
params.output_stream = nvdec->cuda_stream; params.output_stream = stream;
if (!gst_cuda_result (CuvidMapVideoFrame (nvdec->decoder, if (!gst_cuda_result (CuvidMapVideoFrame (nvdec->decoder,
dispinfo->picture_index, &dptr, &pitch, &params))) { dispinfo->picture_index, &dptr, &pitch, &params))) {
@ -1334,7 +1324,7 @@ gst_nvdec_copy_device_to_memory (GstNvDec * nvdec,
* GST_VIDEO_INFO_COMP_PSTRIDE (info, i); * GST_VIDEO_INFO_COMP_PSTRIDE (info, i);
copy_params.Height = GST_VIDEO_INFO_COMP_HEIGHT (info, i); copy_params.Height = GST_VIDEO_INFO_COMP_HEIGHT (info, i);
if (!gst_cuda_result (CuMemcpy2DAsync (&copy_params, nvdec->cuda_stream))) { if (!gst_cuda_result (CuMemcpy2DAsync (&copy_params, stream))) {
GST_ERROR_OBJECT (nvdec, "failed to copy %dth plane", i); GST_ERROR_OBJECT (nvdec, "failed to copy %dth plane", i);
CuvidUnmapVideoFrame (nvdec->decoder, dptr); CuvidUnmapVideoFrame (nvdec->decoder, dptr);
gst_video_frame_unmap (&video_frame); gst_video_frame_unmap (&video_frame);
@ -1343,7 +1333,7 @@ gst_nvdec_copy_device_to_memory (GstNvDec * nvdec,
} }
} }
gst_cuda_result (CuStreamSynchronize (nvdec->cuda_stream)); gst_cuda_result (CuStreamSynchronize (stream));
gst_video_frame_unmap (&video_frame); gst_video_frame_unmap (&video_frame);

View file

@ -83,7 +83,7 @@ struct _GstNvDec
CUvideoparser parser; CUvideoparser parser;
CUvideodecoder decoder; CUvideodecoder decoder;
GstCudaContext *cuda_ctx; GstCudaContext *cuda_ctx;
CUstream cuda_stream; GstCudaStream *stream;
GstVideoInfo out_info; GstVideoInfo out_info;
GstClockTime min_latency; GstClockTime min_latency;

View file

@ -52,6 +52,7 @@
#include <gst/cuda/gstcudamemory.h> #include <gst/cuda/gstcudamemory.h>
#include <gst/cuda/gstcudabufferpool.h> #include <gst/cuda/gstcudabufferpool.h>
#include <gst/cuda/gstcudastream.h>
#include "gstnvdecoder.h" #include "gstnvdecoder.h"
#include <string.h> #include <string.h>
@ -79,7 +80,7 @@ struct _GstNvDecoder
{ {
GstObject parent; GstObject parent;
GstCudaContext *context; GstCudaContext *context;
CUstream cuda_stream; GstCudaStream *stream;
CUvideodecoder decoder_handle; CUvideodecoder decoder_handle;
GstNvDecoderFrameInfo *frame_pool; GstNvDecoderFrameInfo *frame_pool;
@ -125,14 +126,7 @@ gst_nv_decoder_dispose (GObject * object)
gst_nv_decoder_reset (self); gst_nv_decoder_reset (self);
if (self->context && self->cuda_stream) { gst_clear_cuda_stream (&self->stream);
if (gst_cuda_context_push (self->context)) {
gst_cuda_result (CuStreamDestroy (self->cuda_stream));
gst_cuda_context_pop (NULL);
self->cuda_stream = NULL;
}
}
gst_clear_object (&self->context); gst_clear_object (&self->context);
gst_clear_object (&self->gl_display); gst_clear_object (&self->gl_display);
gst_clear_object (&self->gl_context); gst_clear_object (&self->gl_context);
@ -213,16 +207,10 @@ gst_nv_decoder_new (GstCudaContext * context)
self->context = gst_object_ref (context); self->context = gst_object_ref (context);
gst_object_ref_sink (self); gst_object_ref_sink (self);
if (gst_cuda_context_push (context)) { self->stream = gst_cuda_stream_new (self->context);
CUresult cuda_ret; if (!self->stream) {
cuda_ret = CuStreamCreate (&self->cuda_stream, CU_STREAM_DEFAULT); GST_WARNING_OBJECT (self,
if (!gst_cuda_result (cuda_ret)) { "Could not create CUDA stream, will use default stream");
GST_WARNING_OBJECT (self,
"Could not create CUDA stream, will use default stream");
self->cuda_stream = NULL;
}
gst_cuda_context_pop (NULL);
} }
return self; return self;
@ -397,7 +385,7 @@ gst_nv_decoder_frame_map (GstNvDecoderFrame * frame)
/* TODO: check interlaced */ /* TODO: check interlaced */
params.progressive_frame = 1; params.progressive_frame = 1;
params.output_stream = self->cuda_stream; params.output_stream = gst_cuda_stream_get_handle (self->stream);
if (frame->mapped) { if (frame->mapped) {
GST_WARNING_OBJECT (self, "Frame %p is mapped already", frame); GST_WARNING_OBJECT (self, "Frame %p is mapped already", frame);
@ -606,6 +594,7 @@ gst_nv_decoder_copy_frame_to_gl_internal (GstGLContext * context,
guint i; guint i;
CUDA_MEMCPY2D copy_params = { 0, }; CUDA_MEMCPY2D copy_params = { 0, };
GstVideoInfo *info = &self->info; GstVideoInfo *info = &self->info;
CUstream stream = gst_cuda_stream_get_handle (self->stream);
data->ret = TRUE; data->ret = TRUE;
@ -667,13 +656,13 @@ gst_nv_decoder_copy_frame_to_gl_internal (GstGLContext * context,
copy_params.dstDevice = dst_ptr; copy_params.dstDevice = dst_ptr;
copy_params.Height = GST_VIDEO_INFO_COMP_HEIGHT (info, i); copy_params.Height = GST_VIDEO_INFO_COMP_HEIGHT (info, i);
if (!gst_cuda_result (CuMemcpy2DAsync (&copy_params, self->cuda_stream))) { if (!gst_cuda_result (CuMemcpy2DAsync (&copy_params, stream))) {
GST_WARNING_OBJECT (self, "memcpy to mapped array failed"); GST_WARNING_OBJECT (self, "memcpy to mapped array failed");
data->ret = FALSE; data->ret = FALSE;
} }
} }
gst_cuda_result (CuStreamSynchronize (self->cuda_stream)); gst_cuda_result (CuStreamSynchronize (stream));
unmap_video_frame: unmap_video_frame:
for (i = 0; i < num_resources; i++) { for (i = 0; i < num_resources; i++) {
@ -711,6 +700,7 @@ gst_nv_decoder_copy_frame_to_system (GstNvDecoder * decoder,
CUDA_MEMCPY2D copy_params = { 0, }; CUDA_MEMCPY2D copy_params = { 0, };
gint i; gint i;
gboolean ret = FALSE; gboolean ret = FALSE;
CUstream stream = gst_cuda_stream_get_handle (decoder->stream);
if (!gst_video_frame_map (&video_frame, &decoder->info, buffer, if (!gst_video_frame_map (&video_frame, &decoder->info, buffer,
GST_MAP_WRITE)) { GST_MAP_WRITE)) {
@ -737,13 +727,13 @@ gst_nv_decoder_copy_frame_to_system (GstNvDecoder * decoder,
copy_params.dstPitch = GST_VIDEO_FRAME_PLANE_STRIDE (&video_frame, i); copy_params.dstPitch = GST_VIDEO_FRAME_PLANE_STRIDE (&video_frame, i);
copy_params.Height = GST_VIDEO_FRAME_COMP_HEIGHT (&video_frame, i); copy_params.Height = GST_VIDEO_FRAME_COMP_HEIGHT (&video_frame, i);
if (!gst_cuda_result (CuMemcpy2DAsync (&copy_params, decoder->cuda_stream))) { if (!gst_cuda_result (CuMemcpy2DAsync (&copy_params, stream))) {
GST_ERROR_OBJECT (decoder, "failed to copy %dth plane", i); GST_ERROR_OBJECT (decoder, "failed to copy %dth plane", i);
goto done; goto done;
} }
} }
gst_cuda_result (CuStreamSynchronize (decoder->cuda_stream)); gst_cuda_result (CuStreamSynchronize (stream));
ret = TRUE; ret = TRUE;
@ -766,6 +756,7 @@ gst_nv_decoder_copy_frame_to_cuda (GstNvDecoder * decoder,
gint i; gint i;
gboolean ret = FALSE; gboolean ret = FALSE;
GstVideoFrame video_frame; GstVideoFrame video_frame;
CUstream stream = gst_cuda_stream_get_handle (decoder->stream);
mem = gst_buffer_peek_memory (buffer, 0); mem = gst_buffer_peek_memory (buffer, 0);
if (!gst_is_cuda_memory (mem)) { if (!gst_is_cuda_memory (mem)) {
@ -799,13 +790,13 @@ gst_nv_decoder_copy_frame_to_cuda (GstNvDecoder * decoder,
* GST_VIDEO_INFO_COMP_PSTRIDE (&decoder->info, 0); * GST_VIDEO_INFO_COMP_PSTRIDE (&decoder->info, 0);
copy_params.Height = GST_VIDEO_INFO_COMP_HEIGHT (&decoder->info, i); copy_params.Height = GST_VIDEO_INFO_COMP_HEIGHT (&decoder->info, i);
if (!gst_cuda_result (CuMemcpy2DAsync (&copy_params, decoder->cuda_stream))) { if (!gst_cuda_result (CuMemcpy2DAsync (&copy_params, stream))) {
GST_ERROR_OBJECT (decoder, "failed to copy %dth plane", i); GST_ERROR_OBJECT (decoder, "failed to copy %dth plane", i);
goto done; goto done;
} }
} }
gst_cuda_result (CuStreamSynchronize (decoder->cuda_stream)); gst_cuda_result (CuStreamSynchronize (stream));
ret = TRUE; ret = TRUE;

View file

@ -26,6 +26,7 @@
#include <gst/cuda/gstcudautils.h> #include <gst/cuda/gstcudautils.h>
#include <gst/cuda/gstcudamemory.h> #include <gst/cuda/gstcudamemory.h>
#include <gst/cuda/gstcudabufferpool.h> #include <gst/cuda/gstcudabufferpool.h>
#include <gst/cuda/gstcudastream.h>
#include <string.h> #include <string.h>
#ifdef GST_CUDA_HAS_D3D #ifdef GST_CUDA_HAS_D3D
@ -58,7 +59,7 @@ GST_DEBUG_CATEGORY_STATIC (gst_nv_encoder_debug);
struct _GstNvEncoderPrivate struct _GstNvEncoderPrivate
{ {
GstCudaContext *context; GstCudaContext *context;
CUstream cuda_stream; GstCudaStream *stream;
#ifdef GST_CUDA_HAS_D3D #ifdef GST_CUDA_HAS_D3D
GstD3D11Device *device; GstD3D11Device *device;
@ -245,13 +246,7 @@ gst_nv_encoder_reset (GstNvEncoder * self)
priv->session = NULL; priv->session = NULL;
} }
if (priv->context && priv->cuda_stream) { gst_clear_cuda_stream (&priv->stream);
gst_cuda_context_push (priv->context);
CuStreamDestroy (priv->cuda_stream);
gst_cuda_context_pop (nullptr);
priv->cuda_stream = nullptr;
}
g_queue_clear (&priv->free_tasks); g_queue_clear (&priv->free_tasks);
g_queue_clear (&priv->output_tasks); g_queue_clear (&priv->output_tasks);
@ -1275,12 +1270,12 @@ gst_nv_encoder_init_session (GstNvEncoder * self, GstBuffer * in_buf)
if (priv->selected_device_mode == GST_NV_ENCODER_DEVICE_CUDA && if (priv->selected_device_mode == GST_NV_ENCODER_DEVICE_CUDA &&
gst_nvenc_have_set_io_cuda_streams ()) { gst_nvenc_have_set_io_cuda_streams ()) {
CUresult cuda_ret = CuStreamCreate (&priv->cuda_stream, CU_STREAM_DEFAULT); priv->stream = gst_cuda_stream_new (priv->context);
if (gst_cuda_result (cuda_ret)) { if (priv->stream) {
CUstream stream = gst_cuda_stream_get_handle (priv->stream);
status = NvEncSetIOCudaStreams (priv->session, status = NvEncSetIOCudaStreams (priv->session,
(NV_ENC_CUSTREAM_PTR) & priv->cuda_stream, (NV_ENC_CUSTREAM_PTR) & stream, (NV_ENC_CUSTREAM_PTR) & stream);
(NV_ENC_CUSTREAM_PTR) & priv->cuda_stream);
if (status != NV_ENC_SUCCESS) { if (status != NV_ENC_SUCCESS) {
GST_WARNING_OBJECT (self, "NvEncSetIOCudaStreams failed, status: %" GST_WARNING_OBJECT (self, "NvEncSetIOCudaStreams failed, status: %"
GST_NVENC_STATUS_FORMAT, GST_NVENC_STATUS_ARGS (status)); GST_NVENC_STATUS_FORMAT, GST_NVENC_STATUS_ARGS (status));