/* GStreamer * Copyright (C) <2019> Seungha Yang * Copyright (C) 2022 Seungha Yang * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, * Boston, MA 02110-1301, USA. */ /** * element-cudaupload: * * Uploads data to NVIDA GPU via CUDA APIs * * Since: 1.20 */ #ifdef HAVE_CONFIG_H # include #endif #include "gstcudabasetransform.h" #include "gstcudamemorycopy.h" #include "gstcudaformat.h" #include "gstcudautils.h" #ifdef HAVE_NVCODEC_NVMM #include "gstcudanvmm.h" #endif #ifdef HAVE_NVCODEC_GST_GL #include #endif #ifdef HAVE_NVCODEC_GST_D3D11 #include #endif #include GST_DEBUG_CATEGORY_STATIC (gst_cuda_memory_copy_debug); #define GST_CAT_DEFAULT gst_cuda_memory_copy_debug typedef enum { GST_CUDA_MEMORY_COPY_MEM_SYSTEM, GST_CUDA_MEMORY_COPY_MEM_CUDA, GST_CUDA_MEMORY_COPY_MEM_NVMM, GST_CUDA_MEMORY_COPY_MEM_GL, GST_CUDA_MEMORY_COPY_MEM_D3D11, } GstCudaMemoryCopyMemType; typedef struct _GstCudaMemoryCopyClassData { GstCaps *sink_caps; GstCaps *src_caps; } GstCudaMemoryCopyClassData; struct _GstCudaMemoryCopy { GstCudaBaseTransform parent; gboolean in_nvmm; gboolean out_nvmm; #ifdef HAVE_NVCODEC_GST_GL GstGLDisplay *gl_display; GstGLContext *gl_context; GstGLContext *other_gl_context; #endif #ifdef HAVE_NVCODEC_GST_D3D11 GstD3D11Device *d3d11_device; #endif }; typedef struct _GstCudaUpload { GstCudaMemoryCopy parent; } GstCudaUpload; typedef struct _GstCudaUploadClass { GstCudaMemoryCopyClass parent_class; } GstCudaUploadClass; typedef struct _GstCudaDownload { GstCudaMemoryCopy parent; } GstCudaDownload; typedef struct _GstCudaDownloadClass { GstCudaMemoryCopyClass parent_class; } GstCudaDownloadClass; #define gst_cuda_memory_copy_parent_class parent_class G_DEFINE_ABSTRACT_TYPE (GstCudaMemoryCopy, gst_cuda_memory_copy, GST_TYPE_CUDA_BASE_TRANSFORM); static void gst_cuda_memory_copy_set_context (GstElement * element, GstContext * context); static gboolean gst_cuda_memory_copy_transform_stop (GstBaseTransform * trans); static GstCaps *gst_cuda_memory_copy_transform_caps (GstBaseTransform * trans, GstPadDirection direction, GstCaps * caps, GstCaps * filter); static gboolean gst_cuda_memory_copy_query (GstBaseTransform * trans, GstPadDirection direction, GstQuery * query); static gboolean gst_cuda_memory_copy_propose_allocation (GstBaseTransform * trans, GstQuery * decide_query, GstQuery * query); static gboolean gst_cuda_memory_copy_decide_allocation (GstBaseTransform * trans, GstQuery * query); static gboolean gst_cuda_memory_copy_set_info (GstCudaBaseTransform * btrans, GstCaps * incaps, GstVideoInfo * in_info, GstCaps * outcaps, GstVideoInfo * out_info); static GstFlowReturn gst_cuda_memory_copy_transform (GstBaseTransform * trans, GstBuffer * inbuf, GstBuffer * outbuf); static void gst_cuda_memory_copy_class_init (GstCudaMemoryCopyClass * klass) { GstElementClass *element_class = GST_ELEMENT_CLASS (klass); GstBaseTransformClass *trans_class = GST_BASE_TRANSFORM_CLASS (klass); GstCudaBaseTransformClass *btrans_class = GST_CUDA_BASE_TRANSFORM_CLASS (klass); element_class->set_context = GST_DEBUG_FUNCPTR (gst_cuda_memory_copy_set_context); trans_class->stop = GST_DEBUG_FUNCPTR (gst_cuda_memory_copy_transform_stop); trans_class->transform_caps = GST_DEBUG_FUNCPTR (gst_cuda_memory_copy_transform_caps); trans_class->propose_allocation = GST_DEBUG_FUNCPTR (gst_cuda_memory_copy_propose_allocation); trans_class->decide_allocation = GST_DEBUG_FUNCPTR (gst_cuda_memory_copy_decide_allocation); trans_class->query = GST_DEBUG_FUNCPTR (gst_cuda_memory_copy_query); btrans_class->set_info = GST_DEBUG_FUNCPTR (gst_cuda_memory_copy_set_info); } static void gst_cuda_memory_copy_init (GstCudaMemoryCopy * self) { } static void gst_cuda_memory_copy_set_context (GstElement * element, GstContext * context) { /* CUDA context is handled by parent class, handle only non-CUDA context */ #ifdef HAVE_NVCODEC_GST_GL GstCudaMemoryCopy *self = GST_CUDA_MEMORY_COPY (element); gst_gl_handle_set_context (element, context, &self->gl_display, &self->other_gl_context); #endif GST_ELEMENT_CLASS (parent_class)->set_context (element, context); } static gboolean gst_cuda_memory_copy_transform_stop (GstBaseTransform * trans) { #if defined(HAVE_NVCODEC_GST_GL) || defined(HAVE_NVCODEC_GST_D3D11) GstCudaMemoryCopy *self = GST_CUDA_MEMORY_COPY (trans); # ifdef HAVE_NVCODEC_GST_GL gst_clear_object (&self->gl_display); gst_clear_object (&self->gl_context); gst_clear_object (&self->other_gl_context); # endif # ifdef HAVE_NVCODEC_GST_D3D11 gst_clear_object (&self->d3d11_device); # endif #endif return GST_BASE_TRANSFORM_CLASS (parent_class)->stop (trans); } static GstCaps * _set_caps_features (const GstCaps * caps, const gchar * feature_name) { GstCaps *tmp = gst_caps_copy (caps); guint n = gst_caps_get_size (tmp); guint i = 0; for (i = 0; i < n; i++) gst_caps_set_features (tmp, i, gst_caps_features_from_string (feature_name)); return tmp; } static void _remove_field (GstCaps * caps, const gchar * field) { guint n = gst_caps_get_size (caps); guint i = 0; for (i = 0; i < n; i++) { GstStructure *s = gst_caps_get_structure (caps, i); gst_structure_remove_field (s, field); } } static GstCaps * create_transform_caps (GstCaps * caps, gboolean to_cuda) { GstCaps *ret = NULL; if (to_cuda) { GstCaps *sys_caps = gst_caps_simplify (_set_caps_features (caps, GST_CAPS_FEATURE_MEMORY_SYSTEM_MEMORY)); GstCaps *new_caps; ret = gst_caps_copy (sys_caps); #ifdef HAVE_NVCODEC_NVMM if (gst_cuda_nvmm_init_once ()) { new_caps = _set_caps_features (sys_caps, GST_CAPS_FEATURE_MEMORY_CUDA_NVMM_MEMORY); ret = gst_caps_merge (ret, new_caps); } #endif new_caps = _set_caps_features (sys_caps, GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY); ret = gst_caps_merge (ret, new_caps); _remove_field (ret, "texture-target"); gst_caps_unref (sys_caps); } else { GstCaps *new_caps; ret = gst_caps_ref (caps); #ifdef HAVE_NVCODEC_NVMM if (gst_cuda_nvmm_init_once ()) { new_caps = _set_caps_features (caps, GST_CAPS_FEATURE_MEMORY_CUDA_NVMM_MEMORY); ret = gst_caps_merge (ret, new_caps); } #endif #ifdef HAVE_NVCODEC_GST_GL new_caps = _set_caps_features (caps, GST_CAPS_FEATURE_MEMORY_GL_MEMORY); ret = gst_caps_merge (ret, new_caps); #endif #ifdef HAVE_NVCODEC_GST_D3D11 new_caps = _set_caps_features (caps, GST_CAPS_FEATURE_MEMORY_D3D11_MEMORY); ret = gst_caps_merge (ret, new_caps); #endif new_caps = _set_caps_features (caps, GST_CAPS_FEATURE_MEMORY_SYSTEM_MEMORY); ret = gst_caps_merge (ret, new_caps); _remove_field (ret, "texture-target"); } return ret; } static GstCaps * gst_cuda_memory_copy_transform_caps (GstBaseTransform * trans, GstPadDirection direction, GstCaps * caps, GstCaps * filter) { GstCudaMemoryCopyClass *klass = GST_CUDA_MEMORY_COPY_GET_CLASS (trans); GstCaps *result, *tmp; GST_DEBUG_OBJECT (trans, "Transforming caps %" GST_PTR_FORMAT " in direction %s", caps, (direction == GST_PAD_SINK) ? "sink" : "src"); if (direction == GST_PAD_SINK) { tmp = create_transform_caps (caps, klass->uploader); } else { tmp = create_transform_caps (caps, !klass->uploader); } if (filter) { result = gst_caps_intersect_full (filter, tmp, GST_CAPS_INTERSECT_FIRST); gst_caps_unref (tmp); } else { result = tmp; } GST_DEBUG_OBJECT (trans, "returning caps: %" GST_PTR_FORMAT, result); return result; } #ifdef HAVE_NVCODEC_GST_GL static void gst_cuda_memory_copy_ensure_gl_interop (GstGLContext * context, gboolean * ret) { guint device_count = 0; CUdevice device_list[1] = { 0, }; CUresult cuda_ret; *ret = FALSE; cuda_ret = CuGLGetDevices (&device_count, device_list, 1, CU_GL_DEVICE_LIST_ALL); if (cuda_ret != CUDA_SUCCESS || device_count == 0) return; *ret = TRUE; return; } static gboolean gst_cuda_memory_copy_ensure_gl_context (GstCudaMemoryCopy * self) { GstGLDisplay *display; GstGLContext *context; gboolean ret = FALSE; if (!gst_gl_ensure_element_data (GST_ELEMENT (self), &self->gl_display, &self->other_gl_context)) { GST_DEBUG_OBJECT (self, "No available OpenGL display"); return FALSE; } display = self->gl_display; if (!gst_gl_query_local_gl_context (GST_ELEMENT (self), GST_PAD_SRC, &self->gl_context) && !gst_gl_query_local_gl_context (GST_ELEMENT (self), GST_PAD_SINK, &self->gl_context)) { GST_INFO_OBJECT (self, "failed to query local OpenGL context"); gst_clear_object (&self->gl_context); self->gl_context = gst_gl_display_get_gl_context_for_thread (display, NULL); if (!self->gl_context || !gst_gl_display_add_context (display, GST_GL_CONTEXT (self->gl_context))) { gst_clear_object (&self->gl_context); if (!gst_gl_display_create_context (display, self->other_gl_context, &self->gl_context, NULL)) { GST_WARNING_OBJECT (self, "failed to create OpenGL context"); return FALSE; } if (!gst_gl_display_add_context (display, self->gl_context)) { GST_WARNING_OBJECT (self, "failed to add the OpenGL context to the display"); return FALSE; } } } context = self->gl_context; if (!gst_gl_context_check_gl_version (context, (GstGLAPI) (GST_GL_API_OPENGL | GST_GL_API_OPENGL3), 3, 0)) { GST_WARNING_OBJECT (self, "OpenGL context could not support PBO download"); return FALSE; } gst_gl_context_thread_add (context, (GstGLContextThreadFunc) gst_cuda_memory_copy_ensure_gl_interop, &ret); if (!ret) { GST_WARNING_OBJECT (self, "Current GL context is not CUDA compatible"); return FALSE; } return TRUE; } #endif #ifdef HAVE_NVCODEC_GST_D3D11 static gboolean gst_cuda_memory_copy_ensure_d3d11_interop (GstCudaContext * context, GstD3D11Device * device) { guint device_count = 0; CUdevice cuda_device_id; CUdevice device_list[1] = { 0, }; CUresult cuda_ret; g_object_get (context, "cuda-device-id", &cuda_device_id, NULL); cuda_ret = CuD3D11GetDevices (&device_count, device_list, 1, gst_d3d11_device_get_device_handle (device), CU_D3D11_DEVICE_LIST_ALL); if (cuda_ret != CUDA_SUCCESS || device_count == 0) return FALSE; if (device_list[0] != cuda_device_id) return FALSE; return TRUE; } static gboolean gst_cuda_memory_copy_ensure_d3d11_context (GstCudaMemoryCopy * self) { gint64 dxgi_adapter_luid = 0; g_object_get (GST_CUDA_BASE_TRANSFORM (self)->context, "dxgi-adapter-luid", &dxgi_adapter_luid, NULL); if (!gst_d3d11_ensure_element_data_for_adapter_luid (GST_ELEMENT (self), dxgi_adapter_luid, &self->d3d11_device)) { GST_DEBUG_OBJECT (self, "No available D3D11 device"); return FALSE; } if (!gst_cuda_memory_copy_ensure_d3d11_interop (GST_CUDA_BASE_TRANSFORM (self)->context, self->d3d11_device)) { GST_WARNING_OBJECT (self, "Current D3D11 device is not CUDA compatible"); return FALSE; } return TRUE; } #endif static gboolean gst_cuda_memory_copy_propose_allocation (GstBaseTransform * trans, GstQuery * decide_query, GstQuery * query) { GstCudaMemoryCopy *self = GST_CUDA_MEMORY_COPY (trans); GstCudaBaseTransform *ctrans = GST_CUDA_BASE_TRANSFORM (trans); GstVideoInfo info; GstBufferPool *pool = NULL; GstCaps *caps; guint size; if (!GST_BASE_TRANSFORM_CLASS (parent_class)->propose_allocation (trans, decide_query, query)) return FALSE; /* passthrough, we're done */ if (decide_query == NULL) return TRUE; gst_query_parse_allocation (query, &caps, NULL); if (caps == NULL) return FALSE; if (!gst_video_info_from_caps (&info, caps)) return FALSE; if (gst_query_get_n_allocation_pools (query) == 0) { GstCapsFeatures *features; GstStructure *config; features = gst_caps_get_features (caps, 0); if (features && gst_caps_features_contains (features, GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY)) { GST_DEBUG_OBJECT (self, "upstream support CUDA memory"); pool = gst_cuda_buffer_pool_new (ctrans->context); #ifdef HAVE_NVCODEC_GST_GL } else if (features && gst_caps_features_contains (features, GST_CAPS_FEATURE_MEMORY_GL_MEMORY) && gst_cuda_memory_copy_ensure_gl_context (self)) { GST_DEBUG_OBJECT (self, "upstream support GL memory"); pool = gst_gl_buffer_pool_new (self->gl_context); #endif #ifdef HAVE_NVCODEC_GST_D3D11 } else if (features && gst_caps_features_contains (features, GST_CAPS_FEATURE_MEMORY_D3D11_MEMORY) && gst_cuda_memory_copy_ensure_d3d11_context (self)) { GST_DEBUG_OBJECT (self, "upstream support D3D11 memory"); pool = gst_d3d11_buffer_pool_new (self->d3d11_device); #endif #ifdef HAVE_NVCODEC_NVMM } else if (features && gst_caps_features_contains (features, GST_CAPS_FEATURE_MEMORY_CUDA_NVMM_MEMORY) && gst_cuda_nvmm_init_once ()) { guint gpu_id = 0; GST_DEBUG_OBJECT (self, "upstream support NVMM memory"); g_object_get (ctrans->context, "cuda-device-id", &gpu_id, NULL); pool = gst_cuda_nvmm_buffer_pool_new (); if (!pool) { GST_ERROR_OBJECT (self, "Failed to create pool"); return FALSE; } config = gst_buffer_pool_get_config (pool); gst_buffer_pool_config_set_params (config, caps, sizeof (NvBufSurface), 0, 0); gst_structure_set (config, "memtype", G_TYPE_UINT, NVBUF_MEM_DEFAULT, "gpu-id", G_TYPE_UINT, gpu_id, "batch-size", G_TYPE_UINT, 1, NULL); if (!gst_buffer_pool_set_config (pool, config)) { GST_ERROR_OBJECT (self, "Failed to set config"); gst_object_unref (pool); return FALSE; } gst_query_add_allocation_pool (query, pool, sizeof (NvBufSurface), 0, 0); return TRUE; #endif } if (!pool) { GST_DEBUG_OBJECT (self, "creating system buffer pool"); pool = gst_video_buffer_pool_new (); } config = gst_buffer_pool_get_config (pool); gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META); size = GST_VIDEO_INFO_SIZE (&info); gst_buffer_pool_config_set_params (config, caps, size, 0, 0); if (!gst_buffer_pool_set_config (pool, config)) { GST_ERROR_OBJECT (ctrans, "failed to set config"); gst_object_unref (pool); return FALSE; } /* Get updated size by cuda buffer pool */ config = gst_buffer_pool_get_config (pool); gst_buffer_pool_config_get_params (config, NULL, &size, NULL, NULL); gst_structure_free (config); gst_query_add_allocation_pool (query, pool, size, 0, 0); gst_object_unref (pool); } gst_query_add_allocation_meta (query, GST_VIDEO_META_API_TYPE, NULL); return TRUE; } static gboolean gst_cuda_memory_copy_decide_allocation (GstBaseTransform * trans, GstQuery * query) { GstCudaMemoryCopy *self = GST_CUDA_MEMORY_COPY (trans); GstCudaBaseTransform *ctrans = GST_CUDA_BASE_TRANSFORM (trans); GstCaps *outcaps = NULL; GstBufferPool *pool = NULL; guint size, min, max; GstStructure *config; gboolean update_pool = FALSE; GstCapsFeatures *features; gboolean need_cuda = FALSE; #ifdef HAVE_NVCODEC_GST_GL gboolean need_gl = FALSE; #endif #ifdef HAVE_NVCODEC_GST_D3D11 gboolean need_d3d11 = FALSE; #endif #ifdef HAVE_NVCODEC_NVMM gboolean need_nvmm = FALSE; #endif gst_query_parse_allocation (query, &outcaps, NULL); if (!outcaps) return FALSE; features = gst_caps_get_features (outcaps, 0); if (features && gst_caps_features_contains (features, GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY)) { need_cuda = TRUE; } #ifdef HAVE_NVCODEC_GST_GL else if (features && gst_caps_features_contains (features, GST_CAPS_FEATURE_MEMORY_GL_MEMORY) && gst_cuda_memory_copy_ensure_gl_context (self)) { need_gl = TRUE; } #endif #ifdef HAVE_NVCODEC_GST_D3D11 else if (features && gst_caps_features_contains (features, GST_CAPS_FEATURE_MEMORY_D3D11_MEMORY) && gst_cuda_memory_copy_ensure_d3d11_context (self)) { need_d3d11 = TRUE; } #endif #ifdef HAVE_NVCODEC_NVMM else if (features && gst_caps_features_contains (features, GST_CAPS_FEATURE_MEMORY_CUDA_NVMM_MEMORY) && gst_cuda_nvmm_init_once ()) { need_nvmm = TRUE; } #endif if (gst_query_get_n_allocation_pools (query) > 0) { gst_query_parse_nth_allocation_pool (query, 0, &pool, &size, &min, &max); if (need_cuda && pool) { if (!GST_IS_CUDA_BUFFER_POOL (pool)) { gst_clear_object (&pool); } else { GstCudaBufferPool *cpool = GST_CUDA_BUFFER_POOL (pool); if (cpool->context != ctrans->context) { gst_clear_object (&pool); } } } #ifdef HAVE_NVCODEC_NVMM if (need_nvmm) { /* XXX: Always create new pool to set config option */ gst_clear_object (&pool); } #endif update_pool = TRUE; } else { GstVideoInfo vinfo; gst_video_info_from_caps (&vinfo, outcaps); size = GST_VIDEO_INFO_SIZE (&vinfo); min = max = 0; } if (!pool) { if (need_cuda) { GST_DEBUG_OBJECT (self, "creating cuda pool"); pool = gst_cuda_buffer_pool_new (ctrans->context); } #ifdef HAVE_NVCODEC_GST_GL else if (need_gl) { GST_DEBUG_OBJECT (self, "creating gl pool"); pool = gst_gl_buffer_pool_new (self->gl_context); } #endif #ifdef HAVE_NVCODEC_GST_D3D11 else if (need_d3d11) { GST_DEBUG_OBJECT (self, "creating d3d11 pool"); pool = gst_d3d11_buffer_pool_new (self->d3d11_device); } #endif #ifdef HAVE_NVCODEC_NVMM else if (need_nvmm) { guint gpu_id = 0; GST_DEBUG_OBJECT (self, "create nvmm pool"); g_object_get (ctrans->context, "cuda-device-id", &gpu_id, NULL); pool = gst_cuda_nvmm_buffer_pool_new (); if (!pool) { GST_ERROR_OBJECT (self, "Failed to create pool"); return FALSE; } config = gst_buffer_pool_get_config (pool); gst_buffer_pool_config_set_params (config, outcaps, sizeof (NvBufSurface), min, max); gst_structure_set (config, "memtype", G_TYPE_UINT, NVBUF_MEM_DEFAULT, "gpu-id", G_TYPE_UINT, gpu_id, "batch-size", G_TYPE_UINT, 1, NULL); if (!gst_buffer_pool_set_config (pool, config)) { GST_ERROR_OBJECT (self, "Failed to set config"); gst_object_unref (pool); return FALSE; } if (update_pool) { gst_query_set_nth_allocation_pool (query, 0, pool, sizeof (NvBufSurface), min, max); } else { gst_query_add_allocation_pool (query, pool, sizeof (NvBufSurface), min, max); } gst_object_unref (pool); /* Don't chain up to parent method, which will break NVMM specific * config */ return TRUE; } #endif else { GST_DEBUG_OBJECT (self, "creating system pool"); pool = gst_video_buffer_pool_new (); } } config = gst_buffer_pool_get_config (pool); gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META); gst_buffer_pool_config_set_params (config, outcaps, size, min, max); gst_buffer_pool_set_config (pool, config); /* Get updated size by cuda buffer pool */ config = gst_buffer_pool_get_config (pool); gst_buffer_pool_config_get_params (config, NULL, &size, NULL, NULL); gst_structure_free (config); if (update_pool) gst_query_set_nth_allocation_pool (query, 0, pool, size, min, max); else gst_query_add_allocation_pool (query, pool, size, min, max); gst_object_unref (pool); return GST_BASE_TRANSFORM_CLASS (parent_class)->decide_allocation (trans, query); } static gboolean gst_cuda_memory_copy_query (GstBaseTransform * trans, GstPadDirection direction, GstQuery * query) { #if defined(HAVE_NVCODEC_GST_GL) || defined(HAVE_NVCODEC_GST_D3D11) GstCudaMemoryCopy *self = GST_CUDA_MEMORY_COPY (trans); switch (GST_QUERY_TYPE (query)) { case GST_QUERY_CONTEXT: { gboolean ret; # ifdef HAVE_NVCODEC_GST_GL ret = gst_gl_handle_context_query (GST_ELEMENT (self), query, self->gl_display, self->gl_context, self->other_gl_context); if (ret) return TRUE; # endif # ifdef HAVE_NVCODEC_GST_D3D11 ret = gst_d3d11_handle_context_query (GST_ELEMENT (self), query, self->d3d11_device); if (ret) return TRUE; # endif break; } default: break; } #endif return GST_BASE_TRANSFORM_CLASS (parent_class)->query (trans, direction, query); } static gboolean gst_cuda_memory_copy_set_info (GstCudaBaseTransform * btrans, GstCaps * incaps, GstVideoInfo * in_info, GstCaps * outcaps, GstVideoInfo * out_info) { #ifdef HAVE_NVCODEC_NVMM GstCudaMemoryCopy *self = GST_CUDA_MEMORY_COPY (btrans); self->in_nvmm = FALSE; self->out_nvmm = FALSE; if (gst_cuda_nvmm_init_once ()) { GstCapsFeatures *features; features = gst_caps_get_features (incaps, 0); if (features && gst_caps_features_contains (features, GST_CAPS_FEATURE_MEMORY_CUDA_NVMM_MEMORY)) { GST_DEBUG_OBJECT (self, "Input memory type is NVMM"); self->in_nvmm = TRUE; } features = gst_caps_get_features (outcaps, 0); if (features && gst_caps_features_contains (features, GST_CAPS_FEATURE_MEMORY_CUDA_NVMM_MEMORY)) { GST_DEBUG_OBJECT (self, "Output memory type is NVMM"); self->out_nvmm = TRUE; } } #endif return TRUE; } static gboolean gst_cuda_memory_copy_transform_sysmem (GstCudaMemoryCopy * self, GstBuffer * inbuf, GstVideoInfo * in_info, GstBuffer * outbuf, GstVideoInfo * out_info) { GstVideoFrame in_frame, out_frame; gboolean ret; if (!gst_video_frame_map (&in_frame, in_info, inbuf, GST_MAP_READ)) { GST_ERROR_OBJECT (self, "Failed to map input buffer"); return FALSE; } if (!gst_video_frame_map (&out_frame, out_info, outbuf, GST_MAP_WRITE)) { gst_video_frame_unmap (&in_frame); GST_ERROR_OBJECT (self, "Failed to map input buffer"); return FALSE; } ret = gst_video_frame_copy (&out_frame, &in_frame); gst_video_frame_unmap (&out_frame); gst_video_frame_unmap (&in_frame); if (!ret) GST_ERROR_OBJECT (self, "Failed to copy buffer"); return ret; } static gboolean gst_cuda_memory_copy_map_and_fill_copy2d (GstCudaMemoryCopy * self, GstBuffer * buf, GstVideoInfo * info, GstCudaMemoryCopyMemType mem_type, GstVideoFrame * frame, GstMapInfo * map_info, gboolean is_src, CUDA_MEMCPY2D copy_params[GST_VIDEO_MAX_PLANES]) { gboolean buffer_mapped = FALSE; guint i; #ifdef HAVE_NVCODEC_NVMM if (mem_type == GST_CUDA_MEMORY_COPY_MEM_NVMM) { NvBufSurface *surface; NvBufSurfaceParams *surface_params; NvBufSurfacePlaneParams *plane_params; if (!gst_buffer_map (buf, map_info, GST_MAP_READ)) { GST_ERROR_OBJECT (self, "Failed to map input NVMM buffer"); memset (map_info, 0, sizeof (GstMapInfo)); return FALSE; } surface = (NvBufSurface *) map_info->data; GST_TRACE_OBJECT (self, "batch-size %d, num-filled %d, memType %d", surface->batchSize, surface->numFilled, surface->memType); surface_params = surface->surfaceList; buffer_mapped = TRUE; if (!surface_params) { GST_ERROR_OBJECT (self, "NVMM memory doesn't hold buffer"); goto error; } plane_params = &surface_params->planeParams; if (plane_params->num_planes != GST_VIDEO_INFO_N_PLANES (info)) { GST_ERROR_OBJECT (self, "num_planes mismatch, %d / %d", plane_params->num_planes, GST_VIDEO_INFO_N_PLANES (info)); goto error; } switch (surface->memType) { /* TODO: NVBUF_MEM_DEFAULT on jetson is SURFACE_ARRAY */ case NVBUF_MEM_DEFAULT: case NVBUF_MEM_CUDA_DEVICE: { for (i = 0; i < plane_params->num_planes; i++) { if (is_src) { copy_params[i].srcMemoryType = CU_MEMORYTYPE_DEVICE; copy_params[i].srcDevice = (CUdeviceptr) ((guint8 *) surface_params->dataPtr + plane_params->offset[i]); copy_params[i].srcPitch = plane_params->pitch[i]; } else { copy_params[i].dstMemoryType = CU_MEMORYTYPE_DEVICE; copy_params[i].dstDevice = (CUdeviceptr) ((guint8 *) surface_params->dataPtr + plane_params->offset[i]); copy_params[i].dstPitch = plane_params->pitch[i]; } } break; } case NVBUF_MEM_CUDA_PINNED: { for (i = 0; i < plane_params->num_planes; i++) { if (is_src) { copy_params[i].srcMemoryType = CU_MEMORYTYPE_HOST; copy_params[i].srcHost = ((guint8 *) surface_params->dataPtr + plane_params->offset[i]); copy_params[i].srcPitch = plane_params->pitch[i]; } else { copy_params[i].dstMemoryType = CU_MEMORYTYPE_HOST; copy_params[i].dstHost = ((guint8 *) surface_params->dataPtr + plane_params->offset[i]); copy_params[i].dstPitch = plane_params->pitch[i]; } } break; } case NVBUF_MEM_CUDA_UNIFIED: { for (i = 0; i < plane_params->num_planes; i++) { if (is_src) { copy_params[i].srcMemoryType = CU_MEMORYTYPE_UNIFIED; copy_params[i].srcDevice = (CUdeviceptr) ((guint8 *) surface_params->dataPtr + plane_params->offset[i]); copy_params[i].srcPitch = plane_params->pitch[i]; } else { copy_params[i].dstMemoryType = CU_MEMORYTYPE_UNIFIED; copy_params[i].dstDevice = (CUdeviceptr) ((guint8 *) surface_params->dataPtr + plane_params->offset[i]); copy_params[i].dstPitch = plane_params->pitch[i]; } } break; } default: GST_ERROR_OBJECT (self, "Unexpected NVMM memory type %d", surface->memType); goto error; } for (i = 0; i < plane_params->num_planes; i++) { copy_params[i].WidthInBytes = plane_params->width[i] * plane_params->bytesPerPix[i]; copy_params[i].Height = plane_params->height[i]; } } else #endif { GstMapFlags map_flags; if (is_src) map_flags = GST_MAP_READ; else map_flags = GST_MAP_WRITE; if (mem_type == GST_CUDA_MEMORY_COPY_MEM_CUDA) map_flags |= GST_MAP_CUDA; if (!gst_video_frame_map (frame, info, buf, map_flags)) { GST_ERROR_OBJECT (self, "Failed to map buffer"); goto error; } for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (frame); i++) { if (is_src) { if (mem_type == GST_CUDA_MEMORY_COPY_MEM_CUDA) { copy_params[i].srcMemoryType = CU_MEMORYTYPE_DEVICE; copy_params[i].srcDevice = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (frame, i); } else { copy_params[i].srcMemoryType = CU_MEMORYTYPE_HOST; copy_params[i].srcHost = GST_VIDEO_FRAME_PLANE_DATA (frame, i); } copy_params[i].srcPitch = GST_VIDEO_FRAME_PLANE_STRIDE (frame, i); } else { if (mem_type == GST_CUDA_MEMORY_COPY_MEM_CUDA) { copy_params[i].dstMemoryType = CU_MEMORYTYPE_DEVICE; copy_params[i].dstDevice = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (frame, i); } else { copy_params[i].dstMemoryType = CU_MEMORYTYPE_HOST; copy_params[i].dstHost = GST_VIDEO_FRAME_PLANE_DATA (frame, i); } copy_params[i].dstPitch = GST_VIDEO_FRAME_PLANE_STRIDE (frame, i); } copy_params[i].WidthInBytes = GST_VIDEO_FRAME_COMP_WIDTH (frame, i) * GST_VIDEO_FRAME_COMP_PSTRIDE (frame, i); copy_params[i].Height = GST_VIDEO_FRAME_COMP_HEIGHT (frame, i); } } return TRUE; error: if (buffer_mapped) { gst_buffer_unmap (buf, map_info); memset (map_info, 0, sizeof (GstMapInfo)); } return FALSE; } static void gst_cuda_memory_copy_unmap (GstCudaMemoryCopy * self, GstBuffer * buf, GstVideoFrame * frame, GstMapInfo * map_info) { if (frame->buffer) gst_video_frame_unmap (frame); if (map_info->data) gst_buffer_unmap (buf, map_info); } static gboolean gst_cuda_memory_copy_transform_cuda (GstCudaMemoryCopy * self, GstBuffer * inbuf, GstVideoInfo * in_info, GstCudaMemoryCopyMemType in_type, GstBuffer * outbuf, GstVideoInfo * out_info, GstCudaMemoryCopyMemType out_type) { GstCudaBaseTransform *trans = GST_CUDA_BASE_TRANSFORM (self); GstVideoFrame in_frame, out_frame; gboolean ret = FALSE; CUstream cuda_stream = trans->cuda_stream; GstMapInfo in_map, out_map; guint i; CUDA_MEMCPY2D copy_params[GST_VIDEO_MAX_PLANES]; memset (copy_params, 0, sizeof (copy_params)); memset (&in_frame, 0, sizeof (GstVideoFrame)); memset (&out_frame, 0, sizeof (GstVideoFrame)); memset (&in_map, 0, sizeof (GstMapInfo)); memset (&out_map, 0, sizeof (GstMapInfo)); if (!gst_cuda_memory_copy_map_and_fill_copy2d (self, inbuf, in_info, in_type, &in_frame, &in_map, TRUE, copy_params)) { GST_ERROR_OBJECT (self, "Failed to map input buffer"); return FALSE; } if (!gst_cuda_memory_copy_map_and_fill_copy2d (self, outbuf, out_info, out_type, &out_frame, &out_map, FALSE, copy_params)) { GST_ERROR_OBJECT (self, "Failed to map output buffer"); gst_cuda_memory_copy_unmap (self, inbuf, &in_frame, &in_map); return FALSE; } if (!gst_cuda_context_push (trans->context)) { GST_ERROR_OBJECT (self, "Failed to push our context"); gst_cuda_context_pop (NULL); goto unmap_and_out; } for (i = 0; i < GST_VIDEO_INFO_N_PLANES (in_info); i++) { ret = gst_cuda_result (CuMemcpy2DAsync (©_params[i], cuda_stream)); if (!ret) { GST_ERROR_OBJECT (self, "Failed to copy plane %d", i); break; } } gst_cuda_result (CuStreamSynchronize (cuda_stream)); gst_cuda_context_pop (NULL); unmap_and_out: gst_cuda_memory_copy_unmap (self, inbuf, &in_frame, &in_map); gst_cuda_memory_copy_unmap (self, outbuf, &out_frame, &out_map); return ret; } #ifdef HAVE_NVCODEC_GST_GL typedef struct _GLCopyData { GstCudaMemoryCopy *self; GstBuffer *inbuf; GstVideoInfo *in_info; GstBuffer *outbuf; GstVideoInfo *out_info; gboolean pbo_to_cuda; GstCudaMemoryCopyMemType cuda_mem_type; gboolean ret; } GLCopyData; static GstCudaGraphicsResource * ensure_cuda_gl_graphics_resource (GstCudaMemoryCopy * self, GstMemory * mem) { GstCudaBaseTransform *trans = GST_CUDA_BASE_TRANSFORM (self); GQuark quark; GstCudaGraphicsResource *ret = NULL; if (!gst_is_gl_memory_pbo (mem)) { GST_WARNING_OBJECT (self, "memory is not GL PBO memory, %s", mem->allocator->mem_type); return NULL; } quark = gst_cuda_quark_from_id (GST_CUDA_QUARK_GRAPHICS_RESOURCE); ret = (GstCudaGraphicsResource *) gst_mini_object_get_qdata (GST_MINI_OBJECT (mem), quark); if (!ret) { GstGLMemoryPBO *pbo; GstGLBuffer *buf; GstMapInfo info; ret = gst_cuda_graphics_resource_new (trans->context, GST_OBJECT (GST_GL_BASE_MEMORY_CAST (mem)->context), GST_CUDA_GRAPHICS_RESOURCE_GL_BUFFER); if (!gst_memory_map (mem, &info, (GstMapFlags) (GST_MAP_READ | GST_MAP_GL))) { GST_ERROR_OBJECT (self, "Failed to map gl memory"); gst_cuda_graphics_resource_free (ret); return NULL; } pbo = (GstGLMemoryPBO *) mem; buf = pbo->pbo; if (!gst_cuda_graphics_resource_register_gl_buffer (ret, buf->id, CU_GRAPHICS_REGISTER_FLAGS_NONE)) { GST_ERROR_OBJECT (self, "Failed to register gl buffer"); gst_memory_unmap (mem, &info); gst_cuda_graphics_resource_free (ret); return NULL; } gst_memory_unmap (mem, &info); gst_mini_object_set_qdata (GST_MINI_OBJECT (mem), quark, ret, (GDestroyNotify) gst_cuda_graphics_resource_free); } return ret; } static void gl_copy_thread_func (GstGLContext * gl_context, GLCopyData * data) { GstCudaMemoryCopy *self = data->self; GstCudaBaseTransform *trans = GST_CUDA_BASE_TRANSFORM (self); GstCudaGraphicsResource *resources[GST_VIDEO_MAX_PLANES]; guint num_resources; GstBuffer *gl_buf, *cuda_buf; GstVideoFrame cuda_frame; GstMapInfo cuda_map_info; CUDA_MEMCPY2D copy_params[GST_VIDEO_MAX_PLANES]; CUstream cuda_stream = trans->cuda_stream; gboolean ret = FALSE; guint i; memset (copy_params, 0, sizeof (copy_params)); memset (&cuda_frame, 0, sizeof (GstVideoFrame)); memset (&cuda_map_info, 0, sizeof (GstMapInfo)); data->ret = FALSE; /* Incompatible gl context */ gst_cuda_memory_copy_ensure_gl_interop (gl_context, &ret); if (!ret) return; if (data->pbo_to_cuda) { gl_buf = data->inbuf; cuda_buf = data->outbuf; if (!gst_cuda_memory_copy_map_and_fill_copy2d (self, cuda_buf, data->out_info, data->cuda_mem_type, &cuda_frame, &cuda_map_info, FALSE, copy_params)) { GST_ERROR_OBJECT (self, "Failed to map output CUDA buffer"); return; } } else { gl_buf = data->outbuf; cuda_buf = data->inbuf; if (!gst_cuda_memory_copy_map_and_fill_copy2d (self, cuda_buf, data->in_info, data->cuda_mem_type, &cuda_frame, &cuda_map_info, TRUE, copy_params)) { GST_ERROR_OBJECT (self, "Failed to map input CUDA buffer"); return; } } num_resources = gst_buffer_n_memory (gl_buf); g_assert (num_resources >= GST_VIDEO_INFO_N_PLANES (data->in_info)); if (!gst_cuda_context_push (trans->context)) { GST_ERROR_OBJECT (self, "Failed to push context"); gst_cuda_memory_copy_unmap (self, cuda_buf, &cuda_frame, &cuda_map_info); return; } for (i = 0; i < GST_VIDEO_INFO_N_PLANES (data->in_info); i++) { GstMemory *mem = gst_buffer_peek_memory (gl_buf, i); GstGLMemoryPBO *pbo; resources[i] = ensure_cuda_gl_graphics_resource (self, mem); if (!resources[i]) goto out; pbo = (GstGLMemoryPBO *) mem; if (!data->pbo_to_cuda) { /* Need PBO -> texture */ GST_MINI_OBJECT_FLAG_SET (mem, GST_GL_BASE_MEMORY_TRANSFER_NEED_UPLOAD); /* PBO -> sysmem */ GST_MINI_OBJECT_FLAG_SET (pbo->pbo, GST_GL_BASE_MEMORY_TRANSFER_NEED_DOWNLOAD); } else { /* get the texture into the PBO */ gst_gl_memory_pbo_upload_transfer (pbo); gst_gl_memory_pbo_download_transfer (pbo); } } for (i = 0; i < GST_VIDEO_INFO_N_PLANES (data->in_info); i++) { CUgraphicsResource cuda_resource; CUdeviceptr dev_ptr; size_t size; gboolean copy_ret; if (data->pbo_to_cuda) { cuda_resource = gst_cuda_graphics_resource_map (resources[i], cuda_stream, CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY); } else { cuda_resource = gst_cuda_graphics_resource_map (resources[i], cuda_stream, CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD); } if (!cuda_resource) { GST_ERROR_OBJECT (self, "Failed to map graphics resource %d", i); goto out; } if (!gst_cuda_result (CuGraphicsResourceGetMappedPointer (&dev_ptr, &size, cuda_resource))) { gst_cuda_graphics_resource_unmap (resources[i], cuda_stream); GST_ERROR_OBJECT (self, "Failed to get mapped pointer"); goto out; } if (data->pbo_to_cuda) { copy_params[i].srcMemoryType = CU_MEMORYTYPE_DEVICE; copy_params[i].srcDevice = dev_ptr; copy_params[i].srcPitch = GST_VIDEO_INFO_PLANE_STRIDE (data->in_info, i); } else { copy_params[i].dstMemoryType = CU_MEMORYTYPE_DEVICE; copy_params[i].dstDevice = dev_ptr; copy_params[i].dstPitch = GST_VIDEO_INFO_PLANE_STRIDE (data->out_info, i); } copy_ret = gst_cuda_result (CuMemcpy2DAsync (©_params[i], cuda_stream)); gst_cuda_graphics_resource_unmap (resources[i], cuda_stream); if (!copy_ret) { GST_ERROR_OBJECT (self, "Failed to copy plane %d", i); goto out; } } data->ret = TRUE; out: gst_cuda_result (CuStreamSynchronize (cuda_stream)); gst_cuda_memory_copy_unmap (self, cuda_buf, &cuda_frame, &cuda_map_info); } static gboolean gst_cuda_memory_copy_gl_interop (GstCudaMemoryCopy * self, GstBuffer * inbuf, GstVideoInfo * in_info, GstBuffer * outbuf, GstVideoInfo * out_info, GstGLContext * context, gboolean pbo_to_cuda, GstCudaMemoryCopyMemType cuda_mem_type) { GLCopyData data; g_assert (cuda_mem_type == GST_CUDA_MEMORY_COPY_MEM_CUDA || cuda_mem_type == GST_CUDA_MEMORY_COPY_MEM_NVMM); data.self = self; data.inbuf = inbuf; data.in_info = in_info; data.outbuf = outbuf; data.out_info = out_info; data.pbo_to_cuda = pbo_to_cuda; data.cuda_mem_type = cuda_mem_type; data.ret = FALSE; gst_gl_context_thread_add (context, (GstGLContextThreadFunc) gl_copy_thread_func, &data); return data.ret; } #endif #ifdef HAVE_NVCODEC_GST_D3D11 static GstCudaGraphicsResource * ensure_cuda_d3d11_graphics_resource (GstCudaMemoryCopy * self, GstMemory * mem) { GstCudaBaseTransform *trans = GST_CUDA_BASE_TRANSFORM (self); GQuark quark; GstCudaGraphicsResource *ret = NULL; if (!gst_is_d3d11_memory (mem)) { GST_WARNING_OBJECT (self, "memory is not D3D11 memory, %s", mem->allocator->mem_type); return NULL; } quark = gst_cuda_quark_from_id (GST_CUDA_QUARK_GRAPHICS_RESOURCE); ret = (GstCudaGraphicsResource *) gst_mini_object_get_qdata (GST_MINI_OBJECT (mem), quark); if (!ret) { ret = gst_cuda_graphics_resource_new (trans->context, GST_OBJECT (GST_D3D11_MEMORY_CAST (mem)->device), GST_CUDA_GRAPHICS_RESOURCE_D3D11_RESOURCE); if (!gst_cuda_graphics_resource_register_d3d11_resource (ret, gst_d3d11_memory_get_resource_handle (GST_D3D11_MEMORY_CAST (mem)), CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LOAD_STORE)) { GST_ERROR_OBJECT (self, "failed to register d3d11 resource"); gst_cuda_graphics_resource_free (ret); return NULL; } gst_mini_object_set_qdata (GST_MINI_OBJECT (mem), quark, ret, (GDestroyNotify) gst_cuda_graphics_resource_free); } return ret; } static gboolean gst_cuda_memory_copy_d3d11_interop (GstCudaMemoryCopy * self, GstBuffer * inbuf, GstVideoInfo * in_info, GstBuffer * outbuf, GstVideoInfo * out_info, GstD3D11Device * device, gboolean d3d11_to_cuda, GstCudaMemoryCopyMemType cuda_mem_type) { GstCudaBaseTransform *trans = GST_CUDA_BASE_TRANSFORM (self); GstCudaGraphicsResource *resources[GST_VIDEO_MAX_PLANES]; D3D11_TEXTURE2D_DESC desc[GST_VIDEO_MAX_PLANES]; guint num_resources; GstBuffer *d3d11_buf, *cuda_buf; GstVideoFrame d3d11_frame, cuda_frame; GstMapInfo cuda_map_info; CUDA_MEMCPY2D copy_params[GST_VIDEO_MAX_PLANES]; CUstream cuda_stream = trans->cuda_stream; gboolean ret = FALSE; guint i; g_assert (cuda_mem_type == GST_CUDA_MEMORY_COPY_MEM_CUDA || cuda_mem_type == GST_CUDA_MEMORY_COPY_MEM_NVMM); memset (copy_params, 0, sizeof (copy_params)); memset (&cuda_frame, 0, sizeof (GstVideoFrame)); memset (&cuda_map_info, 0, sizeof (GstMapInfo)); /* Incompatible d3d11 device */ ret = gst_cuda_memory_copy_ensure_d3d11_interop (GST_CUDA_BASE_TRANSFORM (self)->context, device); if (!ret) return FALSE; if (d3d11_to_cuda) { d3d11_buf = inbuf; cuda_buf = outbuf; if (!gst_video_frame_map (&d3d11_frame, in_info, d3d11_buf, GST_MAP_READ | GST_MAP_D3D11)) { GST_ERROR_OBJECT (self, "Failed to map input D3D11 buffer"); return FALSE; } if (!gst_cuda_memory_copy_map_and_fill_copy2d (self, cuda_buf, out_info, cuda_mem_type, &cuda_frame, &cuda_map_info, FALSE, copy_params)) { GST_ERROR_OBJECT (self, "Failed to map output CUDA buffer"); gst_video_frame_unmap (&d3d11_frame); return FALSE; } } else { d3d11_buf = outbuf; cuda_buf = inbuf; if (!gst_video_frame_map (&d3d11_frame, out_info, d3d11_buf, GST_MAP_WRITE | GST_MAP_D3D11)) { GST_ERROR_OBJECT (self, "Failed to map output D3D11 buffer"); return FALSE; } if (!gst_cuda_memory_copy_map_and_fill_copy2d (self, cuda_buf, in_info, cuda_mem_type, &cuda_frame, &cuda_map_info, TRUE, copy_params)) { GST_ERROR_OBJECT (self, "Failed to map input CUDA buffer"); gst_video_frame_unmap (&d3d11_frame); return FALSE; } } num_resources = gst_buffer_n_memory (d3d11_buf); g_assert (num_resources >= GST_VIDEO_FRAME_N_PLANES (&d3d11_frame)); if (!gst_cuda_context_push (trans->context)) { GST_ERROR_OBJECT (self, "Failed to push context"); gst_video_frame_unmap (&d3d11_frame); gst_cuda_memory_copy_unmap (self, cuda_buf, &cuda_frame, &cuda_map_info); return FALSE; } for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (&d3d11_frame); i++) { GstMemory *mem = gst_buffer_peek_memory (d3d11_buf, i); resources[i] = ensure_cuda_d3d11_graphics_resource (self, mem); if (!resources[i] || !gst_d3d11_memory_get_texture_desc (GST_D3D11_MEMORY_CAST (mem), &desc[i])) goto out; } for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (&d3d11_frame); i++) { CUgraphicsResource cuda_resource; CUarray d3d11_array; gboolean copy_ret; if (d3d11_to_cuda) { cuda_resource = gst_cuda_graphics_resource_map (resources[i], cuda_stream, CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY); } else { cuda_resource = gst_cuda_graphics_resource_map (resources[i], cuda_stream, CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD); } if (!cuda_resource) { GST_ERROR_OBJECT (self, "Failed to map graphics resource %d", i); goto out; } if (!gst_cuda_result (CuGraphicsSubResourceGetMappedArray (&d3d11_array, cuda_resource, 0, 0))) { gst_cuda_graphics_resource_unmap (resources[i], cuda_stream); GST_ERROR_OBJECT (self, "Failed to get mapped array"); goto out; } if (d3d11_to_cuda) { copy_params[i].srcMemoryType = CU_MEMORYTYPE_ARRAY; copy_params[i].srcArray = d3d11_array; copy_params[i].srcPitch = desc[i].Width * GST_VIDEO_FRAME_COMP_PSTRIDE (&d3d11_frame, i); } else { copy_params[i].dstMemoryType = CU_MEMORYTYPE_ARRAY; copy_params[i].dstArray = d3d11_array; copy_params[i].dstPitch = desc[i].Width * GST_VIDEO_FRAME_COMP_PSTRIDE (&d3d11_frame, i); } copy_ret = gst_cuda_result (CuMemcpy2DAsync (©_params[i], cuda_stream)); gst_cuda_graphics_resource_unmap (resources[i], cuda_stream); if (!copy_ret) { GST_ERROR_OBJECT (self, "Failed to copy plane %d", i); goto out; } } ret = TRUE; out: gst_cuda_result (CuStreamSynchronize (cuda_stream)); gst_video_frame_unmap (&d3d11_frame); gst_cuda_memory_copy_unmap (self, cuda_buf, &cuda_frame, &cuda_map_info); return ret; } #endif static const gchar * mem_type_to_string (GstCudaMemoryCopyMemType type) { switch (type) { case GST_CUDA_MEMORY_COPY_MEM_SYSTEM: return "SYSTEM"; case GST_CUDA_MEMORY_COPY_MEM_CUDA: return "CUDA"; case GST_CUDA_MEMORY_COPY_MEM_NVMM: return "NVMM"; case GST_CUDA_MEMORY_COPY_MEM_GL: return "GL"; case GST_CUDA_MEMORY_COPY_MEM_D3D11: return "D3D11"; default: g_assert_not_reached (); break; } return "UNKNOWN"; } static GstFlowReturn gst_cuda_memory_copy_transform (GstBaseTransform * trans, GstBuffer * inbuf, GstBuffer * outbuf) { GstCudaMemoryCopy *self = GST_CUDA_MEMORY_COPY (trans); GstCudaBaseTransform *ctrans = GST_CUDA_BASE_TRANSFORM (trans); GstMemory *in_mem; GstMemory *out_mem; GstVideoInfo *in_info, *out_info; gboolean ret = FALSE; GstCudaMemoryCopyMemType in_type = GST_CUDA_MEMORY_COPY_MEM_SYSTEM; GstCudaMemoryCopyMemType out_type = GST_CUDA_MEMORY_COPY_MEM_SYSTEM; gboolean use_device_copy = FALSE; #ifdef HAVE_NVCODEC_GST_D3D11 D3D11_TEXTURE2D_DESC desc; #endif in_info = &ctrans->in_info; out_info = &ctrans->out_info; in_mem = gst_buffer_peek_memory (inbuf, 0); if (!in_mem) { GST_ERROR_OBJECT (self, "Empty input buffer"); return GST_FLOW_ERROR; } out_mem = gst_buffer_peek_memory (outbuf, 0); if (!out_mem) { GST_ERROR_OBJECT (self, "Empty output buffer"); return GST_FLOW_ERROR; } if (self->in_nvmm) { in_type = GST_CUDA_MEMORY_COPY_MEM_NVMM; use_device_copy = TRUE; } else if (gst_is_cuda_memory (in_mem)) { in_type = GST_CUDA_MEMORY_COPY_MEM_CUDA; use_device_copy = TRUE; #ifdef HAVE_NVCODEC_GST_GL } else if (self->gl_context && gst_is_gl_memory_pbo (in_mem)) { in_type = GST_CUDA_MEMORY_COPY_MEM_GL; #endif #ifdef HAVE_NVCODEC_GST_D3D11 } else if (self->d3d11_device && gst_is_d3d11_memory (in_mem) && gst_d3d11_memory_get_texture_desc (GST_D3D11_MEMORY_CAST (in_mem), &desc) && desc.Usage == D3D11_USAGE_DEFAULT) { in_type = GST_CUDA_MEMORY_COPY_MEM_D3D11; #endif } else { in_type = GST_CUDA_MEMORY_COPY_MEM_SYSTEM; } if (self->out_nvmm) { out_type = GST_CUDA_MEMORY_COPY_MEM_NVMM; use_device_copy = TRUE; } else if (gst_is_cuda_memory (out_mem)) { out_type = GST_CUDA_MEMORY_COPY_MEM_CUDA; use_device_copy = TRUE; #ifdef HAVE_NVCODEC_GST_GL } else if (self->gl_context && gst_is_gl_memory_pbo (out_mem)) { out_type = GST_CUDA_MEMORY_COPY_MEM_GL; #endif #ifdef HAVE_NVCODEC_GST_D3D11 } else if (self->d3d11_device && gst_is_d3d11_memory (out_mem) && gst_d3d11_memory_get_texture_desc (GST_D3D11_MEMORY_CAST (out_mem), &desc) && desc.Usage == D3D11_USAGE_DEFAULT) { out_type = GST_CUDA_MEMORY_COPY_MEM_D3D11; #endif } else { out_type = GST_CUDA_MEMORY_COPY_MEM_SYSTEM; } if (!use_device_copy) { GST_TRACE_OBJECT (self, "Both in/out buffers are not CUDA"); if (!gst_cuda_memory_copy_transform_sysmem (self, inbuf, in_info, outbuf, out_info)) { return GST_FLOW_ERROR; } return GST_FLOW_OK; } #ifdef HAVE_NVCODEC_GST_GL if (in_type == GST_CUDA_MEMORY_COPY_MEM_GL) { GstGLMemory *gl_mem = (GstGLMemory *) in_mem; GstGLContext *context = gl_mem->mem.context; GST_TRACE_OBJECT (self, "GL -> %s", mem_type_to_string (out_type)); ret = gst_cuda_memory_copy_gl_interop (self, inbuf, in_info, outbuf, out_info, context, TRUE, out_type); if (!ret) { GST_LOG_OBJECT (self, "GL interop failed, try normal CUDA copy"); /* We cannot use software fallback for NVMM */ if (out_type == GST_CUDA_MEMORY_COPY_MEM_NVMM) { ret = gst_cuda_memory_copy_transform_cuda (self, inbuf, in_info, GST_CUDA_MEMORY_COPY_MEM_SYSTEM, outbuf, out_info, out_type); } else { ret = !gst_cuda_memory_copy_transform_sysmem (self, inbuf, in_info, outbuf, out_info); } } if (!ret) return GST_FLOW_ERROR; return GST_FLOW_OK; } if (out_type == GST_CUDA_MEMORY_COPY_MEM_GL) { GstGLMemory *gl_mem = (GstGLMemory *) out_mem; GstGLContext *context = gl_mem->mem.context; GST_TRACE_OBJECT (self, "%s -> GL", mem_type_to_string (in_type)); ret = gst_cuda_memory_copy_gl_interop (self, inbuf, in_info, outbuf, out_info, context, FALSE, in_type); if (!ret) { GST_LOG_OBJECT (self, "GL interop failed, try normal CUDA copy"); /* We cannot use software fallback for NVMM */ if (in_type == GST_CUDA_MEMORY_COPY_MEM_NVMM) { ret = gst_cuda_memory_copy_transform_cuda (self, inbuf, in_info, in_type, outbuf, out_info, GST_CUDA_MEMORY_COPY_MEM_SYSTEM); } else { ret = !gst_cuda_memory_copy_transform_sysmem (self, inbuf, in_info, outbuf, out_info); } } if (!ret) return GST_FLOW_ERROR; return GST_FLOW_OK; } #endif /* HAVE_NVCODEC_GST_GL */ #ifdef HAVE_NVCODEC_GST_D3D11 if (in_type == GST_CUDA_MEMORY_COPY_MEM_D3D11) { GstD3D11Memory *dmem = (GstD3D11Memory *) in_mem; GstD3D11Device *device = dmem->device; GST_TRACE_OBJECT (self, "D3D11 -> %s", mem_type_to_string (out_type)); gst_d3d11_device_lock (device); ret = gst_cuda_memory_copy_d3d11_interop (self, inbuf, in_info, outbuf, out_info, device, TRUE, out_type); gst_d3d11_device_unlock (device); if (!ret) { GST_LOG_OBJECT (self, "D3D11 interop failed, try normal CUDA copy"); ret = !gst_cuda_memory_copy_transform_sysmem (self, inbuf, in_info, outbuf, out_info); } if (!ret) return GST_FLOW_ERROR; return GST_FLOW_OK; } if (out_type == GST_CUDA_MEMORY_COPY_MEM_D3D11) { GstD3D11Memory *dmem = (GstD3D11Memory *) out_mem; GstD3D11Device *device = dmem->device; GST_TRACE_OBJECT (self, "%s -> D3D11", mem_type_to_string (in_type)); gst_d3d11_device_lock (device); ret = gst_cuda_memory_copy_d3d11_interop (self, inbuf, in_info, outbuf, out_info, device, FALSE, in_type); gst_d3d11_device_unlock (device); if (!ret) { GST_LOG_OBJECT (self, "D3D11 interop failed, try normal CUDA copy"); ret = !gst_cuda_memory_copy_transform_sysmem (self, inbuf, in_info, outbuf, out_info); } if (!ret) return GST_FLOW_ERROR; return GST_FLOW_OK; } #endif /* HAVE_NVCODEC_GST_D3D11 */ GST_TRACE_OBJECT (self, "%s -> %s", mem_type_to_string (in_type), mem_type_to_string (out_type)); ret = gst_cuda_memory_copy_transform_cuda (self, inbuf, in_info, in_type, outbuf, out_info, out_type); if (!ret && !self->in_nvmm && !self->out_nvmm) { GST_LOG_OBJECT (self, "Failed to copy using fast path, trying fallback"); ret = gst_cuda_memory_copy_transform_sysmem (self, inbuf, in_info, outbuf, out_info); } if (ret) return GST_FLOW_OK; return GST_FLOW_ERROR; } static void gst_cuda_upload_class_init (GstCudaUploadClass * klass, gpointer data) { GstElementClass *element_class = GST_ELEMENT_CLASS (klass); GstBaseTransformClass *trans_class = GST_BASE_TRANSFORM_CLASS (klass); GstCudaMemoryCopyClass *copy_class = GST_CUDA_MEMORY_COPY_CLASS (klass); GstCudaMemoryCopyClassData *cdata = (GstCudaMemoryCopyClassData *) data; gst_element_class_add_pad_template (element_class, gst_pad_template_new ("sink", GST_PAD_SINK, GST_PAD_ALWAYS, cdata->sink_caps)); gst_element_class_add_pad_template (element_class, gst_pad_template_new ("src", GST_PAD_SRC, GST_PAD_ALWAYS, cdata->src_caps)); gst_element_class_set_static_metadata (element_class, "CUDA uploader", "Filter/Video", "Uploads data into NVIDA GPU via CUDA APIs", "Seungha Yang "); trans_class->transform = GST_DEBUG_FUNCPTR (gst_cuda_memory_copy_transform); copy_class->uploader = TRUE; gst_caps_unref (cdata->sink_caps); gst_caps_unref (cdata->src_caps); g_free (cdata); } static void gst_cuda_upload_init (GstCudaUpload * self) { } static void gst_cuda_download_class_init (GstCudaDownloadClass * klass, gpointer data) { GstElementClass *element_class = GST_ELEMENT_CLASS (klass); GstBaseTransformClass *trans_class = GST_BASE_TRANSFORM_CLASS (klass); GstCudaMemoryCopyClass *copy_class = GST_CUDA_MEMORY_COPY_CLASS (klass); GstCudaMemoryCopyClassData *cdata = (GstCudaMemoryCopyClassData *) data; gst_element_class_add_pad_template (element_class, gst_pad_template_new ("sink", GST_PAD_SINK, GST_PAD_ALWAYS, cdata->sink_caps)); gst_element_class_add_pad_template (element_class, gst_pad_template_new ("src", GST_PAD_SRC, GST_PAD_ALWAYS, cdata->src_caps)); gst_element_class_set_static_metadata (element_class, "CUDA downloader", "Filter/Video", "Downloads data from NVIDA GPU via CUDA APIs", "Seungha Yang "); trans_class->transform = GST_DEBUG_FUNCPTR (gst_cuda_memory_copy_transform); copy_class->uploader = FALSE; gst_caps_unref (cdata->sink_caps); gst_caps_unref (cdata->src_caps); g_free (cdata); } static void gst_cuda_download_init (GstCudaDownload * self) { } void gst_cuda_memory_copy_register (GstPlugin * plugin, guint rank) { GType upload_type, download_type; GTypeInfo upload_type_info = { sizeof (GstCudaUploadClass), NULL, NULL, (GClassInitFunc) gst_cuda_upload_class_init, NULL, NULL, sizeof (GstCudaUpload), 0, (GInstanceInitFunc) gst_cuda_upload_init, }; GTypeInfo download_type_info = { sizeof (GstCudaDownloadClass), NULL, NULL, (GClassInitFunc) gst_cuda_download_class_init, NULL, NULL, sizeof (GstCudaDownload), 0, (GInstanceInitFunc) gst_cuda_download_init, }; GstCaps *sys_caps; GstCaps *cuda_caps; #ifdef HAVE_NVCODEC_NVMM GstCaps *nvmm_caps = NULL; #endif #ifdef HAVE_NVCODEC_GST_GL GstCaps *gl_caps; #endif #ifdef HAVE_NVCODEC_GST_D3D11 GstCaps *d3d11_caps; #endif GstCaps *upload_sink_caps; GstCaps *upload_src_caps; GstCaps *download_sink_caps; GstCaps *download_src_caps; GstCudaMemoryCopyClassData *upload_cdata; GstCudaMemoryCopyClassData *download_cdata; GST_DEBUG_CATEGORY_INIT (gst_cuda_memory_copy_debug, "cudamemorycopy", 0, "cudamemorycopy"); sys_caps = gst_caps_from_string (GST_VIDEO_CAPS_MAKE (GST_CUDA_FORMATS)); cuda_caps = gst_caps_from_string (GST_VIDEO_CAPS_MAKE_WITH_FEATURES (GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY, GST_CUDA_FORMATS)); #ifdef HAVE_NVCODEC_NVMM if (gst_cuda_nvmm_init_once ()) { nvmm_caps = gst_caps_from_string (GST_VIDEO_CAPS_MAKE_WITH_FEATURES (GST_CAPS_FEATURE_MEMORY_CUDA_NVMM_MEMORY, GST_CUDA_NVMM_FORMATS)); } #endif #ifdef HAVE_NVCODEC_GST_GL gl_caps = gst_caps_from_string (GST_VIDEO_CAPS_MAKE_WITH_FEATURES (GST_CAPS_FEATURE_MEMORY_GL_MEMORY, GST_CUDA_GL_FORMATS)); #endif #ifdef HAVE_NVCODEC_GST_D3D11 d3d11_caps = gst_caps_from_string (GST_VIDEO_CAPS_MAKE_WITH_FEATURES (GST_CAPS_FEATURE_MEMORY_D3D11_MEMORY, GST_CUDA_D3D11_FORMATS)); #endif upload_sink_caps = gst_caps_copy (sys_caps); #ifdef HAVE_NVCODEC_GST_GL upload_sink_caps = gst_caps_merge (upload_sink_caps, gst_caps_copy (gl_caps)); #endif #ifdef HAVE_NVCODEC_GST_D3D11 upload_sink_caps = gst_caps_merge (upload_sink_caps, gst_caps_copy (d3d11_caps)); #endif #ifdef HAVE_NVCODEC_NVMM if (nvmm_caps) { upload_sink_caps = gst_caps_merge (upload_sink_caps, gst_caps_copy (nvmm_caps)); } #endif upload_sink_caps = gst_caps_merge (upload_sink_caps, gst_caps_copy (cuda_caps)); upload_src_caps = gst_caps_copy (cuda_caps); #ifdef HAVE_NVCODEC_NVMM if (nvmm_caps) { upload_src_caps = gst_caps_merge (upload_src_caps, gst_caps_copy (nvmm_caps)); } #endif upload_src_caps = gst_caps_merge (upload_src_caps, gst_caps_copy (sys_caps)); download_sink_caps = gst_caps_copy (cuda_caps); #ifdef HAVE_NVCODEC_NVMM if (nvmm_caps) { download_sink_caps = gst_caps_merge (download_sink_caps, gst_caps_copy (nvmm_caps)); } #endif download_sink_caps = gst_caps_merge (download_sink_caps, gst_caps_copy (sys_caps)); download_src_caps = sys_caps; #ifdef HAVE_NVCODEC_GST_GL download_src_caps = gst_caps_merge (download_src_caps, gl_caps); #endif #ifdef HAVE_NVCODEC_GST_D3D11 download_src_caps = gst_caps_merge (download_src_caps, d3d11_caps); #endif #ifdef HAVE_NVCODEC_NVMM if (nvmm_caps) { download_src_caps = gst_caps_merge (download_src_caps, nvmm_caps); } #endif download_src_caps = gst_caps_merge (download_src_caps, cuda_caps); GST_MINI_OBJECT_FLAG_SET (upload_sink_caps, GST_MINI_OBJECT_FLAG_MAY_BE_LEAKED); GST_MINI_OBJECT_FLAG_SET (upload_src_caps, GST_MINI_OBJECT_FLAG_MAY_BE_LEAKED); GST_MINI_OBJECT_FLAG_SET (download_sink_caps, GST_MINI_OBJECT_FLAG_MAY_BE_LEAKED); GST_MINI_OBJECT_FLAG_SET (download_src_caps, GST_MINI_OBJECT_FLAG_MAY_BE_LEAKED); upload_cdata = g_new0 (GstCudaMemoryCopyClassData, 1); upload_cdata->sink_caps = upload_sink_caps; upload_cdata->src_caps = upload_src_caps; upload_type_info.class_data = upload_cdata; download_cdata = g_new0 (GstCudaMemoryCopyClassData, 1); download_cdata->sink_caps = download_sink_caps; download_cdata->src_caps = download_src_caps; download_type_info.class_data = download_cdata; upload_type = g_type_register_static (GST_TYPE_CUDA_MEMORY_COPY, "GstCudaUpload", &upload_type_info, 0); download_type = g_type_register_static (GST_TYPE_CUDA_MEMORY_COPY, "GstCudaDownload", &download_type_info, 0); if (!gst_element_register (plugin, "cudaupload", rank, upload_type)) GST_WARNING ("Failed to register cudaupload element"); if (!gst_element_register (plugin, "cudadownload", rank, download_type)) GST_WARNING ("Failed to register cudadownload element"); }