gstreamer/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudautils.c
Seungha Yang 219bb769a2 cuda: Remove GST_CUDA_HAS_D3D define from header
... and fix d3d11 specific enum type name

GST_CUDA_HAS_D3D is a build-time define which indicates whether the
GstD3D11 library is available or not, but the DirectX SDK headers must
already be available on the build system anyway.

Expose Direct3D-related symbols if the build target is Windows
(i.e., if G_OS_WIN32 is defined).

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/3018>
2022-09-13 16:40:51 +00:00

/* GStreamer
* Copyright (C) <2018-2019> Seungha Yang <seungha.yang@navercorp.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "gstcudautils.h"
#include "gstcudacontext.h"
#ifdef HAVE_NVCODEC_GST_GL
#include <gst/gl/gl.h>
#include <gst/gl/gstglfuncs.h>
#endif
#ifdef GST_CUDA_HAS_D3D
#include <gst/d3d11/gstd3d11.h>
#endif
#ifdef HAVE_NVCODEC_NVMM
#include "gstcudanvmm.h"
#endif
#include "gstcudamemory.h"
GST_DEBUG_CATEGORY_STATIC (gst_cuda_utils_debug);
#define GST_CAT_DEFAULT gst_cuda_utils_debug
GST_DEBUG_CATEGORY_STATIC (GST_CAT_CONTEXT);
static void
_init_debug (void)
{
static gsize once_init = 0;
if (g_once_init_enter (&once_init)) {
GST_DEBUG_CATEGORY_INIT (gst_cuda_utils_debug, "cudautils", 0,
"CUDA utils");
GST_DEBUG_CATEGORY_GET (GST_CAT_CONTEXT, "GST_CONTEXT");
g_once_init_leave (&once_init, 1);
}
}
static gboolean
pad_query (const GValue * item, GValue * value, gpointer user_data)
{
GstPad *pad = g_value_get_object (item);
GstQuery *query = user_data;
gboolean res;
res = gst_pad_peer_query (pad, query);
if (res) {
g_value_set_boolean (value, TRUE);
return FALSE;
}
GST_CAT_INFO_OBJECT (GST_CAT_CONTEXT, pad, "pad peer query failed");
return TRUE;
}
static gboolean
run_query (GstElement * element, GstQuery * query, GstPadDirection direction)
{
GstIterator *it;
GstIteratorFoldFunction func = pad_query;
GValue res = { 0 };
g_value_init (&res, G_TYPE_BOOLEAN);
g_value_set_boolean (&res, FALSE);
/* Ask neighbor */
if (direction == GST_PAD_SRC)
it = gst_element_iterate_src_pads (element);
else
it = gst_element_iterate_sink_pads (element);
while (gst_iterator_fold (it, func, &res, query) == GST_ITERATOR_RESYNC)
gst_iterator_resync (it);
gst_iterator_free (it);
return g_value_get_boolean (&res);
}
static void
find_cuda_context (GstElement * element, GstCudaContext ** cuda_ctx)
{
GstQuery *query;
GstContext *ctxt;
  /* 1) Query downstream with GST_QUERY_CONTEXT for the context and
   *    check if downstream already has a context of the specific type
   * 2) Query upstream as above.
   */
query = gst_query_new_context (GST_CUDA_CONTEXT_TYPE);
if (run_query (element, query, GST_PAD_SRC)) {
gst_query_parse_context (query, &ctxt);
if (ctxt) {
GST_CAT_INFO_OBJECT (GST_CAT_CONTEXT, element,
"found context (%p) in downstream query", ctxt);
gst_element_set_context (element, ctxt);
}
}
  /* Although we may have found a CUDA context above, the element may not
   * want to use it. In that case, try to find one from the other direction */
if (*cuda_ctx == NULL && run_query (element, query, GST_PAD_SINK)) {
gst_query_parse_context (query, &ctxt);
if (ctxt) {
GST_CAT_INFO_OBJECT (GST_CAT_CONTEXT, element,
"found context (%p) in upstream query", ctxt);
gst_element_set_context (element, ctxt);
}
}
if (*cuda_ctx == NULL) {
/* 3) Post a GST_MESSAGE_NEED_CONTEXT message on the bus with
* the required context type and afterwards check if a
* usable context was set now. The message could
* be handled by the parent bins of the element and the
* application.
*/
GstMessage *msg;
GST_CAT_INFO_OBJECT (GST_CAT_CONTEXT, element,
"posting need context message");
msg = gst_message_new_need_context (GST_OBJECT_CAST (element),
GST_CUDA_CONTEXT_TYPE);
gst_element_post_message (element, msg);
}
  /*
   * Whoever responds to the need-context message performs a
   * GstElement::set_context() with the required context, in which the element
   * is required to update the cuda_ctx or call gst_cuda_handle_set_context().
   */
gst_query_unref (query);
}
static void
context_set_cuda_context (GstContext * context, GstCudaContext * cuda_ctx)
{
GstStructure *s;
guint device_id;
g_return_if_fail (context != NULL);
g_object_get (G_OBJECT (cuda_ctx), "cuda-device-id", &device_id, NULL);
GST_CAT_LOG (GST_CAT_CONTEXT,
"setting GstCudaContext(%" GST_PTR_FORMAT
") with cuda-device-id %d on context(%" GST_PTR_FORMAT ")",
cuda_ctx, device_id, context);
s = gst_context_writable_structure (context);
gst_structure_set (s, GST_CUDA_CONTEXT_TYPE, GST_TYPE_CUDA_CONTEXT,
cuda_ctx, "cuda-device-id", G_TYPE_UINT, device_id, NULL);
}
/**
* gst_cuda_ensure_element_context:
* @element: the #GstElement running the query
 * @device_id: preferred device-id, pass a device_id >= 0 when a specific
 * device is explicitly required. Otherwise, pass -1.
* @cuda_ctx: (inout): the resulting #GstCudaContext
*
* Perform the steps necessary for retrieving a #GstCudaContext from the
* surrounding elements or from the application using the #GstContext mechanism.
*
* If the content of @cuda_ctx is not %NULL, then no #GstContext query is
* necessary for #GstCudaContext.
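 *
 * A minimal usage sketch from an element's start() function (the element
 * type and its context field are illustrative):
 * |[<!-- language="C" -->
 * static gboolean
 * gst_my_filter_start (GstBaseTransform * trans)
 * {
 *   GstMyFilter *self = GST_MY_FILTER (trans);
 *
 *   if (!gst_cuda_ensure_element_context (GST_ELEMENT (trans),
 *           self->device_id, &self->context)) {
 *     GST_ERROR_OBJECT (self, "Failed to obtain CUDA context");
 *     return FALSE;
 *   }
 *
 *   return TRUE;
 * }
 * ]|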
*
* Returns: whether a #GstCudaContext exists in @cuda_ctx
*
* Since: 1.22
*/
gboolean
gst_cuda_ensure_element_context (GstElement * element, gint device_id,
GstCudaContext ** cuda_ctx)
{
guint target_device_id = 0;
gboolean ret = TRUE;
static GRecMutex lock;
static gsize init_lock_once = 0;
g_return_val_if_fail (element != NULL, FALSE);
g_return_val_if_fail (cuda_ctx != NULL, FALSE);
_init_debug ();
if (g_once_init_enter (&init_lock_once)) {
g_rec_mutex_init (&lock);
g_once_init_leave (&init_lock_once, 1);
}
g_rec_mutex_lock (&lock);
if (*cuda_ctx)
goto out;
find_cuda_context (element, cuda_ctx);
if (*cuda_ctx)
goto out;
if (device_id > 0)
target_device_id = device_id;
/* No available CUDA context in pipeline, create new one here */
*cuda_ctx = gst_cuda_context_new (target_device_id);
if (*cuda_ctx == NULL) {
GST_CAT_ERROR_OBJECT (GST_CAT_CONTEXT, element,
"Failed to create CUDA context with device-id %d", device_id);
ret = FALSE;
} else {
GstContext *context;
GstMessage *msg;
/* Propagate new CUDA context */
context = gst_context_new (GST_CUDA_CONTEXT_TYPE, TRUE);
context_set_cuda_context (context, *cuda_ctx);
gst_element_set_context (element, context);
GST_CAT_INFO_OBJECT (GST_CAT_CONTEXT, element,
"posting have context (%p) message with CUDA context (%p)",
context, *cuda_ctx);
msg = gst_message_new_have_context (GST_OBJECT_CAST (element), context);
gst_element_post_message (GST_ELEMENT_CAST (element), msg);
}
out:
g_rec_mutex_unlock (&lock);
return ret;
}
/**
* gst_cuda_handle_set_context:
* @element: a #GstElement
* @context: a #GstContext
 * @device_id: preferred device-id, pass a device_id >= 0 when a specific
 * device is explicitly required. Otherwise, pass -1.
* @cuda_ctx: (inout) (transfer full): location of a #GstCudaContext
*
* Helper function for implementing #GstElementClass.set_context() in
* CUDA capable elements.
*
* Retrieves the #GstCudaContext in @context and places the result in @cuda_ctx.
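 *
 * A minimal sketch of a set_context() implementation (the element type
 * and its context field are illustrative):
 * |[<!-- language="C" -->
 * static void
 * gst_my_filter_set_context (GstElement * element, GstContext * context)
 * {
 *   GstMyFilter *self = GST_MY_FILTER (element);
 *
 *   gst_cuda_handle_set_context (element, context, self->device_id,
 *       &self->context);
 *
 *   GST_ELEMENT_CLASS (parent_class)->set_context (element, context);
 * }
 * ]|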
*
* Returns: whether the @cuda_ctx could be set successfully
*
* Since: 1.22
*/
gboolean
gst_cuda_handle_set_context (GstElement * element,
GstContext * context, gint device_id, GstCudaContext ** cuda_ctx)
{
const gchar *context_type;
g_return_val_if_fail (element != NULL, FALSE);
g_return_val_if_fail (cuda_ctx != NULL, FALSE);
_init_debug ();
if (!context)
return FALSE;
context_type = gst_context_get_context_type (context);
if (g_strcmp0 (context_type, GST_CUDA_CONTEXT_TYPE) == 0) {
const GstStructure *str;
GstCudaContext *other_ctx = NULL;
guint other_device_id = 0;
/* If we had context already, will not replace it */
if (*cuda_ctx)
return TRUE;
str = gst_context_get_structure (context);
if (gst_structure_get (str, GST_CUDA_CONTEXT_TYPE, GST_TYPE_CUDA_CONTEXT,
&other_ctx, NULL)) {
g_object_get (other_ctx, "cuda-device-id", &other_device_id, NULL);
if (device_id == -1 || other_device_id == device_id) {
GST_CAT_DEBUG_OBJECT (GST_CAT_CONTEXT, element, "Found CUDA context");
*cuda_ctx = other_ctx;
return TRUE;
}
gst_object_unref (other_ctx);
}
}
return FALSE;
}
/**
* gst_cuda_handle_context_query:
* @element: a #GstElement
* @query: a #GstQuery of type %GST_QUERY_CONTEXT
* @cuda_ctx: (transfer none) (nullable): a #GstCudaContext
 *
 * Helper function for responding to a %GST_QUERY_CONTEXT query in
 * CUDA capable elements.
 *
 * Returns: whether the @query was successfully responded to from the passed
 * @cuda_ctx
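 *
 * A minimal sketch of an element query handler using this helper (the
 * element type and its context field are illustrative):
 * |[<!-- language="C" -->
 * static gboolean
 * gst_my_filter_query (GstElement * element, GstQuery * query)
 * {
 *   GstMyFilter *self = GST_MY_FILTER (element);
 *
 *   switch (GST_QUERY_TYPE (query)) {
 *     case GST_QUERY_CONTEXT:
 *       if (gst_cuda_handle_context_query (element, query, self->context))
 *         return TRUE;
 *       break;
 *     default:
 *       break;
 *   }
 *
 *   return GST_ELEMENT_CLASS (parent_class)->query (element, query);
 * }
 * ]|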
*
* Since: 1.22
*/
gboolean
gst_cuda_handle_context_query (GstElement * element,
GstQuery * query, GstCudaContext * cuda_ctx)
{
const gchar *context_type;
GstContext *context, *old_context;
g_return_val_if_fail (GST_IS_ELEMENT (element), FALSE);
g_return_val_if_fail (GST_IS_QUERY (query), FALSE);
g_return_val_if_fail (cuda_ctx == NULL
|| GST_IS_CUDA_CONTEXT (cuda_ctx), FALSE);
_init_debug ();
GST_CAT_LOG_OBJECT (GST_CAT_CONTEXT, element,
"handle context query %" GST_PTR_FORMAT, query);
gst_query_parse_context_type (query, &context_type);
if (cuda_ctx && g_strcmp0 (context_type, GST_CUDA_CONTEXT_TYPE) == 0) {
gst_query_parse_context (query, &old_context);
if (old_context)
context = gst_context_copy (old_context);
else
context = gst_context_new (GST_CUDA_CONTEXT_TYPE, TRUE);
context_set_cuda_context (context, cuda_ctx);
gst_query_set_context (query, context);
gst_context_unref (context);
GST_CAT_DEBUG_OBJECT (GST_CAT_CONTEXT, element,
"successfully set %" GST_PTR_FORMAT " on %" GST_PTR_FORMAT, cuda_ctx,
query);
return TRUE;
}
return FALSE;
}
/**
* gst_context_new_cuda_context:
* @cuda_ctx: (transfer none): a #GstCudaContext
*
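 * A minimal sketch of an application answering a need-context bus message
 * with this helper (app_cuda_context is an illustrative application-owned
 * context):
 * |[<!-- language="C" -->
 * if (GST_MESSAGE_TYPE (msg) == GST_MESSAGE_NEED_CONTEXT) {
 *   const gchar *type = NULL;
 *
 *   gst_message_parse_context_type (msg, &type);
 *   if (g_strcmp0 (type, GST_CUDA_CONTEXT_TYPE) == 0) {
 *     GstContext *context = gst_context_new_cuda_context (app_cuda_context);
 *
 *     gst_element_set_context (GST_ELEMENT (GST_MESSAGE_SRC (msg)), context);
 *     gst_context_unref (context);
 *   }
 * }
 * ]|
 *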
* Returns: (transfer full) (nullable): a new #GstContext embedding the @cuda_ctx
* or %NULL
*
* Since: 1.22
*/
GstContext *
gst_context_new_cuda_context (GstCudaContext * cuda_ctx)
{
GstContext *context;
g_return_val_if_fail (GST_IS_CUDA_CONTEXT (cuda_ctx), NULL);
_init_debug ();
context = gst_context_new (GST_CUDA_CONTEXT_TYPE, TRUE);
context_set_cuda_context (context, cuda_ctx);
return context;
}
static const gchar *gst_cuda_quark_strings[] =
{ "GstCudaQuarkGraphicsResource" };
static GQuark gst_cuda_quark_table[GST_CUDA_QUARK_MAX];
static void
init_cuda_quark_once (void)
{
static gsize once_init = 0;
if (g_once_init_enter (&once_init)) {
gint i;
for (i = 0; i < GST_CUDA_QUARK_MAX; i++)
gst_cuda_quark_table[i] =
g_quark_from_static_string (gst_cuda_quark_strings[i]);
g_once_init_leave (&once_init, 1);
}
}
/**
* gst_cuda_quark_from_id: (skip)
* @id: a #GstCudaQuarkId
*
 * Returns: the GQuark for the given @id, or 0 if @id is an unknown value
*
* Since: 1.22
*/
GQuark
gst_cuda_quark_from_id (GstCudaQuarkId id)
{
g_return_val_if_fail (id < GST_CUDA_QUARK_MAX, 0);
init_cuda_quark_once ();
_init_debug ();
return gst_cuda_quark_table[id];
}
/**
* gst_cuda_graphics_resource_new: (skip)
* @context: (transfer none): a #GstCudaContext
* @graphics_context: (transfer none) (nullable): a graphics API specific context object
* @type: a #GstCudaGraphicsResourceType of resource registration
*
* Create new #GstCudaGraphicsResource with given @context and @type
*
 * Returns: a new #GstCudaGraphicsResource.
 * Free with gst_cuda_graphics_resource_free()
*
* Since: 1.22
*/
GstCudaGraphicsResource *
gst_cuda_graphics_resource_new (GstCudaContext *
context, GstObject * graphics_context, GstCudaGraphicsResourceType type)
{
GstCudaGraphicsResource *resource;
g_return_val_if_fail (GST_IS_CUDA_CONTEXT (context), NULL);
_init_debug ();
resource = g_new0 (GstCudaGraphicsResource, 1);
resource->cuda_context = gst_object_ref (context);
if (graphics_context)
resource->graphics_context = gst_object_ref (graphics_context);
return resource;
}
/**
* gst_cuda_graphics_resource_register_gl_buffer: (skip)
 * @resource: a #GstCudaGraphicsResource
* @buffer: a GL buffer object
* @flags: a `CUgraphicsRegisterFlags`
*
 * Register the @buffer for access by CUDA.
 * Must be called from the GL context thread, with the CUDA context of
 * @resource pushed on the current thread.
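 *
 * A minimal sketch of the expected calling pattern, from the GL thread
 * (variable names are illustrative):
 * |[<!-- language="C" -->
 * if (gst_cuda_context_push (cuda_context)) {
 *   gst_cuda_graphics_resource_register_gl_buffer (resource, buffer_id,
 *       CU_GRAPHICS_REGISTER_FLAGS_NONE);
 *   gst_cuda_context_pop (NULL);
 * }
 * ]|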
*
* Returns: whether @buffer was registered or not
*
* Since: 1.22
*/
gboolean
gst_cuda_graphics_resource_register_gl_buffer (GstCudaGraphicsResource *
resource, guint buffer, CUgraphicsRegisterFlags flags)
{
CUresult cuda_ret;
g_return_val_if_fail (resource != NULL, FALSE);
g_return_val_if_fail (resource->registered == FALSE, FALSE);
_init_debug ();
cuda_ret = CuGraphicsGLRegisterBuffer (&resource->resource, buffer, flags);
if (!gst_cuda_result (cuda_ret))
return FALSE;
resource->registered = TRUE;
resource->type = GST_CUDA_GRAPHICS_RESOURCE_GL_BUFFER;
resource->flags = flags;
return TRUE;
}
#ifdef G_OS_WIN32
/**
* gst_cuda_graphics_resource_register_d3d11_resource: (skip)
 * @resource: a #GstCudaGraphicsResource
 * @d3d11_resource: an ID3D11Resource
* @flags: a CUgraphicsRegisterFlags
*
 * Register the @d3d11_resource for access by CUDA.
 * Must be called with the d3d11 device lock taken, and with the CUDA context
 * of @resource pushed on the current thread.
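 *
 * A minimal sketch of the expected calling pattern (variable names are
 * illustrative):
 * |[<!-- language="C" -->
 * if (gst_cuda_context_push (cuda_context)) {
 *   gst_d3d11_device_lock (device);
 *   gst_cuda_graphics_resource_register_d3d11_resource (resource, texture,
 *       CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LOAD_STORE);
 *   gst_d3d11_device_unlock (device);
 *   gst_cuda_context_pop (NULL);
 * }
 * ]|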
*
* Returns: whether @d3d11_resource was registered or not
*
* Since: 1.22
*/
gboolean
gst_cuda_graphics_resource_register_d3d11_resource (GstCudaGraphicsResource *
resource, ID3D11Resource * d3d11_resource, CUgraphicsRegisterFlags flags)
{
CUresult cuda_ret;
g_return_val_if_fail (resource != NULL, FALSE);
g_return_val_if_fail (resource->registered == FALSE, FALSE);
_init_debug ();
cuda_ret = CuGraphicsD3D11RegisterResource (&resource->resource,
d3d11_resource, flags);
if (!gst_cuda_result (cuda_ret))
return FALSE;
resource->registered = TRUE;
resource->type = GST_CUDA_GRAPHICS_RESOURCE_D3D11_RESOURCE;
resource->flags = flags;
return TRUE;
}
#endif
/**
* gst_cuda_graphics_resource_unregister: (skip)
* @resource: a #GstCudaGraphicsResource
*
 * Unregister a previously registered resource.
 * For a GL resource, this method must be called from the GL context thread.
 * Also, the current CUDA context should be pushed on the current thread
 * before calling this method.
*
* Since: 1.22
*/
void
gst_cuda_graphics_resource_unregister (GstCudaGraphicsResource * resource)
{
g_return_if_fail (resource != NULL);
_init_debug ();
if (!resource->registered)
return;
gst_cuda_result (CuGraphicsUnregisterResource (resource->resource));
resource->resource = NULL;
resource->registered = FALSE;
return;
}
/**
* gst_cuda_graphics_resource_map: (skip)
* @resource: a #GstCudaGraphicsResource
* @stream: a CUstream
* @flags: a CUgraphicsMapResourceFlags
*
 * Map a previously registered resource with the given map flags.
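 *
 * A minimal sketch of a map / copy / unmap cycle (do_copy_from() is a
 * hypothetical helper):
 * |[<!-- language="C" -->
 * CUgraphicsResource res;
 *
 * res = gst_cuda_graphics_resource_map (resource, stream,
 *     CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY);
 * if (res) {
 *   CUdeviceptr ptr;
 *   size_t size;
 *
 *   if (gst_cuda_result (CuGraphicsResourceGetMappedPointer (&ptr, &size,
 *           res)))
 *     do_copy_from (ptr, size);
 *
 *   gst_cuda_graphics_resource_unmap (resource, stream);
 * }
 * ]|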
*
* Returns: the `CUgraphicsResource` if successful or %NULL when failed
*
* Since: 1.22
*/
CUgraphicsResource
gst_cuda_graphics_resource_map (GstCudaGraphicsResource * resource,
CUstream stream, CUgraphicsMapResourceFlags flags)
{
CUresult cuda_ret;
g_return_val_if_fail (resource != NULL, NULL);
g_return_val_if_fail (resource->registered != FALSE, NULL);
_init_debug ();
cuda_ret = CuGraphicsResourceSetMapFlags (resource->resource, flags);
if (!gst_cuda_result (cuda_ret))
return NULL;
cuda_ret = CuGraphicsMapResources (1, &resource->resource, stream);
if (!gst_cuda_result (cuda_ret))
return NULL;
resource->mapped = TRUE;
return resource->resource;
}
/**
* gst_cuda_graphics_resource_unmap: (skip)
* @resource: a #GstCudaGraphicsResource
* @stream: a `CUstream`
*
 * Unmap a previously mapped resource.
*
* Since: 1.22
*/
void
gst_cuda_graphics_resource_unmap (GstCudaGraphicsResource * resource,
CUstream stream)
{
g_return_if_fail (resource != NULL);
g_return_if_fail (resource->registered != FALSE);
_init_debug ();
if (!resource->mapped)
return;
gst_cuda_result (CuGraphicsUnmapResources (1, &resource->resource, stream));
resource->mapped = FALSE;
}
#ifdef HAVE_NVCODEC_GST_GL
static void
unregister_resource_from_gl_thread (GstGLContext * gl_context,
GstCudaGraphicsResource * resource)
{
GstCudaContext *cuda_context = resource->cuda_context;
if (!gst_cuda_context_push (cuda_context)) {
GST_WARNING_OBJECT (cuda_context, "failed to push CUDA context");
return;
}
gst_cuda_graphics_resource_unregister (resource);
if (!gst_cuda_context_pop (NULL)) {
GST_WARNING_OBJECT (cuda_context, "failed to pop CUDA context");
}
}
#endif
#ifdef GST_CUDA_HAS_D3D
static void
unregister_d3d11_resource (GstCudaGraphicsResource * resource)
{
GstCudaContext *cuda_context = resource->cuda_context;
GstD3D11Device *device = GST_D3D11_DEVICE (resource->graphics_context);
if (!gst_cuda_context_push (cuda_context)) {
GST_WARNING_OBJECT (cuda_context, "failed to push CUDA context");
return;
}
gst_d3d11_device_lock (device);
gst_cuda_graphics_resource_unregister (resource);
gst_d3d11_device_unlock (device);
if (!gst_cuda_context_pop (NULL)) {
GST_WARNING_OBJECT (cuda_context, "failed to pop CUDA context");
}
}
#endif
/**
* gst_cuda_graphics_resource_free: (skip)
* @resource: a #GstCudaGraphicsResource
*
* Free @resource
*
* Since: 1.22
*/
void
gst_cuda_graphics_resource_free (GstCudaGraphicsResource * resource)
{
g_return_if_fail (resource != NULL);
if (resource->registered) {
#ifdef HAVE_NVCODEC_GST_GL
if (resource->type == GST_CUDA_GRAPHICS_RESOURCE_GL_BUFFER) {
gst_gl_context_thread_add ((GstGLContext *) resource->graphics_context,
(GstGLContextThreadFunc) unregister_resource_from_gl_thread,
resource);
} else
#endif
#ifdef GST_CUDA_HAS_D3D
if (resource->type == GST_CUDA_GRAPHICS_RESOURCE_D3D11_RESOURCE) {
unregister_d3d11_resource (resource);
} else
#endif
{
/* FIXME: currently only opengl & d3d11 */
g_assert_not_reached ();
}
}
gst_object_unref (resource->cuda_context);
if (resource->graphics_context)
gst_object_unref (resource->graphics_context);
g_free (resource);
}
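/**
 * gst_cuda_buffer_copy_type_to_string: (skip)
 * @type: a #GstCudaBufferCopyType
 *
 * Returns: a human readable name of @type, for debugging purposes
 */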
const gchar *
gst_cuda_buffer_copy_type_to_string (GstCudaBufferCopyType type)
{
switch (type) {
case GST_CUDA_BUFFER_COPY_SYSTEM:
return "SYSTEM";
case GST_CUDA_BUFFER_COPY_CUDA:
return "CUDA";
case GST_CUDA_BUFFER_COPY_GL:
return "GL";
case GST_CUDA_BUFFER_COPY_D3D11:
return "D3D11";
case GST_CUDA_BUFFER_COPY_NVMM:
return "NVMM";
default:
g_assert_not_reached ();
break;
}
return "UNKNOWN";
}
static gboolean
gst_cuda_buffer_fallback_copy (GstBuffer * dst, const GstVideoInfo * dst_info,
GstBuffer * src, const GstVideoInfo * src_info)
{
GstVideoFrame dst_frame, src_frame;
guint i, j;
if (!gst_video_frame_map (&dst_frame, dst_info, dst, GST_MAP_WRITE)) {
GST_ERROR ("Failed to map dst buffer");
return FALSE;
}
if (!gst_video_frame_map (&src_frame, src_info, src, GST_MAP_READ)) {
gst_video_frame_unmap (&dst_frame);
GST_ERROR ("Failed to map src buffer");
return FALSE;
}
/* src and dst resolutions can be different, pick min value */
for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (&dst_frame); i++) {
guint dst_width_in_bytes, src_width_in_bytes;
guint dst_height, src_height;
guint width_in_bytes, height;
guint dst_stride, src_stride;
guint8 *dst_data, *src_data;
dst_width_in_bytes = GST_VIDEO_FRAME_COMP_WIDTH (&dst_frame, i) *
GST_VIDEO_FRAME_COMP_PSTRIDE (&dst_frame, i);
src_width_in_bytes = GST_VIDEO_FRAME_COMP_WIDTH (&src_frame, i) *
GST_VIDEO_FRAME_COMP_PSTRIDE (&src_frame, i);
width_in_bytes = MIN (dst_width_in_bytes, src_width_in_bytes);
dst_height = GST_VIDEO_FRAME_COMP_HEIGHT (&dst_frame, i);
src_height = GST_VIDEO_FRAME_COMP_HEIGHT (&src_frame, i);
height = MIN (dst_height, src_height);
dst_stride = GST_VIDEO_FRAME_PLANE_STRIDE (&dst_frame, i);
src_stride = GST_VIDEO_FRAME_PLANE_STRIDE (&src_frame, i);
dst_data = GST_VIDEO_FRAME_PLANE_DATA (&dst_frame, i);
src_data = GST_VIDEO_FRAME_PLANE_DATA (&src_frame, i);
for (j = 0; j < height; j++) {
memcpy (dst_data, src_data, width_in_bytes);
dst_data += dst_stride;
src_data += src_stride;
}
}
gst_video_frame_unmap (&src_frame);
gst_video_frame_unmap (&dst_frame);
return TRUE;
}
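/* Map @buf as the source (@is_src) or destination side of a copy and fill
 * the corresponding half of the per-plane @copy_params. NVMM buffers are
 * mapped as raw memory holding a NvBufSurface; everything else goes through
 * gst_video_frame_map(), with GST_MAP_CUDA added for CUDA memory */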
static gboolean
map_buffer_and_fill_copy2d (GstBuffer * buf, const GstVideoInfo * info,
GstCudaBufferCopyType copy_type, GstVideoFrame * frame,
GstMapInfo * map_info, gboolean is_src,
CUDA_MEMCPY2D copy_params[GST_VIDEO_MAX_PLANES])
{
gboolean buffer_mapped = FALSE;
guint i;
#ifdef HAVE_NVCODEC_NVMM
if (copy_type == GST_CUDA_BUFFER_COPY_NVMM) {
NvBufSurface *surface;
NvBufSurfaceParams *surface_params;
NvBufSurfacePlaneParams *plane_params;
if (!gst_buffer_map (buf, map_info, GST_MAP_READ)) {
GST_ERROR ("Failed to map input NVMM buffer");
memset (map_info, 0, sizeof (GstMapInfo));
return FALSE;
}
surface = (NvBufSurface *) map_info->data;
GST_TRACE ("batch-size %d, num-filled %d, memType %d",
surface->batchSize, surface->numFilled, surface->memType);
surface_params = surface->surfaceList;
buffer_mapped = TRUE;
if (!surface_params) {
GST_ERROR ("NVMM memory doesn't hold buffer");
goto error;
}
plane_params = &surface_params->planeParams;
if (plane_params->num_planes != GST_VIDEO_INFO_N_PLANES (info)) {
GST_ERROR ("num_planes mismatch, %d / %d",
plane_params->num_planes, GST_VIDEO_INFO_N_PLANES (info));
goto error;
}
switch (surface->memType) {
/* TODO: NVBUF_MEM_DEFAULT on jetson is SURFACE_ARRAY */
case NVBUF_MEM_DEFAULT:
case NVBUF_MEM_CUDA_DEVICE:
{
for (i = 0; i < plane_params->num_planes; i++) {
if (is_src) {
copy_params[i].srcMemoryType = CU_MEMORYTYPE_DEVICE;
copy_params[i].srcDevice = (CUdeviceptr)
((guint8 *) surface_params->dataPtr + plane_params->offset[i]);
copy_params[i].srcPitch = plane_params->pitch[i];
} else {
copy_params[i].dstMemoryType = CU_MEMORYTYPE_DEVICE;
copy_params[i].dstDevice = (CUdeviceptr)
((guint8 *) surface_params->dataPtr + plane_params->offset[i]);
copy_params[i].dstPitch = plane_params->pitch[i];
}
}
break;
}
case NVBUF_MEM_CUDA_PINNED:
{
for (i = 0; i < plane_params->num_planes; i++) {
if (is_src) {
copy_params[i].srcMemoryType = CU_MEMORYTYPE_HOST;
copy_params[i].srcHost =
((guint8 *) surface_params->dataPtr + plane_params->offset[i]);
copy_params[i].srcPitch = plane_params->pitch[i];
} else {
copy_params[i].dstMemoryType = CU_MEMORYTYPE_HOST;
copy_params[i].dstHost =
((guint8 *) surface_params->dataPtr + plane_params->offset[i]);
copy_params[i].dstPitch = plane_params->pitch[i];
}
}
break;
}
case NVBUF_MEM_CUDA_UNIFIED:
{
for (i = 0; i < plane_params->num_planes; i++) {
if (is_src) {
copy_params[i].srcMemoryType = CU_MEMORYTYPE_UNIFIED;
copy_params[i].srcDevice = (CUdeviceptr)
((guint8 *) surface_params->dataPtr + plane_params->offset[i]);
copy_params[i].srcPitch = plane_params->pitch[i];
} else {
copy_params[i].dstMemoryType = CU_MEMORYTYPE_UNIFIED;
copy_params[i].dstDevice = (CUdeviceptr)
((guint8 *) surface_params->dataPtr + plane_params->offset[i]);
copy_params[i].dstPitch = plane_params->pitch[i];
}
}
break;
}
default:
GST_ERROR ("Unexpected NVMM memory type %d", surface->memType);
goto error;
}
for (i = 0; i < plane_params->num_planes; i++) {
gsize width_in_bytes, height;
width_in_bytes = plane_params->width[i] * plane_params->bytesPerPix[i];
height = plane_params->height[i];
if (copy_params[i].WidthInBytes == 0 ||
width_in_bytes < copy_params[i].WidthInBytes) {
copy_params[i].WidthInBytes = width_in_bytes;
}
if (copy_params[i].Height == 0 || height < copy_params[i].Height) {
copy_params[i].Height = height;
}
}
} else
#endif
{
GstMapFlags map_flags;
if (is_src)
map_flags = GST_MAP_READ;
else
map_flags = GST_MAP_WRITE;
if (copy_type == GST_CUDA_BUFFER_COPY_CUDA)
map_flags |= GST_MAP_CUDA;
if (!gst_video_frame_map (frame, info, buf, map_flags)) {
GST_ERROR ("Failed to map buffer");
goto error;
}
for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (frame); i++) {
gsize width_in_bytes, height;
if (is_src) {
if (copy_type == GST_CUDA_BUFFER_COPY_CUDA) {
copy_params[i].srcMemoryType = CU_MEMORYTYPE_DEVICE;
copy_params[i].srcDevice =
(CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (frame, i);
} else {
copy_params[i].srcMemoryType = CU_MEMORYTYPE_HOST;
copy_params[i].srcHost = GST_VIDEO_FRAME_PLANE_DATA (frame, i);
}
copy_params[i].srcPitch = GST_VIDEO_FRAME_PLANE_STRIDE (frame, i);
} else {
if (copy_type == GST_CUDA_BUFFER_COPY_CUDA) {
copy_params[i].dstMemoryType = CU_MEMORYTYPE_DEVICE;
copy_params[i].dstDevice =
(CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (frame, i);
} else {
copy_params[i].dstMemoryType = CU_MEMORYTYPE_HOST;
copy_params[i].dstHost = GST_VIDEO_FRAME_PLANE_DATA (frame, i);
}
copy_params[i].dstPitch = GST_VIDEO_FRAME_PLANE_STRIDE (frame, i);
}
width_in_bytes = GST_VIDEO_FRAME_COMP_WIDTH (frame, i) *
GST_VIDEO_FRAME_COMP_PSTRIDE (frame, i);
height = GST_VIDEO_FRAME_COMP_HEIGHT (frame, i);
if (copy_params[i].WidthInBytes == 0 ||
width_in_bytes < copy_params[i].WidthInBytes) {
copy_params[i].WidthInBytes = width_in_bytes;
}
if (copy_params[i].Height == 0 || height < copy_params[i].Height) {
copy_params[i].Height = height;
}
}
}
return TRUE;
error:
if (buffer_mapped) {
gst_buffer_unmap (buf, map_info);
memset (map_info, 0, sizeof (GstMapInfo));
}
return FALSE;
}
static void
unmap_buffer_or_frame (GstBuffer * buf, GstVideoFrame * frame,
GstMapInfo * map_info)
{
if (frame->buffer)
gst_video_frame_unmap (frame);
if (map_info->data)
gst_buffer_unmap (buf, map_info);
}
static gboolean
gst_cuda_buffer_copy_internal (GstBuffer * dst_buf,
GstCudaBufferCopyType dst_type, const GstVideoInfo * dst_info,
GstBuffer * src_buf, GstCudaBufferCopyType src_type,
const GstVideoInfo * src_info, GstCudaContext * context, CUstream stream)
{
GstVideoFrame dst_frame, src_frame;
gboolean ret = FALSE;
GstMapInfo dst_map, src_map;
guint i;
CUDA_MEMCPY2D copy_params[GST_VIDEO_MAX_PLANES];
memset (copy_params, 0, sizeof (copy_params));
memset (&dst_frame, 0, sizeof (GstVideoFrame));
memset (&src_frame, 0, sizeof (GstVideoFrame));
memset (&dst_map, 0, sizeof (GstMapInfo));
memset (&src_map, 0, sizeof (GstMapInfo));
if (!map_buffer_and_fill_copy2d (dst_buf, dst_info,
dst_type, &dst_frame, &dst_map, FALSE, copy_params)) {
GST_ERROR_OBJECT (context, "Failed to map output buffer");
return FALSE;
}
if (!map_buffer_and_fill_copy2d (src_buf, src_info,
src_type, &src_frame, &src_map, TRUE, copy_params)) {
GST_ERROR_OBJECT (context, "Failed to map input buffer");
unmap_buffer_or_frame (dst_buf, &dst_frame, &dst_map);
return FALSE;
}
if (!gst_cuda_context_push (context)) {
GST_ERROR_OBJECT (context, "Failed to push our context");
goto unmap_and_out;
}
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (dst_info); i++) {
ret = gst_cuda_result (CuMemcpy2DAsync (&copy_params[i], stream));
if (!ret) {
GST_ERROR_OBJECT (context, "Failed to copy plane %d", i);
break;
}
}
gst_cuda_result (CuStreamSynchronize (stream));
gst_cuda_context_pop (NULL);
unmap_and_out:
  unmap_buffer_or_frame (dst_buf, &dst_frame, &dst_map);
  unmap_buffer_or_frame (src_buf, &src_frame, &src_map);
return ret;
}
#ifdef HAVE_NVCODEC_GST_GL
static gboolean
ensure_gl_interop (void)
{
guint device_count = 0;
CUdevice device_list[1] = { 0, };
CUresult cuda_ret;
cuda_ret = CuGLGetDevices (&device_count,
device_list, 1, CU_GL_DEVICE_LIST_ALL);
if (cuda_ret != CUDA_SUCCESS || device_count == 0)
return FALSE;
return TRUE;
}
typedef struct _GLCopyData
{
GstBuffer *src_buf;
const GstVideoInfo *src_info;
GstBuffer *dst_buf;
const GstVideoInfo *dst_info;
gboolean pbo_to_cuda;
GstCudaBufferCopyType copy_type;
GstCudaContext *context;
CUstream stream;
gboolean ret;
} GLCopyData;
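/* Get the cached CUDA registration of a GL PBO memory, creating and
 * attaching it as qdata on first use */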
static GstCudaGraphicsResource *
ensure_cuda_gl_graphics_resource (GstCudaContext * context, GstMemory * mem)
{
GQuark quark;
GstCudaGraphicsResource *ret = NULL;
if (!gst_is_gl_memory_pbo (mem)) {
GST_WARNING_OBJECT (context, "memory is not GL PBO memory, %s",
mem->allocator->mem_type);
return NULL;
}
quark = gst_cuda_quark_from_id (GST_CUDA_QUARK_GRAPHICS_RESOURCE);
ret = (GstCudaGraphicsResource *)
gst_mini_object_get_qdata (GST_MINI_OBJECT (mem), quark);
if (!ret) {
GstGLMemoryPBO *pbo;
GstGLBuffer *buf;
GstMapInfo info;
ret = gst_cuda_graphics_resource_new (context,
GST_OBJECT (GST_GL_BASE_MEMORY_CAST (mem)->context),
GST_CUDA_GRAPHICS_RESOURCE_GL_BUFFER);
if (!gst_memory_map (mem, &info, (GstMapFlags) (GST_MAP_READ | GST_MAP_GL))) {
GST_ERROR_OBJECT (context, "Failed to map gl memory");
gst_cuda_graphics_resource_free (ret);
return NULL;
}
pbo = (GstGLMemoryPBO *) mem;
buf = pbo->pbo;
if (!gst_cuda_graphics_resource_register_gl_buffer (ret,
buf->id, CU_GRAPHICS_REGISTER_FLAGS_NONE)) {
GST_ERROR_OBJECT (context, "Failed to register gl buffer");
gst_memory_unmap (mem, &info);
gst_cuda_graphics_resource_free (ret);
return NULL;
}
gst_memory_unmap (mem, &info);
gst_mini_object_set_qdata (GST_MINI_OBJECT (mem), quark, ret,
(GDestroyNotify) gst_cuda_graphics_resource_free);
}
return ret;
}
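/* Runs in the GL thread: maps the CUDA side of the copy, registers each GL
 * PBO memory with CUDA, then performs per-plane 2D copies between the
 * mapped PBOs and the CUDA buffer */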
static void
gl_copy_thread_func (GstGLContext * gl_context, GLCopyData * data)
{
GstCudaGraphicsResource *resources[GST_VIDEO_MAX_PLANES];
guint num_resources;
GstBuffer *gl_buf, *cuda_buf;
GstVideoFrame cuda_frame;
GstMapInfo cuda_map_info;
CUDA_MEMCPY2D copy_params[GST_VIDEO_MAX_PLANES];
guint i;
GstCudaContext *context = data->context;
CUstream stream = data->stream;
memset (copy_params, 0, sizeof (copy_params));
memset (&cuda_frame, 0, sizeof (GstVideoFrame));
memset (&cuda_map_info, 0, sizeof (GstMapInfo));
data->ret = FALSE;
/* Incompatible gl context */
if (!ensure_gl_interop ())
return;
if (data->pbo_to_cuda) {
gl_buf = data->src_buf;
cuda_buf = data->dst_buf;
if (!map_buffer_and_fill_copy2d (cuda_buf,
data->dst_info, data->copy_type, &cuda_frame, &cuda_map_info,
FALSE, copy_params)) {
GST_ERROR_OBJECT (context, "Failed to map output CUDA buffer");
return;
}
} else {
gl_buf = data->dst_buf;
cuda_buf = data->src_buf;
if (!map_buffer_and_fill_copy2d (cuda_buf,
data->src_info, data->copy_type, &cuda_frame, &cuda_map_info,
TRUE, copy_params)) {
GST_ERROR_OBJECT (context, "Failed to map input CUDA buffer");
return;
}
}
num_resources = gst_buffer_n_memory (gl_buf);
g_assert (num_resources >= GST_VIDEO_INFO_N_PLANES (data->src_info));
if (!gst_cuda_context_push (context)) {
GST_ERROR_OBJECT (context, "Failed to push context");
unmap_buffer_or_frame (cuda_buf, &cuda_frame, &cuda_map_info);
return;
}
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (data->src_info); i++) {
GstMemory *mem = gst_buffer_peek_memory (gl_buf, i);
GstGLMemoryPBO *pbo;
resources[i] = ensure_cuda_gl_graphics_resource (context, mem);
if (!resources[i])
goto out;
pbo = (GstGLMemoryPBO *) mem;
if (!data->pbo_to_cuda) {
/* Need PBO -> texture */
GST_MINI_OBJECT_FLAG_SET (mem, GST_GL_BASE_MEMORY_TRANSFER_NEED_UPLOAD);
/* PBO -> sysmem */
GST_MINI_OBJECT_FLAG_SET (pbo->pbo,
GST_GL_BASE_MEMORY_TRANSFER_NEED_DOWNLOAD);
} else {
/* get the texture into the PBO */
gst_gl_memory_pbo_upload_transfer (pbo);
gst_gl_memory_pbo_download_transfer (pbo);
}
}
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (data->src_info); i++) {
CUgraphicsResource cuda_resource;
CUdeviceptr dev_ptr;
size_t size;
gboolean copy_ret;
gsize width_in_bytes, height;
if (data->pbo_to_cuda) {
cuda_resource =
gst_cuda_graphics_resource_map (resources[i], stream,
CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY);
} else {
cuda_resource =
gst_cuda_graphics_resource_map (resources[i], stream,
CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD);
}
if (!cuda_resource) {
GST_ERROR_OBJECT (context, "Failed to map graphics resource %d", i);
goto out;
}
if (!gst_cuda_result (CuGraphicsResourceGetMappedPointer (&dev_ptr, &size,
cuda_resource))) {
gst_cuda_graphics_resource_unmap (resources[i], stream);
GST_ERROR_OBJECT (context, "Failed to get mapped pointer");
goto out;
}
if (data->pbo_to_cuda) {
copy_params[i].srcMemoryType = CU_MEMORYTYPE_DEVICE;
copy_params[i].srcDevice = dev_ptr;
copy_params[i].srcPitch = GST_VIDEO_INFO_PLANE_STRIDE (data->src_info, i);
width_in_bytes = GST_VIDEO_INFO_COMP_WIDTH (data->src_info, i) *
GST_VIDEO_INFO_COMP_PSTRIDE (data->src_info, i);
height = GST_VIDEO_INFO_COMP_HEIGHT (data->src_info, i);
} else {
copy_params[i].dstMemoryType = CU_MEMORYTYPE_DEVICE;
copy_params[i].dstDevice = dev_ptr;
copy_params[i].dstPitch = GST_VIDEO_INFO_PLANE_STRIDE (data->dst_info, i);
width_in_bytes = GST_VIDEO_INFO_COMP_WIDTH (data->dst_info, i) *
GST_VIDEO_INFO_COMP_PSTRIDE (data->dst_info, i);
height = GST_VIDEO_INFO_COMP_HEIGHT (data->dst_info, i);
}
if (width_in_bytes < copy_params[i].WidthInBytes)
copy_params[i].WidthInBytes = width_in_bytes;
if (height < copy_params[i].Height)
copy_params[i].Height = height;
copy_ret = gst_cuda_result (CuMemcpy2DAsync (&copy_params[i], stream));
gst_cuda_graphics_resource_unmap (resources[i], stream);
if (!copy_ret) {
GST_ERROR_OBJECT (context, "Failed to copy plane %d", i);
goto out;
}
}
data->ret = TRUE;
out:
gst_cuda_result (CuStreamSynchronize (stream));
gst_cuda_context_pop (NULL);
unmap_buffer_or_frame (cuda_buf, &cuda_frame, &cuda_map_info);
}
static gboolean
cuda_copy_gl_interop (GstBuffer * dst_buf, const GstVideoInfo * dst_info,
GstBuffer * src_buf, const GstVideoInfo * src_info,
GstGLContext * gl_context, GstCudaContext * context, CUstream stream,
gboolean pbo_to_cuda, GstCudaBufferCopyType copy_type)
{
GLCopyData data;
g_assert (copy_type == GST_CUDA_BUFFER_COPY_CUDA ||
copy_type == GST_CUDA_BUFFER_COPY_NVMM);
data.src_buf = src_buf;
data.src_info = src_info;
data.dst_buf = dst_buf;
data.dst_info = dst_info;
data.pbo_to_cuda = pbo_to_cuda;
data.copy_type = copy_type;
data.context = context;
data.stream = stream;
data.ret = FALSE;
gst_gl_context_thread_add (gl_context,
(GstGLContextThreadFunc) gl_copy_thread_func, &data);
return data.ret;
}
#endif
#ifdef GST_CUDA_HAS_D3D
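/* Check that @device is visible to CUDA and corresponds to the device
 * backing @context; the interop copy is only valid in that case */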
static gboolean
ensure_d3d11_interop (GstCudaContext * context, GstD3D11Device * device)
{
guint device_count = 0;
guint cuda_device_id;
CUdevice device_list[1] = { 0, };
CUresult cuda_ret;
g_object_get (context, "cuda-device-id", &cuda_device_id, NULL);
cuda_ret = CuD3D11GetDevices (&device_count,
device_list, 1, gst_d3d11_device_get_device_handle (device),
CU_D3D11_DEVICE_LIST_ALL);
if (cuda_ret != CUDA_SUCCESS || device_count == 0)
return FALSE;
if (device_list[0] != (CUdevice) cuda_device_id)
return FALSE;
return TRUE;
}
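/* Get the cached CUDA registration of a D3D11 memory, creating and
 * attaching it as qdata on first use */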
static GstCudaGraphicsResource *
ensure_cuda_d3d11_graphics_resource (GstCudaContext * context, GstMemory * mem)
{
GQuark quark;
GstCudaGraphicsResource *ret = NULL;
if (!gst_is_d3d11_memory (mem)) {
GST_WARNING_OBJECT (context, "memory is not D3D11 memory, %s",
mem->allocator->mem_type);
return NULL;
}
quark = gst_cuda_quark_from_id (GST_CUDA_QUARK_GRAPHICS_RESOURCE);
ret = (GstCudaGraphicsResource *)
gst_mini_object_get_qdata (GST_MINI_OBJECT (mem), quark);
if (!ret) {
ret = gst_cuda_graphics_resource_new (context,
GST_OBJECT (GST_D3D11_MEMORY_CAST (mem)->device),
GST_CUDA_GRAPHICS_RESOURCE_D3D11_RESOURCE);
if (!gst_cuda_graphics_resource_register_d3d11_resource (ret,
gst_d3d11_memory_get_resource_handle (GST_D3D11_MEMORY_CAST (mem)),
CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LOAD_STORE)) {
GST_ERROR_OBJECT (context, "failed to register d3d11 resource");
gst_cuda_graphics_resource_free (ret);
return NULL;
}
gst_mini_object_set_qdata (GST_MINI_OBJECT (mem), quark, ret,
(GDestroyNotify) gst_cuda_graphics_resource_free);
}
return ret;
}
static gboolean
cuda_copy_d3d11_interop (GstBuffer * dst_buf, const GstVideoInfo * dst_info,
GstBuffer * src_buf, const GstVideoInfo * src_info, GstD3D11Device * device,
GstCudaContext * context, CUstream stream, gboolean d3d11_to_cuda)
{
GstCudaGraphicsResource *resources[GST_VIDEO_MAX_PLANES];
D3D11_TEXTURE2D_DESC desc[GST_VIDEO_MAX_PLANES];
guint num_resources;
GstBuffer *d3d11_buf, *cuda_buf;
GstVideoFrame d3d11_frame, cuda_frame;
GstMapInfo cuda_map_info;
CUDA_MEMCPY2D copy_params[GST_VIDEO_MAX_PLANES];
guint i;
gboolean ret = FALSE;
memset (copy_params, 0, sizeof (copy_params));
memset (&cuda_frame, 0, sizeof (GstVideoFrame));
memset (&cuda_map_info, 0, sizeof (GstMapInfo));
/* Incompatible d3d11 device */
if (!ensure_d3d11_interop (context, device))
return FALSE;
if (d3d11_to_cuda) {
d3d11_buf = src_buf;
cuda_buf = dst_buf;
if (!gst_video_frame_map (&d3d11_frame, src_info, d3d11_buf,
GST_MAP_READ | GST_MAP_D3D11)) {
GST_ERROR_OBJECT (context, "Failed to map input D3D11 buffer");
return FALSE;
}
if (!map_buffer_and_fill_copy2d (cuda_buf,
dst_info, GST_CUDA_BUFFER_COPY_CUDA, &cuda_frame, &cuda_map_info,
FALSE, copy_params)) {
GST_ERROR_OBJECT (context, "Failed to map output CUDA buffer");
gst_video_frame_unmap (&d3d11_frame);
return FALSE;
}
} else {
d3d11_buf = dst_buf;
cuda_buf = src_buf;
if (!gst_video_frame_map (&d3d11_frame, dst_info, d3d11_buf,
GST_MAP_WRITE | GST_MAP_D3D11)) {
GST_ERROR_OBJECT (context, "Failed to map output D3D11 buffer");
return FALSE;
}
if (!map_buffer_and_fill_copy2d (cuda_buf,
src_info, GST_CUDA_BUFFER_COPY_CUDA, &cuda_frame, &cuda_map_info,
TRUE, copy_params)) {
GST_ERROR_OBJECT (context, "Failed to map input CUDA buffer");
gst_video_frame_unmap (&d3d11_frame);
return FALSE;
}
}
num_resources = gst_buffer_n_memory (d3d11_buf);
g_assert (num_resources >= GST_VIDEO_FRAME_N_PLANES (&d3d11_frame));
if (!gst_cuda_context_push (context)) {
GST_ERROR_OBJECT (context, "Failed to push context");
gst_video_frame_unmap (&d3d11_frame);
unmap_buffer_or_frame (cuda_buf, &cuda_frame, &cuda_map_info);
return FALSE;
}
for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (&d3d11_frame); i++) {
GstMemory *mem = gst_buffer_peek_memory (d3d11_buf, i);
resources[i] = ensure_cuda_d3d11_graphics_resource (context, mem);
if (!resources[i]
|| !gst_d3d11_memory_get_texture_desc (GST_D3D11_MEMORY_CAST (mem),
&desc[i]))
goto out;
}
for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (&d3d11_frame); i++) {
CUgraphicsResource cuda_resource;
CUarray d3d11_array;
gboolean copy_ret;
if (d3d11_to_cuda) {
cuda_resource =
gst_cuda_graphics_resource_map (resources[i], stream,
CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY);
} else {
cuda_resource =
gst_cuda_graphics_resource_map (resources[i], stream,
CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD);
}
if (!cuda_resource) {
GST_ERROR_OBJECT (context, "Failed to map graphics resource %d", i);
goto out;
}
if (!gst_cuda_result (CuGraphicsSubResourceGetMappedArray (&d3d11_array,
cuda_resource, 0, 0))) {
gst_cuda_graphics_resource_unmap (resources[i], stream);
GST_ERROR_OBJECT (context, "Failed to get mapped array");
goto out;
}
if (d3d11_to_cuda) {
copy_params[i].srcMemoryType = CU_MEMORYTYPE_ARRAY;
copy_params[i].srcArray = d3d11_array;
copy_params[i].srcPitch =
desc[i].Width * GST_VIDEO_FRAME_COMP_PSTRIDE (&d3d11_frame, i);
} else {
copy_params[i].dstMemoryType = CU_MEMORYTYPE_ARRAY;
copy_params[i].dstArray = d3d11_array;
copy_params[i].dstPitch =
desc[i].Width * GST_VIDEO_FRAME_COMP_PSTRIDE (&d3d11_frame, i);
}
copy_ret = gst_cuda_result (CuMemcpy2DAsync (&copy_params[i], stream));
gst_cuda_graphics_resource_unmap (resources[i], stream);
if (!copy_ret) {
GST_ERROR_OBJECT (context, "Failed to copy plane %d", i);
goto out;
}
}
ret = TRUE;
out:
gst_cuda_result (CuStreamSynchronize (stream));
gst_cuda_context_pop (NULL);
gst_video_frame_unmap (&d3d11_frame);
unmap_buffer_or_frame (cuda_buf, &cuda_frame, &cuda_map_info);
return ret;
}
#endif
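/**
 * gst_cuda_buffer_copy: (skip)
 * @dst: a #GstBuffer to copy into
 * @dst_type: a #GstCudaBufferCopyType of @dst
 * @dst_info: a #GstVideoInfo of @dst
 * @src: a #GstBuffer to copy from
 * @src_type: a #GstCudaBufferCopyType of @src
 * @src_info: a #GstVideoInfo of @src
 * @context: a #GstCudaContext, used when neither buffer carries its own
 * @stream: a CUstream to schedule the copy on
 *
 * Copy the contents of @src into @dst, taking a GL or D3D11 interop path
 * where available, a 2D device copy when CUDA or NVMM memory is involved,
 * and a plain system memory copy otherwise. @dst_info and @src_info must
 * describe the same video format, and NVMM -> NVMM copy is not supported.
 *
 * Returns: whether the copy succeeded
 */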
gboolean
gst_cuda_buffer_copy (GstBuffer * dst, GstCudaBufferCopyType dst_type,
const GstVideoInfo * dst_info, GstBuffer * src,
GstCudaBufferCopyType src_type, const GstVideoInfo * src_info,
GstCudaContext * context, CUstream stream)
{
gboolean use_copy_2d = FALSE;
GstMemory *dst_mem, *src_mem;
#ifdef GST_CUDA_HAS_D3D
D3D11_TEXTURE2D_DESC desc;
#endif
GstCudaContext *cuda_context;
g_return_val_if_fail (GST_IS_BUFFER (dst), FALSE);
g_return_val_if_fail (dst_info != NULL, FALSE);
g_return_val_if_fail (GST_IS_BUFFER (src), FALSE);
g_return_val_if_fail (src_info != NULL, FALSE);
g_return_val_if_fail (GST_IS_CUDA_CONTEXT (context), FALSE);
_init_debug ();
if (dst_type == GST_CUDA_BUFFER_COPY_NVMM &&
src_type == GST_CUDA_BUFFER_COPY_NVMM) {
GST_ERROR_OBJECT (context, "Not supported copy NVMM -> NVMM");
return FALSE;
}
if (GST_VIDEO_INFO_FORMAT (dst_info) != GST_VIDEO_INFO_FORMAT (src_info)) {
    GST_ERROR_OBJECT (context,
        "Copy between different formats is not supported");
return FALSE;
}
if (dst_type == GST_CUDA_BUFFER_COPY_CUDA ||
dst_type == GST_CUDA_BUFFER_COPY_NVMM ||
src_type == GST_CUDA_BUFFER_COPY_CUDA ||
src_type == GST_CUDA_BUFFER_COPY_NVMM) {
use_copy_2d = TRUE;
}
if (!use_copy_2d) {
GST_TRACE_OBJECT (context, "Not a device memory, use system memory copy");
return gst_cuda_buffer_fallback_copy (dst, dst_info, src, src_info);
}
dst_mem = gst_buffer_peek_memory (dst, 0);
src_mem = gst_buffer_peek_memory (src, 0);
#ifdef HAVE_NVCODEC_GST_GL
if (src_type == GST_CUDA_BUFFER_COPY_GL && gst_is_gl_memory_pbo (src_mem)) {
GstGLMemory *gl_mem = (GstGLMemory *) src_mem;
GstGLContext *gl_context = gl_mem->mem.context;
GstCudaContext *cuda_context = context;
if (dst_type == GST_CUDA_BUFFER_COPY_CUDA && gst_is_cuda_memory (dst_mem))
cuda_context = GST_CUDA_MEMORY_CAST (dst_mem)->context;
GST_TRACE_OBJECT (context, "GL -> %s",
gst_cuda_buffer_copy_type_to_string (dst_type));
return cuda_copy_gl_interop (dst, dst_info, src, src_info, gl_context,
cuda_context, stream, TRUE, dst_type);
}
if (dst_type == GST_CUDA_BUFFER_COPY_GL && gst_is_gl_memory_pbo (dst_mem)) {
GstGLMemory *gl_mem = (GstGLMemory *) dst_mem;
GstGLContext *gl_context = gl_mem->mem.context;
GstCudaContext *cuda_context = context;
if (src_type == GST_CUDA_BUFFER_COPY_CUDA && gst_is_cuda_memory (src_mem))
cuda_context = GST_CUDA_MEMORY_CAST (src_mem)->context;
GST_TRACE_OBJECT (context, "%s -> GL",
gst_cuda_buffer_copy_type_to_string (src_type));
return cuda_copy_gl_interop (dst, dst_info, src, src_info, gl_context,
cuda_context, stream, FALSE, src_type);
}
#endif
#ifdef GST_CUDA_HAS_D3D
if (src_type == GST_CUDA_BUFFER_COPY_D3D11 && gst_is_d3d11_memory (src_mem) &&
gst_d3d11_memory_get_texture_desc (GST_D3D11_MEMORY_CAST (src_mem), &desc)
&& desc.Usage == D3D11_USAGE_DEFAULT && gst_is_cuda_memory (dst_mem)) {
GstD3D11Memory *dmem = GST_D3D11_MEMORY_CAST (src_mem);
GstD3D11Device *device = dmem->device;
GstCudaContext *cuda_context = GST_CUDA_MEMORY_CAST (dst_mem)->context;
gboolean ret;
GST_TRACE_OBJECT (context, "D3D11 -> CUDA");
gst_d3d11_device_lock (device);
ret = cuda_copy_d3d11_interop (dst, dst_info, src, src_info, device,
cuda_context, stream, TRUE);
gst_d3d11_device_unlock (device);
return ret;
}
if (dst_type == GST_CUDA_BUFFER_COPY_D3D11 && gst_is_d3d11_memory (dst_mem) &&
gst_d3d11_memory_get_texture_desc (GST_D3D11_MEMORY_CAST (dst_mem), &desc)
&& desc.Usage == D3D11_USAGE_DEFAULT && gst_is_cuda_memory (src_mem)) {
GstD3D11Memory *dmem = GST_D3D11_MEMORY_CAST (dst_mem);
GstD3D11Device *device = dmem->device;
GstCudaContext *cuda_context = GST_CUDA_MEMORY_CAST (src_mem)->context;
gboolean ret;
GST_TRACE_OBJECT (context, "CUDA -> D3D11");
gst_d3d11_device_lock (device);
ret = cuda_copy_d3d11_interop (dst, dst_info, src, src_info, device,
cuda_context, stream, FALSE);
gst_d3d11_device_unlock (device);
return ret;
}
#endif
if (gst_is_cuda_memory (dst_mem)) {
cuda_context = GST_CUDA_MEMORY_CAST (dst_mem)->context;
} else if (gst_is_cuda_memory (src_mem)) {
cuda_context = GST_CUDA_MEMORY_CAST (src_mem)->context;
} else {
cuda_context = context;
}
GST_TRACE_OBJECT (context, "%s -> %s",
gst_cuda_buffer_copy_type_to_string (src_type),
gst_cuda_buffer_copy_type_to_string (dst_type));
return gst_cuda_buffer_copy_internal (dst, dst_type, dst_info,
src, src_type, src_info, cuda_context, stream);
}