nvcodec: Move CUDA <-> GL, D3D11, NVMM copy function to utils

This method can be used in other elements as well.

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/1945>
Seungha Yang 2022-03-14 01:19:53 +09:00 committed by GStreamer Marge Bot
parent b3df58add1
commit c08ce58753
3 changed files with 1007 additions and 818 deletions
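As a rough illustration of that reuse, below is a minimal sketch of how another nvcodec element might call the new gst_cuda_buffer_copy() helper declared in gstcudautils.h by this commit. The wrapper name upload_sysmem_to_cuda and the ctx/stream variables (e.g. taken from a GstCudaBaseTransform's context and cuda_stream) are hypothetical; only the gst_cuda_buffer_copy() signature and the GstCudaBufferCopyType values come from the patch itself.

#include "gstcudautils.h"

/* Hypothetical wrapper: copy a system-memory buffer into an already
 * allocated CUDA buffer of the same video format using the shared helper */
static gboolean
upload_sysmem_to_cuda (GstBuffer * dst, const GstVideoInfo * dst_info,
    GstBuffer * src, const GstVideoInfo * src_info,
    GstCudaContext * ctx, CUstream stream)
{
  /* gst_cuda_buffer_copy () uses CuMemcpy2DAsync when either side is
   * CUDA/NVMM memory and falls back to a per-plane memcpy otherwise */
  return gst_cuda_buffer_copy (dst, GST_CUDA_BUFFER_COPY_CUDA, dst_info,
      src, GST_CUDA_BUFFER_COPY_SYSTEM, src_info, ctx, stream);
}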


@@ -50,15 +50,6 @@
GST_DEBUG_CATEGORY_STATIC (gst_cuda_memory_copy_debug);
#define GST_CAT_DEFAULT gst_cuda_memory_copy_debug
typedef enum
{
GST_CUDA_MEMORY_COPY_MEM_SYSTEM,
GST_CUDA_MEMORY_COPY_MEM_CUDA,
GST_CUDA_MEMORY_COPY_MEM_NVMM,
GST_CUDA_MEMORY_COPY_MEM_GL,
GST_CUDA_MEMORY_COPY_MEM_D3D11,
} GstCudaMemoryCopyMemType;
typedef struct _GstCudaMemoryCopyClassData
{
GstCaps *sink_caps;
@@ -783,698 +774,6 @@ gst_cuda_memory_copy_set_info (GstCudaBaseTransform * btrans,
return TRUE;
}
static gboolean
gst_cuda_memory_copy_transform_sysmem (GstCudaMemoryCopy * self,
GstBuffer * inbuf, GstVideoInfo * in_info, GstBuffer * outbuf,
GstVideoInfo * out_info)
{
GstVideoFrame in_frame, out_frame;
gboolean ret;
if (!gst_video_frame_map (&in_frame, in_info, inbuf, GST_MAP_READ)) {
GST_ERROR_OBJECT (self, "Failed to map input buffer");
return FALSE;
}
if (!gst_video_frame_map (&out_frame, out_info, outbuf, GST_MAP_WRITE)) {
gst_video_frame_unmap (&in_frame);
GST_ERROR_OBJECT (self, "Failed to map input buffer");
return FALSE;
}
ret = gst_video_frame_copy (&out_frame, &in_frame);
gst_video_frame_unmap (&out_frame);
gst_video_frame_unmap (&in_frame);
if (!ret)
GST_ERROR_OBJECT (self, "Failed to copy buffer");
return ret;
}
static gboolean
gst_cuda_memory_copy_map_and_fill_copy2d (GstCudaMemoryCopy * self,
GstBuffer * buf, GstVideoInfo * info, GstCudaMemoryCopyMemType mem_type,
GstVideoFrame * frame, GstMapInfo * map_info, gboolean is_src,
CUDA_MEMCPY2D copy_params[GST_VIDEO_MAX_PLANES])
{
gboolean buffer_mapped = FALSE;
guint i;
#ifdef HAVE_NVCODEC_NVMM
if (mem_type == GST_CUDA_MEMORY_COPY_MEM_NVMM) {
NvBufSurface *surface;
NvBufSurfaceParams *surface_params;
NvBufSurfacePlaneParams *plane_params;
if (!gst_buffer_map (buf, map_info, GST_MAP_READ)) {
GST_ERROR_OBJECT (self, "Failed to map input NVMM buffer");
memset (map_info, 0, sizeof (GstMapInfo));
return FALSE;
}
surface = (NvBufSurface *) map_info->data;
GST_TRACE_OBJECT (self, "batch-size %d, num-filled %d, memType %d",
surface->batchSize, surface->numFilled, surface->memType);
surface_params = surface->surfaceList;
buffer_mapped = TRUE;
if (!surface_params) {
GST_ERROR_OBJECT (self, "NVMM memory doesn't hold buffer");
goto error;
}
plane_params = &surface_params->planeParams;
if (plane_params->num_planes != GST_VIDEO_INFO_N_PLANES (info)) {
GST_ERROR_OBJECT (self, "num_planes mismatch, %d / %d",
plane_params->num_planes, GST_VIDEO_INFO_N_PLANES (info));
goto error;
}
switch (surface->memType) {
/* TODO: NVBUF_MEM_DEFAULT on jetson is SURFACE_ARRAY */
case NVBUF_MEM_DEFAULT:
case NVBUF_MEM_CUDA_DEVICE:
{
for (i = 0; i < plane_params->num_planes; i++) {
if (is_src) {
copy_params[i].srcMemoryType = CU_MEMORYTYPE_DEVICE;
copy_params[i].srcDevice = (CUdeviceptr)
((guint8 *) surface_params->dataPtr + plane_params->offset[i]);
copy_params[i].srcPitch = plane_params->pitch[i];
} else {
copy_params[i].dstMemoryType = CU_MEMORYTYPE_DEVICE;
copy_params[i].dstDevice = (CUdeviceptr)
((guint8 *) surface_params->dataPtr + plane_params->offset[i]);
copy_params[i].dstPitch = plane_params->pitch[i];
}
}
break;
}
case NVBUF_MEM_CUDA_PINNED:
{
for (i = 0; i < plane_params->num_planes; i++) {
if (is_src) {
copy_params[i].srcMemoryType = CU_MEMORYTYPE_HOST;
copy_params[i].srcHost =
((guint8 *) surface_params->dataPtr + plane_params->offset[i]);
copy_params[i].srcPitch = plane_params->pitch[i];
} else {
copy_params[i].dstMemoryType = CU_MEMORYTYPE_HOST;
copy_params[i].dstHost =
((guint8 *) surface_params->dataPtr + plane_params->offset[i]);
copy_params[i].dstPitch = plane_params->pitch[i];
}
}
break;
}
case NVBUF_MEM_CUDA_UNIFIED:
{
for (i = 0; i < plane_params->num_planes; i++) {
if (is_src) {
copy_params[i].srcMemoryType = CU_MEMORYTYPE_UNIFIED;
copy_params[i].srcDevice = (CUdeviceptr)
((guint8 *) surface_params->dataPtr + plane_params->offset[i]);
copy_params[i].srcPitch = plane_params->pitch[i];
} else {
copy_params[i].dstMemoryType = CU_MEMORYTYPE_UNIFIED;
copy_params[i].dstDevice = (CUdeviceptr)
((guint8 *) surface_params->dataPtr + plane_params->offset[i]);
copy_params[i].dstPitch = plane_params->pitch[i];
}
}
break;
}
default:
GST_ERROR_OBJECT (self, "Unexpected NVMM memory type %d",
surface->memType);
goto error;
}
for (i = 0; i < plane_params->num_planes; i++) {
copy_params[i].WidthInBytes = plane_params->width[i] *
plane_params->bytesPerPix[i];
copy_params[i].Height = plane_params->height[i];
}
} else
#endif
{
GstMapFlags map_flags;
if (is_src)
map_flags = GST_MAP_READ;
else
map_flags = GST_MAP_WRITE;
if (mem_type == GST_CUDA_MEMORY_COPY_MEM_CUDA)
map_flags |= GST_MAP_CUDA;
if (!gst_video_frame_map (frame, info, buf, map_flags)) {
GST_ERROR_OBJECT (self, "Failed to map buffer");
goto error;
}
for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (frame); i++) {
if (is_src) {
if (mem_type == GST_CUDA_MEMORY_COPY_MEM_CUDA) {
copy_params[i].srcMemoryType = CU_MEMORYTYPE_DEVICE;
copy_params[i].srcDevice =
(CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (frame, i);
} else {
copy_params[i].srcMemoryType = CU_MEMORYTYPE_HOST;
copy_params[i].srcHost = GST_VIDEO_FRAME_PLANE_DATA (frame, i);
}
copy_params[i].srcPitch = GST_VIDEO_FRAME_PLANE_STRIDE (frame, i);
} else {
if (mem_type == GST_CUDA_MEMORY_COPY_MEM_CUDA) {
copy_params[i].dstMemoryType = CU_MEMORYTYPE_DEVICE;
copy_params[i].dstDevice =
(CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (frame, i);
} else {
copy_params[i].dstMemoryType = CU_MEMORYTYPE_HOST;
copy_params[i].dstHost = GST_VIDEO_FRAME_PLANE_DATA (frame, i);
}
copy_params[i].dstPitch = GST_VIDEO_FRAME_PLANE_STRIDE (frame, i);
}
copy_params[i].WidthInBytes = GST_VIDEO_FRAME_COMP_WIDTH (frame, i) *
GST_VIDEO_FRAME_COMP_PSTRIDE (frame, i);
copy_params[i].Height = GST_VIDEO_FRAME_COMP_HEIGHT (frame, i);
}
}
return TRUE;
error:
if (buffer_mapped) {
gst_buffer_unmap (buf, map_info);
memset (map_info, 0, sizeof (GstMapInfo));
}
return FALSE;
}
static void
gst_cuda_memory_copy_unmap (GstCudaMemoryCopy * self, GstBuffer * buf,
GstVideoFrame * frame, GstMapInfo * map_info)
{
if (frame->buffer)
gst_video_frame_unmap (frame);
if (map_info->data)
gst_buffer_unmap (buf, map_info);
}
static gboolean
gst_cuda_memory_copy_transform_cuda (GstCudaMemoryCopy * self,
GstBuffer * inbuf, GstVideoInfo * in_info, GstCudaMemoryCopyMemType in_type,
GstBuffer * outbuf, GstVideoInfo * out_info,
GstCudaMemoryCopyMemType out_type)
{
GstCudaBaseTransform *trans = GST_CUDA_BASE_TRANSFORM (self);
GstVideoFrame in_frame, out_frame;
gboolean ret = FALSE;
CUstream cuda_stream = trans->cuda_stream;
GstMapInfo in_map, out_map;
guint i;
CUDA_MEMCPY2D copy_params[GST_VIDEO_MAX_PLANES];
memset (copy_params, 0, sizeof (copy_params));
memset (&in_frame, 0, sizeof (GstVideoFrame));
memset (&out_frame, 0, sizeof (GstVideoFrame));
memset (&in_map, 0, sizeof (GstMapInfo));
memset (&out_map, 0, sizeof (GstMapInfo));
if (!gst_cuda_memory_copy_map_and_fill_copy2d (self, inbuf, in_info,
in_type, &in_frame, &in_map, TRUE, copy_params)) {
GST_ERROR_OBJECT (self, "Failed to map input buffer");
return FALSE;
}
if (!gst_cuda_memory_copy_map_and_fill_copy2d (self, outbuf, out_info,
out_type, &out_frame, &out_map, FALSE, copy_params)) {
GST_ERROR_OBJECT (self, "Failed to map output buffer");
gst_cuda_memory_copy_unmap (self, inbuf, &in_frame, &in_map);
return FALSE;
}
if (!gst_cuda_context_push (trans->context)) {
GST_ERROR_OBJECT (self, "Failed to push our context");
goto unmap_and_out;
}
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (in_info); i++) {
ret = gst_cuda_result (CuMemcpy2DAsync (&copy_params[i], cuda_stream));
if (!ret) {
GST_ERROR_OBJECT (self, "Failed to copy plane %d", i);
break;
}
}
gst_cuda_result (CuStreamSynchronize (cuda_stream));
gst_cuda_context_pop (NULL);
unmap_and_out:
gst_cuda_memory_copy_unmap (self, inbuf, &in_frame, &in_map);
gst_cuda_memory_copy_unmap (self, outbuf, &out_frame, &out_map);
return ret;
}
#ifdef HAVE_NVCODEC_GST_GL
typedef struct _GLCopyData
{
GstCudaMemoryCopy *self;
GstBuffer *inbuf;
GstVideoInfo *in_info;
GstBuffer *outbuf;
GstVideoInfo *out_info;
gboolean pbo_to_cuda;
GstCudaMemoryCopyMemType cuda_mem_type;
gboolean ret;
} GLCopyData;
static GstCudaGraphicsResource *
ensure_cuda_gl_graphics_resource (GstCudaMemoryCopy * self, GstMemory * mem)
{
GstCudaBaseTransform *trans = GST_CUDA_BASE_TRANSFORM (self);
GQuark quark;
GstCudaGraphicsResource *ret = NULL;
if (!gst_is_gl_memory_pbo (mem)) {
GST_WARNING_OBJECT (self, "memory is not GL PBO memory, %s",
mem->allocator->mem_type);
return NULL;
}
quark = gst_cuda_quark_from_id (GST_CUDA_QUARK_GRAPHICS_RESOURCE);
ret = (GstCudaGraphicsResource *)
gst_mini_object_get_qdata (GST_MINI_OBJECT (mem), quark);
if (!ret) {
GstGLMemoryPBO *pbo;
GstGLBuffer *buf;
GstMapInfo info;
ret = gst_cuda_graphics_resource_new (trans->context,
GST_OBJECT (GST_GL_BASE_MEMORY_CAST (mem)->context),
GST_CUDA_GRAPHICS_RESOURCE_GL_BUFFER);
if (!gst_memory_map (mem, &info, (GstMapFlags) (GST_MAP_READ | GST_MAP_GL))) {
GST_ERROR_OBJECT (self, "Failed to map gl memory");
gst_cuda_graphics_resource_free (ret);
return NULL;
}
pbo = (GstGLMemoryPBO *) mem;
buf = pbo->pbo;
if (!gst_cuda_graphics_resource_register_gl_buffer (ret,
buf->id, CU_GRAPHICS_REGISTER_FLAGS_NONE)) {
GST_ERROR_OBJECT (self, "Failed to register gl buffer");
gst_memory_unmap (mem, &info);
gst_cuda_graphics_resource_free (ret);
return NULL;
}
gst_memory_unmap (mem, &info);
gst_mini_object_set_qdata (GST_MINI_OBJECT (mem), quark, ret,
(GDestroyNotify) gst_cuda_graphics_resource_free);
}
return ret;
}
static void
gl_copy_thread_func (GstGLContext * gl_context, GLCopyData * data)
{
GstCudaMemoryCopy *self = data->self;
GstCudaBaseTransform *trans = GST_CUDA_BASE_TRANSFORM (self);
GstCudaGraphicsResource *resources[GST_VIDEO_MAX_PLANES];
guint num_resources;
GstBuffer *gl_buf, *cuda_buf;
GstVideoFrame cuda_frame;
GstMapInfo cuda_map_info;
CUDA_MEMCPY2D copy_params[GST_VIDEO_MAX_PLANES];
CUstream cuda_stream = trans->cuda_stream;
gboolean ret = FALSE;
guint i;
memset (copy_params, 0, sizeof (copy_params));
memset (&cuda_frame, 0, sizeof (GstVideoFrame));
memset (&cuda_map_info, 0, sizeof (GstMapInfo));
data->ret = FALSE;
/* Incompatible gl context */
gst_cuda_memory_copy_ensure_gl_interop (gl_context, &ret);
if (!ret)
return;
if (data->pbo_to_cuda) {
gl_buf = data->inbuf;
cuda_buf = data->outbuf;
if (!gst_cuda_memory_copy_map_and_fill_copy2d (self, cuda_buf,
data->out_info, data->cuda_mem_type, &cuda_frame, &cuda_map_info,
FALSE, copy_params)) {
GST_ERROR_OBJECT (self, "Failed to map output CUDA buffer");
return;
}
} else {
gl_buf = data->outbuf;
cuda_buf = data->inbuf;
if (!gst_cuda_memory_copy_map_and_fill_copy2d (self, cuda_buf,
data->in_info, data->cuda_mem_type, &cuda_frame, &cuda_map_info,
TRUE, copy_params)) {
GST_ERROR_OBJECT (self, "Failed to map input CUDA buffer");
return;
}
}
num_resources = gst_buffer_n_memory (gl_buf);
g_assert (num_resources >= GST_VIDEO_INFO_N_PLANES (data->in_info));
if (!gst_cuda_context_push (trans->context)) {
GST_ERROR_OBJECT (self, "Failed to push context");
gst_cuda_memory_copy_unmap (self, cuda_buf, &cuda_frame, &cuda_map_info);
return;
}
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (data->in_info); i++) {
GstMemory *mem = gst_buffer_peek_memory (gl_buf, i);
GstGLMemoryPBO *pbo;
resources[i] = ensure_cuda_gl_graphics_resource (self, mem);
if (!resources[i])
goto out;
pbo = (GstGLMemoryPBO *) mem;
if (!data->pbo_to_cuda) {
/* Need PBO -> texture */
GST_MINI_OBJECT_FLAG_SET (mem, GST_GL_BASE_MEMORY_TRANSFER_NEED_UPLOAD);
/* PBO -> sysmem */
GST_MINI_OBJECT_FLAG_SET (pbo->pbo,
GST_GL_BASE_MEMORY_TRANSFER_NEED_DOWNLOAD);
} else {
/* get the texture into the PBO */
gst_gl_memory_pbo_upload_transfer (pbo);
gst_gl_memory_pbo_download_transfer (pbo);
}
}
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (data->in_info); i++) {
CUgraphicsResource cuda_resource;
CUdeviceptr dev_ptr;
size_t size;
gboolean copy_ret;
if (data->pbo_to_cuda) {
cuda_resource =
gst_cuda_graphics_resource_map (resources[i], cuda_stream,
CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY);
} else {
cuda_resource =
gst_cuda_graphics_resource_map (resources[i], cuda_stream,
CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD);
}
if (!cuda_resource) {
GST_ERROR_OBJECT (self, "Failed to map graphics resource %d", i);
goto out;
}
if (!gst_cuda_result (CuGraphicsResourceGetMappedPointer (&dev_ptr, &size,
cuda_resource))) {
gst_cuda_graphics_resource_unmap (resources[i], cuda_stream);
GST_ERROR_OBJECT (self, "Failed to get mapped pointer");
goto out;
}
if (data->pbo_to_cuda) {
copy_params[i].srcMemoryType = CU_MEMORYTYPE_DEVICE;
copy_params[i].srcDevice = dev_ptr;
copy_params[i].srcPitch = GST_VIDEO_INFO_PLANE_STRIDE (data->in_info, i);
} else {
copy_params[i].dstMemoryType = CU_MEMORYTYPE_DEVICE;
copy_params[i].dstDevice = dev_ptr;
copy_params[i].dstPitch = GST_VIDEO_INFO_PLANE_STRIDE (data->out_info, i);
}
copy_ret = gst_cuda_result (CuMemcpy2DAsync (&copy_params[i], cuda_stream));
gst_cuda_graphics_resource_unmap (resources[i], cuda_stream);
if (!copy_ret) {
GST_ERROR_OBJECT (self, "Failed to copy plane %d", i);
goto out;
}
}
data->ret = TRUE;
out:
gst_cuda_result (CuStreamSynchronize (cuda_stream));
gst_cuda_context_pop (NULL);
gst_cuda_memory_copy_unmap (self, cuda_buf, &cuda_frame, &cuda_map_info);
}
static gboolean
gst_cuda_memory_copy_gl_interop (GstCudaMemoryCopy * self,
GstBuffer * inbuf, GstVideoInfo * in_info, GstBuffer * outbuf,
GstVideoInfo * out_info, GstGLContext * context, gboolean pbo_to_cuda,
GstCudaMemoryCopyMemType cuda_mem_type)
{
GLCopyData data;
g_assert (cuda_mem_type == GST_CUDA_MEMORY_COPY_MEM_CUDA ||
cuda_mem_type == GST_CUDA_MEMORY_COPY_MEM_NVMM);
data.self = self;
data.inbuf = inbuf;
data.in_info = in_info;
data.outbuf = outbuf;
data.out_info = out_info;
data.pbo_to_cuda = pbo_to_cuda;
data.cuda_mem_type = cuda_mem_type;
data.ret = FALSE;
gst_gl_context_thread_add (context,
(GstGLContextThreadFunc) gl_copy_thread_func, &data);
return data.ret;
}
#endif
#ifdef HAVE_NVCODEC_GST_D3D11
static GstCudaGraphicsResource *
ensure_cuda_d3d11_graphics_resource (GstCudaMemoryCopy * self, GstMemory * mem)
{
GstCudaBaseTransform *trans = GST_CUDA_BASE_TRANSFORM (self);
GQuark quark;
GstCudaGraphicsResource *ret = NULL;
if (!gst_is_d3d11_memory (mem)) {
GST_WARNING_OBJECT (self, "memory is not D3D11 memory, %s",
mem->allocator->mem_type);
return NULL;
}
quark = gst_cuda_quark_from_id (GST_CUDA_QUARK_GRAPHICS_RESOURCE);
ret = (GstCudaGraphicsResource *)
gst_mini_object_get_qdata (GST_MINI_OBJECT (mem), quark);
if (!ret) {
ret = gst_cuda_graphics_resource_new (trans->context,
GST_OBJECT (GST_D3D11_MEMORY_CAST (mem)->device),
GST_CUDA_GRAPHICS_RESOURCE_D3D11_RESOURCE);
if (!gst_cuda_graphics_resource_register_d3d11_resource (ret,
gst_d3d11_memory_get_resource_handle (GST_D3D11_MEMORY_CAST (mem)),
CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LOAD_STORE)) {
GST_ERROR_OBJECT (self, "failed to register d3d11 resource");
gst_cuda_graphics_resource_free (ret);
return NULL;
}
gst_mini_object_set_qdata (GST_MINI_OBJECT (mem), quark, ret,
(GDestroyNotify) gst_cuda_graphics_resource_free);
}
return ret;
}
static gboolean
gst_cuda_memory_copy_d3d11_interop (GstCudaMemoryCopy * self,
GstBuffer * inbuf, GstVideoInfo * in_info, GstBuffer * outbuf,
GstVideoInfo * out_info, GstD3D11Device * device, gboolean d3d11_to_cuda,
GstCudaMemoryCopyMemType cuda_mem_type)
{
GstCudaBaseTransform *trans = GST_CUDA_BASE_TRANSFORM (self);
GstCudaGraphicsResource *resources[GST_VIDEO_MAX_PLANES];
D3D11_TEXTURE2D_DESC desc[GST_VIDEO_MAX_PLANES];
guint num_resources;
GstBuffer *d3d11_buf, *cuda_buf;
GstVideoFrame d3d11_frame, cuda_frame;
GstMapInfo cuda_map_info;
CUDA_MEMCPY2D copy_params[GST_VIDEO_MAX_PLANES];
CUstream cuda_stream = trans->cuda_stream;
gboolean ret = FALSE;
guint i;
g_assert (cuda_mem_type == GST_CUDA_MEMORY_COPY_MEM_CUDA ||
cuda_mem_type == GST_CUDA_MEMORY_COPY_MEM_NVMM);
memset (copy_params, 0, sizeof (copy_params));
memset (&cuda_frame, 0, sizeof (GstVideoFrame));
memset (&cuda_map_info, 0, sizeof (GstMapInfo));
/* Incompatible d3d11 device */
ret =
gst_cuda_memory_copy_ensure_d3d11_interop (GST_CUDA_BASE_TRANSFORM
(self)->context, device);
if (!ret)
return FALSE;
if (d3d11_to_cuda) {
d3d11_buf = inbuf;
cuda_buf = outbuf;
if (!gst_video_frame_map (&d3d11_frame, in_info, d3d11_buf,
GST_MAP_READ | GST_MAP_D3D11)) {
GST_ERROR_OBJECT (self, "Failed to map input D3D11 buffer");
return FALSE;
}
if (!gst_cuda_memory_copy_map_and_fill_copy2d (self, cuda_buf,
out_info, cuda_mem_type, &cuda_frame, &cuda_map_info,
FALSE, copy_params)) {
GST_ERROR_OBJECT (self, "Failed to map output CUDA buffer");
gst_video_frame_unmap (&d3d11_frame);
return FALSE;
}
} else {
d3d11_buf = outbuf;
cuda_buf = inbuf;
if (!gst_video_frame_map (&d3d11_frame, out_info, d3d11_buf,
GST_MAP_WRITE | GST_MAP_D3D11)) {
GST_ERROR_OBJECT (self, "Failed to map output D3D11 buffer");
return FALSE;
}
if (!gst_cuda_memory_copy_map_and_fill_copy2d (self, cuda_buf,
in_info, cuda_mem_type, &cuda_frame, &cuda_map_info,
TRUE, copy_params)) {
GST_ERROR_OBJECT (self, "Failed to map input CUDA buffer");
gst_video_frame_unmap (&d3d11_frame);
return FALSE;
}
}
num_resources = gst_buffer_n_memory (d3d11_buf);
g_assert (num_resources >= GST_VIDEO_FRAME_N_PLANES (&d3d11_frame));
if (!gst_cuda_context_push (trans->context)) {
GST_ERROR_OBJECT (self, "Failed to push context");
gst_video_frame_unmap (&d3d11_frame);
gst_cuda_memory_copy_unmap (self, cuda_buf, &cuda_frame, &cuda_map_info);
return FALSE;
}
for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (&d3d11_frame); i++) {
GstMemory *mem = gst_buffer_peek_memory (d3d11_buf, i);
resources[i] = ensure_cuda_d3d11_graphics_resource (self, mem);
if (!resources[i]
|| !gst_d3d11_memory_get_texture_desc (GST_D3D11_MEMORY_CAST (mem),
&desc[i]))
goto out;
}
for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (&d3d11_frame); i++) {
CUgraphicsResource cuda_resource;
CUarray d3d11_array;
gboolean copy_ret;
if (d3d11_to_cuda) {
cuda_resource =
gst_cuda_graphics_resource_map (resources[i], cuda_stream,
CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY);
} else {
cuda_resource =
gst_cuda_graphics_resource_map (resources[i], cuda_stream,
CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD);
}
if (!cuda_resource) {
GST_ERROR_OBJECT (self, "Failed to map graphics resource %d", i);
goto out;
}
if (!gst_cuda_result (CuGraphicsSubResourceGetMappedArray (&d3d11_array,
cuda_resource, 0, 0))) {
gst_cuda_graphics_resource_unmap (resources[i], cuda_stream);
GST_ERROR_OBJECT (self, "Failed to get mapped array");
goto out;
}
if (d3d11_to_cuda) {
copy_params[i].srcMemoryType = CU_MEMORYTYPE_ARRAY;
copy_params[i].srcArray = d3d11_array;
copy_params[i].srcPitch =
desc[i].Width * GST_VIDEO_FRAME_COMP_PSTRIDE (&d3d11_frame, i);
} else {
copy_params[i].dstMemoryType = CU_MEMORYTYPE_ARRAY;
copy_params[i].dstArray = d3d11_array;
copy_params[i].dstPitch =
desc[i].Width * GST_VIDEO_FRAME_COMP_PSTRIDE (&d3d11_frame, i);
}
copy_ret = gst_cuda_result (CuMemcpy2DAsync (&copy_params[i], cuda_stream));
gst_cuda_graphics_resource_unmap (resources[i], cuda_stream);
if (!copy_ret) {
GST_ERROR_OBJECT (self, "Failed to copy plane %d", i);
goto out;
}
}
ret = TRUE;
out:
gst_cuda_result (CuStreamSynchronize (cuda_stream));
gst_cuda_context_pop (NULL);
gst_video_frame_unmap (&d3d11_frame);
gst_cuda_memory_copy_unmap (self, cuda_buf, &cuda_frame, &cuda_map_info);
return ret;
}
#endif
static const gchar *
mem_type_to_string (GstCudaMemoryCopyMemType type)
{
switch (type) {
case GST_CUDA_MEMORY_COPY_MEM_SYSTEM:
return "SYSTEM";
case GST_CUDA_MEMORY_COPY_MEM_CUDA:
return "CUDA";
case GST_CUDA_MEMORY_COPY_MEM_NVMM:
return "NVMM";
case GST_CUDA_MEMORY_COPY_MEM_GL:
return "GL";
case GST_CUDA_MEMORY_COPY_MEM_D3D11:
return "D3D11";
default:
g_assert_not_reached ();
break;
}
return "UNKNOWN";
}
static GstFlowReturn
gst_cuda_memory_copy_transform (GstBaseTransform * trans, GstBuffer * inbuf,
GstBuffer * outbuf)
@@ -1485,8 +784,8 @@ gst_cuda_memory_copy_transform (GstBaseTransform * trans, GstBuffer * inbuf,
GstMemory *out_mem;
GstVideoInfo *in_info, *out_info;
gboolean ret = FALSE;
GstCudaMemoryCopyMemType in_type = GST_CUDA_MEMORY_COPY_MEM_SYSTEM;
GstCudaMemoryCopyMemType out_type = GST_CUDA_MEMORY_COPY_MEM_SYSTEM;
GstCudaBufferCopyType in_type = GST_CUDA_BUFFER_COPY_SYSTEM;
GstCudaBufferCopyType out_type = GST_CUDA_BUFFER_COPY_SYSTEM;
gboolean use_device_copy = FALSE;
#ifdef HAVE_NVCODEC_GST_D3D11
D3D11_TEXTURE2D_DESC desc;
@@ -1508,174 +807,124 @@ gst_cuda_memory_copy_transform (GstBaseTransform * trans, GstBuffer * inbuf,
}
if (self->in_nvmm) {
in_type = GST_CUDA_MEMORY_COPY_MEM_NVMM;
in_type = GST_CUDA_BUFFER_COPY_NVMM;
use_device_copy = TRUE;
} else if (gst_is_cuda_memory (in_mem)) {
in_type = GST_CUDA_MEMORY_COPY_MEM_CUDA;
in_type = GST_CUDA_BUFFER_COPY_CUDA;
use_device_copy = TRUE;
#ifdef HAVE_NVCODEC_GST_GL
} else if (self->gl_context && gst_is_gl_memory_pbo (in_mem)) {
in_type = GST_CUDA_MEMORY_COPY_MEM_GL;
in_type = GST_CUDA_BUFFER_COPY_GL;
#endif
#ifdef HAVE_NVCODEC_GST_D3D11
} else if (self->d3d11_device && gst_is_d3d11_memory (in_mem)
&& gst_d3d11_memory_get_texture_desc (GST_D3D11_MEMORY_CAST (in_mem),
&desc) && desc.Usage == D3D11_USAGE_DEFAULT) {
in_type = GST_CUDA_MEMORY_COPY_MEM_D3D11;
in_type = GST_CUDA_BUFFER_COPY_D3D11;
#endif
} else {
in_type = GST_CUDA_MEMORY_COPY_MEM_SYSTEM;
in_type = GST_CUDA_BUFFER_COPY_SYSTEM;
}
if (self->out_nvmm) {
out_type = GST_CUDA_MEMORY_COPY_MEM_NVMM;
out_type = GST_CUDA_BUFFER_COPY_NVMM;
use_device_copy = TRUE;
} else if (gst_is_cuda_memory (out_mem)) {
out_type = GST_CUDA_MEMORY_COPY_MEM_CUDA;
out_type = GST_CUDA_BUFFER_COPY_CUDA;
use_device_copy = TRUE;
#ifdef HAVE_NVCODEC_GST_GL
} else if (self->gl_context && gst_is_gl_memory_pbo (out_mem)) {
out_type = GST_CUDA_MEMORY_COPY_MEM_GL;
out_type = GST_CUDA_BUFFER_COPY_GL;
#endif
#ifdef HAVE_NVCODEC_GST_D3D11
} else if (self->d3d11_device && gst_is_d3d11_memory (out_mem)
&& gst_d3d11_memory_get_texture_desc (GST_D3D11_MEMORY_CAST (out_mem),
&desc) && desc.Usage == D3D11_USAGE_DEFAULT) {
out_type = GST_CUDA_MEMORY_COPY_MEM_D3D11;
out_type = GST_CUDA_BUFFER_COPY_D3D11;
#endif
} else {
out_type = GST_CUDA_MEMORY_COPY_MEM_SYSTEM;
out_type = GST_CUDA_BUFFER_COPY_SYSTEM;
}
if (!use_device_copy) {
GST_TRACE_OBJECT (self, "Both in/out buffers are not CUDA");
if (!gst_cuda_memory_copy_transform_sysmem (self, inbuf, in_info,
outbuf, out_info)) {
if (!gst_cuda_buffer_copy (outbuf, GST_CUDA_BUFFER_COPY_SYSTEM, out_info,
inbuf, GST_CUDA_BUFFER_COPY_SYSTEM, in_info, ctrans->context,
ctrans->cuda_stream)) {
return GST_FLOW_ERROR;
}
return GST_FLOW_OK;
}
#ifdef HAVE_NVCODEC_GST_GL
if (in_type == GST_CUDA_MEMORY_COPY_MEM_GL) {
GstGLMemory *gl_mem = (GstGLMemory *) in_mem;
GstGLContext *context = gl_mem->mem.context;
GST_TRACE_OBJECT (self, "GL -> %s", mem_type_to_string (out_type));
ret = gst_cuda_buffer_copy (outbuf, out_type, out_info, inbuf, in_type,
in_info, ctrans->context, ctrans->cuda_stream);
ret = gst_cuda_memory_copy_gl_interop (self, inbuf, in_info,
outbuf, out_info, context, TRUE, out_type);
/* system memory <-> CUDA copy fallback if possible */
if (!ret) {
GstCudaBufferCopyType fallback_in_type = in_type;
GstCudaBufferCopyType fallback_out_type = out_type;
if (!ret) {
GST_LOG_OBJECT (self, "GL interop failed, try normal CUDA copy");
GST_LOG_OBJECT (self,
"Copy %s -> %s failed, checking whether fallback is possible",
gst_cuda_buffery_copy_type_to_string (in_type),
gst_cuda_buffery_copy_type_to_string (out_type));
/* We cannot use software fallback for NVMM */
if (out_type == GST_CUDA_MEMORY_COPY_MEM_NVMM) {
ret = gst_cuda_memory_copy_transform_cuda (self, inbuf, in_info,
GST_CUDA_MEMORY_COPY_MEM_SYSTEM, outbuf, out_info, out_type);
} else {
ret = !gst_cuda_memory_copy_transform_sysmem (self, inbuf, in_info,
outbuf, out_info);
}
switch (in_type) {
case GST_CUDA_BUFFER_COPY_GL:
case GST_CUDA_BUFFER_COPY_D3D11:
fallback_in_type = GST_CUDA_BUFFER_COPY_SYSTEM;
break;
default:
break;
}
if (!ret)
return GST_FLOW_ERROR;
return GST_FLOW_OK;
}
if (out_type == GST_CUDA_MEMORY_COPY_MEM_GL) {
GstGLMemory *gl_mem = (GstGLMemory *) out_mem;
GstGLContext *context = gl_mem->mem.context;
GST_TRACE_OBJECT (self, "%s -> GL", mem_type_to_string (in_type));
ret = gst_cuda_memory_copy_gl_interop (self, inbuf, in_info,
outbuf, out_info, context, FALSE, in_type);
if (!ret) {
GST_LOG_OBJECT (self, "GL interop failed, try normal CUDA copy");
/* We cannot use software fallback for NVMM */
if (in_type == GST_CUDA_MEMORY_COPY_MEM_NVMM) {
ret = gst_cuda_memory_copy_transform_cuda (self, inbuf, in_info,
in_type, outbuf, out_info, GST_CUDA_MEMORY_COPY_MEM_SYSTEM);
} else {
ret = !gst_cuda_memory_copy_transform_sysmem (self, inbuf, in_info,
outbuf, out_info);
}
switch (out_type) {
case GST_CUDA_BUFFER_COPY_GL:
case GST_CUDA_BUFFER_COPY_D3D11:
fallback_out_type = GST_CUDA_BUFFER_COPY_SYSTEM;
break;
default:
break;
}
if (!ret)
if (in_type == fallback_in_type && out_type == fallback_out_type) {
GST_ERROR_OBJECT (self, "Failed to copy %s -> %s",
gst_cuda_buffery_copy_type_to_string (in_type),
gst_cuda_buffery_copy_type_to_string (out_type));
return GST_FLOW_ERROR;
return GST_FLOW_OK;
}
#endif /* HAVE_NVCODEC_GST_GL */
#ifdef HAVE_NVCODEC_GST_D3D11
if (in_type == GST_CUDA_MEMORY_COPY_MEM_D3D11) {
GstD3D11Memory *dmem = (GstD3D11Memory *) in_mem;
GstD3D11Device *device = dmem->device;
GST_TRACE_OBJECT (self, "D3D11 -> %s", mem_type_to_string (out_type));
gst_d3d11_device_lock (device);
ret = gst_cuda_memory_copy_d3d11_interop (self, inbuf, in_info,
outbuf, out_info, device, TRUE, out_type);
gst_d3d11_device_unlock (device);
if (!ret) {
GST_LOG_OBJECT (self, "D3D11 interop failed, try normal CUDA copy");
ret = !gst_cuda_memory_copy_transform_sysmem (self, inbuf, in_info,
outbuf, out_info);
}
if (!ret)
return GST_FLOW_ERROR;
GST_LOG_OBJECT (self, "Trying %s -> %s fallback",
gst_cuda_buffery_copy_type_to_string (fallback_in_type),
gst_cuda_buffery_copy_type_to_string (fallback_out_type));
return GST_FLOW_OK;
}
if (out_type == GST_CUDA_MEMORY_COPY_MEM_D3D11) {
GstD3D11Memory *dmem = (GstD3D11Memory *) out_mem;
GstD3D11Device *device = dmem->device;
GST_TRACE_OBJECT (self, "%s -> D3D11", mem_type_to_string (in_type));
gst_d3d11_device_lock (device);
ret = gst_cuda_memory_copy_d3d11_interop (self, inbuf, in_info,
outbuf, out_info, device, FALSE, in_type);
gst_d3d11_device_unlock (device);
if (!ret) {
GST_LOG_OBJECT (self, "D3D11 interop failed, try normal CUDA copy");
ret = !gst_cuda_memory_copy_transform_sysmem (self, inbuf, in_info,
outbuf, out_info);
}
if (!ret)
return GST_FLOW_ERROR;
return GST_FLOW_OK;
}
#endif /* HAVE_NVCODEC_GST_D3D11 */
GST_TRACE_OBJECT (self, "%s -> %s",
mem_type_to_string (in_type), mem_type_to_string (out_type));
ret = gst_cuda_memory_copy_transform_cuda (self, inbuf, in_info, in_type,
outbuf, out_info, out_type);
if (!ret && !self->in_nvmm && !self->out_nvmm) {
GST_LOG_OBJECT (self, "Failed to copy using fast path, trying fallback");
ret =
gst_cuda_memory_copy_transform_sysmem (self, inbuf, in_info, outbuf,
out_info);
ret = gst_cuda_buffer_copy (outbuf, fallback_out_type, out_info, inbuf,
fallback_in_type, in_info, ctrans->context, ctrans->cuda_stream);
}
if (ret)
return GST_FLOW_OK;
if (in_type == GST_CUDA_BUFFER_COPY_NVMM ||
out_type == GST_CUDA_BUFFER_COPY_NVMM) {
GST_ERROR_OBJECT (self, "Failed to copy NVMM memory");
return GST_FLOW_ERROR;
}
/* final fallback using system memory */
ret = gst_cuda_buffer_copy (outbuf, GST_CUDA_BUFFER_COPY_SYSTEM, out_info,
inbuf, GST_CUDA_BUFFER_COPY_SYSTEM, in_info, ctrans->context,
ctrans->cuda_stream);
if (ret)
return GST_FLOW_OK;
GST_ERROR_OBJECT (self, "Failed to copy %s -> %s",
gst_cuda_buffery_copy_type_to_string (in_type),
gst_cuda_buffery_copy_type_to_string (out_type));
return GST_FLOW_ERROR;
}


@@ -33,6 +33,12 @@
#include <gst/d3d11/gstd3d11.h>
#endif
#ifdef HAVE_NVCODEC_NVMM
#include "gstcudanvmm.h"
#endif
#include "gstcudamemory.h"
GST_DEBUG_CATEGORY_STATIC (gst_cuda_utils_debug);
#define GST_CAT_DEFAULT gst_cuda_utils_debug
GST_DEBUG_CATEGORY_STATIC (GST_CAT_CONTEXT);
@@ -658,3 +664,916 @@ gst_cuda_graphics_resource_free (GstCudaGraphicsResource * resource)
gst_object_unref (resource->graphics_context);
g_free (resource);
}
const gchar *
gst_cuda_buffery_copy_type_to_string (GstCudaBufferCopyType type)
{
switch (type) {
case GST_CUDA_BUFFER_COPY_SYSTEM:
return "SYSTEM";
case GST_CUDA_BUFFER_COPY_CUDA:
return "CUDA";
case GST_CUDA_BUFFER_COPY_GL:
return "GL";
case GST_CUDA_BUFFER_COPY_D3D11:
return "D3D11";
case GST_CUDA_BUFFER_COPY_NVMM:
return "NVMM";
default:
g_assert_not_reached ();
break;
}
return "UNKNOWN";
}
static gboolean
gst_cuda_buffer_fallback_copy (GstBuffer * dst, const GstVideoInfo * dst_info,
GstBuffer * src, const GstVideoInfo * src_info)
{
GstVideoFrame dst_frame, src_frame;
guint i, j;
if (!gst_video_frame_map (&dst_frame, dst_info, dst, GST_MAP_WRITE)) {
GST_ERROR ("Failed to map dst buffer");
return FALSE;
}
if (!gst_video_frame_map (&src_frame, src_info, src, GST_MAP_READ)) {
gst_video_frame_unmap (&dst_frame);
GST_ERROR ("Failed to map src buffer");
return FALSE;
}
/* src and dst resolutions can be different, pick min value */
for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (&dst_frame); i++) {
guint dst_width_in_bytes, src_width_in_bytes;
guint dst_height, src_height;
guint width_in_bytes, height;
guint dst_stride, src_stride;
guint8 *dst_data, *src_data;
dst_width_in_bytes = GST_VIDEO_FRAME_COMP_WIDTH (&dst_frame, i) *
GST_VIDEO_FRAME_COMP_PSTRIDE (&dst_frame, i);
src_width_in_bytes = GST_VIDEO_FRAME_COMP_WIDTH (&src_frame, i) *
GST_VIDEO_FRAME_COMP_PSTRIDE (&src_frame, i);
width_in_bytes = MIN (dst_width_in_bytes, src_width_in_bytes);
dst_height = GST_VIDEO_FRAME_COMP_HEIGHT (&dst_frame, i);
src_height = GST_VIDEO_FRAME_COMP_HEIGHT (&src_frame, i);
height = MIN (dst_height, src_height);
dst_stride = GST_VIDEO_FRAME_PLANE_STRIDE (&dst_frame, i);
src_stride = GST_VIDEO_FRAME_PLANE_STRIDE (&src_frame, i);
dst_data = GST_VIDEO_FRAME_PLANE_DATA (&dst_frame, i);
src_data = GST_VIDEO_FRAME_PLANE_DATA (&src_frame, i);
for (j = 0; j < height; j++) {
memcpy (dst_data, src_data, width_in_bytes);
dst_data += dst_stride;
src_data += src_stride;
}
}
gst_video_frame_unmap (&src_frame);
gst_video_frame_unmap (&dst_frame);
return TRUE;
}
static gboolean
map_buffer_and_fill_copy2d (GstBuffer * buf, const GstVideoInfo * info,
GstCudaBufferCopyType copy_type, GstVideoFrame * frame,
GstMapInfo * map_info, gboolean is_src,
CUDA_MEMCPY2D copy_params[GST_VIDEO_MAX_PLANES])
{
gboolean buffer_mapped = FALSE;
guint i;
#ifdef HAVE_NVCODEC_NVMM
if (copy_type == GST_CUDA_BUFFER_COPY_NVMM) {
NvBufSurface *surface;
NvBufSurfaceParams *surface_params;
NvBufSurfacePlaneParams *plane_params;
if (!gst_buffer_map (buf, map_info, GST_MAP_READ)) {
GST_ERROR ("Failed to map input NVMM buffer");
memset (map_info, 0, sizeof (GstMapInfo));
return FALSE;
}
surface = (NvBufSurface *) map_info->data;
GST_TRACE ("batch-size %d, num-filled %d, memType %d",
surface->batchSize, surface->numFilled, surface->memType);
surface_params = surface->surfaceList;
buffer_mapped = TRUE;
if (!surface_params) {
GST_ERROR ("NVMM memory doesn't hold buffer");
goto error;
}
plane_params = &surface_params->planeParams;
if (plane_params->num_planes != GST_VIDEO_INFO_N_PLANES (info)) {
GST_ERROR ("num_planes mismatch, %d / %d",
plane_params->num_planes, GST_VIDEO_INFO_N_PLANES (info));
goto error;
}
switch (surface->memType) {
/* TODO: NVBUF_MEM_DEFAULT on jetson is SURFACE_ARRAY */
case NVBUF_MEM_DEFAULT:
case NVBUF_MEM_CUDA_DEVICE:
{
for (i = 0; i < plane_params->num_planes; i++) {
if (is_src) {
copy_params[i].srcMemoryType = CU_MEMORYTYPE_DEVICE;
copy_params[i].srcDevice = (CUdeviceptr)
((guint8 *) surface_params->dataPtr + plane_params->offset[i]);
copy_params[i].srcPitch = plane_params->pitch[i];
} else {
copy_params[i].dstMemoryType = CU_MEMORYTYPE_DEVICE;
copy_params[i].dstDevice = (CUdeviceptr)
((guint8 *) surface_params->dataPtr + plane_params->offset[i]);
copy_params[i].dstPitch = plane_params->pitch[i];
}
}
break;
}
case NVBUF_MEM_CUDA_PINNED:
{
for (i = 0; i < plane_params->num_planes; i++) {
if (is_src) {
copy_params[i].srcMemoryType = CU_MEMORYTYPE_HOST;
copy_params[i].srcHost =
((guint8 *) surface_params->dataPtr + plane_params->offset[i]);
copy_params[i].srcPitch = plane_params->pitch[i];
} else {
copy_params[i].dstMemoryType = CU_MEMORYTYPE_HOST;
copy_params[i].dstHost =
((guint8 *) surface_params->dataPtr + plane_params->offset[i]);
copy_params[i].dstPitch = plane_params->pitch[i];
}
}
break;
}
case NVBUF_MEM_CUDA_UNIFIED:
{
for (i = 0; i < plane_params->num_planes; i++) {
if (is_src) {
copy_params[i].srcMemoryType = CU_MEMORYTYPE_UNIFIED;
copy_params[i].srcDevice = (CUdeviceptr)
((guint8 *) surface_params->dataPtr + plane_params->offset[i]);
copy_params[i].srcPitch = plane_params->pitch[i];
} else {
copy_params[i].dstMemoryType = CU_MEMORYTYPE_UNIFIED;
copy_params[i].dstDevice = (CUdeviceptr)
((guint8 *) surface_params->dataPtr + plane_params->offset[i]);
copy_params[i].dstPitch = plane_params->pitch[i];
}
}
break;
}
default:
GST_ERROR ("Unexpected NVMM memory type %d", surface->memType);
goto error;
}
for (i = 0; i < plane_params->num_planes; i++) {
gsize width_in_bytes, height;
width_in_bytes = plane_params->width[i] * plane_params->bytesPerPix[i];
height = plane_params->height[i];
if (copy_params[i].WidthInBytes == 0 ||
width_in_bytes < copy_params[i].WidthInBytes) {
copy_params[i].WidthInBytes = width_in_bytes;
}
if (copy_params[i].Height == 0 || height < copy_params[i].Height) {
copy_params[i].Height = height;
}
}
} else
#endif
{
GstMapFlags map_flags;
if (is_src)
map_flags = GST_MAP_READ;
else
map_flags = GST_MAP_WRITE;
if (copy_type == GST_CUDA_BUFFER_COPY_CUDA)
map_flags |= GST_MAP_CUDA;
if (!gst_video_frame_map (frame, info, buf, map_flags)) {
GST_ERROR ("Failed to map buffer");
goto error;
}
for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (frame); i++) {
gsize width_in_bytes, height;
if (is_src) {
if (copy_type == GST_CUDA_BUFFER_COPY_CUDA) {
copy_params[i].srcMemoryType = CU_MEMORYTYPE_DEVICE;
copy_params[i].srcDevice =
(CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (frame, i);
} else {
copy_params[i].srcMemoryType = CU_MEMORYTYPE_HOST;
copy_params[i].srcHost = GST_VIDEO_FRAME_PLANE_DATA (frame, i);
}
copy_params[i].srcPitch = GST_VIDEO_FRAME_PLANE_STRIDE (frame, i);
} else {
if (copy_type == GST_CUDA_BUFFER_COPY_CUDA) {
copy_params[i].dstMemoryType = CU_MEMORYTYPE_DEVICE;
copy_params[i].dstDevice =
(CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (frame, i);
} else {
copy_params[i].dstMemoryType = CU_MEMORYTYPE_HOST;
copy_params[i].dstHost = GST_VIDEO_FRAME_PLANE_DATA (frame, i);
}
copy_params[i].dstPitch = GST_VIDEO_FRAME_PLANE_STRIDE (frame, i);
}
width_in_bytes = GST_VIDEO_FRAME_COMP_WIDTH (frame, i) *
GST_VIDEO_FRAME_COMP_PSTRIDE (frame, i);
height = GST_VIDEO_FRAME_COMP_HEIGHT (frame, i);
if (copy_params[i].WidthInBytes == 0 ||
width_in_bytes < copy_params[i].WidthInBytes) {
copy_params[i].WidthInBytes = width_in_bytes;
}
if (copy_params[i].Height == 0 || height < copy_params[i].Height) {
copy_params[i].Height = height;
}
}
}
return TRUE;
error:
if (buffer_mapped) {
gst_buffer_unmap (buf, map_info);
memset (map_info, 0, sizeof (GstMapInfo));
}
return FALSE;
}
static void
unmap_buffer_or_frame (GstBuffer * buf, GstVideoFrame * frame,
GstMapInfo * map_info)
{
if (frame->buffer)
gst_video_frame_unmap (frame);
if (map_info->data)
gst_buffer_unmap (buf, map_info);
}
static gboolean
gst_cuda_buffer_copy_internal (GstBuffer * dst_buf,
GstCudaBufferCopyType dst_type, const GstVideoInfo * dst_info,
GstBuffer * src_buf, GstCudaBufferCopyType src_type,
const GstVideoInfo * src_info, GstCudaContext * context, CUstream stream)
{
GstVideoFrame dst_frame, src_frame;
gboolean ret = FALSE;
GstMapInfo dst_map, src_map;
guint i;
CUDA_MEMCPY2D copy_params[GST_VIDEO_MAX_PLANES];
memset (copy_params, 0, sizeof (copy_params));
memset (&dst_frame, 0, sizeof (GstVideoFrame));
memset (&src_frame, 0, sizeof (GstVideoFrame));
memset (&dst_map, 0, sizeof (GstMapInfo));
memset (&src_map, 0, sizeof (GstMapInfo));
if (!map_buffer_and_fill_copy2d (dst_buf, dst_info,
dst_type, &dst_frame, &dst_map, FALSE, copy_params)) {
GST_ERROR_OBJECT (context, "Failed to map output buffer");
return FALSE;
}
if (!map_buffer_and_fill_copy2d (src_buf, src_info,
src_type, &src_frame, &src_map, TRUE, copy_params)) {
GST_ERROR_OBJECT (context, "Failed to map input buffer");
unmap_buffer_or_frame (dst_buf, &dst_frame, &dst_map);
return FALSE;
}
if (!gst_cuda_context_push (context)) {
GST_ERROR_OBJECT (context, "Failed to push our context");
goto unmap_and_out;
}
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (dst_info); i++) {
ret = gst_cuda_result (CuMemcpy2DAsync (&copy_params[i], stream));
if (!ret) {
GST_ERROR_OBJECT (context, "Failed to copy plane %d", i);
break;
}
}
gst_cuda_result (CuStreamSynchronize (stream));
gst_cuda_context_pop (NULL);
unmap_and_out:
unmap_buffer_or_frame (src_buf, &src_frame, &src_map);
unmap_buffer_or_frame (dst_buf, &dst_frame, &dst_map);
return ret;
}
#ifdef HAVE_NVCODEC_GST_GL
static gboolean
ensure_gl_interop (void)
{
guint device_count = 0;
CUdevice device_list[1] = { 0, };
CUresult cuda_ret;
cuda_ret = CuGLGetDevices (&device_count,
device_list, 1, CU_GL_DEVICE_LIST_ALL);
if (cuda_ret != CUDA_SUCCESS || device_count == 0)
return FALSE;
return TRUE;
}
typedef struct _GLCopyData
{
GstBuffer *src_buf;
const GstVideoInfo *src_info;
GstBuffer *dst_buf;
const GstVideoInfo *dst_info;
gboolean pbo_to_cuda;
GstCudaBufferCopyType copy_type;
GstCudaContext *context;
CUstream stream;
gboolean ret;
} GLCopyData;
static GstCudaGraphicsResource *
ensure_cuda_gl_graphics_resource (GstCudaContext * context, GstMemory * mem)
{
GQuark quark;
GstCudaGraphicsResource *ret = NULL;
if (!gst_is_gl_memory_pbo (mem)) {
GST_WARNING_OBJECT (context, "memory is not GL PBO memory, %s",
mem->allocator->mem_type);
return NULL;
}
quark = gst_cuda_quark_from_id (GST_CUDA_QUARK_GRAPHICS_RESOURCE);
ret = (GstCudaGraphicsResource *)
gst_mini_object_get_qdata (GST_MINI_OBJECT (mem), quark);
if (!ret) {
GstGLMemoryPBO *pbo;
GstGLBuffer *buf;
GstMapInfo info;
ret = gst_cuda_graphics_resource_new (context,
GST_OBJECT (GST_GL_BASE_MEMORY_CAST (mem)->context),
GST_CUDA_GRAPHICS_RESOURCE_GL_BUFFER);
if (!gst_memory_map (mem, &info, (GstMapFlags) (GST_MAP_READ | GST_MAP_GL))) {
GST_ERROR_OBJECT (context, "Failed to map gl memory");
gst_cuda_graphics_resource_free (ret);
return NULL;
}
pbo = (GstGLMemoryPBO *) mem;
buf = pbo->pbo;
if (!gst_cuda_graphics_resource_register_gl_buffer (ret,
buf->id, CU_GRAPHICS_REGISTER_FLAGS_NONE)) {
GST_ERROR_OBJECT (context, "Failed to register gl buffer");
gst_memory_unmap (mem, &info);
gst_cuda_graphics_resource_free (ret);
return NULL;
}
gst_memory_unmap (mem, &info);
gst_mini_object_set_qdata (GST_MINI_OBJECT (mem), quark, ret,
(GDestroyNotify) gst_cuda_graphics_resource_free);
}
return ret;
}
static void
gl_copy_thread_func (GstGLContext * gl_context, GLCopyData * data)
{
GstCudaGraphicsResource *resources[GST_VIDEO_MAX_PLANES];
guint num_resources;
GstBuffer *gl_buf, *cuda_buf;
GstVideoFrame cuda_frame;
GstMapInfo cuda_map_info;
CUDA_MEMCPY2D copy_params[GST_VIDEO_MAX_PLANES];
guint i;
GstCudaContext *context = data->context;
CUstream stream = data->stream;
memset (copy_params, 0, sizeof (copy_params));
memset (&cuda_frame, 0, sizeof (GstVideoFrame));
memset (&cuda_map_info, 0, sizeof (GstMapInfo));
data->ret = FALSE;
/* Incompatible gl context */
if (!ensure_gl_interop ())
return;
if (data->pbo_to_cuda) {
gl_buf = data->src_buf;
cuda_buf = data->dst_buf;
if (!map_buffer_and_fill_copy2d (cuda_buf,
data->dst_info, data->copy_type, &cuda_frame, &cuda_map_info,
FALSE, copy_params)) {
GST_ERROR_OBJECT (context, "Failed to map output CUDA buffer");
return;
}
} else {
gl_buf = data->dst_buf;
cuda_buf = data->src_buf;
if (!map_buffer_and_fill_copy2d (cuda_buf,
data->src_info, data->copy_type, &cuda_frame, &cuda_map_info,
TRUE, copy_params)) {
GST_ERROR_OBJECT (context, "Failed to map input CUDA buffer");
return;
}
}
num_resources = gst_buffer_n_memory (gl_buf);
g_assert (num_resources >= GST_VIDEO_INFO_N_PLANES (data->src_info));
if (!gst_cuda_context_push (context)) {
GST_ERROR_OBJECT (context, "Failed to push context");
unmap_buffer_or_frame (cuda_buf, &cuda_frame, &cuda_map_info);
return;
}
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (data->src_info); i++) {
GstMemory *mem = gst_buffer_peek_memory (gl_buf, i);
GstGLMemoryPBO *pbo;
resources[i] = ensure_cuda_gl_graphics_resource (context, mem);
if (!resources[i])
goto out;
pbo = (GstGLMemoryPBO *) mem;
if (!data->pbo_to_cuda) {
/* Need PBO -> texture */
GST_MINI_OBJECT_FLAG_SET (mem, GST_GL_BASE_MEMORY_TRANSFER_NEED_UPLOAD);
/* PBO -> sysmem */
GST_MINI_OBJECT_FLAG_SET (pbo->pbo,
GST_GL_BASE_MEMORY_TRANSFER_NEED_DOWNLOAD);
} else {
/* get the texture into the PBO */
gst_gl_memory_pbo_upload_transfer (pbo);
gst_gl_memory_pbo_download_transfer (pbo);
}
}
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (data->src_info); i++) {
CUgraphicsResource cuda_resource;
CUdeviceptr dev_ptr;
size_t size;
gboolean copy_ret;
gsize width_in_bytes, height;
if (data->pbo_to_cuda) {
cuda_resource =
gst_cuda_graphics_resource_map (resources[i], stream,
CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY);
} else {
cuda_resource =
gst_cuda_graphics_resource_map (resources[i], stream,
CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD);
}
if (!cuda_resource) {
GST_ERROR_OBJECT (context, "Failed to map graphics resource %d", i);
goto out;
}
if (!gst_cuda_result (CuGraphicsResourceGetMappedPointer (&dev_ptr, &size,
cuda_resource))) {
gst_cuda_graphics_resource_unmap (resources[i], stream);
GST_ERROR_OBJECT (context, "Failed to get mapped pointer");
goto out;
}
if (data->pbo_to_cuda) {
copy_params[i].srcMemoryType = CU_MEMORYTYPE_DEVICE;
copy_params[i].srcDevice = dev_ptr;
copy_params[i].srcPitch = GST_VIDEO_INFO_PLANE_STRIDE (data->src_info, i);
width_in_bytes = GST_VIDEO_INFO_COMP_WIDTH (data->src_info, i) *
GST_VIDEO_INFO_COMP_PSTRIDE (data->src_info, i);
height = GST_VIDEO_INFO_COMP_HEIGHT (data->src_info, i);
} else {
copy_params[i].dstMemoryType = CU_MEMORYTYPE_DEVICE;
copy_params[i].dstDevice = dev_ptr;
copy_params[i].dstPitch = GST_VIDEO_INFO_PLANE_STRIDE (data->dst_info, i);
width_in_bytes = GST_VIDEO_INFO_COMP_WIDTH (data->dst_info, i) *
GST_VIDEO_INFO_COMP_PSTRIDE (data->dst_info, i);
height = GST_VIDEO_INFO_COMP_HEIGHT (data->dst_info, i);
}
if (width_in_bytes < copy_params[i].WidthInBytes)
copy_params[i].WidthInBytes = width_in_bytes;
if (height < copy_params[i].Height)
copy_params[i].Height = height;
copy_ret = gst_cuda_result (CuMemcpy2DAsync (&copy_params[i], stream));
gst_cuda_graphics_resource_unmap (resources[i], stream);
if (!copy_ret) {
GST_ERROR_OBJECT (context, "Failed to copy plane %d", i);
goto out;
}
}
data->ret = TRUE;
out:
gst_cuda_result (CuStreamSynchronize (stream));
gst_cuda_context_pop (NULL);
unmap_buffer_or_frame (cuda_buf, &cuda_frame, &cuda_map_info);
}
static gboolean
cuda_copy_gl_interop (GstBuffer * dst_buf, const GstVideoInfo * dst_info,
GstBuffer * src_buf, const GstVideoInfo * src_info,
GstGLContext * gl_context, GstCudaContext * context, CUstream stream,
gboolean pbo_to_cuda, GstCudaBufferCopyType copy_type)
{
GLCopyData data;
g_assert (copy_type == GST_CUDA_BUFFER_COPY_CUDA ||
copy_type == GST_CUDA_BUFFER_COPY_NVMM);
data.src_buf = src_buf;
data.src_info = src_info;
data.dst_buf = dst_buf;
data.dst_info = dst_info;
data.pbo_to_cuda = pbo_to_cuda;
data.copy_type = copy_type;
data.context = context;
data.stream = stream;
data.ret = FALSE;
gst_gl_context_thread_add (gl_context,
(GstGLContextThreadFunc) gl_copy_thread_func, &data);
return data.ret;
}
#endif
#ifdef HAVE_NVCODEC_GST_D3D11
static gboolean
ensure_d3d11_interop (GstCudaContext * context, GstD3D11Device * device)
{
guint device_count = 0;
guint cuda_device_id;
CUdevice device_list[1] = { 0, };
CUresult cuda_ret;
g_object_get (context, "cuda-device-id", &cuda_device_id, NULL);
cuda_ret = CuD3D11GetDevices (&device_count,
device_list, 1, gst_d3d11_device_get_device_handle (device),
CU_D3D11_DEVICE_LIST_ALL);
if (cuda_ret != CUDA_SUCCESS || device_count == 0)
return FALSE;
if (device_list[0] != (CUdevice) cuda_device_id)
return FALSE;
return TRUE;
}
static GstCudaGraphicsResource *
ensure_cuda_d3d11_graphics_resource (GstCudaContext * context, GstMemory * mem)
{
GQuark quark;
GstCudaGraphicsResource *ret = NULL;
if (!gst_is_d3d11_memory (mem)) {
GST_WARNING_OBJECT (context, "memory is not D3D11 memory, %s",
mem->allocator->mem_type);
return NULL;
}
quark = gst_cuda_quark_from_id (GST_CUDA_QUARK_GRAPHICS_RESOURCE);
ret = (GstCudaGraphicsResource *)
gst_mini_object_get_qdata (GST_MINI_OBJECT (mem), quark);
if (!ret) {
ret = gst_cuda_graphics_resource_new (context,
GST_OBJECT (GST_D3D11_MEMORY_CAST (mem)->device),
GST_CUDA_GRAPHICS_RESOURCE_D3D11_RESOURCE);
if (!gst_cuda_graphics_resource_register_d3d11_resource (ret,
gst_d3d11_memory_get_resource_handle (GST_D3D11_MEMORY_CAST (mem)),
CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LOAD_STORE)) {
GST_ERROR_OBJECT (context, "failed to register d3d11 resource");
gst_cuda_graphics_resource_free (ret);
return NULL;
}
gst_mini_object_set_qdata (GST_MINI_OBJECT (mem), quark, ret,
(GDestroyNotify) gst_cuda_graphics_resource_free);
}
return ret;
}
static gboolean
cuda_copy_d3d11_interop (GstBuffer * dst_buf, const GstVideoInfo * dst_info,
GstBuffer * src_buf, const GstVideoInfo * src_info, GstD3D11Device * device,
GstCudaContext * context, CUstream stream, gboolean d3d11_to_cuda)
{
GstCudaGraphicsResource *resources[GST_VIDEO_MAX_PLANES];
D3D11_TEXTURE2D_DESC desc[GST_VIDEO_MAX_PLANES];
guint num_resources;
GstBuffer *d3d11_buf, *cuda_buf;
GstVideoFrame d3d11_frame, cuda_frame;
GstMapInfo cuda_map_info;
CUDA_MEMCPY2D copy_params[GST_VIDEO_MAX_PLANES];
guint i;
gboolean ret = FALSE;
memset (copy_params, 0, sizeof (copy_params));
memset (&cuda_frame, 0, sizeof (GstVideoFrame));
memset (&cuda_map_info, 0, sizeof (GstMapInfo));
/* Incompatible d3d11 device */
if (!ensure_d3d11_interop (context, device))
return FALSE;
if (d3d11_to_cuda) {
d3d11_buf = src_buf;
cuda_buf = dst_buf;
if (!gst_video_frame_map (&d3d11_frame, src_info, d3d11_buf,
GST_MAP_READ | GST_MAP_D3D11)) {
GST_ERROR_OBJECT (context, "Failed to map input D3D11 buffer");
return FALSE;
}
if (!map_buffer_and_fill_copy2d (cuda_buf,
dst_info, GST_CUDA_BUFFER_COPY_CUDA, &cuda_frame, &cuda_map_info,
FALSE, copy_params)) {
GST_ERROR_OBJECT (context, "Failed to map output CUDA buffer");
gst_video_frame_unmap (&d3d11_frame);
return FALSE;
}
} else {
d3d11_buf = dst_buf;
cuda_buf = src_buf;
if (!gst_video_frame_map (&d3d11_frame, dst_info, d3d11_buf,
GST_MAP_WRITE | GST_MAP_D3D11)) {
GST_ERROR_OBJECT (context, "Failed to map output D3D11 buffer");
return FALSE;
}
if (!map_buffer_and_fill_copy2d (cuda_buf,
src_info, GST_CUDA_BUFFER_COPY_CUDA, &cuda_frame, &cuda_map_info,
TRUE, copy_params)) {
GST_ERROR_OBJECT (context, "Failed to map input CUDA buffer");
gst_video_frame_unmap (&d3d11_frame);
return FALSE;
}
}
num_resources = gst_buffer_n_memory (d3d11_buf);
g_assert (num_resources >= GST_VIDEO_FRAME_N_PLANES (&d3d11_frame));
if (!gst_cuda_context_push (context)) {
GST_ERROR_OBJECT (context, "Failed to push context");
gst_video_frame_unmap (&d3d11_frame);
unmap_buffer_or_frame (cuda_buf, &cuda_frame, &cuda_map_info);
return FALSE;
}
for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (&d3d11_frame); i++) {
GstMemory *mem = gst_buffer_peek_memory (d3d11_buf, i);
resources[i] = ensure_cuda_d3d11_graphics_resource (context, mem);
if (!resources[i]
|| !gst_d3d11_memory_get_texture_desc (GST_D3D11_MEMORY_CAST (mem),
&desc[i]))
goto out;
}
for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (&d3d11_frame); i++) {
CUgraphicsResource cuda_resource;
CUarray d3d11_array;
gboolean copy_ret;
if (d3d11_to_cuda) {
cuda_resource =
gst_cuda_graphics_resource_map (resources[i], stream,
CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY);
} else {
cuda_resource =
gst_cuda_graphics_resource_map (resources[i], stream,
CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD);
}
if (!cuda_resource) {
GST_ERROR_OBJECT (context, "Failed to map graphics resource %d", i);
goto out;
}
if (!gst_cuda_result (CuGraphicsSubResourceGetMappedArray (&d3d11_array,
cuda_resource, 0, 0))) {
gst_cuda_graphics_resource_unmap (resources[i], stream);
GST_ERROR_OBJECT (context, "Failed to get mapped array");
goto out;
}
if (d3d11_to_cuda) {
copy_params[i].srcMemoryType = CU_MEMORYTYPE_ARRAY;
copy_params[i].srcArray = d3d11_array;
copy_params[i].srcPitch =
desc[i].Width * GST_VIDEO_FRAME_COMP_PSTRIDE (&d3d11_frame, i);
} else {
copy_params[i].dstMemoryType = CU_MEMORYTYPE_ARRAY;
copy_params[i].dstArray = d3d11_array;
copy_params[i].dstPitch =
desc[i].Width * GST_VIDEO_FRAME_COMP_PSTRIDE (&d3d11_frame, i);
}
copy_ret = gst_cuda_result (CuMemcpy2DAsync (&copy_params[i], stream));
gst_cuda_graphics_resource_unmap (resources[i], stream);
if (!copy_ret) {
GST_ERROR_OBJECT (context, "Failed to copy plane %d", i);
goto out;
}
}
ret = TRUE;
out:
gst_cuda_result (CuStreamSynchronize (stream));
gst_cuda_context_pop (NULL);
gst_video_frame_unmap (&d3d11_frame);
unmap_buffer_or_frame (cuda_buf, &cuda_frame, &cuda_map_info);
return ret;
}
#endif
gboolean
gst_cuda_buffer_copy (GstBuffer * dst, GstCudaBufferCopyType dst_type,
const GstVideoInfo * dst_info, GstBuffer * src,
GstCudaBufferCopyType src_type, const GstVideoInfo * src_info,
GstCudaContext * context, CUstream stream)
{
gboolean use_copy_2d = FALSE;
GstMemory *dst_mem, *src_mem;
#ifdef HAVE_NVCODEC_GST_D3D11
D3D11_TEXTURE2D_DESC desc;
#endif
GstCudaContext *cuda_context;
g_return_val_if_fail (GST_IS_BUFFER (dst), FALSE);
g_return_val_if_fail (dst_info != NULL, FALSE);
g_return_val_if_fail (GST_IS_BUFFER (src), FALSE);
g_return_val_if_fail (src_info != NULL, FALSE);
g_return_val_if_fail (GST_IS_CUDA_CONTEXT (context), FALSE);
if (dst_type == GST_CUDA_BUFFER_COPY_NVMM &&
src_type == GST_CUDA_BUFFER_COPY_NVMM) {
GST_ERROR_OBJECT (context, "Not supported copy NVMM -> NVMM");
return FALSE;
}
if (GST_VIDEO_INFO_FORMAT (dst_info) != GST_VIDEO_INFO_FORMAT (src_info)) {
GST_ERROR_OBJECT (context,
"Copy between different format is not supported");
return FALSE;
}
if (dst_type == GST_CUDA_BUFFER_COPY_CUDA ||
dst_type == GST_CUDA_BUFFER_COPY_NVMM ||
src_type == GST_CUDA_BUFFER_COPY_CUDA ||
src_type == GST_CUDA_BUFFER_COPY_NVMM) {
use_copy_2d = TRUE;
}
if (!use_copy_2d) {
GST_TRACE_OBJECT (context, "Not a device memory, use system memory copy");
return gst_cuda_buffer_fallback_copy (dst, dst_info, src, src_info);
}
dst_mem = gst_buffer_peek_memory (dst, 0);
src_mem = gst_buffer_peek_memory (src, 0);
#ifdef HAVE_NVCODEC_GST_GL
if (src_type == GST_CUDA_BUFFER_COPY_GL && gst_is_gl_memory_pbo (src_mem)) {
GstGLMemory *gl_mem = (GstGLMemory *) src_mem;
GstGLContext *gl_context = gl_mem->mem.context;
GstCudaContext *cuda_context = context;
if (dst_type == GST_CUDA_BUFFER_COPY_CUDA && gst_is_cuda_memory (dst_mem))
cuda_context = GST_CUDA_MEMORY_CAST (dst_mem)->context;
GST_TRACE_OBJECT (context, "GL -> %s",
gst_cuda_buffery_copy_type_to_string (dst_type));
return cuda_copy_gl_interop (dst, dst_info, src, src_info, gl_context,
cuda_context, stream, TRUE, dst_type);
}
if (dst_type == GST_CUDA_BUFFER_COPY_GL && gst_is_gl_memory_pbo (dst_mem)) {
GstGLMemory *gl_mem = (GstGLMemory *) dst_mem;
GstGLContext *gl_context = gl_mem->mem.context;
GstCudaContext *cuda_context = context;
if (src_type == GST_CUDA_BUFFER_COPY_CUDA && gst_is_cuda_memory (src_mem))
cuda_context = GST_CUDA_MEMORY_CAST (src_mem)->context;
GST_TRACE_OBJECT (context, "%s -> GL",
gst_cuda_buffery_copy_type_to_string (src_type));
return cuda_copy_gl_interop (dst, dst_info, src, src_info, gl_context,
cuda_context, stream, FALSE, src_type);
}
#endif
#ifdef HAVE_NVCODEC_GST_D3D11
if (src_type == GST_CUDA_BUFFER_COPY_D3D11 && gst_is_d3d11_memory (src_mem) &&
gst_d3d11_memory_get_texture_desc (GST_D3D11_MEMORY_CAST (src_mem), &desc)
&& desc.Usage == D3D11_USAGE_DEFAULT && gst_is_cuda_memory (dst_mem)) {
GstD3D11Memory *dmem = GST_D3D11_MEMORY_CAST (src_mem);
GstD3D11Device *device = dmem->device;
GstCudaContext *cuda_context = GST_CUDA_MEMORY_CAST (dst_mem)->context;
gboolean ret;
GST_TRACE_OBJECT (context, "D3D11 -> CUDA");
gst_d3d11_device_lock (device);
ret = cuda_copy_d3d11_interop (dst, dst_info, src, src_info, device,
cuda_context, stream, TRUE);
gst_d3d11_device_unlock (device);
return ret;
}
if (dst_type == GST_CUDA_BUFFER_COPY_D3D11 && gst_is_d3d11_memory (dst_mem) &&
gst_d3d11_memory_get_texture_desc (GST_D3D11_MEMORY_CAST (dst_mem), &desc)
&& desc.Usage == D3D11_USAGE_DEFAULT && gst_is_cuda_memory (src_mem)) {
GstD3D11Memory *dmem = GST_D3D11_MEMORY_CAST (dst_mem);
GstD3D11Device *device = dmem->device;
GstCudaContext *cuda_context = GST_CUDA_MEMORY_CAST (src_mem)->context;
gboolean ret;
GST_TRACE_OBJECT (context, "CUDA -> D3D11");
gst_d3d11_device_lock (device);
ret = cuda_copy_d3d11_interop (dst, dst_info, src, src_info, device,
cuda_context, stream, FALSE);
gst_d3d11_device_unlock (device);
return ret;
}
#endif
if (gst_is_cuda_memory (dst_mem)) {
cuda_context = GST_CUDA_MEMORY_CAST (dst_mem)->context;
} else if (gst_is_cuda_memory (src_mem)) {
cuda_context = GST_CUDA_MEMORY_CAST (src_mem)->context;
} else {
cuda_context = context;
}
GST_TRACE_OBJECT (context, "%s -> %s",
gst_cuda_buffery_copy_type_to_string (src_type),
gst_cuda_buffery_copy_type_to_string (dst_type));
return gst_cuda_buffer_copy_internal (dst, dst_type, dst_info,
src, src_type, src_info, cuda_context, stream);
}


@@ -21,6 +21,7 @@
#define __GST_CUDA_UTILS_H__
#include <gst/gst.h>
#include <gst/video/video.h>
#include "gstcudaloader.h"
#include "gstcudacontext.h"
@@ -140,6 +141,26 @@ void gst_cuda_graphics_resource_unmap (GstCudaGraphicsResource * reso
void gst_cuda_graphics_resource_free (GstCudaGraphicsResource * resource);
typedef enum
{
GST_CUDA_BUFFER_COPY_SYSTEM,
GST_CUDA_BUFFER_COPY_CUDA,
GST_CUDA_BUFFER_COPY_GL,
GST_CUDA_BUFFER_COPY_D3D11,
GST_CUDA_BUFFER_COPY_NVMM,
} GstCudaBufferCopyType;
const gchar * gst_cuda_buffery_copy_type_to_string (GstCudaBufferCopyType type);
gboolean gst_cuda_buffer_copy (GstBuffer * dst,
GstCudaBufferCopyType dst_type,
const GstVideoInfo * dst_info,
GstBuffer * src,
GstCudaBufferCopyType src_type,
const GstVideoInfo * src_info,
GstCudaContext * context,
CUstream stream);
G_END_DECLS
#endif /* __GST_CUDA_UTILS_H__ */