nvcodec: Add CUDA specific memory and bufferpool

Introducing CUDA buffer pool with generic CUDA memory support.
Like GL memory, any element that can access CUDA device
memory directly can map this CUDA memory without upload/download
overhead via the "GST_MAP_CUDA" map flag.
The usual GstMemory map/unmap is also possible, via internal staging memory.

For staging, CUDA Host allocated memory is used (see CuMemAllocHost API).
The memory allows system access but has lower overhead
during GPU upload/download than normal system memory.

Part-of: <https://gitlab.freedesktop.org/gstreamer/gst-plugins-bad/-/merge_requests/1633>
This commit is contained in:
Seungha Yang 2019-08-19 18:02:56 +09:00 committed by GStreamer Merge Bot
parent 11353b3f6e
commit cf5ef5635f
8 changed files with 981 additions and 0 deletions

View file

@ -0,0 +1,259 @@
/* GStreamer
* Copyright (C) <2018-2019> Seungha Yang <seungha.yang@navercorp.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "gstcudabufferpool.h"
#include "gstcudacontext.h"
#include "gstcudamemory.h"
GST_DEBUG_CATEGORY_STATIC (gst_cuda_buffer_pool_debug);
#define GST_CAT_DEFAULT gst_cuda_buffer_pool_debug
struct _GstCudaBufferPoolPrivate
{
GstCudaContext *context;
GstAllocator *allocator;
GstVideoInfo info;
gboolean add_videometa;
gboolean need_alignment;
GstCudaAllocationParams params;
};
#define gst_cuda_buffer_pool_parent_class parent_class
G_DEFINE_TYPE_WITH_PRIVATE (GstCudaBufferPool, gst_cuda_buffer_pool,
GST_TYPE_BUFFER_POOL);
/* Report the pool options this pool understands: video meta and
 * video alignment. The returned array is static and NULL-terminated. */
static const gchar **
gst_cuda_buffer_pool_get_options (GstBufferPool * pool)
{
  static const gchar *supported_options[] = {
    GST_BUFFER_POOL_OPTION_VIDEO_META,
    GST_BUFFER_POOL_OPTION_VIDEO_ALIGNMENT,
    NULL
  };

  return supported_options;
}
/* Validate and apply a pool configuration.
 *
 * Parses caps into priv->params.info, (re)creates or adopts the allocator,
 * applies video alignment when both the videometa and alignment options are
 * requested, and finally chains up to the parent set_config with the size
 * recomputed from the (possibly aligned) video info.
 *
 * Returns: TRUE if the config was accepted. */
static gboolean
gst_cuda_buffer_pool_set_config (GstBufferPool * pool, GstStructure * config)
{
  GstCudaBufferPool *cuda_pool = GST_CUDA_BUFFER_POOL_CAST (pool);
  GstCudaBufferPoolPrivate *priv = cuda_pool->priv;
  GstCaps *caps = NULL;
  guint size, min_buffers, max_buffers;
  guint max_align, n;
  GstAllocator *allocator = NULL;
  /* GstCudaAllocationParams embeds GstAllocationParams as its first member,
   * so this cast lets generic config helpers fill the parent part in place */
  GstAllocationParams *params = (GstAllocationParams *) & priv->params;
  GstVideoInfo *info = &priv->params.info;

  if (!gst_buffer_pool_config_get_params (config, &caps, &size, &min_buffers,
          &max_buffers))
    goto wrong_config;

  if (caps == NULL)
    goto no_caps;

  if (!gst_buffer_pool_config_get_allocator (config, &allocator, params))
    goto wrong_config;

  /* now parse the caps from the config */
  if (!gst_video_info_from_caps (info, caps))
    goto wrong_caps;

  GST_LOG_OBJECT (pool, "%dx%d, caps %" GST_PTR_FORMAT,
      GST_VIDEO_INFO_WIDTH (info), GST_VIDEO_INFO_HEIGHT (info), caps);

  /* drop any allocator from a previous configuration */
  gst_clear_object (&priv->allocator);

  if (allocator) {
    /* a caller-provided allocator must be CUDA-aware */
    if (!GST_IS_CUDA_ALLOCATOR (allocator)) {
      goto wrong_allocator;
    } else {
      priv->allocator = gst_object_ref (allocator);
    }
  } else {
    /* otherwise create our own allocator bound to the pool's context */
    allocator = priv->allocator = gst_cuda_allocator_new (priv->context);
    if (G_UNLIKELY (priv->allocator == NULL))
      goto no_allocator;
  }

  priv->add_videometa = gst_buffer_pool_config_has_option (config,
      GST_BUFFER_POOL_OPTION_VIDEO_META);
  priv->need_alignment = gst_buffer_pool_config_has_option (config,
      GST_BUFFER_POOL_OPTION_VIDEO_ALIGNMENT);

  max_align = params->align;

  /* do memory align */
  if (priv->need_alignment && priv->add_videometa) {
    GstVideoAlignment valign;

    gst_buffer_pool_config_get_video_alignment (config, &valign);

    /* OR all per-plane stride alignment masks together, then apply the
     * combined mask to every plane so all planes share one alignment */
    for (n = 0; n < GST_VIDEO_MAX_PLANES; ++n)
      max_align |= valign.stride_align[n];

    for (n = 0; n < GST_VIDEO_MAX_PLANES; ++n)
      valign.stride_align[n] = max_align;

    if (!gst_video_info_align (info, &valign))
      goto failed_to_align;

    gst_buffer_pool_config_set_video_alignment (config, &valign);
  }

  /* promote the allocation-params alignment to the video stride alignment
   * so memory alignment never undercuts the stride requirement */
  if (params->align < max_align) {
    GST_WARNING_OBJECT (pool, "allocation params alignment %u is smaller "
        "than the max specified video stride alignment %u, fixing",
        (guint) params->align, max_align);

    params->align = max_align;
    gst_buffer_pool_config_set_allocator (config, allocator, params);
  }

  /* the configured size is replaced by the size derived from the
   * (possibly re-aligned) video info */
  gst_buffer_pool_config_set_params (config, caps, GST_VIDEO_INFO_SIZE (info),
      min_buffers, max_buffers);

  return GST_BUFFER_POOL_CLASS (parent_class)->set_config (pool, config);

  /* ERRORS */
wrong_config:
  {
    GST_WARNING_OBJECT (pool, "invalid config");
    return FALSE;
  }
no_caps:
  {
    GST_WARNING_OBJECT (pool, "no caps in config");
    return FALSE;
  }
wrong_caps:
  {
    GST_WARNING_OBJECT (pool,
        "failed getting geometry from caps %" GST_PTR_FORMAT, caps);
    return FALSE;
  }
no_allocator:
  {
    GST_WARNING_OBJECT (pool, "Could not create new CUDA allocator");
    return FALSE;
  }
wrong_allocator:
  {
    GST_WARNING_OBJECT (pool, "Incorrect allocator type for this pool");
    return FALSE;
  }
failed_to_align:
  {
    GST_WARNING_OBJECT (pool, "Failed to align");
    return FALSE;
  }
}
/* Allocate one buffer backed by a single GstCudaMemory sized from the
 * configured video info; optionally attach a GstVideoMeta. */
static GstFlowReturn
gst_cuda_buffer_pool_alloc (GstBufferPool * pool, GstBuffer ** buffer,
    GstBufferPoolAcquireParams * params)
{
  GstCudaBufferPool *self = GST_CUDA_BUFFER_POOL_CAST (pool);
  GstCudaBufferPoolPrivate *priv = self->priv;
  GstVideoInfo *vinfo = &priv->params.info;
  GstBuffer *buf;
  GstMemory *cmem;

  buf = gst_buffer_new ();
  cmem = gst_cuda_allocator_alloc (GST_ALLOCATOR_CAST (priv->allocator),
      GST_VIDEO_INFO_SIZE (vinfo), &priv->params);

  if (!cmem) {
    gst_buffer_unref (buf);
    GST_WARNING_OBJECT (pool, "Cannot create CUDA memory");
    return GST_FLOW_ERROR;
  }

  gst_buffer_append_memory (buf, cmem);

  if (priv->add_videometa) {
    GST_DEBUG_OBJECT (pool, "adding GstVideoMeta");
    gst_buffer_add_video_meta_full (buf, GST_VIDEO_FRAME_FLAG_NONE,
        GST_VIDEO_INFO_FORMAT (vinfo), GST_VIDEO_INFO_WIDTH (vinfo),
        GST_VIDEO_INFO_HEIGHT (vinfo), GST_VIDEO_INFO_N_PLANES (vinfo),
        vinfo->offset, vinfo->stride);
  }

  *buffer = buf;

  return GST_FLOW_OK;
}
/* gst_cuda_buffer_pool_new:
 * @context: (transfer none): a #GstCudaContext the pool will hold a ref on
 *
 * Creates a new buffer pool that allocates CUDA device memory.
 *
 * Returns: (transfer full): a new #GstBufferPool, or %NULL on invalid input */
GstBufferPool *
gst_cuda_buffer_pool_new (GstCudaContext * context)
{
  GstCudaBufferPool *pool;

  /* validate up front, consistent with gst_cuda_allocator_new(); previously
   * a NULL/invalid context was only discovered later via gst_object_ref() */
  g_return_val_if_fail (GST_IS_CUDA_CONTEXT (context), NULL);

  pool = g_object_new (GST_TYPE_CUDA_BUFFER_POOL, NULL);
  /* take ownership of the initially-floating reference */
  gst_object_ref_sink (pool);

  pool->priv->context = gst_object_ref (context);

  GST_LOG_OBJECT (pool, "new CUDA buffer pool %p", pool);

  return GST_BUFFER_POOL_CAST (pool);
}
/* GObject dispose: drop the allocator and context references.
 * gst_clear_object() is NULL-safe, so repeated dispose calls are fine. */
static void
gst_cuda_buffer_pool_dispose (GObject * object)
{
  GstCudaBufferPool *pool = GST_CUDA_BUFFER_POOL_CAST (object);
  GstCudaBufferPoolPrivate *priv = pool->priv;

  /* fixed: this is dispose, not finalize — log message said "finalize" */
  GST_LOG_OBJECT (pool, "dispose CUDA buffer pool %p", pool);

  gst_clear_object (&priv->allocator);
  gst_clear_object (&priv->context);

  G_OBJECT_CLASS (parent_class)->dispose (object);
}
/* Class init: hook up dispose and the buffer-pool vfuncs, and register the
 * debug category used by this file. */
static void
gst_cuda_buffer_pool_class_init (GstCudaBufferPoolClass * klass)
{
  GObjectClass *object_class = G_OBJECT_CLASS (klass);
  GstBufferPoolClass *pool_class = GST_BUFFER_POOL_CLASS (klass);

  object_class->dispose = gst_cuda_buffer_pool_dispose;

  pool_class->get_options = gst_cuda_buffer_pool_get_options;
  pool_class->set_config = gst_cuda_buffer_pool_set_config;
  pool_class->alloc_buffer = gst_cuda_buffer_pool_alloc;

  GST_DEBUG_CATEGORY_INIT (gst_cuda_buffer_pool_debug, "cudabufferpool", 0,
      "CUDA Buffer Pool");
}
/* Instance init: wire up the private data area created by
 * G_DEFINE_TYPE_WITH_PRIVATE. All fields start zeroed; the context is
 * set later by gst_cuda_buffer_pool_new(). */
static void
gst_cuda_buffer_pool_init (GstCudaBufferPool * pool)
{
  pool->priv = gst_cuda_buffer_pool_get_instance_private (pool);
}

View file

@ -0,0 +1,66 @@
/* GStreamer
* Copyright (C) <2018-2019> Seungha Yang <seungha.yang@navercorp.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#ifndef __GST_CUDA_BUFFER_POOL_H__
#define __GST_CUDA_BUFFER_POOL_H__
#include <gst/video/gstvideometa.h>
#include <gst/video/gstvideopool.h>
#include "gstcudamemory.h"
G_BEGIN_DECLS
#define GST_TYPE_CUDA_BUFFER_POOL (gst_cuda_buffer_pool_get_type ())
#define GST_CUDA_BUFFER_POOL(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj),GST_TYPE_CUDA_BUFFER_POOL,GstCudaBufferPool))
#define GST_CUDA_BUFFER_POOL_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), GST_TYPE_CUDA_BUFFER_POOL,GstCudaBufferPoolClass))
#define GST_CUDA_BUFFER_POOL_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS((obj), GST_TYPE_CUDA_BUFFER_POOL,GstCudaBufferPoolClass))
#define GST_IS_CUDA_BUFFER_POOL(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj),GST_TYPE_CUDA_BUFFER_POOL))
#define GST_IS_CUDA_BUFFER_POOL_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), GST_TYPE_CUDA_BUFFER_POOL))
#define GST_CUDA_BUFFER_POOL_CAST(obj) ((GstCudaBufferPool*)(obj))
typedef struct _GstCudaBufferPool GstCudaBufferPool;
typedef struct _GstCudaBufferPoolClass GstCudaBufferPoolClass;
typedef struct _GstCudaBufferPoolPrivate GstCudaBufferPoolPrivate;

/*
 * GstCudaBufferPool:
 *
 * A #GstBufferPool subclass whose buffers are backed by CUDA device
 * memory allocated through #GstCudaAllocator.
 */
struct _GstCudaBufferPool
{
  GstBufferPool parent;

  /* private implementation data (context, allocator, video info) */
  GstCudaBufferPoolPrivate *priv;
};

/*
 * GstCudaBufferPoolClass:
 */
struct _GstCudaBufferPoolClass
{
  GstBufferPoolClass parent_class;
};

GType gst_cuda_buffer_pool_get_type (void);

/* Creates a pool bound to @context; returns (transfer full) or NULL */
GstBufferPool * gst_cuda_buffer_pool_new (GstCudaContext * context);
G_END_DECLS
#endif /* __GST_CUDA_BUFFER_POOL_H__ */

View file

@ -69,10 +69,14 @@ typedef struct _GstNvCodecCudaVTable
CUresult (CUDAAPI * CuMemAlloc) (CUdeviceptr * dptr, unsigned int bytesize);
CUresult (CUDAAPI * CuMemAllocPitch) (CUdeviceptr * dptr, size_t * pPitch,
size_t WidthInBytes, size_t Height, unsigned int ElementSizeBytes);
CUresult (CUDAAPI * CuMemAllocHost) (void **pp, unsigned int bytesize);
CUresult (CUDAAPI * CuMemcpy2D) (const CUDA_MEMCPY2D * pCopy);
CUresult (CUDAAPI * CuMemcpy2DAsync) (const CUDA_MEMCPY2D * pCopy,
CUstream hStream);
CUresult (CUDAAPI * CuMemFree) (CUdeviceptr dptr);
CUresult (CUDAAPI * CuMemFreeHost) (void *p);
CUresult (CUDAAPI * CuStreamCreate) (CUstream * phStream,
unsigned int Flags);
CUresult (CUDAAPI * CuStreamDestroy) (CUstream hStream);
@ -136,9 +140,12 @@ gst_cuda_load_library (void)
LOAD_SYMBOL (cuMemAlloc, CuMemAlloc);
LOAD_SYMBOL (cuMemAllocPitch, CuMemAllocPitch);
LOAD_SYMBOL (cuMemAllocHost, CuMemAllocHost);
LOAD_SYMBOL (cuMemcpy2D, CuMemcpy2D);
LOAD_SYMBOL (cuMemcpy2DAsync, CuMemcpy2DAsync);
LOAD_SYMBOL (cuMemFree, CuMemFree);
LOAD_SYMBOL (cuMemFreeHost, CuMemFreeHost);
LOAD_SYMBOL (cuStreamCreate, CuStreamCreate);
LOAD_SYMBOL (cuStreamDestroy, CuStreamDestroy);
@ -285,6 +292,14 @@ CuMemAllocPitch (CUdeviceptr * dptr, size_t * pPitch, size_t WidthInBytes,
ElementSizeBytes);
}
/* Trampoline into the dynamically loaded cuMemAllocHost() (page-locked
 * host memory allocation). gst_cuda_load_library() must have resolved the
 * symbol first, hence the assert.
 *
 * NOTE(review): the driver's cuMemAllocHost_v2 takes a size_t bytesize;
 * declaring it as unsigned int would truncate requests >= 4GiB on LP64
 * platforms — TODO confirm against cuda.h and widen if needed. */
CUresult CUDAAPI
CuMemAllocHost (void **pp, unsigned int bytesize)
{
  g_assert (gst_cuda_vtable.CuMemAllocHost != NULL);

  return gst_cuda_vtable.CuMemAllocHost (pp, bytesize);
}
CUresult CUDAAPI
CuMemcpy2D (const CUDA_MEMCPY2D * pCopy)
{
@ -309,6 +324,14 @@ CuMemFree (CUdeviceptr dptr)
return gst_cuda_vtable.CuMemFree (dptr);
}
/* Trampoline into the dynamically loaded cuMemFreeHost(); frees memory
 * obtained via CuMemAllocHost(). The symbol must have been resolved by
 * gst_cuda_load_library() before this is called. */
CUresult CUDAAPI
CuMemFreeHost (void *p)
{
  g_assert (gst_cuda_vtable.CuMemFreeHost != NULL);

  return gst_cuda_vtable.CuMemFreeHost (p);
}
CUresult CUDAAPI
CuStreamCreate (CUstream * phStream, unsigned int Flags)
{

View file

@ -90,6 +90,10 @@ CUresult CUDAAPI CuMemAllocPitch (CUdeviceptr * dptr,
size_t Height,
unsigned int ElementSizeBytes);
G_GNUC_INTERNAL
CUresult CUDAAPI CuMemAllocHost (void **pp,
unsigned int bytesize);
G_GNUC_INTERNAL
CUresult CUDAAPI CuMemcpy2D (const CUDA_MEMCPY2D * pCopy);
@ -99,6 +103,9 @@ CUresult CUDAAPI CuMemcpy2DAsync (const CUDA_MEMCPY2D *pCopy, CUstream hStrea
G_GNUC_INTERNAL
CUresult CUDAAPI CuMemFree (CUdeviceptr dptr);
G_GNUC_INTERNAL
CUresult CUDAAPI CuMemFreeHost (void *p);
G_GNUC_INTERNAL
CUresult CUDAAPI CuStreamCreate (CUstream *phStream,
unsigned int Flags);

485
sys/nvcodec/gstcudamemory.c Normal file
View file

@ -0,0 +1,485 @@
/* GStreamer
* Copyright (C) <2018-2019> Seungha Yang <seungha.yang@navercorp.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "gstcudamemory.h"
#include "gstcudautils.h"
#include <string.h>
GST_DEBUG_CATEGORY_STATIC (cudaallocator_debug);
#define GST_CAT_DEFAULT cudaallocator_debug
GST_DEBUG_CATEGORY_STATIC (GST_CAT_MEMORY);
#define gst_cuda_allocator_parent_class parent_class
G_DEFINE_TYPE (GstCudaAllocator, gst_cuda_allocator, GST_TYPE_ALLOCATOR);
static void gst_cuda_allocator_dispose (GObject * object);
static void gst_cuda_allocator_free (GstAllocator * allocator,
GstMemory * memory);
static gpointer cuda_mem_map (GstCudaMemory * mem, gsize maxsize,
GstMapFlags flags);
static void cuda_mem_unmap_full (GstCudaMemory * mem, GstMapInfo * info);
static GstMemory *cuda_mem_copy (GstMemory * mem, gssize offset, gssize size);
/* Deliberately unreachable GstAllocator::alloc implementation: CUDA memory
 * must be allocated via gst_cuda_allocator_alloc(), which takes the extra
 * GstCudaAllocationParams (video info) this vfunc cannot receive. */
static GstMemory *
gst_cuda_allocator_dummy_alloc (GstAllocator * allocator, gsize size,
    GstAllocationParams * params)
{
  g_return_val_if_reached (NULL);
}
/* Class init: install dispose and the allocator vfuncs, and set up the
 * debug categories used by this file. */
static void
gst_cuda_allocator_class_init (GstCudaAllocatorClass * klass)
{
  GObjectClass *object_class = G_OBJECT_CLASS (klass);
  GstAllocatorClass *alloc_class = GST_ALLOCATOR_CLASS (klass);

  object_class->dispose = gst_cuda_allocator_dispose;

  alloc_class->alloc = GST_DEBUG_FUNCPTR (gst_cuda_allocator_dummy_alloc);
  alloc_class->free = GST_DEBUG_FUNCPTR (gst_cuda_allocator_free);

  GST_DEBUG_CATEGORY_INIT (cudaallocator_debug, "cudaallocator", 0,
      "CUDA Allocator");
  GST_DEBUG_CATEGORY_GET (GST_CAT_MEMORY, "GST_MEMORY");
}
/* Instance init: advertise the CUDA memory type and install the custom
 * map/unmap/copy functions; mark the allocator as custom-alloc so generic
 * gst_allocator_alloc() callers are rejected. */
static void
gst_cuda_allocator_init (GstCudaAllocator * allocator)
{
  GstAllocator *base = GST_ALLOCATOR_CAST (allocator);

  GST_DEBUG_OBJECT (allocator, "init");

  base->mem_type = GST_CUDA_MEMORY_TYPE_NAME;

  base->mem_map = (GstMemoryMapFunction) cuda_mem_map;
  base->mem_unmap_full = (GstMemoryUnmapFullFunction) cuda_mem_unmap_full;
  base->mem_copy = (GstMemoryCopyFunction) cuda_mem_copy;

  GST_OBJECT_FLAG_SET (allocator, GST_ALLOCATOR_FLAG_CUSTOM_ALLOC);
}
/* GObject dispose: release the CUDA context reference.
 * gst_clear_object() is NULL-safe, so repeated dispose calls are fine. */
static void
gst_cuda_allocator_dispose (GObject * object)
{
  GstCudaAllocator *self = GST_CUDA_ALLOCATOR_CAST (object);

  GST_DEBUG_OBJECT (self, "dispose");

  gst_clear_object (&self->context);
  G_OBJECT_CLASS (parent_class)->dispose (object);
}
/* gst_cuda_allocator_alloc:
 * @allocator: a #GstCudaAllocator
 * @size: reported usable size of the memory
 * @params: CUDA allocation params holding the #GstVideoInfo to allocate for
 *
 * Allocates pitched CUDA device memory large enough for every plane of the
 * video format described by @params->info, stacked vertically so all planes
 * share one pitch. The CUDA context is pushed only for the duration of the
 * device allocation.
 *
 * Returns: a new #GstCudaMemory, or %NULL on failure */
GstMemory *
gst_cuda_allocator_alloc (GstAllocator * allocator, gsize size,
    GstCudaAllocationParams * params)
{
  GstCudaAllocator *self = GST_CUDA_ALLOCATOR_CAST (allocator);
  gsize maxsize = size + params->parent.prefix + params->parent.padding;
  gsize align = params->parent.align;
  gsize offset = params->parent.prefix;
  GstMemoryFlags flags = params->parent.flags;
  CUdeviceptr data;
  gboolean ret = FALSE;
  GstCudaMemory *mem;
  GstVideoInfo *info = &params->info;
  gint i;
  guint width, height;
  gsize stride, plane_offset;

  if (!gst_cuda_context_push (self->context))
    return NULL;

  /* ensure configured alignment */
  align |= gst_memory_alignment;
  /* allocate more to compensate for alignment */
  maxsize += align;

  GST_CAT_DEBUG_OBJECT (GST_CAT_MEMORY, self, "allocate new cuda memory");

  /* width in bytes of the widest (first) component; the per-plane heights
   * are summed so a single pitched allocation holds all planes stacked */
  width = GST_VIDEO_INFO_COMP_WIDTH (info, 0) *
      GST_VIDEO_INFO_COMP_PSTRIDE (info, 0);
  height = 0;
  for (i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++)
    height += GST_VIDEO_INFO_COMP_HEIGHT (info, i);

  /* 16-byte element size lets the driver pick an efficient pitch */
  ret = gst_cuda_result (CuMemAllocPitch (&data, &stride, width, height, 16));
  gst_cuda_context_pop (NULL);

  if (G_UNLIKELY (!ret)) {
    GST_CAT_ERROR_OBJECT (GST_CAT_MEMORY, self, "CUDA allocation failure");
    return NULL;
  }

  mem = g_new0 (GstCudaMemory, 1);
  g_mutex_init (&mem->lock);
  mem->data = data;
  mem->alloc_params = *params;
  mem->stride = stride;

  /* per-plane byte offsets into the single pitched allocation */
  plane_offset = 0;
  for (i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++) {
    mem->offset[i] = plane_offset;
    plane_offset += stride * GST_VIDEO_INFO_COMP_HEIGHT (info, i);
  }

  mem->context = gst_object_ref (self->context);

  gst_memory_init (GST_MEMORY_CAST (mem),
      flags, GST_ALLOCATOR_CAST (self), NULL, maxsize, align, offset, size);

  return GST_MEMORY_CAST (mem);
}
/* GstAllocator::free — releases the device memory, the staging host memory
 * (if any was mapped), the context reference and the GstCudaMemory itself.
 *
 * NOTE(review): the gst_cuda_context_push() result is not checked here; if
 * the push fails the CuMemFree/CuMemFreeHost calls likely fail too and the
 * device memory would leak — TODO confirm whether this is acceptable. */
static void
gst_cuda_allocator_free (GstAllocator * allocator, GstMemory * memory)
{
  GstCudaAllocator *self = GST_CUDA_ALLOCATOR_CAST (allocator);
  GstCudaMemory *mem = GST_CUDA_MEMORY_CAST (memory);

  GST_CAT_DEBUG_OBJECT (GST_CAT_MEMORY, allocator, "free cuda memory");

  /* no one can hold the lock at this point; safe to destroy */
  g_mutex_clear (&mem->lock);

  gst_cuda_context_push (self->context);
  if (mem->data)
    gst_cuda_result (CuMemFree (mem->data));

  /* staging host memory allocated lazily by the first system-memory map */
  if (mem->map_alloc_data)
    gst_cuda_result (CuMemFreeHost (mem->map_alloc_data));

  gst_cuda_context_pop (NULL);

  gst_object_unref (mem->context);

  g_free (mem);
}
/* Copy every plane from the staging (host) memory into the CUDA device
 * memory. Called with mem->lock held and the CUDA context pushed.
 * Uses the default stream and synchronizes before returning, so the
 * device memory is valid when this returns TRUE. */
static gboolean
gst_cuda_memory_upload_transfer (GstCudaMemory * mem)
{
  gint i;
  GstVideoInfo *info = &mem->alloc_params.info;
  gboolean ret = TRUE;

  if (!mem->map_data) {
    GST_CAT_ERROR (GST_CAT_MEMORY, "no staging memory to upload");
    return FALSE;
  }

  for (i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++) {
    CUDA_MEMCPY2D param = { 0, };

    /* host side uses the video-info layout, device side the pitched layout */
    param.srcMemoryType = CU_MEMORYTYPE_HOST;
    param.srcHost =
        (guint8 *) mem->map_data + GST_VIDEO_INFO_PLANE_OFFSET (info, i);
    param.srcPitch = GST_VIDEO_INFO_PLANE_STRIDE (info, i);

    param.dstMemoryType = CU_MEMORYTYPE_DEVICE;
    param.dstDevice = mem->data + mem->offset[i];
    param.dstPitch = mem->stride;
    param.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (info, i) *
        GST_VIDEO_INFO_COMP_PSTRIDE (info, i);
    param.Height = GST_VIDEO_INFO_COMP_HEIGHT (info, i);

    if (!gst_cuda_result (CuMemcpy2DAsync (&param, NULL))) {
      GST_CAT_ERROR (GST_CAT_MEMORY, "Failed to copy %dth plane", i);
      ret = FALSE;
      break;
    }
  }

  /* wait for the async copies on the default stream to finish */
  gst_cuda_result (CuStreamSynchronize (NULL));

  return ret;
}
/* called with lock */
/* Copy every plane from the CUDA device memory into the staging (host)
 * memory. Called with mem->lock held and the CUDA context pushed.
 * On a failed plane copy the staging memory is freed and all staging
 * pointers are reset, so the function returns FALSE in that case.
 *
 * Returns: TRUE if the staging memory holds valid data on return */
static gboolean
gst_cuda_memory_download_transfer (GstCudaMemory * mem)
{
  gint i;
  GstVideoInfo *info = &mem->alloc_params.info;

  if (!mem->map_data) {
    /* fixed copy-pasted message that said "upload" in the download path */
    GST_CAT_ERROR (GST_CAT_MEMORY, "no staging memory to download into");
    return FALSE;
  }

  for (i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++) {
    CUDA_MEMCPY2D param = { 0, };

    /* device side uses the pitched layout, host side the video-info layout */
    param.srcMemoryType = CU_MEMORYTYPE_DEVICE;
    param.srcDevice = mem->data + mem->offset[i];
    param.srcPitch = mem->stride;

    param.dstMemoryType = CU_MEMORYTYPE_HOST;
    param.dstHost =
        (guint8 *) mem->map_data + GST_VIDEO_INFO_PLANE_OFFSET (info, i);
    param.dstPitch = GST_VIDEO_INFO_PLANE_STRIDE (info, i);
    param.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (info, i) *
        GST_VIDEO_INFO_COMP_PSTRIDE (info, i);
    param.Height = GST_VIDEO_INFO_COMP_HEIGHT (info, i);

    if (!gst_cuda_result (CuMemcpy2DAsync (&param, NULL))) {
      GST_CAT_ERROR (GST_CAT_MEMORY, "Failed to copy %dth plane", i);
      /* staging content is now inconsistent: drop it entirely */
      CuMemFreeHost (mem->map_alloc_data);
      mem->map_alloc_data = mem->map_data = mem->align_data = NULL;
      break;
    }
  }

  /* wait for the async copies on the default stream to finish */
  gst_cuda_result (CuStreamSynchronize (NULL));

  return ! !mem->map_data;
}
/* Map the memory for system (CPU) access: lazily allocates aligned,
 * page-locked staging host memory on first use and downloads the device
 * contents into it when the NEED_DOWNLOAD flag is set.
 * Called with mem->lock held. Returns the staging pointer or NULL. */
static gpointer
gst_cuda_memory_device_memory_map (GstCudaMemory * mem)
{
  GstMemory *memory = GST_MEMORY_CAST (mem);
  gpointer data;
  gsize aoffset;
  gsize align = memory->align;

  /* already mapped: reuse the existing staging pointer */
  if (mem->map_data) {
    return mem->map_data;
  }

  GST_CAT_DEBUG (GST_CAT_MEMORY, "alloc host memory for map");

  if (!mem->map_alloc_data) {
    gsize maxsize;
    guint8 *align_data;

    /* over-allocate so the returned pointer can be aligned manually
     * (NOTE(review): CuMemAllocHost typically returns page-aligned memory
     * already, so this is belt-and-braces — confirm) */
    maxsize = memory->maxsize + align;
    if (!gst_cuda_context_push (mem->context)) {
      GST_CAT_ERROR (GST_CAT_MEMORY, "cannot push cuda context");

      return NULL;
    }

    if (!gst_cuda_result (CuMemAllocHost (&data, maxsize))) {
      GST_CAT_ERROR (GST_CAT_MEMORY, "cannot alloc host memory");
      gst_cuda_context_pop (NULL);

      return NULL;
    }

    if (!gst_cuda_context_pop (NULL)) {
      GST_CAT_WARNING (GST_CAT_MEMORY, "cannot pop cuda context");
    }

    mem->map_alloc_data = data;
    align_data = data;

    /* do align: 'align' is a mask (alignment - 1), GStreamer convention */
    if ((aoffset = ((guintptr) align_data & align))) {
      aoffset = (align + 1) - aoffset;
      align_data += aoffset;
    }
    mem->align_data = align_data;

    /* first memory, always need download to staging */
    GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD);
  }

  mem->map_data = mem->align_data;

  if (GST_MEMORY_FLAG_IS_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD)) {
    if (!gst_cuda_context_push (mem->context)) {
      GST_CAT_ERROR (GST_CAT_MEMORY, "cannot push cuda context");
      return NULL;
    }

    /* may reset map_data on failure; the return below reflects that */
    gst_cuda_memory_download_transfer (mem);

    if (!gst_cuda_context_pop (NULL)) {
      GST_CAT_WARNING (GST_CAT_MEMORY, "cannot pop cuda context");
    }
  }

  return mem->map_data;
}
/* GstMemory map function. With GST_MAP_CUDA the raw device pointer is
 * returned (after uploading pending staging writes); otherwise the memory
 * is mapped through the page-locked staging host memory.
 *
 * Fixes over the original: on every failure path the map_count increment
 * is rolled back, and the pushed CUDA context is popped when the upload
 * transfer fails (both were previously leaked). */
static gpointer
cuda_mem_map (GstCudaMemory * mem, gsize maxsize, GstMapFlags flags)
{
  gpointer ret = NULL;

  g_mutex_lock (&mem->lock);
  mem->map_count++;

  if ((flags & GST_MAP_CUDA) == GST_MAP_CUDA) {
    /* upload from staging to device memory if necessary */
    if (GST_MEMORY_FLAG_IS_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD)) {
      if (!gst_cuda_context_push (mem->context)) {
        GST_CAT_ERROR (GST_CAT_MEMORY, "cannot push cuda context");
        mem->map_count--;
        g_mutex_unlock (&mem->lock);

        return NULL;
      }

      if (!gst_cuda_memory_upload_transfer (mem)) {
        /* pop the context pushed above before bailing out */
        gst_cuda_context_pop (NULL);
        mem->map_count--;
        g_mutex_unlock (&mem->lock);

        return NULL;
      }

      gst_cuda_context_pop (NULL);
    }

    GST_MEMORY_FLAG_UNSET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD);

    /* a CUDA-side write invalidates the staging copy */
    if ((flags & GST_MAP_WRITE) == GST_MAP_WRITE)
      GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD);

    g_mutex_unlock (&mem->lock);

    return (gpointer) mem->data;
  }

  /* system-memory map: download into staging if necessary */
  ret = gst_cuda_memory_device_memory_map (mem);
  if (ret == NULL) {
    mem->map_count--;
    g_mutex_unlock (&mem->lock);

    return NULL;
  }

  /* a CPU-side write invalidates the device copy */
  if ((flags & GST_MAP_WRITE) == GST_MAP_WRITE)
    GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD);

  GST_MEMORY_FLAG_UNSET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD);

  g_mutex_unlock (&mem->lock);

  return ret;
}
/* GstMemory unmap function. Marks the pending transfer direction based on
 * how the memory was mapped, and clears map_data once the last concurrent
 * map is released (the staging allocation itself is kept for reuse and
 * only freed in gst_cuda_allocator_free()). */
static void
cuda_mem_unmap_full (GstCudaMemory * mem, GstMapInfo * info)
{
  g_mutex_lock (&mem->lock);
  mem->map_count--;
  GST_CAT_TRACE (GST_CAT_MEMORY,
      "unmap CUDA memory %p, map count %d, have map_data %s",
      mem, mem->map_count, mem->map_data ? "true" : "false");

  if ((info->flags & GST_MAP_CUDA) == GST_MAP_CUDA) {
    /* device memory was written: staging must be refreshed on next CPU map */
    if ((info->flags & GST_MAP_WRITE) == GST_MAP_WRITE)
      GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD);

    g_mutex_unlock (&mem->lock);
    return;
  }

  /* staging memory was written: device must be refreshed on next CUDA map */
  if ((info->flags & GST_MAP_WRITE))
    GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD);

  /* still mapped elsewhere, or never mapped to system memory */
  if (mem->map_count > 0 || !mem->map_data) {
    g_mutex_unlock (&mem->lock);
    return;
  }

  mem->map_data = NULL;
  g_mutex_unlock (&mem->lock);

  return;
}
/* GstMemory copy function: allocates a new CUDA memory with the same
 * allocation params and performs a device-to-device copy of every plane.
 * @offset and @size are ignored — the whole memory is always copied.
 *
 * Fix over the original: the result of gst_cuda_allocator_alloc() is now
 * NULL-checked before use (it was previously dereferenced unconditionally). */
static GstMemory *
cuda_mem_copy (GstMemory * mem, gssize offset, gssize size)
{
  GstMemory *copy;
  GstCudaMemory *src_mem = GST_CUDA_MEMORY_CAST (mem);
  GstCudaMemory *dst_mem;
  GstCudaContext *ctx = GST_CUDA_ALLOCATOR_CAST (mem->allocator)->context;
  gint i;
  GstVideoInfo *info;

  /* offset and size are ignored */
  copy = gst_cuda_allocator_alloc (mem->allocator, mem->size,
      &src_mem->alloc_params);

  if (G_UNLIKELY (copy == NULL)) {
    GST_CAT_ERROR (GST_CAT_MEMORY, "cannot allocate memory for copying");
    return NULL;
  }

  dst_mem = GST_CUDA_MEMORY_CAST (copy);

  info = &src_mem->alloc_params.info;

  if (!gst_cuda_context_push (ctx)) {
    GST_CAT_ERROR (GST_CAT_MEMORY, "cannot push cuda context");
    gst_cuda_allocator_free (mem->allocator, copy);

    return NULL;
  }

  for (i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++) {
    CUDA_MEMCPY2D param = { 0, };

    param.srcMemoryType = CU_MEMORYTYPE_DEVICE;
    param.srcDevice = src_mem->data + src_mem->offset[i];
    param.srcPitch = src_mem->stride;

    param.dstMemoryType = CU_MEMORYTYPE_DEVICE;
    param.dstDevice = dst_mem->data + dst_mem->offset[i];
    param.dstPitch = dst_mem->stride;
    param.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (info, i) *
        GST_VIDEO_INFO_COMP_PSTRIDE (info, i);
    param.Height = GST_VIDEO_INFO_COMP_HEIGHT (info, i);

    if (!gst_cuda_result (CuMemcpy2DAsync (&param, NULL))) {
      GST_CAT_ERROR_OBJECT (GST_CAT_MEMORY,
          mem->allocator, "Failed to copy %dth plane", i);
      gst_cuda_context_pop (NULL);
      gst_cuda_allocator_free (mem->allocator, copy);

      return NULL;
    }
  }

  /* wait for the async copies on the default stream to finish */
  gst_cuda_result (CuStreamSynchronize (NULL));

  if (!gst_cuda_context_pop (NULL)) {
    GST_CAT_WARNING (GST_CAT_MEMORY, "cannot pop cuda context");
  }

  return copy;
}
/* gst_cuda_allocator_new:
 * @context: (transfer none): a #GstCudaContext the allocator will ref
 *
 * Returns: (transfer full): a new #GstAllocator, or %NULL on invalid input */
GstAllocator *
gst_cuda_allocator_new (GstCudaContext * context)
{
  GstCudaAllocator *self;

  g_return_val_if_fail (GST_IS_CUDA_CONTEXT (context), NULL);

  self = g_object_new (GST_TYPE_CUDA_ALLOCATOR, NULL);
  self->context = gst_object_ref (context);

  return GST_ALLOCATOR_CAST (self);
}
/* Returns TRUE if @mem is non-NULL and was allocated by a #GstCudaAllocator */
gboolean
gst_is_cuda_memory (GstMemory * mem)
{
  if (mem == NULL || mem->allocator == NULL)
    return FALSE;

  return GST_IS_CUDA_ALLOCATOR (mem->allocator);
}

138
sys/nvcodec/gstcudamemory.h Normal file
View file

@ -0,0 +1,138 @@
/* GStreamer
* Copyright (C) <2018-2019> Seungha Yang <seungha.yang@navercorp.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#ifndef __GST_CUDA_MEMORY_H__
#define __GST_CUDA_MEMORY_H__
#include <gst/gst.h>
#include <gst/gstallocator.h>
#include <gst/video/video.h>
#include "gstcudaloader.h"
#include "gstcudacontext.h"
G_BEGIN_DECLS
#define GST_TYPE_CUDA_ALLOCATOR (gst_cuda_allocator_get_type())
#define GST_CUDA_ALLOCATOR(obj) (G_TYPE_CHECK_INSTANCE_CAST((obj),GST_TYPE_CUDA_ALLOCATOR,GstCudaAllocator))
#define GST_CUDA_ALLOCATOR_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST((klass), GST_TYPE_CUDA_ALLOCATOR,GstCudaAllocatorClass))
#define GST_CUDA_ALLOCATOR_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS((obj), GST_TYPE_CUDA_ALLOCATOR,GstCudaAllocatorClass))
#define GST_IS_CUDA_ALLOCATOR(obj) (G_TYPE_CHECK_INSTANCE_TYPE((obj),GST_TYPE_CUDA_ALLOCATOR))
#define GST_IS_CUDA_ALLOCATOR_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE((klass), GST_TYPE_CUDA_ALLOCATOR))
#define GST_CUDA_ALLOCATOR_CAST(obj) ((GstCudaAllocator *)(obj))
#define GST_CUDA_MEMORY_CAST(mem) ((GstCudaMemory *) (mem))
typedef struct _GstCudaAllocationParams GstCudaAllocationParams;
typedef struct _GstCudaAllocator GstCudaAllocator;
typedef struct _GstCudaAllocatorClass GstCudaAllocatorClass;
typedef struct _GstCudaMemory GstCudaMemory;
/**
 * GST_MAP_CUDA:
 *
 * Flag indicating that we should map the CUDA device memory
 * instead of system memory.
 *
 * Combining #GST_MAP_CUDA with #GST_MAP_WRITE has the same semantics as
 * writing to CUDA device/host memory. Conversely, combining #GST_MAP_CUDA
 * with #GST_MAP_READ has the same semantics as reading from CUDA
 * device/host memory.
 */
#define GST_MAP_CUDA (GST_MAP_FLAG_LAST << 1)
#define GST_CUDA_MEMORY_TYPE_NAME "gst.cuda.memory"
/**
* GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY:
*
* Name of the caps feature for indicating the use of #GstCudaMemory
*/
#define GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY "memory:CUDAMemory"
/* Allocation parameters for gst_cuda_allocator_alloc(): extends the
 * generic GstAllocationParams with the video layout to allocate for */
struct _GstCudaAllocationParams
{
  GstAllocationParams parent;
  GstVideoInfo info;
};

struct _GstCudaAllocator
{
  GstAllocator parent;
  /* CUDA context every allocation/free is performed in */
  GstCudaContext *context;
};

struct _GstCudaAllocatorClass
{
  GstAllocatorClass parent_class;
};

GType gst_cuda_allocator_get_type (void);

GstAllocator * gst_cuda_allocator_new (GstCudaContext * context);

GstMemory * gst_cuda_allocator_alloc (GstAllocator * allocator,
    gsize size,
    GstCudaAllocationParams * params);

/**
 * GstCudaMemoryTransfer:
 * @GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD: the device memory needs downloading
 * to the staging memory
 * @GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD: the staging memory needs uploading
 * to the device memory
 */
typedef enum
{
  GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD = (GST_MEMORY_FLAG_LAST << 0),
  GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD = (GST_MEMORY_FLAG_LAST << 1)
} GstCudaMemoryTransfer;

struct _GstCudaMemory
{
  GstMemory mem;

  GstCudaContext *context;
  /* device pointer to the pitched allocation holding all planes */
  CUdeviceptr data;

  GstCudaAllocationParams alloc_params;

  /* offset and stride of CUDA device memory */
  gsize offset[GST_VIDEO_MAX_PLANES];
  gint stride;

  /* allocated CUDA Host memory */
  gpointer map_alloc_data;

  /* aligned CUDA Host memory */
  guint8 *align_data;

  /* pointing align_data if the memory is mapped */
  gpointer map_data;

  /* number of outstanding maps; protected by @lock */
  gint map_count;

  /* protects map_count, map_data and the transfer flags */
  GMutex lock;
};

gboolean gst_is_cuda_memory (GstMemory * mem);
G_END_DECLS
#endif /* __GST_CUDA_MEMORY_H__ */

View file

@ -12,6 +12,8 @@ nvcodec_sources = [
'gstnvdecoder.c',
'gstnvh264dec.c',
'gstnvh265dec.c',
'gstcudamemory.c',
'gstcudabufferpool.c',
]
if get_option('nvcodec').disabled()

View file

@ -114,6 +114,7 @@ typedef enum
#define cuMemAlloc cuMemAlloc_v2
#define cuMemAllocPitch cuMemAllocPitch_v2
#define cuMemAllocHost cuMemAllocHost_v2
#define cuMemcpy2D cuMemcpy2D_v2
#define cuMemcpy2DAsync cuMemcpy2DAsync_v2
#define cuMemFree cuMemFree_v2