nvenc: Support CUDA buffer pool

When upstream supports CUDA memory (only nvdec for now), create a
CUDA buffer pool.

Part-of: <https://gitlab.freedesktop.org/gstreamer/gst-plugins-bad/-/merge_requests/1633>
Authored by Seungha Yang on 2019-08-30 13:57:15 +09:00, committed by GStreamer Merge Bot
parent 8635d12929
commit a8e9d616d6
3 changed files with 197 additions and 15 deletions
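
For context, here is a minimal sketch (not part of this commit) of how an upstream element's decide_allocation handler could pick up the pool that nvenc now proposes; the function name is hypothetical, and only the allocation-query and buffer-pool calls are core GStreamer API:

#include <gst/gst.h>

/* Hypothetical upstream-side handler: inspect the ALLOCATION query that
 * nvenc has answered and, if a pool was proposed (with this change, a
 * GstCudaBufferPool when the caps carry memory:CUDAMemory), activate it. */
static gboolean
upstream_decide_allocation (GstQuery * query)
{
  GstBufferPool *pool = NULL;
  guint size, min, max;

  if (gst_query_get_n_allocation_pools (query) > 0)
    gst_query_parse_nth_allocation_pool (query, 0, &pool, &size, &min, &max);

  if (!pool)
    return FALSE;               /* no proposal: fall back to own allocation */

  if (!gst_buffer_pool_set_active (pool, TRUE)) {
    gst_object_unref (pool);
    return FALSE;
  }

  /* buffers acquired from this pool live in CUDA device memory, so
   * gst_nv_base_enc_upload_frame () can copy them device-to-device */
  gst_object_unref (pool);
  return TRUE;
}

In the nvdec case this negotiation is handled by the decoder base class; the point of the commit is that nvenc now answers the sink-pad ALLOCATION query with a GL or CUDA buffer pool when upstream negotiates those caps features, so decoded frames can stay in device memory.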

gstnvbaseenc.c

@@ -23,6 +23,7 @@
#include "gstnvbaseenc.h"
#include "gstcudautils.h"
#include "gstcudabufferpool.h"
#include <gst/pbutils/codec-utils.h>
@@ -249,6 +250,8 @@ static GstCaps *gst_nv_base_enc_getcaps (GstVideoEncoder * enc,
static gboolean gst_nv_base_enc_stop_bitstream_thread (GstNvBaseEnc * nvenc,
gboolean force);
static gboolean gst_nv_base_enc_drain_encoder (GstNvBaseEnc * nvenc);
static gboolean gst_nv_base_enc_propose_allocation (GstVideoEncoder * enc,
GstQuery * query);
static void
gst_nv_base_enc_class_init (GstNvBaseEncClass * klass)
@@ -276,6 +279,8 @@ gst_nv_base_enc_class_init (GstNvBaseEncClass * klass)
videoenc_class->finish = GST_DEBUG_FUNCPTR (gst_nv_base_enc_finish);
videoenc_class->sink_query = GST_DEBUG_FUNCPTR (gst_nv_base_enc_sink_query);
videoenc_class->sink_event = GST_DEBUG_FUNCPTR (gst_nv_base_enc_sink_event);
videoenc_class->propose_allocation =
GST_DEBUG_FUNCPTR (gst_nv_base_enc_propose_allocation);
g_object_class_install_property (gobject_class, PROP_DEVICE_ID,
g_param_spec_uint ("cuda-device-id",
@@ -564,6 +569,129 @@ gst_nv_base_enc_sink_query (GstVideoEncoder * enc, GstQuery * query)
return GST_VIDEO_ENCODER_CLASS (parent_class)->sink_query (enc, query);
}
#ifdef HAVE_NVCODEC_GST_GL
static gboolean
gst_nv_base_enc_ensure_gl_context (GstNvBaseEnc * nvenc)
{
if (!nvenc->display) {
GST_DEBUG_OBJECT (nvenc, "No available OpenGL display");
return FALSE;
}
if (!gst_gl_query_local_gl_context (GST_ELEMENT (nvenc), GST_PAD_SINK,
(GstGLContext **) & nvenc->gl_context)) {
GST_INFO_OBJECT (nvenc, "failed to query local OpenGL context");
if (nvenc->gl_context)
gst_object_unref (nvenc->gl_context);
nvenc->gl_context =
(GstObject *) gst_gl_display_get_gl_context_for_thread ((GstGLDisplay *)
nvenc->display, NULL);
if (!nvenc->gl_context
|| !gst_gl_display_add_context ((GstGLDisplay *) nvenc->display,
(GstGLContext *) nvenc->gl_context)) {
if (nvenc->gl_context)
gst_object_unref (nvenc->gl_context);
if (!gst_gl_display_create_context ((GstGLDisplay *) nvenc->display,
(GstGLContext *) nvenc->other_context,
(GstGLContext **) & nvenc->gl_context, NULL)) {
GST_ERROR_OBJECT (nvenc, "failed to create OpenGL context");
return FALSE;
}
if (!gst_gl_display_add_context ((GstGLDisplay *) nvenc->display,
(GstGLContext *) nvenc->gl_context)) {
GST_ERROR_OBJECT (nvenc,
"failed to add the OpenGL context to the display");
return FALSE;
}
}
}
if (!gst_gl_context_check_gl_version ((GstGLContext *) nvenc->gl_context,
SUPPORTED_GL_APIS, 3, 0)) {
GST_WARNING_OBJECT (nvenc, "OpenGL context could not support PBO download");
return FALSE;
}
return TRUE;
}
#endif
static gboolean
gst_nv_base_enc_propose_allocation (GstVideoEncoder * enc, GstQuery * query)
{
GstNvBaseEnc *nvenc = GST_NV_BASE_ENC (enc);
GstCaps *caps;
GstVideoInfo info;
GstBufferPool *pool;
GstStructure *config;
GstCapsFeatures *features;
GST_DEBUG_OBJECT (nvenc, "propose allocation");
gst_query_parse_allocation (query, &caps, NULL);
if (caps == NULL)
return FALSE;
if (!gst_video_info_from_caps (&info, caps)) {
GST_WARNING_OBJECT (nvenc, "failed to get video info");
return FALSE;
}
features = gst_caps_get_features (caps, 0);
#if HAVE_NVCODEC_GST_GL
if (features && gst_caps_features_contains (features,
GST_CAPS_FEATURE_MEMORY_GL_MEMORY)) {
GST_DEBUG_OBJECT (nvenc, "upsteram support GL memory");
if (!gst_nv_base_enc_ensure_gl_context (nvenc)) {
GST_WARNING_OBJECT (nvenc, "Could not get gl context");
goto done;
}
pool = gst_gl_buffer_pool_new ((GstGLContext *) nvenc->gl_context);
} else
#endif
if (features && gst_caps_features_contains (features,
GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY)) {
GST_DEBUG_OBJECT (nvenc, "upstream support CUDA memory");
pool = gst_cuda_buffer_pool_new (nvenc->cuda_ctx);
} else {
GST_DEBUG_OBJECT (nvenc, "use system memory");
goto done;
}
if (G_UNLIKELY (pool == NULL)) {
GST_WARNING_OBJECT (nvenc, "cannot create buffer pool");
goto done;
}
config = gst_buffer_pool_get_config (pool);
gst_buffer_pool_config_set_params (config, caps, GST_VIDEO_INFO_SIZE (&info),
nvenc->items->len, nvenc->items->len);
gst_query_add_allocation_pool (query, pool, GST_VIDEO_INFO_SIZE (&info),
nvenc->items->len, nvenc->items->len);
gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META);
gst_query_add_allocation_meta (query, GST_VIDEO_META_API_TYPE, NULL);
if (!gst_buffer_pool_set_config (pool, config))
goto error_pool_config;
gst_object_unref (pool);
done:
return GST_VIDEO_ENCODER_CLASS (parent_class)->propose_allocation (enc,
query);
error_pool_config:
{
if (pool)
gst_object_unref (pool);
GST_WARNING_OBJECT (nvenc, "failed to set config");
return FALSE;
}
}
static gboolean
gst_nv_base_enc_sink_event (GstVideoEncoder * enc, GstEvent * event)
{
@@ -649,6 +777,10 @@ gst_nv_base_enc_stop (GstVideoEncoder * enc)
gst_object_unref (nvenc->other_context);
nvenc->other_context = NULL;
}
if (nvenc->gl_context) {
gst_object_unref (nvenc->gl_context);
nvenc->gl_context = NULL;
}
if (nvenc->items) {
g_array_free (nvenc->items, TRUE);
@@ -1717,7 +1849,6 @@ gst_nv_base_enc_set_format (GstVideoEncoder * enc, GstVideoCodecState * state)
if (!reconfigure) {
nvenc->input_info = *info;
nvenc->gl_input = FALSE;
}
if (nvenc->input_state)
@@ -1727,9 +1858,7 @@ gst_nv_base_enc_set_format (GstVideoEncoder * enc, GstVideoCodecState * state)
/* now allocate some buffers only on first configuration */
if (!reconfigure) {
#if HAVE_NVCODEC_GST_GL
GstCapsFeatures *features;
#endif
guint i;
guint input_width, input_height;
guint n_bufs;
@@ -1744,11 +1873,17 @@ gst_nv_base_enc_set_format (GstVideoEncoder * enc, GstVideoCodecState * state)
/* input buffers */
g_array_set_size (nvenc->items, n_bufs);
#if HAVE_NVCODEC_GST_GL
nvenc->mem_type = GST_NVENC_MEM_TYPE_SYSTEM;
features = gst_caps_get_features (state->caps, 0);
if (gst_caps_features_contains (features,
GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY)) {
nvenc->mem_type = GST_NVENC_MEM_TYPE_CUDA;
}
#if HAVE_NVCODEC_GST_GL
else if (gst_caps_features_contains (features,
GST_CAPS_FEATURE_MEMORY_GL_MEMORY)) {
nvenc->gl_input = TRUE;
nvenc->mem_type = GST_NVENC_MEM_TYPE_GL;
}
#endif
@@ -2090,26 +2225,37 @@ _map_gl_input_buffer (GstGLContext * context, GstNvEncGLMapData * data)
static gboolean
gst_nv_base_enc_upload_frame (GstNvBaseEnc * nvenc, GstVideoFrame * frame,
GstNvEncInputResource * resource)
GstNvEncInputResource * resource, gboolean use_device_memory)
{
gint i;
CUdeviceptr dst = resource->cuda_pointer;
GstVideoInfo *info = &frame->info;
CUresult cuda_ret;
GstCudaMemory *cuda_mem = NULL;
if (!gst_cuda_context_push (nvenc->cuda_ctx)) {
GST_ERROR_OBJECT (nvenc, "cannot push context");
return FALSE;
}
if (use_device_memory) {
cuda_mem = (GstCudaMemory *) gst_buffer_peek_memory (frame->buffer, 0);
}
for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (frame); i++) {
CUDA_MEMCPY2D param = { 0, };
guint dest_stride = _get_cuda_device_stride (&nvenc->input_info, i,
resource->cuda_stride);
param.srcMemoryType = CU_MEMORYTYPE_HOST;
param.srcHost = GST_VIDEO_FRAME_PLANE_DATA (frame, i);
param.srcPitch = GST_VIDEO_FRAME_PLANE_STRIDE (frame, i);
if (use_device_memory) {
param.srcMemoryType = CU_MEMORYTYPE_DEVICE;
param.srcDevice = cuda_mem->data + cuda_mem->offset[i];
param.srcPitch = cuda_mem->stride;
} else {
param.srcMemoryType = CU_MEMORYTYPE_HOST;
param.srcHost = GST_VIDEO_FRAME_PLANE_DATA (frame, i);
param.srcPitch = GST_VIDEO_FRAME_PLANE_STRIDE (frame, i);
}
param.dstMemoryType = CU_MEMORYTYPE_DEVICE;
param.dstDevice = dst;
@@ -2268,6 +2414,7 @@ gst_nv_base_enc_handle_frame (GstVideoEncoder * enc, GstVideoCodecFrame * frame)
GstMapFlags in_map_flags = GST_MAP_READ;
GstNvEncFrameState *state = NULL;
GstNvEncInputResource *resource = NULL;
gboolean use_device_memory = FALSE;
g_assert (nvenc->encoder != NULL);
@@ -2292,10 +2439,27 @@ gst_nv_base_enc_handle_frame (GstVideoEncoder * enc, GstVideoCodecFrame * frame)
GST_VIDEO_CODEC_FRAME_SET_FORCE_KEYFRAME (frame);
}
#if HAVE_NVCODEC_GST_GL
if (nvenc->gl_input)
if (nvenc->mem_type == GST_NVENC_MEM_TYPE_GL)
in_map_flags |= GST_MAP_GL;
#endif
if (nvenc->mem_type == GST_NVENC_MEM_TYPE_CUDA) {
GstMemory *mem;
if ((mem = gst_buffer_peek_memory (frame->input_buffer, 0)) &&
gst_is_cuda_memory (mem)) {
GstCudaMemory *cmem = GST_CUDA_MEMORY_CAST (mem);
/* FIXME: enhance CUDA memory copy across multiple GPUs */
if (cmem->context == nvenc->cuda_ctx ||
gst_cuda_context_get_handle (cmem->context) ==
gst_cuda_context_get_handle (nvenc->cuda_ctx)) {
use_device_memory = TRUE;
in_map_flags |= GST_MAP_CUDA;
}
}
}
if (!gst_video_frame_map (&vframe, info, frame->input_buffer, in_map_flags)) {
goto drop;
}
@@ -2315,7 +2479,7 @@ gst_nv_base_enc_handle_frame (GstVideoEncoder * enc, GstVideoCodecFrame * frame)
resource = state->in_buf;
#if HAVE_NVCODEC_GST_GL
if (nvenc->gl_input) {
if (nvenc->mem_type == GST_NVENC_MEM_TYPE_GL) {
GstGLMemory *gl_mem;
GstNvEncGLMapData data;
@@ -2335,7 +2499,8 @@ gst_nv_base_enc_handle_frame (GstVideoEncoder * enc, GstVideoCodecFrame * frame)
}
} else
#endif
if (!gst_nv_base_enc_upload_frame (nvenc, &vframe, resource)) {
if (!gst_nv_base_enc_upload_frame (nvenc,
&vframe, resource, use_device_memory)) {
flow = GST_FLOW_ERROR;
goto unmap_and_drop;
}

gstnvbaseenc.h

@@ -61,6 +61,14 @@ typedef enum {
GST_NV_RC_MODE_VBR_HQ,
} GstNvRCMode;
typedef enum
{
GST_NVENC_MEM_TYPE_SYSTEM = 0,
GST_NVENC_MEM_TYPE_GL,
GST_NVENC_MEM_TYPE_CUDA,
/* FIXME: add support for D3D11 memory */
} GstNvEncMemType;
typedef struct {
gboolean weighted_prediction;
gint rc_modes;
@@ -112,7 +120,7 @@ typedef struct {
GstVideoCodecState *input_state;
volatile gint reconfig; /* ATOMIC */
gboolean gl_input;
GstNvEncMemType mem_type;
/* array of allocated input/output buffers (GstNvEncFrameState),
* and hold the ownership of the GstNvEncFrameState. */
@@ -137,6 +145,7 @@ typedef struct {
GstObject *display; /* GstGLDisplay */
GstObject *other_context; /* GstGLContext */
GstObject *gl_context; /* GstGLContext */
GstVideoInfo input_info; /* buffer configuration for buffers sent to NVENC */

gstnvenc.c

@@ -24,6 +24,8 @@
#include "gstnvenc.h"
#include "gstnvh264enc.h"
#include "gstnvh265enc.h"
#include "gstcudabufferpool.h"
#include <gmodule.h>
#if HAVE_NVCODEC_GST_GL
@@ -787,15 +789,21 @@ gst_nv_enc_register (GstPlugin * plugin, GUID codec_id, const gchar * codec,
g_value_unset (interlace_modes);
g_free (interlace_modes);
}
#if HAVE_NVCODEC_GST_GL
{
GstCaps *cuda_caps = gst_caps_copy (sink_templ);
#if HAVE_NVCODEC_GST_GL
GstCaps *gl_caps = gst_caps_copy (sink_templ);
gst_caps_set_features_simple (gl_caps,
gst_caps_features_from_string (GST_CAPS_FEATURE_MEMORY_GL_MEMORY));
gst_caps_append (sink_templ, gl_caps);
}
#endif
gst_caps_set_features_simple (cuda_caps,
gst_caps_features_from_string (GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY));
gst_caps_append (sink_templ, cuda_caps);
}
name = g_strdup_printf ("video/x-%s", codec);
src_templ = gst_caps_new_simple (name,
"width", GST_TYPE_INT_RANGE, min_width, max_width,