gstreamer/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdecobject.cpp
Seungha Yang 1aa9e74aaf cudadownload: Always download CUDA memory if it's bound to decoder
Decoder bounded CUDA memory is allocated by driver and the pool size
is fixed. Since we don't know how many buffers would be held by
downstream non-CUDA element, we should download such CUDA memory
and release it back to decoder.

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/4810>
2023-06-08 22:27:06 +00:00

608 lines
16 KiB
C++

/* GStreamer
* Copyright (C) 2023 Seungha Yang <seungha@centricular.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "gstnvdecobject.h"
#include <vector>
#include <mutex>
#include <condition_variable>
#include <map>
#include <memory>
#include <string.h>
#include <algorithm>
#include <gst/cuda/gstcuda-private.h>
/* The debug category is only declared here (extern); its definition lives
 * outside this file. */
extern "C"
{
GST_DEBUG_CATEGORY_EXTERN (gst_nv_decoder_debug);
}
/* Every GST_*_OBJECT log statement in this file goes to that category */
#define GST_CAT_DEFAULT gst_nv_decoder_debug
/* Defines the type behind GST_TYPE_NV_DEC_SURFACE, which
 * gst_nv_dec_surface_new() passes to gst_mini_object_init() */
GST_DEFINE_MINI_OBJECT_TYPE (GstNvDecSurface, gst_nv_dec_surface);
/* Forward declaration: used by gst_nv_dec_object_new() and
 * gst_nv_dec_object_reconfigure() before the definition at the end of the
 * file */
static GstNvDecSurface *gst_nv_dec_surface_new (guint seq_num);
/* *INDENT-OFF* */
/* User data attached to every CUDA memory wrapping an exported decoder
 * frame. It is deleted by gst_nv_dec_output_free(), which is registered as
 * the destroy notify of the wrapped memory. */
struct GstNvDecOutput
{
/* Reference to the decoder object, held while the memory is exported;
 * reset to nullptr by gst_nv_dec_output_release() */
GstNvDecObject *self = nullptr;
/* Device pointer of the mapped frame; also the key used in the output maps */
CUdeviceptr devptr = 0;
/* Value of the object's seq_num when the memory was allocated, used to
 * detect memories created before a reconfigure */
guint seq_num = 0;
};
/* C++ side state of GstNvDecObject, allocated in the instance init function
 * and deleted in finalize. */
struct GstNvDecObjectPrivate
{
/* Surfaces currently available for acquisition. Surfaces returned through
 * gst_nv_dec_surface_dispose() are inserted in ascending index order */
std::vector < GstNvDecSurface * >surface_queue;
/* Device pointer -> memory created for that pointer by
 * gst_nv_dec_object_export_surface() */
std::map < CUdeviceptr, GstMemory *> output_map;
/* Device pointer -> memory that was released by its consumers and is ready
 * to be reused; each entry carries the reference taken in
 * gst_nv_dec_output_release() */
std::map < CUdeviceptr, GstMemory *> free_output_map;
/* Taken around accesses to the containers above and around the flushing
 * check and the num_mapped updates */
std::mutex lock;
/* Notified on flush state change, surface release and frame unmap */
std::condition_variable cond;
};
/* *INDENT-ON* */
/* Instance structure of the decoder wrapper object. */
struct _GstNvDecObject
{
GstObject parent;
/* C++ state (containers, mutex, condition variable) */
GstNvDecObjectPrivate *priv;
/* Decoder handle returned by CuvidCreateDecoder() */
CUvideodecoder handle;
/* Copy of the parameters the decoder was created with */
CUVIDDECODECREATEINFO create_info;
/* Video info used as the template for exported memory layout */
GstVideoInfo video_info;
/* Referenced CUDA context the decoder belongs to */
GstCudaContext *context;
/* When TRUE, blocking calls return GST_FLOW_FLUSHING instead of waiting */
gboolean flushing;
/* Number of GstNvDecSurface objects created for the current sequence */
guint pool_size;
/* Number of currently mapped frames; mapping blocks once this reaches
 * create_info.ulNumOutputSurfaces */
guint num_mapped;
/* NOTE(review): never assigned in the visible code - confirm whether this
 * field is still needed */
gboolean alloc_aux_frame;
/* ulTargetHeight of the current configuration, used to compute plane
 * offsets of exported memory */
guint plane_height;
/* Incremented on every successful reconfigure */
guint seq_num;
};
/* Forward declaration, needed by the class init function below */
static void gst_nv_dec_object_finalize (GObject * object);
/* Lets the code refer to the parent class pointer as "parent_class" */
#define gst_nv_dec_object_parent_class parent_class
G_DEFINE_TYPE (GstNvDecObject, gst_nv_dec_object, GST_TYPE_OBJECT);
/* Class initializer: installs the finalize handler of GstNvDecObject. */
static void
gst_nv_dec_object_class_init (GstNvDecObjectClass * klass)
{
  G_OBJECT_CLASS (klass)->finalize = gst_nv_dec_object_finalize;
}
/* Instance initializer: allocates the C++ private state. It is deleted again
 * in gst_nv_dec_object_finalize(). */
static void
gst_nv_dec_object_init (GstNvDecObject * self)
{
  GstNvDecObjectPrivate *priv = new GstNvDecObjectPrivate ();

  self->priv = priv;
}
/* GObject finalize handler.
 *
 * Drops the surfaces still sitting in the queue and the memories parked in
 * the free-output map, deletes the private state, destroys the decoder
 * handle and finally releases the CUDA context reference. The context is
 * pushed for the whole teardown sequence. */
static void
gst_nv_dec_object_finalize (GObject * object)
{
  GstNvDecObject *self = GST_NV_DEC_OBJECT (object);
  GstNvDecObjectPrivate *priv = self->priv;

  GST_DEBUG_OBJECT (self, "Finalize");

  gst_cuda_context_push (self->context);

  /* *INDENT-OFF* */
  for (auto iter = priv->surface_queue.begin ();
      iter != priv->surface_queue.end (); ++iter) {
    gst_nv_dec_surface_unref (*iter);
  }

  for (const auto & entry : priv->free_output_map) {
    gst_memory_unref (entry.second);
  }
  /* *INDENT-ON* */

  delete priv;

  CuvidDestroyDecoder (self->handle);
  gst_cuda_context_pop (nullptr);

  gst_object_unref (self->context);

  G_OBJECT_CLASS (parent_class)->finalize (object);
}
/**
 * gst_nv_dec_object_new:
 * @context: CUDA context the decoder is created in
 * @create_info: decoder creation parameters
 * @video_info: video info stored as the layout template for exported memory
 * @alloc_aux_frame: if TRUE, only half of ulNumDecodeSurfaces is exposed as
 *   output surfaces and the other half is used as decode targets
 *
 * Creates the decoder through CuvidCreateDecoder() and wraps it in a new
 * object together with a freshly populated surface queue.
 *
 * Returns: the new object with its floating reference sunk, or nullptr if
 * the context could not be pushed or the decoder could not be created.
 */
GstNvDecObject *
gst_nv_dec_object_new (GstCudaContext * context,
    CUVIDDECODECREATEINFO * create_info, const GstVideoInfo * video_info,
    gboolean alloc_aux_frame)
{
  CUvideodecoder handle = nullptr;

  if (!gst_cuda_context_push (context)) {
    GST_ERROR_OBJECT (context, "Failed to push context");
    return nullptr;
  }

  const CUresult cuda_ret = CuvidCreateDecoder (&handle, create_info);
  gst_cuda_context_pop (nullptr);

  if (!gst_cuda_result (cuda_ret)) {
    GST_ERROR_OBJECT (context, "Could not create decoder instance");
    return nullptr;
  }

  const guint num_surfaces = create_info->ulNumDecodeSurfaces;
  const guint pool_size = alloc_aux_frame ? num_surfaces / 2 : num_surfaces;
  /* [0, pool_size - 1]: output picture
   * [pool_size, pool_size * 2 - 1]: decoder output without film-grain,
   * used for reference picture */
  const guint decode_index_base = alloc_aux_frame ? pool_size : 0;

  GstNvDecObject *self = (GstNvDecObject *)
      g_object_new (GST_TYPE_NV_DEC_OBJECT, nullptr);
  gst_object_ref_sink (self);

  self->context = (GstCudaContext *) gst_object_ref (context);
  self->handle = handle;
  self->create_info = *create_info;
  self->video_info = *video_info;
  self->pool_size = pool_size;
  self->plane_height = create_info->ulTargetHeight;

  /* Initial surfaces belong to sequence 0 */
  for (guint idx = 0; idx < pool_size; idx++) {
    GstNvDecSurface *surf = gst_nv_dec_surface_new (0);

    surf->index = idx;
    surf->decode_frame_index = idx + decode_index_base;
    self->priv->surface_queue.push_back (surf);
  }

  return self;
}
/**
 * gst_nv_dec_object_reconfigure:
 * @object: the decoder object
 * @reconfigure_info: new decoder parameters
 * @video_info: new video info used for exported memory layout
 * @alloc_aux_frame: if TRUE, only half of ulNumDecodeSurfaces is exposed as
 *   output surfaces and the other half is used as decode targets
 *
 * Reconfigures the existing decoder instead of creating a new one. On
 * success every queued surface is dropped, the sequence number is bumped and
 * a new set of surfaces tagged with the new sequence number is created.
 * The whole operation runs with the object lock held.
 *
 * Returns: TRUE on success. FALSE if reconfigure is not available, the
 * context could not be pushed, or CuvidReconfigureDecoder() failed.
 */
gboolean
gst_nv_dec_object_reconfigure (GstNvDecObject * object,
    CUVIDRECONFIGUREDECODERINFO * reconfigure_info,
    const GstVideoInfo * video_info, gboolean alloc_aux_frame)
{
  GstNvDecObjectPrivate *priv = object->priv;

  if (!gst_cuvid_can_reconfigure ())
    return FALSE;

  const guint num_surfaces = reconfigure_info->ulNumDecodeSurfaces;
  const guint pool_size = alloc_aux_frame ? num_surfaces / 2 : num_surfaces;
  /* [0, pool_size - 1]: output picture
   * [pool_size, pool_size * 2 - 1]: decoder output without film-grain,
   * used for reference picture */
  const guint decode_index_base = alloc_aux_frame ? pool_size : 0;

  std::lock_guard < std::mutex > guard (priv->lock);

  if (!gst_cuda_context_push (object->context)) {
    GST_ERROR_OBJECT (object, "Couldn't push context");
    return FALSE;
  }

  const CUresult cuda_ret =
      CuvidReconfigureDecoder (object->handle, reconfigure_info);
  gst_cuda_context_pop (nullptr);

  if (!gst_cuda_result (cuda_ret)) {
    GST_ERROR_OBJECT (object, "Couldn't reconfigure decoder");
    return FALSE;
  }

  /* Surfaces still held elsewhere are missing from the queue; only warn */
  const guint num_idle = (guint) priv->surface_queue.size ();
  if (num_idle != object->pool_size) {
    GST_WARNING_OBJECT (object, "Unused surfaces %u != pool size %u",
        num_idle, object->pool_size);
  }

  /* Release old surfaces and create new ones */
  for (gsize i = 0; i < priv->surface_queue.size (); i++)
    gst_nv_dec_surface_unref (priv->surface_queue[i]);
  priv->surface_queue.clear ();

  object->pool_size = pool_size;
  object->video_info = *video_info;
  object->seq_num++;
  object->plane_height = reconfigure_info->ulTargetHeight;

  for (guint idx = 0; idx < pool_size; idx++) {
    GstNvDecSurface *surf = gst_nv_dec_surface_new (object->seq_num);

    surf->index = idx;
    surf->decode_frame_index = idx + decode_index_base;
    priv->surface_queue.push_back (surf);
  }

  return TRUE;
}
/**
 * gst_nv_dec_object_set_flushing:
 * @object: the decoder object
 * @flushing: new flushing state
 *
 * Stores the flushing flag under the object lock and wakes up every thread
 * blocked on the condition variable so that it re-evaluates the flag.
 */
void
gst_nv_dec_object_set_flushing (GstNvDecObject * object, gboolean flushing)
{
  GstNvDecObjectPrivate *priv = object->priv;
  std::unique_lock < std::mutex > guard (priv->lock);

  object->flushing = flushing;
  priv->cond.notify_all ();
}
/* Unmaps the frame currently mapped for @surface.
 *
 * On success the surface's device pointer is cleared and the mapped-frame
 * counter is decremented; on failure both are left untouched. Waiters are
 * notified in either case.
 *
 * The function does not take the object lock itself (the only visible caller
 * holds it).
 *
 * Returns: TRUE if the frame was unmapped, FALSE otherwise. */
static gboolean
gst_nv_dec_object_unmap_surface_unlocked (GstNvDecObject * self,
    GstNvDecSurface * surface)
{
  gboolean ret = FALSE;

  if (gst_cuda_result (CuvidUnmapVideoFrame (self->handle, surface->devptr))) {
    surface->devptr = 0;
    self->num_mapped--;
    ret = TRUE;

    GST_LOG_OBJECT (self, "Surface %d is unmapped, num-mapped %d",
        surface->index, self->num_mapped);
  } else {
    GST_ERROR_OBJECT (self, "Couldn't unmap surface %d", surface->index);
  }

  self->priv->cond.notify_all ();

  return ret;
}
/**
 * gst_nv_dec_object_acquire_surface:
 * @object: the decoder object
 * @surface: (out): location to store the acquired surface
 *
 * Takes the first surface from the queue, blocking on the condition variable
 * while the queue is empty. The returned surface holds a reference to
 * @object.
 *
 * Returns: GST_FLOW_OK with @surface set, or GST_FLOW_FLUSHING if the object
 * is flushing (checked before every attempt); @surface is not written in
 * that case.
 */
GstFlowReturn
gst_nv_dec_object_acquire_surface (GstNvDecObject * object,
    GstNvDecSurface ** surface)
{
  GstNvDecObjectPrivate *priv = object->priv;
  GstNvDecSurface *acquired = nullptr;
  std::unique_lock < std::mutex > guard (priv->lock);

  for (;;) {
    if (object->flushing) {
      GST_DEBUG_OBJECT (object, "We are flushing");
      return GST_FLOW_FLUSHING;
    }

    if (priv->surface_queue.empty ()) {
      GST_LOG_OBJECT (object, "No available surface, waiting for release");
      priv->cond.wait (guard);
      continue;
    }

    /* Pop from the front of the queue */
    acquired = priv->surface_queue.front ();
    priv->surface_queue.erase (priv->surface_queue.begin ());
    break;
  }

  g_assert (acquired);
  g_assert (!acquired->object);

  acquired->object = (GstNvDecObject *) gst_object_ref (object);
  *surface = acquired;

  return GST_FLOW_OK;
}
/**
 * gst_nv_dec_object_decode:
 * @object: the decoder object
 * @params: picture parameters handed to CuvidDecodePicture()
 *
 * Submits one picture for decoding with the object's CUDA context pushed.
 * A failure to pop the context afterwards is only logged as a warning.
 *
 * Returns: TRUE if the picture was submitted, FALSE if the context could not
 * be pushed or CuvidDecodePicture() failed.
 */
gboolean
gst_nv_dec_object_decode (GstNvDecObject * object, CUVIDPICPARAMS * params)
{
  GST_LOG_OBJECT (object, "picture index: %u", params->CurrPicIdx);

  if (!gst_cuda_context_push (object->context)) {
    GST_ERROR_OBJECT (object, "Failed to push CUDA context");
    return FALSE;
  }

  gboolean decoded = TRUE;
  if (!gst_cuda_result (CuvidDecodePicture (object->handle, params))) {
    decoded = FALSE;
    GST_ERROR_OBJECT (object, "Failed to decode picture");
  }

  if (!gst_cuda_context_pop (nullptr)) {
    GST_WARNING_OBJECT (object, "Failed to pop CUDA context");
  }

  return decoded;
}
/**
 * gst_nv_dec_object_map_surface:
 * @object: the decoder object
 * @surface: surface whose decoded picture should be mapped
 * @stream: CUDA stream whose handle is passed as the output stream
 *
 * Maps the picture at @surface's index, storing device pointer and pitch in
 * @surface. Blocks while the number of mapped frames has reached
 * create_info.ulNumOutputSurfaces.
 *
 * NOTE(review): no CUDA context is pushed here; presumably the caller is
 * expected to have done so - confirm against callers.
 *
 * Returns: GST_FLOW_OK on success, GST_FLOW_FLUSHING if the object is
 * flushing, GST_FLOW_ERROR if @surface is already mapped or mapping failed.
 */
GstFlowReturn
gst_nv_dec_object_map_surface (GstNvDecObject * object,
    GstNvDecSurface * surface, GstCudaStream * stream)
{
  GstNvDecObjectPrivate *priv = object->priv;

  if (surface->devptr != 0) {
    GST_ERROR_OBJECT (object, "Mapped Surface %d was not cleared",
        surface->index);
    return GST_FLOW_ERROR;
  }

  std::unique_lock < std::mutex > guard (priv->lock);

  /* Wait until a mapping slot is free, bailing out on flush */
  for (;;) {
    if (object->flushing) {
      GST_DEBUG_OBJECT (object, "We are flushing");
      return GST_FLOW_FLUSHING;
    }

    if (object->num_mapped < (guint) object->create_info.ulNumOutputSurfaces)
      break;

    GST_LOG_OBJECT (object, "No available output surface, waiting for release");
    priv->cond.wait (guard);
  }

  CUVIDPROCPARAMS params = { 0 };
  params.progressive_frame = 1;
  params.output_stream = gst_cuda_stream_get_handle (stream);

  if (!gst_cuda_result (CuvidMapVideoFrame (object->handle, surface->index,
              &surface->devptr, &surface->pitch, &params))) {
    GST_ERROR_OBJECT (object, "Couldn't map picture");
    return GST_FLOW_ERROR;
  }

  object->num_mapped++;
  GST_LOG_OBJECT (object, "Surface %d is mapped, num-mapped %d",
      surface->index, object->num_mapped);

  return GST_FLOW_OK;
}
/**
 * gst_nv_dec_object_unmap_surface:
 * @object: the decoder object
 * @surface: a mapped surface
 *
 * Locked wrapper around gst_nv_dec_object_unmap_surface_unlocked().
 *
 * Returns: TRUE if the frame was unmapped.
 */
gboolean
gst_nv_dec_object_unmap_surface (GstNvDecObject * object,
    GstNvDecSurface * surface)
{
  std::lock_guard < std::mutex > guard (object->priv->lock);

  return gst_nv_dec_object_unmap_surface_unlocked (object, surface);
}
/* Dispose function installed on exported memories by
 * gst_nv_dec_object_export_surface().
 *
 * Instead of letting the memory be freed, it is resurrected: the decoder
 * frame behind it is unmapped and the memory is parked in free_output_map
 * so that the next export of the same device pointer can reuse it.
 *
 * Returns FALSE, which per the GstMiniObjectDisposeFunction contract tells
 * the mini-object code not to free the object. */
static gboolean
gst_nv_dec_output_release (GstCudaMemory * mem)
{
GstNvDecOutput *output = (GstNvDecOutput *)
gst_cuda_memory_get_user_data (mem);
GstNvDecObject *self = output->self;
GstNvDecObjectPrivate *priv = self->priv;
GST_LOG_OBJECT (self, "Release memory %p", mem);
/* This reference is the one owned by free_output_map from now on */
gst_memory_ref (GST_MEMORY_CAST (mem));
/* Uninstall this hook so that a later unref (finalize, or the stale
 * sequence path of export) is not routed back here */
GST_MINI_OBJECT_CAST (mem)->dispose = nullptr;
/* The object reference itself is dropped at the end of this function */
output->self = nullptr;
{
std::lock_guard < std::mutex > lk (priv->lock);
/* Decremented before the unmap call and regardless of its result */
self->num_mapped--;
gst_cuda_context_push (self->context);
if (!gst_cuda_result (CuvidUnmapVideoFrame (self->handle, output->devptr))) {
GST_ERROR_OBJECT (self, "Couldn't unmap frame");
} else {
GST_LOG_OBJECT (self, "Exported surface is freed, num-mapped %d",
self->num_mapped);
}
gst_cuda_context_pop (nullptr);
priv->free_output_map[output->devptr] = GST_MEMORY_CAST (mem);
/* Wake up threads waiting for a mapping slot or for this output */
priv->cond.notify_all ();
}
/* Done after the lock scope has ended: if this is the last reference,
 * finalize runs here and deletes priv, including the mutex */
gst_object_unref (self);
return FALSE;
}
/* Destroy notify registered with gst_cuda_allocator_alloc_wrapped(); deletes
 * the GstNvDecOutput that was allocated with new in
 * gst_nv_dec_object_export_surface(). */
static void
gst_nv_dec_output_free (GstNvDecOutput * output)
{
delete output;
}
/**
 * gst_nv_dec_object_export_surface:
 * @object: the decoder object
 * @surface: a mapped surface
 * @stream: CUDA stream associated with newly wrapped memory
 * @memory: (out): location to store the exported memory
 *
 * Exports the frame mapped for @surface as a GstMemory wrapping the device
 * pointer, without copying. Ownership of the mapping moves to the memory:
 * @surface's device pointer is cleared and the frame is unmapped by
 * gst_nv_dec_output_release() once the last memory reference is dropped.
 *
 * If a memory was already created for the same device pointer, the call
 * waits until that memory has been released and then reuses it, unless it
 * was created before the last reconfigure, in which case it is dropped and a
 * new one is allocated.
 *
 * Returns: GST_FLOW_OK with @memory set; GST_FLOW_FLUSHING if the object is
 * flushing while waiting for a previous output; GST_FLOW_ERROR if @surface
 * is not mapped, the video format is not handled, or the memory could not be
 * wrapped. @memory is only written on GST_FLOW_OK.
 */
GstFlowReturn
gst_nv_dec_object_export_surface (GstNvDecObject * object,
    GstNvDecSurface * surface, GstCudaStream * stream, GstMemory ** memory)
{
  GstNvDecObjectPrivate *priv = object->priv;
  GstVideoInfo info;
  gsize offset;
  GstMemory *mem = nullptr;
  GstNvDecOutput *output;

  if (!surface->devptr) {
    GST_ERROR_OBJECT (object, "Surface %d is not mapped", surface->index);
    return GST_FLOW_ERROR;
  }

  GST_LOG_OBJECT (object, "Exporting surface %d", surface->index);

  /* Size of one full-resolution plane in the mapped frame */
  offset = surface->pitch * object->plane_height;

  /* Patch strides/offsets/size of the stored video info so that they
   * describe the decoder's frame layout */
  info = object->video_info;
  switch (GST_VIDEO_INFO_FORMAT (&info)) {
    case GST_VIDEO_FORMAT_NV12:
    case GST_VIDEO_FORMAT_P010_10LE:
    case GST_VIDEO_FORMAT_P016_LE:
      info.stride[0] = surface->pitch;
      info.stride[1] = surface->pitch;
      info.offset[0] = 0;
      info.offset[1] = offset;
      info.size = offset + offset / 2;
      break;
    case GST_VIDEO_FORMAT_Y444:
    case GST_VIDEO_FORMAT_Y444_16LE:
      info.stride[0] = surface->pitch;
      info.stride[1] = surface->pitch;
      info.stride[2] = surface->pitch;
      info.offset[0] = 0;
      info.offset[1] = offset;
      info.offset[2] = offset * 2;
      info.size = offset * 3;
      break;
    default:
      GST_ERROR_OBJECT (object, "Unexpected format %s",
          gst_video_format_to_string (GST_VIDEO_INFO_FORMAT (&info)));
      return GST_FLOW_ERROR;
  }

  std::unique_lock < std::mutex > lk (priv->lock);

  auto output_iter = priv->output_map.find (surface->devptr);
  if (output_iter != priv->output_map.end ())
    mem = output_iter->second;

  if (mem) {
    /* A memory exists for this device pointer; it can only be handed out
     * again after its previous users released it */
    do {
      if (object->flushing) {
        GST_DEBUG_OBJECT (object, "We are flushing");
        return GST_FLOW_FLUSHING;
      }

      auto iter = priv->free_output_map.find (surface->devptr);
      if (iter != priv->free_output_map.end ()) {
        /* The reference held by the map is now ours */
        priv->free_output_map.erase (iter);
        break;
      }

      GST_LOG_OBJECT (object, "Waiting for output release");
      priv->cond.wait (lk);
    } while (true);

    output = (GstNvDecOutput *)
        gst_cuda_memory_get_user_data (GST_CUDA_MEMORY_CAST (mem));
    if (output->seq_num != object->seq_num) {
      GST_DEBUG_OBJECT (object,
          "output belongs to previous sequence, need new memory");
      /* The dispose hook was removed on release, so this frees the memory */
      gst_memory_unref (mem);
      mem = nullptr;
    }
  }

  if (!mem) {
    output = new GstNvDecOutput ();
    output->devptr = surface->devptr;
    output->seq_num = object->seq_num;

    GST_LOG_OBJECT (object, "New output, allocating memory");
    mem = gst_cuda_allocator_alloc_wrapped (nullptr, object->context,
        stream, &info, output->devptr, output,
        (GDestroyNotify) gst_nv_dec_output_free);
    if (!mem) {
      GST_ERROR_OBJECT (object, "Couldn't allocate wrapped memory");
      /* Do not leave an entry behind that may point to the memory freed
       * above in the previous-sequence case */
      priv->output_map.erase (surface->devptr);
      /* NOTE(review): assumes the allocator does not invoke the destroy
       * notify when it returns nullptr - confirm */
      delete output;
      return GST_FLOW_ERROR;
    }

    gst_cuda_memory_set_from_fixed_pool (mem);
    priv->output_map[output->devptr] = mem;
  } else {
    GST_LOG_OBJECT (object, "Reuse memory");
  }

  /* Route the final unref to gst_nv_dec_output_release() so the frame is
   * unmapped and the memory is recycled instead of freed */
  GST_MINI_OBJECT_CAST (mem)->dispose =
      (GstMiniObjectDisposeFunction) gst_nv_dec_output_release;

  output = (GstNvDecOutput *)
      gst_cuda_memory_get_user_data (GST_CUDA_MEMORY_CAST (mem));
  g_assert (!output->self);
  /* Keep the decoder alive for as long as the memory is in use */
  output->self = (GstNvDecObject *) gst_object_ref (object);

  /* The mapping is owned by the memory from now on */
  surface->devptr = 0;
  *memory = mem;

  return GST_FLOW_OK;
}
/* Dispose function of GstNvDecSurface.
 *
 * A surface that is not attached to an object (e.g. one sitting in the
 * queue) is simply freed. An attached surface of the current sequence is
 * resurrected and put back into the owner's queue, keeping the queue ordered
 * by surface index. An attached surface of an older sequence is freed.
 *
 * Returns: TRUE if the surface should be freed, FALSE if it was returned to
 * the queue. */
static gboolean
gst_nv_dec_surface_dispose (GstNvDecSurface * surf)
{
  if (!surf->object)
    return TRUE;

  GstNvDecObject *owner = (GstNvDecObject *) g_steal_pointer (&surf->object);
  GstNvDecObjectPrivate *priv = owner->priv;
  gboolean do_free = FALSE;

  /* *INDENT-OFF* */
  {
    std::lock_guard < std::mutex > guard (priv->lock);

    if (surf->seq_num != owner->seq_num) {
      /* Shouldn't happen (e.g., surfaces were not flushed before reconfigure) */
      GST_WARNING_OBJECT (owner, "Releasing surface %p of previous sequence",
          surf);
      do_free = TRUE;
    } else {
      auto by_index = [] (const GstNvDecSurface * a,
          const GstNvDecSurface * b) {
        return a->index < b->index;
      };

      /* Back to surface queue */
      gst_nv_dec_surface_ref (surf);

      /* Keep sorted order */
      auto pos = std::upper_bound (priv->surface_queue.begin (),
          priv->surface_queue.end (), surf, by_index);
      priv->surface_queue.insert (pos, surf);

      priv->cond.notify_all ();
    }
  }
  /* *INDENT-ON* */

  /* Dropped after the lock scope has ended */
  gst_object_unref (owner);

  return do_free;
}
/* Allocates a zero-initialized surface tagged with @seq_num and sets it up
 * as a mini object that is disposed through gst_nv_dec_surface_dispose() and
 * freed with g_free(). */
static GstNvDecSurface *
gst_nv_dec_surface_new (guint seq_num)
{
  GstNvDecSurface *surface = g_new0 (GstNvDecSurface, 1);

  gst_mini_object_init (GST_MINI_OBJECT_CAST (surface), 0,
      GST_TYPE_NV_DEC_SURFACE,
      nullptr /* copy */ ,
      (GstMiniObjectDisposeFunction) gst_nv_dec_surface_dispose,
      (GstMiniObjectFreeFunction) g_free);

  surface->seq_num = seq_num;

  return surface;
}