mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2025-01-11 18:05:37 +00:00
1aa9e74aaf
Decoder bounded CUDA memory is allocated by driver and the pool size is fixed. Since we don't know how many buffers would be held by downstream non-CUDA element, we should download such CUDA memory and release it back to decoder. Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/4810>
607 lines
16 KiB
C++
607 lines
16 KiB
C++
/* GStreamer
|
|
* Copyright (C) 2023 Seungha Yang <seungha@centricular.com>
|
|
*
|
|
* This library is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Library General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2 of the License, or (at your option) any later version.
|
|
*
|
|
* This library is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Library General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Library General Public
|
|
* License along with this library; if not, write to the
|
|
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
|
|
* Boston, MA 02110-1301, USA.
|
|
*/
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
#include "config.h"
|
|
#endif
|
|
|
|
#include "gstnvdecobject.h"
|
|
#include <vector>
|
|
#include <mutex>
|
|
#include <condition_variable>
|
|
#include <map>
|
|
#include <memory>
|
|
#include <string.h>
|
|
#include <algorithm>
|
|
#include <gst/cuda/gstcuda-private.h>
|
|
|
|
extern "C"
|
|
{
|
|
GST_DEBUG_CATEGORY_EXTERN (gst_nv_decoder_debug);
|
|
}
|
|
|
|
#define GST_CAT_DEFAULT gst_nv_decoder_debug
|
|
|
|
GST_DEFINE_MINI_OBJECT_TYPE (GstNvDecSurface, gst_nv_dec_surface);
|
|
static GstNvDecSurface *gst_nv_dec_surface_new (guint seq_num);
|
|
|
|
/* *INDENT-OFF* */
|
|
struct GstNvDecOutput
|
|
{
|
|
GstNvDecObject *self = nullptr;
|
|
CUdeviceptr devptr = 0;
|
|
guint seq_num = 0;
|
|
};
|
|
|
|
struct GstNvDecObjectPrivate
|
|
{
|
|
std::vector < GstNvDecSurface * >surface_queue;
|
|
std::map < CUdeviceptr, GstMemory *> output_map;
|
|
std::map < CUdeviceptr, GstMemory *> free_output_map;
|
|
|
|
std::mutex lock;
|
|
std::condition_variable cond;
|
|
};
|
|
/* *INDENT-ON* */
|
|
|
|
struct _GstNvDecObject
|
|
{
|
|
GstObject parent;
|
|
|
|
GstNvDecObjectPrivate *priv;
|
|
|
|
CUvideodecoder handle;
|
|
CUVIDDECODECREATEINFO create_info;
|
|
|
|
GstVideoInfo video_info;
|
|
|
|
GstCudaContext *context;
|
|
|
|
gboolean flushing;
|
|
|
|
guint pool_size;
|
|
guint num_mapped;
|
|
gboolean alloc_aux_frame;
|
|
guint plane_height;
|
|
guint seq_num;
|
|
};
|
|
|
|
static void gst_nv_dec_object_finalize (GObject * object);
|
|
|
|
#define gst_nv_dec_object_parent_class parent_class
|
|
G_DEFINE_TYPE (GstNvDecObject, gst_nv_dec_object, GST_TYPE_OBJECT);
|
|
|
|
static void
|
|
gst_nv_dec_object_class_init (GstNvDecObjectClass * klass)
|
|
{
|
|
GObjectClass *object_class = G_OBJECT_CLASS (klass);
|
|
|
|
object_class->finalize = gst_nv_dec_object_finalize;
|
|
}
|
|
|
|
static void
|
|
gst_nv_dec_object_init (GstNvDecObject * self)
|
|
{
|
|
self->priv = new GstNvDecObjectPrivate ();
|
|
}
|
|
|
|
static void
|
|
gst_nv_dec_object_finalize (GObject * object)
|
|
{
|
|
GstNvDecObject *self = GST_NV_DEC_OBJECT (object);
|
|
GstNvDecObjectPrivate *priv = self->priv;
|
|
|
|
GST_DEBUG_OBJECT (self, "Finalize");
|
|
|
|
gst_cuda_context_push (self->context);
|
|
/* *INDENT-OFF* */
|
|
for (auto it : priv->surface_queue)
|
|
gst_nv_dec_surface_unref (it);
|
|
|
|
/* *INDENT-OFF* */
|
|
for (auto it : priv->free_output_map)
|
|
gst_memory_unref (it.second);
|
|
/* *INDENT-ON* */
|
|
|
|
delete self->priv;
|
|
|
|
CuvidDestroyDecoder (self->handle);
|
|
gst_cuda_context_pop (nullptr);
|
|
|
|
gst_object_unref (self->context);
|
|
|
|
G_OBJECT_CLASS (parent_class)->finalize (object);
|
|
}
|
|
|
|
/**
 * gst_nv_dec_object_new:
 * @context: CUDA context the decoder is created in
 * @create_info: CUVID decoder creation parameters
 * @video_info: video info describing the decoder output
 * @alloc_aux_frame: whether half of the decode surfaces are reserved as
 *   auxiliary (non film-grain) reference frames
 *
 * Creates the CUVID decoder and a decoder object holding a queue of
 * surfaces, all tagged with sequence number 0.
 *
 * Returns: a new #GstNvDecObject, or %nullptr if the context could not be
 * pushed or the decoder could not be created
 */
GstNvDecObject *
gst_nv_dec_object_new (GstCudaContext * context,
    CUVIDDECODECREATEINFO * create_info, const GstVideoInfo * video_info,
    gboolean alloc_aux_frame)
{
  CUvideodecoder decoder = nullptr;

  if (!gst_cuda_context_push (context)) {
    GST_ERROR_OBJECT (context, "Failed to push context");
    return nullptr;
  }

  CUresult cuda_ret = CuvidCreateDecoder (&decoder, create_info);
  gst_cuda_context_pop (nullptr);

  if (!gst_cuda_result (cuda_ret)) {
    GST_ERROR_OBJECT (context, "Could not create decoder instance");
    return nullptr;
  }

  /* With auxiliary frames only half of the decode surfaces are exposed
   * as output pictures */
  guint total_surfaces = create_info->ulNumDecodeSurfaces;
  const guint num_surfaces =
      alloc_aux_frame ? total_surfaces / 2 : total_surfaces;

  GstNvDecObject *obj = (GstNvDecObject *)
      g_object_new (GST_TYPE_NV_DEC_OBJECT, nullptr);
  gst_object_ref_sink (obj);

  obj->context = (GstCudaContext *) gst_object_ref (context);
  obj->handle = decoder;
  obj->create_info = *create_info;
  obj->video_info = *video_info;
  obj->pool_size = num_surfaces;
  obj->plane_height = create_info->ulTargetHeight;

  /* [0, pool_size - 1]: output picture
   * [pool_size, pool_size * 2 - 1]: decoder output without film-grain,
   * used for reference picture */
  const guint decode_index_base = alloc_aux_frame ? num_surfaces : 0;

  for (guint idx = 0; idx < num_surfaces; idx++) {
    GstNvDecSurface *surf = gst_nv_dec_surface_new (0);

    surf->index = idx;
    surf->decode_frame_index = idx + decode_index_base;

    obj->priv->surface_queue.push_back (surf);
  }

  return obj;
}
|
|
|
|
/**
 * gst_nv_dec_object_reconfigure:
 * @object: a #GstNvDecObject
 * @reconfigure_info: CUVID reconfigure parameters
 * @video_info: video info describing the new decoder output
 * @alloc_aux_frame: whether half of the decode surfaces are reserved as
 *   auxiliary (non film-grain) reference frames
 *
 * Reconfigures the existing decoder, bumps the sequence number and replaces
 * the surface queue with freshly created surfaces.
 *
 * Returns: %TRUE on success, %FALSE if reconfigure is unsupported, the
 * context could not be pushed or the decoder rejected the new configuration
 */
gboolean
gst_nv_dec_object_reconfigure (GstNvDecObject * object,
    CUVIDRECONFIGUREDECODERINFO * reconfigure_info,
    const GstVideoInfo * video_info, gboolean alloc_aux_frame)
{
  GstNvDecObjectPrivate *priv = object->priv;

  if (!gst_cuvid_can_reconfigure ())
    return FALSE;

  guint total_surfaces = reconfigure_info->ulNumDecodeSurfaces;
  const guint new_pool_size =
      alloc_aux_frame ? total_surfaces / 2 : total_surfaces;

  std::lock_guard < std::mutex > guard (priv->lock);

  if (!gst_cuda_context_push (object->context)) {
    GST_ERROR_OBJECT (object, "Couldn't push context");
    return FALSE;
  }

  CUresult cuda_ret =
      CuvidReconfigureDecoder (object->handle, reconfigure_info);
  gst_cuda_context_pop (nullptr);

  if (!gst_cuda_result (cuda_ret)) {
    GST_ERROR_OBJECT (object, "Couldn't reconfigure decoder");
    return FALSE;
  }

  /* Surfaces still held elsewhere are not in the queue at this point */
  if ((guint) priv->surface_queue.size () != object->pool_size) {
    GST_WARNING_OBJECT (object, "Unused surfaces %u != pool size %u",
        (guint) priv->surface_queue.size (), object->pool_size);
  }

  /* Release old surfaces and create new ones */
  /* *INDENT-OFF* */
  for (GstNvDecSurface *old_surf : priv->surface_queue)
    gst_nv_dec_surface_unref (old_surf);
  /* *INDENT-ON* */
  priv->surface_queue.clear ();

  object->pool_size = new_pool_size;
  object->video_info = *video_info;
  object->seq_num++;
  object->plane_height = reconfigure_info->ulTargetHeight;

  /* [0, pool_size - 1]: output picture
   * [pool_size, pool_size * 2 - 1]: decoder output without film-grain,
   * used for reference picture */
  const guint decode_index_base = alloc_aux_frame ? new_pool_size : 0;

  for (guint idx = 0; idx < new_pool_size; idx++) {
    GstNvDecSurface *surf = gst_nv_dec_surface_new (object->seq_num);

    surf->index = idx;
    surf->decode_frame_index = idx + decode_index_base;

    priv->surface_queue.push_back (surf);
  }

  return TRUE;
}
|
|
|
|
void
|
|
gst_nv_dec_object_set_flushing (GstNvDecObject * object, gboolean flushing)
|
|
{
|
|
GstNvDecObjectPrivate *priv = object->priv;
|
|
std::lock_guard < std::mutex > lk (priv->lock);
|
|
object->flushing = flushing;
|
|
priv->cond.notify_all ();
|
|
}
|
|
|
|
/* Unmaps the frame mapped for @surface. Takes no lock itself; the wrapper
 * gst_nv_dec_object_unmap_surface() acquires it before calling.
 *
 * On success the surface's device pointer is cleared and the mapped-frame
 * counter is decremented. Waiters are woken up in both cases.
 *
 * Returns TRUE if the frame was unmapped, FALSE otherwise. */
static gboolean
gst_nv_dec_object_unmap_surface_unlocked (GstNvDecObject * self,
    GstNvDecSurface * surface)
{
  CUresult cuda_ret = CuvidUnmapVideoFrame (self->handle, surface->devptr);

  if (!gst_cuda_result (cuda_ret)) {
    GST_ERROR_OBJECT (self, "Couldn't unmap surface %d", surface->index);
    self->priv->cond.notify_all ();

    return FALSE;
  }

  surface->devptr = 0;
  self->num_mapped--;

  GST_LOG_OBJECT (self, "Surface %d is unmapped, num-mapped %d",
      surface->index, self->num_mapped);

  self->priv->cond.notify_all ();

  return TRUE;
}
|
|
|
|
/**
 * gst_nv_dec_object_acquire_surface:
 * @object: a #GstNvDecObject
 * @surface: (out): location to store the acquired surface
 *
 * Takes the first surface out of the surface queue, blocking until one is
 * available. The returned surface holds a reference to @object.
 *
 * Returns: %GST_FLOW_OK on success, %GST_FLOW_FLUSHING if the object is
 * flushing (checked before every attempt)
 */
GstFlowReturn
gst_nv_dec_object_acquire_surface (GstNvDecObject * object,
    GstNvDecSurface ** surface)
{
  GstNvDecObjectPrivate *priv = object->priv;
  std::unique_lock < std::mutex > lk (priv->lock);

  for (;;) {
    if (object->flushing) {
      GST_DEBUG_OBJECT (object, "We are flushing");
      return GST_FLOW_FLUSHING;
    }

    if (!priv->surface_queue.empty ())
      break;

    GST_LOG_OBJECT (object, "No available surface, waiting for release");
    priv->cond.wait (lk);
  }

  /* Queue is sorted by index, so this picks the lowest free index */
  GstNvDecSurface *acquired = priv->surface_queue.front ();
  priv->surface_queue.erase (priv->surface_queue.begin ());

  g_assert (acquired);
  g_assert (!acquired->object);

  /* The surface keeps the decoder object alive until it is disposed */
  acquired->object = (GstNvDecObject *) gst_object_ref (object);
  *surface = acquired;

  return GST_FLOW_OK;
}
|
|
|
|
/**
 * gst_nv_dec_object_decode:
 * @object: a #GstNvDecObject
 * @params: picture parameters passed to the decoder
 *
 * Submits one picture to the decoder with the object's CUDA context pushed.
 *
 * Returns: %TRUE if the picture was submitted, %FALSE if the context could
 * not be pushed or the decode call failed. A failing context pop is only
 * logged.
 */
gboolean
gst_nv_dec_object_decode (GstNvDecObject * object, CUVIDPICPARAMS * params)
{
  GST_LOG_OBJECT (object, "picture index: %u", params->CurrPicIdx);

  if (!gst_cuda_context_push (object->context)) {
    GST_ERROR_OBJECT (object, "Failed to push CUDA context");
    return FALSE;
  }

  gboolean decoded = TRUE;
  CUresult cuda_ret = CuvidDecodePicture (object->handle, params);

  if (!gst_cuda_result (cuda_ret)) {
    GST_ERROR_OBJECT (object, "Failed to decode picture");
    decoded = FALSE;
  }

  if (!gst_cuda_context_pop (nullptr))
    GST_WARNING_OBJECT (object, "Failed to pop CUDA context");

  return decoded;
}
|
|
|
|
/**
 * gst_nv_dec_object_map_surface:
 * @object: a #GstNvDecObject
 * @surface: surface whose decoded frame should be mapped
 * @stream: CUDA stream handed to the decoder as output stream
 *
 * Maps the decoded frame of @surface, storing device pointer and pitch in
 * the surface. Blocks while the number of mapped frames has reached
 * ulNumOutputSurfaces of the creation info.
 *
 * NOTE(review): no CUDA context is pushed here; presumably the caller has
 * it current already - confirm against callers.
 *
 * Returns: %GST_FLOW_OK on success, %GST_FLOW_FLUSHING if the object is
 * flushing, %GST_FLOW_ERROR if the surface is still mapped or mapping failed
 */
GstFlowReturn
gst_nv_dec_object_map_surface (GstNvDecObject * object,
    GstNvDecSurface * surface, GstCudaStream * stream)
{
  GstNvDecObjectPrivate *priv = object->priv;

  /* A non-zero device pointer means a previous mapping was neither
   * unmapped nor exported */
  if (surface->devptr) {
    GST_ERROR_OBJECT (object, "Mapped Surface %d was not cleared",
        surface->index);
    return GST_FLOW_ERROR;
  }

  std::unique_lock < std::mutex > lk (priv->lock);
  do {
    if (object->flushing) {
      GST_DEBUG_OBJECT (object, "We are flushing");
      return GST_FLOW_FLUSHING;
    }

    if (object->num_mapped < (guint) object->create_info.ulNumOutputSurfaces) {
      CUVIDPROCPARAMS params = { 0 };

      params.progressive_frame = 1;
      params.output_stream = gst_cuda_stream_get_handle (stream);

      if (!gst_cuda_result (CuvidMapVideoFrame (object->handle, surface->index,
                  &surface->devptr, &surface->pitch, &params))) {
        GST_ERROR_OBJECT (object, "Couldn't map picture");
        return GST_FLOW_ERROR;
      }

      object->num_mapped++;
      GST_LOG_OBJECT (object, "Surface %d is mapped, num-mapped %d",
          surface->index, object->num_mapped);
      break;
    }

    /* Woken up again when a frame is unmapped or flushing changes */
    GST_LOG_OBJECT (object, "No available output surface, waiting for release");
    priv->cond.wait (lk);
  } while (true);

  return GST_FLOW_OK;
}
|
|
|
|
/**
 * gst_nv_dec_object_unmap_surface:
 * @object: a #GstNvDecObject
 * @surface: a mapped surface
 *
 * Locked wrapper around gst_nv_dec_object_unmap_surface_unlocked().
 *
 * Returns: %TRUE if the frame was unmapped
 */
gboolean
gst_nv_dec_object_unmap_surface (GstNvDecObject * object,
    GstNvDecSurface * surface)
{
  std::lock_guard < std::mutex > guard (object->priv->lock);

  return gst_nv_dec_object_unmap_surface_unlocked (object, surface);
}
|
|
|
|
/* Dispose callback installed on exported output memory.
 *
 * Instead of letting the memory be freed, the memory is revived with a new
 * reference, the decoder frame is unmapped and the memory is parked in
 * free_output_map so that it can be reused for the same device pointer.
 *
 * Always returns FALSE, i.e. the mini object must not be freed. */
static gboolean
gst_nv_dec_output_release (GstCudaMemory * mem)
{
  GstNvDecOutput *output = (GstNvDecOutput *)
      gst_cuda_memory_get_user_data (mem);
  GstNvDecObject *self = output->self;
  GstNvDecObjectPrivate *priv = self->priv;

  GST_LOG_OBJECT (self, "Release memory %p", mem);

  /* Keep the memory alive; the dispose hook is re-installed on next export */
  gst_memory_ref (GST_MEMORY_CAST (mem));
  GST_MINI_OBJECT_CAST (mem)->dispose = nullptr;

  output->self = nullptr;

  {
    std::lock_guard < std::mutex > lk (priv->lock);
    gboolean pushed;

    self->num_mapped--;

    /* Only pop below when the push succeeded, to keep the context stack
     * balanced; unmapping is still attempted and its failure is logged */
    pushed = gst_cuda_context_push (self->context);
    if (!pushed)
      GST_WARNING_OBJECT (self, "Couldn't push context");

    if (!gst_cuda_result (CuvidUnmapVideoFrame (self->handle, output->devptr))) {
      GST_ERROR_OBJECT (self, "Couldn't unmap frame");
    } else {
      GST_LOG_OBJECT (self, "Exported surface is freed, num-mapped %d",
          self->num_mapped);
    }

    if (pushed)
      gst_cuda_context_pop (nullptr);

    priv->free_output_map[output->devptr] = GST_MEMORY_CAST (mem);
    priv->cond.notify_all ();
  }

  /* Drop the reference taken at export time, outside of the lock since
   * this may be the last reference to the decoder object */
  gst_object_unref (self);

  return FALSE;
}
|
|
|
|
static void
|
|
gst_nv_dec_output_free (GstNvDecOutput * output)
|
|
{
|
|
delete output;
|
|
}
|
|
|
|
/**
 * gst_nv_dec_object_export_surface:
 * @object: a #GstNvDecObject
 * @surface: a mapped surface
 * @stream: CUDA stream associated with newly wrapped memory
 * @memory: (out): location to store the exported memory
 *
 * Wraps the mapped frame of @surface into a CUDA memory without copying.
 * A memory previously created for the same device pointer is reused once it
 * has been released, unless it belongs to an older sequence. Ownership of
 * the mapping moves to the memory: the surface's device pointer is cleared
 * and the frame is unmapped when the memory is released.
 *
 * Returns: %GST_FLOW_OK on success, %GST_FLOW_FLUSHING if the object is
 * flushing while waiting for the previous memory, %GST_FLOW_ERROR if the
 * surface is not mapped or the video format is not handled
 */
GstFlowReturn
gst_nv_dec_object_export_surface (GstNvDecObject * object,
    GstNvDecSurface * surface, GstCudaStream * stream, GstMemory ** memory)
{
  GstNvDecObjectPrivate *priv = object->priv;

  if (!surface->devptr) {
    GST_ERROR_OBJECT (object, "Surface %d is not mapped", surface->index);
    return GST_FLOW_ERROR;
  }

  GST_LOG_OBJECT (object, "Exporting surface %d", surface->index);

  /* Describe the decoder's pitched layout: planes are stacked vertically,
   * each one plane_height rows apart */
  GstVideoInfo layout = object->video_info;
  const gsize plane_size = surface->pitch * object->plane_height;
  guint num_planes;

  switch (GST_VIDEO_INFO_FORMAT (&layout)) {
    case GST_VIDEO_FORMAT_NV12:
    case GST_VIDEO_FORMAT_P010_10LE:
    case GST_VIDEO_FORMAT_P016_LE:
      num_planes = 2;
      layout.size = plane_size + plane_size / 2;
      break;
    case GST_VIDEO_FORMAT_Y444:
    case GST_VIDEO_FORMAT_Y444_16LE:
      num_planes = 3;
      layout.size = plane_size * 3;
      break;
    default:
      GST_ERROR_OBJECT (object, "Unexpected format %s",
          gst_video_format_to_string (GST_VIDEO_INFO_FORMAT (&layout)));
      return GST_FLOW_ERROR;
  }

  for (guint plane = 0; plane < num_planes; plane++) {
    layout.stride[plane] = surface->pitch;
    layout.offset[plane] = plane_size * plane;
  }

  std::unique_lock < std::mutex > lk (priv->lock);
  GstMemory *wrapped = nullptr;
  GstNvDecOutput *output;

  auto known = priv->output_map.find (surface->devptr);
  if (known != priv->output_map.end ())
    wrapped = known->second;

  if (wrapped) {
    /* A memory exists for this device pointer: wait until it shows up in
     * the free map and take it (and its reference) out of there */
    for (;;) {
      if (object->flushing) {
        GST_DEBUG_OBJECT (object, "We are flushing");
        return GST_FLOW_FLUSHING;
      }

      auto released = priv->free_output_map.find (surface->devptr);
      if (released != priv->free_output_map.end ()) {
        priv->free_output_map.erase (released);
        break;
      }

      GST_LOG_OBJECT (object, "Waiting for output release");
      priv->cond.wait (lk);
    }

    output = (GstNvDecOutput *)
        gst_cuda_memory_get_user_data (GST_CUDA_MEMORY_CAST (wrapped));
    if (output->seq_num != object->seq_num) {
      GST_DEBUG_OBJECT (object,
          "output belongs to previous sequence, need new memory");
      gst_memory_unref (wrapped);
      wrapped = nullptr;
    }
  }

  if (wrapped) {
    GST_LOG_OBJECT (object, "Reuse memory");
  } else {
    output = new GstNvDecOutput ();
    output->devptr = surface->devptr;
    output->seq_num = object->seq_num;

    GST_LOG_OBJECT (object, "New output, allocating memory");

    wrapped = gst_cuda_allocator_alloc_wrapped (nullptr, object->context,
        stream, &layout, output->devptr, output,
        (GDestroyNotify) gst_nv_dec_output_free);
    gst_cuda_memory_set_from_fixed_pool (wrapped);

    priv->output_map[output->devptr] = wrapped;
  }

  /* Route the last unref through the release callback instead of freeing */
  GST_MINI_OBJECT_CAST (wrapped)->dispose =
      (GstMiniObjectDisposeFunction) gst_nv_dec_output_release;

  output = (GstNvDecOutput *)
      gst_cuda_memory_get_user_data (GST_CUDA_MEMORY_CAST (wrapped));

  g_assert (!output->self);

  output->self = (GstNvDecObject *) gst_object_ref (object);

  /* The mapping is now owned by the memory, not by the surface */
  surface->devptr = 0;

  *memory = wrapped;

  return GST_FLOW_OK;
}
|
|
|
|
/* Dispose callback of GstNvDecSurface.
 *
 * A surface without owner is simply freed (returns TRUE). Otherwise the
 * surface is detached from its decoder object and, if it still belongs to
 * the current sequence, revived and inserted back into the surface queue
 * (returns FALSE). Surfaces of a previous sequence are freed. */
static gboolean
gst_nv_dec_surface_dispose (GstNvDecSurface * surf)
{
  if (!surf->object)
    return TRUE;

  GstNvDecObject *owner = (GstNvDecObject *) g_steal_pointer (&surf->object);
  GstNvDecObjectPrivate *priv = owner->priv;
  gboolean do_free;

  /* *INDENT-OFF* */
  {
    std::lock_guard < std::mutex > guard (priv->lock);

    if (surf->seq_num != owner->seq_num) {
      GST_WARNING_OBJECT (owner, "Releasing surface %p of previous sequence",
          surf);
      /* Shouldn't happen (e.g., surfaces were not flushed before reconfigure) */
      do_free = TRUE;
    } else {
      /* Back to surface queue */
      gst_nv_dec_surface_ref (surf);

      /* Keep sorted order */
      auto &queue = priv->surface_queue;
      auto insert_at = std::upper_bound (queue.begin (), queue.end (), surf,
          [] (const GstNvDecSurface * lhs, const GstNvDecSurface * rhs) {
            return lhs->index < rhs->index;
          });

      queue.insert (insert_at, surf);
      priv->cond.notify_all ();
      do_free = FALSE;
    }
  }
  /* *INDENT-ON* */

  /* Reference taken in acquire_surface(); dropped outside of the lock */
  gst_object_unref (owner);

  return do_free;
}
|
|
|
|
/* Allocates a zero-initialized surface tagged with @seq_num. The surface is
 * a mini object using gst_nv_dec_surface_dispose() as dispose function and
 * g_free() as free function. */
static GstNvDecSurface *
gst_nv_dec_surface_new (guint seq_num)
{
  GstNvDecSurface *surface =
      (GstNvDecSurface *) g_malloc0 (sizeof (GstNvDecSurface));

  gst_mini_object_init (GST_MINI_OBJECT_CAST (surface),
      0, GST_TYPE_NV_DEC_SURFACE, nullptr,
      (GstMiniObjectDisposeFunction) gst_nv_dec_surface_dispose,
      (GstMiniObjectFreeFunction) g_free);

  surface->seq_num = seq_num;

  return surface;
}
|