mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2024-12-29 11:40:38 +00:00
1741 lines
50 KiB
C++
1741 lines
50 KiB
C++
/* GStreamer
|
|
* Copyright (C) 2024 Seungha Yang <seungha@centricular.com>
|
|
*
|
|
* This library is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Library General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2 of the License, or (at your option) any later version.
|
|
*
|
|
* This library is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Library General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Library General Public
|
|
* License along with this library; if not, write to the
|
|
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
|
|
* Boston, MA 02110-1301, USA.
|
|
*/
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
#include "config.h"
|
|
#endif
|
|
|
|
#include "gstnvcompvideodec.h"
|
|
|
|
#ifdef HAVE_GST_GL
|
|
#include <gst/gl/gl.h>
|
|
#include <gst/gl/gstglfuncs.h>
|
|
#endif
|
|
|
|
#include <nvcomp/nvcompManagerFactory.hpp>
|
|
#include <nvcomp/ans.h>
|
|
#include <nvcomp/bitcomp.h>
|
|
#include <nvcomp/cascaded.h>
|
|
#include <nvcomp/deflate.h>
|
|
#include <nvcomp/gdeflate.h>
|
|
#include <nvcomp/lz4.h>
|
|
#include <nvcomp/snappy.h>
|
|
#include <nvcomp/zstd.h>
|
|
#include <memory>
|
|
#include <string>
|
|
#include <string.h>
|
|
#include <vector>
|
|
|
|
GST_DEBUG_CATEGORY_STATIC (gst_nv_comp_video_dec_debug);
|
|
#define GST_CAT_DEFAULT gst_nv_comp_video_dec_debug
|
|
|
|
#ifdef HAVE_GST_GL
|
|
#define SRC_CAPS \
|
|
GST_VIDEO_CAPS_MAKE_WITH_FEATURES (GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY, \
|
|
GST_VIDEO_FORMATS_ALL) ";" \
|
|
GST_VIDEO_CAPS_MAKE_WITH_FEATURES (GST_CAPS_FEATURE_MEMORY_GL_MEMORY, \
|
|
GST_VIDEO_FORMATS_ALL) ";" \
|
|
GST_VIDEO_CAPS_MAKE (GST_VIDEO_FORMATS_ALL)
|
|
#else
|
|
#define SRC_CAPS \
|
|
GST_VIDEO_CAPS_MAKE_WITH_FEATURES (GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY, \
|
|
GST_VIDEO_FORMATS_ALL) ";" \
|
|
GST_VIDEO_CAPS_MAKE (GST_VIDEO_FORMATS_ALL)
|
|
#endif
|
|
|
|
static GstStaticPadTemplate sink_template =
|
|
GST_STATIC_PAD_TEMPLATE ("sink", GST_PAD_SINK, GST_PAD_ALWAYS,
|
|
GST_STATIC_CAPS ("video/x-nvcomp; video/x-nvcomp-lz4; "
|
|
"video/x-nvcomp-snappy; video/x-nvcomp-gdeflate; "
|
|
"video/x-nvcomp-deflate; video/x-nvcomp-zstd; video/x-nvcomp-cascaded; "
|
|
"video/x-nvcomp-bitcomp; video/x-nvcomp-ans"));
|
|
|
|
static GstStaticPadTemplate src_template =
|
|
GST_STATIC_PAD_TEMPLATE ("src", GST_PAD_SRC, GST_PAD_ALWAYS,
|
|
GST_STATIC_CAPS (SRC_CAPS));
|
|
|
|
/* *INDENT-OFF* */
|
|
using namespace nvcomp;
|
|
|
|
struct DecoderTask
|
|
{
|
|
~DecoderTask ()
|
|
{
|
|
if (ctx) {
|
|
gst_cuda_context_push (ctx);
|
|
clear_resource ();
|
|
gst_cuda_context_pop (nullptr);
|
|
gst_object_unref (ctx);
|
|
}
|
|
}
|
|
|
|
void clear_resource ()
|
|
{
|
|
if (!ctx)
|
|
return;
|
|
|
|
if (device_compressed)
|
|
CuMemFree ((CUdeviceptr) device_compressed);
|
|
device_compressed = nullptr;
|
|
|
|
if (host_compressed)
|
|
CuMemFreeHost (host_compressed);
|
|
host_compressed = nullptr;
|
|
|
|
if (device_compressed_bytes)
|
|
CuMemFree ((CUdeviceptr) device_compressed_bytes);
|
|
device_compressed_bytes = nullptr;
|
|
|
|
if (device_compressed_ptrs)
|
|
CuMemFree ((CUdeviceptr) device_compressed_ptrs);
|
|
device_compressed_ptrs = nullptr;
|
|
|
|
if (host_compressed_bytes)
|
|
CuMemFreeHost (host_compressed_bytes);
|
|
host_compressed_bytes = nullptr;
|
|
|
|
if (host_compressed_ptrs)
|
|
CuMemFreeHost (host_compressed_ptrs);
|
|
host_compressed_ptrs = nullptr;
|
|
|
|
if (device_uncompressed)
|
|
CuMemFree ((CUdeviceptr) device_uncompressed);
|
|
device_uncompressed = nullptr;
|
|
|
|
if (device_uncompressed_temp)
|
|
CuMemFree ((CUdeviceptr) device_uncompressed_temp);
|
|
device_uncompressed_temp = nullptr;
|
|
|
|
if (host_uncompressed)
|
|
CuMemFreeHost (host_uncompressed);
|
|
host_uncompressed = nullptr;
|
|
|
|
if (device_uncompressed_bytes)
|
|
CuMemFree ((CUdeviceptr) device_uncompressed_bytes);
|
|
device_uncompressed_bytes = nullptr;
|
|
|
|
if (device_uncompressed_ptrs)
|
|
CuMemFree ((CUdeviceptr) device_uncompressed_ptrs);
|
|
device_uncompressed_ptrs = nullptr;
|
|
|
|
if (host_uncompressed_bytes)
|
|
CuMemFreeHost (host_uncompressed_bytes);
|
|
host_uncompressed_bytes = nullptr;
|
|
|
|
if (host_uncompressed_ptrs)
|
|
CuMemFreeHost (host_uncompressed_ptrs);
|
|
host_uncompressed_ptrs = nullptr;
|
|
|
|
if (device_actual_uncompressed_bytes)
|
|
CuMemFree ((CUdeviceptr) device_actual_uncompressed_bytes);
|
|
device_actual_uncompressed_bytes = nullptr;
|
|
|
|
if (temp_ptr)
|
|
CuMemFree ((CUdeviceptr) temp_ptr);
|
|
temp_ptr = nullptr;
|
|
|
|
if (device_statuses)
|
|
CuMemFree ((CUdeviceptr) device_statuses);
|
|
device_statuses = nullptr;
|
|
|
|
batch_size = 0;
|
|
max_compressed_chunk_size = 0;
|
|
max_uncompressed_chunk_size = 0;
|
|
}
|
|
|
|
bool allocate_batched (size_t num_chunks,
|
|
size_t compressed_chunk_size,
|
|
size_t uncompressed_chunk_size, size_t temp_bytes)
|
|
{
|
|
size_t compressed_alloc;
|
|
size_t uncompressed_alloc;
|
|
size_t alloc_size = num_chunks * sizeof (size_t);
|
|
uint8_t *src;
|
|
|
|
compressed_chunk_size = GST_ROUND_UP_8 (compressed_chunk_size);
|
|
uncompressed_chunk_size = GST_ROUND_UP_8 (uncompressed_chunk_size);
|
|
|
|
compressed_alloc = num_chunks * compressed_chunk_size;
|
|
uncompressed_alloc = num_chunks * uncompressed_chunk_size;
|
|
|
|
auto ret = CuMemAlloc ((CUdeviceptr *) &device_compressed,
|
|
compressed_alloc);
|
|
if (!gst_cuda_result (ret))
|
|
return false;
|
|
|
|
ret = CuMemAllocHost ((void **) &host_compressed, compressed_alloc);
|
|
if (!gst_cuda_result (ret))
|
|
return false;
|
|
|
|
ret = CuMemAlloc ((CUdeviceptr *) &device_compressed_bytes,
|
|
alloc_size);
|
|
if (!gst_cuda_result (ret))
|
|
return false;
|
|
|
|
ret = CuMemAlloc ((CUdeviceptr *) &device_compressed_ptrs,
|
|
alloc_size);
|
|
if (!gst_cuda_result (ret))
|
|
return false;
|
|
|
|
ret = CuMemAllocHost ((void **) &host_compressed_bytes,
|
|
alloc_size);
|
|
if (!gst_cuda_result (ret))
|
|
return false;
|
|
|
|
ret = CuMemAllocHost ((void **) &host_compressed_ptrs,
|
|
alloc_size);
|
|
if (!gst_cuda_result (ret))
|
|
return false;
|
|
|
|
src = device_compressed;
|
|
for (size_t i = 0; i < num_chunks; i++) {
|
|
host_compressed_ptrs[i] = src;
|
|
src += compressed_chunk_size;
|
|
}
|
|
|
|
ret = CuMemcpyHtoD ((CUdeviceptr) device_compressed_ptrs,
|
|
host_compressed_ptrs, alloc_size);
|
|
if (!gst_cuda_result (ret))
|
|
return false;
|
|
|
|
ret = CuMemAlloc ((CUdeviceptr *) &device_uncompressed_temp,
|
|
uncompressed_alloc);
|
|
if (!gst_cuda_result (ret))
|
|
return false;
|
|
|
|
ret = CuMemAlloc ((CUdeviceptr *) &device_uncompressed,
|
|
uncompressed_alloc);
|
|
if (!gst_cuda_result (ret))
|
|
return false;
|
|
|
|
ret = CuMemAllocHost ((void **) &host_uncompressed, uncompressed_alloc);
|
|
if (!gst_cuda_result (ret))
|
|
return false;
|
|
|
|
ret = CuMemAlloc ((CUdeviceptr *) &device_uncompressed_bytes,
|
|
alloc_size);
|
|
if (!gst_cuda_result (ret))
|
|
return false;
|
|
|
|
ret = CuMemAlloc ((CUdeviceptr *) &device_uncompressed_ptrs,
|
|
alloc_size);
|
|
if (!gst_cuda_result (ret))
|
|
return false;
|
|
|
|
ret = CuMemAllocHost ((void **) &host_uncompressed_bytes,
|
|
alloc_size);
|
|
if (!gst_cuda_result (ret))
|
|
return false;
|
|
|
|
ret = CuMemAllocHost ((void **) &host_uncompressed_ptrs,
|
|
alloc_size);
|
|
if (!gst_cuda_result (ret))
|
|
return false;
|
|
|
|
src = device_uncompressed_temp;
|
|
for (size_t i = 0; i < num_chunks; i++) {
|
|
host_uncompressed_bytes[i] = uncompressed_chunk_size;
|
|
host_uncompressed_ptrs[i] = src;
|
|
src += uncompressed_chunk_size;
|
|
}
|
|
|
|
ret = CuMemcpyHtoD ((CUdeviceptr) device_uncompressed_bytes,
|
|
host_uncompressed_bytes, alloc_size);
|
|
if (!gst_cuda_result (ret))
|
|
return false;
|
|
|
|
ret = CuMemcpyHtoD ((CUdeviceptr) device_uncompressed_ptrs,
|
|
host_uncompressed_ptrs, alloc_size);
|
|
if (!gst_cuda_result (ret))
|
|
return false;
|
|
|
|
ret = CuMemAlloc ((CUdeviceptr *) &device_actual_uncompressed_bytes,
|
|
alloc_size);
|
|
if (!gst_cuda_result (ret))
|
|
return false;
|
|
|
|
if (temp_bytes > 0) {
|
|
ret = CuMemAlloc ((CUdeviceptr *) &temp_ptr, temp_bytes);
|
|
if (!gst_cuda_result (ret))
|
|
return false;
|
|
}
|
|
|
|
ret = CuMemAlloc ((CUdeviceptr *) &device_statuses,
|
|
sizeof (nvcompStatus_t) * num_chunks);
|
|
if (!gst_cuda_result (ret))
|
|
return false;
|
|
|
|
batched = TRUE;
|
|
batch_size = num_chunks;
|
|
temp_size = temp_bytes;
|
|
max_compressed_chunk_size = compressed_chunk_size;
|
|
max_uncompressed_chunk_size = uncompressed_chunk_size;
|
|
compressed_alloc_size = compressed_alloc;
|
|
uncompressed_alloc_size = uncompressed_alloc;
|
|
|
|
return true;
|
|
}
|
|
|
|
GstCudaContext *ctx = nullptr;
|
|
|
|
uint8_t *device_compressed = nullptr;
|
|
uint8_t *host_compressed = nullptr;
|
|
|
|
size_t *device_compressed_bytes = nullptr;
|
|
void **device_compressed_ptrs = nullptr;
|
|
|
|
size_t *host_compressed_bytes = nullptr;
|
|
void **host_compressed_ptrs = nullptr;
|
|
|
|
uint8_t *device_uncompressed = nullptr;
|
|
uint8_t *device_uncompressed_temp = nullptr;
|
|
uint8_t *host_uncompressed = nullptr;
|
|
|
|
size_t *device_uncompressed_bytes = nullptr;
|
|
void **device_uncompressed_ptrs = nullptr;
|
|
|
|
size_t *host_uncompressed_bytes = nullptr;
|
|
void **host_uncompressed_ptrs = nullptr;
|
|
|
|
size_t *device_actual_uncompressed_bytes = nullptr;
|
|
|
|
void *temp_ptr = nullptr;
|
|
size_t temp_size = 0;
|
|
|
|
nvcompStatus_t *device_statuses = nullptr;
|
|
|
|
gboolean batched = FALSE;
|
|
size_t batch_size = 0;
|
|
size_t max_uncompressed_chunk_size = 0;
|
|
size_t max_compressed_chunk_size = 0;
|
|
size_t uncompressed_alloc_size = 0;
|
|
size_t compressed_alloc_size = 0;
|
|
};
|
|
|
|
struct BatchedDecompBase
|
|
{
|
|
virtual nvcompStatus_t get_temp_size(
|
|
size_t num_chunks,
|
|
size_t max_uncompressed_chunk_bytes,
|
|
size_t * temp_bytes) = 0;
|
|
|
|
virtual nvcompStatus_t decompress(
|
|
void **device_compressed_ptrs,
|
|
size_t *device_compressed_bytes,
|
|
size_t *device_uncompressed_bytes,
|
|
size_t *device_actual_uncompressed_bytes,
|
|
size_t batch_size,
|
|
void *device_temp_ptr,
|
|
size_t temp_bytes,
|
|
void **device_uncompressed_ptrs,
|
|
nvcompStatus_t *device_statuses,
|
|
cudaStream_t stream) = 0;
|
|
};
|
|
|
|
template <auto T, auto D>
|
|
class BatchedDecomp : public BatchedDecompBase
|
|
{
|
|
public:
|
|
BatchedDecomp () {}
|
|
|
|
nvcompStatus_t get_temp_size(
|
|
size_t num_chunks,
|
|
size_t max_uncompressed_chunk_bytes,
|
|
size_t * temp_bytes)
|
|
{
|
|
return T (num_chunks, max_uncompressed_chunk_bytes, temp_bytes);
|
|
}
|
|
|
|
nvcompStatus_t decompress(
|
|
void **device_compressed_ptrs,
|
|
size_t *device_compressed_bytes,
|
|
size_t *device_uncompressed_bytes,
|
|
size_t *device_actual_uncompressed_bytes,
|
|
size_t batch_size,
|
|
void *device_temp_ptr,
|
|
size_t temp_bytes,
|
|
void **device_uncompressed_ptrs,
|
|
nvcompStatus_t *device_statuses,
|
|
cudaStream_t stream)
|
|
{
|
|
return D (device_compressed_ptrs, device_compressed_bytes,
|
|
device_uncompressed_bytes, device_actual_uncompressed_bytes,
|
|
batch_size, device_temp_ptr, temp_bytes, device_uncompressed_ptrs,
|
|
device_statuses, stream);
|
|
}
|
|
};
|
|
|
|
struct GstNvCompVideoDecPrivate
|
|
{
|
|
GstNvCompVideoDecPrivate ()
|
|
{
|
|
gst_video_info_init (&info);
|
|
}
|
|
|
|
GstCudaContext *ctx = nullptr;
|
|
GstCudaStream *stream = nullptr;
|
|
|
|
#ifdef HAVE_GST_GL
|
|
GstGLDisplay *gl_display = nullptr;
|
|
GstGLContext *gl_context = nullptr;
|
|
GstGLContext *other_gl_context = nullptr;
|
|
#endif
|
|
|
|
GstVideoCodecState *state = nullptr;
|
|
std::shared_ptr<nvcompManagerBase> manager;
|
|
std::shared_ptr<BatchedDecompBase> batched_decomp;
|
|
std::shared_ptr<DecoderTask> task;
|
|
gboolean gl_interop = FALSE;
|
|
|
|
GstVideoInfo info;
|
|
gboolean batched = FALSE;
|
|
GstNvCompMethod method;
|
|
};
|
|
/* *INDENT-ON* */
|
|
|
|
struct _GstNvCompVideoDec
|
|
{
|
|
GstVideoDecoder parent;
|
|
GstNvCompVideoDecPrivate *priv;
|
|
};
|
|
|
|
static void gst_nv_comp_video_dec_finalize (GObject * object);
|
|
|
|
static void gst_nv_comp_video_dec_set_context (GstElement * element,
|
|
GstContext * context);
|
|
|
|
static gboolean gst_nv_comp_video_dec_open (GstVideoDecoder * decoder);
|
|
static gboolean gst_nv_comp_video_dec_close (GstVideoDecoder * decoder);
|
|
static gboolean gst_nv_comp_video_dec_sink_query (GstVideoDecoder * decoder,
|
|
GstQuery * query);
|
|
static gboolean gst_nv_comp_video_dec_src_query (GstVideoDecoder * decoder,
|
|
GstQuery * query);
|
|
static gboolean
|
|
gst_nv_comp_video_dec_decide_allocation (GstVideoDecoder * decoder,
|
|
GstQuery * query);
|
|
static gboolean gst_nv_comp_video_dec_set_format (GstVideoDecoder * decoder,
|
|
GstVideoCodecState * state);
|
|
static gboolean gst_nv_comp_video_dec_negotiate (GstVideoDecoder * decoder);
|
|
static GstFlowReturn
|
|
gst_nv_comp_video_dec_handle_frame (GstVideoDecoder * decoder,
|
|
GstVideoCodecFrame * frame);
|
|
|
|
#define gst_nv_comp_video_dec_parent_class parent_class
|
|
G_DEFINE_TYPE (GstNvCompVideoDec,
|
|
gst_nv_comp_video_dec, GST_TYPE_VIDEO_DECODER);
|
|
|
|
static void
|
|
gst_nv_comp_video_dec_class_init (GstNvCompVideoDecClass * klass)
|
|
{
|
|
auto object_class = G_OBJECT_CLASS (klass);
|
|
auto element_class = GST_ELEMENT_CLASS (klass);
|
|
auto decoder_class = GST_VIDEO_DECODER_CLASS (klass);
|
|
|
|
object_class->finalize = gst_nv_comp_video_dec_finalize;
|
|
|
|
gst_element_class_add_static_pad_template (element_class, &sink_template);
|
|
gst_element_class_add_static_pad_template (element_class, &src_template);
|
|
|
|
gst_element_class_set_static_metadata (element_class,
|
|
"nvCOMP Video Decoder", "Decoder/Video/Hardware",
|
|
"Decompress a video stream using nvCOMP library",
|
|
"Seungha Yang <seungha@centricular.com>");
|
|
|
|
element_class->set_context =
|
|
GST_DEBUG_FUNCPTR (gst_nv_comp_video_dec_set_context);
|
|
|
|
decoder_class->open = GST_DEBUG_FUNCPTR (gst_nv_comp_video_dec_open);
|
|
decoder_class->close = GST_DEBUG_FUNCPTR (gst_nv_comp_video_dec_close);
|
|
decoder_class->sink_query =
|
|
GST_DEBUG_FUNCPTR (gst_nv_comp_video_dec_sink_query);
|
|
decoder_class->src_query =
|
|
GST_DEBUG_FUNCPTR (gst_nv_comp_video_dec_src_query);
|
|
decoder_class->decide_allocation =
|
|
GST_DEBUG_FUNCPTR (gst_nv_comp_video_dec_decide_allocation);
|
|
decoder_class->set_format =
|
|
GST_DEBUG_FUNCPTR (gst_nv_comp_video_dec_set_format);
|
|
decoder_class->negotiate =
|
|
GST_DEBUG_FUNCPTR (gst_nv_comp_video_dec_negotiate);
|
|
decoder_class->handle_frame =
|
|
GST_DEBUG_FUNCPTR (gst_nv_comp_video_dec_handle_frame);
|
|
|
|
GST_DEBUG_CATEGORY_INIT (gst_nv_comp_video_dec_debug,
|
|
"nvcompvideodec", 0, "nvcompvideodec");
|
|
}
|
|
|
|
static void
|
|
gst_nv_comp_video_dec_init (GstNvCompVideoDec * self)
|
|
{
|
|
self->priv = new GstNvCompVideoDecPrivate ();
|
|
}
|
|
|
|
static void
|
|
gst_nv_comp_video_dec_finalize (GObject * object)
|
|
{
|
|
auto self = GST_NV_COMP_VIDEO_DEC (object);
|
|
|
|
delete self->priv;
|
|
|
|
G_OBJECT_CLASS (parent_class)->finalize (object);
|
|
}
|
|
|
|
static void
|
|
gst_nv_comp_video_dec_set_context (GstElement * element, GstContext * context)
|
|
{
|
|
auto self = GST_NV_COMP_VIDEO_DEC (element);
|
|
auto priv = self->priv;
|
|
|
|
gst_cuda_handle_set_context (element, context, -1, &priv->ctx);
|
|
#ifdef HAVE_GST_GL
|
|
if (gst_gl_handle_set_context (element, context, &priv->gl_display,
|
|
&priv->other_gl_context)) {
|
|
if (priv->gl_display)
|
|
gst_gl_display_filter_gl_api (priv->gl_display, GST_GL_API_OPENGL3);
|
|
}
|
|
#endif
|
|
|
|
GST_ELEMENT_CLASS (parent_class)->set_context (element, context);
|
|
}
|
|
|
|
/* GstVideoDecoder::open: makes sure a CUDA context is available and creates
 * a stream on it. Returns FALSE when no context could be obtained. */
static gboolean
gst_nv_comp_video_dec_open (GstVideoDecoder * decoder)
{
  GstNvCompVideoDec *self = GST_NV_COMP_VIDEO_DEC (decoder);
  GstNvCompVideoDecPrivate *priv = self->priv;

  gboolean have_ctx = gst_cuda_ensure_element_context (GST_ELEMENT_CAST
      (decoder), -1, &priv->ctx);
  if (!have_ctx) {
    GST_ERROR_OBJECT (self, "Couldn't get cuda context");
    return FALSE;
  }

  /* The result is stored unchecked; decide_allocation tests it for null
   * before using it */
  priv->stream = gst_cuda_stream_new (priv->ctx);

  return TRUE;
}
|
|
|
|
/* GstVideoDecoder::close: releases everything acquired in open, set_format
 * and the GL setup path. Always returns TRUE. */
static gboolean
gst_nv_comp_video_dec_close (GstVideoDecoder * decoder)
{
  auto self = GST_NV_COMP_VIDEO_DEC (decoder);
  auto priv = self->priv;

  if (priv->ctx) {
    /* Drop the manager and the task while our context is pushed */
    gst_cuda_context_push (priv->ctx);
    priv->manager = nullptr;
    priv->task = nullptr;

    gst_cuda_context_pop (nullptr);
  }

  priv->batched_decomp = nullptr;

  /* Drop the input state reference taken in set_format */
  g_clear_pointer (&priv->state, gst_video_codec_state_unref);

  gst_clear_cuda_stream (&priv->stream);
  gst_clear_object (&priv->ctx);

#ifdef HAVE_GST_GL
  gst_clear_object (&priv->other_gl_context);
  gst_clear_object (&priv->gl_context);
  /* The display was previously never released (gl_context was cleared
   * twice instead) */
  gst_clear_object (&priv->gl_display);
#endif

  return TRUE;
}
|
|
|
|
/* Answers a context query from the element's GL objects (when built with
 * GL support) or from its CUDA context. Returns TRUE when one of the helpers
 * handled the query. */
static gboolean
gst_nv_comp_video_dec_handle_context_query (GstNvCompVideoDec * self,
    GstQuery * query)
{
  GstNvCompVideoDecPrivate *priv = self->priv;

#ifdef HAVE_GST_GL
  {
    /* Hold our own references for the duration of the helper call */
    GstGLDisplay *display = priv->gl_display ?
        (GstGLDisplay *) gst_object_ref (priv->gl_display) : nullptr;
    GstGLContext *local = priv->gl_context ?
        (GstGLContext *) gst_object_ref (priv->gl_context) : nullptr;
    GstGLContext *other = priv->other_gl_context ?
        (GstGLContext *) gst_object_ref (priv->other_gl_context) : nullptr;

    gboolean handled = gst_gl_handle_context_query (GST_ELEMENT (self), query,
        display, local, other);

    gst_clear_object (&display);
    gst_clear_object (&other);
    gst_clear_object (&local);

    if (handled)
      return TRUE;
  }
#endif

  if (!gst_cuda_handle_context_query (GST_ELEMENT (self), query, priv->ctx))
    return FALSE;

  return TRUE;
}
|
|
|
|
/* GstVideoDecoder::sink_query: context queries are tried locally first;
 * anything not handled here goes to the parent class. */
static gboolean
gst_nv_comp_video_dec_sink_query (GstVideoDecoder * decoder, GstQuery * query)
{
  GstNvCompVideoDec *self = GST_NV_COMP_VIDEO_DEC (decoder);

  if (GST_QUERY_TYPE (query) == GST_QUERY_CONTEXT &&
      gst_nv_comp_video_dec_handle_context_query (self, query)) {
    return TRUE;
  }

  return GST_VIDEO_DECODER_CLASS (parent_class)->sink_query (decoder, query);
}
|
|
|
|
/* GstVideoDecoder::src_query: context queries are tried locally first;
 * anything not handled here goes to the parent class. */
static gboolean
gst_nv_comp_video_dec_src_query (GstVideoDecoder * decoder, GstQuery * query)
{
  GstNvCompVideoDec *self = GST_NV_COMP_VIDEO_DEC (decoder);

  if (GST_QUERY_TYPE (query) == GST_QUERY_CONTEXT &&
      gst_nv_comp_video_dec_handle_context_query (self, query)) {
    return TRUE;
  }

  return GST_VIDEO_DECODER_CLASS (parent_class)->src_query (decoder, query);
}
|
|
|
|
#ifdef HAVE_GST_GL
|
|
static void
|
|
check_cuda_device_from_gl_context (GstGLContext * context, gboolean * ret)
|
|
{
|
|
guint device_count = 0;
|
|
CUdevice device_list[1] = { 0, };
|
|
CUresult cuda_ret;
|
|
|
|
*ret = FALSE;
|
|
cuda_ret = CuGLGetDevices (&device_count,
|
|
device_list, 1, CU_GL_DEVICE_LIST_ALL);
|
|
|
|
if (!gst_cuda_result (cuda_ret) || device_count == 0)
|
|
return;
|
|
|
|
*ret = TRUE;
|
|
}
|
|
|
|
/* Obtains a GL display and context for the element and checks from the GL
 * thread that a CUDA device is reported for it. Returns FALSE when any step
 * fails. */
static gboolean
gst_nv_comp_video_dec_ensure_gl_context (GstNvCompVideoDec * self)
{
  GstNvCompVideoDecPrivate *priv = self->priv;

  if (!gst_gl_ensure_element_data (GST_ELEMENT (self), &priv->gl_display,
          &priv->other_gl_context)) {
    GST_DEBUG_OBJECT (self, "Couldn't get GL display");
    return FALSE;
  }

  gst_gl_display_filter_gl_api (priv->gl_display, GST_GL_API_OPENGL3);

  if (!gst_gl_display_ensure_context (priv->gl_display, priv->other_gl_context,
          &priv->gl_context, nullptr)) {
    GST_DEBUG_OBJECT (self, "Couldn't get GL context");
    return FALSE;
  }

  /* The callback writes its verdict into this local, which is returned
   * right after the call */
  gboolean have_cuda_device = FALSE;
  gst_gl_context_thread_add (priv->gl_context,
      (GstGLContextThreadFunc) check_cuda_device_from_gl_context,
      &have_cuda_device);

  return have_cuda_device;
}
|
|
#endif
|
|
|
|
/* GstVideoDecoder::decide_allocation: picks a buffer pool matching the
 * negotiated memory type (CUDA, GL when interop is enabled, otherwise a
 * plain video pool), configures it and stores it in the query.
 * Returns FALSE when the query has no usable caps or the pool rejects its
 * configuration. */
static gboolean
gst_nv_comp_video_dec_decide_allocation (GstVideoDecoder * decoder,
    GstQuery * query)
{
  GstNvCompVideoDec *self = GST_NV_COMP_VIDEO_DEC (decoder);
  GstNvCompVideoDecPrivate *priv = self->priv;

  GstCaps *caps = nullptr;
  gst_query_parse_allocation (query, &caps, nullptr);
  if (!caps) {
    GST_WARNING_OBJECT (self, "null caps in query");
    return FALSE;
  }

  GstVideoInfo info;
  if (!gst_video_info_from_caps (&info, caps)) {
    GST_WARNING_OBJECT (self, "Failed to convert caps into info");
    return FALSE;
  }

  /* Start from the first pool proposed in the query, if there is one */
  GstBufferPool *pool = nullptr;
  guint size = 0;
  guint min = 0;
  guint max = 0;
  const gboolean update_pool = gst_query_get_n_allocation_pools (query) > 0;
  if (update_pool)
    gst_query_parse_nth_allocation_pool (query, 0, &pool, &size, &min, &max);

  GstCapsFeatures *features = gst_caps_get_features (caps, 0);
  gboolean use_cuda_pool = FALSE;

  if (gst_caps_features_contains (features,
          GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY)) {
    GST_DEBUG_OBJECT (self, "Downstream support CUDA memory");

    /* A proposed pool is kept only if it is a CUDA pool on our context */
    if (pool && (!GST_IS_CUDA_BUFFER_POOL (pool) ||
            GST_CUDA_BUFFER_POOL (pool)->context != priv->ctx)) {
      gst_clear_object (&pool);
    }

    if (!pool)
      pool = gst_cuda_buffer_pool_new (priv->ctx);

    use_cuda_pool = TRUE;
  }
#ifdef HAVE_GST_GL
  else if (gst_caps_features_contains (features,
          GST_CAPS_FEATURE_MEMORY_GL_MEMORY) && priv->gl_interop) {
    GST_DEBUG_OBJECT (self, "Downstream support GL memory");

    if (gst_nv_comp_video_dec_ensure_gl_context (self)) {
      if (pool && !GST_IS_GL_BUFFER_POOL (pool))
        gst_clear_object (&pool);

      if (!pool)
        pool = gst_gl_buffer_pool_new (priv->gl_context);
    } else {
      /* No usable GL context: disable interop */
      priv->gl_interop = FALSE;
    }
  }
#endif

  /* Fallback when none of the branches above produced a pool */
  if (!pool)
    pool = gst_video_buffer_pool_new ();

  GstStructure *config = gst_buffer_pool_get_config (pool);

  size = GST_VIDEO_INFO_SIZE (&info);
  gst_buffer_pool_config_set_params (config, caps, size, 0, 0);

  /* Set our stream on buffer pool config so that CUstream can be shared */
  if (use_cuda_pool && priv->stream)
    gst_buffer_pool_config_set_cuda_stream (config, priv->stream);

  if (!gst_buffer_pool_set_config (pool, config)) {
    GST_WARNING_OBJECT (self, "Failed to set pool config");
    gst_object_unref (pool);
    return FALSE;
  }

  /* Read the size back from the applied configuration */
  config = gst_buffer_pool_get_config (pool);
  gst_buffer_pool_config_get_params (config, nullptr, &size, nullptr, nullptr);
  gst_structure_free (config);

  if (!update_pool)
    gst_query_add_allocation_pool (query, pool, size, min, max);
  else
    gst_query_set_nth_allocation_pool (query, 0, pool, size, min, max);

  gst_object_unref (pool);

  return TRUE;
}
|
|
|
|
/* Allocates the buffers of @task for the non-batched path: a device and a
 * host buffer of @size bytes each for decoded and for compressed data.
 * In batched mode nothing is allocated and TRUE is returned right away.
 * Returns FALSE as soon as one allocation fails; buffers allocated up to
 * that point stay in @task. */
static gboolean
gst_nv_comp_video_dec_alloc_task (GstNvCompVideoDec * self,
    DecoderTask * task, gboolean batched, gsize size)
{
  if (batched)
    return TRUE;

  CUresult status;

  /* Decoded data */
  task->uncompressed_alloc_size = size;

  status = CuMemAlloc ((CUdeviceptr *) & task->device_uncompressed, size);
  if (!gst_cuda_result (status))
    return FALSE;

  status = CuMemAllocHost ((void **) &task->host_uncompressed, size);
  if (!gst_cuda_result (status))
    return FALSE;

  /* Compressed data, sized like the decoded buffers */
  task->compressed_alloc_size = size;

  status = CuMemAlloc ((CUdeviceptr *) & task->device_compressed, size);
  if (!gst_cuda_result (status))
    return FALSE;

  status = CuMemAllocHost ((void **) &task->host_compressed, size);
  if (!gst_cuda_result (status))
    return FALSE;

  return TRUE;
}
|
|
|
|
static gboolean
|
|
gst_nv_comp_video_dec_set_format (GstVideoDecoder * decoder,
|
|
GstVideoCodecState * state)
|
|
{
|
|
auto self = GST_NV_COMP_VIDEO_DEC (decoder);
|
|
auto priv = self->priv;
|
|
|
|
if (!priv->ctx) {
|
|
GST_ERROR_OBJECT (self, "CUDA context was not configured");
|
|
return FALSE;
|
|
}
|
|
|
|
GST_DEBUG_OBJECT (self, "Set format with caps %" GST_PTR_FORMAT, state->caps);
|
|
|
|
g_clear_pointer (&priv->state, gst_video_codec_state_unref);
|
|
priv->state = gst_video_codec_state_ref (state);
|
|
|
|
auto s = gst_caps_get_structure (state->caps, 0);
|
|
std::string mime_type = gst_structure_get_name (s);
|
|
|
|
auto format_str = gst_structure_get_string (s, "format");
|
|
if (!format_str) {
|
|
GST_ERROR_OBJECT (self, "Unknown video format");
|
|
return FALSE;
|
|
}
|
|
|
|
GstVideoFormat format = gst_video_format_from_string (format_str);
|
|
if (format == GST_VIDEO_FORMAT_UNKNOWN || format == GST_VIDEO_FORMAT_ENCODED) {
|
|
GST_ERROR_OBJECT (self, "Invalid format string %s", format_str);
|
|
return FALSE;
|
|
}
|
|
|
|
s = gst_structure_copy (s);
|
|
gst_structure_set_name (s, "video/x-raw");
|
|
|
|
auto video_caps = gst_caps_new_empty ();
|
|
gst_caps_append_structure (video_caps, s);
|
|
|
|
auto ret = gst_video_info_from_caps (&priv->info, video_caps);
|
|
gst_caps_unref (video_caps);
|
|
if (!ret) {
|
|
GST_ERROR_OBJECT (self, "Couldn't build output caps");
|
|
return FALSE;
|
|
}
|
|
|
|
if (!gst_cuda_context_push (priv->ctx)) {
|
|
GST_ERROR_OBJECT (self, "Couldn't push context");
|
|
return FALSE;
|
|
}
|
|
|
|
priv->manager = nullptr;
|
|
priv->batched_decomp = nullptr;
|
|
priv->task = nullptr;
|
|
|
|
priv->batched = TRUE;
|
|
if (mime_type == "video/x-nvcomp") {
|
|
priv->batched = FALSE;
|
|
} else if (mime_type == "video/x-nvcomp-lz4") {
|
|
priv->batched_decomp = std::make_shared < BatchedDecomp <
|
|
nvcompBatchedLZ4DecompressGetTempSize,
|
|
nvcompBatchedLZ4DecompressAsync >> ();
|
|
} else if (mime_type == "video/x-nvcomp-snappy") {
|
|
priv->batched_decomp = std::make_shared < BatchedDecomp <
|
|
nvcompBatchedSnappyDecompressGetTempSize,
|
|
nvcompBatchedSnappyDecompressAsync >> ();
|
|
} else if (mime_type == "video/x-nvcomp-gdeflate") {
|
|
priv->batched_decomp = std::make_shared < BatchedDecomp <
|
|
nvcompBatchedGdeflateDecompressGetTempSize,
|
|
nvcompBatchedGdeflateDecompressAsync >> ();
|
|
} else if (mime_type == "video/x-nvcomp-deflate") {
|
|
priv->batched_decomp = std::make_shared < BatchedDecomp <
|
|
nvcompBatchedDeflateDecompressGetTempSize,
|
|
nvcompBatchedDeflateDecompressAsync >> ();
|
|
} else if (mime_type == "video/x-nvcomp-zstd") {
|
|
priv->batched_decomp = std::make_shared < BatchedDecomp <
|
|
nvcompBatchedZstdDecompressGetTempSize,
|
|
nvcompBatchedZstdDecompressAsync >> ();
|
|
} else if (mime_type == "video/x-nvcomp-cascaded") {
|
|
priv->batched_decomp = std::make_shared < BatchedDecomp <
|
|
nvcompBatchedCascadedDecompressGetTempSize,
|
|
nvcompBatchedCascadedDecompressAsync >> ();
|
|
} else if (mime_type == "video/x-nvcomp-bitcomp") {
|
|
priv->batched_decomp = std::make_shared < BatchedDecomp <
|
|
nvcompBatchedBitcompDecompressGetTempSize,
|
|
nvcompBatchedBitcompDecompressAsync >> ();
|
|
} else if (mime_type == "video/x-nvcomp-ans") {
|
|
priv->batched_decomp = std::make_shared < BatchedDecomp <
|
|
nvcompBatchedANSDecompressGetTempSize,
|
|
nvcompBatchedANSDecompressAsync >> ();
|
|
} else {
|
|
gst_cuda_context_pop (nullptr);
|
|
g_assert_not_reached ();
|
|
return FALSE;
|
|
}
|
|
|
|
auto task = std::make_shared < DecoderTask > ();
|
|
task->ctx = (GstCudaContext *) gst_object_ref (priv->ctx);
|
|
|
|
if (!gst_nv_comp_video_dec_alloc_task (self, task.get (), priv->batched,
|
|
priv->info.size)) {
|
|
task = nullptr;
|
|
gst_cuda_context_pop (nullptr);
|
|
return FALSE;
|
|
}
|
|
|
|
priv->task = task;
|
|
gst_cuda_context_pop (nullptr);
|
|
|
|
return gst_video_decoder_negotiate (decoder);
|
|
}
|
|
|
|
/* Returns TRUE when @format is one of the formats this element advertises
 * with the CUDA memory caps feature, FALSE otherwise. */
static gboolean
is_supported_cuda_format (GstVideoFormat format)
{
  static const GstVideoFormat supported_formats[] = {
    GST_VIDEO_FORMAT_I420,
    GST_VIDEO_FORMAT_YV12,
    GST_VIDEO_FORMAT_NV12,
    GST_VIDEO_FORMAT_NV21,
    GST_VIDEO_FORMAT_P010_10LE,
    GST_VIDEO_FORMAT_P012_LE,
    GST_VIDEO_FORMAT_P016_LE,
    GST_VIDEO_FORMAT_I420_10LE,
    GST_VIDEO_FORMAT_I420_12LE,
    GST_VIDEO_FORMAT_Y444,
    GST_VIDEO_FORMAT_Y444_10LE,
    GST_VIDEO_FORMAT_Y444_12LE,
    GST_VIDEO_FORMAT_Y444_16LE,
    GST_VIDEO_FORMAT_BGRA,
    GST_VIDEO_FORMAT_RGBA,
    GST_VIDEO_FORMAT_RGBx,
    GST_VIDEO_FORMAT_BGRx,
    GST_VIDEO_FORMAT_ARGB,
    GST_VIDEO_FORMAT_ABGR,
    GST_VIDEO_FORMAT_RGB,
    GST_VIDEO_FORMAT_BGR,
    GST_VIDEO_FORMAT_BGR10A2_LE,
    GST_VIDEO_FORMAT_RGB10A2_LE,
    GST_VIDEO_FORMAT_Y42B,
    GST_VIDEO_FORMAT_I422_10LE,
    GST_VIDEO_FORMAT_I422_12LE,
    GST_VIDEO_FORMAT_YUY2,
    GST_VIDEO_FORMAT_UYVY,
    GST_VIDEO_FORMAT_RGBP,
    GST_VIDEO_FORMAT_BGRP,
    GST_VIDEO_FORMAT_GBR,
    GST_VIDEO_FORMAT_GBR_10LE,
    GST_VIDEO_FORMAT_GBR_12LE,
    GST_VIDEO_FORMAT_GBR_16LE,
    GST_VIDEO_FORMAT_GBRA,
    GST_VIDEO_FORMAT_VUYA,
  };

  for (GstVideoFormat candidate : supported_formats) {
    if (candidate == format)
      return TRUE;
  }

  return FALSE;
}
|
|
|
|
#ifdef HAVE_GST_GL
|
|
/* Returns TRUE when raw video caps restricted to @format are a subset of
 * the caps built from GST_GL_COLOR_CONVERT_FORMATS. */
static gboolean
is_supported_gl_format (GstVideoFormat format)
{
  /* Caps describing only the format under test */
  GstCaps *candidate = gst_caps_new_empty_simple ("video/x-raw");
  gst_caps_set_simple (candidate,
      "format", G_TYPE_STRING, gst_video_format_to_string (format), nullptr);

  /* Caps listing the GL colour-convert formats */
  GstCaps *gl_formats = gst_caps_from_string ("video/x-raw, format = (string) "
      GST_GL_COLOR_CONVERT_FORMATS);

  gboolean supported = gst_caps_is_subset (candidate, gl_formats);

  gst_caps_unref (gl_formats);
  gst_caps_unref (candidate);

  return supported;
}
|
|
#endif
|
|
|
|
/* GstVideoDecoder::negotiate: inspects what the downstream peer allows,
 * configures the output state from priv->info, tags the output caps with
 * the CUDA or GL memory feature when downstream and the format support it,
 * and chains up to the parent class.
 * Returns FALSE when the output state cannot be set, otherwise the parent
 * class result. */
static gboolean
gst_nv_comp_video_dec_negotiate (GstVideoDecoder * decoder)
{
  auto self = GST_NV_COMP_VIDEO_DEC (decoder);
  auto priv = self->priv;
  gboolean is_cuda = FALSE;
#ifdef HAVE_GST_GL
  gboolean is_gl = FALSE;
#endif

  auto peer_caps = gst_pad_get_allowed_caps (decoder->srcpad);
  GST_DEBUG_OBJECT (self, "Allowed caps %" GST_PTR_FORMAT, peer_caps);

  if (!peer_caps || gst_caps_is_any (peer_caps)) {
    GST_DEBUG_OBJECT (self,
        "cannot determine output format, use system memory");
  } else {
    /* Note which memory features appear in any of the allowed structures */
    guint size = gst_caps_get_size (peer_caps);

    for (guint i = 0; i < size; i++) {
      GstCapsFeatures *features = gst_caps_get_features (peer_caps, i);

      if (!features)
        continue;

      if (gst_caps_features_contains (features,
              GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY)) {
        is_cuda = TRUE;
      }
#ifdef HAVE_GST_GL
      if (gst_caps_features_contains (features,
              GST_CAPS_FEATURE_MEMORY_GL_MEMORY)) {
        is_gl = TRUE;
      }
#endif
    }
  }
  gst_clear_caps (&peer_caps);

  auto state = gst_video_decoder_set_interlaced_output_state (decoder,
      GST_VIDEO_INFO_FORMAT (&priv->info),
      GST_VIDEO_INFO_INTERLACE_MODE (&priv->info), priv->info.width,
      priv->info.height, priv->state);

  if (!state) {
    GST_ERROR_OBJECT (self, "Couldn't set output state");
    return FALSE;
  }

  priv->gl_interop = FALSE;

  state->caps = gst_video_info_to_caps (&state->info);
  auto format = GST_VIDEO_INFO_FORMAT (&priv->info);
  if (is_cuda && is_supported_cuda_format (format)) {
    gst_caps_set_features_simple (state->caps,
        gst_caps_features_new_static_str (GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY,
            nullptr));
  }
#ifdef HAVE_GST_GL
  else if (is_gl && is_supported_gl_format (format)) {
    gst_caps_set_features_simple (state->caps,
        gst_caps_features_new_static_str (GST_CAPS_FEATURE_MEMORY_GL_MEMORY,
            nullptr));
    priv->gl_interop = TRUE;
  }
#endif

  /* The output state is returned with a reference for the caller (transfer
   * full per the GstVideoDecoder documentation); release it now that the
   * caps are set, otherwise one reference leaks per negotiation */
  gst_video_codec_state_unref (state);

  return GST_VIDEO_DECODER_CLASS (parent_class)->negotiate (decoder);
}
|
|
|
|
/*
 * Copies the decoded image held by the decoder task into @frame, one plane
 * at a time.
 *
 * @self: decoder instance; self->priv->info describes the source layout
 * @frame: mapped destination frame
 * @stream: CUDA stream on which device copies are queued
 * @is_device_copy: TRUE to copy device-to-device from
 *   task->device_uncompressed, FALSE to memcpy() from task->host_uncompressed
 *
 * Device copies are only queued on @stream; this function never synchronizes
 * the stream, so the caller has to do that before using the result.
 *
 * Returns: TRUE on success, FALSE if a CUDA copy could not be queued
 */
static gboolean
gst_nv_comp_video_dec_download (GstNvCompVideoDec * self, GstVideoFrame * frame,
    CUstream stream, gboolean is_device_copy)
{
  auto priv = self->priv;
  auto info = &priv->info;
  auto finfo = info->finfo;
  gint comp[GST_VIDEO_MAX_COMPONENTS];
  CUresult ret = CUDA_SUCCESS;
  auto task = priv->task;

  for (guint i = 0; i < GST_VIDEO_FRAME_N_PLANES (frame); i++) {
    guint8 *sp;
    if (is_device_copy)
      sp = task->device_uncompressed + info->offset[i];
    else
      sp = task->host_uncompressed + info->offset[i];

    guint8 *dp = (guint8 *) GST_VIDEO_FRAME_PLANE_DATA (frame, i);
    guint ss, ds;
    guint w, h;

    if (GST_VIDEO_FORMAT_INFO_HAS_PALETTE (finfo) && i == 1) {
      /* Plane 1 of a paletted format is copied as one flat 256 * 4 byte
       * block, after which the copy is complete */
      if (is_device_copy) {
        ret = CuMemcpyDtoDAsync ((CUdeviceptr) dp, (CUdeviceptr) sp,
            256 * 4, stream);
      } else {
        memcpy (dp, sp, 256 * 4);
      }

      if (!gst_cuda_result (ret)) {
        GST_ERROR_OBJECT (self, "CUDA memcpy failed");
        return FALSE;
      }

      return TRUE;
    }

    ds = GST_VIDEO_FRAME_PLANE_STRIDE (frame, i);
    ss = GST_VIDEO_INFO_PLANE_STRIDE (info, i);

    gst_video_format_info_component (finfo, i, comp);

    /* Row width in bytes; fall back to the smaller stride when the
     * component width cannot be derived */
    w = GST_VIDEO_INFO_COMP_WIDTH (info, comp[0]) *
        GST_VIDEO_INFO_COMP_PSTRIDE (info, comp[0]);
    if (w == 0)
      w = MIN (ss, ds);

    h = GST_VIDEO_INFO_COMP_HEIGHT (info, comp[0]);

    if (GST_VIDEO_FORMAT_INFO_IS_TILED (finfo)) {
      gint tile_size;
      gint sx_tiles, sy_tiles, dx_tiles, dy_tiles;
      GstVideoTileMode mode;

      tile_size = GST_VIDEO_FORMAT_INFO_TILE_SIZE (info->finfo, i);

      mode = GST_VIDEO_FORMAT_INFO_TILE_MODE (info->finfo);

      /* For tiled formats the stride encodes the tile counts */
      sx_tiles = GST_VIDEO_TILE_X_TILES (ss);
      sy_tiles = GST_VIDEO_TILE_Y_TILES (ss);

      dx_tiles = GST_VIDEO_TILE_X_TILES (ds);
      dy_tiles = GST_VIDEO_TILE_Y_TILES (ds);

      /* From here on w and h are the number of tiles to copy, not bytes */
      w = MIN (sx_tiles, dx_tiles);
      h = MIN (sy_tiles, dy_tiles);

      for (guint j = 0; j < h; j++) {
        for (guint k = 0; k < w; k++) {
          guint si, di;
          guint8 *cur_dp;
          guint8 *cur_sp;

          si = gst_video_tile_get_index (mode, k, j, sx_tiles, sy_tiles);
          di = gst_video_tile_get_index (mode, k, j, dx_tiles, dy_tiles);

          cur_dp = dp + (di * tile_size);
          cur_sp = sp + (si * tile_size);

          /* Copy one whole tile. The length must be tile_size: w is the
           * tile count per row and would truncate every tile */
          if (is_device_copy) {
            ret = CuMemcpyDtoDAsync ((CUdeviceptr) cur_dp, (CUdeviceptr) cur_sp,
                tile_size, stream);
          } else {
            memcpy (cur_dp, cur_sp, tile_size);
          }

          if (!gst_cuda_result (ret)) {
            GST_ERROR_OBJECT (self, "CUDA memcpy failed");
            return FALSE;
          }
        }
      }
    } else {
      if (is_device_copy) {
        /* Pitched 2D copy handles differing source/destination strides */
        CUDA_MEMCPY2D params = { };
        params.srcMemoryType = CU_MEMORYTYPE_DEVICE;
        params.srcDevice = (CUdeviceptr) sp;
        params.srcPitch = ss;

        params.dstMemoryType = CU_MEMORYTYPE_DEVICE;
        params.dstDevice = (CUdeviceptr) dp;
        params.dstPitch = ds;

        params.WidthInBytes = w;
        params.Height = h;

        ret = CuMemcpy2DAsync (&params, stream);

        if (!gst_cuda_result (ret)) {
          GST_ERROR_OBJECT (self, "CUDA memcpy failed");
          return FALSE;
        }
      } else {
        /* Row-by-row host copy, advancing each side by its own stride */
        for (guint j = 0; j < h; j++) {
          memcpy (dp, sp, w);
          dp += ds;
          sp += ss;
        }
      }
    }
  }

  return TRUE;
}
|
|
|
|
#ifdef HAVE_GST_GL
|
|
struct GLInteropData
|
|
{
|
|
GstNvCompVideoDec *self = nullptr;
|
|
GstBuffer *buffer = nullptr;
|
|
gboolean ret = FALSE;
|
|
};
|
|
|
|
/*
 * Returns the CUDA graphics resource attached to @mem as qdata. If none is
 * attached yet, the PBO backing @mem is registered with CUDA and the new
 * resource is stored on @mem, which frees it via
 * gst_cuda_graphics_resource_free().
 *
 * Returns nullptr when @mem is not GL PBO memory, when it cannot be mapped,
 * or when the registration fails.
 */
static GstCudaGraphicsResource *
ensure_gl_cuda_resource (GstNvCompVideoDec * self, GstMemory * mem)
{
  if (!gst_is_gl_memory_pbo (mem)) {
    GST_WARNING_OBJECT (self, "memory is not GL PBO memory, %s",
        mem->allocator->mem_type);
    return nullptr;
  }

  const GQuark res_quark =
      gst_cuda_quark_from_id (GST_CUDA_QUARK_GRAPHICS_RESOURCE);

  /* Fast path: the memory was already registered earlier */
  auto cached = (GstCudaGraphicsResource *)
      gst_mini_object_get_qdata (GST_MINI_OBJECT (mem), res_quark);
  if (cached)
    return cached;

  GstGLBuffer *pbo_buf = ((GstGLMemoryPBO *) mem)->pbo;
  GstMapInfo gl_map;

  /* The memory stays GL-mapped while the buffer is being registered */
  if (!gst_memory_map (mem, &gl_map,
          (GstMapFlags) (GST_MAP_READ | GST_MAP_GL))) {
    GST_ERROR_OBJECT (self, "Couldn't map gl memory");
    return nullptr;
  }

  auto fresh = gst_cuda_graphics_resource_new (self->priv->ctx,
      GST_OBJECT (GST_GL_BASE_MEMORY_CAST (mem)->context),
      GST_CUDA_GRAPHICS_RESOURCE_GL_BUFFER);

  GST_LOG_OBJECT (self, "registering gl buffer %d to CUDA", pbo_buf->id);
  const gboolean registered =
      gst_cuda_graphics_resource_register_gl_buffer (fresh, pbo_buf->id,
      CU_GRAPHICS_REGISTER_FLAGS_NONE);
  gst_memory_unmap (mem, &gl_map);

  if (!registered) {
    GST_ERROR_OBJECT (self, "Couldn't register gl buffer %d", pbo_buf->id);
    gst_cuda_graphics_resource_free (fresh);
    return nullptr;
  }

  /* Hand ownership to the memory so later calls take the fast path */
  gst_mini_object_set_qdata (GST_MINI_OBJECT (mem), res_quark, fresh,
      (GDestroyNotify) gst_cuda_graphics_resource_free);

  return fresh;
}
|
|
|
|
static void
|
|
gst_nv_comp_video_dec_download_gl (GstGLContext * context, GLInteropData * data)
|
|
{
|
|
auto self = data->self;
|
|
auto priv = self->priv;
|
|
auto info = &priv->info;
|
|
auto finfo = info->finfo;
|
|
GstCudaGraphicsResource *gst_res[GST_VIDEO_MAX_PLANES] = { nullptr, };
|
|
CUgraphicsResource cuda_res[GST_VIDEO_MAX_PLANES] = { nullptr, };
|
|
CUdeviceptr src_devptr[GST_VIDEO_MAX_PLANES] = { 0, };
|
|
CUstream stream = gst_cuda_stream_get_handle (priv->stream);
|
|
CUresult ret;
|
|
gint comp[GST_VIDEO_MAX_COMPONENTS];
|
|
auto task = priv->task;
|
|
|
|
if (!gst_cuda_context_push (priv->ctx)) {
|
|
GST_ERROR_OBJECT (self, "Couldn't push context");
|
|
return;
|
|
}
|
|
|
|
for (guint i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++) {
|
|
GstMemory *mem = gst_buffer_peek_memory (data->buffer, i);
|
|
gsize src_size;
|
|
|
|
if (!gst_is_gl_memory_pbo (mem)) {
|
|
GST_ERROR_OBJECT (self, "Not a GL PBO memory");
|
|
goto out;
|
|
}
|
|
|
|
gst_res[i] = ensure_gl_cuda_resource (self, mem);
|
|
if (!gst_res[i]) {
|
|
GST_ERROR_OBJECT (self, "Couldn't get resource %d", i);
|
|
goto out;
|
|
}
|
|
|
|
cuda_res[i] = gst_cuda_graphics_resource_map (gst_res[i], stream,
|
|
CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD);
|
|
if (!cuda_res[i]) {
|
|
GST_ERROR_OBJECT (self, "Couldn't map resource");
|
|
goto out;
|
|
}
|
|
|
|
ret = CuGraphicsResourceGetMappedPointer (&src_devptr[i],
|
|
&src_size, cuda_res[i]);
|
|
if (!gst_cuda_result (ret)) {
|
|
GST_ERROR_OBJECT (self, "Couldn't get mapped device pointer");
|
|
goto out;
|
|
}
|
|
|
|
/* Need PBO -> texture */
|
|
GST_MINI_OBJECT_FLAG_SET (mem, GST_GL_BASE_MEMORY_TRANSFER_NEED_UPLOAD);
|
|
}
|
|
|
|
for (guint i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++) {
|
|
guint8 *sp = task->device_uncompressed + info->offset[i];
|
|
guint8 *dp = (guint8 *) src_devptr[i];
|
|
guint ss, ds;
|
|
guint w, h;
|
|
|
|
if (GST_VIDEO_FORMAT_INFO_HAS_PALETTE (finfo) && i == 1) {
|
|
ret = CuMemcpyDtoDAsync ((CUdeviceptr) dp, (CUdeviceptr) sp,
|
|
256 * 4, stream);
|
|
|
|
if (!gst_cuda_result (ret)) {
|
|
GST_ERROR_OBJECT (self, "CUDA memcpy failed");
|
|
goto out;
|
|
}
|
|
|
|
data->ret = TRUE;
|
|
goto out;
|
|
}
|
|
|
|
auto meta = gst_buffer_get_video_meta (data->buffer);
|
|
if (meta)
|
|
ds = meta->stride[i];
|
|
else
|
|
ds = GST_VIDEO_INFO_PLANE_STRIDE (info, i);
|
|
|
|
ss = GST_VIDEO_INFO_PLANE_STRIDE (info, i);
|
|
|
|
gst_video_format_info_component (finfo, i, comp);
|
|
|
|
w = GST_VIDEO_INFO_COMP_WIDTH (info, comp[0]) *
|
|
GST_VIDEO_INFO_COMP_PSTRIDE (info, comp[0]);
|
|
if (w == 0)
|
|
w = MIN (ss, ds);
|
|
|
|
h = GST_VIDEO_INFO_COMP_HEIGHT (info, comp[0]);
|
|
|
|
if (GST_VIDEO_FORMAT_INFO_IS_TILED (finfo)) {
|
|
gint tile_size;
|
|
gint sx_tiles, sy_tiles, dx_tiles, dy_tiles;
|
|
GstVideoTileMode mode;
|
|
|
|
tile_size = GST_VIDEO_FORMAT_INFO_TILE_SIZE (info->finfo, i);
|
|
|
|
mode = GST_VIDEO_FORMAT_INFO_TILE_MODE (info->finfo);
|
|
|
|
sx_tiles = GST_VIDEO_TILE_X_TILES (ss);
|
|
sy_tiles = GST_VIDEO_TILE_Y_TILES (ss);
|
|
|
|
dx_tiles = GST_VIDEO_TILE_X_TILES (ds);
|
|
dy_tiles = GST_VIDEO_TILE_Y_TILES (ds);
|
|
|
|
w = MIN (sx_tiles, dx_tiles);
|
|
h = MIN (sy_tiles, dy_tiles);
|
|
|
|
for (guint j = 0; j < h; j++) {
|
|
for (guint k = 0; k < w; k++) {
|
|
guint si, di;
|
|
guint8 *cur_dp;
|
|
guint8 *cur_sp;
|
|
|
|
si = gst_video_tile_get_index (mode, k, j, sx_tiles, sy_tiles);
|
|
di = gst_video_tile_get_index (mode, k, j, dx_tiles, dy_tiles);
|
|
|
|
cur_dp = dp + (di * tile_size);
|
|
cur_sp = sp + (si * tile_size);
|
|
|
|
ret = CuMemcpyDtoDAsync ((CUdeviceptr) cur_dp, (CUdeviceptr) cur_sp,
|
|
w, stream);
|
|
|
|
if (!gst_cuda_result (ret)) {
|
|
GST_ERROR_OBJECT (self, "CUDA memcpy failed");
|
|
goto out;
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
CUDA_MEMCPY2D params = { };
|
|
params.srcMemoryType = CU_MEMORYTYPE_DEVICE;
|
|
params.srcDevice = (CUdeviceptr) sp;
|
|
params.srcPitch = ss;
|
|
|
|
params.dstMemoryType = CU_MEMORYTYPE_DEVICE;
|
|
params.dstDevice = (CUdeviceptr) dp;
|
|
params.dstPitch = ds;
|
|
|
|
params.WidthInBytes = w;
|
|
params.Height = h;
|
|
|
|
ret = CuMemcpy2DAsync (¶ms, stream);
|
|
if (!gst_cuda_result (ret)) {
|
|
GST_ERROR_OBJECT (self, "CUDA memcpy failed");
|
|
goto out;
|
|
}
|
|
}
|
|
}
|
|
|
|
data->ret = TRUE;
|
|
|
|
out:
|
|
for (guint i = 0; i < gst_buffer_n_memory (data->buffer); i++) {
|
|
if (!gst_res[i])
|
|
break;
|
|
|
|
gst_cuda_graphics_resource_unmap (gst_res[i], stream);
|
|
}
|
|
|
|
CuStreamSynchronize (stream);
|
|
gst_cuda_context_pop (nullptr);
|
|
}
|
|
#endif
|
|
|
|
/* Per-chunk entry filled in while parsing the custom batched header */
struct ChunkData
{
  /* Size of the chunk after decompression, in bytes */
  size_t uncomp_size { 0 };

  /* Size of the compressed chunk payload, in bytes */
  size_t comp_size { 0 };

  /* Byte offset of the compressed payload from the start of the input */
  size_t offset { 0 };
};
|
|
|
|
/*
 * Parses the custom header placed in front of batched compressed data.
 *
 * Layout, all fields 32-bit little endian:
 *   version | uncompressed chunk size | max compressed chunk size |
 *   batch size | batch size x { uncompressed size, compressed size } |
 *   compressed chunk payloads, back to back
 *
 * @self: decoder instance, used for logging only
 * @data: start of the input buffer
 * @size: size of @data in bytes
 * @uncompressed_chunk_size: (out) uncompressed chunk size from the header
 * @max_compressed_chunk_size: (out) maximum compressed chunk size
 * @batch_size: (out) number of chunks
 * @compressed_chunks: (out) one entry per chunk, with the payload offset
 *   measured from @data
 *
 * The header is treated as untrusted: the chunk count is checked against
 * the bytes actually present before anything is allocated, and every chunk
 * has to respect the maxima announced by the header.
 *
 * Returns: TRUE if the header is consistent with @size, FALSE otherwise.
 *   The out arguments are unspecified on failure.
 */
static gboolean
gst_nv_comp_video_dec_parse_header (GstNvCompVideoDec * self,
    const guint8 * data, gsize size,
    size_t &uncompressed_chunk_size, size_t &max_compressed_chunk_size,
    size_t &batch_size, std::vector < ChunkData > &compressed_chunks)
{
  /* version + uncompressed chunk size + max compressed size + batch size */
  const gsize fixed_header_size = 4 * sizeof (guint32);
  /* uncompressed size + compressed size of one chunk */
  const gsize table_entry_size = 2 * sizeof (guint32);
  guint32 val;
  const guint8 *ptr = data;
  gsize remaining = size;
  /* 64-bit so that summing 32-bit sizes cannot wrap on 32-bit targets */
  guint64 total_compressed_size = 0;

  if (size <= GST_NV_COMP_HEADER_MIN_SIZE || size < fixed_header_size) {
    GST_ERROR_OBJECT (self, "Too small size");
    return FALSE;
  }

  val = GST_READ_UINT32_LE (ptr);
  if (val != GST_NV_COMP_HEADER_VERSION) {
    GST_ERROR_OBJECT (self, "Invalid version");
    return FALSE;
  }
  ptr += sizeof (guint32);
  remaining -= sizeof (guint32);

  uncompressed_chunk_size = GST_READ_UINT32_LE (ptr);
  ptr += sizeof (guint32);
  remaining -= sizeof (guint32);

  max_compressed_chunk_size = GST_READ_UINT32_LE (ptr);
  ptr += sizeof (guint32);
  remaining -= sizeof (guint32);

  batch_size = GST_READ_UINT32_LE (ptr);
  ptr += sizeof (guint32);
  remaining -= sizeof (guint32);

  /* Validate the chunk count before resize(): an arbitrary 32-bit value
   * would otherwise drive a multi-gigabyte allocation */
  if (batch_size == 0 || batch_size > remaining / table_entry_size) {
    GST_ERROR_OBJECT (self, "Invalid batch size %" G_GSIZE_FORMAT
        ", remaining: %" G_GSIZE_FORMAT, batch_size, remaining);
    return FALSE;
  }

  compressed_chunks.resize (batch_size);
  for (size_t i = 0; i < batch_size; i++) {
    auto & chunk = compressed_chunks[i];

    /* The bound above guarantees the whole size table is present */
    chunk.uncomp_size = GST_READ_UINT32_LE (ptr);
    chunk.comp_size = GST_READ_UINT32_LE (ptr + sizeof (guint32));
    ptr += table_entry_size;
    remaining -= table_entry_size;

    if (chunk.uncomp_size > uncompressed_chunk_size ||
        chunk.comp_size > max_compressed_chunk_size) {
      GST_ERROR_OBJECT (self, "Chunk %" G_GSIZE_FORMAT
          " exceeds announced maximum, uncompressed: %" G_GSIZE_FORMAT
          ", compressed: %" G_GSIZE_FORMAT, i, chunk.uncomp_size,
          chunk.comp_size);
      return FALSE;
    }

    total_compressed_size += chunk.comp_size;
  }

  /* The payloads must account for exactly the rest of the buffer */
  if (total_compressed_size != remaining) {
    GST_ERROR_OBJECT (self, "Size mismatch, remaining: %" G_GSIZE_FORMAT
        ", total compressed: %" G_GUINT64_FORMAT, remaining,
        total_compressed_size);
    return FALSE;
  }

  /* Payloads are stored back to back right after the size table */
  for (size_t i = 0; i < batch_size; i++) {
    compressed_chunks[i].offset = ptr - data;
    ptr += compressed_chunks[i].comp_size;
  }

  return TRUE;
}
|
|
|
|
/*
 * GstVideoDecoder::handle_frame implementation.
 *
 * Uploads the compressed input to the device and decompresses it into
 * task->device_uncompressed, either with the batched decompressor (custom
 * header followed by chunks) or with the nvCOMP manager. The image is then
 * written to the output buffer through CUDA/GL interop, a device-to-device
 * copy, or a download to host memory.
 *
 * Returns: the result of gst_video_decoder_finish_frame() on success, the
 *   allocation result if the output frame could not be allocated, and
 *   GST_FLOW_ERROR (after releasing @frame) on any other failure
 */
static GstFlowReturn
gst_nv_comp_video_dec_handle_frame (GstVideoDecoder * decoder,
    GstVideoCodecFrame * frame)
{
  auto self = GST_NV_COMP_VIDEO_DEC (decoder);
  auto priv = self->priv;
  CUstream stream = nullptr;
  GstVideoFrame vframe;
  GstMapInfo map_info;
  CUresult cuda_ret;
  gboolean need_copy = TRUE;
  /* Cleanup state consumed by the error path below */
  gboolean ctx_pushed = FALSE;
  gboolean input_mapped = FALSE;
  GstMemory *mem;
  nvcompStatus_t status;
  auto task = priv->task;
  GstFlowReturn ret;

  if (!priv->ctx || !priv->task) {
    GST_ERROR_OBJECT (self, "Context was not configured");
    goto error;
  }

  ret = gst_video_decoder_allocate_output_frame (decoder, frame);
  if (ret != GST_FLOW_OK) {
    gst_video_decoder_release_frame (decoder, frame);
    return ret;
  }

  if (!gst_cuda_context_push (priv->ctx)) {
    GST_ERROR_OBJECT (self, "Couldn't push context");
    goto error;
  }
  ctx_pushed = TRUE;

  stream = gst_cuda_stream_get_handle (priv->stream);

  if (!gst_buffer_map (frame->input_buffer, &map_info, GST_MAP_READ)) {
    GST_ERROR_OBJECT (self, "Couldn't map input buffer");
    goto error;
  }
  input_mapped = TRUE;

  if (priv->batched) {
    g_assert (priv->batched_decomp);

    /* Parse custom header */
    size_t uncompressed_chunk_size;
    size_t max_compressed_chunk_size;
    size_t batch_size;
    size_t total_uncompressed_size = 0;
    std::vector < ChunkData > compressed_chunks;
    guint8 *mapped_data = map_info.data;
    uint8_t *uncompressed;

    if (!gst_nv_comp_video_dec_parse_header (self, mapped_data,
            map_info.size, uncompressed_chunk_size, max_compressed_chunk_size,
            batch_size, compressed_chunks)) {
      goto error;
    }

    GST_LOG_OBJECT (self, "batch size %" G_GSIZE_FORMAT
        ", uncompressed-chunk-size %" G_GSIZE_FORMAT
        ", compressed-chunk-size %" G_GSIZE_FORMAT,
        batch_size, uncompressed_chunk_size, max_compressed_chunk_size);

    /* The chunks are gathered back to back into device_uncompressed, which
     * is read back as priv->info.size bytes further down; refuse input
     * that announces more data than one frame */
    for (const auto & chunk : compressed_chunks) {
      if (chunk.uncomp_size >
          (size_t) priv->info.size - total_uncompressed_size) {
        GST_ERROR_OBJECT (self, "Uncompressed size exceeds frame size %"
            G_GSIZE_FORMAT, priv->info.size);
        goto error;
      }
      total_uncompressed_size += chunk.uncomp_size;
    }

    /* Existing resources are too small for this frame, start over */
    if (task->batch_size < batch_size ||
        task->max_uncompressed_chunk_size < uncompressed_chunk_size ||
        task->max_compressed_chunk_size < max_compressed_chunk_size) {
      task->clear_resource ();
    }

    if (task->batch_size == 0) {
      size_t temp_size = 0;

      GST_DEBUG_OBJECT (self, "Allocating resource");

      status = priv->batched_decomp->get_temp_size (batch_size,
          uncompressed_chunk_size, &temp_size);
      if (status != nvcompSuccess) {
        GST_ERROR_OBJECT (self, "Couldn't get temp size");
        goto error;
      }

      if (!task->allocate_batched (batch_size,
              max_compressed_chunk_size, uncompressed_chunk_size, temp_size)) {
        GST_ERROR_OBJECT (self, "Couldn't allocate resource");
        goto error;
      }
    }

    /* Stage every chunk in its fixed-size slot of the host buffer */
    for (size_t i = 0; i < batch_size; i++) {
      if (compressed_chunks[i].comp_size > task->max_compressed_chunk_size) {
        GST_ERROR_OBJECT (self, "Chunk %" G_GSIZE_FORMAT
            " is larger than the staging slot", i);
        goto error;
      }

      memcpy (task->host_compressed + (i * task->max_compressed_chunk_size),
          mapped_data + compressed_chunks[i].offset,
          compressed_chunks[i].comp_size);
      task->host_compressed_bytes[i] = compressed_chunks[i].comp_size;
    }
    gst_buffer_unmap (frame->input_buffer, &map_info);
    input_mapped = FALSE;

    for (size_t i = 0; i < batch_size; i++) {
      GST_LOG_OBJECT (self, "Uploading chunk %" G_GSIZE_FORMAT
          ", size %" G_GSIZE_FORMAT, i, compressed_chunks[i].comp_size);
      auto offset = i * task->max_compressed_chunk_size;

      cuda_ret = CuMemcpyHtoDAsync ((CUdeviceptr)
          (task->device_compressed + offset),
          task->host_compressed + offset,
          compressed_chunks[i].comp_size, stream);
      if (!gst_cuda_result (cuda_ret)) {
        GST_ERROR_OBJECT (self, "Couldn't upload chunk");
        goto error;
      }
    }

    cuda_ret = CuMemcpyHtoDAsync ((CUdeviceptr) task->device_compressed_bytes,
        task->host_compressed_bytes, sizeof (size_t) * batch_size, stream);
    if (!gst_cuda_result (cuda_ret)) {
      GST_ERROR_OBJECT (self, "Couldn't upload chunk sizes");
      goto error;
    }

    status = priv->batched_decomp->decompress (task->device_compressed_ptrs,
        task->device_compressed_bytes, task->device_uncompressed_bytes,
        task->device_actual_uncompressed_bytes, batch_size,
        task->temp_ptr, task->temp_size, task->device_uncompressed_ptrs,
        task->device_statuses, (cudaStream_t) stream);
    if (status != nvcompSuccess) {
      GST_ERROR_OBJECT (self, "Couldn't decompress stream, status: %d", status);
      goto error;
    }

    /* Gather the per-chunk outputs into one contiguous image */
    uncompressed = task->device_uncompressed;
    for (size_t i = 0; i < batch_size; i++) {
      auto size = compressed_chunks[i].uncomp_size;
      cuda_ret = CuMemcpyDtoDAsync ((CUdeviceptr) uncompressed,
          (CUdeviceptr) task->host_uncompressed_ptrs[i], size, stream);

      if (!gst_cuda_result (cuda_ret)) {
        GST_ERROR_OBJECT (self, "Couldn't gather uncompressed chunk");
        goto error;
      }
      uncompressed += size;
    }
  } else {
    if (task->compressed_alloc_size < map_info.size) {
      if (task->device_compressed)
        CuMemFree ((CUdeviceptr) task->device_compressed);
      task->device_compressed = nullptr;

      if (task->host_compressed)
        CuMemFreeHost (task->host_compressed);
      task->host_compressed = nullptr;

      /* Record the new size only after both allocations succeeded.
       * Otherwise a failure here would let the next frame skip this
       * branch and memcpy() into a null buffer */
      task->compressed_alloc_size = 0;
      auto alloc_size = GST_ROUND_UP_128 (map_info.size);

      cuda_ret = CuMemAlloc ((CUdeviceptr *) & task->device_compressed,
          alloc_size);
      if (!gst_cuda_result (cuda_ret)) {
        GST_ERROR_OBJECT (self, "Couldn't allocate device memory");
        task->device_compressed = nullptr;
        goto error;
      }

      cuda_ret = CuMemAllocHost ((void **) &task->host_compressed,
          alloc_size);
      if (!gst_cuda_result (cuda_ret)) {
        GST_ERROR_OBJECT (self, "Couldn't allocate host memory");
        task->host_compressed = nullptr;
        goto error;
      }

      task->compressed_alloc_size = alloc_size;
    }

    memcpy (task->host_compressed, map_info.data, map_info.size);

    cuda_ret = CuMemcpyHtoDAsync ((CUdeviceptr) task->device_compressed,
        task->host_compressed, map_info.size, stream);
    gst_buffer_unmap (frame->input_buffer, &map_info);
    input_mapped = FALSE;

    if (!gst_cuda_result (cuda_ret)) {
      GST_ERROR_OBJECT (self, "Couldn't copy compressed memory");
      goto error;
    }

    if (!priv->manager) {
      priv->manager = create_manager (task->device_compressed,
          (cudaStream_t) stream);
      if (!priv->manager) {
        GST_ERROR_OBJECT (self, "Couldn't create manager");
        goto error;
      }
    }

    {
      auto config =
          priv->manager->configure_decompression (task->device_compressed);
      if (config.decomp_data_size != priv->info.size) {
        GST_ERROR_OBJECT (self, "size mismatch, expected %" G_GSIZE_FORMAT
            ", required %" G_GSIZE_FORMAT, priv->info.size,
            config.decomp_data_size);
        goto error;
      }

      priv->manager->decompress (task->device_uncompressed,
          task->device_compressed, config);
    }
  }

  mem = gst_buffer_peek_memory (frame->output_buffer, 0);
#ifdef HAVE_GST_GL
  if (priv->gl_interop && gst_buffer_n_memory (frame->output_buffer) ==
      GST_VIDEO_INFO_N_PLANES (&priv->info)) {
    GLInteropData interop_data;
    interop_data.self = self;
    interop_data.buffer = frame->output_buffer;
    interop_data.ret = FALSE;

    auto gl_mem = (GstGLMemory *) mem;
    gst_gl_context_thread_add (gl_mem->mem.context,
        (GstGLContextThreadFunc) gst_nv_comp_video_dec_download_gl,
        &interop_data);
    if (interop_data.ret) {
      need_copy = FALSE;
      GST_TRACE_OBJECT (self, "CUDA -> GL copy done");
    } else {
      /* Interop failed once: stop trying and use the copy path below */
      priv->gl_interop = FALSE;
    }
  }
#endif

  if (need_copy) {
    GstMapFlags map_flags = GST_MAP_WRITE;
    gboolean device_copy = FALSE;
    gboolean do_sync = TRUE;
    if (gst_is_cuda_memory (mem)) {
      auto cmem = GST_CUDA_MEMORY_CAST (mem);
      if (cmem->context == priv->ctx) {
        map_flags = (GstMapFlags) (GST_MAP_WRITE | GST_MAP_CUDA);
        device_copy = TRUE;
        /* No explicit sync when the memory is bound to our own stream */
        auto mem_stream = gst_cuda_memory_get_stream (cmem);
        if (mem_stream && mem_stream == priv->stream)
          do_sync = FALSE;
      }
    }

    if (!device_copy) {
      cuda_ret = CuMemcpyDtoHAsync (task->host_uncompressed,
          (CUdeviceptr) task->device_uncompressed, priv->info.size, stream);
      if (!gst_cuda_result (cuda_ret)) {
        GST_ERROR_OBJECT (self, "Couldn't download image");
        goto error;
      }

      /* The host copy below reads host_uncompressed, so the download has
       * to be complete first */
      if (!gst_cuda_result (CuStreamSynchronize (stream))) {
        GST_ERROR_OBJECT (self, "Couldn't synchronize stream");
        goto error;
      }
      do_sync = FALSE;
    }

    if (!gst_video_frame_map (&vframe, &priv->info, frame->output_buffer,
            map_flags)) {
      GST_ERROR_OBJECT (self, "Couldn't map output buffer");
      goto error;
    }

    gboolean copied = gst_nv_comp_video_dec_download (self, &vframe, stream,
        device_copy);
    gboolean synced = TRUE;
    /* Wait for copies that were queued before unmapping, also on failure */
    if (do_sync)
      synced = gst_cuda_result (CuStreamSynchronize (stream));
    gst_video_frame_unmap (&vframe);

    if (!copied || !synced) {
      GST_ERROR_OBJECT (self, "Couldn't copy image to output buffer");
      goto error;
    }
  }
  gst_cuda_context_pop (nullptr);

  return gst_video_decoder_finish_frame (decoder, frame);

error:
  if (input_mapped)
    gst_buffer_unmap (frame->input_buffer, &map_info);
  if (ctx_pushed)
    gst_cuda_context_pop (nullptr);
  gst_video_decoder_release_frame (decoder, frame);
  return GST_FLOW_ERROR;
}
|