From b1d13e10af26ee8a062d3a333e9a694444e804ee Mon Sep 17 00:00:00 2001 From: Matthew Waters Date: Thu, 30 Jul 2015 16:42:38 +1000 Subject: [PATCH] Add Nvidia based hardware encoder element Currently only h264 is supported https://bugzilla.gnome.org/show_bug.cgi?id=753405 --- configure.ac | 103 +++ sys/Makefile.am | 11 +- sys/nvenc/Makefile.am | 33 + sys/nvenc/README | 30 + sys/nvenc/TODO | 11 + sys/nvenc/gstnvbaseenc.c | 1567 ++++++++++++++++++++++++++++++++++++++ sys/nvenc/gstnvbaseenc.h | 114 +++ sys/nvenc/gstnvenc.c | 328 ++++++++ sys/nvenc/gstnvenc.h | 40 + sys/nvenc/gstnvh264enc.c | 610 +++++++++++++++ sys/nvenc/gstnvh264enc.h | 59 ++ 11 files changed, 2904 insertions(+), 2 deletions(-) create mode 100644 sys/nvenc/Makefile.am create mode 100644 sys/nvenc/README create mode 100644 sys/nvenc/TODO create mode 100644 sys/nvenc/gstnvbaseenc.c create mode 100644 sys/nvenc/gstnvbaseenc.h create mode 100644 sys/nvenc/gstnvenc.c create mode 100644 sys/nvenc/gstnvenc.h create mode 100644 sys/nvenc/gstnvh264enc.c create mode 100644 sys/nvenc/gstnvh264enc.h diff --git a/configure.ac b/configure.ac index c8d3f810b0..d982ba2842 100644 --- a/configure.ac +++ b/configure.ac @@ -1726,6 +1726,108 @@ AC_SUBST(LIBUDEV_LIBS) AC_SUBST(LIBUSB_CFLAGS) AC_SUBST(LIBUSB_LIBS) +dnl *** NVENC *** +translit(dnm, m, l) AM_CONDITIONAL(USE_NVENC, true) +AG_GST_CHECK_FEATURE(NVENC, [NVIDIA Encode API], nvenc, [ + AG_GST_PKG_CHECK_MODULES(GST_VIDEO, gstreamer-video-1.0) + AG_GST_PKG_CHECK_MODULES(GST_PBUTILS, gstreamer-pbutils-1.0) + AC_ARG_WITH([cuda-prefix], + AS_HELP_STRING([--with-cuda-prefix], + [Use the provided prefix for detecting the cuda installation]), + [AS_IF([test "x$with_cuda_prefix" != "x"], + [CUDA_PREFIX="$with_cuda_prefix"])], + [CUDA_PREFIX=""]) + + HAVE_CUDA="yes" + if test "x$CUDA_PREFIX" != "x"; then + dnl only override if not already set + if test "x$CUDA_CFLAGS" = "x" -a "x$CUDA_LIBS" = "x"; then + dnl this is an educated guess, user can always override these + 
CUDA_CFLAGS="-I$CUDA_PREFIX/include" + CUDA_LIBS="-L$CUDA_PREFIX/lib -L$CUDA_PREFIX/lib64 -L$CUDA_PREFIX/lib/stubs -L$CUDA_PREFIX/lib64/stubs -lcuda -lcudart" + fi + else + PKG_CHECK_MODULES([CUDA], [cuda-7.5 cudart-7.5],, [ + PKG_CHECK_MODULES([CUDA], [cuda-7.0 cudart-7.0],, [ + PKG_CHECK_MODULES([CUDA], [cuda-6.5 cudart-6.5],, [ + AC_MSG_WARN([Could not find cuda headers/libraries])])])]) + fi + + HAVE_CUDA_H=no + HAVE_CUDART_H=no + save_CPPFLAGS="$CPPFLAGS" + CPPFLAGS="$CUDA_CFLAGS $save_CPPFLAGS " + AC_CHECK_HEADER([cuda.h], [HAVE_CUDA_H=yes], + AC_MSG_WARN([Could not find cuda.h])) + AC_CHECK_HEADER([cuda_runtime_api.h], [HAVE_CUDART_H=yes], + AC_MSG_WARN([Could not find cuda_runtime_api.h])) + CPPFLAGS=$save_CPPFLAGS + + dnl libcuda and libcudart libraries + save_LIBS="$LIBS" + LIBS="$CUDA_LIBS $save_LIBS" + HAVE_CUDART_LIB="no" + AC_CHECK_LIB(cudart,cudaGetErrorString,[HAVE_CUDART_LIB="yes"], [ + AC_MSG_WARN([Could not find cudart library])]) + HAVE_CUDA_LIB="no" + AC_CHECK_LIB(cuda,cuInit,[HAVE_CUDA_LIB="yes"], [ + AC_MSG_WARN([Could not find cuda library])]) + LIBS="$save_LIBS" + + dnl nvEncodeAPI.h header + HAVE_NVENCODEAPI_H=no + AC_ARG_VAR(NVENCODE_CFLAGS, [C compiler flags for NvEncodeAPI.h]) + save_CPPFLAGS="$CPPFLAGS" + CPPFLAGS="$NVENCODE_CFLAGS $save_CPPFLAGS" + AC_CHECK_HEADER([nvEncodeAPI.h], [ + AC_PREPROC_IFELSE([AC_LANG_SOURCE([[#include + #if NVENCAPI_MAJOR_VERSION < 5 + #error "Need nvEncodeAPI.h >= 5.0" + #endif + ]])], [ + HAVE_NVENCODEAPI_H=yes + ], [ + AC_MSG_WARN([nvEncodeAPI.h must be >= 5.0]) + ]) + ], + AC_MSG_WARN([Could not find nvEncodeAPI.h])) + AC_SUBST(NVENCODE_CFLAGS) + CPPFLAGS="$save_CPPFLAGS" + + dnl libnvnidia-encode library + HAVE_NVENCODE_LIB=no + AC_ARG_VAR(NVENCODE_LIBS, [linker flags for nvidia-encode]) + saved_LIBS="$LIBS" + LIBS="$NVENCODE_LIBS $saved_LIBS" + AC_CHECK_LIB(nvidia-encode, NvEncodeAPICreateInstance, [HAVE_NVENCODE_LIB="yes"], + AC_MSG_WARN([Could not find library nvidia-encode])) + 
NVENCODE_LIBS="$NVENCODE_LIBS -lnvidia-encode" + AC_SUBST(NVENCODE_LIBS) + LIBS="$saved_LIBS" + + USE_NVENC_GST_GL=no + if test "x$HAVE_CUDA_H" = "xyes" \ + -a "x$HAVE_CUDART_H" = "xyes" \ + -a "x$HAVE_CUDA_LIB" = "xyes" \ + -a "x$HAVE_CUDART_LIB" = "xyes" \ + -a "x$HAVE_NVENCODEAPI_H" = "xyes" \ + -a "x$HAVE_NVENCODE_LIB" = "xyes"; then + HAVE_NVENC="yes" + if test x"$USE_OPENGL" = x"yes"; then + dnl cuda-gl interop header + save_CPPFLAGS="$CPPFLAGS" + CPPFLAGS="$save_CPPFLAGS $CUDA_CFLAGS" + AC_CHECK_HEADER([cuda_gl_interop.h], [ + USE_NVENC_GST_GL="yes" + AC_DEFINE(HAVE_NVENC_GST_GL, [1] , [NVENC GStreamer OpenGL support available]) + ]) + CPPFLAGS="$save_CPPFLAGS" + fi + else + HAVE_NVENC="no" + fi +]) +AM_CONDITIONAL(USE_NVENC_GST_GL, test "x$USE_NVENC_GST_GL" = "xyes") dnl *** ext plug-ins *** dnl keep this list sorted alphabetically ! @@ -3363,6 +3465,7 @@ sys/dshowvideosink/Makefile sys/dvb/Makefile sys/fbdev/Makefile sys/linsys/Makefile +sys/nvenc/Makefile sys/opensles/Makefile sys/shm/Makefile sys/uvch264/Makefile diff --git a/sys/Makefile.am b/sys/Makefile.am index 1051ce5463..a0d7ca6d54 100644 --- a/sys/Makefile.am +++ b/sys/Makefile.am @@ -142,9 +142,16 @@ else UVCH264_DIR= endif -SUBDIRS = $(ACM_DIR) $(ANDROID_MEDIA_DIR) $(APPLE_MEDIA_DIR) $(AVC_DIR) $(BLUEZ_DIR) $(D3DVIDEOSINK_DIR) $(DECKLINK_DIR) $(DIRECTSOUND_DIR) $(WINKS_DIR) $(DVB_DIR) $(FBDEV_DIR) $(LINSYS_DIR) $(OPENSLES_DIR) $(PVR_DIR) $(SHM_DIR) $(UVCH264_DIR) $(VCD_DIR) $(VDPAU_DIR) $(WININET_DIR) $(WINSCREENCAP_DIR) $(WASAPI_DIR) +if USE_NVENC +NVENC_DIR=nvenc +else +NVENC_DIR= +endif + +SUBDIRS = $(ACM_DIR) $(ANDROID_MEDIA_DIR) $(APPLE_MEDIA_DIR) $(AVC_DIR) $(BLUEZ_DIR) $(D3DVIDEOSINK_DIR) $(DECKLINK_DIR) $(DIRECTSOUND_DIR) $(WINKS_DIR) $(DVB_DIR) $(FBDEV_DIR) $(LINSYS_DIR) $(OPENSLES_DIR) $(PVR_DIR) $(SHM_DIR) $(UVCH264_DIR) $(VCD_DIR) $(VDPAU_DIR) $(WININET_DIR) $(WINSCREENCAP_DIR) $(WASAPI_DIR) $(NVENC_DIR) DIST_SUBDIRS = acmenc acmmp3dec androidmedia applemedia applemedia-nonpublic avc 
bluez d3dvideosink decklink directsound dvb linsys fbdev dshowdecwrapper dshowsrcwrapper dshowvideosink \ - opensles pvr2d shm uvch264 vcd vdpau wasapi wininet winks winscreencap + opensles pvr2d shm uvch264 vcd vdpau wasapi wininet winks winscreencap \ + nvenc include $(top_srcdir)/common/parallel-subdirs.mak diff --git a/sys/nvenc/Makefile.am b/sys/nvenc/Makefile.am new file mode 100644 index 0000000000..68c9a2c06c --- /dev/null +++ b/sys/nvenc/Makefile.am @@ -0,0 +1,33 @@ +plugin_LTLIBRARIES = libgstnvenc.la + +libgstnvenc_la_SOURCES = \ + gstnvenc.c \ + gstnvbaseenc.c \ + gstnvh264enc.c + +noinst_HEADERS = \ + gstnvenc.h \ + gstnvbaseenc.h \ + gstnvh264enc.h + +libgstnvenc_la_CFLAGS = \ + -I$(top_srcdir)/gst-libs \ + $(GST_CFLAGS) \ + $(GST_PBUTILS_CFLAGS) \ + $(GST_VIDEO_CFLAGS) \ + $(CUDA_CFLAGS) \ + $(NVENCODE_CFLAGS) + +libgstnvenc_la_LIBADD = \ + $(GST_LIBS) \ + $(GST_PBUTILS_LIBS) \ + $(GST_VIDEO_LIBS) \ + $(CUDA_LIBS) \ + $(NVENCODE_LIBS) + +if USE_NVENC_GST_GL +libgstnvenc_la_LIBADD += \ + $(top_builddir)/gst-libs/gst/gl/libgstgl-$(GST_API_VERSION).la +endif +libgstnvenc_la_LDFLAGS = $(GST_PLUGIN_LDFLAGS) +libgstnvenc_la_LIBTOOLFLAGS = $(GST_PLUGIN_LIBTOOLFLAGS) diff --git a/sys/nvenc/README b/sys/nvenc/README new file mode 100644 index 0000000000..62a52d1984 --- /dev/null +++ b/sys/nvenc/README @@ -0,0 +1,30 @@ +This plugin is intended for use with NVIDIA hardware. Specifically, the NVENC +block available in recent NVIDIA GPU hardware. This is provided by a +libnvidia-encode library provided by NVIDIA graphic drivers. + +Requirements +------------ +Cuda > 6.5 +NVENC 5.0 + +See https://developer.nvidia.com/nvidia-video-codec-sdk for a list of +supported GPU's. + +Building +-------- +1. Retrieve the NVENC SDK +from https://developer.nvidia.com/nvidia-video-codec-sdk +- http://developer.download.nvidia.com/compute/nvenc/v5.0/nvenc_5.0.1_sdk.zip +2. 
unzip this somewhere and retrieve or note the location of the +nvEncodeAPI.h under nvenc_api-5.0.1/Samples/common/inc/ +3. Retrieve a version of cuda from +https://developer.nvidia.com/cuda-downloads and install somewhere noting +the installation prefix (typically /opt/cuda or /usr/local/cuda) +4. Now that the dependencies are sorted, there are a couple of +environment variables and/or configure arguments that are needed to +detect the necessary libraries/headers. + +More information is available from the following locations + +[1] - https://developer.nvidia.com/cuda-downloads +[2] - https://developer.nvidia.com/nvidia-video-codec-sdk diff --git a/sys/nvenc/TODO b/sys/nvenc/TODO new file mode 100644 index 0000000000..27422cedf3 --- /dev/null +++ b/sys/nvenc/TODO @@ -0,0 +1,11 @@ +- check supported encoding formats (H.264 etc.), don't assume H.264 + +- check performance (time taken) of first cuInit() + +- provide buffer pool + +- more formats + +- renegotiation + +- support outputting of AVC as well as byte-stream, negotiate automatically diff --git a/sys/nvenc/gstnvbaseenc.c b/sys/nvenc/gstnvbaseenc.c new file mode 100644 index 0000000000..154546d31e --- /dev/null +++ b/sys/nvenc/gstnvbaseenc.c @@ -0,0 +1,1567 @@ +/* GStreamer NVENC plugin + * Copyright (C) 2015 Centricular Ltd + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. 
+ * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "gstnvbaseenc.h" + +#include + +#include + +#if HAVE_NVENC_GST_GL +#include +#include +#include +#include +#endif + +/* TODO: + * - reset last_flow on FLUSH_STOP (seeking) + */ + +#define N_BUFFERS_PER_FRAME 1 +#define SUPPORTED_GL_APIS GST_GL_API_OPENGL3 + +/* magic pointer value we can put in the async queue to signal shut down */ +#define SHUTDOWN_COOKIE ((gpointer)GINT_TO_POINTER (1)) + +#define parent_class gst_nv_base_enc_parent_class +G_DEFINE_ABSTRACT_TYPE (GstNvBaseEnc, gst_nv_base_enc, GST_TYPE_VIDEO_ENCODER); + +static GstStaticPadTemplate sink_factory = GST_STATIC_PAD_TEMPLATE ("sink", + GST_PAD_SINK, + GST_PAD_ALWAYS, + GST_STATIC_CAPS ("video/x-raw, " "format = (string) NV12, " // TODO: I420, YV12, Y444 support + "width = (int) [ 16, 4096 ], height = (int) [ 16, 2160 ], " + "framerate = (fraction) [0, MAX]," + "interlace-mode = { progressive, mixed, interleaved } " +#if HAVE_NVENC_GST_GL + ";" + "video/x-raw(memory:GLMemory), " + "format = (string) { NV12, Y444 }, " + "width = (int) [ 16, 4096 ], height = (int) [ 16, 2160 ], " + "framerate = (fraction) [0, MAX]," + "interlace-mode = { progressive, mixed, interleaved } " +#endif + )); + +enum +{ + PROP_0, + PROP_DEVICE_ID, +}; + +#if HAVE_NVENC_GST_GL +struct gl_input_resource +{ + GstGLMemory *gl_mem[GST_VIDEO_MAX_PLANES]; + struct cudaGraphicsResource *cuda_texture; + gpointer cuda_plane_pointers[GST_VIDEO_MAX_PLANES]; + gpointer cuda_pointer; + gsize cuda_stride; + gsize cuda_num_bytes; + NV_ENC_REGISTER_RESOURCE nv_resource; + NV_ENC_MAP_INPUT_RESOURCE nv_mapped_resource; +}; +#endif + +struct frame_state +{ + gint n_buffers; + gpointer in_bufs[N_BUFFERS_PER_FRAME]; + gpointer 
out_bufs[N_BUFFERS_PER_FRAME]; +}; + +static gboolean gst_nv_base_enc_open (GstVideoEncoder * enc); +static gboolean gst_nv_base_enc_close (GstVideoEncoder * enc); +static gboolean gst_nv_base_enc_start (GstVideoEncoder * enc); +static gboolean gst_nv_base_enc_stop (GstVideoEncoder * enc); +static void gst_nv_base_enc_set_context (GstElement * element, + GstContext * context); +static gboolean gst_nv_base_enc_sink_query (GstVideoEncoder * enc, + GstQuery * query); +static gboolean gst_nv_base_enc_set_format (GstVideoEncoder * enc, + GstVideoCodecState * state); +static GstFlowReturn gst_nv_base_enc_handle_frame (GstVideoEncoder * enc, + GstVideoCodecFrame * frame); +static void gst_nv_base_enc_free_buffers (GstNvBaseEnc * nvenc); +static GstFlowReturn gst_nv_base_enc_finish (GstVideoEncoder * enc); +static void gst_nv_base_enc_set_property (GObject * object, guint prop_id, + const GValue * value, GParamSpec * pspec); +static void gst_nv_base_enc_get_property (GObject * object, guint prop_id, + GValue * value, GParamSpec * pspec); +static void gst_nv_base_enc_finalize (GObject * obj); +static GstCaps *gst_nv_base_enc_getcaps (GstVideoEncoder * enc, + GstCaps * filter); + +static void +gst_nv_base_enc_class_init (GstNvBaseEncClass * klass) +{ + GObjectClass *gobject_class = G_OBJECT_CLASS (klass); + GstElementClass *element_class = GST_ELEMENT_CLASS (klass); + GstVideoEncoderClass *videoenc_class = GST_VIDEO_ENCODER_CLASS (klass); + + gobject_class->set_property = gst_nv_base_enc_set_property; + gobject_class->get_property = gst_nv_base_enc_get_property; + gobject_class->finalize = gst_nv_base_enc_finalize; + + element_class->set_context = GST_DEBUG_FUNCPTR (gst_nv_base_enc_set_context); + + videoenc_class->open = GST_DEBUG_FUNCPTR (gst_nv_base_enc_open); + videoenc_class->close = GST_DEBUG_FUNCPTR (gst_nv_base_enc_close); + + videoenc_class->start = GST_DEBUG_FUNCPTR (gst_nv_base_enc_start); + videoenc_class->stop = GST_DEBUG_FUNCPTR (gst_nv_base_enc_stop); + + 
videoenc_class->set_format = GST_DEBUG_FUNCPTR (gst_nv_base_enc_set_format); + videoenc_class->getcaps = GST_DEBUG_FUNCPTR (gst_nv_base_enc_getcaps); + videoenc_class->handle_frame = + GST_DEBUG_FUNCPTR (gst_nv_base_enc_handle_frame); + videoenc_class->finish = GST_DEBUG_FUNCPTR (gst_nv_base_enc_finish); + videoenc_class->sink_query = GST_DEBUG_FUNCPTR (gst_nv_base_enc_sink_query); + + gst_element_class_add_pad_template (element_class, + gst_static_pad_template_get (&sink_factory)); + + g_object_class_install_property (gobject_class, PROP_DEVICE_ID, + g_param_spec_uint ("cuda-device-id", + "Cuda Device ID", + "Set the GPU device to use for operations", + 0, G_MAXUINT, 0, G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)); +} + +static gboolean +_get_supported_input_formats (GstNvBaseEnc * nvenc) +{ + GstNvBaseEncClass *nvenc_class = GST_NV_BASE_ENC_GET_CLASS (nvenc); + guint64 format_mask = 0; + uint32_t i, num = 0; + NV_ENC_BUFFER_FORMAT formats[64]; + GValue list = G_VALUE_INIT; + GValue val = G_VALUE_INIT; + + NvEncGetInputFormats (nvenc->encoder, nvenc_class->codec_id, formats, + G_N_ELEMENTS (formats), &num); + + for (i = 0; i < num; ++i) { + GST_INFO_OBJECT (nvenc, "input format: 0x%08x", formats[i]); + /* Apparently we can just ignore the tiled formats and can feed + * it the respective untiled planar format instead ?! 
*/ + switch (formats[i]) { + case NV_ENC_BUFFER_FORMAT_NV12_PL: + case NV_ENC_BUFFER_FORMAT_NV12_TILED16x16: + case NV_ENC_BUFFER_FORMAT_NV12_TILED64x16: + format_mask |= (1 << GST_VIDEO_FORMAT_NV12); + break; + case NV_ENC_BUFFER_FORMAT_YV12_PL: + case NV_ENC_BUFFER_FORMAT_YV12_TILED16x16: + case NV_ENC_BUFFER_FORMAT_YV12_TILED64x16: + format_mask |= (1 << GST_VIDEO_FORMAT_YV12); + break; + case NV_ENC_BUFFER_FORMAT_IYUV_PL: + case NV_ENC_BUFFER_FORMAT_IYUV_TILED16x16: + case NV_ENC_BUFFER_FORMAT_IYUV_TILED64x16: + format_mask |= (1 << GST_VIDEO_FORMAT_I420); + break; + case NV_ENC_BUFFER_FORMAT_YUV444_PL: + case NV_ENC_BUFFER_FORMAT_YUV444_TILED16x16: + case NV_ENC_BUFFER_FORMAT_YUV444_TILED64x16:{ + NV_ENC_CAPS_PARAM caps_param = { 0, }; + int yuv444_supported = 0; + + caps_param.version = NV_ENC_CAPS_PARAM_VER; + caps_param.capsToQuery = NV_ENC_CAPS_SUPPORT_YUV444_ENCODE; + + if (NvEncGetEncodeCaps (nvenc->encoder, nvenc_class->codec_id, + &caps_param, &yuv444_supported) != NV_ENC_SUCCESS) + yuv444_supported = 0; + + if (yuv444_supported) + format_mask |= (1 << GST_VIDEO_FORMAT_Y444); + break; + } + default: + GST_FIXME ("unmapped input format: 0x%08x", formats[i]); + break; + } + } + + if (format_mask == 0) + return FALSE; + + /* process a second time so we can add formats in the order we want */ + g_value_init (&list, GST_TYPE_LIST); + g_value_init (&val, G_TYPE_STRING); + if ((format_mask & (1 << GST_VIDEO_FORMAT_NV12))) { + g_value_set_static_string (&val, "NV12"); + gst_value_list_append_value (&list, &val); + } + if ((format_mask & (1 << GST_VIDEO_FORMAT_YV12))) { + g_value_set_static_string (&val, "YV12"); + gst_value_list_append_value (&list, &val); + } + if ((format_mask & (1 << GST_VIDEO_FORMAT_I420))) { + g_value_set_static_string (&val, "I420"); + gst_value_list_append_value (&list, &val); + } + if ((format_mask & (1 << GST_VIDEO_FORMAT_Y444))) { + g_value_set_static_string (&val, "Y444"); + gst_value_list_append_value (&list, &val); + } + 
g_value_unset (&val); + + GST_OBJECT_LOCK (nvenc); + g_free (nvenc->input_formats); + nvenc->input_formats = g_memdup (&list, sizeof (GValue)); + GST_OBJECT_UNLOCK (nvenc); + + return TRUE; +} + +static gboolean +gst_nv_base_enc_open (GstVideoEncoder * enc) +{ + GstNvBaseEnc *nvenc = GST_NV_BASE_ENC (enc); + + nvenc->cuda_ctx = gst_nvenc_create_cuda_context (nvenc->cuda_device_id); + if (nvenc->cuda_ctx == NULL) { + GST_ELEMENT_ERROR (enc, LIBRARY, INIT, (NULL), + ("Failed to create CUDA context, perhaps CUDA is not supported.")); + return FALSE; + } + + { + NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS params = { 0, }; + NVENCSTATUS nv_ret; + + params.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER; + params.apiVersion = NVENCAPI_VERSION; + params.device = nvenc->cuda_ctx; + params.deviceType = NV_ENC_DEVICE_TYPE_CUDA; + nv_ret = NvEncOpenEncodeSessionEx (¶ms, &nvenc->encoder); + if (nv_ret != NV_ENC_SUCCESS) { + GST_ERROR ("Failed to create NVENC encoder session, ret=%d", nv_ret); + if (gst_nvenc_destroy_cuda_context (nvenc->cuda_ctx)) + nvenc->cuda_ctx = NULL; + return FALSE; + } + GST_INFO ("created NVENC encoder %p", nvenc->encoder); + } + + /* query supported input formats */ + if (!_get_supported_input_formats (nvenc)) { + GST_WARNING_OBJECT (nvenc, "No supported input formats"); + gst_nv_base_enc_close (enc); + return FALSE; + } + + return TRUE; +} + +static void +gst_nv_base_enc_set_context (GstElement * element, GstContext * context) +{ + GstNvBaseEnc *nvenc = GST_NV_BASE_ENC (element); + +#if HAVE_NVENC_GST_GL + gst_gl_handle_set_context (element, context, + (GstGLDisplay **) & nvenc->display, + (GstGLContext **) & nvenc->other_context); + if (nvenc->display) + gst_gl_display_filter_gl_api (GST_GL_DISPLAY (nvenc->display), + SUPPORTED_GL_APIS); +#endif +} + +static gboolean +gst_nv_base_enc_sink_query (GstVideoEncoder * enc, GstQuery * query) +{ + GstNvBaseEnc *nvenc = GST_NV_BASE_ENC (enc); + + switch (GST_QUERY_TYPE (query)) { +#if HAVE_NVENC_GST_GL + case 
GST_QUERY_CONTEXT:{ + gboolean ret; + + ret = gst_gl_handle_context_query ((GstElement *) nvenc, query, + (GstGLDisplay **) & nvenc->display, + (GstGLContext **) & nvenc->other_context); + if (nvenc->display) + gst_gl_display_filter_gl_api (GST_GL_DISPLAY (nvenc->display), + SUPPORTED_GL_APIS); + + if (ret) + return ret; + break; + } +#endif + default: + break; + } + + return GST_VIDEO_ENCODER_CLASS (parent_class)->sink_query (enc, query); +} + +static gboolean +gst_nv_base_enc_start (GstVideoEncoder * enc) +{ + GstNvBaseEnc *nvenc = GST_NV_BASE_ENC (enc); + + nvenc->bitstream_pool = g_async_queue_new (); + nvenc->bitstream_queue = g_async_queue_new (); + nvenc->in_bufs_pool = g_async_queue_new (); + + nvenc->last_flow = GST_FLOW_OK; + +#if HAVE_NVENC_GST_GL + { + gst_gl_ensure_element_data (GST_ELEMENT (nvenc), + (GstGLDisplay **) & nvenc->display, + (GstGLContext **) & nvenc->other_context); + if (nvenc->display) + gst_gl_display_filter_gl_api (GST_GL_DISPLAY (nvenc->display), + SUPPORTED_GL_APIS); + } +#endif + + return TRUE; +} + +static gboolean +gst_nv_base_enc_stop (GstVideoEncoder * enc) +{ + GstNvBaseEnc *nvenc = GST_NV_BASE_ENC (enc); + + gst_nv_base_enc_free_buffers (nvenc); + + if (nvenc->bitstream_pool) { + g_async_queue_unref (nvenc->bitstream_pool); + nvenc->bitstream_pool = NULL; + } + if (nvenc->bitstream_queue) { + g_async_queue_unref (nvenc->bitstream_queue); + nvenc->bitstream_queue = NULL; + } + if (nvenc->in_bufs_pool) { + g_async_queue_unref (nvenc->in_bufs_pool); + nvenc->in_bufs_pool = NULL; + } + if (nvenc->display) { + gst_object_unref (nvenc->display); + nvenc->display = NULL; + } + if (nvenc->other_context) { + gst_object_unref (nvenc->other_context); + nvenc->other_context = NULL; + } + + return TRUE; +} + +static GValue * +_get_interlace_modes (GstNvBaseEnc * nvenc) +{ + GstNvBaseEncClass *nvenc_class = GST_NV_BASE_ENC_GET_CLASS (nvenc); + NV_ENC_CAPS_PARAM caps_param = { 0, }; + GValue *list = g_new0 (GValue, 1); + GValue val = 
G_VALUE_INIT; + + g_value_init (list, GST_TYPE_LIST); + g_value_init (&val, G_TYPE_STRING); + + g_value_set_static_string (&val, "progressive"); + gst_value_list_append_value (list, &val); + + caps_param.version = NV_ENC_CAPS_PARAM_VER; + caps_param.capsToQuery = NV_ENC_CAPS_SUPPORT_FIELD_ENCODING; + + if (NvEncGetEncodeCaps (nvenc->encoder, nvenc_class->codec_id, + &caps_param, &nvenc->interlace_modes) != NV_ENC_SUCCESS) + nvenc->interlace_modes = 0; + + if (nvenc->interlace_modes >= 1) { + g_value_set_static_string (&val, "interleaved"); + gst_value_list_append_value (list, &val); + g_value_set_static_string (&val, "mixed"); + gst_value_list_append_value (list, &val); + } + /* TODO: figure out what nvenc frame based interlacing means in gst terms */ + + return list; +} + +static GstCaps * +gst_nv_base_enc_getcaps (GstVideoEncoder * enc, GstCaps * filter) +{ + GstNvBaseEnc *nvenc = GST_NV_BASE_ENC (enc); + GstCaps *supported_incaps = NULL; + GstCaps *template_caps, *caps; + + GST_OBJECT_LOCK (nvenc); + + if (nvenc->input_formats != NULL) { + GValue *val; + + template_caps = gst_pad_get_pad_template_caps (enc->sinkpad); + supported_incaps = gst_caps_copy (template_caps); + gst_caps_set_value (supported_incaps, "format", nvenc->input_formats); + + val = _get_interlace_modes (nvenc); + gst_caps_set_value (supported_incaps, "interlace-mode", val); + g_free (val); + + GST_LOG_OBJECT (enc, "codec input caps %" GST_PTR_FORMAT, supported_incaps); + GST_LOG_OBJECT (enc, " template caps %" GST_PTR_FORMAT, template_caps); + caps = gst_caps_intersect (template_caps, supported_incaps); + gst_caps_unref (template_caps); + gst_caps_unref (supported_incaps); + supported_incaps = caps; + GST_LOG_OBJECT (enc, " supported caps %" GST_PTR_FORMAT, supported_incaps); + } + + GST_OBJECT_UNLOCK (nvenc); + + caps = gst_video_encoder_proxy_getcaps (enc, supported_incaps, filter); + + if (supported_incaps) + gst_caps_unref (supported_incaps); + + GST_DEBUG_OBJECT (nvenc, " returning caps %" 
GST_PTR_FORMAT, caps); + + return caps; +} + +static gboolean +gst_nv_base_enc_close (GstVideoEncoder * enc) +{ + GstNvBaseEnc *nvenc = GST_NV_BASE_ENC (enc); + + if (nvenc->encoder) { + if (NvEncDestroyEncoder (nvenc->encoder) != NV_ENC_SUCCESS) + return FALSE; + nvenc->encoder = NULL; + } + + if (nvenc->cuda_ctx) { + if (!gst_nvenc_destroy_cuda_context (nvenc->cuda_ctx)) + return FALSE; + nvenc->cuda_ctx = NULL; + } + + GST_OBJECT_LOCK (nvenc); + g_free (nvenc->input_formats); + nvenc->input_formats = NULL; + GST_OBJECT_UNLOCK (nvenc); + + if (nvenc->input_state) { + gst_video_codec_state_unref (nvenc->input_state); + nvenc->input_state = NULL; + } + + if (nvenc->bitstream_pool != NULL) { + g_assert (g_async_queue_length (nvenc->bitstream_pool) == 0); + g_async_queue_unref (nvenc->bitstream_pool); + nvenc->bitstream_pool = NULL; + } + + return TRUE; +} + +static void +gst_nv_base_enc_init (GstNvBaseEnc * nvenc) +{ + GstVideoEncoder *encoder = GST_VIDEO_ENCODER (nvenc); + + GST_VIDEO_ENCODER_STREAM_LOCK (encoder); + GST_VIDEO_ENCODER_STREAM_UNLOCK (encoder); +} + +static void +gst_nv_base_enc_finalize (GObject * obj) +{ + G_OBJECT_CLASS (gst_nv_base_enc_parent_class)->finalize (obj); +} + +static GstVideoCodecFrame * +_find_frame_with_output_buffer (GstNvBaseEnc * nvenc, NV_ENC_OUTPUT_PTR out_buf) +{ + GList *l = gst_video_encoder_get_frames (GST_VIDEO_ENCODER (nvenc)); + gint i; + + for (; l; l = l->next) { + GstVideoCodecFrame *frame = (GstVideoCodecFrame *) l->data; + struct frame_state *state = frame->user_data; + + for (i = 0; i < N_BUFFERS_PER_FRAME; i++) { + if (!state->out_bufs[i]) + break; + + if (state->out_bufs[i] == out_buf) + return frame; + } + } + + return NULL; +} + +static gpointer +gst_nv_base_enc_bitstream_thread (gpointer user_data) +{ + GstVideoEncoder *enc = user_data; + GstNvBaseEnc *nvenc = user_data; + + /* overview of operation: + * 1. retreive the next buffer submitted to the bitstream pool + * 2. 
wait for that buffer to be ready from nvenc (LockBitsream) + * 3. retreive the GstVideoCodecFrame associated with that buffer + * 4. for each buffer in the frame + * 4.1 (step 2): wait for that buffer to be ready from nvenc (LockBitsream) + * 4.2 create an output GstBuffer from the nvenc buffers + * 4.3 unlock the nvenc bitstream buffers UnlockBitsream + * 5. finish_frame() + * 6. cleanup + */ + do { + GstBuffer *buffers[N_BUFFERS_PER_FRAME]; + struct frame_state *state = NULL; + GstVideoCodecFrame *frame = NULL; + NVENCSTATUS nv_ret; + GstFlowReturn flow = GST_FLOW_OK; + gint i; + + { + NV_ENC_LOCK_BITSTREAM lock_bs = { 0, }; + NV_ENC_OUTPUT_PTR out_buf; + + for (i = 0; i < N_BUFFERS_PER_FRAME; i++) { + /* get and lock bitstream buffers */ + GstVideoCodecFrame *tmp_frame; + + if (state && i >= state->n_buffers) + break; + + GST_LOG_OBJECT (enc, "wait for bitstream buffer.."); + + /* assumes buffers are submitted in order */ + out_buf = g_async_queue_pop (nvenc->bitstream_queue); + if ((gpointer) out_buf == SHUTDOWN_COOKIE) + break; + + GST_LOG_OBJECT (nvenc, "waiting for output buffer %p to be ready", + out_buf); + + lock_bs.version = NV_ENC_LOCK_BITSTREAM_VER; + lock_bs.outputBitstream = out_buf; + lock_bs.doNotWait = 0; + + /* FIXME: this would need to be updated for other slice modes */ + lock_bs.sliceOffsets = NULL; + + nv_ret = NvEncLockBitstream (nvenc->encoder, &lock_bs); + if (nv_ret != NV_ENC_SUCCESS) { + /* FIXME: what to do here? 
*/ + GST_ELEMENT_ERROR (nvenc, STREAM, ENCODE, (NULL), + ("Failed to lock bitstream buffer %p, ret %d", + lock_bs.outputBitstream, nv_ret)); + out_buf = SHUTDOWN_COOKIE; + break; + } + + GST_LOG_OBJECT (nvenc, "picture type %d", lock_bs.pictureType); + + tmp_frame = _find_frame_with_output_buffer (nvenc, out_buf); + g_assert (tmp_frame != NULL); + if (frame) + g_assert (frame == tmp_frame); + frame = tmp_frame; + + state = frame->user_data; + g_assert (state->out_bufs[i] == out_buf); + + /* copy into output buffer */ + buffers[i] = + gst_buffer_new_allocate (NULL, lock_bs.bitstreamSizeInBytes, NULL); + gst_buffer_fill (buffers[i], 0, lock_bs.bitstreamBufferPtr, + lock_bs.bitstreamSizeInBytes); + + if (lock_bs.pictureType == NV_ENC_PIC_TYPE_IDR) { + GST_DEBUG_OBJECT (nvenc, "This is a keyframe"); + GST_VIDEO_CODEC_FRAME_SET_SYNC_POINT (frame); + } + + /* TODO: use lock_bs.outputTimeStamp and lock_bs.outputDuration */ + /* TODO: check pts/dts is handled properly if there are B-frames */ + + nv_ret = NvEncUnlockBitstream (nvenc->encoder, state->out_bufs[i]); + if (nv_ret != NV_ENC_SUCCESS) { + /* FIXME: what to do here? 
*/ + GST_ELEMENT_ERROR (nvenc, STREAM, ENCODE, (NULL), + ("Failed to unlock bitstream buffer %p, ret %d", + lock_bs.outputBitstream, nv_ret)); + state->out_bufs[i] = SHUTDOWN_COOKIE; + break; + } + + GST_LOG_OBJECT (nvenc, "returning bitstream buffer %p to pool", + state->out_bufs[i]); + g_async_queue_push (nvenc->bitstream_pool, state->out_bufs[i]); + } + + if (out_buf == SHUTDOWN_COOKIE) + break; + } + + { + GstBuffer *output_buffer = gst_buffer_new (); + + for (i = 0; i < state->n_buffers; i++) + output_buffer = gst_buffer_append (output_buffer, buffers[i]); + + frame->output_buffer = output_buffer; + } + + for (i = 0; i < state->n_buffers; i++) { + void *in_buf = state->in_bufs[i]; + g_assert (in_buf != NULL); + +#if HAVE_NVENC_GST_GL + if (nvenc->gl_input) { + struct gl_input_resource *in_gl_resource = in_buf; + + nv_ret = + NvEncUnmapInputResource (nvenc->encoder, + in_gl_resource->nv_mapped_resource.mappedResource); + if (nv_ret != NV_ENC_SUCCESS) { + GST_ERROR_OBJECT (nvenc, "Failed to unmap input resource %p, ret %d", + in_gl_resource, nv_ret); + break; + } + + memset (&in_gl_resource->nv_mapped_resource, 0, + sizeof (in_gl_resource->nv_mapped_resource)); + } +#endif + + g_async_queue_push (nvenc->in_bufs_pool, in_buf); + } + + flow = gst_video_encoder_finish_frame (enc, frame); + frame = NULL; + + if (flow != GST_FLOW_OK) { + GST_INFO_OBJECT (enc, "got flow %s", gst_flow_get_name (flow)); + g_atomic_int_set (&nvenc->last_flow, flow); + break; + } + } + while (TRUE); + + GST_INFO_OBJECT (nvenc, "exiting thread"); + + return NULL; +} + +static gboolean +gst_nv_base_enc_start_bitstream_thread (GstNvBaseEnc * nvenc) +{ + gchar *name = g_strdup_printf ("%s-read-bits", GST_OBJECT_NAME (nvenc)); + + g_assert (nvenc->bitstream_thread == NULL); + + g_assert (g_async_queue_length (nvenc->bitstream_queue) == 0); + + nvenc->bitstream_thread = + g_thread_try_new (name, gst_nv_base_enc_bitstream_thread, nvenc, NULL); + + g_free (name); + + if (nvenc->bitstream_thread 
== NULL) + return FALSE; + + GST_INFO_OBJECT (nvenc, "started thread to read bitstream"); + return TRUE; +} + +static gboolean +gst_nv_base_enc_stop_bitstream_thread (GstNvBaseEnc * nvenc) +{ + gpointer out_buf; + + if (nvenc->bitstream_thread == NULL) + return TRUE; + + /* FIXME */ + GST_FIXME_OBJECT (nvenc, "stop bitstream reading thread properly"); + g_async_queue_lock (nvenc->bitstream_queue); + g_async_queue_lock (nvenc->bitstream_pool); + while ((out_buf = g_async_queue_try_pop_unlocked (nvenc->bitstream_queue))) { + GST_INFO_OBJECT (nvenc, "stole bitstream buffer %p from queue", out_buf); + g_async_queue_push_unlocked (nvenc->bitstream_pool, out_buf); + } + g_async_queue_push_unlocked (nvenc->bitstream_queue, SHUTDOWN_COOKIE); + g_async_queue_unlock (nvenc->bitstream_pool); + g_async_queue_unlock (nvenc->bitstream_queue); + + /* temporary unlock, so other thread can find and push frame */ + GST_VIDEO_ENCODER_STREAM_UNLOCK (nvenc); + g_thread_join (nvenc->bitstream_thread); + GST_VIDEO_ENCODER_STREAM_LOCK (nvenc); + + nvenc->bitstream_thread = NULL; + return TRUE; +} + +static void +gst_nv_base_enc_reset_queues (GstNvBaseEnc * nvenc, gboolean refill) +{ + gpointer ptr; + gint i; + + GST_INFO_OBJECT (nvenc, "clearing queues"); + + while ((ptr = g_async_queue_try_pop (nvenc->bitstream_queue))) { + /* do nothing */ + } + while ((ptr = g_async_queue_try_pop (nvenc->bitstream_pool))) { + /* do nothing */ + } + while ((ptr = g_async_queue_try_pop (nvenc->in_bufs_pool))) { + /* do nothing */ + } + + if (refill) { /* bitstream_pool takes the output (bitstream) buffers, in_bufs_pool takes the input buffers */ + GST_INFO_OBJECT (nvenc, "refilling buffer pools"); + for (i = 0; i < nvenc->n_bufs; ++i) { + g_async_queue_push (nvenc->bitstream_pool, nvenc->output_bufs[i]); + g_async_queue_push (nvenc->in_bufs_pool, nvenc->input_bufs[i]); + } + } +} + +static void +gst_nv_base_enc_free_buffers (GstNvBaseEnc * nvenc) +{ + NVENCSTATUS nv_ret; + guint i; + + if (nvenc->encoder == NULL) + return; + + gst_nv_base_enc_reset_queues (nvenc, FALSE); + + for (i = 0; i < 
nvenc->n_bufs; ++i) { + NV_ENC_OUTPUT_PTR out_buf = nvenc->output_bufs[i]; + +#if HAVE_NVENC_GST_GL + if (nvenc->gl_input) { + struct gl_input_resource *in_gl_resource = nvenc->input_bufs[i]; + + cuCtxPushCurrent (nvenc->cuda_ctx); + nv_ret = + NvEncUnregisterResource (nvenc->encoder, + in_gl_resource->nv_resource.registeredResource); + if (nv_ret != NV_ENC_SUCCESS) + GST_ERROR_OBJECT (nvenc, "Failed to unregister resource %p, ret %d", + in_gl_resource, nv_ret); + + g_free (in_gl_resource); + cuCtxPopCurrent (NULL); + } else +#endif + { + NV_ENC_INPUT_PTR in_buf = (NV_ENC_INPUT_PTR) nvenc->input_bufs[i]; + + GST_DEBUG_OBJECT (nvenc, "Destroying input buffer %p", in_buf); + nv_ret = NvEncDestroyInputBuffer (nvenc->encoder, in_buf); + if (nv_ret != NV_ENC_SUCCESS) { + GST_ERROR_OBJECT (nvenc, "Failed to destroy input buffer %p, ret %d", + in_buf, nv_ret); + } + } + + GST_DEBUG_OBJECT (nvenc, "Destroying output bitstream buffer %p", out_buf); + nv_ret = NvEncDestroyBitstreamBuffer (nvenc->encoder, out_buf); + if (nv_ret != NV_ENC_SUCCESS) { + GST_ERROR_OBJECT (nvenc, "Failed to destroy output buffer %p, ret %d", + out_buf, nv_ret); + } + } + + nvenc->n_bufs = 0; + g_free (nvenc->output_bufs); + nvenc->output_bufs = NULL; + g_free (nvenc->input_bufs); + nvenc->input_bufs = NULL; +} + +static inline guint +_get_plane_width (GstVideoInfo * info, guint plane) +{ + if (GST_VIDEO_INFO_IS_YUV (info)) + /* For now component width and plane width are the same and the + * plane-component mapping matches + */ + return GST_VIDEO_INFO_COMP_WIDTH (info, plane); + else /* RGB, GRAY */ + return GST_VIDEO_INFO_WIDTH (info); +} + +static inline guint +_get_plane_height (GstVideoInfo * info, guint plane) +{ + if (GST_VIDEO_INFO_IS_YUV (info)) + /* For now component width and plane width are the same and the + * plane-component mapping matches + */ + return GST_VIDEO_INFO_COMP_HEIGHT (info, plane); + else /* RGB, GRAY */ + return GST_VIDEO_INFO_HEIGHT (info); +} + +static inline gsize 
+_get_frame_data_height (GstVideoInfo * info) +{ + gsize ret = 0; + gint i; + + for (i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++) { + ret += _get_plane_height (info, i); + } + + return ret; +} + +void +gst_nv_base_enc_set_max_encode_size (GstNvBaseEnc * nvenc, guint max_width, + guint max_height) +{ + nvenc->max_encode_width = max_width; + nvenc->max_encode_height = max_height; +} + +void +gst_nv_base_enc_get_max_encode_size (GstNvBaseEnc * nvenc, guint * max_width, + guint * max_height) +{ + *max_width = nvenc->max_encode_width; + *max_height = nvenc->max_encode_height; +} + +static gboolean +gst_nv_base_enc_set_format (GstVideoEncoder * enc, GstVideoCodecState * state) +{ + GstNvBaseEncClass *nvenc_class = GST_NV_BASE_ENC_GET_CLASS (enc); + GstNvBaseEnc *nvenc = GST_NV_BASE_ENC (enc); + GstVideoInfo *info = &state->info; + GstVideoCodecState *old_state = nvenc->input_state; + NVENCSTATUS nv_ret; + + g_assert (nvenc_class->initialize_encoder); + if (!nvenc_class->initialize_encoder (nvenc, old_state, state)) { + GST_ERROR_OBJECT (enc, "Subclass failed to reconfigure encoder"); + return FALSE; + } + + if (!nvenc->max_encode_width && !nvenc->max_encode_height) { + gst_nv_base_enc_set_max_encode_size (nvenc, GST_VIDEO_INFO_WIDTH (info), + GST_VIDEO_INFO_HEIGHT (info)); + } + + if (!old_state) { + nvenc->input_info = *info; + nvenc->gl_input = FALSE; + } + + if (nvenc->input_state) + gst_video_codec_state_unref (nvenc->input_state); + nvenc->input_state = gst_video_codec_state_ref (state); + GST_INFO_OBJECT (nvenc, "configured encoder"); + + /* now allocate some buffers only on first configuration */ + if (!old_state) { +#if HAVE_NVENC_GST_GL + GstCapsFeatures *features; +#endif + guint num_macroblocks, i; + guint input_width, input_height; + + input_width = GST_VIDEO_INFO_WIDTH (info); + input_height = GST_VIDEO_INFO_HEIGHT (info); + + num_macroblocks = (GST_ROUND_UP_16 (input_width) >> 4) + * (GST_ROUND_UP_16 (input_height) >> 4); + nvenc->n_bufs = 
(num_macroblocks >= 8160) ? 32 : 48; + + /* input buffers */ + nvenc->input_bufs = g_new0 (gpointer, nvenc->n_bufs); + +#if HAVE_NVENC_GST_GL + features = gst_caps_get_features (state->caps, 0); + if (gst_caps_features_contains (features, + GST_CAPS_FEATURE_MEMORY_GL_MEMORY)) { + guint pixel_depth = 0; + nvenc->gl_input = TRUE; + + for (i = 0; i < GST_VIDEO_INFO_N_COMPONENTS (info); i++) { + pixel_depth += GST_VIDEO_INFO_COMP_DEPTH (info, i); + } + + cuCtxPushCurrent (nvenc->cuda_ctx); + for (i = 0; i < nvenc->n_bufs; ++i) { + struct gl_input_resource *in_gl_resource = + g_new0 (struct gl_input_resource, 1); + CUresult cu_ret; + + memset (&in_gl_resource->nv_resource, 0, + sizeof (in_gl_resource->nv_resource)); + memset (&in_gl_resource->nv_mapped_resource, 0, + sizeof (in_gl_resource->nv_mapped_resource)); + + /* scratch buffer for non-contigious planer into a contigious buffer */ + cu_ret = + cuMemAllocPitch ((CUdeviceptr *) & in_gl_resource->cuda_pointer, + &in_gl_resource->cuda_stride, input_width, + _get_frame_data_height (info), 16); + if (cu_ret != CUDA_SUCCESS) { + const gchar *err; + + cuGetErrorString (cu_ret, &err); + GST_ERROR_OBJECT (nvenc, "failed to alocate cuda scratch buffer " + "ret %d error :%s", cu_ret, err); + g_assert_not_reached (); + } + + in_gl_resource->nv_resource.version = NV_ENC_REGISTER_RESOURCE_VER; + in_gl_resource->nv_resource.resourceType = + NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR; + in_gl_resource->nv_resource.width = input_width; + in_gl_resource->nv_resource.height = input_height; + in_gl_resource->nv_resource.pitch = in_gl_resource->cuda_stride; + in_gl_resource->nv_resource.bufferFormat = + gst_nvenc_get_nv_buffer_format (GST_VIDEO_INFO_FORMAT (info)); + in_gl_resource->nv_resource.resourceToRegister = + in_gl_resource->cuda_pointer; + + nv_ret = + NvEncRegisterResource (nvenc->encoder, + &in_gl_resource->nv_resource); + if (nv_ret != NV_ENC_SUCCESS) + GST_ERROR_OBJECT (nvenc, "Failed to register resource %p, ret %d", + 
in_gl_resource, nv_ret); + + nvenc->input_bufs[i] = in_gl_resource; + g_async_queue_push (nvenc->in_bufs_pool, nvenc->input_bufs[i]); + } + + cuCtxPopCurrent (NULL); + } else +#endif + { + for (i = 0; i < nvenc->n_bufs; ++i) { + NV_ENC_CREATE_INPUT_BUFFER cin_buf = { 0, }; + + cin_buf.version = NV_ENC_CREATE_INPUT_BUFFER_VER; + + cin_buf.width = GST_ROUND_UP_32 (input_width); + cin_buf.height = GST_ROUND_UP_32 (input_height); + + cin_buf.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_CACHED; + cin_buf.bufferFmt = + gst_nvenc_get_nv_buffer_format (GST_VIDEO_INFO_FORMAT (info)); + + nv_ret = NvEncCreateInputBuffer (nvenc->encoder, &cin_buf); + + if (nv_ret != NV_ENC_SUCCESS) { + GST_WARNING_OBJECT (enc, "Failed to allocate input buffer: %d", + nv_ret); + /* FIXME: clean up */ + return FALSE; + } + + nvenc->input_bufs[i] = cin_buf.inputBuffer; + + GST_INFO_OBJECT (nvenc, "allocated input buffer %2d: %p", i, + nvenc->input_bufs[i]); + + g_async_queue_push (nvenc->in_bufs_pool, nvenc->input_bufs[i]); + } + } + + /* output buffers */ + nvenc->output_bufs = g_new0 (NV_ENC_OUTPUT_PTR, nvenc->n_bufs); + for (i = 0; i < nvenc->n_bufs; ++i) { + NV_ENC_CREATE_BITSTREAM_BUFFER cout_buf = { 0, }; + + cout_buf.version = NV_ENC_CREATE_BITSTREAM_BUFFER_VER; + + /* 1 MB should be large enough to hold most output frames. + * NVENC will automatically increase this if it's not enough. 
*/ + cout_buf.size = 1024 * 1024; + cout_buf.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_CACHED; + + nv_ret = NvEncCreateBitstreamBuffer (nvenc->encoder, &cout_buf); + if (nv_ret != NV_ENC_SUCCESS) { + GST_WARNING_OBJECT (enc, "Failed to allocate input buffer: %d", nv_ret); + /* FIXME: clean up */ + return FALSE; + } + + nvenc->output_bufs[i] = cout_buf.bitstreamBuffer; + + GST_INFO_OBJECT (nvenc, "allocated output buffer %2d: %p", i, + nvenc->output_bufs[i]); + + g_async_queue_push (nvenc->bitstream_pool, nvenc->output_bufs[i]); + } + +#if 0 + /* Get SPS/PPS */ + { + NV_ENC_SEQUENCE_PARAM_PAYLOAD seq_param = { 0 }; + uint32_t seq_size = 0; + + seq_param.version = NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER; + seq_param.spsppsBuffer = g_alloca (1024); + seq_param.inBufferSize = 1024; + seq_param.outSPSPPSPayloadSize = &seq_size; + + nv_ret = NvEncGetSequenceParams (nvenc->encoder, &seq_param); + if (nv_ret != NV_ENC_SUCCESS) { + GST_WARNING_OBJECT (enc, "Failed to retrieve SPS/PPS: %d", nv_ret); + return FALSE; + } + + /* FIXME: use SPS/PPS */ + GST_MEMDUMP_OBJECT (enc, "SPS/PPS", seq_param.spsppsBuffer, seq_size); + } +#endif + } + + g_assert (nvenc_class->set_src_caps); + if (!nvenc_class->set_src_caps (nvenc, state)) { + GST_ERROR_OBJECT (nvenc, "Subclass failed to set output caps"); + /* FIXME: clean up */ + return FALSE; + } + + return TRUE; +} + +static inline guint +_plane_get_n_components (GstVideoInfo * info, guint plane) +{ + switch (GST_VIDEO_INFO_FORMAT (info)) { + case GST_VIDEO_FORMAT_RGBx: + case GST_VIDEO_FORMAT_BGRx: + case GST_VIDEO_FORMAT_xRGB: + case GST_VIDEO_FORMAT_xBGR: + case GST_VIDEO_FORMAT_RGBA: + case GST_VIDEO_FORMAT_BGRA: + case GST_VIDEO_FORMAT_ARGB: + case GST_VIDEO_FORMAT_ABGR: + case GST_VIDEO_FORMAT_AYUV: + return 4; + case GST_VIDEO_FORMAT_RGB: + case GST_VIDEO_FORMAT_BGR: + case GST_VIDEO_FORMAT_RGB16: + case GST_VIDEO_FORMAT_BGR16: + return 3; + case GST_VIDEO_FORMAT_GRAY16_BE: + case GST_VIDEO_FORMAT_GRAY16_LE: + case GST_VIDEO_FORMAT_YUY2: 
/* Argument bundle handed to _map_gl_input_buffer(), which copies the planes
 * of a GL input frame into a CUDA scratch buffer. */
struct map_gl_input
{
  GstNvBaseEnc *nvenc;          /* encoder; its cuda_ctx is made current for the copy */
  GstVideoCodecFrame *frame;    /* frame whose input_buffer memories are read */
  GstVideoInfo *info;           /* plane count and source strides of the frame */
  struct gl_input_resource *in_gl_resource;     /* destination: cuda_pointer / cuda_stride scratch buffer */
};
cuda " + "ret :%d", gl_mem->tex_id, cuda_ret); + g_assert_not_reached (); + } + + cuda_ret = + cudaGraphicsResourceGetMappedPointer (&data->in_gl_resource-> + cuda_plane_pointers[i], &data->in_gl_resource->cuda_num_bytes, + data->in_gl_resource->cuda_texture); + if (cuda_ret != cudaSuccess) { + GST_ERROR_OBJECT (data->nvenc, "failed to get mapped pointer of map GL " + "texture %u in cuda ret :%d", gl_mem->tex_id, cuda_ret); + g_assert_not_reached (); + } + + src_stride = GST_VIDEO_INFO_PLANE_STRIDE (data->info, i); + dest_stride = data->in_gl_resource->cuda_stride; + + /* copy into scratch buffer */ + cuda_ret = + cudaMemcpy2D (data_pointer, dest_stride, + data->in_gl_resource->cuda_plane_pointers[i], src_stride, + _get_plane_width (data->info, i) * plane_n_components, + _get_plane_height (data->info, i), cudaMemcpyDeviceToDevice); + if (cuda_ret != cudaSuccess) { + GST_ERROR_OBJECT (data->nvenc, "failed to copy GL texture %u into cuda " + "ret :%d", gl_mem->tex_id, cuda_ret); + g_assert_not_reached (); + } + + cuda_ret = + cudaGraphicsUnmapResources (1, &data->in_gl_resource->cuda_texture, 0); + if (cuda_ret != cudaSuccess) { + GST_ERROR_OBJECT (data->nvenc, "failed to unmap GL texture %u from cuda " + "ret :%d", gl_mem->tex_id, cuda_ret); + g_assert_not_reached (); + } + + cuda_ret = + cudaGraphicsUnregisterResource (data->in_gl_resource->cuda_texture); + if (cuda_ret != cudaSuccess) { + GST_ERROR_OBJECT (data->nvenc, "failed to unregister GL texture %u from " + "cuda ret :%d", gl_mem->tex_id, cuda_ret); + g_assert_not_reached (); + } + + data_pointer = + data_pointer + + data->in_gl_resource->cuda_stride * + _get_plane_height (&data->nvenc->input_info, i); + } + cuCtxPopCurrent (NULL); +} +#endif + +static GstFlowReturn +_acquire_input_buffer (GstNvBaseEnc * nvenc, gpointer * input) +{ + g_assert (input); + + GST_LOG_OBJECT (nvenc, "acquiring input buffer.."); + GST_VIDEO_ENCODER_STREAM_UNLOCK (nvenc); + *input = g_async_queue_pop (nvenc->in_bufs_pool); + 
GST_VIDEO_ENCODER_STREAM_LOCK (nvenc); + + return GST_FLOW_OK; +} + +static GstFlowReturn +_submit_input_buffer (GstNvBaseEnc * nvenc, GstVideoCodecFrame * frame, + GstVideoFrame * vframe, void *inputBuffer, void *inputBufferPtr, + NV_ENC_BUFFER_FORMAT bufferFormat, void *outputBufferPtr) +{ + GstNvBaseEncClass *nvenc_class = GST_NV_BASE_ENC_GET_CLASS (nvenc); + NV_ENC_PIC_PARAMS pic_params = { 0, }; + NVENCSTATUS nv_ret; + + GST_LOG_OBJECT (nvenc, "%u: input buffer %p, output buffer %p, " + "pts %" GST_TIME_FORMAT, frame->system_frame_number, inputBuffer, + outputBufferPtr, GST_TIME_ARGS (frame->pts)); + + pic_params.version = NV_ENC_PIC_PARAMS_VER; + pic_params.inputBuffer = inputBufferPtr; + pic_params.bufferFmt = bufferFormat; + + pic_params.inputWidth = GST_VIDEO_FRAME_WIDTH (vframe); + pic_params.inputHeight = GST_VIDEO_FRAME_HEIGHT (vframe); + pic_params.outputBitstream = outputBufferPtr; + pic_params.completionEvent = NULL; + if (GST_VIDEO_FRAME_IS_INTERLACED (vframe)) { + if (GST_VIDEO_FRAME_IS_TFF (vframe)) + pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_TOP_BOTTOM; + else + pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_BOTTOM_TOP; + } else { + pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FRAME; + } + pic_params.inputTimeStamp = frame->pts; + pic_params.inputDuration = + GST_CLOCK_TIME_IS_VALID (frame->duration) ? 
frame->duration : 0; + pic_params.frameIdx = frame->system_frame_number; + + if (GST_VIDEO_CODEC_FRAME_IS_FORCE_KEYFRAME (frame)) + pic_params.encodePicFlags = NV_ENC_PIC_FLAG_FORCEIDR; + else + pic_params.encodePicFlags = 0; + + if (nvenc_class->set_pic_params + && !nvenc_class->set_pic_params (nvenc, frame, &pic_params)) { + GST_ERROR_OBJECT (nvenc, "Subclass failed to submit buffer"); + return GST_FLOW_ERROR; + } + + nv_ret = NvEncEncodePicture (nvenc->encoder, &pic_params); + if (nv_ret == NV_ENC_SUCCESS) { + GST_LOG_OBJECT (nvenc, "Encoded picture"); + } else if (nv_ret == NV_ENC_ERR_NEED_MORE_INPUT) { + /* FIXME: we should probably queue pending output buffers here and only + * submit them to the async queue once we got sucess back */ + GST_DEBUG_OBJECT (nvenc, "Encoded picture (encoder needs more input)"); + } else { + GST_ERROR_OBJECT (nvenc, "Failed to encode picture: %d", nv_ret); + GST_DEBUG_OBJECT (nvenc, "re-enqueueing input buffer %p", inputBuffer); + g_async_queue_push (nvenc->in_bufs_pool, inputBuffer); + GST_DEBUG_OBJECT (nvenc, "re-enqueueing output buffer %p", outputBufferPtr); + g_async_queue_push (nvenc->bitstream_pool, outputBufferPtr); + + return GST_FLOW_ERROR; + } + + g_async_queue_push (nvenc->bitstream_queue, outputBufferPtr); + + return GST_FLOW_OK; +} + +static GstFlowReturn +gst_nv_base_enc_handle_frame (GstVideoEncoder * enc, GstVideoCodecFrame * frame) +{ + gpointer input_buffer = NULL; + GstNvBaseEnc *nvenc = GST_NV_BASE_ENC (enc); + NV_ENC_OUTPUT_PTR out_buf; + NVENCSTATUS nv_ret; + GstVideoFrame vframe; + GstVideoInfo *info = &nvenc->input_state->info; + GstFlowReturn flow = GST_FLOW_OK; + GstMapFlags in_map_flags = GST_MAP_READ; + struct frame_state *state; + guint frame_n = 0; + + g_assert (nvenc->encoder != NULL); + +#if HAVE_NVENC_GST_GL + if (nvenc->gl_input) + in_map_flags |= GST_MAP_GL; +#endif + + if (!gst_video_frame_map (&vframe, info, frame->input_buffer, in_map_flags)) + return GST_FLOW_ERROR; + + /* make sure our 
thread that waits for output to be ready is started */ + if (nvenc->bitstream_thread == NULL) { + if (!gst_nv_base_enc_start_bitstream_thread (nvenc)) + goto error; + } + + flow = _acquire_input_buffer (nvenc, &input_buffer); + if (flow != GST_FLOW_OK) + return flow; + if (input_buffer == NULL) + return GST_FLOW_ERROR; + + state = frame->user_data; + if (!state) + state = g_new0 (struct frame_state, 1); + state->n_buffers = 1; + +#if HAVE_NVENC_GST_GL + if (nvenc->gl_input) { + struct gl_input_resource *in_gl_resource = input_buffer; + struct map_gl_input data; + + GST_LOG_OBJECT (enc, "got input buffer %p", in_gl_resource); + + in_gl_resource->gl_mem[0] = + (GstGLMemory *) gst_buffer_peek_memory (frame->input_buffer, 0); + g_assert (gst_is_gl_memory ((GstMemory *) in_gl_resource->gl_mem[0])); + + data.nvenc = nvenc; + data.frame = frame; + data.info = &vframe.info; + data.in_gl_resource = in_gl_resource; + + gst_gl_context_thread_add (in_gl_resource->gl_mem[0]->mem.context, + (GstGLContextThreadFunc) _map_gl_input_buffer, &data); + + in_gl_resource->nv_mapped_resource.version = NV_ENC_MAP_INPUT_RESOURCE_VER; + in_gl_resource->nv_mapped_resource.registeredResource = + in_gl_resource->nv_resource.registeredResource; + + nv_ret = + NvEncMapInputResource (nvenc->encoder, + &in_gl_resource->nv_mapped_resource); + if (nv_ret != NV_ENC_SUCCESS) { + GST_ERROR_OBJECT (nvenc, "Failed to map input resource %p, ret %d", + in_gl_resource, nv_ret); + goto error; + } + + out_buf = g_async_queue_try_pop (nvenc->bitstream_pool); + if (out_buf == NULL) { + GST_DEBUG_OBJECT (nvenc, "wait for output buf to become available again"); + out_buf = g_async_queue_pop (nvenc->bitstream_pool); + } + + state->in_bufs[frame_n] = in_gl_resource; + state->out_bufs[frame_n++] = out_buf; + + frame->user_data = state; + frame->user_data_destroy_notify = (GDestroyNotify) g_free; + + flow = + _submit_input_buffer (nvenc, frame, &vframe, in_gl_resource, + 
in_gl_resource->nv_mapped_resource.mappedResource, + in_gl_resource->nv_mapped_resource.mappedBufferFmt, out_buf); + + /* encoder will keep frame in list internally, we'll look it up again later + * in the thread where we get the output buffers and finish it there */ + gst_video_codec_frame_unref (frame); + frame = NULL; + } +#endif + + if (!nvenc->gl_input) { + NV_ENC_LOCK_INPUT_BUFFER in_buf_lock = { 0, }; + NV_ENC_INPUT_PTR in_buf = input_buffer; + guint8 *src, *dest; + guint src_stride, dest_stride; + guint height, width; + guint y; + + GST_LOG_OBJECT (enc, "got input buffer %p", in_buf); + + in_buf_lock.version = NV_ENC_LOCK_INPUT_BUFFER_VER; + in_buf_lock.inputBuffer = in_buf; + + nv_ret = NvEncLockInputBuffer (nvenc->encoder, &in_buf_lock); + if (nv_ret != NV_ENC_SUCCESS) { + GST_ERROR_OBJECT (nvenc, "Failed to lock input buffer: %d", nv_ret); + /* FIXME: post proper error message */ + goto error; + } + GST_LOG_OBJECT (nvenc, "Locked input buffer %p", in_buf); + + width = GST_VIDEO_FRAME_WIDTH (&vframe); + height = GST_VIDEO_FRAME_HEIGHT (&vframe); + + // FIXME: this only works for NV12 + g_assert (GST_VIDEO_FRAME_FORMAT (&vframe) == GST_VIDEO_FORMAT_NV12); + + /* copy Y plane */ + src = GST_VIDEO_FRAME_PLANE_DATA (&vframe, 0); + src_stride = GST_VIDEO_FRAME_PLANE_STRIDE (&vframe, 0); + dest = in_buf_lock.bufferDataPtr; + dest_stride = in_buf_lock.pitch; + for (y = 0; y < height; ++y) { + memcpy (dest, src, width); + dest += dest_stride; + src += src_stride; + } + + /* copy UV plane */ + src = GST_VIDEO_FRAME_PLANE_DATA (&vframe, 1); + src_stride = GST_VIDEO_FRAME_PLANE_STRIDE (&vframe, 1); + dest = + (guint8 *) in_buf_lock.bufferDataPtr + + GST_ROUND_UP_32 (GST_VIDEO_INFO_HEIGHT (&nvenc->input_info)) * + in_buf_lock.pitch; + dest_stride = in_buf_lock.pitch; + for (y = 0; y < GST_ROUND_UP_2 (height) / 2; ++y) { + memcpy (dest, src, width); + dest += dest_stride; + src += src_stride; + } + + nv_ret = NvEncUnlockInputBuffer (nvenc->encoder, in_buf); + if 
(nv_ret != NV_ENC_SUCCESS) { + GST_ERROR_OBJECT (nvenc, "Failed to unlock input buffer: %d", nv_ret); + goto error; + } + + out_buf = g_async_queue_try_pop (nvenc->bitstream_pool); + if (out_buf == NULL) { + GST_DEBUG_OBJECT (nvenc, "wait for output buf to become available again"); + out_buf = g_async_queue_pop (nvenc->bitstream_pool); + } + + state->in_bufs[frame_n] = in_buf; + state->out_bufs[frame_n++] = out_buf; + frame->user_data = state; + frame->user_data_destroy_notify = (GDestroyNotify) g_free; + + flow = + _submit_input_buffer (nvenc, frame, &vframe, in_buf, in_buf, + gst_nvenc_get_nv_buffer_format (GST_VIDEO_INFO_FORMAT (info)), out_buf); + + /* encoder will keep frame in list internally, we'll look it up again later + * in the thread where we get the output buffers and finish it there */ + gst_video_codec_frame_unref (frame); + frame = NULL; + } + + if (flow != GST_FLOW_OK) + goto out; + + flow = g_atomic_int_get (&nvenc->last_flow); + +out: + + gst_video_frame_unmap (&vframe); + + return flow; + +error: + flow = GST_FLOW_ERROR; + goto out; +} + +static gboolean +gst_nv_base_enc_drain_encoder (GstNvBaseEnc * nvenc) +{ + NV_ENC_PIC_PARAMS pic_params = { 0, }; + NVENCSTATUS nv_ret; + + GST_INFO_OBJECT (nvenc, "draining encoder"); + + if (nvenc->input_state == NULL) { + GST_DEBUG_OBJECT (nvenc, "no input state, nothing to do"); + return TRUE; + } + + pic_params.version = NV_ENC_PIC_PARAMS_VER; + pic_params.encodePicFlags = NV_ENC_PIC_FLAG_EOS; + + nv_ret = NvEncEncodePicture (nvenc->encoder, &pic_params); + if (nv_ret != NV_ENC_SUCCESS) { + GST_LOG_OBJECT (nvenc, "Failed to drain encoder, ret %d", nv_ret); + return FALSE; + } + + return TRUE; +} + +static GstFlowReturn +gst_nv_base_enc_finish (GstVideoEncoder * enc) +{ + GstNvBaseEnc *nvenc = GST_NV_BASE_ENC (enc); + + GST_FIXME_OBJECT (enc, "implement finish"); + + gst_nv_base_enc_drain_encoder (nvenc); + + /* wait for encoder to output the remaining buffers */ + gst_nv_base_enc_stop_bitstream_thread 
(nvenc); + + return GST_FLOW_OK; +} + +#if 0 +static gboolean +gst_nv_base_enc_flush (GstVideoEncoder * enc) +{ + GstNvBaseEnc *nvenc = GST_NV_BASE_ENC (enc); + GST_INFO_OBJECT (nvenc, "done flushing encoder"); + return TRUE; +} +#endif + +static void +gst_nv_base_enc_set_property (GObject * object, guint prop_id, + const GValue * value, GParamSpec * pspec) +{ + GstNvBaseEnc *nvenc = GST_NV_BASE_ENC (object); + + switch (prop_id) { + case PROP_DEVICE_ID: + nvenc->cuda_device_id = g_value_get_uint (value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + +static void +gst_nv_base_enc_get_property (GObject * object, guint prop_id, GValue * value, + GParamSpec * pspec) +{ + GstNvBaseEnc *nvenc = GST_NV_BASE_ENC (object); + + switch (prop_id) { + case PROP_DEVICE_ID: + g_value_set_uint (value, nvenc->cuda_device_id); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} diff --git a/sys/nvenc/gstnvbaseenc.h b/sys/nvenc/gstnvbaseenc.h new file mode 100644 index 0000000000..0a843e9eff --- /dev/null +++ b/sys/nvenc/gstnvbaseenc.h @@ -0,0 +1,114 @@ +/* GStreamer NVENC plugin + * Copyright (C) 2015 Centricular Ltd + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. 
/* Standard GObject type boilerplate for GstNvBaseEnc: type id, instance and
 * class casts, and instance/class type checks. */
#define GST_TYPE_NV_BASE_ENC \
  (gst_nv_base_enc_get_type())
#define GST_NV_BASE_ENC(obj) \
  (G_TYPE_CHECK_INSTANCE_CAST((obj),GST_TYPE_NV_BASE_ENC,GstNvBaseEnc))
#define GST_NV_BASE_ENC_CLASS(klass) \
  (G_TYPE_CHECK_CLASS_CAST((klass),GST_TYPE_NV_BASE_ENC,GstNvBaseEncClass))
#define GST_NV_BASE_ENC_GET_CLASS(obj) \
  (G_TYPE_INSTANCE_GET_CLASS((obj),GST_TYPE_NV_BASE_ENC,GstNvBaseEncClass))
#define GST_IS_NV_BASE_ENC(obj) \
  (G_TYPE_CHECK_INSTANCE_TYPE((obj),GST_TYPE_NV_BASE_ENC))
/* the parameter must be named like the identifier used in the expansion,
 * otherwise the macro only compiles where a variable `klass` is in scope */
#define GST_IS_NV_BASE_ENC_CLASS(klass) \
  (G_TYPE_CHECK_CLASS_TYPE((klass),GST_TYPE_NV_BASE_ENC))
/* Class structure of the NVENC base encoder; codec-specific subclasses fill
 * in the virtual functions below. */
typedef struct {
  GstVideoEncoderClass video_encoder_class;

  /* presumably the NVENC codec GUID of the subclass -- TODO confirm against
   * the subclass class_init */
  GUID codec_id;

  /* Mandatory.  Called from set_format with the previous input state
   * (NULL on first configuration) and the new one; returning FALSE makes
   * set_format fail. */
  gboolean (*initialize_encoder) (GstNvBaseEnc * nvenc,
                                  GstVideoCodecState * old_state,
                                  GstVideoCodecState * state);
  /* Mandatory.  Called at the end of set_format to set the output caps;
   * returning FALSE makes set_format fail. */
  gboolean (*set_src_caps)       (GstNvBaseEnc * nvenc,
                                  GstVideoCodecState * state);
  /* Optional (checked for NULL).  Called on the filled-in picture
   * parameters before the picture is submitted; returning FALSE aborts the
   * submission with GST_FLOW_ERROR. */
  gboolean (*set_pic_params)     (GstNvBaseEnc * nvenc,
                                  GstVideoCodecFrame * frame,
                                  NV_ENC_PIC_PARAMS * pic_params);
} GstNvBaseEncClass;
/* The NvEnc* functions below are thin wrappers around the entries of the
 * nvenc_api function table: each asserts that its entry is non-NULL and
 * forwards its arguments unchanged, returning the NVENCSTATUS of the call. */

/* Forwards to nvenc_api.nvEncOpenEncodeSessionEx. */
NVENCSTATUS
NvEncOpenEncodeSessionEx (NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS * params,
    void **encoder)
{
  g_assert (nvenc_api.nvEncOpenEncodeSessionEx != NULL);
  return nvenc_api.nvEncOpenEncodeSessionEx (params, encoder);
}

/* Forwards to nvenc_api.nvEncDestroyEncoder. */
NVENCSTATUS
NvEncDestroyEncoder (void *encoder)
{
  g_assert (nvenc_api.nvEncDestroyEncoder != NULL);
  return nvenc_api.nvEncDestroyEncoder (encoder);
}

/* Forwards to nvenc_api.nvEncGetEncodeGUIDs. */
NVENCSTATUS
NvEncGetEncodeGUIDs (void *encoder, GUID * array, uint32_t array_size,
    uint32_t * count)
{
  g_assert (nvenc_api.nvEncGetEncodeGUIDs != NULL);
  return nvenc_api.nvEncGetEncodeGUIDs (encoder, array, array_size, count);
}

/* Forwards to nvenc_api.nvEncGetEncodeProfileGUIDCount. */
NVENCSTATUS
NvEncGetEncodeProfileGUIDCount (void *encoder, GUID encodeGUID,
    uint32_t * encodeProfileGUIDCount)
{
  g_assert (nvenc_api.nvEncGetEncodeProfileGUIDCount != NULL);
  return nvenc_api.nvEncGetEncodeProfileGUIDCount (encoder, encodeGUID,
      encodeProfileGUIDCount);
}

/* Forwards to nvenc_api.nvEncGetEncodeProfileGUIDs. */
NVENCSTATUS
NvEncGetEncodeProfileGUIDs (void *encoder, GUID encodeGUID,
    GUID * profileGUIDs, uint32_t guidArraySize, uint32_t * GUIDCount)
{
  g_assert (nvenc_api.nvEncGetEncodeProfileGUIDs != NULL);
  return nvenc_api.nvEncGetEncodeProfileGUIDs (encoder, encodeGUID,
      profileGUIDs, guidArraySize, GUIDCount);
}

/* Forwards to nvenc_api.nvEncGetInputFormats. */
NVENCSTATUS
NvEncGetInputFormats (void *encoder, GUID enc_guid,
    NV_ENC_BUFFER_FORMAT * array, uint32_t size, uint32_t * num)
{
  g_assert (nvenc_api.nvEncGetInputFormats != NULL);
  return nvenc_api.nvEncGetInputFormats (encoder, enc_guid, array, size, num);
}

/* Forwards to nvenc_api.nvEncGetEncodePresetConfig. */
NVENCSTATUS
NvEncGetEncodePresetConfig (void *encoder, GUID encodeGUID,
    GUID presetGUID, NV_ENC_PRESET_CONFIG * presetConfig)
{
  g_assert (nvenc_api.nvEncGetEncodePresetConfig != NULL);
  return nvenc_api.nvEncGetEncodePresetConfig (encoder, encodeGUID, presetGUID,
      presetConfig);
}
/* Forwards to nvenc_api.nvEncGetEncodeCaps after asserting the table entry
 * is non-NULL; the wrappers that follow use the same pattern. */
NVENCSTATUS
NvEncGetEncodeCaps (void *encoder, GUID encodeGUID,
    NV_ENC_CAPS_PARAM * capsParam, int *capsVal)
{
  g_assert (nvenc_api.nvEncGetEncodeCaps != NULL);
  return nvenc_api.nvEncGetEncodeCaps (encoder, encodeGUID, capsParam, capsVal);
}

/* Forwards to nvenc_api.nvEncGetSequenceParams. */
NVENCSTATUS
NvEncGetSequenceParams (void *encoder,
    NV_ENC_SEQUENCE_PARAM_PAYLOAD * sequenceParamPayload)
{
  g_assert (nvenc_api.nvEncGetSequenceParams != NULL);
  return nvenc_api.nvEncGetSequenceParams (encoder, sequenceParamPayload);
}

/* Forwards to nvenc_api.nvEncInitializeEncoder. */
NVENCSTATUS
NvEncInitializeEncoder (void *encoder, NV_ENC_INITIALIZE_PARAMS * params)
{
  g_assert (nvenc_api.nvEncInitializeEncoder != NULL);
  return nvenc_api.nvEncInitializeEncoder (encoder, params);
}

/* Forwards to nvenc_api.nvEncReconfigureEncoder. */
NVENCSTATUS
NvEncReconfigureEncoder (void *encoder, NV_ENC_RECONFIGURE_PARAMS * params)
{
  g_assert (nvenc_api.nvEncReconfigureEncoder != NULL);
  return nvenc_api.nvEncReconfigureEncoder (encoder, params);
}

/* Forwards to nvenc_api.nvEncRegisterResource. */
NVENCSTATUS
NvEncRegisterResource (void *encoder, NV_ENC_REGISTER_RESOURCE * params)
{
  g_assert (nvenc_api.nvEncRegisterResource != NULL);
  return nvenc_api.nvEncRegisterResource (encoder, params);
}

/* Forwards to nvenc_api.nvEncUnregisterResource. */
NVENCSTATUS
NvEncUnregisterResource (void *encoder, NV_ENC_REGISTERED_PTR resource)
{
  g_assert (nvenc_api.nvEncUnregisterResource != NULL);
  return nvenc_api.nvEncUnregisterResource (encoder, resource);
}

/* Forwards to nvenc_api.nvEncMapInputResource. */
NVENCSTATUS
NvEncMapInputResource (void *encoder, NV_ENC_MAP_INPUT_RESOURCE * params)
{
  g_assert (nvenc_api.nvEncMapInputResource != NULL);
  return nvenc_api.nvEncMapInputResource (encoder, params);
}

/* Forwards to nvenc_api.nvEncUnmapInputResource. */
NVENCSTATUS
NvEncUnmapInputResource (void *encoder, NV_ENC_INPUT_PTR input_buffer)
{
  g_assert (nvenc_api.nvEncUnmapInputResource != NULL);
  return nvenc_api.nvEncUnmapInputResource (encoder, input_buffer);
}
/* Forwards to nvenc_api.nvEncCreateInputBuffer after asserting the table
 * entry is non-NULL; the wrappers that follow use the same pattern. */
NVENCSTATUS
NvEncCreateInputBuffer (void *encoder, NV_ENC_CREATE_INPUT_BUFFER * input_buf)
{
  g_assert (nvenc_api.nvEncCreateInputBuffer != NULL);
  return nvenc_api.nvEncCreateInputBuffer (encoder, input_buf);
}

/* Forwards to nvenc_api.nvEncLockInputBuffer. */
NVENCSTATUS
NvEncLockInputBuffer (void *encoder, NV_ENC_LOCK_INPUT_BUFFER * input_buf)
{
  g_assert (nvenc_api.nvEncLockInputBuffer != NULL);
  return nvenc_api.nvEncLockInputBuffer (encoder, input_buf);
}

/* Forwards to nvenc_api.nvEncUnlockInputBuffer. */
NVENCSTATUS
NvEncUnlockInputBuffer (void *encoder, NV_ENC_INPUT_PTR input_buf)
{
  g_assert (nvenc_api.nvEncUnlockInputBuffer != NULL);
  return nvenc_api.nvEncUnlockInputBuffer (encoder, input_buf);
}

/* Forwards to nvenc_api.nvEncDestroyInputBuffer. */
NVENCSTATUS
NvEncDestroyInputBuffer (void *encoder, NV_ENC_INPUT_PTR input_buf)
{
  g_assert (nvenc_api.nvEncDestroyInputBuffer != NULL);
  return nvenc_api.nvEncDestroyInputBuffer (encoder, input_buf);
}

/* Forwards to nvenc_api.nvEncCreateBitstreamBuffer. */
NVENCSTATUS
NvEncCreateBitstreamBuffer (void *encoder, NV_ENC_CREATE_BITSTREAM_BUFFER * bb)
{
  g_assert (nvenc_api.nvEncCreateBitstreamBuffer != NULL);
  return nvenc_api.nvEncCreateBitstreamBuffer (encoder, bb);
}

/* Forwards to nvenc_api.nvEncLockBitstream. */
NVENCSTATUS
NvEncLockBitstream (void *encoder, NV_ENC_LOCK_BITSTREAM * lock_bs)
{
  g_assert (nvenc_api.nvEncLockBitstream != NULL);
  return nvenc_api.nvEncLockBitstream (encoder, lock_bs);
}

/* Forwards to nvenc_api.nvEncUnlockBitstream. */
NVENCSTATUS
NvEncUnlockBitstream (void *encoder, NV_ENC_OUTPUT_PTR bb)
{
  g_assert (nvenc_api.nvEncUnlockBitstream != NULL);
  return nvenc_api.nvEncUnlockBitstream (encoder, bb);
}

/* Forwards to nvenc_api.nvEncDestroyBitstreamBuffer. */
NVENCSTATUS
NvEncDestroyBitstreamBuffer (void *encoder, NV_ENC_OUTPUT_PTR bit_buf)
{
  g_assert (nvenc_api.nvEncDestroyBitstreamBuffer != NULL);
  return nvenc_api.nvEncDestroyBitstreamBuffer (encoder, bit_buf);
}

/* Forwards to nvenc_api.nvEncEncodePicture. */
NVENCSTATUS
NvEncEncodePicture (void *encoder, NV_ENC_PIC_PARAMS * pic_params)
{
  g_assert (nvenc_api.nvEncEncodePicture != NULL);
  return nvenc_api.nvEncEncodePicture (encoder, pic_params);
}

/* Field-by-field equality of two GUIDs: Data1, Data2, Data3 and all eight
 * bytes of Data4.  Returns TRUE when every field matches. */
gboolean
gst_nvenc_cmp_guid (GUID g1, GUID g2)
{
  return (g1.Data1 == g2.Data1 && g1.Data2 == g2.Data2 && g1.Data3 == g2.Data3
      && g1.Data4[0] == g2.Data4[0] && g1.Data4[1] == g2.Data4[1]
      && g1.Data4[2] == g2.Data4[2] && g1.Data4[3] == g2.Data4[3]
      && g1.Data4[4] == g2.Data4[4] && g1.Data4[5] == g2.Data4[5]
      && g1.Data4[6] == g2.Data4[6] && g1.Data4[7] == g2.Data4[7]);
}
g2.Data2 && g1.Data3 == g2.Data3
+      && g1.Data4[0] == g2.Data4[0] && g1.Data4[1] == g2.Data4[1]
+      && g1.Data4[2] == g2.Data4[2] && g1.Data4[3] == g2.Data4[3]
+      && g1.Data4[4] == g2.Data4[4] && g1.Data4[5] == g2.Data4[5]
+      && g1.Data4[6] == g2.Data4[6] && g1.Data4[7] == g2.Data4[7]);
+}
+
+/* Map a GStreamer raw video format onto the matching NVENC buffer format.
+ * Formats other than NV12, YV12, I420 and Y444 yield
+ * NV_ENC_BUFFER_FORMAT_UNDEFINED. */
+NV_ENC_BUFFER_FORMAT
+gst_nvenc_get_nv_buffer_format (GstVideoFormat fmt)
+{
+  switch (fmt) {
+    case GST_VIDEO_FORMAT_NV12:
+      return NV_ENC_BUFFER_FORMAT_NV12_PL;
+    case GST_VIDEO_FORMAT_YV12:
+      return NV_ENC_BUFFER_FORMAT_YV12_PL;
+    case GST_VIDEO_FORMAT_I420:
+      return NV_ENC_BUFFER_FORMAT_IYUV_PL;
+    case GST_VIDEO_FORMAT_Y444:
+      return NV_ENC_BUFFER_FORMAT_YUV444_PL;
+    default:
+      break;
+  }
+  return NV_ENC_BUFFER_FORMAT_UNDEFINED;
+}
+
+/* Initialise CUDA, look up the device whose enumeration index equals
+ * @device_id and create a context on it.  The new context is popped from the
+ * calling thread before it is returned.
+ *
+ * Returns the context, or NULL when CUDA cannot be initialised, no device
+ * with that index was enumerated, or context creation/popping failed.
+ *
+ * Note: the compute capability is only logged; it is not used to reject a
+ * device. */
+CUcontext
+gst_nvenc_create_cuda_context (guint device_id)
+{
+  CUcontext cuda_ctx, old_ctx;
+  CUresult cres = CUDA_SUCCESS;
+  CUdevice cdev = 0, cuda_dev = -1;
+  int dev_count = 0;
+  char name[256];
+  int min = 0, maj = 0;
+  int i;
+
+  GST_INFO ("Initialising CUDA..");
+
+  cres = cuInit (0);
+
+  if (cres != CUDA_SUCCESS) {
+    GST_WARNING ("Failed to initialise CUDA, error code: 0x%08x", cres);
+    return NULL;
+  }
+
+  GST_INFO ("Initialised CUDA");
+
+  cres = cuDeviceGetCount (&dev_count);
+  if (cres != CUDA_SUCCESS || dev_count == 0) {
+    GST_WARNING ("No CUDA devices detected");
+    return NULL;
+  }
+
+  GST_INFO ("%d CUDA device(s) detected", dev_count);
+  for (i = 0; i < dev_count; ++i) {
+    if (cuDeviceGet (&cdev, i) == CUDA_SUCCESS
+        && cuDeviceGetName (name, sizeof (name), cdev) == CUDA_SUCCESS
+        && cuDeviceComputeCapability (&maj, &min, cdev) == CUDA_SUCCESS) {
+      GST_INFO ("GPU #%d supports NVENC: %s (%s) (Compute SM %d.%d)",
+          i, (((maj << 4) + min) >= 0x30) ? "yes" : "no", name, maj, min);
+      /* i is never negative here, so the cast only silences the
+       * signed/unsigned comparison */
+      if ((guint) i == device_id) {
+        cuda_dev = cdev;
+      }
+    }
+  }
+
+  if (cuda_dev == -1) {
+    GST_WARNING ("Device with id %u does not exist or does not support NVENC",
+        device_id);
+    return NULL;
+  }
+
+  if (cuCtxCreate (&cuda_ctx, 0, cuda_dev) != CUDA_SUCCESS) {
+    GST_WARNING ("Failed to create CUDA context for cuda device %d", cuda_dev);
+    return NULL;
+  }
+
+  if (cuCtxPopCurrent (&old_ctx) != CUDA_SUCCESS) {
+    /* do not leak the context we just created */
+    GST_WARNING ("Failed to pop CUDA context %p", cuda_ctx);
+    cuCtxDestroy (cuda_ctx);
+    return NULL;
+  }
+
+  GST_INFO ("Created CUDA context %p", cuda_ctx);
+
+  return cuda_ctx;
+}
+
+/* Destroy a context obtained from gst_nvenc_create_cuda_context().
+ * Returns TRUE when cuCtxDestroy() reports success. */
+gboolean
+gst_nvenc_destroy_cuda_context (CUcontext ctx)
+{
+  GST_INFO ("Destroying CUDA context %p", ctx);
+  return (cuCtxDestroy (ctx) == CUDA_SUCCESS);
+}
+
+/* Plugin entry point: fetches the NVENC function table into nvenc_api and,
+ * only when that succeeds, registers the nvh264enc element.  Always returns
+ * TRUE, so a missing NVENC runtime leaves the plugin loaded but without
+ * elements. */
+static gboolean
+plugin_init (GstPlugin * plugin)
+{
+  GST_DEBUG_CATEGORY_INIT (gst_nvenc_debug, "nvenc", 0, "Nvidia NVENC encoder");
+
+  nvenc_api.version = NV_ENCODE_API_FUNCTION_LIST_VER;
+  if (NvEncodeAPICreateInstance (&nvenc_api) != NV_ENC_SUCCESS) {
+    GST_ERROR ("Failed to get NVEncodeAPI function table!");
+  } else {
+    GST_INFO ("Created NVEncodeAPI instance, got function table");
+
+    gst_element_register (plugin, "nvh264enc", GST_RANK_PRIMARY * 2,
+        gst_nv_h264_enc_get_type ());
+  }
+
+  return TRUE;
+}
+
+GST_PLUGIN_DEFINE (GST_VERSION_MAJOR,
+    GST_VERSION_MINOR,
+    nvenc,
+    "GStreamer NVENC plugin",
+    plugin_init, VERSION, "LGPL", GST_PACKAGE_NAME, GST_PACKAGE_ORIGIN)
diff --git a/sys/nvenc/gstnvenc.h b/sys/nvenc/gstnvenc.h
new file mode 100644
index 0000000000..f4eb34b6b6
--- /dev/null
+++ b/sys/nvenc/gstnvenc.h
@@ -0,0 +1,40 @@
+/* GStreamer NVENC plugin
+ * Copyright (C) 2015 Centricular Ltd
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#ifndef __GST_NVENC_H_INCLUDED__ +#define __GST_NVENC_H_INCLUDED__ + +#include +#include + +#include +#include + +GST_DEBUG_CATEGORY_EXTERN (gst_nvenc_debug); +#define GST_CAT_DEFAULT gst_nvenc_debug + +CUcontext gst_nvenc_create_cuda_context (guint device_id); + +gboolean gst_nvenc_destroy_cuda_context (CUcontext ctx); + +gboolean gst_nvenc_cmp_guid (GUID g1, GUID g2); + +NV_ENC_BUFFER_FORMAT gst_nvenc_get_nv_buffer_format (GstVideoFormat fmt); + +#endif /* __GST_NVENC_H_INCLUDED__ */ diff --git a/sys/nvenc/gstnvh264enc.c b/sys/nvenc/gstnvh264enc.c new file mode 100644 index 0000000000..170e3f7024 --- /dev/null +++ b/sys/nvenc/gstnvh264enc.c @@ -0,0 +1,610 @@ +/* GStreamer NVENC plugin + * Copyright (C) 2015 Centricular Ltd + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "gstnvh264enc.h"
+
+#include
+
+#include
+
+#if HAVE_GST_GL
+#include
+#include
+#include
+#define GST_USE_UNSTABLE_API
+#include
+#endif
+
+#define parent_class gst_nv_h264_enc_parent_class
+G_DEFINE_TYPE (GstNvH264Enc, gst_nv_h264_enc, GST_TYPE_NV_BASE_ENC);
+
+/* Source pad template: H.264 byte-stream, access-unit aligned, limited to
+ * 4096x2160 and to the high, main and baseline profiles. */
+/* *INDENT-OFF* */
+static GstStaticPadTemplate src_factory = GST_STATIC_PAD_TEMPLATE ("src",
+    GST_PAD_SRC,
+    GST_PAD_ALWAYS,
+    GST_STATIC_CAPS ("video/x-h264, "
+        "width = (int) [ 1, 4096 ], height = (int) [ 1, 2160 ], "
+        "framerate = (fraction) [0/1, MAX], "
+        "stream-format = (string) byte-stream, " // TODO: avc support
+        "alignment = (string) au, "
+        "profile = (string) { high, main, baseline }") // TODO: a couple of others
+    );
+/* *INDENT-ON* */
+
+/* Forward declarations of the vfuncs and GObject handlers defined below. */
+static gboolean gst_nv_h264_enc_open (GstVideoEncoder * enc);
+static gboolean gst_nv_h264_enc_close (GstVideoEncoder * enc);
+static GstCaps *gst_nv_h264_enc_getcaps (GstVideoEncoder * enc,
+    GstCaps * filter);
+static gboolean gst_nv_h264_enc_set_src_caps (GstNvBaseEnc * nvenc,
+    GstVideoCodecState * state);
+static gboolean gst_nv_h264_enc_initialize_encoder (GstNvBaseEnc * nvenc,
+    GstVideoCodecState * old_state, GstVideoCodecState * state);
+static gboolean gst_nv_h264_enc_set_pic_params (GstNvBaseEnc * nvenc,
+    GstVideoCodecFrame * frame, NV_ENC_PIC_PARAMS * pic_params);
+static void gst_nv_h264_enc_set_property (GObject * object, guint prop_id,
+    const GValue * value, GParamSpec * pspec);
+static void gst_nv_h264_enc_get_property (GObject * object, guint prop_id,
+    GValue * value, GParamSpec * pspec);
+static void gst_nv_h264_enc_finalize (GObject * obj);
+
+/* Class initialiser: wires the GObject handlers and the video-encoder and
+ * NVENC base-class vfuncs to the H.264 implementations, then installs the
+ * source pad template and the element metadata. */
+static void
+gst_nv_h264_enc_class_init (GstNvH264EncClass * klass)
+{
+  GObjectClass *gobject_class = G_OBJECT_CLASS (klass);
+  GstElementClass *element_class = GST_ELEMENT_CLASS (klass);
+  GstVideoEncoderClass *videoenc_class = GST_VIDEO_ENCODER_CLASS (klass);
+  GstNvBaseEncClass *nvenc_class = GST_NV_BASE_ENC_CLASS
(klass);
+
+  gobject_class->set_property = gst_nv_h264_enc_set_property;
+  gobject_class->get_property = gst_nv_h264_enc_get_property;
+  gobject_class->finalize = gst_nv_h264_enc_finalize;
+
+  videoenc_class->open = GST_DEBUG_FUNCPTR (gst_nv_h264_enc_open);
+  videoenc_class->close = GST_DEBUG_FUNCPTR (gst_nv_h264_enc_close);
+
+  videoenc_class->getcaps = GST_DEBUG_FUNCPTR (gst_nv_h264_enc_getcaps);
+
+  nvenc_class->codec_id = NV_ENC_CODEC_H264_GUID;
+  nvenc_class->initialize_encoder = gst_nv_h264_enc_initialize_encoder;
+  nvenc_class->set_src_caps = gst_nv_h264_enc_set_src_caps;
+  nvenc_class->set_pic_params = gst_nv_h264_enc_set_pic_params;
+
+  gst_element_class_add_pad_template (element_class,
+      gst_static_pad_template_get (&src_factory));
+
+  gst_element_class_set_static_metadata (element_class,
+      "NVENC H.264 Video Encoder",
+      "Codec/Encoder/Video",
+      "Encode H.264 video streams using NVIDIA's hardware-accelerated NVENC encoder API",
+      "Tim-Philipp Müller \n"
+      "Matthew Waters ");
+}
+
+/* Instance initialiser; nothing to set up. */
+static void
+gst_nv_h264_enc_init (GstNvH264Enc * nvenc)
+{
+}
+
+/* Finalizer; only chains up to the parent class. */
+static void
+gst_nv_h264_enc_finalize (GObject * obj)
+{
+  G_OBJECT_CLASS (gst_nv_h264_enc_parent_class)->finalize (obj);
+}
+
+/* Release the profile list stored on the element, including the contents of
+ * the GValue (g_free() alone would leak the list's storage), and reset the
+ * pointer.  The caller must hold the object lock. */
+static void
+_clear_supported_profiles (GstNvH264Enc * nvenc)
+{
+  if (nvenc->supported_profiles != NULL) {
+    g_value_unset (nvenc->supported_profiles);
+    g_free (nvenc->supported_profiles);
+    nvenc->supported_profiles = NULL;
+  }
+}
+
+/* Ask the encoder which H.264 profile GUIDs it offers and store the ones we
+ * can name (baseline, main, high) as a string list in
+ * nvenc->supported_profiles.
+ *
+ * Returns FALSE when either query fails or none of the reported profiles is
+ * one we map; in that case the stored list is left untouched. */
+static gboolean
+_get_supported_profiles (GstNvH264Enc * nvenc)
+{
+  NVENCSTATUS nv_ret;
+  GUID profile_guids[64];
+  GValue list = G_VALUE_INIT;
+  GValue val = G_VALUE_INIT;
+  guint i, n, n_profiles;
+
+  nv_ret =
+      NvEncGetEncodeProfileGUIDCount (GST_NV_BASE_ENC (nvenc)->encoder,
+      NV_ENC_CODEC_H264_GUID, &n);
+  if (nv_ret != NV_ENC_SUCCESS)
+    return FALSE;
+
+  nv_ret =
+      NvEncGetEncodeProfileGUIDs (GST_NV_BASE_ENC (nvenc)->encoder,
+      NV_ENC_CODEC_H264_GUID, profile_guids, G_N_ELEMENTS (profile_guids), &n);
+  if (nv_ret != NV_ENC_SUCCESS)
+    return FALSE;
+
+  /* never read past the local array, whatever count was reported */
+  n = MIN (n, G_N_ELEMENTS (profile_guids));
+
+  n_profiles = 0;
+  g_value_init (&list, GST_TYPE_LIST);
+  for (i = 0; i < n; i++) {
+    g_value_init (&val, G_TYPE_STRING);
+
+    if (gst_nvenc_cmp_guid (profile_guids[i],
+            NV_ENC_H264_PROFILE_BASELINE_GUID)) {
+      g_value_set_static_string (&val, "baseline");
+      gst_value_list_append_value (&list, &val);
+      n_profiles++;
+    } else if (gst_nvenc_cmp_guid (profile_guids[i],
+            NV_ENC_H264_PROFILE_MAIN_GUID)) {
+      g_value_set_static_string (&val, "main");
+      gst_value_list_append_value (&list, &val);
+      n_profiles++;
+    } else if (gst_nvenc_cmp_guid (profile_guids[i],
+            NV_ENC_H264_PROFILE_HIGH_GUID)) {
+      g_value_set_static_string (&val, "high");
+      gst_value_list_append_value (&list, &val);
+      n_profiles++;
+    }
+    /* TODO: map HIGH_444, STEREO, CONSTRAINED_HIGH, SVC_TEMPORAL_SCALABILITY */
+
+    g_value_unset (&val);
+  }
+
+  if (n_profiles == 0) {
+    /* the (empty) list still owns storage */
+    g_value_unset (&list);
+    return FALSE;
+  }
+
+  GST_OBJECT_LOCK (nvenc);
+  _clear_supported_profiles (nvenc);
+  /* shallow copy: ownership of the list contents moves to the heap copy, so
+   * the local 'list' must not be unset afterwards */
+  nvenc->supported_profiles = g_memdup (&list, sizeof (GValue));
+  GST_OBJECT_UNLOCK (nvenc);
+
+  return TRUE;
+}
+
+/* GstVideoEncoder::open: chains up, then verifies that the device offers
+ * H.264 encoding and at least one profile we know.  On failure the element
+ * is closed again and FALSE is returned. */
+static gboolean
+gst_nv_h264_enc_open (GstVideoEncoder * enc)
+{
+  GstNvH264Enc *nvenc = GST_NV_H264_ENC (enc);
+
+  if (!GST_VIDEO_ENCODER_CLASS (gst_nv_h264_enc_parent_class)->open (enc))
+    return FALSE;
+
+  /* Check if H.264 is supported */
+  {
+    uint32_t i, num = 0;
+    GUID guids[16];
+
+    /* a failed query is treated like an empty codec list */
+    if (NvEncGetEncodeGUIDs (GST_NV_BASE_ENC (nvenc)->encoder, guids,
+            G_N_ELEMENTS (guids), &num) != NV_ENC_SUCCESS)
+      num = 0;
+    num = MIN (num, G_N_ELEMENTS (guids));
+
+    for (i = 0; i < num; ++i) {
+      if (gst_nvenc_cmp_guid (guids[i], NV_ENC_CODEC_H264_GUID))
+        break;
+    }
+    GST_INFO_OBJECT (enc, "H.264 encoding %ssupported", (i == num) ? "un" : "");
+    if (i == num) {
+      gst_nv_h264_enc_close (enc);
+      return FALSE;
+    }
+  }
+
+  /* query supported encoding profiles */
+  if (!_get_supported_profiles (nvenc)) {
+    GST_WARNING_OBJECT (nvenc, "No supported encoding profiles");
+    gst_nv_h264_enc_close (enc);
+    return FALSE;
+  }
+
+  return TRUE;
+}
+
+/* GstVideoEncoder::close: drops the cached profile list and chains up. */
+static gboolean
+gst_nv_h264_enc_close (GstVideoEncoder * enc)
+{
+  GstNvH264Enc *nvenc = GST_NV_H264_ENC (enc);
+
+  GST_OBJECT_LOCK (nvenc);
+  _clear_supported_profiles (nvenc);
+  GST_OBJECT_UNLOCK (nvenc);
+
+  return GST_VIDEO_ENCODER_CLASS (gst_nv_h264_enc_parent_class)->close (enc);
+}
+
+/* Build the list of interlace modes to advertise on the sink caps.
+ * "progressive" is always present; "interleaved" and "mixed" are added when
+ * the encoder reports field-encoding support.  The queried capability value
+ * is cached in nvenc->interlace_modes (0 when the query fails).
+ *
+ * Returns a newly allocated GValue list owned by the caller. */
+static GValue *
+_get_interlace_modes (GstNvH264Enc * nvenc)
+{
+  NV_ENC_CAPS_PARAM caps_param = { 0, };
+  GValue *list = g_new0 (GValue, 1);
+  GValue val = G_VALUE_INIT;
+
+  g_value_init (list, GST_TYPE_LIST);
+  g_value_init (&val, G_TYPE_STRING);
+
+  g_value_set_static_string (&val, "progressive");
+  gst_value_list_append_value (list, &val);
+
+  caps_param.version = NV_ENC_CAPS_PARAM_VER;
+  caps_param.capsToQuery = NV_ENC_CAPS_SUPPORT_FIELD_ENCODING;
+
+  if (NvEncGetEncodeCaps (GST_NV_BASE_ENC (nvenc)->encoder,
+          NV_ENC_CODEC_H264_GUID, &caps_param,
+          &nvenc->interlace_modes) != NV_ENC_SUCCESS)
+    nvenc->interlace_modes = 0;
+
+  if (nvenc->interlace_modes >= 1) {
+    g_value_set_static_string (&val, "interleaved");
+    gst_value_list_append_value (list, &val);
+    g_value_set_static_string (&val, "mixed");
+    gst_value_list_append_value (list, &val);
+  }
+  /* TODO: figure out what nvenc frame based interlacing means in gst terms */
+
+  g_value_unset (&val);
+
+  return list;
+}
+
+static GstCaps *
+gst_nv_h264_enc_getcaps (GstVideoEncoder * enc, GstCaps * filter)
+{
+  GstNvH264Enc *nvenc = GST_NV_H264_ENC (enc);
+  GstCaps *supported_incaps = NULL;
+  GstCaps *template_caps, *caps;
+  GValue *input_formats = GST_NV_BASE_ENC (enc)->input_formats;
+
+  GST_OBJECT_LOCK (nvenc);
+
+  if (input_formats != NULL) {
+    GValue *val;
+
+    template_caps = gst_pad_get_pad_template_caps
(enc->sinkpad);
+    supported_incaps = gst_caps_copy (template_caps);
+    gst_caps_set_value (supported_incaps, "format", input_formats);
+
+    val = _get_interlace_modes (nvenc);
+    gst_caps_set_value (supported_incaps, "interlace-mode", val);
+    /* the caps hold their own copy; release the list contents as well as the
+     * container, g_free() alone would leak the list storage */
+    g_value_unset (val);
+    g_free (val);
+
+    GST_LOG_OBJECT (enc, "codec input caps %" GST_PTR_FORMAT, supported_incaps);
+    GST_LOG_OBJECT (enc, " template caps %" GST_PTR_FORMAT, template_caps);
+    caps = gst_caps_intersect (template_caps, supported_incaps);
+    gst_caps_unref (template_caps);
+    gst_caps_unref (supported_incaps);
+    supported_incaps = caps;
+    GST_LOG_OBJECT (enc, " supported caps %" GST_PTR_FORMAT, supported_incaps);
+  }
+
+  GST_OBJECT_UNLOCK (nvenc);
+
+  caps = gst_video_encoder_proxy_getcaps (enc, supported_incaps, filter);
+
+  if (supported_incaps)
+    gst_caps_unref (supported_incaps);
+
+  GST_DEBUG_OBJECT (nvenc, " returning caps %" GST_PTR_FORMAT, caps);
+
+  return caps;
+}
+
+/* Read the sequence parameters back from the encoder and set "profile" and
+ * "level" on @caps from them.  If downstream only accepts a profile that is
+ * a superset of the one produced, the profile in @caps is relabelled so that
+ * negotiation succeeds.
+ *
+ * Returns FALSE (after posting an element error) when the sequence
+ * parameters cannot be retrieved or are too short; TRUE otherwise. */
+static gboolean
+gst_nv_h264_enc_set_profile_and_level (GstNvH264Enc * nvenc, GstCaps * caps)
+{
+#define N_BYTES_SPS 128
+  guint8 sps[N_BYTES_SPS];
+  NV_ENC_SEQUENCE_PARAM_PAYLOAD spp = { 0, };
+  GstStructure *s;
+  const gchar *profile;
+  GstCaps *allowed_caps;
+  GstStructure *s2;
+  const gchar *allowed_profile;
+  NVENCSTATUS nv_ret;
+  /* zeroed so the length check below never reads an indeterminate value */
+  guint32 seq_size = 0;
+
+  spp.version = NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER;
+  spp.inBufferSize = N_BYTES_SPS;
+  spp.spsId = 0;
+  spp.ppsId = 0;
+  spp.spsppsBuffer = &sps;
+  spp.outSPSPPSPayloadSize = &seq_size;
+  nv_ret = NvEncGetSequenceParams (GST_NV_BASE_ENC (nvenc)->encoder, &spp);
+  if (nv_ret != NV_ENC_SUCCESS) {
+    GST_ELEMENT_ERROR (nvenc, STREAM, ENCODE, ("Encode header failed."),
+        ("NvEncGetSequenceParams return code=%d", nv_ret));
+    return FALSE;
+  }
+
+  if (seq_size < 8) {
+    GST_ELEMENT_ERROR (nvenc, STREAM, ENCODE, ("Encode header failed."),
+        ("NvEncGetSequenceParams returned incomplete data"));
+    return FALSE;
+  }
+
+  /* skip nal header and identifier */
+  gst_codec_utils_h264_caps_set_level_and_profile (caps, &sps[5], 3);
+
+  /* Constrained baseline is a strict subset of baseline. If downstream
+   * wanted baseline and we produced constrained baseline, we can just
+   * set the profile to baseline in the caps to make negotiation happy.
+   * Same goes for baseline as subset of main profile and main as a subset
+   * of high profile.
+   */
+  s = gst_caps_get_structure (caps, 0);
+  profile = gst_structure_get_string (s, "profile");
+  if (profile == NULL) {
+    /* no profile was set on the caps, so there is nothing to relabel and
+     * the string comparisons below must not run */
+    GST_WARNING_OBJECT (nvenc, "no profile could be derived from the SPS");
+    goto no_peer;
+  }
+
+  allowed_caps = gst_pad_get_allowed_caps (GST_VIDEO_ENCODER_SRC_PAD (nvenc));
+
+  if (allowed_caps == NULL)
+    goto no_peer;
+
+  /* empty caps have no structure to inspect */
+  if (!gst_caps_is_empty (allowed_caps)
+      && !gst_caps_can_intersect (allowed_caps, caps)) {
+    allowed_caps = gst_caps_make_writable (allowed_caps);
+    allowed_caps = gst_caps_truncate (allowed_caps);
+    s2 = gst_caps_get_structure (allowed_caps, 0);
+    gst_structure_fixate_field_string (s2, "profile", profile);
+    allowed_profile = gst_structure_get_string (s2, "profile");
+    if (allowed_profile == NULL) {
+      /* downstream does not name a single profile; nothing to relabel */
+    } else if (!strcmp (allowed_profile, "high")) {
+      if (!strcmp (profile, "constrained-baseline")
+          || !strcmp (profile, "baseline") || !strcmp (profile, "main")) {
+        gst_structure_set (s, "profile", G_TYPE_STRING, "high", NULL);
+        GST_INFO_OBJECT (nvenc, "downstream requested high profile, but "
+            "encoder will now output %s profile (which is a subset), due "
+            "to how it's been configured", profile);
+      }
+    } else if (!strcmp (allowed_profile, "main")) {
+      if (!strcmp (profile, "constrained-baseline")
+          || !strcmp (profile, "baseline")) {
+        gst_structure_set (s, "profile", G_TYPE_STRING, "main", NULL);
+        GST_INFO_OBJECT (nvenc, "downstream requested main profile, but "
+            "encoder will now output %s profile (which is a subset), due "
+            "to how it's been configured", profile);
+      }
+    } else if (!strcmp (allowed_profile, "baseline")) {
+      if (!strcmp (profile, "constrained-baseline"))
+        gst_structure_set (s, "profile", G_TYPE_STRING, "baseline", NULL);
+    }
+  }
+  gst_caps_unref (allowed_caps);
+
+no_peer:
+
+  return TRUE;
+
+#undef N_BYTES_SPS
+}
+
+static gboolean
+gst_nv_h264_enc_set_src_caps (GstNvBaseEnc * nvenc,
GstVideoCodecState * state)
+{
+  GstNvH264Enc *h264enc = GST_NV_H264_ENC (nvenc);
+  GstVideoCodecState *out_state;
+  GstStructure *s;
+  GstCaps *out_caps;
+
+  out_caps = gst_caps_new_empty_simple ("video/x-h264");
+  s = gst_caps_get_structure (out_caps, 0);
+
+  /* TODO: add support for avc format as well */
+  gst_structure_set (s, "stream-format", G_TYPE_STRING, "byte-stream",
+      "alignment", G_TYPE_STRING, "au", NULL);
+
+  if (!gst_nv_h264_enc_set_profile_and_level (h264enc, out_caps)) {
+    gst_caps_unref (out_caps);
+    return FALSE;
+  }
+
+  out_state = gst_video_encoder_set_output_state (GST_VIDEO_ENCODER (nvenc),
+      out_caps, state);
+
+  GST_INFO_OBJECT (nvenc, "output caps: %" GST_PTR_FORMAT, out_state->caps);
+
+  /* encoder will keep it around for us */
+  gst_video_codec_state_unref (out_state);
+
+  /* TODO: would be nice to also send some tags with the codec name */
+  return TRUE;
+}
+
+/* Configure the NVENC session for @state.
+ *
+ * With @old_state == NULL the encoder is initialised and the stream size
+ * becomes the maximum encode size; otherwise the encoder is reconfigured
+ * and the new size must not exceed that maximum.  Profile and level are
+ * taken from what downstream allows on the source pad, falling back to the
+ * encoder's auto-select values.
+ *
+ * Returns FALSE when downstream caps are empty, the size exceeds the
+ * configured maximum, or an NVENC call fails. */
+static gboolean
+gst_nv_h264_enc_initialize_encoder (GstNvBaseEnc * nvenc,
+    GstVideoCodecState * old_state, GstVideoCodecState * state)
+{
+  GstNvH264Enc *h264enc = GST_NV_H264_ENC (nvenc);
+  NV_ENC_RECONFIGURE_PARAMS reconfigure_params = { 0, };
+  NV_ENC_INITIALIZE_PARAMS init_params = { 0, };
+  NV_ENC_INITIALIZE_PARAMS *params;
+  NV_ENC_PRESET_CONFIG preset_config = { 0, };
+  NVENCSTATUS nv_ret;
+  GstVideoInfo *info = &state->info;
+  GstCaps *allowed_caps, *template_caps;
+  GUID selected_profile = NV_ENC_CODEC_PROFILE_AUTOSELECT_GUID;
+  int level_idc = NV_ENC_LEVEL_AUTOSELECT;
+
+  /* TODO: support reconfiguration */
+  if (old_state) {
+    reconfigure_params.version = NV_ENC_RECONFIGURE_PARAMS_VER;
+    params = &reconfigure_params.reInitEncodeParams;
+  } else {
+    params = &init_params;
+  }
+
+  template_caps = gst_static_pad_template_get_caps (&src_factory);
+  allowed_caps = gst_pad_get_allowed_caps (GST_VIDEO_ENCODER_SRC_PAD (h264enc));
+
+  if (template_caps == allowed_caps) {
+    GST_INFO_OBJECT (h264enc, "downstream has ANY caps");
+    /* both calls above returned a reference to the same caps; drop the one
+     * held through allowed_caps, template_caps is released further down */
+    gst_caps_unref (allowed_caps);
+  } else if (allowed_caps) {
+    GstStructure *s;
+    const gchar *profile;
+    const gchar *level;
+
+    if (gst_caps_is_empty (allowed_caps)) {
+      gst_caps_unref (allowed_caps);
+      gst_caps_unref (template_caps);
+      return FALSE;
+    }
+
+    allowed_caps = gst_caps_make_writable (allowed_caps);
+    allowed_caps = gst_caps_fixate (allowed_caps);
+    s = gst_caps_get_structure (allowed_caps, 0);
+
+    profile = gst_structure_get_string (s, "profile");
+    if (profile) {
+      /* order matters: the longer "high-*" prefixes are tested before the
+       * plain "high" prefix */
+      if (!strcmp (profile, "baseline")) {
+        selected_profile = NV_ENC_H264_PROFILE_BASELINE_GUID;
+      } else if (g_str_has_prefix (profile, "high-4:4:4")) {
+        selected_profile = NV_ENC_H264_PROFILE_HIGH_444_GUID;
+      } else if (g_str_has_prefix (profile, "high-10")) {
+        g_assert_not_reached ();
+      } else if (g_str_has_prefix (profile, "high-4:2:2")) {
+        g_assert_not_reached ();
+      } else if (g_str_has_prefix (profile, "high")) {
+        selected_profile = NV_ENC_H264_PROFILE_HIGH_GUID;
+      } else if (g_str_has_prefix (profile, "main")) {
+        selected_profile = NV_ENC_H264_PROFILE_MAIN_GUID;
+      } else {
+        g_assert_not_reached ();
+      }
+    }
+
+    level = gst_structure_get_string (s, "level");
+    if (level)
+      /* matches values stored in NV_ENC_LEVEL */
+      level_idc = gst_codec_utils_h264_get_level_idc (level);
+
+    gst_caps_unref (allowed_caps);
+  }
+  gst_caps_unref (template_caps);
+
+  params->version = NV_ENC_INITIALIZE_PARAMS_VER;
+  params->encodeGUID = NV_ENC_CODEC_H264_GUID;
+  params->encodeWidth = GST_VIDEO_INFO_WIDTH (info);
+  params->encodeHeight = GST_VIDEO_INFO_HEIGHT (info);
+  /* FIXME: make this a property */
+  params->presetGUID = NV_ENC_PRESET_HP_GUID; // _DEFAULT
+  params->enablePTD = 1;
+  if (!old_state) {
+    /* this sets the required buffer size and the maximum allowed size on
+     * subsequent reconfigures */
+    /* FIXME: propertise this */
+    params->maxEncodeWidth = GST_VIDEO_INFO_WIDTH (info);
+    params->maxEncodeHeight = GST_VIDEO_INFO_HEIGHT (info);
+    gst_nv_base_enc_set_max_encode_size (GST_NV_BASE_ENC (h264enc),
+        params->maxEncodeWidth, params->maxEncodeHeight);
+  } else {
+    guint max_width, max_height;
+
+    gst_nv_base_enc_get_max_encode_size (GST_NV_BASE_ENC (h264enc),
+        &max_width, &max_height);
+
+    if (GST_VIDEO_INFO_WIDTH (info) > max_width
+        || GST_VIDEO_INFO_HEIGHT (info) > max_height) {
+      GST_ELEMENT_ERROR (h264enc, STREAM, FORMAT, ("%s", "Requested stream "
+              "size is larger than the maximum configured size"), (NULL));
+      return FALSE;
+    }
+  }
+
+  preset_config.version = NV_ENC_PRESET_CONFIG_VER;
+  preset_config.presetCfg.version = NV_ENC_CONFIG_VER;
+
+  nv_ret =
+      NvEncGetEncodePresetConfig (GST_NV_BASE_ENC (h264enc)->encoder,
+      params->encodeGUID, params->presetGUID, &preset_config);
+  if (nv_ret != NV_ENC_SUCCESS) {
+    GST_ELEMENT_ERROR (h264enc, LIBRARY, SETTINGS, (NULL),
+        ("Failed to get encode preset configuration: %d", nv_ret));
+    return FALSE;
+  }
+  /* points at a local; only valid until this function returns */
+  params->encodeConfig = &preset_config.presetCfg;
+
+  /* override some defaults */
+  GST_LOG_OBJECT (h264enc, "setting parameters");
+  preset_config.presetCfg.version = NV_ENC_CONFIG_VER;
+  preset_config.presetCfg.profileGUID = selected_profile;
+  preset_config.presetCfg.encodeCodecConfig.h264Config.level = level_idc;
+  preset_config.presetCfg.encodeCodecConfig.h264Config.chromaFormatIDC = 1;
+  if (GST_VIDEO_INFO_FORMAT (info) == GST_VIDEO_FORMAT_Y444) {
+    GST_DEBUG_OBJECT (h264enc, "have Y444 input, setting config accordingly");
+    preset_config.presetCfg.encodeCodecConfig.
+        h264Config.separateColourPlaneFlag = 1;
+    preset_config.presetCfg.encodeCodecConfig.h264Config.chromaFormatIDC = 3;
+  }
+
+  /* FIXME: make property */
+  preset_config.presetCfg.encodeCodecConfig.h264Config.outputAUD = 1;
+
+  if (GST_VIDEO_INFO_IS_INTERLACED (info)) {
+    if (GST_VIDEO_INFO_INTERLACE_MODE (info) ==
+        GST_VIDEO_INTERLACE_MODE_INTERLEAVED
+        || GST_VIDEO_INFO_INTERLACE_MODE (info) ==
+        GST_VIDEO_INTERLACE_MODE_MIXED) {
+      preset_config.presetCfg.frameFieldMode =
+          NV_ENC_PARAMS_FRAME_FIELD_MODE_FIELD;
+    }
+  }
+
+  if (info->fps_d > 0 && info->fps_n > 0) {
+    params->frameRateNum = info->fps_n;
+    params->frameRateDen = info->fps_d;
+  } else {
+    GST_FIXME_OBJECT (h264enc, "variable framerate");
+  }
+
+  if (old_state) {
+    nv_ret =
+        NvEncReconfigureEncoder (GST_NV_BASE_ENC (h264enc)->encoder,
+        &reconfigure_params);
+  } else {
+    nv_ret =
+        NvEncInitializeEncoder (GST_NV_BASE_ENC (h264enc)->encoder, params);
+  }
+
+  if (nv_ret != NV_ENC_SUCCESS) {
+    GST_ELEMENT_ERROR (h264enc, LIBRARY, SETTINGS, (NULL),
+        ("Failed to %sinit encoder: %d", old_state ? "re" : "", nv_ret));
+    return FALSE;
+  }
+  GST_INFO_OBJECT (h264enc, "configured encoder");
+
+  return TRUE;
+}
+
+/* Per-frame picture parameters: always requests a single slice per picture.
+ * @enc and @frame are unused. */
+static gboolean
+gst_nv_h264_enc_set_pic_params (GstNvBaseEnc * enc, GstVideoCodecFrame * frame,
+    NV_ENC_PIC_PARAMS * pic_params)
+{
+  /* encode whole picture in one single slice */
+  pic_params->codecPicParams.h264PicParams.sliceMode = 0;
+  pic_params->codecPicParams.h264PicParams.sliceModeData = 0;
+
+  return TRUE;
+}
+
+/* GObject property setter; the element has no properties of its own yet, so
+ * every id is reported as invalid. */
+static void
+gst_nv_h264_enc_set_property (GObject * object, guint prop_id,
+    const GValue * value, GParamSpec * pspec)
+{
+  switch (prop_id) {
+    default:
+      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+      break;
+  }
+}
+
+/* GObject property getter; see the setter above. */
+static void
+gst_nv_h264_enc_get_property (GObject * object, guint prop_id, GValue * value,
+    GParamSpec * pspec)
+{
+  switch (prop_id) {
+    default:
+      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+      break;
+  }
+}
diff --git a/sys/nvenc/gstnvh264enc.h b/sys/nvenc/gstnvh264enc.h
new file mode 100644
index 0000000000..8ed65748c2
--- /dev/null
+++ b/sys/nvenc/gstnvh264enc.h
@@ -0,0 +1,59 @@
+/* GStreamer NVENC plugin
+ * Copyright (C) 2015 Centricular Ltd
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __GST_NV_H264_ENC_H_INCLUDED__
+#define __GST_NV_H264_ENC_H_INCLUDED__
+
+#include "gstnvbaseenc.h"
+
+#define GST_TYPE_NV_H264_ENC \
+  (gst_nv_h264_enc_get_type())
+#define GST_NV_H264_ENC(obj) \
+  (G_TYPE_CHECK_INSTANCE_CAST((obj),GST_TYPE_NV_H264_ENC,GstNvH264Enc))
+#define GST_NV_H264_ENC_CLASS(klass) \
+  (G_TYPE_CHECK_CLASS_CAST((klass),GST_TYPE_NV_H264_ENC,GstNvH264EncClass))
+#define GST_NV_H264_ENC_GET_CLASS(obj) \
+  (G_TYPE_INSTANCE_GET_CLASS((obj),GST_TYPE_NV_H264_ENC,GstNvH264EncClass))
+#define GST_IS_NV_H264_ENC(obj) \
+  (G_TYPE_CHECK_INSTANCE_TYPE((obj),GST_TYPE_NV_H264_ENC))
+#define GST_IS_NV_H264_ENC_CLASS(klass) \
+  (G_TYPE_CHECK_CLASS_TYPE((klass),GST_TYPE_NV_H264_ENC))
+
+typedef struct {
+  GstNvBaseEnc base_nvenc;
+
+  /* list of H.264 profile names the encoder reported; NULL when closed */
+  GValue * supported_profiles; /* OBJECT LOCK */
+
+  GstVideoCodecState *input_state;
+  gboolean gl_input;
+
+  /* supported interlacing input modes.
+   * 0 = none, 1 = fields, 2 = interleaved */
+  gint interlace_modes;
+} GstNvH264Enc;
+
+typedef struct {
+  GstNvBaseEncClass video_encoder_class;
+} GstNvH264EncClass;
+
+G_GNUC_INTERNAL
+GType gst_nv_h264_enc_get_type (void);
+
+#endif /* __GST_NV_H264_ENC_H_INCLUDED__ */