From b1d13e10af26ee8a062d3a333e9a694444e804ee Mon Sep 17 00:00:00 2001
From: Matthew Waters <matthew@centricular.com>
Date: Thu, 30 Jul 2015 16:42:38 +1000
Subject: [PATCH] Add Nvidia based hardware encoder element

Currently only h264 is supported

https://bugzilla.gnome.org/show_bug.cgi?id=753405
---
 configure.ac             |  103 +++
 sys/Makefile.am          |   11 +-
 sys/nvenc/Makefile.am    |   33 +
 sys/nvenc/README         |   30 +
 sys/nvenc/TODO           |   11 +
 sys/nvenc/gstnvbaseenc.c | 1567 ++++++++++++++++++++++++++++++++++++++
 sys/nvenc/gstnvbaseenc.h |  114 +++
 sys/nvenc/gstnvenc.c     |  328 ++++++++
 sys/nvenc/gstnvenc.h     |   40 +
 sys/nvenc/gstnvh264enc.c |  610 +++++++++++++++
 sys/nvenc/gstnvh264enc.h |   59 ++
 11 files changed, 2904 insertions(+), 2 deletions(-)
 create mode 100644 sys/nvenc/Makefile.am
 create mode 100644 sys/nvenc/README
 create mode 100644 sys/nvenc/TODO
 create mode 100644 sys/nvenc/gstnvbaseenc.c
 create mode 100644 sys/nvenc/gstnvbaseenc.h
 create mode 100644 sys/nvenc/gstnvenc.c
 create mode 100644 sys/nvenc/gstnvenc.h
 create mode 100644 sys/nvenc/gstnvh264enc.c
 create mode 100644 sys/nvenc/gstnvh264enc.h

diff --git a/configure.ac b/configure.ac
index c8d3f810b0..d982ba2842 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1726,6 +1726,108 @@ AC_SUBST(LIBUDEV_LIBS)
 AC_SUBST(LIBUSB_CFLAGS)
 AC_SUBST(LIBUSB_LIBS)
 
+dnl *** NVENC ***
+translit(dnm, m, l) AM_CONDITIONAL(USE_NVENC, true)
+AG_GST_CHECK_FEATURE(NVENC, [NVIDIA Encode API], nvenc, [
+  AG_GST_PKG_CHECK_MODULES(GST_VIDEO, gstreamer-video-1.0)
+  AG_GST_PKG_CHECK_MODULES(GST_PBUTILS, gstreamer-pbutils-1.0)
+  AC_ARG_WITH([cuda-prefix],
+          AS_HELP_STRING([--with-cuda-prefix],
+          [Use the provided prefix for detecting the cuda installation]),
+          [AS_IF([test "x$with_cuda_prefix" != "x"],
+                 [CUDA_PREFIX="$with_cuda_prefix"])],
+          [CUDA_PREFIX=""])
+
+  HAVE_CUDA="yes"
+  if test "x$CUDA_PREFIX" != "x"; then
+    dnl only override if not already set
+    if test "x$CUDA_CFLAGS" = "x" -a "x$CUDA_LIBS" = "x"; then
+      dnl this is an educated guess, user can always override these
+      CUDA_CFLAGS="-I$CUDA_PREFIX/include"
+      CUDA_LIBS="-L$CUDA_PREFIX/lib -L$CUDA_PREFIX/lib64 -L$CUDA_PREFIX/lib/stubs -L$CUDA_PREFIX/lib64/stubs -lcuda -lcudart"
+    fi
+  else
+    PKG_CHECK_MODULES([CUDA], [cuda-7.5 cudart-7.5],, [
+      PKG_CHECK_MODULES([CUDA], [cuda-7.0 cudart-7.0],, [
+        PKG_CHECK_MODULES([CUDA], [cuda-6.5 cudart-6.5],, [
+          AC_MSG_WARN([Could not find cuda headers/libraries])])])])
+  fi
+
+  HAVE_CUDA_H=no
+  HAVE_CUDART_H=no
+  save_CPPFLAGS="$CPPFLAGS"
+  CPPFLAGS="$CUDA_CFLAGS $save_CPPFLAGS "
+  AC_CHECK_HEADER([cuda.h], [HAVE_CUDA_H=yes],
+      AC_MSG_WARN([Could not find cuda.h]))
+  AC_CHECK_HEADER([cuda_runtime_api.h], [HAVE_CUDART_H=yes],
+      AC_MSG_WARN([Could not find cuda_runtime_api.h]))
+  CPPFLAGS=$save_CPPFLAGS
+
+  dnl libcuda and libcudart libraries
+  save_LIBS="$LIBS"
+  LIBS="$CUDA_LIBS $save_LIBS"
+  HAVE_CUDART_LIB="no"
+  AC_CHECK_LIB(cudart,cudaGetErrorString,[HAVE_CUDART_LIB="yes"], [
+      AC_MSG_WARN([Could not find cudart library])])
+  HAVE_CUDA_LIB="no"
+  AC_CHECK_LIB(cuda,cuInit,[HAVE_CUDA_LIB="yes"], [
+      AC_MSG_WARN([Could not find cuda library])])
+  LIBS="$save_LIBS"
+
+  dnl nvEncodeAPI.h header
+  HAVE_NVENCODEAPI_H=no
+  AC_ARG_VAR(NVENCODE_CFLAGS, [C compiler flags for NvEncodeAPI.h])
+  save_CPPFLAGS="$CPPFLAGS"
+  CPPFLAGS="$NVENCODE_CFLAGS $save_CPPFLAGS"
+  AC_CHECK_HEADER([nvEncodeAPI.h], [
+      AC_PREPROC_IFELSE([AC_LANG_SOURCE([[#include <nvEncodeAPI.h>
+          #if NVENCAPI_MAJOR_VERSION < 5
+          #error "Need nvEncodeAPI.h >= 5.0"
+          #endif
+          ]])], [
+          HAVE_NVENCODEAPI_H=yes
+          ], [
+          AC_MSG_WARN([nvEncodeAPI.h must be >= 5.0])
+          ])
+      ],
+      AC_MSG_WARN([Could not find nvEncodeAPI.h]))
+  AC_SUBST(NVENCODE_CFLAGS)
+  CPPFLAGS="$save_CPPFLAGS"
+
+  dnl libnvidia-encode library
+  HAVE_NVENCODE_LIB=no
+  AC_ARG_VAR(NVENCODE_LIBS, [linker flags for nvidia-encode])
+  saved_LIBS="$LIBS"
+  LIBS="$NVENCODE_LIBS $saved_LIBS"
+  AC_CHECK_LIB(nvidia-encode, NvEncodeAPICreateInstance, [HAVE_NVENCODE_LIB="yes"],
+      AC_MSG_WARN([Could not find library nvidia-encode]))
+  NVENCODE_LIBS="$NVENCODE_LIBS -lnvidia-encode"
+  AC_SUBST(NVENCODE_LIBS)
+  LIBS="$saved_LIBS"
+
+  USE_NVENC_GST_GL=no
+  if test "x$HAVE_CUDA_H" = "xyes" \
+      -a "x$HAVE_CUDART_H" = "xyes" \
+      -a "x$HAVE_CUDA_LIB" = "xyes" \
+      -a "x$HAVE_CUDART_LIB" = "xyes" \
+      -a "x$HAVE_NVENCODEAPI_H" = "xyes" \
+      -a "x$HAVE_NVENCODE_LIB" = "xyes"; then
+    HAVE_NVENC="yes"
+    if test x"$USE_OPENGL" = x"yes"; then
+      dnl cuda-gl interop header
+      save_CPPFLAGS="$CPPFLAGS"
+      CPPFLAGS="$save_CPPFLAGS $CUDA_CFLAGS"
+      AC_CHECK_HEADER([cuda_gl_interop.h], [
+        USE_NVENC_GST_GL="yes"
+        AC_DEFINE(HAVE_NVENC_GST_GL, [1] , [NVENC GStreamer OpenGL support available])
+        ])
+      CPPFLAGS="$save_CPPFLAGS"
+    fi
+  else
+    HAVE_NVENC="no"
+  fi
+])
+AM_CONDITIONAL(USE_NVENC_GST_GL, test "x$USE_NVENC_GST_GL" = "xyes")
 
 dnl *** ext plug-ins ***
 dnl keep this list sorted alphabetically !
@@ -3363,6 +3465,7 @@ sys/dshowvideosink/Makefile
 sys/dvb/Makefile
 sys/fbdev/Makefile
 sys/linsys/Makefile
+sys/nvenc/Makefile
 sys/opensles/Makefile
 sys/shm/Makefile
 sys/uvch264/Makefile
diff --git a/sys/Makefile.am b/sys/Makefile.am
index 1051ce5463..a0d7ca6d54 100644
--- a/sys/Makefile.am
+++ b/sys/Makefile.am
@@ -142,9 +142,16 @@ else
 UVCH264_DIR=
 endif
 
-SUBDIRS = $(ACM_DIR) $(ANDROID_MEDIA_DIR) $(APPLE_MEDIA_DIR) $(AVC_DIR) $(BLUEZ_DIR) $(D3DVIDEOSINK_DIR) $(DECKLINK_DIR) $(DIRECTSOUND_DIR) $(WINKS_DIR) $(DVB_DIR) $(FBDEV_DIR) $(LINSYS_DIR) $(OPENSLES_DIR) $(PVR_DIR) $(SHM_DIR) $(UVCH264_DIR) $(VCD_DIR) $(VDPAU_DIR) $(WININET_DIR) $(WINSCREENCAP_DIR) $(WASAPI_DIR)
+if USE_NVENC
+NVENC_DIR=nvenc
+else
+NVENC_DIR=
+endif
+
+SUBDIRS = $(ACM_DIR) $(ANDROID_MEDIA_DIR) $(APPLE_MEDIA_DIR) $(AVC_DIR) $(BLUEZ_DIR) $(D3DVIDEOSINK_DIR) $(DECKLINK_DIR) $(DIRECTSOUND_DIR) $(WINKS_DIR) $(DVB_DIR) $(FBDEV_DIR) $(LINSYS_DIR) $(OPENSLES_DIR) $(PVR_DIR) $(SHM_DIR) $(UVCH264_DIR) $(VCD_DIR) $(VDPAU_DIR) $(WININET_DIR) $(WINSCREENCAP_DIR) $(WASAPI_DIR) $(NVENC_DIR)
 
 DIST_SUBDIRS = acmenc acmmp3dec androidmedia applemedia applemedia-nonpublic avc bluez d3dvideosink decklink directsound dvb linsys fbdev dshowdecwrapper dshowsrcwrapper dshowvideosink \
-		opensles pvr2d shm uvch264 vcd vdpau wasapi wininet winks winscreencap
+		opensles pvr2d shm uvch264 vcd vdpau wasapi wininet winks winscreencap \
+		nvenc
 
 include $(top_srcdir)/common/parallel-subdirs.mak
diff --git a/sys/nvenc/Makefile.am b/sys/nvenc/Makefile.am
new file mode 100644
index 0000000000..68c9a2c06c
--- /dev/null
+++ b/sys/nvenc/Makefile.am
@@ -0,0 +1,33 @@
+plugin_LTLIBRARIES = libgstnvenc.la
+
+libgstnvenc_la_SOURCES = \
+	gstnvenc.c \
+	gstnvbaseenc.c \
+	gstnvh264enc.c
+
+noinst_HEADERS = \
+	gstnvenc.h \
+	gstnvbaseenc.h \
+	gstnvh264enc.h
+
+libgstnvenc_la_CFLAGS = \
+	-I$(top_srcdir)/gst-libs \
+	$(GST_CFLAGS) \
+	$(GST_PBUTILS_CFLAGS) \
+	$(GST_VIDEO_CFLAGS) \
+	$(CUDA_CFLAGS) \
+	$(NVENCODE_CFLAGS)
+
+libgstnvenc_la_LIBADD = \
+	$(GST_LIBS) \
+	$(GST_PBUTILS_LIBS) \
+	$(GST_VIDEO_LIBS) \
+	$(CUDA_LIBS) \
+	$(NVENCODE_LIBS)
+
+if USE_NVENC_GST_GL
+libgstnvenc_la_LIBADD += \
+	$(top_builddir)/gst-libs/gst/gl/libgstgl-$(GST_API_VERSION).la
+endif
+libgstnvenc_la_LDFLAGS = $(GST_PLUGIN_LDFLAGS)
+libgstnvenc_la_LIBTOOLFLAGS = $(GST_PLUGIN_LIBTOOLFLAGS)
diff --git a/sys/nvenc/README b/sys/nvenc/README
new file mode 100644
index 0000000000..62a52d1984
--- /dev/null
+++ b/sys/nvenc/README
@@ -0,0 +1,30 @@
+This plugin is intended for use with NVIDIA hardware.  Specifically, the NVENC
+block available in recent NVIDIA GPU hardware.  This is provided by a
+libnvidia-encode library provided by NVIDIA graphic drivers.
+
+Requirements
+------------
+Cuda >= 6.5
+NVENC >= 5.0
+
+See https://developer.nvidia.com/nvidia-video-codec-sdk for a list of
+supported GPUs.
+
+Building
+--------
+1. Retrieve the NVENC SDK
+from https://developer.nvidia.com/nvidia-video-codec-sdk
+- http://developer.download.nvidia.com/compute/nvenc/v5.0/nvenc_5.0.1_sdk.zip
+2. unzip this somewhere and retrieve or note the location of the
+nvEncodeAPI.h under nvenc_api-5.0.1/Samples/common/inc/
+3. Retrieve a version of cuda from
+https://developer.nvidia.com/cuda-downloads and install somewhere noting
+the installation prefix (typically /opt/cuda or /usr/local/cuda)
+4. Now that the dependencies are sorted, point configure at them using the
+--with-cuda-prefix argument (or CUDA_CFLAGS/CUDA_LIBS) and the NVENCODE_CFLAGS
+and NVENCODE_LIBS environment variables.
+
+More information is available from the following locations
+
+[1] - https://developer.nvidia.com/cuda-downloads
+[2] - https://developer.nvidia.com/nvidia-video-codec-sdk 
diff --git a/sys/nvenc/TODO b/sys/nvenc/TODO
new file mode 100644
index 0000000000..27422cedf3
--- /dev/null
+++ b/sys/nvenc/TODO
@@ -0,0 +1,11 @@
+- check supported encoding formats (H.264 etc.), don't assume H.264
+
+- check performance (time taken) of first cuInit()
+
+- provide buffer pool
+
+- more formats
+
+- renegotiation
+
+- support outputting of AVC as well as byte-stream, negotiate automatically
diff --git a/sys/nvenc/gstnvbaseenc.c b/sys/nvenc/gstnvbaseenc.c
new file mode 100644
index 0000000000..154546d31e
--- /dev/null
+++ b/sys/nvenc/gstnvbaseenc.c
@@ -0,0 +1,1567 @@
+/* GStreamer NVENC plugin
+ * Copyright (C) 2015 Centricular Ltd
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "gstnvbaseenc.h"
+
+#include <gst/pbutils/codec-utils.h>
+
+#include <string.h>
+
+#if HAVE_NVENC_GST_GL
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+#include <cuda_gl_interop.h>
+#include <gst/gl/gl.h>
+#endif
+
+/* TODO:
+ *  - reset last_flow on FLUSH_STOP (seeking)
+ */
+
+#define N_BUFFERS_PER_FRAME 1
+#define SUPPORTED_GL_APIS GST_GL_API_OPENGL3
+
+/* magic pointer value we can put in the async queue to signal shut down */
+#define SHUTDOWN_COOKIE ((gpointer)GINT_TO_POINTER (1))
+
+#define parent_class gst_nv_base_enc_parent_class
+G_DEFINE_ABSTRACT_TYPE (GstNvBaseEnc, gst_nv_base_enc, GST_TYPE_VIDEO_ENCODER);
+
+static GstStaticPadTemplate sink_factory = GST_STATIC_PAD_TEMPLATE ("sink",
+    GST_PAD_SINK,
+    GST_PAD_ALWAYS,
+    GST_STATIC_CAPS ("video/x-raw, " "format = (string) NV12, " // TODO: I420, YV12, Y444 support
+        "width = (int) [ 16, 4096 ], height = (int) [ 16, 2160 ], "
+        "framerate = (fraction) [0, MAX],"
+        "interlace-mode = { progressive, mixed, interleaved } "
+#if HAVE_NVENC_GST_GL
+        ";"
+        "video/x-raw(memory:GLMemory), "
+        "format = (string) { NV12, Y444 }, "
+        "width = (int) [ 16, 4096 ], height = (int) [ 16, 2160 ], "
+        "framerate = (fraction) [0, MAX],"
+        "interlace-mode = { progressive, mixed, interleaved } "
+#endif
+    ));
+
+enum
+{
+  PROP_0,
+  PROP_DEVICE_ID,
+};
+
+#if HAVE_NVENC_GST_GL
+struct gl_input_resource
+{
+  GstGLMemory *gl_mem[GST_VIDEO_MAX_PLANES];
+  struct cudaGraphicsResource *cuda_texture;
+  gpointer cuda_plane_pointers[GST_VIDEO_MAX_PLANES];
+  gpointer cuda_pointer;
+  gsize cuda_stride;
+  gsize cuda_num_bytes;
+  NV_ENC_REGISTER_RESOURCE nv_resource;
+  NV_ENC_MAP_INPUT_RESOURCE nv_mapped_resource;
+};
+#endif
+
+struct frame_state
+{
+  gint n_buffers;
+  gpointer in_bufs[N_BUFFERS_PER_FRAME];
+  gpointer out_bufs[N_BUFFERS_PER_FRAME];
+};
+
+static gboolean gst_nv_base_enc_open (GstVideoEncoder * enc);
+static gboolean gst_nv_base_enc_close (GstVideoEncoder * enc);
+static gboolean gst_nv_base_enc_start (GstVideoEncoder * enc);
+static gboolean gst_nv_base_enc_stop (GstVideoEncoder * enc);
+static void gst_nv_base_enc_set_context (GstElement * element,
+    GstContext * context);
+static gboolean gst_nv_base_enc_sink_query (GstVideoEncoder * enc,
+    GstQuery * query);
+static gboolean gst_nv_base_enc_set_format (GstVideoEncoder * enc,
+    GstVideoCodecState * state);
+static GstFlowReturn gst_nv_base_enc_handle_frame (GstVideoEncoder * enc,
+    GstVideoCodecFrame * frame);
+static void gst_nv_base_enc_free_buffers (GstNvBaseEnc * nvenc);
+static GstFlowReturn gst_nv_base_enc_finish (GstVideoEncoder * enc);
+static void gst_nv_base_enc_set_property (GObject * object, guint prop_id,
+    const GValue * value, GParamSpec * pspec);
+static void gst_nv_base_enc_get_property (GObject * object, guint prop_id,
+    GValue * value, GParamSpec * pspec);
+static void gst_nv_base_enc_finalize (GObject * obj);
+static GstCaps *gst_nv_base_enc_getcaps (GstVideoEncoder * enc,
+    GstCaps * filter);
+
+static void
+gst_nv_base_enc_class_init (GstNvBaseEncClass * klass)
+{
+  GObjectClass *gobject_class = G_OBJECT_CLASS (klass);
+  GstElementClass *element_class = GST_ELEMENT_CLASS (klass);
+  GstVideoEncoderClass *videoenc_class = GST_VIDEO_ENCODER_CLASS (klass);
+
+  gobject_class->set_property = gst_nv_base_enc_set_property;
+  gobject_class->get_property = gst_nv_base_enc_get_property;
+  gobject_class->finalize = gst_nv_base_enc_finalize;
+
+  element_class->set_context = GST_DEBUG_FUNCPTR (gst_nv_base_enc_set_context);
+
+  videoenc_class->open = GST_DEBUG_FUNCPTR (gst_nv_base_enc_open);
+  videoenc_class->close = GST_DEBUG_FUNCPTR (gst_nv_base_enc_close);
+
+  videoenc_class->start = GST_DEBUG_FUNCPTR (gst_nv_base_enc_start);
+  videoenc_class->stop = GST_DEBUG_FUNCPTR (gst_nv_base_enc_stop);
+
+  videoenc_class->set_format = GST_DEBUG_FUNCPTR (gst_nv_base_enc_set_format);
+  videoenc_class->getcaps = GST_DEBUG_FUNCPTR (gst_nv_base_enc_getcaps);
+  videoenc_class->handle_frame =
+      GST_DEBUG_FUNCPTR (gst_nv_base_enc_handle_frame);
+  videoenc_class->finish = GST_DEBUG_FUNCPTR (gst_nv_base_enc_finish);
+  videoenc_class->sink_query = GST_DEBUG_FUNCPTR (gst_nv_base_enc_sink_query);
+
+  gst_element_class_add_pad_template (element_class,
+      gst_static_pad_template_get (&sink_factory));
+
+  g_object_class_install_property (gobject_class, PROP_DEVICE_ID,
+      g_param_spec_uint ("cuda-device-id",
+          "Cuda Device ID",
+          "Set the GPU device to use for operations",
+          0, G_MAXUINT, 0, G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
+}
+
+static gboolean
+_get_supported_input_formats (GstNvBaseEnc * nvenc)
+{
+  GstNvBaseEncClass *nvenc_class = GST_NV_BASE_ENC_GET_CLASS (nvenc);
+  guint64 format_mask = 0;
+  uint32_t i, num = 0;
+  NV_ENC_BUFFER_FORMAT formats[64];
+  GValue list = G_VALUE_INIT;
+  GValue val = G_VALUE_INIT;
+
+  NvEncGetInputFormats (nvenc->encoder, nvenc_class->codec_id, formats,
+      G_N_ELEMENTS (formats), &num);
+
+  for (i = 0; i < num; ++i) {
+    GST_INFO_OBJECT (nvenc, "input format: 0x%08x", formats[i]);
+    /* Apparently we can just ignore the tiled formats and can feed
+     * it the respective untiled planar format instead ?! */
+    switch (formats[i]) {
+      case NV_ENC_BUFFER_FORMAT_NV12_PL:
+      case NV_ENC_BUFFER_FORMAT_NV12_TILED16x16:
+      case NV_ENC_BUFFER_FORMAT_NV12_TILED64x16:
+        format_mask |= (1 << GST_VIDEO_FORMAT_NV12);
+        break;
+      case NV_ENC_BUFFER_FORMAT_YV12_PL:
+      case NV_ENC_BUFFER_FORMAT_YV12_TILED16x16:
+      case NV_ENC_BUFFER_FORMAT_YV12_TILED64x16:
+        format_mask |= (1 << GST_VIDEO_FORMAT_YV12);
+        break;
+      case NV_ENC_BUFFER_FORMAT_IYUV_PL:
+      case NV_ENC_BUFFER_FORMAT_IYUV_TILED16x16:
+      case NV_ENC_BUFFER_FORMAT_IYUV_TILED64x16:
+        format_mask |= (1 << GST_VIDEO_FORMAT_I420);
+        break;
+      case NV_ENC_BUFFER_FORMAT_YUV444_PL:
+      case NV_ENC_BUFFER_FORMAT_YUV444_TILED16x16:
+      case NV_ENC_BUFFER_FORMAT_YUV444_TILED64x16:{
+        NV_ENC_CAPS_PARAM caps_param = { 0, };
+        int yuv444_supported = 0;
+
+        caps_param.version = NV_ENC_CAPS_PARAM_VER;
+        caps_param.capsToQuery = NV_ENC_CAPS_SUPPORT_YUV444_ENCODE;
+
+        if (NvEncGetEncodeCaps (nvenc->encoder, nvenc_class->codec_id,
+                &caps_param, &yuv444_supported) != NV_ENC_SUCCESS)
+          yuv444_supported = 0;
+
+        if (yuv444_supported)
+          format_mask |= (1 << GST_VIDEO_FORMAT_Y444);
+        break;
+      }
+      default:
+        GST_FIXME ("unmapped input format: 0x%08x", formats[i]);
+        break;
+    }
+  }
+
+  if (format_mask == 0)
+    return FALSE;
+
+  /* process a second time so we can add formats in the order we want */
+  g_value_init (&list, GST_TYPE_LIST);
+  g_value_init (&val, G_TYPE_STRING);
+  if ((format_mask & (1 << GST_VIDEO_FORMAT_NV12))) {
+    g_value_set_static_string (&val, "NV12");
+    gst_value_list_append_value (&list, &val);
+  }
+  if ((format_mask & (1 << GST_VIDEO_FORMAT_YV12))) {
+    g_value_set_static_string (&val, "YV12");
+    gst_value_list_append_value (&list, &val);
+  }
+  if ((format_mask & (1 << GST_VIDEO_FORMAT_I420))) {
+    g_value_set_static_string (&val, "I420");
+    gst_value_list_append_value (&list, &val);
+  }
+  if ((format_mask & (1 << GST_VIDEO_FORMAT_Y444))) {
+    g_value_set_static_string (&val, "Y444");
+    gst_value_list_append_value (&list, &val);
+  }
+  g_value_unset (&val);
+
+  GST_OBJECT_LOCK (nvenc);
+  g_free (nvenc->input_formats);
+  nvenc->input_formats = g_memdup (&list, sizeof (GValue));
+  GST_OBJECT_UNLOCK (nvenc);
+
+  return TRUE;
+}
+
+static gboolean
+gst_nv_base_enc_open (GstVideoEncoder * enc)
+{
+  GstNvBaseEnc *nvenc = GST_NV_BASE_ENC (enc);
+
+  nvenc->cuda_ctx = gst_nvenc_create_cuda_context (nvenc->cuda_device_id);
+  if (nvenc->cuda_ctx == NULL) {
+    GST_ELEMENT_ERROR (enc, LIBRARY, INIT, (NULL),
+        ("Failed to create CUDA context, perhaps CUDA is not supported."));
+    return FALSE;
+  }
+
+  {
+    NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS params = { 0, };
+    NVENCSTATUS nv_ret;
+
+    params.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER;
+    params.apiVersion = NVENCAPI_VERSION;
+    params.device = nvenc->cuda_ctx;
+    params.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
+    nv_ret = NvEncOpenEncodeSessionEx (&params, &nvenc->encoder);
+    if (nv_ret != NV_ENC_SUCCESS) {
+      GST_ERROR ("Failed to create NVENC encoder session, ret=%d", nv_ret);
+      if (gst_nvenc_destroy_cuda_context (nvenc->cuda_ctx))
+        nvenc->cuda_ctx = NULL;
+      return FALSE;
+    }
+    GST_INFO ("created NVENC encoder %p", nvenc->encoder);
+  }
+
+  /* query supported input formats */
+  if (!_get_supported_input_formats (nvenc)) {
+    GST_WARNING_OBJECT (nvenc, "No supported input formats");
+    gst_nv_base_enc_close (enc);
+    return FALSE;
+  }
+
+  return TRUE;
+}
+
+static void
+gst_nv_base_enc_set_context (GstElement * element, GstContext * context)
+{
+  /* GL context sharing only; nvenc lives inside the guard to avoid -Wunused */
+#if HAVE_NVENC_GST_GL
+  GstNvBaseEnc *nvenc = GST_NV_BASE_ENC (element);
+  gst_gl_handle_set_context (element, context,
+      (GstGLDisplay **) & nvenc->display,
+      (GstGLContext **) & nvenc->other_context);
+  if (nvenc->display)
+    gst_gl_display_filter_gl_api (GST_GL_DISPLAY (nvenc->display),
+        SUPPORTED_GL_APIS);
+#endif
+}
+
+static gboolean
+gst_nv_base_enc_sink_query (GstVideoEncoder * enc, GstQuery * query)
+{
+  /* GL context queries are answered here (GL builds only, hence nvenc is
+   * declared inside the guard); everything else goes to the parent class */
+  switch (GST_QUERY_TYPE (query)) {
+#if HAVE_NVENC_GST_GL
+    case GST_QUERY_CONTEXT:{
+      GstNvBaseEnc *nvenc = GST_NV_BASE_ENC (enc);
+      gboolean ret;
+
+      ret = gst_gl_handle_context_query ((GstElement *) nvenc, query,
+          (GstGLDisplay **) & nvenc->display,
+          (GstGLContext **) & nvenc->other_context);
+      if (nvenc->display)
+        gst_gl_display_filter_gl_api (GST_GL_DISPLAY (nvenc->display),
+            SUPPORTED_GL_APIS);
+      if (ret)
+        return ret;
+      break;
+    }
+#endif
+    default:
+      break;
+  }
+
+  return GST_VIDEO_ENCODER_CLASS (parent_class)->sink_query (enc, query);
+}
+
+static gboolean
+gst_nv_base_enc_start (GstVideoEncoder * enc)
+{
+  GstNvBaseEnc *nvenc = GST_NV_BASE_ENC (enc);
+
+  nvenc->bitstream_pool = g_async_queue_new ();
+  nvenc->bitstream_queue = g_async_queue_new ();
+  nvenc->in_bufs_pool = g_async_queue_new ();
+
+  nvenc->last_flow = GST_FLOW_OK;
+
+#if HAVE_NVENC_GST_GL
+  {
+    gst_gl_ensure_element_data (GST_ELEMENT (nvenc),
+        (GstGLDisplay **) & nvenc->display,
+        (GstGLContext **) & nvenc->other_context);
+    if (nvenc->display)
+      gst_gl_display_filter_gl_api (GST_GL_DISPLAY (nvenc->display),
+          SUPPORTED_GL_APIS);
+  }
+#endif
+
+  return TRUE;
+}
+
+static gboolean
+gst_nv_base_enc_stop (GstVideoEncoder * enc)
+{
+  GstNvBaseEnc *nvenc = GST_NV_BASE_ENC (enc);
+
+  gst_nv_base_enc_free_buffers (nvenc);
+
+  if (nvenc->bitstream_pool) {
+    g_async_queue_unref (nvenc->bitstream_pool);
+    nvenc->bitstream_pool = NULL;
+  }
+  if (nvenc->bitstream_queue) {
+    g_async_queue_unref (nvenc->bitstream_queue);
+    nvenc->bitstream_queue = NULL;
+  }
+  if (nvenc->in_bufs_pool) {
+    g_async_queue_unref (nvenc->in_bufs_pool);
+    nvenc->in_bufs_pool = NULL;
+  }
+  if (nvenc->display) {
+    gst_object_unref (nvenc->display);
+    nvenc->display = NULL;
+  }
+  if (nvenc->other_context) {
+    gst_object_unref (nvenc->other_context);
+    nvenc->other_context = NULL;
+  }
+
+  return TRUE;
+}
+
+static GValue *
+_get_interlace_modes (GstNvBaseEnc * nvenc)
+{
+  GstNvBaseEncClass *nvenc_class = GST_NV_BASE_ENC_GET_CLASS (nvenc);
+  NV_ENC_CAPS_PARAM caps_param = { 0, };
+  GValue *list = g_new0 (GValue, 1);
+  GValue val = G_VALUE_INIT;
+  /* caller owns the returned list: g_value_unset() it, then g_free() it */
+  g_value_init (list, GST_TYPE_LIST);
+  g_value_init (&val, G_TYPE_STRING);
+
+  g_value_set_static_string (&val, "progressive");
+  gst_value_list_append_value (list, &val);
+
+  caps_param.version = NV_ENC_CAPS_PARAM_VER;
+  caps_param.capsToQuery = NV_ENC_CAPS_SUPPORT_FIELD_ENCODING;
+
+  if (NvEncGetEncodeCaps (nvenc->encoder, nvenc_class->codec_id,
+          &caps_param, &nvenc->interlace_modes) != NV_ENC_SUCCESS)
+    nvenc->interlace_modes = 0;
+
+  if (nvenc->interlace_modes >= 1) {
+    g_value_set_static_string (&val, "interleaved");
+    gst_value_list_append_value (list, &val);
+    g_value_set_static_string (&val, "mixed");
+    gst_value_list_append_value (list, &val);
+  }
+  /* TODO: figure out what nvenc frame based interlacing means in gst terms */
+  g_value_unset (&val);
+  return list;
+}
+
+static GstCaps *
+gst_nv_base_enc_getcaps (GstVideoEncoder * enc, GstCaps * filter)
+{
+  GstNvBaseEnc *nvenc = GST_NV_BASE_ENC (enc);
+  GstCaps *supported_incaps = NULL;
+  GstCaps *template_caps, *caps;
+  /* sink caps = template restricted to formats/interlace modes from NVENC */
+  GST_OBJECT_LOCK (nvenc);
+  if (nvenc->input_formats != NULL) {
+    GValue *val;
+
+    template_caps = gst_pad_get_pad_template_caps (enc->sinkpad);
+    supported_incaps = gst_caps_copy (template_caps);
+    gst_caps_set_value (supported_incaps, "format", nvenc->input_formats);
+
+    val = _get_interlace_modes (nvenc);
+    gst_caps_set_value (supported_incaps, "interlace-mode", val);
+    g_value_unset (val);        /* free the list contents too */
+    g_free (val);
+
+    GST_LOG_OBJECT (enc, "codec input caps %" GST_PTR_FORMAT, supported_incaps);
+    GST_LOG_OBJECT (enc, "   template caps %" GST_PTR_FORMAT, template_caps);
+    caps = gst_caps_intersect (template_caps, supported_incaps);
+    gst_caps_unref (template_caps);
+    gst_caps_unref (supported_incaps);
+    supported_incaps = caps;
+    GST_LOG_OBJECT (enc, "  supported caps %" GST_PTR_FORMAT, supported_incaps);
+  }
+
+  GST_OBJECT_UNLOCK (nvenc);
+
+  caps = gst_video_encoder_proxy_getcaps (enc, supported_incaps, filter);
+
+  if (supported_incaps)
+    gst_caps_unref (supported_incaps);
+
+  GST_DEBUG_OBJECT (nvenc, "  returning caps %" GST_PTR_FORMAT, caps);
+
+  return caps;
+}
+
+static gboolean
+gst_nv_base_enc_close (GstVideoEncoder * enc)
+{
+  GstNvBaseEnc *nvenc = GST_NV_BASE_ENC (enc);
+
+  if (nvenc->encoder) {
+    if (NvEncDestroyEncoder (nvenc->encoder) != NV_ENC_SUCCESS)
+      return FALSE;
+    nvenc->encoder = NULL;
+  }
+  if (nvenc->cuda_ctx) {
+    if (!gst_nvenc_destroy_cuda_context (nvenc->cuda_ctx))
+      return FALSE;
+    nvenc->cuda_ctx = NULL;
+  }
+  /* input_formats is a heap-copied GValue list: unset it before freeing the
+   * struct, otherwise the list contents leak */
+  GST_OBJECT_LOCK (nvenc);
+  if (nvenc->input_formats)
+    g_value_unset (nvenc->input_formats);
+  g_free (nvenc->input_formats);
+  nvenc->input_formats = NULL;
+  GST_OBJECT_UNLOCK (nvenc);
+  if (nvenc->input_state) {
+    gst_video_codec_state_unref (nvenc->input_state);
+    nvenc->input_state = NULL;
+  }
+  if (nvenc->bitstream_pool != NULL) {
+    g_assert (g_async_queue_length (nvenc->bitstream_pool) == 0);
+    g_async_queue_unref (nvenc->bitstream_pool);
+    nvenc->bitstream_pool = NULL;
+  }
+
+  return TRUE;
+}
+
+static void
+gst_nv_base_enc_init (GstNvBaseEnc * nvenc)
+{
+  GstVideoEncoder *encoder = GST_VIDEO_ENCODER (nvenc);
+
+  GST_VIDEO_ENCODER_STREAM_LOCK (encoder);
+  GST_VIDEO_ENCODER_STREAM_UNLOCK (encoder);
+}
+
+static void
+gst_nv_base_enc_finalize (GObject * obj)
+{
+  G_OBJECT_CLASS (gst_nv_base_enc_parent_class)->finalize (obj);
+}
+
+static GstVideoCodecFrame *
+_find_frame_with_output_buffer (GstNvBaseEnc * nvenc, NV_ENC_OUTPUT_PTR out_buf)
+{
+  GList *l, *frames = gst_video_encoder_get_frames (GST_VIDEO_ENCODER (nvenc));
+  GstVideoCodecFrame *found = NULL;
+  gint i;
+
+  /* get_frames() is transfer-full: keep one ref on the match, drop the rest */
+  for (l = frames; l && !found; l = l->next) {
+    struct frame_state *state = ((GstVideoCodecFrame *) l->data)->user_data;
+
+    for (i = 0; state && !found && i < N_BUFFERS_PER_FRAME; i++) {
+      if (!state->out_bufs[i])
+        break;
+      if (state->out_bufs[i] == out_buf)
+        found = gst_video_codec_frame_ref (l->data);
+    }
+  }
+  g_list_free_full (frames, (GDestroyNotify) gst_video_codec_frame_unref);
+  return found;
+}
+
+static gpointer
+gst_nv_base_enc_bitstream_thread (gpointer user_data)
+{
+  GstVideoEncoder *enc = user_data;
+  GstNvBaseEnc *nvenc = user_data;
+
+  /* overview of operation:
+   * 1. retrieve the next buffer submitted to the bitstream queue
+   * 2. wait for that buffer to be ready from nvenc (LockBitstream)
+   * 3. retrieve the GstVideoCodecFrame associated with that buffer
+   * 4. for each buffer in the frame
+   * 4.1 (step 2): wait for that buffer to be ready from nvenc (LockBitstream)
+   * 4.2 create an output GstBuffer from the nvenc buffers
+   * 4.3 unlock the nvenc bitstream buffers UnlockBitstream
+   * 5. finish_frame()
+   * 6. cleanup
+   */
+  do {
+    GstBuffer *buffers[N_BUFFERS_PER_FRAME];
+    struct frame_state *state = NULL;
+    GstVideoCodecFrame *frame = NULL;
+    NVENCSTATUS nv_ret;
+    GstFlowReturn flow = GST_FLOW_OK;
+    gint i;
+
+    {
+      NV_ENC_LOCK_BITSTREAM lock_bs = { 0, };
+      NV_ENC_OUTPUT_PTR out_buf;
+
+      for (i = 0; i < N_BUFFERS_PER_FRAME; i++) {
+        /* get and lock bitstream buffers */
+        GstVideoCodecFrame *tmp_frame;
+
+        if (state && i >= state->n_buffers)
+          break;
+
+        GST_LOG_OBJECT (enc, "wait for bitstream buffer..");
+
+        /* assumes buffers are submitted in order */
+        out_buf = g_async_queue_pop (nvenc->bitstream_queue);
+        if ((gpointer) out_buf == SHUTDOWN_COOKIE)
+          break;
+
+        GST_LOG_OBJECT (nvenc, "waiting for output buffer %p to be ready",
+            out_buf);
+
+        lock_bs.version = NV_ENC_LOCK_BITSTREAM_VER;
+        lock_bs.outputBitstream = out_buf;
+        lock_bs.doNotWait = 0;
+
+        /* FIXME: this would need to be updated for other slice modes */
+        lock_bs.sliceOffsets = NULL;
+
+        nv_ret = NvEncLockBitstream (nvenc->encoder, &lock_bs);
+        if (nv_ret != NV_ENC_SUCCESS) {
+          /* FIXME: what to do here? */
+          GST_ELEMENT_ERROR (nvenc, STREAM, ENCODE, (NULL),
+              ("Failed to lock bitstream buffer %p, ret %d",
+                  lock_bs.outputBitstream, nv_ret));
+          out_buf = SHUTDOWN_COOKIE;
+          break;
+        }
+
+        GST_LOG_OBJECT (nvenc, "picture type %d", lock_bs.pictureType);
+
+        tmp_frame = _find_frame_with_output_buffer (nvenc, out_buf);
+        g_assert (tmp_frame != NULL);
+        if (frame)
+          g_assert (frame == tmp_frame);
+        frame = tmp_frame;
+
+        state = frame->user_data;
+        g_assert (state->out_bufs[i] == out_buf);
+
+        /* copy into output buffer */
+        buffers[i] =
+            gst_buffer_new_allocate (NULL, lock_bs.bitstreamSizeInBytes, NULL);
+        gst_buffer_fill (buffers[i], 0, lock_bs.bitstreamBufferPtr,
+            lock_bs.bitstreamSizeInBytes);
+
+        if (lock_bs.pictureType == NV_ENC_PIC_TYPE_IDR) {
+          GST_DEBUG_OBJECT (nvenc, "This is a keyframe");
+          GST_VIDEO_CODEC_FRAME_SET_SYNC_POINT (frame);
+        }
+
+        /* TODO: use lock_bs.outputTimeStamp and lock_bs.outputDuration */
+        /* TODO: check pts/dts is handled properly if there are B-frames */
+
+        nv_ret = NvEncUnlockBitstream (nvenc->encoder, state->out_bufs[i]);
+        if (nv_ret != NV_ENC_SUCCESS) {
+          /* FIXME: what to do here? */
+          GST_ELEMENT_ERROR (nvenc, STREAM, ENCODE, (NULL),
+              ("Failed to unlock bitstream buffer %p, ret %d",
+                  lock_bs.outputBitstream, nv_ret));
+          state->out_bufs[i] = SHUTDOWN_COOKIE;
+          break;
+        }
+
+        GST_LOG_OBJECT (nvenc, "returning bitstream buffer %p to pool",
+            state->out_bufs[i]);
+        g_async_queue_push (nvenc->bitstream_pool, state->out_bufs[i]);
+      }
+
+      if (out_buf == SHUTDOWN_COOKIE)
+        break;
+    }
+
+    {
+      GstBuffer *output_buffer = gst_buffer_new ();
+
+      for (i = 0; i < state->n_buffers; i++)
+        output_buffer = gst_buffer_append (output_buffer, buffers[i]);
+
+      frame->output_buffer = output_buffer;
+    }
+
+    for (i = 0; i < state->n_buffers; i++) {
+      void *in_buf = state->in_bufs[i];
+      g_assert (in_buf != NULL);
+
+#if HAVE_NVENC_GST_GL
+      if (nvenc->gl_input) {
+        struct gl_input_resource *in_gl_resource = in_buf;
+
+        nv_ret =
+            NvEncUnmapInputResource (nvenc->encoder,
+            in_gl_resource->nv_mapped_resource.mappedResource);
+        if (nv_ret != NV_ENC_SUCCESS) {
+          GST_ERROR_OBJECT (nvenc, "Failed to unmap input resource %p, ret %d",
+              in_gl_resource, nv_ret);
+          break;
+        }
+
+        memset (&in_gl_resource->nv_mapped_resource, 0,
+            sizeof (in_gl_resource->nv_mapped_resource));
+      }
+#endif
+
+      g_async_queue_push (nvenc->in_bufs_pool, in_buf);
+    }
+
+    flow = gst_video_encoder_finish_frame (enc, frame);
+    frame = NULL;
+
+    if (flow != GST_FLOW_OK) {
+      GST_INFO_OBJECT (enc, "got flow %s", gst_flow_get_name (flow));
+      g_atomic_int_set (&nvenc->last_flow, flow);
+      break;
+    }
+  }
+  while (TRUE);
+
+  GST_INFO_OBJECT (nvenc, "exiting thread");
+
+  return NULL;
+}
+
+static gboolean
+gst_nv_base_enc_start_bitstream_thread (GstNvBaseEnc * nvenc)
+{
+  gchar *name = g_strdup_printf ("%s-read-bits", GST_OBJECT_NAME (nvenc));
+  /* spawn the bitstream reader; requires no running thread, empty queue */
+  g_assert (nvenc->bitstream_thread == NULL);
+
+  g_assert (g_async_queue_length (nvenc->bitstream_queue) == 0);
+
+  nvenc->bitstream_thread =
+      g_thread_try_new (name, gst_nv_base_enc_bitstream_thread, nvenc, NULL);
+
+  g_free (name);
+
+  if (nvenc->bitstream_thread == NULL)
+    return FALSE;
+
+  GST_INFO_OBJECT (nvenc, "started thread to read bitstream");
+  return TRUE;
+}
+
+static gboolean
+gst_nv_base_enc_stop_bitstream_thread (GstNvBaseEnc * nvenc)
+{
+  gpointer out_buf;
+
+  if (nvenc->bitstream_thread == NULL)
+    return TRUE;
+  /* FIXME: return still-queued output buffers to the pool, then queue the
+   * shutdown cookie so the reader thread leaves its loop */
+  GST_FIXME_OBJECT (nvenc, "stop bitstream reading thread properly");
+  g_async_queue_lock (nvenc->bitstream_queue);
+  g_async_queue_lock (nvenc->bitstream_pool);
+  while ((out_buf = g_async_queue_try_pop_unlocked (nvenc->bitstream_queue))) {
+    GST_INFO_OBJECT (nvenc, "stole bitstream buffer %p from queue", out_buf);
+    g_async_queue_push_unlocked (nvenc->bitstream_pool, out_buf);
+  }
+  g_async_queue_push_unlocked (nvenc->bitstream_queue, SHUTDOWN_COOKIE);
+  g_async_queue_unlock (nvenc->bitstream_pool);
+  g_async_queue_unlock (nvenc->bitstream_queue);
+  /* temporary unlock, so other thread can find and push frame; assumes the
+   * caller holds the stream lock */
+  GST_VIDEO_ENCODER_STREAM_UNLOCK (nvenc);
+  g_thread_join (nvenc->bitstream_thread);
+  GST_VIDEO_ENCODER_STREAM_LOCK (nvenc);
+
+  nvenc->bitstream_thread = NULL;
+  return TRUE;
+}
+
+/* Drains every queue; with @refill, returns all buffers to their own pool. */
+static void
+gst_nv_base_enc_reset_queues (GstNvBaseEnc * nvenc, gboolean refill)
+{
+  guint i;
+
+  GST_INFO_OBJECT (nvenc, "clearing queues");
+  while (g_async_queue_try_pop (nvenc->bitstream_queue) != NULL) {
+    /* discard: the buffers stay tracked in input_bufs / output_bufs */
+  }
+  while (g_async_queue_try_pop (nvenc->bitstream_pool) != NULL) {
+    /* discard */
+  }
+  while (g_async_queue_try_pop (nvenc->in_bufs_pool) != NULL) {
+    /* discard */
+  }
+
+  if (!refill)
+    return;
+  /* input buffers belong in in_bufs_pool, bitstream buffers in bitstream_pool */
+  GST_INFO_OBJECT (nvenc, "refilling buffer pools");
+  for (i = 0; i < nvenc->n_bufs; ++i) {
+    g_async_queue_push (nvenc->in_bufs_pool, nvenc->input_bufs[i]);
+    g_async_queue_push (nvenc->bitstream_pool, nvenc->output_bufs[i]);
+  }
+}
+
+/* Destroys every NVENC input/output buffer and releases the tracking arrays.
+ * Does nothing when no encoder session exists. */
+static void
+gst_nv_base_enc_free_buffers (GstNvBaseEnc * nvenc)
+{
+  NVENCSTATUS status;
+  guint idx;
+
+  if (nvenc->encoder == NULL)
+    return;
+
+  gst_nv_base_enc_reset_queues (nvenc, FALSE);
+
+  for (idx = 0; idx < nvenc->n_bufs; ++idx) {
+    NV_ENC_OUTPUT_PTR out_ptr = nvenc->output_bufs[idx];
+
+#if HAVE_NVENC_GST_GL
+    if (nvenc->gl_input) {
+      struct gl_input_resource *gl_res = nvenc->input_bufs[idx];
+
+      cuCtxPushCurrent (nvenc->cuda_ctx);
+      status = NvEncUnregisterResource (nvenc->encoder,
+          gl_res->nv_resource.registeredResource);
+      if (status != NV_ENC_SUCCESS)
+        GST_ERROR_OBJECT (nvenc, "Failed to unregister resource %p, ret %d",
+            gl_res, status);
+      /* NOTE(review): the CUDA scratch allocation behind cuda_pointer is not
+       * released anywhere in this function - confirm it is freed elsewhere */
+      g_free (gl_res);
+      cuCtxPopCurrent (NULL);
+    } else
+#endif
+    {
+      NV_ENC_INPUT_PTR input_buf = (NV_ENC_INPUT_PTR) nvenc->input_bufs[idx];
+
+      GST_DEBUG_OBJECT (nvenc, "Destroying input buffer %p", input_buf);
+      status = NvEncDestroyInputBuffer (nvenc->encoder, input_buf);
+      if (status != NV_ENC_SUCCESS)
+        GST_ERROR_OBJECT (nvenc, "Failed to destroy input buffer %p, ret %d",
+            input_buf, status);
+    }
+
+    GST_DEBUG_OBJECT (nvenc, "Destroying output bitstream buffer %p", out_ptr);
+    status = NvEncDestroyBitstreamBuffer (nvenc->encoder, out_ptr);
+    if (status != NV_ENC_SUCCESS)
+      GST_ERROR_OBJECT (nvenc, "Failed to destroy output buffer %p, ret %d",
+          out_ptr, status);
+  }
+
+  nvenc->n_bufs = 0;
+  g_free (nvenc->output_bufs);
+  nvenc->output_bufs = NULL;
+  g_free (nvenc->input_bufs);
+  nvenc->input_bufs = NULL;
+}
+
+/* Width of @plane: YUV formats use the component width (plane and component
+ * indices are assumed to match for now), everything else the frame width. */
+static inline guint
+_get_plane_width (GstVideoInfo * info, guint plane)
+{
+  /* RGB, GRAY */
+  if (!GST_VIDEO_INFO_IS_YUV (info))
+    return GST_VIDEO_INFO_WIDTH (info);
+
+  return GST_VIDEO_INFO_COMP_WIDTH (info, plane);
+}
+
+/* Height of @plane: YUV formats use the component height (plane and component
+ * indices are assumed to match for now), everything else the frame height. */
+static inline guint
+_get_plane_height (GstVideoInfo * info, guint plane)
+{
+  /* RGB, GRAY */
+  if (!GST_VIDEO_INFO_IS_YUV (info))
+    return GST_VIDEO_INFO_HEIGHT (info);
+
+  return GST_VIDEO_INFO_COMP_HEIGHT (info, plane);
+}
+
+/* Sum of the heights of all planes of @info, i.e. the number of rows needed
+ * to store the planes one below the other. */
+static inline gsize
+_get_frame_data_height (GstVideoInfo * info)
+{
+  gsize total_rows = 0;
+  gint plane;
+
+  for (plane = 0; plane < GST_VIDEO_INFO_N_PLANES (info); plane++)
+    total_rows += _get_plane_height (info, plane);
+  return total_rows;
+}
+
+void
+gst_nv_base_enc_set_max_encode_size (GstNvBaseEnc * nvenc, guint max_width,
+    guint max_height)
+{
+  nvenc->max_encode_width = max_width;  /* stored as given, not validated */
+  nvenc->max_encode_height = max_height;
+}
+
+void
+gst_nv_base_enc_get_max_encode_size (GstNvBaseEnc * nvenc, guint * max_width,
+    guint * max_height)
+{
+  *max_width = nvenc->max_encode_width; /* both out pointers must be non-NULL */
+  *max_height = nvenc->max_encode_height;
+}
+
+/* Handles a new input format: lets the subclass (re)configure the encoder,
+ * allocates the input and bitstream buffers on the first configuration only,
+ * then asks the subclass to set the output caps. */
+static gboolean
+gst_nv_base_enc_set_format (GstVideoEncoder * enc, GstVideoCodecState * state)
+{
+  GstNvBaseEncClass *nvenc_class = GST_NV_BASE_ENC_GET_CLASS (enc);
+  GstNvBaseEnc *nvenc = GST_NV_BASE_ENC (enc);
+  GstVideoInfo *info = &state->info;
+  GstVideoCodecState *old_state = nvenc->input_state;
+  NVENCSTATUS nv_ret;
+
+  g_assert (nvenc_class->initialize_encoder);
+  if (!nvenc_class->initialize_encoder (nvenc, old_state, state)) {
+    GST_ERROR_OBJECT (enc, "Subclass failed to reconfigure encoder");
+    return FALSE;
+  }
+
+  if (!nvenc->max_encode_width && !nvenc->max_encode_height) {
+    gst_nv_base_enc_set_max_encode_size (nvenc, GST_VIDEO_INFO_WIDTH (info),
+        GST_VIDEO_INFO_HEIGHT (info));
+  }
+
+  if (!old_state) {
+    nvenc->input_info = *info;
+    nvenc->gl_input = FALSE;
+  }
+
+  if (nvenc->input_state)
+    gst_video_codec_state_unref (nvenc->input_state);
+  nvenc->input_state = gst_video_codec_state_ref (state);
+  GST_INFO_OBJECT (nvenc, "configured encoder");
+
+  /* now allocate some buffers only on first configuration */
+  if (!old_state) {
+#if HAVE_NVENC_GST_GL
+    GstCapsFeatures *features;
+#endif
+    guint num_macroblocks, i;
+    guint input_width, input_height;
+
+    input_width = GST_VIDEO_INFO_WIDTH (info);
+    input_height = GST_VIDEO_INFO_HEIGHT (info);
+
+    num_macroblocks = (GST_ROUND_UP_16 (input_width) >> 4)
+        * (GST_ROUND_UP_16 (input_height) >> 4);
+    nvenc->n_bufs = (num_macroblocks >= 8160) ? 32 : 48;
+
+    /* input buffers */
+    nvenc->input_bufs = g_new0 (gpointer, nvenc->n_bufs);
+
+#if HAVE_NVENC_GST_GL
+    features = gst_caps_get_features (state->caps, 0);
+    if (gst_caps_features_contains (features,
+            GST_CAPS_FEATURE_MEMORY_GL_MEMORY)) {
+      guint pixel_depth = 0;
+      nvenc->gl_input = TRUE;
+
+      for (i = 0; i < GST_VIDEO_INFO_N_COMPONENTS (info); i++) {
+        pixel_depth += GST_VIDEO_INFO_COMP_DEPTH (info, i);
+      }
+
+      cuCtxPushCurrent (nvenc->cuda_ctx);
+      for (i = 0; i < nvenc->n_bufs; ++i) {
+        struct gl_input_resource *in_gl_resource =
+            g_new0 (struct gl_input_resource, 1);
+        CUresult cu_ret;
+
+        memset (&in_gl_resource->nv_resource, 0,
+            sizeof (in_gl_resource->nv_resource));
+        memset (&in_gl_resource->nv_mapped_resource, 0,
+            sizeof (in_gl_resource->nv_mapped_resource));
+
+        /* scratch buffer to gather non-contiguous planes into one contiguous buffer */
+        cu_ret =
+            cuMemAllocPitch ((CUdeviceptr *) & in_gl_resource->cuda_pointer,
+            &in_gl_resource->cuda_stride, input_width,
+            _get_frame_data_height (info), 16);
+        if (cu_ret != CUDA_SUCCESS) {
+          const gchar *err;
+
+          cuGetErrorString (cu_ret, &err);
+          GST_ERROR_OBJECT (nvenc, "failed to alocate cuda scratch buffer "
+              "ret %d error :%s", cu_ret, err);
+          g_assert_not_reached ();
+        }
+
+        in_gl_resource->nv_resource.version = NV_ENC_REGISTER_RESOURCE_VER;
+        in_gl_resource->nv_resource.resourceType =
+            NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR;
+        in_gl_resource->nv_resource.width = input_width;
+        in_gl_resource->nv_resource.height = input_height;
+        in_gl_resource->nv_resource.pitch = in_gl_resource->cuda_stride;
+        in_gl_resource->nv_resource.bufferFormat =
+            gst_nvenc_get_nv_buffer_format (GST_VIDEO_INFO_FORMAT (info));
+        in_gl_resource->nv_resource.resourceToRegister =
+            in_gl_resource->cuda_pointer;
+
+        nv_ret =
+            NvEncRegisterResource (nvenc->encoder,
+            &in_gl_resource->nv_resource);
+        if (nv_ret != NV_ENC_SUCCESS)
+          GST_ERROR_OBJECT (nvenc, "Failed to register resource %p, ret %d",
+              in_gl_resource, nv_ret);
+
+        nvenc->input_bufs[i] = in_gl_resource;
+        g_async_queue_push (nvenc->in_bufs_pool, nvenc->input_bufs[i]);
+      }
+
+      cuCtxPopCurrent (NULL);
+    } else
+#endif
+    {
+      for (i = 0; i < nvenc->n_bufs; ++i) {
+        NV_ENC_CREATE_INPUT_BUFFER cin_buf = { 0, };
+
+        cin_buf.version = NV_ENC_CREATE_INPUT_BUFFER_VER;
+        cin_buf.width = GST_ROUND_UP_32 (input_width);
+        cin_buf.height = GST_ROUND_UP_32 (input_height);
+        cin_buf.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_CACHED;
+        cin_buf.bufferFmt =
+            gst_nvenc_get_nv_buffer_format (GST_VIDEO_INFO_FORMAT (info));
+
+        nv_ret = NvEncCreateInputBuffer (nvenc->encoder, &cin_buf);
+        if (nv_ret != NV_ENC_SUCCESS) {
+          GST_WARNING_OBJECT (enc, "Failed to allocate input buffer: %d",
+              nv_ret);
+          /* FIXME: clean up */
+          return FALSE;
+        }
+
+        nvenc->input_bufs[i] = cin_buf.inputBuffer;
+
+        GST_INFO_OBJECT (nvenc, "allocated  input buffer %2d: %p", i,
+            nvenc->input_bufs[i]);
+
+        g_async_queue_push (nvenc->in_bufs_pool, nvenc->input_bufs[i]);
+      }
+    }
+
+    /* output buffers */
+    nvenc->output_bufs = g_new0 (NV_ENC_OUTPUT_PTR, nvenc->n_bufs);
+    for (i = 0; i < nvenc->n_bufs; ++i) {
+      NV_ENC_CREATE_BITSTREAM_BUFFER cout_buf = { 0, };
+
+      cout_buf.version = NV_ENC_CREATE_BITSTREAM_BUFFER_VER;
+
+      /* 1 MB should be large enough to hold most output frames.
+       * NVENC will automatically increase this if it's not enough. */
+      cout_buf.size = 1024 * 1024;
+      cout_buf.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_CACHED;
+
+      nv_ret = NvEncCreateBitstreamBuffer (nvenc->encoder, &cout_buf);
+      if (nv_ret != NV_ENC_SUCCESS) {
+        GST_WARNING_OBJECT (enc, "Failed to allocate output buffer: %d", nv_ret);
+        /* FIXME: clean up */
+        return FALSE;
+      }
+
+      nvenc->output_bufs[i] = cout_buf.bitstreamBuffer;
+
+      GST_INFO_OBJECT (nvenc, "allocated output buffer %2d: %p", i,
+          nvenc->output_bufs[i]);
+
+      g_async_queue_push (nvenc->bitstream_pool, nvenc->output_bufs[i]);
+    }
+
+#if 0
+    /* Get SPS/PPS */
+    {
+      NV_ENC_SEQUENCE_PARAM_PAYLOAD seq_param = { 0 };
+      uint32_t seq_size = 0;
+
+      seq_param.version = NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER;
+      seq_param.spsppsBuffer = g_alloca (1024);
+      seq_param.inBufferSize = 1024;
+      seq_param.outSPSPPSPayloadSize = &seq_size;
+
+      nv_ret = NvEncGetSequenceParams (nvenc->encoder, &seq_param);
+      if (nv_ret != NV_ENC_SUCCESS) {
+        GST_WARNING_OBJECT (enc, "Failed to retrieve SPS/PPS: %d", nv_ret);
+        return FALSE;
+      }
+
+      /* FIXME: use SPS/PPS */
+      GST_MEMDUMP_OBJECT (enc, "SPS/PPS", seq_param.spsppsBuffer, seq_size);
+    }
+#endif
+  }
+
+  g_assert (nvenc_class->set_src_caps);
+  if (!nvenc_class->set_src_caps (nvenc, state)) {
+    GST_ERROR_OBJECT (nvenc, "Subclass failed to set output caps");
+    /* FIXME: clean up */
+    return FALSE;
+  }
+
+  return TRUE;
+}
+
+static inline guint
+_plane_get_n_components (GstVideoInfo * info, guint plane)
+{
+  switch (GST_VIDEO_INFO_FORMAT (info)) {       /* result scales the plane width into a row's byte count */
+    case GST_VIDEO_FORMAT_RGBx:
+    case GST_VIDEO_FORMAT_BGRx:
+    case GST_VIDEO_FORMAT_xRGB:
+    case GST_VIDEO_FORMAT_xBGR:
+    case GST_VIDEO_FORMAT_RGBA:
+    case GST_VIDEO_FORMAT_BGRA:
+    case GST_VIDEO_FORMAT_ARGB:
+    case GST_VIDEO_FORMAT_ABGR:
+    case GST_VIDEO_FORMAT_AYUV:
+      return 4;
+    case GST_VIDEO_FORMAT_RGB:
+    case GST_VIDEO_FORMAT_BGR:
+    case GST_VIDEO_FORMAT_RGB16:
+    case GST_VIDEO_FORMAT_BGR16:
+      return 3;                 /* NOTE(review): RGB16/BGR16 look like 2 bytes per pixel - confirm 3 is intended */
+    case GST_VIDEO_FORMAT_GRAY16_BE:
+    case GST_VIDEO_FORMAT_GRAY16_LE:
+    case GST_VIDEO_FORMAT_YUY2:
+    case GST_VIDEO_FORMAT_UYVY:
+      return 2;
+    case GST_VIDEO_FORMAT_NV12:
+    case GST_VIDEO_FORMAT_NV21:
+      return plane == 0 ? 1 : 2;        /* second plane carries two values per sample */
+    case GST_VIDEO_FORMAT_GRAY8:
+    case GST_VIDEO_FORMAT_Y444:
+    case GST_VIDEO_FORMAT_Y42B:
+    case GST_VIDEO_FORMAT_Y41B:
+    case GST_VIDEO_FORMAT_I420:
+    case GST_VIDEO_FORMAT_YV12:
+      return 1;
+    default:
+      g_assert_not_reached ();  /* any format not listed above is a programming error */
+      return 1;
+  }
+}
+
+#if HAVE_NVENC_GST_GL
+struct map_gl_input
+{
+  GstNvBaseEnc *nvenc;
+  GstVideoCodecFrame *frame;    /* frame whose input buffer is copied */
+  GstVideoInfo *info;
+  struct gl_input_resource *in_gl_resource;     /* destination scratch resource */
+};
+
+static void
+_map_gl_input_buffer (GstGLContext * context, struct map_gl_input *data)
+{
+  cudaError_t cuda_ret;
+  guint8 *data_pointer;
+  guint i;
+  /* copy every GL plane into the CUDA scratch buffer, one below the other */
+  cuCtxPushCurrent (data->nvenc->cuda_ctx);
+  data_pointer = data->in_gl_resource->cuda_pointer;
+  for (i = 0; i < GST_VIDEO_INFO_N_PLANES (data->info); i++) {
+    guint plane_n_components;
+    GstGLBaseBuffer *gl_buf_obj;
+    GstGLMemory *gl_mem;
+    guint src_stride, dest_stride;
+
+    gl_mem =
+        (GstGLMemory *) gst_buffer_peek_memory (data->frame->input_buffer, i);
+    g_assert (gst_is_gl_memory ((GstMemory *) gl_mem));
+    data->in_gl_resource->gl_mem[i] = gl_mem;
+    plane_n_components = _plane_get_n_components (data->info, i);
+
+    gl_buf_obj = (GstGLBaseBuffer *) gl_mem;
+
+    /* get the texture into the PBO */
+    gst_gl_memory_upload_transfer (gl_mem);
+    gst_gl_memory_download_transfer (gl_mem);
+
+    GST_LOG_OBJECT (data->nvenc, "attempting to copy texture %u into cuda",
+        gl_mem->tex_id);
+
+    cuda_ret =
+        cudaGraphicsGLRegisterBuffer (&data->in_gl_resource->cuda_texture,
+        gl_buf_obj->id, cudaGraphicsRegisterFlagsReadOnly);
+    if (cuda_ret != cudaSuccess) {
+      GST_ERROR_OBJECT (data->nvenc, "failed to register GL texture %u to cuda "
+          "ret :%d", gl_mem->tex_id, cuda_ret);
+      g_assert_not_reached ();
+    }
+
+    cuda_ret =
+        cudaGraphicsMapResources (1, &data->in_gl_resource->cuda_texture, 0);
+    if (cuda_ret != cudaSuccess) {
+      GST_ERROR_OBJECT (data->nvenc, "failed to map GL texture %u into cuda "
+          "ret :%d", gl_mem->tex_id, cuda_ret);
+      g_assert_not_reached ();
+    }
+
+    cuda_ret =
+        cudaGraphicsResourceGetMappedPointer (&data->in_gl_resource->
+        cuda_plane_pointers[i], &data->in_gl_resource->cuda_num_bytes,
+        data->in_gl_resource->cuda_texture);
+    if (cuda_ret != cudaSuccess) {
+      GST_ERROR_OBJECT (data->nvenc, "failed to get mapped pointer of map GL "
+          "texture %u in cuda ret :%d", gl_mem->tex_id, cuda_ret);
+      g_assert_not_reached ();
+    }
+
+    src_stride = GST_VIDEO_INFO_PLANE_STRIDE (data->info, i);
+    dest_stride = data->in_gl_resource->cuda_stride;
+
+    /* copy into scratch buffer; the row length is plane width * components */
+    cuda_ret =
+        cudaMemcpy2D (data_pointer, dest_stride,
+        data->in_gl_resource->cuda_plane_pointers[i], src_stride,
+        _get_plane_width (data->info, i) * plane_n_components,
+        _get_plane_height (data->info, i), cudaMemcpyDeviceToDevice);
+    if (cuda_ret != cudaSuccess) {
+      GST_ERROR_OBJECT (data->nvenc, "failed to copy GL texture %u into cuda "
+          "ret :%d", gl_mem->tex_id, cuda_ret);
+      g_assert_not_reached ();
+    }
+
+    cuda_ret =
+        cudaGraphicsUnmapResources (1, &data->in_gl_resource->cuda_texture, 0);
+    if (cuda_ret != cudaSuccess) {
+      GST_ERROR_OBJECT (data->nvenc, "failed to unmap GL texture %u from cuda "
+          "ret :%d", gl_mem->tex_id, cuda_ret);
+      g_assert_not_reached ();
+    }
+
+    cuda_ret =
+        cudaGraphicsUnregisterResource (data->in_gl_resource->cuda_texture);
+    if (cuda_ret != cudaSuccess) {
+      GST_ERROR_OBJECT (data->nvenc, "failed to unregister GL texture %u from "
+          "cuda ret :%d", gl_mem->tex_id, cuda_ret);
+      g_assert_not_reached ();
+    }
+    /* the next plane starts right after this one in the scratch buffer */
+    data_pointer =
+        data_pointer +
+        data->in_gl_resource->cuda_stride *
+        _get_plane_height (&data->nvenc->input_info, i);
+  }
+  cuCtxPopCurrent (NULL);
+}
+#endif
+
+static GstFlowReturn
+_acquire_input_buffer (GstNvBaseEnc * nvenc, gpointer * input)
+{
+  g_assert (input);
+  /* blocks until a buffer is free; the stream lock is dropped while waiting */
+  GST_LOG_OBJECT (nvenc, "acquiring input buffer..");
+  GST_VIDEO_ENCODER_STREAM_UNLOCK (nvenc);
+  *input = g_async_queue_pop (nvenc->in_bufs_pool);
+  GST_VIDEO_ENCODER_STREAM_LOCK (nvenc);
+
+  return GST_FLOW_OK;           /* currently the only possible result */
+}
+
+/* Submits one input/output buffer pair to NVENC.  The output buffer is queued
+ * for the bitstream thread on success; on failure both return to their pools. */
+static GstFlowReturn
+_submit_input_buffer (GstNvBaseEnc * nvenc, GstVideoCodecFrame * frame,
+    GstVideoFrame * vframe, void *inputBuffer, void *inputBufferPtr,
+    NV_ENC_BUFFER_FORMAT bufferFormat, void *outputBufferPtr)
+{
+  GstNvBaseEncClass *nvenc_class = GST_NV_BASE_ENC_GET_CLASS (nvenc);
+  NV_ENC_PIC_PARAMS pic_params = { 0, };
+  NVENCSTATUS nv_ret;
+
+  GST_LOG_OBJECT (nvenc, "%u: input buffer %p, output buffer %p, "
+      "pts %" GST_TIME_FORMAT, frame->system_frame_number, inputBuffer,
+      outputBufferPtr, GST_TIME_ARGS (frame->pts));
+
+  pic_params.version = NV_ENC_PIC_PARAMS_VER;
+  pic_params.inputBuffer = inputBufferPtr;
+  pic_params.bufferFmt = bufferFormat;
+  pic_params.inputWidth = GST_VIDEO_FRAME_WIDTH (vframe);
+  pic_params.inputHeight = GST_VIDEO_FRAME_HEIGHT (vframe);
+  pic_params.outputBitstream = outputBufferPtr;
+  pic_params.completionEvent = NULL;
+  if (GST_VIDEO_FRAME_IS_INTERLACED (vframe)) {
+    if (GST_VIDEO_FRAME_IS_TFF (vframe))
+      pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_TOP_BOTTOM;
+    else
+      pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_BOTTOM_TOP;
+  } else {
+    pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;
+  }
+  pic_params.inputTimeStamp = frame->pts;
+  pic_params.inputDuration =
+      GST_CLOCK_TIME_IS_VALID (frame->duration) ? frame->duration : 0;
+  pic_params.frameIdx = frame->system_frame_number;
+
+  /* encodePicFlags is already 0 from the zero-initialisation above */
+  if (GST_VIDEO_CODEC_FRAME_IS_FORCE_KEYFRAME (frame))
+    pic_params.encodePicFlags = NV_ENC_PIC_FLAG_FORCEIDR;
+
+  if (nvenc_class->set_pic_params
+      && !nvenc_class->set_pic_params (nvenc, frame, &pic_params)) {
+    GST_ERROR_OBJECT (nvenc, "Subclass failed to submit buffer");
+    goto requeue;
+  }
+
+  nv_ret = NvEncEncodePicture (nvenc->encoder, &pic_params);
+  if (nv_ret == NV_ENC_SUCCESS) {
+    GST_LOG_OBJECT (nvenc, "Encoded picture");
+  } else if (nv_ret == NV_ENC_ERR_NEED_MORE_INPUT) {
+    /* FIXME: we should probably queue pending output buffers here and only
+     * submit them to the async queue once we got success back */
+    GST_DEBUG_OBJECT (nvenc, "Encoded picture (encoder needs more input)");
+  } else {
+    GST_ERROR_OBJECT (nvenc, "Failed to encode picture: %d", nv_ret);
+    goto requeue;
+  }
+
+  g_async_queue_push (nvenc->bitstream_queue, outputBufferPtr);
+  return GST_FLOW_OK;
+
+requeue:
+  GST_DEBUG_OBJECT (nvenc, "re-enqueueing input buffer %p", inputBuffer);
+  g_async_queue_push (nvenc->in_bufs_pool, inputBuffer);
+  GST_DEBUG_OBJECT (nvenc, "re-enqueueing output buffer %p", outputBufferPtr);
+  g_async_queue_push (nvenc->bitstream_pool, outputBufferPtr);
+  return GST_FLOW_ERROR;
+}
+
+/* Copies (or, for GL input, maps) the frame into a free NVENC input buffer
+ * and submits it; the bitstream thread finishes the frame once it is encoded. */
+static GstFlowReturn
+gst_nv_base_enc_handle_frame (GstVideoEncoder * enc, GstVideoCodecFrame * frame)
+{
+  gpointer input_buffer = NULL;
+  GstNvBaseEnc *nvenc = GST_NV_BASE_ENC (enc);
+  NV_ENC_OUTPUT_PTR out_buf;
+  NVENCSTATUS nv_ret;
+  GstVideoFrame vframe;
+  GstVideoInfo *info = &nvenc->input_state->info;
+  GstFlowReturn flow = GST_FLOW_OK;
+  GstMapFlags in_map_flags = GST_MAP_READ;
+  struct frame_state *state;
+  guint frame_n = 0;
+
+  g_assert (nvenc->encoder != NULL);
+
+#if HAVE_NVENC_GST_GL
+  if (nvenc->gl_input)
+    in_map_flags |= GST_MAP_GL;
+#endif
+
+  if (!gst_video_frame_map (&vframe, info, frame->input_buffer, in_map_flags))
+    return GST_FLOW_ERROR;
+
+  /* make sure our thread that waits for output to be ready is started */
+  if (nvenc->bitstream_thread == NULL) {
+    if (!gst_nv_base_enc_start_bitstream_thread (nvenc))
+      goto error;
+  }
+
+  flow = _acquire_input_buffer (nvenc, &input_buffer);
+  if (flow != GST_FLOW_OK)
+    goto out;
+  if (input_buffer == NULL)
+    goto error;
+
+  /* the frame owns the state from here on, so error paths cannot leak it */
+  state = frame->user_data;
+  if (!state)
+    state = g_new0 (struct frame_state, 1);
+  state->n_buffers = 1;
+  frame->user_data = state;
+  frame->user_data_destroy_notify = (GDestroyNotify) g_free;
+
+#if HAVE_NVENC_GST_GL
+  if (nvenc->gl_input) {
+    struct gl_input_resource *in_gl_resource = input_buffer;
+    struct map_gl_input data;
+
+    GST_LOG_OBJECT (enc, "got input buffer %p", in_gl_resource);
+
+    in_gl_resource->gl_mem[0] =
+        (GstGLMemory *) gst_buffer_peek_memory (frame->input_buffer, 0);
+    g_assert (gst_is_gl_memory ((GstMemory *) in_gl_resource->gl_mem[0]));
+
+    data.nvenc = nvenc;
+    data.frame = frame;
+    data.info = &vframe.info;
+    data.in_gl_resource = in_gl_resource;
+
+    gst_gl_context_thread_add (in_gl_resource->gl_mem[0]->mem.context,
+        (GstGLContextThreadFunc) _map_gl_input_buffer, &data);
+
+    in_gl_resource->nv_mapped_resource.version = NV_ENC_MAP_INPUT_RESOURCE_VER;
+    in_gl_resource->nv_mapped_resource.registeredResource =
+        in_gl_resource->nv_resource.registeredResource;
+
+    nv_ret =
+        NvEncMapInputResource (nvenc->encoder,
+        &in_gl_resource->nv_mapped_resource);
+    if (nv_ret != NV_ENC_SUCCESS) {
+      GST_ERROR_OBJECT (nvenc, "Failed to map input resource %p, ret %d",
+          in_gl_resource, nv_ret);
+      goto error;
+    }
+
+    out_buf = g_async_queue_try_pop (nvenc->bitstream_pool);
+    if (out_buf == NULL) {
+      GST_DEBUG_OBJECT (nvenc, "wait for output buf to become available again");
+      out_buf = g_async_queue_pop (nvenc->bitstream_pool);
+    }
+
+    state->in_bufs[frame_n] = in_gl_resource;
+    state->out_bufs[frame_n++] = out_buf;
+
+    flow =
+        _submit_input_buffer (nvenc, frame, &vframe, in_gl_resource,
+        in_gl_resource->nv_mapped_resource.mappedResource,
+        in_gl_resource->nv_mapped_resource.mappedBufferFmt, out_buf);
+
+    /* encoder will keep frame in list internally, we'll look it up again later
+     * in the thread where we get the output buffers and finish it there */
+    gst_video_codec_frame_unref (frame);
+    frame = NULL;
+  }
+#endif
+
+  if (!nvenc->gl_input) {
+    NV_ENC_LOCK_INPUT_BUFFER in_buf_lock = { 0, };
+    NV_ENC_INPUT_PTR in_buf = input_buffer;
+    guint8 *src, *dest;
+    guint src_stride, dest_stride;
+    guint height, width;
+    guint y;
+
+    GST_LOG_OBJECT (enc, "got input buffer %p", in_buf);
+
+    in_buf_lock.version = NV_ENC_LOCK_INPUT_BUFFER_VER;
+    in_buf_lock.inputBuffer = in_buf;
+
+    nv_ret = NvEncLockInputBuffer (nvenc->encoder, &in_buf_lock);
+    if (nv_ret != NV_ENC_SUCCESS) {
+      GST_ERROR_OBJECT (nvenc, "Failed to lock input buffer: %d", nv_ret);
+      /* FIXME: post proper error message */
+      goto error;
+    }
+    GST_LOG_OBJECT (nvenc, "Locked input buffer %p", in_buf);
+
+    width = GST_VIDEO_FRAME_WIDTH (&vframe);
+    height = GST_VIDEO_FRAME_HEIGHT (&vframe);
+
+    // FIXME: this only works for NV12
+    g_assert (GST_VIDEO_FRAME_FORMAT (&vframe) == GST_VIDEO_FORMAT_NV12);
+
+    /* copy Y plane */
+    src = GST_VIDEO_FRAME_PLANE_DATA (&vframe, 0);
+    src_stride = GST_VIDEO_FRAME_PLANE_STRIDE (&vframe, 0);
+    dest = in_buf_lock.bufferDataPtr;
+    dest_stride = in_buf_lock.pitch;
+    for (y = 0; y < height; ++y) {
+      memcpy (dest, src, width);
+      dest += dest_stride;
+      src += src_stride;
+    }
+
+    /* copy UV plane: an interleaved chroma row is GST_ROUND_UP_2 (width) bytes */
+    src = GST_VIDEO_FRAME_PLANE_DATA (&vframe, 1);
+    src_stride = GST_VIDEO_FRAME_PLANE_STRIDE (&vframe, 1);
+    dest =
+        (guint8 *) in_buf_lock.bufferDataPtr +
+        GST_ROUND_UP_32 (GST_VIDEO_INFO_HEIGHT (&nvenc->input_info)) *
+        in_buf_lock.pitch;
+    dest_stride = in_buf_lock.pitch;
+    for (y = 0; y < GST_ROUND_UP_2 (height) / 2; ++y) {
+      memcpy (dest, src, GST_ROUND_UP_2 (width));
+      dest += dest_stride;
+      src += src_stride;
+    }
+
+    nv_ret = NvEncUnlockInputBuffer (nvenc->encoder, in_buf);
+    if (nv_ret != NV_ENC_SUCCESS) {
+      GST_ERROR_OBJECT (nvenc, "Failed to unlock input buffer: %d", nv_ret);
+      goto error;
+    }
+
+    out_buf = g_async_queue_try_pop (nvenc->bitstream_pool);
+    if (out_buf == NULL) {
+      GST_DEBUG_OBJECT (nvenc, "wait for output buf to become available again");
+      out_buf = g_async_queue_pop (nvenc->bitstream_pool);
+    }
+
+    state->in_bufs[frame_n] = in_buf;
+    state->out_bufs[frame_n++] = out_buf;
+
+    flow =
+        _submit_input_buffer (nvenc, frame, &vframe, in_buf, in_buf,
+        gst_nvenc_get_nv_buffer_format (GST_VIDEO_INFO_FORMAT (info)), out_buf);
+
+    /* encoder will keep frame in list internally, we'll look it up again later
+     * in the thread where we get the output buffers and finish it there */
+    gst_video_codec_frame_unref (frame);
+    frame = NULL;
+  }
+
+  if (flow != GST_FLOW_OK)
+    goto out;
+
+  flow = g_atomic_int_get (&nvenc->last_flow);
+
+out:
+
+  gst_video_frame_unmap (&vframe);
+
+  return flow;
+
+error:
+  flow = GST_FLOW_ERROR;
+  goto out;
+}
+
+static gboolean
+gst_nv_base_enc_drain_encoder (GstNvBaseEnc * nvenc)
+{
+  NV_ENC_PIC_PARAMS pic_params = { 0, };
+  NVENCSTATUS nv_ret;
+
+  GST_INFO_OBJECT (nvenc, "draining encoder");
+
+  if (nvenc->input_state == NULL) {
+    GST_DEBUG_OBJECT (nvenc, "no input state, nothing to do");
+    return TRUE;
+  }
+  /* submit an otherwise empty picture that only carries the EOS flag */
+  pic_params.version = NV_ENC_PIC_PARAMS_VER;
+  pic_params.encodePicFlags = NV_ENC_PIC_FLAG_EOS;
+
+  nv_ret = NvEncEncodePicture (nvenc->encoder, &pic_params);
+  if (nv_ret != NV_ENC_SUCCESS) {
+    GST_LOG_OBJECT (nvenc, "Failed to drain encoder, ret %d", nv_ret);
+    return FALSE;
+  }
+
+  return TRUE;
+}
+
+static GstFlowReturn
+gst_nv_base_enc_finish (GstVideoEncoder * enc)
+{
+  GstNvBaseEnc *nvenc = GST_NV_BASE_ENC (enc);
+
+  GST_FIXME_OBJECT (enc, "implement finish");
+
+  gst_nv_base_enc_drain_encoder (nvenc);        /* result is ignored */
+  /* NOTE(review): meant to wait for the remaining output, but stopping the
+   * thread first hands queued buffers back to the pool unread - confirm */
+  gst_nv_base_enc_stop_bitstream_thread (nvenc);
+
+  return GST_FLOW_OK;
+}
+
+#if 0
+static gboolean
+gst_nv_base_enc_flush (GstVideoEncoder * enc)
+{
+  GstNvBaseEnc *nvenc = GST_NV_BASE_ENC (enc);
+  GST_INFO_OBJECT (nvenc, "done flushing encoder");
+  return TRUE;
+}
+#endif
+
+static void
+gst_nv_base_enc_set_property (GObject * object, guint prop_id,
+    const GValue * value, GParamSpec * pspec)
+{
+  GstNvBaseEnc *nvenc = GST_NV_BASE_ENC (object);
+  /* the CUDA device id is the only property handled here */
+  switch (prop_id) {
+    case PROP_DEVICE_ID:
+      nvenc->cuda_device_id = g_value_get_uint (value);
+      break;
+    default:
+      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+      break;
+  }
+}
+
+static void
+gst_nv_base_enc_get_property (GObject * object, guint prop_id, GValue * value,
+    GParamSpec * pspec)
+{
+  GstNvBaseEnc *nvenc = GST_NV_BASE_ENC (object);
+  /* the CUDA device id is the only property handled here */
+  switch (prop_id) {
+    case PROP_DEVICE_ID:
+      g_value_set_uint (value, nvenc->cuda_device_id);
+      break;
+    default:
+      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+      break;
+  }
+}
diff --git a/sys/nvenc/gstnvbaseenc.h b/sys/nvenc/gstnvbaseenc.h
new file mode 100644
index 0000000000..0a843e9eff
--- /dev/null
+++ b/sys/nvenc/gstnvbaseenc.h
@@ -0,0 +1,114 @@
+/* GStreamer NVENC plugin
+ * Copyright (C) 2015 Centricular Ltd
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __GST_NV_BASE_ENC_H_INCLUDED__
+#define __GST_NV_BASE_ENC_H_INCLUDED__
+
+#include "gstnvenc.h"
+
+#include <gst/video/gstvideoencoder.h>
+
+#define GST_TYPE_NV_BASE_ENC \
+  (gst_nv_base_enc_get_type())
+#define GST_NV_BASE_ENC(obj) \
+  (G_TYPE_CHECK_INSTANCE_CAST((obj),GST_TYPE_NV_BASE_ENC,GstNvBaseEnc))
+#define GST_NV_BASE_ENC_CLASS(klass) \
+  (G_TYPE_CHECK_CLASS_CAST((klass),GST_TYPE_NV_BASE_ENC,GstNvBaseEncClass))
+#define GST_NV_BASE_ENC_GET_CLASS(obj) \
+  (G_TYPE_INSTANCE_GET_CLASS((obj),GST_TYPE_NV_BASE_ENC,GstNvBaseEncClass))
+#define GST_IS_NV_BASE_ENC(obj) \
+  (G_TYPE_CHECK_INSTANCE_TYPE((obj),GST_TYPE_NV_BASE_ENC))
+#define GST_IS_NV_BASE_ENC_CLASS(klass) \
+  (G_TYPE_CHECK_CLASS_TYPE((klass),GST_TYPE_NV_BASE_ENC))
+
+typedef struct {
+  GstVideoEncoder video_encoder;
+
+  /* properties */
+  guint           cuda_device_id;
+
+  CUcontext       cuda_ctx;
+  void          * encoder;
+
+  /* the supported input formats */
+  GValue        * input_formats;                  /* OBJECT LOCK */
+
+  GstVideoCodecState *input_state;
+  gboolean            gl_input;
+
+  /* allocated buffers */
+  gpointer          *input_bufs;   /* array of n_bufs input buffers  */
+  NV_ENC_OUTPUT_PTR *output_bufs;  /* array of n_bufs output buffers */
+  guint              n_bufs;
+
+  /* input and output buffers currently available */
+  GAsyncQueue    *in_bufs_pool;
+  GAsyncQueue    *bitstream_pool;
+
+  /* output bufs in use (input bufs in use are tracked via the codec frames) */
+  GAsyncQueue    *bitstream_queue;
+
+  /* we spawn a thread that does the (blocking) waits for output buffers
+   * to become available, so we can continue to feed data to the encoder
+   * while we wait */
+  GThread        *bitstream_thread;
+
+  /* supported interlacing input modes.
+   * 0 = none, 1 = fields, 2 = interleaved */
+  gint            interlace_modes;
+
+  void           *display;            /* GstGLDisplay */
+  void           *other_context;      /* GstGLContext */
+
+  /* the maximum buffer size the encoder is configured for */
+  guint               max_encode_width;
+  guint               max_encode_height;
+
+  GstVideoInfo        input_info;     /* buffer configuration for buffers sent to NVENC */
+
+  GstFlowReturn   last_flow;          /* ATOMIC */
+} GstNvBaseEnc;
+
+typedef struct {
+  GstVideoEncoderClass video_encoder_class;
+
+  GUID codec_id;
+  /* subclass hooks; set_pic_params may be NULL, the other two are required */
+  gboolean (*initialize_encoder) (GstNvBaseEnc * nvenc,
+                                  GstVideoCodecState * old_state,
+                                  GstVideoCodecState * state);
+  gboolean (*set_src_caps)       (GstNvBaseEnc * nvenc,
+                                  GstVideoCodecState * state);
+  gboolean (*set_pic_params)     (GstNvBaseEnc * nvenc,
+                                  GstVideoCodecFrame * frame,
+                                  NV_ENC_PIC_PARAMS * pic_params);
+} GstNvBaseEncClass;
+
+G_GNUC_INTERNAL
+GType gst_nv_base_enc_get_type (void);
+
+
+void gst_nv_base_enc_get_max_encode_size      (GstNvBaseEnc * nvenc,
+                                               guint * max_width,
+                                               guint * max_height);
+void gst_nv_base_enc_set_max_encode_size      (GstNvBaseEnc * nvenc,
+                                               guint max_width,
+                                               guint max_height);
+
+#endif /* __GST_NV_BASE_ENC_H_INCLUDED__ */
diff --git a/sys/nvenc/gstnvenc.c b/sys/nvenc/gstnvenc.c
new file mode 100644
index 0000000000..6b0d5685c1
--- /dev/null
+++ b/sys/nvenc/gstnvenc.c
@@ -0,0 +1,328 @@
+/* GStreamer NVENC plugin
+ * Copyright (C) 2015 Centricular Ltd
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "gstnvenc.h"
+#include "gstnvh264enc.h"
+
+/* debug category shared by the whole plugin, initialised in plugin_init() */
+GST_DEBUG_CATEGORY (gst_nvenc_debug);
+
+/* NVENC function table, filled in by NvEncodeAPICreateInstance() in
+ * plugin_init(). The NvEnc* wrappers below dispatch through it. */
+static NV_ENCODE_API_FUNCTION_LIST nvenc_api;
+
+/* Dispatches to nvenc_api.nvEncOpenEncodeSessionEx and returns its status
+ * unchanged; asserts that the function table entry has been loaded. */
+NVENCSTATUS
+NvEncOpenEncodeSessionEx (NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS * params,
+    void **encoder)
+{
+  g_assert (nvenc_api.nvEncOpenEncodeSessionEx != NULL);
+  return nvenc_api.nvEncOpenEncodeSessionEx (params, encoder);
+}
+
+/* Dispatches to nvenc_api.nvEncDestroyEncoder and returns its status
+ * unchanged; asserts that the function table entry has been loaded. */
+NVENCSTATUS
+NvEncDestroyEncoder (void *encoder)
+{
+  g_assert (nvenc_api.nvEncDestroyEncoder != NULL);
+  return nvenc_api.nvEncDestroyEncoder (encoder);
+}
+
+/* Dispatches to nvenc_api.nvEncGetEncodeGUIDs and returns its status
+ * unchanged; asserts that the function table entry has been loaded. */
+NVENCSTATUS
+NvEncGetEncodeGUIDs (void *encoder, GUID * array, uint32_t array_size,
+    uint32_t * count)
+{
+  g_assert (nvenc_api.nvEncGetEncodeGUIDs != NULL);
+  return nvenc_api.nvEncGetEncodeGUIDs (encoder, array, array_size, count);
+}
+
+/* Dispatches to nvenc_api.nvEncGetEncodeProfileGUIDCount and returns its
+ * status unchanged; asserts that the function table entry has been loaded. */
+NVENCSTATUS
+NvEncGetEncodeProfileGUIDCount (void *encoder, GUID encodeGUID,
+    uint32_t * encodeProfileGUIDCount)
+{
+  g_assert (nvenc_api.nvEncGetEncodeProfileGUIDCount != NULL);
+  return nvenc_api.nvEncGetEncodeProfileGUIDCount (encoder, encodeGUID,
+      encodeProfileGUIDCount);
+}
+
+/* Dispatches to nvenc_api.nvEncGetEncodeProfileGUIDs and returns its status
+ * unchanged; asserts that the function table entry has been loaded. */
+NVENCSTATUS
+NvEncGetEncodeProfileGUIDs (void *encoder, GUID encodeGUID,
+    GUID * profileGUIDs, uint32_t guidArraySize, uint32_t * GUIDCount)
+{
+  g_assert (nvenc_api.nvEncGetEncodeProfileGUIDs != NULL);
+  return nvenc_api.nvEncGetEncodeProfileGUIDs (encoder, encodeGUID,
+      profileGUIDs, guidArraySize, GUIDCount);
+}
+
+/* Dispatches to nvenc_api.nvEncGetInputFormats and returns its status
+ * unchanged; asserts that the function table entry has been loaded. */
+NVENCSTATUS
+NvEncGetInputFormats (void *encoder, GUID enc_guid,
+    NV_ENC_BUFFER_FORMAT * array, uint32_t size, uint32_t * num)
+{
+  g_assert (nvenc_api.nvEncGetInputFormats != NULL);
+  return nvenc_api.nvEncGetInputFormats (encoder, enc_guid, array, size, num);
+}
+
+/* Dispatches to nvenc_api.nvEncGetEncodePresetConfig and returns its status
+ * unchanged; asserts that the function table entry has been loaded. */
+NVENCSTATUS
+NvEncGetEncodePresetConfig (void *encoder, GUID encodeGUID,
+    GUID presetGUID, NV_ENC_PRESET_CONFIG * presetConfig)
+{
+  g_assert (nvenc_api.nvEncGetEncodePresetConfig != NULL);
+  return nvenc_api.nvEncGetEncodePresetConfig (encoder, encodeGUID, presetGUID,
+      presetConfig);
+}
+
+/* Dispatches to nvenc_api.nvEncGetEncodeCaps and returns its status
+ * unchanged; asserts that the function table entry has been loaded. */
+NVENCSTATUS
+NvEncGetEncodeCaps (void *encoder, GUID encodeGUID,
+    NV_ENC_CAPS_PARAM * capsParam, int *capsVal)
+{
+  g_assert (nvenc_api.nvEncGetEncodeCaps != NULL);
+  return nvenc_api.nvEncGetEncodeCaps (encoder, encodeGUID, capsParam, capsVal);
+}
+
+/* Dispatches to nvenc_api.nvEncGetSequenceParams and returns its status
+ * unchanged; asserts that the function table entry has been loaded. */
+NVENCSTATUS
+NvEncGetSequenceParams (void *encoder,
+    NV_ENC_SEQUENCE_PARAM_PAYLOAD * sequenceParamPayload)
+{
+  g_assert (nvenc_api.nvEncGetSequenceParams != NULL);
+  return nvenc_api.nvEncGetSequenceParams (encoder, sequenceParamPayload);
+}
+
+/* Dispatches to nvenc_api.nvEncInitializeEncoder and returns its status
+ * unchanged; asserts that the function table entry has been loaded. */
+NVENCSTATUS
+NvEncInitializeEncoder (void *encoder, NV_ENC_INITIALIZE_PARAMS * params)
+{
+  g_assert (nvenc_api.nvEncInitializeEncoder != NULL);
+  return nvenc_api.nvEncInitializeEncoder (encoder, params);
+}
+
+/* Dispatches to nvenc_api.nvEncReconfigureEncoder and returns its status
+ * unchanged; asserts that the function table entry has been loaded. */
+NVENCSTATUS
+NvEncReconfigureEncoder (void *encoder, NV_ENC_RECONFIGURE_PARAMS * params)
+{
+  g_assert (nvenc_api.nvEncReconfigureEncoder != NULL);
+  return nvenc_api.nvEncReconfigureEncoder (encoder, params);
+}
+
+/* Dispatches to nvenc_api.nvEncRegisterResource and returns its status
+ * unchanged; asserts that the function table entry has been loaded. */
+NVENCSTATUS
+NvEncRegisterResource (void *encoder, NV_ENC_REGISTER_RESOURCE * params)
+{
+  g_assert (nvenc_api.nvEncRegisterResource != NULL);
+  return nvenc_api.nvEncRegisterResource (encoder, params);
+}
+
+/* Dispatches to nvenc_api.nvEncUnregisterResource and returns its status
+ * unchanged; asserts that the function table entry has been loaded. */
+NVENCSTATUS
+NvEncUnregisterResource (void *encoder, NV_ENC_REGISTERED_PTR resource)
+{
+  g_assert (nvenc_api.nvEncUnregisterResource != NULL);
+  return nvenc_api.nvEncUnregisterResource (encoder, resource);
+}
+
+/* Dispatches to nvenc_api.nvEncMapInputResource and returns its status
+ * unchanged; asserts that the function table entry has been loaded. */
+NVENCSTATUS
+NvEncMapInputResource (void *encoder, NV_ENC_MAP_INPUT_RESOURCE * params)
+{
+  g_assert (nvenc_api.nvEncMapInputResource != NULL);
+  return nvenc_api.nvEncMapInputResource (encoder, params);
+}
+
+/* Dispatches to nvenc_api.nvEncUnmapInputResource and returns its status
+ * unchanged; asserts that the function table entry has been loaded. */
+NVENCSTATUS
+NvEncUnmapInputResource (void *encoder, NV_ENC_INPUT_PTR input_buffer)
+{
+  g_assert (nvenc_api.nvEncUnmapInputResource != NULL);
+  return nvenc_api.nvEncUnmapInputResource (encoder, input_buffer);
+}
+
+/* Dispatches to nvenc_api.nvEncCreateInputBuffer and returns its status
+ * unchanged; asserts that the function table entry has been loaded. */
+NVENCSTATUS
+NvEncCreateInputBuffer (void *encoder, NV_ENC_CREATE_INPUT_BUFFER * input_buf)
+{
+  g_assert (nvenc_api.nvEncCreateInputBuffer != NULL);
+  return nvenc_api.nvEncCreateInputBuffer (encoder, input_buf);
+}
+
+/* Dispatches to nvenc_api.nvEncLockInputBuffer and returns its status
+ * unchanged; asserts that the function table entry has been loaded. */
+NVENCSTATUS
+NvEncLockInputBuffer (void *encoder, NV_ENC_LOCK_INPUT_BUFFER * input_buf)
+{
+  g_assert (nvenc_api.nvEncLockInputBuffer != NULL);
+  return nvenc_api.nvEncLockInputBuffer (encoder, input_buf);
+}
+
+/* Dispatches to nvenc_api.nvEncUnlockInputBuffer and returns its status
+ * unchanged; asserts that the function table entry has been loaded. */
+NVENCSTATUS
+NvEncUnlockInputBuffer (void *encoder, NV_ENC_INPUT_PTR input_buf)
+{
+  g_assert (nvenc_api.nvEncUnlockInputBuffer != NULL);
+  return nvenc_api.nvEncUnlockInputBuffer (encoder, input_buf);
+}
+
+/* Dispatches to nvenc_api.nvEncDestroyInputBuffer and returns its status
+ * unchanged; asserts that the function table entry has been loaded. */
+NVENCSTATUS
+NvEncDestroyInputBuffer (void *encoder, NV_ENC_INPUT_PTR input_buf)
+{
+  g_assert (nvenc_api.nvEncDestroyInputBuffer != NULL);
+  return nvenc_api.nvEncDestroyInputBuffer (encoder, input_buf);
+}
+
+/* Dispatches to nvenc_api.nvEncCreateBitstreamBuffer and returns its status
+ * unchanged; asserts that the function table entry has been loaded. */
+NVENCSTATUS
+NvEncCreateBitstreamBuffer (void *encoder, NV_ENC_CREATE_BITSTREAM_BUFFER * bb)
+{
+  g_assert (nvenc_api.nvEncCreateBitstreamBuffer != NULL);
+  return nvenc_api.nvEncCreateBitstreamBuffer (encoder, bb);
+}
+
+/* Dispatches to nvenc_api.nvEncLockBitstream and returns its status
+ * unchanged; asserts that the function table entry has been loaded. */
+NVENCSTATUS
+NvEncLockBitstream (void *encoder, NV_ENC_LOCK_BITSTREAM * lock_bs)
+{
+  g_assert (nvenc_api.nvEncLockBitstream != NULL);
+  return nvenc_api.nvEncLockBitstream (encoder, lock_bs);
+}
+
+/* Dispatches to nvenc_api.nvEncUnlockBitstream and returns its status
+ * unchanged; asserts that the function table entry has been loaded. */
+NVENCSTATUS
+NvEncUnlockBitstream (void *encoder, NV_ENC_OUTPUT_PTR bb)
+{
+  g_assert (nvenc_api.nvEncUnlockBitstream != NULL);
+  return nvenc_api.nvEncUnlockBitstream (encoder, bb);
+}
+
+/* Dispatches to nvenc_api.nvEncDestroyBitstreamBuffer and returns its status
+ * unchanged; asserts that the function table entry has been loaded. */
+NVENCSTATUS
+NvEncDestroyBitstreamBuffer (void *encoder, NV_ENC_OUTPUT_PTR bit_buf)
+{
+  g_assert (nvenc_api.nvEncDestroyBitstreamBuffer != NULL);
+  return nvenc_api.nvEncDestroyBitstreamBuffer (encoder, bit_buf);
+}
+
+/* Dispatches to nvenc_api.nvEncEncodePicture and returns its status
+ * unchanged; asserts that the function table entry has been loaded. */
+NVENCSTATUS
+NvEncEncodePicture (void *encoder, NV_ENC_PIC_PARAMS * pic_params)
+{
+  g_assert (nvenc_api.nvEncEncodePicture != NULL);
+  return nvenc_api.nvEncEncodePicture (encoder, pic_params);
+}
+
+/* Compares two GUIDs member by member.
+ *
+ * Returns: TRUE if Data1, Data2, Data3 and all eight Data4 elements match,
+ * FALSE otherwise. */
+gboolean
+gst_nvenc_cmp_guid (GUID g1, GUID g2)
+{
+  guint idx;
+
+  if (g1.Data1 != g2.Data1)
+    return FALSE;
+  if (g1.Data2 != g2.Data2)
+    return FALSE;
+  if (g1.Data3 != g2.Data3)
+    return FALSE;
+
+  for (idx = 0; idx < 8; idx++) {
+    if (g1.Data4[idx] != g2.Data4[idx])
+      return FALSE;
+  }
+
+  return TRUE;
+}
+
+/* Maps a GStreamer video format onto the matching NVENC buffer format.
+ *
+ * Returns: the NVENC format for NV12, YV12, I420 and Y444, or
+ * NV_ENC_BUFFER_FORMAT_UNDEFINED for every other format. */
+NV_ENC_BUFFER_FORMAT
+gst_nvenc_get_nv_buffer_format (GstVideoFormat fmt)
+{
+  if (fmt == GST_VIDEO_FORMAT_NV12)
+    return NV_ENC_BUFFER_FORMAT_NV12_PL;
+
+  if (fmt == GST_VIDEO_FORMAT_YV12)
+    return NV_ENC_BUFFER_FORMAT_YV12_PL;
+
+  if (fmt == GST_VIDEO_FORMAT_I420)
+    return NV_ENC_BUFFER_FORMAT_IYUV_PL;
+
+  if (fmt == GST_VIDEO_FORMAT_Y444)
+    return NV_ENC_BUFFER_FORMAT_YUV444_PL;
+
+  return NV_ENC_BUFFER_FORMAT_UNDEFINED;
+}
+
+/* Initialises CUDA and creates a context on the device with index
+ * @device_id.
+ *
+ * Every detected device is logged together with its compute capability.
+ * The device is picked purely by index; the "supports NVENC" verdict is
+ * only logged. The new context is popped again before returning, so it is
+ * not left current on the calling thread.
+ *
+ * Returns: the new context (release it with
+ * gst_nvenc_destroy_cuda_context()), or NULL on any failure. */
+CUcontext
+gst_nvenc_create_cuda_context (guint device_id)
+{
+  CUcontext cuda_ctx, old_ctx;
+  CUresult cres = CUDA_SUCCESS;
+  CUdevice cdev = 0, cuda_dev = -1;
+  int dev_count = 0;
+  char name[256];
+  int min = 0, maj = 0;
+  int i;
+
+  GST_INFO ("Initialising CUDA..");
+
+  cres = cuInit (0);
+
+  if (cres != CUDA_SUCCESS) {
+    GST_WARNING ("Failed to initialise CUDA, error code: 0x%08x", cres);
+    return NULL;
+  }
+
+  GST_INFO ("Initialised CUDA");
+
+  cres = cuDeviceGetCount (&dev_count);
+  if (cres != CUDA_SUCCESS || dev_count == 0) {
+    GST_WARNING ("No CUDA devices detected");
+    return NULL;
+  }
+
+  GST_INFO ("%d CUDA device(s) detected", dev_count);
+  for (i = 0; i < dev_count; ++i) {
+    if (cuDeviceGet (&cdev, i) == CUDA_SUCCESS
+        && cuDeviceGetName (name, sizeof (name), cdev) == CUDA_SUCCESS
+        && cuDeviceComputeCapability (&maj, &min, cdev) == CUDA_SUCCESS) {
+      GST_INFO ("GPU #%d supports NVENC: %s (%s) (Compute SM %d.%d)",
+          i, (((maj << 4) + min) >= 0x30) ? "yes" : "no", name, maj, min);
+      /* i is never negative here, so the cast cannot change its value */
+      if ((guint) i == device_id) {
+        cuda_dev = cdev;
+      }
+    }
+  }
+
+  /* -1 is the "nothing selected" sentinel set at declaration */
+  if (cuda_dev == -1) {
+    GST_WARNING ("Device with id %u does not exist or does not support NVENC",
+        device_id);
+    return NULL;
+  }
+
+  if (cuCtxCreate (&cuda_ctx, 0, cuda_dev) != CUDA_SUCCESS) {
+    GST_WARNING ("Failed to create CUDA context for cuda device %d", cuda_dev);
+    return NULL;
+  }
+
+  if (cuCtxPopCurrent (&old_ctx) != CUDA_SUCCESS) {
+    /* nobody else holds a reference to the context yet, so destroy it
+     * here rather than leak it */
+    GST_WARNING ("Failed to pop CUDA context %p", cuda_ctx);
+    cuCtxDestroy (cuda_ctx);
+    return NULL;
+  }
+
+  GST_INFO ("Created CUDA context %p", cuda_ctx);
+
+  return cuda_ctx;
+}
+
+/* Destroys a context created with gst_nvenc_create_cuda_context().
+ *
+ * Returns: TRUE if cuCtxDestroy() reported success. */
+gboolean
+gst_nvenc_destroy_cuda_context (CUcontext ctx)
+{
+  CUresult res;
+
+  GST_INFO ("Destroying CUDA context %p", ctx);
+
+  res = cuCtxDestroy (ctx);
+  if (res != CUDA_SUCCESS)
+    return FALSE;
+
+  return TRUE;
+}
+
+/* Plugin entry point: sets up the debug category, loads the NVENC function
+ * table and, if that worked, registers the nvh264enc element.
+ *
+ * Always returns TRUE: when the function table cannot be obtained the
+ * plugin still loads, it just provides no elements. */
+static gboolean
+plugin_init (GstPlugin * plugin)
+{
+  NVENCSTATUS status;
+
+  GST_DEBUG_CATEGORY_INIT (gst_nvenc_debug, "nvenc", 0, "Nvidia NVENC encoder");
+
+  nvenc_api.version = NV_ENCODE_API_FUNCTION_LIST_VER;
+  status = NvEncodeAPICreateInstance (&nvenc_api);
+  if (status != NV_ENC_SUCCESS) {
+    GST_ERROR ("Failed to get NVEncodeAPI function table!");
+    return TRUE;
+  }
+
+  GST_INFO ("Created NVEncodeAPI instance, got function table");
+
+  gst_element_register (plugin, "nvh264enc", GST_RANK_PRIMARY * 2,
+      gst_nv_h264_enc_get_type ());
+
+  return TRUE;
+}
+
+/* Plugin descriptor: registers the "nvenc" plugin with plugin_init() as its
+ * entry point */
+GST_PLUGIN_DEFINE (GST_VERSION_MAJOR,
+    GST_VERSION_MINOR,
+    nvenc,
+    "GStreamer NVENC plugin",
+    plugin_init, VERSION, "LGPL", GST_PACKAGE_NAME, GST_PACKAGE_ORIGIN)
diff --git a/sys/nvenc/gstnvenc.h b/sys/nvenc/gstnvenc.h
new file mode 100644
index 0000000000..f4eb34b6b6
--- /dev/null
+++ b/sys/nvenc/gstnvenc.h
@@ -0,0 +1,40 @@
+/* GStreamer NVENC plugin
+ * Copyright (C) 2015 Centricular Ltd
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __GST_NVENC_H_INCLUDED__
+#define __GST_NVENC_H_INCLUDED__
+
+#include <gst/gst.h>
+#include <gst/video/video.h>
+
+#include <nvEncodeAPI.h>
+#include <cuda.h>
+
+/* plugin-wide debug category, defined in gstnvenc.c; it becomes the default
+ * category for every file that includes this header */
+GST_DEBUG_CATEGORY_EXTERN (gst_nvenc_debug);
+#define GST_CAT_DEFAULT gst_nvenc_debug
+
+/* initialises CUDA and creates a context on device @device_id;
+ * returns NULL on failure */
+CUcontext               gst_nvenc_create_cuda_context (guint device_id);
+
+/* destroys @ctx; returns TRUE on success */
+gboolean                gst_nvenc_destroy_cuda_context (CUcontext ctx);
+
+/* returns TRUE if both GUIDs are equal */
+gboolean                gst_nvenc_cmp_guid (GUID g1, GUID g2);
+
+/* maps @fmt to the NVENC buffer format, or NV_ENC_BUFFER_FORMAT_UNDEFINED
+ * if there is no mapping */
+NV_ENC_BUFFER_FORMAT    gst_nvenc_get_nv_buffer_format (GstVideoFormat fmt);
+
+#endif /* __GST_NVENC_H_INCLUDED__ */
diff --git a/sys/nvenc/gstnvh264enc.c b/sys/nvenc/gstnvh264enc.c
new file mode 100644
index 0000000000..170e3f7024
--- /dev/null
+++ b/sys/nvenc/gstnvh264enc.c
@@ -0,0 +1,610 @@
+/* GStreamer NVENC plugin
+ * Copyright (C) 2015 Centricular Ltd
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "gstnvh264enc.h"
+
+#include <gst/pbutils/codec-utils.h>
+
+#include <string.h>
+
+#if HAVE_GST_GL
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+#include <cuda_gl_interop.h>
+#define GST_USE_UNSTABLE_API
+#include <gst/gl/gl.h>
+#endif
+
+/* shorthand for the parent class pointer generated by G_DEFINE_TYPE below
+ * (the code in this file spells out gst_nv_h264_enc_parent_class) */
+#define parent_class gst_nv_h264_enc_parent_class
+G_DEFINE_TYPE (GstNvH264Enc, gst_nv_h264_enc, GST_TYPE_NV_BASE_ENC);
+
+/* Source pad template: byte-stream, AU-aligned H.264 in high, main or
+ * baseline profile, up to 4096x2160. */
+/* *INDENT-OFF* */
+static GstStaticPadTemplate src_factory = GST_STATIC_PAD_TEMPLATE ("src",
+    GST_PAD_SRC,
+    GST_PAD_ALWAYS,
+    GST_STATIC_CAPS ("video/x-h264, "
+        "width = (int) [ 1, 4096 ], height = (int) [ 1, 2160 ], "
+        "framerate = (fraction) [0/1, MAX], "
+        "stream-format = (string) byte-stream, " /* TODO: avc support */
+        "alignment = (string) au, "
+        "profile = (string) { high, main, baseline }") /* TODO: a couple of others */
+    );
+/* *INDENT-ON* */
+
+/* Forward declarations of the virtual method implementations installed in
+ * gst_nv_h264_enc_class_init() */
+static gboolean gst_nv_h264_enc_open (GstVideoEncoder * enc);
+static gboolean gst_nv_h264_enc_close (GstVideoEncoder * enc);
+static GstCaps *gst_nv_h264_enc_getcaps (GstVideoEncoder * enc,
+    GstCaps * filter);
+static gboolean gst_nv_h264_enc_set_src_caps (GstNvBaseEnc * nvenc,
+    GstVideoCodecState * state);
+static gboolean gst_nv_h264_enc_initialize_encoder (GstNvBaseEnc * nvenc,
+    GstVideoCodecState * old_state, GstVideoCodecState * state);
+static gboolean gst_nv_h264_enc_set_pic_params (GstNvBaseEnc * nvenc,
+    GstVideoCodecFrame * frame, NV_ENC_PIC_PARAMS * pic_params);
+static void gst_nv_h264_enc_set_property (GObject * object, guint prop_id,
+    const GValue * value, GParamSpec * pspec);
+static void gst_nv_h264_enc_get_property (GObject * object, guint prop_id,
+    GValue * value, GParamSpec * pspec);
+static void gst_nv_h264_enc_finalize (GObject * obj);
+
+/* Class initialiser: registers the element metadata and src pad template
+ * and installs the GObject, GstVideoEncoder and GstNvBaseEnc virtual
+ * methods implemented in this file. */
+static void
+gst_nv_h264_enc_class_init (GstNvH264EncClass * klass)
+{
+  GstNvBaseEncClass *base_enc_class = GST_NV_BASE_ENC_CLASS (klass);
+  GstVideoEncoderClass *venc_class = GST_VIDEO_ENCODER_CLASS (klass);
+  GstElementClass *elem_class = GST_ELEMENT_CLASS (klass);
+  GObjectClass *object_class = G_OBJECT_CLASS (klass);
+
+  /* element description and pads */
+  gst_element_class_set_static_metadata (elem_class,
+      "NVENC H.264 Video Encoder",
+      "Codec/Encoder/Video",
+      "Encode H.264 video streams using NVIDIA's hardware-accelerated NVENC encoder API",
+      "Tim-Philipp Müller <tim@centricular.com>\n"
+      "Matthew Waters <matthew@centricular.com>");
+
+  gst_element_class_add_pad_template (elem_class,
+      gst_static_pad_template_get (&src_factory));
+
+  /* GObject */
+  object_class->finalize = gst_nv_h264_enc_finalize;
+  object_class->get_property = gst_nv_h264_enc_get_property;
+  object_class->set_property = gst_nv_h264_enc_set_property;
+
+  /* GstVideoEncoder */
+  venc_class->getcaps = GST_DEBUG_FUNCPTR (gst_nv_h264_enc_getcaps);
+  venc_class->open = GST_DEBUG_FUNCPTR (gst_nv_h264_enc_open);
+  venc_class->close = GST_DEBUG_FUNCPTR (gst_nv_h264_enc_close);
+
+  /* GstNvBaseEnc */
+  base_enc_class->codec_id = NV_ENC_CODEC_H264_GUID;
+  base_enc_class->set_pic_params = gst_nv_h264_enc_set_pic_params;
+  base_enc_class->set_src_caps = gst_nv_h264_enc_set_src_caps;
+  base_enc_class->initialize_encoder = gst_nv_h264_enc_initialize_encoder;
+}
+
+/* Instance initialiser; intentionally empty, there is no per-instance
+ * state to set up here. */
+static void
+gst_nv_h264_enc_init (GstNvH264Enc * nvenc)
+{
+}
+
+/* GObject finalize: nothing of our own to release here (supported_profiles
+ * is freed in close()), so just chain up to the parent class. */
+static void
+gst_nv_h264_enc_finalize (GObject * obj)
+{
+  G_OBJECT_CLASS (gst_nv_h264_enc_parent_class)->finalize (obj);
+}
+
+/* Queries the H.264 profiles the opened encoder session supports and stores
+ * the ones we can express in caps (baseline, main, high) as a GST_TYPE_LIST
+ * of strings in nvenc->supported_profiles (taking the object lock).
+ *
+ * Returns: FALSE if a query fails or none of the reported profiles is
+ * known, TRUE otherwise. */
+static gboolean
+_get_supported_profiles (GstNvH264Enc * nvenc)
+{
+  NVENCSTATUS nv_ret;
+  GUID profile_guids[64];
+  GValue list = G_VALUE_INIT;
+  GValue val = G_VALUE_INIT;
+  guint i, n, n_profiles;
+
+  nv_ret =
+      NvEncGetEncodeProfileGUIDCount (GST_NV_BASE_ENC (nvenc)->encoder,
+      NV_ENC_CODEC_H264_GUID, &n);
+  if (nv_ret != NV_ENC_SUCCESS)
+    return FALSE;
+
+  nv_ret =
+      NvEncGetEncodeProfileGUIDs (GST_NV_BASE_ENC (nvenc)->encoder,
+      NV_ENC_CODEC_H264_GUID, profile_guids, G_N_ELEMENTS (profile_guids), &n);
+  if (nv_ret != NV_ENC_SUCCESS)
+    return FALSE;
+
+  /* never read past the array we handed to the API, whatever it reports */
+  n = MIN (n, (guint) G_N_ELEMENTS (profile_guids));
+
+  n_profiles = 0;
+  g_value_init (&list, GST_TYPE_LIST);
+  for (i = 0; i < n; i++) {
+    const gchar *name = NULL;
+
+    if (gst_nvenc_cmp_guid (profile_guids[i],
+            NV_ENC_H264_PROFILE_BASELINE_GUID)) {
+      name = "baseline";
+    } else if (gst_nvenc_cmp_guid (profile_guids[i],
+            NV_ENC_H264_PROFILE_MAIN_GUID)) {
+      name = "main";
+    } else if (gst_nvenc_cmp_guid (profile_guids[i],
+            NV_ENC_H264_PROFILE_HIGH_GUID)) {
+      name = "high";
+    }
+    /* TODO: map HIGH_444, STEREO, CONSTRAINED_HIGH, SVC_TEMPORAL_SCALABILITY */
+
+    if (name == NULL)
+      continue;
+
+    g_value_init (&val, G_TYPE_STRING);
+    g_value_set_static_string (&val, name);
+    gst_value_list_append_value (&list, &val);
+    g_value_unset (&val);
+    n_profiles++;
+  }
+
+  if (n_profiles == 0) {
+    /* the (empty) list still owns its backing storage */
+    g_value_unset (&list);
+    return FALSE;
+  }
+
+  GST_OBJECT_LOCK (nvenc);
+  if (nvenc->supported_profiles != NULL) {
+    /* release the list contents as well as the GValue container */
+    g_value_unset (nvenc->supported_profiles);
+    g_free (nvenc->supported_profiles);
+  }
+  /* shallow copy: ownership of the list contents moves to the heap copy,
+   * so the stack GValue must not be unset afterwards */
+  nvenc->supported_profiles = g_memdup (&list, sizeof (GValue));
+  GST_OBJECT_UNLOCK (nvenc);
+
+  return TRUE;
+}
+
+/* GstVideoEncoder::open implementation.
+ *
+ * Chains up to the parent class, then verifies that the encoder session
+ * offers H.264 and caches the supported profiles. If either check fails
+ * the encoder is closed again.
+ *
+ * Returns: TRUE if H.264 encoding is available. */
+static gboolean
+gst_nv_h264_enc_open (GstVideoEncoder * enc)
+{
+  GstNvH264Enc *nvenc = GST_NV_H264_ENC (enc);
+
+  if (!GST_VIDEO_ENCODER_CLASS (gst_nv_h264_enc_parent_class)->open (enc))
+    return FALSE;
+
+  /* Check if H.264 is supported */
+  {
+    uint32_t i, num = 0;
+    GUID guids[16];
+
+    /* a failed query means we know of no supported codec at all */
+    if (NvEncGetEncodeGUIDs (GST_NV_BASE_ENC (nvenc)->encoder, guids,
+            G_N_ELEMENTS (guids), &num) != NV_ENC_SUCCESS)
+      num = 0;
+
+    /* never read past the array we handed to the API */
+    num = MIN (num, (uint32_t) G_N_ELEMENTS (guids));
+
+    for (i = 0; i < num; ++i) {
+      if (gst_nvenc_cmp_guid (guids[i], NV_ENC_CODEC_H264_GUID))
+        break;
+    }
+    GST_INFO_OBJECT (enc, "H.264 encoding %ssupported", (i == num) ? "un" : "");
+    if (i == num) {
+      gst_nv_h264_enc_close (enc);
+      return FALSE;
+    }
+  }
+
+  /* query supported encoding profiles */
+  if (!_get_supported_profiles (nvenc)) {
+    GST_WARNING_OBJECT (nvenc, "No supported encoding profiles");
+    gst_nv_h264_enc_close (enc);
+    return FALSE;
+  }
+
+  return TRUE;
+}
+
+/* GstVideoEncoder::close implementation: drops the cached profile list
+ * and chains up to the parent class.
+ *
+ * Returns: the parent class' close() result. */
+static gboolean
+gst_nv_h264_enc_close (GstVideoEncoder * enc)
+{
+  GstNvH264Enc *nvenc = GST_NV_H264_ENC (enc);
+
+  GST_OBJECT_LOCK (nvenc);
+  if (nvenc->supported_profiles != NULL) {
+    /* supported_profiles is a heap-allocated GValue holding a list: free
+     * the list contents first, then the GValue container itself */
+    g_value_unset (nvenc->supported_profiles);
+    g_free (nvenc->supported_profiles);
+    nvenc->supported_profiles = NULL;
+  }
+  GST_OBJECT_UNLOCK (nvenc);
+
+  return GST_VIDEO_ENCODER_CLASS (gst_nv_h264_enc_parent_class)->close (enc);
+}
+
+/* Builds the list of interlace-mode strings to advertise on the sink pad.
+ *
+ * "progressive" is always present. "interleaved" and "mixed" are added when
+ * the NV_ENC_CAPS_SUPPORT_FIELD_ENCODING query yields a value >= 1. The
+ * query result is also stored in nvenc->interlace_modes (0 if it fails).
+ *
+ * Returns: a newly allocated GValue holding a GST_TYPE_LIST, owned by the
+ * caller. */
+static GValue *
+_get_interlace_modes (GstNvH264Enc * nvenc)
+{
+  NV_ENC_CAPS_PARAM query = { 0, };
+  GValue mode = G_VALUE_INIT;
+  GValue *modes;
+  NVENCSTATUS status;
+
+  modes = g_new0 (GValue, 1);
+  g_value_init (modes, GST_TYPE_LIST);
+  g_value_init (&mode, G_TYPE_STRING);
+
+  g_value_set_static_string (&mode, "progressive");
+  gst_value_list_append_value (modes, &mode);
+
+  query.version = NV_ENC_CAPS_PARAM_VER;
+  query.capsToQuery = NV_ENC_CAPS_SUPPORT_FIELD_ENCODING;
+
+  status = NvEncGetEncodeCaps (GST_NV_BASE_ENC (nvenc)->encoder,
+      NV_ENC_CODEC_H264_GUID, &query, &nvenc->interlace_modes);
+  if (status != NV_ENC_SUCCESS)
+    nvenc->interlace_modes = 0;
+
+  /* TODO: figure out what nvenc frame based interlacing means in gst terms */
+  if (nvenc->interlace_modes < 1)
+    return modes;
+
+  g_value_set_static_string (&mode, "interleaved");
+  gst_value_list_append_value (modes, &mode);
+  g_value_set_static_string (&mode, "mixed");
+  gst_value_list_append_value (modes, &mode);
+
+  return modes;
+}
+
+/* GstVideoEncoder::getcaps implementation.
+ *
+ * Once the base class has a list of input formats, the sink template caps
+ * are narrowed to those formats and to the interlace modes the encoder
+ * supports. The result is proxied against downstream and @filter.
+ *
+ * Returns: (transfer full) the caps acceptable on the sink pad. */
+static GstCaps *
+gst_nv_h264_enc_getcaps (GstVideoEncoder * enc, GstCaps * filter)
+{
+  GstNvH264Enc *nvenc = GST_NV_H264_ENC (enc);
+  GstCaps *supported_incaps = NULL;
+  GstCaps *template_caps, *caps;
+  GValue *input_formats;
+
+  GST_OBJECT_LOCK (nvenc);
+
+  /* read the pointer inside the locked section it is used in; presumably
+   * the base class guards input_formats with the object lock as well -
+   * TODO confirm in gstnvbaseenc.c */
+  input_formats = GST_NV_BASE_ENC (enc)->input_formats;
+
+  if (input_formats != NULL) {
+    GValue *val;
+
+    template_caps = gst_pad_get_pad_template_caps (enc->sinkpad);
+    supported_incaps = gst_caps_copy (template_caps);
+    gst_caps_set_value (supported_incaps, "format", input_formats);
+
+    val = _get_interlace_modes (nvenc);
+    gst_caps_set_value (supported_incaps, "interlace-mode", val);
+    /* the caps hold their own copy: release the list contents and then
+     * the heap-allocated GValue itself */
+    g_value_unset (val);
+    g_free (val);
+
+    GST_LOG_OBJECT (enc, "codec input caps %" GST_PTR_FORMAT, supported_incaps);
+    GST_LOG_OBJECT (enc, "   template caps %" GST_PTR_FORMAT, template_caps);
+    caps = gst_caps_intersect (template_caps, supported_incaps);
+    gst_caps_unref (template_caps);
+    gst_caps_unref (supported_incaps);
+    supported_incaps = caps;
+    GST_LOG_OBJECT (enc, "  supported caps %" GST_PTR_FORMAT, supported_incaps);
+  }
+
+  GST_OBJECT_UNLOCK (nvenc);
+
+  caps = gst_video_encoder_proxy_getcaps (enc, supported_incaps, filter);
+
+  if (supported_incaps)
+    gst_caps_unref (supported_incaps);
+
+  GST_DEBUG_OBJECT (nvenc, "  returning caps %" GST_PTR_FORMAT, caps);
+
+  return caps;
+}
+
+/* Fetches the SPS/PPS from the encoder and sets "profile" and "level" on
+ * @caps from it.
+ *
+ * If downstream does not accept the resulting profile but accepts one that
+ * the stream is a subset of, the caps are relabelled with that profile so
+ * that negotiation succeeds.
+ *
+ * Returns: FALSE (with an element error posted) if the sequence parameters
+ * could not be retrieved, TRUE otherwise. */
+static gboolean
+gst_nv_h264_enc_set_profile_and_level (GstNvH264Enc * nvenc, GstCaps * caps)
+{
+#define N_BYTES_SPS 128
+  guint8 sps[N_BYTES_SPS];
+  NV_ENC_SEQUENCE_PARAM_PAYLOAD spp = { 0, };
+  GstStructure *s;
+  const gchar *profile;
+  GstCaps *allowed_caps;
+  GstStructure *s2;
+  const gchar *allowed_profile;
+  NVENCSTATUS nv_ret;
+  guint32 seq_size = 0;
+
+  spp.version = NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER;
+  spp.inBufferSize = N_BYTES_SPS;
+  spp.spsId = 0;
+  spp.ppsId = 0;
+  spp.spsppsBuffer = &sps;
+  spp.outSPSPPSPayloadSize = &seq_size;
+  nv_ret = NvEncGetSequenceParams (GST_NV_BASE_ENC (nvenc)->encoder, &spp);
+  if (nv_ret != NV_ENC_SUCCESS) {
+    GST_ELEMENT_ERROR (nvenc, STREAM, ENCODE, ("Encode header failed."),
+        ("NvEncGetSequenceParams return code=%d", nv_ret));
+    return FALSE;
+  }
+
+  /* we read sps[5..7] below, so we need at least 8 bytes */
+  if (seq_size < 8) {
+    GST_ELEMENT_ERROR (nvenc, STREAM, ENCODE, ("Encode header failed."),
+        ("NvEncGetSequenceParams returned incomplete data"));
+    return FALSE;
+  }
+
+  /* skip nal header and identifier */
+  if (!gst_codec_utils_h264_caps_set_level_and_profile (caps, &sps[5], 3))
+    GST_WARNING_OBJECT (nvenc, "could not determine profile/level from SPS");
+
+  /* Constrained baseline is a strict subset of baseline. If downstream
+   * wanted baseline and we produced constrained baseline, we can just
+   * set the profile to baseline in the caps to make negotiation happy.
+   * Same goes for baseline as subset of main profile and main as a subset
+   * of high profile.
+   */
+  s = gst_caps_get_structure (caps, 0);
+  /* NULL if the SPS could not be parsed above */
+  profile = gst_structure_get_string (s, "profile");
+
+  allowed_caps = gst_pad_get_allowed_caps (GST_VIDEO_ENCODER_SRC_PAD (nvenc));
+
+  if (allowed_caps == NULL)
+    goto no_peer;
+
+  /* without a profile of our own, or with empty downstream caps (which
+   * have no structure to inspect), there is nothing to relabel */
+  if (profile != NULL && !gst_caps_is_empty (allowed_caps)
+      && !gst_caps_can_intersect (allowed_caps, caps)) {
+    allowed_caps = gst_caps_make_writable (allowed_caps);
+    allowed_caps = gst_caps_truncate (allowed_caps);
+    s2 = gst_caps_get_structure (allowed_caps, 0);
+    gst_structure_fixate_field_string (s2, "profile", profile);
+    /* NULL if downstream does not specify a profile at all; g_strcmp0()
+     * copes with that, plain strcmp() would crash */
+    allowed_profile = gst_structure_get_string (s2, "profile");
+    if (g_strcmp0 (allowed_profile, "high") == 0) {
+      if (!strcmp (profile, "constrained-baseline")
+          || !strcmp (profile, "baseline") || !strcmp (profile, "main")) {
+        gst_structure_set (s, "profile", G_TYPE_STRING, "high", NULL);
+        GST_INFO_OBJECT (nvenc, "downstream requested high profile, but "
+            "encoder will now output %s profile (which is a subset), due "
+            "to how it's been configured", profile);
+      }
+    } else if (g_strcmp0 (allowed_profile, "main") == 0) {
+      if (!strcmp (profile, "constrained-baseline")
+          || !strcmp (profile, "baseline")) {
+        gst_structure_set (s, "profile", G_TYPE_STRING, "main", NULL);
+        GST_INFO_OBJECT (nvenc, "downstream requested main profile, but "
+            "encoder will now output %s profile (which is a subset), due "
+            "to how it's been configured", profile);
+      }
+    } else if (g_strcmp0 (allowed_profile, "baseline") == 0) {
+      if (!strcmp (profile, "constrained-baseline"))
+        gst_structure_set (s, "profile", G_TYPE_STRING, "baseline", NULL);
+    }
+  }
+  gst_caps_unref (allowed_caps);
+
+no_peer:
+
+  return TRUE;
+
+#undef N_BYTES_SPS
+}
+
+/* GstNvBaseEncClass::set_src_caps implementation.
+ *
+ * Builds byte-stream / AU-aligned H.264 caps, adds the profile and level
+ * reported by the encoder and installs them as the output state.
+ *
+ * Returns: FALSE if profile and level could not be determined. */
+static gboolean
+gst_nv_h264_enc_set_src_caps (GstNvBaseEnc * nvenc, GstVideoCodecState * state)
+{
+  GstVideoEncoder *venc = GST_VIDEO_ENCODER (nvenc);
+  GstVideoCodecState *output;
+  GstCaps *src_caps;
+
+  /* TODO: add support for avc format as well */
+  src_caps = gst_caps_new_simple ("video/x-h264",
+      "stream-format", G_TYPE_STRING, "byte-stream",
+      "alignment", G_TYPE_STRING, "au", NULL);
+
+  if (!gst_nv_h264_enc_set_profile_and_level (GST_NV_H264_ENC (nvenc),
+          src_caps)) {
+    gst_caps_unref (src_caps);
+    return FALSE;
+  }
+
+  /* takes ownership of src_caps */
+  output = gst_video_encoder_set_output_state (venc, src_caps, state);
+
+  GST_INFO_OBJECT (nvenc, "output caps: %" GST_PTR_FORMAT, output->caps);
+
+  /* encoder will keep it around for us */
+  gst_video_codec_state_unref (output);
+
+  /* TODO: would be nice to also send some tags with the codec name */
+  return TRUE;
+}
+
+/* GstNvBaseEncClass::initialize_encoder implementation.
+ *
+ * Fills in the NVENC initialisation parameters for @state: profile and
+ * level from what downstream allows, size, framerate and interlacing from
+ * the input video info, and everything else from the HP preset. It then
+ * initialises the encoder session, or reconfigures it when @old_state is
+ * non-NULL.
+ *
+ * Returns: TRUE on success; most failure paths post an element error. */
+static gboolean
+gst_nv_h264_enc_initialize_encoder (GstNvBaseEnc * nvenc,
+    GstVideoCodecState * old_state, GstVideoCodecState * state)
+{
+  GstNvH264Enc *h264enc = GST_NV_H264_ENC (nvenc);
+  NV_ENC_RECONFIGURE_PARAMS reconfigure_params = { 0, };
+  NV_ENC_INITIALIZE_PARAMS init_params = { 0, };
+  NV_ENC_INITIALIZE_PARAMS *params;
+  NV_ENC_PRESET_CONFIG preset_config = { 0, };
+  NVENCSTATUS nv_ret;
+  GstVideoInfo *info = &state->info;
+  GstCaps *allowed_caps, *template_caps;
+  GUID selected_profile = NV_ENC_CODEC_PROFILE_AUTOSELECT_GUID;
+  int level_idc = NV_ENC_LEVEL_AUTOSELECT;
+
+  /* TODO: support reconfiguration */
+  if (old_state) {
+    reconfigure_params.version = NV_ENC_RECONFIGURE_PARAMS_VER;
+    params = &reconfigure_params.reInitEncodeParams;
+  } else {
+    params = &init_params;
+  }
+
+  template_caps = gst_static_pad_template_get_caps (&src_factory);
+  allowed_caps = gst_pad_get_allowed_caps (GST_VIDEO_ENCODER_SRC_PAD (h264enc));
+
+  if (template_caps == allowed_caps) {
+    GST_INFO_OBJECT (h264enc, "downstream has ANY caps");
+    /* we hold one reference through each pointer; drop the one obtained
+     * via allowed_caps here, the other is released below */
+    gst_caps_unref (allowed_caps);
+  } else if (allowed_caps) {
+    GstStructure *s;
+    const gchar *profile;
+    const gchar *level;
+
+    if (gst_caps_is_empty (allowed_caps)) {
+      gst_caps_unref (allowed_caps);
+      gst_caps_unref (template_caps);
+      return FALSE;
+    }
+
+    allowed_caps = gst_caps_make_writable (allowed_caps);
+    allowed_caps = gst_caps_fixate (allowed_caps);
+    s = gst_caps_get_structure (allowed_caps, 0);
+
+    /* the asserting branches are for profiles the src template does not
+     * offer; presumably unreachable while the template stays limited to
+     * high/main/baseline */
+    profile = gst_structure_get_string (s, "profile");
+    if (profile) {
+      if (!strcmp (profile, "baseline")) {
+        selected_profile = NV_ENC_H264_PROFILE_BASELINE_GUID;
+      } else if (g_str_has_prefix (profile, "high-4:4:4")) {
+        selected_profile = NV_ENC_H264_PROFILE_HIGH_444_GUID;
+      } else if (g_str_has_prefix (profile, "high-10")) {
+        g_assert_not_reached ();
+      } else if (g_str_has_prefix (profile, "high-4:2:2")) {
+        g_assert_not_reached ();
+      } else if (g_str_has_prefix (profile, "high")) {
+        selected_profile = NV_ENC_H264_PROFILE_HIGH_GUID;
+      } else if (g_str_has_prefix (profile, "main")) {
+        selected_profile = NV_ENC_H264_PROFILE_MAIN_GUID;
+      } else {
+        g_assert_not_reached ();
+      }
+    }
+
+    level = gst_structure_get_string (s, "level");
+    if (level)
+      /* matches values stored in NV_ENC_LEVEL */
+      level_idc = gst_codec_utils_h264_get_level_idc (level);
+
+    gst_caps_unref (allowed_caps);
+  }
+  gst_caps_unref (template_caps);
+
+  params->version = NV_ENC_INITIALIZE_PARAMS_VER;
+  params->encodeGUID = NV_ENC_CODEC_H264_GUID;
+  params->encodeWidth = GST_VIDEO_INFO_WIDTH (info);
+  params->encodeHeight = GST_VIDEO_INFO_HEIGHT (info);
+  /* FIXME: make this a property */
+  params->presetGUID = NV_ENC_PRESET_HP_GUID;   /* _DEFAULT */
+  params->enablePTD = 1;
+  if (!old_state) {
+    /* this sets the required buffer size and the maximum allowed size on
+     * subsequent reconfigures */
+    /* FIXME: propertise this */
+    params->maxEncodeWidth = GST_VIDEO_INFO_WIDTH (info);
+    params->maxEncodeHeight = GST_VIDEO_INFO_HEIGHT (info);
+    gst_nv_base_enc_set_max_encode_size (GST_NV_BASE_ENC (h264enc),
+        params->maxEncodeWidth, params->maxEncodeHeight);
+  } else {
+    guint max_width, max_height;
+
+    gst_nv_base_enc_get_max_encode_size (GST_NV_BASE_ENC (h264enc),
+        &max_width, &max_height);
+
+    /* a reconfigure must stay within the size chosen at initialisation */
+    if (GST_VIDEO_INFO_WIDTH (info) > max_width
+        || GST_VIDEO_INFO_HEIGHT (info) > max_height) {
+      GST_ELEMENT_ERROR (h264enc, STREAM, FORMAT, ("%s", "Requested stream "
+              "size is larger than the maximum configured size"), (NULL));
+      return FALSE;
+    }
+  }
+
+  preset_config.version = NV_ENC_PRESET_CONFIG_VER;
+  preset_config.presetCfg.version = NV_ENC_CONFIG_VER;
+
+  nv_ret =
+      NvEncGetEncodePresetConfig (GST_NV_BASE_ENC (h264enc)->encoder,
+      params->encodeGUID, params->presetGUID, &preset_config);
+  if (nv_ret != NV_ENC_SUCCESS) {
+    GST_ELEMENT_ERROR (h264enc, LIBRARY, SETTINGS, (NULL),
+        ("Failed to get encode preset configuration: %d", nv_ret));
+    return FALSE;
+  }
+  params->encodeConfig = &preset_config.presetCfg;
+
+  /* override some defaults */
+  GST_LOG_OBJECT (h264enc, "setting parameters");
+  preset_config.presetCfg.version = NV_ENC_CONFIG_VER;
+  preset_config.presetCfg.profileGUID = selected_profile;
+  preset_config.presetCfg.encodeCodecConfig.h264Config.level = level_idc;
+  preset_config.presetCfg.encodeCodecConfig.h264Config.chromaFormatIDC = 1;
+  if (GST_VIDEO_INFO_FORMAT (info) == GST_VIDEO_FORMAT_Y444) {
+    GST_DEBUG_OBJECT (h264enc, "have Y444 input, setting config accordingly");
+    preset_config.presetCfg.encodeCodecConfig.
+        h264Config.separateColourPlaneFlag = 1;
+    preset_config.presetCfg.encodeCodecConfig.h264Config.chromaFormatIDC = 3;
+  }
+
+  /* FIXME: make property */
+  preset_config.presetCfg.encodeCodecConfig.h264Config.outputAUD = 1;
+
+  if (GST_VIDEO_INFO_IS_INTERLACED (info)) {
+    if (GST_VIDEO_INFO_INTERLACE_MODE (info) ==
+        GST_VIDEO_INTERLACE_MODE_INTERLEAVED
+        || GST_VIDEO_INFO_INTERLACE_MODE (info) ==
+        GST_VIDEO_INTERLACE_MODE_MIXED) {
+      preset_config.presetCfg.frameFieldMode =
+          NV_ENC_PARAMS_FRAME_FIELD_MODE_FIELD;
+    }
+  }
+
+  if (info->fps_d > 0 && info->fps_n > 0) {
+    params->frameRateNum = info->fps_n;
+    params->frameRateDen = info->fps_d;
+  } else {
+    GST_FIXME_OBJECT (h264enc, "variable framerate");
+  }
+
+  if (old_state) {
+    nv_ret =
+        NvEncReconfigureEncoder (GST_NV_BASE_ENC (h264enc)->encoder,
+        &reconfigure_params);
+  } else {
+    nv_ret =
+        NvEncInitializeEncoder (GST_NV_BASE_ENC (h264enc)->encoder, params);
+  }
+
+  if (nv_ret != NV_ENC_SUCCESS) {
+    GST_ELEMENT_ERROR (h264enc, LIBRARY, SETTINGS, (NULL),
+        ("Failed to %sinit encoder: %d", old_state ? "re" : "", nv_ret));
+    return FALSE;
+  }
+  GST_INFO_OBJECT (h264enc, "configured encoder");
+
+  return TRUE;
+}
+
+/* GstNvBaseEncClass::set_pic_params implementation: fills in the H.264
+ * specific per-picture parameters. @enc and @frame are currently unused.
+ * Always returns TRUE. */
+static gboolean
+gst_nv_h264_enc_set_pic_params (GstNvBaseEnc * enc, GstVideoCodecFrame * frame,
+    NV_ENC_PIC_PARAMS * pic_params)
+{
+  /* encode whole picture in one single slice */
+  pic_params->codecPicParams.h264PicParams.sliceMode = 0;
+  pic_params->codecPicParams.h264PicParams.sliceModeData = 0;
+
+  return TRUE;
+}
+
+/* GObject::set_property implementation. This function handles no property
+ * ids yet, so every id is reported as invalid. */
+static void
+gst_nv_h264_enc_set_property (GObject * object, guint prop_id,
+    const GValue * value, GParamSpec * pspec)
+{
+  switch (prop_id) {
+    default:
+      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+      break;
+  }
+}
+
+/* GObject::get_property implementation. This function handles no property
+ * ids yet, so every id is reported as invalid. */
+static void
+gst_nv_h264_enc_get_property (GObject * object, guint prop_id, GValue * value,
+    GParamSpec * pspec)
+{
+  switch (prop_id) {
+    default:
+      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+      break;
+  }
+}
diff --git a/sys/nvenc/gstnvh264enc.h b/sys/nvenc/gstnvh264enc.h
new file mode 100644
index 0000000000..8ed65748c2
--- /dev/null
+++ b/sys/nvenc/gstnvh264enc.h
@@ -0,0 +1,59 @@
+/* GStreamer NVENC plugin
+ * Copyright (C) 2015 Centricular Ltd
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __GST_NV_H264_ENC_H_INCLUDED__
+#define __GST_NV_H264_ENC_H_INCLUDED__
+
+#include "gstnvbaseenc.h"
+
+/* GObject boilerplate: type id, instance/class casts and type checks. */
+#define GST_TYPE_NV_H264_ENC (gst_nv_h264_enc_get_type())
+#define GST_NV_H264_ENC(obj) \
+  (G_TYPE_CHECK_INSTANCE_CAST((obj),GST_TYPE_NV_H264_ENC,GstNvH264Enc))
+#define GST_NV_H264_ENC_CLASS(klass) \
+  (G_TYPE_CHECK_CLASS_CAST((klass),GST_TYPE_NV_H264_ENC,GstNvH264EncClass))
+#define GST_NV_H264_ENC_GET_CLASS(obj) \
+  (G_TYPE_INSTANCE_GET_CLASS((obj),GST_TYPE_NV_H264_ENC,GstNvH264EncClass))
+#define GST_IS_NV_H264_ENC(obj) \
+  (G_TYPE_CHECK_INSTANCE_TYPE((obj),GST_TYPE_NV_H264_ENC))
+#define GST_IS_NV_H264_ENC_CLASS(klass) \
+  (G_TYPE_CHECK_CLASS_TYPE((klass),GST_TYPE_NV_H264_ENC))
+
+typedef struct {
+  GstNvBaseEnc base_nvenc;  /* parent instance struct */
+
+  /* the supported encoding profiles */
+  GValue        * supported_profiles;             /* OBJECT LOCK */
+
+  GstVideoCodecState *input_state;
+  gboolean            gl_input;
+
+  /* supported interlacing input modes.
+   * 0 = none, 1 = fields, 2 = interleaved */
+  gint            interlace_modes;
+} GstNvH264Enc;
+
+typedef struct {
+  GstNvBaseEncClass video_encoder_class;  /* parent class struct */
+} GstNvH264EncClass;
+
+G_GNUC_INTERNAL
+GType gst_nv_h264_enc_get_type (void);  /* backs GST_TYPE_NV_H264_ENC */
+
+#endif /* __GST_NV_H264_ENC_H_INCLUDED__ */