/* GStreamer NVENC plugin * Copyright (C) 2015 Centricular Ltd * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, * Boston, MA 02110-1301, USA. */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "gstnvbaseenc.h" #include "gstcudautils.h" #include #include #define GST_CAT_DEFAULT gst_nvenc_debug #if HAVE_NVCODEC_GST_GL #include #endif /* TODO: * - reset last_flow on FLUSH_STOP (seeking) */ /* This currently supports both 5.x and 6.x versions of the NvEncodeAPI.h * header which are mostly API compatible. */ #define SUPPORTED_GL_APIS GST_GL_API_OPENGL3 /* magic pointer value we can put in the async queue to signal shut down */ #define SHUTDOWN_COOKIE ((gpointer)GINT_TO_POINTER (1)) #define parent_class gst_nv_base_enc_parent_class G_DEFINE_ABSTRACT_TYPE (GstNvBaseEnc, gst_nv_base_enc, GST_TYPE_VIDEO_ENCODER); #define GST_TYPE_NV_PRESET (gst_nv_preset_get_type()) static GType gst_nv_preset_get_type (void) { static GType nv_preset_type = 0; static const GEnumValue presets[] = { {GST_NV_PRESET_DEFAULT, "Default", "default"}, {GST_NV_PRESET_HP, "High Performance", "hp"}, {GST_NV_PRESET_HQ, "High Quality", "hq"}, /* {GST_NV_PRESET_BD, "BD", "bd"}, */ {GST_NV_PRESET_LOW_LATENCY_DEFAULT, "Low Latency", "low-latency"}, {GST_NV_PRESET_LOW_LATENCY_HQ, "Low Latency, High Quality", "low-latency-hq"}, {GST_NV_PRESET_LOW_LATENCY_HP, "Low Latency, High Performance", "low-latency-hp"}, {GST_NV_PRESET_LOSSLESS_DEFAULT, "Lossless", "lossless"}, {GST_NV_PRESET_LOSSLESS_HP, "Lossless, High Performance", "lossless-hp"}, {0, NULL, NULL}, }; if (!nv_preset_type) { nv_preset_type = g_enum_register_static ("GstNvPreset", presets); } return nv_preset_type; } static GUID _nv_preset_to_guid (GstNvPreset preset) { GUID null = { 0, }; switch (preset) { #define CASE(gst,nv) case G_PASTE(GST_NV_PRESET_,gst): return G_PASTE(G_PASTE(NV_ENC_PRESET_,nv),_GUID) CASE (DEFAULT, DEFAULT); CASE (HP, HP); CASE (HQ, HQ); /* CASE (BD, BD);*/ CASE (LOW_LATENCY_DEFAULT, LOW_LATENCY_DEFAULT); CASE (LOW_LATENCY_HQ, LOW_LATENCY_HQ); CASE (LOW_LATENCY_HP, LOW_LATENCY_HQ); CASE (LOSSLESS_DEFAULT, LOSSLESS_DEFAULT); CASE (LOSSLESS_HP, LOSSLESS_HP); #undef CASE default: return null; } } #define GST_TYPE_NV_RC_MODE (gst_nv_rc_mode_get_type()) static GType gst_nv_rc_mode_get_type (void) { static GType nv_rc_mode_type = 0; static const GEnumValue modes[] = { {GST_NV_RC_MODE_DEFAULT, "Default", "default"}, {GST_NV_RC_MODE_CONSTQP, "Constant Quantization", "constqp"}, {GST_NV_RC_MODE_CBR, "Constant Bit Rate", "cbr"}, {GST_NV_RC_MODE_VBR, "Variable Bit Rate", "vbr"}, {GST_NV_RC_MODE_VBR_MINQP, "Variable Bit Rate " "(with minimum quantization parameter, DEPRECATED)", "vbr-minqp"}, {GST_NV_RC_MODE_CBR_LOWDELAY_HQ, "Low-Delay CBR, High Quality", "cbr-ld-hq"}, {GST_NV_RC_MODE_CBR_HQ, "CBR, High Quality (slower)", "cbr-hq"}, {GST_NV_RC_MODE_VBR_HQ, "VBR, High Quality (slower)", "vbr-hq"}, {0, NULL, NULL}, }; if (!nv_rc_mode_type) { nv_rc_mode_type = g_enum_register_static ("GstNvRCMode", modes); } return nv_rc_mode_type; } static NV_ENC_PARAMS_RC_MODE _rc_mode_to_nv (GstNvRCMode mode) { switch (mode) { case GST_NV_RC_MODE_DEFAULT: return NV_ENC_PARAMS_RC_VBR; #define CASE(gst,nv) case G_PASTE(GST_NV_RC_MODE_,gst): return G_PASTE(NV_ENC_PARAMS_RC_,nv) CASE (CONSTQP, CONSTQP); CASE (CBR, CBR); CASE (VBR, VBR); CASE (VBR_MINQP, VBR_MINQP); CASE (CBR_LOWDELAY_HQ, CBR_LOWDELAY_HQ); CASE (CBR_HQ, CBR_HQ); CASE (VBR_HQ, VBR_HQ); #undef CASE default: return NV_ENC_PARAMS_RC_VBR; } } enum { PROP_0, PROP_DEVICE_ID, PROP_PRESET, PROP_BITRATE, PROP_RC_MODE, PROP_QP_MIN, PROP_QP_MAX, PROP_QP_CONST, PROP_GOP_SIZE, PROP_MAX_BITRATE, PROP_SPATIAL_AQ, PROP_AQ_STRENGTH, PROP_NON_REF_P, PROP_ZEROLATENCY, PROP_STRICT_GOP, PROP_CONST_QUALITY, PROP_I_ADAPT, PROP_QP_MIN_I, PROP_QP_MIN_P, PROP_QP_MIN_B, PROP_QP_MAX_I, PROP_QP_MAX_P, PROP_QP_MAX_B, PROP_QP_CONST_I, PROP_QP_CONST_P, PROP_QP_CONST_B, }; #define DEFAULT_PRESET GST_NV_PRESET_DEFAULT #define DEFAULT_BITRATE 0 #define DEFAULT_RC_MODE GST_NV_RC_MODE_DEFAULT #define DEFAULT_QP_MIN -1 #define DEFAULT_QP_MAX -1 #define DEFAULT_QP_CONST -1 #define DEFAULT_GOP_SIZE 75 #define DEFAULT_MAX_BITRATE 0 #define DEFAULT_SPATIAL_AQ FALSE #define DEFAULT_AQ_STRENGTH 0 #define DEFAULT_NON_REF_P FALSE #define DEFAULT_ZEROLATENCY FALSE #define DEFAULT_STRICT_GOP FALSE #define DEFAULT_CONST_QUALITY 0 #define DEFAULT_I_ADAPT FALSE #define DEFAULT_QP_DETAIL -1 /* This lock is needed to prevent the situation where multiple encoders are * initialised at the same time which appears to cause excessive CPU usage over * some period of time. */ G_LOCK_DEFINE_STATIC (initialization_lock); typedef struct { /* Allocated CUDA device memory and registered to NVENC to be used as input * buffer regardless of the input memory type (OpenGL or System memory) */ CUdeviceptr cuda_pointer; /* The stride of allocated CUDA device memory (CuMemAllocPitch). * This might be different from the stride of GstVideoInfo */ gsize cuda_stride; /* Registered NVENC resource (cuda_pointer is used for this) */ NV_ENC_REGISTER_RESOURCE nv_resource; /* Mapped resource of nv_resource */ NV_ENC_MAP_INPUT_RESOURCE nv_mapped_resource; /* whether nv_mapped_resource was mapped via NvEncMapInputResource() * and therefore should unmap via NvEncUnmapInputResource or not */ gboolean mapped; } GstNvEncInputResource; /* The pair of GstNvEncInputResource () and NV_ENC_OUTPUT_PTR. * The number of input/output resource are always identical */ typedef struct { GstNvEncInputResource *in_buf; NV_ENC_OUTPUT_PTR out_buf; } GstNvEncFrameState; static gboolean gst_nv_base_enc_open (GstVideoEncoder * enc); static gboolean gst_nv_base_enc_close (GstVideoEncoder * enc); static gboolean gst_nv_base_enc_start (GstVideoEncoder * enc); static gboolean gst_nv_base_enc_stop (GstVideoEncoder * enc); static void gst_nv_base_enc_set_context (GstElement * element, GstContext * context); static gboolean gst_nv_base_enc_sink_query (GstVideoEncoder * enc, GstQuery * query); static gboolean gst_nv_base_enc_set_format (GstVideoEncoder * enc, GstVideoCodecState * state); static GstFlowReturn gst_nv_base_enc_handle_frame (GstVideoEncoder * enc, GstVideoCodecFrame * frame); static void gst_nv_base_enc_free_buffers (GstNvBaseEnc * nvenc); static GstFlowReturn gst_nv_base_enc_finish (GstVideoEncoder * enc); static void gst_nv_base_enc_set_property (GObject * object, guint prop_id, const GValue * value, GParamSpec * pspec); static void gst_nv_base_enc_get_property (GObject * object, guint prop_id, GValue * value, GParamSpec * pspec); static void gst_nv_base_enc_finalize (GObject * obj); static GstCaps *gst_nv_base_enc_getcaps (GstVideoEncoder * enc, GstCaps * filter); static gboolean gst_nv_base_enc_stop_bitstream_thread (GstNvBaseEnc * nvenc, gboolean force); static gboolean gst_nv_base_enc_drain_encoder (GstNvBaseEnc * nvenc); static void gst_nv_base_enc_class_init (GstNvBaseEncClass * klass) { GObjectClass *gobject_class = G_OBJECT_CLASS (klass); GstElementClass *element_class = GST_ELEMENT_CLASS (klass); GstVideoEncoderClass *videoenc_class = GST_VIDEO_ENCODER_CLASS (klass); gobject_class->set_property = gst_nv_base_enc_set_property; gobject_class->get_property = gst_nv_base_enc_get_property; gobject_class->finalize = gst_nv_base_enc_finalize; element_class->set_context = GST_DEBUG_FUNCPTR (gst_nv_base_enc_set_context); videoenc_class->open = GST_DEBUG_FUNCPTR (gst_nv_base_enc_open); videoenc_class->close = GST_DEBUG_FUNCPTR (gst_nv_base_enc_close); videoenc_class->start = GST_DEBUG_FUNCPTR (gst_nv_base_enc_start); videoenc_class->stop = GST_DEBUG_FUNCPTR (gst_nv_base_enc_stop); videoenc_class->set_format = GST_DEBUG_FUNCPTR (gst_nv_base_enc_set_format); videoenc_class->getcaps = GST_DEBUG_FUNCPTR (gst_nv_base_enc_getcaps); videoenc_class->handle_frame = GST_DEBUG_FUNCPTR (gst_nv_base_enc_handle_frame); videoenc_class->finish = GST_DEBUG_FUNCPTR (gst_nv_base_enc_finish); videoenc_class->sink_query = GST_DEBUG_FUNCPTR (gst_nv_base_enc_sink_query); g_object_class_install_property (gobject_class, PROP_DEVICE_ID, g_param_spec_uint ("cuda-device-id", "Cuda Device ID", "Get the GPU device to use for operations", 0, G_MAXUINT, 0, G_PARAM_READABLE | G_PARAM_STATIC_STRINGS)); g_object_class_install_property (gobject_class, PROP_PRESET, g_param_spec_enum ("preset", "Encoding Preset", "Encoding Preset", GST_TYPE_NV_PRESET, DEFAULT_PRESET, G_PARAM_READWRITE | GST_PARAM_MUTABLE_PLAYING | G_PARAM_STATIC_STRINGS)); g_object_class_install_property (gobject_class, PROP_RC_MODE, g_param_spec_enum ("rc-mode", "RC Mode", "Rate Control Mode", GST_TYPE_NV_RC_MODE, DEFAULT_RC_MODE, G_PARAM_READWRITE | GST_PARAM_MUTABLE_PLAYING | G_PARAM_STATIC_STRINGS)); g_object_class_install_property (gobject_class, PROP_QP_MIN, g_param_spec_int ("qp-min", "Minimum Quantizer", "Minimum quantizer (-1 = from NVENC preset)", -1, 51, DEFAULT_QP_MIN, G_PARAM_READWRITE | GST_PARAM_MUTABLE_PLAYING | G_PARAM_STATIC_STRINGS)); g_object_class_install_property (gobject_class, PROP_QP_MAX, g_param_spec_int ("qp-max", "Maximum Quantizer", "Maximum quantizer (-1 = from NVENC preset)", -1, 51, DEFAULT_QP_MAX, G_PARAM_READWRITE | GST_PARAM_MUTABLE_PLAYING | G_PARAM_STATIC_STRINGS)); g_object_class_install_property (gobject_class, PROP_QP_CONST, g_param_spec_int ("qp-const", "Constant Quantizer", "Constant quantizer (-1 = from NVENC preset)", -1, 51, DEFAULT_QP_CONST, G_PARAM_READWRITE | GST_PARAM_MUTABLE_PLAYING | G_PARAM_STATIC_STRINGS)); g_object_class_install_property (gobject_class, PROP_GOP_SIZE, g_param_spec_int ("gop-size", "GOP size", "Number of frames between intra frames (-1 = infinite)", -1, G_MAXINT, DEFAULT_GOP_SIZE, (GParamFlags) (G_PARAM_READWRITE | GST_PARAM_MUTABLE_PLAYING | G_PARAM_STATIC_STRINGS))); g_object_class_install_property (gobject_class, PROP_BITRATE, g_param_spec_uint ("bitrate", "Bitrate", "Bitrate in kbit/sec (0 = from NVENC preset)", 0, 2000 * 1024, DEFAULT_BITRATE, G_PARAM_READWRITE | GST_PARAM_MUTABLE_PLAYING | G_PARAM_STATIC_STRINGS)); g_object_class_install_property (gobject_class, PROP_MAX_BITRATE, g_param_spec_uint ("max-bitrate", "Max Bitrate", "Maximum Bitrate in kbit/sec (ignored for CBR mode)", 0, 2000 * 1024, DEFAULT_MAX_BITRATE, G_PARAM_READWRITE | GST_PARAM_MUTABLE_PLAYING | G_PARAM_STATIC_STRINGS)); g_object_class_install_property (gobject_class, PROP_SPATIAL_AQ, g_param_spec_boolean ("spatial-aq", "Spatial AQ", "Spatial Adaptive Quantization", DEFAULT_SPATIAL_AQ, G_PARAM_READWRITE | GST_PARAM_MUTABLE_PLAYING | G_PARAM_STATIC_STRINGS)); g_object_class_install_property (gobject_class, PROP_AQ_STRENGTH, g_param_spec_uint ("aq-strength", "AQ Strength", "Adaptive Quantization Strength when spatial-aq is enabled" " from 1 (low) to 15 (aggressive), (0 = autoselect)", 0, 15, DEFAULT_AQ_STRENGTH, G_PARAM_READWRITE | GST_PARAM_MUTABLE_PLAYING | G_PARAM_STATIC_STRINGS)); g_object_class_install_property (gobject_class, PROP_NON_REF_P, g_param_spec_boolean ("nonref-p", "Nonref P", "Automatic insertion of non-reference P-frames", DEFAULT_NON_REF_P, G_PARAM_READWRITE | GST_PARAM_MUTABLE_PLAYING | G_PARAM_STATIC_STRINGS)); g_object_class_install_property (gobject_class, PROP_ZEROLATENCY, g_param_spec_boolean ("zerolatency", "Zerolatency", "Zero latency operation (no reordering delay)", DEFAULT_ZEROLATENCY, G_PARAM_READWRITE | GST_PARAM_MUTABLE_PLAYING | G_PARAM_STATIC_STRINGS)); g_object_class_install_property (gobject_class, PROP_STRICT_GOP, g_param_spec_boolean ("strict-gop", "Strict GOP", "Minimize GOP-to-GOP rate fluctuations", DEFAULT_STRICT_GOP, G_PARAM_READWRITE | GST_PARAM_MUTABLE_PLAYING | G_PARAM_STATIC_STRINGS)); g_object_class_install_property (gobject_class, PROP_CONST_QUALITY, g_param_spec_double ("const-quality", "Constant Quality", "Target Constant Quality level for VBR mode (0 = automatic)", 0, 51, DEFAULT_CONST_QUALITY, G_PARAM_READWRITE | GST_PARAM_MUTABLE_PLAYING | G_PARAM_STATIC_STRINGS)); g_object_class_install_property (gobject_class, PROP_I_ADAPT, g_param_spec_boolean ("i-adapt", "I Adapt", "Enable adaptive I-frame insert when lookahead is enabled", DEFAULT_I_ADAPT, G_PARAM_READWRITE | GST_PARAM_MUTABLE_READY | G_PARAM_STATIC_STRINGS)); g_object_class_install_property (gobject_class, PROP_QP_MIN_I, g_param_spec_int ("qp-min-i", "QP Min I", "Minimum QP value for I frame, When >= 0, \"qp-min-p\" and " "\"qp-min-b\" should be also >= 0. Overwritten by \"qp-min\"" " (-1 = from NVENC preset)", -1, 51, DEFAULT_QP_DETAIL, G_PARAM_READWRITE | GST_PARAM_MUTABLE_PLAYING | G_PARAM_STATIC_STRINGS)); g_object_class_install_property (gobject_class, PROP_QP_MIN_P, g_param_spec_int ("qp-min-p", "QP Min P", "Minimum QP value for P frame, When >= 0, \"qp-min-i\" and " "\"qp-min-b\" should be also >= 0. Overwritten by \"qp-min\"" " (-1 = from NVENC preset)", -1, 51, DEFAULT_QP_DETAIL, G_PARAM_READWRITE | GST_PARAM_MUTABLE_PLAYING | G_PARAM_STATIC_STRINGS)); g_object_class_install_property (gobject_class, PROP_QP_MIN_B, g_param_spec_int ("qp-min-b", "QP Min B", "Minimum QP value for B frame, When >= 0, \"qp-min-i\" and " "\"qp-min-p\" should be also >= 0. Overwritten by \"qp-min\"" " (-1 = from NVENC preset)", -1, 51, DEFAULT_QP_DETAIL, G_PARAM_READWRITE | GST_PARAM_MUTABLE_PLAYING | G_PARAM_STATIC_STRINGS)); g_object_class_install_property (gobject_class, PROP_QP_MAX_I, g_param_spec_int ("qp-max-i", "QP Max I", "Maximum QP value for I frame, When >= 0, \"qp-max-p\" and " "\"qp-max-b\" should be also >= 0. Overwritten by \"qp-max\"" " (-1 = from NVENC preset)", -1, 51, DEFAULT_QP_DETAIL, G_PARAM_READWRITE | GST_PARAM_MUTABLE_PLAYING | G_PARAM_STATIC_STRINGS)); g_object_class_install_property (gobject_class, PROP_QP_MAX_P, g_param_spec_int ("qp-max-p", "QP Max P", "Maximum QP value for P frame, When >= 0, \"qp-max-i\" and " "\"qp-max-b\" should be also >= 0. Overwritten by \"qp-max\"" " (-1 = from NVENC preset)", -1, 51, DEFAULT_QP_DETAIL, G_PARAM_READWRITE | GST_PARAM_MUTABLE_PLAYING | G_PARAM_STATIC_STRINGS)); g_object_class_install_property (gobject_class, PROP_QP_MAX_B, g_param_spec_int ("qp-max-b", "QP Max B", "Maximum QP value for B frame, When >= 0, \"qp-max-i\" and " "\"qp-max-p\" should be also >= 0. Overwritten by \"qp-max\"" " (-1 = from NVENC preset)", -1, 51, DEFAULT_QP_DETAIL, G_PARAM_READWRITE | GST_PARAM_MUTABLE_PLAYING | G_PARAM_STATIC_STRINGS)); g_object_class_install_property (gobject_class, PROP_QP_CONST_I, g_param_spec_int ("qp-const-i", "QP Const I", "Constant QP value for I frame, When >= 0, \"qp-const-p\" and " "\"qp-const-b\" should be also >= 0. Overwritten by \"qp-const\"" " (-1 = from NVENC preset)", -1, 51, DEFAULT_QP_DETAIL, G_PARAM_READWRITE | GST_PARAM_MUTABLE_PLAYING | G_PARAM_STATIC_STRINGS)); g_object_class_install_property (gobject_class, PROP_QP_CONST_P, g_param_spec_int ("qp-const-p", "QP Const P", "Constant QP value for P frame, When >= 0, \"qp-const-i\" and " "\"qp-const-b\" should be also >= 0. Overwritten by \"qp-const\"" " (-1 = from NVENC preset)", -1, 51, DEFAULT_QP_DETAIL, G_PARAM_READWRITE | GST_PARAM_MUTABLE_PLAYING | G_PARAM_STATIC_STRINGS)); g_object_class_install_property (gobject_class, PROP_QP_CONST_B, g_param_spec_int ("qp-const-b", "QP Const B", "Constant QP value for B frame, When >= 0, \"qp-const-i\" and " "\"qp-const-p\" should be also >= 0. Overwritten by \"qp-const\"" " (-1 = from NVENC preset)", -1, 51, DEFAULT_QP_DETAIL, G_PARAM_READWRITE | GST_PARAM_MUTABLE_PLAYING | G_PARAM_STATIC_STRINGS)); } static gboolean gst_nv_base_enc_open_encode_session (GstNvBaseEnc * nvenc) { NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS params = { 0, }; NVENCSTATUS nv_ret; params.version = gst_nvenc_get_open_encode_session_ex_params_version (); params.apiVersion = gst_nvenc_get_api_version (); params.device = gst_cuda_context_get_handle (nvenc->cuda_ctx); params.deviceType = NV_ENC_DEVICE_TYPE_CUDA; nv_ret = NvEncOpenEncodeSessionEx (¶ms, &nvenc->encoder); return nv_ret == NV_ENC_SUCCESS; } static gboolean gst_nv_base_enc_open (GstVideoEncoder * enc) { GstNvBaseEnc *nvenc = GST_NV_BASE_ENC (enc); GstNvBaseEncClass *klass = GST_NV_BASE_ENC_GET_CLASS (enc); GValue *formats = NULL; CUresult cuda_ret; if (!gst_cuda_ensure_element_context (GST_ELEMENT_CAST (enc), klass->cuda_device_id, &nvenc->cuda_ctx)) { GST_ERROR_OBJECT (nvenc, "failed to create CUDA context"); return FALSE; } if (gst_cuda_context_push (nvenc->cuda_ctx)) { cuda_ret = CuStreamCreate (&nvenc->cuda_stream, CU_STREAM_DEFAULT); if (!gst_cuda_result (cuda_ret)) { GST_WARNING_OBJECT (nvenc, "Could not create cuda stream, will use default stream"); nvenc->cuda_stream = NULL; } gst_cuda_context_pop (NULL); } if (!gst_nv_base_enc_open_encode_session (nvenc)) { GST_ERROR ("Failed to create NVENC encoder session"); gst_clear_object (&nvenc->cuda_ctx); return FALSE; } GST_INFO ("created NVENC encoder %p", nvenc->encoder); /* query supported input formats */ if (!gst_nvenc_get_supported_input_formats (nvenc->encoder, klass->codec_id, &formats)) { GST_WARNING_OBJECT (nvenc, "No supported input formats"); gst_nv_base_enc_close (enc); return FALSE; } nvenc->input_formats = formats; return TRUE; } static void gst_nv_base_enc_set_context (GstElement * element, GstContext * context) { GstNvBaseEnc *nvenc = GST_NV_BASE_ENC (element); GstNvBaseEncClass *klass = GST_NV_BASE_ENC_GET_CLASS (nvenc); if (gst_cuda_handle_set_context (element, context, klass->cuda_device_id, &nvenc->cuda_ctx)) { goto done; } #if HAVE_NVCODEC_GST_GL gst_gl_handle_set_context (element, context, (GstGLDisplay **) & nvenc->display, (GstGLContext **) & nvenc->other_context); if (nvenc->display) gst_gl_display_filter_gl_api (GST_GL_DISPLAY (nvenc->display), SUPPORTED_GL_APIS); #endif done: GST_ELEMENT_CLASS (parent_class)->set_context (element, context); } static gboolean gst_nv_base_enc_sink_query (GstVideoEncoder * enc, GstQuery * query) { GstNvBaseEnc *nvenc = GST_NV_BASE_ENC (enc); switch (GST_QUERY_TYPE (query)) { case GST_QUERY_CONTEXT:{ if (gst_cuda_handle_context_query (GST_ELEMENT (nvenc), query, nvenc->cuda_ctx)) return TRUE; #if HAVE_NVCODEC_GST_GL { gboolean ret; ret = gst_gl_handle_context_query ((GstElement *) nvenc, query, (GstGLDisplay *) nvenc->display, NULL, (GstGLContext *) nvenc->other_context); if (nvenc->display) { gst_gl_display_filter_gl_api (GST_GL_DISPLAY (nvenc->display), SUPPORTED_GL_APIS); } if (ret) return ret; } #endif break; } default: break; } return GST_VIDEO_ENCODER_CLASS (parent_class)->sink_query (enc, query); } static gboolean gst_nv_base_enc_start (GstVideoEncoder * enc) { GstNvBaseEnc *nvenc = GST_NV_BASE_ENC (enc); nvenc->available_queue = g_async_queue_new (); nvenc->pending_queue = g_async_queue_new (); nvenc->bitstream_queue = g_async_queue_new (); nvenc->items = g_array_new (FALSE, TRUE, sizeof (GstNvEncFrameState)); nvenc->last_flow = GST_FLOW_OK; memset (&nvenc->init_params, 0, sizeof (NV_ENC_INITIALIZE_PARAMS)); memset (&nvenc->config, 0, sizeof (NV_ENC_CONFIG)); #if HAVE_NVCODEC_GST_GL { gst_gl_ensure_element_data (GST_ELEMENT (nvenc), (GstGLDisplay **) & nvenc->display, (GstGLContext **) & nvenc->other_context); if (nvenc->display) gst_gl_display_filter_gl_api (GST_GL_DISPLAY (nvenc->display), SUPPORTED_GL_APIS); } #endif /* DTS can be negative if bframe was enabled */ gst_video_encoder_set_min_pts (enc, GST_SECOND * 60 * 60 * 1000); return TRUE; } static gboolean gst_nv_base_enc_stop (GstVideoEncoder * enc) { GstNvBaseEnc *nvenc = GST_NV_BASE_ENC (enc); gst_nv_base_enc_stop_bitstream_thread (nvenc, TRUE); gst_nv_base_enc_free_buffers (nvenc); if (nvenc->input_state) { gst_video_codec_state_unref (nvenc->input_state); nvenc->input_state = NULL; } if (nvenc->available_queue) { g_async_queue_unref (nvenc->available_queue); nvenc->available_queue = NULL; } if (nvenc->pending_queue) { g_async_queue_unref (nvenc->pending_queue); nvenc->pending_queue = NULL; } if (nvenc->bitstream_queue) { g_async_queue_unref (nvenc->bitstream_queue); nvenc->bitstream_queue = NULL; } if (nvenc->display) { gst_object_unref (nvenc->display); nvenc->display = NULL; } if (nvenc->other_context) { gst_object_unref (nvenc->other_context); nvenc->other_context = NULL; } if (nvenc->items) { g_array_free (nvenc->items, TRUE); nvenc->items = NULL; } return TRUE; } static void check_formats (const gchar * str, guint * max_chroma, guint * max_bit_minus8) { if (!str) return; if (g_strrstr (str, "-444") || g_strrstr (str, "-4:4:4")) *max_chroma = 2; else if ((g_strrstr (str, "-4:2:2") || g_strrstr (str, "-422")) && *max_chroma < 1) *max_chroma = 1; if (g_strrstr (str, "-12")) *max_bit_minus8 = 4; else if (g_strrstr (str, "-10") && *max_bit_minus8 < 2) *max_bit_minus8 = 2; } static gboolean gst_nv_base_enc_set_filtered_input_formats (GstNvBaseEnc * nvenc, GstCaps * caps, const GValue * input_formats, guint max_chroma, guint max_bit_minus8) { gint i; GValue supported_format = G_VALUE_INIT; gint num_format = 0; const GValue *last_format = NULL; g_value_init (&supported_format, GST_TYPE_LIST); for (i = 0; i < gst_value_list_get_size (input_formats); i++) { const GValue *val; GstVideoFormat format; val = gst_value_list_get_value (input_formats, i); format = gst_video_format_from_string (g_value_get_string (val)); switch (format) { case GST_VIDEO_FORMAT_NV12: case GST_VIDEO_FORMAT_YV12: case GST_VIDEO_FORMAT_I420: /* 8bits 4:2:0 formats are always supported */ case GST_VIDEO_FORMAT_BGRA: case GST_VIDEO_FORMAT_RGBA: /* NOTE: RGB formats seems to also supported format, which are * encoded to 4:2:0 formats */ gst_value_list_append_value (&supported_format, val); last_format = val; num_format++; break; case GST_VIDEO_FORMAT_Y444: if (max_chroma >= 2) { gst_value_list_append_value (&supported_format, val); last_format = val; num_format++; } break; case GST_VIDEO_FORMAT_P010_10LE: case GST_VIDEO_FORMAT_P010_10BE: case GST_VIDEO_FORMAT_BGR10A2_LE: case GST_VIDEO_FORMAT_RGB10A2_LE: case GST_VIDEO_FORMAT_Y444_16LE: case GST_VIDEO_FORMAT_Y444_16BE: if (max_bit_minus8 >= 2) { gst_value_list_append_value (&supported_format, val); last_format = val; num_format++; } break; default: break; } } if (num_format == 0) { g_value_unset (&supported_format); GST_WARNING_OBJECT (nvenc, "Cannot find matching input format"); return FALSE; } if (num_format > 1) gst_caps_set_value (caps, "format", &supported_format); else gst_caps_set_value (caps, "format", last_format); g_value_unset (&supported_format); return TRUE; } static GstCaps * gst_nv_base_enc_getcaps (GstVideoEncoder * enc, GstCaps * filter) { GstNvBaseEnc *nvenc = GST_NV_BASE_ENC (enc); GstNvBaseEncClass *klass = GST_NV_BASE_ENC_GET_CLASS (enc); GstCaps *supported_incaps = NULL; GstCaps *template_caps, *caps, *allowed; template_caps = gst_pad_get_pad_template_caps (enc->sinkpad); allowed = gst_pad_get_allowed_caps (enc->srcpad); GST_LOG_OBJECT (enc, "template caps %" GST_PTR_FORMAT, template_caps); GST_LOG_OBJECT (enc, "allowed caps %" GST_PTR_FORMAT, allowed); if (!allowed) { /* no peer */ supported_incaps = template_caps; template_caps = NULL; goto done; } else if (gst_caps_is_empty (allowed)) { /* couldn't be negotiated, just return empty caps */ gst_caps_unref (template_caps); return allowed; } GST_OBJECT_LOCK (nvenc); if (nvenc->input_formats != NULL) { GValue *val; gboolean has_profile = FALSE; guint max_chroma_index = 0; guint max_bit_minus8 = 0; gint i, j; for (i = 0; i < gst_caps_get_size (allowed); i++) { const GstStructure *allowed_s = gst_caps_get_structure (allowed, i); const GValue *val; if ((val = gst_structure_get_value (allowed_s, "profile"))) { if (G_VALUE_HOLDS_STRING (val)) { check_formats (g_value_get_string (val), &max_chroma_index, &max_bit_minus8); has_profile = TRUE; } else if (GST_VALUE_HOLDS_LIST (val)) { for (j = 0; j < gst_value_list_get_size (val); j++) { const GValue *vlist = gst_value_list_get_value (val, j); if (G_VALUE_HOLDS_STRING (vlist)) { check_formats (g_value_get_string (vlist), &max_chroma_index, &max_bit_minus8); has_profile = TRUE; } } } } } GST_LOG_OBJECT (enc, "downstream requested profile %d, max bitdepth %d, max chroma %d", has_profile, max_bit_minus8 + 8, max_chroma_index); supported_incaps = gst_caps_copy (template_caps); if (!has_profile || !gst_nv_base_enc_set_filtered_input_formats (nvenc, supported_incaps, nvenc->input_formats, max_chroma_index, max_bit_minus8)) { gst_caps_set_value (supported_incaps, "format", nvenc->input_formats); } val = gst_nvenc_get_interlace_modes (nvenc->encoder, klass->codec_id); gst_caps_set_value (supported_incaps, "interlace-mode", val); g_value_unset (val); g_free (val); GST_LOG_OBJECT (enc, "codec input caps %" GST_PTR_FORMAT, supported_incaps); GST_LOG_OBJECT (enc, " template caps %" GST_PTR_FORMAT, template_caps); caps = gst_caps_intersect (template_caps, supported_incaps); gst_caps_unref (supported_incaps); supported_incaps = caps; GST_LOG_OBJECT (enc, " supported caps %" GST_PTR_FORMAT, supported_incaps); } GST_OBJECT_UNLOCK (nvenc); done: caps = gst_video_encoder_proxy_getcaps (enc, supported_incaps, filter); if (supported_incaps) gst_caps_unref (supported_incaps); gst_clear_caps (&allowed); gst_clear_caps (&template_caps); GST_DEBUG_OBJECT (nvenc, " returning caps %" GST_PTR_FORMAT, caps); return caps; } static gboolean gst_nv_base_enc_close (GstVideoEncoder * enc) { GstNvBaseEnc *nvenc = GST_NV_BASE_ENC (enc); gboolean ret = TRUE; if (nvenc->encoder) { if (NvEncDestroyEncoder (nvenc->encoder) != NV_ENC_SUCCESS) ret = FALSE; nvenc->encoder = NULL; } if (nvenc->cuda_ctx && nvenc->cuda_stream) { if (gst_cuda_context_push (nvenc->cuda_ctx)) { gst_cuda_result (CuStreamDestroy (nvenc->cuda_stream)); gst_cuda_context_pop (NULL); } } gst_clear_object (&nvenc->cuda_ctx); nvenc->cuda_stream = NULL; GST_OBJECT_LOCK (nvenc); if (nvenc->input_formats) g_value_unset (nvenc->input_formats); g_free (nvenc->input_formats); nvenc->input_formats = NULL; GST_OBJECT_UNLOCK (nvenc); if (nvenc->input_state) { gst_video_codec_state_unref (nvenc->input_state); nvenc->input_state = NULL; } return ret; } static void gst_nv_base_enc_init (GstNvBaseEnc * nvenc) { GstVideoEncoder *encoder = GST_VIDEO_ENCODER (nvenc); GstNvEncQP qp_detail = { DEFAULT_QP_DETAIL, DEFAULT_QP_DETAIL, DEFAULT_QP_DETAIL }; nvenc->preset_enum = DEFAULT_PRESET; nvenc->selected_preset = _nv_preset_to_guid (nvenc->preset_enum); nvenc->rate_control_mode = DEFAULT_RC_MODE; nvenc->qp_min = DEFAULT_QP_MIN; nvenc->qp_max = DEFAULT_QP_MAX; nvenc->qp_const = DEFAULT_QP_CONST; nvenc->bitrate = DEFAULT_BITRATE; nvenc->gop_size = DEFAULT_GOP_SIZE; nvenc->max_bitrate = DEFAULT_MAX_BITRATE; nvenc->spatial_aq = DEFAULT_SPATIAL_AQ; nvenc->aq_strength = DEFAULT_AQ_STRENGTH; nvenc->non_refp = DEFAULT_NON_REF_P; nvenc->zerolatency = DEFAULT_ZEROLATENCY; nvenc->strict_gop = DEFAULT_STRICT_GOP; nvenc->const_quality = DEFAULT_CONST_QUALITY; nvenc->i_adapt = DEFAULT_I_ADAPT; nvenc->qp_min_detail = qp_detail; nvenc->qp_max_detail = qp_detail; nvenc->qp_const_detail = qp_detail; GST_VIDEO_ENCODER_STREAM_LOCK (encoder); GST_VIDEO_ENCODER_STREAM_UNLOCK (encoder); GST_PAD_SET_ACCEPT_INTERSECT (GST_VIDEO_ENCODER_SINK_PAD (encoder)); } static void gst_nv_base_enc_finalize (GObject * obj) { G_OBJECT_CLASS (gst_nv_base_enc_parent_class)->finalize (obj); } static GstVideoCodecFrame * _find_frame_with_output_buffer (GstNvBaseEnc * nvenc, NV_ENC_OUTPUT_PTR out_buf) { GList *l, *walk = gst_video_encoder_get_frames (GST_VIDEO_ENCODER (nvenc)); GstVideoCodecFrame *ret = NULL; for (l = walk; l; l = l->next) { GstVideoCodecFrame *frame = (GstVideoCodecFrame *) l->data; GstNvEncFrameState *state = gst_video_codec_frame_get_user_data (frame); if (!state || !state->out_buf) continue; if (state->out_buf == out_buf) { ret = frame; break; } } if (ret) gst_video_codec_frame_ref (ret); g_list_free_full (walk, (GDestroyNotify) gst_video_codec_frame_unref); return ret; } static const gchar * picture_type_to_string (NV_ENC_PIC_TYPE type) { switch (type) { case NV_ENC_PIC_TYPE_P: return "P"; case NV_ENC_PIC_TYPE_B: return "B"; case NV_ENC_PIC_TYPE_I: return "I"; case NV_ENC_PIC_TYPE_IDR: return "IDR"; case NV_ENC_PIC_TYPE_BI: return "BI"; case NV_ENC_PIC_TYPE_SKIPPED: return "SKIPPED"; case NV_ENC_PIC_TYPE_INTRA_REFRESH: return "INTRA-REFRESH"; case NV_ENC_PIC_TYPE_UNKNOWN: default: break; } return "UNKNOWN"; } static gpointer gst_nv_base_enc_bitstream_thread (gpointer user_data) { GstVideoEncoder *enc = user_data; GstNvBaseEnc *nvenc = user_data; GstFlowReturn flow = GST_FLOW_OK; /* overview of operation: * 1. retreive the next buffer submitted to the bitstream pool * 2. wait for that buffer to be ready from nvenc (LockBitsream) * 3. retreive the GstVideoCodecFrame associated with that buffer * 4. for each buffer in the frame * 4.1 (step 2): wait for that buffer to be ready from nvenc (LockBitsream) * 4.2 create an output GstBuffer from the nvenc buffers * 4.3 unlock the nvenc bitstream buffers UnlockBitsream * 5. finish_frame() * 6. cleanup */ do { GstBuffer *buffer = NULL; GstNvEncFrameState *state_in_queue = NULL; GstNvEncFrameState *state = NULL; GstVideoCodecFrame *frame = NULL; NVENCSTATUS nv_ret; NV_ENC_LOCK_BITSTREAM lock_bs = { 0, }; NV_ENC_OUTPUT_PTR out_buf; GstNvEncInputResource *resource; GST_LOG_OBJECT (enc, "wait for bitstream buffer.."); state_in_queue = g_async_queue_pop (nvenc->bitstream_queue); if ((gpointer) state_in_queue == SHUTDOWN_COOKIE) goto exit_thread; out_buf = state_in_queue->out_buf; resource = state_in_queue->in_buf; GST_LOG_OBJECT (nvenc, "waiting for output buffer %p to be ready", out_buf); lock_bs.version = gst_nvenc_get_lock_bitstream_version (); lock_bs.outputBitstream = out_buf; lock_bs.doNotWait = 0; /* FIXME: this would need to be updated for other slice modes */ lock_bs.sliceOffsets = NULL; if (!gst_cuda_context_push (nvenc->cuda_ctx)) { GST_ELEMENT_ERROR (nvenc, LIBRARY, ENCODE, (NULL), ("Failed to push current context")); goto error_shutdown; } nv_ret = NvEncLockBitstream (nvenc->encoder, &lock_bs); if (nv_ret != NV_ENC_SUCCESS) { gst_cuda_context_pop (NULL); GST_ELEMENT_ERROR (nvenc, STREAM, ENCODE, (NULL), ("Failed to lock bitstream buffer %p, ret %d", lock_bs.outputBitstream, nv_ret)); goto error_shutdown; } frame = _find_frame_with_output_buffer (nvenc, out_buf); state = gst_video_codec_frame_get_user_data (frame); g_assert (state->out_buf == out_buf); /* copy into output buffer */ buffer = gst_buffer_new_allocate (NULL, lock_bs.bitstreamSizeInBytes, NULL); gst_buffer_fill (buffer, 0, lock_bs.bitstreamBufferPtr, lock_bs.bitstreamSizeInBytes); if (lock_bs.pictureType == NV_ENC_PIC_TYPE_IDR) { GST_DEBUG_OBJECT (nvenc, "This is a keyframe"); GST_VIDEO_CODEC_FRAME_SET_SYNC_POINT (frame); } nv_ret = NvEncUnlockBitstream (nvenc->encoder, state->out_buf); if (nv_ret != NV_ENC_SUCCESS) { gst_cuda_context_pop (NULL); GST_ELEMENT_ERROR (nvenc, STREAM, ENCODE, (NULL), ("Failed to unlock bitstream buffer %p, ret %d", lock_bs.outputBitstream, nv_ret)); gst_buffer_unref (buffer); gst_video_encoder_finish_frame (enc, frame); goto error_shutdown; } frame->dts = frame->pts; frame->pts = lock_bs.outputTimeStamp; frame->duration = lock_bs.outputDuration; GST_LOG_OBJECT (nvenc, "frame index %" G_GUINT32_FORMAT ", frame type %s, dts %" GST_TIME_FORMAT ", pts %" GST_TIME_FORMAT, lock_bs.frameIdx, picture_type_to_string (lock_bs.pictureType), GST_TIME_ARGS (frame->dts), GST_TIME_ARGS (frame->pts)); frame->output_buffer = buffer; nv_ret = NvEncUnmapInputResource (nvenc->encoder, resource->nv_mapped_resource.mappedResource); resource->mapped = FALSE; if (nv_ret != NV_ENC_SUCCESS) { GST_ERROR_OBJECT (nvenc, "Failed to unmap input resource %p, ret %d", resource, nv_ret); } gst_cuda_context_pop (NULL); memset (&resource->nv_mapped_resource, 0, sizeof (resource->nv_mapped_resource)); g_async_queue_push (nvenc->available_queue, state_in_queue); /* Ugly but no other way to get DTS offset since nvenc dose not adjust * dts/pts even if bframe was enabled. So the output PTS can be smaller * than DTS. The maximum difference between DTS and PTS can be calculated * using the PTS difference between the first frame and the second frame. */ if (nvenc->bframes > 0) { if (nvenc->dts_offset == 0) { if (!nvenc->first_frame) { /* store the first frame to get dts offset */ nvenc->first_frame = frame; continue; } else { if (nvenc->first_frame->pts >= frame->pts) { GstClockTime duration = 0; GST_WARNING_OBJECT (enc, "Could not calculate DTS offset"); if (nvenc->input_info.fps_n > 0 && nvenc->input_info.fps_d > 0) { duration = gst_util_uint64_scale (GST_SECOND, nvenc->input_info.fps_d, nvenc->input_info.fps_n); } else if (nvenc->first_frame->duration > 0 && GST_CLOCK_TIME_IS_VALID (nvenc->first_frame->duration)) { duration = nvenc->first_frame->duration; } else { GST_WARNING_OBJECT (enc, "No way to get frame duration, assuming 30fps"); duration = gst_util_uint64_scale (GST_SECOND, 1, 30); } nvenc->dts_offset = duration * nvenc->bframes; } else { nvenc->dts_offset = frame->pts - nvenc->first_frame->pts; } /* + 1 to dts_offset to adjust fraction */ nvenc->dts_offset++; GST_DEBUG_OBJECT (enc, "Calculated DTS offset %" GST_TIME_FORMAT, GST_TIME_ARGS (nvenc->dts_offset)); } nvenc->first_frame->dts -= nvenc->dts_offset; gst_video_encoder_finish_frame (enc, nvenc->first_frame); nvenc->first_frame = NULL; } frame->dts -= nvenc->dts_offset; } flow = gst_video_encoder_finish_frame (enc, frame); if (flow != GST_FLOW_OK) { GST_INFO_OBJECT (enc, "got flow %s", gst_flow_get_name (flow)); g_atomic_int_set (&nvenc->last_flow, flow); g_async_queue_push (nvenc->available_queue, SHUTDOWN_COOKIE); goto exit_thread; } } while (TRUE); error_shutdown: { if (nvenc->first_frame) { gst_clear_buffer (&nvenc->first_frame->output_buffer); gst_video_encoder_finish_frame (enc, nvenc->first_frame); nvenc->first_frame = NULL; } g_atomic_int_set (&nvenc->last_flow, GST_FLOW_ERROR); g_async_queue_push (nvenc->available_queue, SHUTDOWN_COOKIE); goto exit_thread; } exit_thread: { if (nvenc->first_frame) { gst_video_encoder_finish_frame (enc, nvenc->first_frame); nvenc->first_frame = NULL; } GST_INFO_OBJECT (nvenc, "exiting thread"); return NULL; } } static gboolean gst_nv_base_enc_start_bitstream_thread (GstNvBaseEnc * nvenc) { gchar *name = g_strdup_printf ("%s-read-bits", GST_OBJECT_NAME (nvenc)); g_assert (nvenc->bitstream_thread == NULL); g_assert (g_async_queue_length (nvenc->bitstream_queue) == 0); nvenc->bitstream_thread = g_thread_try_new (name, gst_nv_base_enc_bitstream_thread, nvenc, NULL); g_free (name); if (nvenc->bitstream_thread == NULL) return FALSE; GST_INFO_OBJECT (nvenc, "started thread to read bitstream"); return TRUE; } static gboolean gst_nv_base_enc_stop_bitstream_thread (GstNvBaseEnc * nvenc, gboolean force) { GstNvEncFrameState *state; if (nvenc->bitstream_thread == NULL) return TRUE; /* Always send EOS packet to flush GPU. Otherwise, randomly crash happens * during NvEncDestroyEncoder especially when rc-lookahead or bframe was * enabled */ gst_nv_base_enc_drain_encoder (nvenc); if (force) { g_async_queue_lock (nvenc->available_queue); g_async_queue_lock (nvenc->pending_queue); g_async_queue_lock (nvenc->bitstream_queue); while ((state = g_async_queue_try_pop_unlocked (nvenc->bitstream_queue))) { GST_INFO_OBJECT (nvenc, "stole bitstream buffer %p from queue", state); g_async_queue_push_unlocked (nvenc->available_queue, state); } g_async_queue_push_unlocked (nvenc->bitstream_queue, SHUTDOWN_COOKIE); g_async_queue_unlock (nvenc->available_queue); g_async_queue_unlock (nvenc->pending_queue); g_async_queue_unlock (nvenc->bitstream_queue); } else { /* wait for encoder to drain the remaining buffers */ g_async_queue_push (nvenc->bitstream_queue, SHUTDOWN_COOKIE); } if (!force) { /* temporary unlock during finish, so other thread can find and push frame */ GST_VIDEO_ENCODER_STREAM_UNLOCK (nvenc); } g_thread_join (nvenc->bitstream_thread); if (!force) GST_VIDEO_ENCODER_STREAM_LOCK (nvenc); nvenc->bitstream_thread = NULL; return TRUE; } static void gst_nv_base_enc_reset_queues (GstNvBaseEnc * nvenc, gboolean refill) { gpointer ptr; gint i; GST_INFO_OBJECT (nvenc, "clearing queues"); while ((ptr = g_async_queue_try_pop (nvenc->available_queue))) { /* do nothing */ } while ((ptr = g_async_queue_try_pop (nvenc->pending_queue))) { /* do nothing */ } while ((ptr = g_async_queue_try_pop (nvenc->bitstream_queue))) { /* do nothing */ } if (refill) { GST_INFO_OBJECT (nvenc, "refilling buffer pools"); for (i = 0; i < nvenc->n_bufs; ++i) { g_async_queue_push (nvenc->available_queue, &g_array_index (nvenc->items, GstNvEncFrameState, i)); } } } static void gst_nv_base_enc_free_buffers (GstNvBaseEnc * nvenc) { NVENCSTATUS nv_ret; CUresult cuda_ret; guint i; if (nvenc->encoder == NULL) return; gst_nv_base_enc_reset_queues (nvenc, FALSE); gst_cuda_context_push (nvenc->cuda_ctx); for (i = 0; i < nvenc->n_bufs; ++i) { NV_ENC_OUTPUT_PTR out_buf = g_array_index (nvenc->items, GstNvEncFrameState, i).out_buf; GstNvEncInputResource *in_buf = g_array_index (nvenc->items, GstNvEncFrameState, i).in_buf; if (in_buf->mapped) { GST_LOG_OBJECT (nvenc, "Unmap resource %p", in_buf); nv_ret = NvEncUnmapInputResource (nvenc->encoder, in_buf->nv_mapped_resource.mappedResource); if (nv_ret != NV_ENC_SUCCESS) { GST_ERROR_OBJECT (nvenc, "Failed to unmap input resource %p, ret %d", in_buf, nv_ret); } } nv_ret = NvEncUnregisterResource (nvenc->encoder, in_buf->nv_resource.registeredResource); if (nv_ret != NV_ENC_SUCCESS) GST_ERROR_OBJECT (nvenc, "Failed to unregister resource %p, ret %d", in_buf, nv_ret); cuda_ret = CuMemFree (in_buf->cuda_pointer); if (!gst_cuda_result (cuda_ret)) { GST_ERROR_OBJECT (nvenc, "Failed to free CUDA device memory, ret %d", cuda_ret); } g_free (in_buf); GST_DEBUG_OBJECT (nvenc, "Destroying output bitstream buffer %p", out_buf); nv_ret = NvEncDestroyBitstreamBuffer (nvenc->encoder, out_buf); if (nv_ret != NV_ENC_SUCCESS) { GST_ERROR_OBJECT (nvenc, "Failed to destroy output buffer %p, ret %d", out_buf, nv_ret); } } gst_cuda_context_pop (NULL); g_array_set_size (nvenc->items, 0); } static inline guint _get_plane_width (GstVideoInfo * info, guint plane) { return GST_VIDEO_INFO_COMP_WIDTH (info, plane) * GST_VIDEO_INFO_COMP_PSTRIDE (info, plane); } static inline guint _get_plane_height (GstVideoInfo * info, guint plane) { if (GST_VIDEO_INFO_IS_YUV (info)) /* For now component width and plane width are the same and the * plane-component mapping matches */ return GST_VIDEO_INFO_COMP_HEIGHT (info, plane); else /* RGB, GRAY */ return GST_VIDEO_INFO_HEIGHT (info); } static inline gsize _get_frame_data_height (GstVideoInfo * info) { gsize ret = 0; gint i; for (i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++) { ret += _get_plane_height (info, i); } return ret; } static gboolean qp_has_values (const GstNvEncQP * qp) { return qp->qp_i >= 0 && qp->qp_p >= 0 && qp->qp_b >= 0; } static void gst_nv_base_enc_setup_rate_control (GstNvBaseEnc * nvenc, NV_ENC_RC_PARAMS * rc_params) { GstNvRCMode rc_mode = nvenc->rate_control_mode; NV_ENC_PARAMS_RC_MODE nv_rcmode; if (nvenc->bitrate) rc_params->averageBitRate = nvenc->bitrate * 1024; if (nvenc->max_bitrate) rc_params->maxBitRate = nvenc->max_bitrate * 1024; if (nvenc->vbv_buffersize) rc_params->vbvBufferSize = nvenc->vbv_buffersize * 1024; /* Guess the best matching mode */ if (rc_mode == GST_NV_RC_MODE_DEFAULT) { if (nvenc->qp_const >= 0) { /* constQP is used only for RC_CONSTQP mode */ rc_mode = GST_NV_RC_MODE_CONSTQP; } } if (nvenc->qp_min >= 0) { rc_params->enableMinQP = 1; rc_params->minQP.qpInterB = nvenc->qp_min; rc_params->minQP.qpInterP = nvenc->qp_min; rc_params->minQP.qpIntra = nvenc->qp_min; } else if (qp_has_values (&nvenc->qp_min_detail)) { rc_params->enableMinQP = 1; rc_params->minQP.qpInterB = nvenc->qp_min_detail.qp_b; rc_params->minQP.qpInterP = nvenc->qp_min_detail.qp_p; rc_params->minQP.qpIntra = nvenc->qp_min_detail.qp_i; } if (nvenc->qp_max >= 0) { rc_params->enableMaxQP = 1; rc_params->maxQP.qpInterB = nvenc->qp_max; rc_params->maxQP.qpInterP = nvenc->qp_max; rc_params->maxQP.qpIntra = nvenc->qp_max; } else if (qp_has_values (&nvenc->qp_max_detail)) { rc_params->enableMaxQP = 1; rc_params->maxQP.qpInterB = nvenc->qp_max_detail.qp_b; rc_params->maxQP.qpInterP = nvenc->qp_max_detail.qp_p; rc_params->maxQP.qpIntra = nvenc->qp_max_detail.qp_i; } if (nvenc->qp_const >= 0) { rc_params->constQP.qpInterB = nvenc->qp_const; rc_params->constQP.qpInterP = nvenc->qp_const; rc_params->constQP.qpIntra = nvenc->qp_const; } else if (qp_has_values (&nvenc->qp_const_detail)) { rc_params->constQP.qpInterB = nvenc->qp_const_detail.qp_b; rc_params->constQP.qpInterP = nvenc->qp_const_detail.qp_p; rc_params->constQP.qpIntra = nvenc->qp_const_detail.qp_i; } nv_rcmode = _rc_mode_to_nv (rc_mode); if (nv_rcmode == NV_ENC_PARAMS_RC_VBR_MINQP && nvenc->qp_min < 0) { GST_WARNING_OBJECT (nvenc, "vbr-minqp was requested without qp-min"); nv_rcmode = NV_ENC_PARAMS_RC_VBR; } rc_params->rateControlMode = nv_rcmode; if (nvenc->spatial_aq) { rc_params->enableAQ = 1; rc_params->aqStrength = nvenc->aq_strength; } rc_params->enableTemporalAQ = nvenc->temporal_aq; if (nvenc->rc_lookahead) { rc_params->enableLookahead = 1; rc_params->lookaheadDepth = nvenc->rc_lookahead; rc_params->disableIadapt = !nvenc->i_adapt; rc_params->disableBadapt = !nvenc->b_adapt; } rc_params->strictGOPTarget = nvenc->strict_gop; rc_params->enableNonRefP = nvenc->non_refp; rc_params->zeroReorderDelay = nvenc->zerolatency; if (nvenc->const_quality) { guint scaled = (gint) (nvenc->const_quality * 256.0); rc_params->targetQuality = (guint8) (scaled >> 8); rc_params->targetQualityLSB = (guint8) (scaled & 0xff); } } static gint gst_nv_base_enc_calculate_num_prealloc_buffers (GstNvBaseEnc * enc, NV_ENC_CONFIG * config) { gint num_buffers; /* At least 4 surfaces are required as documented by Nvidia Encoder guide */ num_buffers = 4; /* + lookahead depth */ num_buffers += config->rcParams.lookaheadDepth; /* + GOP size */ num_buffers += config->frameIntervalP; /* hardcoded upper bound "48" * The worst case * default num buffers: 4 * maximum allowed lookahead: 32 * max bfraems: 4 -> frameIntervalP: 5 * "4 + 32 + 5" < "48" so it seems to sufficiently safe upper bound */ num_buffers = MIN (num_buffers, 48); GST_DEBUG_OBJECT (enc, "Calculated num buffers: %d " "(lookahead %d, frameIntervalP %d)", num_buffers, config->rcParams.lookaheadDepth, config->frameIntervalP); return num_buffers; } /* GstVideoEncoder::set_format or by nvenc self if new properties were set. * * NvEncReconfigureEncoder with following conditions are not allowed * 1) GOP structure change * 2) sync-Async mode change (Async mode is Windows only and we didn't support it) * 3) MaxWidth, MaxHeight * 4) PTDmode (Picture Type Decision mode) * * So we will force to re-init the encode session if * 1) New resolution is larger than previous config * 2) GOP size changed * 3) Input pixel format change * pre-allocated CUDA memory could not ensure stride, width and height * * TODO: bframe also considered as force re-init case */ static gboolean gst_nv_base_enc_set_format (GstVideoEncoder * enc, GstVideoCodecState * state) { GstNvBaseEncClass *nvenc_class = GST_NV_BASE_ENC_GET_CLASS (enc); GstNvBaseEnc *nvenc = GST_NV_BASE_ENC (enc); GstVideoInfo *info = &state->info; GstVideoCodecState *old_state = nvenc->input_state; NV_ENC_RECONFIGURE_PARAMS reconfigure_params = { 0, }; NV_ENC_INITIALIZE_PARAMS *params = &nvenc->init_params; NV_ENC_PRESET_CONFIG preset_config = { 0, }; NVENCSTATUS nv_ret; gint dar_n, dar_d; gboolean reconfigure = FALSE; g_atomic_int_set (&nvenc->reconfig, FALSE); if (old_state) { gboolean larger_resolution; gboolean format_changed; gboolean gop_size_changed; larger_resolution = (GST_VIDEO_INFO_WIDTH (info) > nvenc->init_params.maxEncodeWidth || GST_VIDEO_INFO_HEIGHT (info) > nvenc->init_params.maxEncodeHeight); format_changed = GST_VIDEO_INFO_FORMAT (info) != GST_VIDEO_INFO_FORMAT (&old_state->info); if (nvenc->config.gopLength == NVENC_INFINITE_GOPLENGTH && nvenc->gop_size == -1) { gop_size_changed = FALSE; } else if (nvenc->config.gopLength != nvenc->gop_size) { gop_size_changed = TRUE; } else { gop_size_changed = FALSE; } if (larger_resolution || format_changed || gop_size_changed) { GST_DEBUG_OBJECT (nvenc, "resolution %dx%d -> %dx%d, format %s -> %s, re-init", nvenc->init_params.maxEncodeWidth, nvenc->init_params.maxEncodeHeight, GST_VIDEO_INFO_WIDTH (info), GST_VIDEO_INFO_HEIGHT (info), gst_video_format_to_string (GST_VIDEO_INFO_FORMAT (&old_state->info)), gst_video_format_to_string (GST_VIDEO_INFO_FORMAT (info))); gst_nv_base_enc_drain_encoder (nvenc); gst_nv_base_enc_stop_bitstream_thread (nvenc, FALSE); gst_nv_base_enc_free_buffers (nvenc); NvEncDestroyEncoder (nvenc->encoder); nvenc->encoder = NULL; if (!gst_nv_base_enc_open_encode_session (nvenc)) { GST_ERROR_OBJECT (nvenc, "Failed to open encode session"); return FALSE; } } else { reconfigure_params.version = gst_nvenc_get_reconfigure_params_version (); /* reset rate control state and start from IDR */ reconfigure_params.resetEncoder = TRUE; reconfigure_params.forceIDR = TRUE; reconfigure = TRUE; } } params->version = gst_nvenc_get_initialize_params_version (); params->encodeGUID = nvenc_class->codec_id; params->encodeWidth = GST_VIDEO_INFO_WIDTH (info); params->encodeHeight = GST_VIDEO_INFO_HEIGHT (info); { guint32 n_presets; GUID *presets; guint32 i; nv_ret = NvEncGetEncodePresetCount (nvenc->encoder, params->encodeGUID, &n_presets); if (nv_ret != NV_ENC_SUCCESS) { GST_ELEMENT_ERROR (nvenc, LIBRARY, SETTINGS, (NULL), ("Failed to get encoder presets")); return FALSE; } presets = g_new0 (GUID, n_presets); nv_ret = NvEncGetEncodePresetGUIDs (nvenc->encoder, params->encodeGUID, presets, n_presets, &n_presets); if (nv_ret != NV_ENC_SUCCESS) { GST_ELEMENT_ERROR (nvenc, LIBRARY, SETTINGS, (NULL), ("Failed to get encoder presets")); g_free (presets); return FALSE; } for (i = 0; i < n_presets; i++) { if (gst_nvenc_cmp_guid (presets[i], nvenc->selected_preset)) break; } g_free (presets); if (i >= n_presets) { GST_ELEMENT_ERROR (nvenc, LIBRARY, SETTINGS, (NULL), ("Selected preset not supported")); return FALSE; } params->presetGUID = nvenc->selected_preset; } params->enablePTD = 1; if (!reconfigure) { /* this sets the required buffer size and the maximum allowed size on * subsequent reconfigures */ params->maxEncodeWidth = GST_VIDEO_INFO_WIDTH (info); params->maxEncodeHeight = GST_VIDEO_INFO_HEIGHT (info); } preset_config.version = gst_nvenc_get_preset_config_version (); preset_config.presetCfg.version = gst_nvenc_get_config_version (); nv_ret = NvEncGetEncodePresetConfig (nvenc->encoder, params->encodeGUID, params->presetGUID, &preset_config); if (nv_ret != NV_ENC_SUCCESS) { GST_ELEMENT_ERROR (nvenc, LIBRARY, SETTINGS, (NULL), ("Failed to get encode preset configuration: %d", nv_ret)); return FALSE; } params->encodeConfig = &preset_config.presetCfg; if (GST_VIDEO_INFO_IS_INTERLACED (info)) { if (GST_VIDEO_INFO_INTERLACE_MODE (info) == GST_VIDEO_INTERLACE_MODE_INTERLEAVED || GST_VIDEO_INFO_INTERLACE_MODE (info) == GST_VIDEO_INTERLACE_MODE_MIXED) { preset_config.presetCfg.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FIELD; } } if (info->fps_d > 0 && info->fps_n > 0) { params->frameRateNum = info->fps_n; params->frameRateDen = info->fps_d; } else { params->frameRateNum = 0; params->frameRateDen = 1; } if (gst_util_fraction_multiply (GST_VIDEO_INFO_WIDTH (info), GST_VIDEO_INFO_HEIGHT (info), GST_VIDEO_INFO_PAR_N (info), GST_VIDEO_INFO_PAR_D (info), &dar_n, &dar_d) && dar_n > 0 && dar_d > 0) { params->darWidth = dar_n; params->darHeight = dar_d; } gst_nv_base_enc_setup_rate_control (nvenc, ¶ms->encodeConfig->rcParams); params->enableWeightedPrediction = nvenc->weighted_pred; if (nvenc->gop_size < 0) { params->encodeConfig->gopLength = NVENC_INFINITE_GOPLENGTH; params->encodeConfig->frameIntervalP = 1; } else if (nvenc->gop_size > 0) { params->encodeConfig->gopLength = nvenc->gop_size; /* frameIntervalP * 0: All Intra frames * 1: I/P only * n ( > 1): n - 1 bframes */ params->encodeConfig->frameIntervalP = nvenc->bframes + 1; } else { /* gop size == 0 means all intra frames */ params->encodeConfig->gopLength = 1; params->encodeConfig->frameIntervalP = 0; } g_assert (nvenc_class->set_encoder_config); if (!nvenc_class->set_encoder_config (nvenc, state, params->encodeConfig)) { GST_ERROR_OBJECT (enc, "Subclass failed to set encoder configuration"); return FALSE; } /* store the last config to reconfig/re-init decision in the next time */ nvenc->config = *params->encodeConfig; G_LOCK (initialization_lock); if (reconfigure) { reconfigure_params.reInitEncodeParams = nvenc->init_params; nv_ret = NvEncReconfigureEncoder (nvenc->encoder, &reconfigure_params); } else { nv_ret = NvEncInitializeEncoder (nvenc->encoder, params); } G_UNLOCK (initialization_lock); if (nv_ret != NV_ENC_SUCCESS) { GST_ELEMENT_ERROR (nvenc, LIBRARY, SETTINGS, (NULL), ("Failed to %sinit encoder: %d", reconfigure ? "re" : "", nv_ret)); return FALSE; } if (!reconfigure) { nvenc->input_info = *info; nvenc->gl_input = FALSE; } if (nvenc->input_state) gst_video_codec_state_unref (nvenc->input_state); nvenc->input_state = gst_video_codec_state_ref (state); GST_INFO_OBJECT (nvenc, "%sconfigured encoder", reconfigure ? "re" : ""); /* now allocate some buffers only on first configuration */ if (!reconfigure) { #if HAVE_NVCODEC_GST_GL GstCapsFeatures *features; #endif guint i; guint input_width, input_height; input_width = GST_VIDEO_INFO_WIDTH (info); input_height = GST_VIDEO_INFO_HEIGHT (info); nvenc->n_bufs = gst_nv_base_enc_calculate_num_prealloc_buffers (nvenc, params->encodeConfig); /* input buffers */ g_array_set_size (nvenc->items, nvenc->n_bufs); #if HAVE_NVCODEC_GST_GL features = gst_caps_get_features (state->caps, 0); if (gst_caps_features_contains (features, GST_CAPS_FEATURE_MEMORY_GL_MEMORY)) { nvenc->gl_input = TRUE; } #endif gst_cuda_context_push (nvenc->cuda_ctx); for (i = 0; i < nvenc->n_bufs; ++i) { GstNvEncInputResource *resource = g_new0 (GstNvEncInputResource, 1); CUresult cu_ret; memset (&resource->nv_resource, 0, sizeof (resource->nv_resource)); memset (&resource->nv_mapped_resource, 0, sizeof (resource->nv_mapped_resource)); /* scratch buffer for non-contigious planer into a contigious buffer */ cu_ret = CuMemAllocPitch (&resource->cuda_pointer, &resource->cuda_stride, _get_plane_width (info, 0), _get_frame_data_height (info), 16); if (!gst_cuda_result (cu_ret)) { GST_ERROR_OBJECT (nvenc, "failed to alocate cuda scratch buffer " "ret %d", cu_ret); g_assert_not_reached (); } resource->nv_resource.version = gst_nvenc_get_registure_resource_version (); resource->nv_resource.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR; resource->nv_resource.width = input_width; resource->nv_resource.height = input_height; resource->nv_resource.pitch = resource->cuda_stride; resource->nv_resource.bufferFormat = gst_nvenc_get_nv_buffer_format (GST_VIDEO_INFO_FORMAT (info)); resource->nv_resource.resourceToRegister = (gpointer) resource->cuda_pointer; nv_ret = NvEncRegisterResource (nvenc->encoder, &resource->nv_resource); if (nv_ret != NV_ENC_SUCCESS) GST_ERROR_OBJECT (nvenc, "Failed to register resource %p, ret %d", resource, nv_ret); g_array_index (nvenc->items, GstNvEncFrameState, i).in_buf = resource; } gst_cuda_context_pop (NULL); /* output buffers */ for (i = 0; i < nvenc->n_bufs; ++i) { NV_ENC_CREATE_BITSTREAM_BUFFER cout_buf = { 0, }; cout_buf.version = gst_nvenc_get_create_bitstream_buffer_version (); /* 1 MB should be large enough to hold most output frames. * NVENC will automatically increase this if it's not enough. */ cout_buf.size = 1024 * 1024; cout_buf.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_CACHED; G_LOCK (initialization_lock); nv_ret = NvEncCreateBitstreamBuffer (nvenc->encoder, &cout_buf); G_UNLOCK (initialization_lock); if (nv_ret != NV_ENC_SUCCESS) { GST_WARNING_OBJECT (enc, "Failed to allocate input buffer: %d", nv_ret); /* FIXME: clean up */ return FALSE; } GST_INFO_OBJECT (nvenc, "allocated output buffer %2d: %p", i, cout_buf.bitstreamBuffer); g_array_index (nvenc->items, GstNvEncFrameState, i).out_buf = cout_buf.bitstreamBuffer; g_async_queue_push (nvenc->available_queue, &g_array_index (nvenc->items, GstNvEncFrameState, i)); } #if 0 /* Get SPS/PPS */ { NV_ENC_SEQUENCE_PARAM_PAYLOAD seq_param = { 0 }; uint32_t seq_size = 0; seq_param.version = gst_nvenc_get_sequence_param_payload_version (); seq_param.spsppsBuffer = g_alloca (1024); seq_param.inBufferSize = 1024; seq_param.outSPSPPSPayloadSize = &seq_size; nv_ret = NvEncGetSequenceParams (nvenc->encoder, &seq_param); if (nv_ret != NV_ENC_SUCCESS) { GST_WARNING_OBJECT (enc, "Failed to retrieve SPS/PPS: %d", nv_ret); return FALSE; } /* FIXME: use SPS/PPS */ GST_MEMDUMP_OBJECT (enc, "SPS/PPS", seq_param.spsppsBuffer, seq_size); } #endif } g_assert (nvenc_class->set_src_caps); if (!nvenc_class->set_src_caps (nvenc, state)) { GST_ERROR_OBJECT (nvenc, "Subclass failed to set output caps"); /* FIXME: clean up */ return FALSE; } return TRUE; } static guint _get_cuda_device_stride (GstVideoInfo * info, guint plane, gsize cuda_stride) { switch (GST_VIDEO_INFO_FORMAT (info)) { case GST_VIDEO_FORMAT_NV12: case GST_VIDEO_FORMAT_NV21: case GST_VIDEO_FORMAT_P010_10LE: case GST_VIDEO_FORMAT_P010_10BE: case GST_VIDEO_FORMAT_Y444: case GST_VIDEO_FORMAT_BGRA: case GST_VIDEO_FORMAT_RGBA: case GST_VIDEO_FORMAT_BGR10A2_LE: case GST_VIDEO_FORMAT_RGB10A2_LE: case GST_VIDEO_FORMAT_Y444_16LE: case GST_VIDEO_FORMAT_Y444_16BE: return cuda_stride; case GST_VIDEO_FORMAT_I420: case GST_VIDEO_FORMAT_YV12: return plane == 0 ? cuda_stride : (GST_ROUND_UP_2 (cuda_stride) / 2); default: g_assert_not_reached (); return cuda_stride; } } #if HAVE_NVCODEC_GST_GL typedef struct _GstNvEncRegisterResourceData { GstMemory *mem; GstCudaGraphicsResource *resource; GstNvBaseEnc *nvenc; gboolean ret; } GstNvEncRegisterResourceData; static void register_cuda_resource (GstGLContext * context, GstNvEncRegisterResourceData * data) { GstMemory *mem = data->mem; GstCudaGraphicsResource *resource = data->resource; GstNvBaseEnc *nvenc = data->nvenc; GstMapInfo map_info = GST_MAP_INFO_INIT; GstGLBuffer *gl_buf_obj; data->ret = FALSE; if (!gst_cuda_context_push (nvenc->cuda_ctx)) { GST_WARNING_OBJECT (nvenc, "failed to push CUDA context"); return; } if (gst_memory_map (mem, &map_info, GST_MAP_READ | GST_MAP_GL)) { GstGLMemoryPBO *gl_mem = (GstGLMemoryPBO *) data->mem; gl_buf_obj = gl_mem->pbo; GST_LOG_OBJECT (nvenc, "registure glbuffer %d to CUDA resource", gl_buf_obj->id); if (gst_cuda_graphics_resource_register_gl_buffer (resource, gl_buf_obj->id, CU_GRAPHICS_REGISTER_FLAGS_NONE)) { data->ret = TRUE; } else { GST_WARNING_OBJECT (nvenc, "failed to register memory"); } gst_memory_unmap (mem, &map_info); } else { GST_WARNING_OBJECT (nvenc, "failed to map memory"); } if (!gst_cuda_context_pop (NULL)) GST_WARNING_OBJECT (nvenc, "failed to unlock CUDA context"); } static GstCudaGraphicsResource * ensure_cuda_graphics_resource (GstMemory * mem, GstNvBaseEnc * nvenc) { GQuark quark; GstCudaGraphicsResource *cgr_info; GstNvEncRegisterResourceData data; if (!gst_is_gl_memory_pbo (mem)) { GST_WARNING_OBJECT (nvenc, "memory is not GL PBO memory, %s", mem->allocator->mem_type); return NULL; } quark = gst_cuda_quark_from_id (GST_CUDA_QUARK_GRAPHICS_RESOURCE); cgr_info = gst_mini_object_get_qdata (GST_MINI_OBJECT (mem), quark); if (!cgr_info) { cgr_info = gst_cuda_graphics_resource_new (nvenc->cuda_ctx, GST_OBJECT (GST_GL_BASE_MEMORY_CAST (mem)->context), GST_CUDA_GRAPHICS_RESOURCE_GL_BUFFER); data.mem = mem; data.resource = cgr_info; data.nvenc = nvenc; gst_gl_context_thread_add ((GstGLContext *) cgr_info->graphics_context, (GstGLContextThreadFunc) register_cuda_resource, &data); if (!data.ret) { GST_WARNING_OBJECT (nvenc, "could not register resource"); gst_cuda_graphics_resource_free (cgr_info); return NULL; } gst_mini_object_set_qdata (GST_MINI_OBJECT (mem), quark, cgr_info, (GDestroyNotify) gst_cuda_graphics_resource_free); } return cgr_info; } typedef struct _GstNvEncGLMapData { GstNvBaseEnc *nvenc; GstBuffer *buffer; GstVideoInfo *info; GstNvEncInputResource *resource; gboolean ret; } GstNvEncGLMapData; static void _map_gl_input_buffer (GstGLContext * context, GstNvEncGLMapData * data) { GstNvBaseEnc *nvenc = data->nvenc; CUresult cuda_ret; CUdeviceptr data_pointer; guint i; CUDA_MEMCPY2D param; GstCudaGraphicsResource **resources; guint num_resources; data->ret = FALSE; num_resources = gst_buffer_n_memory (data->buffer); resources = g_newa (GstCudaGraphicsResource *, num_resources); for (i = 0; i < num_resources; i++) { GstMemory *mem; mem = gst_buffer_peek_memory (data->buffer, i); resources[i] = ensure_cuda_graphics_resource (mem, nvenc); if (!resources[i]) { GST_ERROR_OBJECT (nvenc, "could not register %dth memory", i); return; } } gst_cuda_context_push (nvenc->cuda_ctx); data_pointer = data->resource->cuda_pointer; for (i = 0; i < GST_VIDEO_INFO_N_PLANES (data->info); i++) { GstGLBuffer *gl_buf_obj; GstGLMemoryPBO *gl_mem; guint src_stride, dest_stride; CUgraphicsResource cuda_resource; gsize cuda_num_bytes; CUdeviceptr cuda_plane_pointer; gl_mem = (GstGLMemoryPBO *) gst_buffer_peek_memory (data->buffer, i); g_return_if_fail (gst_is_gl_memory_pbo ((GstMemory *) gl_mem)); gl_buf_obj = (GstGLBuffer *) gl_mem->pbo; g_return_if_fail (gl_buf_obj != NULL); /* get the texture into the PBO */ gst_gl_memory_pbo_upload_transfer (gl_mem); gst_gl_memory_pbo_download_transfer (gl_mem); GST_LOG_OBJECT (nvenc, "attempting to copy texture %u into cuda", gl_mem->mem.tex_id); cuda_resource = gst_cuda_graphics_resource_map (resources[i], nvenc->cuda_stream, CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY); if (!cuda_resource) { GST_ERROR_OBJECT (nvenc, "failed to map GL texture %u into cuda", gl_mem->mem.tex_id); g_assert_not_reached (); } cuda_ret = CuGraphicsResourceGetMappedPointer (&cuda_plane_pointer, &cuda_num_bytes, cuda_resource); if (!gst_cuda_result (cuda_ret)) { GST_ERROR_OBJECT (nvenc, "failed to get mapped pointer of map GL " "texture %u in cuda ret :%d", gl_mem->mem.tex_id, cuda_ret); g_assert_not_reached (); } src_stride = GST_VIDEO_INFO_PLANE_STRIDE (data->info, i); dest_stride = _get_cuda_device_stride (&nvenc->input_info, i, data->resource->cuda_stride); /* copy into scratch buffer */ param.srcXInBytes = 0; param.srcY = 0; param.srcMemoryType = CU_MEMORYTYPE_DEVICE; param.srcDevice = cuda_plane_pointer; param.srcPitch = src_stride; param.dstXInBytes = 0; param.dstY = 0; param.dstMemoryType = CU_MEMORYTYPE_DEVICE; param.dstDevice = data_pointer; param.dstPitch = dest_stride; param.WidthInBytes = _get_plane_width (data->info, i); param.Height = _get_plane_height (data->info, i); cuda_ret = CuMemcpy2DAsync (¶m, nvenc->cuda_stream); if (!gst_cuda_result (cuda_ret)) { GST_ERROR_OBJECT (data->nvenc, "failed to copy GL texture %u into cuda " "ret :%d", gl_mem->mem.tex_id, cuda_ret); g_assert_not_reached (); } gst_cuda_graphics_resource_unmap (resources[i], nvenc->cuda_stream); data_pointer += dest_stride * _get_plane_height (&nvenc->input_info, i); } gst_cuda_result (CuStreamSynchronize (nvenc->cuda_stream)); gst_cuda_context_pop (NULL); data->ret = TRUE; } #endif static gboolean gst_nv_base_enc_upload_frame (GstNvBaseEnc * nvenc, GstVideoFrame * frame, GstNvEncInputResource * resource) { gint i; CUdeviceptr dst = resource->cuda_pointer; GstVideoInfo *info = &frame->info; CUresult cuda_ret; if (!gst_cuda_context_push (nvenc->cuda_ctx)) { GST_ERROR_OBJECT (nvenc, "cannot push context"); return FALSE; } for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (frame); i++) { CUDA_MEMCPY2D param = { 0, }; guint dest_stride = _get_cuda_device_stride (&nvenc->input_info, i, resource->cuda_stride); param.srcMemoryType = CU_MEMORYTYPE_HOST; param.srcHost = GST_VIDEO_FRAME_PLANE_DATA (frame, i); param.srcPitch = GST_VIDEO_FRAME_PLANE_STRIDE (frame, i); param.dstMemoryType = CU_MEMORYTYPE_DEVICE; param.dstDevice = dst; param.dstPitch = dest_stride; param.WidthInBytes = _get_plane_width (info, i); param.Height = _get_plane_height (info, i); cuda_ret = CuMemcpy2DAsync (¶m, nvenc->cuda_stream); if (!gst_cuda_result (cuda_ret)) { GST_ERROR_OBJECT (nvenc, "cannot copy %dth plane, ret %d", i, cuda_ret); gst_cuda_context_pop (NULL); return FALSE; } dst += dest_stride * _get_plane_height (&nvenc->input_info, i); } gst_cuda_result (CuStreamSynchronize (nvenc->cuda_stream)); gst_cuda_context_pop (NULL); return TRUE; } static GstFlowReturn _acquire_input_buffer (GstNvBaseEnc * nvenc, GstNvEncFrameState ** input) { GST_LOG_OBJECT (nvenc, "acquiring input buffer.."); GST_VIDEO_ENCODER_STREAM_UNLOCK (nvenc); *input = g_async_queue_pop (nvenc->available_queue); GST_VIDEO_ENCODER_STREAM_LOCK (nvenc); if (*input == SHUTDOWN_COOKIE) return g_atomic_int_get (&nvenc->last_flow); return GST_FLOW_OK; } static GstFlowReturn _submit_input_buffer (GstNvBaseEnc * nvenc, GstVideoCodecFrame * frame, GstVideoFrame * vframe, GstNvEncFrameState * state, void *inputBufferPtr, NV_ENC_BUFFER_FORMAT bufferFormat) { GstNvBaseEncClass *nvenc_class = GST_NV_BASE_ENC_GET_CLASS (nvenc); NV_ENC_PIC_PARAMS pic_params = { 0, }; NVENCSTATUS nv_ret; gpointer inputBuffer, outputBufferPtr; inputBuffer = state->in_buf; outputBufferPtr = state->out_buf; GST_LOG_OBJECT (nvenc, "%u: input buffer %p, output buffer %p, " "pts %" GST_TIME_FORMAT, frame->system_frame_number, inputBuffer, outputBufferPtr, GST_TIME_ARGS (frame->pts)); pic_params.version = gst_nvenc_get_pic_params_version (); pic_params.inputBuffer = inputBufferPtr; pic_params.bufferFmt = bufferFormat; pic_params.inputWidth = GST_VIDEO_FRAME_WIDTH (vframe); pic_params.inputHeight = GST_VIDEO_FRAME_HEIGHT (vframe); pic_params.outputBitstream = outputBufferPtr; pic_params.completionEvent = NULL; if (GST_VIDEO_FRAME_IS_INTERLACED (vframe)) { if (GST_VIDEO_FRAME_IS_TFF (vframe)) pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_TOP_BOTTOM; else pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_BOTTOM_TOP; } else { pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FRAME; } pic_params.inputTimeStamp = frame->pts; pic_params.inputDuration = GST_CLOCK_TIME_IS_VALID (frame->duration) ? frame->duration : 0; pic_params.frameIdx = frame->system_frame_number; if (GST_VIDEO_CODEC_FRAME_IS_FORCE_KEYFRAME (frame)) pic_params.encodePicFlags = NV_ENC_PIC_FLAG_FORCEIDR; else pic_params.encodePicFlags = 0; if (nvenc_class->set_pic_params && !nvenc_class->set_pic_params (nvenc, frame, &pic_params)) { GST_ERROR_OBJECT (nvenc, "Subclass failed to submit buffer"); return GST_FLOW_ERROR; } if (!gst_cuda_context_push (nvenc->cuda_ctx)) { GST_ELEMENT_ERROR (nvenc, LIBRARY, ENCODE, (NULL), ("Failed to push current context")); return GST_FLOW_ERROR; } nv_ret = NvEncEncodePicture (nvenc->encoder, &pic_params); gst_cuda_context_pop (NULL); if (nv_ret == NV_ENC_SUCCESS) { GST_LOG_OBJECT (nvenc, "Encoded picture"); } else if (nv_ret == NV_ENC_ERR_NEED_MORE_INPUT) { GST_DEBUG_OBJECT (nvenc, "Encoded picture (encoder needs more input)"); } else { GST_ERROR_OBJECT (nvenc, "Failed to encode picture: %d", nv_ret); g_async_queue_push (nvenc->available_queue, state); return GST_FLOW_ERROR; } /* GstNvEncFrameState shouldn't be freed by DestroyNotify */ gst_video_codec_frame_set_user_data (frame, state, NULL); g_async_queue_push (nvenc->pending_queue, state); if (nv_ret == NV_ENC_SUCCESS) { GstNvEncFrameState *pending_state; gint len, i, end; /* HACK: NvEncEncodePicture() with returning NV_ENC_SUCCESS means that * we can pop encoded bitstream from GPU * (via NvEncLockBitstream and copy to memory then NvEncUnlockBitstream). * But if we try to pop every buffer from GPU when the rc-lookahead * was enabled, NvEncLockBitstream returns error NV_ENC_ERR_INVALID_PARAM * randomly (seemingly it's dependent on how fast the encoding thread * dequeued the encoded picture). * So make "pending_queue" having the number of lookahead pictures always, * so that GPU should be able to reference the lookahead pictures. * * This behavior is not documented by Nvidia. The guess here is that * the lookahead pictures are still used for rate-control by Nvidia driver * and dequeuing the lookahead picture from GPU seems to be causing the * problem. */ end = nvenc->rc_lookahead; g_async_queue_lock (nvenc->pending_queue); len = g_async_queue_length_unlocked (nvenc->pending_queue); for (i = len; i > end; i--) { pending_state = g_async_queue_pop_unlocked (nvenc->pending_queue); g_async_queue_push (nvenc->bitstream_queue, pending_state); } g_async_queue_unlock (nvenc->pending_queue); } return GST_FLOW_OK; } static GstFlowReturn gst_nv_base_enc_handle_frame (GstVideoEncoder * enc, GstVideoCodecFrame * frame) { GstNvBaseEnc *nvenc = GST_NV_BASE_ENC (enc); NVENCSTATUS nv_ret; GstVideoFrame vframe; GstVideoInfo *info = &nvenc->input_state->info; GstFlowReturn flow = GST_FLOW_OK; GstMapFlags in_map_flags = GST_MAP_READ; GstNvEncFrameState *state = NULL; GstNvEncInputResource *resource = NULL; g_assert (nvenc->encoder != NULL); if (g_atomic_int_compare_and_exchange (&nvenc->reconfig, TRUE, FALSE)) { if (!gst_nv_base_enc_set_format (enc, nvenc->input_state)) { flow = GST_FLOW_NOT_NEGOTIATED; goto drop; } /* reconfigured encode session should start from keyframe */ GST_VIDEO_CODEC_FRAME_SET_FORCE_KEYFRAME (frame); } #if HAVE_NVCODEC_GST_GL if (nvenc->gl_input) in_map_flags |= GST_MAP_GL; #endif if (!gst_video_frame_map (&vframe, info, frame->input_buffer, in_map_flags)) { goto drop; } /* make sure our thread that waits for output to be ready is started */ if (nvenc->bitstream_thread == NULL) { if (!gst_nv_base_enc_start_bitstream_thread (nvenc)) { gst_video_frame_unmap (&vframe); goto unmap_and_drop; } } flow = _acquire_input_buffer (nvenc, &state); if (flow != GST_FLOW_OK || state == SHUTDOWN_COOKIE || !state) goto unmap_and_drop; resource = state->in_buf; #if HAVE_NVCODEC_GST_GL if (nvenc->gl_input) { GstGLMemory *gl_mem; GstNvEncGLMapData data; gl_mem = (GstGLMemory *) gst_buffer_peek_memory (frame->input_buffer, 0); g_assert (gst_is_gl_memory ((GstMemory *) gl_mem)); data.nvenc = nvenc; data.buffer = frame->input_buffer; data.info = &vframe.info; data.resource = resource; gst_gl_context_thread_add (gl_mem->mem.context, (GstGLContextThreadFunc) _map_gl_input_buffer, &data); if (!data.ret) { flow = GST_FLOW_ERROR; goto unmap_and_drop; } } else #endif if (!gst_nv_base_enc_upload_frame (nvenc, &vframe, resource)) { flow = GST_FLOW_ERROR; goto unmap_and_drop; } resource->nv_mapped_resource.version = gst_nvenc_get_map_input_resource_version (); resource->nv_mapped_resource.registeredResource = resource->nv_resource.registeredResource; if (!gst_cuda_context_push (nvenc->cuda_ctx)) { GST_ELEMENT_ERROR (nvenc, LIBRARY, ENCODE, (NULL), ("Failed to push current context")); flow = GST_FLOW_ERROR; goto unmap_and_drop; } nv_ret = NvEncMapInputResource (nvenc->encoder, &resource->nv_mapped_resource); gst_cuda_context_pop (NULL); if (nv_ret != NV_ENC_SUCCESS) { GST_ERROR_OBJECT (nvenc, "Failed to map input resource %p, ret %d", resource, nv_ret); flow = GST_FLOW_ERROR; goto unmap_and_drop; } resource->mapped = TRUE; flow = _submit_input_buffer (nvenc, frame, &vframe, state, resource->nv_mapped_resource.mappedResource, resource->nv_mapped_resource.mappedBufferFmt); if (flow != GST_FLOW_OK) { GST_DEBUG_OBJECT (nvenc, "return state to pool"); g_async_queue_push (nvenc->available_queue, state); goto unmap_and_drop; } flow = g_atomic_int_get (&nvenc->last_flow); gst_video_frame_unmap (&vframe); /* encoder will keep frame in list internally, we'll look it up again later * in the thread where we get the output buffers and finish it there */ gst_video_codec_frame_unref (frame); return flow; /* ERRORS */ unmap_and_drop: { gst_video_frame_unmap (&vframe); goto drop; } drop: { gst_video_encoder_finish_frame (enc, frame); return flow; } } static gboolean gst_nv_base_enc_drain_encoder (GstNvBaseEnc * nvenc) { NV_ENC_PIC_PARAMS pic_params = { 0, }; NVENCSTATUS nv_ret; gboolean ret = TRUE; GST_INFO_OBJECT (nvenc, "draining encoder"); if (nvenc->input_state == NULL) { GST_DEBUG_OBJECT (nvenc, "no input state, nothing to do"); return TRUE; } pic_params.version = gst_nvenc_get_pic_params_version (); pic_params.encodePicFlags = NV_ENC_PIC_FLAG_EOS; if (!gst_cuda_context_push (nvenc->cuda_ctx)) { GST_ERROR_OBJECT (nvenc, "Could not push context"); return GST_FLOW_ERROR; } nv_ret = NvEncEncodePicture (nvenc->encoder, &pic_params); if (nv_ret != NV_ENC_SUCCESS) { GST_LOG_OBJECT (nvenc, "Failed to drain encoder, ret %d", nv_ret); ret = FALSE; } else { GstNvEncFrameState *pending_state; g_async_queue_lock (nvenc->pending_queue); while ((pending_state = g_async_queue_try_pop_unlocked (nvenc->pending_queue))) { g_async_queue_push (nvenc->bitstream_queue, pending_state); } g_async_queue_unlock (nvenc->pending_queue); } gst_cuda_context_pop (NULL); return ret; } static GstFlowReturn gst_nv_base_enc_finish (GstVideoEncoder * enc) { GstNvBaseEnc *nvenc = GST_NV_BASE_ENC (enc); gst_nv_base_enc_stop_bitstream_thread (nvenc, FALSE); return GST_FLOW_OK; } #if 0 static gboolean gst_nv_base_enc_flush (GstVideoEncoder * enc) { GstNvBaseEnc *nvenc = GST_NV_BASE_ENC (enc); GST_INFO_OBJECT (nvenc, "done flushing encoder"); return TRUE; } #endif void gst_nv_base_enc_schedule_reconfig (GstNvBaseEnc * nvenc) { g_atomic_int_set (&nvenc->reconfig, TRUE); } static void gst_nv_base_enc_set_property (GObject * object, guint prop_id, const GValue * value, GParamSpec * pspec) { GstNvBaseEnc *nvenc = GST_NV_BASE_ENC (object); GstNvBaseEncClass *klass = GST_NV_BASE_ENC_GET_CLASS (nvenc); gboolean reconfig = TRUE; switch (prop_id) { case PROP_PRESET: nvenc->preset_enum = g_value_get_enum (value); nvenc->selected_preset = _nv_preset_to_guid (nvenc->preset_enum); gst_nv_base_enc_schedule_reconfig (nvenc); break; case PROP_RC_MODE: { GstNvRCMode rc_mode = g_value_get_enum (value); NV_ENC_PARAMS_RC_MODE nv_rc_mode = _rc_mode_to_nv (rc_mode); if ((klass->device_caps.rc_modes & nv_rc_mode) == nv_rc_mode) { nvenc->rate_control_mode = rc_mode; } else { GST_WARNING_OBJECT (nvenc, "device does not support requested rate control mode %d", rc_mode); reconfig = FALSE; } break; } case PROP_QP_MIN: nvenc->qp_min = g_value_get_int (value); break; case PROP_QP_MAX: nvenc->qp_max = g_value_get_int (value); break; case PROP_QP_CONST: nvenc->qp_const = g_value_get_int (value); break; case PROP_BITRATE: nvenc->bitrate = g_value_get_uint (value); break; case PROP_GOP_SIZE: nvenc->gop_size = g_value_get_int (value); break; case PROP_MAX_BITRATE: nvenc->max_bitrate = g_value_get_uint (value); break; case PROP_SPATIAL_AQ: nvenc->spatial_aq = g_value_get_boolean (value); break; case PROP_AQ_STRENGTH: nvenc->aq_strength = g_value_get_uint (value); break; case PROP_NON_REF_P: nvenc->non_refp = g_value_get_boolean (value); break; case PROP_ZEROLATENCY: nvenc->zerolatency = g_value_get_boolean (value); break; case PROP_STRICT_GOP: nvenc->strict_gop = g_value_get_boolean (value); break; case PROP_CONST_QUALITY: nvenc->const_quality = g_value_get_double (value); break; case PROP_I_ADAPT: nvenc->i_adapt = g_value_get_boolean (value); break; case PROP_QP_MIN_I: nvenc->qp_min_detail.qp_i = g_value_get_int (value); break; case PROP_QP_MIN_P: nvenc->qp_min_detail.qp_p = g_value_get_int (value); break; case PROP_QP_MIN_B: nvenc->qp_min_detail.qp_b = g_value_get_int (value); break; case PROP_QP_MAX_I: nvenc->qp_max_detail.qp_i = g_value_get_int (value); break; case PROP_QP_MAX_P: nvenc->qp_max_detail.qp_p = g_value_get_int (value); break; case PROP_QP_MAX_B: nvenc->qp_max_detail.qp_b = g_value_get_int (value); break; case PROP_QP_CONST_I: nvenc->qp_const_detail.qp_i = g_value_get_int (value); break; case PROP_QP_CONST_P: nvenc->qp_const_detail.qp_p = g_value_get_int (value); break; case PROP_QP_CONST_B: nvenc->qp_const_detail.qp_b = g_value_get_int (value); break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); reconfig = FALSE; break; } if (reconfig) gst_nv_base_enc_schedule_reconfig (nvenc); } static void gst_nv_base_enc_get_property (GObject * object, guint prop_id, GValue * value, GParamSpec * pspec) { GstNvBaseEnc *nvenc = GST_NV_BASE_ENC (object); GstNvBaseEncClass *nvenc_class = GST_NV_BASE_ENC_GET_CLASS (object); switch (prop_id) { case PROP_DEVICE_ID: g_value_set_uint (value, nvenc_class->cuda_device_id); break; case PROP_PRESET: g_value_set_enum (value, nvenc->preset_enum); break; case PROP_RC_MODE: g_value_set_enum (value, nvenc->rate_control_mode); break; case PROP_QP_MIN: g_value_set_int (value, nvenc->qp_min); break; case PROP_QP_MAX: g_value_set_int (value, nvenc->qp_max); break; case PROP_QP_CONST: g_value_set_int (value, nvenc->qp_const); break; case PROP_BITRATE: g_value_set_uint (value, nvenc->bitrate); break; case PROP_GOP_SIZE: g_value_set_int (value, nvenc->gop_size); break; case PROP_MAX_BITRATE: g_value_set_uint (value, nvenc->max_bitrate); break; case PROP_SPATIAL_AQ: g_value_set_boolean (value, nvenc->spatial_aq); break; case PROP_AQ_STRENGTH: g_value_set_uint (value, nvenc->aq_strength); break; case PROP_NON_REF_P: g_value_set_boolean (value, nvenc->non_refp); break; case PROP_ZEROLATENCY: g_value_set_boolean (value, nvenc->zerolatency); break; case PROP_STRICT_GOP: g_value_set_boolean (value, nvenc->strict_gop); break; case PROP_CONST_QUALITY: g_value_set_double (value, nvenc->const_quality); break; case PROP_I_ADAPT: g_value_set_boolean (value, nvenc->i_adapt); break; case PROP_QP_MIN_I: g_value_set_int (value, nvenc->qp_min_detail.qp_i); break; case PROP_QP_MIN_P: g_value_set_int (value, nvenc->qp_min_detail.qp_p); break; case PROP_QP_MIN_B: g_value_set_int (value, nvenc->qp_min_detail.qp_b); break; case PROP_QP_MAX_I: g_value_set_int (value, nvenc->qp_max_detail.qp_i); break; case PROP_QP_MAX_P: g_value_set_int (value, nvenc->qp_max_detail.qp_p); break; case PROP_QP_MAX_B: g_value_set_int (value, nvenc->qp_max_detail.qp_b); break; case PROP_QP_CONST_I: g_value_set_int (value, nvenc->qp_const_detail.qp_i); break; case PROP_QP_CONST_P: g_value_set_int (value, nvenc->qp_const_detail.qp_p); break; case PROP_QP_CONST_B: g_value_set_int (value, nvenc->qp_const_detail.qp_b); break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); break; } } typedef struct { guint cuda_device_id; GstNvEncDeviceCaps device_caps; } GstNvEncClassData; static void gst_nv_base_enc_subclass_init (gpointer g_class, gpointer data) { GstNvBaseEncClass *nvbaseenc_class = GST_NV_BASE_ENC_CLASS (g_class); GstNvEncClassData *cdata = (GstNvEncClassData *) data; nvbaseenc_class->cuda_device_id = cdata->cuda_device_id; nvbaseenc_class->device_caps = cdata->device_caps; g_free (cdata); } GType gst_nv_base_enc_register (const char *codec, guint device_id, GstNvEncDeviceCaps * device_caps) { GTypeQuery type_query; GTypeInfo type_info = { 0, }; GType subtype; gchar *type_name; GstNvEncClassData *cdata; type_name = g_strdup_printf ("GstNvDevice%d%sEnc", device_id, codec); subtype = g_type_from_name (type_name); /* has already registered nvdeviceenc class */ if (subtype) goto done; cdata = g_new0 (GstNvEncClassData, 1); cdata->cuda_device_id = device_id; cdata->device_caps = *device_caps; g_type_query (GST_TYPE_NV_BASE_ENC, &type_query); memset (&type_info, 0, sizeof (type_info)); type_info.class_size = type_query.class_size; type_info.instance_size = type_query.instance_size; type_info.class_init = (GClassInitFunc) gst_nv_base_enc_subclass_init; type_info.class_data = cdata; subtype = g_type_register_static (GST_TYPE_NV_BASE_ENC, type_name, &type_info, 0); done: g_free (type_name); return subtype; }