nvdecoder: Enable zero-copy only if explicitly enabled

Keep the pre-1.24 behavior unless the user explicitly specifies the number of output surfaces. The number of output surfaces is calculated conservatively, which can result in over-allocation. Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/6102>
2025-01-03 14:08:56 +00:00 · 2024-02-12 22:16:23 +09:00 · 2024-02-12 22:16:23 +09:00 · cfa8386dd0
commit cfa8386dd0
parent 59358e439d
7 changed files with 43 additions and 39 deletions
--- a/subprojects/gst-plugins-bad/docs/plugins/gst_plugins_cache.json
+++ b/subprojects/gst-plugins-bad/docs/plugins/gst_plugins_cache.json
@ -225402,12 +225402,12 @@
                        "writable": true
                    },
                    "num-output-surfaces": {
-                        "blurb": "Maximum number of output surfaces simultaneously mapped in CUDA output mode (0 = auto)",
+                        "blurb": "Maximum number of output surfaces simultaneously mapped in CUDA output mode (0 = auto, 1 = always copy)",
                        "conditionally-available": false,
                        "construct": false,
                        "construct-only": false,
                        "controllable": false,
-                        "default": "0",
+                        "default": "1",
                        "max": "64",
                        "min": "0",
                        "mutable": "ready",
@ -227333,12 +227333,12 @@
                        "writable": true
                    },
                    "num-output-surfaces": {
-                        "blurb": "Maximum number of output surfaces simultaneously mapped in CUDA output mode (0 = auto)",
+                        "blurb": "Maximum number of output surfaces simultaneously mapped in CUDA output mode (0 = auto, 1 = always copy)",
                        "conditionally-available": false,
                        "construct": false,
                        "construct-only": false,
                        "controllable": false,
-                        "default": "0",
+                        "default": "1",
                        "max": "64",
                        "min": "0",
                        "mutable": "ready",
@ -227555,12 +227555,12 @@
                        "writable": true
                    },
                    "num-output-surfaces": {
-                        "blurb": "Maximum number of output surfaces simultaneously mapped in CUDA output mode (0 = auto)",
+                        "blurb": "Maximum number of output surfaces simultaneously mapped in CUDA output mode (0 = auto, 1 = always copy)",
                        "conditionally-available": false,
                        "construct": false,
                        "construct-only": false,
                        "controllable": false,
-                        "default": "0",
+                        "default": "1",
                        "max": "64",
                        "min": "0",
                        "mutable": "ready",
@ -227944,12 +227944,12 @@
                        "writable": true
                    },
                    "num-output-surfaces": {
-                        "blurb": "Maximum number of output surfaces simultaneously mapped in CUDA output mode (0 = auto)",
+                        "blurb": "Maximum number of output surfaces simultaneously mapped in CUDA output mode (0 = auto, 1 = always copy)",
                        "conditionally-available": false,
                        "construct": false,
                        "construct-only": false,
                        "controllable": false,
-                        "default": "0",
+                        "default": "1",
                        "max": "64",
                        "min": "0",
                        "mutable": "ready",
@ -228043,12 +228043,12 @@
                        "writable": true
                    },
                    "num-output-surfaces": {
-                        "blurb": "Maximum number of output surfaces simultaneously mapped in CUDA output mode (0 = auto)",
+                        "blurb": "Maximum number of output surfaces simultaneously mapped in CUDA output mode (0 = auto, 1 = always copy)",
                        "conditionally-available": false,
                        "construct": false,
                        "construct-only": false,
                        "controllable": false,
-                        "default": "0",
+                        "default": "1",
                        "max": "64",
                        "min": "0",
                        "mutable": "ready",
--- a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvav1dec.cpp
+++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvav1dec.cpp
@ -94,7 +94,7 @@ enum
  PROP_MAX_DISPLAY_DELAY,
 };
-#define DEFAULT_NUM_OUTPUT_SURFACES 0
+#define DEFAULT_NUM_OUTPUT_SURFACES 1
 #define DEFAULT_MAX_DISPLAY_DELAY -1
 static GTypeClass *parent_class = nullptr;
@ -162,16 +162,17 @@ gst_nv_av1_dec_class_init (GstNvAV1DecClass * klass,
  /**
   * GstNvAV1Dec:num-output-surfaces:
   *
-   * The number of output surfaces (0 = auto). This property will be used to
+   * The number of output surfaces (0 = auto, 1 = always copy).
-   * calculate the CUVIDDECODECREATEINFO.ulNumOutputSurfaces parameter
+   * This property will be used to calculate
-   * in case of CUDA output mode
+   * the CUVIDDECODECREATEINFO.ulNumOutputSurfaces parameter in case of
+   * CUDA output mode.
   *
   * Since: 1.24
   */
  g_object_class_install_property (object_class, PROP_NUM_OUTPUT_SURFACES,
      g_param_spec_uint ("num-output-surfaces", "Num Output Surfaces",
          "Maximum number of output surfaces simultaneously mapped in CUDA "
-          "output mode (0 = auto)",
+          "output mode (0 = auto, 1 = always copy)",
          0, 64, DEFAULT_NUM_OUTPUT_SURFACES,
          (GParamFlags) (GST_PARAM_MUTABLE_READY | G_PARAM_READWRITE |
              G_PARAM_STATIC_STRINGS)));
--- a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdecoder.cpp
+++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdecoder.cpp
@ -411,9 +411,8 @@ gst_nv_decoder_new_picture (GstNvDecoder * decoder, GstCodecPicture * picture)
  if (!decoder->object) {
    if (decoder->output_type == GST_NV_DECODER_OUTPUT_TYPE_CUDA) {
-      if (decoder->num_output_surfaces == 0 ||
+      if (decoder->num_output_surfaces == 0) {
-          decoder->num_output_surfaces < decoder->downstream_min_buffers) {
+        /* Auto mode */
-        /* Auto mode or user specified num-output-surfaces value is too small */
        decoder->create_info.ulNumOutputSurfaces =
            decoder->downstream_min_buffers + 2;
      } else {
@ -877,7 +876,7 @@ gst_nv_decoder_output_picture (GstNvDecoder * decoder,
    goto error;
  }
-  if (videodec->input_segment.rate > 0 &&
+  if (decoder->num_output_surfaces != 1 && videodec->input_segment.rate > 0 &&
      decoder->output_type == GST_NV_DECODER_OUTPUT_TYPE_CUDA &&
      (guint) decoder->create_info.ulNumOutputSurfaces >=
      decoder->downstream_min_buffers) {
--- a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvh264dec.cpp
+++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvh264dec.cpp
@ -150,7 +150,7 @@ enum
  PROP_MAX_DISPLAY_DELAY,
 };
-#define DEFAULT_NUM_OUTPUT_SURFACES 0
+#define DEFAULT_NUM_OUTPUT_SURFACES 1
 #define DEFAULT_MAX_DISPLAY_DELAY -1
 static GTypeClass *parent_class = nullptr;
@ -229,16 +229,17 @@ gst_nv_h264_dec_class_init (GstNvH264DecClass * klass,
  /**
   * GstNvH264Dec:num-output-surfaces:
   *
-   * The number of output surfaces (0 = auto). This property will be used to
+   * The number of output surfaces (0 = auto, 1 = always copy).
-   * calculate the CUVIDDECODECREATEINFO.ulNumOutputSurfaces parameter
+   * This property will be used to calculate
-   * in case of CUDA output mode
+   * the CUVIDDECODECREATEINFO.ulNumOutputSurfaces parameter in case of
+   * CUDA output mode.
   *
   * Since: 1.24
   */
  g_object_class_install_property (object_class, PROP_NUM_OUTPUT_SURFACES,
      g_param_spec_uint ("num-output-surfaces", "Num Output Surfaces",
          "Maximum number of output surfaces simultaneously mapped in CUDA "
-          "output mode (0 = auto)",
+          "output mode (0 = auto, 1 = always copy)",
          0, 64, DEFAULT_NUM_OUTPUT_SURFACES,
          (GParamFlags) (GST_PARAM_MUTABLE_READY | G_PARAM_READWRITE |
              G_PARAM_STATIC_STRINGS)));
--- a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvh265dec.cpp
+++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvh265dec.cpp
@ -149,7 +149,7 @@ enum
  PROP_MAX_DISPLAY_DELAY,
 };
-#define DEFAULT_NUM_OUTPUT_SURFACES 0
+#define DEFAULT_NUM_OUTPUT_SURFACES 1
 #define DEFAULT_MAX_DISPLAY_DELAY -1
 static GTypeClass *parent_class = nullptr;
@ -225,16 +225,17 @@ gst_nv_h265_dec_class_init (GstNvH265DecClass * klass,
  /**
   * GstNvH265Dec:num-output-surfaces:
   *
-   * The number of output surfaces (0 = auto). This property will be used to
+   * The number of output surfaces (0 = auto, 1 = always copy).
-   * calculate the CUVIDDECODECREATEINFO.ulNumOutputSurfaces parameter
+   * This property will be used to calculate
-   * in case of CUDA output mode
+   * the CUVIDDECODECREATEINFO.ulNumOutputSurfaces parameter in case of
+   * CUDA output mode.
   *
   * Since: 1.24
   */
  g_object_class_install_property (object_class, PROP_NUM_OUTPUT_SURFACES,
      g_param_spec_uint ("num-output-surfaces", "Num Output Surfaces",
          "Maximum number of output surfaces simultaneously mapped in CUDA "
-          "output mode (0 = auto)",
+          "output mode (0 = auto, 1 = always copy)",
          0, 64, DEFAULT_NUM_OUTPUT_SURFACES,
          (GParamFlags) (GST_PARAM_MUTABLE_READY | G_PARAM_READWRITE |
              G_PARAM_STATIC_STRINGS)));
--- a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvvp8dec.cpp
+++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvvp8dec.cpp
@ -78,7 +78,7 @@ enum
  PROP_MAX_DISPLAY_DELAY,
 };
-#define DEFAULT_NUM_OUTPUT_SURFACES 0
+#define DEFAULT_NUM_OUTPUT_SURFACES 1
 #define DEFAULT_MAX_DISPLAY_DELAY -1
 static GTypeClass *parent_class = nullptr;
@ -148,16 +148,17 @@ gst_nv_vp8_dec_class_init (GstNvVp8DecClass * klass,
  /**
   * GstNvVp8Dec:num-output-surfaces:
   *
-   * The number of output surfaces (0 = auto). This property will be used to
+   * The number of output surfaces (0 = auto, 1 = always copy).
-   * calculate the CUVIDDECODECREATEINFO.ulNumOutputSurfaces parameter
+   * This property will be used to calculate
-   * in case of CUDA output mode
+   * the CUVIDDECODECREATEINFO.ulNumOutputSurfaces parameter in case of
+   * CUDA output mode.
   *
   * Since: 1.24
   */
  g_object_class_install_property (object_class, PROP_NUM_OUTPUT_SURFACES,
      g_param_spec_uint ("num-output-surfaces", "Num Output Surfaces",
          "Maximum number of output surfaces simultaneously mapped in CUDA "
-          "output mode (0 = auto)",
+          "output mode (0 = auto, 1 = always copy)",
          0, 64, DEFAULT_NUM_OUTPUT_SURFACES,
          (GParamFlags) (GST_PARAM_MUTABLE_READY | G_PARAM_READWRITE |
              G_PARAM_STATIC_STRINGS)));
--- a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvvp9dec.cpp
+++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvvp9dec.cpp
@ -79,7 +79,7 @@ enum
  PROP_MAX_DISPLAY_DELAY,
 };
-#define DEFAULT_NUM_OUTPUT_SURFACES 0
+#define DEFAULT_NUM_OUTPUT_SURFACES 1
 #define DEFAULT_MAX_DISPLAY_DELAY -1
 static GTypeClass *parent_class = nullptr;
@ -151,16 +151,17 @@ gst_nv_vp9_dec_class_init (GstNvVp9DecClass * klass,
  /**
   * GstNvVp9Dec:num-output-surfaces:
   *
-   * The number of output surfaces (0 = auto). This property will be used to
+   * The number of output surfaces (0 = auto, 1 = always copy).
-   * calculate the CUVIDDECODECREATEINFO.ulNumOutputSurfaces parameter
+   * This property will be used to calculate
-   * in case of CUDA output mode
+   * the CUVIDDECODECREATEINFO.ulNumOutputSurfaces parameter in case of
+   * CUDA output mode.
   *
   * Since: 1.24
   */
  g_object_class_install_property (object_class, PROP_NUM_OUTPUT_SURFACES,
      g_param_spec_uint ("num-output-surfaces", "Num Output Surfaces",
          "Maximum number of output surfaces simultaneously mapped in CUDA "
-          "output mode (0 = auto)",
+          "output mode (0 = auto, 1 = always copy)",
          0, 64, DEFAULT_NUM_OUTPUT_SURFACES,
          (GParamFlags) (GST_PARAM_MUTABLE_READY | G_PARAM_READWRITE |
              G_PARAM_STATIC_STRINGS)));