cudaconverter: Add support for kernel precompile and cache

Port the CUDA converter to the kernel precompile/cache approach.

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/8536>
2025-02-26 16:06:52 +00:00 · 2025-02-21 18:40:21 +09:00 · 2025-02-21 18:40:21 +09:00 · 697cfe38ef
commit 697cfe38ef
parent 9a8f3a65a3
4 changed files with 364 additions and 1570 deletions
--- a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudanvrtc-private.h
+++ b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudanvrtc-private.h
@ -0,0 +1,39 @@
+/* GStreamer
+ * Copyright (C) 2025 Seungha Yang <seungha@centricular.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#pragma once
+
+#include <gst/gst.h>
+#include <gst/cuda/cuda-prelude.h>
+
+G_BEGIN_DECLS
+
+/*
+ * gst_cuda_nvrtc_compile_with_option:
+ * @source: CUDA source code to compile; the implementation rejects NULL
+ *   and returns NULL in that case
+ * @options: array of additional NVRTC option strings, may be NULL when
+ *   @num_options is 0 (gst_cuda_nvrtc_compile() calls it that way)
+ * @num_options: number of entries in @options
+ *
+ * Private variant of gst_cuda_nvrtc_compile() that takes extra compiler
+ * options. The implementation selects "--gpu-architecture=compute_52" when
+ * the driver version is >= 11000 and "--gpu-architecture=compute_30"
+ * otherwise, then invokes the NVRTC compiler with its option list.
+ *
+ * NOTE(review): in the implementation hunks of this change only the
+ * architecture option is pushed onto the option list; @options and
+ * @num_options do not appear to be appended there, unlike in
+ * gst_cuda_nvrtc_compile_cubin_with_option(). Confirm whether caller
+ * supplied options are silently ignored.
+ *
+ * Returns: presumably the compiled PTX text, or NULL on failure; ownership
+ * is not visible here - TODO confirm against the implementation
+ */
+GST_CUDA_API
+gchar *   gst_cuda_nvrtc_compile_with_option (const gchar * source,
+                                              const gchar ** options,
+                                              guint num_options);
+
+/*
+ * gst_cuda_nvrtc_compile_cubin_with_option:
+ * @source: CUDA source code to compile; the implementation rejects NULL
+ *   and returns NULL in that case
+ * @device: CUDA device the code is compiled for
+ * @options: array of additional NVRTC option strings; it is indexed for
+ *   every i < @num_options, so it must be non-NULL unless @num_options is 0
+ * @num_options: number of entries in @options
+ *
+ * Private variant of gst_cuda_nvrtc_compile_cubin() that takes extra
+ * compiler options. The implementation builds a
+ * "--gpu-architecture=sm_<major><minor>" option first and appends the
+ * entries of @options after it, in order, before compiling.
+ *
+ * Returns: presumably the compiled CUBIN data, or NULL on failure;
+ * ownership is not visible here - TODO confirm against the implementation
+ */
+GST_CUDA_API
+gchar *   gst_cuda_nvrtc_compile_cubin_with_option (const gchar * source,
+                                                    gint device,
+                                                    const gchar ** options,
+                                                    guint num_options);
+
+G_END_DECLS
+
--- a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudanvrtc.cpp
+++ b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudanvrtc.cpp
@ -27,7 +27,9 @@
 #include <nvrtc.h>
 #include <gmodule.h>
 #include "gstcuda-private.h"
+#include "gstcudanvrtc-private.h"
 #include <string>
+#include <vector>

 GST_DEBUG_CATEGORY_STATIC (gst_cuda_nvrtc_debug);
 #define GST_CAT_DEFAULT gst_cuda_nvrtc_debug
@ -285,22 +287,17 @@ NvrtcGetCUBIN (nvrtcProgram prog, char *cubin)
 }
 /* *INDENT-ON* */

-/**
- * gst_cuda_nvrtc_compile:
- * @source: Source code to compile
- *
- * Since: 1.22
- */
 gchar *
-gst_cuda_nvrtc_compile (const gchar * source)
+gst_cuda_nvrtc_compile_with_option (const gchar * source,
+    const gchar ** options, guint num_options)
 {
  nvrtcProgram prog;
  nvrtcResult ret;
  CUresult curet;
-  const gchar *opts[] = { "--gpu-architecture=compute_30" };
  gsize ptx_size;
  gchar *ptx = nullptr;
  int driverVersion;
+  std::vector < const gchar *>opts;

  g_return_val_if_fail (source != nullptr, nullptr);

@ -327,9 +324,11 @@ gst_cuda_nvrtc_compile (const gchar * source)

  /* Starting from CUDA 11, the lowest supported architecture is 5.2 */
  if (driverVersion >= 11000)
-    opts[0] = "--gpu-architecture=compute_52";
+    opts.push_back ("--gpu-architecture=compute_52");
+  else
+    opts.push_back ("--gpu-architecture=compute_30");

-  ret = NvrtcCompileProgram (prog, 1, opts);
+  ret = NvrtcCompileProgram (prog, opts.size (), opts.data ());
  if (ret != NVRTC_SUCCESS) {
    gsize log_size;

@ -374,17 +373,20 @@ error:
 }

 /**
- * gst_cuda_nvrtc_compile_cubin:
+ * gst_cuda_nvrtc_compile:
 * @source: Source code to compile
- * @device: CUDA device
 *
- * Returns: (transfer full): Compiled CUDA assembly code if successful,
- * otherwise %NULL
- *
- * Since: 1.24
+ * Compiles @source by forwarding to gst_cuda_nvrtc_compile_with_option()
+ * without any additional compiler options.
+ *
+ * Returns: the value returned by gst_cuda_nvrtc_compile_with_option()
+ *
+ * Since: 1.22
 */
 gchar *
-gst_cuda_nvrtc_compile_cubin (const gchar * source, gint device)
+gst_cuda_nvrtc_compile (const gchar * source)
+{
+  /* nullptr / 0: no caller-supplied options are passed on */
+  return gst_cuda_nvrtc_compile_with_option (source, nullptr, 0);
+}
+
+gchar *
+gst_cuda_nvrtc_compile_cubin_with_option (const gchar * source, gint device,
+    const gchar ** options, guint num_options)
 {
  nvrtcProgram prog;
  nvrtcResult ret;
@ -392,6 +394,7 @@ gst_cuda_nvrtc_compile_cubin (const gchar * source, gint device)
  gsize cubin_size;
  gchar *cubin = nullptr;
  gint major, minor;
+  std::vector < const gchar *>opts;

  g_return_val_if_fail (source != nullptr, nullptr);

@ -422,15 +425,18 @@ gst_cuda_nvrtc_compile_cubin (const gchar * source, gint device)
  std::string opt_str = "--gpu-architecture=sm_" +
      std::to_string (major) + std::to_string (minor);

+  opts.push_back (opt_str.c_str ());
+  for (guint i = 0; i < num_options; i++) {
+    opts.push_back (options[i]);
+  }
+
  ret = NvrtcCreateProgram (&prog, source, nullptr, 0, nullptr, nullptr);
  if (ret != NVRTC_SUCCESS) {
    GST_ERROR ("couldn't create nvrtc program, ret %d", ret);
    return nullptr;
  }

-  const char *opts[1] = { opt_str.c_str () };
-
-  ret = NvrtcCompileProgram (prog, 1, opts);
+  ret = NvrtcCompileProgram (prog, opts.size (), opts.data ());
  if (ret != NVRTC_SUCCESS) {
    gsize log_size;

@ -469,3 +475,19 @@ error:

  return nullptr;
 }
+
+/**
+ * gst_cuda_nvrtc_compile_cubin:
+ * @source: Source code to compile
+ * @device: CUDA device
+ *
+ * Compiles @source for @device by forwarding to
+ * gst_cuda_nvrtc_compile_cubin_with_option() without any additional
+ * compiler options.
+ *
+ * Returns: (transfer full): Compiled CUDA assembly code if successful,
+ * otherwise %NULL
+ *
+ * Since: 1.24
+ */
+gchar *
+gst_cuda_nvrtc_compile_cubin (const gchar * source, gint device)
+{
+  /* nullptr / 0: no caller-supplied options are passed on */
+  return gst_cuda_nvrtc_compile_cubin_with_option (source, device, nullptr, 0);
+}
--- a/subprojects/gst-plugins-bad/sys/nvcodec/gstcudaconverter.cpp
+++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstcudaconverter.cpp
--- a/subprojects/gst-plugins-bad/sys/nvcodec/meson.build
+++ b/subprojects/gst-plugins-bad/sys/nvcodec/meson.build
@ -81,6 +81,7 @@ if not nvcodec_precompile_opt.disabled() and not meson.is_cross_build()
  nvcc = find_program ('nvcc', required : nvcodec_precompile_opt)
  if nvcc.found()
    subdir('kernel')
+    extra_args += ['-DNVCODEC_CUDA_PRECOMPILED']
  endif
 endif