cudaconverter: Add support for kernel precompile and cache

Port to precompile/cache approach.
Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/8536>
2025-02-26 16:06:52 +00:00 · 2025-02-21 18:40:21 +09:00 · 2025-02-21 18:40:21 +09:00 · 697cfe38ef
commit 697cfe38ef
parent 9a8f3a65a3
4 changed files with 364 additions and 1570 deletions
--- a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudanvrtc-private.h
+++ b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudanvrtc-private.h
@ -0,0 +1,39 @@
 /* GStreamer
 * Copyright (C) 2025 Seungha Yang <seungha@centricular.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 */
 #pragma once
 #include <gst/gst.h>
 #include <gst/cuda/cuda-prelude.h>
 G_BEGIN_DECLS
 /*
 * gst_cuda_nvrtc_compile_with_option:
 * @source: Source code to compile; the implementation rejects %NULL and
 *   returns %NULL in that case
 * @options: array of additional compiler option strings; may be %NULL when
 *   @num_options is 0 (this is how gst_cuda_nvrtc_compile() calls it)
 * @num_options: number of entries in @options
 *
 * Option-aware variant of gst_cuda_nvrtc_compile(). The implementation
 * picks "--gpu-architecture=compute_52" when the driver version is at least
 * 11000 and "--gpu-architecture=compute_30" otherwise, then hands the
 * collected option list to NVRTC.
 *
 * NOTE(review): @options are presumably appended to that list before
 * compiling, as in gst_cuda_nvrtc_compile_cubin_with_option() - confirm
 * against the implementation.
 *
 * Returns: the compiled output, or %NULL on failure. Ownership presumably
 * transfers to the caller as with gst_cuda_nvrtc_compile_cubin() - TODO
 * confirm.
 */
 GST_CUDA_API
 gchar *   gst_cuda_nvrtc_compile_with_option (const gchar * source,
                                              const gchar ** options,
                                              guint num_options);
 /*
 * gst_cuda_nvrtc_compile_cubin_with_option:
 * @source: Source code to compile; the implementation rejects %NULL and
 *   returns %NULL in that case
 * @device: CUDA device
 * @options: array of additional compiler option strings; may be %NULL when
 *   @num_options is 0 (this is how gst_cuda_nvrtc_compile_cubin() calls it)
 * @num_options: number of entries in @options
 *
 * Option-aware variant of gst_cuda_nvrtc_compile_cubin(). The implementation
 * builds a "--gpu-architecture=sm_<major><minor>" option, appends the
 * @num_options caller-supplied @options after it, and passes the resulting
 * list to NVRTC.
 *
 * NOTE(review): major/minor are presumably the compute capability queried
 * for @device - confirm against the implementation.
 *
 * Returns: (transfer full): the compiled code if successful, otherwise %NULL
 */
 GST_CUDA_API
 gchar *   gst_cuda_nvrtc_compile_cubin_with_option (const gchar * source,
                                                    gint device,
                                                    const gchar ** options,
                                                    guint num_options);
 G_END_DECLS
--- a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudanvrtc.cpp
+++ b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudanvrtc.cpp
@ -27,7 +27,9 @@
 #include <nvrtc.h>
 #include <gmodule.h>
 #include "gstcuda-private.h"
 #include "gstcudanvrtc-private.h"
 #include <string>
 #include <vector>
 GST_DEBUG_CATEGORY_STATIC (gst_cuda_nvrtc_debug);
 #define GST_CAT_DEFAULT gst_cuda_nvrtc_debug
@ -285,22 +287,17 @@ NvrtcGetCUBIN (nvrtcProgram prog, char *cubin)
 }
 /* *INDENT-ON* */
 /**
 * gst_cuda_nvrtc_compile:
 * @source: Source code to compile
 *
 * Since: 1.22
 */
 gchar *
-gst_cuda_nvrtc_compile (const gchar * source)
+gst_cuda_nvrtc_compile_with_option (const gchar * source,
    const gchar ** options, guint num_options)
 {
  nvrtcProgram prog;
  nvrtcResult ret;
  CUresult curet;
  const gchar *opts[] = { "--gpu-architecture=compute_30" };
  gsize ptx_size;
  gchar *ptx = nullptr;
  int driverVersion;
  std::vector < const gchar *>opts;
  g_return_val_if_fail (source != nullptr, nullptr);
@ -327,9 +324,11 @@ gst_cuda_nvrtc_compile (const gchar * source)
  /* Starting from CUDA 11, the lowest supported architecture is 5.2 */
  if (driverVersion >= 11000)
-    opts[0] = "--gpu-architecture=compute_52";
+    opts.push_back ("--gpu-architecture=compute_52");
  else
    opts.push_back ("--gpu-architecture=compute_30");
-  ret = NvrtcCompileProgram (prog, 1, opts);
+  ret = NvrtcCompileProgram (prog, opts.size (), opts.data ());
  if (ret != NVRTC_SUCCESS) {
    gsize log_size;
@ -374,17 +373,20 @@ error:
 }
 /**
- * gst_cuda_nvrtc_compile_cubin:
+ * gst_cuda_nvrtc_compile:
 * @source: Source code to compile
 * @device: CUDA device
 *
- * Returns: (transfer full): Compiled CUDA assembly code if successful,
+ * Since: 1.22
 * otherwise %NULL
 *
 * Since: 1.24
 */
 gchar *
-gst_cuda_nvrtc_compile_cubin (const gchar * source, gint device)
+gst_cuda_nvrtc_compile (const gchar * source)
 {
  /* Thin wrapper: forward to the option-aware variant with an empty
   * (NULL, 0) list of caller-supplied options */
  return gst_cuda_nvrtc_compile_with_option (source, nullptr, 0);
 }
 gchar *
 gst_cuda_nvrtc_compile_cubin_with_option (const gchar * source, gint device,
    const gchar ** options, guint num_options)
 {
  nvrtcProgram prog;
  nvrtcResult ret;
@ -392,6 +394,7 @@ gst_cuda_nvrtc_compile_cubin (const gchar * source, gint device)
  gsize cubin_size;
  gchar *cubin = nullptr;
  gint major, minor;
  std::vector < const gchar *>opts;
  g_return_val_if_fail (source != nullptr, nullptr);
@ -422,15 +425,18 @@ gst_cuda_nvrtc_compile_cubin (const gchar * source, gint device)
  std::string opt_str = "--gpu-architecture=sm_" +
      std::to_string (major) + std::to_string (minor);
  opts.push_back (opt_str.c_str ());
  for (guint i = 0; i < num_options; i++) {
    opts.push_back (options[i]);
  }
  ret = NvrtcCreateProgram (&prog, source, nullptr, 0, nullptr, nullptr);
  if (ret != NVRTC_SUCCESS) {
    GST_ERROR ("couldn't create nvrtc program, ret %d", ret);
    return nullptr;
  }
-  const char *opts[1] = { opt_str.c_str () };
+  ret = NvrtcCompileProgram (prog, opts.size (), opts.data ());
  ret = NvrtcCompileProgram (prog, 1, opts);
  if (ret != NVRTC_SUCCESS) {
    gsize log_size;
@ -469,3 +475,19 @@ error:
  return nullptr;
 }
 /**
 * gst_cuda_nvrtc_compile_cubin:
 * @source: Source code to compile
 * @device: CUDA device
 *
 * Convenience wrapper around gst_cuda_nvrtc_compile_cubin_with_option()
 * that passes no additional compiler options.
 *
 * Returns: (transfer full): Compiled CUDA assembly code if successful,
 * otherwise %NULL
 *
 * Since: 1.24
 */
 gchar *
 gst_cuda_nvrtc_compile_cubin (const gchar * source, gint device)
 {
  /* No caller-supplied options: hand over a NULL array with a count of 0 */
  return gst_cuda_nvrtc_compile_cubin_with_option (source, device, nullptr, 0);
 }
--- a/subprojects/gst-plugins-bad/sys/nvcodec/gstcudaconverter.cpp
+++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstcudaconverter.cpp
--- a/subprojects/gst-plugins-bad/sys/nvcodec/meson.build
+++ b/subprojects/gst-plugins-bad/sys/nvcodec/meson.build
@ -81,6 +81,7 @@ if not nvcodec_precompile_opt.disabled() and not meson.is_cross_build()
  nvcc = find_program ('nvcc', required : nvcodec_precompile_opt)
  # Only when the nvcc lookup above succeeded: process the build definitions
  # in the 'kernel' subdirectory and add the NVCODEC_CUDA_PRECOMPILED define
  # to extra_args.
  # NOTE(review): the define presumably tells the plugin sources that
  # precompiled kernels are available - confirm in the sources.
  if nvcc.found()
    subdir('kernel')
    extra_args += ['-DNVCODEC_CUDA_PRECOMPILED']
  endif
 endif