cudaconverter: Add support for kernel precompile and cache

Port to precompile/cache approach

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/8536>
This commit is contained in:
Seungha Yang 2025-02-21 18:40:21 +09:00 committed by GStreamer Marge Bot
parent 9a8f3a65a3
commit 697cfe38ef
4 changed files with 364 additions and 1570 deletions

View file

@ -0,0 +1,39 @@
/* GStreamer
* Copyright (C) 2025 Seungha Yang <seungha@centricular.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#pragma once
#include <gst/gst.h>
#include <gst/cuda/cuda-prelude.h>
G_BEGIN_DECLS
/* Option-taking variant of gst_cuda_nvrtc_compile(); that function is
* implemented as a call to this one with (nullptr, 0).
*
* source:      program text handed to NVRTC; must not be NULL (the
*              implementation rejects NULL and returns NULL).
* options:     array of additional NVRTC option strings, may be NULL when
*              num_options is 0.
* num_options: number of entries in options.
*
* The implementation selects a --gpu-architecture option on its own
* (compute_52 when the driver version is >= 11000, compute_30 otherwise).
* NOTE(review): how the caller-supplied options are combined with that one is
* not visible from this header - confirm against the implementation.
* NOTE(review): the returned string is presumably owned by the caller, as with
* the public gst_cuda_nvrtc_compile() - confirm.
*/
GST_CUDA_API
gchar * gst_cuda_nvrtc_compile_with_option (const gchar * source,
const gchar ** options,
guint num_options);
/* Option-taking variant of gst_cuda_nvrtc_compile_cubin(); that function is
* implemented as a call to this one with (nullptr, 0).
*
* source:      program text handed to NVRTC; must not be NULL (the
*              implementation rejects NULL and returns NULL).
* device:      CUDA device.
*              NOTE(review): presumably used to query the compute capability
*              that the "sm_<major><minor>" target is built from - confirm.
* options:     array of additional NVRTC option strings, may be NULL when
*              num_options is 0.
* num_options: number of entries in options.
*
* The implementation always passes "--gpu-architecture=sm_<major><minor>" as
* the first compiler option and then appends the num_options entries of
* options in order. Returns NULL when the NVRTC program cannot be created.
* NOTE(review): the returned buffer is presumably owned by the caller, as with
* the public gst_cuda_nvrtc_compile_cubin() - confirm.
*/
GST_CUDA_API
gchar * gst_cuda_nvrtc_compile_cubin_with_option (const gchar * source,
gint device,
const gchar ** options,
guint num_options);
G_END_DECLS

View file

@ -27,7 +27,9 @@
#include <nvrtc.h>
#include <gmodule.h>
#include "gstcuda-private.h"
#include "gstcudanvrtc-private.h"
#include <string>
#include <vector>
GST_DEBUG_CATEGORY_STATIC (gst_cuda_nvrtc_debug);
#define GST_CAT_DEFAULT gst_cuda_nvrtc_debug
@ -285,22 +287,17 @@ NvrtcGetCUBIN (nvrtcProgram prog, char *cubin)
}
/* *INDENT-ON* */
/**
* gst_cuda_nvrtc_compile:
* @source: Source code to compile
*
* Since: 1.22
*/
gchar *
gst_cuda_nvrtc_compile (const gchar * source)
gst_cuda_nvrtc_compile_with_option (const gchar * source,
const gchar ** options, guint num_options)
{
nvrtcProgram prog;
nvrtcResult ret;
CUresult curet;
const gchar *opts[] = { "--gpu-architecture=compute_30" };
gsize ptx_size;
gchar *ptx = nullptr;
int driverVersion;
std::vector < const gchar *>opts;
g_return_val_if_fail (source != nullptr, nullptr);
@ -327,9 +324,11 @@ gst_cuda_nvrtc_compile (const gchar * source)
/* Starting from CUDA 11, the lowest supported architecture is 5.2 */
if (driverVersion >= 11000)
opts[0] = "--gpu-architecture=compute_52";
opts.push_back ("--gpu-architecture=compute_52");
else
opts.push_back ("--gpu-architecture=compute_30");
ret = NvrtcCompileProgram (prog, 1, opts);
ret = NvrtcCompileProgram (prog, opts.size (), opts.data ());
if (ret != NVRTC_SUCCESS) {
gsize log_size;
@ -374,17 +373,20 @@ error:
}
/**
* gst_cuda_nvrtc_compile_cubin:
* gst_cuda_nvrtc_compile:
* @source: Source code to compile
* @device: CUDA device
*
* Returns: (transfer full): Compiled CUDA assembly code if successful,
* otherwise %NULL
*
* Since: 1.24
* Since: 1.22
*/
gchar *
gst_cuda_nvrtc_compile_cubin (const gchar * source, gint device)
gst_cuda_nvrtc_compile (const gchar * source)
{
return gst_cuda_nvrtc_compile_with_option (source, nullptr, 0);
}
gchar *
gst_cuda_nvrtc_compile_cubin_with_option (const gchar * source, gint device,
const gchar ** options, guint num_options)
{
nvrtcProgram prog;
nvrtcResult ret;
@ -392,6 +394,7 @@ gst_cuda_nvrtc_compile_cubin (const gchar * source, gint device)
gsize cubin_size;
gchar *cubin = nullptr;
gint major, minor;
std::vector < const gchar *>opts;
g_return_val_if_fail (source != nullptr, nullptr);
@ -422,15 +425,18 @@ gst_cuda_nvrtc_compile_cubin (const gchar * source, gint device)
std::string opt_str = "--gpu-architecture=sm_" +
std::to_string (major) + std::to_string (minor);
opts.push_back (opt_str.c_str ());
for (guint i = 0; i < num_options; i++) {
opts.push_back (options[i]);
}
ret = NvrtcCreateProgram (&prog, source, nullptr, 0, nullptr, nullptr);
if (ret != NVRTC_SUCCESS) {
GST_ERROR ("couldn't create nvrtc program, ret %d", ret);
return nullptr;
}
const char *opts[1] = { opt_str.c_str () };
ret = NvrtcCompileProgram (prog, 1, opts);
ret = NvrtcCompileProgram (prog, opts.size (), opts.data ());
if (ret != NVRTC_SUCCESS) {
gsize log_size;
@ -469,3 +475,19 @@ error:
return nullptr;
}
/**
* gst_cuda_nvrtc_compile_cubin:
* @source: Source code to compile
* @device: CUDA device
*
* Compiles @source for @device without any caller-supplied compiler options.
* This simply forwards to gst_cuda_nvrtc_compile_cubin_with_option() with an
* empty option list.
*
* Returns: (transfer full): Compiled CUDA assembly code if successful,
* otherwise %NULL
*
* Since: 1.24
*/
gchar *
gst_cuda_nvrtc_compile_cubin (const gchar * source, gint device)
{
  /* Empty option list: no array and a count of zero */
  const gchar **no_options = nullptr;
  const guint no_options_len = 0;

  return gst_cuda_nvrtc_compile_cubin_with_option (source, device,
      no_options, no_options_len);
}

File diff suppressed because it is too large Load diff

View file

@ -81,6 +81,7 @@ if not nvcodec_precompile_opt.disabled() and not meson.is_cross_build()
# Look up the nvcc program; the nvcodec_precompile_opt option is passed as
# `required`, so it decides whether a missing nvcc is an error.
nvcc = find_program ('nvcc', required : nvcodec_precompile_opt)
if nvcc.found()
# Only when nvcc is available: process the 'kernel' subdirectory ...
subdir('kernel')
# ... and add the NVCODEC_CUDA_PRECOMPILED define to extra_args.
extra_args += ['-DNVCODEC_CUDA_PRECOMPILED']
endif
endif