mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2025-02-26 16:06:52 +00:00
cudaconverter: Add support for kernel precompile and cache
Port to precompile/cache approach. Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/8536>
This commit is contained in:
parent
9a8f3a65a3
commit
697cfe38ef
4 changed files with 364 additions and 1570 deletions
|
@ -0,0 +1,39 @@
|
|||
/* GStreamer
|
||||
* Copyright (C) 2025 Seungha Yang <seungha@centricular.com>
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Library General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Library General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Library General Public
|
||||
* License along with this library; if not, write to the
|
||||
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
|
||||
* Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <gst/gst.h>
|
||||
#include <gst/cuda/cuda-prelude.h>
|
||||
|
||||
G_BEGIN_DECLS
|
||||
|
||||
/* Variant of gst_cuda_nvrtc_compile() that additionally takes @options, an
 * array of @num_options option strings. gst_cuda_nvrtc_compile() is
 * implemented by calling this function with (nullptr, 0) and returning its
 * result.
 *
 * NOTE(review): the visible parts of the implementation only show a
 * "--gpu-architecture=compute_XX" option being pushed before
 * NvrtcCompileProgram(); confirm that @options is actually appended to the
 * option list, as is done in gst_cuda_nvrtc_compile_cubin_with_option(). */
GST_CUDA_API
|
||||
gchar * gst_cuda_nvrtc_compile_with_option (const gchar * source,
|
||||
const gchar ** options,
|
||||
guint num_options);
|
||||
|
||||
/* Variant of gst_cuda_nvrtc_compile_cubin() that additionally takes @options,
 * an array of @num_options option strings. The implementation passes
 * "--gpu-architecture=sm_<major><minor>" first, followed by the @num_options
 * entries of @options in order, to NvrtcCompileProgram().
 *
 * @options is only indexed for i < @num_options, so it may be NULL when
 * @num_options is 0; gst_cuda_nvrtc_compile_cubin() calls it that way and
 * returns this function's result directly. */
GST_CUDA_API
|
||||
gchar * gst_cuda_nvrtc_compile_cubin_with_option (const gchar * source,
|
||||
gint device,
|
||||
const gchar ** options,
|
||||
guint num_options);
|
||||
|
||||
G_END_DECLS
|
||||
|
|
@ -27,7 +27,9 @@
|
|||
#include <nvrtc.h>
|
||||
#include <gmodule.h>
|
||||
#include "gstcuda-private.h"
|
||||
#include "gstcudanvrtc-private.h"
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
GST_DEBUG_CATEGORY_STATIC (gst_cuda_nvrtc_debug);
|
||||
#define GST_CAT_DEFAULT gst_cuda_nvrtc_debug
|
||||
|
@ -285,22 +287,17 @@ NvrtcGetCUBIN (nvrtcProgram prog, char *cubin)
|
|||
}
|
||||
/* *INDENT-ON* */
|
||||
|
||||
/**
|
||||
* gst_cuda_nvrtc_compile:
|
||||
* @source: Source code to compile
|
||||
*
|
||||
* Since: 1.22
|
||||
*/
|
||||
gchar *
|
||||
gst_cuda_nvrtc_compile (const gchar * source)
|
||||
gst_cuda_nvrtc_compile_with_option (const gchar * source,
|
||||
const gchar ** options, guint num_options)
|
||||
{
|
||||
nvrtcProgram prog;
|
||||
nvrtcResult ret;
|
||||
CUresult curet;
|
||||
const gchar *opts[] = { "--gpu-architecture=compute_30" };
|
||||
gsize ptx_size;
|
||||
gchar *ptx = nullptr;
|
||||
int driverVersion;
|
||||
std::vector < const gchar *>opts;
|
||||
|
||||
g_return_val_if_fail (source != nullptr, nullptr);
|
||||
|
||||
|
@ -327,9 +324,11 @@ gst_cuda_nvrtc_compile (const gchar * source)
|
|||
|
||||
/* Starting from CUDA 11, the lowest supported architecture is 5.2 */
|
||||
if (driverVersion >= 11000)
|
||||
opts[0] = "--gpu-architecture=compute_52";
|
||||
opts.push_back ("--gpu-architecture=compute_52");
|
||||
else
|
||||
opts.push_back ("--gpu-architecture=compute_30");
|
||||
|
||||
ret = NvrtcCompileProgram (prog, 1, opts);
|
||||
ret = NvrtcCompileProgram (prog, opts.size (), opts.data ());
|
||||
if (ret != NVRTC_SUCCESS) {
|
||||
gsize log_size;
|
||||
|
||||
|
@ -374,17 +373,20 @@ error:
|
|||
}
|
||||
|
||||
/**
|
||||
* gst_cuda_nvrtc_compile_cubin:
|
||||
* gst_cuda_nvrtc_compile:
|
||||
* @source: Source code to compile
|
||||
* @device: CUDA device
|
||||
*
|
||||
* Returns: (transfer full): Compiled CUDA assembly code if successful,
|
||||
* otherwise %NULL
|
||||
*
|
||||
* Since: 1.24
|
||||
* Since: 1.22
|
||||
*/
|
||||
gchar *
|
||||
gst_cuda_nvrtc_compile_cubin (const gchar * source, gint device)
|
||||
gst_cuda_nvrtc_compile (const gchar * source)
|
||||
{
|
||||
return gst_cuda_nvrtc_compile_with_option (source, nullptr, 0);
|
||||
}
|
||||
|
||||
gchar *
|
||||
gst_cuda_nvrtc_compile_cubin_with_option (const gchar * source, gint device,
|
||||
const gchar ** options, guint num_options)
|
||||
{
|
||||
nvrtcProgram prog;
|
||||
nvrtcResult ret;
|
||||
|
@ -392,6 +394,7 @@ gst_cuda_nvrtc_compile_cubin (const gchar * source, gint device)
|
|||
gsize cubin_size;
|
||||
gchar *cubin = nullptr;
|
||||
gint major, minor;
|
||||
std::vector < const gchar *>opts;
|
||||
|
||||
g_return_val_if_fail (source != nullptr, nullptr);
|
||||
|
||||
|
@ -422,15 +425,18 @@ gst_cuda_nvrtc_compile_cubin (const gchar * source, gint device)
|
|||
std::string opt_str = "--gpu-architecture=sm_" +
|
||||
std::to_string (major) + std::to_string (minor);
|
||||
|
||||
opts.push_back (opt_str.c_str ());
|
||||
for (guint i = 0; i < num_options; i++) {
|
||||
opts.push_back (options[i]);
|
||||
}
|
||||
|
||||
ret = NvrtcCreateProgram (&prog, source, nullptr, 0, nullptr, nullptr);
|
||||
if (ret != NVRTC_SUCCESS) {
|
||||
GST_ERROR ("couldn't create nvrtc program, ret %d", ret);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
const char *opts[1] = { opt_str.c_str () };
|
||||
|
||||
ret = NvrtcCompileProgram (prog, 1, opts);
|
||||
ret = NvrtcCompileProgram (prog, opts.size (), opts.data ());
|
||||
if (ret != NVRTC_SUCCESS) {
|
||||
gsize log_size;
|
||||
|
||||
|
@ -469,3 +475,19 @@ error:
|
|||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/**
|
||||
* gst_cuda_nvrtc_compile_cubin:
|
||||
* @source: Source code to compile
|
||||
* @device: CUDA device
|
||||
*
|
||||
* Convenience wrapper: forwards @source and @device to
* gst_cuda_nvrtc_compile_cubin_with_option() with an empty option list
* (nullptr, 0) and returns its result unchanged.
*
* Returns: (transfer full): Compiled CUDA assembly code if successful,
|
||||
* otherwise %NULL
|
||||
*
|
||||
* Since: 1.24
|
||||
*/
|
||||
gchar *
|
||||
gst_cuda_nvrtc_compile_cubin (const gchar * source, gint device)
|
||||
{
|
||||
/* No caller-supplied options: pass a null array with a count of zero */
return gst_cuda_nvrtc_compile_cubin_with_option (source, device, nullptr, 0);
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -81,6 +81,7 @@ if not nvcodec_precompile_opt.disabled() and not meson.is_cross_build()
|
|||
nvcc = find_program ('nvcc', required : nvcodec_precompile_opt)
|
||||
if nvcc.found()
|
||||
subdir('kernel')
|
||||
extra_args += ['-DNVCODEC_CUDA_PRECOMPILED']
|
||||
endif
|
||||
endif
|
||||
|
||||
|
|
Loading…
Reference in a new issue