mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2025-02-26 16:06:52 +00:00
cudaconverter: Add support for kernel precompile and cache
Port to the precompile/cache approach. Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/8536>
This commit is contained in:
parent
9a8f3a65a3
commit
697cfe38ef
4 changed files with 364 additions and 1570 deletions
|
@ -0,0 +1,39 @@
|
||||||
|
/* GStreamer
|
||||||
|
* Copyright (C) 2025 Seungha Yang <seungha@centricular.com>
|
||||||
|
*
|
||||||
|
* This library is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Library General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This library is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Library General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Library General Public
|
||||||
|
* License along with this library; if not, write to the
|
||||||
|
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
|
||||||
|
* Boston, MA 02110-1301, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <gst/gst.h>
|
||||||
|
#include <gst/cuda/cuda-prelude.h>
|
||||||
|
|
||||||
|
G_BEGIN_DECLS
|
||||||
|
|
||||||
|
/* gst_cuda_nvrtc_compile_with_option:
 * @source: CUDA source code to compile; the implementation rejects a null
 *   pointer via g_return_val_if_fail() and returns nullptr in that case
 * @options: array of additional NVRTC option strings, may be nullptr when
 *   @num_options is 0 (gst_cuda_nvrtc_compile() calls it exactly that way)
 * @num_options: number of entries in @options
 *
 * Option-taking variant of gst_cuda_nvrtc_compile(). The implementation
 * selects "--gpu-architecture=compute_52" when the driver version is
 * >= 11000 and "--gpu-architecture=compute_30" otherwise.
 *
 * Presumably returns the generated PTX text (the result is kept in a
 * local named "ptx") - TODO confirm return value and ownership.
 *
 * NOTE(review): the visible part of the implementation pushes only the
 * architecture flag before calling NvrtcCompileProgram(); no code that
 * appends @options is visible there - confirm the options are forwarded.
 */
GST_CUDA_API
|
||||||
|
gchar * gst_cuda_nvrtc_compile_with_option (const gchar * source,
|
||||||
|
const gchar ** options,
|
||||||
|
guint num_options);
|
||||||
|
|
||||||
|
/* gst_cuda_nvrtc_compile_cubin_with_option:
 * @source: CUDA source code to compile; the implementation rejects a null
 *   pointer via g_return_val_if_fail() and returns nullptr in that case
 * @device: CUDA device (same meaning as in gst_cuda_nvrtc_compile_cubin())
 * @options: array of additional NVRTC option strings, may be nullptr when
 *   @num_options is 0 (gst_cuda_nvrtc_compile_cubin() calls it that way)
 * @num_options: number of entries in @options
 *
 * Option-taking variant of gst_cuda_nvrtc_compile_cubin(). The
 * implementation passes "--gpu-architecture=sm_" followed by the major and
 * minor numbers as the first NVRTC option, then the @num_options entries of
 * @options in their given order. The major/minor values are presumably the
 * compute capability queried for @device - TODO confirm.
 *
 * The visible error paths return nullptr. On success the result is
 * presumably the compiled CUBIN owned by the caller, as documented for
 * gst_cuda_nvrtc_compile_cubin() - TODO confirm.
 */
GST_CUDA_API
|
||||||
|
gchar * gst_cuda_nvrtc_compile_cubin_with_option (const gchar * source,
|
||||||
|
gint device,
|
||||||
|
const gchar ** options,
|
||||||
|
guint num_options);
|
||||||
|
|
||||||
|
G_END_DECLS
|
||||||
|
|
|
@ -27,7 +27,9 @@
|
||||||
#include <nvrtc.h>
|
#include <nvrtc.h>
|
||||||
#include <gmodule.h>
|
#include <gmodule.h>
|
||||||
#include "gstcuda-private.h"
|
#include "gstcuda-private.h"
|
||||||
|
#include "gstcudanvrtc-private.h"
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
GST_DEBUG_CATEGORY_STATIC (gst_cuda_nvrtc_debug);
|
GST_DEBUG_CATEGORY_STATIC (gst_cuda_nvrtc_debug);
|
||||||
#define GST_CAT_DEFAULT gst_cuda_nvrtc_debug
|
#define GST_CAT_DEFAULT gst_cuda_nvrtc_debug
|
||||||
|
@ -285,22 +287,17 @@ NvrtcGetCUBIN (nvrtcProgram prog, char *cubin)
|
||||||
}
|
}
|
||||||
/* *INDENT-ON* */
|
/* *INDENT-ON* */
|
||||||
|
|
||||||
/**
|
|
||||||
* gst_cuda_nvrtc_compile:
|
|
||||||
* @source: Source code to compile
|
|
||||||
*
|
|
||||||
* Since: 1.22
|
|
||||||
*/
|
|
||||||
gchar *
|
gchar *
|
||||||
gst_cuda_nvrtc_compile (const gchar * source)
|
gst_cuda_nvrtc_compile_with_option (const gchar * source,
|
||||||
|
const gchar ** options, guint num_options)
|
||||||
{
|
{
|
||||||
nvrtcProgram prog;
|
nvrtcProgram prog;
|
||||||
nvrtcResult ret;
|
nvrtcResult ret;
|
||||||
CUresult curet;
|
CUresult curet;
|
||||||
const gchar *opts[] = { "--gpu-architecture=compute_30" };
|
|
||||||
gsize ptx_size;
|
gsize ptx_size;
|
||||||
gchar *ptx = nullptr;
|
gchar *ptx = nullptr;
|
||||||
int driverVersion;
|
int driverVersion;
|
||||||
|
std::vector < const gchar *>opts;
|
||||||
|
|
||||||
g_return_val_if_fail (source != nullptr, nullptr);
|
g_return_val_if_fail (source != nullptr, nullptr);
|
||||||
|
|
||||||
|
@ -327,9 +324,11 @@ gst_cuda_nvrtc_compile (const gchar * source)
|
||||||
|
|
||||||
/* Starting from CUDA 11, the lowest supported architecture is 5.2 */
|
/* Starting from CUDA 11, the lowest supported architecture is 5.2 */
|
||||||
if (driverVersion >= 11000)
|
if (driverVersion >= 11000)
|
||||||
opts[0] = "--gpu-architecture=compute_52";
|
opts.push_back ("--gpu-architecture=compute_52");
|
||||||
|
else
|
||||||
|
opts.push_back ("--gpu-architecture=compute_30");
|
||||||
|
|
||||||
ret = NvrtcCompileProgram (prog, 1, opts);
|
ret = NvrtcCompileProgram (prog, opts.size (), opts.data ());
|
||||||
if (ret != NVRTC_SUCCESS) {
|
if (ret != NVRTC_SUCCESS) {
|
||||||
gsize log_size;
|
gsize log_size;
|
||||||
|
|
||||||
|
@ -374,17 +373,20 @@ error:
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* gst_cuda_nvrtc_compile_cubin:
|
* gst_cuda_nvrtc_compile:
|
||||||
* @source: Source code to compile
|
* @source: Source code to compile
|
||||||
* @device: CUDA device
|
|
||||||
*
|
*
|
||||||
* Returns: (transfer full): Compiled CUDA assembly code if successful,
|
* Since: 1.22
|
||||||
* otherwise %NULL
|
|
||||||
*
|
|
||||||
* Since: 1.24
|
|
||||||
*/
|
*/
|
||||||
gchar *
|
gchar *
|
||||||
gst_cuda_nvrtc_compile_cubin (const gchar * source, gint device)
|
gst_cuda_nvrtc_compile (const gchar * source)
|
||||||
|
{
|
||||||
|
return gst_cuda_nvrtc_compile_with_option (source, nullptr, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
gchar *
|
||||||
|
gst_cuda_nvrtc_compile_cubin_with_option (const gchar * source, gint device,
|
||||||
|
const gchar ** options, guint num_options)
|
||||||
{
|
{
|
||||||
nvrtcProgram prog;
|
nvrtcProgram prog;
|
||||||
nvrtcResult ret;
|
nvrtcResult ret;
|
||||||
|
@ -392,6 +394,7 @@ gst_cuda_nvrtc_compile_cubin (const gchar * source, gint device)
|
||||||
gsize cubin_size;
|
gsize cubin_size;
|
||||||
gchar *cubin = nullptr;
|
gchar *cubin = nullptr;
|
||||||
gint major, minor;
|
gint major, minor;
|
||||||
|
std::vector < const gchar *>opts;
|
||||||
|
|
||||||
g_return_val_if_fail (source != nullptr, nullptr);
|
g_return_val_if_fail (source != nullptr, nullptr);
|
||||||
|
|
||||||
|
@ -422,15 +425,18 @@ gst_cuda_nvrtc_compile_cubin (const gchar * source, gint device)
|
||||||
std::string opt_str = "--gpu-architecture=sm_" +
|
std::string opt_str = "--gpu-architecture=sm_" +
|
||||||
std::to_string (major) + std::to_string (minor);
|
std::to_string (major) + std::to_string (minor);
|
||||||
|
|
||||||
|
opts.push_back (opt_str.c_str ());
|
||||||
|
for (guint i = 0; i < num_options; i++) {
|
||||||
|
opts.push_back (options[i]);
|
||||||
|
}
|
||||||
|
|
||||||
ret = NvrtcCreateProgram (&prog, source, nullptr, 0, nullptr, nullptr);
|
ret = NvrtcCreateProgram (&prog, source, nullptr, 0, nullptr, nullptr);
|
||||||
if (ret != NVRTC_SUCCESS) {
|
if (ret != NVRTC_SUCCESS) {
|
||||||
GST_ERROR ("couldn't create nvrtc program, ret %d", ret);
|
GST_ERROR ("couldn't create nvrtc program, ret %d", ret);
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
const char *opts[1] = { opt_str.c_str () };
|
ret = NvrtcCompileProgram (prog, opts.size (), opts.data ());
|
||||||
|
|
||||||
ret = NvrtcCompileProgram (prog, 1, opts);
|
|
||||||
if (ret != NVRTC_SUCCESS) {
|
if (ret != NVRTC_SUCCESS) {
|
||||||
gsize log_size;
|
gsize log_size;
|
||||||
|
|
||||||
|
@ -469,3 +475,19 @@ error:
|
||||||
|
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* gst_cuda_nvrtc_compile_cubin:
|
||||||
|
* @source: Source code to compile
|
||||||
|
* @device: CUDA device
|
||||||
|
*
|
||||||
|
* Convenience wrapper that forwards @source and @device to
* gst_cuda_nvrtc_compile_cubin_with_option() with no additional compile
* options (a nullptr option array and an option count of 0).
*
* Returns: (transfer full): Compiled CUDA assembly code if successful,
|
||||||
|
* otherwise %NULL
|
||||||
|
*
|
||||||
|
* Since: 1.24
|
||||||
|
*/
|
||||||
|
gchar *
|
||||||
|
gst_cuda_nvrtc_compile_cubin (const gchar * source, gint device)
|
||||||
|
{
|
||||||
|
return gst_cuda_nvrtc_compile_cubin_with_option (source, device, nullptr, 0);
|
||||||
|
}
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -81,6 +81,7 @@ if not nvcodec_precompile_opt.disabled() and not meson.is_cross_build()
|
||||||
nvcc = find_program ('nvcc', required : nvcodec_precompile_opt)
|
nvcc = find_program ('nvcc', required : nvcodec_precompile_opt)
|
||||||
if nvcc.found()
|
if nvcc.found()
|
||||||
subdir('kernel')
|
subdir('kernel')
|
||||||
|
extra_args += ['-DNVCODEC_CUDA_PRECOMPILED']
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue