mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2024-11-26 19:51:11 +00:00
cuda: Prefer CUBIN over PTX
The system-installed NVRTC library might be a newer version than the driver, in which case the generated PTX can be incompatible with the driver. Instead of the intermediate PTX code, use the actual assembly code (CUBIN) directly. Fixes: https://gitlab.freedesktop.org/gstreamer/gstreamer/-/issues/3108 Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/5639>
This commit is contained in:
parent
bf4755331a
commit
abe1f5044d
3 changed files with 131 additions and 5 deletions
|
@ -27,6 +27,7 @@
|
|||
#include <nvrtc.h>
|
||||
#include <gmodule.h>
|
||||
#include "gstcuda-private.h"
|
||||
#include <string>
|
||||
|
||||
GST_DEBUG_CATEGORY_STATIC (gst_cuda_nvrtc_debug);
|
||||
#define GST_CAT_DEFAULT gst_cuda_nvrtc_debug
|
||||
|
@ -60,6 +61,8 @@ typedef struct _GstCudaNvrtcVTable
|
|||
nvrtcResult (*NvrtcGetProgramLog) (nvrtcProgram prog, char *log);
|
||||
nvrtcResult (*NvrtcGetProgramLogSize) (nvrtcProgram prog,
|
||||
size_t * logSizeRet);
|
||||
nvrtcResult (*NvrtcGetCUBINSize) (nvrtcProgram prog, size_t *cubinSizeRet);
|
||||
nvrtcResult (*NvrtcGetCUBIN) (nvrtcProgram prog, char *cubin);
|
||||
} GstCudaNvrtcVTable;
|
||||
/* *INDENT-ON* */
|
||||
|
||||
|
@ -159,6 +162,8 @@ gst_cuda_nvrtc_load_library_once (void)
|
|||
LOAD_SYMBOL (nvrtcGetPTXSize, NvrtcGetPTXSize);
|
||||
LOAD_SYMBOL (nvrtcGetProgramLog, NvrtcGetProgramLog);
|
||||
LOAD_SYMBOL (nvrtcGetProgramLogSize, NvrtcGetProgramLogSize);
|
||||
LOAD_SYMBOL (nvrtcGetCUBINSize, NvrtcGetCUBINSize);
|
||||
LOAD_SYMBOL (nvrtcGetCUBIN, NvrtcGetCUBIN);
|
||||
|
||||
vtable->loaded = TRUE;
|
||||
|
||||
|
@ -251,6 +256,22 @@ NvrtcGetProgramLogSize (nvrtcProgram prog, size_t *logSizeRet)
|
|||
|
||||
return gst_cuda_nvrtc_vtable.NvrtcGetProgramLogSize (prog, logSizeRet);
|
||||
}
|
||||
|
||||
/* Thin forwarder: asserts that the NvrtcGetCUBINSize entry of the NVRTC
 * vtable is set, then calls it with the given program and output pointer. */
static nvrtcResult
NvrtcGetCUBINSize (nvrtcProgram prog, size_t *cubinSizeRet)
{
  g_assert (gst_cuda_nvrtc_vtable.NvrtcGetCUBINSize != nullptr);

  const nvrtcResult status =
      gst_cuda_nvrtc_vtable.NvrtcGetCUBINSize (prog, cubinSizeRet);

  return status;
}
|
||||
|
||||
/* Thin forwarder: asserts that the NvrtcGetCUBIN entry of the NVRTC vtable
 * is set, then calls it with the given program and destination buffer. */
static nvrtcResult
NvrtcGetCUBIN (nvrtcProgram prog, char *cubin)
{
  g_assert (gst_cuda_nvrtc_vtable.NvrtcGetCUBIN != nullptr);

  const nvrtcResult status =
      gst_cuda_nvrtc_vtable.NvrtcGetCUBIN (prog, cubin);

  return status;
}
|
||||
/* *INDENT-ON* */
|
||||
|
||||
/**
|
||||
|
@ -340,3 +361,95 @@ error:
|
|||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/**
|
||||
* gst_cuda_nvrtc_compile_cubin:
|
||||
* @source: Source code to compile
|
||||
* @device: CUDA device
|
||||
*
|
||||
* Returns: (transfer full): Compiled CUDA assembly code if successful,
|
||||
* otherwise %NULL
|
||||
*
|
||||
* Since: 1.24
|
||||
*/
|
||||
gchar *
|
||||
gst_cuda_nvrtc_compile_cubin (const gchar * source, gint device)
|
||||
{
|
||||
nvrtcProgram prog;
|
||||
nvrtcResult ret;
|
||||
CUresult curet;
|
||||
gsize cubin_size;
|
||||
gchar *cubin = nullptr;
|
||||
gint major, minor;
|
||||
|
||||
g_return_val_if_fail (source != nullptr, nullptr);
|
||||
|
||||
if (!gst_cuda_nvrtc_load_library ())
|
||||
return nullptr;
|
||||
|
||||
GST_TRACE ("CUDA kernel source \n%s", source);
|
||||
|
||||
curet = CuDeviceGetAttribute (&major,
|
||||
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device);
|
||||
if (curet != CUDA_SUCCESS) {
|
||||
GST_ERROR ("Unknown major compute caps");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
curet = CuDeviceGetAttribute (&minor,
|
||||
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, device);
|
||||
if (curet != CUDA_SUCCESS) {
|
||||
GST_ERROR ("Unknown minor compute caps");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::string opt_str = "--gpu-architecture=sm_" +
|
||||
std::to_string (major) + std::to_string (minor);
|
||||
|
||||
ret = NvrtcCreateProgram (&prog, source, nullptr, 0, nullptr, nullptr);
|
||||
if (ret != NVRTC_SUCCESS) {
|
||||
GST_ERROR ("couldn't create nvrtc program, ret %d", ret);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
const char *opts[1] = { opt_str.c_str () };
|
||||
|
||||
ret = NvrtcCompileProgram (prog, 1, opts);
|
||||
if (ret != NVRTC_SUCCESS) {
|
||||
gsize log_size;
|
||||
|
||||
GST_ERROR ("couldn't compile nvrtc program, ret %d", ret);
|
||||
if (NvrtcGetProgramLogSize (prog, &log_size) == NVRTC_SUCCESS &&
|
||||
log_size > 0) {
|
||||
gchar *compile_log = (gchar *) g_alloca (log_size);
|
||||
if (NvrtcGetProgramLog (prog, compile_log) == NVRTC_SUCCESS) {
|
||||
GST_ERROR ("nvrtc compile log %s", compile_log);
|
||||
}
|
||||
}
|
||||
|
||||
goto error;
|
||||
}
|
||||
|
||||
ret = NvrtcGetCUBINSize (prog, &cubin_size);
|
||||
if (ret != NVRTC_SUCCESS) {
|
||||
GST_ERROR ("unknown ptx size, ret %d", ret);
|
||||
goto error;
|
||||
}
|
||||
|
||||
cubin = (gchar *) g_malloc0 (cubin_size);
|
||||
ret = NvrtcGetCUBIN (prog, cubin);
|
||||
if (ret != NVRTC_SUCCESS) {
|
||||
GST_ERROR ("couldn't get ptx, ret %d", ret);
|
||||
g_free (cubin);
|
||||
goto error;
|
||||
}
|
||||
|
||||
NvrtcDestroyProgram (&prog);
|
||||
|
||||
return cubin;
|
||||
|
||||
error:
|
||||
NvrtcDestroyProgram (&prog);
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
|
|
@ -30,5 +30,9 @@ gboolean gst_cuda_nvrtc_load_library (void);
|
|||
GST_CUDA_API
|
||||
gchar * gst_cuda_nvrtc_compile (const gchar * source);
|
||||
|
||||
GST_CUDA_API
|
||||
gchar * gst_cuda_nvrtc_compile_cubin (const gchar * source,
|
||||
gint device);
|
||||
|
||||
G_END_DECLS
|
||||
|
||||
|
|
|
@ -1708,7 +1708,7 @@ gst_cuda_converter_setup (GstCudaConverter * self)
|
|||
const GstVideoColorimetry *in_color;
|
||||
const GstVideoColorimetry *out_color;
|
||||
gchar *str;
|
||||
gchar *ptx;
|
||||
gchar *program = NULL;
|
||||
CUresult ret;
|
||||
|
||||
in_info = &priv->in_info;
|
||||
|
@ -2071,10 +2071,16 @@ gst_cuda_converter_setup (GstCudaConverter * self)
|
|||
write_func);
|
||||
|
||||
GST_LOG_OBJECT (self, "kernel code:\n%s\n", str);
|
||||
ptx = gst_cuda_nvrtc_compile (str);
|
||||
gint cuda_device;
|
||||
g_object_get (self->context, "cuda-device-id", &cuda_device, NULL);
|
||||
program = gst_cuda_nvrtc_compile_cubin (str, cuda_device);
|
||||
if (!program) {
|
||||
GST_WARNING_OBJECT (self, "Couldn't compile to cubin, trying ptx");
|
||||
program = gst_cuda_nvrtc_compile (str);
|
||||
}
|
||||
g_free (str);
|
||||
|
||||
if (!ptx) {
|
||||
if (!program) {
|
||||
GST_ERROR_OBJECT (self, "Could not compile code");
|
||||
return FALSE;
|
||||
}
|
||||
|
@ -2093,6 +2099,7 @@ gst_cuda_converter_setup (GstCudaConverter * self)
|
|||
|
||||
if (!gst_cuda_context_push (self->context)) {
|
||||
GST_ERROR_OBJECT (self, "Couldn't push context");
|
||||
g_free (program);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
@ -2138,8 +2145,8 @@ gst_cuda_converter_setup (GstCudaConverter * self)
|
|||
priv->unpack_buffer.texture = texture;
|
||||
}
|
||||
|
||||
ret = CuModuleLoadData (&priv->module, ptx);
|
||||
g_free (ptx);
|
||||
ret = CuModuleLoadData (&priv->module, program);
|
||||
g_clear_pointer (&program, g_free);
|
||||
if (!gst_cuda_result (ret)) {
|
||||
GST_ERROR_OBJECT (self, "Could not load module");
|
||||
priv->module = NULL;
|
||||
|
@ -2168,6 +2175,8 @@ gst_cuda_converter_setup (GstCudaConverter * self)
|
|||
|
||||
error:
|
||||
gst_cuda_context_pop (NULL);
|
||||
g_free (program);
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue