nvcodec: Add generic CUDA video convert object

Introduce a generic video convert object, similar to video-converter,
but using CUDA.

Part-of: <https://gitlab.freedesktop.org/gstreamer/gst-plugins-bad/-/merge_requests/1633>
This commit is contained in:
Seungha Yang 2019-10-16 22:42:39 +09:00 committed by GStreamer Merge Bot
parent 4cc73ff9d6
commit 592a8d5400
8 changed files with 2403 additions and 1 deletions

2085
sys/nvcodec/cuda-converter.c Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,62 @@
/* GStreamer
* Copyright (C) 2019 Seungha Yang <seungha.yang@navercorp.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#ifndef __GST_CUDA_CONVERTER_H__
#define __GST_CUDA_CONVERTER_H__
#include <gst/video/video.h>
#include "gstcudacontext.h"
#include "gstcudamemory.h"
G_BEGIN_DECLS
/* Opaque converter type: only the struct tag is forward-declared in this
 * header, so users handle it exclusively through pointers. */
typedef struct _GstCudaConverter GstCudaConverter;
/* Brace-enclosed, comma-separated list of video format names, assembled from
 * two adjacent string literals.
 * NOTE(review): presumably meant to be spliced into caps strings by the
 * elements that use the converter — confirm against the callers. */
#define GST_CUDA_CONVERTER_FORMATS \
"{ I420, YV12, NV12, NV21, P010_10LE, P016_LE, I420_10LE, Y444, Y444_16LE, " \
"BGRA, RGBA, RGBx, BGRx, ARGB, ABGR, RGB, BGR, BGR10A2_LE, RGB10A2_LE }"
/* Constructor-style entry point: takes the input and output video
 * descriptions plus a CUDA context and returns a GstCudaConverter pointer.
 * NOTE(review): the definition is not visible from this header (presumably
 * in cuda-converter.c). Failure behaviour (e.g. a NULL return for an
 * unsupported format pair) and whether @cuda_ctx is ref'ed must be confirmed
 * there. */
G_GNUC_INTERNAL
GstCudaConverter * gst_cuda_converter_new (GstVideoInfo * in_info,
GstVideoInfo * out_info,
GstCudaContext * cuda_ctx);
/* Counterpart of gst_cuda_converter_new(): takes the converter pointer and
 * returns nothing.
 * NOTE(review): whether a NULL @convert is tolerated is not visible from
 * this header — confirm in the definition. */
G_GNUC_INTERNAL
void gst_cuda_converter_free (GstCudaConverter * convert);
/* Per-frame entry point: receives a read-only source memory, a writable
 * destination memory, the video description of each, and a CUDA stream.
 * Reports success or failure as a gboolean.
 * NOTE(review): presumably converts @src into @dst on @cuda_stream and takes
 * care of making the CUDA context current, in contrast to the _unlocked
 * variant — confirm against the definition. */
G_GNUC_INTERNAL
gboolean gst_cuda_converter_frame (GstCudaConverter * convert,
const GstCudaMemory * src,
GstVideoInfo * in_info,
GstCudaMemory * dst,
GstVideoInfo * out_info,
CUstream cuda_stream);
/* Same parameter list and return type as gst_cuda_converter_frame().
 * NOTE(review): the "_unlocked" suffix presumably means the caller must
 * already have made the CUDA context current (pushed) before calling —
 * TODO confirm against the definition, which is not visible here. */
G_GNUC_INTERNAL
gboolean gst_cuda_converter_frame_unlocked (GstCudaConverter * convert,
const GstCudaMemory * src,
GstVideoInfo * in_info,
GstCudaMemory * dst,
GstVideoInfo * out_info,
CUstream cuda_stream);
G_END_DECLS
#endif /* __GST_CUDA_CONVERTER_H__ */

View file

@ -46,6 +46,8 @@ struct _GstCudaContextPrivate
CUdevice device;
gint device_id;
gint tex_align;
GHashTable *accessible_peer;
};
@ -142,6 +144,7 @@ gst_cuda_context_constructed (GObject * object)
gchar name[256];
gint min = 0, maj = 0;
gint i;
gint tex_align = 0;
GList *iter;
if (g_once_init_enter (&once)) {
@ -166,11 +169,15 @@ gst_cuda_context_constructed (GObject * object)
gst_cuda_result (CuDeviceGetAttribute (&maj,
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cdev)) &&
gst_cuda_result (CuDeviceGetAttribute (&min,
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cdev))) {
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cdev)) &&
gst_cuda_result (CuDeviceGetAttribute (&tex_align,
CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT, cdev))) {
GST_INFO ("GPU #%d supports NVENC: %s (%s) (Compute SM %d.%d)", i,
(((maj << 4) + min) >= 0x30) ? "yes" : "no", name, maj, min);
if (priv->device_id == -1 || priv->device_id == cdev) {
priv->device_id = cuda_dev = cdev;
priv->tex_align = tex_align;
break;
}
}
}
@ -382,6 +389,23 @@ gst_cuda_context_get_handle (GstCudaContext * ctx)
return ctx->priv->context;
}
/**
 * gst_cuda_context_get_texture_alignment:
 * @ctx: a #GstCudaContext
 *
 * Get the texture alignment value that was queried from the device
 * (CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT) and cached when @ctx was constructed
 *
 * Returns: the texture alignment cached in @ctx, or 0 if @ctx fails the
 * argument checks
 */
gint
gst_cuda_context_get_texture_alignment (GstCudaContext * ctx)
{
  gint alignment;

  g_return_val_if_fail (ctx, 0);
  g_return_val_if_fail (GST_IS_CUDA_CONTEXT (ctx), 0);

  /* Plain accessor: the value is stored once in the private data */
  alignment = ctx->priv->tex_align;

  return alignment;
}
/**
* gst_cuda_context_can_access_peer:
* @ctx: a #GstCudaContext

View file

@ -72,6 +72,9 @@ gboolean gst_cuda_context_pop (CUcontext * cuda_ctx);
/* Accessor for the context handle kept in @ctx's private data, returned as
 * an untyped gpointer. */
G_GNUC_INTERNAL
gpointer gst_cuda_context_get_handle (GstCudaContext * ctx);
/* Accessor for the texture alignment value cached in @ctx's private data;
 * the definition returns 0 when @ctx fails its argument checks. */
G_GNUC_INTERNAL
gint gst_cuda_context_get_texture_alignment (GstCudaContext * ctx);
/* Takes two contexts and answers with a gboolean.
 * NOTE(review): presumably reports whether the device of @ctx can access
 * memory owned by the device of @peer — the definition is not visible here,
 * confirm there. */
G_GNUC_INTERNAL
gboolean gst_cuda_context_can_access_peer (GstCudaContext * ctx,
GstCudaContext * peer);

View file

@ -92,6 +92,22 @@ typedef struct _GstNvCodecCudaVTable
CUdevice_attribute attrib, CUdevice dev);
CUresult (CUDAAPI * CuDeviceCanAccessPeer) (int *canAccessPeer,
CUdevice dev, CUdevice peerDev);
CUresult (CUDAAPI * CuDriverGetVersion) (int *driverVersion);
CUresult (CUDAAPI * CuModuleLoadData) (CUmodule * module,
const void *image);
CUresult (CUDAAPI * CuModuleUnload) (CUmodule module);
CUresult (CUDAAPI * CuModuleGetFunction) (CUfunction * hfunc,
CUmodule hmod, const char *name);
CUresult (CUDAAPI * CuTexObjectCreate) (CUtexObject * pTexObject,
const CUDA_RESOURCE_DESC * pResDesc, const CUDA_TEXTURE_DESC * pTexDesc,
const CUDA_RESOURCE_VIEW_DESC * pResViewDesc);
CUresult (CUDAAPI * CuTexObjectDestroy) (CUtexObject texObject);
CUresult (CUDAAPI * CuLaunchKernel) (CUfunction f, unsigned int gridDimX,
unsigned int gridDimY, unsigned int gridDimZ,
unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ,
unsigned int sharedMemBytes, CUstream hStream, void **kernelParams,
void **extra);
CUresult (CUDAAPI * CuGraphicsGLRegisterImage) (CUgraphicsResource *
pCudaResource, unsigned int image, unsigned int target,
@ -164,6 +180,15 @@ gst_cuda_load_library (void)
LOAD_SYMBOL (cuDeviceGetAttribute, CuDeviceGetAttribute);
LOAD_SYMBOL (cuDeviceCanAccessPeer, CuDeviceCanAccessPeer);
LOAD_SYMBOL (cuDriverGetVersion, CuDriverGetVersion);
LOAD_SYMBOL (cuModuleLoadData, CuModuleLoadData);
LOAD_SYMBOL (cuModuleUnload, CuModuleUnload);
LOAD_SYMBOL (cuModuleGetFunction, CuModuleGetFunction);
LOAD_SYMBOL (cuTexObjectCreate, CuTexObjectCreate);
LOAD_SYMBOL (cuTexObjectDestroy, CuTexObjectDestroy);
LOAD_SYMBOL (cuLaunchKernel, CuLaunchKernel);
/* cudaGL.h */
LOAD_SYMBOL (cuGraphicsGLRegisterImage, CuGraphicsGLRegisterImage);
LOAD_SYMBOL (cuGraphicsGLRegisterBuffer, CuGraphicsGLRegisterBuffer);
@ -420,6 +445,71 @@ CuDeviceCanAccessPeer (int *canAccessPeer, CUdevice dev, CUdevice peerDev)
return gst_cuda_vtable.CuDeviceCanAccessPeer (canAccessPeer, dev, peerDev);
}
/* Trampoline to the cuDriverGetVersion entry of gst_cuda_vtable.
 * The entry is looked up in gst_cuda_load_library() and is asserted to be
 * non-NULL before the call is forwarded. */
CUresult CUDAAPI
CuDriverGetVersion (int *driverVersion)
{
  CUresult ret;

  g_assert (gst_cuda_vtable.CuDriverGetVersion != NULL);

  ret = gst_cuda_vtable.CuDriverGetVersion (driverVersion);

  return ret;
}
/* Trampoline to the cuModuleLoadData entry of gst_cuda_vtable.
 * Both arguments are forwarded untouched; the entry is asserted to be
 * non-NULL first. */
CUresult CUDAAPI
CuModuleLoadData (CUmodule * module, const void *image)
{
  CUresult ret;

  g_assert (gst_cuda_vtable.CuModuleLoadData != NULL);

  ret = gst_cuda_vtable.CuModuleLoadData (module, image);

  return ret;
}
/* Trampoline to the cuModuleUnload entry of gst_cuda_vtable.
 * The entry is asserted to be non-NULL before the call is forwarded. */
CUresult CUDAAPI
CuModuleUnload (CUmodule module)
{
  CUresult ret;

  g_assert (gst_cuda_vtable.CuModuleUnload != NULL);

  ret = gst_cuda_vtable.CuModuleUnload (module);

  return ret;
}
/* Trampoline to the cuModuleGetFunction entry of gst_cuda_vtable.
 * All three arguments are forwarded untouched; the entry is asserted to be
 * non-NULL first. */
CUresult CUDAAPI
CuModuleGetFunction (CUfunction * hfunc, CUmodule hmod, const char *name)
{
  CUresult ret;

  g_assert (gst_cuda_vtable.CuModuleGetFunction != NULL);

  ret = gst_cuda_vtable.CuModuleGetFunction (hfunc, hmod, name);

  return ret;
}
/* Trampoline to the cuTexObjectCreate entry of gst_cuda_vtable.
 * The output handle and the three descriptor pointers are forwarded
 * untouched; the entry is asserted to be non-NULL first. */
CUresult CUDAAPI
CuTexObjectCreate (CUtexObject * pTexObject,
    const CUDA_RESOURCE_DESC * pResDesc, const CUDA_TEXTURE_DESC * pTexDesc,
    const CUDA_RESOURCE_VIEW_DESC * pResViewDesc)
{
  CUresult ret;

  g_assert (gst_cuda_vtable.CuTexObjectCreate != NULL);

  ret = gst_cuda_vtable.CuTexObjectCreate (pTexObject, pResDesc, pTexDesc,
      pResViewDesc);

  return ret;
}
/* Trampoline to the cuTexObjectDestroy entry of gst_cuda_vtable.
 * The entry is asserted to be non-NULL before the call is forwarded. */
CUresult CUDAAPI
CuTexObjectDestroy (CUtexObject texObject)
{
  CUresult ret;

  g_assert (gst_cuda_vtable.CuTexObjectDestroy != NULL);

  ret = gst_cuda_vtable.CuTexObjectDestroy (texObject);

  return ret;
}
/* Trampoline to the cuLaunchKernel entry of gst_cuda_vtable.
 * Grid and block dimensions, shared memory size, stream and both parameter
 * arrays are forwarded in the same order; the entry is asserted to be
 * non-NULL first. */
CUresult CUDAAPI
CuLaunchKernel (CUfunction f, unsigned int gridDimX,
    unsigned int gridDimY, unsigned int gridDimZ,
    unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ,
    unsigned int sharedMemBytes, CUstream hStream, void **kernelParams,
    void **extra)
{
  CUresult ret;

  g_assert (gst_cuda_vtable.CuLaunchKernel != NULL);

  ret = gst_cuda_vtable.CuLaunchKernel (f,
      gridDimX, gridDimY, gridDimZ,
      blockDimX, blockDimY, blockDimZ,
      sharedMemBytes, hStream, kernelParams, extra);

  return ret;
}
/* cudaGL.h */
CUresult CUDAAPI
CuGraphicsGLRegisterImage (CUgraphicsResource * pCudaResource,

View file

@ -145,6 +145,43 @@ CUresult CUDAAPI CuDeviceCanAccessPeer (int *canAccessPeer,
CUdevice dev,
CUdevice peerDev);
/* The declarations below are the plugin-internal wrappers whose definitions
 * forward to the matching entry of the runtime-loaded function table. Each
 * returns the CUresult produced by the forwarded call. Parameter meaning
 * follows the CUDA Driver API function of the same (lower-case "cu") name. */

/* Wrapper for cuDriverGetVersion; writes through @driverVersion. */
G_GNUC_INTERNAL
CUresult CUDAAPI CuDriverGetVersion (int * driverVersion);
/* Wrapper for cuModuleLoadData; @module is the output handle, @image the
 * input blob. */
G_GNUC_INTERNAL
CUresult CUDAAPI CuModuleLoadData (CUmodule* module,
const void *image);
/* Wrapper for cuModuleUnload. */
G_GNUC_INTERNAL
CUresult CUDAAPI CuModuleUnload (CUmodule module);
/* Wrapper for cuModuleGetFunction; @hfunc is the output handle looked up by
 * @name in @hmod. */
G_GNUC_INTERNAL
CUresult CUDAAPI CuModuleGetFunction (CUfunction* hfunc,
CUmodule hmod,
const char* name);
/* Wrapper for cuTexObjectCreate; @pTexObject is the output handle, the three
 * descriptors are read-only inputs. */
G_GNUC_INTERNAL
CUresult CUDAAPI CuTexObjectCreate (CUtexObject *pTexObject,
const CUDA_RESOURCE_DESC *pResDesc,
const CUDA_TEXTURE_DESC *pTexDesc,
const CUDA_RESOURCE_VIEW_DESC *pResViewDesc);
/* Wrapper for cuTexObjectDestroy. */
G_GNUC_INTERNAL
CUresult CUDAAPI CuTexObjectDestroy (CUtexObject texObject);
/* Wrapper for cuLaunchKernel; arguments are passed through in this exact
 * order. */
G_GNUC_INTERNAL
CUresult CUDAAPI CuLaunchKernel (CUfunction f,
unsigned int gridDimX,
unsigned int gridDimY,
unsigned int gridDimZ,
unsigned int blockDimX,
unsigned int blockDimY,
unsigned int blockDimZ,
unsigned int sharedMemBytes,
CUstream hStream,
void **kernelParams,
void **extra);
/* cudaGL.h */
G_GNUC_INTERNAL
CUresult CUDAAPI CuGraphicsGLRegisterImage (CUgraphicsResource * pCudaResource,

View file

@ -19,6 +19,7 @@ nvcodec_sources = [
'gstcudaupload.c',
'gstcudanvrtc.c',
'gstnvrtcloader.c',
'cuda-converter.c',
]
if get_option('nvcodec').disabled()

View file

@ -27,7 +27,11 @@ typedef gpointer CUcontext;
/* Stub handle types. Most are declared as untyped pointers (gpointer);
 * CUtexObject is a 64-bit integer, CUdeviceptr a pointer-sized unsigned
 * integer and CUdevice a plain gint.
 * NOTE(review): these presumably stand in for the types of NVIDIA's cuda.h
 * and must keep the same size as the real ones — confirm before changing. */
typedef gpointer CUgraphicsResource;
typedef gpointer CUstream;
typedef gpointer CUarray;
typedef gpointer CUmodule;
typedef gpointer CUfunction;
typedef gpointer CUmipmappedArray;
typedef guint64 CUtexObject;
typedef guintptr CUdeviceptr;
typedef gint CUdevice;
@ -46,6 +50,7 @@ typedef enum
/* Device attribute selectors, used as the attribute argument of
 * CuDeviceGetAttribute().
 * NOTE(review): only a subset is declared in this stub; the numeric values
 * presumably mirror NVIDIA's cuda.h — verify there before adding entries. */
typedef enum
{
CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14,
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75,
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76,
} CUdevice_attribute;
@ -70,6 +75,39 @@ typedef enum
CU_STREAM_NON_BLOCKING = 0x1
} CUstream_flags;
/* Texture filter selector; this is the type of the filterMode and
 * mipmapFilterMode fields of CUDA_TEXTURE_DESC.
 * NOTE(review): values presumably mirror NVIDIA's cuda.h — confirm. */
typedef enum
{
CU_TR_FILTER_MODE_POINT = 0,
CU_TR_FILTER_MODE_LINEAR = 1
} CUfilter_mode;
/* Texture addressing selector; this is the element type of the addressMode
 * array of CUDA_TEXTURE_DESC.
 * NOTE(review): values presumably mirror NVIDIA's cuda.h — confirm. */
typedef enum
{
CU_TR_ADDRESS_MODE_WRAP = 0,
CU_TR_ADDRESS_MODE_CLAMP = 1,
CU_TR_ADDRESS_MODE_MIRROR = 2,
CU_TR_ADDRESS_MODE_BORDER = 3
} CUaddress_mode;
/* Resource kind tag; this is the type of CUDA_RESOURCE_DESC.resType. The
 * four values line up by name with the array, mipmap, linear and pitch2D
 * members of that struct's union.
 * NOTE(review): values presumably mirror NVIDIA's cuda.h — confirm. */
typedef enum
{
CU_RESOURCE_TYPE_ARRAY = 0,
CU_RESOURCE_TYPE_MIPMAPPED_ARRAY = 1,
CU_RESOURCE_TYPE_LINEAR = 2,
CU_RESOURCE_TYPE_PITCH2D = 3
} CUresourcetype;
/* Element format selector; this is the type of the format field in the
 * linear and pitch2D members of CUDA_RESOURCE_DESC.
 * NOTE(review): only two formats are declared in this stub; values
 * presumably mirror NVIDIA's cuda.h — confirm before adding more. */
typedef enum
{
CU_AD_FORMAT_UNSIGNED_INT8 = 1,
CU_AD_FORMAT_UNSIGNED_INT16 = 2,
} CUarray_format;
/* Resource view format selector; this is the type of
 * CUDA_RESOURCE_VIEW_DESC.format. Only the NONE value is declared in this
 * stub. */
typedef enum
{
CU_RES_VIEW_FORMAT_NONE = 0,
} CUresourceViewFormat;
typedef struct
{
gsize srcXInBytes;
@ -97,6 +135,66 @@ typedef enum
CU_GL_DEVICE_LIST_ALL = 0x01,
} CUGLDeviceList;
/* Texture descriptor; a pointer to it is the pTexDesc argument of
 * CuTexObjectCreate().
 * NOTE(review): field order, types and the reserved padding presumably
 * mirror NVIDIA's cuda.h, because the struct is handed to the driver as-is —
 * do not reorder or resize fields without checking. */
typedef struct
{
CUaddress_mode addressMode[3];
CUfilter_mode filterMode;
/* NOTE(review): presumably a bit mask taking flags such as
 * CU_TRSF_READ_AS_INTEGER — confirm */
guint flags;
guint maxAnisotropy;
CUfilter_mode mipmapFilterMode;
gfloat mipmapLevelBias;
gfloat minMipmapLevelClamp;
gfloat maxMipmapLevelClamp;
gfloat borderColor[4];
gint reserved[12];
} CUDA_TEXTURE_DESC;
/* Resource descriptor; a pointer to it is the pResDesc argument of
 * CuTexObjectCreate(). It is a tagged union: resType is the tag and res
 * holds one member per resource kind.
 * NOTE(review): layout presumably mirrors NVIDIA's cuda.h, because the
 * struct is handed to the driver as-is — do not reorder or resize fields
 * without checking. */
typedef struct
{
CUresourcetype resType;
union {
/* Member named after CU_RESOURCE_TYPE_ARRAY */
struct {
CUarray hArray;
} array;
/* Member named after CU_RESOURCE_TYPE_MIPMAPPED_ARRAY */
struct {
CUmipmappedArray hMipmappedArray;
} mipmap;
/* Member named after CU_RESOURCE_TYPE_LINEAR */
struct {
CUdeviceptr devPtr;
CUarray_format format;
guint numChannels;
gsize sizeInBytes;
} linear;
/* Member named after CU_RESOURCE_TYPE_PITCH2D */
struct {
CUdeviceptr devPtr;
CUarray_format format;
guint numChannels;
gsize width;
gsize height;
gsize pitchInBytes;
} pitch2D;
/* 32 gints; NOTE(review): presumably fixes the overall size of the
 * union — confirm */
struct {
gint reserved[32];
} reserved;
} res;
guint flags;
} CUDA_RESOURCE_DESC;
/* Resource view descriptor; a pointer to it is the pResViewDesc argument of
 * CuTexObjectCreate().
 * NOTE(review): layout presumably mirrors NVIDIA's cuda.h, because the
 * struct is handed to the driver as-is — do not reorder or resize fields
 * without checking. */
typedef struct
{
CUresourceViewFormat format;
gsize width;
gsize height;
gsize depth;
guint firstMipmapLevel;
guint lastMipmapLevel;
guint firstLayer;
guint lastLayer;
guint reserved[16];
} CUDA_RESOURCE_VIEW_DESC;
#define CUDA_VERSION 10000
#ifdef _WIN32
@ -120,6 +218,8 @@ typedef enum
#define cuMemFree cuMemFree_v2
#define cuGLGetDevices cuGLGetDevices_v2
/* Texture flag constant.
 * NOTE(review): presumably meant for CUDA_TEXTURE_DESC.flags, with the value
 * mirroring NVIDIA's cuda.h — confirm. */
#define CU_TRSF_READ_AS_INTEGER 1
G_END_DECLS
#endif /* __GST_CUDA_STUB_H__ */