mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2024-11-23 10:11:08 +00:00
nvdec: Don't hardcode DPB size
Too many decode surfaces waste GPU memory, and they also seem to introduce additional latency depending on the stream. Since nvcodec SDK version 9.0, the CUVID parser API has provided the minimum required number of decode surfaces. By using that value, we can save GPU memory and reduce possible latency.
This commit is contained in:
parent
8f0ceba251
commit
b4efdeba11
6 changed files with 111 additions and 19 deletions
|
@ -44,6 +44,9 @@ typedef struct _GstnvdecCuvidVTable
|
|||
{
|
||||
gboolean loaded;
|
||||
|
||||
guint major_version;
|
||||
guint minor_version;
|
||||
|
||||
CUresult (CUDAAPI * CuvidCtxLockCreate) (CUvideoctxlock * pLock,
|
||||
CUcontext ctx);
|
||||
CUresult (CUDAAPI * CuvidCtxLockDestroy) (CUvideoctxlock lck);
|
||||
|
@ -72,7 +75,7 @@ typedef struct _GstnvdecCuvidVTable
|
|||
static GstnvdecCuvidVTable gst_cuvid_vtable = { 0, };
|
||||
|
||||
gboolean
|
||||
gst_cuvid_load_library (void)
|
||||
gst_cuvid_load_library (guint api_major_ver, guint api_minor_ver)
|
||||
{
|
||||
GModule *module;
|
||||
const gchar *filename = NVCUVID_LIBNAME;
|
||||
|
@ -104,6 +107,8 @@ gst_cuvid_load_library (void)
|
|||
LOAD_SYMBOL (cuvidGetDecoderCaps, CuvidGetDecoderCaps, FALSE);
|
||||
|
||||
vtable->loaded = TRUE;
|
||||
vtable->major_version = api_major_ver;
|
||||
vtable->minor_version = api_minor_ver;
|
||||
|
||||
return TRUE;
|
||||
|
||||
|
@ -113,6 +118,21 @@ error:
|
|||
return FALSE;
|
||||
}
|
||||
|
||||
/* Reports the API version recorded in gst_cuvid_vtable (the values stored
 * there by gst_cuvid_load_library()).
 *
 * @api_major_ver: (out) (optional): receives the major version; may be NULL
 * @api_minor_ver: (out) (optional): receives the minor version; may be NULL
 *
 * Returns: FALSE when the vtable has not been marked as loaded (the output
 * arguments are left untouched in that case), TRUE otherwise. */
gboolean
|
||||
gst_cuvid_get_api_version (guint * api_major_ver, guint * api_minor_ver)
|
||||
{
|
||||
/* No version has been recorded until the library is loaded */
if (!gst_cuvid_vtable.loaded)
|
||||
return FALSE;
|
||||
|
||||
/* Each output pointer is optional; only non-NULL ones are written */
if (api_major_ver)
|
||||
*api_major_ver = gst_cuvid_vtable.major_version;
|
||||
|
||||
if (api_minor_ver)
|
||||
*api_minor_ver = gst_cuvid_vtable.minor_version;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
gboolean
|
||||
gst_cuvid_can_get_decoder_caps (void)
|
||||
{
|
||||
|
|
|
@ -28,7 +28,12 @@ G_BEGIN_DECLS
|
|||
|
||||
/* cuvid.h */
|
||||
G_GNUC_INTERNAL
|
||||
gboolean gst_cuvid_load_library (void);
|
||||
gboolean gst_cuvid_load_library (guint api_major_ver,
|
||||
guint api_minor_ver);
|
||||
|
||||
G_GNUC_INTERNAL
|
||||
gboolean gst_cuvid_get_api_version (guint * api_major_ver,
|
||||
guint * api_minor_ver);
|
||||
|
||||
G_GNUC_INTERNAL
|
||||
gboolean gst_cuvid_can_get_decoder_caps (void);
|
||||
|
|
|
@ -220,7 +220,48 @@ get_cuda_surface_format_from_gst (GstVideoFormat format)
|
|||
return cudaVideoSurfaceFormat_NV12;
|
||||
}
|
||||
|
||||
static gboolean CUDAAPI
|
||||
/* Picks a number of decode surfaces for @codec and the given picture size.
 *
 * @codec: codec of the stream
 * @width: picture width (only used for HEVC)
 * @height: picture height (only used for HEVC)
 *
 * Returns: 12 for VP9, 20 for H.264 (including SVC and MVC), a value derived
 * from the picture size for HEVC, and 8 for every other codec. */
static guint
|
||||
calculate_num_decode_surface (cudaVideoCodec codec, guint width, guint height)
|
||||
{
|
||||
switch (codec) {
|
||||
case cudaVideoCodec_VP9:
|
||||
return 12;
|
||||
case cudaVideoCodec_H264:
|
||||
case cudaVideoCodec_H264_SVC:
|
||||
case cudaVideoCodec_H264_MVC:
|
||||
return 20;
|
||||
case cudaVideoCodec_HEVC:{
|
||||
gint max_dpb_size;
|
||||
gint MaxLumaPS;
|
||||
const gint MaxDpbPicBuf = 6;
|
||||
gint PicSizeInSamplesY;
|
||||
|
||||
/* A.4.1
 * NOTE(review): presumably refers to the HEVC specification's level
 * limits section, with MaxLumaPS set to the largest luma picture size of
 * the level table -- confirm against the spec. */
|
||||
MaxLumaPS = 35651584;
|
||||
PicSizeInSamplesY = width * height;
|
||||
/* The smaller the picture is relative to MaxLumaPS, the larger the DPB:
 * <= 1/4 -> 24, <= 1/2 -> 12, <= 3/4 -> 8, otherwise 6 pictures */
if (PicSizeInSamplesY <= (MaxLumaPS >> 2))
|
||||
max_dpb_size = MaxDpbPicBuf * 4;
|
||||
else if (PicSizeInSamplesY <= (MaxLumaPS >> 1))
|
||||
max_dpb_size = MaxDpbPicBuf * 2;
|
||||
else if (PicSizeInSamplesY <= ((3 * MaxLumaPS) >> 2))
|
||||
max_dpb_size = (MaxDpbPicBuf * 4) / 3;
|
||||
else
|
||||
max_dpb_size = MaxDpbPicBuf;
|
||||
|
||||
/* The DPB size is capped at 16 pictures */
max_dpb_size = MIN (max_dpb_size, 16);
|
||||
|
||||
/* Four surfaces are added on top of the DPB size.
 * NOTE(review): presumably headroom for pictures in flight -- confirm. */
return max_dpb_size + 4;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
/* Fallback for every codec not handled above */
return 8;
|
||||
}
|
||||
|
||||
/* 0: fail, 1: succeeded, > 1: override dpb size of parser
|
||||
* (set by CUVIDPARSERPARAMS::ulMaxNumDecodeSurfaces while creating parser) */
|
||||
static gint CUDAAPI
|
||||
parser_sequence_callback (GstNvDec * nvdec, CUVIDEOFORMAT * format)
|
||||
{
|
||||
guint width, height;
|
||||
|
@ -232,6 +273,8 @@ parser_sequence_callback (GstNvDec * nvdec, CUVIDEOFORMAT * format)
|
|||
GstCudaContext *ctx = nvdec->cuda_ctx;
|
||||
GstStructure *in_s = NULL;
|
||||
gboolean updata = FALSE;
|
||||
gint num_decode_surface = 0;
|
||||
guint major_api_ver = 0;
|
||||
|
||||
width = format->display_area.right - format->display_area.left;
|
||||
height = format->display_area.bottom - format->display_area.top;
|
||||
|
@ -252,7 +295,7 @@ parser_sequence_callback (GstNvDec * nvdec, CUVIDEOFORMAT * format)
|
|||
format->bit_depth_luma_minus8 + 8);
|
||||
|
||||
nvdec->last_ret = GST_FLOW_NOT_NEGOTIATED;
|
||||
return FALSE;
|
||||
return 0;
|
||||
}
|
||||
break;
|
||||
case cudaVideoChromaFormat_420:
|
||||
|
@ -275,7 +318,7 @@ parser_sequence_callback (GstNvDec * nvdec, CUVIDEOFORMAT * format)
|
|||
format->bit_depth_luma_minus8 + 8);
|
||||
|
||||
nvdec->last_ret = GST_FLOW_NOT_NEGOTIATED;
|
||||
return FALSE;
|
||||
return 0;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
|
@ -283,7 +326,7 @@ parser_sequence_callback (GstNvDec * nvdec, CUVIDEOFORMAT * format)
|
|||
format->chroma_format, format->bit_depth_luma_minus8 + 8);
|
||||
|
||||
nvdec->last_ret = GST_FLOW_NOT_NEGOTIATED;
|
||||
return FALSE;
|
||||
return 0;
|
||||
}
|
||||
|
||||
GST_DEBUG_OBJECT (nvdec,
|
||||
|
@ -359,6 +402,19 @@ parser_sequence_callback (GstNvDec * nvdec, CUVIDEOFORMAT * format)
|
|||
out_info->interlace_mode = GST_VIDEO_INTERLACE_MODE_MIXED;
|
||||
}
|
||||
|
||||
if (gst_cuvid_get_api_version (&major_api_ver, NULL) && major_api_ver >= 9) {
|
||||
/* min_num_decode_surfaces was introduced in nvcodec sdk 9.0 header */
|
||||
num_decode_surface = format->min_num_decode_surfaces;
|
||||
|
||||
GST_DEBUG_OBJECT (nvdec, "Num decode surface: %d", num_decode_surface);
|
||||
} else {
|
||||
num_decode_surface =
|
||||
calculate_num_decode_surface (format->codec, width, height);
|
||||
|
||||
GST_DEBUG_OBJECT (nvdec,
|
||||
"Calculated num decode surface: %d", num_decode_surface);
|
||||
}
|
||||
|
||||
if (!nvdec->decoder || !gst_video_info_is_equal (out_info, &prev_out_info)) {
|
||||
updata = TRUE;
|
||||
|
||||
|
@ -379,7 +435,7 @@ parser_sequence_callback (GstNvDec * nvdec, CUVIDEOFORMAT * format)
|
|||
GST_DEBUG_OBJECT (nvdec, "creating decoder");
|
||||
create_info.ulWidth = width;
|
||||
create_info.ulHeight = height;
|
||||
create_info.ulNumDecodeSurfaces = 20;
|
||||
create_info.ulNumDecodeSurfaces = num_decode_surface;
|
||||
create_info.CodecType = format->codec;
|
||||
create_info.ChromaFormat = format->chroma_format;
|
||||
create_info.ulCreationFlags = cudaVideoCreate_Default;
|
||||
|
@ -414,15 +470,15 @@ parser_sequence_callback (GstNvDec * nvdec, CUVIDEOFORMAT * format)
|
|||
if (!gst_pad_has_current_caps (GST_VIDEO_DECODER_SRC_PAD (nvdec)) || updata) {
|
||||
if (!gst_video_decoder_negotiate (GST_VIDEO_DECODER (nvdec))) {
|
||||
nvdec->last_ret = GST_FLOW_NOT_NEGOTIATED;
|
||||
return FALSE;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
return num_decode_surface;
|
||||
|
||||
error:
|
||||
nvdec->last_ret = GST_FLOW_ERROR;
|
||||
return FALSE;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static gboolean
|
||||
|
@ -860,7 +916,9 @@ gst_nvdec_set_format (GstVideoDecoder * decoder, GstVideoCodecState * state)
|
|||
return FALSE;
|
||||
|
||||
parser_params.CodecType = klass->codec_type;
|
||||
parser_params.ulMaxNumDecodeSurfaces = 20;
|
||||
/* ulMaxNumDecodeSurfaces will be updated by the return value of
|
||||
* SequenceCallback */
|
||||
parser_params.ulMaxNumDecodeSurfaces = 1;
|
||||
parser_params.ulErrorThreshold = 100;
|
||||
parser_params.ulMaxDisplayDelay = 0;
|
||||
parser_params.ulClockRate = GST_SECOND;
|
||||
|
|
|
@ -849,7 +849,7 @@ typedef struct
|
|||
} GstNvEncVersion;
|
||||
|
||||
gboolean
|
||||
gst_nvenc_load_library (void)
|
||||
gst_nvenc_load_library (guint * api_major_ver, guint * api_minor_ver)
|
||||
{
|
||||
GModule *module;
|
||||
NVENCSTATUS ret = NV_ENC_SUCCESS;
|
||||
|
@ -945,6 +945,9 @@ gst_nvenc_load_library (void)
|
|||
if (ret == NV_ENC_SUCCESS) {
|
||||
GST_INFO ("API version %d.%d load done",
|
||||
version_list[i].major, version_list[i].minor);
|
||||
|
||||
*api_major_ver = version_list[i].major;
|
||||
*api_minor_ver = version_list[i].minor;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -114,6 +114,7 @@ G_GNUC_INTERNAL
|
|||
guint32 gst_nvenc_get_open_encode_session_ex_params_version (void);
|
||||
|
||||
G_GNUC_INTERNAL
|
||||
gboolean gst_nvenc_load_library (void);
|
||||
gboolean gst_nvenc_load_library (guint * api_major_ver,
|
||||
guint * api_minor_ver);
|
||||
|
||||
#endif /* __GST_NVENC_H_INCLUDED__ */
|
||||
|
|
|
@ -46,6 +46,9 @@ plugin_init (GstPlugin * plugin)
|
|||
gint i;
|
||||
gboolean nvdec_available = TRUE;
|
||||
gboolean nvenc_available = TRUE;
|
||||
/* hardcoded minimum supported version */
|
||||
guint api_major_ver = 8;
|
||||
guint api_minor_ver = 1;
|
||||
|
||||
GST_DEBUG_CATEGORY_INIT (gst_nvcodec_debug, "nvcodec", 0, "nvcodec");
|
||||
GST_DEBUG_CATEGORY_INIT (gst_nvdec_debug, "nvdec", 0, "nvdec");
|
||||
|
@ -56,16 +59,18 @@ plugin_init (GstPlugin * plugin)
|
|||
return TRUE;
|
||||
}
|
||||
|
||||
if (!gst_cuvid_load_library ()) {
|
||||
GST_WARNING ("Failed to load nvdec library");
|
||||
nvdec_available = FALSE;
|
||||
}
|
||||
|
||||
if (!gst_nvenc_load_library ()) {
|
||||
/* get available API version from nvenc and it will be passed to
|
||||
* nvdec */
|
||||
if (!gst_nvenc_load_library (&api_major_ver, &api_minor_ver)) {
|
||||
GST_WARNING ("Failed to load nvenc library");
|
||||
nvenc_available = FALSE;
|
||||
}
|
||||
|
||||
if (!gst_cuvid_load_library (api_major_ver, api_minor_ver)) {
|
||||
GST_WARNING ("Failed to load nvdec library");
|
||||
nvdec_available = FALSE;
|
||||
}
|
||||
|
||||
if (!nvdec_available && !nvenc_available)
|
||||
return TRUE;
|
||||
|
||||
|
|
Loading…
Reference in a new issue