d3d11decoder: Enable high precision clock if needed

We have been retrying with a 1ms sleep whenever DecoderBeginFrame()
returned E_PENDING, which means the application should call
DecoderBeginFrame() again because the GPU is busy.
In reality, the 1ms sleep() during retry usually results in a delay of
about 15ms because of the poor default clock precision on Windows.
To improve throughput, this commit enables the
high precision clock only on NVIDIA platforms, since
DecoderBeginFrame() calls on other GPU vendors seem to
succeed without retry.

Part-of: <https://gitlab.freedesktop.org/gstreamer/gst-plugins-bad/-/merge_requests/2099>
This commit is contained in:
Seungha Yang 2021-03-23 16:26:13 +09:00 committed by GStreamer Marge Bot
parent 176a00985a
commit e006366206
2 changed files with 72 additions and 7 deletions

View file

@ -56,6 +56,10 @@
#include "gstd3d11pluginutils.h"
#include <string.h>
#ifdef HAVE_WINMM
#include <timeapi.h>
#endif
GST_DEBUG_CATEGORY (d3d11_decoder_debug);
#define GST_CAT_DEFAULT d3d11_decoder_debug
@ -158,6 +162,9 @@ struct _GstD3D11Decoder
/* For device specific workaround */
gboolean can_direct_rendering;
/* For high precision clock */
guint timer_resolution;
};
static void gst_d3d11_decoder_constructed (GObject * object);
@ -166,6 +173,7 @@ static void gst_d3d11_decoder_set_property (GObject * object, guint prop_id,
static void gst_d3d11_decoder_get_property (GObject * object, guint prop_id,
GValue * value, GParamSpec * pspec);
static void gst_d3d11_decoder_dispose (GObject * obj);
static void gst_d3d11_decoder_finalize (GObject * obj);
#define parent_class gst_d3d11_decoder_parent_class
G_DEFINE_TYPE (GstD3D11Decoder, gst_d3d11_decoder, GST_TYPE_OBJECT);
@ -179,6 +187,7 @@ gst_d3d11_decoder_class_init (GstD3D11DecoderClass * klass)
gobject_class->set_property = gst_d3d11_decoder_set_property;
gobject_class->get_property = gst_d3d11_decoder_get_property;
gobject_class->dispose = gst_d3d11_decoder_dispose;
gobject_class->finalize = gst_d3d11_decoder_finalize;
g_object_class_install_property (gobject_class, PROP_DEVICE,
g_param_spec_object ("device", "Device",
@ -304,6 +313,20 @@ gst_d3d11_decoder_dispose (GObject * obj)
G_OBJECT_CLASS (parent_class)->dispose (obj);
}
/* GObject finalize handler: undoes the timer resolution request made on
 * behalf of this decoder (if any), then chains up to the parent class. */
static void
gst_d3d11_decoder_finalize (GObject * obj)
{
#ifdef HAVE_WINMM
  GstD3D11Decoder *self = GST_D3D11_DECODER (obj);

  /* Restore clock precision. timer_resolution holds the period that was
   * passed to a successful timeBeginPeriod() call; the matching
   * timeEndPeriod() call has to use that same value. */
  if (self->timer_resolution)
    timeEndPeriod (self->timer_resolution);
#endif

  G_OBJECT_CLASS (parent_class)->finalize (obj);
}
GstD3D11Decoder *
gst_d3d11_decoder_new (GstD3D11Device * device)
{
@ -634,6 +657,36 @@ gst_d3d11_decoder_ensure_staging_texture (GstD3D11Decoder * self)
return TRUE;
}
/* Requests a finer system timer resolution so that the 1ms sleeps used while
 * retrying DecoderBeginFrame() are not stretched by coarse clock precision.
 *
 * This is a no-op when built without HAVE_WINMM, when a resolution has
 * already been requested for this decoder, or when the device vendor is not
 * NVIDIA. On success the requested period is stored in
 * self->timer_resolution so that it can be released with timeEndPeriod(). */
static void
gst_d3d11_decoder_enable_high_precision_timer (GstD3D11Decoder * self)
{
#ifdef HAVE_WINMM
  GstD3D11DeviceVendor vendor;
  TIMECAPS time_caps;
  guint resolution;

  /* Already requested for this decoder, nothing to do */
  if (self->timer_resolution)
    return;

  vendor = gst_d3d11_get_device_vendor (self->device);
  /* Do this only for NVIDIA at the moment, other vendors don't seem to
   * require retry for BeginFrame() */
  if (vendor != GST_D3D11_DEVICE_VENDOR_NVIDIA)
    return;

  if (timeGetDevCaps (&time_caps, sizeof (TIMECAPS)) != TIMERR_NOERROR)
    return;

  /* Smallest period the timer device reports, but never below 1 and never
   * above the reported maximum */
  resolution = MIN (MAX (time_caps.wPeriodMin, 1), time_caps.wPeriodMax);
  if (timeBeginPeriod (resolution) != TIMERR_NOERROR)
    return;

  /* Remember the value: it doubles as the "enabled" flag and is needed to
   * undo the request later */
  self->timer_resolution = resolution;
  GST_INFO_OBJECT (self, "Updated timer resolution to %u", resolution);
#endif
}
static gboolean
gst_d3d11_decoder_open (GstD3D11Decoder * self)
{
@ -826,6 +879,8 @@ gst_d3d11_decoder_open (GstD3D11Decoder * self)
self->opened = TRUE;
gst_d3d11_device_unlock (self->device);
gst_d3d11_decoder_enable_high_precision_timer (self);
return TRUE;
error:
@ -843,26 +898,27 @@ gst_d3d11_decoder_begin_frame (GstD3D11Decoder * decoder,
ID3D11VideoContext *video_context;
guint retry_count = 0;
HRESULT hr;
guint retry_threshold = 100;
/* if we have high resolution timer, do more retry */
if (decoder->timer_resolution)
retry_threshold = 500;
g_return_val_if_fail (GST_IS_D3D11_DECODER (decoder), FALSE);
g_return_val_if_fail (output_view != NULL, FALSE);
video_context = decoder->video_context;
gst_d3d11_device_lock (decoder->device);
do {
GST_LOG_OBJECT (decoder, "Try begin frame, retry count %d", retry_count);
gst_d3d11_device_lock (decoder->device);
hr = video_context->DecoderBeginFrame (decoder->decoder_handle,
output_view, content_key_size, content_key);
gst_d3d11_device_unlock (decoder->device);
/* HACK: Do 100 times retry with 1ms sleep per failure, since DXVA/D3D11
/* HACK: Do retry with 1ms sleep per failure, since DXVA/D3D11
* doesn't provide API for "GPU-IS-READY-TO-DECODE" like signal.
* In the worst case, we will error out after 100ms.
* Note that Windows' clock precision is known to be incorrect,
* so it would be longer than 100ms in reality.
*/
if (hr == E_PENDING && retry_count < 100) {
if (hr == E_PENDING && retry_count < retry_threshold) {
GST_LOG_OBJECT (decoder, "GPU is busy, try again. Retry count %d",
retry_count);
g_usleep (1000);
@ -874,6 +930,7 @@ gst_d3d11_decoder_begin_frame (GstD3D11Decoder * decoder,
retry_count++;
} while (TRUE);
gst_d3d11_device_unlock (decoder->device);
if (!gst_d3d11_result (hr, decoder->device)) {
GST_ERROR_OBJECT (decoder, "Failed to begin frame, hr: 0x%x", (guint) hr);

View file

@ -44,6 +44,8 @@ endif
d3dcompiler_lib = cc.find_library('d3dcompiler', required: d3d11_option)
runtimeobject_lib = cc.find_library('runtimeobject', required : false)
winmm_lib = cc.find_library('winmm', required: false)
has_decoder = false
have_d3d11 = cc.has_header('d3dcompiler.h')
if not have_d3d11
@ -58,6 +60,7 @@ if cc.has_header('dxva.h') and cc.has_header('d3d9.h')
d3d11_sources += d3d11_dec_sources
extra_c_args += ['-DHAVE_DXVA_H']
extra_dep += [gstcodecs_dep]
has_decoder = true
endif
if d3d11_winapi_only_app and (not d3dcompiler_lib.found() or not runtimeobject_lib.found())
@ -84,6 +87,11 @@ if d3d11_winapi_desktop
d3d11_sources += ['gstd3d11desktopdup.cpp', 'gstd3d11desktopdupsrc.cpp']
message('Enable D3D11 Desktop Duplication API')
endif
# multimedia clock is desktop only API
if has_decoder and winmm_lib.found() and cc.has_header('timeapi.h')
extra_cpp_args += ['-DHAVE_WINMM']
extra_dep += [winmm_lib]
endif
endif
# need dxgi1_5.h for HDR10 processing and d3d11_4.h for ID3D11VideoContext2 interface