d3d12: Add async-depth property

Add a property to control the number of in-flight GPU commands
(default is unlimited). Note that the actual maximum is defined by
d3d12device's direct command queue object, which is currently 32,
so the total number of scheduled GPU commands cannot exceed 32.

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/7444>
This commit is contained in:
Seungha Yang 2024-09-03 19:33:41 +09:00 committed by GStreamer Marge Bot
parent 3506f5fb07
commit d0505fba55
3 changed files with 109 additions and 58 deletions

View file

@ -42,6 +42,7 @@
#include <future>
#include <vector>
#include <queue>
#include <atomic>
#include <string.h>
#include <wrl.h>
#include <gst/d3dshader/gstd3dshader.h>
@ -172,10 +173,12 @@ enum
PROP_ADAPTER,
PROP_BACKGROUND,
PROP_IGNORE_INACTIVE_PADS,
PROP_ASYNC_DEPTH,
};
#define DEFAULT_ADAPTER -1
#define DEFAULT_BACKGROUND GST_D3D12_COMPOSITOR_BACKGROUND_CHECKER
#define DEFAULT_ASYNC_DEPTH 0
static const D3D12_RENDER_TARGET_BLEND_DESC g_blend_source = {
TRUE,
@ -527,7 +530,6 @@ struct BackgroundRender
bool is_valid = false;
guint64 fence_val = 0;
};
/* *INDENT-ON* */
struct ClearColor
{
@ -556,8 +558,8 @@ struct GStD3D12CompositorPrivate
/* black/white/transparent */
ClearColor clear_color[3];
GstD3D12FenceDataPool *fence_data_pool;
std::vector < D3D12_CPU_DESCRIPTOR_HANDLE > rtv_handles;
std::queue < guint64 > scheduled;
std::vector<D3D12_CPU_DESCRIPTOR_HANDLE> rtv_handles;
std::queue<guint64> scheduled;
GstVideoInfo negotiated_info;
@ -568,7 +570,9 @@ struct GStD3D12CompositorPrivate
/* properties */
gint adapter = DEFAULT_ADAPTER;
GstD3D12CompositorBackground background = DEFAULT_BACKGROUND;
std::atomic<guint> async_depth = { DEFAULT_ASYNC_DEPTH };
};
/* *INDENT-ON* */
struct _GstD3D12Compositor
{
@ -1308,6 +1312,13 @@ gst_d3d12_compositor_class_init (GstD3D12CompositorClass * klass)
"Avoid timing out waiting for inactive pads", FALSE,
(GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));
g_object_class_install_property (object_class, PROP_ASYNC_DEPTH,
g_param_spec_uint ("async-depth", "Async Depth",
"Number of in-flight GPU commands which can be scheduled without "
"synchronization (0 = unlimited)", 0, G_MAXINT, DEFAULT_ASYNC_DEPTH,
(GParamFlags) (GST_PARAM_MUTABLE_PLAYING |
G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));
element_class->request_new_pad =
GST_DEBUG_FUNCPTR (gst_d3d12_compositor_request_new_pad);
element_class->release_pad =
@ -1389,6 +1400,9 @@ gst_d3d12_compositor_set_property (GObject * object,
gst_aggregator_set_ignore_inactive_pads (GST_AGGREGATOR (object),
g_value_get_boolean (value));
break;
case PROP_ASYNC_DEPTH:
priv->async_depth = g_value_get_uint (value);
break;
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
break;
@ -1414,6 +1428,9 @@ gst_d3d12_compositor_get_property (GObject * object,
g_value_set_boolean (value,
gst_aggregator_get_ignore_inactive_pads (GST_AGGREGATOR (object)));
break;
case PROP_ASYNC_DEPTH:
g_value_set_uint (value, priv->async_depth);
break;
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
break;
@ -2355,25 +2372,6 @@ gst_d3d12_compositor_aggregate_frames (GstVideoAggregator * vagg,
return GST_FLOW_ERROR;
}
auto completed = gst_d3d12_device_get_completed_value (self->device,
D3D12_COMMAND_LIST_TYPE_DIRECT);
while (!priv->scheduled.empty ()) {
if (priv->scheduled.front () > completed)
break;
priv->scheduled.pop ();
}
/* avoid too large buffering */
if (priv->scheduled.size () > 2) {
auto fence_to_wait = priv->scheduled.front ();
priv->scheduled.pop ();
GST_LOG_OBJECT (self, "Waiting for previous command, %" G_GUINT64_FORMAT,
fence_to_wait);
gst_d3d12_device_fence_wait (self->device,
D3D12_COMMAND_LIST_TYPE_DIRECT, fence_to_wait);
}
if (!gst_d3d12_compositor_draw_background (self)) {
GST_ERROR_OBJECT (self, "Couldn't draw background");
return GST_FLOW_ERROR;
@ -2429,6 +2427,26 @@ gst_d3d12_compositor_aggregate_frames (GstVideoAggregator * vagg,
return ret;
priv->scheduled.push (fence_val);
auto completed = gst_d3d12_device_get_completed_value (self->device,
D3D12_COMMAND_LIST_TYPE_DIRECT);
while (!priv->scheduled.empty ()) {
if (priv->scheduled.front () > completed)
break;
priv->scheduled.pop ();
}
auto async_depth = priv->async_depth.load ();
if (async_depth > 0 && priv->scheduled.size () > async_depth) {
auto fence_to_wait = priv->scheduled.front ();
priv->scheduled.pop ();
GST_LOG_OBJECT (self, "Waiting for previous command, %" G_GUINT64_FORMAT,
fence_to_wait);
gst_d3d12_device_fence_wait (self->device,
D3D12_COMMAND_LIST_TYPE_DIRECT, fence_to_wait);
}
if (priv->generated_output_buf != outbuf) {
GstVideoFrame out_frame, in_frame;
if (!gst_video_frame_map (&in_frame, &vagg->info,

View file

@ -28,6 +28,7 @@
#include <memory>
#include <queue>
#include <wrl.h>
#include <atomic>
/* *INDENT-OFF* */
using namespace Microsoft::WRL;
@ -65,6 +66,7 @@ enum
PROP_VIDEO_DIRECTION,
PROP_GAMMA_MODE,
PROP_PRIMARIES_MODE,
PROP_ASYNC_DEPTH,
};
#define DEFAULT_ADD_BORDERS TRUE
@ -72,8 +74,7 @@ enum
#define DEFAULT_GAMMA_MODE GST_VIDEO_GAMMA_MODE_NONE
#define DEFAULT_PRIMARIES_MODE GST_VIDEO_PRIMARIES_MODE_NONE
#define DEFAULT_SAMPLING_METHOD GST_D3D12_SAMPLING_METHOD_BILINEAR
#define ASYNC_DEPTH 2
#define DEFAULT_ASYNC_DEPTH 0
/* *INDENT-OFF* */
struct ConvertContext
@ -154,6 +155,8 @@ struct GstD3D12ConvertPrivate
/* method previously selected and used for negotiation */
GstVideoOrientationMethod active_method = GST_VIDEO_ORIENTATION_IDENTITY;
std::atomic<guint> async_depth = { DEFAULT_ASYNC_DEPTH };
std::mutex lock;
};
/* *INDENT-ON* */
@ -252,6 +255,13 @@ gst_d3d12_convert_class_init (GstD3D12ConvertClass * klass)
DEFAULT_PRIMARIES_MODE, (GParamFlags) (GST_PARAM_MUTABLE_PLAYING |
G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));
g_object_class_install_property (object_class, PROP_ASYNC_DEPTH,
g_param_spec_uint ("async-depth", "Async Depth",
"Number of in-flight GPU commands which can be scheduled without "
"synchronization (0 = unlimited)", 0, G_MAXINT, DEFAULT_ASYNC_DEPTH,
(GParamFlags) (GST_PARAM_MUTABLE_PLAYING |
G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));
gst_element_class_add_static_pad_template (element_class, &sink_template);
gst_element_class_add_static_pad_template (element_class, &src_template);
@ -425,6 +435,7 @@ gst_d3d12_convert_set_property (GObject * object, guint prop_id,
const GValue * value, GParamSpec * pspec)
{
auto self = GST_D3D12_CONVERT (object);
auto priv = self->priv;
switch (prop_id) {
case PROP_SAMPLING_METHOD:
@ -449,6 +460,9 @@ gst_d3d12_convert_set_property (GObject * object, guint prop_id,
gst_d3d12_convert_set_primaries_mode (self,
(GstVideoPrimariesMode) g_value_get_enum (value));
break;
case PROP_ASYNC_DEPTH:
priv->async_depth = g_value_get_uint (value);
break;
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
break;
@ -482,6 +496,9 @@ gst_d3d12_convert_get_property (GObject * object, guint prop_id,
case PROP_PRIMARIES_MODE:
g_value_set_enum (value, priv->primaries_mode);
break;
case PROP_ASYNC_DEPTH:
g_value_set_uint (value, priv->async_depth);
break;
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
break;
@ -1971,22 +1988,6 @@ gst_d3d12_convert_transform (GstBaseTransform * trans, GstBuffer * inbuf,
"src-height", (gint) in_rect.bottom - in_rect.top, nullptr);
}
auto completed = gst_d3d12_device_get_completed_value (priv->ctx->device,
D3D12_COMMAND_LIST_TYPE_DIRECT);
while (!priv->ctx->scheduled.empty ()) {
if (priv->ctx->scheduled.front () > completed)
break;
priv->ctx->scheduled.pop ();
}
if (priv->ctx->scheduled.size () >= ASYNC_DEPTH) {
auto fence_to_wait = priv->ctx->scheduled.front ();
priv->ctx->scheduled.pop ();
gst_d3d12_device_fence_wait (priv->ctx->device,
D3D12_COMMAND_LIST_TYPE_DIRECT, fence_to_wait);
}
GstD3D12CommandAllocator *gst_ca;
if (!gst_d3d12_command_allocator_pool_acquire (priv->ctx->ca_pool, &gst_ca)) {
GST_ERROR_OBJECT (self, "Couldn't acquire command allocator");
@ -2057,5 +2058,22 @@ gst_d3d12_convert_transform (GstBaseTransform * trans, GstBuffer * inbuf,
priv->ctx->scheduled.push (priv->ctx->fence_val);
auto completed = gst_d3d12_device_get_completed_value (priv->ctx->device,
D3D12_COMMAND_LIST_TYPE_DIRECT);
while (!priv->ctx->scheduled.empty ()) {
if (priv->ctx->scheduled.front () > completed)
break;
priv->ctx->scheduled.pop ();
}
auto async_depth = priv->async_depth.load ();
if (async_depth > 0 && priv->ctx->scheduled.size () > async_depth) {
auto fence_to_wait = priv->ctx->scheduled.front ();
priv->ctx->scheduled.pop ();
gst_d3d12_device_fence_wait (priv->ctx->device,
D3D12_COMMAND_LIST_TYPE_DIRECT, fence_to_wait);
}
return GST_FLOW_OK;
}

View file

@ -46,6 +46,7 @@
#include <memory>
#include <vector>
#include <queue>
#include <atomic>
#include <gst/d3dshader/gstd3dshader.h>
/* *INDENT-OFF* */
@ -120,13 +121,13 @@ enum
PROP_PATTERN,
PROP_ALPHA,
PROP_ALPHA_MODE,
PROP_ASYNC_DEPTH,
};
#define DEFAULT_ADAPTER -1
#define DEFAULT_PATTERN GST_D3D12_TEST_SRC_SMPTE
#define DEFAULT_ALPHA 1.0f
#define ASYNC_DEPTH 2
#define DEFAULT_ASYNC_DEPTH 0
struct ColorValue
{
@ -321,6 +322,7 @@ struct GstD3D12TestSrcPrivate
gint64 accum_frames = 0;
GstClockTime accum_rtime = 0;
GstClockTime running_time = 0;
std::atomic<guint> async_depth = { DEFAULT_ASYNC_DEPTH };
};
/* *INDENT-ON* */
@ -1387,6 +1389,12 @@ gst_d3d12_test_src_class_init (GstD3D12TestSrcClass * klass)
0, 1, DEFAULT_ALPHA,
(GParamFlags) (G_PARAM_READWRITE | GST_PARAM_MUTABLE_READY |
G_PARAM_STATIC_STRINGS)));
g_object_class_install_property (object_class, PROP_ASYNC_DEPTH,
g_param_spec_uint ("async-depth", "Async Depth",
"Number of in-flight GPU commands which can be scheduled without "
"synchronization (0 = unlimited)", 0, G_MAXINT, DEFAULT_ASYNC_DEPTH,
(GParamFlags) (GST_PARAM_MUTABLE_PLAYING |
G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));
element_class->set_context =
GST_DEBUG_FUNCPTR (gst_d3d12_test_src_set_context);
@ -1456,6 +1464,9 @@ gst_d3d12_test_src_set_property (GObject * object, guint prop_id,
case PROP_ALPHA:
priv->alpha = g_value_get_float (value);
break;
case PROP_ASYNC_DEPTH:
priv->async_depth = g_value_get_uint (value);
break;
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
break;
@ -1482,6 +1493,9 @@ gst_d3d12_test_src_get_property (GObject * object, guint prop_id,
case PROP_ALPHA:
g_value_set_float (value, priv->alpha);
break;
case PROP_ASYNC_DEPTH:
g_value_set_uint (value, priv->async_depth);
break;
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
break;
@ -2166,22 +2180,6 @@ gst_d3d12_test_src_create (GstBaseSrc * bsrc, guint64 offset,
if (ret != GST_FLOW_OK)
return ret;
auto completed = gst_d3d12_device_get_completed_value (self->device,
D3D12_COMMAND_LIST_TYPE_DIRECT);
while (!priv->ctx->scheduled.empty ()) {
if (priv->ctx->scheduled.front () > completed)
break;
priv->ctx->scheduled.pop ();
}
if (priv->ctx->scheduled.size () >= ASYNC_DEPTH) {
auto fence_to_wait = priv->ctx->scheduled.front ();
priv->ctx->scheduled.pop ();
gst_d3d12_device_fence_wait (self->device,
D3D12_COMMAND_LIST_TYPE_DIRECT, fence_to_wait);
}
GstD3D12CommandAllocator *gst_ca;
if (!gst_d3d12_command_allocator_pool_acquire (priv->ctx->ca_pool, &gst_ca)) {
GST_ERROR_OBJECT (self, "Couldn't acquire command allocator");
@ -2264,6 +2262,23 @@ gst_d3d12_test_src_create (GstBaseSrc * bsrc, guint64 offset,
priv->ctx->scheduled.push (priv->ctx->fence_val);
auto completed = gst_d3d12_device_get_completed_value (self->device,
D3D12_COMMAND_LIST_TYPE_DIRECT);
while (!priv->ctx->scheduled.empty ()) {
if (priv->ctx->scheduled.front () > completed)
break;
priv->ctx->scheduled.pop ();
}
auto async_depth = priv->async_depth.load ();
if (async_depth > 0 && priv->ctx->scheduled.size () > async_depth) {
auto fence_to_wait = priv->ctx->scheduled.front ();
priv->ctx->scheduled.pop ();
gst_d3d12_device_fence_wait (self->device,
D3D12_COMMAND_LIST_TYPE_DIRECT, fence_to_wait);
}
if (priv->downstream_supports_d3d12) {
buffer = convert_buffer;
convert_buffer = nullptr;