mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2024-11-14 13:21:28 +00:00
d3d12: Add async-depth property
Add a property to control the number of in-flight GPU commands (default is unlimited). Note that the actual maximum is defined by the d3d12device's direct command queue object, which is currently 32, so the total number of scheduled GPU commands cannot exceed 32. Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/7444>
This commit is contained in:
parent
3506f5fb07
commit
d0505fba55
3 changed files with 109 additions and 58 deletions
|
@ -42,6 +42,7 @@
|
|||
#include <future>
|
||||
#include <vector>
|
||||
#include <queue>
|
||||
#include <atomic>
|
||||
#include <string.h>
|
||||
#include <wrl.h>
|
||||
#include <gst/d3dshader/gstd3dshader.h>
|
||||
|
@ -172,10 +173,12 @@ enum
|
|||
PROP_ADAPTER,
|
||||
PROP_BACKGROUND,
|
||||
PROP_IGNORE_INACTIVE_PADS,
|
||||
PROP_ASYNC_DEPTH,
|
||||
};
|
||||
|
||||
#define DEFAULT_ADAPTER -1
|
||||
#define DEFAULT_BACKGROUND GST_D3D12_COMPOSITOR_BACKGROUND_CHECKER
|
||||
#define DEFAULT_ASYNC_DEPTH 0
|
||||
|
||||
static const D3D12_RENDER_TARGET_BLEND_DESC g_blend_source = {
|
||||
TRUE,
|
||||
|
@ -527,7 +530,6 @@ struct BackgroundRender
|
|||
bool is_valid = false;
|
||||
guint64 fence_val = 0;
|
||||
};
|
||||
/* *INDENT-ON* */
|
||||
|
||||
struct ClearColor
|
||||
{
|
||||
|
@ -556,8 +558,8 @@ struct GStD3D12CompositorPrivate
|
|||
/* black/white/transparent */
|
||||
ClearColor clear_color[3];
|
||||
GstD3D12FenceDataPool *fence_data_pool;
|
||||
std::vector < D3D12_CPU_DESCRIPTOR_HANDLE > rtv_handles;
|
||||
std::queue < guint64 > scheduled;
|
||||
std::vector<D3D12_CPU_DESCRIPTOR_HANDLE> rtv_handles;
|
||||
std::queue<guint64> scheduled;
|
||||
|
||||
GstVideoInfo negotiated_info;
|
||||
|
||||
|
@ -568,7 +570,9 @@ struct GStD3D12CompositorPrivate
|
|||
/* properties */
|
||||
gint adapter = DEFAULT_ADAPTER;
|
||||
GstD3D12CompositorBackground background = DEFAULT_BACKGROUND;
|
||||
std::atomic<guint> async_depth = { DEFAULT_ASYNC_DEPTH };
|
||||
};
|
||||
/* *INDENT-ON* */
|
||||
|
||||
struct _GstD3D12Compositor
|
||||
{
|
||||
|
@ -1308,6 +1312,13 @@ gst_d3d12_compositor_class_init (GstD3D12CompositorClass * klass)
|
|||
"Avoid timing out waiting for inactive pads", FALSE,
|
||||
(GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));
|
||||
|
||||
g_object_class_install_property (object_class, PROP_ASYNC_DEPTH,
|
||||
g_param_spec_uint ("async-depth", "Async Depth",
|
||||
"Number of in-flight GPU commands which can be scheduled without "
|
||||
"synchronization (0 = unlimited)", 0, G_MAXINT, DEFAULT_ASYNC_DEPTH,
|
||||
(GParamFlags) (GST_PARAM_MUTABLE_PLAYING |
|
||||
G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));
|
||||
|
||||
element_class->request_new_pad =
|
||||
GST_DEBUG_FUNCPTR (gst_d3d12_compositor_request_new_pad);
|
||||
element_class->release_pad =
|
||||
|
@ -1389,6 +1400,9 @@ gst_d3d12_compositor_set_property (GObject * object,
|
|||
gst_aggregator_set_ignore_inactive_pads (GST_AGGREGATOR (object),
|
||||
g_value_get_boolean (value));
|
||||
break;
|
||||
case PROP_ASYNC_DEPTH:
|
||||
priv->async_depth = g_value_get_uint (value);
|
||||
break;
|
||||
default:
|
||||
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
|
||||
break;
|
||||
|
@ -1414,6 +1428,9 @@ gst_d3d12_compositor_get_property (GObject * object,
|
|||
g_value_set_boolean (value,
|
||||
gst_aggregator_get_ignore_inactive_pads (GST_AGGREGATOR (object)));
|
||||
break;
|
||||
case PROP_ASYNC_DEPTH:
|
||||
g_value_set_uint (value, priv->async_depth);
|
||||
break;
|
||||
default:
|
||||
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
|
||||
break;
|
||||
|
@ -2355,25 +2372,6 @@ gst_d3d12_compositor_aggregate_frames (GstVideoAggregator * vagg,
|
|||
return GST_FLOW_ERROR;
|
||||
}
|
||||
|
||||
auto completed = gst_d3d12_device_get_completed_value (self->device,
|
||||
D3D12_COMMAND_LIST_TYPE_DIRECT);
|
||||
while (!priv->scheduled.empty ()) {
|
||||
if (priv->scheduled.front () > completed)
|
||||
break;
|
||||
|
||||
priv->scheduled.pop ();
|
||||
}
|
||||
|
||||
/* avoid too large buffering */
|
||||
if (priv->scheduled.size () > 2) {
|
||||
auto fence_to_wait = priv->scheduled.front ();
|
||||
priv->scheduled.pop ();
|
||||
GST_LOG_OBJECT (self, "Waiting for previous command, %" G_GUINT64_FORMAT,
|
||||
fence_to_wait);
|
||||
gst_d3d12_device_fence_wait (self->device,
|
||||
D3D12_COMMAND_LIST_TYPE_DIRECT, fence_to_wait);
|
||||
}
|
||||
|
||||
if (!gst_d3d12_compositor_draw_background (self)) {
|
||||
GST_ERROR_OBJECT (self, "Couldn't draw background");
|
||||
return GST_FLOW_ERROR;
|
||||
|
@ -2429,6 +2427,26 @@ gst_d3d12_compositor_aggregate_frames (GstVideoAggregator * vagg,
|
|||
return ret;
|
||||
|
||||
priv->scheduled.push (fence_val);
|
||||
|
||||
auto completed = gst_d3d12_device_get_completed_value (self->device,
|
||||
D3D12_COMMAND_LIST_TYPE_DIRECT);
|
||||
while (!priv->scheduled.empty ()) {
|
||||
if (priv->scheduled.front () > completed)
|
||||
break;
|
||||
|
||||
priv->scheduled.pop ();
|
||||
}
|
||||
|
||||
auto async_depth = priv->async_depth.load ();
|
||||
if (async_depth > 0 && priv->scheduled.size () > async_depth) {
|
||||
auto fence_to_wait = priv->scheduled.front ();
|
||||
priv->scheduled.pop ();
|
||||
GST_LOG_OBJECT (self, "Waiting for previous command, %" G_GUINT64_FORMAT,
|
||||
fence_to_wait);
|
||||
gst_d3d12_device_fence_wait (self->device,
|
||||
D3D12_COMMAND_LIST_TYPE_DIRECT, fence_to_wait);
|
||||
}
|
||||
|
||||
if (priv->generated_output_buf != outbuf) {
|
||||
GstVideoFrame out_frame, in_frame;
|
||||
if (!gst_video_frame_map (&in_frame, &vagg->info,
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
#include <memory>
|
||||
#include <queue>
|
||||
#include <wrl.h>
|
||||
#include <atomic>
|
||||
|
||||
/* *INDENT-OFF* */
|
||||
using namespace Microsoft::WRL;
|
||||
|
@ -65,6 +66,7 @@ enum
|
|||
PROP_VIDEO_DIRECTION,
|
||||
PROP_GAMMA_MODE,
|
||||
PROP_PRIMARIES_MODE,
|
||||
PROP_ASYNC_DEPTH,
|
||||
};
|
||||
|
||||
#define DEFAULT_ADD_BORDERS TRUE
|
||||
|
@ -72,8 +74,7 @@ enum
|
|||
#define DEFAULT_GAMMA_MODE GST_VIDEO_GAMMA_MODE_NONE
|
||||
#define DEFAULT_PRIMARIES_MODE GST_VIDEO_PRIMARIES_MODE_NONE
|
||||
#define DEFAULT_SAMPLING_METHOD GST_D3D12_SAMPLING_METHOD_BILINEAR
|
||||
|
||||
#define ASYNC_DEPTH 2
|
||||
#define DEFAULT_ASYNC_DEPTH 0
|
||||
|
||||
/* *INDENT-OFF* */
|
||||
struct ConvertContext
|
||||
|
@ -154,6 +155,8 @@ struct GstD3D12ConvertPrivate
|
|||
/* method previously selected and used for negotiation */
|
||||
GstVideoOrientationMethod active_method = GST_VIDEO_ORIENTATION_IDENTITY;
|
||||
|
||||
std::atomic<guint> async_depth = { DEFAULT_ASYNC_DEPTH };
|
||||
|
||||
std::mutex lock;
|
||||
};
|
||||
/* *INDENT-ON* */
|
||||
|
@ -252,6 +255,13 @@ gst_d3d12_convert_class_init (GstD3D12ConvertClass * klass)
|
|||
DEFAULT_PRIMARIES_MODE, (GParamFlags) (GST_PARAM_MUTABLE_PLAYING |
|
||||
G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));
|
||||
|
||||
g_object_class_install_property (object_class, PROP_ASYNC_DEPTH,
|
||||
g_param_spec_uint ("async-depth", "Async Depth",
|
||||
"Number of in-flight GPU commands which can be scheduled without "
|
||||
"synchronization (0 = unlimited)", 0, G_MAXINT, DEFAULT_ASYNC_DEPTH,
|
||||
(GParamFlags) (GST_PARAM_MUTABLE_PLAYING |
|
||||
G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));
|
||||
|
||||
gst_element_class_add_static_pad_template (element_class, &sink_template);
|
||||
gst_element_class_add_static_pad_template (element_class, &src_template);
|
||||
|
||||
|
@ -425,6 +435,7 @@ gst_d3d12_convert_set_property (GObject * object, guint prop_id,
|
|||
const GValue * value, GParamSpec * pspec)
|
||||
{
|
||||
auto self = GST_D3D12_CONVERT (object);
|
||||
auto priv = self->priv;
|
||||
|
||||
switch (prop_id) {
|
||||
case PROP_SAMPLING_METHOD:
|
||||
|
@ -449,6 +460,9 @@ gst_d3d12_convert_set_property (GObject * object, guint prop_id,
|
|||
gst_d3d12_convert_set_primaries_mode (self,
|
||||
(GstVideoPrimariesMode) g_value_get_enum (value));
|
||||
break;
|
||||
case PROP_ASYNC_DEPTH:
|
||||
priv->async_depth = g_value_get_uint (value);
|
||||
break;
|
||||
default:
|
||||
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
|
||||
break;
|
||||
|
@ -482,6 +496,9 @@ gst_d3d12_convert_get_property (GObject * object, guint prop_id,
|
|||
case PROP_PRIMARIES_MODE:
|
||||
g_value_set_enum (value, priv->primaries_mode);
|
||||
break;
|
||||
case PROP_ASYNC_DEPTH:
|
||||
g_value_set_uint (value, priv->async_depth);
|
||||
break;
|
||||
default:
|
||||
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
|
||||
break;
|
||||
|
@ -1971,22 +1988,6 @@ gst_d3d12_convert_transform (GstBaseTransform * trans, GstBuffer * inbuf,
|
|||
"src-height", (gint) in_rect.bottom - in_rect.top, nullptr);
|
||||
}
|
||||
|
||||
auto completed = gst_d3d12_device_get_completed_value (priv->ctx->device,
|
||||
D3D12_COMMAND_LIST_TYPE_DIRECT);
|
||||
while (!priv->ctx->scheduled.empty ()) {
|
||||
if (priv->ctx->scheduled.front () > completed)
|
||||
break;
|
||||
|
||||
priv->ctx->scheduled.pop ();
|
||||
}
|
||||
|
||||
if (priv->ctx->scheduled.size () >= ASYNC_DEPTH) {
|
||||
auto fence_to_wait = priv->ctx->scheduled.front ();
|
||||
priv->ctx->scheduled.pop ();
|
||||
gst_d3d12_device_fence_wait (priv->ctx->device,
|
||||
D3D12_COMMAND_LIST_TYPE_DIRECT, fence_to_wait);
|
||||
}
|
||||
|
||||
GstD3D12CommandAllocator *gst_ca;
|
||||
if (!gst_d3d12_command_allocator_pool_acquire (priv->ctx->ca_pool, &gst_ca)) {
|
||||
GST_ERROR_OBJECT (self, "Couldn't acquire command allocator");
|
||||
|
@ -2057,5 +2058,22 @@ gst_d3d12_convert_transform (GstBaseTransform * trans, GstBuffer * inbuf,
|
|||
|
||||
priv->ctx->scheduled.push (priv->ctx->fence_val);
|
||||
|
||||
auto completed = gst_d3d12_device_get_completed_value (priv->ctx->device,
|
||||
D3D12_COMMAND_LIST_TYPE_DIRECT);
|
||||
while (!priv->ctx->scheduled.empty ()) {
|
||||
if (priv->ctx->scheduled.front () > completed)
|
||||
break;
|
||||
|
||||
priv->ctx->scheduled.pop ();
|
||||
}
|
||||
|
||||
auto async_depth = priv->async_depth.load ();
|
||||
if (async_depth > 0 && priv->ctx->scheduled.size () > async_depth) {
|
||||
auto fence_to_wait = priv->ctx->scheduled.front ();
|
||||
priv->ctx->scheduled.pop ();
|
||||
gst_d3d12_device_fence_wait (priv->ctx->device,
|
||||
D3D12_COMMAND_LIST_TYPE_DIRECT, fence_to_wait);
|
||||
}
|
||||
|
||||
return GST_FLOW_OK;
|
||||
}
|
||||
|
|
|
@ -46,6 +46,7 @@
|
|||
#include <memory>
|
||||
#include <vector>
|
||||
#include <queue>
|
||||
#include <atomic>
|
||||
#include <gst/d3dshader/gstd3dshader.h>
|
||||
|
||||
/* *INDENT-OFF* */
|
||||
|
@ -120,13 +121,13 @@ enum
|
|||
PROP_PATTERN,
|
||||
PROP_ALPHA,
|
||||
PROP_ALPHA_MODE,
|
||||
PROP_ASYNC_DEPTH,
|
||||
};
|
||||
|
||||
#define DEFAULT_ADAPTER -1
|
||||
#define DEFAULT_PATTERN GST_D3D12_TEST_SRC_SMPTE
|
||||
#define DEFAULT_ALPHA 1.0f
|
||||
|
||||
#define ASYNC_DEPTH 2
|
||||
#define DEFAULT_ASYNC_DEPTH 0
|
||||
|
||||
struct ColorValue
|
||||
{
|
||||
|
@ -321,6 +322,7 @@ struct GstD3D12TestSrcPrivate
|
|||
gint64 accum_frames = 0;
|
||||
GstClockTime accum_rtime = 0;
|
||||
GstClockTime running_time = 0;
|
||||
std::atomic<guint> async_depth = { DEFAULT_ASYNC_DEPTH };
|
||||
};
|
||||
/* *INDENT-ON* */
|
||||
|
||||
|
@ -1387,6 +1389,12 @@ gst_d3d12_test_src_class_init (GstD3D12TestSrcClass * klass)
|
|||
0, 1, DEFAULT_ALPHA,
|
||||
(GParamFlags) (G_PARAM_READWRITE | GST_PARAM_MUTABLE_READY |
|
||||
G_PARAM_STATIC_STRINGS)));
|
||||
g_object_class_install_property (object_class, PROP_ASYNC_DEPTH,
|
||||
g_param_spec_uint ("async-depth", "Async Depth",
|
||||
"Number of in-flight GPU commands which can be scheduled without "
|
||||
"synchronization (0 = unlimited)", 0, G_MAXINT, DEFAULT_ASYNC_DEPTH,
|
||||
(GParamFlags) (GST_PARAM_MUTABLE_PLAYING |
|
||||
G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)));
|
||||
|
||||
element_class->set_context =
|
||||
GST_DEBUG_FUNCPTR (gst_d3d12_test_src_set_context);
|
||||
|
@ -1456,6 +1464,9 @@ gst_d3d12_test_src_set_property (GObject * object, guint prop_id,
|
|||
case PROP_ALPHA:
|
||||
priv->alpha = g_value_get_float (value);
|
||||
break;
|
||||
case PROP_ASYNC_DEPTH:
|
||||
priv->async_depth = g_value_get_uint (value);
|
||||
break;
|
||||
default:
|
||||
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
|
||||
break;
|
||||
|
@ -1482,6 +1493,9 @@ gst_d3d12_test_src_get_property (GObject * object, guint prop_id,
|
|||
case PROP_ALPHA:
|
||||
g_value_set_float (value, priv->alpha);
|
||||
break;
|
||||
case PROP_ASYNC_DEPTH:
|
||||
g_value_set_uint (value, priv->async_depth);
|
||||
break;
|
||||
default:
|
||||
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
|
||||
break;
|
||||
|
@ -2166,22 +2180,6 @@ gst_d3d12_test_src_create (GstBaseSrc * bsrc, guint64 offset,
|
|||
if (ret != GST_FLOW_OK)
|
||||
return ret;
|
||||
|
||||
auto completed = gst_d3d12_device_get_completed_value (self->device,
|
||||
D3D12_COMMAND_LIST_TYPE_DIRECT);
|
||||
while (!priv->ctx->scheduled.empty ()) {
|
||||
if (priv->ctx->scheduled.front () > completed)
|
||||
break;
|
||||
|
||||
priv->ctx->scheduled.pop ();
|
||||
}
|
||||
|
||||
if (priv->ctx->scheduled.size () >= ASYNC_DEPTH) {
|
||||
auto fence_to_wait = priv->ctx->scheduled.front ();
|
||||
priv->ctx->scheduled.pop ();
|
||||
gst_d3d12_device_fence_wait (self->device,
|
||||
D3D12_COMMAND_LIST_TYPE_DIRECT, fence_to_wait);
|
||||
}
|
||||
|
||||
GstD3D12CommandAllocator *gst_ca;
|
||||
if (!gst_d3d12_command_allocator_pool_acquire (priv->ctx->ca_pool, &gst_ca)) {
|
||||
GST_ERROR_OBJECT (self, "Couldn't acquire command allocator");
|
||||
|
@ -2264,6 +2262,23 @@ gst_d3d12_test_src_create (GstBaseSrc * bsrc, guint64 offset,
|
|||
|
||||
priv->ctx->scheduled.push (priv->ctx->fence_val);
|
||||
|
||||
auto completed = gst_d3d12_device_get_completed_value (self->device,
|
||||
D3D12_COMMAND_LIST_TYPE_DIRECT);
|
||||
while (!priv->ctx->scheduled.empty ()) {
|
||||
if (priv->ctx->scheduled.front () > completed)
|
||||
break;
|
||||
|
||||
priv->ctx->scheduled.pop ();
|
||||
}
|
||||
|
||||
auto async_depth = priv->async_depth.load ();
|
||||
if (async_depth > 0 && priv->ctx->scheduled.size () > async_depth) {
|
||||
auto fence_to_wait = priv->ctx->scheduled.front ();
|
||||
priv->ctx->scheduled.pop ();
|
||||
gst_d3d12_device_fence_wait (self->device,
|
||||
D3D12_COMMAND_LIST_TYPE_DIRECT, fence_to_wait);
|
||||
}
|
||||
|
||||
if (priv->downstream_supports_d3d12) {
|
||||
buffer = convert_buffer;
|
||||
convert_buffer = nullptr;
|
||||
|
|
Loading…
Reference in a new issue