d3d12: Workaround for Intel iGPU decoder crash

Observed an Intel GPU driver crash when multiple decoders are
configured in a process. It might be caused by frequent
command queue alloc/free or by too many in-flight decoding commands.
In order to make the command queue persistent and limit the number of
in-flight command lists, hold a global decoding command queue.

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/7019>
This commit is contained in:
Seungha Yang 2024-06-12 01:02:39 +09:00 committed by GStreamer Marge Bot
parent e5b5d223b4
commit a2df44da7d
5 changed files with 199 additions and 33 deletions

View file

@ -89,4 +89,26 @@ private:
GstD3D12Device *device_;
};
/* Scoped helper around gst_d3d12_device_decoder_lock() /
 * gst_d3d12_device_decoder_unlock(): the lock call is made on construction
 * and the matching unlock call on destruction. A null device is accepted,
 * in which case neither call is made. */
class GstD3D12DeviceDecoderLockGuard
{
public:
  explicit GstD3D12DeviceDecoderLockGuard(GstD3D12Device * device)
      : device_ (device)
  {
    if (!device_)
      return;

    gst_d3d12_device_decoder_lock (device_);
  }

  ~GstD3D12DeviceDecoderLockGuard()
  {
    if (!device_)
      return;

    gst_d3d12_device_decoder_unlock (device_);
  }

  /* Copying is disabled so that every lock call is paired with exactly
   * one unlock call */
  GstD3D12DeviceDecoderLockGuard(const GstD3D12DeviceDecoderLockGuard&) = delete;
  GstD3D12DeviceDecoderLockGuard& operator=(const GstD3D12DeviceDecoderLockGuard&) = delete;

private:
  /* Device passed at construction; may be null. Not ref'd by this class. */
  GstD3D12Device *device_;
};
#endif /* __cplusplus */

View file

@ -164,7 +164,7 @@ gst_d3d12_command_queue_new (ID3D12Device * device,
ComPtr < ID3D12CommandQueue > cq;
auto hr = device->CreateCommandQueue (desc, IID_PPV_ARGS (&cq));
if (FAILED (hr)) {
GST_ERROR ("Couldn't create command queue, hr: 0x%x", (guint) hr);
GST_WARNING ("Couldn't create command queue, hr: 0x%x", (guint) hr);
return nullptr;
}

View file

@ -25,6 +25,14 @@
G_BEGIN_DECLS
/* Per-device workaround flags, reported by
 * gst_d3d12_device_get_workaround_flags() */
enum GstD3D12WAFlags
{
/* No workaround is needed */
GST_D3D12_WA_NONE = 0,
/* Decoder operations on this device must be serialized. When set,
 * gst_d3d12_device_decoder_lock() / _unlock() actually take the
 * device-wide decoder lock; otherwise they do nothing. */
GST_D3D12_WA_DECODER_RACE = (1 << 0),
};
/* Enables bitwise operators (|, &, |=, ...) on the enum type */
DEFINE_ENUM_FLAG_OPERATORS (GstD3D12WAFlags);
struct GstD3D12CopyTextureRegionArgs
{
D3D12_TEXTURE_COPY_LOCATION dst;
@ -71,5 +79,17 @@ void gst_d3d12_device_11on12_unlock (GstD3D12Device * device);
GST_D3D12_API
void gst_d3d12_device_check_device_removed (GstD3D12Device * device);
/* Returns one of the decode command queues held by the device, rotating
 * through the available queues on successive calls. Returns nullptr when
 * the device holds no decode queue. The returned pointer is not ref'd by
 * this function. */
GST_D3D12_API
GstD3D12CommandQueue * gst_d3d12_device_get_decode_queue (GstD3D12Device * device);
/* Takes the device-wide decoder lock (a recursive mutex), but only when the
 * device has GST_D3D12_WA_DECODER_RACE set; otherwise does nothing. */
GST_D3D12_API
void gst_d3d12_device_decoder_lock (GstD3D12Device * device);
/* Releases the lock taken by gst_d3d12_device_decoder_lock(); does nothing
 * when the device does not have GST_D3D12_WA_DECODER_RACE set. */
GST_D3D12_API
void gst_d3d12_device_decoder_unlock (GstD3D12Device * device);
/* Returns the workaround flags stored on the device, or GST_D3D12_WA_NONE
 * if device is not a valid GstD3D12Device. */
GST_D3D12_API
GstD3D12WAFlags gst_d3d12_device_get_workaround_flags (GstD3D12Device * device);
G_END_DECLS

View file

@ -126,6 +126,8 @@ struct DeviceInner
gst_clear_object (&direct_queue);
gst_clear_object (&copy_queue);
for (guint i = 0; i < num_decode_queue; i++)
gst_clear_object (&decode_queue[i]);
gst_clear_object (&direct_ca_pool);
gst_clear_object (&direct_cl_pool);
@ -154,6 +156,9 @@ struct DeviceInner
if (copy_queue)
gst_d3d12_command_queue_drain (copy_queue);
for (guint i = 0; i < num_decode_queue; i++)
gst_d3d12_command_queue_drain (decode_queue[i]);
}
void ReportLiveObjects ()
@ -230,6 +235,11 @@ struct DeviceInner
GstD3D12CommandQueue *direct_queue = nullptr;
GstD3D12CommandQueue *copy_queue = nullptr;
GstD3D12CommandQueue *decode_queue[2] = { nullptr, };
guint num_decode_queue = 0;
guint decode_queue_index = 0;
std::recursive_mutex decoder_lock;
GstD3D12WAFlags wa_flags = GST_D3D12_WA_NONE;
GstD3D12CommandListPool *direct_cl_pool = nullptr;
GstD3D12CommandAllocatorPool *direct_ca_pool = nullptr;
@ -239,6 +249,8 @@ struct DeviceInner
GstD3D12FenceDataPool *fence_data_pool = nullptr;
D3D12_FEATURE_DATA_ARCHITECTURE feature_data_arch = { };
guint rtv_inc_size;
guint adapter_index = 0;
@ -961,6 +973,24 @@ gst_d3d12_device_find_adapter (const GstD3D12DeviceConstructData * data,
return E_FAIL;
}
/* Heuristic check for an Intel adapter of generation 11 or older.
 *
 * @vendor_id: PCI vendor id of the adapter
 * @feature_level: maximum feature level supported by the device
 * @description: adapter description string
 *
 * Returns: TRUE if the vendor is Intel (0x8086) and either the feature level
 * is 12.0 or lower, or the description contains "HD"; FALSE otherwise.
 */
static gboolean
is_intel_gen11_or_older (UINT vendor_id, D3D_FEATURE_LEVEL feature_level,
    const std::string & description)
{
  const UINT intel_vendor_id = 0x8086;

  if (vendor_id == intel_vendor_id) {
    /* Arc GPU supports feature level 12.2 and iGPU Xe does 12.1, so
     * anything at 12.0 or below is treated as an old generation */
    if (feature_level <= D3D_FEATURE_LEVEL_12_0)
      return TRUE;

    /* gen 11 is named "UHD xxx" and older ones "HD xxx"; a substring search
     * for "HD" matches both spellings.
     * NOTE(review): any description containing "HD" matches here, whatever
     * the actual generation - confirm this is acceptable. */
    const bool named_hd = description.find ("HD") != std::string::npos;
    return named_hd ? TRUE : FALSE;
  }

  return FALSE;
}
static GstD3D12Device *
gst_d3d12_device_new_internal (const GstD3D12DeviceConstructData * data)
{
@ -970,6 +1000,13 @@ gst_d3d12_device_new_internal (const GstD3D12DeviceConstructData * data)
HRESULT hr;
UINT factory_flags = 0;
guint index = 0;
const D3D_FEATURE_LEVEL feature_levels[] = {
D3D_FEATURE_LEVEL_11_0,
D3D_FEATURE_LEVEL_11_1,
D3D_FEATURE_LEVEL_12_0,
D3D_FEATURE_LEVEL_12_1,
D3D_FEATURE_LEVEL_12_2,
};
gst_d3d12_device_enable_debug ();
gst_d3d12_device_enable_dred ();
@ -1014,16 +1051,31 @@ gst_d3d12_device_new_internal (const GstD3D12DeviceConstructData * data)
priv->device_id = desc.DeviceId;
priv->adapter_index = index;
device->CheckFeatureSupport (D3D12_FEATURE_ARCHITECTURE,
&priv->feature_data_arch, sizeof (D3D12_FEATURE_DATA_ARCHITECTURE));
D3D12_FEATURE_DATA_FEATURE_LEVELS flevel = { };
flevel.NumFeatureLevels = G_N_ELEMENTS (feature_levels);
flevel.pFeatureLevelsRequested = feature_levels;
device->CheckFeatureSupport (D3D12_FEATURE_FEATURE_LEVELS,
&flevel, sizeof (flevel));
std::wstring_convert < std::codecvt_utf8 < wchar_t >, wchar_t >converter;
priv->description = converter.to_bytes (desc.Description);
GST_INFO_OBJECT (self,
"adapter index %d: D3D12 device vendor-id: 0x%04x, device-id: 0x%04x, "
"Flags: 0x%x, adapter-luid: %" G_GINT64_FORMAT ", %s",
"Flags: 0x%x, adapter-luid: %" G_GINT64_FORMAT ", is-UMA: %d, "
"feature-level: 0x%x, %s",
priv->adapter_index, desc.VendorId, desc.DeviceId, desc.Flags,
priv->adapter_luid, priv->description.c_str ());
priv->adapter_luid, priv->feature_data_arch.UMA,
flevel.MaxSupportedFeatureLevel, priv->description.c_str ());
gst_d3d12_device_setup_format_table (self);
if (priv->feature_data_arch.UMA && is_intel_gen11_or_older (priv->vendor_id,
flevel.MaxSupportedFeatureLevel, priv->description)) {
priv->wa_flags |= GST_D3D12_WA_DECODER_RACE;
}
if (gst_d3d12_device_enable_debug ()) {
ComPtr < ID3D12InfoQueue > info_queue;
@ -1071,6 +1123,30 @@ gst_d3d12_device_new_internal (const GstD3D12DeviceConstructData * data)
priv->fence_data_pool = gst_d3d12_fence_data_pool_new ();
{
  /* Create the device-held video decode command queues. This is attempted
   * only when the device can be queried for ID3D12VideoDevice. */
  ComPtr < ID3D12VideoDevice > video_device;
  auto hr = device.As (&video_device);
  if (SUCCEEDED (hr)) {
    queue_desc.Type = D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE;
    for (guint i = 0; i < G_N_ELEMENTS (priv->decode_queue); i++) {
      priv->decode_queue[i] = gst_d3d12_command_queue_new (device.Get (),
          &queue_desc, D3D12_FENCE_FLAG_NONE, 8);
      /* Test the element that was just created. The array itself decays
       * to a pointer that is never null, so testing it would let a failed
       * creation fall through to the flag-set below on a null object and
       * be counted as a valid queue. */
      if (!priv->decode_queue[i])
        break;

      GST_OBJECT_FLAG_SET (priv->decode_queue[i],
          GST_OBJECT_FLAG_MAY_BE_LEAKED);
      /* Only successfully created queues are counted; the drain, clear and
       * round-robin code iterate over [0, num_decode_queue) */
      priv->num_decode_queue++;

      /* XXX: Old Intel iGPU crashes with multiple decode queues */
      if ((priv->wa_flags & GST_D3D12_WA_DECODER_RACE) ==
          GST_D3D12_WA_DECODER_RACE) {
        break;
      }
    }
  }
}
GST_OBJECT_FLAG_SET (priv->direct_queue, GST_OBJECT_FLAG_MAY_BE_LEAKED);
GST_OBJECT_FLAG_SET (priv->direct_cl_pool, GST_OBJECT_FLAG_MAY_BE_LEAKED);
GST_OBJECT_FLAG_SET (priv->direct_ca_pool, GST_OBJECT_FLAG_MAY_BE_LEAKED);
@ -1823,3 +1899,48 @@ gst_d3d12_device_check_device_removed (GstD3D12Device * device)
manager->OnDeviceRemoved (priv->adapter_luid);
}
}
/**
 * gst_d3d12_device_get_decode_queue:
 * @device: a #GstD3D12Device
 *
 * Picks one of the decode command queues held by @device. Queues are handed
 * out in rotation: each call returns the queue at the current index and
 * advances the index, wrapping around at the number of available queues.
 *
 * Returns: a decode command queue, or %NULL if @device holds no decode
 * queue. No reference is taken by this function.
 */
GstD3D12CommandQueue *
gst_d3d12_device_get_decode_queue (GstD3D12Device * device)
{
  g_return_val_if_fail (GST_IS_D3D12_DEVICE (device), nullptr);

  auto inner = device->priv->inner;
  if (inner->num_decode_queue == 0)
    return nullptr;

  /* The rotating index is updated under the device lock */
  std::lock_guard < std::mutex > guard (inner->lock);
  const guint current = inner->decode_queue_index;
  inner->decode_queue_index = (current + 1) % inner->num_decode_queue;

  return inner->decode_queue[current];
}
/**
 * gst_d3d12_device_decoder_lock:
 * @device: a #GstD3D12Device
 *
 * Takes the device-wide decoder lock if @device has the
 * GST_D3D12_WA_DECODER_RACE workaround flag set. Does nothing otherwise.
 * The underlying mutex is recursive.
 */
void
gst_d3d12_device_decoder_lock (GstD3D12Device * device)
{
  g_return_if_fail (GST_IS_D3D12_DEVICE (device));

  auto inner = device->priv->inner;

  /* Devices without the workaround flag never touch the lock */
  if ((inner->wa_flags & GST_D3D12_WA_DECODER_RACE) !=
      GST_D3D12_WA_DECODER_RACE) {
    return;
  }

  inner->decoder_lock.lock ();
}
/**
 * gst_d3d12_device_decoder_unlock:
 * @device: a #GstD3D12Device
 *
 * Releases the device-wide decoder lock if @device has the
 * GST_D3D12_WA_DECODER_RACE workaround flag set. Does nothing otherwise.
 * Must be paired with a preceding gst_d3d12_device_decoder_lock() call.
 */
void
gst_d3d12_device_decoder_unlock (GstD3D12Device * device)
{
  g_return_if_fail (GST_IS_D3D12_DEVICE (device));

  auto inner = device->priv->inner;

  /* Mirrors the condition used by the lock call, so lock and unlock stay
   * balanced */
  if ((inner->wa_flags & GST_D3D12_WA_DECODER_RACE) !=
      GST_D3D12_WA_DECODER_RACE) {
    return;
  }

  inner->decoder_lock.unlock ();
}
/**
 * gst_d3d12_device_get_workaround_flags:
 * @device: a #GstD3D12Device
 *
 * Returns: the workaround flags stored on @device, or GST_D3D12_WA_NONE if
 * @device is not a valid #GstD3D12Device
 */
GstD3D12WAFlags
gst_d3d12_device_get_workaround_flags (GstD3D12Device * device)
{
  g_return_val_if_fail (GST_IS_D3D12_DEVICE (device), GST_D3D12_WA_NONE);

  auto inner = device->priv->inner;

  return inner->wa_flags;
}

View file

@ -226,15 +226,14 @@ struct DecoderCmdData
{
CloseHandle (event_handle);
gst_clear_object (&ca_pool);
gst_clear_object (&queue);
}
ComPtr<ID3D12Device> device;
ComPtr<ID3D12VideoDevice> video_device;
ComPtr<ID3D12VideoDecodeCommandList> cl;
GstD3D12CommandQueue *queue = nullptr;
GstD3D12CommandAllocatorPool *ca_pool = nullptr;
bool need_full_drain = false;
/* Fence to wait at command record thread */
HANDLE event_handle;
@ -441,11 +440,7 @@ gst_d3d12_decoder_open (GstD3D12Decoder * decoder, GstElement * element)
return FALSE;
}
D3D12_COMMAND_QUEUE_DESC desc = { };
desc.Type = D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE;
desc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
cmd->queue = gst_d3d12_command_queue_new (cmd->device.Get (), &desc,
D3D12_FENCE_FLAG_NONE, ASYNC_DEPTH * 2);
cmd->queue = gst_d3d12_device_get_decode_queue (decoder->device);
if (!cmd->queue) {
GST_ERROR_OBJECT (element, "Couldn't create command queue");
return FALSE;
@ -454,6 +449,10 @@ gst_d3d12_decoder_open (GstD3D12Decoder * decoder, GstElement * element)
cmd->ca_pool = gst_d3d12_command_allocator_pool_new (cmd->device.Get (),
D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE);
auto flags = gst_d3d12_device_get_workaround_flags (decoder->device);
if ((flags & GST_D3D12_WA_DECODER_RACE) == GST_D3D12_WA_DECODER_RACE)
cmd->need_full_drain = true;
priv->cmd = std::move (cmd);
priv->flushing = false;
@ -511,13 +510,11 @@ gst_d3d12_decoder_close (GstD3D12Decoder * decoder)
GST_DEBUG_OBJECT (decoder, "Close");
if (priv->cmd) {
gst_d3d12_command_queue_fence_wait (priv->cmd->queue, priv->cmd->fence_val,
priv->cmd->event_handle);
}
{
GstD3D12DeviceDecoderLockGuard lk (decoder->device);
priv->session = nullptr;
priv->cmd = nullptr;
}
gst_clear_object (&decoder->device);
@ -540,6 +537,13 @@ gst_d3d12_decoder_configure (GstD3D12Decoder * decoder,
GST_FLOW_ERROR);
g_return_val_if_fail (dpb_size > 0, GST_FLOW_ERROR);
if (!decoder->device) {
GST_ERROR_OBJECT (decoder, "Device was not configured");
return GST_FLOW_ERROR;
}
GstD3D12DeviceDecoderLockGuard dlk (decoder->device);
GstD3D12Format device_format;
auto priv = decoder->priv;
HRESULT hr;
@ -800,8 +804,12 @@ gst_d3d12_decoder_stop (GstD3D12Decoder * decoder)
priv->flushing = true;
if (priv->cmd) {
gst_d3d12_command_queue_fence_wait (priv->cmd->queue, priv->cmd->fence_val,
priv->cmd->event_handle);
if (priv->cmd->need_full_drain) {
gst_d3d12_command_queue_drain (priv->cmd->queue);
} else {
gst_d3d12_command_queue_fence_wait (priv->cmd->queue,
priv->cmd->fence_val, priv->cmd->event_handle);
}
}
if (priv->output_thread && priv->session) {
@ -814,6 +822,7 @@ gst_d3d12_decoder_stop (GstD3D12Decoder * decoder)
g_clear_pointer (&priv->output_thread, g_thread_join);
priv->flushing = false;
GstD3D12DeviceDecoderLockGuard lk (decoder->device);
priv->session = nullptr;
return TRUE;
@ -1112,8 +1121,8 @@ gst_d3d12_decoder_end_picture (GstD3D12Decoder * decoder,
memset (&in_args, 0, sizeof (D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS));
memset (&out_args, 0, sizeof (D3D12_VIDEO_DECODE_OUTPUT_STREAM_ARGUMENTS));
GstD3D12DeviceDecoderLockGuard dlk (decoder->device);
auto ca = gst_d3d12_command_allocator_get_handle (gst_ca);
hr = ca->Reset ();
if (!gst_d3d12_result (hr, decoder->device)) {
GST_ERROR_OBJECT (decoder, "Couldn't reset command allocator");
@ -1299,17 +1308,6 @@ gst_d3d12_decoder_end_picture (GstD3D12Decoder * decoder,
}
decoder_pic->fence_val = priv->cmd->fence_val;
auto fence_handle =
gst_d3d12_command_queue_get_fence_handle (priv->cmd->queue);
dmem = (GstD3D12Memory *) gst_buffer_peek_memory (decoder_pic->buffer, 0);
gst_d3d12_memory_set_external_fence (dmem,
fence_handle, priv->cmd->fence_val);
if (decoder_pic->output_buffer) {
dmem = (GstD3D12Memory *)
gst_buffer_peek_memory (decoder_pic->output_buffer, 0);
gst_d3d12_memory_set_external_fence (dmem,
fence_handle, priv->cmd->fence_val);
}
GstD3D12FenceData *fence_data;
gst_d3d12_fence_data_pool_acquire (priv->fence_data_pool, &fence_data);
@ -1540,10 +1538,8 @@ gst_d3d12_decoder_process_output (GstD3D12Decoder * self,
gst_buffer_ref (buffer));
}
auto fence_handle =
gst_d3d12_command_queue_get_fence_handle (priv->cmd->queue);
gst_d3d12_device_copy_texture_region (self->device, copy_args.size (),
copy_args.data (), fence_data, fence_handle, decoder_pic->fence_val,
copy_args.data (), fence_data, nullptr, decoder_pic->fence_val,
queue_type, &copy_fence_val);
if (!out_resource) {
@ -1616,6 +1612,8 @@ gst_d3d12_decoder_output_loop (GstD3D12Decoder * self)
GST_DEBUG_OBJECT (self, "Entering output thread");
auto event_handle = CreateEventEx (nullptr, nullptr, 0, EVENT_ALL_ACCESS);
while (true) {
DecoderOutputData output_data;
{
@ -1636,6 +1634,9 @@ gst_d3d12_decoder_output_loop (GstD3D12Decoder * self)
auto decoder_pic = get_decoder_picture (output_data.picture);
g_assert (decoder_pic);
gst_d3d12_command_queue_fence_wait (priv->cmd->queue,
decoder_pic->fence_val, event_handle);
if (priv->flushing) {
GST_DEBUG_OBJECT (self, "Drop framem, we are flushing");
gst_codec_picture_unref (output_data.picture);
@ -1660,6 +1661,8 @@ gst_d3d12_decoder_output_loop (GstD3D12Decoder * self)
GST_DEBUG_OBJECT (self, "Leaving output thread");
CloseHandle (event_handle);
return nullptr;
}