mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2024-11-26 19:51:11 +00:00
d3d12: Workaround for Intel iGPU decoder crash
Observed an Intel GPU driver crash when multiple decoders are configured in a process. It might be caused by frequent command queue alloc/free or by too many in-flight decoding commands. In order to make the command queue persistent and limit the number of in-flight command lists, hold a global decoding command queue. Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/7019>
This commit is contained in:
parent
e5b5d223b4
commit
a2df44da7d
5 changed files with 199 additions and 33 deletions
|
@ -89,4 +89,26 @@ private:
|
|||
GstD3D12Device *device_;
|
||||
};
|
||||
|
||||
class GstD3D12DeviceDecoderLockGuard
|
||||
{
|
||||
public:
|
||||
explicit GstD3D12DeviceDecoderLockGuard(GstD3D12Device * device) : device_ (device)
|
||||
{
|
||||
if (device_)
|
||||
gst_d3d12_device_decoder_lock (device_);
|
||||
}
|
||||
|
||||
~GstD3D12DeviceDecoderLockGuard()
|
||||
{
|
||||
if (device_)
|
||||
gst_d3d12_device_decoder_unlock (device_);
|
||||
}
|
||||
|
||||
GstD3D12DeviceDecoderLockGuard(const GstD3D12DeviceDecoderLockGuard&) = delete;
|
||||
GstD3D12DeviceDecoderLockGuard& operator=(const GstD3D12DeviceDecoderLockGuard&) = delete;
|
||||
|
||||
private:
|
||||
GstD3D12Device *device_;
|
||||
};
|
||||
|
||||
#endif /* __cplusplus */
|
||||
|
|
|
@ -164,7 +164,7 @@ gst_d3d12_command_queue_new (ID3D12Device * device,
|
|||
ComPtr < ID3D12CommandQueue > cq;
|
||||
auto hr = device->CreateCommandQueue (desc, IID_PPV_ARGS (&cq));
|
||||
if (FAILED (hr)) {
|
||||
GST_ERROR ("Couldn't create command queue, hr: 0x%x", (guint) hr);
|
||||
GST_WARNING ("Couldn't create command queue, hr: 0x%x", (guint) hr);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
|
|
@ -25,6 +25,14 @@
|
|||
|
||||
G_BEGIN_DECLS
|
||||
|
||||
/* Bit flags describing driver workarounds that apply to a device.
 * Values are combined and tested with bitwise operators. */
enum GstD3D12WAFlags
{
  /* No workaround is needed */
  GST_D3D12_WA_NONE = 0,

  /* Decoder work on this device is serialized: the device decoder lock
   * becomes effective when this bit is set */
  GST_D3D12_WA_DECODER_RACE = 0x1,
};
|
||||
|
||||
DEFINE_ENUM_FLAG_OPERATORS (GstD3D12WAFlags);
|
||||
|
||||
struct GstD3D12CopyTextureRegionArgs
|
||||
{
|
||||
D3D12_TEXTURE_COPY_LOCATION dst;
|
||||
|
@ -71,5 +79,17 @@ void gst_d3d12_device_11on12_unlock (GstD3D12Device * device);
|
|||
GST_D3D12_API
|
||||
void gst_d3d12_device_check_device_removed (GstD3D12Device * device);
|
||||
|
||||
GST_D3D12_API
|
||||
GstD3D12CommandQueue * gst_d3d12_device_get_decode_queue (GstD3D12Device * device);
|
||||
|
||||
GST_D3D12_API
|
||||
void gst_d3d12_device_decoder_lock (GstD3D12Device * device);
|
||||
|
||||
GST_D3D12_API
|
||||
void gst_d3d12_device_decoder_unlock (GstD3D12Device * device);
|
||||
|
||||
GST_D3D12_API
|
||||
GstD3D12WAFlags gst_d3d12_device_get_workaround_flags (GstD3D12Device * device);
|
||||
|
||||
G_END_DECLS
|
||||
|
||||
|
|
|
@ -126,6 +126,8 @@ struct DeviceInner
|
|||
|
||||
gst_clear_object (&direct_queue);
|
||||
gst_clear_object (&copy_queue);
|
||||
for (guint i = 0; i < num_decode_queue; i++)
|
||||
gst_clear_object (&decode_queue[i]);
|
||||
|
||||
gst_clear_object (&direct_ca_pool);
|
||||
gst_clear_object (&direct_cl_pool);
|
||||
|
@ -154,6 +156,9 @@ struct DeviceInner
|
|||
|
||||
if (copy_queue)
|
||||
gst_d3d12_command_queue_drain (copy_queue);
|
||||
|
||||
for (guint i = 0; i < num_decode_queue; i++)
|
||||
gst_d3d12_command_queue_drain (decode_queue[i]);
|
||||
}
|
||||
|
||||
void ReportLiveObjects ()
|
||||
|
@ -230,6 +235,11 @@ struct DeviceInner
|
|||
|
||||
GstD3D12CommandQueue *direct_queue = nullptr;
|
||||
GstD3D12CommandQueue *copy_queue = nullptr;
|
||||
GstD3D12CommandQueue *decode_queue[2] = { nullptr, };
|
||||
guint num_decode_queue = 0;
|
||||
guint decode_queue_index = 0;
|
||||
std::recursive_mutex decoder_lock;
|
||||
GstD3D12WAFlags wa_flags = GST_D3D12_WA_NONE;
|
||||
|
||||
GstD3D12CommandListPool *direct_cl_pool = nullptr;
|
||||
GstD3D12CommandAllocatorPool *direct_ca_pool = nullptr;
|
||||
|
@ -239,6 +249,8 @@ struct DeviceInner
|
|||
|
||||
GstD3D12FenceDataPool *fence_data_pool = nullptr;
|
||||
|
||||
D3D12_FEATURE_DATA_ARCHITECTURE feature_data_arch = { };
|
||||
|
||||
guint rtv_inc_size;
|
||||
|
||||
guint adapter_index = 0;
|
||||
|
@ -961,6 +973,24 @@ gst_d3d12_device_find_adapter (const GstD3D12DeviceConstructData * data,
|
|||
return E_FAIL;
|
||||
}
|
||||
|
||||
/* Heuristic check for an Intel adapter of generation 11 or older.
 *
 * @vendor_id: PCI vendor id of the adapter
 * @feature_level: maximum feature level supported by the device
 * @description: adapter description string
 *
 * Returns TRUE only for vendor id 0x8086 when either the feature level is
 * 12.0 or lower, or the description contains "HD". */
static gboolean
is_intel_gen11_or_older (UINT vendor_id, D3D_FEATURE_LEVEL feature_level,
    const std::string & description)
{
  if (vendor_id == 0x8086) {
    /* Arc GPU supports feature level 12.2 and iGPU Xe does 12.1,
     * so anything at or below 12.0 is treated as an older generation */
    const bool below_xe_level = feature_level <= D3D_FEATURE_LEVEL_12_0;

    /* gen 11 is UHD xxx, older ones are HD xxx. The substring search for
     * "HD" matches both naming schemes */
    if (below_xe_level || description.find ("HD") != std::string::npos)
      return TRUE;
  }

  return FALSE;
}
|
||||
|
||||
static GstD3D12Device *
|
||||
gst_d3d12_device_new_internal (const GstD3D12DeviceConstructData * data)
|
||||
{
|
||||
|
@ -970,6 +1000,13 @@ gst_d3d12_device_new_internal (const GstD3D12DeviceConstructData * data)
|
|||
HRESULT hr;
|
||||
UINT factory_flags = 0;
|
||||
guint index = 0;
|
||||
const D3D_FEATURE_LEVEL feature_levels[] = {
|
||||
D3D_FEATURE_LEVEL_11_0,
|
||||
D3D_FEATURE_LEVEL_11_1,
|
||||
D3D_FEATURE_LEVEL_12_0,
|
||||
D3D_FEATURE_LEVEL_12_1,
|
||||
D3D_FEATURE_LEVEL_12_2,
|
||||
};
|
||||
|
||||
gst_d3d12_device_enable_debug ();
|
||||
gst_d3d12_device_enable_dred ();
|
||||
|
@ -1014,16 +1051,31 @@ gst_d3d12_device_new_internal (const GstD3D12DeviceConstructData * data)
|
|||
priv->device_id = desc.DeviceId;
|
||||
priv->adapter_index = index;
|
||||
|
||||
device->CheckFeatureSupport (D3D12_FEATURE_ARCHITECTURE,
|
||||
&priv->feature_data_arch, sizeof (D3D12_FEATURE_DATA_ARCHITECTURE));
|
||||
|
||||
D3D12_FEATURE_DATA_FEATURE_LEVELS flevel = { };
|
||||
flevel.NumFeatureLevels = G_N_ELEMENTS (feature_levels);
|
||||
flevel.pFeatureLevelsRequested = feature_levels;
|
||||
device->CheckFeatureSupport (D3D12_FEATURE_FEATURE_LEVELS,
|
||||
&flevel, sizeof (flevel));
|
||||
|
||||
std::wstring_convert < std::codecvt_utf8 < wchar_t >, wchar_t >converter;
|
||||
priv->description = converter.to_bytes (desc.Description);
|
||||
|
||||
GST_INFO_OBJECT (self,
|
||||
"adapter index %d: D3D12 device vendor-id: 0x%04x, device-id: 0x%04x, "
|
||||
"Flags: 0x%x, adapter-luid: %" G_GINT64_FORMAT ", %s",
|
||||
"Flags: 0x%x, adapter-luid: %" G_GINT64_FORMAT ", is-UMA: %d, "
|
||||
"feature-level: 0x%x, %s",
|
||||
priv->adapter_index, desc.VendorId, desc.DeviceId, desc.Flags,
|
||||
priv->adapter_luid, priv->description.c_str ());
|
||||
priv->adapter_luid, priv->feature_data_arch.UMA,
|
||||
flevel.MaxSupportedFeatureLevel, priv->description.c_str ());
|
||||
|
||||
gst_d3d12_device_setup_format_table (self);
|
||||
if (priv->feature_data_arch.UMA && is_intel_gen11_or_older (priv->vendor_id,
|
||||
flevel.MaxSupportedFeatureLevel, priv->description)) {
|
||||
priv->wa_flags |= GST_D3D12_WA_DECODER_RACE;
|
||||
}
|
||||
|
||||
if (gst_d3d12_device_enable_debug ()) {
|
||||
ComPtr < ID3D12InfoQueue > info_queue;
|
||||
|
@ -1071,6 +1123,30 @@ gst_d3d12_device_new_internal (const GstD3D12DeviceConstructData * data)
|
|||
|
||||
priv->fence_data_pool = gst_d3d12_fence_data_pool_new ();
|
||||
|
||||
{
  /* Create the decode command queues owned by the device. Queues are only
   * created when the device exposes ID3D12VideoDevice. On failure the
   * remaining slots stay null and num_decode_queue counts only the queues
   * that were actually created. */
  ComPtr < ID3D12VideoDevice > video_device;
  auto hr = device.As (&video_device);
  if (SUCCEEDED (hr)) {
    queue_desc.Type = D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE;
    for (guint i = 0; i < G_N_ELEMENTS (priv->decode_queue); i++) {
      priv->decode_queue[i] = gst_d3d12_command_queue_new (device.Get (),
          &queue_desc, D3D12_FENCE_FLAG_NONE, 8);

      /* Check the slot that was just filled. Testing the array name itself
       * is always non-null and would let a failed creation reach the
       * flag-set below with a null object */
      if (!priv->decode_queue[i])
        break;

      GST_OBJECT_FLAG_SET (priv->decode_queue[i],
          GST_OBJECT_FLAG_MAY_BE_LEAKED);
      priv->num_decode_queue++;

      /* XXX: Old Intel iGPU crashes with multiple decode queues */
      if ((priv->wa_flags & GST_D3D12_WA_DECODER_RACE) ==
          GST_D3D12_WA_DECODER_RACE) {
        break;
      }
    }
  }
}
|
||||
|
||||
GST_OBJECT_FLAG_SET (priv->direct_queue, GST_OBJECT_FLAG_MAY_BE_LEAKED);
|
||||
GST_OBJECT_FLAG_SET (priv->direct_cl_pool, GST_OBJECT_FLAG_MAY_BE_LEAKED);
|
||||
GST_OBJECT_FLAG_SET (priv->direct_ca_pool, GST_OBJECT_FLAG_MAY_BE_LEAKED);
|
||||
|
@ -1823,3 +1899,48 @@ gst_d3d12_device_check_device_removed (GstD3D12Device * device)
|
|||
manager->OnDeviceRemoved (priv->adapter_luid);
|
||||
}
|
||||
}
|
||||
|
||||
/* Hands out one of the device's decode command queues in round-robin order.
 *
 * Returns nullptr when @device is not a GstD3D12Device or when the device
 * holds no decode queue. The returned pointer is the one stored in the
 * device; this function does not take an additional reference. */
GstD3D12CommandQueue *
gst_d3d12_device_get_decode_queue (GstD3D12Device * device)
{
  g_return_val_if_fail (GST_IS_D3D12_DEVICE (device), nullptr);

  auto inner = device->priv->inner;
  if (inner->num_decode_queue == 0)
    return nullptr;

  /* The rotating index is updated under the device lock */
  std::lock_guard < std::mutex > guard (inner->lock);
  const guint current = inner->decode_queue_index;
  inner->decode_queue_index = (current + 1) % inner->num_decode_queue;

  return inner->decode_queue[current];
}
|
||||
|
||||
void
|
||||
gst_d3d12_device_decoder_lock (GstD3D12Device * device)
|
||||
{
|
||||
g_return_if_fail (GST_IS_D3D12_DEVICE (device));
|
||||
|
||||
auto priv = device->priv->inner;
|
||||
if ((priv->wa_flags & GST_D3D12_WA_DECODER_RACE) == GST_D3D12_WA_DECODER_RACE)
|
||||
priv->decoder_lock.lock ();
|
||||
}
|
||||
|
||||
void
|
||||
gst_d3d12_device_decoder_unlock (GstD3D12Device * device)
|
||||
{
|
||||
g_return_if_fail (GST_IS_D3D12_DEVICE (device));
|
||||
|
||||
auto priv = device->priv->inner;
|
||||
if ((priv->wa_flags & GST_D3D12_WA_DECODER_RACE) == GST_D3D12_WA_DECODER_RACE)
|
||||
priv->decoder_lock.unlock ();
|
||||
}
|
||||
|
||||
/* Reports the workaround flags stored on the device.
 *
 * Returns GST_D3D12_WA_NONE when @device is not a GstD3D12Device. */
GstD3D12WAFlags
gst_d3d12_device_get_workaround_flags (GstD3D12Device * device)
{
  g_return_val_if_fail (GST_IS_D3D12_DEVICE (device), GST_D3D12_WA_NONE);

  auto inner = device->priv->inner;

  return inner->wa_flags;
}
|
||||
|
|
|
@ -226,15 +226,14 @@ struct DecoderCmdData
|
|||
{
|
||||
CloseHandle (event_handle);
|
||||
gst_clear_object (&ca_pool);
|
||||
gst_clear_object (&queue);
|
||||
}
|
||||
|
||||
ComPtr<ID3D12Device> device;
|
||||
|
||||
ComPtr<ID3D12VideoDevice> video_device;
|
||||
ComPtr<ID3D12VideoDecodeCommandList> cl;
|
||||
GstD3D12CommandQueue *queue = nullptr;
|
||||
GstD3D12CommandAllocatorPool *ca_pool = nullptr;
|
||||
bool need_full_drain = false;
|
||||
|
||||
/* Fence to wait at command record thread */
|
||||
HANDLE event_handle;
|
||||
|
@ -441,11 +440,7 @@ gst_d3d12_decoder_open (GstD3D12Decoder * decoder, GstElement * element)
|
|||
return FALSE;
|
||||
}
|
||||
|
||||
D3D12_COMMAND_QUEUE_DESC desc = { };
|
||||
desc.Type = D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE;
|
||||
desc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
|
||||
cmd->queue = gst_d3d12_command_queue_new (cmd->device.Get (), &desc,
|
||||
D3D12_FENCE_FLAG_NONE, ASYNC_DEPTH * 2);
|
||||
cmd->queue = gst_d3d12_device_get_decode_queue (decoder->device);
|
||||
if (!cmd->queue) {
|
||||
GST_ERROR_OBJECT (element, "Couldn't create command queue");
|
||||
return FALSE;
|
||||
|
@ -454,6 +449,10 @@ gst_d3d12_decoder_open (GstD3D12Decoder * decoder, GstElement * element)
|
|||
cmd->ca_pool = gst_d3d12_command_allocator_pool_new (cmd->device.Get (),
|
||||
D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE);
|
||||
|
||||
auto flags = gst_d3d12_device_get_workaround_flags (decoder->device);
|
||||
if ((flags & GST_D3D12_WA_DECODER_RACE) == GST_D3D12_WA_DECODER_RACE)
|
||||
cmd->need_full_drain = true;
|
||||
|
||||
priv->cmd = std::move (cmd);
|
||||
priv->flushing = false;
|
||||
|
||||
|
@ -511,14 +510,12 @@ gst_d3d12_decoder_close (GstD3D12Decoder * decoder)
|
|||
|
||||
GST_DEBUG_OBJECT (decoder, "Close");
|
||||
|
||||
if (priv->cmd) {
|
||||
gst_d3d12_command_queue_fence_wait (priv->cmd->queue, priv->cmd->fence_val,
|
||||
priv->cmd->event_handle);
|
||||
{
|
||||
GstD3D12DeviceDecoderLockGuard lk (decoder->device);
|
||||
priv->session = nullptr;
|
||||
priv->cmd = nullptr;
|
||||
}
|
||||
|
||||
priv->session = nullptr;
|
||||
priv->cmd = nullptr;
|
||||
|
||||
gst_clear_object (&decoder->device);
|
||||
|
||||
return TRUE;
|
||||
|
@ -540,6 +537,13 @@ gst_d3d12_decoder_configure (GstD3D12Decoder * decoder,
|
|||
GST_FLOW_ERROR);
|
||||
g_return_val_if_fail (dpb_size > 0, GST_FLOW_ERROR);
|
||||
|
||||
if (!decoder->device) {
|
||||
GST_ERROR_OBJECT (decoder, "Device was not configured");
|
||||
return GST_FLOW_ERROR;
|
||||
}
|
||||
|
||||
GstD3D12DeviceDecoderLockGuard dlk (decoder->device);
|
||||
|
||||
GstD3D12Format device_format;
|
||||
auto priv = decoder->priv;
|
||||
HRESULT hr;
|
||||
|
@ -800,8 +804,12 @@ gst_d3d12_decoder_stop (GstD3D12Decoder * decoder)
|
|||
|
||||
priv->flushing = true;
|
||||
if (priv->cmd) {
|
||||
gst_d3d12_command_queue_fence_wait (priv->cmd->queue, priv->cmd->fence_val,
|
||||
priv->cmd->event_handle);
|
||||
if (priv->cmd->need_full_drain) {
|
||||
gst_d3d12_command_queue_drain (priv->cmd->queue);
|
||||
} else {
|
||||
gst_d3d12_command_queue_fence_wait (priv->cmd->queue,
|
||||
priv->cmd->fence_val, priv->cmd->event_handle);
|
||||
}
|
||||
}
|
||||
|
||||
if (priv->output_thread && priv->session) {
|
||||
|
@ -814,6 +822,7 @@ gst_d3d12_decoder_stop (GstD3D12Decoder * decoder)
|
|||
g_clear_pointer (&priv->output_thread, g_thread_join);
|
||||
priv->flushing = false;
|
||||
|
||||
GstD3D12DeviceDecoderLockGuard lk (decoder->device);
|
||||
priv->session = nullptr;
|
||||
|
||||
return TRUE;
|
||||
|
@ -1112,8 +1121,8 @@ gst_d3d12_decoder_end_picture (GstD3D12Decoder * decoder,
|
|||
memset (&in_args, 0, sizeof (D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS));
|
||||
memset (&out_args, 0, sizeof (D3D12_VIDEO_DECODE_OUTPUT_STREAM_ARGUMENTS));
|
||||
|
||||
GstD3D12DeviceDecoderLockGuard dlk (decoder->device);
|
||||
auto ca = gst_d3d12_command_allocator_get_handle (gst_ca);
|
||||
|
||||
hr = ca->Reset ();
|
||||
if (!gst_d3d12_result (hr, decoder->device)) {
|
||||
GST_ERROR_OBJECT (decoder, "Couldn't reset command allocator");
|
||||
|
@ -1299,17 +1308,6 @@ gst_d3d12_decoder_end_picture (GstD3D12Decoder * decoder,
|
|||
}
|
||||
|
||||
decoder_pic->fence_val = priv->cmd->fence_val;
|
||||
auto fence_handle =
|
||||
gst_d3d12_command_queue_get_fence_handle (priv->cmd->queue);
|
||||
dmem = (GstD3D12Memory *) gst_buffer_peek_memory (decoder_pic->buffer, 0);
|
||||
gst_d3d12_memory_set_external_fence (dmem,
|
||||
fence_handle, priv->cmd->fence_val);
|
||||
if (decoder_pic->output_buffer) {
|
||||
dmem = (GstD3D12Memory *)
|
||||
gst_buffer_peek_memory (decoder_pic->output_buffer, 0);
|
||||
gst_d3d12_memory_set_external_fence (dmem,
|
||||
fence_handle, priv->cmd->fence_val);
|
||||
}
|
||||
|
||||
GstD3D12FenceData *fence_data;
|
||||
gst_d3d12_fence_data_pool_acquire (priv->fence_data_pool, &fence_data);
|
||||
|
@ -1540,10 +1538,8 @@ gst_d3d12_decoder_process_output (GstD3D12Decoder * self,
|
|||
gst_buffer_ref (buffer));
|
||||
}
|
||||
|
||||
auto fence_handle =
|
||||
gst_d3d12_command_queue_get_fence_handle (priv->cmd->queue);
|
||||
gst_d3d12_device_copy_texture_region (self->device, copy_args.size (),
|
||||
copy_args.data (), fence_data, fence_handle, decoder_pic->fence_val,
|
||||
copy_args.data (), fence_data, nullptr, decoder_pic->fence_val,
|
||||
queue_type, &copy_fence_val);
|
||||
|
||||
if (!out_resource) {
|
||||
|
@ -1616,6 +1612,8 @@ gst_d3d12_decoder_output_loop (GstD3D12Decoder * self)
|
|||
|
||||
GST_DEBUG_OBJECT (self, "Entering output thread");
|
||||
|
||||
auto event_handle = CreateEventEx (nullptr, nullptr, 0, EVENT_ALL_ACCESS);
|
||||
|
||||
while (true) {
|
||||
DecoderOutputData output_data;
|
||||
{
|
||||
|
@ -1636,6 +1634,9 @@ gst_d3d12_decoder_output_loop (GstD3D12Decoder * self)
|
|||
auto decoder_pic = get_decoder_picture (output_data.picture);
|
||||
g_assert (decoder_pic);
|
||||
|
||||
gst_d3d12_command_queue_fence_wait (priv->cmd->queue,
|
||||
decoder_pic->fence_val, event_handle);
|
||||
|
||||
if (priv->flushing) {
|
||||
GST_DEBUG_OBJECT (self, "Drop framem, we are flushing");
|
||||
gst_codec_picture_unref (output_data.picture);
|
||||
|
@ -1660,6 +1661,8 @@ gst_d3d12_decoder_output_loop (GstD3D12Decoder * self)
|
|||
|
||||
GST_DEBUG_OBJECT (self, "Leaving output thread");
|
||||
|
||||
CloseHandle (event_handle);
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue