From 969ab3e664e437e9caa4878405e0e07c43cbe08a Mon Sep 17 00:00:00 2001 From: Seungha Yang Date: Sat, 24 Oct 2020 02:47:22 +0900 Subject: [PATCH] d3d11convert: Add support for conversion using ID3D11VideoProcessor Output texture of d3d11 decoder cannot have the bind flag D3D11_BIND_SHADER_RESOURCE (meaning that it cannot be used for shader input resource). So d3d11convert (and its subclasses) was copying texture into another internal texture to use d3d11 shader. It's obviously overhead and we can avoid texture copy for colorspace conversion or resizing via ID3D11VideoProcessor as it supports decoder output texture. This commit would be a visible optimization for d3d11 decoder with d3d11compositor use case because we can avoid texture copy per frame. Part-of: --- sys/d3d11/gstd3d11colorconvert.c | 177 ++++++++++++++++++++++++++++- sys/d3d11/gstd3d11videoprocessor.c | 8 ++ sys/d3d11/gstd3d11videoprocessor.h | 3 + 3 files changed, 185 insertions(+), 3 deletions(-) diff --git a/sys/d3d11/gstd3d11colorconvert.c b/sys/d3d11/gstd3d11colorconvert.c index 2c9bc05347..13a1777914 100644 --- a/sys/d3d11/gstd3d11colorconvert.c +++ b/sys/d3d11/gstd3d11colorconvert.c @@ -53,6 +53,8 @@ #include "gstd3d11memory.h" #include "gstd3d11device.h" #include "gstd3d11bufferpool.h" +#include "gstd3d11videoprocessor.h" +#include "gstd3d11format.h" GST_DEBUG_CATEGORY_STATIC (gst_d3d11_convert_debug); #define GST_CAT_DEFAULT gst_d3d11_convert_debug @@ -95,6 +97,7 @@ struct _GstD3D11Convert guint num_output_view; GstD3D11ColorConverter *converter; + GstD3D11VideoProcessor *processor; /* used for fallback texture copy */ D3D11_BOX in_src_box; @@ -334,9 +337,8 @@ gst_d3d11_convert_clear_shader_resource (GstD3D11Convert * self) } } - if (self->converter) - gst_d3d11_color_converter_free (self->converter); - self->converter = NULL; + g_clear_pointer (&self->converter, gst_d3d11_color_converter_free); + g_clear_pointer (&self->processor, gst_d3d11_video_processor_free); } static void @@ -1515,6
+1517,79 @@ gst_d3d11_convert_set_info (GstD3D11BaseFilter * filter, return FALSE; } + /* If both input and output formats are native DXGI format */ + if (self->in_d3d11_format->dxgi_format != DXGI_FORMAT_UNKNOWN && + self->out_d3d11_format->dxgi_format != DXGI_FORMAT_UNKNOWN) { + gboolean hardware = FALSE; + GstD3D11VideoProcessor *processor = NULL; + + g_object_get (filter->device, "hardware", &hardware, NULL); + if (hardware) { + processor = gst_d3d11_video_processor_new (filter->device, + in_info->width, in_info->height, out_info->width, out_info->height); + } + + /* check input and output formats are supported by processor */ + if (processor + && !gst_d3d11_video_processor_supports_input_format (processor, + self->in_d3d11_format->dxgi_format)) { + GST_DEBUG_OBJECT (self, + "Input DXGI format %d is not supported by video processor", + self->in_d3d11_format->dxgi_format); + gst_d3d11_video_processor_free (processor); + processor = NULL; + } + + if (processor && + !gst_d3d11_video_processor_supports_output_format (processor, + self->out_d3d11_format->dxgi_format)) { + GST_DEBUG_OBJECT (self, + "Output DXGI format %d is not supported by video processor", + self->out_d3d11_format->dxgi_format); + gst_d3d11_video_processor_free (processor); + processor = NULL; + } + + if (processor) { + gboolean set_color_space = TRUE; +#if (DXGI_HEADER_VERSION >= 4) + const GstDxgiColorSpace *in_color_space; + const GstDxgiColorSpace *out_color_space; + + in_color_space = gst_d3d11_video_info_to_dxgi_color_space (in_info); + out_color_space = gst_d3d11_video_info_to_dxgi_color_space (out_info); + + if (in_color_space && out_color_space) { + DXGI_COLOR_SPACE_TYPE in_type = + (DXGI_COLOR_SPACE_TYPE) in_color_space->dxgi_color_space_type; + DXGI_COLOR_SPACE_TYPE out_type = + (DXGI_COLOR_SPACE_TYPE) out_color_space->dxgi_color_space_type; + + if (!gst_d3d11_video_processor_set_input_dxgi_color_space (processor, + in_type) || + !gst_d3d11_video_processor_set_output_dxgi_color_space 
(processor, + out_type)) { + GST_DEBUG_OBJECT (self, "DXGI colorspace is not supported"); + } else { + GST_DEBUG_OBJECT (self, + "IN DXGI colorspace %d, OUT DXGI colorspace %d", + (guint) in_type, (guint) out_type); + set_color_space = FALSE; + } + } +#endif + + if (set_color_space) { + gst_d3d11_video_processor_set_input_color_space (processor, + &in_info->colorimetry); + gst_d3d11_video_processor_set_output_color_space (processor, + &out_info->colorimetry); + } + + self->processor = processor; + } + } + /* setup D3D11_BOX struct for fallback copy */ self->in_src_box.left = 0; self->in_src_box.top = 0; @@ -1547,6 +1622,99 @@ format_unknown: } } +static gboolean +gst_d3d11_convert_prefer_video_processor (GstD3D11Convert * self, + GstBuffer * inbuf, GstBuffer * outbuf) +{ + GstD3D11BaseFilter *filter = GST_D3D11_BASE_FILTER (self); + GstMemory *mem; + GstD3D11Memory *dmem; + + if (!self->processor) + return FALSE; + + if (gst_buffer_n_memory (inbuf) != 1 || gst_buffer_n_memory (outbuf) != 1) + return FALSE; + + mem = gst_buffer_peek_memory (inbuf, 0); + g_assert (gst_is_d3d11_memory (mem)); + + dmem = (GstD3D11Memory *) mem; + if (dmem->device != filter->device) + return FALSE; + + /* If we can use shader, we prefer to use shader instead of video processor + * because video processor implementation is vendor dependent + * and not flexible */ + if (gst_d3d11_memory_ensure_shader_resource_view (dmem)) + return FALSE; + + if (!gst_d3d11_video_processor_ensure_input_view (self->processor, dmem)) + return FALSE; + + mem = gst_buffer_peek_memory (outbuf, 0); + g_assert (gst_is_d3d11_memory (mem)); + + dmem = (GstD3D11Memory *) mem; + if (dmem->device != filter->device) + return FALSE; + + if (!gst_d3d11_video_processor_ensure_output_view (self->processor, dmem)) + return FALSE; + + return TRUE; +} + +static GstFlowReturn +gst_d3d11_convert_transform_using_processor (GstD3D11Convert * self, + GstBuffer * inbuf, GstBuffer * outbuf) +{ + GstMemory *in_mem, *out_mem; + 
GstD3D11Memory *in_dmem, *out_dmem; + GstMapInfo in_map, out_map; + RECT in_rect, out_rect; + gboolean ret; + + in_mem = gst_buffer_peek_memory (inbuf, 0); + in_dmem = (GstD3D11Memory *) in_mem; + if (!gst_memory_map (in_mem, &in_map, GST_MAP_D3D11 | GST_MAP_READ)) { + GST_ERROR_OBJECT (self, "Failed to map input d3d11 memory"); + return GST_FLOW_ERROR; + } + + out_mem = gst_buffer_peek_memory (outbuf, 0); + out_dmem = (GstD3D11Memory *) out_mem; + if (!gst_memory_map (out_mem, &out_map, GST_MAP_D3D11 | GST_MAP_WRITE)) { + GST_ERROR_OBJECT (self, "Failed to map output d3d11 memory"); + gst_memory_unmap (in_mem, &in_map); + return GST_FLOW_ERROR; + } + + in_rect.left = 0; + in_rect.top = 0; + in_rect.right = self->in_src_box.right; + in_rect.bottom = self->in_src_box.bottom; + + out_rect.left = 0; + out_rect.top = 0; + out_rect.right = self->out_src_box.right; + out_rect.bottom = self->out_src_box.bottom; + + ret = gst_d3d11_video_processor_render (self->processor, + &in_rect, in_dmem->processor_input_view, &out_rect, + out_dmem->processor_output_view); + + gst_memory_unmap (in_mem, &in_map); + gst_memory_unmap (out_mem, &out_map); + + if (!ret) { + GST_ERROR_OBJECT (self, "Couldn't convert using video processor"); + return GST_FLOW_ERROR; + } + + return GST_FLOW_OK; +} + static GstFlowReturn gst_d3d11_convert_transform (GstBaseTransform * trans, GstBuffer * inbuf, GstBuffer * outbuf) @@ -1561,6 +1729,9 @@ gst_d3d11_convert_transform (GstBaseTransform * trans, gboolean copy_output = FALSE; GstD3D11Device *device = filter->device; + if (gst_d3d11_convert_prefer_video_processor (self, inbuf, outbuf)) + return gst_d3d11_convert_transform_using_processor (self, inbuf, outbuf); + context_handle = gst_d3d11_device_get_device_context_handle (device); view_index = 0; diff --git a/sys/d3d11/gstd3d11videoprocessor.c b/sys/d3d11/gstd3d11videoprocessor.c index e4eb208518..92b3b88f49 100644 --- a/sys/d3d11/gstd3d11videoprocessor.c +++ b/sys/d3d11/gstd3d11videoprocessor.c @@ 
-427,6 +427,14 @@ gst_d3d11_video_processor_create_output_view (GstD3D11VideoProcessor * return TRUE; } +gboolean +gst_d3d11_video_processor_ensure_output_view (GstD3D11VideoProcessor * + processor, GstD3D11Memory * mem) +{ + return gst_d3d11_memory_ensure_processor_output_view (mem, + processor->video_device, processor->enumerator); +} + void gst_d3d11_video_processor_input_view_release (ID3D11VideoProcessorInputView * view) diff --git a/sys/d3d11/gstd3d11videoprocessor.h b/sys/d3d11/gstd3d11videoprocessor.h index 6c26330664..8f66c98cc7 100644 --- a/sys/d3d11/gstd3d11videoprocessor.h +++ b/sys/d3d11/gstd3d11videoprocessor.h @@ -80,6 +80,9 @@ gboolean gst_d3d11_video_processor_create_output_view (GstD3D11VideoProcessor * ID3D11Resource *resource, ID3D11VideoProcessorOutputView ** view); +gboolean gst_d3d11_video_processor_ensure_output_view (GstD3D11VideoProcessor * processor, + GstD3D11Memory *mem); + void gst_d3d11_video_processor_input_view_release (ID3D11VideoProcessorInputView * view); void gst_d3d11_video_processor_output_view_release (ID3D11VideoProcessorOutputView * view);