d3d12: Reduce shader visible descriptor size

Shader-visible descriptors occupy GPU resources and are subject to hardware
limits. Thus, in order to minimize the number of shader-visible heaps,
only a non-shader-visible (staging) descriptor heap will be held by d3d12memory.
The converter will then copy the staging descriptors into a shader-visible
descriptor heap per draw.

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/5875>
This commit is contained in:
Seungha Yang 2024-01-01 17:19:16 +09:00 committed by GStreamer Marge Bot
parent 0ce6e752e4
commit 0577d1c9de
3 changed files with 58 additions and 127 deletions

View file

@ -275,6 +275,7 @@ struct _GstD3D12ConverterPrivate
ComPtr<ID3D12Resource> gamma_dec_lut; ComPtr<ID3D12Resource> gamma_dec_lut;
ComPtr<ID3D12Resource> gamma_enc_lut; ComPtr<ID3D12Resource> gamma_enc_lut;
D3D12_PLACED_SUBRESOURCE_FOOTPRINT gamma_lut_layout; D3D12_PLACED_SUBRESOURCE_FOOTPRINT gamma_lut_layout;
ComPtr<ID3D12DescriptorHeap> gamma_lut_heap;
std::vector<QuadData> quad_data; std::vector<QuadData> quad_data;
@ -898,6 +899,34 @@ gst_d3d12_converter_setup_resource (GstD3D12Converter * self,
memcpy (data, gamma_enc_table->lut, GAMMA_LUT_SIZE * sizeof (guint16)); memcpy (data, gamma_enc_table->lut, GAMMA_LUT_SIZE * sizeof (guint16));
upload_data->gamma_enc_lut_upload->Unmap (0, nullptr); upload_data->gamma_enc_lut_upload->Unmap (0, nullptr);
D3D12_DESCRIPTOR_HEAP_DESC desc = { };
desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
desc.NumDescriptors = 2;
desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE;
auto hr = device->CreateDescriptorHeap (&desc,
IID_PPV_ARGS (&priv->gamma_lut_heap));
if (!gst_d3d12_result (hr, self->device)) {
GST_ERROR_OBJECT (self, "Couldn't map gamma lut upload buffer");
return FALSE;
}
auto cpu_handle =
CD3DX12_CPU_DESCRIPTOR_HANDLE
(priv->gamma_lut_heap->GetCPUDescriptorHandleForHeapStart ());
D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = { };
srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1D;
srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
srv_desc.Texture1D.MipLevels = 1;
device->CreateShaderResourceView (priv->gamma_dec_lut.Get (), &srv_desc,
cpu_handle);
cpu_handle.Offset (priv->srv_inc_size);
device->CreateShaderResourceView (priv->gamma_enc_lut.Get (), &srv_desc,
cpu_handle);
} }
priv->input_texture_width = GST_VIDEO_INFO_WIDTH (in_info); priv->input_texture_width = GST_VIDEO_INFO_WIDTH (in_info);
@ -1786,28 +1815,6 @@ gst_d3d12_converter_execute (GstD3D12Converter * self,
{ {
auto priv = self->priv; auto priv = self->priv;
ComPtr < ID3D12DescriptorHeap > srv_heap;
bool create_srv = true;
if (!priv->crs->HaveLut () && gst_buffer_n_memory (in_buf) == 1) {
auto mem = (GstD3D12Memory *) gst_buffer_peek_memory (in_buf, 0);
if (!gst_d3d12_memory_get_shader_resource_view_heap (mem, &srv_heap)) {
GST_ERROR_OBJECT (self, "Couldn't get srv heap from memory");
return FALSE;
}
create_srv = false;
} else {
GstD3D12Descriptor *descriptor;
if (!gst_d3d12_descriptor_pool_acquire (priv->srv_heap_pool, &descriptor)) {
GST_ERROR_OBJECT (self, "Couldn't acquire srv heap");
return FALSE;
}
gst_d3d12_descriptor_get_handle (descriptor, &srv_heap);
gst_d3d12_fence_data_add_notify (fence_data, descriptor,
(GDestroyNotify) gst_d3d12_descriptor_unref);
}
std::lock_guard < std::mutex > lk (priv->prop_lock); std::lock_guard < std::mutex > lk (priv->prop_lock);
auto mem = (GstD3D12Memory *) gst_buffer_peek_memory (in_buf, 0); auto mem = (GstD3D12Memory *) gst_buffer_peek_memory (in_buf, 0);
auto resource = gst_d3d12_memory_get_resource_handle (mem); auto resource = gst_d3d12_memory_get_resource_handle (mem);
@ -1922,43 +1929,44 @@ gst_d3d12_converter_execute (GstD3D12Converter * self,
upload_data->vertex_index_upload.Get ()); upload_data->vertex_index_upload.Get ());
} }
} }
priv->is_first = false;
auto device = gst_d3d12_device_get_device_handle (self->device); auto device = gst_d3d12_device_get_device_handle (self->device);
guint srv_offset = 0; ComPtr < ID3D12DescriptorHeap > srv_heap;
GstD3D12Descriptor *descriptor;
if (!gst_d3d12_descriptor_pool_acquire (priv->srv_heap_pool, &descriptor)) {
GST_ERROR_OBJECT (self, "Couldn't acquire srv heap");
return FALSE;
}
gst_d3d12_descriptor_get_handle (descriptor, &srv_heap);
gst_d3d12_fence_data_add_notify (fence_data, descriptor,
(GDestroyNotify) gst_d3d12_descriptor_unref);
auto cpu_handle =
CD3DX12_CPU_DESCRIPTOR_HANDLE
(srv_heap->GetCPUDescriptorHandleForHeapStart ());
for (guint i = 0; i < gst_buffer_n_memory (in_buf); i++) { for (guint i = 0; i < gst_buffer_n_memory (in_buf); i++) {
auto mem = (GstD3D12Memory *) gst_buffer_peek_memory (in_buf, i); auto mem = (GstD3D12Memory *) gst_buffer_peek_memory (in_buf, i);
auto num_planes = gst_d3d12_memory_get_plane_count (mem); auto num_planes = gst_d3d12_memory_get_plane_count (mem);
ComPtr < ID3D12DescriptorHeap > mem_srv_heap;
for (guint plane = 0; plane < num_planes; plane++) { if (!gst_d3d12_memory_get_shader_resource_view_heap (mem, &mem_srv_heap)) {
if (create_srv && GST_ERROR_OBJECT (self, "Couldn't get SRV");
!gst_d3d12_memory_create_shader_resource_view (mem, plane, srv_offset,
srv_heap.Get ())) {
GST_ERROR_OBJECT (self, "Couldn't create SRV");
return FALSE; return FALSE;
} }
srv_offset++;
} device->CopyDescriptorsSimple (num_planes, cpu_handle,
mem_srv_heap->GetCPUDescriptorHandleForHeapStart (),
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
cpu_handle.Offset (num_planes, priv->srv_inc_size);
} }
if (priv->crs->HaveLut ()) { if (priv->crs->HaveLut ()) {
auto cpu_handle = device->CopyDescriptorsSimple (2, cpu_handle,
CD3DX12_CPU_DESCRIPTOR_HANDLE priv->gamma_lut_heap->GetCPUDescriptorHandleForHeapStart (),
(srv_heap->GetCPUDescriptorHandleForHeapStart (), D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
srv_offset, priv->srv_inc_size);
D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = { };
srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1D;
srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
srv_desc.Texture1D.MipLevels = 1;
device->CreateShaderResourceView (priv->gamma_dec_lut.Get (), &srv_desc,
cpu_handle);
cpu_handle.Offset (priv->srv_inc_size);
device->CreateShaderResourceView (priv->gamma_enc_lut.Get (), &srv_desc,
cpu_handle);
} }
for (guint i = 0; i < gst_buffer_n_memory (out_buf); i++) { for (guint i = 0; i < gst_buffer_n_memory (out_buf); i++) {
@ -2045,6 +2053,7 @@ gst_d3d12_converter_execute (GstD3D12Converter * self,
priv->upload_data, (GDestroyNotify) converter_upload_data_free); priv->upload_data, (GDestroyNotify) converter_upload_data_free);
} }
priv->upload_data = nullptr; priv->upload_data = nullptr;
priv->is_first = false;
return TRUE; return TRUE;
} }

View file

@ -473,42 +473,6 @@ gst_d3d12_memory_get_plane_count (GstD3D12Memory * mem)
return mem->priv->num_subresources; return mem->priv->num_subresources;
} }
/* Writes a 2D-texture shader resource view for @plane of @mem into @heap,
 * at descriptor slot @heap_offset.
 *
 * Returns: FALSE if the resource flags contain
 * D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE or if @plane is not below the
 * memory's subresource count, TRUE otherwise. */
gboolean
gst_d3d12_memory_create_shader_resource_view (GstD3D12Memory * mem,
    guint plane, guint heap_offset, ID3D12DescriptorHeap * heap)
{
  auto mpriv = mem->priv;
  auto alloc = GST_MEMORY_CAST (mem)->allocator;
  const auto flags = mpriv->desc.Flags;

  const bool srv_denied =
      (flags & D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE) != 0;
  if (srv_denied) {
    GST_LOG_OBJECT (alloc,
        "Shader resource was denied, configured flags 0x%x", (guint) flags);
    return FALSE;
  }

  if (plane >= mpriv->num_subresources) {
    GST_ERROR_OBJECT (alloc, "Out of bound request");
    return FALSE;
  }

  /* Describe a single-mip view of the requested plane */
  D3D12_SHADER_RESOURCE_VIEW_DESC view_desc = { };
  view_desc.Format = mpriv->resource_formats[plane];
  view_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
  view_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
  view_desc.Texture2D.MipLevels = 1;
  view_desc.Texture2D.PlaneSlice = plane;

  /* Start of the caller's heap, advanced by heap_offset descriptors */
  auto slot =
      CD3DX12_CPU_DESCRIPTOR_HANDLE
      (heap->GetCPUDescriptorHandleForHeapStart ());
  slot.Offset (heap_offset, mpriv->srv_inc_size);

  auto device = gst_d3d12_device_get_device_handle (mem->device);
  device->CreateShaderResourceView (mpriv->resource.Get (), &view_desc, slot);

  return TRUE;
}
gboolean gboolean
gst_d3d12_memory_get_shader_resource_view_heap (GstD3D12Memory * mem, gst_d3d12_memory_get_shader_resource_view_heap (GstD3D12Memory * mem,
ID3D12DescriptorHeap ** heap) ID3D12DescriptorHeap ** heap)
@ -527,7 +491,7 @@ gst_d3d12_memory_get_shader_resource_view_heap (GstD3D12Memory * mem,
D3D12_DESCRIPTOR_HEAP_DESC desc = { }; D3D12_DESCRIPTOR_HEAP_DESC desc = { };
desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
desc.NumDescriptors = priv->num_subresources; desc.NumDescriptors = priv->num_subresources;
desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE;
auto device = gst_d3d12_device_get_device_handle (mem->device); auto device = gst_d3d12_device_get_device_handle (mem->device);
@ -564,38 +528,6 @@ gst_d3d12_memory_get_shader_resource_view_heap (GstD3D12Memory * mem,
return TRUE; return TRUE;
} }
/* Writes a 2D-texture render target view for @plane of @mem into @heap,
 * at descriptor slot @heap_offset.
 *
 * Returns: FALSE if the resource flags lack
 * D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET or if @plane is not below the
 * memory's subresource count, TRUE otherwise. */
gboolean
gst_d3d12_memory_create_render_target_view (GstD3D12Memory * mem,
    guint plane, guint heap_offset, ID3D12DescriptorHeap * heap)
{
  auto mpriv = mem->priv;
  auto alloc = GST_MEMORY_CAST (mem)->allocator;
  const auto flags = mpriv->desc.Flags;

  const bool rtv_allowed =
      (flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET) != 0;
  if (!rtv_allowed) {
    GST_LOG_OBJECT (alloc,
        "Render target is not allowed, configured flags 0x%x", (guint) flags);
    return FALSE;
  }

  if (plane >= mpriv->num_subresources) {
    GST_ERROR_OBJECT (alloc, "Out of bound request");
    return FALSE;
  }

  /* Describe a view of the requested plane */
  D3D12_RENDER_TARGET_VIEW_DESC view_desc = { };
  view_desc.Format = mpriv->resource_formats[plane];
  view_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;
  view_desc.Texture2D.PlaneSlice = plane;

  /* Start of the caller's heap, advanced by heap_offset descriptors */
  auto slot =
      CD3DX12_CPU_DESCRIPTOR_HANDLE
      (heap->GetCPUDescriptorHandleForHeapStart ());
  slot.Offset (heap_offset, mpriv->rtv_inc_size);

  auto device = gst_d3d12_device_get_device_handle (mem->device);
  device->CreateRenderTargetView (mpriv->resource.Get (), &view_desc, slot);

  return TRUE;
}
gboolean gboolean
gst_d3d12_memory_get_render_target_view_heap (GstD3D12Memory * mem, gst_d3d12_memory_get_render_target_view_heap (GstD3D12Memory * mem,
ID3D12DescriptorHeap ** heap) ID3D12DescriptorHeap ** heap)

View file

@ -136,19 +136,9 @@ gboolean gst_d3d12_memory_get_subresource_index (GstD3D12Memory * mem,
guint gst_d3d12_memory_get_plane_count (GstD3D12Memory * mem); guint gst_d3d12_memory_get_plane_count (GstD3D12Memory * mem);
gboolean gst_d3d12_memory_create_shader_resource_view (GstD3D12Memory * mem,
guint plane,
guint heap_offset,
ID3D12DescriptorHeap * heap);
gboolean gst_d3d12_memory_get_shader_resource_view_heap (GstD3D12Memory * mem, gboolean gst_d3d12_memory_get_shader_resource_view_heap (GstD3D12Memory * mem,
ID3D12DescriptorHeap ** heap); ID3D12DescriptorHeap ** heap);
gboolean gst_d3d12_memory_create_render_target_view (GstD3D12Memory * mem,
guint plane,
guint heap_offset,
ID3D12DescriptorHeap * heap);
gboolean gst_d3d12_memory_get_render_target_view_heap (GstD3D12Memory * mem, gboolean gst_d3d12_memory_get_render_target_view_heap (GstD3D12Memory * mem,
ID3D12DescriptorHeap ** heap); ID3D12DescriptorHeap ** heap);