diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3d12/gstd3d12converter.cpp b/subprojects/gst-plugins-bad/gst-libs/gst/d3d12/gstd3d12converter.cpp index 69f41bc425..7feb0937b8 100644 --- a/subprojects/gst-plugins-bad/gst-libs/gst/d3d12/gstd3d12converter.cpp +++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3d12/gstd3d12converter.cpp @@ -166,14 +166,7 @@ struct VertexData } texture; }; -struct GammaLut -{ - guint16 lut[GAMMA_LUT_SIZE]; -}; - /* *INDENT-OFF* */ -typedef std::shared_ptr GammaLutPtr; - static const XMFLOAT4X4A g_matrix_identity = XMFLOAT4X4A ( 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, @@ -293,6 +286,12 @@ struct _GstD3D12ConverterPrivate if (fence_val > 0 && cq) gst_d3d12_cmd_queue_fence_wait (cq, fence_val); + if (setup_fence) { + auto completed = setup_fence->GetCompletedValue (); + if (completed < setup_fence_val) + setup_fence->SetEventOnCompletion (setup_fence_val, nullptr); + } + gst_clear_object (&srv_heap_pool); gst_clear_object (&cq); gst_clear_object (&pack); @@ -323,7 +322,7 @@ struct _GstD3D12ConverterPrivate gboolean have_lut = FALSE; D3D12_VERTEX_BUFFER_VIEW vbv; - D3D12_INDEX_BUFFER_VIEW idv; + D3D12_INDEX_BUFFER_VIEW ibv; D3D12_GPU_VIRTUAL_ADDRESS const_buf_addr[2]; ComPtr shader_buf; ComPtr vertex_upload; @@ -360,6 +359,9 @@ struct _GstD3D12ConverterPrivate std::mutex prop_lock; guint64 fence_val = 0; + ComPtr setup_fence; + guint64 setup_fence_val = 0; + /* properties */ gint src_x = 0; gint src_y = 0; @@ -695,54 +697,6 @@ gst_d3d12_converter_get_property (GObject * object, guint prop_id, } } -static GammaLutPtr -gst_d3d12_converter_get_gamma_dec_table (GstVideoTransferFunction func) -{ - static std::mutex lut_lock; - static std::map < GstVideoTransferFunction, GammaLutPtr > g_gamma_dec_table; - - std::lock_guard < std::mutex > lk (lut_lock); - auto lut = g_gamma_dec_table.find (func); - if (lut != g_gamma_dec_table.end ()) - return lut->second; - - const gdouble scale = (gdouble) 1 / (GAMMA_LUT_SIZE - 1); - auto table = 
std::make_shared < GammaLut > (); - for (guint i = 0; i < GAMMA_LUT_SIZE; i++) { - gdouble val = gst_video_transfer_function_decode (func, i * scale); - val = rint (val * 65535); - val = CLAMP (val, 0, 65535); - table->lut[i] = (guint16) val; - } - - g_gamma_dec_table[func] = table; - return table; -} - -static GammaLutPtr -gst_d3d12_converter_get_gamma_enc_table (GstVideoTransferFunction func) -{ - static std::mutex lut_lock; - static std::map < GstVideoTransferFunction, GammaLutPtr > g_gamma_enc_table; - - std::lock_guard < std::mutex > lk (lut_lock); - auto lut = g_gamma_enc_table.find (func); - if (lut != g_gamma_enc_table.end ()) - return lut->second; - - const gdouble scale = (gdouble) 1 / (GAMMA_LUT_SIZE - 1); - auto table = std::make_shared < GammaLut > (); - for (guint i = 0; i < GAMMA_LUT_SIZE; i++) { - gdouble val = gst_video_transfer_function_encode (func, i * scale); - val = rint (val * 65535); - val = CLAMP (val, 0, 65535); - table->lut[i] = (guint16) val; - } - - g_gamma_enc_table[func] = table; - return table; -} - static guint reorder_rtv_index (GstVideoFormat output_format, guint index) { @@ -820,8 +774,6 @@ gst_d3d12_converter_setup_resource (GstD3D12Converter * self, HRESULT hr; VertexData vertex_data[4]; ComPtr < ID3D12Resource > upload_buf; - ComPtr < ID3D12Resource > gamma_dec_lut_upload; - ComPtr < ID3D12Resource > gamma_enc_lut_upload; auto device = gst_d3d12_device_get_device_handle (self->device); @@ -1023,9 +975,9 @@ gst_d3d12_converter_setup_resource (GstD3D12Converter * self, priv->vbv.SizeInBytes = g_vertex_buf_size; priv->vbv.StrideInBytes = sizeof (VertexData); - priv->idv.BufferLocation = priv->vbv.BufferLocation + g_vertex_buf_size; - priv->idv.SizeInBytes = g_index_buf_size; - priv->idv.Format = DXGI_FORMAT_R16_UINT; + priv->ibv.BufferLocation = priv->vbv.BufferLocation + g_vertex_buf_size; + priv->ibv.SizeInBytes = g_index_buf_size; + priv->ibv.Format = DXGI_FORMAT_R16_UINT; priv->const_buf_addr[0] = priv->vbv.BufferLocation + 
vertex_index_size; priv->const_buf_addr[1] = priv->vbv.BufferLocation + other_const_off; @@ -1053,106 +1005,12 @@ gst_d3d12_converter_setup_resource (GstD3D12Converter * self, upload_buf->Unmap (0, nullptr); } - if (priv->have_lut) { - heap_prop = CD3DX12_HEAP_PROPERTIES (D3D12_HEAP_TYPE_DEFAULT); - resource_desc = CD3DX12_RESOURCE_DESC::Tex1D (DXGI_FORMAT_R16_UNORM, - GAMMA_LUT_SIZE, 1, 1); + auto in_trc = in_info->colorimetry.transfer; + auto out_trc = in_info->colorimetry.transfer; - hr = device->CreateCommittedResource (&heap_prop, - heap_flags, &resource_desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, - IID_PPV_ARGS (&priv->gamma_dec_lut)); - if (!gst_d3d12_result (hr, self->device)) { - GST_ERROR_OBJECT (self, "Couldn't create gamma decoding LUT"); - return FALSE; - } - - hr = device->CreateCommittedResource (&heap_prop, - heap_flags, &resource_desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, - IID_PPV_ARGS (&priv->gamma_enc_lut)); - if (!gst_d3d12_result (hr, self->device)) { - GST_ERROR_OBJECT (self, "Couldn't create gamma encoding LUT"); - return FALSE; - } - - UINT64 gamma_lut_size; - device->GetCopyableFootprints (&resource_desc, 0, 1, 0, - &priv->gamma_lut_layout, nullptr, nullptr, &gamma_lut_size); - - heap_prop = CD3DX12_HEAP_PROPERTIES (D3D12_HEAP_TYPE_UPLOAD); - resource_desc = CD3DX12_RESOURCE_DESC::Buffer (gamma_lut_size); - - hr = device->CreateCommittedResource (&heap_prop, - heap_flags, &resource_desc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, - IID_PPV_ARGS (&gamma_dec_lut_upload)); - if (!gst_d3d12_result (hr, self->device)) { - GST_ERROR_OBJECT (self, "Couldn't create gamma decoding LUT upload"); - return FALSE; - } - - hr = device->CreateCommittedResource (&heap_prop, - heap_flags, &resource_desc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, - IID_PPV_ARGS (&gamma_enc_lut_upload)); - if (!gst_d3d12_result (hr, self->device)) { - GST_ERROR_OBJECT (self, "Couldn't create gamma encoding LUT upload"); - return FALSE; - } - - auto in_trc = 
in_info->colorimetry.transfer; - auto out_trc = in_info->colorimetry.transfer; - - if (priv->convert_type[0] == CONVERT_TYPE::GAMMA || - priv->convert_type[0] == CONVERT_TYPE::PRIMARY) { - out_trc = out_info->colorimetry.transfer; - } - - auto gamma_dec_table = gst_d3d12_converter_get_gamma_dec_table (in_trc); - auto gamma_enc_table = gst_d3d12_converter_get_gamma_enc_table (out_trc); - - hr = gamma_dec_lut_upload->Map (0, &range, (void **) &data); - if (!gst_d3d12_result (hr, self->device)) { - GST_ERROR_OBJECT (self, "Couldn't map gamma lut upload buffer"); - return FALSE; - } - - memcpy (data, gamma_dec_table->lut, GAMMA_LUT_SIZE * sizeof (guint16)); - gamma_dec_lut_upload->Unmap (0, nullptr); - - hr = gamma_enc_lut_upload->Map (0, &range, (void **) &data); - if (!gst_d3d12_result (hr, self->device)) { - GST_ERROR_OBJECT (self, "Couldn't map gamma lut upload buffer"); - return FALSE; - } - - memcpy (data, gamma_enc_table->lut, GAMMA_LUT_SIZE * sizeof (guint16)); - gamma_enc_lut_upload->Unmap (0, nullptr); - - D3D12_DESCRIPTOR_HEAP_DESC desc = { }; - desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; - desc.NumDescriptors = 2; - desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; - - auto hr = device->CreateDescriptorHeap (&desc, - IID_PPV_ARGS (&priv->gamma_lut_heap)); - if (!gst_d3d12_result (hr, self->device)) { - GST_ERROR_OBJECT (self, "Couldn't map gamma lut upload buffer"); - return FALSE; - } - - auto cpu_handle = - CD3DX12_CPU_DESCRIPTOR_HANDLE (GetCPUDescriptorHandleForHeapStart - (priv->gamma_lut_heap)); - - D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = { }; - srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1D; - srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - srv_desc.Texture1D.MipLevels = 1; - - device->CreateShaderResourceView (priv->gamma_dec_lut.Get (), &srv_desc, - cpu_handle); - cpu_handle.Offset (priv->srv_inc_size); - - device->CreateShaderResourceView (priv->gamma_enc_lut.Get (), &srv_desc, - cpu_handle); + if 
(priv->convert_type[0] == CONVERT_TYPE::GAMMA || + priv->convert_type[0] == CONVERT_TYPE::PRIMARY) { + out_trc = out_info->colorimetry.transfer; } priv->input_texture_width = GST_VIDEO_INFO_WIDTH (in_info); @@ -1172,84 +1030,52 @@ gst_d3d12_converter_setup_resource (GstD3D12Converter * self, priv->scissor_rect[i].bottom = GST_VIDEO_INFO_COMP_HEIGHT (out_info, i); } - ComPtr < ID3D12CommandAllocator > ca; - hr = device->CreateCommandAllocator (D3D12_COMMAND_LIST_TYPE_DIRECT, - IID_PPV_ARGS (&ca)); - if (!gst_d3d12_result (hr, self->device)) - return FALSE; - - ComPtr < ID3D12GraphicsCommandList > cl; - hr = device->CreateCommandList (0, D3D12_COMMAND_LIST_TYPE_DIRECT, - ca.Get (), nullptr, IID_PPV_ARGS (&cl)); - if (!gst_d3d12_result (hr, self->device)) - return FALSE; - - std::vector < D3D12_RESOURCE_BARRIER > barriers; - cl->CopyResource (priv->shader_buf.Get (), upload_buf.Get ()); - - barriers.push_back (CD3DX12_RESOURCE_BARRIER::Transition (priv->shader_buf. - Get (), D3D12_RESOURCE_STATE_COPY_DEST, STATE_VERTEX_AND_INDEX)); - if (priv->have_lut) { - D3D12_TEXTURE_COPY_LOCATION src; - D3D12_TEXTURE_COPY_LOCATION dst; - src = - CD3DX12_TEXTURE_COPY_LOCATION (gamma_dec_lut_upload.Get (), - priv->gamma_lut_layout); - dst = CD3DX12_TEXTURE_COPY_LOCATION (priv->gamma_dec_lut.Get ()); - cl->CopyTextureRegion (&dst, 0, 0, 0, &src, nullptr); - - src = - CD3DX12_TEXTURE_COPY_LOCATION (gamma_enc_lut_upload.Get (), - priv->gamma_lut_layout); - dst = CD3DX12_TEXTURE_COPY_LOCATION (priv->gamma_enc_lut.Get ()); - cl->CopyTextureRegion (&dst, 0, 0, 0, &src, nullptr); - - barriers. - push_back (CD3DX12_RESOURCE_BARRIER::Transition (priv->gamma_dec_lut. - Get (), D3D12_RESOURCE_STATE_COPY_DEST, - D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE)); - barriers. - push_back (CD3DX12_RESOURCE_BARRIER::Transition (priv->gamma_enc_lut. 
- Get (), D3D12_RESOURCE_STATE_COPY_DEST, - D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE)); + hr = gst_d3d12_device_get_converter_resources (self->device, + priv->shader_buf.Get (), upload_buf.Get (), &priv->vbv, &priv->ibv, + in_trc, &priv->gamma_dec_lut, out_trc, &priv->gamma_enc_lut, + &priv->setup_fence, &priv->setup_fence_val); + } else { + hr = gst_d3d12_device_get_converter_resources (self->device, + priv->shader_buf.Get (), upload_buf.Get (), &priv->vbv, &priv->ibv, + in_trc, nullptr, out_trc, nullptr, &priv->setup_fence, + &priv->setup_fence_val); } - cl->ResourceBarrier (barriers.size (), barriers.data ()); - - hr = cl->Close (); - if (!gst_d3d12_result (hr, self->device)) { - GST_ERROR_OBJECT (self, "Couldn't close upload command list"); - return FALSE; - } - - ID3D12CommandList *cmd_list[] = { cl.Get () }; - - hr = gst_d3d12_cmd_queue_execute_command_lists (priv->cq, 1, cmd_list, - &priv->fence_val); if (!gst_d3d12_result (hr, self->device)) { GST_ERROR_OBJECT (self, "Couldn't execute command list"); return FALSE; } - GstD3D12FenceData *fence_data; - gst_d3d12_device_acquire_fence_data (self->device, &fence_data); - gst_d3d12_fence_data_push (fence_data, FENCE_NOTIFY_COM (cl.Detach ())); - gst_d3d12_fence_data_push (fence_data, FENCE_NOTIFY_COM (ca.Detach ())); - gst_d3d12_fence_data_push (fence_data, - FENCE_NOTIFY_COM (upload_buf.Detach ())); - if (gamma_dec_lut_upload) { - gst_d3d12_fence_data_push (fence_data, - FENCE_NOTIFY_COM (gamma_dec_lut_upload.Detach ())); - } + if (priv->have_lut) { + D3D12_DESCRIPTOR_HEAP_DESC desc = { }; + desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + desc.NumDescriptors = 2; + desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; - if (gamma_enc_lut_upload) { - gst_d3d12_fence_data_push (fence_data, - FENCE_NOTIFY_COM (gamma_enc_lut_upload.Detach ())); - } + auto hr = device->CreateDescriptorHeap (&desc, + IID_PPV_ARGS (&priv->gamma_lut_heap)); + if (!gst_d3d12_result (hr, self->device)) { + GST_ERROR_OBJECT (self, 
"Couldn't create gamma lut heap"); + return FALSE; + } - gst_d3d12_cmd_queue_set_notify (priv->cq, priv->fence_val, - FENCE_NOTIFY_MINI_OBJECT (fence_data)); + auto cpu_handle = + CD3DX12_CPU_DESCRIPTOR_HANDLE (GetCPUDescriptorHandleForHeapStart + (priv->gamma_lut_heap)); + + D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = { }; + srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1D; + srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srv_desc.Texture1D.MipLevels = 1; + + device->CreateShaderResourceView (priv->gamma_dec_lut.Get (), &srv_desc, + cpu_handle); + cpu_handle.Offset (priv->srv_inc_size); + + device->CreateShaderResourceView (priv->gamma_enc_lut.Get (), &srv_desc, + cpu_handle); + } return TRUE; } @@ -2410,7 +2236,7 @@ gst_d3d12_converter_execute (GstD3D12Converter * self, GstD3D12Frame * in_frame, cl->SetGraphicsRootConstantBufferView (pipeline_data.crs->GetPsCbvIdx (), priv->const_buf_addr[pipeline_index]); - cl->IASetIndexBuffer (&priv->idv); + cl->IASetIndexBuffer (&priv->ibv); cl->IASetVertexBuffers (0, 1, &priv->vbv); cl->IASetPrimitiveTopology (D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); cl->RSSetViewports (1, priv->viewport); @@ -2534,6 +2360,20 @@ gst_d3d12_converter_convert_buffer (GstD3D12Converter * converter, gst_d3d12_frame_fence_gpu_wait (&out_frame, priv->cq); } + if (priv->setup_fence) { + auto default_queue = gst_d3d12_device_get_cmd_queue (converter->device, + D3D12_COMMAND_LIST_TYPE_DIRECT); + if (priv->cq != default_queue) { + auto completed = priv->setup_fence->GetCompletedValue (); + if (completed < priv->setup_fence_val) { + gst_d3d12_cmd_queue_execute_wait (priv->cq, priv->setup_fence.Get (), + priv->setup_fence_val); + } + } + + priv->setup_fence = nullptr; + } + gst_d3d12_frame_unmap (&in_frame); gst_d3d12_frame_unmap (&out_frame); diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3d12/gstd3d12device-private.h b/subprojects/gst-plugins-bad/gst-libs/gst/d3d12/gstd3d12device-private.h index 
83bf22d69a..a4a3aef397 100644 --- a/subprojects/gst-plugins-bad/gst-libs/gst/d3d12/gstd3d12device-private.h +++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3d12/gstd3d12device-private.h @@ -100,5 +100,18 @@ HRESULT gst_d3d12_device_get_sampler_state (GstD3D12Device * device, GST_D3D12_API gboolean gst_d3d12_device_non_zeroed_supported (GstD3D12Device * device); +GST_D3D12_API +HRESULT gst_d3d12_device_get_converter_resources (GstD3D12Device * device, + ID3D12Resource * index_buf, + ID3D12Resource * index_upload, + const D3D12_VERTEX_BUFFER_VIEW * vbv, + const D3D12_INDEX_BUFFER_VIEW * ibv, + GstVideoTransferFunction gamma_dec_func, + ID3D12Resource ** gamma_dec, + GstVideoTransferFunction gamma_enc_func, + ID3D12Resource ** gamma_enc, + ID3D12Fence ** fence, + guint64 * fence_val); + G_END_DECLS diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3d12/gstd3d12device.cpp b/subprojects/gst-plugins-bad/gst-libs/gst/d3d12/gstd3d12device.cpp index 29ef109e37..411ca979be 100644 --- a/subprojects/gst-plugins-bad/gst-libs/gst/d3d12/gstd3d12device.cpp +++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3d12/gstd3d12device.cpp @@ -26,6 +26,7 @@ #include "gstd3d12cmdlistpool.h" #include #include +#include #include #include #include @@ -142,8 +143,13 @@ struct DeviceInner gst_clear_object (©_cl_pool); gst_clear_object (&fence_data_pool); + gst_clear_object (&rtv_heap_pool); + gamma_dec_lut.clear(); + gamma_enc_lut.clear(); samplers.clear (); + gamma_lut_pso = nullptr; + gamma_lut_rs = nullptr; factory = nullptr; adapter = nullptr; @@ -264,6 +270,12 @@ struct DeviceInner GstD3D12FenceDataPool *fence_data_pool = nullptr; + ComPtr gamma_lut_rs; + ComPtr gamma_lut_pso; + std::unordered_map> gamma_dec_lut; + std::unordered_map> gamma_enc_lut; + GstD3D12DescHeapPool *rtv_heap_pool = nullptr; + guint rtv_inc_size; guint adapter_index = 0; @@ -1392,6 +1404,17 @@ gst_d3d12_device_new_internal (const GstD3D12DeviceConstructData * data) priv->non_zeroed_supported = TRUE; } + { + 
D3D12_DESCRIPTOR_HEAP_DESC rtv_desc = { };
+    rtv_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV;
+    rtv_desc.NumDescriptors = 2;
+    rtv_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE;
+
+    priv->rtv_heap_pool = gst_d3d12_desc_heap_pool_new (device.Get (),
+        &rtv_desc);
+    GST_OBJECT_FLAG_SET (priv->rtv_heap_pool, GST_OBJECT_FLAG_MAY_BE_LEAKED);
+  }
+
   return self;
 
 error:
@@ -2243,3 +2266,424 @@ gst_d3d12_flush_all_devices (void)
   auto manager = DeviceCacheManager::GetInstance ();
   manager->FlushAll ();
 }
+
+/* Translates a GstVideoTransferFunction into the numeric gamma function id
+ * that is handed to the gamma LUT shader as a root constant. BT601, BT709 and
+ * BT2020_10 share one id; any value without a dedicated case maps to GAMMA10.
+ * NOTE(review): the enumerator order presumably has to match the shader
+ * source, which is not visible here - confirm before reordering. */
+static inline DWORD
+gst_d3d12_transfer_func_to_gamma_func (GstVideoTransferFunction func)
+{
+  enum class GammaFuncType
+  {
+    GAMMA10,
+    GAMMA18,
+    GAMMA20,
+    GAMMA22,
+    BT709,
+    SMPTE240M,
+    SRGB,
+    GAMMA28,
+    LOG100,
+    LOG316,
+    BT2020,
+    ADOBERGB,
+    PQ,
+    HLG,
+  };
+
+  switch (func) {
+    case GST_VIDEO_TRANSFER_GAMMA18:
+      return (DWORD) GammaFuncType::GAMMA18;
+    case GST_VIDEO_TRANSFER_GAMMA20:
+      return (DWORD) GammaFuncType::GAMMA20;
+    case GST_VIDEO_TRANSFER_GAMMA22:
+      return (DWORD) GammaFuncType::GAMMA22;
+    case GST_VIDEO_TRANSFER_BT601:
+    case GST_VIDEO_TRANSFER_BT709:
+    case GST_VIDEO_TRANSFER_BT2020_10:
+      return (DWORD) GammaFuncType::BT709;
+    case GST_VIDEO_TRANSFER_SMPTE240M:
+      return (DWORD) GammaFuncType::SMPTE240M;
+    case GST_VIDEO_TRANSFER_SRGB:
+      return (DWORD) GammaFuncType::SRGB;
+    case GST_VIDEO_TRANSFER_GAMMA28:
+      return (DWORD) GammaFuncType::GAMMA28;
+    case GST_VIDEO_TRANSFER_LOG100:
+      return (DWORD) GammaFuncType::LOG100;
+    case GST_VIDEO_TRANSFER_LOG316:
+      return (DWORD) GammaFuncType::LOG316;
+    case GST_VIDEO_TRANSFER_BT2020_12:
+      return (DWORD) GammaFuncType::BT2020;
+    case GST_VIDEO_TRANSFER_ADOBERGB:
+      return (DWORD) GammaFuncType::ADOBERGB;
+    case GST_VIDEO_TRANSFER_SMPTE2084:
+      return (DWORD) GammaFuncType::PQ;
+    case GST_VIDEO_TRANSFER_ARIB_STD_B67:
+      return (DWORD) GammaFuncType::HLG;
+    default:
+      break;
+  }
+
+  return (DWORD) GammaFuncType::GAMMA10;
+}
+
+/* Submits, on the device's direct queue, the one-time setup work of a
+ * converter: the copy of @index_upload into @index_buf and, when both
+ * @gamma_dec and @gamma_enc are non-null, the rendering of whichever gamma LUT
+ * textures are not cached on the device yet. LUTs are cached per gamma
+ * function id, so later callers asking for the same function reuse them.
+ *
+ * @vbv / @ibv: quad geometry used to draw into the LUT render targets
+ * @gamma_dec / @gamma_enc: (out): LUT textures, each carrying a reference
+ *   owned by the caller; pass nullptr for both when no LUT is needed
+ * @fence: (out): fence handle of the direct queue, with a reference added
+ * @fence_val: (out): fence value of the submitted command list
+ *
+ * Returns: S_OK on success, a failure HRESULT otherwise. @gamma_dec,
+ * @gamma_enc and @fence are written only on success. */
+HRESULT
+gst_d3d12_device_get_converter_resources (GstD3D12Device * device,
+    ID3D12Resource * index_buf, ID3D12Resource * index_upload,
+    const D3D12_VERTEX_BUFFER_VIEW * vbv, const D3D12_INDEX_BUFFER_VIEW * ibv,
+    GstVideoTransferFunction gamma_dec_func, ID3D12Resource ** gamma_dec,
+    GstVideoTransferFunction gamma_enc_func, ID3D12Resource ** gamma_enc,
+    ID3D12Fence ** fence, guint64 * fence_val)
+{
+  auto priv = device->priv->inner;
+  GstD3D12FenceData *fence_data = nullptr;
+  GstD3D12CmdAlloc *gst_ca = nullptr;
+  GstD3D12CmdList *gst_cl = nullptr;
+  bool need_lut = false;
+  HRESULT hr = S_OK;
+  DWORD gamma_dec_func_d3d12 = 0;
+  DWORD gamma_enc_func_d3d12 = 0;
+
+  if (gamma_dec != nullptr && gamma_enc != nullptr)
+    need_lut = true;
+
+  if (need_lut) {
+    gamma_dec_func_d3d12 =
+        gst_d3d12_transfer_func_to_gamma_func (gamma_dec_func);
+    gamma_enc_func_d3d12 =
+        gst_d3d12_transfer_func_to_gamma_func (gamma_enc_func);
+  }
+
+  /* Root signature, PSO and the LUT caches are shared per device */
+  std::lock_guard < std::mutex > lk (priv->lock);
+  if (!priv->gamma_lut_rs) {
+    D3D12_ROOT_SIGNATURE_FLAGS rs_flags =
+        D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT |
+        D3D12_ROOT_SIGNATURE_FLAG_DENY_HULL_SHADER_ROOT_ACCESS |
+        D3D12_ROOT_SIGNATURE_FLAG_DENY_DOMAIN_SHADER_ROOT_ACCESS |
+        D3D12_ROOT_SIGNATURE_FLAG_DENY_GEOMETRY_SHADER_ROOT_ACCESS |
+        D3D12_ROOT_SIGNATURE_FLAG_DENY_AMPLIFICATION_SHADER_ROOT_ACCESS |
+        D3D12_ROOT_SIGNATURE_FLAG_DENY_MESH_SHADER_ROOT_ACCESS;
+    D3D12_VERSIONED_ROOT_SIGNATURE_DESC rs_desc = { };
+    CD3DX12_ROOT_PARAMETER root_params[1];
+
+    /* Two 32-bit constants: [0] decode/encode selector, [1] gamma function */
+    root_params[0].InitAsConstants (2, 0);
+
+    CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC::Init_1_0 (rs_desc, 1, root_params,
+        0, nullptr, rs_flags);
+
+    ComPtr < ID3DBlob > rs_blob;
+    ComPtr < ID3DBlob > error_blob;
+    hr = D3DX12SerializeVersionedRootSignature (&rs_desc,
+        D3D_ROOT_SIGNATURE_VERSION_1, &rs_blob, &error_blob);
+
+    if (!gst_d3d12_result (hr, device)) {
+      const gchar *error_msg = nullptr;
+      if (error_blob)
+        error_msg = (const gchar *) error_blob->GetBufferPointer ();
+
+      GST_ERROR_OBJECT (device,
+          "Couldn't serialize root signature, hr: 0x%x, error detail: %s",
+          (guint) hr, GST_STR_NULL (error_msg));
+      return hr;
+    }
+
+    hr = priv->device->CreateRootSignature (0, rs_blob->GetBufferPointer (),
+        rs_blob->GetBufferSize (), IID_PPV_ARGS (&priv->gamma_lut_rs));
+    if (!gst_d3d12_result (hr, device)) {
+      GST_ERROR_OBJECT (device, "Couldn't create root signature");
+      return hr;
+    }
+  }
+
+  if (!priv->gamma_lut_pso) {
+    GstD3DShaderByteCode vs_blob;
+    GstD3DShaderByteCode ps_blob;
+    if (!gst_d3d12_shader_cache_get_gamma_lut_blob (&vs_blob, &ps_blob)) {
+      GST_ERROR_OBJECT (device, "Couldn't get gamma decode byte code");
+      return E_FAIL;
+    }
+
+    D3D12_INPUT_ELEMENT_DESC input_desc[2];
+    input_desc[0].SemanticName = "POSITION";
+    input_desc[0].SemanticIndex = 0;
+    input_desc[0].Format = DXGI_FORMAT_R32G32B32_FLOAT;
+    input_desc[0].InputSlot = 0;
+    input_desc[0].AlignedByteOffset = D3D12_APPEND_ALIGNED_ELEMENT;
+    input_desc[0].InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA;
+    input_desc[0].InstanceDataStepRate = 0;
+
+    input_desc[1].SemanticName = "TEXCOORD";
+    input_desc[1].SemanticIndex = 0;
+    input_desc[1].Format = DXGI_FORMAT_R32G32_FLOAT;
+    input_desc[1].InputSlot = 0;
+    input_desc[1].AlignedByteOffset = D3D12_APPEND_ALIGNED_ELEMENT;
+    input_desc[1].InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA;
+    input_desc[1].InstanceDataStepRate = 0;
+
+    D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc = { };
+    pso_desc.pRootSignature = priv->gamma_lut_rs.Get ();
+    pso_desc.VS.pShaderBytecode = vs_blob.byte_code;
+    pso_desc.VS.BytecodeLength = vs_blob.byte_code_len;
+    pso_desc.PS.pShaderBytecode = ps_blob.byte_code;
+    pso_desc.PS.BytecodeLength = ps_blob.byte_code_len;
+    pso_desc.BlendState = CD3DX12_BLEND_DESC (D3D12_DEFAULT);
+    pso_desc.SampleMask = UINT_MAX;
+    pso_desc.RasterizerState = CD3DX12_RASTERIZER_DESC (D3D12_DEFAULT);
+    pso_desc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
+    pso_desc.DepthStencilState.DepthEnable = FALSE;
+    pso_desc.DepthStencilState.StencilEnable = FALSE;
+    pso_desc.InputLayout.pInputElementDescs = input_desc;
+    pso_desc.InputLayout.NumElements = 2;
+    pso_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
+    pso_desc.NumRenderTargets = 1;
+    pso_desc.RTVFormats[0] = DXGI_FORMAT_R16_UNORM;
+    pso_desc.SampleDesc.Count = 1;
+    pso_desc.SampleDesc.Quality = 0;
+
+    hr = priv->device->CreateGraphicsPipelineState (&pso_desc,
+        IID_PPV_ARGS (&priv->gamma_lut_pso));
+    if (!gst_d3d12_result (hr, device)) {
+      GST_ERROR_OBJECT (device, "Couldn't create gamma decode pso");
+      return hr;
+    }
+  }
+
+  gst_d3d12_fence_data_pool_acquire (priv->fence_data_pool, &fence_data);
+
+  gst_d3d12_cmd_alloc_pool_acquire (priv->direct_ca_pool, &gst_ca);
+  if (!gst_ca) {
+    GST_ERROR_OBJECT (device, "Couldn't acquire command allocator");
+    gst_d3d12_fence_data_unref (fence_data);
+    return E_FAIL;
+  }
+
+  gst_d3d12_fence_data_push (fence_data, FENCE_NOTIFY_MINI_OBJECT (gst_ca));
+
+  auto ca = gst_d3d12_cmd_alloc_get_handle (gst_ca);
+  gst_d3d12_cmd_list_pool_acquire (priv->direct_cl_pool, ca, &gst_cl);
+
+  if (!gst_cl) {
+    GST_ERROR_OBJECT (device, "Couldn't acquire command list");
+    gst_d3d12_fence_data_unref (fence_data);
+    return E_FAIL;
+  }
+
+  /* Hand the command list to the fence data like the allocator above, so it
+   * is released together with it instead of never being released */
+  gst_d3d12_fence_data_push (fence_data, FENCE_NOTIFY_MINI_OBJECT (gst_cl));
+
+  ComPtr < ID3D12CommandList > cl_base;
+  ComPtr < ID3D12GraphicsCommandList > cl;
+
+  cl_base = gst_d3d12_cmd_list_get_handle (gst_cl);
+  hr = cl_base.As (&cl);
+  if (!gst_d3d12_result (hr, device)) {
+    GST_ERROR_OBJECT (device, "Couldn't get graphics command list interface");
+    gst_d3d12_fence_data_unref (fence_data);
+    return hr;
+  }
+
+  /* Failure path while recording: close the list before it is released,
+   * since a command list can only be Reset() for reuse once it is closed */
+  auto abort_recording = [&] {
+    cl->Close ();
+    gst_d3d12_fence_data_unref (fence_data);
+  };
+
+  cl->CopyResource (index_buf, index_upload);
+  /* Keep both buffers referenced until the fence data is released */
+  index_buf->AddRef ();
+  gst_d3d12_fence_data_push (fence_data, FENCE_NOTIFY_COM (index_buf));
+
+  index_upload->AddRef ();
+  gst_d3d12_fence_data_push (fence_data, FENCE_NOTIFY_COM (index_upload));
+
+  D3D12_RESOURCE_BARRIER copy_barrier =
+      CD3DX12_RESOURCE_BARRIER::Transition (index_buf,
+      D3D12_RESOURCE_STATE_COPY_DEST,
+      D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER |
+      D3D12_RESOURCE_STATE_INDEX_BUFFER,
+      D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES);
+
+  cl->ResourceBarrier (1, &copy_barrier);
+
+  bool store_dec = false;
+  bool store_enc = false;
+  ComPtr < ID3D12Resource > gamma_dec_resource;
+  ComPtr < ID3D12Resource > gamma_enc_resource;
+
+  if (need_lut) {
+    const UINT lut_size = 4096;
+
+    auto dec_iter = priv->gamma_dec_lut.find (gamma_dec_func_d3d12);
+    if (dec_iter != priv->gamma_dec_lut.end ()) {
+      GST_LOG_OBJECT (device, "Reuse gamma decode LUT");
+      gamma_dec_resource = dec_iter->second;
+    } else {
+      GST_DEBUG_OBJECT (device, "Need to build gamma decode LUT");
+    }
+
+    auto enc_iter = priv->gamma_enc_lut.find (gamma_enc_func_d3d12);
+    if (enc_iter != priv->gamma_enc_lut.end ()) {
+      GST_LOG_OBJECT (device, "Reuse gamma encode LUT");
+      gamma_enc_resource = enc_iter->second;
+    } else {
+      GST_DEBUG_OBJECT (device, "Need to build gamma encode LUT");
+    }
+
+    if (!gamma_dec_resource || !gamma_enc_resource) {
+      GstD3D12DescHeap *desc_heap = nullptr;
+      D3D12_HEAP_FLAGS heap_flags = D3D12_HEAP_FLAG_NONE;
+      if (gst_d3d12_device_non_zeroed_supported (device))
+        heap_flags = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;
+
+      auto heap_prop = CD3DX12_HEAP_PROPERTIES (D3D12_HEAP_TYPE_DEFAULT);
+      auto resource_desc = CD3DX12_RESOURCE_DESC::Tex1D (DXGI_FORMAT_R16_UNORM,
+          lut_size, 1, 1, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET |
+          D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS);
+
+      D3D12_RENDER_TARGET_VIEW_DESC rtv_desc = { };
+      rtv_desc.Format = DXGI_FORMAT_R16_UNORM;
+      rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE1D;
+      rtv_desc.Texture1D.MipSlice = 0;
+
+      gst_d3d12_desc_heap_pool_acquire (priv->rtv_heap_pool, &desc_heap);
+      if (!desc_heap) {
+        GST_ERROR_OBJECT (device, "Couldn't acquire descriptor heap");
+        abort_recording ();
+        return E_FAIL;
+      }
+
+      gst_d3d12_fence_data_push (fence_data,
+          FENCE_NOTIFY_MINI_OBJECT (desc_heap));
+
+      auto desc_handle = gst_d3d12_desc_heap_get_handle (desc_heap);
+      auto cpu_handle = CD3DX12_CPU_DESCRIPTOR_HANDLE
+          (GetCPUDescriptorHandleForHeapStart (desc_handle));
+
+      /* Ask the device for the RTV descriptor stride instead of relying on a
+       * cached copy, so stepping to the second descriptor below is always
+       * done with a valid increment */
+      const UINT rtv_inc_size =
+          priv->device->GetDescriptorHandleIncrementSize
+          (D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
+
+      cl->SetGraphicsRootSignature (priv->gamma_lut_rs.Get ());
+      cl->SetPipelineState (priv->gamma_lut_pso.Get ());
+      cl->IASetIndexBuffer (ibv);
+      cl->IASetVertexBuffers (0, 1, vbv);
+      cl->IASetPrimitiveTopology (D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
+
+      D3D12_VIEWPORT viewport = { };
+      viewport.Width = lut_size;
+      viewport.Height = 1;
+      viewport.MinDepth = 0;
+      viewport.MaxDepth = 1;
+      cl->RSSetViewports (1, &viewport);
+
+      D3D12_RECT scissor_rect = { };
+      scissor_rect.left = 0;
+      scissor_rect.top = 0;
+      scissor_rect.right = lut_size;
+      scissor_rect.bottom = 1;
+      cl->RSSetScissorRects (1, &scissor_rect);
+
+      if (!gamma_dec_resource) {
+        store_dec = true;
+        hr = priv->device->CreateCommittedResource (&heap_prop, heap_flags,
+            &resource_desc, D3D12_RESOURCE_STATE_COMMON, nullptr,
+            IID_PPV_ARGS (&gamma_dec_resource));
+        if (!gst_d3d12_result (hr, device)) {
+          GST_ERROR_OBJECT (device, "Couldn't create LUT texture");
+          abort_recording ();
+          return hr;
+        }
+
+        priv->device->CreateRenderTargetView (gamma_dec_resource.Get (),
+            &rtv_desc, cpu_handle);
+
+        /* Root constants: offset 0 selects decode (1), offset 1 the function */
+        cl->SetGraphicsRoot32BitConstant (0, 1, 0);
+        cl->SetGraphicsRoot32BitConstant (0, gamma_dec_func_d3d12, 1);
+        cl->OMSetRenderTargets (1, &cpu_handle, FALSE, nullptr);
+        cl->DrawIndexedInstanced (6, 1, 0, 0, 0);
+
+        cpu_handle.Offset (rtv_inc_size);
+      }
+
+      if (!gamma_enc_resource) {
+        store_enc = true;
+        hr = priv->device->CreateCommittedResource (&heap_prop, heap_flags,
+            &resource_desc, D3D12_RESOURCE_STATE_COMMON, nullptr,
+            IID_PPV_ARGS (&gamma_enc_resource));
+        if (!gst_d3d12_result (hr, device)) {
+          GST_ERROR_OBJECT (device, "Couldn't create LUT texture");
+          abort_recording ();
+          return hr;
+        }
+
+        priv->device->CreateRenderTargetView (gamma_enc_resource.Get (),
+            &rtv_desc, cpu_handle);
+
+        /* Same draw with the selector at offset 0 set to encode (0) */
+        cl->SetGraphicsRoot32BitConstant (0, 0, 0);
+        cl->SetGraphicsRoot32BitConstant (0, gamma_enc_func_d3d12, 1);
+        cl->OMSetRenderTargets (1, &cpu_handle, FALSE, nullptr);
+        cl->DrawIndexedInstanced (6, 1, 0, 0, 0);
+      }
+    }
+  }
+
+  hr = cl->Close ();
+  if (!gst_d3d12_result (hr, device)) {
+    GST_ERROR_OBJECT (device, "Couldn't close command list");
+    gst_d3d12_fence_data_unref (fence_data);
+    return hr;
+  }
+
+  ID3D12CommandList *cmd_list[] = { cl.Get () };
+  hr = gst_d3d12_cmd_queue_execute_command_lists (priv->direct_queue,
+      1, cmd_list, fence_val);
+  if (!gst_d3d12_result (hr, device)) {
+    GST_ERROR_OBJECT (device, "Couldn't execute command list");
+    gst_d3d12_fence_data_unref (fence_data);
+    return hr;
+  }
+
+  gst_d3d12_cmd_queue_set_notify (priv->direct_queue, *fence_val, fence_data,
+      (GDestroyNotify) gst_d3d12_fence_data_unref);
+
+  if (need_lut) {
+    /* Only freshly rendered LUTs need to be added to the device cache */
+    if (store_dec)
+      priv->gamma_dec_lut[gamma_dec_func_d3d12] = gamma_dec_resource;
+    if (store_enc)
+      priv->gamma_enc_lut[gamma_enc_func_d3d12] = gamma_enc_resource;
+
+    *gamma_dec = gamma_dec_resource.Detach ();
+    *gamma_enc = gamma_enc_resource.Detach ();
+  }
+
+  *fence = gst_d3d12_cmd_queue_get_fence_handle (priv->direct_queue);
+  (*fence)->AddRef ();
+
+  return S_OK;
+}