From 522d883fc3bc3c8247e87d445db43913ccd5efd1 Mon Sep 17 00:00:00 2001 From: Seungha Yang Date: Mon, 20 Nov 2023 00:26:44 +0900 Subject: [PATCH] d3d11: Add support for Y210 and Y212 formats Part-of: --- .../gst-libs/gst/d3d11/gstd3d11-private.h | 5 ++ .../gst/d3d11/gstd3d11converter-helper.cpp | 37 ++++++++++- .../gst-libs/gst/d3d11/gstd3d11converter.cpp | 16 +++-- .../gst-libs/gst/d3d11/gstd3d11device.cpp | 2 + .../gst-libs/gst/d3d11/gstd3d11format.cpp | 5 ++ .../gst/d3d11/hlsl/CSMain_converter.hlsl | 66 +++++++++++++++++++ .../gst-libs/gst/d3d11/hlsl/meson.build | 2 + 7 files changed, 128 insertions(+), 5 deletions(-) diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3d11/gstd3d11-private.h b/subprojects/gst-plugins-bad/gst-libs/gst/d3d11/gstd3d11-private.h index 2bec861e6c..b2ddcf524a 100644 --- a/subprojects/gst-plugins-bad/gst-libs/gst/d3d11/gstd3d11-private.h +++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3d11/gstd3d11-private.h @@ -33,6 +33,7 @@ G_BEGIN_DECLS "RGBA64_LE, RGB10A2_LE, BGRA, RGBA, BGRx, RGBx, VUYA, NV12, NV21, " \ "P010_10LE, P012_LE, P016_LE, I420, YV12, I420_10LE, I420_12LE, " \ "Y42B, I422_10LE, I422_12LE, Y444, Y444_10LE, Y444_12LE, Y444_16LE, YUY2, Y410, " \ + "Y210, Y212_LE, " \ "GRAY8, GRAY16_LE, AYUV, AYUV64, RGBP, BGRP, GBR, GBR_10LE, GBR_12LE, " \ "GBR_16LE, GBRA, GBRA_10LE, GBRA_12LE" @@ -132,6 +133,10 @@ static const GstD3D11Format _gst_d3d11_default_format_map[] = { D3D11_FORMAT_SUPPORT_SHADER_SAMPLE), MAKE_FORMAT_MAP_YUV_FULL (YUY2, YUY2, R8G8B8A8_UNORM, UNKNOWN, UNKNOWN, UNKNOWN, D3D11_FORMAT_SUPPORT_SHADER_SAMPLE), + MAKE_FORMAT_MAP_YUV_FULL (Y210, Y210, R16G16B16A16_UNORM, UNKNOWN, UNKNOWN, UNKNOWN, + D3D11_FORMAT_SUPPORT_SHADER_SAMPLE), + MAKE_FORMAT_MAP_YUV_FULL (Y212_LE, Y216, R16G16B16A16_UNORM, UNKNOWN, UNKNOWN, UNKNOWN, + D3D11_FORMAT_SUPPORT_SHADER_SAMPLE), MAKE_FORMAT_MAP_RGBP (RGBP, R8_UNORM, UNKNOWN), MAKE_FORMAT_MAP_RGBP (BGRP, R8_UNORM, UNKNOWN), MAKE_FORMAT_MAP_RGBP (GBR, R8_UNORM, UNKNOWN), diff --git 
a/subprojects/gst-plugins-bad/gst-libs/gst/d3d11/gstd3d11converter-helper.cpp b/subprojects/gst-plugins-bad/gst-libs/gst/d3d11/gstd3d11converter-helper.cpp index 824174627a..a968c59a77 100644 --- a/subprojects/gst-plugins-bad/gst-libs/gst/d3d11/gstd3d11converter-helper.cpp +++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3d11/gstd3d11converter-helper.cpp @@ -128,6 +128,18 @@ gst_d3d11_converter_helper_new (GstD3D11Device * device, srv_format = DXGI_FORMAT_R16G16B16A16_UNORM; uav_format = DXGI_FORMAT_R32_UINT; x_unit = 8; + } else if (in_format == GST_VIDEO_FORMAT_AYUV64 && + (out_format == GST_VIDEO_FORMAT_Y210 || + out_format == GST_VIDEO_FORMAT_Y212_LE)) { + entry_point = "CSMain_AYUV64_to_Y210"; + srv_format = DXGI_FORMAT_R16G16B16A16_UNORM; + uav_format = DXGI_FORMAT_R16G16B16A16_UINT; + } else if ((in_format == GST_VIDEO_FORMAT_Y210 || + in_format == GST_VIDEO_FORMAT_Y212_LE) && + out_format == GST_VIDEO_FORMAT_AYUV64) { + entry_point = "CSMain_Y210_to_AYUV64"; + srv_format = DXGI_FORMAT_R16G16B16A16_UINT; + uav_format = DXGI_FORMAT_R16G16B16A16_UNORM; } else if (in_format != out_format) { g_assert_not_reached (); return nullptr; @@ -146,8 +158,26 @@ gst_d3d11_converter_helper_new (GstD3D11Device * device, if (!entry_point.empty ()) { auto handle = gst_d3d11_device_get_device_handle (device); + gboolean try_cs = TRUE; feature_level = handle->GetFeatureLevel (); - if (feature_level >= D3D_FEATURE_LEVEL_11_0) { + if (feature_level < D3D_FEATURE_LEVEL_11_0) { + try_cs = FALSE; + GST_DEBUG ("Device does not support typed UAV"); + } else if (uav_format != DXGI_FORMAT_R32_UINT) { + D3D11_FEATURE_DATA_FORMAT_SUPPORT2 support2; + support2.InFormat = uav_format; + support2.OutFormatSupport2 = 0; + hr = handle->CheckFeatureSupport (D3D11_FEATURE_FORMAT_SUPPORT2, + &support2, sizeof (D3D11_FEATURE_DATA_FORMAT_SUPPORT2)); + /* XXX: D3D11_FORMAT_SUPPORT2_UAV_TYPED_STORE (0x80) + * undefined in old MinGW toolchain */ + if (FAILED (hr) || (support2.OutFormatSupport2 & 0x80) 
== 0) { + try_cs = FALSE; + GST_DEBUG ("Device does not support typed UAV store"); + } + } + + if (try_cs) { std::lock_guard lk (cache_lock); std::shared_ptr source; auto cached = cs_source_cache.find (entry_point); @@ -198,6 +228,8 @@ gst_d3d11_converter_helper_new (GstD3D11Device * device, } if (cs) { + GST_DEBUG ("Compute shader \"%s\" available", entry_point.c_str ()); + self->cs = cs; self->x_unit = x_unit; @@ -211,6 +243,9 @@ gst_d3d11_converter_helper_new (GstD3D11Device * device, self->tg_y = (UINT) ceil (height / (float) y_unit); } } else { + GST_DEBUG ("Creating software converter for \"%s\"", + entry_point.c_str ()); + self->sw_conv = gst_video_converter_new (&self->in_info, &self->out_info, nullptr); } diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3d11/gstd3d11converter.cpp b/subprojects/gst-plugins-bad/gst-libs/gst/d3d11/gstd3d11converter.cpp index 3778dfe994..b4df2fb061 100644 --- a/subprojects/gst-plugins-bad/gst-libs/gst/d3d11/gstd3d11converter.cpp +++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3d11/gstd3d11converter.cpp @@ -2037,10 +2037,16 @@ gst_d3d11_converter_new (GstD3D11Device * device, const GstVideoInfo * in_info, priv->border_color = 0xffff000000000000; /* Preprocess packed and subsampled texture */ - if (GST_VIDEO_INFO_FORMAT (in_info) == GST_VIDEO_FORMAT_YUY2) { + if (GST_VIDEO_INFO_FORMAT (in_info) == GST_VIDEO_FORMAT_YUY2 || + GST_VIDEO_INFO_FORMAT (in_info) == GST_VIDEO_FORMAT_Y210 || + GST_VIDEO_INFO_FORMAT (in_info) == GST_VIDEO_FORMAT_Y212_LE) { GstVideoInfo tmp_info; + GstVideoFormat postproc_format = GST_VIDEO_FORMAT_VUYA; - gst_video_info_set_interlaced_format (&tmp_info, GST_VIDEO_FORMAT_VUYA, + if (GST_VIDEO_INFO_FORMAT (in_info) != GST_VIDEO_FORMAT_YUY2) + postproc_format = GST_VIDEO_FORMAT_AYUV64; + + gst_video_info_set_interlaced_format (&tmp_info, postproc_format, GST_VIDEO_INFO_INTERLACE_MODE (in_info), GST_VIDEO_INFO_WIDTH (in_info), GST_VIDEO_INFO_HEIGHT (in_info)); tmp_info.chroma_site = in_info->chroma_site; 
@@ -2055,11 +2061,13 @@ gst_d3d11_converter_new (GstD3D11Device * device, const GstVideoInfo * in_info, } if (GST_VIDEO_INFO_FORMAT (out_info) == GST_VIDEO_FORMAT_YUY2 || - GST_VIDEO_INFO_FORMAT (out_info) == GST_VIDEO_FORMAT_Y410) { + GST_VIDEO_INFO_FORMAT (out_info) == GST_VIDEO_FORMAT_Y410 || + GST_VIDEO_INFO_FORMAT (out_info) == GST_VIDEO_FORMAT_Y210 || + GST_VIDEO_INFO_FORMAT (out_info) == GST_VIDEO_FORMAT_Y212_LE) { GstVideoInfo tmp_info; GstVideoFormat postproc_format = GST_VIDEO_FORMAT_VUYA; - if (GST_VIDEO_INFO_FORMAT (out_info) == GST_VIDEO_FORMAT_Y410) + if (GST_VIDEO_INFO_FORMAT (out_info) != GST_VIDEO_FORMAT_YUY2) postproc_format = GST_VIDEO_FORMAT_AYUV64; gst_video_info_set_interlaced_format (&tmp_info, postproc_format, diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3d11/gstd3d11device.cpp b/subprojects/gst-plugins-bad/gst-libs/gst/d3d11/gstd3d11device.cpp index cbd318df5c..87ad165322 100644 --- a/subprojects/gst-plugins-bad/gst-libs/gst/d3d11/gstd3d11device.cpp +++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3d11/gstd3d11device.cpp @@ -521,6 +521,8 @@ gst_d3d11_device_setup_format_table (GstD3D11Device * self) case GST_VIDEO_FORMAT_P012_LE: case GST_VIDEO_FORMAT_P016_LE: case GST_VIDEO_FORMAT_YUY2: + case GST_VIDEO_FORMAT_Y210: + case GST_VIDEO_FORMAT_Y212_LE: { gboolean supported = TRUE; diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3d11/gstd3d11format.cpp b/subprojects/gst-plugins-bad/gst-libs/gst/d3d11/gstd3d11format.cpp index 6b1dcc4355..d0388762a7 100644 --- a/subprojects/gst-plugins-bad/gst-libs/gst/d3d11/gstd3d11format.cpp +++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3d11/gstd3d11format.cpp @@ -152,6 +152,7 @@ gst_d3d11_dxgi_format_get_size (DXGI_FORMAT format, guint width, guint height, case DXGI_FORMAT_R8G8_B8G8_UNORM: case DXGI_FORMAT_Y210: case DXGI_FORMAT_Y410: + case DXGI_FORMAT_Y216: case DXGI_FORMAT_R16G16B16A16_UNORM: offset[0] = 0; stride[0] = pitch; @@ -211,6 +212,8 @@ gst_d3d11_dxgi_format_to_gst (DXGI_FORMAT 
format) return GST_VIDEO_FORMAT_P010_10LE; case DXGI_FORMAT_P016: return GST_VIDEO_FORMAT_P016_LE; + case DXGI_FORMAT_Y216: + return GST_VIDEO_FORMAT_Y212_LE; default: break; } @@ -289,6 +292,8 @@ gst_d3d11_dxgi_format_get_alignment (DXGI_FORMAT format) case DXGI_FORMAT_NV12: case DXGI_FORMAT_P010: case DXGI_FORMAT_P016: + case DXGI_FORMAT_Y210: + case DXGI_FORMAT_Y216: return 2; default: break; diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3d11/hlsl/CSMain_converter.hlsl b/subprojects/gst-plugins-bad/gst-libs/gst/d3d11/hlsl/CSMain_converter.hlsl index 60a941701e..b0925f9296 100644 --- a/subprojects/gst-plugins-bad/gst-libs/gst/d3d11/hlsl/CSMain_converter.hlsl +++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3d11/hlsl/CSMain_converter.hlsl @@ -63,6 +63,39 @@ void Execute (uint3 tid) } #endif +#ifdef BUILDING_CSMain_AYUV64_to_Y210 +Texture2D<float4> inTex : register(t0); +RWTexture2D<uint4> outTex : register(u0); + +void Execute (uint3 tid) +{ + float4 val = inTex.Load (uint3(tid.x * 2, tid.y, 0)); + uint3 scaled = val.yzw * 65535; + uint Y0 = scaled.x; + uint U = scaled.y; + uint V = scaled.z; + uint Y1 = inTex.Load (uint3(tid.x * 2 + 1, tid.y, 0)).y * 65535; + outTex[tid.xy] = uint4(Y0, U, Y1, V); +} +#endif + +#ifdef BUILDING_CSMain_Y210_to_AYUV64 +Texture2D<uint4> inTex : register(t0); +RWTexture2D<unorm float4> outTex : register(u0); + +void Execute (uint3 tid) +{ + float4 val = inTex.Load (tid) / 65535.0; + float Y0 = val.r; + float U = val.g; + float Y1 = val.b; + float V = val.a; + + outTex[uint2(tid.x * 2, tid.y)] = float4 (1.0, Y0, U, V); + outTex[uint2(tid.x * 2 + 1, tid.y)] = float4 (1.0, Y1, U, V); +} +#endif + [numthreads(8, 8, 1)] void ENTRY_POINT (uint3 tid : SV_DispatchThreadID) { @@ -115,6 +148,39 @@ static const char g_CSMain_converter_str[] = "}\n" "#endif\n" "\n" +"#ifdef BUILDING_CSMain_AYUV64_to_Y210\n" +"Texture2D<float4> inTex : register(t0);\n" +"RWTexture2D<uint4> outTex : register(u0);\n" +"\n" +"void Execute (uint3 tid)\n" +"{\n" +" float4 val = inTex.Load (uint3(tid.x * 2, 
tid.y, 0));\n" +" uint3 scaled = val.yzw * 65535;\n" +" uint Y0 = scaled.x;\n" +" uint U = scaled.y;\n" +" uint V = scaled.z;\n" +" uint Y1 = inTex.Load (uint3(tid.x * 2 + 1, tid.y, 0)).y * 65535;\n" +" outTex[tid.xy] = uint4(Y0, U, Y1, V);\n" +"}\n" +"#endif\n" +"\n" +"#ifdef BUILDING_CSMain_Y210_to_AYUV64\n" +"Texture2D<uint4> inTex : register(t0);\n" +"RWTexture2D<unorm float4> outTex : register(u0);\n" +"\n" +"void Execute (uint3 tid)\n" +"{\n" +" float4 val = inTex.Load (tid) / 65535.0;\n" +" float Y0 = val.r;\n" +" float U = val.g;\n" +" float Y1 = val.b;\n" +" float V = val.a;\n" +"\n" +" outTex[uint2(tid.x * 2, tid.y)] = float4 (1.0, Y0, U, V);\n" +" outTex[uint2(tid.x * 2 + 1, tid.y)] = float4 (1.0, Y1, U, V);\n" +"}\n" +"#endif\n" +"\n" "[numthreads(8, 8, 1)]\n" "void ENTRY_POINT (uint3 tid : SV_DispatchThreadID)\n" "{\n" diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3d11/hlsl/meson.build b/subprojects/gst-plugins-bad/gst-libs/gst/d3d11/hlsl/meson.build index e9343e5010..3e8b412c47 100644 --- a/subprojects/gst-plugins-bad/gst-libs/gst/d3d11/hlsl/meson.build +++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3d11/hlsl/meson.build @@ -131,6 +131,8 @@ hlsl_cs_entry_points = [ 'CSMain_AYUV64_to_Y410', 'CSMain_VUYA_to_YUY2', 'CSMain_YUY2_to_VUYA', + 'CSMain_AYUV64_to_Y210', + 'CSMain_Y210_to_AYUV64', ] foreach shader : hlsl_cs_entry_points