From 65c23885d448b5cd275917a668e21c6f398725a2 Mon Sep 17 00:00:00 2001
From: Seungha Yang
Date: Fri, 10 Jan 2025 00:38:39 +0900
Subject: [PATCH] d3d12mipmapping: Skip alpha sampling if possible

If neither the input format nor the output format has alpha, skip
alpha sampling, which can reduce the number of instruction slots.

Part-of:
---
 .../gst/d3d12/gstd3d12mipgen-private.h        |   4 +-
 .../gst-libs/gst/d3d12/gstd3d12mipgen.cpp     |   5 +-
 .../gst/d3dshader/gstd3dshadercache.cpp       |   2 +
 .../gst/d3dshader/gstd3dshadercache.h         |   2 +
 .../plugin-hlsl/CSMain_mipgen_ayuv.hlsl       | 417 ++++++++++++++++++
 .../plugin-hlsl/CSMain_mipgen_vuya.hlsl       | 417 ++++++++++++++++++
 .../gst-libs/gst/d3dshader/plugin-hlsl/hlsl.h |   2 +
 .../gst/d3dshader/plugin-hlsl/meson.build     |   2 +
 .../sys/d3d12/gstd3d12mipmapping.cpp          |  11 +-
 9 files changed, 857 insertions(+), 5 deletions(-)
 create mode 100644 subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/CSMain_mipgen_ayuv.hlsl
 create mode 100644 subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/CSMain_mipgen_vuya.hlsl

diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3d12/gstd3d12mipgen-private.h b/subprojects/gst-plugins-bad/gst-libs/gst/d3d12/gstd3d12mipgen-private.h
index ceed7308cd..9266f85d1c 100644
--- a/subprojects/gst-plugins-bad/gst-libs/gst/d3d12/gstd3d12mipgen-private.h
+++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3d12/gstd3d12mipgen-private.h
@@ -21,6 +21,7 @@
 
 #include 
 #include 
+#include 
 
 G_BEGIN_DECLS
 
@@ -30,7 +31,8 @@ GST_D3D12_API
 G_DECLARE_FINAL_TYPE (GstD3D12MipGen, gst_d3d12_mip_gen,
     GST, D3D12_MIP_GEN, GstObject);
 
 GST_D3D12_API
-GstD3D12MipGen * gst_d3d12_mip_gen_new (GstD3D12Device * device);
+GstD3D12MipGen * gst_d3d12_mip_gen_new (GstD3D12Device * device,
+    GstD3DPluginCS cs_type);
 
 GST_D3D12_API
 gboolean gst_d3d12_mip_gen_execute (GstD3D12MipGen * gen,
diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3d12/gstd3d12mipgen.cpp b/subprojects/gst-plugins-bad/gst-libs/gst/d3d12/gstd3d12mipgen.cpp
index 853805cf33..61308e0fb5 100644
--- a/subprojects/gst-plugins-bad/gst-libs/gst/d3d12/gstd3d12mipgen.cpp
+++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3d12/gstd3d12mipgen.cpp
@@ -129,7 +129,7 @@ gst_d3d12_mip_gen_finalize (GObject * object)
 }
 
 GstD3D12MipGen *
-gst_d3d12_mip_gen_new (GstD3D12Device * device)
+gst_d3d12_mip_gen_new (GstD3D12Device * device, GstD3DPluginCS cs_type)
 {
   g_return_val_if_fail (GST_IS_D3D12_DEVICE (device), nullptr);
 
@@ -197,8 +197,7 @@ gst_d3d12_mip_gen_new (GstD3D12Device * device)
   }
 
   GstD3DShaderByteCode byte_code;
-  if (!gst_d3d_plugin_shader_get_cs_blob (GST_D3D_PLUGIN_CS_MIP_GEN,
-          GST_D3D_SM_5_0, &byte_code)) {
+  if (!gst_d3d_plugin_shader_get_cs_blob (cs_type, GST_D3D_SM_5_0, &byte_code)) {
     GST_ERROR_OBJECT (self, "Couldn't get shader byte code");
     gst_object_unref (self);
     return nullptr;
diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/gstd3dshadercache.cpp b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/gstd3dshadercache.cpp
index ef9eeefc20..f19ba362a7 100644
--- a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/gstd3dshadercache.cpp
+++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/gstd3dshadercache.cpp
@@ -88,6 +88,8 @@ static const ShaderItem g_vs_map[] = {
 
 static const ShaderItem g_cs_map[] = {
   {GST_D3D_PLUGIN_CS_MIP_GEN, BUILD_SOURCE (CSMain_mipgen)},
+  {GST_D3D_PLUGIN_CS_MIP_GEN_VUYA, BUILD_SOURCE (CSMain_mipgen_vuya)},
+  {GST_D3D_PLUGIN_CS_MIP_GEN_AYUV, BUILD_SOURCE (CSMain_mipgen_ayuv)},
   {GST_D3D_PLUGIN_CS_YADIF_1, BUILD_SOURCE (CSMain_yadif_1)},
   {GST_D3D_PLUGIN_CS_YADIF_1_10, BUILD_SOURCE (CSMain_yadif_1_10)},
   {GST_D3D_PLUGIN_CS_YADIF_1_12, BUILD_SOURCE (CSMain_yadif_1_12)},
diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/gstd3dshadercache.h b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/gstd3dshadercache.h
index 8601183974..aecb316531 100644
--- a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/gstd3dshadercache.h
+++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/gstd3dshadercache.h
@@ -54,6 +54,8 @@ typedef enum
 
 typedef enum
 {
   GST_D3D_PLUGIN_CS_MIP_GEN,
+  GST_D3D_PLUGIN_CS_MIP_GEN_VUYA,
+  GST_D3D_PLUGIN_CS_MIP_GEN_AYUV,
   GST_D3D_PLUGIN_CS_YADIF_1,
   GST_D3D_PLUGIN_CS_YADIF_1_10,
   GST_D3D_PLUGIN_CS_YADIF_1_12,
diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/CSMain_mipgen_ayuv.hlsl b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/CSMain_mipgen_ayuv.hlsl
new file mode 100644
index 0000000000..29a2183466
--- /dev/null
+++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/CSMain_mipgen_ayuv.hlsl
@@ -0,0 +1,417 @@
+/**
+ * MIT License
+ *
+ * Copyright (c) 2018 Jeremiah van Oosten
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* Source: https://github.com/jpvanoosten/LearningDirectX12 */
+
+#ifdef BUILDING_HLSL
+
+#define BLOCK_SIZE 8
+
+// When reducing the size of a texture, it could be that downscaling the texture
+// will result in less than exactly 50% (1/2) of the original texture size.
+// This happens if either the width, or the height (or both) dimensions of the texture
+// are odd. For example, downscaling a 5x3 texture will result in a 2x1 texture which
+// has a 60% reduction in the texture width and 66% reduction in the height.
+// When this happens, we need to take more samples from the source texture to
+// determine the pixel value in the destination texture.
+
+#define WIDTH_HEIGHT_EVEN 0     // Both the width and the height of the texture are even.
+#define WIDTH_ODD_HEIGHT_EVEN 1 // The texture width is odd and the height is even.
+#define WIDTH_EVEN_HEIGHT_ODD 2 // The texture width is even and the height is odd.
+#define WIDTH_HEIGHT_ODD 3      // Both the width and height of the texture are odd.
+
+struct ComputeShaderInput
+{
+    uint3 GroupID : SV_GroupID;                     // 3D index of the thread group in the dispatch.
+    uint3 GroupThreadID : SV_GroupThreadID;         // 3D index of local thread ID in a thread group.
+    uint3 DispatchThreadID : SV_DispatchThreadID;   // 3D index of global thread ID in the dispatch.
+    uint GroupIndex : SV_GroupIndex;                // Flattened local index of the thread within a thread group.
+};
+
+cbuffer GenerateMipsCB : register( b0 )
+{
+    uint SrcMipLevel;   // Texture level of source mip
+    uint NumMipLevels;  // Number of OutMips to write: [1-4]
+    uint SrcDimension;  // Width and height of the source texture are even or odd.
+    uint padding;
+    float2 TexelSize;   // 1.0 / OutMip1.Dimensions
+}
+
+// Source mip map.
+Texture2D<float4> SrcMip : register( t0 );
+
+// Write up to 4 mip map levels.
+RWTexture2D<float4> OutMip1 : register( u0 );
+RWTexture2D<float4> OutMip2 : register( u1 );
+RWTexture2D<float4> OutMip3 : register( u2 );
+RWTexture2D<float4> OutMip4 : register( u3 );
+
+// Linear clamp sampler.
+SamplerState LinearClampSampler : register( s0 );
+
+// The reason for separating channels is to reduce bank conflicts in the
+// local data memory controller. A large stride will cause more threads
+// to collide on the same memory bank.
+groupshared float gs_Y[64];
+groupshared float gs_U[64];
+groupshared float gs_V[64];
+
+void StoreColor( uint Index, float3 Color )
+{
+    gs_Y[Index] = Color.x;
+    gs_U[Index] = Color.y;
+    gs_V[Index] = Color.z;
+}
+
+float3 LoadColor( uint Index )
+{
+    return float3( gs_Y[Index], gs_U[Index], gs_V[Index] );
+}
+
+[numthreads( BLOCK_SIZE, BLOCK_SIZE, 1 )]
+void ENTRY_POINT( ComputeShaderInput IN )
+{
+    float3 Src1 = (float3)0;
+
+    // One bilinear sample is insufficient when scaling down by more than 2x.
+    // You will slightly undersample in the case where the source dimension
+    // is odd. This is why it's a really good idea to only generate mips on
+    // power-of-two sized textures. Trying to handle the undersampling case
+    // will force this shader to be slower and more complicated as it will
+    // have to take more source texture samples.
+
+    // Determine the path to use based on the dimension of the
+    // source texture.
+    // 0b00(0): Both width and height are even.
+    // 0b01(1): Width is odd, height is even.
+    // 0b10(2): Width is even, height is odd.
+    // 0b11(3): Both width and height are odd.
+    switch ( SrcDimension )
+    {
+        case WIDTH_HEIGHT_EVEN:
+        {
+            float2 UV = TexelSize * ( IN.DispatchThreadID.xy + 0.5 );
+
+            Src1 = SrcMip.SampleLevel( LinearClampSampler, UV, SrcMipLevel ).yzw;
+        }
+        break;
+        case WIDTH_ODD_HEIGHT_EVEN:
+        {
+            // > 2:1 in X dimension
+            // Use 2 bilinear samples to guarantee we don't undersample when downsizing by more than 2x
+            // horizontally.
+            float2 UV1 = TexelSize * ( IN.DispatchThreadID.xy + float2( 0.25, 0.5 ) );
+            float2 Off = TexelSize * float2( 0.5, 0.0 );
+
+            Src1 = 0.5 * ( SrcMip.SampleLevel( LinearClampSampler, UV1, SrcMipLevel ).yzw +
+                           SrcMip.SampleLevel( LinearClampSampler, UV1 + Off, SrcMipLevel ).yzw );
+        }
+        break;
+        case WIDTH_EVEN_HEIGHT_ODD:
+        {
+            // > 2:1 in Y dimension
+            // Use 2 bilinear samples to guarantee we don't undersample when downsizing by more than 2x
+            // vertically.
+            float2 UV1 = TexelSize * ( IN.DispatchThreadID.xy + float2( 0.5, 0.25 ) );
+            float2 Off = TexelSize * float2( 0.0, 0.5 );
+
+            Src1 = 0.5 * ( SrcMip.SampleLevel( LinearClampSampler, UV1, SrcMipLevel ).yzw +
+                           SrcMip.SampleLevel( LinearClampSampler, UV1 + Off, SrcMipLevel ).yzw );
+        }
+        break;
+        case WIDTH_HEIGHT_ODD:
+        {
+            // > 2:1 in both dimensions
+            // Use 4 bilinear samples to guarantee we don't undersample when downsizing by more than 2x
+            // in both directions.
+            float2 UV1 = TexelSize * ( IN.DispatchThreadID.xy + float2( 0.25, 0.25 ) );
+            float2 Off = TexelSize * 0.5;
+
+            Src1 = SrcMip.SampleLevel( LinearClampSampler, UV1, SrcMipLevel ).yzw;
+            Src1 += SrcMip.SampleLevel( LinearClampSampler, UV1 + float2( Off.x, 0.0 ), SrcMipLevel ).yzw;
+            Src1 += SrcMip.SampleLevel( LinearClampSampler, UV1 + float2( 0.0, Off.y ), SrcMipLevel ).yzw;
+            Src1 += SrcMip.SampleLevel( LinearClampSampler, UV1 + float2( Off.x, Off.y ), SrcMipLevel ).yzw;
+            Src1 *= 0.25;
+        }
+        break;
+    }
+
+    OutMip1[IN.DispatchThreadID.xy] = float4(1.0f, Src1);
+
+    // A scalar (constant) branch can exit all threads coherently.
+    if ( NumMipLevels == 1 )
+        return;
+
+    // Without lane swizzle operations, the only way to share data with other
+    // threads is through LDS.
+    StoreColor( IN.GroupIndex, Src1 );
+
+    // This guarantees all LDS writes are complete and that all threads have
+    // executed all instructions so far (and therefore have issued their LDS
+    // write instructions.)
+    GroupMemoryBarrierWithGroupSync();
+
+    // With low three bits for X and high three bits for Y, this bit mask
+    // (binary: 001001) checks that X and Y are even.
+    if ( ( IN.GroupIndex & 0x9 ) == 0 )
+    {
+        float3 Src2 = LoadColor( IN.GroupIndex + 0x01 );
+        float3 Src3 = LoadColor( IN.GroupIndex + 0x08 );
+        float3 Src4 = LoadColor( IN.GroupIndex + 0x09 );
+        Src1 = 0.25 * ( Src1 + Src2 + Src3 + Src4 );
+
+        OutMip2[IN.DispatchThreadID.xy / 2] = float4(1.0f, Src1);
+        StoreColor( IN.GroupIndex, Src1 );
+    }
+
+    if ( NumMipLevels == 2 )
+        return;
+
+    GroupMemoryBarrierWithGroupSync();
+
+    // This bit mask (binary: 011011) checks that X and Y are multiples of four.
+    if ( ( IN.GroupIndex & 0x1B ) == 0 )
+    {
+        float3 Src2 = LoadColor( IN.GroupIndex + 0x02 );
+        float3 Src3 = LoadColor( IN.GroupIndex + 0x10 );
+        float3 Src4 = LoadColor( IN.GroupIndex + 0x12 );
+        Src1 = 0.25 * ( Src1 + Src2 + Src3 + Src4 );
+
+        OutMip3[IN.DispatchThreadID.xy / 4] = float4(1.0f, Src1);
+        StoreColor( IN.GroupIndex, Src1 );
+    }
+
+    if ( NumMipLevels == 3 )
+        return;
+
+    GroupMemoryBarrierWithGroupSync();
+
+    // This bit mask would be 111111 (X & Y multiples of 8), but only one
+    // thread fits that criterion.
+    if ( IN.GroupIndex == 0 )
+    {
+        float3 Src2 = LoadColor( IN.GroupIndex + 0x04 );
+        float3 Src3 = LoadColor( IN.GroupIndex + 0x20 );
+        float3 Src4 = LoadColor( IN.GroupIndex + 0x24 );
+        Src1 = 0.25 * ( Src1 + Src2 + Src3 + Src4 );
+
+        OutMip4[IN.DispatchThreadID.xy / 8] = float4(1.0f, Src1);
+    }
+}
+#else
+static const char str_CSMain_mipgen_ayuv[] =
+"#define BLOCK_SIZE 8\n"
+"\n"
+"// When reducing the size of a texture, it could be that downscaling the texture\n"
+"// will result in less than exactly 50% (1/2) of the original texture size.\n"
+"// This happens if either the width, or the height (or both) dimensions of the texture\n"
+"// are odd. For example, downscaling a 5x3 texture will result in a 2x1 texture which\n"
+"// has a 60% reduction in the texture width and 66% reduction in the height.\n"
+"// When this happens, we need to take more samples from the source texture to\n"
+"// determine the pixel value in the destination texture.\n"
+"\n"
+"#define WIDTH_HEIGHT_EVEN 0     // Both the width and the height of the texture are even.\n"
+"#define WIDTH_ODD_HEIGHT_EVEN 1 // The texture width is odd and the height is even.\n"
+"#define WIDTH_EVEN_HEIGHT_ODD 2 // The texture width is even and the height is odd.\n"
+"#define WIDTH_HEIGHT_ODD 3      // Both the width and height of the texture are odd.\n"
+"\n"
+"struct ComputeShaderInput\n"
+"{\n"
+"    uint3 GroupID : SV_GroupID;                     // 3D index of the thread group in the dispatch.\n"
+"    uint3 GroupThreadID : SV_GroupThreadID;         // 3D index of local thread ID in a thread group.\n"
+"    uint3 DispatchThreadID : SV_DispatchThreadID;   // 3D index of global thread ID in the dispatch.\n"
+"    uint GroupIndex : SV_GroupIndex;                // Flattened local index of the thread within a thread group.\n"
+"};\n"
+"\n"
+"cbuffer GenerateMipsCB : register( b0 )\n"
+"{\n"
+"    uint SrcMipLevel;   // Texture level of source mip\n"
+"    uint NumMipLevels;  // Number of OutMips to write: [1-4]\n"
+"    uint SrcDimension;  // Width and height of the source texture are even or odd.\n"
+"    uint padding;\n"
+"    float2 TexelSize;   // 1.0 / OutMip1.Dimensions\n"
+"}\n"
+"\n"
+"// Source mip map.\n"
+"Texture2D<float4> SrcMip : register( t0 );\n"
+"\n"
+"// Write up to 4 mip map levels.\n"
+"RWTexture2D<float4> OutMip1 : register( u0 );\n"
+"RWTexture2D<float4> OutMip2 : register( u1 );\n"
+"RWTexture2D<float4> OutMip3 : register( u2 );\n"
+"RWTexture2D<float4> OutMip4 : register( u3 );\n"
+"\n"
+"// Linear clamp sampler.\n"
+"SamplerState LinearClampSampler : register( s0 );\n"
+"\n"
+"// The reason for separating channels is to reduce bank conflicts in the\n"
+"// local data memory controller. A large stride will cause more threads\n"
+"// to collide on the same memory bank.\n"
+"groupshared float gs_Y[64];\n"
+"groupshared float gs_U[64];\n"
+"groupshared float gs_V[64];\n"
+"\n"
+"void StoreColor( uint Index, float3 Color )\n"
+"{\n"
+"    gs_Y[Index] = Color.x;\n"
+"    gs_U[Index] = Color.y;\n"
+"    gs_V[Index] = Color.z;\n"
+"}\n"
+"\n"
+"float3 LoadColor( uint Index )\n"
+"{\n"
+"    return float3( gs_Y[Index], gs_U[Index], gs_V[Index] );\n"
+"}\n"
+"\n"
+"[numthreads( BLOCK_SIZE, BLOCK_SIZE, 1 )]\n"
+"void ENTRY_POINT( ComputeShaderInput IN )\n"
+"{\n"
+"    float3 Src1 = (float3)0;\n"
+"\n"
+"    // One bilinear sample is insufficient when scaling down by more than 2x.\n"
+"    // You will slightly undersample in the case where the source dimension\n"
+"    // is odd. This is why it's a really good idea to only generate mips on\n"
+"    // power-of-two sized textures. Trying to handle the undersampling case\n"
+"    // will force this shader to be slower and more complicated as it will\n"
+"    // have to take more source texture samples.\n"
+"\n"
+"    // Determine the path to use based on the dimension of the\n"
+"    // source texture.\n"
+"    // 0b00(0): Both width and height are even.\n"
+"    // 0b01(1): Width is odd, height is even.\n"
+"    // 0b10(2): Width is even, height is odd.\n"
+"    // 0b11(3): Both width and height are odd.\n"
+"    switch ( SrcDimension )\n"
+"    {\n"
+"        case WIDTH_HEIGHT_EVEN:\n"
+"        {\n"
+"            float2 UV = TexelSize * ( IN.DispatchThreadID.xy + 0.5 );\n"
+"\n"
+"            Src1 = SrcMip.SampleLevel( LinearClampSampler, UV, SrcMipLevel ).yzw;\n"
+"        }\n"
+"        break;\n"
+"        case WIDTH_ODD_HEIGHT_EVEN:\n"
+"        {\n"
+"            // > 2:1 in X dimension\n"
+"            // Use 2 bilinear samples to guarantee we don't undersample when downsizing by more than 2x\n"
+"            // horizontally.\n"
+"            float2 UV1 = TexelSize * ( IN.DispatchThreadID.xy + float2( 0.25, 0.5 ) );\n"
+"            float2 Off = TexelSize * float2( 0.5, 0.0 );\n"
+"\n"
+"            Src1 = 0.5 * ( SrcMip.SampleLevel( LinearClampSampler, UV1, SrcMipLevel ).yzw +\n"
+"                           SrcMip.SampleLevel( LinearClampSampler, UV1 + Off, SrcMipLevel ).yzw );\n"
+"        }\n"
+"        break;\n"
+"        case WIDTH_EVEN_HEIGHT_ODD:\n"
+"        {\n"
+"            // > 2:1 in Y dimension\n"
+"            // Use 2 bilinear samples to guarantee we don't undersample when downsizing by more than 2x\n"
+"            // vertically.\n"
+"            float2 UV1 = TexelSize * ( IN.DispatchThreadID.xy + float2( 0.5, 0.25 ) );\n"
+"            float2 Off = TexelSize * float2( 0.0, 0.5 );\n"
+"\n"
+"            Src1 = 0.5 * ( SrcMip.SampleLevel( LinearClampSampler, UV1, SrcMipLevel ).yzw +\n"
+"                           SrcMip.SampleLevel( LinearClampSampler, UV1 + Off, SrcMipLevel ).yzw );\n"
+"        }\n"
+"        break;\n"
+"        case WIDTH_HEIGHT_ODD:\n"
+"        {\n"
+"            // > 2:1 in both dimensions\n"
+"            // Use 4 bilinear samples to guarantee we don't undersample when downsizing by more than 2x\n"
+"            // in both directions.\n"
+"            float2 UV1 = TexelSize * ( IN.DispatchThreadID.xy + float2( 0.25, 0.25 ) );\n"
+"            float2 Off = TexelSize * 0.5;\n"
+"\n"
+"            Src1 = SrcMip.SampleLevel( LinearClampSampler, UV1, SrcMipLevel ).yzw;\n"
+"            Src1 += SrcMip.SampleLevel( LinearClampSampler, UV1 + float2( Off.x, 0.0 ), SrcMipLevel ).yzw;\n"
+"            Src1 += SrcMip.SampleLevel( LinearClampSampler, UV1 + float2( 0.0, Off.y ), SrcMipLevel ).yzw;\n"
+"            Src1 += SrcMip.SampleLevel( LinearClampSampler, UV1 + float2( Off.x, Off.y ), SrcMipLevel ).yzw;\n"
+"            Src1 *= 0.25;\n"
+"        }\n"
+"        break;\n"
+"    }\n"
+"\n"
+"    OutMip1[IN.DispatchThreadID.xy] = float4(1.0f, Src1);\n"
+"\n"
+"    // A scalar (constant) branch can exit all threads coherently.\n"
+"    if ( NumMipLevels == 1 )\n"
+"        return;\n"
+"\n"
+"    // Without lane swizzle operations, the only way to share data with other\n"
+"    // threads is through LDS.\n"
+"    StoreColor( IN.GroupIndex, Src1 );\n"
+"\n"
+"    // This guarantees all LDS writes are complete and that all threads have\n"
+"    // executed all instructions so far (and therefore have issued their LDS\n"
+"    // write instructions.)\n"
+"    GroupMemoryBarrierWithGroupSync();\n"
+"\n"
+"    // With low three bits for X and high three bits for Y, this bit mask\n"
+"    // (binary: 001001) checks that X and Y are even.\n"
+"    if ( ( IN.GroupIndex & 0x9 ) == 0 )\n"
+"    {\n"
+"        float3 Src2 = LoadColor( IN.GroupIndex + 0x01 );\n"
+"        float3 Src3 = LoadColor( IN.GroupIndex + 0x08 );\n"
+"        float3 Src4 = LoadColor( IN.GroupIndex + 0x09 );\n"
+"        Src1 = 0.25 * ( Src1 + Src2 + Src3 + Src4 );\n"
+"\n"
+"        OutMip2[IN.DispatchThreadID.xy / 2] = float4(1.0f, Src1);\n"
+"        StoreColor( IN.GroupIndex, Src1 );\n"
+"    }\n"
+"\n"
+"    if ( NumMipLevels == 2 )\n"
+"        return;\n"
+"\n"
+"    GroupMemoryBarrierWithGroupSync();\n"
+"\n"
+"    // This bit mask (binary: 011011) checks that X and Y are multiples of four.\n"
+"    if ( ( IN.GroupIndex & 0x1B ) == 0 )\n"
+"    {\n"
+"        float3 Src2 = LoadColor( IN.GroupIndex + 0x02 );\n"
+"        float3 Src3 = LoadColor( IN.GroupIndex + 0x10 );\n"
+"        float3 Src4 = LoadColor( IN.GroupIndex + 0x12 );\n"
+"        Src1 = 0.25 * ( Src1 + Src2 + Src3 + Src4 );\n"
+"\n"
+"        OutMip3[IN.DispatchThreadID.xy / 4] = float4(1.0f, Src1);\n"
+"        StoreColor( IN.GroupIndex, Src1 );\n"
+"    }\n"
+"\n"
+"    if ( NumMipLevels == 3 )\n"
+"        return;\n"
+"\n"
+"    GroupMemoryBarrierWithGroupSync();\n"
+"\n"
+"    // This bit mask would be 111111 (X & Y multiples of 8), but only one\n"
+"    // thread fits that criterion.\n"
+"    if ( IN.GroupIndex == 0 )\n"
+"    {\n"
+"        float3 Src2 = LoadColor( IN.GroupIndex + 0x04 );\n"
+"        float3 Src3 = LoadColor( IN.GroupIndex + 0x20 );\n"
+"        float3 Src4 = LoadColor( IN.GroupIndex + 0x24 );\n"
+"        Src1 = 0.25 * ( Src1 + Src2 + Src3 + Src4 );\n"
+"\n"
+"        OutMip4[IN.DispatchThreadID.xy / 8] = float4(1.0f, Src1);\n"
+"    }\n"
+"}\n";
+#endif
diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/CSMain_mipgen_vuya.hlsl b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/CSMain_mipgen_vuya.hlsl
new file mode 100644
index 0000000000..b53e67b2cf
--- /dev/null
+++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/CSMain_mipgen_vuya.hlsl
@@ -0,0 +1,417 @@
+/**
+ * MIT License
+ *
+ * Copyright (c) 2018 Jeremiah van Oosten
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* Source: https://github.com/jpvanoosten/LearningDirectX12 */
+
+#ifdef BUILDING_HLSL
+
+#define BLOCK_SIZE 8
+
+// When reducing the size of a texture, it could be that downscaling the texture
+// will result in less than exactly 50% (1/2) of the original texture size.
+// This happens if either the width, or the height (or both) dimensions of the texture
+// are odd. For example, downscaling a 5x3 texture will result in a 2x1 texture which
+// has a 60% reduction in the texture width and 66% reduction in the height.
+// When this happens, we need to take more samples from the source texture to
+// determine the pixel value in the destination texture.
+
+#define WIDTH_HEIGHT_EVEN 0     // Both the width and the height of the texture are even.
+#define WIDTH_ODD_HEIGHT_EVEN 1 // The texture width is odd and the height is even.
+#define WIDTH_EVEN_HEIGHT_ODD 2 // The texture width is even and the height is odd.
+#define WIDTH_HEIGHT_ODD 3      // Both the width and height of the texture are odd.
+
+struct ComputeShaderInput
+{
+    uint3 GroupID : SV_GroupID;                     // 3D index of the thread group in the dispatch.
+    uint3 GroupThreadID : SV_GroupThreadID;         // 3D index of local thread ID in a thread group.
+    uint3 DispatchThreadID : SV_DispatchThreadID;   // 3D index of global thread ID in the dispatch.
+    uint GroupIndex : SV_GroupIndex;                // Flattened local index of the thread within a thread group.
+};
+
+cbuffer GenerateMipsCB : register( b0 )
+{
+    uint SrcMipLevel;   // Texture level of source mip
+    uint NumMipLevels;  // Number of OutMips to write: [1-4]
+    uint SrcDimension;  // Width and height of the source texture are even or odd.
+    uint padding;
+    float2 TexelSize;   // 1.0 / OutMip1.Dimensions
+}
+
+// Source mip map.
+Texture2D<float4> SrcMip : register( t0 );
+
+// Write up to 4 mip map levels.
+RWTexture2D<float4> OutMip1 : register( u0 );
+RWTexture2D<float4> OutMip2 : register( u1 );
+RWTexture2D<float4> OutMip3 : register( u2 );
+RWTexture2D<float4> OutMip4 : register( u3 );
+
+// Linear clamp sampler.
+SamplerState LinearClampSampler : register( s0 );
+
+// The reason for separating channels is to reduce bank conflicts in the
+// local data memory controller. A large stride will cause more threads
+// to collide on the same memory bank.
+groupshared float gs_Y[64];
+groupshared float gs_U[64];
+groupshared float gs_V[64];
+
+void StoreColor( uint Index, float3 Color )
+{
+    gs_Y[Index] = Color.x;
+    gs_U[Index] = Color.y;
+    gs_V[Index] = Color.z;
+}
+
+float3 LoadColor( uint Index )
+{
+    return float3( gs_Y[Index], gs_U[Index], gs_V[Index] );
+}
+
+[numthreads( BLOCK_SIZE, BLOCK_SIZE, 1 )]
+void ENTRY_POINT( ComputeShaderInput IN )
+{
+    float3 Src1 = (float3)0;
+
+    // One bilinear sample is insufficient when scaling down by more than 2x.
+    // You will slightly undersample in the case where the source dimension
+    // is odd. This is why it's a really good idea to only generate mips on
+    // power-of-two sized textures. Trying to handle the undersampling case
+    // will force this shader to be slower and more complicated as it will
+    // have to take more source texture samples.
+
+    // Determine the path to use based on the dimension of the
+    // source texture.
+    // 0b00(0): Both width and height are even.
+    // 0b01(1): Width is odd, height is even.
+    // 0b10(2): Width is even, height is odd.
+    // 0b11(3): Both width and height are odd.
+    switch ( SrcDimension )
+    {
+        case WIDTH_HEIGHT_EVEN:
+        {
+            float2 UV = TexelSize * ( IN.DispatchThreadID.xy + 0.5 );
+
+            Src1 = SrcMip.SampleLevel( LinearClampSampler, UV, SrcMipLevel ).xyz;
+        }
+        break;
+        case WIDTH_ODD_HEIGHT_EVEN:
+        {
+            // > 2:1 in X dimension
+            // Use 2 bilinear samples to guarantee we don't undersample when downsizing by more than 2x
+            // horizontally.
+            float2 UV1 = TexelSize * ( IN.DispatchThreadID.xy + float2( 0.25, 0.5 ) );
+            float2 Off = TexelSize * float2( 0.5, 0.0 );
+
+            Src1 = 0.5 * ( SrcMip.SampleLevel( LinearClampSampler, UV1, SrcMipLevel ).xyz +
+                           SrcMip.SampleLevel( LinearClampSampler, UV1 + Off, SrcMipLevel ).xyz );
+        }
+        break;
+        case WIDTH_EVEN_HEIGHT_ODD:
+        {
+            // > 2:1 in Y dimension
+            // Use 2 bilinear samples to guarantee we don't undersample when downsizing by more than 2x
+            // vertically.
+            float2 UV1 = TexelSize * ( IN.DispatchThreadID.xy + float2( 0.5, 0.25 ) );
+            float2 Off = TexelSize * float2( 0.0, 0.5 );
+
+            Src1 = 0.5 * ( SrcMip.SampleLevel( LinearClampSampler, UV1, SrcMipLevel ).xyz +
+                           SrcMip.SampleLevel( LinearClampSampler, UV1 + Off, SrcMipLevel ).xyz );
+        }
+        break;
+        case WIDTH_HEIGHT_ODD:
+        {
+            // > 2:1 in both dimensions
+            // Use 4 bilinear samples to guarantee we don't undersample when downsizing by more than 2x
+            // in both directions.
+            float2 UV1 = TexelSize * ( IN.DispatchThreadID.xy + float2( 0.25, 0.25 ) );
+            float2 Off = TexelSize * 0.5;
+
+            Src1 = SrcMip.SampleLevel( LinearClampSampler, UV1, SrcMipLevel ).xyz;
+            Src1 += SrcMip.SampleLevel( LinearClampSampler, UV1 + float2( Off.x, 0.0 ), SrcMipLevel ).xyz;
+            Src1 += SrcMip.SampleLevel( LinearClampSampler, UV1 + float2( 0.0, Off.y ), SrcMipLevel ).xyz;
+            Src1 += SrcMip.SampleLevel( LinearClampSampler, UV1 + float2( Off.x, Off.y ), SrcMipLevel ).xyz;
+            Src1 *= 0.25;
+        }
+        break;
+    }
+
+    OutMip1[IN.DispatchThreadID.xy] = float4(Src1, 1.0f);
+
+    // A scalar (constant) branch can exit all threads coherently.
+    if ( NumMipLevels == 1 )
+        return;
+
+    // Without lane swizzle operations, the only way to share data with other
+    // threads is through LDS.
+    StoreColor( IN.GroupIndex, Src1 );
+
+    // This guarantees all LDS writes are complete and that all threads have
+    // executed all instructions so far (and therefore have issued their LDS
+    // write instructions.)
+    GroupMemoryBarrierWithGroupSync();
+
+    // With low three bits for X and high three bits for Y, this bit mask
+    // (binary: 001001) checks that X and Y are even.
+    if ( ( IN.GroupIndex & 0x9 ) == 0 )
+    {
+        float3 Src2 = LoadColor( IN.GroupIndex + 0x01 );
+        float3 Src3 = LoadColor( IN.GroupIndex + 0x08 );
+        float3 Src4 = LoadColor( IN.GroupIndex + 0x09 );
+        Src1 = 0.25 * ( Src1 + Src2 + Src3 + Src4 );
+
+        OutMip2[IN.DispatchThreadID.xy / 2] = float4(Src1, 1.0f);
+        StoreColor( IN.GroupIndex, Src1 );
+    }
+
+    if ( NumMipLevels == 2 )
+        return;
+
+    GroupMemoryBarrierWithGroupSync();
+
+    // This bit mask (binary: 011011) checks that X and Y are multiples of four.
+    if ( ( IN.GroupIndex & 0x1B ) == 0 )
+    {
+        float3 Src2 = LoadColor( IN.GroupIndex + 0x02 );
+        float3 Src3 = LoadColor( IN.GroupIndex + 0x10 );
+        float3 Src4 = LoadColor( IN.GroupIndex + 0x12 );
+        Src1 = 0.25 * ( Src1 + Src2 + Src3 + Src4 );
+
+        OutMip3[IN.DispatchThreadID.xy / 4] = float4(Src1, 1.0f);
+        StoreColor( IN.GroupIndex, Src1 );
+    }
+
+    if ( NumMipLevels == 3 )
+        return;
+
+    GroupMemoryBarrierWithGroupSync();
+
+    // This bit mask would be 111111 (X & Y multiples of 8), but only one
+    // thread fits that criterion.
+    if ( IN.GroupIndex == 0 )
+    {
+        float3 Src2 = LoadColor( IN.GroupIndex + 0x04 );
+        float3 Src3 = LoadColor( IN.GroupIndex + 0x20 );
+        float3 Src4 = LoadColor( IN.GroupIndex + 0x24 );
+        Src1 = 0.25 * ( Src1 + Src2 + Src3 + Src4 );
+
+        OutMip4[IN.DispatchThreadID.xy / 8] = float4(Src1, 1.0f);
+    }
+}
+#else
+static const char str_CSMain_mipgen_vuya[] =
+"#define BLOCK_SIZE 8\n"
+"\n"
+"// When reducing the size of a texture, it could be that downscaling the texture\n"
+"// will result in less than exactly 50% (1/2) of the original texture size.\n"
+"// This happens if either the width, or the height (or both) dimensions of the texture\n"
+"// are odd. For example, downscaling a 5x3 texture will result in a 2x1 texture which\n"
+"// has a 60% reduction in the texture width and 66% reduction in the height.\n"
+"// When this happens, we need to take more samples from the source texture to\n"
+"// determine the pixel value in the destination texture.\n"
+"\n"
+"#define WIDTH_HEIGHT_EVEN 0     // Both the width and the height of the texture are even.\n"
+"#define WIDTH_ODD_HEIGHT_EVEN 1 // The texture width is odd and the height is even.\n"
+"#define WIDTH_EVEN_HEIGHT_ODD 2 // The texture width is even and the height is odd.\n"
+"#define WIDTH_HEIGHT_ODD 3      // Both the width and height of the texture are odd.\n"
+"\n"
+"struct ComputeShaderInput\n"
+"{\n"
+"    uint3 GroupID : SV_GroupID;                     // 3D index of the thread group in the dispatch.\n"
+"    uint3 GroupThreadID : SV_GroupThreadID;         // 3D index of local thread ID in a thread group.\n"
+"    uint3 DispatchThreadID : SV_DispatchThreadID;   // 3D index of global thread ID in the dispatch.\n"
+"    uint GroupIndex : SV_GroupIndex;                // Flattened local index of the thread within a thread group.\n"
+"};\n"
+"\n"
+"cbuffer GenerateMipsCB : register( b0 )\n"
+"{\n"
+"    uint SrcMipLevel;   // Texture level of source mip\n"
+"    uint NumMipLevels;  // Number of OutMips to write: [1-4]\n"
+"    uint SrcDimension;  // Width and height of the source texture are even or odd.\n"
+"    uint padding;\n"
+"    float2 TexelSize;   // 1.0 / OutMip1.Dimensions\n"
+"}\n"
+"\n"
+"// Source mip map.\n"
+"Texture2D<float4> SrcMip : register( t0 );\n"
+"\n"
+"// Write up to 4 mip map levels.\n"
+"RWTexture2D<float4> OutMip1 : register( u0 );\n"
+"RWTexture2D<float4> OutMip2 : register( u1 );\n"
+"RWTexture2D<float4> OutMip3 : register( u2 );\n"
+"RWTexture2D<float4> OutMip4 : register( u3 );\n"
+"\n"
+"// Linear clamp sampler.\n"
+"SamplerState LinearClampSampler : register( s0 );\n"
+"\n"
+"// The reason for separating channels is to reduce bank conflicts in the\n"
+"// local data memory controller. A large stride will cause more threads\n"
+"// to collide on the same memory bank.\n"
+"groupshared float gs_Y[64];\n"
+"groupshared float gs_U[64];\n"
+"groupshared float gs_V[64];\n"
+"\n"
+"void StoreColor( uint Index, float3 Color )\n"
+"{\n"
+"    gs_Y[Index] = Color.x;\n"
+"    gs_U[Index] = Color.y;\n"
+"    gs_V[Index] = Color.z;\n"
+"}\n"
+"\n"
+"float3 LoadColor( uint Index )\n"
+"{\n"
+"    return float3( gs_Y[Index], gs_U[Index], gs_V[Index] );\n"
+"}\n"
+"\n"
+"[numthreads( BLOCK_SIZE, BLOCK_SIZE, 1 )]\n"
+"void ENTRY_POINT( ComputeShaderInput IN )\n"
+"{\n"
+"    float3 Src1 = (float3)0;\n"
+"\n"
+"    // One bilinear sample is insufficient when scaling down by more than 2x.\n"
+"    // You will slightly undersample in the case where the source dimension\n"
+"    // is odd. This is why it's a really good idea to only generate mips on\n"
+"    // power-of-two sized textures. Trying to handle the undersampling case\n"
+"    // will force this shader to be slower and more complicated as it will\n"
+"    // have to take more source texture samples.\n"
+"\n"
+"    // Determine the path to use based on the dimension of the\n"
+"    // source texture.\n"
+"    // 0b00(0): Both width and height are even.\n"
+"    // 0b01(1): Width is odd, height is even.\n"
+"    // 0b10(2): Width is even, height is odd.\n"
+"    // 0b11(3): Both width and height are odd.\n"
+"    switch ( SrcDimension )\n"
+"    {\n"
+"        case WIDTH_HEIGHT_EVEN:\n"
+"        {\n"
+"            float2 UV = TexelSize * ( IN.DispatchThreadID.xy + 0.5 );\n"
+"\n"
+"            Src1 = SrcMip.SampleLevel( LinearClampSampler, UV, SrcMipLevel ).xyz;\n"
+"        }\n"
+"        break;\n"
+"        case WIDTH_ODD_HEIGHT_EVEN:\n"
+"        {\n"
+"            // > 2:1 in X dimension\n"
+"            // Use 2 bilinear samples to guarantee we don't undersample when downsizing by more than 2x\n"
+"            // horizontally.\n"
+"            float2 UV1 = TexelSize * ( IN.DispatchThreadID.xy + float2( 0.25, 0.5 ) );\n"
+"            float2 Off = TexelSize * float2( 0.5, 0.0 );\n"
+"\n"
+"            Src1 = 0.5 * ( SrcMip.SampleLevel( LinearClampSampler, UV1, SrcMipLevel ).xyz +\n"
+"                           SrcMip.SampleLevel( LinearClampSampler, UV1 + Off, SrcMipLevel ).xyz );\n"
+"        }\n"
+"        break;\n"
+"        case WIDTH_EVEN_HEIGHT_ODD:\n"
+"        {\n"
+"            // > 2:1 in Y dimension\n"
+"            // Use 2 bilinear samples to guarantee we don't undersample when downsizing by more than 2x\n"
+"            // vertically.\n"
+"            float2 UV1 = TexelSize * ( IN.DispatchThreadID.xy + float2( 0.5, 0.25 ) );\n"
+"            float2 Off = TexelSize * float2( 0.0, 0.5 );\n"
+"\n"
+"            Src1 = 0.5 * ( SrcMip.SampleLevel( LinearClampSampler, UV1, SrcMipLevel ).xyz +\n"
+"                           SrcMip.SampleLevel( LinearClampSampler, UV1 + Off, SrcMipLevel ).xyz );\n"
+"        }\n"
+"        break;\n"
+"        case WIDTH_HEIGHT_ODD:\n"
+"        {\n"
+"            // > 2:1 in both dimensions\n"
+"            // Use 4 bilinear samples to guarantee we don't undersample when downsizing by more than 2x\n"
+"            // in both directions.\n"
+"            float2 UV1 = TexelSize * ( IN.DispatchThreadID.xy + float2( 0.25, 0.25 ) );\n"
+"            float2 Off = TexelSize * 0.5;\n"
+"\n"
+"            Src1 = SrcMip.SampleLevel( LinearClampSampler, UV1, SrcMipLevel ).xyz;\n"
+"            Src1 += SrcMip.SampleLevel( LinearClampSampler, UV1 + float2( Off.x, 0.0 ), SrcMipLevel ).xyz;\n"
+"            Src1 += SrcMip.SampleLevel( LinearClampSampler, UV1 + float2( 0.0, Off.y ), SrcMipLevel ).xyz;\n"
+"            Src1 += SrcMip.SampleLevel( LinearClampSampler, UV1 + float2( Off.x, Off.y ), SrcMipLevel ).xyz;\n"
+"            Src1 *= 0.25;\n"
+"        }\n"
+"        break;\n"
+"    }\n"
+"\n"
+"    OutMip1[IN.DispatchThreadID.xy] = float4(Src1, 1.0f);\n"
+"\n"
+"    // A scalar (constant) branch can exit all threads coherently.\n"
+"    if ( NumMipLevels == 1 )\n"
+"        return;\n"
+"\n"
+"    // Without lane swizzle operations, the only way to share data with other\n"
+"    // threads is through LDS.\n"
+"    StoreColor( IN.GroupIndex, Src1 );\n"
+"\n"
+"    // This guarantees all LDS writes are complete and that all threads have\n"
+"    // executed all instructions so far (and therefore have issued their LDS\n"
+"    // write instructions.)\n"
+"    GroupMemoryBarrierWithGroupSync();\n"
+"\n"
+"    // With low three bits for X and high three bits for Y, this bit mask\n"
+"    // (binary: 001001) checks that X and Y are even.\n"
+"    if ( ( IN.GroupIndex & 0x9 ) == 0 )\n"
+"    {\n"
+"        float3 Src2 = LoadColor( IN.GroupIndex + 0x01 );\n"
+"        float3 Src3 = LoadColor( IN.GroupIndex + 0x08 );\n"
+"        float3 Src4 = LoadColor( IN.GroupIndex + 0x09 );\n"
+"        Src1 = 0.25 * ( Src1 + Src2 + Src3 + Src4 );\n"
+"\n"
+"        OutMip2[IN.DispatchThreadID.xy / 2] = float4(Src1, 1.0f);\n"
+"        StoreColor( IN.GroupIndex, Src1 );\n"
+"    }\n"
+"\n"
+"    if ( NumMipLevels == 2 )\n"
+"        return;\n"
+"\n"
+"    GroupMemoryBarrierWithGroupSync();\n"
+"\n"
+"    // This bit mask (binary: 011011) checks that X and Y are multiples of four.\n"
+"    if ( ( IN.GroupIndex & 0x1B ) == 0 )\n"
+"    {\n"
+"        float3 Src2 = LoadColor( IN.GroupIndex + 0x02 );\n"
+"        float3 Src3 = LoadColor( IN.GroupIndex + 0x10 );\n"
+"        float3 Src4 = LoadColor( IN.GroupIndex + 0x12 );\n"
+"        Src1 = 0.25 * ( Src1 + Src2 + Src3 + Src4 );\n"
+"\n"
+"        OutMip3[IN.DispatchThreadID.xy / 4] = float4(Src1, 1.0f);\n"
+"        StoreColor( IN.GroupIndex, Src1 );\n"
+"    }\n"
+"\n"
+"    if ( NumMipLevels == 3 )\n"
+"        return;\n"
+"\n"
+"    GroupMemoryBarrierWithGroupSync();\n"
+"\n"
+"    // This bit mask would be 111111 (X & Y multiples of 8), but only one\n"
+"    // thread fits that criterion.\n"
+"    if ( IN.GroupIndex == 0 )\n"
+"    {\n"
+"        float3 Src2 = LoadColor( IN.GroupIndex + 0x04 );\n"
+"        float3 Src3 = LoadColor( IN.GroupIndex + 0x20 );\n"
+"        float3 Src4 = LoadColor( IN.GroupIndex + 0x24 );\n"
+"        Src1 = 0.25 * ( Src1 + Src2 + Src3 + Src4 );\n"
+"\n"
+"        OutMip4[IN.DispatchThreadID.xy / 8] = float4(Src1, 1.0f);\n"
+"    }\n"
+"}\n";
+#endif
diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/hlsl.h b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/hlsl.h
index acb9c7fce0..59aed914d2 100644
--- a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/hlsl.h
+++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/hlsl.h
@@ -33,6 +33,8 @@
 #include "VSMain_coord.hlsl"
 #include "VSMain_pos.hlsl"
 #include "CSMain_mipgen.hlsl"
+#include "CSMain_mipgen_vuya.hlsl"
+#include "CSMain_mipgen_ayuv.hlsl"
 #include "CSMain_yadif_1.hlsl"
 #include "CSMain_yadif_1_10.hlsl"
 #include "CSMain_yadif_1_12.hlsl"
diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/meson.build b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/meson.build
index 33f69b3015..38550d370f 100644
--- a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/meson.build
+++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/meson.build
@@ -13,6 +13,8 @@ hlsl_sources = [
   ['VSMain_coord', 'vs'],
   ['VSMain_pos', 'vs'],
   ['CSMain_mipgen', 'cs'],
+  ['CSMain_mipgen_vuya', 'cs'],
+  ['CSMain_mipgen_ayuv', 'cs'],
   ['CSMain_yadif_1_10', 'cs'],
   ['CSMain_yadif_1_12', 'cs'],
   ['CSMain_yadif_1', 'cs'],
diff --git a/subprojects/gst-plugins-bad/sys/d3d12/gstd3d12mipmapping.cpp b/subprojects/gst-plugins-bad/sys/d3d12/gstd3d12mipmapping.cpp
index 2bcfce50d7..a79aaf6a05 100644
--- a/subprojects/gst-plugins-bad/sys/d3d12/gstd3d12mipmapping.cpp
+++ b/subprojects/gst-plugins-bad/sys/d3d12/gstd3d12mipmapping.cpp
@@ -901,7 +901,16 @@ gst_d3d12_mip_mapping_set_info (GstD3D12BaseFilter * filter,
     return FALSE;
   }
 
-  ctx->gen = gst_d3d12_mip_gen_new (filter->device);
+  GstD3DPluginCS cs_type = GST_D3D_PLUGIN_CS_MIP_GEN;
+  if (!GST_VIDEO_INFO_HAS_ALPHA (in_info)) {
+    GST_DEBUG_OBJECT (self, "Input has no alpha, use alpha-skipping shader");
+    if (GST_VIDEO_INFO_FORMAT (out_info) == GST_VIDEO_FORMAT_AYUV64)
+      cs_type = GST_D3D_PLUGIN_CS_MIP_GEN_AYUV;
+    else
+      cs_type = GST_D3D_PLUGIN_CS_MIP_GEN_VUYA;
+  }
+
+  ctx->gen = gst_d3d12_mip_gen_new (filter->device, cs_type);
   if (!ctx->gen) {
     GST_ERROR_OBJECT (self, "Couldn't create mip generator");
     return FALSE;
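
A short standalone sketch of the flat-index arithmetic the shaders above rely on (an illustration only, not part of the patch): with BLOCK_SIZE == 8, SV_GroupIndex packs the local X coordinate into its low three bits and the local Y coordinate into the next three bits, so masking with 0x9 (binary 001001) keeps exactly the threads whose X and Y are both even.

  /* Illustration only; mirrors the SV_GroupIndex bit layout used by the
   * mipgen shaders (8x8 thread group, index == y * 8 + x). */
  #include <stdio.h>

  int
  main (void)
  {
    for (unsigned int index = 0; index < 64; index++) {
      unsigned int x = index & 0x7;        /* low three bits  */
      unsigned int y = (index >> 3) & 0x7; /* next three bits */

      /* 0x9 == 0b001001: bit 0 of X and bit 0 of Y must both be clear,
       * i.e. both coordinates are even; these are the 16 threads that
       * get to write the second mip level */
      if ((index & 0x9) == 0)
        printf ("index %2u -> (%u, %u)\n", index, x, y);
    }

    return 0;
  }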