mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2024-12-30 12:10:37 +00:00
cef201734c
Adding a new element for texture conversion from single mip level texture to mipmapping enabled RGBA texture Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/7555>
421 lines
17 KiB
HLSL
421 lines
17 KiB
HLSL
/**
|
|
* MIT License
|
|
*
|
|
* Copyright (c) 2018 Jeremiah van Oosten
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
* of this software and associated documentation files (the "Software"), to deal
|
|
* in the Software without restriction, including without limitation the rights
|
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
* copies of the Software, and to permit persons to whom the Software is
|
|
* furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included in all
|
|
* copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
* SOFTWARE.
|
|
*/
|
|
|
|
/* Source: https://github.com/jpvanoosten/LearningDirectX12 */
|
|
|
|
#ifdef BUILDING_HLSL
|
|
|
|
// Edge length of the square thread group. The entry point below is declared
// with numthreads( BLOCK_SIZE, BLOCK_SIZE, 1 ), i.e. 8 * 8 = 64 threads per
// group, which matches the 64-entry groupshared arrays.
#define BLOCK_SIZE 8
|
|
|
|
// When reducing the size of a texture, it could be that downscaling the texture
|
|
// will result in less than exactly 50% (1/2) of the original texture size.
|
|
// This happens if either the width, or the height (or both) dimensions of the texture
|
|
// are odd. For example, downscaling a 5x3 texture will result in a 2x1 texture which
|
|
// has a 60% reduction in the texture width and 66% reduction in the height.
|
|
// When this happens, we need to take more samples from the source texture to
|
|
// determine the pixel value in the destination texture.
|
|
|
|
// Values of the SrcDimension constant, which selects the sampling path in the
// entry point. Bit 0 is set when the source width is odd and bit 1 is set when
// the source height is odd.
#define WIDTH_HEIGHT_EVEN 0 // Both the width and the height of the texture are even.
|
|
#define WIDTH_ODD_HEIGHT_EVEN 1 // The texture width is odd and the height is even.
|
|
#define WIDTH_EVEN_HEIGHT_ODD 2 // The texture width is even and the height is odd.
|
|
#define WIDTH_HEIGHT_ODD 3 // Both the width and height of the texture are odd.
|
|
|
|
// System-value inputs handed to the compute shader entry point. The entry
// point in this file only reads DispatchThreadID.xy and GroupIndex; GroupID
// and GroupThreadID are declared but not used here.
struct ComputeShaderInput
|
|
{
|
|
uint3 GroupID : SV_GroupID; // 3D index of the thread group in the dispatch.
|
|
uint3 GroupThreadID : SV_GroupThreadID; // 3D index of local thread ID in a thread group.
|
|
uint3 DispatchThreadID : SV_DispatchThreadID; // 3D index of global thread ID in the dispatch.
|
|
uint GroupIndex : SV_GroupIndex; // Flattened local index of the thread within a thread group.
|
|
};
|
|
|
|
// Per-dispatch constants, bound to constant buffer slot b0.
cbuffer GenerateMipsCB : register( b0 )
|
|
{
|
|
uint SrcMipLevel; // Texture level of source mip (passed as the LOD argument of SampleLevel).
|
|
uint NumMipLevels; // Number of OutMips to write: [1-4]
|
|
uint SrcDimension; // Width and height of the source texture are even or odd (one of the WIDTH_* values above).
|
|
uint padding; // Never read by the shader. NOTE(review): presumably mirrors the CPU-side struct layout - confirm.
|
|
float2 TexelSize; // 1.0 / OutMip1.Dimensions
|
|
}
|
|
|
|
// Source mip map. The entry point samples it at level SrcMipLevel.
|
|
Texture2D<float4> SrcMip : register( t0 );
|
|
|
|
// Write up to 4 mip map levels. OutMip1 is written at the dispatch thread
// coordinate; OutMip2, OutMip3 and OutMip4 are written at that coordinate
// divided by 2, 4 and 8 respectively.
|
|
RWTexture2D<float4> OutMip1 : register( u0 );
|
|
RWTexture2D<float4> OutMip2 : register( u1 );
|
|
RWTexture2D<float4> OutMip3 : register( u2 );
|
|
RWTexture2D<float4> OutMip4 : register( u3 );
|
|
|
|
// Linear clamp sampler.
// NOTE(review): the filter and address modes are not set in this file; the
// name assumes the host binds a linear/clamp sampler at s0 - confirm.
|
|
SamplerState LinearClampSampler : register( s0 );
|
|
|
|
// Group-shared scratch storage for one color per thread of the 8x8 group.
// The entry point indexes these arrays with SV_GroupIndex (0..63).
// The reason for separating channels is to reduce bank conflicts in the
|
|
// local data memory controller. A large stride will cause more threads
|
|
// to collide on the same memory bank.
|
|
groupshared float gs_R[64];
|
|
groupshared float gs_G[64];
|
|
groupshared float gs_B[64];
|
|
groupshared float gs_A[64];
|
|
|
|
// Writes one RGBA color into the per-channel groupshared arrays.
//   Index: slot to write; callers in this file pass SV_GroupIndex.
//   Color: value to store, one component per channel array.
void StoreColor( uint Index, float4 Color )
{
    // .xyzw and .rgba address the same four components.
    gs_R[Index] = Color.x;
    gs_G[Index] = Color.y;
    gs_B[Index] = Color.z;
    gs_A[Index] = Color.w;
}
|
|
|
|
// Reads back the RGBA color stored at slot Index of the per-channel
// groupshared arrays and reassembles it into a float4.
float4 LoadColor( uint Index )
{
    float4 Color;
    Color.r = gs_R[Index];
    Color.g = gs_G[Index];
    Color.b = gs_B[Index];
    Color.a = gs_A[Index];
    return Color;
}
|
|
|
|
// Takes one bilinear sample of the source mip (level SrcMipLevel) at UV.
float4 SampleSrc( float2 UV )
{
    return SrcMip.SampleLevel( LinearClampSampler, UV, SrcMipLevel );
}

// Averages this thread's color with three colors held in groupshared memory:
// the slot StepX further along, the slot StepY further along, and the slot
// StepX + StepY further along. With the low three bits of the lane index
// holding X and the high three bits holding Y, these are the horizontal,
// vertical and diagonal neighbours of the 2x2 footprint being reduced.
float4 AverageQuad( float4 Own, uint Lane, uint StepX, uint StepY )
{
    float4 NextCol = LoadColor( Lane + StepX );
    float4 NextRow = LoadColor( Lane + StepY );
    float4 Diag = LoadColor( Lane + StepX + StepY );
    return 0.25 * ( Own + NextCol + NextRow + Diag );
}

// Generates up to four mip levels from SrcMip in a single dispatch.
//
// Every thread produces one texel of OutMip1 by sampling the source. The
// remaining levels are produced by repeatedly averaging 2x2 footprints of the
// previous level through groupshared memory, so one 8x8 thread group yields
// an 8x8 tile of OutMip1, 4x4 of OutMip2, 2x2 of OutMip3 and 1 texel of
// OutMip4. NumMipLevels decides how many of those levels are written.
[numthreads( BLOCK_SIZE, BLOCK_SIZE, 1 )]
void ENTRY_POINT( ComputeShaderInput IN )
{
    const uint2 Texel = IN.DispatchThreadID.xy;
    const uint Lane = IN.GroupIndex;

    float4 Color = (float4)0;

    // A single bilinear tap covers a 2x2 source footprint, which is only
    // enough when the source dimension is even. For an odd dimension the
    // reduction is more than 2:1, so extra taps are taken along that axis to
    // avoid undersampling. SrcDimension encodes which axes are odd:
    //   0b00(0): width even, height even  -> 1 tap
    //   0b01(1): width odd,  height even  -> 2 taps along X
    //   0b10(2): width even, height odd   -> 2 taps along Y
    //   0b11(3): width odd,  height odd   -> 4 taps
    // Any other value leaves Color at zero.
    switch ( SrcDimension )
    {
        case WIDTH_HEIGHT_EVEN:
        {
            Color = SampleSrc( TexelSize * ( Texel + 0.5 ) );
        }
        break;
        case WIDTH_ODD_HEIGHT_EVEN:
        {
            // More than 2:1 horizontally: two taps half a destination texel apart in X.
            float2 Base = TexelSize * ( Texel + float2( 0.25, 0.5 ) );
            float2 Delta = TexelSize * float2( 0.5, 0.0 );

            Color = 0.5 * ( SampleSrc( Base ) + SampleSrc( Base + Delta ) );
        }
        break;
        case WIDTH_EVEN_HEIGHT_ODD:
        {
            // More than 2:1 vertically: two taps half a destination texel apart in Y.
            float2 Base = TexelSize * ( Texel + float2( 0.5, 0.25 ) );
            float2 Delta = TexelSize * float2( 0.0, 0.5 );

            Color = 0.5 * ( SampleSrc( Base ) + SampleSrc( Base + Delta ) );
        }
        break;
        case WIDTH_HEIGHT_ODD:
        {
            // More than 2:1 in both dimensions: four taps on a half-texel grid.
            float2 Base = TexelSize * ( Texel + float2( 0.25, 0.25 ) );
            float2 Delta = TexelSize * 0.5;

            float4 Sum = SampleSrc( Base );
            Sum += SampleSrc( Base + float2( Delta.x, 0.0 ) );
            Sum += SampleSrc( Base + float2( 0.0, Delta.y ) );
            Sum += SampleSrc( Base + float2( Delta.x, Delta.y ) );
            Color = Sum * 0.25;
        }
        break;
    }

    OutMip1[Texel] = Color;

    // NumMipLevels comes from the constant buffer, so this branch is taken
    // by all threads of the group together.
    if ( NumMipLevels == 1 )
        return;

    // Publish this thread's texel through groupshared memory so that other
    // threads of the group can read it.
    StoreColor( Lane, Color );

    // Wait until every thread of the group has written its slot.
    GroupMemoryBarrierWithGroupSync();

    // Mask 0b001001: only threads whose X and Y are both even continue.
    if ( ( Lane & 0x9 ) == 0 )
    {
        Color = AverageQuad( Color, Lane, 0x01, 0x08 );

        OutMip2[Texel / 2] = Color;
        StoreColor( Lane, Color );
    }

    if ( NumMipLevels == 2 )
        return;

    GroupMemoryBarrierWithGroupSync();

    // Mask 0b011011: only threads whose X and Y are both multiples of four continue.
    if ( ( Lane & 0x1B ) == 0 )
    {
        Color = AverageQuad( Color, Lane, 0x02, 0x10 );

        OutMip3[Texel / 4] = Color;
        StoreColor( Lane, Color );
    }

    if ( NumMipLevels == 3 )
        return;

    GroupMemoryBarrierWithGroupSync();

    // The mask would be 0b111111 (X and Y multiples of eight); only lane 0
    // satisfies it, so compare against zero directly.
    if ( Lane == 0 )
    {
        Color = AverageQuad( Color, Lane, 0x04, 0x20 );

        OutMip4[Texel / 8] = Color;
    }
}
|
|
#else
|
|
/* Used when BUILDING_HLSL is not defined: the mip generation compute shader
 * from the BUILDING_HLSL branch above, embedded as a C string literal.
 * The text still references the ENTRY_POINT macro, so whoever compiles this
 * string has to define it.
 * NOTE(review): presumably compiled at runtime by the host code - confirm
 * against the user of this symbol. Keep the text in sync with the HLSL
 * branch. */
static const char str_CSMain_mipgen[] =
|
|
"#define BLOCK_SIZE 8\n"
|
|
"\n"
|
|
" // When reducing the size of a texture, it could be that downscaling the texture\n"
|
|
" // will result in a less than exactly 50% (1/2) of the original texture size.\n"
|
|
" // This happens if either the width, or the height (or both) dimensions of the texture\n"
|
|
" // are odd. For example, downscaling a 5x3 texture will result in a 2x1 texture which\n"
|
|
" // has a 60% reduction in the texture width and 66% reduction in the height.\n"
|
|
" // When this happens, we need to take more samples from the source texture to\n"
|
|
" // determine the pixel value in the destination texture.\n"
|
|
"\n"
|
|
"#define WIDTH_HEIGHT_EVEN 0 // Both the width and the height of the texture are even.\n"
|
|
"#define WIDTH_ODD_HEIGHT_EVEN 1 // The texture width is odd and the height is even.\n"
|
|
"#define WIDTH_EVEN_HEIGHT_ODD 2 // The texture width is even and teh height is odd.\n"
|
|
"#define WIDTH_HEIGHT_ODD 3 // Both the width and height of the texture are odd.\n"
|
|
"\n"
|
|
"struct ComputeShaderInput\n"
|
|
"{\n"
|
|
" uint3 GroupID : SV_GroupID; // 3D index of the thread group in the dispatch.\n"
|
|
" uint3 GroupThreadID : SV_GroupThreadID; // 3D index of local thread ID in a thread group.\n"
|
|
" uint3 DispatchThreadID : SV_DispatchThreadID; // 3D index of global thread ID in the dispatch.\n"
|
|
" uint GroupIndex : SV_GroupIndex; // Flattened local index of the thread within a thread group.\n"
|
|
"};\n"
|
|
"\n"
|
|
"cbuffer GenerateMipsCB : register( b0 )\n"
|
|
"{\n"
|
|
" uint SrcMipLevel; // Texture level of source mip\n"
|
|
" uint NumMipLevels; // Number of OutMips to write: [1-4]\n"
|
|
" uint SrcDimension; // Width and height of the source texture are even or odd.\n"
|
|
" uint padding;\n"
|
|
" float2 TexelSize; // 1.0 / OutMip1.Dimensions\n"
|
|
"}\n"
|
|
"\n"
|
|
"// Source mip map.\n"
|
|
"Texture2D<float4> SrcMip : register( t0 );\n"
|
|
"\n"
|
|
"// Write up to 4 mip map levels.\n"
|
|
"RWTexture2D<float4> OutMip1 : register( u0 );\n"
|
|
"RWTexture2D<float4> OutMip2 : register( u1 );\n"
|
|
"RWTexture2D<float4> OutMip3 : register( u2 );\n"
|
|
"RWTexture2D<float4> OutMip4 : register( u3 );\n"
|
|
"\n"
|
|
"// Linear clamp sampler.\n"
|
|
"SamplerState LinearClampSampler : register( s0 );\n"
|
|
"\n"
|
|
"// The reason for separating channels is to reduce bank conflicts in the\n"
|
|
"// local data memory controller. A large stride will cause more threads\n"
|
|
"// to collide on the same memory bank.\n"
|
|
"groupshared float gs_R[64];\n"
|
|
"groupshared float gs_G[64];\n"
|
|
"groupshared float gs_B[64];\n"
|
|
"groupshared float gs_A[64];\n"
|
|
"\n"
|
|
"void StoreColor( uint Index, float4 Color )\n"
|
|
"{\n"
|
|
" gs_R[Index] = Color.r;\n"
|
|
" gs_G[Index] = Color.g;\n"
|
|
" gs_B[Index] = Color.b;\n"
|
|
" gs_A[Index] = Color.a;\n"
|
|
"}\n"
|
|
"\n"
|
|
"float4 LoadColor( uint Index )\n"
|
|
"{\n"
|
|
" return float4( gs_R[Index], gs_G[Index], gs_B[Index], gs_A[Index] );\n"
|
|
"}\n"
|
|
"\n"
|
|
"[numthreads( BLOCK_SIZE, BLOCK_SIZE, 1 )]\n"
|
|
"void ENTRY_POINT( ComputeShaderInput IN )\n"
|
|
"{\n"
|
|
" float4 Src1 = (float4)0;\n"
|
|
"\n"
|
|
" // One bilinear sample is insufficient when scaling down by more than 2x.\n"
|
|
" // You will slightly undersample in the case where the source dimension\n"
|
|
" // is odd. This is why it's a really good idea to only generate mips on\n"
|
|
" // power-of-two sized textures. Trying to handle the undersampling case\n"
|
|
" // will force this shader to be slower and more complicated as it will\n"
|
|
" // have to take more source texture samples.\n"
|
|
"\n"
|
|
" // Determine the path to use based on the dimension of the\n"
|
|
" // source texture.\n"
|
|
" // 0b00(0): Both width and height are even.\n"
|
|
" // 0b01(1): Width is odd, height is even.\n"
|
|
" // 0b10(2): Width is even, height is odd.\n"
|
|
" // 0b11(3): Both width and height are odd.\n"
|
|
" switch ( SrcDimension )\n"
|
|
" {\n"
|
|
" case WIDTH_HEIGHT_EVEN:\n"
|
|
" {\n"
|
|
" float2 UV = TexelSize * ( IN.DispatchThreadID.xy + 0.5 );\n"
|
|
"\n"
|
|
" Src1 = SrcMip.SampleLevel( LinearClampSampler, UV, SrcMipLevel );\n"
|
|
" }\n"
|
|
" break;\n"
|
|
" case WIDTH_ODD_HEIGHT_EVEN:\n"
|
|
" {\n"
|
|
" // > 2:1 in X dimension\n"
|
|
" // Use 2 bilinear samples to guarantee we don't undersample when downsizing by more than 2x\n"
|
|
" // horizontally.\n"
|
|
" float2 UV1 = TexelSize * ( IN.DispatchThreadID.xy + float2( 0.25, 0.5 ) );\n"
|
|
" float2 Off = TexelSize * float2( 0.5, 0.0 );\n"
|
|
"\n"
|
|
" Src1 = 0.5 * ( SrcMip.SampleLevel( LinearClampSampler, UV1, SrcMipLevel ) +\n"
|
|
" SrcMip.SampleLevel( LinearClampSampler, UV1 + Off, SrcMipLevel ) );\n"
|
|
" }\n"
|
|
" break;\n"
|
|
" case WIDTH_EVEN_HEIGHT_ODD:\n"
|
|
" {\n"
|
|
" // > 2:1 in Y dimension\n"
|
|
" // Use 2 bilinear samples to guarantee we don't undersample when downsizing by more than 2x\n"
|
|
" // vertically.\n"
|
|
" float2 UV1 = TexelSize * ( IN.DispatchThreadID.xy + float2( 0.5, 0.25 ) );\n"
|
|
" float2 Off = TexelSize * float2( 0.0, 0.5 );\n"
|
|
"\n"
|
|
" Src1 = 0.5 * ( SrcMip.SampleLevel( LinearClampSampler, UV1, SrcMipLevel ) +\n"
|
|
" SrcMip.SampleLevel( LinearClampSampler, UV1 + Off, SrcMipLevel ) );\n"
|
|
" }\n"
|
|
" break;\n"
|
|
" case WIDTH_HEIGHT_ODD:\n"
|
|
" {\n"
|
|
" // > 2:1 in in both dimensions\n"
|
|
" // Use 4 bilinear samples to guarantee we don't undersample when downsizing by more than 2x\n"
|
|
" // in both directions.\n"
|
|
" float2 UV1 = TexelSize * ( IN.DispatchThreadID.xy + float2( 0.25, 0.25 ) );\n"
|
|
" float2 Off = TexelSize * 0.5;\n"
|
|
"\n"
|
|
" Src1 = SrcMip.SampleLevel( LinearClampSampler, UV1, SrcMipLevel );\n"
|
|
" Src1 += SrcMip.SampleLevel( LinearClampSampler, UV1 + float2( Off.x, 0.0 ), SrcMipLevel );\n"
|
|
" Src1 += SrcMip.SampleLevel( LinearClampSampler, UV1 + float2( 0.0, Off.y ), SrcMipLevel );\n"
|
|
" Src1 += SrcMip.SampleLevel( LinearClampSampler, UV1 + float2( Off.x, Off.y ), SrcMipLevel );\n"
|
|
" Src1 *= 0.25;\n"
|
|
" }\n"
|
|
" break;\n"
|
|
" }\n"
|
|
"\n"
|
|
" OutMip1[IN.DispatchThreadID.xy] = Src1;\n"
|
|
"\n"
|
|
" // A scalar (constant) branch can exit all threads coherently.\n"
|
|
" if ( NumMipLevels == 1 )\n"
|
|
" return;\n"
|
|
"\n"
|
|
" // Without lane swizzle operations, the only way to share data with other\n"
|
|
" // threads is through LDS.\n"
|
|
" StoreColor( IN.GroupIndex, Src1 );\n"
|
|
"\n"
|
|
" // This guarantees all LDS writes are complete and that all threads have\n"
|
|
" // executed all instructions so far (and therefore have issued their LDS\n"
|
|
" // write instructions.)\n"
|
|
" GroupMemoryBarrierWithGroupSync();\n"
|
|
"\n"
|
|
" // With low three bits for X and high three bits for Y, this bit mask\n"
|
|
" // (binary: 001001) checks that X and Y are even.\n"
|
|
" if ( ( IN.GroupIndex & 0x9 ) == 0 )\n"
|
|
" {\n"
|
|
" float4 Src2 = LoadColor( IN.GroupIndex + 0x01 );\n"
|
|
" float4 Src3 = LoadColor( IN.GroupIndex + 0x08 );\n"
|
|
" float4 Src4 = LoadColor( IN.GroupIndex + 0x09 );\n"
|
|
" Src1 = 0.25 * ( Src1 + Src2 + Src3 + Src4 );\n"
|
|
"\n"
|
|
" OutMip2[IN.DispatchThreadID.xy / 2] = Src1;\n"
|
|
" StoreColor( IN.GroupIndex, Src1 );\n"
|
|
" }\n"
|
|
"\n"
|
|
" if ( NumMipLevels == 2 )\n"
|
|
" return;\n"
|
|
"\n"
|
|
" GroupMemoryBarrierWithGroupSync();\n"
|
|
"\n"
|
|
" // This bit mask (binary: 011011) checks that X and Y are multiples of four.\n"
|
|
" if ( ( IN.GroupIndex & 0x1B ) == 0 )\n"
|
|
" {\n"
|
|
" float4 Src2 = LoadColor( IN.GroupIndex + 0x02 );\n"
|
|
" float4 Src3 = LoadColor( IN.GroupIndex + 0x10 );\n"
|
|
" float4 Src4 = LoadColor( IN.GroupIndex + 0x12 );\n"
|
|
" Src1 = 0.25 * ( Src1 + Src2 + Src3 + Src4 );\n"
|
|
"\n"
|
|
" OutMip3[IN.DispatchThreadID.xy / 4] = Src1;\n"
|
|
" StoreColor( IN.GroupIndex, Src1 );\n"
|
|
" }\n"
|
|
"\n"
|
|
" if ( NumMipLevels == 3 )\n"
|
|
" return;\n"
|
|
"\n"
|
|
" GroupMemoryBarrierWithGroupSync();\n"
|
|
"\n"
|
|
" // This bit mask would be 111111 (X & Y multiples of 8), but only one\n"
|
|
" // thread fits that criteria.\n"
|
|
" if ( IN.GroupIndex == 0 )\n"
|
|
" {\n"
|
|
" float4 Src2 = LoadColor( IN.GroupIndex + 0x04 );\n"
|
|
" float4 Src3 = LoadColor( IN.GroupIndex + 0x20 );\n"
|
|
" float4 Src4 = LoadColor( IN.GroupIndex + 0x24 );\n"
|
|
" Src1 = 0.25 * ( Src1 + Src2 + Src3 + Src4 );\n"
|
|
"\n"
|
|
" OutMip4[IN.DispatchThreadID.xy / 8] = Src1;\n"
|
|
" }\n"
|
|
"}\n";
|
|
#endif
|