mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2024-12-22 16:26:39 +00:00
d3d12: Add d3d12mipmapping element
Adding a new element for texture conversion from single mip level texture to mipmapping enabled RGBA texture Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/7555>
This commit is contained in:
parent
51e1834e81
commit
cef201734c
11 changed files with 1965 additions and 0 deletions
|
@ -41,12 +41,14 @@ using namespace Microsoft::WRL;
|
|||
#include "converter_hlsl_cs.h"
|
||||
#include "plugin_hlsl_ps.h"
|
||||
#include "plugin_hlsl_vs.h"
|
||||
#include "plugin_hlsl_cs.h"
|
||||
#else
|
||||
static std::unordered_map<std::string, std::pair<const BYTE *, SIZE_T>> g_converter_ps_table;
|
||||
static std::unordered_map<std::string, std::pair<const BYTE *, SIZE_T>> g_converter_vs_table;
|
||||
static std::unordered_map<std::string, std::pair<const BYTE *, SIZE_T>> g_converter_cs_table;
|
||||
static std::unordered_map<std::string, std::pair<const BYTE *, SIZE_T>> g_plugin_ps_table;
|
||||
static std::unordered_map<std::string, std::pair<const BYTE *, SIZE_T>> g_plugin_vs_table;
|
||||
static std::unordered_map<std::string, std::pair<const BYTE *, SIZE_T>> g_plugin_cs_table;
|
||||
#endif
|
||||
|
||||
static std::vector<std::pair<std::string, ID3DBlob *>> g_compiled_blobs;
|
||||
|
@ -81,6 +83,10 @@ static const ShaderItem g_vs_map[] = {
|
|||
{GST_D3D_PLUGIN_VS_POS, BUILD_SOURCE (VSMain_pos)},
|
||||
};
|
||||
|
||||
/* Compute shader table. gst_d3d_plugin_shader_get_cs_blob() indexes it
 * directly with a GstD3DPluginCS value (g_cs_map[type]), so entries must be
 * listed in the same order as that enum. */
static const ShaderItem g_cs_map[] = {
|
||||
{GST_D3D_PLUGIN_CS_MIP_GEN, BUILD_SOURCE (CSMain_mipgen)},
|
||||
};
|
||||
|
||||
#undef BUILD_SOURCE
|
||||
|
||||
static const gchar * g_sm_map[] = {
|
||||
|
@ -195,6 +201,60 @@ gst_d3d_plugin_shader_get_ps_blob (GstD3DPluginPS type,
|
|||
return TRUE;
|
||||
}
|
||||
|
||||
/**
 * gst_d3d_plugin_shader_get_cs_blob:
 * @type: compute shader to fetch, must be below GST_D3D_PLUGIN_CS_LAST
 * @shader_model: shader model, must be below GST_D3D_SM_LAST
 * @byte_code: (out): filled with a pointer to the bytecode and its length
 *
 * Looks the shader up in g_plugin_cs_table under the key
 * "<shader name>_<shader model>". On a miss the HLSL source from g_cs_map
 * is compiled for the "cs_<shader model>" target, and the result is added
 * to the table so later calls hit the cache.
 *
 * The compiled blob is appended to g_compiled_blobs; the pointers handed
 * out through @byte_code refer to that blob's buffer.
 *
 * Returns: %TRUE if @byte_code was filled, %FALSE on invalid arguments or
 * when compilation fails
 */
gboolean
gst_d3d_plugin_shader_get_cs_blob (GstD3DPluginCS type,
    GstD3DShaderModel shader_model, GstD3DShaderByteCode * byte_code)
{
  g_return_val_if_fail (type < GST_D3D_PLUGIN_CS_LAST, FALSE);
  g_return_val_if_fail (shader_model < GST_D3D_SM_LAST, FALSE);
  g_return_val_if_fail (byte_code, FALSE);

  /* Serializes lookups and insertions on g_plugin_cs_table */
  static std::mutex cache_lock;

  const auto &item = g_cs_map[type];
  const gchar *sm_suffix = g_sm_map[shader_model];

  auto cache_key = std::string (item.name) + "_" + std::string (sm_suffix);

  std::lock_guard <std::mutex> table_guard (cache_lock);

  /* Fast path: already precompiled or compiled by an earlier call */
  auto cached = g_plugin_cs_table.find (cache_key);
  if (cached != g_plugin_cs_table.end ()) {
    byte_code->byte_code = cached->second.first;
    byte_code->byte_code_len = cached->second.second;

    return TRUE;
  }

  /* Slow path: compile from source */
  auto profile = std::string ("cs_") + sm_suffix;

  ID3DBlob *compiled = nullptr;
  ComPtr<ID3DBlob> compile_log;

  auto hr = gst_d3d_compile (item.source, item.source_size,
      nullptr, nullptr, nullptr, "ENTRY_POINT", profile.c_str (), 0, 0,
      &compiled, &compile_log);
  if (FAILED (hr)) {
    const gchar *detail = nullptr;
    if (compile_log)
      detail = (const gchar *) compile_log->GetBufferPointer ();

    GST_ERROR ("Couldn't compile code, hr: 0x%x, error detail: %s, "
        "source code: \n%s", (guint) hr, GST_STR_NULL (detail),
        item.source);
    return FALSE;
  }

  byte_code->byte_code = compiled->GetBufferPointer ();
  byte_code->byte_code_len = compiled->GetBufferSize ();

  g_plugin_cs_table[cache_key] = { (const BYTE *) compiled->GetBufferPointer (),
    compiled->GetBufferSize ()};

  /* The raw blob pointer is handed over to the global blob list */
  std::lock_guard <std::mutex> blob_guard (g_blob_lock);
  g_compiled_blobs.push_back ({ cache_key, compiled });

  return TRUE;
}
|
||||
|
||||
|
||||
gboolean
|
||||
gst_d3d_converter_shader_get_vs_blob (GstD3DShaderModel shader_model,
|
||||
GstD3DShaderByteCode * byte_code)
|
||||
|
|
|
@ -49,6 +49,12 @@ typedef enum
|
|||
GST_D3D_PLUGIN_VS_LAST,
|
||||
} GstD3DPluginVS;
|
||||
|
||||
/* Compute shaders that can be requested through
 * gst_d3d_plugin_shader_get_cs_blob(). */
typedef enum
|
||||
{
|
||||
/* Mip generation shader, mapped to CSMain_mipgen in g_cs_map */
GST_D3D_PLUGIN_CS_MIP_GEN,
|
||||
|
||||
/* Upper bound used for argument validation (type < LAST); not a shader */
GST_D3D_PLUGIN_CS_LAST,
|
||||
} GstD3DPluginCS;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
|
@ -99,6 +105,11 @@ gboolean gst_d3d_plugin_shader_get_ps_blob (GstD3DPluginPS type,
|
|||
GstD3DShaderModel shader_model,
|
||||
GstD3DShaderByteCode * byte_code);
|
||||
|
||||
GST_D3D_SHADER_API
|
||||
gboolean gst_d3d_plugin_shader_get_cs_blob (GstD3DPluginCS type,
|
||||
GstD3DShaderModel shader_model,
|
||||
GstD3DShaderByteCode * byte_code);
|
||||
|
||||
GST_D3D_SHADER_API
|
||||
gboolean gst_d3d_converter_shader_get_vs_blob (GstD3DShaderModel shader_model,
|
||||
GstD3DShaderByteCode * byte_code);
|
||||
|
|
|
@ -0,0 +1,421 @@
|
|||
/**
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2018 Jeremiah van Oosten
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
/* Source: https://github.com/jpvanoosten/LearningDirectX12 */
|
||||
|
||||
#ifdef BUILDING_HLSL
|
||||
|
||||
#define BLOCK_SIZE 8
|
||||
|
||||
// When reducing the size of a texture, it could be that downscaling the texture
|
||||
// will result in a less than exactly 50% (1/2) of the original texture size.
|
||||
// This happens if either the width, or the height (or both) dimensions of the texture
|
||||
// are odd. For example, downscaling a 5x3 texture will result in a 2x1 texture which
|
||||
// has a 60% reduction in the texture width and 66% reduction in the height.
|
||||
// When this happens, we need to take more samples from the source texture to
|
||||
// determine the pixel value in the destination texture.
|
||||
|
||||
#define WIDTH_HEIGHT_EVEN 0 // Both the width and the height of the texture are even.
|
||||
#define WIDTH_ODD_HEIGHT_EVEN 1 // The texture width is odd and the height is even.
|
||||
#define WIDTH_EVEN_HEIGHT_ODD 2 // The texture width is even and the height is odd.
|
||||
#define WIDTH_HEIGHT_ODD 3 // Both the width and height of the texture are odd.
|
||||
|
||||
// System-value inputs received by ENTRY_POINT. The shader body only reads
// DispatchThreadID and GroupIndex.
struct ComputeShaderInput
|
||||
{
|
||||
uint3 GroupID : SV_GroupID; // 3D index of the thread group in the dispatch.
|
||||
uint3 GroupThreadID : SV_GroupThreadID; // 3D index of local thread ID in a thread group.
|
||||
uint3 DispatchThreadID : SV_DispatchThreadID; // 3D index of global thread ID in the dispatch.
|
||||
uint GroupIndex : SV_GroupIndex; // Flattened local index of the thread within a thread group.
|
||||
};
|
||||
|
||||
// Per-dispatch parameters: four uints followed by one float2, i.e. six
// 32-bit values in total. The C++ struct GenerateMipsCB in gstd3d12mipgen.cpp
// declares the same fields in the same order.
cbuffer GenerateMipsCB : register( b0 )
|
||||
{
|
||||
uint SrcMipLevel; // Texture level of source mip
|
||||
uint NumMipLevels; // Number of OutMips to write: [1-4]
|
||||
uint SrcDimension; // Width and height of the source texture are even or odd.
|
||||
// Never read by the shader; fills the fourth 32-bit slot ahead of TexelSize.
uint padding;
|
||||
float2 TexelSize; // 1.0 / OutMip1.Dimensions
|
||||
}
|
||||
|
||||
// Source mip map.
|
||||
Texture2D<float4> SrcMip : register( t0 );
|
||||
|
||||
// Write up to 4 mip map levels.
|
||||
RWTexture2D<float4> OutMip1 : register( u0 );
|
||||
RWTexture2D<float4> OutMip2 : register( u1 );
|
||||
RWTexture2D<float4> OutMip3 : register( u2 );
|
||||
RWTexture2D<float4> OutMip4 : register( u3 );
|
||||
|
||||
// Linear clamp sampler.
|
||||
SamplerState LinearClampSampler : register( s0 );
|
||||
|
||||
// The reason for separating channels is to reduce bank conflicts in the
|
||||
// local data memory controller. A large stride will cause more threads
|
||||
// to collide on the same memory bank.
|
||||
groupshared float gs_R[64];
|
||||
groupshared float gs_G[64];
|
||||
groupshared float gs_B[64];
|
||||
groupshared float gs_A[64];
|
||||
|
||||
// Writes the four channels of Color into slot Index of the per-channel
// groupshared arrays (gs_R, gs_G, gs_B, gs_A). Each array has 64 entries,
// so Index must be in [0, 63].
void StoreColor( uint Index, float4 Color )
|
||||
{
|
||||
gs_R[Index] = Color.r;
|
||||
gs_G[Index] = Color.g;
|
||||
gs_B[Index] = Color.b;
|
||||
gs_A[Index] = Color.a;
|
||||
}
|
||||
|
||||
// Reassembles the color previously written to slot Index by StoreColor()
// from the per-channel groupshared arrays.
float4 LoadColor( uint Index )
|
||||
{
|
||||
return float4( gs_R[Index], gs_G[Index], gs_B[Index], gs_A[Index] );
|
||||
}
|
||||
|
||||
[numthreads( BLOCK_SIZE, BLOCK_SIZE, 1 )]
|
||||
void ENTRY_POINT( ComputeShaderInput IN )
|
||||
{
|
||||
float4 Src1 = (float4)0;
|
||||
|
||||
// One bilinear sample is insufficient when scaling down by more than 2x.
|
||||
// You will slightly undersample in the case where the source dimension
|
||||
// is odd. This is why it's a really good idea to only generate mips on
|
||||
// power-of-two sized textures. Trying to handle the undersampling case
|
||||
// will force this shader to be slower and more complicated as it will
|
||||
// have to take more source texture samples.
|
||||
|
||||
// Determine the path to use based on the dimension of the
|
||||
// source texture.
|
||||
// 0b00(0): Both width and height are even.
|
||||
// 0b01(1): Width is odd, height is even.
|
||||
// 0b10(2): Width is even, height is odd.
|
||||
// 0b11(3): Both width and height are odd.
|
||||
switch ( SrcDimension )
|
||||
{
|
||||
case WIDTH_HEIGHT_EVEN:
|
||||
{
|
||||
float2 UV = TexelSize * ( IN.DispatchThreadID.xy + 0.5 );
|
||||
|
||||
Src1 = SrcMip.SampleLevel( LinearClampSampler, UV, SrcMipLevel );
|
||||
}
|
||||
break;
|
||||
case WIDTH_ODD_HEIGHT_EVEN:
|
||||
{
|
||||
// > 2:1 in X dimension
|
||||
// Use 2 bilinear samples to guarantee we don't undersample when downsizing by more than 2x
|
||||
// horizontally.
|
||||
float2 UV1 = TexelSize * ( IN.DispatchThreadID.xy + float2( 0.25, 0.5 ) );
|
||||
float2 Off = TexelSize * float2( 0.5, 0.0 );
|
||||
|
||||
Src1 = 0.5 * ( SrcMip.SampleLevel( LinearClampSampler, UV1, SrcMipLevel ) +
|
||||
SrcMip.SampleLevel( LinearClampSampler, UV1 + Off, SrcMipLevel ) );
|
||||
}
|
||||
break;
|
||||
case WIDTH_EVEN_HEIGHT_ODD:
|
||||
{
|
||||
// > 2:1 in Y dimension
|
||||
// Use 2 bilinear samples to guarantee we don't undersample when downsizing by more than 2x
|
||||
// vertically.
|
||||
float2 UV1 = TexelSize * ( IN.DispatchThreadID.xy + float2( 0.5, 0.25 ) );
|
||||
float2 Off = TexelSize * float2( 0.0, 0.5 );
|
||||
|
||||
Src1 = 0.5 * ( SrcMip.SampleLevel( LinearClampSampler, UV1, SrcMipLevel ) +
|
||||
SrcMip.SampleLevel( LinearClampSampler, UV1 + Off, SrcMipLevel ) );
|
||||
}
|
||||
break;
|
||||
case WIDTH_HEIGHT_ODD:
|
||||
{
|
||||
// > 2:1 in both dimensions
|
||||
// Use 4 bilinear samples to guarantee we don't undersample when downsizing by more than 2x
|
||||
// in both directions.
|
||||
float2 UV1 = TexelSize * ( IN.DispatchThreadID.xy + float2( 0.25, 0.25 ) );
|
||||
float2 Off = TexelSize * 0.5;
|
||||
|
||||
Src1 = SrcMip.SampleLevel( LinearClampSampler, UV1, SrcMipLevel );
|
||||
Src1 += SrcMip.SampleLevel( LinearClampSampler, UV1 + float2( Off.x, 0.0 ), SrcMipLevel );
|
||||
Src1 += SrcMip.SampleLevel( LinearClampSampler, UV1 + float2( 0.0, Off.y ), SrcMipLevel );
|
||||
Src1 += SrcMip.SampleLevel( LinearClampSampler, UV1 + float2( Off.x, Off.y ), SrcMipLevel );
|
||||
Src1 *= 0.25;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
OutMip1[IN.DispatchThreadID.xy] = Src1;
|
||||
|
||||
// A scalar (constant) branch can exit all threads coherently.
|
||||
if ( NumMipLevels == 1 )
|
||||
return;
|
||||
|
||||
// Without lane swizzle operations, the only way to share data with other
|
||||
// threads is through LDS.
|
||||
StoreColor( IN.GroupIndex, Src1 );
|
||||
|
||||
// This guarantees all LDS writes are complete and that all threads have
|
||||
// executed all instructions so far (and therefore have issued their LDS
|
||||
// write instructions.)
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
|
||||
// With low three bits for X and high three bits for Y, this bit mask
|
||||
// (binary: 001001) checks that X and Y are even.
|
||||
if ( ( IN.GroupIndex & 0x9 ) == 0 )
|
||||
{
|
||||
float4 Src2 = LoadColor( IN.GroupIndex + 0x01 );
|
||||
float4 Src3 = LoadColor( IN.GroupIndex + 0x08 );
|
||||
float4 Src4 = LoadColor( IN.GroupIndex + 0x09 );
|
||||
Src1 = 0.25 * ( Src1 + Src2 + Src3 + Src4 );
|
||||
|
||||
OutMip2[IN.DispatchThreadID.xy / 2] = Src1;
|
||||
StoreColor( IN.GroupIndex, Src1 );
|
||||
}
|
||||
|
||||
if ( NumMipLevels == 2 )
|
||||
return;
|
||||
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
|
||||
// This bit mask (binary: 011011) checks that X and Y are multiples of four.
|
||||
if ( ( IN.GroupIndex & 0x1B ) == 0 )
|
||||
{
|
||||
float4 Src2 = LoadColor( IN.GroupIndex + 0x02 );
|
||||
float4 Src3 = LoadColor( IN.GroupIndex + 0x10 );
|
||||
float4 Src4 = LoadColor( IN.GroupIndex + 0x12 );
|
||||
Src1 = 0.25 * ( Src1 + Src2 + Src3 + Src4 );
|
||||
|
||||
OutMip3[IN.DispatchThreadID.xy / 4] = Src1;
|
||||
StoreColor( IN.GroupIndex, Src1 );
|
||||
}
|
||||
|
||||
if ( NumMipLevels == 3 )
|
||||
return;
|
||||
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
|
||||
// This bit mask would be 111111 (X & Y multiples of 8), but only one
|
||||
// thread fits that criteria.
|
||||
if ( IN.GroupIndex == 0 )
|
||||
{
|
||||
float4 Src2 = LoadColor( IN.GroupIndex + 0x04 );
|
||||
float4 Src3 = LoadColor( IN.GroupIndex + 0x20 );
|
||||
float4 Src4 = LoadColor( IN.GroupIndex + 0x24 );
|
||||
Src1 = 0.25 * ( Src1 + Src2 + Src3 + Src4 );
|
||||
|
||||
OutMip4[IN.DispatchThreadID.xy / 8] = Src1;
|
||||
}
|
||||
}
|
||||
#else
|
||||
static const char str_CSMain_mipgen[] =
|
||||
"#define BLOCK_SIZE 8\n"
|
||||
"\n"
|
||||
" // When reducing the size of a texture, it could be that downscaling the texture\n"
|
||||
" // will result in a less than exactly 50% (1/2) of the original texture size.\n"
|
||||
" // This happens if either the width, or the height (or both) dimensions of the texture\n"
|
||||
" // are odd. For example, downscaling a 5x3 texture will result in a 2x1 texture which\n"
|
||||
" // has a 60% reduction in the texture width and 66% reduction in the height.\n"
|
||||
" // When this happens, we need to take more samples from the source texture to\n"
|
||||
" // determine the pixel value in the destination texture.\n"
|
||||
"\n"
|
||||
"#define WIDTH_HEIGHT_EVEN 0 // Both the width and the height of the texture are even.\n"
|
||||
"#define WIDTH_ODD_HEIGHT_EVEN 1 // The texture width is odd and the height is even.\n"
|
||||
"#define WIDTH_EVEN_HEIGHT_ODD 2 // The texture width is even and teh height is odd.\n"
|
||||
"#define WIDTH_HEIGHT_ODD 3 // Both the width and height of the texture are odd.\n"
|
||||
"\n"
|
||||
"struct ComputeShaderInput\n"
|
||||
"{\n"
|
||||
" uint3 GroupID : SV_GroupID; // 3D index of the thread group in the dispatch.\n"
|
||||
" uint3 GroupThreadID : SV_GroupThreadID; // 3D index of local thread ID in a thread group.\n"
|
||||
" uint3 DispatchThreadID : SV_DispatchThreadID; // 3D index of global thread ID in the dispatch.\n"
|
||||
" uint GroupIndex : SV_GroupIndex; // Flattened local index of the thread within a thread group.\n"
|
||||
"};\n"
|
||||
"\n"
|
||||
"cbuffer GenerateMipsCB : register( b0 )\n"
|
||||
"{\n"
|
||||
" uint SrcMipLevel; // Texture level of source mip\n"
|
||||
" uint NumMipLevels; // Number of OutMips to write: [1-4]\n"
|
||||
" uint SrcDimension; // Width and height of the source texture are even or odd.\n"
|
||||
" uint padding;\n"
|
||||
" float2 TexelSize; // 1.0 / OutMip1.Dimensions\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"// Source mip map.\n"
|
||||
"Texture2D<float4> SrcMip : register( t0 );\n"
|
||||
"\n"
|
||||
"// Write up to 4 mip map levels.\n"
|
||||
"RWTexture2D<float4> OutMip1 : register( u0 );\n"
|
||||
"RWTexture2D<float4> OutMip2 : register( u1 );\n"
|
||||
"RWTexture2D<float4> OutMip3 : register( u2 );\n"
|
||||
"RWTexture2D<float4> OutMip4 : register( u3 );\n"
|
||||
"\n"
|
||||
"// Linear clamp sampler.\n"
|
||||
"SamplerState LinearClampSampler : register( s0 );\n"
|
||||
"\n"
|
||||
"// The reason for separating channels is to reduce bank conflicts in the\n"
|
||||
"// local data memory controller. A large stride will cause more threads\n"
|
||||
"// to collide on the same memory bank.\n"
|
||||
"groupshared float gs_R[64];\n"
|
||||
"groupshared float gs_G[64];\n"
|
||||
"groupshared float gs_B[64];\n"
|
||||
"groupshared float gs_A[64];\n"
|
||||
"\n"
|
||||
"void StoreColor( uint Index, float4 Color )\n"
|
||||
"{\n"
|
||||
" gs_R[Index] = Color.r;\n"
|
||||
" gs_G[Index] = Color.g;\n"
|
||||
" gs_B[Index] = Color.b;\n"
|
||||
" gs_A[Index] = Color.a;\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"float4 LoadColor( uint Index )\n"
|
||||
"{\n"
|
||||
" return float4( gs_R[Index], gs_G[Index], gs_B[Index], gs_A[Index] );\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"[numthreads( BLOCK_SIZE, BLOCK_SIZE, 1 )]\n"
|
||||
"void ENTRY_POINT( ComputeShaderInput IN )\n"
|
||||
"{\n"
|
||||
" float4 Src1 = (float4)0;\n"
|
||||
"\n"
|
||||
" // One bilinear sample is insufficient when scaling down by more than 2x.\n"
|
||||
" // You will slightly undersample in the case where the source dimension\n"
|
||||
" // is odd. This is why it's a really good idea to only generate mips on\n"
|
||||
" // power-of-two sized textures. Trying to handle the undersampling case\n"
|
||||
" // will force this shader to be slower and more complicated as it will\n"
|
||||
" // have to take more source texture samples.\n"
|
||||
"\n"
|
||||
" // Determine the path to use based on the dimension of the\n"
|
||||
" // source texture.\n"
|
||||
" // 0b00(0): Both width and height are even.\n"
|
||||
" // 0b01(1): Width is odd, height is even.\n"
|
||||
" // 0b10(2): Width is even, height is odd.\n"
|
||||
" // 0b11(3): Both width and height are odd.\n"
|
||||
" switch ( SrcDimension )\n"
|
||||
" {\n"
|
||||
" case WIDTH_HEIGHT_EVEN:\n"
|
||||
" {\n"
|
||||
" float2 UV = TexelSize * ( IN.DispatchThreadID.xy + 0.5 );\n"
|
||||
"\n"
|
||||
" Src1 = SrcMip.SampleLevel( LinearClampSampler, UV, SrcMipLevel );\n"
|
||||
" }\n"
|
||||
" break;\n"
|
||||
" case WIDTH_ODD_HEIGHT_EVEN:\n"
|
||||
" {\n"
|
||||
" // > 2:1 in X dimension\n"
|
||||
" // Use 2 bilinear samples to guarantee we don't undersample when downsizing by more than 2x\n"
|
||||
" // horizontally.\n"
|
||||
" float2 UV1 = TexelSize * ( IN.DispatchThreadID.xy + float2( 0.25, 0.5 ) );\n"
|
||||
" float2 Off = TexelSize * float2( 0.5, 0.0 );\n"
|
||||
"\n"
|
||||
" Src1 = 0.5 * ( SrcMip.SampleLevel( LinearClampSampler, UV1, SrcMipLevel ) +\n"
|
||||
" SrcMip.SampleLevel( LinearClampSampler, UV1 + Off, SrcMipLevel ) );\n"
|
||||
" }\n"
|
||||
" break;\n"
|
||||
" case WIDTH_EVEN_HEIGHT_ODD:\n"
|
||||
" {\n"
|
||||
" // > 2:1 in Y dimension\n"
|
||||
" // Use 2 bilinear samples to guarantee we don't undersample when downsizing by more than 2x\n"
|
||||
" // vertically.\n"
|
||||
" float2 UV1 = TexelSize * ( IN.DispatchThreadID.xy + float2( 0.5, 0.25 ) );\n"
|
||||
" float2 Off = TexelSize * float2( 0.0, 0.5 );\n"
|
||||
"\n"
|
||||
" Src1 = 0.5 * ( SrcMip.SampleLevel( LinearClampSampler, UV1, SrcMipLevel ) +\n"
|
||||
" SrcMip.SampleLevel( LinearClampSampler, UV1 + Off, SrcMipLevel ) );\n"
|
||||
" }\n"
|
||||
" break;\n"
|
||||
" case WIDTH_HEIGHT_ODD:\n"
|
||||
" {\n"
|
||||
" // > 2:1 in in both dimensions\n"
|
||||
" // Use 4 bilinear samples to guarantee we don't undersample when downsizing by more than 2x\n"
|
||||
" // in both directions.\n"
|
||||
" float2 UV1 = TexelSize * ( IN.DispatchThreadID.xy + float2( 0.25, 0.25 ) );\n"
|
||||
" float2 Off = TexelSize * 0.5;\n"
|
||||
"\n"
|
||||
" Src1 = SrcMip.SampleLevel( LinearClampSampler, UV1, SrcMipLevel );\n"
|
||||
" Src1 += SrcMip.SampleLevel( LinearClampSampler, UV1 + float2( Off.x, 0.0 ), SrcMipLevel );\n"
|
||||
" Src1 += SrcMip.SampleLevel( LinearClampSampler, UV1 + float2( 0.0, Off.y ), SrcMipLevel );\n"
|
||||
" Src1 += SrcMip.SampleLevel( LinearClampSampler, UV1 + float2( Off.x, Off.y ), SrcMipLevel );\n"
|
||||
" Src1 *= 0.25;\n"
|
||||
" }\n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" OutMip1[IN.DispatchThreadID.xy] = Src1;\n"
|
||||
"\n"
|
||||
" // A scalar (constant) branch can exit all threads coherently.\n"
|
||||
" if ( NumMipLevels == 1 )\n"
|
||||
" return;\n"
|
||||
"\n"
|
||||
" // Without lane swizzle operations, the only way to share data with other\n"
|
||||
" // threads is through LDS.\n"
|
||||
" StoreColor( IN.GroupIndex, Src1 );\n"
|
||||
"\n"
|
||||
" // This guarantees all LDS writes are complete and that all threads have\n"
|
||||
" // executed all instructions so far (and therefore have issued their LDS\n"
|
||||
" // write instructions.)\n"
|
||||
" GroupMemoryBarrierWithGroupSync();\n"
|
||||
"\n"
|
||||
" // With low three bits for X and high three bits for Y, this bit mask\n"
|
||||
" // (binary: 001001) checks that X and Y are even.\n"
|
||||
" if ( ( IN.GroupIndex & 0x9 ) == 0 )\n"
|
||||
" {\n"
|
||||
" float4 Src2 = LoadColor( IN.GroupIndex + 0x01 );\n"
|
||||
" float4 Src3 = LoadColor( IN.GroupIndex + 0x08 );\n"
|
||||
" float4 Src4 = LoadColor( IN.GroupIndex + 0x09 );\n"
|
||||
" Src1 = 0.25 * ( Src1 + Src2 + Src3 + Src4 );\n"
|
||||
"\n"
|
||||
" OutMip2[IN.DispatchThreadID.xy / 2] = Src1;\n"
|
||||
" StoreColor( IN.GroupIndex, Src1 );\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" if ( NumMipLevels == 2 )\n"
|
||||
" return;\n"
|
||||
"\n"
|
||||
" GroupMemoryBarrierWithGroupSync();\n"
|
||||
"\n"
|
||||
" // This bit mask (binary: 011011) checks that X and Y are multiples of four.\n"
|
||||
" if ( ( IN.GroupIndex & 0x1B ) == 0 )\n"
|
||||
" {\n"
|
||||
" float4 Src2 = LoadColor( IN.GroupIndex + 0x02 );\n"
|
||||
" float4 Src3 = LoadColor( IN.GroupIndex + 0x10 );\n"
|
||||
" float4 Src4 = LoadColor( IN.GroupIndex + 0x12 );\n"
|
||||
" Src1 = 0.25 * ( Src1 + Src2 + Src3 + Src4 );\n"
|
||||
"\n"
|
||||
" OutMip3[IN.DispatchThreadID.xy / 4] = Src1;\n"
|
||||
" StoreColor( IN.GroupIndex, Src1 );\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" if ( NumMipLevels == 3 )\n"
|
||||
" return;\n"
|
||||
"\n"
|
||||
" GroupMemoryBarrierWithGroupSync();\n"
|
||||
"\n"
|
||||
" // This bit mask would be 111111 (X & Y multiples of 8), but only one\n"
|
||||
" // thread fits that criteria.\n"
|
||||
" if ( IN.GroupIndex == 0 )\n"
|
||||
" {\n"
|
||||
" float4 Src2 = LoadColor( IN.GroupIndex + 0x04 );\n"
|
||||
" float4 Src3 = LoadColor( IN.GroupIndex + 0x20 );\n"
|
||||
" float4 Src4 = LoadColor( IN.GroupIndex + 0x24 );\n"
|
||||
" Src1 = 0.25 * ( Src1 + Src2 + Src3 + Src4 );\n"
|
||||
"\n"
|
||||
" OutMip4[IN.DispatchThreadID.xy / 8] = Src1;\n"
|
||||
" }\n"
|
||||
"}\n";
|
||||
#endif
|
|
@ -30,3 +30,4 @@
|
|||
#include "VSMain_color.hlsl"
|
||||
#include "VSMain_coord.hlsl"
|
||||
#include "VSMain_pos.hlsl"
|
||||
#include "CSMain_mipgen.hlsl"
|
||||
|
|
|
@ -10,6 +10,7 @@ hlsl_sources = [
|
|||
['VSMain_color', 'vs'],
|
||||
['VSMain_coord', 'vs'],
|
||||
['VSMain_pos', 'vs'],
|
||||
['CSMain_mipgen', 'cs'],
|
||||
]
|
||||
|
||||
shader_model = '5_0'
|
||||
|
@ -58,8 +59,19 @@ plugin_vs_collection = custom_target('plugin_hlsl_vs',
|
|||
'--output', '@OUTPUT@'
|
||||
])
|
||||
|
||||
plugin_cs_collection = custom_target('plugin_hlsl_cs',
|
||||
input : plugin_hlsl_precompiled,
|
||||
output : 'plugin_hlsl_cs.h',
|
||||
command : [header_collector,
|
||||
'--input', meson.current_build_dir(),
|
||||
'--prefix', 'CSMain_',
|
||||
'--name', 'g_plugin_cs_table',
|
||||
'--output', '@OUTPUT@'
|
||||
])
|
||||
|
||||
hlsl_precompiled += [
|
||||
plugin_hlsl_precompiled,
|
||||
plugin_ps_collection,
|
||||
plugin_vs_collection,
|
||||
plugin_cs_collection,
|
||||
]
|
||||
|
|
366
subprojects/gst-plugins-bad/sys/d3d12/gstd3d12mipgen.cpp
Normal file
366
subprojects/gst-plugins-bad/sys/d3d12/gstd3d12mipgen.cpp
Normal file
|
@ -0,0 +1,366 @@
|
|||
/* GStreamer
|
||||
* Copyright (C) 2024 Seungha Yang <seungha@centricular.com>
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Library General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Library General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Library General Public
|
||||
* License along with this library; if not, write to the
|
||||
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
|
||||
* Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright(c) 2018 Jeremiah van Oosten
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files(the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions :
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* Reference: https://github.com/jpvanoosten/LearningDirectX12 */
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "gstd3d12mipgen.h"
|
||||
#include <gst/d3d12/gstd3d12-private.h>
|
||||
#include <gst/d3dshader/gstd3dshader.h>
|
||||
#include <directx/d3dx12.h>
|
||||
#include <wrl.h>
|
||||
#include <algorithm>
|
||||
|
||||
#define _XM_NO_INTRINSICS_
|
||||
#include <DirectXMath.h>
|
||||
|
||||
GST_DEBUG_CATEGORY_STATIC (gst_d3d12_mip_gen_debug);
|
||||
#define GST_CAT_DEFAULT gst_d3d12_mip_gen_debug
|
||||
|
||||
/* *INDENT-OFF* */
|
||||
using namespace Microsoft::WRL;
|
||||
using namespace DirectX;
|
||||
/* *INDENT-ON* */
|
||||
|
||||
/* CPU-side counterpart of the HLSL "cbuffer GenerateMipsCB" declared in
 * CSMain_mipgen.hlsl: same fields, same order. Four UINTs plus one XMFLOAT2
 * add up to the six 32-bit values that gst_d3d12_mip_gen_new() reserves via
 * InitAsConstants (6, 0, 0). */
struct GenerateMipsCB
|
||||
{
|
||||
UINT SrcMipLevel;
|
||||
UINT NumMipLevels;
|
||||
UINT SrcDimension;
|
||||
UINT padding;
|
||||
XMFLOAT2 TexelSize;
|
||||
};
|
||||
|
||||
/* Private state of a GstD3D12MipGen. Allocated with new in
 * gst_d3d12_mip_gen_init() and deleted in gst_d3d12_mip_gen_finalize(). */
struct GstD3D12MipGenPrivate
|
||||
{
|
||||
/* Drops the pipeline state and root signature first, then the descriptor
 * pool, and the device reference last. */
~GstD3D12MipGenPrivate ()
|
||||
{
|
||||
pso = nullptr;
|
||||
rs = nullptr;
|
||||
gst_clear_object (&desc_pool);
|
||||
gst_clear_object (&device);
|
||||
}
|
||||
|
||||
/* Reference taken with gst_object_ref() in gst_d3d12_mip_gen_new() */
GstD3D12Device *device = nullptr;
|
||||
/* Created in gst_d3d12_mip_gen_new() from a 5-descriptor, shader-visible
 * CBV/SRV/UAV heap description */
GstD3D12DescriptorPool *desc_pool = nullptr;
|
||||
/* Compute pipeline state built from the GST_D3D_PLUGIN_CS_MIP_GEN shader */
ComPtr < ID3D12PipelineState > pso;
|
||||
/* Root signature the pipeline state is created with */
ComPtr < ID3D12RootSignature > rs;
|
||||
/* Result of GetDescriptorHandleIncrementSize() for CBV/SRV/UAV heaps */
guint desc_inc_size;
|
||||
};
|
||||
|
||||
/* Instance structure of the GstD3D12MipGen type, a GstObject subclass
 * (see G_DEFINE_TYPE below). */
struct _GstD3D12MipGen
|
||||
{
|
||||
GstObject parent;
|
||||
|
||||
/* C++ state; owned by the instance (new in _init(), delete in _finalize()) */
GstD3D12MipGenPrivate *priv;
|
||||
};
|
||||
/* *INDENT-ON* */
|
||||
|
||||
static void gst_d3d12_mip_gen_finalize (GObject * object);
|
||||
|
||||
#define gst_d3d12_mip_gen_parent_class parent_class
|
||||
G_DEFINE_TYPE (GstD3D12MipGen, gst_d3d12_mip_gen, GST_TYPE_OBJECT);
|
||||
|
||||
static void
|
||||
gst_d3d12_mip_gen_class_init (GstD3D12MipGenClass * klass)
|
||||
{
|
||||
auto object_class = G_OBJECT_CLASS (klass);
|
||||
|
||||
object_class->finalize = gst_d3d12_mip_gen_finalize;
|
||||
|
||||
GST_DEBUG_CATEGORY_INIT (gst_d3d12_mip_gen_debug,
|
||||
"d3d12mipgen", 0, "d3d12mipgen");
|
||||
}
|
||||
|
||||
/* Instance initializer: allocates the C++ private data. It is released
 * again with delete in gst_d3d12_mip_gen_finalize(). */
static void
|
||||
gst_d3d12_mip_gen_init (GstD3D12MipGen * self)
|
||||
{
|
||||
self->priv = new GstD3D12MipGenPrivate ();
|
||||
}
|
||||
|
||||
static void
|
||||
gst_d3d12_mip_gen_finalize (GObject * object)
|
||||
{
|
||||
auto self = GST_D3D12_MIP_GEN (object);
|
||||
|
||||
delete self->priv;
|
||||
|
||||
G_OBJECT_CLASS (parent_class)->finalize (object);
|
||||
}
|
||||
|
||||
/**
 * gst_d3d12_mip_gen_new:
 * @device: a #GstD3D12Device
 *
 * Creates a mip generator bound to @device. The following objects are set
 * up, in this order:
 *  - a root signature with six 32-bit root constants at b0, a one-entry SRV
 *    table at t0, a four-entry UAV table at u0..u3 and a static
 *    linear/clamp sampler at s0
 *  - a compute pipeline state using the GST_D3D_PLUGIN_CS_MIP_GEN shader
 *    (shader model 5.0)
 *  - a shader-visible CBV/SRV/UAV descriptor pool with 5 descriptors per heap
 *
 * Returns: a new #GstD3D12MipGen whose floating reference has been sunk,
 * or %nullptr if any of the steps above failed
 */
GstD3D12MipGen *
gst_d3d12_mip_gen_new (GstD3D12Device * device)
{
  g_return_val_if_fail (GST_IS_D3D12_DEVICE (device), nullptr);

  auto self = (GstD3D12MipGen *) g_object_new (GST_TYPE_D3D12_MIP_GEN, nullptr);
  gst_object_ref_sink (self);

  auto priv = self->priv;
  priv->device = (GstD3D12Device *) gst_object_ref (device);

  /* s0: static sampler, linear min/mag filtering with clamped addressing */
  D3D12_STATIC_SAMPLER_DESC static_sampler = { };
  static_sampler.Filter = D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT;
  static_sampler.AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
  static_sampler.AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
  static_sampler.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
  static_sampler.MipLODBias = 0;
  static_sampler.MaxAnisotropy = 1;
  static_sampler.ComparisonFunc = D3D12_COMPARISON_FUNC_ALWAYS;
  static_sampler.BorderColor = D3D12_STATIC_BORDER_COLOR_OPAQUE_BLACK;
  static_sampler.MinLOD = 0;
  static_sampler.MaxLOD = D3D12_FLOAT32_MAX;
  static_sampler.ShaderRegister = 0;
  static_sampler.RegisterSpace = 0;
  static_sampler.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;

  /* Root parameter layout:
   *   [0] six 32-bit root constants at b0
   *   [1] descriptor table with a single SRV at t0
   *   [2] descriptor table with four UAVs at u0..u3 */
  CD3DX12_DESCRIPTOR_RANGE srv_range;
  srv_range.Init (D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 0);

  CD3DX12_DESCRIPTOR_RANGE uav_range;
  uav_range.Init (D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 4, 0);

  CD3DX12_ROOT_PARAMETER params[3];
  params[0].InitAsConstants (6, 0, 0);
  params[1].InitAsDescriptorTable (1, &srv_range);
  params[2].InitAsDescriptorTable (1, &uav_range);

  D3D12_VERSIONED_ROOT_SIGNATURE_DESC versioned_desc = { };
  CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC::Init_1_0 (versioned_desc, 3, params,
      1, &static_sampler, D3D12_ROOT_SIGNATURE_FLAG_NONE);

  ComPtr < ID3DBlob > serialized;
  ComPtr < ID3DBlob > serialize_err;
  auto hr = D3DX12SerializeVersionedRootSignature (&versioned_desc,
      D3D_ROOT_SIGNATURE_VERSION_1, &serialized, &serialize_err);

  if (!gst_d3d12_result (hr, device)) {
    const gchar *detail = nullptr;
    if (serialize_err)
      detail = (const gchar *) serialize_err->GetBufferPointer ();

    GST_ERROR_OBJECT (self,
        "Couldn't serialize root signature, hr: 0x%x, error detail: %s",
        (guint) hr, GST_STR_NULL (detail));
    gst_object_unref (self);
    return nullptr;
  }

  auto d3d_device = gst_d3d12_device_get_device_handle (device);
  hr = d3d_device->CreateRootSignature (0, serialized->GetBufferPointer (),
      serialized->GetBufferSize (), IID_PPV_ARGS (&priv->rs));
  if (!gst_d3d12_result (hr, device)) {
    GST_ERROR_OBJECT (self, "Couldn't create root signature");
    gst_object_unref (self);
    return nullptr;
  }

  /* Compute pipeline state built around the mip generation shader */
  GstD3DShaderByteCode cs_code;
  if (!gst_d3d_plugin_shader_get_cs_blob (GST_D3D_PLUGIN_CS_MIP_GEN,
          GST_D3D_SM_5_0, &cs_code)) {
    GST_ERROR_OBJECT (self, "Couldn't get shader byte code");
    gst_object_unref (self);
    return nullptr;
  }

  D3D12_COMPUTE_PIPELINE_STATE_DESC pipeline_desc = { };
  pipeline_desc.pRootSignature = priv->rs.Get ();
  pipeline_desc.CS.pShaderBytecode = cs_code.byte_code;
  pipeline_desc.CS.BytecodeLength = cs_code.byte_code_len;
  hr = d3d_device->CreateComputePipelineState (&pipeline_desc,
      IID_PPV_ARGS (&priv->pso));
  if (!gst_d3d12_result (hr, device)) {
    GST_ERROR_OBJECT (self, "Couldn't create PSO");
    gst_object_unref (self);
    return nullptr;
  }

  /* Five descriptors per heap: one SRV plus four UAVs, matching the two
   * descriptor tables declared in the root signature */
  D3D12_DESCRIPTOR_HEAP_DESC heap_desc = { };
  heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
  heap_desc.NumDescriptors = 5;
  heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;

  priv->desc_pool = gst_d3d12_descriptor_pool_new (d3d_device, &heap_desc);
  if (!priv->desc_pool) {
    GST_ERROR_OBJECT (self, "Couldn't create descriptor pool");
    gst_object_unref (self);
    return nullptr;
  }

  priv->desc_inc_size = d3d_device->GetDescriptorHandleIncrementSize
      (D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);

  return self;
}
|
||||
|
||||
/**
 * gst_d3d12_mip_gen_execute:
 * @gen: a #GstD3D12MipGen
 * @resource: 2D texture whose mip levels 1..N-1 are generated from level 0
 * @fence_data: receives every descriptor heap acquired for a pass
 *   (presumably so the heap is released once the GPU is done with it)
 * @cl: command list the compute work is recorded into
 *
 * Records the compute passes that fill the mip chain of @resource. Each pass
 * reads one source mip through an SRV and writes up to four following mips
 * through UAVs. Nothing is submitted here, commands are only recorded.
 *
 * Before every pass except the first one, the source mip is transitioned from
 * UNORDERED_ACCESS to NON_PIXEL_SHADER_RESOURCE. No other state transition is
 * recorded by this function.
 * NOTE(review): initial and final resource states appear to be the caller's
 * responsibility - confirm against the call site.
 *
 * Returns: %TRUE if all passes were recorded or if @resource has a single mip
 * level (nothing to do). %FALSE if @resource was created without
 * ALLOW_UNORDERED_ACCESS or with DENY_SHADER_RESOURCE, or if a descriptor
 * heap couldn't be acquired (commands recorded so far stay in @cl).
 */
gboolean
gst_d3d12_mip_gen_execute (GstD3D12MipGen * gen, ID3D12Resource * resource,
    GstD3D12FenceData * fence_data, ID3D12GraphicsCommandList * cl)
{
  g_return_val_if_fail (GST_IS_D3D12_MIP_GEN (gen), FALSE);
  g_return_val_if_fail (resource, FALSE);
  g_return_val_if_fail (fence_data, FALSE);
  g_return_val_if_fail (cl, FALSE);

  auto desc = GetDesc (resource);

  if (desc.MipLevels == 1) {
    GST_LOG_OBJECT (gen, "Single mip level texture");
    return TRUE;
  }

  /* The shader writes via UAV and reads via SRV, so both must be allowed */
  if ((desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS) !=
      D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS ||
      (desc.Flags & D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE) ==
      D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE) {
    GST_WARNING_OBJECT (gen, "Resource flag is incompatible");
    return FALSE;
  }

  auto priv = gen->priv;
  auto device = gst_d3d12_device_get_device_handle (priv->device);

  cl->SetComputeRootSignature (priv->rs.Get ());
  cl->SetPipelineState (priv->pso.Get ());

  /* One SRV covering the whole mip chain, the shader selects the source
   * level via the SrcMipLevel constant */
  D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = { };
  srv_desc.Format = desc.Format;
  srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
  srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
  srv_desc.Texture2D.MipLevels = desc.MipLevels;

  /* srcMip is advanced at the bottom of the loop by the number of mips
   * generated in the pass */
  for (guint srcMip = 0; srcMip < desc.MipLevels - 1;) {
    guint64 srcWidth = desc.Width >> srcMip;
    guint srcHeight = desc.Height >> srcMip;
    guint dstWidth = static_cast < guint > (srcWidth >> 1);
    guint dstHeight = srcHeight >> 1;
    GenerateMipsCB cbuf;

    // 0b00(0): Both width and height are even.
    // 0b01(1): Width is odd, height is even.
    // 0b10(2): Width is even, height is odd.
    // 0b11(3): Both width and height are odd.
    cbuf.SrcDimension = (srcHeight & 1) << 1 | (srcWidth & 1);

    // How many mipmap levels to compute this pass (max 4 mips per pass)
    DWORD mipCount = 0;

    // The number of times we can halve the size of the texture and get
    // exactly a 50% reduction in size.
    // A 1 bit in the width or height indicates an odd dimension.
    // The case where either the width or the height is exactly 1 is handled
    // as a special case (as the dimension does not require reduction).
    DWORD scanMask = (dstWidth == 1 ? dstHeight : dstWidth) |
        (dstHeight == 1 ? dstWidth : dstHeight);

    // The mask is zero when the destination collapses to 1x1 from
    // a non-square source (e.g. 1x2 or 2x1, as reached by a 4x8 texture with
    // a full mip chain). _BitScanForward() then returns 0 and leaves the
    // index undefined, so fall back to "exactly one mip left" explicitly.
    if (!_BitScanForward (&mipCount, scanMask))
      mipCount = 0;

    // Maximum number of mips to generate is 4.
    mipCount = std::min < DWORD > (4, mipCount + 1);
    // Clamp to total number of mips left over.
    mipCount = (srcMip + mipCount) >= desc.MipLevels ?
        desc.MipLevels - srcMip - 1 : mipCount;

    // Dimensions should not reduce to 0.
    // This can happen if the width and height are not the same.
    dstWidth = std::max < DWORD > (1, dstWidth);
    dstHeight = std::max < DWORD > (1, dstHeight);

    cbuf.SrcMipLevel = srcMip;
    cbuf.NumMipLevels = mipCount;
    cbuf.TexelSize.x = 1.0f / (float) dstWidth;
    cbuf.TexelSize.y = 1.0f / (float) dstHeight;

    /* The source of this pass was written as UAV by the previous pass,
     * make it readable as a shader resource */
    if (srcMip != 0) {
      D3D12_RESOURCE_BARRIER barrier =
          CD3DX12_RESOURCE_BARRIER::Transition (resource,
          D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
          D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, srcMip);
      cl->ResourceBarrier (1, &barrier);
    }

    GstD3D12Descriptor *desc_heap;
    if (!gst_d3d12_descriptor_pool_acquire (priv->desc_pool, &desc_heap)) {
      GST_ERROR_OBJECT (gen, "Couldn't acquire descriptor heap");
      return FALSE;
    }

    gst_d3d12_fence_data_push (fence_data,
        FENCE_NOTIFY_MINI_OBJECT (desc_heap));
    auto desc_handle = gst_d3d12_descriptor_get_handle (desc_heap);
    auto cpu_handle = CD3DX12_CPU_DESCRIPTOR_HANDLE
        (GetCPUDescriptorHandleForHeapStart (desc_handle));

    /* Heap layout: slot 0 is the SRV, following slots are the UAVs of the
     * destination mips of this pass */
    device->CreateShaderResourceView (resource, &srv_desc, cpu_handle);

    for (guint mip = 0; mip < mipCount; mip++) {
      D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = { };
      uavDesc.Format = desc.Format;
      uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D;
      uavDesc.Texture2D.MipSlice = srcMip + mip + 1;

      cpu_handle.Offset (priv->desc_inc_size);
      device->CreateUnorderedAccessView (resource,
          nullptr, &uavDesc, cpu_handle);
    }

    auto gpu_handle = CD3DX12_GPU_DESCRIPTOR_HANDLE
        (GetGPUDescriptorHandleForHeapStart (desc_handle));

    ID3D12DescriptorHeap *heaps[] = { desc_handle };
    cl->SetDescriptorHeaps (1, heaps);
    /* Root parameter 0: constants, 1: SRV table, 2: UAV table */
    cl->SetComputeRoot32BitConstants (0, 6, &cbuf, 0);
    cl->SetComputeRootDescriptorTable (1, gpu_handle);
    gpu_handle.Offset (priv->desc_inc_size);
    cl->SetComputeRootDescriptorTable (2, gpu_handle);

    /* Dispatch in groups of 8x8 over the first destination mip, rounding up */
    cl->Dispatch ((dstWidth + 7) / 8, (dstHeight + 7) / 8, 1);

    /* Make UAV writes of this pass visible before the next pass */
    D3D12_RESOURCE_BARRIER barrier = CD3DX12_RESOURCE_BARRIER::UAV (resource);
    cl->ResourceBarrier (1, &barrier);

    srcMip += mipCount;
  }

  return TRUE;
}
|
38
subprojects/gst-plugins-bad/sys/d3d12/gstd3d12mipgen.h
Normal file
38
subprojects/gst-plugins-bad/sys/d3d12/gstd3d12mipgen.h
Normal file
|
@ -0,0 +1,38 @@
|
|||
/* GStreamer
|
||||
* Copyright (C) 2024 Seungha Yang <seungha@centricular.com>
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Library General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Library General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Library General Public
|
||||
* License along with this library; if not, write to the
|
||||
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
|
||||
* Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <gst/gst.h>
|
||||
#include <gst/d3d12/gstd3d12.h>
|
||||
|
||||
G_BEGIN_DECLS
|
||||
|
||||
#define GST_TYPE_D3D12_MIP_GEN (gst_d3d12_mip_gen_get_type())
|
||||
G_DECLARE_FINAL_TYPE (GstD3D12MipGen, gst_d3d12_mip_gen, GST, D3D12_MIP_GEN, GstObject);
|
||||
|
||||
/* Creates a mip generator for @device. The generator keeps a reference to
 * @device and prepares the root signature, the compute pipeline state and
 * the descriptor pool used by gst_d3d12_mip_gen_execute().
 * Returns nullptr if any of those objects couldn't be created. */
GstD3D12MipGen * gst_d3d12_mip_gen_new (GstD3D12Device * device);
|
||||
|
||||
/* Records into @cl the compute passes that generate the mip levels of
 * @resource. Descriptor heaps acquired for the passes are pushed to
 * @fence_data. Returns TRUE on success or when @resource has only one mip
 * level. Returns FALSE when @resource doesn't allow unordered access, denies
 * shader resource usage, or when a descriptor heap couldn't be acquired. */
gboolean gst_d3d12_mip_gen_execute (GstD3D12MipGen * gen,
|
||||
ID3D12Resource * resource,
|
||||
GstD3D12FenceData * fence_data,
|
||||
ID3D12GraphicsCommandList * cl);
|
||||
|
||||
G_END_DECLS
|
||||
|
1019
subprojects/gst-plugins-bad/sys/d3d12/gstd3d12mipmapping.cpp
Normal file
1019
subprojects/gst-plugins-bad/sys/d3d12/gstd3d12mipmapping.cpp
Normal file
File diff suppressed because it is too large
Load diff
32
subprojects/gst-plugins-bad/sys/d3d12/gstd3d12mipmapping.h
Normal file
32
subprojects/gst-plugins-bad/sys/d3d12/gstd3d12mipmapping.h
Normal file
|
@ -0,0 +1,32 @@
|
|||
/* GStreamer
|
||||
* Copyright (C) 2024 Seungha Yang <seungha@centricular.com>
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Library General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Library General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Library General Public
|
||||
* License along with this library; if not, write to the
|
||||
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
|
||||
* Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <gst/gst.h>
|
||||
#include "gstd3d12basefilter.h"
|
||||
|
||||
G_BEGIN_DECLS
|
||||
|
||||
#define GST_TYPE_D3D12_MIP_MAPPING (gst_d3d12_mip_mapping_get_type())
|
||||
G_DECLARE_FINAL_TYPE (GstD3D12MipMapping, gst_d3d12_mip_mapping,
|
||||
GST, D3D12_MIP_MAPPING, GstD3D12BaseFilter)
|
||||
|
||||
G_END_DECLS
|
||||
|
|
@ -18,6 +18,8 @@ d3d12_sources = [
|
|||
'gstd3d12ipcsink.cpp',
|
||||
'gstd3d12ipcsrc.cpp',
|
||||
'gstd3d12mpeg2dec.cpp',
|
||||
'gstd3d12mipgen.cpp',
|
||||
'gstd3d12mipmapping.cpp',
|
||||
'gstd3d12overlaycompositor.cpp',
|
||||
'gstd3d12pluginutils.cpp',
|
||||
'gstd3d12screencapture.cpp',
|
||||
|
|
|
@ -49,6 +49,7 @@
|
|||
#include "gstd3d12ipcsrc.h"
|
||||
#include "gstd3d12ipcsink.h"
|
||||
#include "gstd3d12swapchainsink.h"
|
||||
#include "gstd3d12mipmapping.h"
|
||||
#include <windows.h>
|
||||
#include <versionhelpers.h>
|
||||
#include <wrl.h>
|
||||
|
@ -181,6 +182,8 @@ plugin_init (GstPlugin * plugin)
|
|||
"d3d12ipcsink", GST_RANK_NONE, GST_TYPE_D3D12_IPC_SINK);
|
||||
gst_element_register (plugin,
|
||||
"d3d12swapchainsink", GST_RANK_NONE, GST_TYPE_D3D12_SWAPCHAIN_SINK);
|
||||
gst_element_register (plugin,
|
||||
"d3d12mipmapping", GST_RANK_NONE, GST_TYPE_D3D12_MIP_MAPPING);
|
||||
|
||||
g_object_set_data_full (G_OBJECT (plugin),
|
||||
"plugin-d3d12-shutdown", (gpointer) "shutdown-data",
|
||||
|
|
Loading…
Reference in a new issue