d3d11converter: Do not use R32_UINT UAV

An R32_UINT UAV does not work well with YUY2 textures on some GPUs. Always use
the same DXGI formats for each SRV and UAV.

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/5709>
This commit is contained in:
Seungha Yang 2023-11-25 01:26:31 +09:00
parent 030800905b
commit df0e6c4140
4 changed files with 108 additions and 178 deletions

View file

@ -113,44 +113,69 @@ gst_d3d11_converter_helper_new (GstD3D11Device * device,
std::string entry_point;
HRESULT hr;
if (in_format == GST_VIDEO_FORMAT_YUY2 && out_format == GST_VIDEO_FORMAT_VUYA) {
entry_point = "CSMain_YUY2_to_VUYA";
srv_format = DXGI_FORMAT_R8G8B8A8_UINT;
uav_format = DXGI_FORMAT_R32_UINT;
} else if (in_format == GST_VIDEO_FORMAT_VUYA &&
out_format == GST_VIDEO_FORMAT_YUY2) {
entry_point = "CSMain_VUYA_to_YUY2";
srv_format = DXGI_FORMAT_R8G8B8A8_UINT;
uav_format = DXGI_FORMAT_R32_UINT;
} else if (in_format == GST_VIDEO_FORMAT_AYUV64 &&
out_format == GST_VIDEO_FORMAT_Y410) {
entry_point = "CSMain_AYUV64_to_Y410";
if (in_format != out_format) {
std::string in_format_str;
std::string out_format_str;
switch (in_format) {
case GST_VIDEO_FORMAT_YUY2:
srv_format = DXGI_FORMAT_R8G8B8A8_UNORM;
in_format_str = "YUY2";
break;
case GST_VIDEO_FORMAT_Y210:
case GST_VIDEO_FORMAT_Y212_LE:
srv_format = DXGI_FORMAT_R16G16B16A16_UNORM;
uav_format = DXGI_FORMAT_R32_UINT;
x_unit = 8;
} else if (in_format == GST_VIDEO_FORMAT_AYUV64 &&
(out_format == GST_VIDEO_FORMAT_Y210 ||
out_format == GST_VIDEO_FORMAT_Y212_LE)) {
entry_point = "CSMain_AYUV64_to_Y210";
in_format_str = "YUY2";
break;
case GST_VIDEO_FORMAT_AYUV:
srv_format = DXGI_FORMAT_R8G8B8A8_UNORM;
in_format_str = "AYUV";
break;
case GST_VIDEO_FORMAT_AYUV64:
srv_format = DXGI_FORMAT_R16G16B16A16_UNORM;
uav_format = DXGI_FORMAT_R16G16B16A16_UINT;
} else if ((in_format == GST_VIDEO_FORMAT_Y210 ||
in_format == GST_VIDEO_FORMAT_Y212_LE) &&
out_format == GST_VIDEO_FORMAT_AYUV64) {
entry_point = "CSMain_Y210_to_AYUV64";
srv_format = DXGI_FORMAT_R16G16B16A16_UINT;
uav_format = DXGI_FORMAT_R16G16B16A16_UNORM;
} else if (in_format == GST_VIDEO_FORMAT_AYUV64 &&
out_format == GST_VIDEO_FORMAT_Y412_LE) {
entry_point = "CSMain_AYUV64_to_Y412";
srv_format = DXGI_FORMAT_R16G16B16A16_UNORM;
uav_format = DXGI_FORMAT_R16G16B16A16_UINT;
x_unit = 8;
} else if (in_format != out_format) {
in_format_str = "AYUV";
break;
default:
g_assert_not_reached ();
return nullptr;
}
switch (out_format) {
case GST_VIDEO_FORMAT_YUY2:
uav_format = DXGI_FORMAT_R8G8B8A8_UNORM;
out_format_str = "YUY2";
break;
case GST_VIDEO_FORMAT_Y210:
case GST_VIDEO_FORMAT_Y212_LE:
uav_format = DXGI_FORMAT_R16G16B16A16_UNORM;
out_format_str = "YUY2";
break;
case GST_VIDEO_FORMAT_Y410:
uav_format = DXGI_FORMAT_R10G10B10A2_UNORM;
out_format_str = "Y410";
x_unit = 8;
break;
case GST_VIDEO_FORMAT_Y412_LE:
uav_format = DXGI_FORMAT_R16G16B16A16_UNORM;
out_format_str = "Y410";
x_unit = 8;
break;
case GST_VIDEO_FORMAT_AYUV:
uav_format = DXGI_FORMAT_R8G8B8A8_UNORM;
out_format_str = "AYUV";
break;
case GST_VIDEO_FORMAT_AYUV64:
uav_format = DXGI_FORMAT_R16G16B16A16_UNORM;
out_format_str = "AYUV";
break;
default:
g_assert_not_reached ();
return nullptr;
}
entry_point = "CSMain_" + in_format_str + "_to_" + out_format_str;
}
self = new GstD3D11ConverterHelper ();
self->device = (GstD3D11Device *) gst_object_ref (device);

View file

@ -2041,7 +2041,7 @@ gst_d3d11_converter_new (GstD3D11Device * device, const GstVideoInfo * in_info,
GST_VIDEO_INFO_FORMAT (in_info) == GST_VIDEO_FORMAT_Y210 ||
GST_VIDEO_INFO_FORMAT (in_info) == GST_VIDEO_FORMAT_Y212_LE) {
GstVideoInfo tmp_info;
GstVideoFormat postproc_format = GST_VIDEO_FORMAT_VUYA;
GstVideoFormat postproc_format = GST_VIDEO_FORMAT_AYUV;
if (GST_VIDEO_INFO_FORMAT (in_info) != GST_VIDEO_FORMAT_YUY2)
postproc_format = GST_VIDEO_FORMAT_AYUV64;
@ -2066,7 +2066,7 @@ gst_d3d11_converter_new (GstD3D11Device * device, const GstVideoInfo * in_info,
GST_VIDEO_INFO_FORMAT (out_info) == GST_VIDEO_FORMAT_Y212_LE ||
GST_VIDEO_INFO_FORMAT (out_info) == GST_VIDEO_FORMAT_Y412_LE) {
GstVideoInfo tmp_info;
GstVideoFormat postproc_format = GST_VIDEO_FORMAT_VUYA;
GstVideoFormat postproc_format = GST_VIDEO_FORMAT_AYUV;
if (GST_VIDEO_INFO_FORMAT (out_info) != GST_VIDEO_FORMAT_YUY2)
postproc_format = GST_VIDEO_FORMAT_AYUV64;

View file

@ -18,74 +18,13 @@
*/
#ifdef BUILDING_HLSL
#ifdef BUILDING_CSMain_AYUV64_to_Y410
Texture2D<float4> inTex : register(t0);
RWTexture2D<uint> outTex : register(u0);
RWTexture2D<unorm float4> outTex : register(u0);
#ifdef BUILDING_CSMain_YUY2_to_AYUV
void Execute (uint3 tid)
{
float4 val = inTex.Load (tid);
uint3 scaled = val.yzw * 1023;
outTex[tid.xy] = (0xc0 << 24) | (scaled.z << 20) | (scaled.x << 10) | scaled.y;
}
#endif
#ifdef BUILDING_CSMain_VUYA_to_YUY2
Texture2D<uint4> inTex : register(t0);
RWTexture2D<uint> outTex : register(u0);
void Execute (uint3 tid)
{
uint4 val = inTex.Load (uint3(tid.x * 2, tid.y, 0));
uint Y0 = val.b;
uint U = val.g;
uint V = val.r;
uint Y1 = inTex.Load (uint3(tid.x * 2 + 1, tid.y, 0)).b;
outTex[tid.xy] = Y0 | (U << 8) | (Y1 << 16) | (V << 24);
}
#endif
#ifdef BUILDING_CSMain_YUY2_to_VUYA
Texture2D<uint4> inTex : register(t0);
RWTexture2D<uint> outTex : register(u0);
void Execute (uint3 tid)
{
uint4 val = inTex.Load (tid);
uint Y0 = val.r;
uint U = val.g;
uint Y1 = val.b;
uint V = val.a;
outTex[uint2(tid.x * 2, tid.y)] = V | (U << 8) | (Y0 << 16) | (0xff << 24);
outTex[uint2(tid.x * 2 + 1, tid.y)] = V | (U << 8) | (Y1 << 16) | (0xff << 24);
}
#endif
#ifdef BUILDING_CSMain_AYUV64_to_Y210
Texture2D<float4> inTex : register(t0);
RWTexture2D<uint4> outTex : register(u0);
void Execute (uint3 tid)
{
float4 val = inTex.Load (uint3(tid.x * 2, tid.y, 0));
uint3 scaled = val.yzw * 65535;
uint Y0 = scaled.x;
uint U = scaled.y;
uint V = scaled.z;
uint Y1 = inTex.Load (uint3(tid.x * 2 + 1, tid.y, 0)).y * 65535;
outTex[tid.xy] = uint4(Y0, U, Y1, V);
}
#endif
#ifdef BUILDING_CSMain_Y210_to_AYUV64
Texture2D<uint4> inTex : register(t0);
RWTexture2D<unorm float4> outTex : register(u0);
void Execute (uint3 tid)
{
float4 val = inTex.Load (tid) / 65535.0;
float Y0 = val.r;
float U = val.g;
float Y1 = val.b;
@ -96,14 +35,29 @@ void Execute (uint3 tid)
}
#endif
#ifdef BUILDING_CSMain_AYUV64_to_Y412
Texture2D<float4> inTex : register(t0);
RWTexture2D<uint4> outTex : register(u0);
#ifdef BUILDING_CSMain_AYUV_to_YUY2
void Execute (uint3 tid)
{
float3 val = inTex.Load (uint3(tid.x * 2, tid.y, 0)).yzw;
float Y0 = val.x;
float U = val.y;
float V = val.z;
float Y1 = inTex.Load (uint3(tid.x * 2 + 1, tid.y, 0)).y;
outTex[tid.xy] = float4 (Y0, U, Y1, V);
}
#endif
#ifdef BUILDING_CSMain_AYUV_to_Y410
void Execute (uint3 tid)
{
float4 val = inTex.Load (tid);
outTex[tid.xy] = uint4(val.zywx * 65535);
float Y = val.y;
float U = val.z;
float V = val.w;
float A = val.x;
outTex[tid.xy] = float4 (U, Y, V, A);
}
#endif
@ -114,74 +68,13 @@ void ENTRY_POINT (uint3 tid : SV_DispatchThreadID)
}
#else
static const char g_CSMain_converter_str[] =
"#ifdef BUILDING_CSMain_AYUV64_to_Y410\n"
"Texture2D<float4> inTex : register(t0);\n"
"RWTexture2D<uint> outTex : register(u0);\n"
"RWTexture2D<unorm float4> outTex : register(u0);\n"
"\n"
"#ifdef BUILDING_CSMain_YUY2_to_AYUV\n"
"void Execute (uint3 tid)\n"
"{\n"
" float4 val = inTex.Load (tid);\n"
" uint3 scaled = val.yzw * 1023;\n"
" outTex[tid.xy] = (0xc0 << 24) | (scaled.z << 20) | (scaled.x << 10) | scaled.y;\n"
"}\n"
"#endif\n"
"\n"
"#ifdef BUILDING_CSMain_VUYA_to_YUY2\n"
"Texture2D<uint4> inTex : register(t0);\n"
"RWTexture2D<uint> outTex : register(u0);\n"
"\n"
"void Execute (uint3 tid)\n"
"{\n"
" uint4 val = inTex.Load (uint3(tid.x * 2, tid.y, 0));\n"
" uint Y0 = val.b;\n"
" uint U = val.g;\n"
" uint V = val.r;\n"
" uint Y1 = inTex.Load (uint3(tid.x * 2 + 1, tid.y, 0)).b;\n"
"\n"
" outTex[tid.xy] = Y0 | (U << 8) | (Y1 << 16) | (V << 24);\n"
"}\n"
"#endif\n"
"\n"
"#ifdef BUILDING_CSMain_YUY2_to_VUYA\n"
"Texture2D<uint4> inTex : register(t0);\n"
"RWTexture2D<uint> outTex : register(u0);\n"
"\n"
"void Execute (uint3 tid)\n"
"{\n"
" uint4 val = inTex.Load (tid);\n"
" uint Y0 = val.r;\n"
" uint U = val.g;\n"
" uint Y1 = val.b;\n"
" uint V = val.a;\n"
"\n"
" outTex[uint2(tid.x * 2, tid.y)] = V | (U << 8) | (Y0 << 16) | (0xff << 24);\n"
" outTex[uint2(tid.x * 2 + 1, tid.y)] = V | (U << 8) | (Y1 << 16) | (0xff << 24);\n"
"}\n"
"#endif\n"
"\n"
"#ifdef BUILDING_CSMain_AYUV64_to_Y210\n"
"Texture2D<float4> inTex : register(t0);\n"
"RWTexture2D<uint4> outTex : register(u0);\n"
"\n"
"void Execute (uint3 tid)\n"
"{\n"
" float4 val = inTex.Load (uint3(tid.x * 2, tid.y, 0));\n"
" uint3 scaled = val.yzw * 65535;\n"
" uint Y0 = scaled.x;\n"
" uint U = scaled.y;\n"
" uint V = scaled.z;\n"
" uint Y1 = inTex.Load (uint3(tid.x * 2 + 1, tid.y, 0)).y * 65535;\n"
" outTex[tid.xy] = uint4(Y0, U, Y1, V);\n"
"}\n"
"#endif\n"
"\n"
"#ifdef BUILDING_CSMain_Y210_to_AYUV64\n"
"Texture2D<uint4> inTex : register(t0);\n"
"RWTexture2D<unorm float4> outTex : register(u0);\n"
"\n"
"void Execute (uint3 tid)\n"
"{\n"
" float4 val = inTex.Load (tid) / 65535.0;\n"
" float Y0 = val.r;\n"
" float U = val.g;\n"
" float Y1 = val.b;\n"
@ -192,14 +85,29 @@ static const char g_CSMain_converter_str[] =
"}\n"
"#endif\n"
"\n"
"#ifdef BUILDING_CSMain_AYUV64_to_Y412\n"
"Texture2D<float4> inTex : register(t0);\n"
"RWTexture2D<uint4> outTex : register(u0);\n"
"#ifdef BUILDING_CSMain_AYUV_to_YUY2\n"
"void Execute (uint3 tid)\n"
"{\n"
" float3 val = inTex.Load (uint3(tid.x * 2, tid.y, 0)).yzw;\n"
" float Y0 = val.x;\n"
" float U = val.y;\n"
" float V = val.z;\n"
" float Y1 = inTex.Load (uint3(tid.x * 2 + 1, tid.y, 0)).y;\n"
"\n"
" outTex[tid.xy] = float4 (Y0, U, Y1, V);\n"
"}\n"
"#endif\n"
"\n"
"#ifdef BUILDING_CSMain_AYUV_to_Y410\n"
"void Execute (uint3 tid)\n"
"{\n"
" float4 val = inTex.Load (tid);\n"
" outTex[tid.xy] = uint4(val.zywx * 65535);\n"
" float Y = val.y;\n"
" float U = val.z;\n"
" float V = val.w;\n"
" float A = val.x;\n"
"\n"
" outTex[tid.xy] = float4 (U, Y, V, A);\n"
"}\n"
"#endif\n"
"\n"

View file

@ -130,12 +130,9 @@ endforeach
hlsl_cs_source = files('CSMain_converter.hlsl')
hlsl_cs_entry_points = [
'CSMain_AYUV64_to_Y410',
'CSMain_VUYA_to_YUY2',
'CSMain_YUY2_to_VUYA',
'CSMain_AYUV64_to_Y210',
'CSMain_Y210_to_AYUV64',
'CSMain_AYUV64_to_Y412',
'CSMain_YUY2_to_AYUV',
'CSMain_AYUV_to_YUY2',
'CSMain_AYUV_to_Y410',
]
foreach shader : hlsl_cs_entry_points