d3d11converter: Do not use R32_UINT UAV

An R32_UINT UAV does not work well with YUY2 textures on some GPUs.
Always use the same DXGI format for both the SRV and the UAV of a given
video format.

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/5709>
This commit is contained in:
Seungha Yang 2023-11-25 01:26:31 +09:00
parent 030800905b
commit df0e6c4140
4 changed files with 108 additions and 178 deletions

View file

@ -113,42 +113,67 @@ gst_d3d11_converter_helper_new (GstD3D11Device * device,
std::string entry_point; std::string entry_point;
HRESULT hr; HRESULT hr;
if (in_format == GST_VIDEO_FORMAT_YUY2 && out_format == GST_VIDEO_FORMAT_VUYA) { if (in_format != out_format) {
entry_point = "CSMain_YUY2_to_VUYA"; std::string in_format_str;
srv_format = DXGI_FORMAT_R8G8B8A8_UINT; std::string out_format_str;
uav_format = DXGI_FORMAT_R32_UINT;
} else if (in_format == GST_VIDEO_FORMAT_VUYA && switch (in_format) {
out_format == GST_VIDEO_FORMAT_YUY2) { case GST_VIDEO_FORMAT_YUY2:
entry_point = "CSMain_VUYA_to_YUY2"; srv_format = DXGI_FORMAT_R8G8B8A8_UNORM;
srv_format = DXGI_FORMAT_R8G8B8A8_UINT; in_format_str = "YUY2";
uav_format = DXGI_FORMAT_R32_UINT; break;
} else if (in_format == GST_VIDEO_FORMAT_AYUV64 && case GST_VIDEO_FORMAT_Y210:
out_format == GST_VIDEO_FORMAT_Y410) { case GST_VIDEO_FORMAT_Y212_LE:
entry_point = "CSMain_AYUV64_to_Y410"; srv_format = DXGI_FORMAT_R16G16B16A16_UNORM;
srv_format = DXGI_FORMAT_R16G16B16A16_UNORM; in_format_str = "YUY2";
uav_format = DXGI_FORMAT_R32_UINT; break;
x_unit = 8; case GST_VIDEO_FORMAT_AYUV:
} else if (in_format == GST_VIDEO_FORMAT_AYUV64 && srv_format = DXGI_FORMAT_R8G8B8A8_UNORM;
(out_format == GST_VIDEO_FORMAT_Y210 || in_format_str = "AYUV";
out_format == GST_VIDEO_FORMAT_Y212_LE)) { break;
entry_point = "CSMain_AYUV64_to_Y210"; case GST_VIDEO_FORMAT_AYUV64:
srv_format = DXGI_FORMAT_R16G16B16A16_UNORM; srv_format = DXGI_FORMAT_R16G16B16A16_UNORM;
uav_format = DXGI_FORMAT_R16G16B16A16_UINT; in_format_str = "AYUV";
} else if ((in_format == GST_VIDEO_FORMAT_Y210 || break;
in_format == GST_VIDEO_FORMAT_Y212_LE) && default:
out_format == GST_VIDEO_FORMAT_AYUV64) { g_assert_not_reached ();
entry_point = "CSMain_Y210_to_AYUV64"; return nullptr;
srv_format = DXGI_FORMAT_R16G16B16A16_UINT; }
uav_format = DXGI_FORMAT_R16G16B16A16_UNORM;
} else if (in_format == GST_VIDEO_FORMAT_AYUV64 && switch (out_format) {
out_format == GST_VIDEO_FORMAT_Y412_LE) { case GST_VIDEO_FORMAT_YUY2:
entry_point = "CSMain_AYUV64_to_Y412"; uav_format = DXGI_FORMAT_R8G8B8A8_UNORM;
srv_format = DXGI_FORMAT_R16G16B16A16_UNORM; out_format_str = "YUY2";
uav_format = DXGI_FORMAT_R16G16B16A16_UINT; break;
x_unit = 8; case GST_VIDEO_FORMAT_Y210:
} else if (in_format != out_format) { case GST_VIDEO_FORMAT_Y212_LE:
g_assert_not_reached (); uav_format = DXGI_FORMAT_R16G16B16A16_UNORM;
return nullptr; out_format_str = "YUY2";
break;
case GST_VIDEO_FORMAT_Y410:
uav_format = DXGI_FORMAT_R10G10B10A2_UNORM;
out_format_str = "Y410";
x_unit = 8;
break;
case GST_VIDEO_FORMAT_Y412_LE:
uav_format = DXGI_FORMAT_R16G16B16A16_UNORM;
out_format_str = "Y410";
x_unit = 8;
break;
case GST_VIDEO_FORMAT_AYUV:
uav_format = DXGI_FORMAT_R8G8B8A8_UNORM;
out_format_str = "AYUV";
break;
case GST_VIDEO_FORMAT_AYUV64:
uav_format = DXGI_FORMAT_R16G16B16A16_UNORM;
out_format_str = "AYUV";
break;
default:
g_assert_not_reached ();
return nullptr;
}
entry_point = "CSMain_" + in_format_str + "_to_" + out_format_str;
} }
self = new GstD3D11ConverterHelper (); self = new GstD3D11ConverterHelper ();

View file

@ -2041,7 +2041,7 @@ gst_d3d11_converter_new (GstD3D11Device * device, const GstVideoInfo * in_info,
GST_VIDEO_INFO_FORMAT (in_info) == GST_VIDEO_FORMAT_Y210 || GST_VIDEO_INFO_FORMAT (in_info) == GST_VIDEO_FORMAT_Y210 ||
GST_VIDEO_INFO_FORMAT (in_info) == GST_VIDEO_FORMAT_Y212_LE) { GST_VIDEO_INFO_FORMAT (in_info) == GST_VIDEO_FORMAT_Y212_LE) {
GstVideoInfo tmp_info; GstVideoInfo tmp_info;
GstVideoFormat postproc_format = GST_VIDEO_FORMAT_VUYA; GstVideoFormat postproc_format = GST_VIDEO_FORMAT_AYUV;
if (GST_VIDEO_INFO_FORMAT (in_info) != GST_VIDEO_FORMAT_YUY2) if (GST_VIDEO_INFO_FORMAT (in_info) != GST_VIDEO_FORMAT_YUY2)
postproc_format = GST_VIDEO_FORMAT_AYUV64; postproc_format = GST_VIDEO_FORMAT_AYUV64;
@ -2066,7 +2066,7 @@ gst_d3d11_converter_new (GstD3D11Device * device, const GstVideoInfo * in_info,
GST_VIDEO_INFO_FORMAT (out_info) == GST_VIDEO_FORMAT_Y212_LE || GST_VIDEO_INFO_FORMAT (out_info) == GST_VIDEO_FORMAT_Y212_LE ||
GST_VIDEO_INFO_FORMAT (out_info) == GST_VIDEO_FORMAT_Y412_LE) { GST_VIDEO_INFO_FORMAT (out_info) == GST_VIDEO_FORMAT_Y412_LE) {
GstVideoInfo tmp_info; GstVideoInfo tmp_info;
GstVideoFormat postproc_format = GST_VIDEO_FORMAT_VUYA; GstVideoFormat postproc_format = GST_VIDEO_FORMAT_AYUV;
if (GST_VIDEO_INFO_FORMAT (out_info) != GST_VIDEO_FORMAT_YUY2) if (GST_VIDEO_INFO_FORMAT (out_info) != GST_VIDEO_FORMAT_YUY2)
postproc_format = GST_VIDEO_FORMAT_AYUV64; postproc_format = GST_VIDEO_FORMAT_AYUV64;

View file

@ -18,74 +18,13 @@
*/ */
#ifdef BUILDING_HLSL #ifdef BUILDING_HLSL
#ifdef BUILDING_CSMain_AYUV64_to_Y410
Texture2D<float4> inTex : register(t0); Texture2D<float4> inTex : register(t0);
RWTexture2D<uint> outTex : register(u0); RWTexture2D<unorm float4> outTex : register(u0);
#ifdef BUILDING_CSMain_YUY2_to_AYUV
void Execute (uint3 tid) void Execute (uint3 tid)
{ {
float4 val = inTex.Load (tid); float4 val = inTex.Load (tid);
uint3 scaled = val.yzw * 1023;
outTex[tid.xy] = (0xc0 << 24) | (scaled.z << 20) | (scaled.x << 10) | scaled.y;
}
#endif
#ifdef BUILDING_CSMain_VUYA_to_YUY2
Texture2D<uint4> inTex : register(t0);
RWTexture2D<uint> outTex : register(u0);
void Execute (uint3 tid)
{
uint4 val = inTex.Load (uint3(tid.x * 2, tid.y, 0));
uint Y0 = val.b;
uint U = val.g;
uint V = val.r;
uint Y1 = inTex.Load (uint3(tid.x * 2 + 1, tid.y, 0)).b;
outTex[tid.xy] = Y0 | (U << 8) | (Y1 << 16) | (V << 24);
}
#endif
#ifdef BUILDING_CSMain_YUY2_to_VUYA
Texture2D<uint4> inTex : register(t0);
RWTexture2D<uint> outTex : register(u0);
void Execute (uint3 tid)
{
uint4 val = inTex.Load (tid);
uint Y0 = val.r;
uint U = val.g;
uint Y1 = val.b;
uint V = val.a;
outTex[uint2(tid.x * 2, tid.y)] = V | (U << 8) | (Y0 << 16) | (0xff << 24);
outTex[uint2(tid.x * 2 + 1, tid.y)] = V | (U << 8) | (Y1 << 16) | (0xff << 24);
}
#endif
#ifdef BUILDING_CSMain_AYUV64_to_Y210
Texture2D<float4> inTex : register(t0);
RWTexture2D<uint4> outTex : register(u0);
void Execute (uint3 tid)
{
float4 val = inTex.Load (uint3(tid.x * 2, tid.y, 0));
uint3 scaled = val.yzw * 65535;
uint Y0 = scaled.x;
uint U = scaled.y;
uint V = scaled.z;
uint Y1 = inTex.Load (uint3(tid.x * 2 + 1, tid.y, 0)).y * 65535;
outTex[tid.xy] = uint4(Y0, U, Y1, V);
}
#endif
#ifdef BUILDING_CSMain_Y210_to_AYUV64
Texture2D<uint4> inTex : register(t0);
RWTexture2D<unorm float4> outTex : register(u0);
void Execute (uint3 tid)
{
float4 val = inTex.Load (tid) / 65535.0;
float Y0 = val.r; float Y0 = val.r;
float U = val.g; float U = val.g;
float Y1 = val.b; float Y1 = val.b;
@ -96,14 +35,29 @@ void Execute (uint3 tid)
} }
#endif #endif
#ifdef BUILDING_CSMain_AYUV64_to_Y412 #ifdef BUILDING_CSMain_AYUV_to_YUY2
Texture2D<float4> inTex : register(t0); void Execute (uint3 tid)
RWTexture2D<uint4> outTex : register(u0); {
float3 val = inTex.Load (uint3(tid.x * 2, tid.y, 0)).yzw;
float Y0 = val.x;
float U = val.y;
float V = val.z;
float Y1 = inTex.Load (uint3(tid.x * 2 + 1, tid.y, 0)).y;
outTex[tid.xy] = float4 (Y0, U, Y1, V);
}
#endif
#ifdef BUILDING_CSMain_AYUV_to_Y410
void Execute (uint3 tid) void Execute (uint3 tid)
{ {
float4 val = inTex.Load (tid); float4 val = inTex.Load (tid);
outTex[tid.xy] = uint4(val.zywx * 65535); float Y = val.y;
float U = val.z;
float V = val.w;
float A = val.x;
outTex[tid.xy] = float4 (U, Y, V, A);
} }
#endif #endif
@ -114,74 +68,13 @@ void ENTRY_POINT (uint3 tid : SV_DispatchThreadID)
} }
#else #else
static const char g_CSMain_converter_str[] = static const char g_CSMain_converter_str[] =
"#ifdef BUILDING_CSMain_AYUV64_to_Y410\n"
"Texture2D<float4> inTex : register(t0);\n" "Texture2D<float4> inTex : register(t0);\n"
"RWTexture2D<uint> outTex : register(u0);\n" "RWTexture2D<unorm float4> outTex : register(u0);\n"
"\n" "\n"
"#ifdef BUILDING_CSMain_YUY2_to_AYUV\n"
"void Execute (uint3 tid)\n" "void Execute (uint3 tid)\n"
"{\n" "{\n"
" float4 val = inTex.Load (tid);\n" " float4 val = inTex.Load (tid);\n"
" uint3 scaled = val.yzw * 1023;\n"
" outTex[tid.xy] = (0xc0 << 24) | (scaled.z << 20) | (scaled.x << 10) | scaled.y;\n"
"}\n"
"#endif\n"
"\n"
"#ifdef BUILDING_CSMain_VUYA_to_YUY2\n"
"Texture2D<uint4> inTex : register(t0);\n"
"RWTexture2D<uint> outTex : register(u0);\n"
"\n"
"void Execute (uint3 tid)\n"
"{\n"
" uint4 val = inTex.Load (uint3(tid.x * 2, tid.y, 0));\n"
" uint Y0 = val.b;\n"
" uint U = val.g;\n"
" uint V = val.r;\n"
" uint Y1 = inTex.Load (uint3(tid.x * 2 + 1, tid.y, 0)).b;\n"
"\n"
" outTex[tid.xy] = Y0 | (U << 8) | (Y1 << 16) | (V << 24);\n"
"}\n"
"#endif\n"
"\n"
"#ifdef BUILDING_CSMain_YUY2_to_VUYA\n"
"Texture2D<uint4> inTex : register(t0);\n"
"RWTexture2D<uint> outTex : register(u0);\n"
"\n"
"void Execute (uint3 tid)\n"
"{\n"
" uint4 val = inTex.Load (tid);\n"
" uint Y0 = val.r;\n"
" uint U = val.g;\n"
" uint Y1 = val.b;\n"
" uint V = val.a;\n"
"\n"
" outTex[uint2(tid.x * 2, tid.y)] = V | (U << 8) | (Y0 << 16) | (0xff << 24);\n"
" outTex[uint2(tid.x * 2 + 1, tid.y)] = V | (U << 8) | (Y1 << 16) | (0xff << 24);\n"
"}\n"
"#endif\n"
"\n"
"#ifdef BUILDING_CSMain_AYUV64_to_Y210\n"
"Texture2D<float4> inTex : register(t0);\n"
"RWTexture2D<uint4> outTex : register(u0);\n"
"\n"
"void Execute (uint3 tid)\n"
"{\n"
" float4 val = inTex.Load (uint3(tid.x * 2, tid.y, 0));\n"
" uint3 scaled = val.yzw * 65535;\n"
" uint Y0 = scaled.x;\n"
" uint U = scaled.y;\n"
" uint V = scaled.z;\n"
" uint Y1 = inTex.Load (uint3(tid.x * 2 + 1, tid.y, 0)).y * 65535;\n"
" outTex[tid.xy] = uint4(Y0, U, Y1, V);\n"
"}\n"
"#endif\n"
"\n"
"#ifdef BUILDING_CSMain_Y210_to_AYUV64\n"
"Texture2D<uint4> inTex : register(t0);\n"
"RWTexture2D<unorm float4> outTex : register(u0);\n"
"\n"
"void Execute (uint3 tid)\n"
"{\n"
" float4 val = inTex.Load (tid) / 65535.0;\n"
" float Y0 = val.r;\n" " float Y0 = val.r;\n"
" float U = val.g;\n" " float U = val.g;\n"
" float Y1 = val.b;\n" " float Y1 = val.b;\n"
@ -192,14 +85,29 @@ static const char g_CSMain_converter_str[] =
"}\n" "}\n"
"#endif\n" "#endif\n"
"\n" "\n"
"#ifdef BUILDING_CSMain_AYUV64_to_Y412\n" "#ifdef BUILDING_CSMain_AYUV_to_YUY2\n"
"Texture2D<float4> inTex : register(t0);\n" "void Execute (uint3 tid)\n"
"RWTexture2D<uint4> outTex : register(u0);\n" "{\n"
" float3 val = inTex.Load (uint3(tid.x * 2, tid.y, 0)).yzw;\n"
" float Y0 = val.x;\n"
" float U = val.y;\n"
" float V = val.z;\n"
" float Y1 = inTex.Load (uint3(tid.x * 2 + 1, tid.y, 0)).y;\n"
"\n" "\n"
" outTex[tid.xy] = float4 (Y0, U, Y1, V);\n"
"}\n"
"#endif\n"
"\n"
"#ifdef BUILDING_CSMain_AYUV_to_Y410\n"
"void Execute (uint3 tid)\n" "void Execute (uint3 tid)\n"
"{\n" "{\n"
" float4 val = inTex.Load (tid);\n" " float4 val = inTex.Load (tid);\n"
" outTex[tid.xy] = uint4(val.zywx * 65535);\n" " float Y = val.y;\n"
" float U = val.z;\n"
" float V = val.w;\n"
" float A = val.x;\n"
"\n"
" outTex[tid.xy] = float4 (U, Y, V, A);\n"
"}\n" "}\n"
"#endif\n" "#endif\n"
"\n" "\n"

View file

@ -130,12 +130,9 @@ endforeach
hlsl_cs_source = files('CSMain_converter.hlsl') hlsl_cs_source = files('CSMain_converter.hlsl')
hlsl_cs_entry_points = [ hlsl_cs_entry_points = [
'CSMain_AYUV64_to_Y410', 'CSMain_YUY2_to_AYUV',
'CSMain_VUYA_to_YUY2', 'CSMain_AYUV_to_YUY2',
'CSMain_YUY2_to_VUYA', 'CSMain_AYUV_to_Y410',
'CSMain_AYUV64_to_Y210',
'CSMain_Y210_to_AYUV64',
'CSMain_AYUV64_to_Y412',
] ]
foreach shader : hlsl_cs_entry_points foreach shader : hlsl_cs_entry_points