d3d11: Add support for Y210 and Y212 formats

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/5691>
This commit is contained in:
Seungha Yang 2023-11-20 00:26:44 +09:00
parent 0ad1c07b20
commit 522d883fc3
7 changed files with 128 additions and 5 deletions

View file

@ -33,6 +33,7 @@ G_BEGIN_DECLS
"RGBA64_LE, RGB10A2_LE, BGRA, RGBA, BGRx, RGBx, VUYA, NV12, NV21, " \ "RGBA64_LE, RGB10A2_LE, BGRA, RGBA, BGRx, RGBx, VUYA, NV12, NV21, " \
"P010_10LE, P012_LE, P016_LE, I420, YV12, I420_10LE, I420_12LE, " \ "P010_10LE, P012_LE, P016_LE, I420, YV12, I420_10LE, I420_12LE, " \
"Y42B, I422_10LE, I422_12LE, Y444, Y444_10LE, Y444_12LE, Y444_16LE, YUY2, Y410, " \ "Y42B, I422_10LE, I422_12LE, Y444, Y444_10LE, Y444_12LE, Y444_16LE, YUY2, Y410, " \
"Y210, Y212_LE, " \
"GRAY8, GRAY16_LE, AYUV, AYUV64, RGBP, BGRP, GBR, GBR_10LE, GBR_12LE, " \ "GRAY8, GRAY16_LE, AYUV, AYUV64, RGBP, BGRP, GBR, GBR_10LE, GBR_12LE, " \
"GBR_16LE, GBRA, GBRA_10LE, GBRA_12LE" "GBR_16LE, GBRA, GBRA_10LE, GBRA_12LE"
@ -132,6 +133,10 @@ static const GstD3D11Format _gst_d3d11_default_format_map[] = {
D3D11_FORMAT_SUPPORT_SHADER_SAMPLE), D3D11_FORMAT_SUPPORT_SHADER_SAMPLE),
MAKE_FORMAT_MAP_YUV_FULL (YUY2, YUY2, R8G8B8A8_UNORM, UNKNOWN, UNKNOWN, UNKNOWN, MAKE_FORMAT_MAP_YUV_FULL (YUY2, YUY2, R8G8B8A8_UNORM, UNKNOWN, UNKNOWN, UNKNOWN,
D3D11_FORMAT_SUPPORT_SHADER_SAMPLE), D3D11_FORMAT_SUPPORT_SHADER_SAMPLE),
MAKE_FORMAT_MAP_YUV_FULL (Y210, Y210, R16G16B16A16_UNORM, UNKNOWN, UNKNOWN, UNKNOWN,
D3D11_FORMAT_SUPPORT_SHADER_SAMPLE),
MAKE_FORMAT_MAP_YUV_FULL (Y212_LE, Y216, R16G16B16A16_UNORM, UNKNOWN, UNKNOWN, UNKNOWN,
D3D11_FORMAT_SUPPORT_SHADER_SAMPLE),
MAKE_FORMAT_MAP_RGBP (RGBP, R8_UNORM, UNKNOWN), MAKE_FORMAT_MAP_RGBP (RGBP, R8_UNORM, UNKNOWN),
MAKE_FORMAT_MAP_RGBP (BGRP, R8_UNORM, UNKNOWN), MAKE_FORMAT_MAP_RGBP (BGRP, R8_UNORM, UNKNOWN),
MAKE_FORMAT_MAP_RGBP (GBR, R8_UNORM, UNKNOWN), MAKE_FORMAT_MAP_RGBP (GBR, R8_UNORM, UNKNOWN),

View file

@ -128,6 +128,18 @@ gst_d3d11_converter_helper_new (GstD3D11Device * device,
srv_format = DXGI_FORMAT_R16G16B16A16_UNORM; srv_format = DXGI_FORMAT_R16G16B16A16_UNORM;
uav_format = DXGI_FORMAT_R32_UINT; uav_format = DXGI_FORMAT_R32_UINT;
x_unit = 8; x_unit = 8;
} else if (in_format == GST_VIDEO_FORMAT_AYUV64 &&
(out_format == GST_VIDEO_FORMAT_Y210 ||
out_format == GST_VIDEO_FORMAT_Y212_LE)) {
entry_point = "CSMain_AYUV64_to_Y210";
srv_format = DXGI_FORMAT_R16G16B16A16_UNORM;
uav_format = DXGI_FORMAT_R16G16B16A16_UINT;
} else if ((in_format == GST_VIDEO_FORMAT_Y210 ||
in_format == GST_VIDEO_FORMAT_Y212_LE) &&
out_format == GST_VIDEO_FORMAT_AYUV64) {
entry_point = "CSMain_Y210_to_AYUV64";
srv_format = DXGI_FORMAT_R16G16B16A16_UINT;
uav_format = DXGI_FORMAT_R16G16B16A16_UNORM;
} else if (in_format != out_format) { } else if (in_format != out_format) {
g_assert_not_reached (); g_assert_not_reached ();
return nullptr; return nullptr;
@ -146,8 +158,26 @@ gst_d3d11_converter_helper_new (GstD3D11Device * device,
if (!entry_point.empty ()) { if (!entry_point.empty ()) {
auto handle = gst_d3d11_device_get_device_handle (device); auto handle = gst_d3d11_device_get_device_handle (device);
gboolean try_cs = TRUE;
feature_level = handle->GetFeatureLevel (); feature_level = handle->GetFeatureLevel ();
if (feature_level >= D3D_FEATURE_LEVEL_11_0) { if (feature_level < D3D_FEATURE_LEVEL_11_0) {
try_cs = FALSE;
GST_DEBUG ("Device does not support typed UAV");
} else if (uav_format != DXGI_FORMAT_R32_UINT) {
D3D11_FEATURE_DATA_FORMAT_SUPPORT2 support2;
support2.InFormat = uav_format;
support2.OutFormatSupport2 = 0;
hr = handle->CheckFeatureSupport (D3D11_FEATURE_FORMAT_SUPPORT2,
&support2, sizeof (D3D11_FEATURE_DATA_FORMAT_SUPPORT2));
/* XXX: D3D11_FORMAT_SUPPORT2_UAV_TYPED_STORE (0x80)
* undefined in old MinGW toolchain */
if (FAILED (hr) || (support2.OutFormatSupport2 & 0x80) == 0) {
try_cs = FALSE;
GST_DEBUG ("Device does not support typed UAV store");
}
}
if (try_cs) {
std::lock_guard<std::mutex> lk (cache_lock); std::lock_guard<std::mutex> lk (cache_lock);
std::shared_ptr<ConverterCSSource> source; std::shared_ptr<ConverterCSSource> source;
auto cached = cs_source_cache.find (entry_point); auto cached = cs_source_cache.find (entry_point);
@ -198,6 +228,8 @@ gst_d3d11_converter_helper_new (GstD3D11Device * device,
} }
if (cs) { if (cs) {
GST_DEBUG ("Compute shader \"%s\" available", entry_point.c_str ());
self->cs = cs; self->cs = cs;
self->x_unit = x_unit; self->x_unit = x_unit;
@ -211,6 +243,9 @@ gst_d3d11_converter_helper_new (GstD3D11Device * device,
self->tg_y = (UINT) ceil (height / (float) y_unit); self->tg_y = (UINT) ceil (height / (float) y_unit);
} }
} else { } else {
GST_DEBUG ("Creating software converter for \"%s\"",
entry_point.c_str ());
self->sw_conv = self->sw_conv =
gst_video_converter_new (&self->in_info, &self->out_info, nullptr); gst_video_converter_new (&self->in_info, &self->out_info, nullptr);
} }

View file

@ -2037,10 +2037,16 @@ gst_d3d11_converter_new (GstD3D11Device * device, const GstVideoInfo * in_info,
priv->border_color = 0xffff000000000000; priv->border_color = 0xffff000000000000;
/* Preprocess packed and subsampled texture */ /* Preprocess packed and subsampled texture */
if (GST_VIDEO_INFO_FORMAT (in_info) == GST_VIDEO_FORMAT_YUY2) { if (GST_VIDEO_INFO_FORMAT (in_info) == GST_VIDEO_FORMAT_YUY2 ||
GST_VIDEO_INFO_FORMAT (in_info) == GST_VIDEO_FORMAT_Y210 ||
GST_VIDEO_INFO_FORMAT (in_info) == GST_VIDEO_FORMAT_Y212_LE) {
GstVideoInfo tmp_info; GstVideoInfo tmp_info;
GstVideoFormat postproc_format = GST_VIDEO_FORMAT_VUYA;
gst_video_info_set_interlaced_format (&tmp_info, GST_VIDEO_FORMAT_VUYA, if (GST_VIDEO_INFO_FORMAT (in_info) != GST_VIDEO_FORMAT_YUY2)
postproc_format = GST_VIDEO_FORMAT_AYUV64;
gst_video_info_set_interlaced_format (&tmp_info, postproc_format,
GST_VIDEO_INFO_INTERLACE_MODE (in_info), GST_VIDEO_INFO_INTERLACE_MODE (in_info),
GST_VIDEO_INFO_WIDTH (in_info), GST_VIDEO_INFO_HEIGHT (in_info)); GST_VIDEO_INFO_WIDTH (in_info), GST_VIDEO_INFO_HEIGHT (in_info));
tmp_info.chroma_site = in_info->chroma_site; tmp_info.chroma_site = in_info->chroma_site;
@ -2055,11 +2061,13 @@ gst_d3d11_converter_new (GstD3D11Device * device, const GstVideoInfo * in_info,
} }
if (GST_VIDEO_INFO_FORMAT (out_info) == GST_VIDEO_FORMAT_YUY2 || if (GST_VIDEO_INFO_FORMAT (out_info) == GST_VIDEO_FORMAT_YUY2 ||
GST_VIDEO_INFO_FORMAT (out_info) == GST_VIDEO_FORMAT_Y410) { GST_VIDEO_INFO_FORMAT (out_info) == GST_VIDEO_FORMAT_Y410 ||
GST_VIDEO_INFO_FORMAT (out_info) == GST_VIDEO_FORMAT_Y210 ||
GST_VIDEO_INFO_FORMAT (out_info) == GST_VIDEO_FORMAT_Y212_LE) {
GstVideoInfo tmp_info; GstVideoInfo tmp_info;
GstVideoFormat postproc_format = GST_VIDEO_FORMAT_VUYA; GstVideoFormat postproc_format = GST_VIDEO_FORMAT_VUYA;
if (GST_VIDEO_INFO_FORMAT (out_info) == GST_VIDEO_FORMAT_Y410) if (GST_VIDEO_INFO_FORMAT (out_info) != GST_VIDEO_FORMAT_YUY2)
postproc_format = GST_VIDEO_FORMAT_AYUV64; postproc_format = GST_VIDEO_FORMAT_AYUV64;
gst_video_info_set_interlaced_format (&tmp_info, postproc_format, gst_video_info_set_interlaced_format (&tmp_info, postproc_format,

View file

@ -521,6 +521,8 @@ gst_d3d11_device_setup_format_table (GstD3D11Device * self)
case GST_VIDEO_FORMAT_P012_LE: case GST_VIDEO_FORMAT_P012_LE:
case GST_VIDEO_FORMAT_P016_LE: case GST_VIDEO_FORMAT_P016_LE:
case GST_VIDEO_FORMAT_YUY2: case GST_VIDEO_FORMAT_YUY2:
case GST_VIDEO_FORMAT_Y210:
case GST_VIDEO_FORMAT_Y212_LE:
{ {
gboolean supported = TRUE; gboolean supported = TRUE;

View file

@ -152,6 +152,7 @@ gst_d3d11_dxgi_format_get_size (DXGI_FORMAT format, guint width, guint height,
case DXGI_FORMAT_R8G8_B8G8_UNORM: case DXGI_FORMAT_R8G8_B8G8_UNORM:
case DXGI_FORMAT_Y210: case DXGI_FORMAT_Y210:
case DXGI_FORMAT_Y410: case DXGI_FORMAT_Y410:
case DXGI_FORMAT_Y216:
case DXGI_FORMAT_R16G16B16A16_UNORM: case DXGI_FORMAT_R16G16B16A16_UNORM:
offset[0] = 0; offset[0] = 0;
stride[0] = pitch; stride[0] = pitch;
@ -211,6 +212,8 @@ gst_d3d11_dxgi_format_to_gst (DXGI_FORMAT format)
return GST_VIDEO_FORMAT_P010_10LE; return GST_VIDEO_FORMAT_P010_10LE;
case DXGI_FORMAT_P016: case DXGI_FORMAT_P016:
return GST_VIDEO_FORMAT_P016_LE; return GST_VIDEO_FORMAT_P016_LE;
case DXGI_FORMAT_Y216:
return GST_VIDEO_FORMAT_Y212_LE;
default: default:
break; break;
} }
@ -289,6 +292,8 @@ gst_d3d11_dxgi_format_get_alignment (DXGI_FORMAT format)
case DXGI_FORMAT_NV12: case DXGI_FORMAT_NV12:
case DXGI_FORMAT_P010: case DXGI_FORMAT_P010:
case DXGI_FORMAT_P016: case DXGI_FORMAT_P016:
case DXGI_FORMAT_Y210:
case DXGI_FORMAT_Y216:
return 2; return 2;
default: default:
break; break;

View file

@ -63,6 +63,39 @@ void Execute (uint3 tid)
} }
#endif #endif
#ifdef BUILDING_CSMain_AYUV64_to_Y210
/* Packs pairs of horizontally adjacent AYUV64 texels into single
 * (Y0, U, Y1, V) texels. The host code selects this entry point for both
 * Y210 and Y212_LE outputs, binding the input as R16G16B16A16_UNORM and the
 * output as R16G16B16A16_UINT. */
Texture2D<float4> inTex : register(t0);
RWTexture2D<uint4> outTex : register(u0);
/* tid: dispatch thread id. tid.xy addresses the output texel; the two input
 * texels read are at x = 2 * tid.x and x = 2 * tid.x + 1 on row tid.y. */
void Execute (uint3 tid)
{
float4 val = inTex.Load (uint3(tid.x * 2, tid.y, 0));
/* Channels y/z/w are used as Y/U/V; channel x (presumably alpha) is
 * dropped. Scale the normalised values back to the 16-bit integer range. */
uint3 scaled = val.yzw * 65535;
uint Y0 = scaled.x;
uint U = scaled.y;
uint V = scaled.z;
/* Only luma is taken from the second texel: both output lumas share the
 * first texel's U and V, and the second texel's chroma is not read. */
uint Y1 = inTex.Load (uint3(tid.x * 2 + 1, tid.y, 0)).y * 65535;
outTex[tid.xy] = uint4(Y0, U, Y1, V);
}
#endif
#ifdef BUILDING_CSMain_Y210_to_AYUV64
/* Expands each packed (Y0, U, Y1, V) texel into two horizontally adjacent
 * AYUV64 texels. The host code selects this entry point for both Y210 and
 * Y212_LE inputs, binding the input as R16G16B16A16_UINT and the output as
 * R16G16B16A16_UNORM. */
Texture2D<uint4> inTex : register(t0);
RWTexture2D<unorm float4> outTex : register(u0);
/* tid: dispatch thread id. tid.xy addresses the input texel; the two output
 * texels written are at x = 2 * tid.x and x = 2 * tid.x + 1 on row tid.y. */
void Execute (uint3 tid)
{
/* tid is passed to Load as-is, so tid.z acts as the mip level.
 * NOTE(review): presumably always 0 - confirm against the dispatch size.
 * Dividing by 65535.0 normalises the raw 16-bit integers to [0, 1]. */
float4 val = inTex.Load (tid) / 65535.0;
float Y0 = val.r;
float U = val.g;
float Y1 = val.b;
float V = val.a;
/* Both output texels reuse the same U/V pair; the first component
 * (presumably alpha) is fixed to 1.0. */
outTex[uint2(tid.x * 2, tid.y)] = float4 (1.0, Y0, U, V);
outTex[uint2(tid.x * 2 + 1, tid.y)] = float4 (1.0, Y1, U, V);
}
#endif
[numthreads(8, 8, 1)] [numthreads(8, 8, 1)]
void ENTRY_POINT (uint3 tid : SV_DispatchThreadID) void ENTRY_POINT (uint3 tid : SV_DispatchThreadID)
{ {
@ -115,6 +148,39 @@ static const char g_CSMain_converter_str[] =
"}\n" "}\n"
"#endif\n" "#endif\n"
"\n" "\n"
"#ifdef BUILDING_CSMain_AYUV64_to_Y210\n"
"Texture2D<float4> inTex : register(t0);\n"
"RWTexture2D<uint4> outTex : register(u0);\n"
"\n"
"void Execute (uint3 tid)\n"
"{\n"
" float4 val = inTex.Load (uint3(tid.x * 2, tid.y, 0));\n"
" uint3 scaled = val.yzw * 65535;\n"
" uint Y0 = scaled.x;\n"
" uint U = scaled.y;\n"
" uint V = scaled.z;\n"
" uint Y1 = inTex.Load (uint3(tid.x * 2 + 1, tid.y, 0)).y * 65535;\n"
" outTex[tid.xy] = uint4(Y0, U, Y1, V);\n"
"}\n"
"#endif\n"
"\n"
"#ifdef BUILDING_CSMain_Y210_to_AYUV64\n"
"Texture2D<uint4> inTex : register(t0);\n"
"RWTexture2D<unorm float4> outTex : register(u0);\n"
"\n"
"void Execute (uint3 tid)\n"
"{\n"
" float4 val = inTex.Load (tid) / 65535.0;\n"
" float Y0 = val.r;\n"
" float U = val.g;\n"
" float Y1 = val.b;\n"
" float V = val.a;\n"
"\n"
" outTex[uint2(tid.x * 2, tid.y)] = float4 (1.0, Y0, U, V);\n"
" outTex[uint2(tid.x * 2 + 1, tid.y)] = float4 (1.0, Y1, U, V);\n"
"}\n"
"#endif\n"
"\n"
"[numthreads(8, 8, 1)]\n" "[numthreads(8, 8, 1)]\n"
"void ENTRY_POINT (uint3 tid : SV_DispatchThreadID)\n" "void ENTRY_POINT (uint3 tid : SV_DispatchThreadID)\n"
"{\n" "{\n"

View file

@ -131,6 +131,8 @@ hlsl_cs_entry_points = [
'CSMain_AYUV64_to_Y410', 'CSMain_AYUV64_to_Y410',
'CSMain_VUYA_to_YUY2', 'CSMain_VUYA_to_YUY2',
'CSMain_YUY2_to_VUYA', 'CSMain_YUY2_to_VUYA',
'CSMain_AYUV64_to_Y210',
'CSMain_Y210_to_AYUV64',
] ]
foreach shader : hlsl_cs_entry_points foreach shader : hlsl_cs_entry_points