mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2024-12-29 11:40:38 +00:00
f52ecb9607
Precompile pixel shaders for simple conversion path (without gamma/primaries conversion) in case of MSVC build. Even if runtime compile is required (cross-compiled or complex conversion path), do it only once and reuse the compiled bytecode. This precompile/caching can save about 95% of time taken by gst_d3d11_converter_new() call. Fixes: https://gitlab.freedesktop.org/gstreamer/gstreamer/-/issues/3004 Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/5457>
438 lines
13 KiB
C++
438 lines
13 KiB
C++
/* GStreamer
|
|
* Copyright (C) 2023 Seungha Yang <seungha@centricular.com>
|
|
*
|
|
* This library is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Library General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2 of the License, or (at your option) any later version.
|
|
*
|
|
* This library is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Library General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Library General Public
|
|
* License along with this library; if not, write to the
|
|
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
|
|
* Boston, MA 02110-1301, USA.
|
|
*/
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
#include "config.h"
|
|
#endif
|
|
|
|
#include "gstd3d11converterbuilder.h"
|
|
#include "gstd3d11device-private.h"
|
|
#include "gstd3d11-private.h"
|
|
#include <map>
|
|
#include <mutex>
|
|
#include <string>
|
|
#include <utility>
|
|
#include <memory>
|
|
|
|
/* *INDENT-OFF* */
|
|
using namespace Microsoft::WRL;
|
|
|
|
struct ConverterPSSource
|
|
{
|
|
gint64 token;
|
|
std::string entry_point;
|
|
const BYTE *bytecode;
|
|
SIZE_T bytecode_size;
|
|
std::vector<std::pair<std::string, std::string>> macros;
|
|
guint num_rtv;
|
|
};
|
|
|
|
enum class PS_OUTPUT
|
|
{
|
|
PACKED,
|
|
LUMA,
|
|
CHROMA,
|
|
CHROMA_PLANAR,
|
|
PLANAR,
|
|
PLANAR_FULL,
|
|
};
|
|
|
|
static std::map<std::string, std::shared_ptr<ConverterPSSource>> ps_source_cache;
|
|
static std::mutex cache_lock;
|
|
#ifdef HLSL_PRECOMPILED
|
|
#include "PSMainConverter.h"
|
|
#include "VSMain_converter.h"
|
|
#else
|
|
static const std::map<std::string, std::pair<const BYTE *, SIZE_T>> precompiled_bytecode;
|
|
#endif
|
|
|
|
#include "hlsl/PSMain_converter.hlsl"
|
|
#include "hlsl/VSMain_converter.hlsl"
|
|
|
|
static const std::string
|
|
ps_output_to_string (PS_OUTPUT output)
|
|
{
|
|
switch (output) {
|
|
case PS_OUTPUT::PACKED:
|
|
return "PS_OUTPUT_PACKED";
|
|
case PS_OUTPUT::LUMA:
|
|
return "PS_OUTPUT_LUMA";
|
|
case PS_OUTPUT::CHROMA:
|
|
return "PS_OUTPUT_CHROMA";
|
|
case PS_OUTPUT::CHROMA_PLANAR:
|
|
return "PS_OUTPUT_CHROMA_PLANAR";
|
|
case PS_OUTPUT::PLANAR:
|
|
return "PS_OUTPUT_PLANAR";
|
|
case PS_OUTPUT::PLANAR_FULL:
|
|
return "PS_OUTPUT_PLANAR_FULL";
|
|
default:
|
|
g_assert_not_reached ();
|
|
break;
|
|
}
|
|
|
|
return "";
|
|
}
|
|
|
|
static guint
|
|
ps_output_get_num_rtv (PS_OUTPUT output)
|
|
{
|
|
switch (output) {
|
|
case PS_OUTPUT::PACKED:
|
|
case PS_OUTPUT::LUMA:
|
|
case PS_OUTPUT::CHROMA:
|
|
return 1;
|
|
case PS_OUTPUT::CHROMA_PLANAR:
|
|
return 2;
|
|
case PS_OUTPUT::PLANAR:
|
|
return 3;
|
|
case PS_OUTPUT::PLANAR_FULL:
|
|
return 4;
|
|
default:
|
|
g_assert_not_reached ();
|
|
break;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static std::string
|
|
make_input (GstVideoFormat format, gboolean premul)
|
|
{
|
|
switch (format) {
|
|
case GST_VIDEO_FORMAT_RGBA:
|
|
case GST_VIDEO_FORMAT_RGBA64_LE:
|
|
case GST_VIDEO_FORMAT_RGB10A2_LE:
|
|
case GST_VIDEO_FORMAT_BGRA:
|
|
if (premul)
|
|
return "RGBAPremul";
|
|
return "RGBA";
|
|
case GST_VIDEO_FORMAT_RGBx:
|
|
case GST_VIDEO_FORMAT_BGRx:
|
|
return "RGBx";
|
|
case GST_VIDEO_FORMAT_VUYA:
|
|
if (premul)
|
|
return "VUYAPremul";
|
|
return "VUYA";
|
|
case GST_VIDEO_FORMAT_AYUV:
|
|
case GST_VIDEO_FORMAT_AYUV64:
|
|
return "AYUV";
|
|
case GST_VIDEO_FORMAT_NV12:
|
|
case GST_VIDEO_FORMAT_P010_10LE:
|
|
case GST_VIDEO_FORMAT_P012_LE:
|
|
case GST_VIDEO_FORMAT_P016_LE:
|
|
return "NV12";
|
|
case GST_VIDEO_FORMAT_NV21:
|
|
return "NV21";
|
|
case GST_VIDEO_FORMAT_I420:
|
|
case GST_VIDEO_FORMAT_Y42B:
|
|
case GST_VIDEO_FORMAT_Y444:
|
|
case GST_VIDEO_FORMAT_Y444_16LE:
|
|
return "I420";
|
|
case GST_VIDEO_FORMAT_YV12:
|
|
return "YV12";
|
|
case GST_VIDEO_FORMAT_I420_10LE:
|
|
case GST_VIDEO_FORMAT_I422_10LE:
|
|
case GST_VIDEO_FORMAT_Y444_10LE:
|
|
return "I420_10";
|
|
case GST_VIDEO_FORMAT_I420_12LE:
|
|
case GST_VIDEO_FORMAT_I422_12LE:
|
|
case GST_VIDEO_FORMAT_Y444_12LE:
|
|
return "I420_12";
|
|
case GST_VIDEO_FORMAT_Y410:
|
|
return "Y410";
|
|
case GST_VIDEO_FORMAT_GRAY8:
|
|
case GST_VIDEO_FORMAT_GRAY16_LE:
|
|
return "GRAY";
|
|
case GST_VIDEO_FORMAT_RGBP:
|
|
return "RGBP";
|
|
case GST_VIDEO_FORMAT_BGRP:
|
|
return "BGRP";
|
|
case GST_VIDEO_FORMAT_GBR:
|
|
case GST_VIDEO_FORMAT_GBR_16LE:
|
|
return "GBR";
|
|
case GST_VIDEO_FORMAT_GBR_10LE:
|
|
return "GBR_10";
|
|
case GST_VIDEO_FORMAT_GBR_12LE:
|
|
return "GBR_12";
|
|
case GST_VIDEO_FORMAT_GBRA:
|
|
if (premul)
|
|
return "GBRAPremul";
|
|
return "GBRA";
|
|
case GST_VIDEO_FORMAT_GBRA_10LE:
|
|
if (premul)
|
|
return "GBRAPremul_10";
|
|
return "GBRA_10";
|
|
case GST_VIDEO_FORMAT_GBRA_12LE:
|
|
if (premul)
|
|
return "GBRAPremul_12";
|
|
return "GBRA_12";
|
|
default:
|
|
g_assert_not_reached ();
|
|
break;
|
|
}
|
|
|
|
return "";
|
|
}
|
|
|
|
static std::vector<std::pair<PS_OUTPUT, std::string>>
|
|
make_output (GstVideoFormat format, gboolean premul)
|
|
{
|
|
std::vector<std::pair<PS_OUTPUT, std::string>> ret;
|
|
|
|
switch (format) {
|
|
case GST_VIDEO_FORMAT_RGBA:
|
|
case GST_VIDEO_FORMAT_RGBA64_LE:
|
|
case GST_VIDEO_FORMAT_RGB10A2_LE:
|
|
case GST_VIDEO_FORMAT_BGRA:
|
|
if (premul)
|
|
ret.push_back(std::make_pair(PS_OUTPUT::PACKED, "RGBAPremul"));
|
|
else
|
|
ret.push_back(std::make_pair(PS_OUTPUT::PACKED, "RGBA"));
|
|
break;
|
|
case GST_VIDEO_FORMAT_RGBx:
|
|
case GST_VIDEO_FORMAT_BGRx:
|
|
ret.push_back(std::make_pair(PS_OUTPUT::PACKED, "RGBx"));
|
|
break;
|
|
case GST_VIDEO_FORMAT_VUYA:
|
|
if (premul)
|
|
ret.push_back(std::make_pair(PS_OUTPUT::PACKED, "VUYAPremul"));
|
|
else
|
|
ret.push_back(std::make_pair(PS_OUTPUT::PACKED, "VUYA"));
|
|
break;
|
|
case GST_VIDEO_FORMAT_AYUV:
|
|
case GST_VIDEO_FORMAT_AYUV64:
|
|
ret.push_back(std::make_pair(PS_OUTPUT::PACKED, "AYUV"));
|
|
break;
|
|
case GST_VIDEO_FORMAT_NV12:
|
|
case GST_VIDEO_FORMAT_P010_10LE:
|
|
case GST_VIDEO_FORMAT_P012_LE:
|
|
case GST_VIDEO_FORMAT_P016_LE:
|
|
ret.push_back(std::make_pair(PS_OUTPUT::LUMA, "Luma"));
|
|
ret.push_back(std::make_pair(PS_OUTPUT::CHROMA, "ChromaNV12"));
|
|
break;
|
|
case GST_VIDEO_FORMAT_NV21:
|
|
ret.push_back(std::make_pair(PS_OUTPUT::LUMA, "Luma"));
|
|
ret.push_back(std::make_pair(PS_OUTPUT::CHROMA, "ChromaNV21"));
|
|
break;
|
|
case GST_VIDEO_FORMAT_I420:
|
|
case GST_VIDEO_FORMAT_Y42B:
|
|
ret.push_back(std::make_pair(PS_OUTPUT::LUMA, "Luma"));
|
|
ret.push_back(std::make_pair(PS_OUTPUT::CHROMA_PLANAR, "ChromaI420"));
|
|
break;
|
|
case GST_VIDEO_FORMAT_Y444:
|
|
case GST_VIDEO_FORMAT_Y444_16LE:
|
|
ret.push_back(std::make_pair(PS_OUTPUT::PLANAR, "Y444"));
|
|
break;
|
|
case GST_VIDEO_FORMAT_YV12:
|
|
ret.push_back(std::make_pair(PS_OUTPUT::LUMA, "Luma"));
|
|
ret.push_back(std::make_pair(PS_OUTPUT::CHROMA_PLANAR, "ChromaYV12"));
|
|
break;
|
|
case GST_VIDEO_FORMAT_I420_10LE:
|
|
case GST_VIDEO_FORMAT_I422_10LE:
|
|
ret.push_back(std::make_pair(PS_OUTPUT::LUMA, "Luma_10"));
|
|
ret.push_back(std::make_pair(PS_OUTPUT::CHROMA_PLANAR, "ChromaI420_10"));
|
|
break;
|
|
case GST_VIDEO_FORMAT_Y444_10LE:
|
|
ret.push_back(std::make_pair(PS_OUTPUT::PLANAR, "Y444_10"));
|
|
break;
|
|
case GST_VIDEO_FORMAT_I420_12LE:
|
|
case GST_VIDEO_FORMAT_I422_12LE:
|
|
ret.push_back(std::make_pair(PS_OUTPUT::LUMA, "Luma_12"));
|
|
ret.push_back(std::make_pair(PS_OUTPUT::CHROMA_PLANAR, "ChromaI420_12"));
|
|
break;
|
|
case GST_VIDEO_FORMAT_Y444_12LE:
|
|
ret.push_back(std::make_pair(PS_OUTPUT::PLANAR, "Y444_12"));
|
|
break;
|
|
case GST_VIDEO_FORMAT_GRAY8:
|
|
case GST_VIDEO_FORMAT_GRAY16_LE:
|
|
ret.push_back(std::make_pair(PS_OUTPUT::LUMA, "Luma"));
|
|
break;
|
|
case GST_VIDEO_FORMAT_RGBP:
|
|
ret.push_back(std::make_pair(PS_OUTPUT::PLANAR, "RGBP"));
|
|
break;
|
|
case GST_VIDEO_FORMAT_BGRP:
|
|
ret.push_back(std::make_pair(PS_OUTPUT::PLANAR, "BGRP"));
|
|
break;
|
|
case GST_VIDEO_FORMAT_GBR:
|
|
case GST_VIDEO_FORMAT_GBR_16LE:
|
|
ret.push_back(std::make_pair(PS_OUTPUT::PLANAR, "GBR"));
|
|
break;
|
|
case GST_VIDEO_FORMAT_GBR_10LE:
|
|
ret.push_back(std::make_pair(PS_OUTPUT::PLANAR, "GBR_10"));
|
|
break;
|
|
case GST_VIDEO_FORMAT_GBR_12LE:
|
|
ret.push_back(std::make_pair(PS_OUTPUT::PLANAR, "GBR_12"));
|
|
break;
|
|
case GST_VIDEO_FORMAT_GBRA:
|
|
if (premul)
|
|
ret.push_back(std::make_pair(PS_OUTPUT::PLANAR_FULL, "GBRAPremul"));
|
|
else
|
|
ret.push_back(std::make_pair(PS_OUTPUT::PLANAR_FULL, "GBRA"));
|
|
break;
|
|
case GST_VIDEO_FORMAT_GBRA_10LE:
|
|
if (premul)
|
|
ret.push_back(std::make_pair(PS_OUTPUT::PLANAR_FULL, "GBRAPremul_10"));
|
|
else
|
|
ret.push_back(std::make_pair(PS_OUTPUT::PLANAR_FULL, "GBRA_10"));
|
|
break;
|
|
case GST_VIDEO_FORMAT_GBRA_12LE:
|
|
if (premul)
|
|
ret.push_back(std::make_pair(PS_OUTPUT::PLANAR_FULL, "GBRAPremul_12"));
|
|
else
|
|
ret.push_back(std::make_pair(PS_OUTPUT::PLANAR_FULL, "GBRA_12"));
|
|
break;
|
|
default:
|
|
g_assert_not_reached ();
|
|
break;
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
PixelShaderList
|
|
gst_d3d11_get_converter_pixel_shader (GstD3D11Device * device,
|
|
GstVideoFormat in_format, GstVideoFormat out_format, gboolean in_premul,
|
|
gboolean out_premul, CONVERT_TYPE type)
|
|
{
|
|
HRESULT hr;
|
|
auto input = make_input (in_format, in_premul);
|
|
auto output = make_output (out_format, out_premul);
|
|
std::string conv_type;
|
|
PixelShaderList ret;
|
|
|
|
switch (type) {
|
|
case CONVERT_TYPE::IDENTITY:
|
|
conv_type = "Identity";
|
|
break;
|
|
case CONVERT_TYPE::SIMPLE:
|
|
conv_type = "Simple";
|
|
break;
|
|
case CONVERT_TYPE::RANGE:
|
|
conv_type = "Range";
|
|
break;
|
|
case CONVERT_TYPE::GAMMA:
|
|
conv_type = "Gamma";
|
|
break;
|
|
case CONVERT_TYPE::PRIMARY:
|
|
conv_type = "Primary";
|
|
break;
|
|
}
|
|
|
|
for (const auto & it : output) {
|
|
std::string entry_point = "PSMain_" + input + "_" + conv_type + "_" +
|
|
it.second;
|
|
std::shared_ptr<ConverterPSSource> source;
|
|
std::vector<D3D_SHADER_MACRO> macros;
|
|
ComPtr<ID3D11PixelShader> shader;
|
|
cache_lock.lock ();
|
|
auto cached = ps_source_cache.find(entry_point);
|
|
if (cached != ps_source_cache.end()) {
|
|
source = cached->second;
|
|
} else {
|
|
source = std::make_shared<ConverterPSSource> ();
|
|
source->token = gst_d3d11_pixel_shader_token_new ();
|
|
source->entry_point = entry_point;
|
|
auto precompiled = precompiled_bytecode.find (entry_point);
|
|
if (precompiled != precompiled_bytecode.end ()) {
|
|
source->bytecode = precompiled->second.first;
|
|
source->bytecode_size = precompiled->second.second;
|
|
} else {
|
|
source->bytecode = nullptr;
|
|
source->bytecode_size = 0;
|
|
}
|
|
|
|
source->num_rtv = ps_output_get_num_rtv (it.first);
|
|
|
|
source->macros.push_back(std::make_pair("ENTRY_POINT", entry_point));
|
|
source->macros.push_back(std::make_pair("SAMPLER", "Sampler" + input));
|
|
source->macros.push_back(std::make_pair("CONVERTER",
|
|
"Converter" + conv_type));
|
|
source->macros.push_back(std::make_pair("OUTPUT_TYPE",
|
|
ps_output_to_string(it.first)));
|
|
source->macros.push_back(std::make_pair("OUTPUT_BUILDER",
|
|
"Output" + it.second));
|
|
ps_source_cache[entry_point] = source;
|
|
}
|
|
cache_lock.unlock ();
|
|
|
|
for (const auto & defines : source->macros)
|
|
macros.push_back({defines.first.c_str (), defines.second.c_str ()});
|
|
|
|
macros.push_back({nullptr, nullptr});
|
|
|
|
hr = gst_d3d11_device_get_pixel_shader_uncached (device, source->token,
|
|
source->bytecode, source->bytecode_size, g_PSMain_converter_str,
|
|
sizeof (g_PSMain_converter_str), source->entry_point.c_str (),
|
|
¯os[0], &shader);
|
|
if (FAILED (hr)) {
|
|
ret.clear ();
|
|
return ret;
|
|
}
|
|
|
|
auto ps = std::make_shared<PixelShader> ();
|
|
ps->shader = shader;
|
|
ps->num_rtv = source->num_rtv;
|
|
|
|
ret.push_back (ps);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
/* *INDENT-ON* */
|
|
|
|
HRESULT
|
|
gst_d3d11_get_converter_vertex_shader (GstD3D11Device * device,
|
|
ID3D11VertexShader ** vs, ID3D11InputLayout ** layout)
|
|
{
|
|
static gint64 token = 0;
|
|
const void *bytecode = nullptr;
|
|
gsize bytecode_size = 0;
|
|
|
|
GST_D3D11_CALL_ONCE_BEGIN {
|
|
token = gst_d3d11_vertex_shader_token_new ();
|
|
} GST_D3D11_CALL_ONCE_END;
|
|
|
|
#ifdef HLSL_PRECOMPILED
|
|
bytecode = g_VSMain_converter;
|
|
bytecode_size = sizeof (g_VSMain_converter);
|
|
#endif
|
|
|
|
D3D11_INPUT_ELEMENT_DESC input_desc[2];
|
|
|
|
input_desc[0].SemanticName = "POSITION";
|
|
input_desc[0].SemanticIndex = 0;
|
|
input_desc[0].Format = DXGI_FORMAT_R32G32B32_FLOAT;
|
|
input_desc[0].InputSlot = 0;
|
|
input_desc[0].AlignedByteOffset = D3D11_APPEND_ALIGNED_ELEMENT;
|
|
input_desc[0].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA;
|
|
input_desc[0].InstanceDataStepRate = 0;
|
|
|
|
input_desc[1].SemanticName = "TEXCOORD";
|
|
input_desc[1].SemanticIndex = 0;
|
|
input_desc[1].Format = DXGI_FORMAT_R32G32_FLOAT;
|
|
input_desc[1].InputSlot = 0;
|
|
input_desc[1].AlignedByteOffset = D3D11_APPEND_ALIGNED_ELEMENT;
|
|
input_desc[1].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA;
|
|
input_desc[1].InstanceDataStepRate = 0;
|
|
|
|
return gst_d3d11_device_get_vertex_shader (device, token,
|
|
bytecode, bytecode_size, g_VSMain_converter_str,
|
|
sizeof (g_VSMain_converter_str), "VSMain_converter", input_desc,
|
|
G_N_ELEMENTS (input_desc), vs, layout);
|
|
}
|