d3d11converter: Add support for HLSL precompile and bytecode caching

Precompile pixel shaders for the simple conversion path
(without gamma/primaries conversion) when building with MSVC.
Even if runtime compilation is required (cross-compiled build or complex
conversion path), compile only once and reuse the compiled bytecode.

This precompile/caching can save about 95% of time taken by
gst_d3d11_converter_new() call.

Fixes: https://gitlab.freedesktop.org/gstreamer/gstreamer/-/issues/3004
Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/5457>
This commit is contained in:
Seungha Yang 2023-10-10 19:40:36 +09:00 committed by GStreamer Marge Bot
parent 83a576e854
commit f52ecb9607
15 changed files with 2955 additions and 1686 deletions

View file

@ -160,6 +160,9 @@ typedef struct _GstD3D11ColorMatrix
gdouble max[3]; gdouble max[3];
} GstD3D11ColorMatrix; } GstD3D11ColorMatrix;
GST_D3D11_API
void gst_d3d11_color_matrix_init (GstD3D11ColorMatrix * matrix);
GST_D3D11_API GST_D3D11_API
gchar * gst_d3d11_dump_color_matrix (GstD3D11ColorMatrix * matrix); gchar * gst_d3d11_dump_color_matrix (GstD3D11ColorMatrix * matrix);

View file

@ -0,0 +1,438 @@
/* GStreamer
* Copyright (C) 2023 Seungha Yang <seungha@centricular.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "gstd3d11converterbuilder.h"
#include "gstd3d11device-private.h"
#include "gstd3d11-private.h"
#include <map>
#include <mutex>
#include <string>
#include <utility>
#include <memory>
/* *INDENT-OFF* */
using namespace Microsoft::WRL;
/* Description of one converter pixel shader. Instances are stored in
 * ps_source_cache, keyed by the HLSL entry point name */
struct ConverterPSSource
{
/* Token obtained from gst_d3d11_pixel_shader_token_new () */
gint64 token;
/* HLSL entry point name, "PSMain_<input>_<conversion>_<output>" */
std::string entry_point;
/* Precompiled bytecode for entry_point, nullptr if there is none */
const BYTE *bytecode;
/* Size of bytecode, 0 if bytecode is nullptr */
SIZE_T bytecode_size;
/* (name, value) pairs handed to the compiler as D3D_SHADER_MACRO defines */
std::vector<std::pair<std::string, std::string>> macros;
/* Render target count, as returned by ps_output_get_num_rtv () */
guint num_rtv;
};
/* Output layout of a converter pixel shader. Each value has a matching
 * "PS_OUTPUT_*" define name (see ps_output_to_string ()) and a render target
 * count (see ps_output_get_num_rtv ()) */
enum class PS_OUTPUT
{
/* 1 render target */
PACKED,
/* 1 render target */
LUMA,
/* 1 render target */
CHROMA,
/* 2 render targets */
CHROMA_PLANAR,
/* 3 render targets */
PLANAR,
/* 4 render targets */
PLANAR_FULL,
};
/* Shader descriptions built so far, keyed by entry point name */
static std::map<std::string, std::shared_ptr<ConverterPSSource>> ps_source_cache;
/* Held while ps_source_cache is searched or extended */
static std::mutex cache_lock;
#ifdef HLSL_PRECOMPILED
/* PSMainConverter.h is generated at build time by collect_hlsl_header.py and
 * defines the precompiled_bytecode map */
#include "PSMainConverter.h"
#include "VSMain_converter.h"
#else
/* No precompiled shaders in this configuration: the map stays empty, so every
 * lookup misses and the shader is compiled from source at runtime */
static const std::map<std::string, std::pair<const BYTE *, SIZE_T>> precompiled_bytecode;
#endif
/* Without BUILDING_HLSL these files provide the shader sources as strings
 * (g_PSMain_converter_str / g_VSMain_converter_str) */
#include "hlsl/PSMain_converter.hlsl"
#include "hlsl/VSMain_converter.hlsl"
/* Maps @output to the name of the matching "PS_OUTPUT_*" preprocessor
 * define. An unknown value trips g_assert_not_reached () and otherwise
 * yields an empty string */
static const std::string
ps_output_to_string (PS_OUTPUT output)
{
  const char *define_name = "";

  if (output == PS_OUTPUT::PACKED)
    define_name = "PS_OUTPUT_PACKED";
  else if (output == PS_OUTPUT::LUMA)
    define_name = "PS_OUTPUT_LUMA";
  else if (output == PS_OUTPUT::CHROMA)
    define_name = "PS_OUTPUT_CHROMA";
  else if (output == PS_OUTPUT::CHROMA_PLANAR)
    define_name = "PS_OUTPUT_CHROMA_PLANAR";
  else if (output == PS_OUTPUT::PLANAR)
    define_name = "PS_OUTPUT_PLANAR";
  else if (output == PS_OUTPUT::PLANAR_FULL)
    define_name = "PS_OUTPUT_PLANAR_FULL";
  else
    g_assert_not_reached ();

  return define_name;
}
/* Returns how many render targets a shader with the given output layout
 * writes to. An unknown value trips g_assert_not_reached () and otherwise
 * yields 0 */
static guint
ps_output_get_num_rtv (PS_OUTPUT output)
{
  guint count = 0;

  if (output == PS_OUTPUT::PACKED || output == PS_OUTPUT::LUMA ||
      output == PS_OUTPUT::CHROMA) {
    count = 1;
  } else if (output == PS_OUTPUT::CHROMA_PLANAR) {
    count = 2;
  } else if (output == PS_OUTPUT::PLANAR) {
    count = 3;
  } else if (output == PS_OUTPUT::PLANAR_FULL) {
    count = 4;
  } else {
    g_assert_not_reached ();
  }

  return count;
}
/* Returns the sampler suffix used for @format on the input side. The result
 * is used to build the shader entry point ("PSMain_<input>_...") and the
 * SAMPLER define ("Sampler<input>").
 *
 * @premul selects the premultiplied-alpha variant for formats that have one.
 * AYUV/AYUV64 honour it as well: the build precompiles "AYUVPremul" sampler
 * variants, and ignoring the flag would sample premultiplied data as
 * straight alpha.
 *
 * An unsupported format trips g_assert_not_reached () and otherwise yields
 * an empty string */
static std::string
make_input (GstVideoFormat format, gboolean premul)
{
  switch (format) {
    case GST_VIDEO_FORMAT_RGBA:
    case GST_VIDEO_FORMAT_RGBA64_LE:
    case GST_VIDEO_FORMAT_RGB10A2_LE:
    case GST_VIDEO_FORMAT_BGRA:
      if (premul)
        return "RGBAPremul";
      return "RGBA";
    case GST_VIDEO_FORMAT_RGBx:
    case GST_VIDEO_FORMAT_BGRx:
      return "RGBx";
    case GST_VIDEO_FORMAT_VUYA:
      if (premul)
        return "VUYAPremul";
      return "VUYA";
    case GST_VIDEO_FORMAT_AYUV:
    case GST_VIDEO_FORMAT_AYUV64:
      if (premul)
        return "AYUVPremul";
      return "AYUV";
    case GST_VIDEO_FORMAT_NV12:
    case GST_VIDEO_FORMAT_P010_10LE:
    case GST_VIDEO_FORMAT_P012_LE:
    case GST_VIDEO_FORMAT_P016_LE:
      return "NV12";
    case GST_VIDEO_FORMAT_NV21:
      return "NV21";
    case GST_VIDEO_FORMAT_I420:
    case GST_VIDEO_FORMAT_Y42B:
    case GST_VIDEO_FORMAT_Y444:
    case GST_VIDEO_FORMAT_Y444_16LE:
      return "I420";
    case GST_VIDEO_FORMAT_YV12:
      return "YV12";
    case GST_VIDEO_FORMAT_I420_10LE:
    case GST_VIDEO_FORMAT_I422_10LE:
    case GST_VIDEO_FORMAT_Y444_10LE:
      return "I420_10";
    case GST_VIDEO_FORMAT_I420_12LE:
    case GST_VIDEO_FORMAT_I422_12LE:
    case GST_VIDEO_FORMAT_Y444_12LE:
      return "I420_12";
    case GST_VIDEO_FORMAT_Y410:
      return "Y410";
    case GST_VIDEO_FORMAT_GRAY8:
    case GST_VIDEO_FORMAT_GRAY16_LE:
      return "GRAY";
    case GST_VIDEO_FORMAT_RGBP:
      return "RGBP";
    case GST_VIDEO_FORMAT_BGRP:
      return "BGRP";
    case GST_VIDEO_FORMAT_GBR:
    case GST_VIDEO_FORMAT_GBR_16LE:
      return "GBR";
    case GST_VIDEO_FORMAT_GBR_10LE:
      return "GBR_10";
    case GST_VIDEO_FORMAT_GBR_12LE:
      return "GBR_12";
    case GST_VIDEO_FORMAT_GBRA:
      if (premul)
        return "GBRAPremul";
      return "GBRA";
    case GST_VIDEO_FORMAT_GBRA_10LE:
      if (premul)
        return "GBRAPremul_10";
      return "GBRA_10";
    case GST_VIDEO_FORMAT_GBRA_12LE:
      if (premul)
        return "GBRAPremul_12";
      return "GBRA_12";
    default:
      g_assert_not_reached ();
      break;
  }

  return "";
}
/* Returns the list of shader passes needed to write @format. Each element is
 * the output layout of one pass together with the output builder suffix used
 * in the entry point name and in the OUTPUT_BUILDER define
 * ("Output<suffix>"). Formats written as separate luma and chroma passes
 * yield two elements, all others one.
 *
 * @premul selects the premultiplied-alpha builder for formats that have one.
 * AYUV/AYUV64 honour it as well, matching the "AYUVPremul" output variants
 * the build precompiles.
 *
 * An unsupported format trips g_assert_not_reached () and otherwise yields
 * an empty list */
static std::vector<std::pair<PS_OUTPUT, std::string>>
make_output (GstVideoFormat format, gboolean premul)
{
  std::vector<std::pair<PS_OUTPUT, std::string>> ret;
  auto add = [&ret] (PS_OUTPUT type, const char *builder) {
    ret.emplace_back (type, builder);
  };

  switch (format) {
    case GST_VIDEO_FORMAT_RGBA:
    case GST_VIDEO_FORMAT_RGBA64_LE:
    case GST_VIDEO_FORMAT_RGB10A2_LE:
    case GST_VIDEO_FORMAT_BGRA:
      add (PS_OUTPUT::PACKED, premul ? "RGBAPremul" : "RGBA");
      break;
    case GST_VIDEO_FORMAT_RGBx:
    case GST_VIDEO_FORMAT_BGRx:
      add (PS_OUTPUT::PACKED, "RGBx");
      break;
    case GST_VIDEO_FORMAT_VUYA:
      add (PS_OUTPUT::PACKED, premul ? "VUYAPremul" : "VUYA");
      break;
    case GST_VIDEO_FORMAT_AYUV:
    case GST_VIDEO_FORMAT_AYUV64:
      add (PS_OUTPUT::PACKED, premul ? "AYUVPremul" : "AYUV");
      break;
    case GST_VIDEO_FORMAT_NV12:
    case GST_VIDEO_FORMAT_P010_10LE:
    case GST_VIDEO_FORMAT_P012_LE:
    case GST_VIDEO_FORMAT_P016_LE:
      add (PS_OUTPUT::LUMA, "Luma");
      add (PS_OUTPUT::CHROMA, "ChromaNV12");
      break;
    case GST_VIDEO_FORMAT_NV21:
      add (PS_OUTPUT::LUMA, "Luma");
      add (PS_OUTPUT::CHROMA, "ChromaNV21");
      break;
    case GST_VIDEO_FORMAT_I420:
    case GST_VIDEO_FORMAT_Y42B:
      add (PS_OUTPUT::LUMA, "Luma");
      add (PS_OUTPUT::CHROMA_PLANAR, "ChromaI420");
      break;
    case GST_VIDEO_FORMAT_Y444:
    case GST_VIDEO_FORMAT_Y444_16LE:
      add (PS_OUTPUT::PLANAR, "Y444");
      break;
    case GST_VIDEO_FORMAT_YV12:
      add (PS_OUTPUT::LUMA, "Luma");
      add (PS_OUTPUT::CHROMA_PLANAR, "ChromaYV12");
      break;
    case GST_VIDEO_FORMAT_I420_10LE:
    case GST_VIDEO_FORMAT_I422_10LE:
      add (PS_OUTPUT::LUMA, "Luma_10");
      add (PS_OUTPUT::CHROMA_PLANAR, "ChromaI420_10");
      break;
    case GST_VIDEO_FORMAT_Y444_10LE:
      add (PS_OUTPUT::PLANAR, "Y444_10");
      break;
    case GST_VIDEO_FORMAT_I420_12LE:
    case GST_VIDEO_FORMAT_I422_12LE:
      add (PS_OUTPUT::LUMA, "Luma_12");
      add (PS_OUTPUT::CHROMA_PLANAR, "ChromaI420_12");
      break;
    case GST_VIDEO_FORMAT_Y444_12LE:
      add (PS_OUTPUT::PLANAR, "Y444_12");
      break;
    case GST_VIDEO_FORMAT_GRAY8:
    case GST_VIDEO_FORMAT_GRAY16_LE:
      add (PS_OUTPUT::LUMA, "Luma");
      break;
    case GST_VIDEO_FORMAT_RGBP:
      add (PS_OUTPUT::PLANAR, "RGBP");
      break;
    case GST_VIDEO_FORMAT_BGRP:
      add (PS_OUTPUT::PLANAR, "BGRP");
      break;
    case GST_VIDEO_FORMAT_GBR:
    case GST_VIDEO_FORMAT_GBR_16LE:
      add (PS_OUTPUT::PLANAR, "GBR");
      break;
    case GST_VIDEO_FORMAT_GBR_10LE:
      add (PS_OUTPUT::PLANAR, "GBR_10");
      break;
    case GST_VIDEO_FORMAT_GBR_12LE:
      add (PS_OUTPUT::PLANAR, "GBR_12");
      break;
    case GST_VIDEO_FORMAT_GBRA:
      add (PS_OUTPUT::PLANAR_FULL, premul ? "GBRAPremul" : "GBRA");
      break;
    case GST_VIDEO_FORMAT_GBRA_10LE:
      add (PS_OUTPUT::PLANAR_FULL, premul ? "GBRAPremul_10" : "GBRA_10");
      break;
    case GST_VIDEO_FORMAT_GBRA_12LE:
      add (PS_OUTPUT::PLANAR_FULL, premul ? "GBRAPremul_12" : "GBRA_12");
      break;
    default:
      g_assert_not_reached ();
      break;
  }

  return ret;
}
/* Creates the pixel shader(s) needed to convert @in_format to @out_format
 * on @device.
 *
 * One shader is created per element returned by make_output (), so formats
 * written in separate luma and chroma passes produce two shaders. The entry
 * point of each shader is "PSMain_<input>_<conversion>_<output>".
 *
 * The description of each entry point (token, defines, precompiled bytecode
 * if any) is built once and kept in ps_source_cache. The ID3D11PixelShader
 * itself is created on every call.
 *
 * Returns the list of created shaders, or an empty list if creating any of
 * them failed */
PixelShaderList
gst_d3d11_get_converter_pixel_shader (GstD3D11Device * device,
    GstVideoFormat in_format, GstVideoFormat out_format, gboolean in_premul,
    gboolean out_premul, CONVERT_TYPE type)
{
  const auto input = make_input (in_format, in_premul);
  const auto output = make_output (out_format, out_premul);
  std::string conv_type;
  PixelShaderList ret;

  switch (type) {
    case CONVERT_TYPE::IDENTITY:
      conv_type = "Identity";
      break;
    case CONVERT_TYPE::SIMPLE:
      conv_type = "Simple";
      break;
    case CONVERT_TYPE::RANGE:
      conv_type = "Range";
      break;
    case CONVERT_TYPE::GAMMA:
      conv_type = "Gamma";
      break;
    case CONVERT_TYPE::PRIMARY:
      conv_type = "Primary";
      break;
  }

  for (const auto & it : output) {
    const std::string entry_point = "PSMain_" + input + "_" + conv_type +
        "_" + it.second;
    std::shared_ptr<ConverterPSSource> source;

    {
      /* Scoped lock: the code below allocates and may throw, the guard
       * makes sure the mutex is released on every exit path */
      std::lock_guard<std::mutex> lk (cache_lock);

      auto cached = ps_source_cache.find (entry_point);
      if (cached != ps_source_cache.end ()) {
        source = cached->second;
      } else {
        source = std::make_shared<ConverterPSSource> ();
        source->token = gst_d3d11_pixel_shader_token_new ();
        source->entry_point = entry_point;

        auto precompiled = precompiled_bytecode.find (entry_point);
        if (precompiled != precompiled_bytecode.end ()) {
          source->bytecode = precompiled->second.first;
          source->bytecode_size = precompiled->second.second;
        } else {
          /* Not precompiled, will be compiled from source */
          source->bytecode = nullptr;
          source->bytecode_size = 0;
        }

        source->num_rtv = ps_output_get_num_rtv (it.first);
        source->macros.push_back (std::make_pair ("ENTRY_POINT",
            entry_point));
        source->macros.push_back (std::make_pair ("SAMPLER",
            "Sampler" + input));
        source->macros.push_back (std::make_pair ("CONVERTER",
            "Converter" + conv_type));
        source->macros.push_back (std::make_pair ("OUTPUT_TYPE",
            ps_output_to_string (it.first)));
        source->macros.push_back (std::make_pair ("OUTPUT_BUILDER",
            "Output" + it.second));

        ps_source_cache[entry_point] = source;
      }
    }

    /* The strings are owned by the cached source, which outlives this call */
    std::vector<D3D_SHADER_MACRO> macros;
    macros.reserve (source->macros.size () + 1);
    for (const auto & defines : source->macros)
      macros.push_back ({defines.first.c_str (), defines.second.c_str ()});
    /* D3D_SHADER_MACRO arrays are terminated by a null entry */
    macros.push_back ({nullptr, nullptr});

    ComPtr<ID3D11PixelShader> shader;
    HRESULT hr = gst_d3d11_device_get_pixel_shader_uncached (device,
        source->token, source->bytecode, source->bytecode_size,
        g_PSMain_converter_str, sizeof (g_PSMain_converter_str),
        source->entry_point.c_str (), macros.data (), &shader);
    if (FAILED (hr)) {
      /* All or nothing: drop shaders created for earlier passes */
      ret.clear ();
      return ret;
    }

    auto ps = std::make_shared<PixelShader> ();
    ps->shader = shader;
    ps->num_rtv = source->num_rtv;
    ret.push_back (ps);
  }

  return ret;
}
/* *INDENT-ON* */
/* Gets the converter vertex shader and its input layout from @device.
 *
 * The shader token is allocated once and reused by every call. Precompiled
 * bytecode is passed when built with HLSL_PRECOMPILED, otherwise only the
 * HLSL source string is passed. The input layout has two per-vertex
 * elements: a float3 POSITION and a float2 TEXCOORD */
HRESULT
gst_d3d11_get_converter_vertex_shader (GstD3D11Device * device,
    ID3D11VertexShader ** vs, ID3D11InputLayout ** layout)
{
  static gint64 token = 0;

  GST_D3D11_CALL_ONCE_BEGIN {
    token = gst_d3d11_vertex_shader_token_new ();
  } GST_D3D11_CALL_ONCE_END;

#ifdef HLSL_PRECOMPILED
  const void *bytecode = g_VSMain_converter;
  gsize bytecode_size = sizeof (g_VSMain_converter);
#else
  const void *bytecode = nullptr;
  gsize bytecode_size = 0;
#endif

  /* Only the semantic name and the format differ between the elements */
  const gchar *semantic_names[2] = { "POSITION", "TEXCOORD" };
  const DXGI_FORMAT formats[2] = {
    DXGI_FORMAT_R32G32B32_FLOAT, DXGI_FORMAT_R32G32_FLOAT
  };
  D3D11_INPUT_ELEMENT_DESC input_desc[2];

  for (guint i = 0; i < G_N_ELEMENTS (input_desc); i++) {
    D3D11_INPUT_ELEMENT_DESC & elem = input_desc[i];

    elem.SemanticName = semantic_names[i];
    elem.SemanticIndex = 0;
    elem.Format = formats[i];
    elem.InputSlot = 0;
    elem.AlignedByteOffset = D3D11_APPEND_ALIGNED_ELEMENT;
    elem.InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA;
    elem.InstanceDataStepRate = 0;
  }

  return gst_d3d11_device_get_vertex_shader (device, token,
      bytecode, bytecode_size, g_VSMain_converter_str,
      sizeof (g_VSMain_converter_str), "VSMain_converter", input_desc,
      G_N_ELEMENTS (input_desc), vs, layout);
}

View file

@ -0,0 +1,57 @@
/* GStreamer
* Copyright (C) 2023 Seungha Yang <seungha@centricular.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#pragma once
#include <gst/gst.h>
#include <gst/video/video.h>
#include <gst/d3d11/gstd3d11_fwd.h>
#include <vector>
#include <wrl.h>
#include <memory>
#include <vector>
enum class CONVERT_TYPE
{
IDENTITY,
SIMPLE,
RANGE,
GAMMA,
PRIMARY,
};
/* One converter pixel shader together with the number of render targets
 * it writes to */
struct PixelShader
{
/* The created shader object */
Microsoft::WRL::ComPtr<ID3D11PixelShader> shader;
/* Number of render target views expected by the shader */
guint num_rtv;
};
/* Shaders needed for one conversion, one element per output pass */
typedef std::vector<std::shared_ptr<PixelShader>> PixelShaderList;
/* Creates the pixel shader(s) converting in_format to out_format on device.
 * in_premul / out_premul select premultiplied-alpha variants for formats
 * that have one. Returns an empty list if shader creation failed */
PixelShaderList
gst_d3d11_get_converter_pixel_shader (GstD3D11Device * device,
GstVideoFormat in_format,
GstVideoFormat out_format,
gboolean in_premul,
gboolean out_premul,
CONVERT_TYPE type);
/* Gets the converter vertex shader and its input layout for device.
 * Returns the HRESULT of gst_d3d11_device_get_vertex_shader () */
HRESULT gst_d3d11_get_converter_vertex_shader (GstD3D11Device * device,
ID3D11VertexShader ** vs,
ID3D11InputLayout ** layout);

View file

@ -38,8 +38,20 @@ HRESULT gst_d3d11_device_get_pixel_shader (GstD3D11Device * device,
const gchar * source, const gchar * source,
gsize source_size, gsize source_size,
const gchar * entry_point, const gchar * entry_point,
const D3D_SHADER_MACRO * defines,
ID3D11PixelShader ** ps); ID3D11PixelShader ** ps);
GST_D3D11_API
HRESULT gst_d3d11_device_get_pixel_shader_uncached (GstD3D11Device * device,
gint64 token,
const void * bytecode,
gsize bytecode_size,
const gchar * source,
gsize source_size,
const gchar * entry_point,
const D3D_SHADER_MACRO * defines,
ID3D11PixelShader ** ps);
GST_D3D11_API GST_D3D11_API
HRESULT gst_d3d11_device_get_vertex_shader (GstD3D11Device * device, HRESULT gst_d3d11_device_get_vertex_shader (GstD3D11Device * device,
gint64 token, gint64 token,

View file

@ -1704,10 +1704,73 @@ gst_d3d11_vertex_shader_token_new (void)
return token_.fetch_add (1); return token_.fetch_add (1);
} }
/* Creates a new pixel shader object without consulting or updating the
 * per-device shader cache.
 *
 * On feature level 11_0 or higher, @bytecode is used directly when given;
 * otherwise the blob is obtained from the shader blob cache for @token,
 * which compiles @source when needed. On lower feature levels @source is
 * always compiled with a ps_4_0 family target and @bytecode is ignored.
 *
 * On success the new shader is stored in @ps and S_OK is returned; on
 * failure the failing HRESULT is returned */
HRESULT
gst_d3d11_device_get_pixel_shader_uncached (GstD3D11Device * device,
    gint64 token, const void *bytecode, gsize bytecode_size,
    const gchar * source, gsize source_size, const gchar * entry_point,
    const D3D_SHADER_MACRO * defines, ID3D11PixelShader ** ps)
{
  GstD3D11DevicePrivate *priv = device->priv;
  const bool is_fl_11 = priv->feature_level >= D3D_FEATURE_LEVEL_11_0;
  ComPtr < ID3DBlob > compiled;
  ComPtr < ID3D11PixelShader > pixel_shader;
  const void *code = nullptr;
  gsize code_size = 0;
  HRESULT hr;

  GST_LOG_OBJECT (device,
      "Creating pixel shader for token %" G_GINT64_FORMAT ", source:\n%s",
      token, source);

  if (is_fl_11 && bytecode && bytecode_size > 1) {
    code = bytecode;
    code_size = bytecode_size;
    GST_DEBUG_OBJECT (device,
        "Creating shader \"%s\" using precompiled bytecode", entry_point);
  } else {
    if (is_fl_11) {
      hr = gst_d3d11_shader_cache_get_pixel_shader_blob (token,
          source, source_size, entry_point, defines, &compiled);
    } else {
      /* Pick the shader target matching the device feature level */
      const gchar *target = "ps_4_0_level_9_1";

      if (priv->feature_level >= D3D_FEATURE_LEVEL_10_0)
        target = "ps_4_0";
      else if (priv->feature_level >= D3D_FEATURE_LEVEL_9_3)
        target = "ps_4_0_level_9_3";

      hr = gst_d3d11_compile (source, source_size, nullptr, defines,
          nullptr, entry_point, target, 0, 0, &compiled, nullptr);
    }

    if (!gst_d3d11_result (hr, device))
      return hr;

    code = compiled->GetBufferPointer ();
    code_size = compiled->GetBufferSize ();
  }

  hr = priv->device->CreatePixelShader (code, code_size, nullptr,
      &pixel_shader);
  if (!gst_d3d11_result (hr, device))
    return hr;

  GST_DEBUG_OBJECT (device,
      "Created pixel shader \"%s\" for token %" G_GINT64_FORMAT,
      entry_point, token);

  *ps = pixel_shader.Detach ();

  return S_OK;
}
HRESULT HRESULT
gst_d3d11_device_get_pixel_shader (GstD3D11Device * device, gint64 token, gst_d3d11_device_get_pixel_shader (GstD3D11Device * device, gint64 token,
const void *bytecode, gsize bytecode_len, const gchar * source, const void *bytecode, gsize bytecode_size, const gchar * source,
gsize source_size, const gchar * entry_point, ID3D11PixelShader ** ps) gsize source_size, const gchar * entry_point,
const D3D_SHADER_MACRO * defines, ID3D11PixelShader ** ps)
{ {
GstD3D11DevicePrivate *priv = device->priv; GstD3D11DevicePrivate *priv = device->priv;
HRESULT hr; HRESULT hr;
@ -1727,43 +1790,11 @@ gst_d3d11_device_get_pixel_shader (GstD3D11Device * device, gint64 token,
return S_OK; return S_OK;
} }
GST_LOG_OBJECT (device, hr = gst_d3d11_device_get_pixel_shader_uncached (device, token, bytecode,
"Creating pixel shader for token %" G_GINT64_FORMAT ", source:\n%s", bytecode_size, source, source_size, entry_point, defines, &shader);
token, source); if (!gst_d3d11_result (hr, device))
return hr;
if (priv->feature_level >= D3D_FEATURE_LEVEL_11_0) {
ComPtr < ID3DBlob > blob;
const void *data;
gsize size;
if (bytecode && bytecode_len > 1) {
data = bytecode;
size = bytecode_len;
GST_DEBUG_OBJECT (device,
"Creating shader \"%s\" using precompiled bytecode", entry_point);
} else {
hr = gst_d3d11_shader_cache_get_pixel_shader_blob (token,
source, source_size, entry_point, &blob);
if (!gst_d3d11_result (hr, device))
return hr;
data = blob->GetBufferPointer ();
size = blob->GetBufferSize ();
}
hr = priv->device->CreatePixelShader (data, size, nullptr, &shader);
if (!gst_d3d11_result (hr, device))
return hr;
} else {
hr = gst_d3d11_create_pixel_shader_simple (device, source, entry_point,
&shader);
if (!gst_d3d11_result (hr, device))
return hr;
}
GST_DEBUG_OBJECT (device,
"Created pixel shader \"%s\" for token %" G_GINT64_FORMAT,
entry_point, token);
priv->ps_cache[token] = shader; priv->ps_cache[token] = shader;
*ps = shader.Detach (); *ps = shader.Detach ();
@ -1772,7 +1803,7 @@ gst_d3d11_device_get_pixel_shader (GstD3D11Device * device, gint64 token,
HRESULT HRESULT
gst_d3d11_device_get_vertex_shader (GstD3D11Device * device, gint64 token, gst_d3d11_device_get_vertex_shader (GstD3D11Device * device, gint64 token,
const void *bytecode, gsize bytecode_len, const gchar * source, const void *bytecode, gsize bytecode_size, const gchar * source,
gsize source_size, const gchar * entry_point, gsize source_size, const gchar * entry_point,
const D3D11_INPUT_ELEMENT_DESC * input_desc, guint desc_len, const D3D11_INPUT_ELEMENT_DESC * input_desc, guint desc_len,
ID3D11VertexShader ** vs, ID3D11InputLayout ** layout) ID3D11VertexShader ** vs, ID3D11InputLayout ** layout)
@ -1807,9 +1838,9 @@ gst_d3d11_device_get_vertex_shader (GstD3D11Device * device, gint64 token,
const void *data; const void *data;
gsize size; gsize size;
if (bytecode && bytecode_len > 1) { if (bytecode && bytecode_size > 1) {
data = bytecode; data = bytecode;
size = bytecode_len; size = bytecode_size;
GST_DEBUG_OBJECT (device, GST_DEBUG_OBJECT (device,
"Creating shader \"%s\" using precompiled bytecode", entry_point); "Creating shader \"%s\" using precompiled bytecode", entry_point);
} else { } else {

View file

@ -987,6 +987,19 @@ color_matrix_identity (GstD3D11ColorMatrix * m)
} }
} }
/* Resets @matrix: color_matrix_identity () is applied first, then every
 * component gets offset 0 and the range [0, 1]. Does nothing beyond the
 * g_return_if_fail () handling if @matrix is NULL */
void
gst_d3d11_color_matrix_init (GstD3D11ColorMatrix * matrix)
{
  g_return_if_fail (matrix);

  color_matrix_identity (matrix);

  guint component = 0;
  while (component < 3) {
    matrix->offset[component] = 0;
    matrix->min[component] = 0;
    matrix->max[component] = 1;
    component++;
  }
}
static gboolean static gboolean
color_matrix_invert (GstD3D11ColorMatrix * dst, GstD3D11ColorMatrix * src) color_matrix_invert (GstD3D11ColorMatrix * dst, GstD3D11ColorMatrix * src)
{ {

View file

@ -36,7 +36,7 @@ static std::map <gint64, ID3DBlob *> vs_blob_;
HRESULT HRESULT
gst_d3d11_shader_cache_get_pixel_shader_blob (gint64 token, gst_d3d11_shader_cache_get_pixel_shader_blob (gint64 token,
const gchar * source, gsize source_size, const gchar * entry_point, const gchar * source, gsize source_size, const gchar * entry_point,
ID3DBlob ** blob) const D3D_SHADER_MACRO * defines, ID3DBlob ** blob)
{ {
std::lock_guard < std::mutex > lk (cache_lock_); std::lock_guard < std::mutex > lk (cache_lock_);
@ -47,7 +47,7 @@ gst_d3d11_shader_cache_get_pixel_shader_blob (gint64 token,
return S_OK; return S_OK;
} }
HRESULT hr = gst_d3d11_compile (source, source_size, nullptr, nullptr, HRESULT hr = gst_d3d11_compile (source, source_size, nullptr, defines,
nullptr, entry_point, "ps_5_0", 0, 0, blob, nullptr); nullptr, entry_point, "ps_5_0", 0, 0, blob, nullptr);
if (FAILED (hr)) if (FAILED (hr))
return hr; return hr;

View file

@ -28,6 +28,7 @@ HRESULT gst_d3d11_shader_cache_get_pixel_shader_blob (gint64 token,
const gchar * source, const gchar * source,
gsize source_size, gsize source_size,
const gchar * entry_point, const gchar * entry_point,
const D3D_SHADER_MACRO * defines,
ID3DBlob ** blob); ID3DBlob ** blob);
HRESULT gst_d3d11_shader_cache_get_vertex_shader_blob (gint64 token, HRESULT gst_d3d11_shader_cache_get_vertex_shader_blob (gint64 token,

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,55 @@
/* GStreamer
* Copyright (C) 2023 Seungha Yang <seungha@centricular.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
/* This file is used in two ways. With BUILDING_HLSL defined (as done by the
 * fxc precompile step) it is plain HLSL. Without it, it is included from C++
 * and provides the same shader as the string g_VSMain_converter_str for
 * runtime compilation. Both copies must be kept in sync */
#ifdef BUILDING_HLSL
/* Per-vertex input: position and texture coordinate */
struct VS_INPUT
{
float4 Position : POSITION;
float2 Texture : TEXCOORD;
};
/* Output handed on to the pixel shader stage */
struct VS_OUTPUT
{
float4 Position : SV_POSITION;
float2 Texture : TEXCOORD;
};
/* Pass-through vertex shader: returns the input unchanged */
VS_OUTPUT VSMain_converter (VS_INPUT input)
{
return input;
}
#else
/* String copy of the HLSL code above */
static const char g_VSMain_converter_str[] =
"struct VS_INPUT\n"
"{\n"
" float4 Position : POSITION;\n"
" float2 Texture : TEXCOORD;\n"
"};\n"
"\n"
"struct VS_OUTPUT\n"
"{\n"
" float4 Position : SV_POSITION;\n"
" float2 Texture : TEXCOORD;\n"
"};\n"
"\n"
"VS_OUTPUT VSMain_converter (VS_INPUT input)\n"
"{\n"
" return input;\n"
"}\n";
#endif

View file

@ -0,0 +1,63 @@
#!/usr/bin/env python3
# GStreamer
# Copyright (C) 2023 Seungha Yang <seungha@centricular.com>
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Library General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Library General Public License for more details.
#
# You should have received a copy of the GNU Library General Public
# License along with this library; if not, write to the
# Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
# Boston, MA 02110-1301, USA.
import sys
import os
import argparse
# Text written at the top of the generated header.
start_header = """/*
* This file is autogenerated by collect_hlsl_header.py
*/
#pragma once
"""
# Opens the entry-point -> (bytecode, size) map in the generated header.
start_map = """
#define MAKE_BYTECODE(name) { G_STRINGIFY (name), { g_##name, sizeof (g_##name)} }
static const std::map<std::string, std::pair<const BYTE *, SIZE_T>> precompiled_bytecode = {
"""
# Closes the map and removes the helper macro again.
end_map = """};
#undef MAKE_BYTECODE
"""


def main(args):
    """Collect precompiled PSMain_*.h headers into a single header.

    ``args`` is the command line without the program name. ``--input`` is
    the directory scanned for ``PSMain_*.h`` files and ``--output`` is the
    header to write. The generated header includes every file found and
    defines the ``precompiled_bytecode`` map with one entry per file, named
    after the file without its extension.

    Returns None, which ``sys.exit()`` treats as success.
    """
    parser = argparse.ArgumentParser(description='Read precompiled HLSL headers from directory and make single header')
    parser.add_argument("--input", help="the precompiled HLSL header directory")
    parser.add_argument("--output", help="output header file location")
    args = parser.parse_args(args)

    # Scan precompiled PSMain_*.h headers in build directory
    # and generate single header.
    # os.listdir() returns entries in arbitrary order, so sort them to keep
    # the generated header identical from one build to the next.
    hlsl_headers = sorted(
        entry for entry in os.listdir(args.input)
        if entry.startswith("PSMain_") and entry.endswith(".h"))

    with open(args.output, 'w', newline='\n', encoding='utf8') as f:
        f.write(start_header)
        for file in hlsl_headers:
            f.write("#include \"")
            f.write(file)
            f.write("\"\n")
        f.write(start_map)
        for file in hlsl_headers:
            f.write(" MAKE_BYTECODE ({}),\n".format(os.path.splitext(file)[0]))
        f.write(end_map)


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))

View file

@ -0,0 +1,128 @@
# Single HLSL source every converter pixel shader variant is compiled from
hlsl_ps_source = files('PSMain_converter.hlsl')
# Input sampler variants: [name, flag].
# The name is used as "Sampler<name>" and in the entry point name. The flag
# is compared with the output flag below to pick the converter; it appears
# to mark RGB-family formats (true) versus YUV-family formats (false).
hlsl_ps_input_formats = [
['NV12', false],
['NV21', false],
['I420', false],
['YV12', false],
['I420_10', false],
['I420_12', false],
['VUYA', false],
['VUYAPremul', false],
['Y410', false],
['AYUV', false],
['AYUVPremul', false],
['RGBA', true],
['RGBAPremul', true],
['RGBx', true],
['GBR', true],
['GBR_10', true],
['GBR_12', true],
['GBRA', true],
['GBRAPremul', true],
['GBRA_10', true],
['GBRAPremul_10', true],
['GBRA_12', true],
['GBRAPremul_12', true],
['RGBP', true],
['BGRP', true],
]
# Output variants: [OUTPUT_TYPE define, output builder name, flag].
# The builder name is used as "Output<name>" and in the entry point name.
# The flag has the same meaning as for the input formats.
hlsl_ps_output_formats = [
['PS_OUTPUT_LUMA', 'Luma', false],
['PS_OUTPUT_LUMA', 'Luma_10', false],
['PS_OUTPUT_LUMA', 'Luma_12', false],
['PS_OUTPUT_CHROMA', 'ChromaNV12', false],
['PS_OUTPUT_CHROMA', 'ChromaNV21', false],
['PS_OUTPUT_CHROMA_PLANAR', 'ChromaI420', false],
['PS_OUTPUT_CHROMA_PLANAR', 'ChromaYV12', false],
['PS_OUTPUT_CHROMA_PLANAR', 'ChromaI420_10', false],
['PS_OUTPUT_CHROMA_PLANAR', 'ChromaI420_12', false],
['PS_OUTPUT_PLANAR', 'Y444', false],
['PS_OUTPUT_PLANAR', 'Y444_10', false],
['PS_OUTPUT_PLANAR', 'Y444_12', false],
['PS_OUTPUT_PLANAR', 'GBR', true],
['PS_OUTPUT_PLANAR', 'GBR_10', true],
['PS_OUTPUT_PLANAR', 'GBR_12', true],
['PS_OUTPUT_PLANAR', 'RGBP', true],
['PS_OUTPUT_PLANAR', 'BGRP', true],
['PS_OUTPUT_PLANAR_FULL', 'GBRA', true],
['PS_OUTPUT_PLANAR_FULL', 'GBRAPremul', true],
['PS_OUTPUT_PLANAR_FULL', 'GBRA_10', true],
['PS_OUTPUT_PLANAR_FULL', 'GBRAPremul_10', true],
['PS_OUTPUT_PLANAR_FULL', 'GBRA_12', true],
['PS_OUTPUT_PLANAR_FULL', 'GBRAPremul_12', true],
['PS_OUTPUT_PACKED', 'RGBA', true],
['PS_OUTPUT_PACKED', 'RGBAPremul', true],
['PS_OUTPUT_PACKED', 'RGBx', true],
['PS_OUTPUT_PACKED', 'VUYA', false],
['PS_OUTPUT_PACKED', 'VUYAPremul', false],
['PS_OUTPUT_PACKED', 'AYUV', false],
['PS_OUTPUT_PACKED', 'AYUVPremul', false],
]
# Script which merges all generated PSMain_*.h headers into one header
header_collector = find_program('collect_hlsl_header.py')
# Precompile one pixel shader per (input, output) pair with fxc. Each pair
# gets exactly one converter: 'Simple' when the input and output flags
# differ, 'Identity' when they are equal.
foreach input_format : hlsl_ps_input_formats
in_format = input_format.get(0)
foreach output_format : hlsl_ps_output_formats
converter = ''
if input_format.get(1) != output_format.get(2)
converter = 'Simple'
else
converter = 'Identity'
endif
output_type = output_format.get(0)
output_builder = output_format.get(1)
# Entry point name doubles as the generated header name
entry_point = 'PSMain_@0@_@1@_@2@'.format(in_format, converter, output_builder)
header = '@0@.h'.format(entry_point)
compiled_shader = custom_target(header,
input : hlsl_ps_source,
output : header,
command : [fxc, '/Fh', '@OUTPUT@',
'/E', entry_point,
'/T', 'ps_5_0',
'/D', 'BUILDING_HLSL=1',
'/D', 'OUTPUT_TYPE=@0@'.format(output_type),
'/D', 'ENTRY_POINT=@0@'.format(entry_point),
'/D', 'SAMPLER=Sampler@0@'.format(in_format),
'/D', 'CONVERTER=Converter@0@'.format(converter),
'/D', 'OUTPUT_BUILDER=Output@0@'.format(output_builder),
'/nologo',
'@INPUT@'])
hlsl_precompiled += [compiled_shader]
endforeach
endforeach
# Generate PSMainConverter.h from the headers found in the build directory.
# Listing the compiled shaders as input makes this target run after them.
header_collection = 'PSMainConverter.h'
generated_collection = custom_target(header_collection,
input : hlsl_precompiled,
output : header_collection,
command : [header_collector,
'--input',
meson.current_build_dir(),
'--output',
'@OUTPUT@'
])
hlsl_precompiled += generated_collection
# Vertex shaders to precompile: [source file, entry point, fxc target]
hlsl_vs_sources = [
[files('VSMain_converter.hlsl'), 'VSMain_converter', 'vs_5_0'],
]
# Compile each vertex shader into a header named after its entry point
foreach shader : hlsl_vs_sources
source = shader.get(0)
entry_point = shader.get(1)
header = '@0@.h'.format(entry_point)
compiled_shader = custom_target(header,
input : source,
output : header,
command : [fxc, '/Fh', '@OUTPUT@',
'/E', entry_point,
'/T', shader.get(2),
'/D', 'BUILDING_HLSL=1',
'/nologo',
'@INPUT@'])
hlsl_precompiled += [compiled_shader]
endforeach

View file

@ -2,6 +2,7 @@ d3d11_sources = [
'gstd3d11bufferpool.cpp', 'gstd3d11bufferpool.cpp',
'gstd3d11compile.cpp', 'gstd3d11compile.cpp',
'gstd3d11converter.cpp', 'gstd3d11converter.cpp',
'gstd3d11converterbuilder.cpp',
'gstd3d11device.cpp', 'gstd3d11device.cpp',
'gstd3d11format.cpp', 'gstd3d11format.cpp',
'gstd3d11memory.cpp', 'gstd3d11memory.cpp',
@ -167,6 +168,13 @@ if cc.get_id() != 'msvc'
extra_comm_args += extra_args extra_comm_args += extra_args
endif endif
hlsl_precompiled = []
fxc = find_program ('fxc', required : get_option ('d3d11-hlsl-precompile'))
if cc.get_id() == 'msvc' and fxc.found()
subdir('hlsl')
extra_comm_args += ['-DHLSL_PRECOMPILED']
endif
configure_file( configure_file(
input : 'gstd3d11config.h.meson', input : 'gstd3d11config.h.meson',
output : 'gstd3d11config.h', output : 'gstd3d11config.h',
@ -177,7 +185,7 @@ configure_file(
pkg_name = 'gstreamer-d3d11-' + api_version pkg_name = 'gstreamer-d3d11-' + api_version
gstd3d11 = library('gstd3d11-' + api_version, gstd3d11 = library('gstd3d11-' + api_version,
d3d11_sources, d3d11_sources + hlsl_precompiled,
c_args : gst_plugins_bad_args + extra_c_args + extra_comm_args, c_args : gst_plugins_bad_args + extra_c_args + extra_comm_args,
cpp_args : gst_plugins_bad_args + extra_comm_args, cpp_args : gst_plugins_bad_args + extra_comm_args,
include_directories : [configinc, libsinc], include_directories : [configinc, libsinc],

View file

@ -709,7 +709,7 @@ gst_d3d11_get_pixel_shader_checker_luma (GstD3D11Device * device,
return gst_d3d11_device_get_pixel_shader (device, token, return gst_d3d11_device_get_pixel_shader (device, token,
g_PSMain_checker_luma, sizeof (g_PSMain_checker_luma), g_PSMain_checker_luma, sizeof (g_PSMain_checker_luma),
g_PSMain_checker_luma_str, sizeof (g_PSMain_checker_luma_str), g_PSMain_checker_luma_str, sizeof (g_PSMain_checker_luma_str),
"PSMain_checker_luma", ps); "PSMain_checker_luma", nullptr, ps);
} }
HRESULT HRESULT
@ -725,7 +725,7 @@ gst_d3d11_get_pixel_shader_checker_rgb (GstD3D11Device * device,
return gst_d3d11_device_get_pixel_shader (device, token, return gst_d3d11_device_get_pixel_shader (device, token,
g_PSMain_checker_rgb, sizeof (g_PSMain_checker_rgb), g_PSMain_checker_rgb, sizeof (g_PSMain_checker_rgb),
g_PSMain_checker_rgb_str, sizeof (g_PSMain_checker_rgb_str), g_PSMain_checker_rgb_str, sizeof (g_PSMain_checker_rgb_str),
"PSMain_checker_rgb", ps); "PSMain_checker_rgb", nullptr, ps);
} }
HRESULT HRESULT
@ -741,7 +741,7 @@ gst_d3d11_get_pixel_shader_checker_vuya (GstD3D11Device * device,
return gst_d3d11_device_get_pixel_shader (device, token, return gst_d3d11_device_get_pixel_shader (device, token,
g_PSMain_checker_vuya, sizeof (g_PSMain_checker_vuya), g_PSMain_checker_vuya, sizeof (g_PSMain_checker_vuya),
g_PSMain_checker_vuya_str, sizeof (g_PSMain_checker_vuya_str), g_PSMain_checker_vuya_str, sizeof (g_PSMain_checker_vuya_str),
"PSMain_checker_vuya", ps); "PSMain_checker_vuya", nullptr, ps);
} }
HRESULT HRESULT
@ -757,7 +757,7 @@ gst_d3d11_get_pixel_shader_checker (GstD3D11Device * device,
return gst_d3d11_device_get_pixel_shader (device, token, return gst_d3d11_device_get_pixel_shader (device, token,
g_PSMain_checker, sizeof (g_PSMain_checker), g_PSMain_checker, sizeof (g_PSMain_checker),
g_PSMain_checker_str, sizeof (g_PSMain_checker_str), g_PSMain_checker_str, sizeof (g_PSMain_checker_str),
"PSMain_checker", ps); "PSMain_checker", nullptr, ps);
} }
HRESULT HRESULT
@ -772,7 +772,8 @@ gst_d3d11_get_pixel_shader_color (GstD3D11Device * device,
return gst_d3d11_device_get_pixel_shader (device, token, return gst_d3d11_device_get_pixel_shader (device, token,
g_PSMain_color, sizeof (g_PSMain_color), g_PSMain_color, sizeof (g_PSMain_color),
g_PSMain_color_str, sizeof (g_PSMain_color_str), "PSMain_color", ps); g_PSMain_color_str, sizeof (g_PSMain_color_str), "PSMain_color",
nullptr, ps);
} }
HRESULT HRESULT
@ -788,7 +789,7 @@ gst_d3d11_get_pixel_shader_sample_premul (GstD3D11Device * device,
return gst_d3d11_device_get_pixel_shader (device, token, return gst_d3d11_device_get_pixel_shader (device, token,
g_PSMain_sample_premul, sizeof (g_PSMain_sample_premul), g_PSMain_sample_premul, sizeof (g_PSMain_sample_premul),
g_PSMain_sample_premul_str, sizeof (g_PSMain_sample_premul_str), g_PSMain_sample_premul_str, sizeof (g_PSMain_sample_premul_str),
"PSMain_sample_premul", ps); "PSMain_sample_premul", nullptr, ps);
} }
HRESULT HRESULT
@ -803,7 +804,8 @@ gst_d3d11_get_pixel_shader_sample (GstD3D11Device * device,
return gst_d3d11_device_get_pixel_shader (device, token, return gst_d3d11_device_get_pixel_shader (device, token,
g_PSMain_sample, sizeof (g_PSMain_sample), g_PSMain_sample, sizeof (g_PSMain_sample),
g_PSMain_sample_str, sizeof (g_PSMain_sample_str), "PSMain_sample", ps); g_PSMain_sample_str, sizeof (g_PSMain_sample_str), "PSMain_sample",
nullptr, ps);
} }
HRESULT HRESULT
@ -818,7 +820,8 @@ gst_d3d11_get_pixel_shader_snow (GstD3D11Device * device,
return gst_d3d11_device_get_pixel_shader (device, token, return gst_d3d11_device_get_pixel_shader (device, token,
g_PSMain_snow, sizeof (g_PSMain_snow), g_PSMain_snow, sizeof (g_PSMain_snow),
g_PSMain_snow_str, sizeof (g_PSMain_snow_str), "PSMain_snow", ps); g_PSMain_snow_str, sizeof (g_PSMain_snow_str), "PSMain_snow",
nullptr, ps);
} }
HRESULT HRESULT