d3d11: Implement helper object for converter

This object uploads system memory to the GPU and preprocesses the
texture using a compute shader or a software converter if needed.

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/5691>
This commit is contained in:
Seungha Yang 2023-11-17 20:51:31 +09:00
parent 66f51f642f
commit c57fe82a93
13 changed files with 990 additions and 9 deletions

View file

@ -21,7 +21,7 @@
#include "config.h" #include "config.h"
#endif #endif
#include "gstd3d11converterbuilder.h" #include "gstd3d11converter-builder.h"
#include "gstd3d11device-private.h" #include "gstd3d11device-private.h"
#include "gstd3d11-private.h" #include "gstd3d11-private.h"
#include <map> #include <map>

View file

@ -0,0 +1,726 @@
/* GStreamer
* Copyright (C) 2023 Seungha Yang <seungha@centricular.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "gstd3d11-private.h"
#include "gstd3d11converter-helper.h"
#include "gstd3d11device.h"
#include "gstd3d11device-private.h"
#include "gstd3d11utils.h"
#include "gstd3d11memory.h"
#include "gstd3d11bufferpool.h"
#include "gstd3d11shadercache.h"
#include <wrl.h>
#include <math.h>
#include <map>
#include <vector>
#include <mutex>
#include <string>
#include <memory>
/* *INDENT-OFF* */
using namespace Microsoft::WRL;
/* *INDENT-ON* */
GST_DEBUG_CATEGORY_EXTERN (gst_d3d11_converter_debug);
#define GST_CAT_DEFAULT gst_d3d11_converter_debug
/* *INDENT-OFF* */
/* Describes how to obtain the compute shader for one conversion entry point:
 * either a precompiled bytecode blob, or the set of preprocessor defines used
 * to compile the embedded HLSL source at runtime. */
struct ConverterCSSource
{
/* Token handed to the shader cache when the shader is compiled from source */
gint64 token;
/* Name of the HLSL entry point function */
std::string entry_point;
/* Precompiled bytecode, or nullptr when the shader must be compiled */
const BYTE *bytecode;
/* Size of bytecode in bytes; 0 when bytecode is nullptr */
SIZE_T bytecode_size;
/* (name, value) preprocessor defines used when compiling from source */
std::vector<std::pair<std::string, std::string>> macros;
};
/* File-scope cache of shader source descriptions, keyed by entry point name */
static std::map<std::string, std::shared_ptr<ConverterCSSource>> cs_source_cache;
/* Held while cs_source_cache is looked up or filled */
static std::mutex cache_lock;
#ifdef HLSL_PRECOMPILED
/* NOTE(review): presumably this generated header defines precompiled_bytecode
 * from the build-time compiled shaders - confirm against the generator */
#include "CSMainConverter.h"
#else
/* No precompiled shaders available: the table is empty, so every lookup
 * misses and the shader is compiled from the embedded source instead */
static const std::map<std::string, std::pair<const BYTE *, SIZE_T>> precompiled_bytecode;
#endif
/* *INDENT-ON* */
/* Provides g_CSMain_converter_str, the HLSL source as a C string */
#include "hlsl/CSMain_converter.hlsl"
/* Per-instance state of the converter helper: the device it works on, the
 * optional compute shader or software converter used for format conversion,
 * and the intermediate buffers that are allocated lazily and reused. */
struct _GstD3D11ConverterHelper
{
/* Frees the software converter (if any), drops both cached buffers and
 * releases the device reference */
~_GstD3D11ConverterHelper ()
{
if (sw_conv)
gst_video_converter_free (sw_conv);
gst_clear_buffer (&srv_buf);
gst_clear_buffer (&uav_buf);
gst_clear_object (&device);
}
/* Device all GPU resources are created on; a reference is held */
GstD3D11Device *device = nullptr;
/* Conversion compute shader; null when no shader is used */
ComPtr < ID3D11ComputeShader > cs;
/* View formats used when binding the shader input and output textures */
DXGI_FORMAT srv_format = DXGI_FORMAT_UNKNOWN;
DXGI_FORMAT uav_format = DXGI_FORMAT_UNKNOWN;
/* Cached buffer allocated with the shader-resource bind flag */
GstBuffer *srv_buf = nullptr;
/* Cached buffer allocated with shader-resource and unordered-access flags */
GstBuffer *uav_buf = nullptr;
/* Software converter, created only when a conversion is required but no
 * compute shader could be created */
GstVideoConverter *sw_conv = nullptr;
/* Formats and size given at construction time */
GstVideoInfo in_info;
GstVideoInfo out_info;
/* Same formats, but with the size last passed to update_size(); used for
 * allocating the cached buffers */
GstVideoInfo in_alloc_info;
GstVideoInfo out_alloc_info;
/* Thread group counts passed to Dispatch() */
guint tg_x;
guint tg_y;
/* Divisors applied to width/height to derive tg_x/tg_y */
guint x_unit;
guint y_unit;
};
/* *INDENT-OFF* */
/**
 * gst_d3d11_converter_helper_new:
 * @device: device used to create the compute shader and intermediate buffers
 * @in_format: format of the buffers handed to the helper
 * @out_format: format the helper produces
 * @width: initial frame width
 * @height: initial frame height
 *
 * Creates a helper for one of the supported conversions (YUY2 -> VUYA,
 * VUYA -> YUY2, AYUV64 -> Y410) or for plain pass-through when both formats
 * are equal. For a real conversion a compute shader is preferred (precompiled
 * bytecode if available, otherwise compiled from the embedded HLSL source);
 * when no shader can be created a software #GstVideoConverter is used instead.
 *
 * Returns: a new helper to be released with gst_d3d11_converter_helper_free(),
 * or %NULL if the format pair is different and not supported
 */
GstD3D11ConverterHelper *
gst_d3d11_converter_helper_new (GstD3D11Device * device,
    GstVideoFormat in_format, GstVideoFormat out_format, guint width,
    guint height)
{
  GstD3D11ConverterHelper *self;
  ComPtr < ID3D11ComputeShader > cs;
  DXGI_FORMAT srv_format = DXGI_FORMAT_UNKNOWN;
  DXGI_FORMAT uav_format = DXGI_FORMAT_UNKNOWN;
  /* Width/height are divided by these to get the thread group counts */
  guint x_unit = 16;
  guint y_unit = 8;
  std::string entry_point;
  HRESULT hr;

  if (in_format == GST_VIDEO_FORMAT_YUY2 && out_format == GST_VIDEO_FORMAT_VUYA) {
    entry_point = "CSMain_YUY2_to_VUYA";
    srv_format = DXGI_FORMAT_R8G8B8A8_UINT;
    uav_format = DXGI_FORMAT_R32_UINT;
  } else if (in_format == GST_VIDEO_FORMAT_VUYA &&
      out_format == GST_VIDEO_FORMAT_YUY2) {
    entry_point = "CSMain_VUYA_to_YUY2";
    srv_format = DXGI_FORMAT_R8G8B8A8_UINT;
    uav_format = DXGI_FORMAT_R32_UINT;
  } else if (in_format == GST_VIDEO_FORMAT_AYUV64 &&
      out_format == GST_VIDEO_FORMAT_Y410) {
    entry_point = "CSMain_AYUV64_to_Y410";
    srv_format = DXGI_FORMAT_R16G16B16A16_UNORM;
    uav_format = DXGI_FORMAT_R32_UINT;
    x_unit = 8;
  } else if (in_format != out_format) {
    g_assert_not_reached ();
    return nullptr;
  }

  self = new GstD3D11ConverterHelper ();
  self->device = (GstD3D11Device *) gst_object_ref (device);
  gst_video_info_set_format (&self->in_info, in_format, width, height);
  gst_video_info_set_format (&self->out_info, out_format, width, height);
  self->in_alloc_info = self->in_info;
  self->out_alloc_info = self->out_info;
  self->srv_format = srv_format;
  self->uav_format = uav_format;

  /* Same input and output format: nothing to convert, no shader needed */
  if (entry_point.empty ())
    return self;

  auto handle = gst_d3d11_device_get_device_handle (device);
  D3D_FEATURE_LEVEL feature_level = handle->GetFeatureLevel ();

  /* The shaders target cs_5_0, hence the feature level requirement */
  if (feature_level >= D3D_FEATURE_LEVEL_11_0) {
    std::lock_guard<std::mutex> lk (cache_lock);
    std::shared_ptr<ConverterCSSource> source;

    auto cached = cs_source_cache.find (entry_point);
    if (cached != cs_source_cache.end ()) {
      source = cached->second;
    } else {
      source = std::make_shared<ConverterCSSource> ();
      source->token = gst_d3d11_compute_shader_token_new ();
      source->entry_point = entry_point;

      auto precompiled = precompiled_bytecode.find (entry_point);
      if (precompiled != precompiled_bytecode.end ()) {
        source->bytecode = precompiled->second.first;
        source->bytecode_size = precompiled->second.second;
      } else {
        source->bytecode = nullptr;
        source->bytecode_size = 0;
        source->macros.push_back (std::make_pair ("ENTRY_POINT", entry_point));
        source->macros.push_back (std::make_pair ("BUILDING_" + entry_point,
                "1"));
      }

      cs_source_cache[entry_point] = source;
    }

    if (source->bytecode) {
      hr = handle->CreateComputeShader (source->bytecode,
          source->bytecode_size, nullptr, &cs);
      if (!gst_d3d11_result (hr, device))
        GST_WARNING ("Couldn't create compute shader from precompiled blob");
    } else {
      std::vector<D3D_SHADER_MACRO> macros;
      ComPtr < ID3DBlob > blob;

      for (const auto & defines : source->macros)
        macros.push_back ({defines.first.c_str (), defines.second.c_str ()});
      /* D3D_SHADER_MACRO arrays are terminated by a null entry */
      macros.push_back ({nullptr, nullptr});

      hr = gst_d3d11_shader_cache_get_compute_shader_blob (source->token,
          g_CSMain_converter_str, sizeof (g_CSMain_converter_str),
          source->entry_point.c_str (), &macros[0], &blob);
      if (!blob) {
        /* Previously a compile failure was silent; report it before the
         * software fallback below kicks in */
        GST_WARNING ("Couldn't compile compute shader \"%s\", hr: 0x%x",
            source->entry_point.c_str (), (guint) hr);
      } else {
        hr = handle->CreateComputeShader (blob->GetBufferPointer (),
            blob->GetBufferSize (), nullptr, &cs);
        if (!gst_d3d11_result (hr, device))
          GST_WARNING ("Couldn't create compute shader from source");
      }
    }
  }

  if (cs) {
    self->cs = cs;
    self->x_unit = x_unit;
    self->y_unit = y_unit;
    /* Integer ceiling division: enough groups to cover a partial last tile */
    self->tg_x = width / x_unit + ((width % x_unit) != 0 ? 1 : 0);
    self->tg_y = height / y_unit + ((height % y_unit) != 0 ? 1 : 0);
  } else {
    /* No usable shader: convert on the CPU instead */
    self->sw_conv =
        gst_video_converter_new (&self->in_info, &self->out_info, nullptr);
    if (!self->sw_conv)
      GST_ERROR ("Couldn't create software converter");
  }

  return self;
}
/* *INDENT-ON* */
/* Destroys a helper created by gst_d3d11_converter_helper_new(). The struct
 * destructor releases the software converter, cached buffers and device. */
void
gst_d3d11_converter_helper_free (GstD3D11ConverterHelper * converter)
{
delete converter;
}
/**
 * gst_d3d11_converter_helper_update_size:
 * @helper: a #GstD3D11ConverterHelper
 * @width: new allocation width
 * @height: new allocation height
 *
 * Adapts the helper to a new frame size. Nothing happens when the size equals
 * the current allocation size. Otherwise the cached buffers are dropped, both
 * allocation infos are rebuilt, the thread group counts are recomputed when a
 * compute shader is in use, and an existing software converter is recreated.
 */
void
gst_d3d11_converter_helper_update_size (GstD3D11ConverterHelper * helper,
    guint width, guint height)
{
  const gboolean unchanged =
      width == (guint) helper->in_alloc_info.width &&
      height == (guint) helper->in_alloc_info.height;

  if (unchanged)
    return;

  /* Cached buffers were sized for the previous resolution */
  gst_clear_buffer (&helper->srv_buf);
  gst_clear_buffer (&helper->uav_buf);

  const GstVideoFormat in_fmt = GST_VIDEO_INFO_FORMAT (&helper->in_info);
  const GstVideoFormat out_fmt = GST_VIDEO_INFO_FORMAT (&helper->out_info);
  gst_video_info_set_format (&helper->in_alloc_info, in_fmt, width, height);
  gst_video_info_set_format (&helper->out_alloc_info, out_fmt, width, height);

  if (helper->cs) {
    const gboolean exact_fit =
        (width % helper->x_unit) == 0 && (height % helper->y_unit) == 0;

    if (exact_fit) {
      helper->tg_x = width / helper->x_unit;
      helper->tg_y = height / helper->y_unit;
    } else {
      /* Round up so that the partial tile at the edge is still processed */
      helper->tg_x = (gint) ceil (width / (float) helper->x_unit);
      helper->tg_y = (gint) ceil (height / (float) helper->y_unit);
    }
  }

  if (!helper->sw_conv)
    return;

  gst_video_converter_free (helper->sw_conv);
  helper->sw_conv = gst_video_converter_new (&helper->in_alloc_info,
      &helper->out_alloc_info, nullptr);
}
/* Allocates a single D3D11 buffer described by @info with the given
 * @bind_flags. A temporary buffer pool is configured, activated, asked for
 * one buffer and then deactivated and unreffed again.
 *
 * Returns the acquired buffer, or nullptr if the pool could not be
 * configured, activated, or did not hand out a buffer. */
static GstBuffer *
gst_d3d11_converter_helper_allocate_buffer (GstD3D11ConverterHelper * self,
    const GstVideoInfo * info, UINT bind_flags)
{
  GstD3D11AllocationParams *params;
  GstBufferPool *pool;
  GstCaps *caps;
  GstStructure *config;
  GstBuffer *buf = nullptr;
  GstFlowReturn flow;

  params = gst_d3d11_allocation_params_new (self->device, info,
      GST_D3D11_ALLOCATION_FLAG_DEFAULT, bind_flags, 0);
  caps = gst_video_info_to_caps (info);
  pool = gst_d3d11_buffer_pool_new (self->device);

  config = gst_buffer_pool_get_config (pool);
  gst_buffer_pool_config_set_params (config, caps, info->size, 0, 0);
  gst_buffer_pool_config_set_d3d11_allocation_params (config, params);
  /* caps and params are no longer needed once stored in the config */
  gst_caps_unref (caps);
  gst_d3d11_allocation_params_free (params);

  if (!gst_buffer_pool_set_config (pool, config)) {
    GST_ERROR ("Failed to set pool config");
    gst_object_unref (pool);
    return nullptr;
  }

  if (!gst_buffer_pool_set_active (pool, TRUE)) {
    GST_ERROR ("Failed to set active");
    gst_object_unref (pool);
    return nullptr;
  }

  /* The flow return used to be discarded, which hid allocation failures */
  flow = gst_buffer_pool_acquire_buffer (pool, &buf, nullptr);
  if (flow != GST_FLOW_OK) {
    GST_ERROR ("Failed to acquire buffer");
    gst_clear_buffer (&buf);
  }

  gst_buffer_pool_set_active (pool, FALSE);
  gst_object_unref (pool);

  return buf;
}
/* Makes @buffer usable as a shader resource on the helper's device.
 *
 * - D3D11 memory on the same device that already has the shader-resource bind
 *   flag is returned as is.
 * - D3D11 memory on the same device without that flag is copied on the GPU
 *   into the cached srv_buf.
 * - Anything else (system memory, or D3D11 memory of another device) is
 *   mapped and copied into srv_buf via gst_video_frame_copy().
 *
 * Returns @buffer itself or the helper-owned srv_buf (no reference is added
 * in either case), or nullptr on failure. */
static GstBuffer *
gst_d3d11_converter_helper_upload (GstD3D11ConverterHelper * self,
    GstBuffer * buffer)
{
  GstMemory *mem = gst_buffer_peek_memory (buffer, 0);
  GstVideoFrame in_frame, out_frame;

  if (gst_is_d3d11_memory (mem)) {
    GstD3D11Memory *dmem = GST_D3D11_MEMORY_CAST (mem);
    D3D11_TEXTURE2D_DESC desc;

    gst_d3d11_memory_get_texture_desc (dmem, &desc);
    /* Track the texture size, which may differ from the configured size */
    gst_d3d11_converter_helper_update_size (self, desc.Width, desc.Height);

    if (dmem->device == self->device) {
      if ((desc.BindFlags & D3D11_BIND_SHADER_RESOURCE) != 0)
        return buffer;

      if (!self->srv_buf) {
        self->srv_buf = gst_d3d11_converter_helper_allocate_buffer (self,
            &self->in_alloc_info, D3D11_BIND_SHADER_RESOURCE);
      }

      if (!self->srv_buf)
        return nullptr;

      const guint n_mem = gst_buffer_n_memory (buffer);
      /* Peeking past the end of srv_buf would yield a null memory */
      if (n_mem > gst_buffer_n_memory (self->srv_buf)) {
        GST_ERROR ("Input buffer has more memories than staging buffer");
        return nullptr;
      }

      auto ctx = gst_d3d11_device_get_device_context_handle (self->device);
      for (guint i = 0; i < n_mem; i++) {
        GstMapInfo in_map, out_map;
        GstMemory *in_mem, *out_mem;
        guint in_subresource;
        guint out_subresource;
        D3D11_TEXTURE2D_DESC in_desc;
        D3D11_TEXTURE2D_DESC out_desc;
        D3D11_BOX src_box = { 0, };

        in_mem = gst_buffer_peek_memory (buffer, i);
        out_mem = gst_buffer_peek_memory (self->srv_buf, i);

        if (!gst_memory_map (in_mem, &in_map, (GstMapFlags)
                (GST_MAP_READ | GST_MAP_D3D11))) {
          GST_ERROR ("Couldn't map in memory");
          return nullptr;
        }

        if (!gst_memory_map (out_mem, &out_map, (GstMapFlags)
                (GST_MAP_WRITE | GST_MAP_D3D11))) {
          GST_ERROR ("Couldn't map out memory");
          gst_memory_unmap (in_mem, &in_map);
          return nullptr;
        }

        dmem = GST_D3D11_MEMORY_CAST (in_mem);
        gst_d3d11_memory_get_texture_desc (dmem, &in_desc);
        in_subresource = gst_d3d11_memory_get_subresource_index (dmem);

        dmem = GST_D3D11_MEMORY_CAST (out_mem);
        gst_d3d11_memory_get_texture_desc (dmem, &out_desc);
        /* Ask the destination for its subresource instead of assuming 0,
         * matching gst_d3d11_converter_helper_copy_buffer() */
        out_subresource = gst_d3d11_memory_get_subresource_index (dmem);

        /* Copy only the area both textures have in common */
        src_box.left = 0;
        src_box.top = 0;
        src_box.front = 0;
        src_box.back = 1;
        src_box.right = MIN (in_desc.Width, out_desc.Width);
        src_box.bottom = MIN (in_desc.Height, out_desc.Height);

        ctx->CopySubresourceRegion ((ID3D11Resource *) out_map.data,
            out_subresource, 0, 0, 0,
            (ID3D11Resource *) in_map.data, in_subresource, &src_box);

        gst_memory_unmap (in_mem, &in_map);
        gst_memory_unmap (out_mem, &out_map);
      }

      return self->srv_buf;
    }
  }

  /* CPU path: map the input and copy it into the staging texture */
  if (!gst_video_frame_map (&in_frame, &self->in_info, buffer, GST_MAP_READ)) {
    GST_ERROR ("Couldn't map in buffer");
    return nullptr;
  }

  gst_d3d11_converter_helper_update_size (self,
      in_frame.info.width, in_frame.info.height);

  if (!self->srv_buf) {
    /* Allocate with the same info that is used for mapping below */
    self->srv_buf = gst_d3d11_converter_helper_allocate_buffer (self,
        &self->in_alloc_info, D3D11_BIND_SHADER_RESOURCE);
  }

  if (!self->srv_buf) {
    gst_video_frame_unmap (&in_frame);
    return nullptr;
  }

  if (!gst_video_frame_map (&out_frame, &self->in_alloc_info, self->srv_buf,
          GST_MAP_WRITE)) {
    GST_ERROR ("Couldn't map out buffer");
    gst_video_frame_unmap (&in_frame);
    return nullptr;
  }

  gst_video_frame_copy (&out_frame, &in_frame);
  gst_video_frame_unmap (&out_frame);
  gst_video_frame_unmap (&in_frame);

  return self->srv_buf;
}
/* GPU copy of every memory of @src into the memory with the same index of
 * @dst, limited to the area both textures share. Copying a buffer onto itself
 * is a no-op. Returns FALSE as soon as one memory cannot be mapped. */
static gboolean
gst_d3d11_converter_helper_copy_buffer (GstD3D11ConverterHelper * self,
    GstBuffer * dst, GstBuffer * src)
{
  if (dst == src)
    return TRUE;

  auto context = gst_d3d11_device_get_device_context_handle (self->device);

  for (guint idx = 0; idx < gst_buffer_n_memory (dst); idx++) {
    GstMemory *src_mem = gst_buffer_peek_memory (src, idx);
    GstMemory *dst_mem = gst_buffer_peek_memory (dst, idx);
    GstMapInfo src_map, dst_map;

    const GstMapFlags read_flags =
        (GstMapFlags) (GST_MAP_READ | GST_MAP_D3D11);
    if (!gst_memory_map (src_mem, &src_map, read_flags)) {
      GST_ERROR ("Couldn't map in memory");
      return FALSE;
    }

    const GstMapFlags write_flags =
        (GstMapFlags) (GST_MAP_WRITE | GST_MAP_D3D11);
    if (!gst_memory_map (dst_mem, &dst_map, write_flags)) {
      GST_ERROR ("Couldn't map out memory");
      gst_memory_unmap (src_mem, &src_map);
      return FALSE;
    }

    GstD3D11Memory *src_dmem = GST_D3D11_MEMORY_CAST (src_mem);
    D3D11_TEXTURE2D_DESC src_desc;
    gst_d3d11_memory_get_texture_desc (src_dmem, &src_desc);
    const guint src_subresource =
        gst_d3d11_memory_get_subresource_index (src_dmem);

    GstD3D11Memory *dst_dmem = GST_D3D11_MEMORY_CAST (dst_mem);
    D3D11_TEXTURE2D_DESC dst_desc;
    const guint dst_subresource =
        gst_d3d11_memory_get_subresource_index (dst_dmem);
    gst_d3d11_memory_get_texture_desc (dst_dmem, &dst_desc);

    /* Clamp the copied region to the smaller of the two textures */
    D3D11_BOX region = { 0, };
    region.left = 0;
    region.top = 0;
    region.front = 0;
    region.back = 1;
    region.right = MIN (src_desc.Width, dst_desc.Width);
    region.bottom = MIN (src_desc.Height, dst_desc.Height);

    context->CopySubresourceRegion ((ID3D11Resource *) dst_map.data,
        dst_subresource, 0, 0, 0,
        (ID3D11Resource *) src_map.data, src_subresource, &region);

    gst_memory_unmap (src_mem, &src_map);
    gst_memory_unmap (dst_mem, &dst_map);
  }

  return TRUE;
}
/* Returns the helper-owned buffer the compute shader can write to, creating
 * it on first use with shader-resource and unordered-access bind flags.
 * The result is nullptr when the allocation fails. */
static GstBuffer *
gst_d3d11_converter_helper_get_uav_outbuf (GstD3D11ConverterHelper * self)
{
  if (!self->uav_buf) {
    const UINT bind_flags =
        D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS;

    self->uav_buf = gst_d3d11_converter_helper_allocate_buffer (self,
        &self->out_alloc_info, bind_flags);
  }

  return self->uav_buf;
}
/**
 * gst_d3d11_converter_helper_preproc:
 * @helper: a #GstD3D11ConverterHelper
 * @buffer: input buffer
 *
 * Prepares @buffer for further processing on the GPU:
 * - with a compute shader, the input is uploaded if needed and converted into
 *   the helper's unordered-access buffer;
 * - with a software converter, the input is converted on the CPU into the
 *   helper's shader-resource buffer;
 * - otherwise the input is only uploaded.
 *
 * Returns: @buffer itself or a buffer owned by @helper (no reference is
 * added in either case), or %NULL on failure
 */
GstBuffer *
gst_d3d11_converter_helper_preproc (GstD3D11ConverterHelper * helper,
    GstBuffer * buffer)
{
  GstBuffer *outbuf = nullptr;

  if (helper->cs) {
    GstBuffer *inbuf;
    ID3D11ShaderResourceView *srv[1];
    ID3D11ShaderResourceView *srv_unbind[1] = { nullptr };
    ID3D11UnorderedAccessView *uav[1];
    ID3D11UnorderedAccessView *uav_unbind[1] = { nullptr };
    GstMemory *in_mem;
    GstMemory *out_mem;
    GstMapInfo in_map;
    GstMapInfo out_map;
    ComPtr < ID3D11ShaderResourceView > in_srv;
    ComPtr < ID3D11UnorderedAccessView > out_uav;
    auto ctx = gst_d3d11_device_get_device_context_handle (helper->device);
    auto device = gst_d3d11_device_get_device_handle (helper->device);
    HRESULT hr;
    /* Zero the descriptors so no member is left indeterminate */
    D3D11_SHADER_RESOURCE_VIEW_DESC srv_desc = { };
    D3D11_UNORDERED_ACCESS_VIEW_DESC uav_desc = { };

    inbuf = gst_d3d11_converter_helper_upload (helper, buffer);
    if (!inbuf)
      return nullptr;

    outbuf = gst_d3d11_converter_helper_get_uav_outbuf (helper);
    if (!outbuf)
      return nullptr;

    in_mem = gst_buffer_peek_memory (inbuf, 0);
    out_mem = gst_buffer_peek_memory (outbuf, 0);

    if (!gst_memory_map (in_mem, &in_map,
            (GstMapFlags) (GST_MAP_D3D11 | GST_MAP_READ))) {
      GST_ERROR ("Couldn't map in memory");
      return nullptr;
    }

    if (!gst_memory_map (out_mem, &out_map,
            (GstMapFlags) (GST_MAP_D3D11 | GST_MAP_WRITE))) {
      GST_ERROR ("Couldn't map out memory");
      gst_memory_unmap (in_mem, &in_map);
      return nullptr;
    }

    /* Views are created with the formats the shader expects */
    srv_desc.Format = helper->srv_format;
    srv_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
    srv_desc.Texture2D.MipLevels = 1;
    srv_desc.Texture2D.MostDetailedMip = 0;
    hr = device->CreateShaderResourceView ((ID3D11Resource *) in_map.data,
        &srv_desc, &in_srv);
    if (!gst_d3d11_result (hr, helper->device)) {
      gst_memory_unmap (out_mem, &out_map);
      gst_memory_unmap (in_mem, &in_map);
      return nullptr;
    }

    uav_desc.Format = helper->uav_format;
    uav_desc.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D;
    uav_desc.Texture2D.MipSlice = 0;
    hr = device->CreateUnorderedAccessView ((ID3D11Resource *) out_map.data,
        &uav_desc, &out_uav);
    if (!gst_d3d11_result (hr, helper->device)) {
      gst_memory_unmap (out_mem, &out_map);
      gst_memory_unmap (in_mem, &in_map);
      return nullptr;
    }

    srv[0] = in_srv.Get ();
    uav[0] = out_uav.Get ();

    ctx->CSSetShader (helper->cs.Get (), nullptr, 0);
    ctx->CSSetShaderResources (0, 1, srv);
    ctx->CSSetUnorderedAccessViews (0, 1, uav, nullptr);
    ctx->Dispatch (helper->tg_x, helper->tg_y, 1);
    /* Unbind everything again so the context holds no views of ours */
    ctx->CSSetUnorderedAccessViews (0, 1, uav_unbind, nullptr);
    ctx->CSSetShaderResources (0, 1, srv_unbind);
    ctx->CSSetShader (nullptr, nullptr, 0);

    gst_memory_unmap (out_mem, &out_map);
    gst_memory_unmap (in_mem, &in_map);
  } else if (helper->sw_conv) {
    GstVideoFrame in_frame, out_frame;

    if (!gst_video_frame_map (&in_frame,
            &helper->in_info, buffer, GST_MAP_READ)) {
      GST_ERROR ("Couldn't map input buffer");
      return nullptr;
    }

    gst_d3d11_converter_helper_update_size (helper, in_frame.info.width,
        in_frame.info.height);

    /* Here srv_buf holds the converted output, hence the output info */
    if (!helper->srv_buf) {
      helper->srv_buf = gst_d3d11_converter_helper_allocate_buffer (helper,
          &helper->out_alloc_info, D3D11_BIND_SHADER_RESOURCE);
    }

    if (!helper->srv_buf) {
      gst_video_frame_unmap (&in_frame);
      return nullptr;
    }

    if (!gst_video_frame_map (&out_frame,
            &helper->out_alloc_info, helper->srv_buf, GST_MAP_WRITE)) {
      /* The message used to blame the input buffer */
      GST_ERROR ("Couldn't map output buffer");
      gst_video_frame_unmap (&in_frame);
      return nullptr;
    }

    gst_video_converter_frame (helper->sw_conv, &in_frame, &out_frame);
    gst_video_frame_unmap (&out_frame);
    gst_video_frame_unmap (&in_frame);

    outbuf = helper->srv_buf;
  } else {
    outbuf = gst_d3d11_converter_helper_upload (helper, buffer);
  }

  return outbuf;
}
/**
 * gst_d3d11_converter_helper_postproc:
 * @helper: a #GstD3D11ConverterHelper
 * @in_buf: buffer holding the data to convert
 * @out_buf: destination buffer
 *
 * Writes the converted content of @in_buf into @out_buf:
 * - with a compute shader, the shader writes directly into @out_buf when its
 *   texture has the unordered-access bind flag, otherwise into the helper's
 *   own unordered-access buffer which is then copied into @out_buf;
 * - with a software converter, both buffers are mapped and converted on the
 *   CPU;
 * - otherwise @in_buf is copied into @out_buf on the GPU.
 *
 * Returns: %TRUE on success
 */
gboolean
gst_d3d11_converter_helper_postproc (GstD3D11ConverterHelper * helper,
    GstBuffer * in_buf, GstBuffer * out_buf)
{
  gboolean ret = TRUE;

  if (helper->cs) {
    ID3D11ShaderResourceView *srv[1];
    ID3D11ShaderResourceView *srv_unbind[1] = { nullptr };
    ID3D11UnorderedAccessView *uav[1];
    ID3D11UnorderedAccessView *uav_unbind[1] = { nullptr };
    GstMemory *in_mem;
    GstMemory *out_mem;
    GstMapInfo in_map;
    GstMapInfo out_map;
    ComPtr < ID3D11ShaderResourceView > in_srv;
    ComPtr < ID3D11UnorderedAccessView > out_uav;
    auto ctx = gst_d3d11_device_get_device_context_handle (helper->device);
    auto device = gst_d3d11_device_get_device_handle (helper->device);
    HRESULT hr;
    /* Zero the descriptors so no member is left indeterminate */
    D3D11_SHADER_RESOURCE_VIEW_DESC srv_desc = { };
    D3D11_UNORDERED_ACCESS_VIEW_DESC uav_desc = { };
    GstBuffer *uav_outbuf = out_buf;
    D3D11_TEXTURE2D_DESC desc;

    in_mem = gst_buffer_peek_memory (in_buf, 0);
    out_mem = gst_buffer_peek_memory (out_buf, 0);

    gst_d3d11_memory_get_texture_desc (GST_D3D11_MEMORY_CAST (out_mem), &desc);
    if ((desc.BindFlags & D3D11_BIND_UNORDERED_ACCESS) == 0) {
      /* Destination cannot be bound as UAV: render into our own buffer */
      uav_outbuf = gst_d3d11_converter_helper_get_uav_outbuf (helper);
      if (!uav_outbuf)
        return FALSE;

      out_mem = gst_buffer_peek_memory (uav_outbuf, 0);
    }

    if (!gst_memory_map (in_mem, &in_map,
            (GstMapFlags) (GST_MAP_D3D11 | GST_MAP_READ))) {
      GST_ERROR ("Couldn't map in memory");
      return FALSE;
    }

    if (!gst_memory_map (out_mem, &out_map,
            (GstMapFlags) (GST_MAP_D3D11 | GST_MAP_WRITE))) {
      GST_ERROR ("Couldn't map out memory");
      gst_memory_unmap (in_mem, &in_map);
      return FALSE;
    }

    /* Views are created with the formats the shader expects */
    srv_desc.Format = helper->srv_format;
    srv_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
    srv_desc.Texture2D.MipLevels = 1;
    srv_desc.Texture2D.MostDetailedMip = 0;
    hr = device->CreateShaderResourceView ((ID3D11Resource *) in_map.data,
        &srv_desc, &in_srv);
    if (!gst_d3d11_result (hr, helper->device)) {
      gst_memory_unmap (out_mem, &out_map);
      gst_memory_unmap (in_mem, &in_map);
      return FALSE;
    }

    uav_desc.Format = helper->uav_format;
    uav_desc.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D;
    uav_desc.Texture2D.MipSlice = 0;
    hr = device->CreateUnorderedAccessView ((ID3D11Resource *) out_map.data,
        &uav_desc, &out_uav);
    if (!gst_d3d11_result (hr, helper->device)) {
      gst_memory_unmap (out_mem, &out_map);
      gst_memory_unmap (in_mem, &in_map);
      return FALSE;
    }

    srv[0] = in_srv.Get ();
    uav[0] = out_uav.Get ();

    ctx->CSSetShader (helper->cs.Get (), nullptr, 0);
    ctx->CSSetShaderResources (0, 1, srv);
    ctx->CSSetUnorderedAccessViews (0, 1, uav, nullptr);
    ctx->Dispatch (helper->tg_x, helper->tg_y, 1);
    /* Unbind everything again so the context holds no views of ours */
    ctx->CSSetUnorderedAccessViews (0, 1, uav_unbind, nullptr);
    ctx->CSSetShaderResources (0, 1, srv_unbind);
    ctx->CSSetShader (nullptr, nullptr, 0);

    gst_memory_unmap (out_mem, &out_map);
    gst_memory_unmap (in_mem, &in_map);

    /* No-op when the shader already wrote into out_buf */
    ret = gst_d3d11_converter_helper_copy_buffer (helper, out_buf, uav_outbuf);
  } else if (helper->sw_conv) {
    GstVideoFrame in_frame, out_frame;

    if (!gst_video_frame_map (&in_frame,
            &helper->in_info, in_buf, GST_MAP_READ)) {
      GST_ERROR ("Couldn't map input buffer");
      return FALSE;
    }

    if (!gst_video_frame_map (&out_frame,
            &helper->out_info, out_buf, GST_MAP_WRITE)) {
      /* The message used to blame the input buffer */
      GST_ERROR ("Couldn't map output buffer");
      gst_video_frame_unmap (&in_frame);
      return FALSE;
    }

    gst_video_converter_frame (helper->sw_conv, &in_frame, &out_frame);
    gst_video_frame_unmap (&out_frame);
    gst_video_frame_unmap (&in_frame);
  } else {
    ret = gst_d3d11_converter_helper_copy_buffer (helper, out_buf, in_buf);
  }

  return ret;
}

View file

@ -0,0 +1,49 @@
/* GStreamer
* Copyright (C) 2023 Seungha Yang <seungha@centricular.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#pragma once
#include <gst/gst.h>
#include <gst/video/video.h>
#include <gst/d3d11/gstd3d11_fwd.h>
G_BEGIN_DECLS
/* Opaque helper that uploads buffers and converts between formats using a
 * compute shader or a software converter */
typedef struct _GstD3D11ConverterHelper GstD3D11ConverterHelper;
/* Creates a helper converting in_format to out_format at the given size;
 * release with gst_d3d11_converter_helper_free() */
GstD3D11ConverterHelper * gst_d3d11_converter_helper_new (GstD3D11Device * device,
GstVideoFormat in_format,
GstVideoFormat out_format,
guint width,
guint height);
/* Destroys a helper and everything it owns */
void gst_d3d11_converter_helper_free (GstD3D11ConverterHelper * helper);
/* Adapts internal buffers and dispatch sizes to a new frame size */
void gst_d3d11_converter_helper_update_size (GstD3D11ConverterHelper * helper,
guint width,
guint height);
/* Uploads and, if needed, converts buffer; the returned buffer is either
 * the input itself or owned by the helper, NULL on failure */
GstBuffer * gst_d3d11_converter_helper_preproc (GstD3D11ConverterHelper * helper,
GstBuffer * buffer);
/* Converts or copies in_buf into out_buf; returns TRUE on success */
gboolean gst_d3d11_converter_helper_postproc (GstD3D11ConverterHelper * helper,
GstBuffer * in_buf,
GstBuffer * out_buf);
G_END_DECLS

View file

@ -32,7 +32,7 @@
#include "gstd3d11memory.h" #include "gstd3d11memory.h"
#include "gstd3d11compile.h" #include "gstd3d11compile.h"
#include "gstd3d11bufferpool.h" #include "gstd3d11bufferpool.h"
#include "gstd3d11converterbuilder.h" #include "gstd3d11converter-builder.h"
#include <wrl.h> #include <wrl.h>
#include <string.h> #include <string.h>
#include <math.h> #include <math.h>
@ -55,7 +55,7 @@
* Since: 1.22 * Since: 1.22
*/ */
GST_DEBUG_CATEGORY_STATIC (gst_d3d11_converter_debug); GST_DEBUG_CATEGORY (gst_d3d11_converter_debug);
#define GST_CAT_DEFAULT gst_d3d11_converter_debug #define GST_CAT_DEFAULT gst_d3d11_converter_debug
DEFINE_ENUM_FLAG_OPERATORS (GstD3D11ConverterBackend); DEFINE_ENUM_FLAG_OPERATORS (GstD3D11ConverterBackend);

View file

@ -30,6 +30,9 @@ gint64 gst_d3d11_pixel_shader_token_new (void);
GST_D3D11_API GST_D3D11_API
gint64 gst_d3d11_vertex_shader_token_new (void); gint64 gst_d3d11_vertex_shader_token_new (void);
GST_D3D11_API
gint64 gst_d3d11_compute_shader_token_new (void);
GST_D3D11_API GST_D3D11_API
HRESULT gst_d3d11_device_get_pixel_shader (GstD3D11Device * device, HRESULT gst_d3d11_device_get_pixel_shader (GstD3D11Device * device,
gint64 token, gint64 token,

View file

@ -1699,6 +1699,16 @@ gst_d3d11_vertex_shader_token_new (void)
return token_.fetch_add (1); return token_.fetch_add (1);
} }
/* Hands out a new compute shader token. Tokens come from an atomic counter
 * that starts at 0 and grows by one per call. */
gint64
gst_d3d11_compute_shader_token_new (void)
{
  /* *INDENT-OFF* */
  static std::atomic < gint64 > next_token { 0 };
  /* *INDENT-ON* */

  /* Atomic post-increment yields the value held before the increment */
  return next_token++;
}
HRESULT HRESULT
gst_d3d11_device_get_pixel_shader_uncached (GstD3D11Device * device, gst_d3d11_device_get_pixel_shader_uncached (GstD3D11Device * device,
gint64 token, const void *bytecode, gsize bytecode_size, gint64 token, const void *bytecode, gsize bytecode_size,

View file

@ -31,6 +31,7 @@
static std::mutex cache_lock_; static std::mutex cache_lock_;
static std::map <gint64, ID3DBlob *> ps_blob_; static std::map <gint64, ID3DBlob *> ps_blob_;
static std::map <gint64, ID3DBlob *> vs_blob_; static std::map <gint64, ID3DBlob *> vs_blob_;
static std::map <gint64, ID3DBlob *> cs_blob_;
/* *INDENT-ON* */ /* *INDENT-ON* */
HRESULT HRESULT
@ -82,3 +83,28 @@ gst_d3d11_shader_cache_get_vertex_shader_blob (gint64 token,
return S_OK; return S_OK;
} }
/* Looks up the compiled compute shader blob registered for @token and, when
 * none exists yet, compiles @source for the "cs_5_0" target with @defines and
 * stores the result. On success *blob carries its own reference while the
 * cache keeps another one. Returns the compiler's HRESULT on failure. */
HRESULT
gst_d3d11_shader_cache_get_compute_shader_blob (gint64 token,
    const gchar * source, gsize source_size, const gchar * entry_point,
    const D3D_SHADER_MACRO * defines, ID3DBlob ** blob)
{
  std::lock_guard < std::mutex > lk (cache_lock_);

  auto entry = cs_blob_.find (token);
  if (entry == cs_blob_.end ()) {
    /* Cache miss: compile now and remember the result */
    HRESULT hr = gst_d3d11_compile (source, source_size, nullptr, defines,
        nullptr, entry_point, "cs_5_0", 0, 0, blob, nullptr);
    if (FAILED (hr))
      return hr;

    ID3DBlob *compiled = *blob;
    /* Extra reference owned by the cache */
    compiled->AddRef ();
    cs_blob_[token] = compiled;

    return S_OK;
  }

  /* Cache hit: give the caller its own reference */
  ID3DBlob *stored = entry->second;
  *blob = stored;
  stored->AddRef ();

  return S_OK;
}

View file

@ -37,5 +37,12 @@ HRESULT gst_d3d11_shader_cache_get_vertex_shader_blob (gint64 token,
const gchar * entry_point, const gchar * entry_point,
ID3DBlob ** blob); ID3DBlob ** blob);
HRESULT gst_d3d11_shader_cache_get_compute_shader_blob (gint64 token,
const gchar * source,
gsize source_size,
const gchar * entry_point,
const D3D_SHADER_MACRO * defines,
ID3DBlob ** blob);
G_END_DECLS G_END_DECLS

View file

@ -0,0 +1,123 @@
/* GStreamer
* Copyright (C) 2023 Seungha Yang <seungha@centricular.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#ifdef BUILDING_HLSL
#ifdef BUILDING_CSMain_AYUV64_to_Y410
Texture2D<float4> inTex : register(t0);
RWTexture2D<uint> outTex : register(u0);
// One thread per texel: scales the y/z/w channels of the input to 10 bits
// and packs them into a single 32-bit value.
void Execute (uint3 tid)
{
float4 val = inTex.Load (tid);
// Input values are floats; 1023 is the largest 10-bit value
uint3 scaled = val.yzw * 1023;
// Layout: bits 0-9 = input z, 10-19 = input y, 20-29 = input w,
// bits 30-31 always set (0xc0 << 24); the input x channel is not used
outTex[tid.xy] = (0xc0 << 24) | (scaled.z << 20) | (scaled.x << 10) | scaled.y;
}
#endif
#ifdef BUILDING_CSMain_VUYA_to_YUY2
Texture2D<uint4> inTex : register(t0);
RWTexture2D<uint> outTex : register(u0);
// One thread per output texel: reads two horizontally adjacent input texels
// and packs them into one 32-bit value.
void Execute (uint3 tid)
{
uint4 val = inTex.Load (uint3(tid.x * 2, tid.y, 0));
uint Y0 = val.b;
// U and V are taken from the first texel of the pair only
uint U = val.g;
uint V = val.r;
uint Y1 = inTex.Load (uint3(tid.x * 2 + 1, tid.y, 0)).b;
// Byte order from least significant: Y0, U, Y1, V
outTex[tid.xy] = Y0 | (U << 8) | (Y1 << 16) | (V << 24);
}
#endif
#ifdef BUILDING_CSMain_YUY2_to_VUYA
Texture2D<uint4> inTex : register(t0);
RWTexture2D<uint> outTex : register(u0);
// One thread per input texel: each input texel carries two Y values and
// one U/V pair, and is expanded into two output texels.
void Execute (uint3 tid)
{
uint4 val = inTex.Load (tid);
uint Y0 = val.r;
uint U = val.g;
uint Y1 = val.b;
uint V = val.a;
// Both outputs share U and V; the top byte is forced to 0xff
outTex[uint2(tid.x * 2, tid.y)] = V | (U << 8) | (Y0 << 16) | (0xff << 24);
outTex[uint2(tid.x * 2 + 1, tid.y)] = V | (U << 8) | (Y1 << 16) | (0xff << 24);
}
#endif
// Common entry point. ENTRY_POINT is a macro supplied at compile time and
// Execute() is whichever variant was enabled via a BUILDING_* define.
// Each thread group consists of 8x8 threads.
[numthreads(8, 8, 1)]
void ENTRY_POINT (uint3 tid : SV_DispatchThreadID)
{
Execute (tid);
}
#else
/* The HLSL source above as a C string, used when the shader is compiled at
 * runtime. Must be kept in sync with the BUILDING_HLSL section by hand. */
static const char g_CSMain_converter_str[] =
"#ifdef BUILDING_CSMain_AYUV64_to_Y410\n"
"Texture2D<float4> inTex : register(t0);\n"
"RWTexture2D<uint> outTex : register(u0);\n"
"\n"
"void Execute (uint3 tid)\n"
"{\n"
" float4 val = inTex.Load (tid);\n"
" uint3 scaled = val.yzw * 1023;\n"
" outTex[tid.xy] = (0xc0 << 24) | (scaled.z << 20) | (scaled.x << 10) | scaled.y;\n"
"}\n"
"#endif\n"
"\n"
"#ifdef BUILDING_CSMain_VUYA_to_YUY2\n"
"Texture2D<uint4> inTex : register(t0);\n"
"RWTexture2D<uint> outTex : register(u0);\n"
"\n"
"void Execute (uint3 tid)\n"
"{\n"
" uint4 val = inTex.Load (uint3(tid.x * 2, tid.y, 0));\n"
" uint Y0 = val.b;\n"
" uint U = val.g;\n"
" uint V = val.r;\n"
" uint Y1 = inTex.Load (uint3(tid.x * 2 + 1, tid.y, 0)).b;\n"
"\n"
" outTex[tid.xy] = Y0 | (U << 8) | (Y1 << 16) | (V << 24);\n"
"}\n"
"#endif\n"
"\n"
"#ifdef BUILDING_CSMain_YUY2_to_VUYA\n"
"Texture2D<uint4> inTex : register(t0);\n"
"RWTexture2D<uint> outTex : register(u0);\n"
"\n"
"void Execute (uint3 tid)\n"
"{\n"
" uint4 val = inTex.Load (tid);\n"
" uint Y0 = val.r;\n"
" uint U = val.g;\n"
" uint Y1 = val.b;\n"
" uint V = val.a;\n"
"\n"
" outTex[uint2(tid.x * 2, tid.y)] = V | (U << 8) | (Y0 << 16) | (0xff << 24);\n"
" outTex[uint2(tid.x * 2 + 1, tid.y)] = V | (U << 8) | (Y1 << 16) | (0xff << 24);\n"
"}\n"
"#endif\n"
"\n"
"[numthreads(8, 8, 1)]\n"
"void ENTRY_POINT (uint3 tid : SV_DispatchThreadID)\n"
"{\n"
" Execute (tid);\n"
"}\n";
#endif

View file

@ -41,11 +41,12 @@ def main(args):
parser = argparse.ArgumentParser(description='Read precompiled HLSL headers from directory and make single header') parser = argparse.ArgumentParser(description='Read precompiled HLSL headers from directory and make single header')
parser.add_argument("--input", help="the precompiled HLSL header directory") parser.add_argument("--input", help="the precompiled HLSL header directory")
parser.add_argument("--output", help="output header file location") parser.add_argument("--output", help="output header file location")
parser.add_argument("--prefix", help="HLSL header filename prefix")
args = parser.parse_args(args) args = parser.parse_args(args)
# Scan precompiled PSMain_*.h headers in build directory # Scan precompiled PSMain_*.h headers in build directory
# and generate single header # and generate single header
hlsl_headers = [os.path.basename(file) for file in os.listdir(args.input) if file.startswith("PSMain_") and file.endswith(".h") ] hlsl_headers = [os.path.basename(file) for file in os.listdir(args.input) if file.startswith(args.prefix) and file.endswith(".h") ]
with open(args.output, 'w', newline='\n', encoding='utf8') as f: with open(args.output, 'w', newline='\n', encoding='utf8') as f:
f.write(start_header) f.write(start_header)

View file

@ -99,10 +99,9 @@ generated_collection = custom_target(header_collection,
input : hlsl_precompiled, input : hlsl_precompiled,
output : header_collection, output : header_collection,
command : [header_collector, command : [header_collector,
'--input', '--input', meson.current_build_dir(),
meson.current_build_dir(), '--prefix', 'PSMain_',
'--output', '--output', '@OUTPUT@'
'@OUTPUT@'
]) ])
hlsl_precompiled += generated_collection hlsl_precompiled += generated_collection
@ -126,3 +125,39 @@ foreach shader : hlsl_vs_sources
'@INPUT@']) '@INPUT@'])
hlsl_precompiled += [compiled_shader] hlsl_precompiled += [compiled_shader]
endforeach endforeach
# Compute shaders: a single HLSL source is compiled once per entry point
hlsl_cs_source = files('CSMain_converter.hlsl')
hlsl_cs_entry_points = [
'CSMain_AYUV64_to_Y410',
'CSMain_VUYA_to_YUY2',
'CSMain_YUY2_to_VUYA',
]
# One generated header per entry point, named <entry point>.h
foreach shader : hlsl_cs_entry_points
entry_point = shader
header = '@0@.h'.format(entry_point)
compiled_shader = custom_target(header,
input : hlsl_cs_source,
output : header,
# BUILDING_HLSL selects the HLSL half of the source file, ENTRY_POINT names
# the generated function and BUILDING_<entry point> enables its Execute()
command : [fxc, '/Fh', '@OUTPUT@',
'/E', entry_point,
'/T', 'cs_5_0',
'/D', 'BUILDING_HLSL=1',
'/D', 'ENTRY_POINT=@0@'.format(entry_point),
'/D', 'BUILDING_@0@=1'.format(entry_point),
'/nologo',
'@INPUT@'])
hlsl_precompiled += [compiled_shader]
endforeach
# Collect all headers in the build directory starting with CSMain_ into
# CSMainConverter.h
header_collection = 'CSMainConverter.h'
generated_collection = custom_target(header_collection,
input : hlsl_precompiled,
output : header_collection,
command : [header_collector,
'--input', meson.current_build_dir(),
'--prefix', 'CSMain_',
'--output', '@OUTPUT@'
])
hlsl_precompiled += generated_collection

View file

@ -2,7 +2,8 @@ d3d11_sources = [
'gstd3d11bufferpool.cpp', 'gstd3d11bufferpool.cpp',
'gstd3d11compile.cpp', 'gstd3d11compile.cpp',
'gstd3d11converter.cpp', 'gstd3d11converter.cpp',
'gstd3d11converterbuilder.cpp', 'gstd3d11converter-builder.cpp',
'gstd3d11converter-helper.cpp',
'gstd3d11device.cpp', 'gstd3d11device.cpp',
'gstd3d11format.cpp', 'gstd3d11format.cpp',
'gstd3d11memory.cpp', 'gstd3d11memory.cpp',