glbuffer: support persistent buffer mappings

Requires OpenGL 4.4 or EXT_buffer_storage

Current mesa exposes GL_ARB_buffer_storage when retrieving the relevant
functions returns no-ops and causes failures.

Improves throughput of uploads by roughly 30%-60% and download throughput by
roughly 10-30% across depending on the exact scenario and hardware.

Part-of: <https://gitlab.freedesktop.org/gstreamer/gst-plugins-base/-/merge_requests/1191>
This commit is contained in:
Matthew Waters 2021-06-04 18:32:07 +10:00 committed by GStreamer Marge Bot
parent 8593e1517e
commit 04856a3828
6 changed files with 177 additions and 18 deletions

View file

@ -31,3 +31,4 @@
#include "sync.h"
#include "buffers.h"
#include "query.h"
#include "buffer_storage.h"

View file

@ -0,0 +1,42 @@
/*
* GStreamer
* Copyright (C) 2021 Matthew Waters <matthew@centricular.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
GST_GL_EXT_BEGIN (buffer_storage,
GST_GL_API_OPENGL | GST_GL_API_OPENGL3 |
GST_GL_API_GLES2,
4, 4,
255, 255,
"EXT\0", /* XXX: GL_ARB_buffer_storage advertisement seems to be broken in Mesa */
"buffer_storage\0")
GST_GL_EXT_FUNCTION (void, BufferStorage,
(GLenum target, GLsizeiptr, const void * data, GLbitfield flags))
GST_GL_EXT_END ()
GST_GL_EXT_BEGIN (flush_mapped,
GST_GL_API_OPENGL | GST_GL_API_OPENGL3 |
GST_GL_API_GLES2,
3, 0,
3, 0,
"\0",
"\0")
GST_GL_EXT_FUNCTION (void, FlushMappedBufferRange,
(GLenum target, GLintptr offset, GLsizeiptr length))
GST_GL_EXT_END ()

View file

@ -53,6 +53,8 @@
#define USING_GLES2(context) (gst_gl_context_check_gl_version (context, GST_GL_API_GLES2, 2, 0))
#define USING_GLES3(context) (gst_gl_context_check_gl_version (context, GST_GL_API_GLES2, 3, 0))
#define HAVE_BUFFER_STORAGE(context) (context->gl_vtable->BufferStorage != NULL)
/* compatibility definitions... */
#ifndef GL_MAP_READ_BIT
#define GL_MAP_READ_BIT 0x0001
@ -60,6 +62,18 @@
#ifndef GL_MAP_WRITE_BIT
#define GL_MAP_WRITE_BIT 0x0002
#endif
#ifndef GL_MAP_FLUSH_EXPLICIT_BIT
#define GL_MAP_FLUSH_EXPLICIT_BIT 0x0010
#endif
#ifndef GL_MAP_PERSISTENT_BIT
#define GL_MAP_PERSISTENT_BIT 0x0040
#endif
#ifndef GL_DYNAMIC_STORAGE_BIT
#define GL_DYNAMIC_STORAGE_BIT 0x0100
#endif
#ifndef GL_CLIENT_STORAGE_BIT
#define GL_CLIENT_STORAGE_BIT 0x0200
#endif
#ifndef GL_COPY_READ_BUFFER
#define GL_COPY_READ_BUFFER 0x8F36
#endif
@ -81,8 +95,31 @@ _gl_buffer_create (GstGLBuffer * gl_mem, GError ** error)
gl->GenBuffers (1, &gl_mem->id);
gl->BindBuffer (gl_mem->target, gl_mem->id);
gl->BufferData (gl_mem->target, gl_mem->mem.mem.maxsize, NULL,
gl_mem->usage_hints);
if (HAVE_BUFFER_STORAGE (gl_mem->mem.context)) {
GLenum flags = 0;
flags |= GL_MAP_READ_BIT | GL_MAP_WRITE_BIT;
/* allow access to GPU-side data while there are outstanding mappings */
flags |= GL_MAP_PERSISTENT_BIT;
/* match the glBufferData() below and make this buffer mutable */
flags |= GL_DYNAMIC_STORAGE_BIT;
/* hint that the data should be kept CPU-side. Fixes atrocious
* performance when e.g. libav decoders are direct-rendering into our
* data pointer */
/* XXX: make this an fine-grained option. The current assumption here is
* that users signal GL_STREAM_DRAW for regularly updating video frames as
* is done by gstglmemorypbo */
if (gl_mem->usage_hints == GL_STREAM_DRAW) {
GST_CAT_DEBUG (GST_CAT_GL_BUFFER, "using client-side storage for "
"buffer %p %u", gl_mem, gl_mem->id);
flags |= GL_CLIENT_STORAGE_BIT;
}
gl->BufferStorage (gl_mem->target, gl_mem->mem.mem.maxsize, NULL, flags);
} else {
gl->BufferData (gl_mem->target, gl_mem->mem.mem.maxsize, NULL,
gl_mem->usage_hints);
}
gl->BindBuffer (gl_mem->target, 0);
return TRUE;
@ -121,20 +158,50 @@ _gl_buffer_new (GstAllocator * allocator, GstMemory * parent,
return ret;
}
static GLenum
gst_map_flags_to_gl (GstMapFlags flags)
{
GLenum ret = 0;
if (flags & GST_MAP_READ)
ret |= GL_MAP_READ_BIT;
if (flags & GST_MAP_WRITE)
ret |= GL_MAP_WRITE_BIT;
return ret;
}
static gpointer
gst_gl_buffer_cpu_access (GstGLBuffer * mem, GstMapInfo * info, gsize size)
{
const GstGLFuncs *gl = mem->mem.context->gl_vtable;
gpointer data, ret;
GST_CAT_LOG (GST_CAT_GL_BUFFER, "mapping %p id %d size %" G_GSIZE_FORMAT,
mem, mem->id, size);
if (HAVE_BUFFER_STORAGE (mem->mem.context)) {
GLenum gl_map_flags = gst_map_flags_to_gl (info->flags);
gl_map_flags |= GL_MAP_PERSISTENT_BIT;
if (info->flags & GST_MAP_WRITE)
gl_map_flags |= GL_MAP_FLUSH_EXPLICIT_BIT;
if (mem->mem.map_count == (mem->mem.gl_map_count + 1)) {
gl->BindBuffer (mem->target, mem->id);
mem->mem.data = gl->MapBufferRange (mem->target, 0, size, gl_map_flags);
gl->BindBuffer (mem->target, 0);
}
return mem->mem.data;
}
if (!gst_gl_base_memory_alloc_data (GST_GL_BASE_MEMORY_CAST (mem)))
return NULL;
ret = mem->mem.data;
GST_CAT_LOG (GST_CAT_GL_BUFFER, "mapping id %d size %" G_GSIZE_FORMAT,
mem->id, size);
/* The extra data pointer indirection/memcpy is needed for coherent across
* concurrent map()'s in both GL and CPU */
if (GST_MEMORY_FLAG_IS_SET (mem, GST_GL_BASE_MEMORY_TRANSFER_NEED_DOWNLOAD)
@ -177,6 +244,14 @@ gst_gl_buffer_upload_cpu_write (GstGLBuffer * mem, GstMapInfo * info,
/* no data pointer has been written */
return;
GST_CAT_LOG (GST_CAT_GL_BUFFER, "mapping %p id %d size %" G_GSIZE_FORMAT,
mem, mem->id, size);
if (HAVE_BUFFER_STORAGE (mem->mem.context)) {
/* flushing is done on unmap already */
return;
}
/* The extra data pointer indirection/memcpy is needed for coherent across
* concurrent map()'s in both GL and CPU */
/* FIXME: uploading potentially half-written data for libav pushing READWRITE
@ -227,6 +302,21 @@ _gl_buffer_unmap (GstGLBuffer * mem, GstMapInfo * info)
{
const GstGLFuncs *gl = mem->mem.context->gl_vtable;
GST_CAT_LOG (GST_CAT_GL_BUFFER, "unmapping %p id %d size %" G_GSIZE_FORMAT,
mem, mem->id, info->size);
if (HAVE_BUFFER_STORAGE (mem->mem.context) && (info->flags & GST_MAP_GL) == 0) {
gl->BindBuffer (mem->target, mem->id);
if (info->flags & GST_MAP_WRITE)
gl->FlushMappedBufferRange (mem->target, 0, info->maxsize);
if (mem->mem.map_count == (mem->mem.gl_map_count + 1))
/* if this is our last cpu-mapping, unmap the GL buffer */
gl->UnmapBuffer (mem->target);
gl->BindBuffer (mem->target, 0);
}
if ((info->flags & GST_MAP_GL) != 0) {
gl->BindBuffer (mem->target, 0);
}

View file

@ -99,7 +99,7 @@ G_BEGIN_DECLS
struct _GstGLFuncs
{
#include <gst/gl/glprototypes/all_functions.h>
gpointer padding[GST_PADDING_LARGE*6];
gpointer padding[GST_PADDING_LARGE*6-2];
};
#undef GST_GL_EXT_BEGIN

View file

@ -197,6 +197,7 @@ _gl_mem_create (GstGLMemoryPBO * gl_mem, GError ** error)
{ 0, GST_MEMORY_CAST (gl_mem)->align, 0, 0 };
GstGLBaseMemoryAllocator *buf_allocator;
GstGLBufferAllocationParams *params;
GstMapInfo pbo_map;
buf_allocator =
GST_GL_BASE_MEMORY_ALLOCATOR (gst_allocator_find
@ -211,9 +212,34 @@ _gl_mem_create (GstGLMemoryPBO * gl_mem, GError ** error)
gl_mem->pbo = (GstGLBuffer *) gst_gl_base_memory_alloc (buf_allocator,
(GstGLAllocationParams *) params);
gst_gl_allocation_params_free ((GstGLAllocationParams *) params);
gst_object_unref (buf_allocator);
/* if we are wrapping an existing data pointer, and glbuffer is using
* GL_EXT/ARB_buffer_storage or OpenGL 4.4 then we need to copy into the GL
* provided data pointer.
*/
if (GST_MEMORY_FLAG_IS_SET (gl_mem,
GST_GL_BASE_MEMORY_TRANSFER_NEED_UPLOAD)) {
GST_MINI_OBJECT_FLAG_SET (gl_mem->pbo,
GST_GL_BASE_MEMORY_TRANSFER_NEED_UPLOAD);
gl_mem->pbo->mem.data = params->parent.wrapped_data;
if (!gst_memory_map ((GstMemory *) gl_mem->pbo, &pbo_map,
GST_MAP_READWRITE)) {
gst_gl_allocation_params_free ((GstGLAllocationParams *) params);
GST_CAT_WARNING (GST_CAT_GL_MEMORY, "Failed to map pbo memory");
return FALSE;
}
if (pbo_map.data != params->parent.wrapped_data) {
memcpy (pbo_map.data, gl_mem->mem.mem.data,
GST_MEMORY_CAST (gl_mem)->size);
}
gst_memory_unmap ((GstMemory *) gl_mem->pbo, &pbo_map);
}
gst_gl_allocation_params_free ((GstGLAllocationParams *) params);
GST_CAT_LOG (GST_CAT_GL_MEMORY, "generated pbo %u", gl_mem->pbo->id);
}
@ -626,9 +652,12 @@ _gl_mem_pbo_alloc (GstGLBaseMemoryAllocator * allocator,
GstGLVideoAllocationParams * params)
{
GstGLMemoryPBO *mem;
GstAllocationParams alloc_params = { 0, };
guint alloc_flags;
alloc_flags = params->parent.alloc_flags;
if (params->parent.alloc_params)
alloc_params = *params->parent.alloc_params;
g_return_val_if_fail (alloc_flags & GST_GL_ALLOCATION_PARAMS_ALLOC_FLAG_VIDEO,
NULL);
@ -638,23 +667,19 @@ _gl_mem_pbo_alloc (GstGLBaseMemoryAllocator * allocator,
if (alloc_flags & GST_GL_ALLOCATION_PARAMS_ALLOC_FLAG_WRAP_GPU_HANDLE) {
mem->mem.tex_id = GPOINTER_TO_UINT (params->parent.gl_handle);
mem->mem.texture_wrapped = TRUE;
alloc_params.flags |= GST_GL_BASE_MEMORY_TRANSFER_NEED_DOWNLOAD;
}
if (alloc_flags & GST_GL_ALLOCATION_PARAMS_ALLOC_FLAG_WRAP_SYSMEM) {
alloc_params.flags |= GST_GL_BASE_MEMORY_TRANSFER_NEED_UPLOAD;
mem->mem.mem.data = params->parent.wrapped_data;
}
gst_gl_memory_init (GST_GL_MEMORY_CAST (mem), GST_ALLOCATOR_CAST (allocator),
NULL, params->parent.context, params->target, params->tex_format,
params->parent.alloc_params, params->v_info, params->plane,
params->valign, params->parent.user_data, params->parent.notify);
&alloc_params, params->v_info, params->plane, params->valign,
params->parent.user_data, params->parent.notify);
if (alloc_flags & GST_GL_ALLOCATION_PARAMS_ALLOC_FLAG_WRAP_GPU_HANDLE) {
GST_MINI_OBJECT_FLAG_SET (mem, GST_GL_BASE_MEMORY_TRANSFER_NEED_DOWNLOAD);
}
if (alloc_flags & GST_GL_ALLOCATION_PARAMS_ALLOC_FLAG_WRAP_SYSMEM) {
GST_MINI_OBJECT_FLAG_SET (mem, GST_GL_BASE_MEMORY_TRANSFER_NEED_UPLOAD);
if (mem->pbo) {
GST_MINI_OBJECT_FLAG_SET (mem->pbo,
GST_GL_BASE_MEMORY_TRANSFER_NEED_UPLOAD);
mem->pbo->mem.data = params->parent.wrapped_data;
}
mem->mem.mem.data = params->parent.wrapped_data;
}

View file

@ -82,6 +82,7 @@ gl_headers = gir_gl_headers + [
gl_prototype_headers = [
'glprototypes/all_functions.h',
'glprototypes/base.h',
'glprototypes/buffer_storage.h',
'glprototypes/blending.h',
'glprototypes/buffers.h',
'glprototypes/debug.h',