/* GStreamer * Copyright (C) <2018-2019> Seungha Yang * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, * Boston, MA 02110-1301, USA. */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "gstcudamemory.h" #include "gstcudautils.h" #include GST_DEBUG_CATEGORY_STATIC (cuda_allocator_debug); #define GST_CAT_DEFAULT cuda_allocator_debug static GstAllocator *_gst_cuda_allocator = NULL; struct _GstCudaMemoryPrivate { CUdeviceptr data; void *staging; /* params used for cuMemAllocPitch */ gsize pitch; guint width_in_bytes; guint height; GMutex lock; GstCudaStream *stream; }; struct _GstCudaAllocatorPrivate { GstMemoryCopyFunction fallback_copy; }; #define gst_cuda_allocator_parent_class parent_class G_DEFINE_TYPE_WITH_PRIVATE (GstCudaAllocator, gst_cuda_allocator, GST_TYPE_ALLOCATOR); static void gst_cuda_allocator_free (GstAllocator * allocator, GstMemory * memory); static gpointer cuda_mem_map (GstMemory * mem, gsize maxsize, GstMapFlags flags); static void cuda_mem_unmap_full (GstMemory * mem, GstMapInfo * info); static GstMemory *cuda_mem_copy (GstMemory * mem, gssize offset, gssize size); static GstMemory * gst_cuda_allocator_dummy_alloc (GstAllocator * allocator, gsize size, GstAllocationParams * params) { g_return_val_if_reached (NULL); } static void gst_cuda_allocator_class_init (GstCudaAllocatorClass * klass) { GstAllocatorClass *allocator_class = GST_ALLOCATOR_CLASS (klass); allocator_class->alloc = GST_DEBUG_FUNCPTR (gst_cuda_allocator_dummy_alloc); allocator_class->free = GST_DEBUG_FUNCPTR (gst_cuda_allocator_free); GST_DEBUG_CATEGORY_INIT (cuda_allocator_debug, "cudaallocator", 0, "CUDA Allocator"); } static void gst_cuda_allocator_init (GstCudaAllocator * allocator) { GstAllocator *alloc = GST_ALLOCATOR_CAST (allocator); GstCudaAllocatorPrivate *priv; priv = allocator->priv = gst_cuda_allocator_get_instance_private (allocator); alloc->mem_type = GST_CUDA_MEMORY_TYPE_NAME; alloc->mem_map = cuda_mem_map; alloc->mem_unmap_full = cuda_mem_unmap_full; /* Store pointer to default mem_copy method for fallback copy */ priv->fallback_copy = alloc->mem_copy; alloc->mem_copy = cuda_mem_copy; GST_OBJECT_FLAG_SET (allocator, GST_ALLOCATOR_FLAG_CUSTOM_ALLOC); } static GstMemory * gst_cuda_allocator_alloc_internal (GstCudaAllocator * self, GstCudaContext * context, GstCudaStream * stream, const GstVideoInfo * info, guint width_in_bytes, guint alloc_height) { GstCudaMemoryPrivate *priv; GstCudaMemory *mem; CUdeviceptr data; gboolean ret = FALSE; gsize pitch; guint height = GST_VIDEO_INFO_HEIGHT (info); GstVideoInfo *alloc_info; if (!gst_cuda_context_push (context)) return NULL; ret = gst_cuda_result (CuMemAllocPitch (&data, &pitch, width_in_bytes, alloc_height, 16)); gst_cuda_context_pop (NULL); if (!ret) { GST_ERROR_OBJECT (self, "Failed to allocate CUDA memory"); return NULL; } mem = g_new0 (GstCudaMemory, 1); mem->priv = priv = g_new0 (GstCudaMemoryPrivate, 1); priv->data = data; priv->pitch = pitch; priv->width_in_bytes = width_in_bytes; priv->height = alloc_height; g_mutex_init (&priv->lock); if (stream) priv->stream = gst_cuda_stream_ref (stream); mem->context = gst_object_ref (context); mem->info = *info; mem->info.size = pitch * alloc_height; alloc_info = &mem->info; gst_memory_init (GST_MEMORY_CAST (mem), 0, GST_ALLOCATOR_CAST (self), NULL, alloc_info->size, 0, 0, alloc_info->size); switch (GST_VIDEO_INFO_FORMAT (info)) { case GST_VIDEO_FORMAT_I420: case GST_VIDEO_FORMAT_YV12: case GST_VIDEO_FORMAT_I420_10LE: /* we are wasting space yes, but required so that this memory * can be used in kernel function */ alloc_info->stride[0] = pitch; alloc_info->stride[1] = pitch; alloc_info->stride[2] = pitch; alloc_info->offset[0] = 0; alloc_info->offset[1] = alloc_info->stride[0] * height; alloc_info->offset[2] = alloc_info->offset[1] + alloc_info->stride[1] * height / 2; break; case GST_VIDEO_FORMAT_Y42B: case GST_VIDEO_FORMAT_I422_10LE: case GST_VIDEO_FORMAT_I422_12LE: alloc_info->stride[0] = pitch; alloc_info->stride[1] = pitch; alloc_info->stride[2] = pitch; alloc_info->offset[0] = 0; alloc_info->offset[1] = alloc_info->stride[0] * height; alloc_info->offset[2] = alloc_info->offset[1] + alloc_info->stride[1] * height; break; case GST_VIDEO_FORMAT_NV12: case GST_VIDEO_FORMAT_NV21: case GST_VIDEO_FORMAT_P010_10LE: case GST_VIDEO_FORMAT_P016_LE: alloc_info->stride[0] = pitch; alloc_info->stride[1] = pitch; alloc_info->offset[0] = 0; alloc_info->offset[1] = alloc_info->stride[0] * height; break; case GST_VIDEO_FORMAT_Y444: case GST_VIDEO_FORMAT_Y444_16LE: case GST_VIDEO_FORMAT_RGBP: case GST_VIDEO_FORMAT_BGRP: case GST_VIDEO_FORMAT_GBR: alloc_info->stride[0] = pitch; alloc_info->stride[1] = pitch; alloc_info->stride[2] = pitch; alloc_info->offset[0] = 0; alloc_info->offset[1] = alloc_info->stride[0] * height; alloc_info->offset[2] = alloc_info->offset[1] * 2; break; case GST_VIDEO_FORMAT_GBRA: alloc_info->stride[0] = pitch; alloc_info->stride[1] = pitch; alloc_info->stride[2] = pitch; alloc_info->stride[3] = pitch; alloc_info->offset[0] = 0; alloc_info->offset[1] = alloc_info->stride[0] * height; alloc_info->offset[2] = alloc_info->offset[1] * 2; alloc_info->offset[3] = alloc_info->offset[1] * 3; break; case GST_VIDEO_FORMAT_BGRA: case GST_VIDEO_FORMAT_RGBA: case GST_VIDEO_FORMAT_RGBx: case GST_VIDEO_FORMAT_BGRx: case GST_VIDEO_FORMAT_ARGB: case GST_VIDEO_FORMAT_ABGR: case GST_VIDEO_FORMAT_RGB: case GST_VIDEO_FORMAT_BGR: case GST_VIDEO_FORMAT_BGR10A2_LE: case GST_VIDEO_FORMAT_RGB10A2_LE: case GST_VIDEO_FORMAT_YUY2: case GST_VIDEO_FORMAT_UYVY: alloc_info->stride[0] = pitch; alloc_info->offset[0] = 0; break; default: GST_ERROR_OBJECT (self, "Unexpected format %s", gst_video_format_to_string (GST_VIDEO_INFO_FORMAT (info))); g_assert_not_reached (); gst_memory_unref (GST_MEMORY_CAST (mem)); return NULL; } return GST_MEMORY_CAST (mem); } static void gst_cuda_allocator_free (GstAllocator * allocator, GstMemory * memory) { GstCudaMemory *mem = GST_CUDA_MEMORY_CAST (memory); GstCudaMemoryPrivate *priv = mem->priv; gst_cuda_context_push (mem->context); /* Finish any pending operations before freeing */ if (priv->stream && GST_MEMORY_FLAG_IS_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_SYNC)) { CuStreamSynchronize (gst_cuda_stream_get_handle (priv->stream)); } if (priv->data) gst_cuda_result (CuMemFree (priv->data)); if (priv->staging) gst_cuda_result (CuMemFreeHost (priv->staging)); gst_cuda_context_pop (NULL); gst_clear_cuda_stream (&priv->stream); gst_object_unref (mem->context); g_mutex_clear (&priv->lock); g_free (mem->priv); g_free (mem); } static gboolean gst_cuda_memory_upload (GstCudaAllocator * self, GstCudaMemory * mem) { GstCudaMemoryPrivate *priv = mem->priv; gboolean ret = TRUE; CUDA_MEMCPY2D param = { 0, }; CUstream stream = gst_cuda_stream_get_handle (priv->stream); if (!priv->staging || !GST_MEMORY_FLAG_IS_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD)) { return TRUE; } if (!gst_cuda_context_push (mem->context)) { GST_ERROR_OBJECT (self, "Failed to push cuda context"); return FALSE; } param.srcMemoryType = CU_MEMORYTYPE_HOST; param.srcHost = priv->staging; param.srcPitch = priv->pitch; param.dstMemoryType = CU_MEMORYTYPE_DEVICE; param.dstDevice = (CUdeviceptr) priv->data; param.dstPitch = priv->pitch; param.WidthInBytes = priv->width_in_bytes; param.Height = priv->height; ret = gst_cuda_result (CuMemcpy2DAsync (¶m, stream)); /* Sync only if we use default stream. * Otherwise (in case of non-default stream case) sync is caller's * responsibility */ if (!priv->stream) { CuStreamSynchronize (stream); GST_MINI_OBJECT_FLAG_UNSET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_SYNC); } else { GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_SYNC); } gst_cuda_context_pop (NULL); if (!ret) GST_ERROR_OBJECT (self, "Failed to upload memory"); return ret; } static gboolean gst_cuda_memory_download (GstCudaAllocator * self, GstCudaMemory * mem) { GstCudaMemoryPrivate *priv = mem->priv; gboolean ret = TRUE; CUDA_MEMCPY2D param = { 0, }; CUstream stream = gst_cuda_stream_get_handle (priv->stream); if (!GST_MEMORY_FLAG_IS_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD)) return TRUE; if (!gst_cuda_context_push (mem->context)) { GST_ERROR_OBJECT (self, "Failed to push cuda context"); return FALSE; } if (!priv->staging) { ret = gst_cuda_result (CuMemAllocHost (&priv->staging, GST_MEMORY_CAST (mem)->size)); if (!ret) { GST_ERROR_OBJECT (self, "Failed to allocate staging memory"); gst_cuda_context_pop (NULL); return FALSE; } } param.srcMemoryType = CU_MEMORYTYPE_DEVICE; param.srcDevice = (CUdeviceptr) priv->data; param.srcPitch = priv->pitch; param.dstMemoryType = CU_MEMORYTYPE_HOST; param.dstHost = priv->staging; param.dstPitch = priv->pitch; param.WidthInBytes = priv->width_in_bytes; param.Height = priv->height; ret = gst_cuda_result (CuMemcpy2DAsync (¶m, stream)); /* For CPU access, sync immediately */ CuStreamSynchronize (stream); gst_cuda_context_pop (NULL); GST_MINI_OBJECT_FLAG_UNSET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_SYNC); if (!ret) GST_ERROR_OBJECT (self, "Failed to upload memory"); return ret; } static gpointer cuda_mem_map (GstMemory * mem, gsize maxsize, GstMapFlags flags) { GstCudaAllocator *self = GST_CUDA_ALLOCATOR (mem->allocator); GstCudaMemory *cmem = GST_CUDA_MEMORY_CAST (mem); GstCudaMemoryPrivate *priv = cmem->priv; gpointer ret = NULL; g_mutex_lock (&priv->lock); if ((flags & GST_MAP_CUDA) == GST_MAP_CUDA) { if (!gst_cuda_memory_upload (self, cmem)) goto out; GST_MEMORY_FLAG_UNSET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD); if ((flags & GST_MAP_WRITE) == GST_MAP_WRITE) { GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD); /* Assume that memory needs sync if we are using non-default stream */ if (priv->stream) GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_SYNC); } ret = (gpointer) priv->data; goto out; } /* First CPU access, must be downloaded */ if (!priv->staging) GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD); if (!gst_cuda_memory_download (self, cmem)) goto out; ret = priv->staging; if ((flags & GST_MAP_WRITE) == GST_MAP_WRITE) GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD); GST_MEMORY_FLAG_UNSET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD); out: g_mutex_unlock (&priv->lock); return ret; } static void cuda_mem_unmap_full (GstMemory * mem, GstMapInfo * info) { GstCudaMemory *cmem = GST_CUDA_MEMORY_CAST (mem); GstCudaMemoryPrivate *priv = cmem->priv; g_mutex_lock (&priv->lock); if ((info->flags & GST_MAP_CUDA) == GST_MAP_CUDA) { if ((info->flags & GST_MAP_WRITE) == GST_MAP_WRITE) GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD); goto out; } if ((info->flags & GST_MAP_WRITE) == GST_MAP_WRITE) GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD); out: g_mutex_unlock (&priv->lock); return; } static GstMemory * cuda_mem_copy (GstMemory * mem, gssize offset, gssize size) { GstCudaAllocator *self = GST_CUDA_ALLOCATOR (mem->allocator); GstCudaMemory *src_mem = GST_CUDA_MEMORY_CAST (mem); GstCudaContext *context = src_mem->context; GstMapInfo src_info, dst_info; CUDA_MEMCPY2D param = { 0, }; GstMemory *copy = NULL; gboolean ret; GstCudaStream *stream = src_mem->priv->stream; CUstream stream_handle = gst_cuda_stream_get_handle (stream); /* non-zero offset or different size is not supported */ if (offset != 0 || (size != -1 && (gsize) size != mem->size)) { GST_DEBUG_OBJECT (self, "Different size/offset, try fallback copy"); return self->priv->fallback_copy (mem, offset, size); } if (GST_IS_CUDA_POOL_ALLOCATOR (self)) { gst_cuda_pool_allocator_acquire_memory (GST_CUDA_POOL_ALLOCATOR (self), ©); } if (!copy) { copy = gst_cuda_allocator_alloc_internal (self, context, stream, &src_mem->info, src_mem->priv->width_in_bytes, src_mem->priv->height); } if (!copy) { GST_ERROR_OBJECT (self, "Failed to allocate memory for copying"); return NULL; } if (!gst_memory_map (mem, &src_info, GST_MAP_READ | GST_MAP_CUDA)) { GST_ERROR_OBJECT (self, "Failed to map src memory"); gst_memory_unref (copy); return NULL; } if (!gst_memory_map (copy, &dst_info, GST_MAP_WRITE | GST_MAP_CUDA)) { GST_ERROR_OBJECT (self, "Failed to map dst memory"); gst_memory_unmap (mem, &src_info); gst_memory_unref (copy); return NULL; } if (!gst_cuda_context_push (context)) { GST_ERROR_OBJECT (self, "Failed to push cuda context"); gst_memory_unmap (mem, &src_info); gst_memory_unmap (copy, &dst_info); return NULL; } param.srcMemoryType = CU_MEMORYTYPE_DEVICE; param.srcDevice = (CUdeviceptr) src_info.data; param.srcPitch = src_mem->priv->pitch; param.dstMemoryType = CU_MEMORYTYPE_DEVICE; param.dstDevice = (CUdeviceptr) dst_info.data; param.dstPitch = src_mem->priv->pitch; param.WidthInBytes = src_mem->priv->width_in_bytes; param.Height = src_mem->priv->height; ret = gst_cuda_result (CuMemcpy2DAsync (¶m, stream_handle)); CuStreamSynchronize (stream_handle); gst_cuda_context_pop (NULL); gst_memory_unmap (mem, &src_info); gst_memory_unmap (copy, &dst_info); if (!ret) { GST_ERROR_OBJECT (self, "Failed to copy memory"); gst_memory_unref (copy); return NULL; } return copy; } /** * gst_cuda_memory_init_once: * * Ensures that the #GstCudaAllocator is initialized and ready to be used. * * Since: 1.22 */ void gst_cuda_memory_init_once (void) { static gsize _init = 0; if (g_once_init_enter (&_init)) { _gst_cuda_allocator = (GstAllocator *) g_object_new (GST_TYPE_CUDA_ALLOCATOR, NULL); gst_object_ref_sink (_gst_cuda_allocator); gst_object_ref (_gst_cuda_allocator); gst_allocator_register (GST_CUDA_MEMORY_TYPE_NAME, _gst_cuda_allocator); g_once_init_leave (&_init, 1); } } /** * gst_is_cuda_memory: * @mem: A #GstMemory * * Check if @mem is a cuda memory * * Since: 1.22 */ gboolean gst_is_cuda_memory (GstMemory * mem) { return mem != NULL && mem->allocator != NULL && GST_IS_CUDA_ALLOCATOR (mem->allocator); } /** * gst_cuda_memory_get_stream: * @mem: A #GstCudaMemory * * Gets CUDA stream object associated with @mem * * Returns: (transfer none) (nullable): a #GstCudaStream or %NULL if default * CUDA stream is in use * * Since: 1.24 */ GstCudaStream * gst_cuda_memory_get_stream (GstCudaMemory * mem) { g_return_val_if_fail (gst_is_cuda_memory ((GstMemory *) mem), NULL); return mem->priv->stream; } /** * gst_cuda_memory_sync: * @mem: A #GstCudaMemory * * Performs synchronization if needed * * Since: 1.24 */ void gst_cuda_memory_sync (GstCudaMemory * mem) { GstCudaMemoryPrivate *priv; g_return_if_fail (gst_is_cuda_memory ((GstMemory *) mem)); priv = mem->priv; if (!priv->stream) return; g_mutex_lock (&priv->lock); if (GST_MEMORY_FLAG_IS_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_SYNC)) { GST_MEMORY_FLAG_UNSET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_SYNC); if (gst_cuda_context_push (mem->context)) { CuStreamSynchronize (gst_cuda_stream_get_handle (priv->stream)); gst_cuda_context_pop (NULL); } } g_mutex_unlock (&priv->lock); } /** * gst_cuda_allocator_alloc: * @allocator: (transfer none) (allow-none): a #GstCudaAllocator * @context: (transfer none): a #GstCudaContext * @stream: (transfer none) (allow-none): a #GstCudaStream * @info: a #GstVideoInfo * * Returns: (transfer full) (nullable): a newly allocated #GstCudaMemory * * Since: 1.22 */ GstMemory * gst_cuda_allocator_alloc (GstCudaAllocator * allocator, GstCudaContext * context, GstCudaStream * stream, const GstVideoInfo * info) { guint alloc_height; g_return_val_if_fail (GST_IS_CUDA_CONTEXT (context), NULL); g_return_val_if_fail (!stream || GST_IS_CUDA_STREAM (stream), NULL); g_return_val_if_fail (info != NULL, NULL); if (stream && stream->context != context) { GST_ERROR_OBJECT (context, "stream object is holding different CUDA context"); return NULL; } if (!allocator) allocator = (GstCudaAllocator *) _gst_cuda_allocator; alloc_height = GST_VIDEO_INFO_HEIGHT (info); /* make sure valid height for subsampled formats */ switch (GST_VIDEO_INFO_FORMAT (info)) { case GST_VIDEO_FORMAT_I420: case GST_VIDEO_FORMAT_YV12: case GST_VIDEO_FORMAT_NV12: case GST_VIDEO_FORMAT_P010_10LE: case GST_VIDEO_FORMAT_P016_LE: case GST_VIDEO_FORMAT_I420_10LE: alloc_height = GST_ROUND_UP_2 (alloc_height); break; default: break; } switch (GST_VIDEO_INFO_FORMAT (info)) { case GST_VIDEO_FORMAT_I420: case GST_VIDEO_FORMAT_YV12: case GST_VIDEO_FORMAT_I420_10LE: case GST_VIDEO_FORMAT_NV12: case GST_VIDEO_FORMAT_NV21: case GST_VIDEO_FORMAT_P010_10LE: case GST_VIDEO_FORMAT_P016_LE: alloc_height *= 2; break; case GST_VIDEO_FORMAT_Y42B: case GST_VIDEO_FORMAT_I422_10LE: case GST_VIDEO_FORMAT_I422_12LE: case GST_VIDEO_FORMAT_Y444: case GST_VIDEO_FORMAT_Y444_16LE: case GST_VIDEO_FORMAT_RGBP: case GST_VIDEO_FORMAT_BGRP: case GST_VIDEO_FORMAT_GBR: alloc_height *= 3; break; case GST_VIDEO_FORMAT_GBRA: alloc_height *= 4; break; default: break; } return gst_cuda_allocator_alloc_internal (allocator, context, stream, info, info->stride[0], alloc_height); } /** * gst_cuda_allocator_set_active: * @allocator: a #GstCudaAllocator * @active: the new active state * * Controls the active state of @allocator. Default #GstCudaAllocator is * stateless and therefore active state is ignored, but subclass implementation * (e.g., #GstCudaPoolAllocator) will require explicit active state control * for its internal resource management. * * This method is conceptually identical to gst_buffer_pool_set_active method. * * Returns: %TRUE if active state of @allocator was successfully updated. * * Since: 1.24 */ gboolean gst_cuda_allocator_set_active (GstCudaAllocator * allocator, gboolean active) { GstCudaAllocatorClass *klass; g_return_val_if_fail (GST_IS_CUDA_ALLOCATOR (allocator), FALSE); klass = GST_CUDA_ALLOCATOR_GET_CLASS (allocator); if (klass->set_active) return klass->set_active (allocator, active); return TRUE; } #define GST_CUDA_POOL_ALLOCATOR_IS_FLUSHING(alloc) (g_atomic_int_get (&alloc->priv->flushing)) struct _GstCudaPoolAllocatorPrivate { GstAtomicQueue *queue; GstPoll *poll; GRecMutex lock; gboolean started; gboolean active; guint outstanding; guint cur_mems; gboolean flushing; }; static void gst_cuda_pool_allocator_finalize (GObject * object); static gboolean gst_cuda_pool_allocator_set_active (GstCudaAllocator * allocator, gboolean active); static gboolean gst_cuda_pool_allocator_start (GstCudaPoolAllocator * self); static gboolean gst_cuda_pool_allocator_stop (GstCudaPoolAllocator * self); static gboolean gst_cuda_memory_release (GstMiniObject * mini_object); #define gst_cuda_pool_allocator_parent_class pool_alloc_parent_class G_DEFINE_TYPE_WITH_PRIVATE (GstCudaPoolAllocator, gst_cuda_pool_allocator, GST_TYPE_CUDA_ALLOCATOR); static void gst_cuda_pool_allocator_class_init (GstCudaPoolAllocatorClass * klass) { GObjectClass *gobject_class = G_OBJECT_CLASS (klass); GstCudaAllocatorClass *cuda_alloc_class = GST_CUDA_ALLOCATOR_CLASS (klass); gobject_class->finalize = gst_cuda_pool_allocator_finalize; cuda_alloc_class->set_active = gst_cuda_pool_allocator_set_active; } static void gst_cuda_pool_allocator_init (GstCudaPoolAllocator * allocator) { GstCudaPoolAllocatorPrivate *priv; priv = allocator->priv = gst_cuda_pool_allocator_get_instance_private (allocator); g_rec_mutex_init (&priv->lock); priv->poll = gst_poll_new_timer (); priv->queue = gst_atomic_queue_new (16); priv->flushing = 1; priv->active = FALSE; priv->started = FALSE; /* 1 control write for flushing - the flush token */ gst_poll_write_control (priv->poll); /* 1 control write for marking that we are not waiting for poll - the wait token */ gst_poll_write_control (priv->poll); } static void gst_cuda_pool_allocator_finalize (GObject * object) { GstCudaPoolAllocator *self = GST_CUDA_POOL_ALLOCATOR (object); GstCudaPoolAllocatorPrivate *priv = self->priv; GST_DEBUG_OBJECT (self, "Finalize"); gst_cuda_pool_allocator_stop (self); gst_atomic_queue_unref (priv->queue); gst_poll_free (priv->poll); g_rec_mutex_clear (&priv->lock); gst_clear_cuda_stream (&self->stream); gst_clear_object (&self->context); G_OBJECT_CLASS (pool_alloc_parent_class)->finalize (object); } static gboolean gst_cuda_pool_allocator_start (GstCudaPoolAllocator * self) { GstCudaPoolAllocatorPrivate *priv = self->priv; priv->started = TRUE; return TRUE; } static void gst_cuda_pool_allocator_do_set_flushing (GstCudaPoolAllocator * self, gboolean flushing) { GstCudaPoolAllocatorPrivate *priv = self->priv; if (GST_CUDA_POOL_ALLOCATOR_IS_FLUSHING (self) == flushing) return; if (flushing) { g_atomic_int_set (&priv->flushing, 1); /* Write the flush token to wake up any waiters */ gst_poll_write_control (priv->poll); } else { while (!gst_poll_read_control (priv->poll)) { if (errno == EWOULDBLOCK) { /* This should not really happen unless flushing and unflushing * happens on different threads. Let's wait a bit to get back flush * token from the thread that was setting it to flushing */ g_thread_yield (); continue; } else { /* Critical error but GstPoll already complained */ break; } } g_atomic_int_set (&priv->flushing, 0); } } static gboolean gst_cuda_pool_allocator_set_active (GstCudaAllocator * allocator, gboolean active) { GstCudaPoolAllocator *self = GST_CUDA_POOL_ALLOCATOR (allocator); GstCudaPoolAllocatorPrivate *priv = self->priv; gboolean ret = TRUE; GST_LOG_OBJECT (self, "active %d", active); g_rec_mutex_lock (&priv->lock); /* just return if we are already in the right state */ if (priv->active == active) goto done; if (active) { gst_cuda_pool_allocator_start (self); /* flush_stop may release memory objects, setting to active to avoid running * do_stop while activating the pool */ priv->active = TRUE; gst_cuda_pool_allocator_do_set_flushing (self, FALSE); } else { gint outstanding; /* set to flushing first */ gst_cuda_pool_allocator_do_set_flushing (self, TRUE); /* when all memory objects are in the pool, free them. Else they will be * freed when they are released */ outstanding = g_atomic_int_get (&priv->outstanding); GST_LOG_OBJECT (self, "outstanding memories %d, (in queue %d)", outstanding, gst_atomic_queue_length (priv->queue)); if (outstanding == 0) { if (!gst_cuda_pool_allocator_stop (self)) { GST_ERROR_OBJECT (self, "stop failed"); ret = FALSE; goto done; } } priv->active = FALSE; } done: g_rec_mutex_unlock (&priv->lock); return ret; } static void gst_cuda_pool_allocator_free_memory (GstCudaPoolAllocator * self, GstMemory * mem) { GstCudaPoolAllocatorPrivate *priv = self->priv; g_atomic_int_add (&priv->cur_mems, -1); GST_LOG_OBJECT (self, "freeing memory %p (%u left)", mem, priv->cur_mems); GST_MINI_OBJECT_CAST (mem)->dispose = NULL; gst_memory_unref (mem); } static gboolean gst_cuda_pool_allocator_clear_queue (GstCudaPoolAllocator * self) { GstCudaPoolAllocatorPrivate *priv = self->priv; GstMemory *memory; GST_LOG_OBJECT (self, "Clearing queue"); if (self->stream) { /* Wait for outstanding operations */ gst_cuda_context_push (self->context); CuStreamSynchronize (gst_cuda_stream_get_handle (self->stream)); gst_cuda_context_pop (NULL); } while ((memory = (GstMemory *) gst_atomic_queue_pop (priv->queue))) { while (!gst_poll_read_control (priv->poll)) { if (errno == EWOULDBLOCK) { /* We put the memory into the queue but did not finish writing control * yet, let's wait a bit and retry */ g_thread_yield (); continue; } else { /* Critical error but GstPoll already complained */ break; } } /* Already synchronized above */ GST_MEMORY_FLAG_UNSET (memory, GST_CUDA_MEMORY_TRANSFER_NEED_SYNC); gst_cuda_pool_allocator_free_memory (self, memory); } GST_LOG_OBJECT (self, "Clear done"); return priv->cur_mems == 0; } /* must be called with the lock */ static gboolean gst_cuda_pool_allocator_stop (GstCudaPoolAllocator * self) { GstCudaPoolAllocatorPrivate *priv = self->priv; GST_DEBUG_OBJECT (self, "Stop"); if (priv->started) { if (!gst_cuda_pool_allocator_clear_queue (self)) return FALSE; priv->started = FALSE; } return TRUE; } static inline void dec_outstanding (GstCudaPoolAllocator * self) { if (g_atomic_int_dec_and_test (&self->priv->outstanding)) { /* all memory objects are returned to the pool, see if we need to free them */ if (GST_CUDA_POOL_ALLOCATOR_IS_FLUSHING (self)) { /* take the lock so that set_active is not run concurrently */ g_rec_mutex_lock (&self->priv->lock); /* now that we have the lock, check if we have been de-activated with * outstanding buffers */ if (!self->priv->active) gst_cuda_pool_allocator_stop (self); g_rec_mutex_unlock (&self->priv->lock); } } } static void gst_cuda_pool_allocator_release_memory (GstCudaPoolAllocator * self, GstMemory * mem) { GST_LOG_OBJECT (self, "Released memory %p", mem); GST_MINI_OBJECT_CAST (mem)->dispose = NULL; mem->allocator = gst_object_ref (_gst_cuda_allocator); /* keep it around in our queue */ gst_atomic_queue_push (self->priv->queue, mem); gst_poll_write_control (self->priv->poll); dec_outstanding (self); gst_object_unref (self); } static gboolean gst_cuda_memory_release (GstMiniObject * object) { GstMemory *mem = GST_MEMORY_CAST (object); GstCudaPoolAllocator *alloc; g_assert (mem->allocator); if (!GST_IS_CUDA_POOL_ALLOCATOR (mem->allocator)) { GST_LOG_OBJECT (mem->allocator, "Not our memory, free"); return TRUE; } alloc = GST_CUDA_POOL_ALLOCATOR (mem->allocator); /* if flushing, free this memory */ if (GST_CUDA_POOL_ALLOCATOR_IS_FLUSHING (alloc)) { GST_LOG_OBJECT (alloc, "allocator is flushing, free %p", mem); return TRUE; } /* return the memory to the allocator */ gst_memory_ref (mem); gst_cuda_pool_allocator_release_memory (alloc, mem); return FALSE; } /* must be called with the lock */ static GstFlowReturn gst_cuda_pool_allocator_alloc (GstCudaPoolAllocator * self, GstMemory ** mem) { GstCudaPoolAllocatorPrivate *priv = self->priv; GstMemory *new_mem; /* increment the allocation counter */ g_atomic_int_add (&priv->cur_mems, 1); new_mem = gst_cuda_allocator_alloc ((GstCudaAllocator *) _gst_cuda_allocator, self->context, self->stream, &self->info); if (!new_mem) { GST_ERROR_OBJECT (self, "Failed to allocate new memory"); g_atomic_int_add (&priv->cur_mems, -1); return GST_FLOW_ERROR; } *mem = new_mem; return GST_FLOW_OK; } static GstFlowReturn gst_cuda_pool_allocator_acquire_memory_internal (GstCudaPoolAllocator * self, GstMemory ** memory) { GstFlowReturn result; GstCudaPoolAllocatorPrivate *priv = self->priv; while (TRUE) { if (G_UNLIKELY (GST_CUDA_POOL_ALLOCATOR_IS_FLUSHING (self))) goto flushing; /* try to get a memory from the queue */ *memory = (GstMemory *) gst_atomic_queue_pop (priv->queue); if (G_LIKELY (*memory)) { while (!gst_poll_read_control (priv->poll)) { if (errno == EWOULDBLOCK) { /* We put the memory into the queue but did not finish writing control * yet, let's wait a bit and retry */ g_thread_yield (); continue; } else { /* Critical error but GstPoll already complained */ break; } } result = GST_FLOW_OK; GST_LOG_OBJECT (self, "acquired memory %p", *memory); break; } /* no memory, try to allocate some more */ GST_LOG_OBJECT (self, "no memory, trying to allocate"); result = gst_cuda_pool_allocator_alloc (self, memory); if (G_LIKELY (result == GST_FLOW_OK)) /* we have a memory, return it */ break; if (G_UNLIKELY (result != GST_FLOW_EOS)) /* something went wrong, return error */ break; /* now we release the control socket, we wait for a memory release or * flushing */ if (!gst_poll_read_control (priv->poll)) { if (errno == EWOULDBLOCK) { /* This means that we have two threads trying to allocate memory * already, and the other one already got the wait token. This * means that we only have to wait for the poll now and not write the * token afterwards: we will be woken up once the other thread is * woken up and that one will write the wait token it removed */ GST_LOG_OBJECT (self, "waiting for free memory or flushing"); gst_poll_wait (priv->poll, GST_CLOCK_TIME_NONE); } else { /* This is a critical error, GstPoll already gave a warning */ result = GST_FLOW_ERROR; break; } } else { /* We're the first thread waiting, we got the wait token and have to * write it again later * OR * We're a second thread and just consumed the flush token and block all * other threads, in which case we must not wait and give it back * immediately */ if (!GST_CUDA_POOL_ALLOCATOR_IS_FLUSHING (self)) { GST_LOG_OBJECT (self, "waiting for free memory or flushing"); gst_poll_wait (priv->poll, GST_CLOCK_TIME_NONE); } gst_poll_write_control (priv->poll); } } return result; /* ERRORS */ flushing: { GST_DEBUG_OBJECT (self, "we are flushing"); return GST_FLOW_FLUSHING; } } /** * gst_cuda_pool_allocator_new: * @context: a #GstCudaContext * @stream: (allow-none): a #GstCudaStream * @info: a #GstVideoInfo * * Creates a new #GstCudaPoolAllocator instance. * * Returns: (transfer full): a new #GstCudaPoolAllocator instance * * Since: 1.24 */ GstCudaPoolAllocator * gst_cuda_pool_allocator_new (GstCudaContext * context, GstCudaStream * stream, const GstVideoInfo * info) { GstCudaPoolAllocator *self; g_return_val_if_fail (GST_IS_CUDA_CONTEXT (context), NULL); g_return_val_if_fail (!stream || GST_IS_CUDA_STREAM (stream), NULL); self = g_object_new (GST_TYPE_CUDA_POOL_ALLOCATOR, NULL); gst_object_ref_sink (self); self->context = gst_object_ref (context); if (stream) self->stream = gst_cuda_stream_ref (stream); self->info = *info; return self; } /** * gst_cuda_pool_allocator_acquire_memory: * @allocator: a #GstCudaPoolAllocator * @memory: (out): a #GstMemory * * Acquires a #GstMemory from @allocator. @memory should point to a memory * location that can hold a pointer to the new #GstMemory. * * Returns: a #GstFlowReturn such as %GST_FLOW_FLUSHING when the allocator is * inactive. * * Since: 1.24 */ GstFlowReturn gst_cuda_pool_allocator_acquire_memory (GstCudaPoolAllocator * allocator, GstMemory ** memory) { GstFlowReturn result; GstCudaPoolAllocatorPrivate *priv; g_return_val_if_fail (GST_IS_CUDA_POOL_ALLOCATOR (allocator), GST_FLOW_ERROR); g_return_val_if_fail (memory, GST_FLOW_ERROR); priv = allocator->priv; g_atomic_int_inc (&priv->outstanding); result = gst_cuda_pool_allocator_acquire_memory_internal (allocator, memory); if (result == GST_FLOW_OK) { GstMemory *mem = *memory; /* Replace default allocator with ours */ gst_object_unref (mem->allocator); mem->allocator = gst_object_ref (allocator); GST_MINI_OBJECT_CAST (mem)->dispose = gst_cuda_memory_release; allocator->priv->outstanding++; } else { dec_outstanding (allocator); } return result; }