diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudabufferpool.c b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudabufferpool.c
index 6f6b34156d..4a0d419b53 100644
--- a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudabufferpool.c
+++ b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudabufferpool.c
@@ -73,7 +73,7 @@ gst_cuda_buffer_pool_set_config (GstBufferPool * pool, GstStructure * config)
     return FALSE;
   }
 
-  mem = gst_cuda_allocator_alloc (NULL, self->context, &info);
+  mem = gst_cuda_allocator_alloc (NULL, self->context, NULL, &info);
   if (!mem) {
     GST_WARNING_OBJECT (self, "Failed to allocate memory");
     return FALSE;
@@ -102,7 +102,7 @@ gst_cuda_buffer_pool_alloc (GstBufferPool * pool, GstBuffer ** buffer,
   GstMemory *mem;
   GstCudaMemory *cmem;
 
-  mem = gst_cuda_allocator_alloc (NULL, self->context, &priv->info);
+  mem = gst_cuda_allocator_alloc (NULL, self->context, NULL, &priv->info);
   if (!mem) {
     GST_WARNING_OBJECT (pool, "Cannot create CUDA memory");
     return GST_FLOW_ERROR;
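Reviewer note: these two call sites pass NULL for the new stream parameter, which keeps the previous fully synchronous behavior. For reference, a caller that wants stream-ordered transfers would pass a GstCudaStream instead. A minimal sketch, assuming the patched API below; the function name "alloc_with_stream" is hypothetical and not part of this patch:

#include <gst/cuda/gstcudamemory.h>

/* Hypothetical caller using the extended signature: with a non-NULL
 * stream, transfers are issued via CuMemcpy2DAsync() on that stream and
 * the memory is flagged GST_CUDA_MEMORY_TRANSFER_NEED_SYNC. */
static GstMemory *
alloc_with_stream (GstCudaContext * context, GstCudaStream * stream,
    const GstVideoInfo * info)
{
  /* A NULL allocator selects the default GstCudaAllocator; stream may
   * also be NULL to keep the old synchronous behavior */
  return gst_cuda_allocator_alloc (NULL, context, stream, info);
}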
diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudamemory.c b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudamemory.c
index 9cda172775..ba29c2efef 100644
--- a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudamemory.c
+++ b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudamemory.c
@@ -42,10 +42,18 @@ struct _GstCudaMemoryPrivate
   guint height;
 
   GMutex lock;
+
+  GstCudaStream *stream;
+};
+
+struct _GstCudaAllocatorPrivate
+{
+  GstMemoryCopyFunction fallback_copy;
 };
 
 #define gst_cuda_allocator_parent_class parent_class
-G_DEFINE_TYPE (GstCudaAllocator, gst_cuda_allocator, GST_TYPE_ALLOCATOR);
+G_DEFINE_TYPE_WITH_PRIVATE (GstCudaAllocator, gst_cuda_allocator,
+    GST_TYPE_ALLOCATOR);
 
 static void gst_cuda_allocator_free (GstAllocator * allocator,
     GstMemory * memory);
@@ -78,13 +86,17 @@ static void
 gst_cuda_allocator_init (GstCudaAllocator * allocator)
 {
   GstAllocator *alloc = GST_ALLOCATOR_CAST (allocator);
+  GstCudaAllocatorPrivate *priv;
 
-  GST_DEBUG_OBJECT (allocator, "init");
+  priv = allocator->priv = gst_cuda_allocator_get_instance_private (allocator);
 
   alloc->mem_type = GST_CUDA_MEMORY_TYPE_NAME;
 
   alloc->mem_map = cuda_mem_map;
   alloc->mem_unmap_full = cuda_mem_unmap_full;
+
+  /* Store pointer to default mem_copy method for fallback copy */
+  priv->fallback_copy = alloc->mem_copy;
   alloc->mem_copy = cuda_mem_copy;
 
   GST_OBJECT_FLAG_SET (allocator, GST_ALLOCATOR_FLAG_CUSTOM_ALLOC);
@@ -92,7 +104,7 @@ gst_cuda_allocator_init (GstCudaAllocator * allocator)
 
 static GstMemory *
 gst_cuda_allocator_alloc_internal (GstCudaAllocator * self,
-    GstCudaContext * context, const GstVideoInfo * info,
+    GstCudaContext * context, GstCudaStream * stream, const GstVideoInfo * info,
     guint width_in_bytes, guint alloc_height)
 {
   GstCudaMemoryPrivate *priv;
@@ -123,6 +135,8 @@ gst_cuda_allocator_alloc_internal (GstCudaAllocator * self,
   priv->width_in_bytes = width_in_bytes;
   priv->height = alloc_height;
   g_mutex_init (&priv->lock);
+  if (stream)
+    priv->stream = gst_cuda_stream_ref (stream);
 
   mem->context = gst_object_ref (context);
   mem->info = *info;
@@ -221,6 +235,12 @@ gst_cuda_allocator_free (GstAllocator * allocator, GstMemory * memory)
   GstCudaMemoryPrivate *priv = mem->priv;
 
   gst_cuda_context_push (mem->context);
+  /* Finish any pending operations before freeing */
+  if (priv->stream &&
+      GST_MEMORY_FLAG_IS_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_SYNC)) {
+    CuStreamSynchronize (gst_cuda_stream_get_handle (priv->stream));
+  }
+
   if (priv->data)
     gst_cuda_result (CuMemFree (priv->data));
 
@@ -228,6 +248,7 @@ gst_cuda_allocator_free (GstAllocator * allocator, GstMemory * memory)
     gst_cuda_result (CuMemFreeHost (priv->staging));
 
   gst_cuda_context_pop (NULL);
+  gst_clear_cuda_stream (&priv->stream);
 
   gst_object_unref (mem->context);
   g_mutex_clear (&priv->lock);
@@ -241,6 +262,7 @@ gst_cuda_memory_upload (GstCudaAllocator * self, GstCudaMemory * mem)
   GstCudaMemoryPrivate *priv = mem->priv;
   gboolean ret = TRUE;
   CUDA_MEMCPY2D param = { 0, };
+  CUstream stream = gst_cuda_stream_get_handle (priv->stream);
 
   if (!priv->staging ||
       !GST_MEMORY_FLAG_IS_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD)) {
@@ -262,7 +284,16 @@ gst_cuda_memory_upload (GstCudaAllocator * self, GstCudaMemory * mem)
   param.WidthInBytes = priv->width_in_bytes;
   param.Height = priv->height;
 
-  ret = gst_cuda_result (CuMemcpy2D (&param));
+  ret = gst_cuda_result (CuMemcpy2DAsync (&param, stream));
+  /* Sync only if we use the default stream.
+   * Otherwise (non-default stream) the sync is the caller's
+   * responsibility */
+  if (!priv->stream) {
+    CuStreamSynchronize (stream);
+    GST_MINI_OBJECT_FLAG_UNSET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_SYNC);
+  } else {
+    GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_SYNC);
+  }
   gst_cuda_context_pop (NULL);
 
   if (!ret)
@@ -277,6 +308,7 @@ gst_cuda_memory_download (GstCudaAllocator * self, GstCudaMemory * mem)
   GstCudaMemoryPrivate *priv = mem->priv;
   gboolean ret = TRUE;
   CUDA_MEMCPY2D param = { 0, };
+  CUstream stream = gst_cuda_stream_get_handle (priv->stream);
 
   if (!GST_MEMORY_FLAG_IS_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD))
     return TRUE;
@@ -306,8 +338,11 @@ gst_cuda_memory_download (GstCudaAllocator * self, GstCudaMemory * mem)
   param.WidthInBytes = priv->width_in_bytes;
   param.Height = priv->height;
 
-  ret = gst_cuda_result (CuMemcpy2D (&param));
+  ret = gst_cuda_result (CuMemcpy2DAsync (&param, stream));
+  /* For CPU access, sync immediately */
+  CuStreamSynchronize (stream);
   gst_cuda_context_pop (NULL);
+  GST_MINI_OBJECT_FLAG_UNSET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_SYNC);
 
   if (!ret)
     GST_ERROR_OBJECT (self, "Failed to upload memory");
@@ -330,8 +365,12 @@ cuda_mem_map (GstMemory * mem, gsize maxsize, GstMapFlags flags)
 
     GST_MEMORY_FLAG_UNSET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD);
 
-    if ((flags & GST_MAP_WRITE) == GST_MAP_WRITE)
+    if ((flags & GST_MAP_WRITE) == GST_MAP_WRITE) {
       GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD);
+      /* Assume that memory needs sync if we are using a non-default stream */
+      if (priv->stream)
+        GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_SYNC);
+    }
 
     ret = (gpointer) priv->data;
     goto out;
@@ -388,12 +427,26 @@ cuda_mem_copy (GstMemory * mem, gssize offset, gssize size)
   GstCudaContext *context = src_mem->context;
   GstMapInfo src_info, dst_info;
   CUDA_MEMCPY2D param = { 0, };
-  GstMemory *copy;
+  GstMemory *copy = NULL;
   gboolean ret;
+  GstCudaStream *stream = src_mem->priv->stream;
+  CUstream stream_handle = gst_cuda_stream_get_handle (stream);
 
-  /* offset and size are ignored */
-  copy = gst_cuda_allocator_alloc_internal (self, context,
-      &src_mem->info, src_mem->priv->width_in_bytes, src_mem->priv->height);
+  /* non-zero offset or different size is not supported */
+  if (offset != 0 || (size != -1 && (gsize) size != mem->size)) {
+    GST_DEBUG_OBJECT (self, "Different size/offset, try fallback copy");
+    return self->priv->fallback_copy (mem, offset, size);
+  }
+
+  if (GST_IS_CUDA_POOL_ALLOCATOR (self)) {
+    gst_cuda_pool_allocator_acquire_memory (GST_CUDA_POOL_ALLOCATOR (self),
+        &copy);
+  }
+
+  if (!copy) {
+    copy = gst_cuda_allocator_alloc_internal (self, context, stream,
+        &src_mem->info, src_mem->priv->width_in_bytes, src_mem->priv->height);
+  }
 
   if (!copy) {
     GST_ERROR_OBJECT (self, "Failed to allocate memory for copying");
@@ -431,7 +484,8 @@ cuda_mem_copy (GstMemory * mem, gssize offset, gssize size)
   param.WidthInBytes = src_mem->priv->width_in_bytes;
   param.Height = src_mem->priv->height;
 
-  ret = gst_cuda_result (CuMemcpy2D (&param));
+  ret = gst_cuda_result (CuMemcpy2DAsync (&param, stream_handle));
+  CuStreamSynchronize (stream_handle);
   gst_cuda_context_pop (NULL);
 
   gst_memory_unmap (mem, &src_info);
@@ -484,10 +538,61 @@ gst_is_cuda_memory (GstMemory * mem)
       GST_IS_CUDA_ALLOCATOR (mem->allocator);
 }
 
+/**
+ * gst_cuda_memory_get_stream:
+ * @mem: A #GstCudaMemory
+ *
+ * Gets CUDA stream object associated with @mem
+ *
+ * Returns: (transfer none) (nullable): a #GstCudaStream or %NULL if the
+ * default CUDA stream is in use
+ *
+ * Since: 1.24
+ */
+GstCudaStream *
+gst_cuda_memory_get_stream (GstCudaMemory * mem)
+{
+  g_return_val_if_fail (gst_is_cuda_memory ((GstMemory *) mem), NULL);
+
+  return mem->priv->stream;
+}
+
+/**
+ * gst_cuda_memory_sync:
+ * @mem: A #GstCudaMemory
+ *
+ * Performs synchronization if needed
+ *
+ * Since: 1.24
+ */
+void
+gst_cuda_memory_sync (GstCudaMemory * mem)
+{
+  GstCudaMemoryPrivate *priv;
+
+  g_return_if_fail (gst_is_cuda_memory ((GstMemory *) mem));
+
+  priv = mem->priv;
+  if (!priv->stream)
+    return;
+
+  g_mutex_lock (&priv->lock);
+  if (GST_MEMORY_FLAG_IS_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_SYNC)) {
+    GST_MEMORY_FLAG_UNSET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_SYNC);
+    if (gst_cuda_context_push (mem->context)) {
+      CuStreamSynchronize (gst_cuda_stream_get_handle (priv->stream));
+      gst_cuda_context_pop (NULL);
+    }
+  }
+
+  g_mutex_unlock (&priv->lock);
+}
+
 /**
  * gst_cuda_allocator_alloc:
  * @allocator: (transfer none) (allow-none): a #GstCudaAllocator
  * @context: (transfer none): a #GstCudaContext
+ * @stream: (transfer none) (allow-none): a #GstCudaStream
  * @info: a #GstVideoInfo
  *
  * Returns: (transfer full) (nullable): a newly allocated #GstCudaMemory
@@ -496,13 +601,20 @@ gst_is_cuda_memory (GstMemory * mem)
  */
 GstMemory *
 gst_cuda_allocator_alloc (GstCudaAllocator * allocator,
-    GstCudaContext * context, const GstVideoInfo * info)
+    GstCudaContext * context, GstCudaStream * stream, const GstVideoInfo * info)
 {
   guint alloc_height;
 
   g_return_val_if_fail (GST_IS_CUDA_CONTEXT (context), NULL);
+  g_return_val_if_fail (!stream || GST_IS_CUDA_STREAM (stream), NULL);
   g_return_val_if_fail (info != NULL, NULL);
 
+  if (stream && stream->context != context) {
+    GST_ERROR_OBJECT (context,
+        "stream object is holding different CUDA context");
+    return NULL;
+  }
+
   if (!allocator)
     allocator = (GstCudaAllocator *) _gst_cuda_allocator;
 
@@ -549,6 +661,518 @@ gst_cuda_allocator_alloc (GstCudaAllocator * allocator,
       break;
   }
 
-  return gst_cuda_allocator_alloc_internal (allocator, context,
+  return gst_cuda_allocator_alloc_internal (allocator, context, stream,
       info, info->stride[0], alloc_height);
 }
+
+/**
+ * gst_cuda_allocator_set_active:
+ * @allocator: a #GstCudaAllocator
+ * @active: the new active state
+ *
+ * Controls the active state of @allocator. The default #GstCudaAllocator is
+ * stateless, so its active state is ignored, but a subclass implementation
+ * (e.g., #GstCudaPoolAllocator) will require explicit active state control
+ * for its internal resource management.
+ *
+ * This method is conceptually identical to gst_buffer_pool_set_active().
+ *
+ * Returns: %TRUE if the active state of @allocator was successfully updated.
+ *
+ * Since: 1.24
+ */
+gboolean
+gst_cuda_allocator_set_active (GstCudaAllocator * allocator, gboolean active)
+{
+  GstCudaAllocatorClass *klass;
+
+  g_return_val_if_fail (GST_IS_CUDA_ALLOCATOR (allocator), FALSE);
+
+  klass = GST_CUDA_ALLOCATOR_GET_CLASS (allocator);
+  if (klass->set_active)
+    return klass->set_active (allocator, active);
+
+  return TRUE;
+}
+
+#define GST_CUDA_POOL_ALLOCATOR_IS_FLUSHING(alloc) (g_atomic_int_get (&alloc->priv->flushing))
+
+struct _GstCudaPoolAllocatorPrivate
+{
+  GstAtomicQueue *queue;
+  GstPoll *poll;
+
+  GRecMutex lock;
+  gboolean started;
+  gboolean active;
+
+  guint outstanding;
+  guint cur_mems;
+  gboolean flushing;
+};
+
+static void gst_cuda_pool_allocator_finalize (GObject * object);
+
+static gboolean
+gst_cuda_pool_allocator_set_active (GstCudaAllocator * allocator,
+    gboolean active);
+
+static gboolean gst_cuda_pool_allocator_start (GstCudaPoolAllocator * self);
+static gboolean gst_cuda_pool_allocator_stop (GstCudaPoolAllocator * self);
+static gboolean gst_cuda_memory_release (GstMiniObject * mini_object);
+
+#define gst_cuda_pool_allocator_parent_class pool_alloc_parent_class
+G_DEFINE_TYPE_WITH_PRIVATE (GstCudaPoolAllocator,
+    gst_cuda_pool_allocator, GST_TYPE_CUDA_ALLOCATOR);
+
+static void
+gst_cuda_pool_allocator_class_init (GstCudaPoolAllocatorClass * klass)
+{
+  GObjectClass *gobject_class = G_OBJECT_CLASS (klass);
+  GstCudaAllocatorClass *cuda_alloc_class = GST_CUDA_ALLOCATOR_CLASS (klass);
+
+  gobject_class->finalize = gst_cuda_pool_allocator_finalize;
+
+  cuda_alloc_class->set_active = gst_cuda_pool_allocator_set_active;
+}
+
+static void
+gst_cuda_pool_allocator_init (GstCudaPoolAllocator * allocator)
+{
+  GstCudaPoolAllocatorPrivate *priv;
+
+  priv = allocator->priv =
+      gst_cuda_pool_allocator_get_instance_private (allocator);
+
+  g_rec_mutex_init (&priv->lock);
+
+  priv->poll = gst_poll_new_timer ();
+  priv->queue = gst_atomic_queue_new (16);
+  priv->flushing = 1;
+  priv->active = FALSE;
+  priv->started = FALSE;
+
+  /* 1 control write for flushing - the flush token */
+  gst_poll_write_control (priv->poll);
+  /* 1 control write for marking that we are not waiting for poll - the wait token */
+  gst_poll_write_control (priv->poll);
+}
+
+static void
+gst_cuda_pool_allocator_finalize (GObject * object)
+{
+  GstCudaPoolAllocator *self = GST_CUDA_POOL_ALLOCATOR (object);
+  GstCudaPoolAllocatorPrivate *priv = self->priv;
+
+  GST_DEBUG_OBJECT (self, "Finalize");
+
+  gst_cuda_pool_allocator_stop (self);
+  gst_atomic_queue_unref (priv->queue);
+  gst_poll_free (priv->poll);
+  g_rec_mutex_clear (&priv->lock);
+
+  gst_clear_cuda_stream (&self->stream);
+  gst_clear_object (&self->context);
+
+  G_OBJECT_CLASS (pool_alloc_parent_class)->finalize (object);
+}
+
+static gboolean
+gst_cuda_pool_allocator_start (GstCudaPoolAllocator * self)
+{
+  GstCudaPoolAllocatorPrivate *priv = self->priv;
+
+  priv->started = TRUE;
+
+  return TRUE;
+}
+
+static void
+gst_cuda_pool_allocator_do_set_flushing (GstCudaPoolAllocator * self,
+    gboolean flushing)
+{
+  GstCudaPoolAllocatorPrivate *priv = self->priv;
+
+  if (GST_CUDA_POOL_ALLOCATOR_IS_FLUSHING (self) == flushing)
+    return;
+
+  if (flushing) {
+    g_atomic_int_set (&priv->flushing, 1);
+    /* Write the flush token to wake up any waiters */
+    gst_poll_write_control (priv->poll);
+  } else {
+    while (!gst_poll_read_control (priv->poll)) {
+      if (errno == EWOULDBLOCK) {
+        /* This should not really happen unless flushing and unflushing
+         * happen on different threads. Let's wait a bit to get back the
+         * flush token from the thread that set it to flushing */
+        g_thread_yield ();
+        continue;
+      } else {
+        /* Critical error but GstPoll already complained */
+        break;
+      }
+    }
+
+    g_atomic_int_set (&priv->flushing, 0);
+  }
+}
+
+static gboolean
+gst_cuda_pool_allocator_set_active (GstCudaAllocator * allocator,
+    gboolean active)
+{
+  GstCudaPoolAllocator *self = GST_CUDA_POOL_ALLOCATOR (allocator);
+  GstCudaPoolAllocatorPrivate *priv = self->priv;
+  gboolean ret = TRUE;
+
+  GST_LOG_OBJECT (self, "active %d", active);
+
+  g_rec_mutex_lock (&priv->lock);
+  /* just return if we are already in the right state */
+  if (priv->active == active)
+    goto done;
+
+  if (active) {
+    gst_cuda_pool_allocator_start (self);
+
+    /* flush_stop may release memory objects, set to active to avoid running
+     * do_stop while activating the pool */
+    priv->active = TRUE;
+
+    gst_cuda_pool_allocator_do_set_flushing (self, FALSE);
+  } else {
+    gint outstanding;
+
+    /* set to flushing first */
+    gst_cuda_pool_allocator_do_set_flushing (self, TRUE);
+
+    /* when all memory objects are in the pool, free them. Else they will be
+     * freed when they are released */
+    outstanding = g_atomic_int_get (&priv->outstanding);
+    GST_LOG_OBJECT (self, "outstanding memories %d, (in queue %d)",
+        outstanding, gst_atomic_queue_length (priv->queue));
+    if (outstanding == 0) {
+      if (!gst_cuda_pool_allocator_stop (self)) {
+        GST_ERROR_OBJECT (self, "stop failed");
+        ret = FALSE;
+        goto done;
+      }
+    }
+
+    priv->active = FALSE;
+  }
+
+done:
+  g_rec_mutex_unlock (&priv->lock);
+
+  return ret;
+}
+
+static void
+gst_cuda_pool_allocator_free_memory (GstCudaPoolAllocator * self,
+    GstMemory * mem)
+{
+  GstCudaPoolAllocatorPrivate *priv = self->priv;
+
+  g_atomic_int_add (&priv->cur_mems, -1);
+  GST_LOG_OBJECT (self, "freeing memory %p (%u left)", mem, priv->cur_mems);
+
+  GST_MINI_OBJECT_CAST (mem)->dispose = NULL;
+  gst_memory_unref (mem);
+}
+
+static gboolean
+gst_cuda_pool_allocator_clear_queue (GstCudaPoolAllocator * self)
+{
+  GstCudaPoolAllocatorPrivate *priv = self->priv;
+  GstMemory *memory;
+
+  GST_LOG_OBJECT (self, "Clearing queue");
+
+  if (self->stream) {
+    /* Wait for outstanding operations */
+    gst_cuda_context_push (self->context);
+    CuStreamSynchronize (gst_cuda_stream_get_handle (self->stream));
+    gst_cuda_context_pop (NULL);
+  }
+
+  while ((memory = (GstMemory *) gst_atomic_queue_pop (priv->queue))) {
+    while (!gst_poll_read_control (priv->poll)) {
+      if (errno == EWOULDBLOCK) {
+        /* We put the memory into the queue but did not finish writing control
+         * yet, let's wait a bit and retry */
+        g_thread_yield ();
+        continue;
+      } else {
+        /* Critical error but GstPoll already complained */
+        break;
+      }
+    }
+
+    /* Already synchronized above */
+    GST_MEMORY_FLAG_UNSET (memory, GST_CUDA_MEMORY_TRANSFER_NEED_SYNC);
+    gst_cuda_pool_allocator_free_memory (self, memory);
+  }
+
+  GST_LOG_OBJECT (self, "Clear done");
+
+  return priv->cur_mems == 0;
+}
+
+/* must be called with the lock */
+static gboolean
+gst_cuda_pool_allocator_stop (GstCudaPoolAllocator * self)
+{
+  GstCudaPoolAllocatorPrivate *priv = self->priv;
+
+  GST_DEBUG_OBJECT (self, "Stop");
+
+  if (priv->started) {
+    if (!gst_cuda_pool_allocator_clear_queue (self))
+      return FALSE;
+
+    priv->started = FALSE;
+  }
+
+  return TRUE;
+}
+
+static inline void
+dec_outstanding (GstCudaPoolAllocator * self)
+{
+  if (g_atomic_int_dec_and_test (&self->priv->outstanding)) {
+    /* all memory objects are returned to the pool, see if we need to free them */
+    if (GST_CUDA_POOL_ALLOCATOR_IS_FLUSHING (self)) {
+      /* take the lock so that set_active is not run concurrently */
+      g_rec_mutex_lock (&self->priv->lock);
+      /* now that we have the lock, check if we have been de-activated with
+       * outstanding buffers */
+      if (!self->priv->active)
+        gst_cuda_pool_allocator_stop (self);
+      g_rec_mutex_unlock (&self->priv->lock);
+    }
+  }
+}
+
+static void
+gst_cuda_pool_allocator_release_memory (GstCudaPoolAllocator * self,
+    GstMemory * mem)
+{
+  GST_LOG_OBJECT (self, "Released memory %p", mem);
+
+  GST_MINI_OBJECT_CAST (mem)->dispose = NULL;
+  mem->allocator = gst_object_ref (_gst_cuda_allocator);
+
+  /* keep it around in our queue */
+  gst_atomic_queue_push (self->priv->queue, mem);
+  gst_poll_write_control (self->priv->poll);
+  dec_outstanding (self);
+
+  gst_object_unref (self);
+}
+
+static gboolean
+gst_cuda_memory_release (GstMiniObject * object)
+{
+  GstMemory *mem = GST_MEMORY_CAST (object);
+  GstCudaPoolAllocator *alloc;
+
+  g_assert (mem->allocator);
+
+  if (!GST_IS_CUDA_POOL_ALLOCATOR (mem->allocator)) {
+    GST_LOG_OBJECT (mem->allocator, "Not our memory, free");
+    return TRUE;
+  }
+
+  alloc = GST_CUDA_POOL_ALLOCATOR (mem->allocator);
+  /* if flushing, free this memory */
+  if (GST_CUDA_POOL_ALLOCATOR_IS_FLUSHING (alloc)) {
+    GST_LOG_OBJECT (alloc, "allocator is flushing, free %p", mem);
+    return TRUE;
+  }
+
+  /* return the memory to the allocator */
+  gst_memory_ref (mem);
+  gst_cuda_pool_allocator_release_memory (alloc, mem);
+
+  return FALSE;
+}
+
+/* must be called with the lock */
+static GstFlowReturn
+gst_cuda_pool_allocator_alloc (GstCudaPoolAllocator * self, GstMemory ** mem)
+{
+  GstCudaPoolAllocatorPrivate *priv = self->priv;
+  GstMemory *new_mem;
+
+  /* increment the allocation counter */
+  g_atomic_int_add (&priv->cur_mems, 1);
+  new_mem = gst_cuda_allocator_alloc ((GstCudaAllocator *) _gst_cuda_allocator,
+      self->context, self->stream, &self->info);
+  if (!new_mem) {
+    GST_ERROR_OBJECT (self, "Failed to allocate new memory");
+    g_atomic_int_add (&priv->cur_mems, -1);
+    return GST_FLOW_ERROR;
+  }
+
+  *mem = new_mem;
+
+  return GST_FLOW_OK;
+}
+
+static GstFlowReturn
+gst_cuda_pool_allocator_acquire_memory_internal (GstCudaPoolAllocator * self,
+    GstMemory ** memory)
+{
+  GstFlowReturn result;
+  GstCudaPoolAllocatorPrivate *priv = self->priv;
+
+  while (TRUE) {
+    if (G_UNLIKELY (GST_CUDA_POOL_ALLOCATOR_IS_FLUSHING (self)))
+      goto flushing;
+
+    /* try to get a memory from the queue */
+    *memory = (GstMemory *) gst_atomic_queue_pop (priv->queue);
+    if (G_LIKELY (*memory)) {
+      while (!gst_poll_read_control (priv->poll)) {
+        if (errno == EWOULDBLOCK) {
+          /* We put the memory into the queue but did not finish writing control
+           * yet, let's wait a bit and retry */
+          g_thread_yield ();
+          continue;
+        } else {
+          /* Critical error but GstPoll already complained */
+          break;
+        }
+      }
+      result = GST_FLOW_OK;
+      GST_LOG_OBJECT (self, "acquired memory %p", *memory);
+      break;
+    }
+
+    /* no memory, try to allocate some more */
+    GST_LOG_OBJECT (self, "no memory, trying to allocate");
+    result = gst_cuda_pool_allocator_alloc (self, memory);
+    if (G_LIKELY (result == GST_FLOW_OK))
+      /* we have a memory, return it */
+      break;
+
+    if (G_UNLIKELY (result != GST_FLOW_EOS))
+      /* something went wrong, return error */
+      break;
+
+    /* now we release the control socket, we wait for a memory release or
+     * flushing */
+    if (!gst_poll_read_control (priv->poll)) {
+      if (errno == EWOULDBLOCK) {
+        /* This means that we have two threads trying to allocate memory
+         * already, and the other one already got the wait token. This
+         * means that we only have to wait for the poll now and not write the
+         * token afterwards: we will be woken up once the other thread is
+         * woken up and that one will write the wait token it removed */
+        GST_LOG_OBJECT (self, "waiting for free memory or flushing");
+        gst_poll_wait (priv->poll, GST_CLOCK_TIME_NONE);
+      } else {
+        /* This is a critical error, GstPoll already gave a warning */
+        result = GST_FLOW_ERROR;
+        break;
+      }
+    } else {
+      /* We're the first thread waiting, we got the wait token and have to
+       * write it again later
+       * OR
+       * We're a second thread and just consumed the flush token and block all
+       * other threads, in which case we must not wait and give it back
+       * immediately */
+      if (!GST_CUDA_POOL_ALLOCATOR_IS_FLUSHING (self)) {
+        GST_LOG_OBJECT (self, "waiting for free memory or flushing");
+        gst_poll_wait (priv->poll, GST_CLOCK_TIME_NONE);
+      }
+      gst_poll_write_control (priv->poll);
+    }
+  }
+
+  return result;
+
+  /* ERRORS */
+flushing:
+  {
+    GST_DEBUG_OBJECT (self, "we are flushing");
+    return GST_FLOW_FLUSHING;
+  }
+}
+
+/**
+ * gst_cuda_pool_allocator_new:
+ * @context: a #GstCudaContext
+ * @stream: (allow-none): a #GstCudaStream
+ * @info: a #GstVideoInfo
+ *
+ * Creates a new #GstCudaPoolAllocator instance.
+ *
+ * Returns: (transfer full): a new #GstCudaPoolAllocator instance
+ *
+ * Since: 1.24
+ */
+GstCudaPoolAllocator *
+gst_cuda_pool_allocator_new (GstCudaContext * context, GstCudaStream * stream,
+    const GstVideoInfo * info)
+{
+  GstCudaPoolAllocator *self;
+
+  g_return_val_if_fail (GST_IS_CUDA_CONTEXT (context), NULL);
+  g_return_val_if_fail (!stream || GST_IS_CUDA_STREAM (stream), NULL);
+
+  self = g_object_new (GST_TYPE_CUDA_POOL_ALLOCATOR, NULL);
+  gst_object_ref_sink (self);
+
+  self->context = gst_object_ref (context);
+  if (stream)
+    self->stream = gst_cuda_stream_ref (stream);
+  self->info = *info;
+
+  return self;
+}
+
+/**
+ * gst_cuda_pool_allocator_acquire_memory:
+ * @allocator: a #GstCudaPoolAllocator
+ * @memory: (out): a #GstMemory
+ *
+ * Acquires a #GstMemory from @allocator. @memory should point to a memory
+ * location that can hold a pointer to the new #GstMemory.
+ *
+ * Returns: a #GstFlowReturn such as %GST_FLOW_FLUSHING when the allocator is
+ * inactive.
+ *
+ * Since: 1.24
+ */
+GstFlowReturn
+gst_cuda_pool_allocator_acquire_memory (GstCudaPoolAllocator * allocator,
+    GstMemory ** memory)
+{
+  GstFlowReturn result;
+  GstCudaPoolAllocatorPrivate *priv;
+
+  g_return_val_if_fail (GST_IS_CUDA_POOL_ALLOCATOR (allocator), GST_FLOW_ERROR);
+  g_return_val_if_fail (memory, GST_FLOW_ERROR);
+
+  priv = allocator->priv;
+
+  /* Count this acquisition as outstanding up front so that a concurrent
+   * set_active() cannot free queued memory from under us; the counter is
+   * decremented again on failure */
+  g_atomic_int_inc (&priv->outstanding);
+  result = gst_cuda_pool_allocator_acquire_memory_internal (allocator, memory);
+
+  if (result == GST_FLOW_OK) {
+    GstMemory *mem = *memory;
+    /* Replace default allocator with ours */
+    gst_object_unref (mem->allocator);
+    mem->allocator = gst_object_ref (allocator);
+    GST_MINI_OBJECT_CAST (mem)->dispose = gst_cuda_memory_release;
+  } else {
+    dec_outstanding (allocator);
+  }
+
+  return result;
+}
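Reviewer note: with these changes, an upload on a non-default stream returns before the device copy completes, so consumers on another stream or on the CPU must synchronize explicitly via gst_cuda_memory_sync(). A minimal sketch of that contract, assuming an existing GstCudaContext and GstCudaStream; all names below are hypothetical:

#include <gst/cuda/gstcudamemory.h>

/* Sketch of the stream-aware transfer contract introduced above. */
static void
stream_aware_example (GstCudaContext * context, GstCudaStream * stream,
    const GstVideoInfo * info)
{
  GstMemory *mem;
  GstMapInfo map;

  /* Memory tied to a non-default stream */
  mem = gst_cuda_allocator_alloc (NULL, context, stream, info);
  if (!mem)
    return;

  /* A GPU-side write mapping flags the memory as needing a sync */
  if (gst_memory_map (mem, &map, GST_MAP_WRITE | GST_MAP_CUDA)) {
    /* ... launch kernels or async copies on the stream here ... */
    gst_memory_unmap (mem, &map);
  }

  /* Wait for pending operations before the data is consumed elsewhere;
   * this is a no-op for default-stream memory */
  gst_cuda_memory_sync (GST_CUDA_MEMORY_CAST (mem));

  gst_memory_unref (mem);
}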
diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudamemory.h b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudamemory.h
index c4f9da6b76..94b3b4770f 100644
--- a/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudamemory.h
+++ b/subprojects/gst-plugins-bad/gst-libs/gst/cuda/gstcudamemory.h
@@ -24,6 +24,7 @@
 #include <gst/gst.h>
 #include <gst/video/video.h>
 #include <gst/cuda/gstcudacontext.h>
+#include <gst/cuda/gstcudastream.h>
 
 G_BEGIN_DECLS
 
@@ -33,6 +34,7 @@ G_BEGIN_DECLS
 #define GST_CUDA_ALLOCATOR_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS((obj), GST_TYPE_CUDA_ALLOCATOR,GstCudaAllocatorClass))
 #define GST_IS_CUDA_ALLOCATOR(obj) (G_TYPE_CHECK_INSTANCE_TYPE((obj),GST_TYPE_CUDA_ALLOCATOR))
 #define GST_IS_CUDA_ALLOCATOR_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE((klass), GST_TYPE_CUDA_ALLOCATOR))
+
 /**
  * GST_CUDA_ALLOCATOR_CAST:
  *
@@ -46,19 +48,24 @@ G_BEGIN_DECLS
  */
 #define GST_CUDA_MEMORY_CAST(mem) ((GstCudaMemory *) (mem))
 
-/**
- * GstCudaAllocator:
- *
- * A #GstAllocator subclass for cuda memory
- *
- * Since: 1.22
- */
-typedef struct _GstCudaAllocator GstCudaAllocator;
-typedef struct _GstCudaAllocatorClass GstCudaAllocatorClass;
+#define GST_TYPE_CUDA_POOL_ALLOCATOR (gst_cuda_pool_allocator_get_type())
+#define GST_CUDA_POOL_ALLOCATOR(obj) (G_TYPE_CHECK_INSTANCE_CAST((obj),GST_TYPE_CUDA_POOL_ALLOCATOR,GstCudaPoolAllocator))
+#define GST_CUDA_POOL_ALLOCATOR_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST((klass), GST_TYPE_CUDA_POOL_ALLOCATOR,GstCudaPoolAllocatorClass))
+#define GST_CUDA_POOL_ALLOCATOR_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS((obj), GST_TYPE_CUDA_POOL_ALLOCATOR,GstCudaPoolAllocatorClass))
+#define GST_IS_CUDA_POOL_ALLOCATOR(obj) (G_TYPE_CHECK_INSTANCE_TYPE((obj),GST_TYPE_CUDA_POOL_ALLOCATOR))
+#define GST_IS_CUDA_POOL_ALLOCATOR_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE((klass), GST_TYPE_CUDA_POOL_ALLOCATOR))
 
 typedef struct _GstCudaMemory GstCudaMemory;
 typedef struct _GstCudaMemoryPrivate GstCudaMemoryPrivate;
+typedef struct _GstCudaAllocator GstCudaAllocator;
+typedef struct _GstCudaAllocatorClass GstCudaAllocatorClass;
+typedef struct _GstCudaAllocatorPrivate GstCudaAllocatorPrivate;
+
+typedef struct _GstCudaPoolAllocator GstCudaPoolAllocator;
+typedef struct _GstCudaPoolAllocatorClass GstCudaPoolAllocatorClass;
+typedef struct _GstCudaPoolAllocatorPrivate GstCudaPoolAllocatorPrivate;
+
 /**
  * GST_MAP_CUDA:
  *
@@ -95,17 +102,37 @@ typedef struct _GstCudaMemoryPrivate GstCudaMemoryPrivate;
 
 /**
  * GstCudaMemoryTransfer:
- * @GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD: the device memory needs downloading
- * to the staging memory
- * @GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD: the staging memory needs uploading
- * to the device memory
  *
- * Since: 1.22
+ * CUDA memory transfer flags
  */
 typedef enum
 {
-  GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD = (GST_MEMORY_FLAG_LAST << 0),
-  GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD = (GST_MEMORY_FLAG_LAST << 1)
+  /**
+   * GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD:
+   *
+   * the device memory needs downloading to the staging memory
+   *
+   * Since: 1.22
+   */
+  GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD = (GST_MEMORY_FLAG_LAST << 0),
+
+  /**
+   * GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD:
+   *
+   * the staging memory needs uploading to the device memory
+   *
+   * Since: 1.22
+   */
+  GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD = (GST_MEMORY_FLAG_LAST << 1),
+
+  /**
+   * GST_CUDA_MEMORY_TRANSFER_NEED_SYNC:
+   *
+   * the device memory needs synchronization
+   *
+   * Since: 1.24
+   */
+  GST_CUDA_MEMORY_TRANSFER_NEED_SYNC = (GST_MEMORY_FLAG_LAST << 2),
 } GstCudaMemoryTransfer;
 
 /**
@@ -126,29 +153,105 @@ struct _GstCudaMemory
 
   gpointer _gst_reserved[GST_PADDING];
 };
 
+GST_CUDA_API
+void            gst_cuda_memory_init_once (void);
+
+GST_CUDA_API
+gboolean        gst_is_cuda_memory (GstMemory * mem);
+
+GST_CUDA_API
+GstCudaStream * gst_cuda_memory_get_stream (GstCudaMemory * mem);
+
+GST_CUDA_API
+void            gst_cuda_memory_sync (GstCudaMemory * mem);
+
+/**
+ * GstCudaAllocator:
+ *
+ * A #GstAllocator subclass for cuda memory
+ *
+ * Since: 1.22
+ */
 struct _GstCudaAllocator
 {
   GstAllocator parent;
+
+  /*< private >*/
+  GstCudaAllocatorPrivate *priv;
+  gpointer _gst_reserved[GST_PADDING];
 };
 
 struct _GstCudaAllocatorClass
 {
   GstAllocatorClass parent_class;
+
+  /**
+   * GstCudaAllocatorClass::set_active:
+   * @allocator: a #GstCudaAllocator
+   * @active: the new active state
+   *
+   * Since: 1.24
+   */
+  gboolean (*set_active) (GstCudaAllocator * allocator,
+                          gboolean active);
+
+  /*< private >*/
+  gpointer _gst_reserved[GST_PADDING_LARGE];
 };
 
 GST_CUDA_API
-void gst_cuda_memory_init_once (void);
+GType gst_cuda_allocator_get_type (void);
 
 GST_CUDA_API
-gboolean gst_is_cuda_memory (GstMemory * mem);
+GstMemory * gst_cuda_allocator_alloc (GstCudaAllocator * allocator,
+                                      GstCudaContext * context,
+                                      GstCudaStream * stream,
+                                      const GstVideoInfo * info);
 
 GST_CUDA_API
-GType gst_cuda_allocator_get_type (void);
+gboolean gst_cuda_allocator_set_active (GstCudaAllocator * allocator,
+                                        gboolean active);
+
+/**
+ * GstCudaPoolAllocator:
+ *
+ * A #GstCudaAllocator subclass for cuda memory pool
+ *
+ * Since: 1.24
+ */
+struct _GstCudaPoolAllocator
+{
+  GstCudaAllocator parent;
+
+  GstCudaContext *context;
+  GstCudaStream *stream;
+
+  GstVideoInfo info;
+
+  /*< private >*/
+  GstCudaPoolAllocatorPrivate *priv;
+  gpointer _gst_reserved[GST_PADDING];
+};
+
+struct _GstCudaPoolAllocatorClass
+{
+  GstCudaAllocatorClass parent_class;
+
+  /*< private >*/
+  gpointer _gst_reserved[GST_PADDING];
+};
 
 GST_CUDA_API
-GstMemory * gst_cuda_allocator_alloc (GstCudaAllocator * allocator,
-    GstCudaContext * context,
-    const GstVideoInfo * info);
+GType gst_cuda_pool_allocator_get_type (void);
+
+GST_CUDA_API
+GstCudaPoolAllocator * gst_cuda_pool_allocator_new (GstCudaContext * context,
+                                                    GstCudaStream * stream,
+                                                    const GstVideoInfo * info);
+
+GST_CUDA_API
+GstFlowReturn gst_cuda_pool_allocator_acquire_memory (GstCudaPoolAllocator * allocator,
+                                                      GstMemory ** memory);
 
 G_END_DECLS
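Reviewer note: to illustrate the intended lifecycle of the new pool allocator, here is a minimal, hypothetical usage sketch (error handling elided); it assumes an existing GstCudaContext and GstCudaStream, and the variable names are not part of this patch:

#include <gst/cuda/gstcudamemory.h>

/* Hypothetical lifecycle of GstCudaPoolAllocator as added above. */
static void
pool_allocator_example (GstCudaContext * context, GstCudaStream * stream)
{
  GstCudaPoolAllocator *pool;
  GstVideoInfo info;
  GstMemory *mem = NULL;

  gst_video_info_set_format (&info, GST_VIDEO_FORMAT_NV12, 1920, 1080);

  pool = gst_cuda_pool_allocator_new (context, stream, &info);

  /* The pool starts out flushing; activate it before acquiring memory */
  gst_cuda_allocator_set_active (GST_CUDA_ALLOCATOR (pool), TRUE);

  if (gst_cuda_pool_allocator_acquire_memory (pool, &mem) == GST_FLOW_OK) {
    /* ... use the memory; unref returns it to the pool rather than
     * freeing it, via the dispose hook installed by acquire */
    gst_memory_unref (mem);
  }

  /* Deactivation flushes and frees pooled memory once nothing is
   * outstanding */
  gst_cuda_allocator_set_active (GST_CUDA_ALLOCATOR (pool), FALSE);
  gst_object_unref (pool);
}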