From 111b2c3f5373e4142809bf97d36e90fdf9d587bb Mon Sep 17 00:00:00 2001 From: Seungha Yang Date: Thu, 3 Mar 2022 03:25:47 +0900 Subject: [PATCH] nvcodec: Refactor GstCudaMemory abstraction * Hide GstCudaMemory member variables * Make GstCudaAllocator object GstCudaContext independent * Set offset/stride of memory correctly via video meta * Drop GST_BUFFER_POOL_OPTION_VIDEO_ALIGNMENT support. This implementation actually does not support custom alignment because we allocate device memory via cuMemAllocPitch of which alignment is almost uncontrollable Part-of: --- .../sys/nvcodec/cuda-converter.c | 201 +++--- .../sys/nvcodec/cuda-converter.h | 16 +- .../sys/nvcodec/gstcudabasefilter.c | 48 +- .../sys/nvcodec/gstcudabufferpool.c | 208 ++----- .../sys/nvcodec/gstcudadownload.c | 37 +- .../sys/nvcodec/gstcudamemory.c | 587 +++++++++--------- .../sys/nvcodec/gstcudamemory.h | 75 +-- .../sys/nvcodec/gstcudaupload.c | 37 +- .../sys/nvcodec/gstnvbaseenc.c | 28 +- .../gst-plugins-bad/sys/nvcodec/gstnvdec.c | 69 +- .../sys/nvcodec/gstnvdecoder.c | 31 +- .../gst-plugins-bad/sys/nvcodec/plugin.c | 2 + 12 files changed, 580 insertions(+), 759 deletions(-) diff --git a/subprojects/gst-plugins-bad/sys/nvcodec/cuda-converter.c b/subprojects/gst-plugins-bad/sys/nvcodec/cuda-converter.c index 614ca0cbd6..57523431e5 100644 --- a/subprojects/gst-plugins-bad/sys/nvcodec/cuda-converter.c +++ b/subprojects/gst-plugins-bad/sys/nvcodec/cuda-converter.c @@ -229,7 +229,7 @@ static const gchar templ_YUV_TO_YUV[] = GST_CUDA_KERNEL_FUNC "(cudaTextureObject_t tex0, cudaTextureObject_t tex1, cudaTextureObject_t tex2,\n" " unsigned char *dst0, unsigned char *dst1, unsigned char *dst2,\n" -" int stride)\n" +" int stride, int uv_stride)\n" "{\n" " int x_pos = blockIdx.x * blockDim.x + threadIdx.x;\n" " int y_pos = blockIdx.y * blockDim.y + threadIdx.y;\n" @@ -265,7 +265,7 @@ GST_CUDA_KERNEL_FUNC " v = tmp;\n" " }\n" " write_chroma (dst1,\n" -" dst2, u, v, x_pos, y_pos, CHROMA_PSTRIDE, stride, MASK);\n" +" dst2, u, v, x_pos, y_pos, CHROMA_PSTRIDE, uv_stride, MASK);\n" " }\n" "}\n" "\n" @@ -589,7 +589,7 @@ GST_CUDA_KERNEL_FUNC_TO_Y444 GST_CUDA_KERNEL_FUNC_Y444_TO_YUV "(cudaTextureObject_t tex0, cudaTextureObject_t tex1, cudaTextureObject_t tex2,\n" " unsigned char *dst0, unsigned char *dst1, unsigned char *dst2,\n" -" int stride)\n" +" int stride, int uv_stride)\n" "{\n" " int x_pos = blockIdx.x * blockDim.x + threadIdx.x;\n" " int y_pos = blockIdx.y * blockDim.y + threadIdx.y;\n" @@ -626,7 +626,7 @@ GST_CUDA_KERNEL_FUNC_Y444_TO_YUV " v = tmp;\n" " }\n" " write_chroma (dst1,\n" -" dst2, u, v, x_pos, y_pos, CHROMA_PSTRIDE, stride, MASK);\n" +" dst2, u, v, x_pos, y_pos, CHROMA_PSTRIDE, uv_stride, MASK);\n" " }\n" "}\n" "\n" @@ -745,9 +745,10 @@ struct _GstCudaConverter gchar *ptx; GstCudaStageBuffer fallback_buffer[GST_VIDEO_MAX_PLANES]; - gboolean (*convert) (GstCudaConverter * convert, const GstCudaMemory * src, - GstVideoInfo * in_info, GstCudaMemory * dst, GstVideoInfo * out_info, - CUstream cuda_stream); + /* *INDENT-OFF* */ + gboolean (*convert) (GstCudaConverter * convert, GstVideoFrame * src_frame, + GstVideoFrame * dst_frame, CUstream cuda_stream); + /* *INDENT-ON* */ const CUdeviceptr src; GstVideoInfo *cur_in_info; @@ -893,67 +894,25 @@ gst_cuda_converter_free (GstCudaConverter * convert) g_free (convert); } -/** - * gst_cuda_converter_frame: - * @convert: a #GstCudaConverter - * @src: a #GstCudaMemory - * @in_info: a #GstVideoInfo representing @src - * @dst: a #GstCudaMemory - * @out_info: a #GstVideoInfo representing @dst - * @cuda_stream: a #CUstream - * - * Convert the pixels of @src into @dest using @convert. - * Called without gst_cuda_context_push() and gst_cuda_context_pop() by caller - */ gboolean -gst_cuda_converter_frame (GstCudaConverter * convert, const GstCudaMemory * src, - GstVideoInfo * in_info, GstCudaMemory * dst, GstVideoInfo * out_info, - CUstream cuda_stream) +gst_cuda_converter_convert_frame (GstCudaConverter * convert, + GstVideoFrame * src_frame, GstVideoFrame * dst_frame, CUstream cuda_stream) { gboolean ret; g_return_val_if_fail (convert, FALSE); - g_return_val_if_fail (src, FALSE); - g_return_val_if_fail (in_info, FALSE); - g_return_val_if_fail (dst, FALSE); - g_return_val_if_fail (out_info, FALSE); + g_return_val_if_fail (src_frame, FALSE); + g_return_val_if_fail (dst_frame, FALSE); gst_cuda_context_push (convert->cuda_ctx); - ret = gst_cuda_converter_frame_unlocked (convert, - src, in_info, dst, out_info, cuda_stream); + ret = convert->convert (convert, src_frame, dst_frame, cuda_stream); gst_cuda_context_pop (NULL); return ret; } -/** - * gst_cuda_converter_frame_unlocked: - * @convert: a #GstCudaConverter - * @src: a #GstCudaMemory - * @in_info: a #GstVideoInfo representing @src - * @dst: a #GstCudaMemory - * @out_info: a #GstVideoInfo representing @dest - * @cuda_stream: a #CUstream - * - * Convert the pixels of @src into @dest using @convert. - * Caller should call this method after gst_cuda_context_push() - */ -gboolean -gst_cuda_converter_frame_unlocked (GstCudaConverter * convert, - const GstCudaMemory * src, GstVideoInfo * in_info, GstCudaMemory * dst, - GstVideoInfo * out_info, CUstream cuda_stream) -{ - g_return_val_if_fail (convert, FALSE); - g_return_val_if_fail (src, FALSE); - g_return_val_if_fail (in_info, FALSE); - g_return_val_if_fail (dst, FALSE); - g_return_val_if_fail (out_info, FALSE); - - return convert->convert (convert, src, in_info, dst, out_info, cuda_stream); -} - /* allocate fallback memory for texture alignment requirement */ static gboolean convert_ensure_fallback_memory (GstCudaConverter * convert, @@ -1020,8 +979,8 @@ convert_create_texture_unchecked (const CUdeviceptr src, gint width, } static CUtexObject -convert_create_texture (GstCudaConverter * convert, const GstCudaMemory * src, - GstVideoInfo * info, guint plane, CUstream cuda_stream) +convert_create_texture (GstCudaConverter * convert, GstVideoFrame * src_frame, + guint plane, CUstream cuda_stream) { CUarray_format format = CU_AD_FORMAT_UNSIGNED_INT8; guint channels = 1; @@ -1030,22 +989,23 @@ convert_create_texture (GstCudaConverter * convert, const GstCudaMemory * src, CUresult cuda_ret; CUfilter_mode mode; - if (GST_VIDEO_INFO_COMP_DEPTH (info, plane) > 8) + if (GST_VIDEO_FRAME_COMP_DEPTH (src_frame, plane) > 8) format = CU_AD_FORMAT_UNSIGNED_INT16; /* FIXME: more graceful method ? */ if (plane != 0 && - GST_VIDEO_INFO_N_PLANES (info) != GST_VIDEO_INFO_N_COMPONENTS (info)) { + GST_VIDEO_FRAME_N_PLANES (src_frame) != + GST_VIDEO_FRAME_N_COMPONENTS (src_frame)) { channels = 2; } - src_ptr = src->data + src->offset[plane]; - stride = src->stride; + src_ptr = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (src_frame, plane); + stride = GST_VIDEO_FRAME_PLANE_STRIDE (src_frame, plane); if (convert->texture_alignment && (src_ptr % convert->texture_alignment)) { CUDA_MEMCPY2D copy_params = { 0, }; - if (!convert_ensure_fallback_memory (convert, info, plane)) + if (!convert_ensure_fallback_memory (convert, &src_frame->info, plane)) return 0; GST_LOG ("device memory was not aligned, copy to fallback memory"); @@ -1057,9 +1017,9 @@ convert_create_texture (GstCudaConverter * convert, const GstCudaMemory * src, copy_params.dstMemoryType = CU_MEMORYTYPE_DEVICE; copy_params.dstPitch = convert->fallback_buffer[plane].cuda_stride; copy_params.dstDevice = convert->fallback_buffer[plane].device_ptr; - copy_params.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (info, plane) - * GST_VIDEO_INFO_COMP_PSTRIDE (info, plane); - copy_params.Height = GST_VIDEO_INFO_COMP_HEIGHT (info, plane); + copy_params.WidthInBytes = GST_VIDEO_FRAME_COMP_WIDTH (src_frame, plane) + * GST_VIDEO_FRAME_COMP_PSTRIDE (src_frame, plane); + copy_params.Height = GST_VIDEO_FRAME_COMP_HEIGHT (src_frame, plane); cuda_ret = CuMemcpy2DAsync (©_params, cuda_stream); if (!gst_cuda_result (cuda_ret)) { @@ -1079,27 +1039,26 @@ convert_create_texture (GstCudaConverter * convert, const GstCudaMemory * src, mode = CU_TR_FILTER_MODE_LINEAR; return convert_create_texture_unchecked (src_ptr, - GST_VIDEO_INFO_COMP_WIDTH (info, plane), - GST_VIDEO_INFO_COMP_HEIGHT (info, plane), channels, stride, format, mode, - cuda_stream); + GST_VIDEO_FRAME_COMP_WIDTH (src_frame, plane), + GST_VIDEO_FRAME_COMP_HEIGHT (src_frame, plane), channels, stride, format, + mode, cuda_stream); } /* main conversion function for YUV to YUV conversion */ static gboolean -convert_YUV_TO_YUV (GstCudaConverter * convert, - const GstCudaMemory * src, GstVideoInfo * in_info, GstCudaMemory * dst, - GstVideoInfo * out_info, CUstream cuda_stream) +convert_YUV_TO_YUV (GstCudaConverter * convert, GstVideoFrame * src_frame, + GstVideoFrame * dst_frame, CUstream cuda_stream) { CUtexObject texture[GST_VIDEO_MAX_PLANES] = { 0, }; CUresult cuda_ret; gboolean ret = FALSE; CUdeviceptr dst_ptr[GST_VIDEO_MAX_PLANES] = { 0, }; - gint dst_stride; + gint dst_stride, dst_uv_stride; gint width, height; gint i; gpointer kernel_args[] = { &texture[0], &texture[1], &texture[2], - &dst_ptr[0], &dst_ptr[1], &dst_ptr[2], &dst_stride + &dst_ptr[0], &dst_ptr[1], &dst_ptr[2], &dst_stride, &dst_uv_stride }; /* conversion step @@ -1110,21 +1069,23 @@ convert_YUV_TO_YUV (GstCudaConverter * convert, */ /* map CUDA device memory to CUDA texture object */ - for (i = 0; i < GST_VIDEO_INFO_N_PLANES (in_info); i++) { - texture[i] = convert_create_texture (convert, src, in_info, i, cuda_stream); + for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (src_frame); i++) { + texture[i] = convert_create_texture (convert, src_frame, i, cuda_stream); if (!texture[i]) { GST_ERROR ("couldn't create texture for %d th plane", i); goto done; } } - for (i = 0; i < GST_VIDEO_INFO_N_PLANES (out_info); i++) - dst_ptr[i] = dst->data + dst->offset[i]; + for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (dst_frame); i++) { + dst_ptr[i] = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (dst_frame, i); + } - dst_stride = dst->stride; + dst_stride = GST_VIDEO_FRAME_PLANE_STRIDE (dst_frame, 0); + dst_uv_stride = GST_VIDEO_FRAME_PLANE_STRIDE (dst_frame, 1); - width = GST_VIDEO_INFO_WIDTH (out_info); - height = GST_VIDEO_INFO_HEIGHT (out_info); + width = GST_VIDEO_FRAME_WIDTH (dst_frame); + height = GST_VIDEO_FRAME_HEIGHT (dst_frame); cuda_ret = CuLaunchKernel (convert->kernel_func[0], DIV_UP (width, CUDA_BLOCK_X), @@ -1140,7 +1101,7 @@ convert_YUV_TO_YUV (GstCudaConverter * convert, gst_cuda_result (CuStreamSynchronize (cuda_stream)); done: - for (i = 0; i < GST_VIDEO_INFO_N_PLANES (in_info); i++) { + for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (src_frame); i++) { if (texture[i]) gst_cuda_result (CuTexObjectDestroy (texture[i])); } @@ -1150,9 +1111,8 @@ done: /* main conversion function for YUV to RGB conversion */ static gboolean -convert_YUV_TO_RGB (GstCudaConverter * convert, - const GstCudaMemory * src, GstVideoInfo * in_info, GstCudaMemory * dst, - GstVideoInfo * out_info, CUstream cuda_stream) +convert_YUV_TO_RGB (GstCudaConverter * convert, GstVideoFrame * src_frame, + GstVideoFrame * dst_frame, CUstream cuda_stream) { CUtexObject texture[GST_VIDEO_MAX_PLANES] = { 0, }; CUresult cuda_ret; @@ -1174,19 +1134,19 @@ convert_YUV_TO_RGB (GstCudaConverter * convert, */ /* map CUDA device memory to CUDA texture object */ - for (i = 0; i < GST_VIDEO_INFO_N_PLANES (in_info); i++) { - texture[i] = convert_create_texture (convert, src, in_info, i, cuda_stream); + for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (src_frame); i++) { + texture[i] = convert_create_texture (convert, src_frame, i, cuda_stream); if (!texture[i]) { GST_ERROR ("couldn't create texture for %d th plane", i); goto done; } } - dstRGB = dst->data; - dst_stride = dst->stride; + dstRGB = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (dst_frame, 0); + dst_stride = GST_VIDEO_FRAME_PLANE_STRIDE (dst_frame, 0); - width = GST_VIDEO_INFO_WIDTH (out_info); - height = GST_VIDEO_INFO_HEIGHT (out_info); + width = GST_VIDEO_FRAME_WIDTH (dst_frame); + height = GST_VIDEO_FRAME_HEIGHT (dst_frame); cuda_ret = CuLaunchKernel (convert->kernel_func[0], DIV_UP (width, CUDA_BLOCK_X), @@ -1202,7 +1162,7 @@ convert_YUV_TO_RGB (GstCudaConverter * convert, gst_cuda_result (CuStreamSynchronize (cuda_stream)); done: - for (i = 0; i < GST_VIDEO_INFO_N_PLANES (in_info); i++) { + for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (src_frame); i++) { if (texture[i]) gst_cuda_result (CuTexObjectDestroy (texture[i])); } @@ -1212,7 +1172,7 @@ done: static gboolean convert_UNPACK_RGB (GstCudaConverter * convert, CUfunction kernel_func, - CUstream cuda_stream, const GstCudaMemory * src, GstVideoInfo * in_info, + CUstream cuda_stream, GstVideoFrame * src_frame, CUdeviceptr dst, gint dst_stride, GstCudaRGBOrder * rgb_order) { CUdeviceptr srcRGB = 0; @@ -1227,12 +1187,12 @@ convert_UNPACK_RGB (GstCudaConverter * convert, CUfunction kernel_func, &convert->in_rgb_order.B, &convert->in_rgb_order.A, }; - srcRGB = src->data; - src_stride = src->stride; + srcRGB = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (src_frame, 0); + src_stride = GST_VIDEO_FRAME_PLANE_STRIDE (src_frame, 0); - width = GST_VIDEO_INFO_WIDTH (in_info); - height = GST_VIDEO_INFO_HEIGHT (in_info); - src_pstride = GST_VIDEO_INFO_COMP_PSTRIDE (in_info, 0); + width = GST_VIDEO_FRAME_WIDTH (src_frame); + height = GST_VIDEO_FRAME_HEIGHT (src_frame); + src_pstride = GST_VIDEO_FRAME_COMP_PSTRIDE (src_frame, 0); cuda_ret = CuLaunchKernel (kernel_func, DIV_UP (width, CUDA_BLOCK_X), @@ -1274,9 +1234,8 @@ convert_TO_Y444 (GstCudaConverter * convert, CUfunction kernel_func, /* main conversion function for RGB to YUV conversion */ static gboolean -convert_RGB_TO_YUV (GstCudaConverter * convert, - const GstCudaMemory * src, GstVideoInfo * in_info, GstCudaMemory * dst, - GstVideoInfo * out_info, CUstream cuda_stream) +convert_RGB_TO_YUV (GstCudaConverter * convert, GstVideoFrame * src_frame, + GstVideoFrame * dst_frame, CUstream cuda_stream) { CUtexObject texture = 0; CUtexObject yuv_texture[3] = { 0, }; @@ -1285,7 +1244,7 @@ convert_RGB_TO_YUV (GstCudaConverter * convert, gboolean ret = FALSE; gint in_width, in_height; gint out_width, out_height; - gint dst_stride; + gint dst_stride, dst_uv_stride; CUarray_format format = CU_AD_FORMAT_UNSIGNED_INT8; CUfilter_mode mode = CU_TR_FILTER_MODE_POINT; gint pstride = 1; @@ -1293,7 +1252,7 @@ convert_RGB_TO_YUV (GstCudaConverter * convert, gint i; gpointer kernel_args[] = { &yuv_texture[0], &yuv_texture[1], &yuv_texture[2], - &dst_ptr[0], &dst_ptr[1], &dst_ptr[2], &dst_stride + &dst_ptr[0], &dst_ptr[1], &dst_ptr[2], &dst_stride, &dst_uv_stride }; /* conversion step @@ -1304,21 +1263,22 @@ convert_RGB_TO_YUV (GstCudaConverter * convert, * the CUDA kernel function */ if (!convert_UNPACK_RGB (convert, convert->kernel_func[0], cuda_stream, - src, in_info, convert->unpack_surface.device_ptr, + src_frame, convert->unpack_surface.device_ptr, convert->unpack_surface.cuda_stride, &convert->in_rgb_order)) { GST_ERROR ("could not unpack input rgb"); goto done; } - in_width = GST_VIDEO_INFO_WIDTH (in_info); - in_height = GST_VIDEO_INFO_HEIGHT (in_info); + in_width = GST_VIDEO_FRAME_WIDTH (src_frame); + in_height = GST_VIDEO_FRAME_HEIGHT (src_frame); - out_width = GST_VIDEO_INFO_WIDTH (out_info); - out_height = GST_VIDEO_INFO_HEIGHT (out_info); - dst_stride = dst->stride; + out_width = GST_VIDEO_FRAME_WIDTH (dst_frame); + out_height = GST_VIDEO_FRAME_HEIGHT (dst_frame); + dst_stride = GST_VIDEO_FRAME_PLANE_STRIDE (dst_frame, 0); + dst_uv_stride = GST_VIDEO_FRAME_PLANE_STRIDE (dst_frame, 1); - if (GST_VIDEO_INFO_COMP_DEPTH (in_info, 0) > 8) { + if (GST_VIDEO_FRAME_COMP_DEPTH (src_frame, 0) > 8) { pstride = 2; bitdepth = 16; format = CU_AD_FORMAT_UNSIGNED_INT16; @@ -1365,8 +1325,8 @@ convert_RGB_TO_YUV (GstCudaConverter * convert, } } - for (i = 0; i < GST_VIDEO_INFO_N_PLANES (out_info); i++) - dst_ptr[i] = dst->data + dst->offset[i]; + for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (dst_frame); i++) + dst_ptr[i] = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (dst_frame, i); cuda_ret = CuLaunchKernel (convert->kernel_func[2], DIV_UP (out_width, CUDA_BLOCK_X), @@ -1394,9 +1354,8 @@ done: /* main conversion function for RGB to RGB conversion */ static gboolean -convert_RGB_TO_RGB (GstCudaConverter * convert, - const GstCudaMemory * src, GstVideoInfo * in_info, GstCudaMemory * dst, - GstVideoInfo * out_info, CUstream cuda_stream) +convert_RGB_TO_RGB (GstCudaConverter * convert, GstVideoFrame * src_frame, + GstVideoFrame * dst_frame, CUstream cuda_stream) { CUtexObject texture = 0; CUresult cuda_ret; @@ -1418,23 +1377,23 @@ convert_RGB_TO_RGB (GstCudaConverter * convert, */ if (!convert_UNPACK_RGB (convert, convert->kernel_func[0], cuda_stream, - src, in_info, convert->unpack_surface.device_ptr, + src_frame, convert->unpack_surface.device_ptr, convert->unpack_surface.cuda_stride, &convert->in_rgb_order)) { GST_ERROR ("could not unpack input rgb"); goto done; } - in_width = GST_VIDEO_INFO_WIDTH (in_info); - in_height = GST_VIDEO_INFO_HEIGHT (in_info); + in_width = GST_VIDEO_FRAME_WIDTH (src_frame); + in_height = GST_VIDEO_FRAME_HEIGHT (src_frame); - out_width = GST_VIDEO_INFO_WIDTH (out_info); - out_height = GST_VIDEO_INFO_HEIGHT (out_info); + out_width = GST_VIDEO_FRAME_WIDTH (dst_frame); + out_height = GST_VIDEO_FRAME_HEIGHT (dst_frame); - dstRGB = dst->data; - dst_stride = dst->stride; + dstRGB = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (dst_frame, 0); + dst_stride = GST_VIDEO_FRAME_PLANE_STRIDE (dst_frame, 0); - if (GST_VIDEO_INFO_COMP_DEPTH (in_info, 0) > 8) + if (GST_VIDEO_FRAME_COMP_DEPTH (src_frame, 0) > 8) format = CU_AD_FORMAT_UNSIGNED_INT16; /* Use h/w linear interpolation only when resize is required. diff --git a/subprojects/gst-plugins-bad/sys/nvcodec/cuda-converter.h b/subprojects/gst-plugins-bad/sys/nvcodec/cuda-converter.h index d4bae22ddf..5149ab0cd2 100644 --- a/subprojects/gst-plugins-bad/sys/nvcodec/cuda-converter.h +++ b/subprojects/gst-plugins-bad/sys/nvcodec/cuda-converter.h @@ -34,21 +34,11 @@ GstCudaConverter * gst_cuda_converter_new (GstVideoInfo * in_info, void gst_cuda_converter_free (GstCudaConverter * convert); -gboolean gst_cuda_converter_frame (GstCudaConverter * convert, - const GstCudaMemory * src, - GstVideoInfo * in_info, - GstCudaMemory * dst, - GstVideoInfo * out_info, +gboolean gst_cuda_converter_convert_frame (GstCudaConverter * convert, + GstVideoFrame * src_frame, + GstVideoFrame * dst_frame, CUstream cuda_stream); -gboolean gst_cuda_converter_frame_unlocked (GstCudaConverter * convert, - const GstCudaMemory * src, - GstVideoInfo * in_info, - GstCudaMemory * dst, - GstVideoInfo * out_info, - CUstream cuda_stream); - - G_END_DECLS #endif /* __GST_CUDA_CONVERTER_H__ */ diff --git a/subprojects/gst-plugins-bad/sys/nvcodec/gstcudabasefilter.c b/subprojects/gst-plugins-bad/sys/nvcodec/gstcudabasefilter.c index 21d459533c..911552e484 100644 --- a/subprojects/gst-plugins-bad/sys/nvcodec/gstcudabasefilter.c +++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstcudabasefilter.c @@ -168,47 +168,30 @@ gst_cuda_base_filter_propose_allocation (GstBaseTransform * trans, if (gst_query_get_n_allocation_pools (query) == 0) { GstStructure *config; - GstVideoAlignment align; - GstAllocationParams params = { 0, 31, 0, 0, }; - GstAllocator *allocator = NULL; - gint i; pool = gst_cuda_buffer_pool_new (ctrans->context); config = gst_buffer_pool_get_config (pool); - gst_video_alignment_reset (&align); - for (i = 0; i < GST_VIDEO_INFO_N_PLANES (&info); i++) { - align.stride_align[i] = 31; - } - gst_video_info_align (&info, &align); - gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META); - gst_buffer_pool_config_add_option (config, - GST_BUFFER_POOL_OPTION_VIDEO_ALIGNMENT); - gst_buffer_pool_config_set_video_alignment (config, &align); size = GST_VIDEO_INFO_SIZE (&info); gst_buffer_pool_config_set_params (config, caps, size, 0, 0); - gst_query_add_allocation_meta (query, GST_VIDEO_META_API_TYPE, NULL); - gst_query_add_allocation_pool (query, pool, size, 0, 0); - - if (gst_buffer_pool_config_get_allocator (config, &allocator, ¶ms)) { - if (params.align < 31) - params.align = 31; - - gst_query_add_allocation_param (query, allocator, ¶ms); - gst_buffer_pool_config_set_allocator (config, allocator, ¶ms); - } - if (!gst_buffer_pool_set_config (pool, config)) { GST_ERROR_OBJECT (ctrans, "failed to set config"); gst_object_unref (pool); return FALSE; } + /* Get updated size by cuda buffer pool */ + config = gst_buffer_pool_get_config (pool); + gst_buffer_pool_config_get_params (config, NULL, &size, NULL, NULL); + gst_structure_free (config); + + gst_query_add_allocation_pool (query, pool, size, 0, 0); + gst_object_unref (pool); } @@ -265,6 +248,12 @@ gst_cuda_base_filter_decide_allocation (GstBaseTransform * trans, gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META); gst_buffer_pool_config_set_params (config, outcaps, size, min, max); gst_buffer_pool_set_config (pool, config); + + /* Get updated size by cuda buffer pool */ + config = gst_buffer_pool_get_config (pool); + gst_buffer_pool_config_get_params (config, NULL, &size, NULL, NULL); + gst_structure_free (config); + if (update_pool) gst_query_set_nth_allocation_pool (query, 0, pool, size, min, max); else @@ -285,8 +274,6 @@ gst_cuda_base_filter_transform (GstBaseTransform * trans, GstVideoFrame in_frame, out_frame; GstFlowReturn ret = GST_FLOW_OK; GstMemory *mem; - GstCudaMemory *in_cuda_mem = NULL; - GstCudaMemory *out_cuda_mem = NULL; if (gst_buffer_n_memory (inbuf) != 1) { GST_ERROR_OBJECT (self, "Invalid input buffer"); @@ -299,8 +286,6 @@ gst_cuda_base_filter_transform (GstBaseTransform * trans, return GST_FLOW_ERROR; } - in_cuda_mem = GST_CUDA_MEMORY_CAST (mem); - if (gst_buffer_n_memory (outbuf) != 1) { GST_ERROR_OBJECT (self, "Invalid output buffer"); return GST_FLOW_ERROR; @@ -312,8 +297,6 @@ gst_cuda_base_filter_transform (GstBaseTransform * trans, return GST_FLOW_ERROR; } - out_cuda_mem = GST_CUDA_MEMORY_CAST (mem); - if (!gst_video_frame_map (&in_frame, &ctrans->in_info, inbuf, GST_MAP_READ | GST_MAP_CUDA)) { GST_ERROR_OBJECT (self, "Failed to map input buffer"); @@ -327,9 +310,8 @@ gst_cuda_base_filter_transform (GstBaseTransform * trans, return GST_FLOW_ERROR; } - if (!gst_cuda_converter_frame (self->converter, - in_cuda_mem, &ctrans->in_info, - out_cuda_mem, &ctrans->out_info, ctrans->cuda_stream)) { + if (!gst_cuda_converter_convert_frame (self->converter, &in_frame, &out_frame, + ctrans->cuda_stream)) { GST_ERROR_OBJECT (self, "Failed to convert frame"); ret = GST_FLOW_ERROR; } diff --git a/subprojects/gst-plugins-bad/sys/nvcodec/gstcudabufferpool.c b/subprojects/gst-plugins-bad/sys/nvcodec/gstcudabufferpool.c index 4e57742639..bd2a248551 100644 --- a/subprojects/gst-plugins-bad/sys/nvcodec/gstcudabufferpool.c +++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstcudabufferpool.c @@ -30,11 +30,8 @@ GST_DEBUG_CATEGORY_STATIC (gst_cuda_buffer_pool_debug); struct _GstCudaBufferPoolPrivate { - GstAllocator *allocator; + GstCudaAllocator *allocator; GstVideoInfo info; - gboolean add_videometa; - gboolean need_alignment; - GstCudaAllocationParams params; }; #define gst_cuda_buffer_pool_parent_class parent_class @@ -44,8 +41,7 @@ G_DEFINE_TYPE_WITH_PRIVATE (GstCudaBufferPool, gst_cuda_buffer_pool, static const gchar ** gst_cuda_buffer_pool_get_options (GstBufferPool * pool) { - static const gchar *options[] = { GST_BUFFER_POOL_OPTION_VIDEO_META, - GST_BUFFER_POOL_OPTION_VIDEO_ALIGNMENT, NULL + static const gchar *options[] = { GST_BUFFER_POOL_OPTION_VIDEO_META, NULL }; return options; @@ -54,153 +50,86 @@ gst_cuda_buffer_pool_get_options (GstBufferPool * pool) static gboolean gst_cuda_buffer_pool_set_config (GstBufferPool * pool, GstStructure * config) { - GstCudaBufferPool *cuda_pool = GST_CUDA_BUFFER_POOL_CAST (pool); - GstCudaBufferPoolPrivate *priv = cuda_pool->priv; + GstCudaBufferPool *self = GST_CUDA_BUFFER_POOL (pool); + GstCudaBufferPoolPrivate *priv = self->priv; GstCaps *caps = NULL; guint size, min_buffers, max_buffers; - guint max_align, n; - GstAllocator *allocator = NULL; - GstAllocationParams *params = (GstAllocationParams *) & priv->params; - GstVideoInfo *info = &priv->params.info; + GstVideoInfo info; + GstMemory *mem; + GstCudaMemory *cmem; if (!gst_buffer_pool_config_get_params (config, &caps, &size, &min_buffers, - &max_buffers)) - goto wrong_config; - - if (caps == NULL) - goto no_caps; - - if (!gst_buffer_pool_config_get_allocator (config, &allocator, params)) - goto wrong_config; - - /* now parse the caps from the config */ - if (!gst_video_info_from_caps (info, caps)) - goto wrong_caps; - - GST_LOG_OBJECT (pool, "%dx%d, caps %" GST_PTR_FORMAT, - GST_VIDEO_INFO_WIDTH (info), GST_VIDEO_INFO_HEIGHT (info), caps); - - gst_clear_object (&priv->allocator); - - if (allocator) { - if (!GST_IS_CUDA_ALLOCATOR (allocator)) { - goto wrong_allocator; - } else { - priv->allocator = gst_object_ref (allocator); - } - } else { - allocator = priv->allocator = gst_cuda_allocator_new (cuda_pool->context); - if (G_UNLIKELY (priv->allocator == NULL)) - goto no_allocator; - } - - priv->add_videometa = gst_buffer_pool_config_has_option (config, - GST_BUFFER_POOL_OPTION_VIDEO_META); - - priv->need_alignment = gst_buffer_pool_config_has_option (config, - GST_BUFFER_POOL_OPTION_VIDEO_ALIGNMENT); - - max_align = params->align; - - /* do memory align */ - if (priv->need_alignment && priv->add_videometa) { - GstVideoAlignment valign; - - gst_buffer_pool_config_get_video_alignment (config, &valign); - - for (n = 0; n < GST_VIDEO_MAX_PLANES; ++n) - max_align |= valign.stride_align[n]; - - for (n = 0; n < GST_VIDEO_MAX_PLANES; ++n) - valign.stride_align[n] = max_align; - - if (!gst_video_info_align (info, &valign)) - goto failed_to_align; - - gst_buffer_pool_config_set_video_alignment (config, &valign); - } - - if (params->align < max_align) { - GST_WARNING_OBJECT (pool, "allocation params alignment %u is smaller " - "than the max specified video stride alignment %u, fixing", - (guint) params->align, max_align); - - params->align = max_align; - gst_buffer_pool_config_set_allocator (config, allocator, params); - } - - gst_buffer_pool_config_set_params (config, caps, GST_VIDEO_INFO_SIZE (info), - min_buffers, max_buffers); - - return GST_BUFFER_POOL_CLASS (parent_class)->set_config (pool, config); - - /* ERRORS */ -wrong_config: - { - GST_WARNING_OBJECT (pool, "invalid config"); + &max_buffers)) { + GST_WARNING_OBJECT (self, "invalid config"); return FALSE; } -no_caps: - { + + if (!caps) { GST_WARNING_OBJECT (pool, "no caps in config"); return FALSE; } -wrong_caps: - { - GST_WARNING_OBJECT (pool, - "failed getting geometry from caps %" GST_PTR_FORMAT, caps); + + if (!gst_video_info_from_caps (&info, caps)) { + GST_WARNING_OBJECT (self, "Failed to convert caps to video-info"); return FALSE; } -no_allocator: - { - GST_WARNING_OBJECT (pool, "Could not create new CUDA allocator"); + + gst_clear_object (&priv->allocator); + priv->allocator = (GstCudaAllocator *) + gst_allocator_find (GST_CUDA_MEMORY_TYPE_NAME); + if (!priv->allocator) { + GST_WARNING_OBJECT (self, "CudaAllocator is unavailable"); return FALSE; } -wrong_allocator: - { - GST_WARNING_OBJECT (pool, "Incorrect allocator type for this pool"); - return FALSE; - } -failed_to_align: - { - GST_WARNING_OBJECT (pool, "Failed to align"); + + mem = gst_cuda_allocator_alloc (priv->allocator, self->context, &info); + if (!mem) { + GST_WARNING_OBJECT (self, "Failed to allocate memory"); return FALSE; } + + cmem = GST_CUDA_MEMORY_CAST (mem); + + gst_buffer_pool_config_set_params (config, caps, + GST_VIDEO_INFO_SIZE (&cmem->info), min_buffers, max_buffers); + + priv->info = info; + + gst_memory_unref (mem); + + return GST_BUFFER_POOL_CLASS (parent_class)->set_config (pool, config); } static GstFlowReturn gst_cuda_buffer_pool_alloc (GstBufferPool * pool, GstBuffer ** buffer, GstBufferPoolAcquireParams * params) { - GstCudaBufferPool *cuda_pool = GST_CUDA_BUFFER_POOL_CAST (pool); - GstCudaBufferPoolPrivate *priv = cuda_pool->priv; - GstVideoInfo *info; - GstBuffer *cuda; + GstCudaBufferPool *self = GST_CUDA_BUFFER_POOL_CAST (pool); + GstCudaBufferPoolPrivate *priv = self->priv; + GstVideoInfo *info = &priv->info; + GstBuffer *buf; GstMemory *mem; + GstCudaMemory *cmem; - info = &priv->params.info; - - cuda = gst_buffer_new (); - - mem = gst_cuda_allocator_alloc (GST_ALLOCATOR_CAST (priv->allocator), - GST_VIDEO_INFO_SIZE (info), &priv->params); - - if (mem == NULL) { - gst_buffer_unref (cuda); + mem = gst_cuda_allocator_alloc (priv->allocator, self->context, &priv->info); + if (!mem) { GST_WARNING_OBJECT (pool, "Cannot create CUDA memory"); return GST_FLOW_ERROR; } - gst_buffer_append_memory (cuda, mem); - if (priv->add_videometa) { - GST_DEBUG_OBJECT (pool, "adding GstVideoMeta"); - gst_buffer_add_video_meta_full (cuda, GST_VIDEO_FRAME_FLAG_NONE, - GST_VIDEO_INFO_FORMAT (info), GST_VIDEO_INFO_WIDTH (info), - GST_VIDEO_INFO_HEIGHT (info), GST_VIDEO_INFO_N_PLANES (info), - info->offset, info->stride); - } + cmem = GST_CUDA_MEMORY_CAST (mem); - *buffer = cuda; + buf = gst_buffer_new (); + + gst_buffer_append_memory (buf, mem); + + GST_DEBUG_OBJECT (pool, "adding GstVideoMeta"); + gst_buffer_add_video_meta_full (buf, GST_VIDEO_FRAME_FLAG_NONE, + GST_VIDEO_INFO_FORMAT (info), GST_VIDEO_INFO_WIDTH (info), + GST_VIDEO_INFO_HEIGHT (info), GST_VIDEO_INFO_N_PLANES (info), + cmem->info.offset, cmem->info.stride); + + *buffer = buf; return GST_FLOW_OK; } @@ -208,44 +137,41 @@ gst_cuda_buffer_pool_alloc (GstBufferPool * pool, GstBuffer ** buffer, GstBufferPool * gst_cuda_buffer_pool_new (GstCudaContext * context) { - GstCudaBufferPool *pool; + GstCudaBufferPool *self; - pool = g_object_new (GST_TYPE_CUDA_BUFFER_POOL, NULL); - gst_object_ref_sink (pool); + g_return_val_if_fail (GST_IS_CUDA_CONTEXT (context), NULL); - pool->context = gst_object_ref (context); + self = g_object_new (GST_TYPE_CUDA_BUFFER_POOL, NULL); + gst_object_ref_sink (self); - GST_LOG_OBJECT (pool, "new CUDA buffer pool %p", pool); + self->context = gst_object_ref (context); - return GST_BUFFER_POOL_CAST (pool); + return GST_BUFFER_POOL_CAST (self); } static void gst_cuda_buffer_pool_dispose (GObject * object) { - GstCudaBufferPool *pool = GST_CUDA_BUFFER_POOL_CAST (object); - GstCudaBufferPoolPrivate *priv = pool->priv; - - GST_LOG_OBJECT (pool, "finalize CUDA buffer pool %p", pool); + GstCudaBufferPool *self = GST_CUDA_BUFFER_POOL_CAST (object); + GstCudaBufferPoolPrivate *priv = self->priv; gst_clear_object (&priv->allocator); - gst_clear_object (&pool->context); + gst_clear_object (&self->context); G_OBJECT_CLASS (parent_class)->dispose (object); } - static void gst_cuda_buffer_pool_class_init (GstCudaBufferPoolClass * klass) { GObjectClass *gobject_class = (GObjectClass *) klass; - GstBufferPoolClass *gstbufferpool_class = (GstBufferPoolClass *) klass; + GstBufferPoolClass *bufferpool_class = (GstBufferPoolClass *) klass; gobject_class->dispose = gst_cuda_buffer_pool_dispose; - gstbufferpool_class->get_options = gst_cuda_buffer_pool_get_options; - gstbufferpool_class->set_config = gst_cuda_buffer_pool_set_config; - gstbufferpool_class->alloc_buffer = gst_cuda_buffer_pool_alloc; + bufferpool_class->get_options = gst_cuda_buffer_pool_get_options; + bufferpool_class->set_config = gst_cuda_buffer_pool_set_config; + bufferpool_class->alloc_buffer = gst_cuda_buffer_pool_alloc; GST_DEBUG_CATEGORY_INIT (gst_cuda_buffer_pool_debug, "cudabufferpool", 0, "CUDA Buffer Pool"); diff --git a/subprojects/gst-plugins-bad/sys/nvcodec/gstcudadownload.c b/subprojects/gst-plugins-bad/sys/nvcodec/gstcudadownload.c index 9fd4cfa62c..c8ef47eac2 100644 --- a/subprojects/gst-plugins-bad/sys/nvcodec/gstcudadownload.c +++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstcudadownload.c @@ -180,10 +180,6 @@ gst_cuda_download_propose_allocation (GstBaseTransform * trans, if (gst_query_get_n_allocation_pools (query) == 0) { GstCapsFeatures *features; GstStructure *config; - GstVideoAlignment align; - GstAllocationParams params = { 0, 31, 0, 0, }; - GstAllocator *allocator = NULL; - gint i; features = gst_caps_get_features (caps, 0); @@ -197,38 +193,25 @@ gst_cuda_download_propose_allocation (GstBaseTransform * trans, config = gst_buffer_pool_get_config (pool); - gst_video_alignment_reset (&align); - for (i = 0; i < GST_VIDEO_INFO_N_PLANES (&info); i++) { - align.stride_align[i] = 31; - } - gst_video_info_align (&info, &align); - gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META); - gst_buffer_pool_config_add_option (config, - GST_BUFFER_POOL_OPTION_VIDEO_ALIGNMENT); - gst_buffer_pool_config_set_video_alignment (config, &align); size = GST_VIDEO_INFO_SIZE (&info); gst_buffer_pool_config_set_params (config, caps, size, 0, 0); - gst_query_add_allocation_meta (query, GST_VIDEO_META_API_TYPE, NULL); - gst_query_add_allocation_pool (query, pool, size, 0, 0); - - if (gst_buffer_pool_config_get_allocator (config, &allocator, ¶ms)) { - if (params.align < 31) - params.align = 31; - - gst_query_add_allocation_param (query, allocator, ¶ms); - gst_buffer_pool_config_set_allocator (config, allocator, ¶ms); - } - if (!gst_buffer_pool_set_config (pool, config)) { GST_ERROR_OBJECT (ctrans, "failed to set config"); gst_object_unref (pool); return FALSE; } + /* Get updated size by cuda buffer pool */ + config = gst_buffer_pool_get_config (pool); + gst_buffer_pool_config_get_params (config, NULL, &size, NULL, NULL); + gst_structure_free (config); + + gst_query_add_allocation_pool (query, pool, size, 0, 0); + gst_object_unref (pool); } @@ -295,6 +278,12 @@ gst_cuda_download_decide_allocation (GstBaseTransform * trans, GstQuery * query) gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META); gst_buffer_pool_config_set_params (config, outcaps, size, min, max); gst_buffer_pool_set_config (pool, config); + + /* Get updated size by cuda buffer pool */ + config = gst_buffer_pool_get_config (pool); + gst_buffer_pool_config_get_params (config, NULL, &size, NULL, NULL); + gst_structure_free (config); + if (update_pool) gst_query_set_nth_allocation_pool (query, 0, pool, size, min, max); else diff --git a/subprojects/gst-plugins-bad/sys/nvcodec/gstcudamemory.c b/subprojects/gst-plugins-bad/sys/nvcodec/gstcudamemory.c index 37d2eb0ff4..d0e4fa677c 100644 --- a/subprojects/gst-plugins-bad/sys/nvcodec/gstcudamemory.c +++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstcudamemory.c @@ -26,20 +26,33 @@ #include -GST_DEBUG_CATEGORY_STATIC (cudaallocator_debug); -#define GST_CAT_DEFAULT cudaallocator_debug -GST_DEBUG_CATEGORY_STATIC (GST_CAT_MEMORY); +GST_DEBUG_CATEGORY_STATIC (cuda_allocator_debug); +#define GST_CAT_DEFAULT cuda_allocator_debug + +static GstAllocator *_gst_cuda_allocator = NULL; + +struct _GstCudaMemoryPrivate +{ + CUdeviceptr data; + void *staging; + + /* params used for cuMemAllocPitch */ + gsize pitch; + guint width_in_bytes; + guint height; + + GMutex lock; +}; #define gst_cuda_allocator_parent_class parent_class G_DEFINE_TYPE (GstCudaAllocator, gst_cuda_allocator, GST_TYPE_ALLOCATOR); -static void gst_cuda_allocator_dispose (GObject * object); static void gst_cuda_allocator_free (GstAllocator * allocator, GstMemory * memory); -static gpointer cuda_mem_map (GstCudaMemory * mem, gsize maxsize, +static gpointer cuda_mem_map (GstMemory * mem, gsize maxsize, GstMapFlags flags); -static void cuda_mem_unmap_full (GstCudaMemory * mem, GstMapInfo * info); +static void cuda_mem_unmap_full (GstMemory * mem, GstMapInfo * info); static GstMemory *cuda_mem_copy (GstMemory * mem, gssize offset, gssize size); static GstMemory * @@ -52,17 +65,13 @@ gst_cuda_allocator_dummy_alloc (GstAllocator * allocator, gsize size, static void gst_cuda_allocator_class_init (GstCudaAllocatorClass * klass) { - GObjectClass *gobject_class = G_OBJECT_CLASS (klass); GstAllocatorClass *allocator_class = GST_ALLOCATOR_CLASS (klass); - gobject_class->dispose = gst_cuda_allocator_dispose; - allocator_class->alloc = GST_DEBUG_FUNCPTR (gst_cuda_allocator_dummy_alloc); allocator_class->free = GST_DEBUG_FUNCPTR (gst_cuda_allocator_free); - GST_DEBUG_CATEGORY_INIT (cudaallocator_debug, "cudaallocator", 0, + GST_DEBUG_CATEGORY_INIT (cuda_allocator_debug, "cudaallocator", 0, "CUDA Allocator"); - GST_DEBUG_CATEGORY_GET (GST_CAT_MEMORY, "GST_MEMORY"); } static void @@ -74,337 +83,273 @@ gst_cuda_allocator_init (GstCudaAllocator * allocator) alloc->mem_type = GST_CUDA_MEMORY_TYPE_NAME; - alloc->mem_map = (GstMemoryMapFunction) cuda_mem_map; - alloc->mem_unmap_full = (GstMemoryUnmapFullFunction) cuda_mem_unmap_full; - alloc->mem_copy = (GstMemoryCopyFunction) cuda_mem_copy; + alloc->mem_map = cuda_mem_map; + alloc->mem_unmap_full = cuda_mem_unmap_full; + alloc->mem_copy = cuda_mem_copy; GST_OBJECT_FLAG_SET (allocator, GST_ALLOCATOR_FLAG_CUSTOM_ALLOC); } -static void -gst_cuda_allocator_dispose (GObject * object) +static GstMemory * +gst_cuda_allocator_alloc_internal (GstCudaAllocator * self, + GstCudaContext * context, const GstVideoInfo * info, + guint width_in_bytes, guint alloc_height) { - GstCudaAllocator *self = GST_CUDA_ALLOCATOR_CAST (object); - - GST_DEBUG_OBJECT (self, "dispose"); - - gst_clear_object (&self->context); - G_OBJECT_CLASS (parent_class)->dispose (object); -} - -GstMemory * -gst_cuda_allocator_alloc (GstAllocator * allocator, gsize size, - GstCudaAllocationParams * params) -{ - GstCudaAllocator *self = GST_CUDA_ALLOCATOR_CAST (allocator); - gsize maxsize = size + params->parent.prefix + params->parent.padding; - gsize align = params->parent.align; - gsize offset = params->parent.prefix; - GstMemoryFlags flags = params->parent.flags; + GstCudaMemoryPrivate *priv; + GstCudaMemory *mem; CUdeviceptr data; gboolean ret = FALSE; - GstCudaMemory *mem; - GstVideoInfo *info = ¶ms->info; - gint i; - guint width, height; - gsize stride, plane_offset; + gsize pitch; + guint height = GST_VIDEO_INFO_HEIGHT (info); + GstVideoInfo *alloc_info; - if (!gst_cuda_context_push (self->context)) + if (!gst_cuda_context_push (context)) return NULL; - /* ensure configured alignment */ - align |= gst_memory_alignment; - /* allocate more to compensate for alignment */ - maxsize += align; - - GST_CAT_DEBUG_OBJECT (GST_CAT_MEMORY, self, "allocate new cuda memory"); - - width = GST_VIDEO_INFO_COMP_WIDTH (info, 0) * - GST_VIDEO_INFO_COMP_PSTRIDE (info, 0); - height = 0; - for (i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++) - height += GST_VIDEO_INFO_COMP_HEIGHT (info, i); - - ret = gst_cuda_result (CuMemAllocPitch (&data, &stride, width, height, 16)); + ret = gst_cuda_result (CuMemAllocPitch (&data, &pitch, width_in_bytes, + alloc_height, 16)); gst_cuda_context_pop (NULL); - if (G_UNLIKELY (!ret)) { - GST_CAT_ERROR_OBJECT (GST_CAT_MEMORY, self, "CUDA allocation failure"); + if (!ret) { + GST_ERROR_OBJECT (self, "Failed to allocate CUDA memory"); return NULL; } mem = g_new0 (GstCudaMemory, 1); - g_mutex_init (&mem->lock); - mem->data = data; - mem->alloc_params = *params; - mem->stride = stride; + mem->priv = priv = g_new0 (GstCudaMemoryPrivate, 1); - plane_offset = 0; - for (i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++) { - mem->offset[i] = plane_offset; - plane_offset += stride * GST_VIDEO_INFO_COMP_HEIGHT (info, i); + priv->data = data; + priv->pitch = pitch; + priv->width_in_bytes = width_in_bytes; + priv->height = alloc_height; + g_mutex_init (&priv->lock); + + mem->context = gst_object_ref (context); + mem->info = *info; + mem->info.size = pitch * alloc_height; + + alloc_info = &mem->info; + gst_memory_init (GST_MEMORY_CAST (mem), 0, GST_ALLOCATOR_CAST (self), + NULL, alloc_info->size, 0, 0, alloc_info->size); + + switch (GST_VIDEO_INFO_FORMAT (info)) { + case GST_VIDEO_FORMAT_I420: + case GST_VIDEO_FORMAT_YV12: + case GST_VIDEO_FORMAT_I420_10LE: + /* we are wasting space yes, but required so that this memory + * can be used in kernel function */ + alloc_info->stride[0] = pitch; + alloc_info->stride[1] = pitch; + alloc_info->stride[2] = pitch; + alloc_info->offset[0] = 0; + alloc_info->offset[1] = alloc_info->stride[0] * height; + alloc_info->offset[2] = alloc_info->offset[1] + + alloc_info->stride[1] * height / 2; + break; + case GST_VIDEO_FORMAT_NV12: + case GST_VIDEO_FORMAT_NV21: + case GST_VIDEO_FORMAT_P010_10LE: + case GST_VIDEO_FORMAT_P016_LE: + alloc_info->stride[0] = pitch; + alloc_info->stride[1] = pitch; + alloc_info->offset[0] = 0; + alloc_info->offset[1] = alloc_info->stride[0] * height; + break; + case GST_VIDEO_FORMAT_Y444: + case GST_VIDEO_FORMAT_Y444_16LE: + alloc_info->stride[0] = pitch; + alloc_info->stride[1] = pitch; + alloc_info->stride[2] = pitch; + alloc_info->offset[0] = 0; + alloc_info->offset[1] = alloc_info->stride[0] * height; + alloc_info->offset[2] = alloc_info->offset[1] * 2; + break; + case GST_VIDEO_FORMAT_BGRA: + case GST_VIDEO_FORMAT_RGBA: + case GST_VIDEO_FORMAT_RGBx: + case GST_VIDEO_FORMAT_BGRx: + case GST_VIDEO_FORMAT_ARGB: + case GST_VIDEO_FORMAT_ABGR: + case GST_VIDEO_FORMAT_RGB: + case GST_VIDEO_FORMAT_BGR: + case GST_VIDEO_FORMAT_BGR10A2_LE: + case GST_VIDEO_FORMAT_RGB10A2_LE: + alloc_info->stride[0] = pitch; + alloc_info->offset[0] = 0; + break; + default: + GST_ERROR_OBJECT (self, "Unexpected format %s", + gst_video_format_to_string (GST_VIDEO_INFO_FORMAT (info))); + g_assert_not_reached (); + gst_memory_unref (GST_MEMORY_CAST (mem)); + return NULL; } - mem->context = gst_object_ref (self->context); - - gst_memory_init (GST_MEMORY_CAST (mem), - flags, GST_ALLOCATOR_CAST (self), NULL, maxsize, align, offset, size); - return GST_MEMORY_CAST (mem); } static void gst_cuda_allocator_free (GstAllocator * allocator, GstMemory * memory) { - GstCudaAllocator *self = GST_CUDA_ALLOCATOR_CAST (allocator); GstCudaMemory *mem = GST_CUDA_MEMORY_CAST (memory); + GstCudaMemoryPrivate *priv = mem->priv; - GST_CAT_DEBUG_OBJECT (GST_CAT_MEMORY, allocator, "free cuda memory"); - - g_mutex_clear (&mem->lock); - - gst_cuda_context_push (self->context); - if (mem->data) - gst_cuda_result (CuMemFree (mem->data)); - - if (mem->map_alloc_data) - gst_cuda_result (CuMemFreeHost (mem->map_alloc_data)); + gst_cuda_context_push (mem->context); + if (priv->data) + gst_cuda_result (CuMemFree (priv->data)); + if (priv->staging) + gst_cuda_result (CuMemFreeHost (priv->staging)); gst_cuda_context_pop (NULL); + gst_object_unref (mem->context); + g_mutex_clear (&priv->lock); + g_free (mem->priv); g_free (mem); } -/* called with lock */ static gboolean -gst_cuda_memory_upload_transfer (GstCudaMemory * mem) +gst_cuda_memory_upload (GstCudaAllocator * self, GstCudaMemory * mem) { - gint i; - GstVideoInfo *info = &mem->alloc_params.info; + GstCudaMemoryPrivate *priv = mem->priv; gboolean ret = TRUE; + CUDA_MEMCPY2D param = { 0, }; - if (!mem->map_data) { - GST_CAT_ERROR (GST_CAT_MEMORY, "no staging memory to upload"); + if (!priv->staging || + !GST_MEMORY_FLAG_IS_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD)) { + return TRUE; + } + + if (!gst_cuda_context_push (mem->context)) { + GST_ERROR_OBJECT (self, "Failed to push cuda context"); return FALSE; } - for (i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++) { - CUDA_MEMCPY2D param = { 0, }; + param.srcMemoryType = CU_MEMORYTYPE_HOST; + param.srcHost = priv->staging; + param.srcPitch = priv->pitch; - param.srcMemoryType = CU_MEMORYTYPE_HOST; - param.srcHost = - (guint8 *) mem->map_data + GST_VIDEO_INFO_PLANE_OFFSET (info, i); - param.srcPitch = GST_VIDEO_INFO_PLANE_STRIDE (info, i); + param.dstMemoryType = CU_MEMORYTYPE_DEVICE; + param.dstDevice = (CUdeviceptr) priv->data; + param.dstPitch = priv->pitch; + param.WidthInBytes = priv->width_in_bytes; + param.Height = priv->height; - param.dstMemoryType = CU_MEMORYTYPE_DEVICE; - param.dstDevice = mem->data + mem->offset[i]; - param.dstPitch = mem->stride; - param.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (info, i) * - GST_VIDEO_INFO_COMP_PSTRIDE (info, i); - param.Height = GST_VIDEO_INFO_COMP_HEIGHT (info, i); + ret = gst_cuda_result (CuMemcpy2D (¶m)); + gst_cuda_context_pop (NULL); - if (!gst_cuda_result (CuMemcpy2DAsync (¶m, NULL))) { - GST_CAT_ERROR (GST_CAT_MEMORY, "Failed to copy %dth plane", i); - ret = FALSE; - break; - } - } - gst_cuda_result (CuStreamSynchronize (NULL)); + if (!ret) + GST_ERROR_OBJECT (self, "Failed to upload memory"); return ret; } -/* called with lock */ static gboolean -gst_cuda_memory_download_transfer (GstCudaMemory * mem) +gst_cuda_memory_download (GstCudaAllocator * self, GstCudaMemory * mem) { - gint i; - GstVideoInfo *info = &mem->alloc_params.info; + GstCudaMemoryPrivate *priv = mem->priv; + gboolean ret = TRUE; + CUDA_MEMCPY2D param = { 0, }; - if (!mem->map_data) { - GST_CAT_ERROR (GST_CAT_MEMORY, "no staging memory to upload"); + if (!GST_MEMORY_FLAG_IS_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD)) + return TRUE; + + if (!gst_cuda_context_push (mem->context)) { + GST_ERROR_OBJECT (self, "Failed to push cuda context"); return FALSE; } - for (i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++) { - CUDA_MEMCPY2D param = { 0, }; - - param.srcMemoryType = CU_MEMORYTYPE_DEVICE; - param.srcDevice = mem->data + mem->offset[i]; - param.srcPitch = mem->stride; - - param.dstMemoryType = CU_MEMORYTYPE_HOST; - param.dstHost = - (guint8 *) mem->map_data + GST_VIDEO_INFO_PLANE_OFFSET (info, i); - param.dstPitch = GST_VIDEO_INFO_PLANE_STRIDE (info, i); - param.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (info, i) * - GST_VIDEO_INFO_COMP_PSTRIDE (info, i); - param.Height = GST_VIDEO_INFO_COMP_HEIGHT (info, i); - - if (!gst_cuda_result (CuMemcpy2DAsync (¶m, NULL))) { - GST_CAT_ERROR (GST_CAT_MEMORY, "Failed to copy %dth plane", i); - CuMemFreeHost (mem->map_alloc_data); - mem->map_alloc_data = mem->map_data = mem->align_data = NULL; - break; - } - } - gst_cuda_result (CuStreamSynchronize (NULL)); - - if (!mem->map_data) - return FALSE; - - return TRUE; -} - -static gpointer -gst_cuda_memory_device_memory_map (GstCudaMemory * mem) -{ - GstMemory *memory = GST_MEMORY_CAST (mem); - gpointer data; - gsize aoffset; - gsize align = memory->align; - - if (mem->map_data) { - return mem->map_data; - } - - GST_CAT_DEBUG (GST_CAT_MEMORY, "alloc host memory for map"); - - if (!mem->map_alloc_data) { - gsize maxsize; - guint8 *align_data; - - maxsize = memory->maxsize + align; - if (!gst_cuda_context_push (mem->context)) { - GST_CAT_ERROR (GST_CAT_MEMORY, "cannot push cuda context"); - - return NULL; - } - - if (!gst_cuda_result (CuMemAllocHost (&data, maxsize))) { - GST_CAT_ERROR (GST_CAT_MEMORY, "cannot alloc host memory"); + if (!priv->staging) { + ret = gst_cuda_result (CuMemAllocHost (&priv->staging, + GST_MEMORY_CAST (mem)->size)); + if (!ret) { + GST_ERROR_OBJECT (self, "Failed to allocate staging memory"); gst_cuda_context_pop (NULL); - - return NULL; - } - - if (!gst_cuda_context_pop (NULL)) { - GST_CAT_WARNING (GST_CAT_MEMORY, "cannot pop cuda context"); - } - - mem->map_alloc_data = data; - align_data = data; - - /* do align */ - if ((aoffset = ((guintptr) align_data & align))) { - aoffset = (align + 1) - aoffset; - align_data += aoffset; - } - mem->align_data = align_data; - - /* first memory, always need download to staging */ - GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD); - } - - mem->map_data = mem->align_data; - - if (GST_MEMORY_FLAG_IS_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD)) { - if (!gst_cuda_context_push (mem->context)) { - GST_CAT_ERROR (GST_CAT_MEMORY, "cannot push cuda context"); - - return NULL; - } - - gst_cuda_memory_download_transfer (mem); - - if (!gst_cuda_context_pop (NULL)) { - GST_CAT_WARNING (GST_CAT_MEMORY, "cannot pop cuda context"); + return FALSE; } } - return mem->map_data; + param.srcMemoryType = CU_MEMORYTYPE_DEVICE; + param.srcDevice = (CUdeviceptr) priv->data; + param.srcPitch = priv->pitch; + + param.dstMemoryType = CU_MEMORYTYPE_HOST; + param.dstHost = priv->staging; + param.dstPitch = priv->pitch; + param.WidthInBytes = priv->width_in_bytes; + param.Height = priv->height; + + ret = gst_cuda_result (CuMemcpy2D (¶m)); + gst_cuda_context_pop (NULL); + + if (!ret) + GST_ERROR_OBJECT (self, "Failed to upload memory"); + + return ret; } static gpointer -cuda_mem_map (GstCudaMemory * mem, gsize maxsize, GstMapFlags flags) +cuda_mem_map (GstMemory * mem, gsize maxsize, GstMapFlags flags) { + GstCudaAllocator *self = GST_CUDA_ALLOCATOR (mem->allocator); + GstCudaMemory *cmem = GST_CUDA_MEMORY_CAST (mem); + GstCudaMemoryPrivate *priv = cmem->priv; gpointer ret = NULL; - g_mutex_lock (&mem->lock); - mem->map_count++; - + g_mutex_lock (&priv->lock); if ((flags & GST_MAP_CUDA) == GST_MAP_CUDA) { - /* upload from staging to device memory if necessary */ - if (GST_MEMORY_FLAG_IS_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD)) { - if (!gst_cuda_context_push (mem->context)) { - GST_CAT_ERROR (GST_CAT_MEMORY, "cannot push cuda context"); - g_mutex_unlock (&mem->lock); - - return NULL; - } - - if (!gst_cuda_memory_upload_transfer (mem)) { - g_mutex_unlock (&mem->lock); - return NULL; - } - - gst_cuda_context_pop (NULL); - } + if (!gst_cuda_memory_upload (self, cmem)) + goto out; GST_MEMORY_FLAG_UNSET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD); if ((flags & GST_MAP_WRITE) == GST_MAP_WRITE) GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD); - g_mutex_unlock (&mem->lock); - return (gpointer) mem->data; + ret = (gpointer) priv->data; + goto out; } - ret = gst_cuda_memory_device_memory_map (mem); - if (ret == NULL) { - mem->map_count--; - g_mutex_unlock (&mem->lock); - return NULL; - } + /* First CPU access, must be downloaded */ + if (!priv->staging) + GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD); + + if (!gst_cuda_memory_download (self, cmem)) + goto out; + + ret = priv->staging; if ((flags & GST_MAP_WRITE) == GST_MAP_WRITE) GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD); GST_MEMORY_FLAG_UNSET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD); - g_mutex_unlock (&mem->lock); +out: + g_mutex_unlock (&priv->lock); return ret; } static void -cuda_mem_unmap_full (GstCudaMemory * mem, GstMapInfo * info) +cuda_mem_unmap_full (GstMemory * mem, GstMapInfo * info) { - g_mutex_lock (&mem->lock); - mem->map_count--; - GST_CAT_TRACE (GST_CAT_MEMORY, - "unmap CUDA memory %p, map count %d, have map_data %s", - mem, mem->map_count, mem->map_data ? "true" : "false"); + GstCudaMemory *cmem = GST_CUDA_MEMORY_CAST (mem); + GstCudaMemoryPrivate *priv = cmem->priv; + g_mutex_lock (&priv->lock); if ((info->flags & GST_MAP_CUDA) == GST_MAP_CUDA) { if ((info->flags & GST_MAP_WRITE) == GST_MAP_WRITE) GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD); - g_mutex_unlock (&mem->lock); - return; + goto out; } - if ((info->flags & GST_MAP_WRITE)) + if ((info->flags & GST_MAP_WRITE) == GST_MAP_WRITE) GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD); - if (mem->map_count > 0 || !mem->map_data) { - g_mutex_unlock (&mem->lock); - return; - } - - mem->map_data = NULL; - g_mutex_unlock (&mem->lock); +out: + g_mutex_unlock (&priv->lock); return; } @@ -412,72 +357,82 @@ cuda_mem_unmap_full (GstCudaMemory * mem, GstMapInfo * info) static GstMemory * cuda_mem_copy (GstMemory * mem, gssize offset, gssize size) { - GstMemory *copy; + GstCudaAllocator *self = GST_CUDA_ALLOCATOR (mem->allocator); GstCudaMemory *src_mem = GST_CUDA_MEMORY_CAST (mem); - GstCudaMemory *dst_mem; - GstCudaContext *ctx = GST_CUDA_ALLOCATOR_CAST (mem->allocator)->context; - gint i; - GstVideoInfo *info; + GstCudaContext *context = src_mem->context; + GstMapInfo src_info, dst_info; + CUDA_MEMCPY2D param = { 0, }; + GstMemory *copy; + gboolean ret; /* offset and size are ignored */ - copy = gst_cuda_allocator_alloc (mem->allocator, mem->size, - &src_mem->alloc_params); + copy = gst_cuda_allocator_alloc_internal (self, context, + &src_mem->info, src_mem->priv->width_in_bytes, src_mem->priv->height); - dst_mem = GST_CUDA_MEMORY_CAST (copy); + if (!copy) { + GST_ERROR_OBJECT (self, "Failed to allocate memory for copying"); + return NULL; + } - info = &src_mem->alloc_params.info; + if (!gst_memory_map (mem, &src_info, GST_MAP_READ | GST_MAP_CUDA)) { + GST_ERROR_OBJECT (self, "Failed to map src memory"); + gst_memory_unref (copy); + return NULL; + } - if (!gst_cuda_context_push (ctx)) { - GST_CAT_ERROR (GST_CAT_MEMORY, "cannot push cuda context"); - gst_cuda_allocator_free (mem->allocator, copy); + if (!gst_memory_map (copy, &dst_info, GST_MAP_WRITE | GST_MAP_CUDA)) { + GST_ERROR_OBJECT (self, "Failed to map dst memory"); + gst_memory_unmap (mem, &src_info); + gst_memory_unref (copy); + return NULL; + } + + if (!gst_cuda_context_push (context)) { + GST_ERROR_OBJECT (self, "Failed to push cuda context"); + gst_memory_unmap (mem, &src_info); + gst_memory_unmap (copy, &dst_info); return NULL; } - for (i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++) { - CUDA_MEMCPY2D param = { 0, }; + param.srcMemoryType = CU_MEMORYTYPE_DEVICE; + param.srcDevice = (CUdeviceptr) src_info.data; + param.srcPitch = src_mem->priv->pitch; - param.srcMemoryType = CU_MEMORYTYPE_DEVICE; - param.srcDevice = src_mem->data + src_mem->offset[i]; - param.srcPitch = src_mem->stride; + param.dstMemoryType = CU_MEMORYTYPE_DEVICE; + param.dstDevice = (CUdeviceptr) dst_info.data; + param.dstPitch = src_mem->priv->pitch; + param.WidthInBytes = src_mem->priv->width_in_bytes; + param.Height = src_mem->priv->height; - param.dstMemoryType = CU_MEMORYTYPE_DEVICE; - param.dstDevice = dst_mem->data + dst_mem->offset[i]; - param.dstPitch = dst_mem->stride; - param.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (info, i) * - GST_VIDEO_INFO_COMP_PSTRIDE (info, i); - param.Height = GST_VIDEO_INFO_COMP_HEIGHT (info, i); + ret = gst_cuda_result (CuMemcpy2D (¶m)); + gst_cuda_context_pop (NULL); - if (!gst_cuda_result (CuMemcpy2DAsync (¶m, NULL))) { - GST_CAT_ERROR_OBJECT (GST_CAT_MEMORY, - mem->allocator, "Failed to copy %dth plane", i); - gst_cuda_context_pop (NULL); - gst_cuda_allocator_free (mem->allocator, copy); + gst_memory_unmap (mem, &src_info); + gst_memory_unmap (copy, &dst_info); - return NULL; - } - } - - gst_cuda_result (CuStreamSynchronize (NULL)); - - if (!gst_cuda_context_pop (NULL)) { - GST_CAT_WARNING (GST_CAT_MEMORY, "cannot pop cuda context"); + if (!ret) { + GST_ERROR_OBJECT (self, "Failed to copy memory"); + gst_memory_unref (copy); + return NULL; } return copy; } -GstAllocator * -gst_cuda_allocator_new (GstCudaContext * context) +void +gst_cuda_memory_init_once (void) { - GstCudaAllocator *allocator; + static gsize _init = 0; - g_return_val_if_fail (GST_IS_CUDA_CONTEXT (context), NULL); + if (g_once_init_enter (&_init)) { + _gst_cuda_allocator = + (GstAllocator *) g_object_new (GST_TYPE_CUDA_ALLOCATOR, NULL); + gst_object_ref_sink (_gst_cuda_allocator); - allocator = g_object_new (GST_TYPE_CUDA_ALLOCATOR, NULL); - allocator->context = gst_object_ref (context); - - return GST_ALLOCATOR_CAST (allocator); + gst_allocator_register (GST_CUDA_MEMORY_TYPE_NAME, _gst_cuda_allocator); + g_once_init_leave (&_init, 1); + } } gboolean @@ -486,3 +441,51 @@ gst_is_cuda_memory (GstMemory * mem) return mem != NULL && mem->allocator != NULL && GST_IS_CUDA_ALLOCATOR (mem->allocator); } + +GstMemory * +gst_cuda_allocator_alloc (GstCudaAllocator * allocator, + GstCudaContext * context, const GstVideoInfo * info) +{ + guint alloc_height; + + g_return_val_if_fail (GST_IS_CUDA_ALLOCATOR (allocator), NULL); + g_return_val_if_fail (GST_IS_CUDA_CONTEXT (context), NULL); + g_return_val_if_fail (info != NULL, NULL); + + alloc_height = GST_VIDEO_INFO_HEIGHT (info); + + /* make sure valid height for subsampled formats */ + switch (GST_VIDEO_INFO_FORMAT (info)) { + case GST_VIDEO_FORMAT_I420: + case GST_VIDEO_FORMAT_YV12: + case GST_VIDEO_FORMAT_NV12: + case GST_VIDEO_FORMAT_P010_10LE: + case GST_VIDEO_FORMAT_P016_LE: + case GST_VIDEO_FORMAT_I420_10LE: + alloc_height = GST_ROUND_UP_2 (alloc_height); + break; + default: + break; + } + + switch (GST_VIDEO_INFO_FORMAT (info)) { + case GST_VIDEO_FORMAT_I420: + case GST_VIDEO_FORMAT_YV12: + case GST_VIDEO_FORMAT_I420_10LE: + case GST_VIDEO_FORMAT_NV12: + case GST_VIDEO_FORMAT_NV21: + case GST_VIDEO_FORMAT_P010_10LE: + case GST_VIDEO_FORMAT_P016_LE: + alloc_height *= 2; + break; + case GST_VIDEO_FORMAT_Y444: + case GST_VIDEO_FORMAT_Y444_16LE: + alloc_height *= 3; + break; + default: + break; + } + + return gst_cuda_allocator_alloc_internal (allocator, context, + info, info->stride[0], alloc_height); +} diff --git a/subprojects/gst-plugins-bad/sys/nvcodec/gstcudamemory.h b/subprojects/gst-plugins-bad/sys/nvcodec/gstcudamemory.h index 79837660ca..e0c24fed42 100644 --- a/subprojects/gst-plugins-bad/sys/nvcodec/gstcudamemory.h +++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstcudamemory.h @@ -37,10 +37,11 @@ G_BEGIN_DECLS #define GST_CUDA_ALLOCATOR_CAST(obj) ((GstCudaAllocator *)(obj)) #define GST_CUDA_MEMORY_CAST(mem) ((GstCudaMemory *) (mem)) -typedef struct _GstCudaAllocationParams GstCudaAllocationParams; typedef struct _GstCudaAllocator GstCudaAllocator; typedef struct _GstCudaAllocatorClass GstCudaAllocatorClass; + typedef struct _GstCudaMemory GstCudaMemory; +typedef struct _GstCudaMemoryPrivate GstCudaMemoryPrivate; /** * GST_MAP_CUDA: @@ -65,32 +66,6 @@ typedef struct _GstCudaMemory GstCudaMemory; */ #define GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY "memory:CUDAMemory" -struct _GstCudaAllocationParams -{ - GstAllocationParams parent; - - GstVideoInfo info; -}; - -struct _GstCudaAllocator -{ - GstAllocator parent; - GstCudaContext *context; -}; - -struct _GstCudaAllocatorClass -{ - GstAllocatorClass parent_class; -}; - -GType gst_cuda_allocator_get_type (void); - -GstAllocator * gst_cuda_allocator_new (GstCudaContext * context); - -GstMemory * gst_cuda_allocator_alloc (GstAllocator * allocator, - gsize size, - GstCudaAllocationParams * params); - /** * GstCudaMemoryTransfer: * @GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD: the device memory needs downloading @@ -106,32 +81,36 @@ typedef enum struct _GstCudaMemory { - GstMemory mem; + GstMemory mem; + /*< public >*/ GstCudaContext *context; - CUdeviceptr data; + GstVideoInfo info; - GstCudaAllocationParams alloc_params; - - /* offset and stride of CUDA device memory */ - gsize offset[GST_VIDEO_MAX_PLANES]; - gint stride; - - /* allocated CUDA Host memory */ - gpointer map_alloc_data; - - /* aligned CUDA Host memory */ - guint8 *align_data; - - /* pointing align_data if the memory is mapped */ - gpointer map_data; - - gint map_count; - - GMutex lock; + /*< private >*/ + GstCudaMemoryPrivate *priv; + gpointer _gst_reserved[GST_PADDING]; }; -gboolean gst_is_cuda_memory (GstMemory * mem); +struct _GstCudaAllocator +{ + GstAllocator parent; +}; + +struct _GstCudaAllocatorClass +{ + GstAllocatorClass parent_class; +}; + +void gst_cuda_memory_init_once (void); + +gboolean gst_is_cuda_memory (GstMemory * mem); + +GType gst_cuda_allocator_get_type (void); + +GstMemory * gst_cuda_allocator_alloc (GstCudaAllocator * allocator, + GstCudaContext * context, + const GstVideoInfo * info); G_END_DECLS diff --git a/subprojects/gst-plugins-bad/sys/nvcodec/gstcudaupload.c b/subprojects/gst-plugins-bad/sys/nvcodec/gstcudaupload.c index b67c228ea6..0740e71c45 100644 --- a/subprojects/gst-plugins-bad/sys/nvcodec/gstcudaupload.c +++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstcudaupload.c @@ -174,10 +174,6 @@ gst_cuda_upload_propose_allocation (GstBaseTransform * trans, if (gst_query_get_n_allocation_pools (query) == 0) { GstCapsFeatures *features; GstStructure *config; - GstVideoAlignment align; - GstAllocationParams params = { 0, 31, 0, 0, }; - GstAllocator *allocator = NULL; - gint i; features = gst_caps_get_features (caps, 0); @@ -191,38 +187,25 @@ gst_cuda_upload_propose_allocation (GstBaseTransform * trans, config = gst_buffer_pool_get_config (pool); - gst_video_alignment_reset (&align); - for (i = 0; i < GST_VIDEO_INFO_N_PLANES (&info); i++) { - align.stride_align[i] = 31; - } - gst_video_info_align (&info, &align); - gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META); - gst_buffer_pool_config_add_option (config, - GST_BUFFER_POOL_OPTION_VIDEO_ALIGNMENT); - gst_buffer_pool_config_set_video_alignment (config, &align); size = GST_VIDEO_INFO_SIZE (&info); gst_buffer_pool_config_set_params (config, caps, size, 0, 0); - gst_query_add_allocation_meta (query, GST_VIDEO_META_API_TYPE, NULL); - gst_query_add_allocation_pool (query, pool, size, 0, 0); - - if (gst_buffer_pool_config_get_allocator (config, &allocator, ¶ms)) { - if (params.align < 31) - params.align = 31; - - gst_query_add_allocation_param (query, allocator, ¶ms); - gst_buffer_pool_config_set_allocator (config, allocator, ¶ms); - } - if (!gst_buffer_pool_set_config (pool, config)) { GST_ERROR_OBJECT (ctrans, "failed to set config"); gst_object_unref (pool); return FALSE; } + /* Get updated size by cuda buffer pool */ + config = gst_buffer_pool_get_config (pool); + gst_buffer_pool_config_get_params (config, NULL, &size, NULL, NULL); + gst_structure_free (config); + + gst_query_add_allocation_pool (query, pool, size, 0, 0); + gst_object_unref (pool); } @@ -289,6 +272,12 @@ gst_cuda_upload_decide_allocation (GstBaseTransform * trans, GstQuery * query) gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META); gst_buffer_pool_config_set_params (config, outcaps, size, min, max); gst_buffer_pool_set_config (pool, config); + + /* Get updated size by cuda buffer pool */ + config = gst_buffer_pool_get_config (pool); + gst_buffer_pool_config_get_params (config, NULL, &size, NULL, NULL); + gst_structure_free (config); + if (update_pool) gst_query_set_nth_allocation_pool (query, 0, pool, size, min, max); else diff --git a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvbaseenc.c b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvbaseenc.c index 4299f5a213..809d08cbba 100644 --- a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvbaseenc.c +++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvbaseenc.c @@ -625,6 +625,7 @@ gst_nv_base_enc_propose_allocation (GstVideoEncoder * enc, GstQuery * query) GstBufferPool *pool; GstStructure *config; GstCapsFeatures *features; + guint size; GST_DEBUG_OBJECT (nvenc, "propose allocation"); @@ -665,18 +666,25 @@ gst_nv_base_enc_propose_allocation (GstVideoEncoder * enc, GstQuery * query) goto done; } - config = gst_buffer_pool_get_config (pool); - gst_buffer_pool_config_set_params (config, caps, GST_VIDEO_INFO_SIZE (&info), - nvenc->items->len, nvenc->items->len); + size = GST_VIDEO_INFO_SIZE (&info); - gst_query_add_allocation_pool (query, pool, GST_VIDEO_INFO_SIZE (&info), + config = gst_buffer_pool_get_config (pool); + gst_buffer_pool_config_set_params (config, caps, size, nvenc->items->len, nvenc->items->len); gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META); - gst_query_add_allocation_meta (query, GST_VIDEO_META_API_TYPE, NULL); if (!gst_buffer_pool_set_config (pool, config)) goto error_pool_config; + /* Get updated size by cuda buffer pool */ + config = gst_buffer_pool_get_config (pool); + gst_buffer_pool_config_get_params (config, NULL, &size, NULL, NULL); + gst_structure_free (config); + + gst_query_add_allocation_pool (query, pool, size, + nvenc->items->len, nvenc->items->len); + gst_query_add_allocation_meta (query, GST_VIDEO_META_API_TYPE, NULL); + gst_object_unref (pool); done: @@ -2231,17 +2239,12 @@ gst_nv_base_enc_upload_frame (GstNvBaseEnc * nvenc, GstVideoFrame * frame, CUdeviceptr dst = resource->cuda_pointer; GstVideoInfo *info = &frame->info; CUresult cuda_ret; - GstCudaMemory *cuda_mem = NULL; if (!gst_cuda_context_push (nvenc->cuda_ctx)) { GST_ERROR_OBJECT (nvenc, "cannot push context"); return FALSE; } - if (use_device_memory) { - cuda_mem = (GstCudaMemory *) gst_buffer_peek_memory (frame->buffer, 0); - } - for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (frame); i++) { CUDA_MEMCPY2D param = { 0, }; guint dest_stride = _get_cuda_device_stride (&nvenc->input_info, i, @@ -2249,13 +2252,12 @@ gst_nv_base_enc_upload_frame (GstNvBaseEnc * nvenc, GstVideoFrame * frame, if (use_device_memory) { param.srcMemoryType = CU_MEMORYTYPE_DEVICE; - param.srcDevice = cuda_mem->data + cuda_mem->offset[i]; - param.srcPitch = cuda_mem->stride; + param.srcDevice = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (frame, i); } else { param.srcMemoryType = CU_MEMORYTYPE_HOST; param.srcHost = GST_VIDEO_FRAME_PLANE_DATA (frame, i); - param.srcPitch = GST_VIDEO_FRAME_PLANE_STRIDE (frame, i); } + param.srcPitch = GST_VIDEO_FRAME_PLANE_STRIDE (frame, i); param.dstMemoryType = CU_MEMORYTYPE_DEVICE; param.dstDevice = dst; diff --git a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdec.c b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdec.c index 450528bffb..e2348a18a7 100644 --- a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdec.c +++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdec.c @@ -1241,34 +1241,25 @@ gst_nvdec_copy_device_to_memory (GstNvDec * nvdec, GstVideoInfo *info = &nvdec->output_state->info; gint i; GstMemory *mem; - GstCudaMemory *cuda_mem = NULL; - - if (!gst_cuda_context_push (nvdec->cuda_ctx)) { - GST_WARNING_OBJECT (nvdec, "failed to lock CUDA context"); - return FALSE; - } + gboolean use_device_copy = FALSE; + GstMapFlags map_flags = GST_MAP_WRITE; if (nvdec->mem_type == GST_NVDEC_MEM_TYPE_CUDA && (mem = gst_buffer_peek_memory (output_buffer, 0)) && gst_is_cuda_memory (mem)) { - GstCudaMemory *cmem = GST_CUDA_MEMORY_CAST (mem); - - if (cmem->context == nvdec->cuda_ctx || - gst_cuda_context_get_handle (cmem->context) == - gst_cuda_context_get_handle (nvdec->cuda_ctx) || - (gst_cuda_context_can_access_peer (cmem->context, nvdec->cuda_ctx) && - gst_cuda_context_can_access_peer (nvdec->cuda_ctx, - cmem->context))) { - cuda_mem = cmem; - } + map_flags |= GST_MAP_CUDA; + use_device_copy = TRUE; } - if (!cuda_mem) { - if (!gst_video_frame_map (&video_frame, info, output_buffer, GST_MAP_WRITE)) { - GST_ERROR_OBJECT (nvdec, "frame map failure"); - gst_cuda_context_pop (NULL); - return FALSE; - } + if (!gst_video_frame_map (&video_frame, info, output_buffer, map_flags)) { + GST_ERROR_OBJECT (nvdec, "frame map failure"); + return FALSE; + } + + if (!gst_cuda_context_push (nvdec->cuda_ctx)) { + gst_video_frame_unmap (&video_frame); + GST_WARNING_OBJECT (nvdec, "failed to lock CUDA context"); + return FALSE; } params.progressive_frame = dispinfo->progressive_frame; @@ -1286,17 +1277,17 @@ gst_nvdec_copy_device_to_memory (GstNvDec * nvdec, copy_params.srcMemoryType = CU_MEMORYTYPE_DEVICE; copy_params.srcPitch = pitch; copy_params.dstMemoryType = - cuda_mem ? CU_MEMORYTYPE_DEVICE : CU_MEMORYTYPE_HOST; + use_device_copy ? CU_MEMORYTYPE_DEVICE : CU_MEMORYTYPE_HOST; for (i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++) { copy_params.srcDevice = dptr + (i * pitch * GST_VIDEO_INFO_HEIGHT (info)); - if (cuda_mem) { - copy_params.dstDevice = cuda_mem->data + cuda_mem->offset[i]; - copy_params.dstPitch = cuda_mem->stride; + if (use_device_copy) { + copy_params.dstDevice = + (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (&video_frame, i); } else { copy_params.dstHost = GST_VIDEO_FRAME_PLANE_DATA (&video_frame, i); - copy_params.dstPitch = GST_VIDEO_FRAME_PLANE_STRIDE (&video_frame, i); } + copy_params.dstPitch = GST_VIDEO_FRAME_PLANE_STRIDE (&video_frame, i); copy_params.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (info, i) * GST_VIDEO_INFO_COMP_PSTRIDE (info, i); copy_params.Height = GST_VIDEO_INFO_COMP_HEIGHT (info, i); @@ -1304,8 +1295,7 @@ gst_nvdec_copy_device_to_memory (GstNvDec * nvdec, if (!gst_cuda_result (CuMemcpy2DAsync (©_params, nvdec->cuda_stream))) { GST_ERROR_OBJECT (nvdec, "failed to copy %dth plane", i); CuvidUnmapVideoFrame (nvdec->decoder, dptr); - if (!cuda_mem) - gst_video_frame_unmap (&video_frame); + gst_video_frame_unmap (&video_frame); gst_cuda_context_pop (NULL); return FALSE; } @@ -1313,8 +1303,7 @@ gst_nvdec_copy_device_to_memory (GstNvDec * nvdec, gst_cuda_result (CuStreamSynchronize (nvdec->cuda_stream)); - if (!cuda_mem) - gst_video_frame_unmap (&video_frame); + gst_video_frame_unmap (&video_frame); if (!gst_cuda_result (CuvidUnmapVideoFrame (nvdec->decoder, dptr))) GST_WARNING_OBJECT (nvdec, "failed to unmap video frame"); @@ -1558,9 +1547,15 @@ gst_nvdec_ensure_cuda_pool (GstNvDec * nvdec, GstQuery * query) n = gst_query_get_n_allocation_pools (query); if (n > 0) { gst_query_parse_nth_allocation_pool (query, 0, &pool, &size, &min, &max); - if (pool && !GST_IS_CUDA_BUFFER_POOL (pool)) { - gst_object_unref (pool); - pool = NULL; + if (pool) { + if (!GST_IS_CUDA_BUFFER_POOL (pool)) { + gst_clear_object (&pool); + } else { + GstCudaBufferPool *cpool = GST_CUDA_BUFFER_POOL (pool); + + if (cpool->context != nvdec->cuda_ctx) + gst_clear_object (&pool); + } } } @@ -1578,6 +1573,12 @@ gst_nvdec_ensure_cuda_pool (GstNvDec * nvdec, GstQuery * query) gst_buffer_pool_config_set_params (config, outcaps, size, min, max); gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META); gst_buffer_pool_set_config (pool, config); + + /* Get updated size by cuda buffer pool */ + config = gst_buffer_pool_get_config (pool); + gst_buffer_pool_config_get_params (config, NULL, &size, NULL, NULL); + gst_structure_free (config); + if (n > 0) gst_query_set_nth_allocation_pool (query, 0, pool, size, min, max); else diff --git a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdecoder.c b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdecoder.c index 34c056953a..1c5ae8e6e3 100644 --- a/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdecoder.c +++ b/subprojects/gst-plugins-bad/sys/nvcodec/gstnvdecoder.c @@ -740,33 +740,24 @@ gst_nv_decoder_copy_frame_to_cuda (GstNvDecoder * decoder, { CUDA_MEMCPY2D copy_params = { 0, }; GstMemory *mem; - GstCudaMemory *cuda_mem = NULL; gint i; gboolean ret = FALSE; + GstVideoFrame video_frame; mem = gst_buffer_peek_memory (buffer, 0); if (!gst_is_cuda_memory (mem)) { GST_WARNING_OBJECT (decoder, "Not a CUDA memory"); return FALSE; - } else { - GstCudaMemory *cmem = GST_CUDA_MEMORY_CAST (mem); - - if (cmem->context == decoder->context || - gst_cuda_context_get_handle (cmem->context) == - gst_cuda_context_get_handle (decoder->context) || - (gst_cuda_context_can_access_peer (cmem->context, decoder->context) && - gst_cuda_context_can_access_peer (decoder->context, - cmem->context))) { - cuda_mem = cmem; - } } - if (!cuda_mem) { - GST_WARNING_OBJECT (decoder, "Access to CUDA memory is not allowed"); + if (!gst_video_frame_map (&video_frame, + &decoder->info, buffer, GST_MAP_WRITE | GST_MAP_CUDA)) { + GST_ERROR_OBJECT (decoder, "frame map failure"); return FALSE; } if (!gst_cuda_context_push (decoder->context)) { + gst_video_frame_unmap (&video_frame); GST_ERROR_OBJECT (decoder, "Failed to push CUDA context"); return FALSE; } @@ -778,8 +769,9 @@ gst_nv_decoder_copy_frame_to_cuda (GstNvDecoder * decoder, for (i = 0; i < GST_VIDEO_INFO_N_PLANES (&decoder->info); i++) { copy_params.srcDevice = frame->devptr + (i * frame->pitch * GST_VIDEO_INFO_HEIGHT (&decoder->info)); - copy_params.dstDevice = cuda_mem->data + cuda_mem->offset[i]; - copy_params.dstPitch = cuda_mem->stride; + copy_params.dstDevice = + (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (&video_frame, i); + copy_params.dstPitch = GST_VIDEO_FRAME_PLANE_STRIDE (&video_frame, i); copy_params.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (&decoder->info, 0) * GST_VIDEO_INFO_COMP_PSTRIDE (&decoder->info, 0); copy_params.Height = GST_VIDEO_INFO_COMP_HEIGHT (&decoder->info, i); @@ -795,6 +787,7 @@ gst_nv_decoder_copy_frame_to_cuda (GstNvDecoder * decoder, ret = TRUE; done: + gst_video_frame_unmap (&video_frame); gst_cuda_context_pop (NULL); GST_LOG_OBJECT (decoder, "Copy frame to CUDA ret %d", ret); @@ -1567,6 +1560,12 @@ gst_nv_decoder_ensure_cuda_pool (GstNvDecoder * decoder, GstQuery * query) gst_buffer_pool_config_set_params (config, outcaps, size, min, max); gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META); gst_buffer_pool_set_config (pool, config); + + /* Get updated size by cuda buffer pool */ + config = gst_buffer_pool_get_config (pool); + gst_buffer_pool_config_get_params (config, NULL, &size, NULL, NULL); + gst_structure_free (config); + if (n > 0) gst_query_set_nth_allocation_pool (query, 0, pool, size, min, max); else diff --git a/subprojects/gst-plugins-bad/sys/nvcodec/plugin.c b/subprojects/gst-plugins-bad/sys/nvcodec/plugin.c index 741893fc33..f6800c8c4f 100644 --- a/subprojects/gst-plugins-bad/sys/nvcodec/plugin.c +++ b/subprojects/gst-plugins-bad/sys/nvcodec/plugin.c @@ -39,6 +39,7 @@ #include "gstcudadownload.h" #include "gstcudaupload.h" #include "gstcudafilter.h" +#include "gstcudamemory.h" GST_DEBUG_CATEGORY (gst_nvcodec_debug); GST_DEBUG_CATEGORY (gst_nvdec_debug); @@ -238,6 +239,7 @@ plugin_init (GstPlugin * plugin) GST_TYPE_CUDA_UPLOAD); gst_cuda_filter_plugin_init (plugin); + gst_cuda_memory_init_once (); return TRUE; }