nvcodec: Refactor GstCudaMemory abstraction

* Hide GstCudaMemory member variables
* Make GstCudaAllocator object GstCudaContext independent
* Set offset/stride of memory correctly via video meta
* Drop GST_BUFFER_POOL_OPTION_VIDEO_ALIGNMENT support.
  This implementation does not actually support custom alignment,
  because we allocate device memory via cuMemAllocPitch,
  whose alignment is essentially uncontrollable

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/1834>
This commit is contained in:
Seungha Yang 2022-03-03 03:25:47 +09:00
parent ad0e7fca14
commit 111b2c3f53
12 changed files with 580 additions and 759 deletions

View file

@ -229,7 +229,7 @@ static const gchar templ_YUV_TO_YUV[] =
GST_CUDA_KERNEL_FUNC
"(cudaTextureObject_t tex0, cudaTextureObject_t tex1, cudaTextureObject_t tex2,\n"
" unsigned char *dst0, unsigned char *dst1, unsigned char *dst2,\n"
" int stride)\n"
" int stride, int uv_stride)\n"
"{\n"
" int x_pos = blockIdx.x * blockDim.x + threadIdx.x;\n"
" int y_pos = blockIdx.y * blockDim.y + threadIdx.y;\n"
@ -265,7 +265,7 @@ GST_CUDA_KERNEL_FUNC
" v = tmp;\n"
" }\n"
" write_chroma (dst1,\n"
" dst2, u, v, x_pos, y_pos, CHROMA_PSTRIDE, stride, MASK);\n"
" dst2, u, v, x_pos, y_pos, CHROMA_PSTRIDE, uv_stride, MASK);\n"
" }\n"
"}\n"
"\n"
@ -589,7 +589,7 @@ GST_CUDA_KERNEL_FUNC_TO_Y444
GST_CUDA_KERNEL_FUNC_Y444_TO_YUV
"(cudaTextureObject_t tex0, cudaTextureObject_t tex1, cudaTextureObject_t tex2,\n"
" unsigned char *dst0, unsigned char *dst1, unsigned char *dst2,\n"
" int stride)\n"
" int stride, int uv_stride)\n"
"{\n"
" int x_pos = blockIdx.x * blockDim.x + threadIdx.x;\n"
" int y_pos = blockIdx.y * blockDim.y + threadIdx.y;\n"
@ -626,7 +626,7 @@ GST_CUDA_KERNEL_FUNC_Y444_TO_YUV
" v = tmp;\n"
" }\n"
" write_chroma (dst1,\n"
" dst2, u, v, x_pos, y_pos, CHROMA_PSTRIDE, stride, MASK);\n"
" dst2, u, v, x_pos, y_pos, CHROMA_PSTRIDE, uv_stride, MASK);\n"
" }\n"
"}\n"
"\n"
@ -745,9 +745,10 @@ struct _GstCudaConverter
gchar *ptx;
GstCudaStageBuffer fallback_buffer[GST_VIDEO_MAX_PLANES];
gboolean (*convert) (GstCudaConverter * convert, const GstCudaMemory * src,
GstVideoInfo * in_info, GstCudaMemory * dst, GstVideoInfo * out_info,
CUstream cuda_stream);
/* *INDENT-OFF* */
gboolean (*convert) (GstCudaConverter * convert, GstVideoFrame * src_frame,
GstVideoFrame * dst_frame, CUstream cuda_stream);
/* *INDENT-ON* */
const CUdeviceptr src;
GstVideoInfo *cur_in_info;
@ -893,67 +894,25 @@ gst_cuda_converter_free (GstCudaConverter * convert)
g_free (convert);
}
/**
* gst_cuda_converter_frame:
* @convert: a #GstCudaConverter
* @src: a #GstCudaMemory
* @in_info: a #GstVideoInfo representing @src
* @dst: a #GstCudaMemory
* @out_info: a #GstVideoInfo representing @dst
* @cuda_stream: a #CUstream
*
* Convert the pixels of @src into @dest using @convert.
* Called without gst_cuda_context_push() and gst_cuda_context_pop() by caller
*/
gboolean
gst_cuda_converter_frame (GstCudaConverter * convert, const GstCudaMemory * src,
GstVideoInfo * in_info, GstCudaMemory * dst, GstVideoInfo * out_info,
CUstream cuda_stream)
gst_cuda_converter_convert_frame (GstCudaConverter * convert,
GstVideoFrame * src_frame, GstVideoFrame * dst_frame, CUstream cuda_stream)
{
gboolean ret;
g_return_val_if_fail (convert, FALSE);
g_return_val_if_fail (src, FALSE);
g_return_val_if_fail (in_info, FALSE);
g_return_val_if_fail (dst, FALSE);
g_return_val_if_fail (out_info, FALSE);
g_return_val_if_fail (src_frame, FALSE);
g_return_val_if_fail (dst_frame, FALSE);
gst_cuda_context_push (convert->cuda_ctx);
ret = gst_cuda_converter_frame_unlocked (convert,
src, in_info, dst, out_info, cuda_stream);
ret = convert->convert (convert, src_frame, dst_frame, cuda_stream);
gst_cuda_context_pop (NULL);
return ret;
}
/**
* gst_cuda_converter_frame_unlocked:
* @convert: a #GstCudaConverter
* @src: a #GstCudaMemory
* @in_info: a #GstVideoInfo representing @src
* @dst: a #GstCudaMemory
* @out_info: a #GstVideoInfo representing @dest
* @cuda_stream: a #CUstream
*
* Convert the pixels of @src into @dest using @convert.
* Caller should call this method after gst_cuda_context_push()
*/
gboolean
gst_cuda_converter_frame_unlocked (GstCudaConverter * convert,
const GstCudaMemory * src, GstVideoInfo * in_info, GstCudaMemory * dst,
GstVideoInfo * out_info, CUstream cuda_stream)
{
g_return_val_if_fail (convert, FALSE);
g_return_val_if_fail (src, FALSE);
g_return_val_if_fail (in_info, FALSE);
g_return_val_if_fail (dst, FALSE);
g_return_val_if_fail (out_info, FALSE);
return convert->convert (convert, src, in_info, dst, out_info, cuda_stream);
}
/* allocate fallback memory for texture alignment requirement */
static gboolean
convert_ensure_fallback_memory (GstCudaConverter * convert,
@ -1020,8 +979,8 @@ convert_create_texture_unchecked (const CUdeviceptr src, gint width,
}
static CUtexObject
convert_create_texture (GstCudaConverter * convert, const GstCudaMemory * src,
GstVideoInfo * info, guint plane, CUstream cuda_stream)
convert_create_texture (GstCudaConverter * convert, GstVideoFrame * src_frame,
guint plane, CUstream cuda_stream)
{
CUarray_format format = CU_AD_FORMAT_UNSIGNED_INT8;
guint channels = 1;
@ -1030,22 +989,23 @@ convert_create_texture (GstCudaConverter * convert, const GstCudaMemory * src,
CUresult cuda_ret;
CUfilter_mode mode;
if (GST_VIDEO_INFO_COMP_DEPTH (info, plane) > 8)
if (GST_VIDEO_FRAME_COMP_DEPTH (src_frame, plane) > 8)
format = CU_AD_FORMAT_UNSIGNED_INT16;
/* FIXME: more graceful method ? */
if (plane != 0 &&
GST_VIDEO_INFO_N_PLANES (info) != GST_VIDEO_INFO_N_COMPONENTS (info)) {
GST_VIDEO_FRAME_N_PLANES (src_frame) !=
GST_VIDEO_FRAME_N_COMPONENTS (src_frame)) {
channels = 2;
}
src_ptr = src->data + src->offset[plane];
stride = src->stride;
src_ptr = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (src_frame, plane);
stride = GST_VIDEO_FRAME_PLANE_STRIDE (src_frame, plane);
if (convert->texture_alignment && (src_ptr % convert->texture_alignment)) {
CUDA_MEMCPY2D copy_params = { 0, };
if (!convert_ensure_fallback_memory (convert, info, plane))
if (!convert_ensure_fallback_memory (convert, &src_frame->info, plane))
return 0;
GST_LOG ("device memory was not aligned, copy to fallback memory");
@ -1057,9 +1017,9 @@ convert_create_texture (GstCudaConverter * convert, const GstCudaMemory * src,
copy_params.dstMemoryType = CU_MEMORYTYPE_DEVICE;
copy_params.dstPitch = convert->fallback_buffer[plane].cuda_stride;
copy_params.dstDevice = convert->fallback_buffer[plane].device_ptr;
copy_params.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (info, plane)
* GST_VIDEO_INFO_COMP_PSTRIDE (info, plane);
copy_params.Height = GST_VIDEO_INFO_COMP_HEIGHT (info, plane);
copy_params.WidthInBytes = GST_VIDEO_FRAME_COMP_WIDTH (src_frame, plane)
* GST_VIDEO_FRAME_COMP_PSTRIDE (src_frame, plane);
copy_params.Height = GST_VIDEO_FRAME_COMP_HEIGHT (src_frame, plane);
cuda_ret = CuMemcpy2DAsync (&copy_params, cuda_stream);
if (!gst_cuda_result (cuda_ret)) {
@ -1079,27 +1039,26 @@ convert_create_texture (GstCudaConverter * convert, const GstCudaMemory * src,
mode = CU_TR_FILTER_MODE_LINEAR;
return convert_create_texture_unchecked (src_ptr,
GST_VIDEO_INFO_COMP_WIDTH (info, plane),
GST_VIDEO_INFO_COMP_HEIGHT (info, plane), channels, stride, format, mode,
cuda_stream);
GST_VIDEO_FRAME_COMP_WIDTH (src_frame, plane),
GST_VIDEO_FRAME_COMP_HEIGHT (src_frame, plane), channels, stride, format,
mode, cuda_stream);
}
/* main conversion function for YUV to YUV conversion */
static gboolean
convert_YUV_TO_YUV (GstCudaConverter * convert,
const GstCudaMemory * src, GstVideoInfo * in_info, GstCudaMemory * dst,
GstVideoInfo * out_info, CUstream cuda_stream)
convert_YUV_TO_YUV (GstCudaConverter * convert, GstVideoFrame * src_frame,
GstVideoFrame * dst_frame, CUstream cuda_stream)
{
CUtexObject texture[GST_VIDEO_MAX_PLANES] = { 0, };
CUresult cuda_ret;
gboolean ret = FALSE;
CUdeviceptr dst_ptr[GST_VIDEO_MAX_PLANES] = { 0, };
gint dst_stride;
gint dst_stride, dst_uv_stride;
gint width, height;
gint i;
gpointer kernel_args[] = { &texture[0], &texture[1], &texture[2],
&dst_ptr[0], &dst_ptr[1], &dst_ptr[2], &dst_stride
&dst_ptr[0], &dst_ptr[1], &dst_ptr[2], &dst_stride, &dst_uv_stride
};
/* conversion step
@ -1110,21 +1069,23 @@ convert_YUV_TO_YUV (GstCudaConverter * convert,
*/
/* map CUDA device memory to CUDA texture object */
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (in_info); i++) {
texture[i] = convert_create_texture (convert, src, in_info, i, cuda_stream);
for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (src_frame); i++) {
texture[i] = convert_create_texture (convert, src_frame, i, cuda_stream);
if (!texture[i]) {
GST_ERROR ("couldn't create texture for %d th plane", i);
goto done;
}
}
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (out_info); i++)
dst_ptr[i] = dst->data + dst->offset[i];
for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (dst_frame); i++) {
dst_ptr[i] = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (dst_frame, i);
}
dst_stride = dst->stride;
dst_stride = GST_VIDEO_FRAME_PLANE_STRIDE (dst_frame, 0);
dst_uv_stride = GST_VIDEO_FRAME_PLANE_STRIDE (dst_frame, 1);
width = GST_VIDEO_INFO_WIDTH (out_info);
height = GST_VIDEO_INFO_HEIGHT (out_info);
width = GST_VIDEO_FRAME_WIDTH (dst_frame);
height = GST_VIDEO_FRAME_HEIGHT (dst_frame);
cuda_ret =
CuLaunchKernel (convert->kernel_func[0], DIV_UP (width, CUDA_BLOCK_X),
@ -1140,7 +1101,7 @@ convert_YUV_TO_YUV (GstCudaConverter * convert,
gst_cuda_result (CuStreamSynchronize (cuda_stream));
done:
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (in_info); i++) {
for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (src_frame); i++) {
if (texture[i])
gst_cuda_result (CuTexObjectDestroy (texture[i]));
}
@ -1150,9 +1111,8 @@ done:
/* main conversion function for YUV to RGB conversion */
static gboolean
convert_YUV_TO_RGB (GstCudaConverter * convert,
const GstCudaMemory * src, GstVideoInfo * in_info, GstCudaMemory * dst,
GstVideoInfo * out_info, CUstream cuda_stream)
convert_YUV_TO_RGB (GstCudaConverter * convert, GstVideoFrame * src_frame,
GstVideoFrame * dst_frame, CUstream cuda_stream)
{
CUtexObject texture[GST_VIDEO_MAX_PLANES] = { 0, };
CUresult cuda_ret;
@ -1174,19 +1134,19 @@ convert_YUV_TO_RGB (GstCudaConverter * convert,
*/
/* map CUDA device memory to CUDA texture object */
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (in_info); i++) {
texture[i] = convert_create_texture (convert, src, in_info, i, cuda_stream);
for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (src_frame); i++) {
texture[i] = convert_create_texture (convert, src_frame, i, cuda_stream);
if (!texture[i]) {
GST_ERROR ("couldn't create texture for %d th plane", i);
goto done;
}
}
dstRGB = dst->data;
dst_stride = dst->stride;
dstRGB = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (dst_frame, 0);
dst_stride = GST_VIDEO_FRAME_PLANE_STRIDE (dst_frame, 0);
width = GST_VIDEO_INFO_WIDTH (out_info);
height = GST_VIDEO_INFO_HEIGHT (out_info);
width = GST_VIDEO_FRAME_WIDTH (dst_frame);
height = GST_VIDEO_FRAME_HEIGHT (dst_frame);
cuda_ret =
CuLaunchKernel (convert->kernel_func[0], DIV_UP (width, CUDA_BLOCK_X),
@ -1202,7 +1162,7 @@ convert_YUV_TO_RGB (GstCudaConverter * convert,
gst_cuda_result (CuStreamSynchronize (cuda_stream));
done:
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (in_info); i++) {
for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (src_frame); i++) {
if (texture[i])
gst_cuda_result (CuTexObjectDestroy (texture[i]));
}
@ -1212,7 +1172,7 @@ done:
static gboolean
convert_UNPACK_RGB (GstCudaConverter * convert, CUfunction kernel_func,
CUstream cuda_stream, const GstCudaMemory * src, GstVideoInfo * in_info,
CUstream cuda_stream, GstVideoFrame * src_frame,
CUdeviceptr dst, gint dst_stride, GstCudaRGBOrder * rgb_order)
{
CUdeviceptr srcRGB = 0;
@ -1227,12 +1187,12 @@ convert_UNPACK_RGB (GstCudaConverter * convert, CUfunction kernel_func,
&convert->in_rgb_order.B, &convert->in_rgb_order.A,
};
srcRGB = src->data;
src_stride = src->stride;
srcRGB = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (src_frame, 0);
src_stride = GST_VIDEO_FRAME_PLANE_STRIDE (src_frame, 0);
width = GST_VIDEO_INFO_WIDTH (in_info);
height = GST_VIDEO_INFO_HEIGHT (in_info);
src_pstride = GST_VIDEO_INFO_COMP_PSTRIDE (in_info, 0);
width = GST_VIDEO_FRAME_WIDTH (src_frame);
height = GST_VIDEO_FRAME_HEIGHT (src_frame);
src_pstride = GST_VIDEO_FRAME_COMP_PSTRIDE (src_frame, 0);
cuda_ret =
CuLaunchKernel (kernel_func, DIV_UP (width, CUDA_BLOCK_X),
@ -1274,9 +1234,8 @@ convert_TO_Y444 (GstCudaConverter * convert, CUfunction kernel_func,
/* main conversion function for RGB to YUV conversion */
static gboolean
convert_RGB_TO_YUV (GstCudaConverter * convert,
const GstCudaMemory * src, GstVideoInfo * in_info, GstCudaMemory * dst,
GstVideoInfo * out_info, CUstream cuda_stream)
convert_RGB_TO_YUV (GstCudaConverter * convert, GstVideoFrame * src_frame,
GstVideoFrame * dst_frame, CUstream cuda_stream)
{
CUtexObject texture = 0;
CUtexObject yuv_texture[3] = { 0, };
@ -1285,7 +1244,7 @@ convert_RGB_TO_YUV (GstCudaConverter * convert,
gboolean ret = FALSE;
gint in_width, in_height;
gint out_width, out_height;
gint dst_stride;
gint dst_stride, dst_uv_stride;
CUarray_format format = CU_AD_FORMAT_UNSIGNED_INT8;
CUfilter_mode mode = CU_TR_FILTER_MODE_POINT;
gint pstride = 1;
@ -1293,7 +1252,7 @@ convert_RGB_TO_YUV (GstCudaConverter * convert,
gint i;
gpointer kernel_args[] = { &yuv_texture[0], &yuv_texture[1], &yuv_texture[2],
&dst_ptr[0], &dst_ptr[1], &dst_ptr[2], &dst_stride
&dst_ptr[0], &dst_ptr[1], &dst_ptr[2], &dst_stride, &dst_uv_stride
};
/* conversion step
@ -1304,21 +1263,22 @@ convert_RGB_TO_YUV (GstCudaConverter * convert,
* the CUDA kernel function
*/
if (!convert_UNPACK_RGB (convert, convert->kernel_func[0], cuda_stream,
src, in_info, convert->unpack_surface.device_ptr,
src_frame, convert->unpack_surface.device_ptr,
convert->unpack_surface.cuda_stride, &convert->in_rgb_order)) {
GST_ERROR ("could not unpack input rgb");
goto done;
}
in_width = GST_VIDEO_INFO_WIDTH (in_info);
in_height = GST_VIDEO_INFO_HEIGHT (in_info);
in_width = GST_VIDEO_FRAME_WIDTH (src_frame);
in_height = GST_VIDEO_FRAME_HEIGHT (src_frame);
out_width = GST_VIDEO_INFO_WIDTH (out_info);
out_height = GST_VIDEO_INFO_HEIGHT (out_info);
dst_stride = dst->stride;
out_width = GST_VIDEO_FRAME_WIDTH (dst_frame);
out_height = GST_VIDEO_FRAME_HEIGHT (dst_frame);
dst_stride = GST_VIDEO_FRAME_PLANE_STRIDE (dst_frame, 0);
dst_uv_stride = GST_VIDEO_FRAME_PLANE_STRIDE (dst_frame, 1);
if (GST_VIDEO_INFO_COMP_DEPTH (in_info, 0) > 8) {
if (GST_VIDEO_FRAME_COMP_DEPTH (src_frame, 0) > 8) {
pstride = 2;
bitdepth = 16;
format = CU_AD_FORMAT_UNSIGNED_INT16;
@ -1365,8 +1325,8 @@ convert_RGB_TO_YUV (GstCudaConverter * convert,
}
}
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (out_info); i++)
dst_ptr[i] = dst->data + dst->offset[i];
for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (dst_frame); i++)
dst_ptr[i] = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (dst_frame, i);
cuda_ret =
CuLaunchKernel (convert->kernel_func[2], DIV_UP (out_width, CUDA_BLOCK_X),
@ -1394,9 +1354,8 @@ done:
/* main conversion function for RGB to RGB conversion */
static gboolean
convert_RGB_TO_RGB (GstCudaConverter * convert,
const GstCudaMemory * src, GstVideoInfo * in_info, GstCudaMemory * dst,
GstVideoInfo * out_info, CUstream cuda_stream)
convert_RGB_TO_RGB (GstCudaConverter * convert, GstVideoFrame * src_frame,
GstVideoFrame * dst_frame, CUstream cuda_stream)
{
CUtexObject texture = 0;
CUresult cuda_ret;
@ -1418,23 +1377,23 @@ convert_RGB_TO_RGB (GstCudaConverter * convert,
*/
if (!convert_UNPACK_RGB (convert, convert->kernel_func[0], cuda_stream,
src, in_info, convert->unpack_surface.device_ptr,
src_frame, convert->unpack_surface.device_ptr,
convert->unpack_surface.cuda_stride, &convert->in_rgb_order)) {
GST_ERROR ("could not unpack input rgb");
goto done;
}
in_width = GST_VIDEO_INFO_WIDTH (in_info);
in_height = GST_VIDEO_INFO_HEIGHT (in_info);
in_width = GST_VIDEO_FRAME_WIDTH (src_frame);
in_height = GST_VIDEO_FRAME_HEIGHT (src_frame);
out_width = GST_VIDEO_INFO_WIDTH (out_info);
out_height = GST_VIDEO_INFO_HEIGHT (out_info);
out_width = GST_VIDEO_FRAME_WIDTH (dst_frame);
out_height = GST_VIDEO_FRAME_HEIGHT (dst_frame);
dstRGB = dst->data;
dst_stride = dst->stride;
dstRGB = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (dst_frame, 0);
dst_stride = GST_VIDEO_FRAME_PLANE_STRIDE (dst_frame, 0);
if (GST_VIDEO_INFO_COMP_DEPTH (in_info, 0) > 8)
if (GST_VIDEO_FRAME_COMP_DEPTH (src_frame, 0) > 8)
format = CU_AD_FORMAT_UNSIGNED_INT16;
/* Use h/w linear interpolation only when resize is required.

View file

@ -34,21 +34,11 @@ GstCudaConverter * gst_cuda_converter_new (GstVideoInfo * in_info,
void gst_cuda_converter_free (GstCudaConverter * convert);
gboolean gst_cuda_converter_frame (GstCudaConverter * convert,
const GstCudaMemory * src,
GstVideoInfo * in_info,
GstCudaMemory * dst,
GstVideoInfo * out_info,
gboolean gst_cuda_converter_convert_frame (GstCudaConverter * convert,
GstVideoFrame * src_frame,
GstVideoFrame * dst_frame,
CUstream cuda_stream);
gboolean gst_cuda_converter_frame_unlocked (GstCudaConverter * convert,
const GstCudaMemory * src,
GstVideoInfo * in_info,
GstCudaMemory * dst,
GstVideoInfo * out_info,
CUstream cuda_stream);
G_END_DECLS
#endif /* __GST_CUDA_CONVERTER_H__ */

View file

@ -168,47 +168,30 @@ gst_cuda_base_filter_propose_allocation (GstBaseTransform * trans,
if (gst_query_get_n_allocation_pools (query) == 0) {
GstStructure *config;
GstVideoAlignment align;
GstAllocationParams params = { 0, 31, 0, 0, };
GstAllocator *allocator = NULL;
gint i;
pool = gst_cuda_buffer_pool_new (ctrans->context);
config = gst_buffer_pool_get_config (pool);
gst_video_alignment_reset (&align);
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (&info); i++) {
align.stride_align[i] = 31;
}
gst_video_info_align (&info, &align);
gst_buffer_pool_config_add_option (config,
GST_BUFFER_POOL_OPTION_VIDEO_META);
gst_buffer_pool_config_add_option (config,
GST_BUFFER_POOL_OPTION_VIDEO_ALIGNMENT);
gst_buffer_pool_config_set_video_alignment (config, &align);
size = GST_VIDEO_INFO_SIZE (&info);
gst_buffer_pool_config_set_params (config, caps, size, 0, 0);
gst_query_add_allocation_meta (query, GST_VIDEO_META_API_TYPE, NULL);
gst_query_add_allocation_pool (query, pool, size, 0, 0);
if (gst_buffer_pool_config_get_allocator (config, &allocator, &params)) {
if (params.align < 31)
params.align = 31;
gst_query_add_allocation_param (query, allocator, &params);
gst_buffer_pool_config_set_allocator (config, allocator, &params);
}
if (!gst_buffer_pool_set_config (pool, config)) {
GST_ERROR_OBJECT (ctrans, "failed to set config");
gst_object_unref (pool);
return FALSE;
}
/* Get updated size by cuda buffer pool */
config = gst_buffer_pool_get_config (pool);
gst_buffer_pool_config_get_params (config, NULL, &size, NULL, NULL);
gst_structure_free (config);
gst_query_add_allocation_pool (query, pool, size, 0, 0);
gst_object_unref (pool);
}
@ -265,6 +248,12 @@ gst_cuda_base_filter_decide_allocation (GstBaseTransform * trans,
gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META);
gst_buffer_pool_config_set_params (config, outcaps, size, min, max);
gst_buffer_pool_set_config (pool, config);
/* Get updated size by cuda buffer pool */
config = gst_buffer_pool_get_config (pool);
gst_buffer_pool_config_get_params (config, NULL, &size, NULL, NULL);
gst_structure_free (config);
if (update_pool)
gst_query_set_nth_allocation_pool (query, 0, pool, size, min, max);
else
@ -285,8 +274,6 @@ gst_cuda_base_filter_transform (GstBaseTransform * trans,
GstVideoFrame in_frame, out_frame;
GstFlowReturn ret = GST_FLOW_OK;
GstMemory *mem;
GstCudaMemory *in_cuda_mem = NULL;
GstCudaMemory *out_cuda_mem = NULL;
if (gst_buffer_n_memory (inbuf) != 1) {
GST_ERROR_OBJECT (self, "Invalid input buffer");
@ -299,8 +286,6 @@ gst_cuda_base_filter_transform (GstBaseTransform * trans,
return GST_FLOW_ERROR;
}
in_cuda_mem = GST_CUDA_MEMORY_CAST (mem);
if (gst_buffer_n_memory (outbuf) != 1) {
GST_ERROR_OBJECT (self, "Invalid output buffer");
return GST_FLOW_ERROR;
@ -312,8 +297,6 @@ gst_cuda_base_filter_transform (GstBaseTransform * trans,
return GST_FLOW_ERROR;
}
out_cuda_mem = GST_CUDA_MEMORY_CAST (mem);
if (!gst_video_frame_map (&in_frame, &ctrans->in_info, inbuf,
GST_MAP_READ | GST_MAP_CUDA)) {
GST_ERROR_OBJECT (self, "Failed to map input buffer");
@ -327,9 +310,8 @@ gst_cuda_base_filter_transform (GstBaseTransform * trans,
return GST_FLOW_ERROR;
}
if (!gst_cuda_converter_frame (self->converter,
in_cuda_mem, &ctrans->in_info,
out_cuda_mem, &ctrans->out_info, ctrans->cuda_stream)) {
if (!gst_cuda_converter_convert_frame (self->converter, &in_frame, &out_frame,
ctrans->cuda_stream)) {
GST_ERROR_OBJECT (self, "Failed to convert frame");
ret = GST_FLOW_ERROR;
}

View file

@ -30,11 +30,8 @@ GST_DEBUG_CATEGORY_STATIC (gst_cuda_buffer_pool_debug);
struct _GstCudaBufferPoolPrivate
{
GstAllocator *allocator;
GstCudaAllocator *allocator;
GstVideoInfo info;
gboolean add_videometa;
gboolean need_alignment;
GstCudaAllocationParams params;
};
#define gst_cuda_buffer_pool_parent_class parent_class
@ -44,8 +41,7 @@ G_DEFINE_TYPE_WITH_PRIVATE (GstCudaBufferPool, gst_cuda_buffer_pool,
static const gchar **
gst_cuda_buffer_pool_get_options (GstBufferPool * pool)
{
static const gchar *options[] = { GST_BUFFER_POOL_OPTION_VIDEO_META,
GST_BUFFER_POOL_OPTION_VIDEO_ALIGNMENT, NULL
static const gchar *options[] = { GST_BUFFER_POOL_OPTION_VIDEO_META, NULL
};
return options;
@ -54,153 +50,86 @@ gst_cuda_buffer_pool_get_options (GstBufferPool * pool)
static gboolean
gst_cuda_buffer_pool_set_config (GstBufferPool * pool, GstStructure * config)
{
GstCudaBufferPool *cuda_pool = GST_CUDA_BUFFER_POOL_CAST (pool);
GstCudaBufferPoolPrivate *priv = cuda_pool->priv;
GstCudaBufferPool *self = GST_CUDA_BUFFER_POOL (pool);
GstCudaBufferPoolPrivate *priv = self->priv;
GstCaps *caps = NULL;
guint size, min_buffers, max_buffers;
guint max_align, n;
GstAllocator *allocator = NULL;
GstAllocationParams *params = (GstAllocationParams *) & priv->params;
GstVideoInfo *info = &priv->params.info;
GstVideoInfo info;
GstMemory *mem;
GstCudaMemory *cmem;
if (!gst_buffer_pool_config_get_params (config, &caps, &size, &min_buffers,
&max_buffers))
goto wrong_config;
if (caps == NULL)
goto no_caps;
if (!gst_buffer_pool_config_get_allocator (config, &allocator, params))
goto wrong_config;
/* now parse the caps from the config */
if (!gst_video_info_from_caps (info, caps))
goto wrong_caps;
GST_LOG_OBJECT (pool, "%dx%d, caps %" GST_PTR_FORMAT,
GST_VIDEO_INFO_WIDTH (info), GST_VIDEO_INFO_HEIGHT (info), caps);
gst_clear_object (&priv->allocator);
if (allocator) {
if (!GST_IS_CUDA_ALLOCATOR (allocator)) {
goto wrong_allocator;
} else {
priv->allocator = gst_object_ref (allocator);
}
} else {
allocator = priv->allocator = gst_cuda_allocator_new (cuda_pool->context);
if (G_UNLIKELY (priv->allocator == NULL))
goto no_allocator;
}
priv->add_videometa = gst_buffer_pool_config_has_option (config,
GST_BUFFER_POOL_OPTION_VIDEO_META);
priv->need_alignment = gst_buffer_pool_config_has_option (config,
GST_BUFFER_POOL_OPTION_VIDEO_ALIGNMENT);
max_align = params->align;
/* do memory align */
if (priv->need_alignment && priv->add_videometa) {
GstVideoAlignment valign;
gst_buffer_pool_config_get_video_alignment (config, &valign);
for (n = 0; n < GST_VIDEO_MAX_PLANES; ++n)
max_align |= valign.stride_align[n];
for (n = 0; n < GST_VIDEO_MAX_PLANES; ++n)
valign.stride_align[n] = max_align;
if (!gst_video_info_align (info, &valign))
goto failed_to_align;
gst_buffer_pool_config_set_video_alignment (config, &valign);
}
if (params->align < max_align) {
GST_WARNING_OBJECT (pool, "allocation params alignment %u is smaller "
"than the max specified video stride alignment %u, fixing",
(guint) params->align, max_align);
params->align = max_align;
gst_buffer_pool_config_set_allocator (config, allocator, params);
}
gst_buffer_pool_config_set_params (config, caps, GST_VIDEO_INFO_SIZE (info),
min_buffers, max_buffers);
return GST_BUFFER_POOL_CLASS (parent_class)->set_config (pool, config);
/* ERRORS */
wrong_config:
{
GST_WARNING_OBJECT (pool, "invalid config");
&max_buffers)) {
GST_WARNING_OBJECT (self, "invalid config");
return FALSE;
}
no_caps:
{
if (!caps) {
GST_WARNING_OBJECT (pool, "no caps in config");
return FALSE;
}
wrong_caps:
{
GST_WARNING_OBJECT (pool,
"failed getting geometry from caps %" GST_PTR_FORMAT, caps);
if (!gst_video_info_from_caps (&info, caps)) {
GST_WARNING_OBJECT (self, "Failed to convert caps to video-info");
return FALSE;
}
no_allocator:
{
GST_WARNING_OBJECT (pool, "Could not create new CUDA allocator");
gst_clear_object (&priv->allocator);
priv->allocator = (GstCudaAllocator *)
gst_allocator_find (GST_CUDA_MEMORY_TYPE_NAME);
if (!priv->allocator) {
GST_WARNING_OBJECT (self, "CudaAllocator is unavailable");
return FALSE;
}
wrong_allocator:
{
GST_WARNING_OBJECT (pool, "Incorrect allocator type for this pool");
return FALSE;
}
failed_to_align:
{
GST_WARNING_OBJECT (pool, "Failed to align");
mem = gst_cuda_allocator_alloc (priv->allocator, self->context, &info);
if (!mem) {
GST_WARNING_OBJECT (self, "Failed to allocate memory");
return FALSE;
}
cmem = GST_CUDA_MEMORY_CAST (mem);
gst_buffer_pool_config_set_params (config, caps,
GST_VIDEO_INFO_SIZE (&cmem->info), min_buffers, max_buffers);
priv->info = info;
gst_memory_unref (mem);
return GST_BUFFER_POOL_CLASS (parent_class)->set_config (pool, config);
}
static GstFlowReturn
gst_cuda_buffer_pool_alloc (GstBufferPool * pool, GstBuffer ** buffer,
GstBufferPoolAcquireParams * params)
{
GstCudaBufferPool *cuda_pool = GST_CUDA_BUFFER_POOL_CAST (pool);
GstCudaBufferPoolPrivate *priv = cuda_pool->priv;
GstVideoInfo *info;
GstBuffer *cuda;
GstCudaBufferPool *self = GST_CUDA_BUFFER_POOL_CAST (pool);
GstCudaBufferPoolPrivate *priv = self->priv;
GstVideoInfo *info = &priv->info;
GstBuffer *buf;
GstMemory *mem;
GstCudaMemory *cmem;
info = &priv->params.info;
cuda = gst_buffer_new ();
mem = gst_cuda_allocator_alloc (GST_ALLOCATOR_CAST (priv->allocator),
GST_VIDEO_INFO_SIZE (info), &priv->params);
if (mem == NULL) {
gst_buffer_unref (cuda);
mem = gst_cuda_allocator_alloc (priv->allocator, self->context, &priv->info);
if (!mem) {
GST_WARNING_OBJECT (pool, "Cannot create CUDA memory");
return GST_FLOW_ERROR;
}
gst_buffer_append_memory (cuda, mem);
if (priv->add_videometa) {
GST_DEBUG_OBJECT (pool, "adding GstVideoMeta");
gst_buffer_add_video_meta_full (cuda, GST_VIDEO_FRAME_FLAG_NONE,
GST_VIDEO_INFO_FORMAT (info), GST_VIDEO_INFO_WIDTH (info),
GST_VIDEO_INFO_HEIGHT (info), GST_VIDEO_INFO_N_PLANES (info),
info->offset, info->stride);
}
cmem = GST_CUDA_MEMORY_CAST (mem);
*buffer = cuda;
buf = gst_buffer_new ();
gst_buffer_append_memory (buf, mem);
GST_DEBUG_OBJECT (pool, "adding GstVideoMeta");
gst_buffer_add_video_meta_full (buf, GST_VIDEO_FRAME_FLAG_NONE,
GST_VIDEO_INFO_FORMAT (info), GST_VIDEO_INFO_WIDTH (info),
GST_VIDEO_INFO_HEIGHT (info), GST_VIDEO_INFO_N_PLANES (info),
cmem->info.offset, cmem->info.stride);
*buffer = buf;
return GST_FLOW_OK;
}
@ -208,44 +137,41 @@ gst_cuda_buffer_pool_alloc (GstBufferPool * pool, GstBuffer ** buffer,
GstBufferPool *
gst_cuda_buffer_pool_new (GstCudaContext * context)
{
GstCudaBufferPool *pool;
GstCudaBufferPool *self;
pool = g_object_new (GST_TYPE_CUDA_BUFFER_POOL, NULL);
gst_object_ref_sink (pool);
g_return_val_if_fail (GST_IS_CUDA_CONTEXT (context), NULL);
pool->context = gst_object_ref (context);
self = g_object_new (GST_TYPE_CUDA_BUFFER_POOL, NULL);
gst_object_ref_sink (self);
GST_LOG_OBJECT (pool, "new CUDA buffer pool %p", pool);
self->context = gst_object_ref (context);
return GST_BUFFER_POOL_CAST (pool);
return GST_BUFFER_POOL_CAST (self);
}
static void
gst_cuda_buffer_pool_dispose (GObject * object)
{
GstCudaBufferPool *pool = GST_CUDA_BUFFER_POOL_CAST (object);
GstCudaBufferPoolPrivate *priv = pool->priv;
GST_LOG_OBJECT (pool, "finalize CUDA buffer pool %p", pool);
GstCudaBufferPool *self = GST_CUDA_BUFFER_POOL_CAST (object);
GstCudaBufferPoolPrivate *priv = self->priv;
gst_clear_object (&priv->allocator);
gst_clear_object (&pool->context);
gst_clear_object (&self->context);
G_OBJECT_CLASS (parent_class)->dispose (object);
}
static void
gst_cuda_buffer_pool_class_init (GstCudaBufferPoolClass * klass)
{
GObjectClass *gobject_class = (GObjectClass *) klass;
GstBufferPoolClass *gstbufferpool_class = (GstBufferPoolClass *) klass;
GstBufferPoolClass *bufferpool_class = (GstBufferPoolClass *) klass;
gobject_class->dispose = gst_cuda_buffer_pool_dispose;
gstbufferpool_class->get_options = gst_cuda_buffer_pool_get_options;
gstbufferpool_class->set_config = gst_cuda_buffer_pool_set_config;
gstbufferpool_class->alloc_buffer = gst_cuda_buffer_pool_alloc;
bufferpool_class->get_options = gst_cuda_buffer_pool_get_options;
bufferpool_class->set_config = gst_cuda_buffer_pool_set_config;
bufferpool_class->alloc_buffer = gst_cuda_buffer_pool_alloc;
GST_DEBUG_CATEGORY_INIT (gst_cuda_buffer_pool_debug, "cudabufferpool", 0,
"CUDA Buffer Pool");

View file

@ -180,10 +180,6 @@ gst_cuda_download_propose_allocation (GstBaseTransform * trans,
if (gst_query_get_n_allocation_pools (query) == 0) {
GstCapsFeatures *features;
GstStructure *config;
GstVideoAlignment align;
GstAllocationParams params = { 0, 31, 0, 0, };
GstAllocator *allocator = NULL;
gint i;
features = gst_caps_get_features (caps, 0);
@ -197,38 +193,25 @@ gst_cuda_download_propose_allocation (GstBaseTransform * trans,
config = gst_buffer_pool_get_config (pool);
gst_video_alignment_reset (&align);
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (&info); i++) {
align.stride_align[i] = 31;
}
gst_video_info_align (&info, &align);
gst_buffer_pool_config_add_option (config,
GST_BUFFER_POOL_OPTION_VIDEO_META);
gst_buffer_pool_config_add_option (config,
GST_BUFFER_POOL_OPTION_VIDEO_ALIGNMENT);
gst_buffer_pool_config_set_video_alignment (config, &align);
size = GST_VIDEO_INFO_SIZE (&info);
gst_buffer_pool_config_set_params (config, caps, size, 0, 0);
gst_query_add_allocation_meta (query, GST_VIDEO_META_API_TYPE, NULL);
gst_query_add_allocation_pool (query, pool, size, 0, 0);
if (gst_buffer_pool_config_get_allocator (config, &allocator, &params)) {
if (params.align < 31)
params.align = 31;
gst_query_add_allocation_param (query, allocator, &params);
gst_buffer_pool_config_set_allocator (config, allocator, &params);
}
if (!gst_buffer_pool_set_config (pool, config)) {
GST_ERROR_OBJECT (ctrans, "failed to set config");
gst_object_unref (pool);
return FALSE;
}
/* Get updated size by cuda buffer pool */
config = gst_buffer_pool_get_config (pool);
gst_buffer_pool_config_get_params (config, NULL, &size, NULL, NULL);
gst_structure_free (config);
gst_query_add_allocation_pool (query, pool, size, 0, 0);
gst_object_unref (pool);
}
@ -295,6 +278,12 @@ gst_cuda_download_decide_allocation (GstBaseTransform * trans, GstQuery * query)
gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META);
gst_buffer_pool_config_set_params (config, outcaps, size, min, max);
gst_buffer_pool_set_config (pool, config);
/* Get updated size by cuda buffer pool */
config = gst_buffer_pool_get_config (pool);
gst_buffer_pool_config_get_params (config, NULL, &size, NULL, NULL);
gst_structure_free (config);
if (update_pool)
gst_query_set_nth_allocation_pool (query, 0, pool, size, min, max);
else

View file

@ -26,20 +26,33 @@
#include <string.h>
GST_DEBUG_CATEGORY_STATIC (cudaallocator_debug);
#define GST_CAT_DEFAULT cudaallocator_debug
GST_DEBUG_CATEGORY_STATIC (GST_CAT_MEMORY);
GST_DEBUG_CATEGORY_STATIC (cuda_allocator_debug);
#define GST_CAT_DEFAULT cuda_allocator_debug
static GstAllocator *_gst_cuda_allocator = NULL;
struct _GstCudaMemoryPrivate
{
CUdeviceptr data;
void *staging;
/* params used for cuMemAllocPitch */
gsize pitch;
guint width_in_bytes;
guint height;
GMutex lock;
};
#define gst_cuda_allocator_parent_class parent_class
G_DEFINE_TYPE (GstCudaAllocator, gst_cuda_allocator, GST_TYPE_ALLOCATOR);
static void gst_cuda_allocator_dispose (GObject * object);
static void gst_cuda_allocator_free (GstAllocator * allocator,
GstMemory * memory);
static gpointer cuda_mem_map (GstCudaMemory * mem, gsize maxsize,
static gpointer cuda_mem_map (GstMemory * mem, gsize maxsize,
GstMapFlags flags);
static void cuda_mem_unmap_full (GstCudaMemory * mem, GstMapInfo * info);
static void cuda_mem_unmap_full (GstMemory * mem, GstMapInfo * info);
static GstMemory *cuda_mem_copy (GstMemory * mem, gssize offset, gssize size);
static GstMemory *
@ -52,17 +65,13 @@ gst_cuda_allocator_dummy_alloc (GstAllocator * allocator, gsize size,
static void
gst_cuda_allocator_class_init (GstCudaAllocatorClass * klass)
{
GObjectClass *gobject_class = G_OBJECT_CLASS (klass);
GstAllocatorClass *allocator_class = GST_ALLOCATOR_CLASS (klass);
gobject_class->dispose = gst_cuda_allocator_dispose;
allocator_class->alloc = GST_DEBUG_FUNCPTR (gst_cuda_allocator_dummy_alloc);
allocator_class->free = GST_DEBUG_FUNCPTR (gst_cuda_allocator_free);
GST_DEBUG_CATEGORY_INIT (cudaallocator_debug, "cudaallocator", 0,
GST_DEBUG_CATEGORY_INIT (cuda_allocator_debug, "cudaallocator", 0,
"CUDA Allocator");
GST_DEBUG_CATEGORY_GET (GST_CAT_MEMORY, "GST_MEMORY");
}
static void
@ -74,337 +83,273 @@ gst_cuda_allocator_init (GstCudaAllocator * allocator)
alloc->mem_type = GST_CUDA_MEMORY_TYPE_NAME;
alloc->mem_map = (GstMemoryMapFunction) cuda_mem_map;
alloc->mem_unmap_full = (GstMemoryUnmapFullFunction) cuda_mem_unmap_full;
alloc->mem_copy = (GstMemoryCopyFunction) cuda_mem_copy;
alloc->mem_map = cuda_mem_map;
alloc->mem_unmap_full = cuda_mem_unmap_full;
alloc->mem_copy = cuda_mem_copy;
GST_OBJECT_FLAG_SET (allocator, GST_ALLOCATOR_FLAG_CUSTOM_ALLOC);
}
static void
gst_cuda_allocator_dispose (GObject * object)
static GstMemory *
gst_cuda_allocator_alloc_internal (GstCudaAllocator * self,
GstCudaContext * context, const GstVideoInfo * info,
guint width_in_bytes, guint alloc_height)
{
GstCudaAllocator *self = GST_CUDA_ALLOCATOR_CAST (object);
GST_DEBUG_OBJECT (self, "dispose");
gst_clear_object (&self->context);
G_OBJECT_CLASS (parent_class)->dispose (object);
}
GstMemory *
gst_cuda_allocator_alloc (GstAllocator * allocator, gsize size,
GstCudaAllocationParams * params)
{
GstCudaAllocator *self = GST_CUDA_ALLOCATOR_CAST (allocator);
gsize maxsize = size + params->parent.prefix + params->parent.padding;
gsize align = params->parent.align;
gsize offset = params->parent.prefix;
GstMemoryFlags flags = params->parent.flags;
GstCudaMemoryPrivate *priv;
GstCudaMemory *mem;
CUdeviceptr data;
gboolean ret = FALSE;
GstCudaMemory *mem;
GstVideoInfo *info = &params->info;
gint i;
guint width, height;
gsize stride, plane_offset;
gsize pitch;
guint height = GST_VIDEO_INFO_HEIGHT (info);
GstVideoInfo *alloc_info;
if (!gst_cuda_context_push (self->context))
if (!gst_cuda_context_push (context))
return NULL;
/* ensure configured alignment */
align |= gst_memory_alignment;
/* allocate more to compensate for alignment */
maxsize += align;
GST_CAT_DEBUG_OBJECT (GST_CAT_MEMORY, self, "allocate new cuda memory");
width = GST_VIDEO_INFO_COMP_WIDTH (info, 0) *
GST_VIDEO_INFO_COMP_PSTRIDE (info, 0);
height = 0;
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++)
height += GST_VIDEO_INFO_COMP_HEIGHT (info, i);
ret = gst_cuda_result (CuMemAllocPitch (&data, &stride, width, height, 16));
ret = gst_cuda_result (CuMemAllocPitch (&data, &pitch, width_in_bytes,
alloc_height, 16));
gst_cuda_context_pop (NULL);
if (G_UNLIKELY (!ret)) {
GST_CAT_ERROR_OBJECT (GST_CAT_MEMORY, self, "CUDA allocation failure");
if (!ret) {
GST_ERROR_OBJECT (self, "Failed to allocate CUDA memory");
return NULL;
}
mem = g_new0 (GstCudaMemory, 1);
g_mutex_init (&mem->lock);
mem->data = data;
mem->alloc_params = *params;
mem->stride = stride;
mem->priv = priv = g_new0 (GstCudaMemoryPrivate, 1);
plane_offset = 0;
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++) {
mem->offset[i] = plane_offset;
plane_offset += stride * GST_VIDEO_INFO_COMP_HEIGHT (info, i);
priv->data = data;
priv->pitch = pitch;
priv->width_in_bytes = width_in_bytes;
priv->height = alloc_height;
g_mutex_init (&priv->lock);
mem->context = gst_object_ref (context);
mem->info = *info;
mem->info.size = pitch * alloc_height;
alloc_info = &mem->info;
gst_memory_init (GST_MEMORY_CAST (mem), 0, GST_ALLOCATOR_CAST (self),
NULL, alloc_info->size, 0, 0, alloc_info->size);
switch (GST_VIDEO_INFO_FORMAT (info)) {
case GST_VIDEO_FORMAT_I420:
case GST_VIDEO_FORMAT_YV12:
case GST_VIDEO_FORMAT_I420_10LE:
/* we are wasting space yes, but required so that this memory
* can be used in kernel function */
alloc_info->stride[0] = pitch;
alloc_info->stride[1] = pitch;
alloc_info->stride[2] = pitch;
alloc_info->offset[0] = 0;
alloc_info->offset[1] = alloc_info->stride[0] * height;
alloc_info->offset[2] = alloc_info->offset[1] +
alloc_info->stride[1] * height / 2;
break;
case GST_VIDEO_FORMAT_NV12:
case GST_VIDEO_FORMAT_NV21:
case GST_VIDEO_FORMAT_P010_10LE:
case GST_VIDEO_FORMAT_P016_LE:
alloc_info->stride[0] = pitch;
alloc_info->stride[1] = pitch;
alloc_info->offset[0] = 0;
alloc_info->offset[1] = alloc_info->stride[0] * height;
break;
case GST_VIDEO_FORMAT_Y444:
case GST_VIDEO_FORMAT_Y444_16LE:
alloc_info->stride[0] = pitch;
alloc_info->stride[1] = pitch;
alloc_info->stride[2] = pitch;
alloc_info->offset[0] = 0;
alloc_info->offset[1] = alloc_info->stride[0] * height;
alloc_info->offset[2] = alloc_info->offset[1] * 2;
break;
case GST_VIDEO_FORMAT_BGRA:
case GST_VIDEO_FORMAT_RGBA:
case GST_VIDEO_FORMAT_RGBx:
case GST_VIDEO_FORMAT_BGRx:
case GST_VIDEO_FORMAT_ARGB:
case GST_VIDEO_FORMAT_ABGR:
case GST_VIDEO_FORMAT_RGB:
case GST_VIDEO_FORMAT_BGR:
case GST_VIDEO_FORMAT_BGR10A2_LE:
case GST_VIDEO_FORMAT_RGB10A2_LE:
alloc_info->stride[0] = pitch;
alloc_info->offset[0] = 0;
break;
default:
GST_ERROR_OBJECT (self, "Unexpected format %s",
gst_video_format_to_string (GST_VIDEO_INFO_FORMAT (info)));
g_assert_not_reached ();
gst_memory_unref (GST_MEMORY_CAST (mem));
return NULL;
}
mem->context = gst_object_ref (self->context);
gst_memory_init (GST_MEMORY_CAST (mem),
flags, GST_ALLOCATOR_CAST (self), NULL, maxsize, align, offset, size);
return GST_MEMORY_CAST (mem);
}
static void
gst_cuda_allocator_free (GstAllocator * allocator, GstMemory * memory)
{
GstCudaAllocator *self = GST_CUDA_ALLOCATOR_CAST (allocator);
GstCudaMemory *mem = GST_CUDA_MEMORY_CAST (memory);
GstCudaMemoryPrivate *priv = mem->priv;
GST_CAT_DEBUG_OBJECT (GST_CAT_MEMORY, allocator, "free cuda memory");
g_mutex_clear (&mem->lock);
gst_cuda_context_push (self->context);
if (mem->data)
gst_cuda_result (CuMemFree (mem->data));
if (mem->map_alloc_data)
gst_cuda_result (CuMemFreeHost (mem->map_alloc_data));
gst_cuda_context_push (mem->context);
if (priv->data)
gst_cuda_result (CuMemFree (priv->data));
if (priv->staging)
gst_cuda_result (CuMemFreeHost (priv->staging));
gst_cuda_context_pop (NULL);
gst_object_unref (mem->context);
g_mutex_clear (&priv->lock);
g_free (mem->priv);
g_free (mem);
}
/* called with lock */
static gboolean
gst_cuda_memory_upload_transfer (GstCudaMemory * mem)
gst_cuda_memory_upload (GstCudaAllocator * self, GstCudaMemory * mem)
{
gint i;
GstVideoInfo *info = &mem->alloc_params.info;
GstCudaMemoryPrivate *priv = mem->priv;
gboolean ret = TRUE;
CUDA_MEMCPY2D param = { 0, };
if (!mem->map_data) {
GST_CAT_ERROR (GST_CAT_MEMORY, "no staging memory to upload");
if (!priv->staging ||
!GST_MEMORY_FLAG_IS_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD)) {
return TRUE;
}
if (!gst_cuda_context_push (mem->context)) {
GST_ERROR_OBJECT (self, "Failed to push cuda context");
return FALSE;
}
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++) {
CUDA_MEMCPY2D param = { 0, };
param.srcMemoryType = CU_MEMORYTYPE_HOST;
param.srcHost = priv->staging;
param.srcPitch = priv->pitch;
param.srcMemoryType = CU_MEMORYTYPE_HOST;
param.srcHost =
(guint8 *) mem->map_data + GST_VIDEO_INFO_PLANE_OFFSET (info, i);
param.srcPitch = GST_VIDEO_INFO_PLANE_STRIDE (info, i);
param.dstMemoryType = CU_MEMORYTYPE_DEVICE;
param.dstDevice = (CUdeviceptr) priv->data;
param.dstPitch = priv->pitch;
param.WidthInBytes = priv->width_in_bytes;
param.Height = priv->height;
param.dstMemoryType = CU_MEMORYTYPE_DEVICE;
param.dstDevice = mem->data + mem->offset[i];
param.dstPitch = mem->stride;
param.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (info, i) *
GST_VIDEO_INFO_COMP_PSTRIDE (info, i);
param.Height = GST_VIDEO_INFO_COMP_HEIGHT (info, i);
ret = gst_cuda_result (CuMemcpy2D (&param));
gst_cuda_context_pop (NULL);
if (!gst_cuda_result (CuMemcpy2DAsync (&param, NULL))) {
GST_CAT_ERROR (GST_CAT_MEMORY, "Failed to copy %dth plane", i);
ret = FALSE;
break;
}
}
gst_cuda_result (CuStreamSynchronize (NULL));
if (!ret)
GST_ERROR_OBJECT (self, "Failed to upload memory");
return ret;
}
/* called with lock */
static gboolean
gst_cuda_memory_download_transfer (GstCudaMemory * mem)
gst_cuda_memory_download (GstCudaAllocator * self, GstCudaMemory * mem)
{
gint i;
GstVideoInfo *info = &mem->alloc_params.info;
GstCudaMemoryPrivate *priv = mem->priv;
gboolean ret = TRUE;
CUDA_MEMCPY2D param = { 0, };
if (!mem->map_data) {
GST_CAT_ERROR (GST_CAT_MEMORY, "no staging memory to upload");
if (!GST_MEMORY_FLAG_IS_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD))
return TRUE;
if (!gst_cuda_context_push (mem->context)) {
GST_ERROR_OBJECT (self, "Failed to push cuda context");
return FALSE;
}
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++) {
CUDA_MEMCPY2D param = { 0, };
param.srcMemoryType = CU_MEMORYTYPE_DEVICE;
param.srcDevice = mem->data + mem->offset[i];
param.srcPitch = mem->stride;
param.dstMemoryType = CU_MEMORYTYPE_HOST;
param.dstHost =
(guint8 *) mem->map_data + GST_VIDEO_INFO_PLANE_OFFSET (info, i);
param.dstPitch = GST_VIDEO_INFO_PLANE_STRIDE (info, i);
param.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (info, i) *
GST_VIDEO_INFO_COMP_PSTRIDE (info, i);
param.Height = GST_VIDEO_INFO_COMP_HEIGHT (info, i);
if (!gst_cuda_result (CuMemcpy2DAsync (&param, NULL))) {
GST_CAT_ERROR (GST_CAT_MEMORY, "Failed to copy %dth plane", i);
CuMemFreeHost (mem->map_alloc_data);
mem->map_alloc_data = mem->map_data = mem->align_data = NULL;
break;
}
}
gst_cuda_result (CuStreamSynchronize (NULL));
if (!mem->map_data)
return FALSE;
return TRUE;
}
static gpointer
gst_cuda_memory_device_memory_map (GstCudaMemory * mem)
{
GstMemory *memory = GST_MEMORY_CAST (mem);
gpointer data;
gsize aoffset;
gsize align = memory->align;
if (mem->map_data) {
return mem->map_data;
}
GST_CAT_DEBUG (GST_CAT_MEMORY, "alloc host memory for map");
if (!mem->map_alloc_data) {
gsize maxsize;
guint8 *align_data;
maxsize = memory->maxsize + align;
if (!gst_cuda_context_push (mem->context)) {
GST_CAT_ERROR (GST_CAT_MEMORY, "cannot push cuda context");
return NULL;
}
if (!gst_cuda_result (CuMemAllocHost (&data, maxsize))) {
GST_CAT_ERROR (GST_CAT_MEMORY, "cannot alloc host memory");
if (!priv->staging) {
ret = gst_cuda_result (CuMemAllocHost (&priv->staging,
GST_MEMORY_CAST (mem)->size));
if (!ret) {
GST_ERROR_OBJECT (self, "Failed to allocate staging memory");
gst_cuda_context_pop (NULL);
return NULL;
}
if (!gst_cuda_context_pop (NULL)) {
GST_CAT_WARNING (GST_CAT_MEMORY, "cannot pop cuda context");
}
mem->map_alloc_data = data;
align_data = data;
/* do align */
if ((aoffset = ((guintptr) align_data & align))) {
aoffset = (align + 1) - aoffset;
align_data += aoffset;
}
mem->align_data = align_data;
/* first memory, always need download to staging */
GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD);
}
mem->map_data = mem->align_data;
if (GST_MEMORY_FLAG_IS_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD)) {
if (!gst_cuda_context_push (mem->context)) {
GST_CAT_ERROR (GST_CAT_MEMORY, "cannot push cuda context");
return NULL;
}
gst_cuda_memory_download_transfer (mem);
if (!gst_cuda_context_pop (NULL)) {
GST_CAT_WARNING (GST_CAT_MEMORY, "cannot pop cuda context");
return FALSE;
}
}
return mem->map_data;
param.srcMemoryType = CU_MEMORYTYPE_DEVICE;
param.srcDevice = (CUdeviceptr) priv->data;
param.srcPitch = priv->pitch;
param.dstMemoryType = CU_MEMORYTYPE_HOST;
param.dstHost = priv->staging;
param.dstPitch = priv->pitch;
param.WidthInBytes = priv->width_in_bytes;
param.Height = priv->height;
ret = gst_cuda_result (CuMemcpy2D (&param));
gst_cuda_context_pop (NULL);
if (!ret)
GST_ERROR_OBJECT (self, "Failed to upload memory");
return ret;
}
static gpointer
cuda_mem_map (GstCudaMemory * mem, gsize maxsize, GstMapFlags flags)
cuda_mem_map (GstMemory * mem, gsize maxsize, GstMapFlags flags)
{
GstCudaAllocator *self = GST_CUDA_ALLOCATOR (mem->allocator);
GstCudaMemory *cmem = GST_CUDA_MEMORY_CAST (mem);
GstCudaMemoryPrivate *priv = cmem->priv;
gpointer ret = NULL;
g_mutex_lock (&mem->lock);
mem->map_count++;
g_mutex_lock (&priv->lock);
if ((flags & GST_MAP_CUDA) == GST_MAP_CUDA) {
/* upload from staging to device memory if necessary */
if (GST_MEMORY_FLAG_IS_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD)) {
if (!gst_cuda_context_push (mem->context)) {
GST_CAT_ERROR (GST_CAT_MEMORY, "cannot push cuda context");
g_mutex_unlock (&mem->lock);
return NULL;
}
if (!gst_cuda_memory_upload_transfer (mem)) {
g_mutex_unlock (&mem->lock);
return NULL;
}
gst_cuda_context_pop (NULL);
}
if (!gst_cuda_memory_upload (self, cmem))
goto out;
GST_MEMORY_FLAG_UNSET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD);
if ((flags & GST_MAP_WRITE) == GST_MAP_WRITE)
GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD);
g_mutex_unlock (&mem->lock);
return (gpointer) mem->data;
ret = (gpointer) priv->data;
goto out;
}
ret = gst_cuda_memory_device_memory_map (mem);
if (ret == NULL) {
mem->map_count--;
g_mutex_unlock (&mem->lock);
return NULL;
}
/* First CPU access, must be downloaded */
if (!priv->staging)
GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD);
if (!gst_cuda_memory_download (self, cmem))
goto out;
ret = priv->staging;
if ((flags & GST_MAP_WRITE) == GST_MAP_WRITE)
GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD);
GST_MEMORY_FLAG_UNSET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD);
g_mutex_unlock (&mem->lock);
out:
g_mutex_unlock (&priv->lock);
return ret;
}
static void
cuda_mem_unmap_full (GstCudaMemory * mem, GstMapInfo * info)
cuda_mem_unmap_full (GstMemory * mem, GstMapInfo * info)
{
g_mutex_lock (&mem->lock);
mem->map_count--;
GST_CAT_TRACE (GST_CAT_MEMORY,
"unmap CUDA memory %p, map count %d, have map_data %s",
mem, mem->map_count, mem->map_data ? "true" : "false");
GstCudaMemory *cmem = GST_CUDA_MEMORY_CAST (mem);
GstCudaMemoryPrivate *priv = cmem->priv;
g_mutex_lock (&priv->lock);
if ((info->flags & GST_MAP_CUDA) == GST_MAP_CUDA) {
if ((info->flags & GST_MAP_WRITE) == GST_MAP_WRITE)
GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD);
g_mutex_unlock (&mem->lock);
return;
goto out;
}
if ((info->flags & GST_MAP_WRITE))
if ((info->flags & GST_MAP_WRITE) == GST_MAP_WRITE)
GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD);
if (mem->map_count > 0 || !mem->map_data) {
g_mutex_unlock (&mem->lock);
return;
}
mem->map_data = NULL;
g_mutex_unlock (&mem->lock);
out:
g_mutex_unlock (&priv->lock);
return;
}
@ -412,72 +357,82 @@ cuda_mem_unmap_full (GstCudaMemory * mem, GstMapInfo * info)
static GstMemory *
cuda_mem_copy (GstMemory * mem, gssize offset, gssize size)
{
GstMemory *copy;
GstCudaAllocator *self = GST_CUDA_ALLOCATOR (mem->allocator);
GstCudaMemory *src_mem = GST_CUDA_MEMORY_CAST (mem);
GstCudaMemory *dst_mem;
GstCudaContext *ctx = GST_CUDA_ALLOCATOR_CAST (mem->allocator)->context;
gint i;
GstVideoInfo *info;
GstCudaContext *context = src_mem->context;
GstMapInfo src_info, dst_info;
CUDA_MEMCPY2D param = { 0, };
GstMemory *copy;
gboolean ret;
/* offset and size are ignored */
copy = gst_cuda_allocator_alloc (mem->allocator, mem->size,
&src_mem->alloc_params);
copy = gst_cuda_allocator_alloc_internal (self, context,
&src_mem->info, src_mem->priv->width_in_bytes, src_mem->priv->height);
dst_mem = GST_CUDA_MEMORY_CAST (copy);
if (!copy) {
GST_ERROR_OBJECT (self, "Failed to allocate memory for copying");
return NULL;
}
info = &src_mem->alloc_params.info;
if (!gst_memory_map (mem, &src_info, GST_MAP_READ | GST_MAP_CUDA)) {
GST_ERROR_OBJECT (self, "Failed to map src memory");
gst_memory_unref (copy);
return NULL;
}
if (!gst_cuda_context_push (ctx)) {
GST_CAT_ERROR (GST_CAT_MEMORY, "cannot push cuda context");
gst_cuda_allocator_free (mem->allocator, copy);
if (!gst_memory_map (copy, &dst_info, GST_MAP_WRITE | GST_MAP_CUDA)) {
GST_ERROR_OBJECT (self, "Failed to map dst memory");
gst_memory_unmap (mem, &src_info);
gst_memory_unref (copy);
return NULL;
}
if (!gst_cuda_context_push (context)) {
GST_ERROR_OBJECT (self, "Failed to push cuda context");
gst_memory_unmap (mem, &src_info);
gst_memory_unmap (copy, &dst_info);
return NULL;
}
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++) {
CUDA_MEMCPY2D param = { 0, };
param.srcMemoryType = CU_MEMORYTYPE_DEVICE;
param.srcDevice = (CUdeviceptr) src_info.data;
param.srcPitch = src_mem->priv->pitch;
param.srcMemoryType = CU_MEMORYTYPE_DEVICE;
param.srcDevice = src_mem->data + src_mem->offset[i];
param.srcPitch = src_mem->stride;
param.dstMemoryType = CU_MEMORYTYPE_DEVICE;
param.dstDevice = (CUdeviceptr) dst_info.data;
param.dstPitch = src_mem->priv->pitch;
param.WidthInBytes = src_mem->priv->width_in_bytes;
param.Height = src_mem->priv->height;
param.dstMemoryType = CU_MEMORYTYPE_DEVICE;
param.dstDevice = dst_mem->data + dst_mem->offset[i];
param.dstPitch = dst_mem->stride;
param.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (info, i) *
GST_VIDEO_INFO_COMP_PSTRIDE (info, i);
param.Height = GST_VIDEO_INFO_COMP_HEIGHT (info, i);
ret = gst_cuda_result (CuMemcpy2D (&param));
gst_cuda_context_pop (NULL);
if (!gst_cuda_result (CuMemcpy2DAsync (&param, NULL))) {
GST_CAT_ERROR_OBJECT (GST_CAT_MEMORY,
mem->allocator, "Failed to copy %dth plane", i);
gst_cuda_context_pop (NULL);
gst_cuda_allocator_free (mem->allocator, copy);
gst_memory_unmap (mem, &src_info);
gst_memory_unmap (copy, &dst_info);
return NULL;
}
}
gst_cuda_result (CuStreamSynchronize (NULL));
if (!gst_cuda_context_pop (NULL)) {
GST_CAT_WARNING (GST_CAT_MEMORY, "cannot pop cuda context");
if (!ret) {
GST_ERROR_OBJECT (self, "Failed to copy memory");
gst_memory_unref (copy);
return NULL;
}
return copy;
}
GstAllocator *
gst_cuda_allocator_new (GstCudaContext * context)
void
gst_cuda_memory_init_once (void)
{
GstCudaAllocator *allocator;
static gsize _init = 0;
g_return_val_if_fail (GST_IS_CUDA_CONTEXT (context), NULL);
if (g_once_init_enter (&_init)) {
_gst_cuda_allocator =
(GstAllocator *) g_object_new (GST_TYPE_CUDA_ALLOCATOR, NULL);
gst_object_ref_sink (_gst_cuda_allocator);
allocator = g_object_new (GST_TYPE_CUDA_ALLOCATOR, NULL);
allocator->context = gst_object_ref (context);
return GST_ALLOCATOR_CAST (allocator);
gst_allocator_register (GST_CUDA_MEMORY_TYPE_NAME, _gst_cuda_allocator);
g_once_init_leave (&_init, 1);
}
}
gboolean
@ -486,3 +441,51 @@ gst_is_cuda_memory (GstMemory * mem)
return mem != NULL && mem->allocator != NULL &&
GST_IS_CUDA_ALLOCATOR (mem->allocator);
}
GstMemory *
gst_cuda_allocator_alloc (GstCudaAllocator * allocator,
GstCudaContext * context, const GstVideoInfo * info)
{
guint alloc_height;
g_return_val_if_fail (GST_IS_CUDA_ALLOCATOR (allocator), NULL);
g_return_val_if_fail (GST_IS_CUDA_CONTEXT (context), NULL);
g_return_val_if_fail (info != NULL, NULL);
alloc_height = GST_VIDEO_INFO_HEIGHT (info);
/* make sure valid height for subsampled formats */
switch (GST_VIDEO_INFO_FORMAT (info)) {
case GST_VIDEO_FORMAT_I420:
case GST_VIDEO_FORMAT_YV12:
case GST_VIDEO_FORMAT_NV12:
case GST_VIDEO_FORMAT_P010_10LE:
case GST_VIDEO_FORMAT_P016_LE:
case GST_VIDEO_FORMAT_I420_10LE:
alloc_height = GST_ROUND_UP_2 (alloc_height);
break;
default:
break;
}
switch (GST_VIDEO_INFO_FORMAT (info)) {
case GST_VIDEO_FORMAT_I420:
case GST_VIDEO_FORMAT_YV12:
case GST_VIDEO_FORMAT_I420_10LE:
case GST_VIDEO_FORMAT_NV12:
case GST_VIDEO_FORMAT_NV21:
case GST_VIDEO_FORMAT_P010_10LE:
case GST_VIDEO_FORMAT_P016_LE:
alloc_height *= 2;
break;
case GST_VIDEO_FORMAT_Y444:
case GST_VIDEO_FORMAT_Y444_16LE:
alloc_height *= 3;
break;
default:
break;
}
return gst_cuda_allocator_alloc_internal (allocator, context,
info, info->stride[0], alloc_height);
}

View file

@ -37,10 +37,11 @@ G_BEGIN_DECLS
#define GST_CUDA_ALLOCATOR_CAST(obj) ((GstCudaAllocator *)(obj))
#define GST_CUDA_MEMORY_CAST(mem) ((GstCudaMemory *) (mem))
typedef struct _GstCudaAllocationParams GstCudaAllocationParams;
typedef struct _GstCudaAllocator GstCudaAllocator;
typedef struct _GstCudaAllocatorClass GstCudaAllocatorClass;
typedef struct _GstCudaMemory GstCudaMemory;
typedef struct _GstCudaMemoryPrivate GstCudaMemoryPrivate;
/**
* GST_MAP_CUDA:
@ -65,32 +66,6 @@ typedef struct _GstCudaMemory GstCudaMemory;
*/
#define GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY "memory:CUDAMemory"
struct _GstCudaAllocationParams
{
GstAllocationParams parent;
GstVideoInfo info;
};
struct _GstCudaAllocator
{
GstAllocator parent;
GstCudaContext *context;
};
struct _GstCudaAllocatorClass
{
GstAllocatorClass parent_class;
};
GType gst_cuda_allocator_get_type (void);
GstAllocator * gst_cuda_allocator_new (GstCudaContext * context);
GstMemory * gst_cuda_allocator_alloc (GstAllocator * allocator,
gsize size,
GstCudaAllocationParams * params);
/**
* GstCudaMemoryTransfer:
* @GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD: the device memory needs downloading
@ -106,32 +81,36 @@ typedef enum
struct _GstCudaMemory
{
GstMemory mem;
GstMemory mem;
/*< public >*/
GstCudaContext *context;
CUdeviceptr data;
GstVideoInfo info;
GstCudaAllocationParams alloc_params;
/* offset and stride of CUDA device memory */
gsize offset[GST_VIDEO_MAX_PLANES];
gint stride;
/* allocated CUDA Host memory */
gpointer map_alloc_data;
/* aligned CUDA Host memory */
guint8 *align_data;
/* pointing align_data if the memory is mapped */
gpointer map_data;
gint map_count;
GMutex lock;
/*< private >*/
GstCudaMemoryPrivate *priv;
gpointer _gst_reserved[GST_PADDING];
};
gboolean gst_is_cuda_memory (GstMemory * mem);
struct _GstCudaAllocator
{
GstAllocator parent;
};
struct _GstCudaAllocatorClass
{
GstAllocatorClass parent_class;
};
void gst_cuda_memory_init_once (void);
gboolean gst_is_cuda_memory (GstMemory * mem);
GType gst_cuda_allocator_get_type (void);
GstMemory * gst_cuda_allocator_alloc (GstCudaAllocator * allocator,
GstCudaContext * context,
const GstVideoInfo * info);
G_END_DECLS

View file

@ -174,10 +174,6 @@ gst_cuda_upload_propose_allocation (GstBaseTransform * trans,
if (gst_query_get_n_allocation_pools (query) == 0) {
GstCapsFeatures *features;
GstStructure *config;
GstVideoAlignment align;
GstAllocationParams params = { 0, 31, 0, 0, };
GstAllocator *allocator = NULL;
gint i;
features = gst_caps_get_features (caps, 0);
@ -191,38 +187,25 @@ gst_cuda_upload_propose_allocation (GstBaseTransform * trans,
config = gst_buffer_pool_get_config (pool);
gst_video_alignment_reset (&align);
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (&info); i++) {
align.stride_align[i] = 31;
}
gst_video_info_align (&info, &align);
gst_buffer_pool_config_add_option (config,
GST_BUFFER_POOL_OPTION_VIDEO_META);
gst_buffer_pool_config_add_option (config,
GST_BUFFER_POOL_OPTION_VIDEO_ALIGNMENT);
gst_buffer_pool_config_set_video_alignment (config, &align);
size = GST_VIDEO_INFO_SIZE (&info);
gst_buffer_pool_config_set_params (config, caps, size, 0, 0);
gst_query_add_allocation_meta (query, GST_VIDEO_META_API_TYPE, NULL);
gst_query_add_allocation_pool (query, pool, size, 0, 0);
if (gst_buffer_pool_config_get_allocator (config, &allocator, &params)) {
if (params.align < 31)
params.align = 31;
gst_query_add_allocation_param (query, allocator, &params);
gst_buffer_pool_config_set_allocator (config, allocator, &params);
}
if (!gst_buffer_pool_set_config (pool, config)) {
GST_ERROR_OBJECT (ctrans, "failed to set config");
gst_object_unref (pool);
return FALSE;
}
/* Get updated size by cuda buffer pool */
config = gst_buffer_pool_get_config (pool);
gst_buffer_pool_config_get_params (config, NULL, &size, NULL, NULL);
gst_structure_free (config);
gst_query_add_allocation_pool (query, pool, size, 0, 0);
gst_object_unref (pool);
}
@ -289,6 +272,12 @@ gst_cuda_upload_decide_allocation (GstBaseTransform * trans, GstQuery * query)
gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META);
gst_buffer_pool_config_set_params (config, outcaps, size, min, max);
gst_buffer_pool_set_config (pool, config);
/* Get updated size by cuda buffer pool */
config = gst_buffer_pool_get_config (pool);
gst_buffer_pool_config_get_params (config, NULL, &size, NULL, NULL);
gst_structure_free (config);
if (update_pool)
gst_query_set_nth_allocation_pool (query, 0, pool, size, min, max);
else

View file

@ -625,6 +625,7 @@ gst_nv_base_enc_propose_allocation (GstVideoEncoder * enc, GstQuery * query)
GstBufferPool *pool;
GstStructure *config;
GstCapsFeatures *features;
guint size;
GST_DEBUG_OBJECT (nvenc, "propose allocation");
@ -665,18 +666,25 @@ gst_nv_base_enc_propose_allocation (GstVideoEncoder * enc, GstQuery * query)
goto done;
}
config = gst_buffer_pool_get_config (pool);
gst_buffer_pool_config_set_params (config, caps, GST_VIDEO_INFO_SIZE (&info),
nvenc->items->len, nvenc->items->len);
size = GST_VIDEO_INFO_SIZE (&info);
gst_query_add_allocation_pool (query, pool, GST_VIDEO_INFO_SIZE (&info),
config = gst_buffer_pool_get_config (pool);
gst_buffer_pool_config_set_params (config, caps, size,
nvenc->items->len, nvenc->items->len);
gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META);
gst_query_add_allocation_meta (query, GST_VIDEO_META_API_TYPE, NULL);
if (!gst_buffer_pool_set_config (pool, config))
goto error_pool_config;
/* Get updated size by cuda buffer pool */
config = gst_buffer_pool_get_config (pool);
gst_buffer_pool_config_get_params (config, NULL, &size, NULL, NULL);
gst_structure_free (config);
gst_query_add_allocation_pool (query, pool, size,
nvenc->items->len, nvenc->items->len);
gst_query_add_allocation_meta (query, GST_VIDEO_META_API_TYPE, NULL);
gst_object_unref (pool);
done:
@ -2231,17 +2239,12 @@ gst_nv_base_enc_upload_frame (GstNvBaseEnc * nvenc, GstVideoFrame * frame,
CUdeviceptr dst = resource->cuda_pointer;
GstVideoInfo *info = &frame->info;
CUresult cuda_ret;
GstCudaMemory *cuda_mem = NULL;
if (!gst_cuda_context_push (nvenc->cuda_ctx)) {
GST_ERROR_OBJECT (nvenc, "cannot push context");
return FALSE;
}
if (use_device_memory) {
cuda_mem = (GstCudaMemory *) gst_buffer_peek_memory (frame->buffer, 0);
}
for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (frame); i++) {
CUDA_MEMCPY2D param = { 0, };
guint dest_stride = _get_cuda_device_stride (&nvenc->input_info, i,
@ -2249,13 +2252,12 @@ gst_nv_base_enc_upload_frame (GstNvBaseEnc * nvenc, GstVideoFrame * frame,
if (use_device_memory) {
param.srcMemoryType = CU_MEMORYTYPE_DEVICE;
param.srcDevice = cuda_mem->data + cuda_mem->offset[i];
param.srcPitch = cuda_mem->stride;
param.srcDevice = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (frame, i);
} else {
param.srcMemoryType = CU_MEMORYTYPE_HOST;
param.srcHost = GST_VIDEO_FRAME_PLANE_DATA (frame, i);
param.srcPitch = GST_VIDEO_FRAME_PLANE_STRIDE (frame, i);
}
param.srcPitch = GST_VIDEO_FRAME_PLANE_STRIDE (frame, i);
param.dstMemoryType = CU_MEMORYTYPE_DEVICE;
param.dstDevice = dst;

View file

@ -1241,34 +1241,25 @@ gst_nvdec_copy_device_to_memory (GstNvDec * nvdec,
GstVideoInfo *info = &nvdec->output_state->info;
gint i;
GstMemory *mem;
GstCudaMemory *cuda_mem = NULL;
if (!gst_cuda_context_push (nvdec->cuda_ctx)) {
GST_WARNING_OBJECT (nvdec, "failed to lock CUDA context");
return FALSE;
}
gboolean use_device_copy = FALSE;
GstMapFlags map_flags = GST_MAP_WRITE;
if (nvdec->mem_type == GST_NVDEC_MEM_TYPE_CUDA &&
(mem = gst_buffer_peek_memory (output_buffer, 0)) &&
gst_is_cuda_memory (mem)) {
GstCudaMemory *cmem = GST_CUDA_MEMORY_CAST (mem);
if (cmem->context == nvdec->cuda_ctx ||
gst_cuda_context_get_handle (cmem->context) ==
gst_cuda_context_get_handle (nvdec->cuda_ctx) ||
(gst_cuda_context_can_access_peer (cmem->context, nvdec->cuda_ctx) &&
gst_cuda_context_can_access_peer (nvdec->cuda_ctx,
cmem->context))) {
cuda_mem = cmem;
}
map_flags |= GST_MAP_CUDA;
use_device_copy = TRUE;
}
if (!cuda_mem) {
if (!gst_video_frame_map (&video_frame, info, output_buffer, GST_MAP_WRITE)) {
GST_ERROR_OBJECT (nvdec, "frame map failure");
gst_cuda_context_pop (NULL);
return FALSE;
}
if (!gst_video_frame_map (&video_frame, info, output_buffer, map_flags)) {
GST_ERROR_OBJECT (nvdec, "frame map failure");
return FALSE;
}
if (!gst_cuda_context_push (nvdec->cuda_ctx)) {
gst_video_frame_unmap (&video_frame);
GST_WARNING_OBJECT (nvdec, "failed to lock CUDA context");
return FALSE;
}
params.progressive_frame = dispinfo->progressive_frame;
@ -1286,17 +1277,17 @@ gst_nvdec_copy_device_to_memory (GstNvDec * nvdec,
copy_params.srcMemoryType = CU_MEMORYTYPE_DEVICE;
copy_params.srcPitch = pitch;
copy_params.dstMemoryType =
cuda_mem ? CU_MEMORYTYPE_DEVICE : CU_MEMORYTYPE_HOST;
use_device_copy ? CU_MEMORYTYPE_DEVICE : CU_MEMORYTYPE_HOST;
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++) {
copy_params.srcDevice = dptr + (i * pitch * GST_VIDEO_INFO_HEIGHT (info));
if (cuda_mem) {
copy_params.dstDevice = cuda_mem->data + cuda_mem->offset[i];
copy_params.dstPitch = cuda_mem->stride;
if (use_device_copy) {
copy_params.dstDevice =
(CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (&video_frame, i);
} else {
copy_params.dstHost = GST_VIDEO_FRAME_PLANE_DATA (&video_frame, i);
copy_params.dstPitch = GST_VIDEO_FRAME_PLANE_STRIDE (&video_frame, i);
}
copy_params.dstPitch = GST_VIDEO_FRAME_PLANE_STRIDE (&video_frame, i);
copy_params.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (info, i)
* GST_VIDEO_INFO_COMP_PSTRIDE (info, i);
copy_params.Height = GST_VIDEO_INFO_COMP_HEIGHT (info, i);
@ -1304,8 +1295,7 @@ gst_nvdec_copy_device_to_memory (GstNvDec * nvdec,
if (!gst_cuda_result (CuMemcpy2DAsync (&copy_params, nvdec->cuda_stream))) {
GST_ERROR_OBJECT (nvdec, "failed to copy %dth plane", i);
CuvidUnmapVideoFrame (nvdec->decoder, dptr);
if (!cuda_mem)
gst_video_frame_unmap (&video_frame);
gst_video_frame_unmap (&video_frame);
gst_cuda_context_pop (NULL);
return FALSE;
}
@ -1313,8 +1303,7 @@ gst_nvdec_copy_device_to_memory (GstNvDec * nvdec,
gst_cuda_result (CuStreamSynchronize (nvdec->cuda_stream));
if (!cuda_mem)
gst_video_frame_unmap (&video_frame);
gst_video_frame_unmap (&video_frame);
if (!gst_cuda_result (CuvidUnmapVideoFrame (nvdec->decoder, dptr)))
GST_WARNING_OBJECT (nvdec, "failed to unmap video frame");
@ -1558,9 +1547,15 @@ gst_nvdec_ensure_cuda_pool (GstNvDec * nvdec, GstQuery * query)
n = gst_query_get_n_allocation_pools (query);
if (n > 0) {
gst_query_parse_nth_allocation_pool (query, 0, &pool, &size, &min, &max);
if (pool && !GST_IS_CUDA_BUFFER_POOL (pool)) {
gst_object_unref (pool);
pool = NULL;
if (pool) {
if (!GST_IS_CUDA_BUFFER_POOL (pool)) {
gst_clear_object (&pool);
} else {
GstCudaBufferPool *cpool = GST_CUDA_BUFFER_POOL (pool);
if (cpool->context != nvdec->cuda_ctx)
gst_clear_object (&pool);
}
}
}
@ -1578,6 +1573,12 @@ gst_nvdec_ensure_cuda_pool (GstNvDec * nvdec, GstQuery * query)
gst_buffer_pool_config_set_params (config, outcaps, size, min, max);
gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META);
gst_buffer_pool_set_config (pool, config);
/* Get updated size by cuda buffer pool */
config = gst_buffer_pool_get_config (pool);
gst_buffer_pool_config_get_params (config, NULL, &size, NULL, NULL);
gst_structure_free (config);
if (n > 0)
gst_query_set_nth_allocation_pool (query, 0, pool, size, min, max);
else

View file

@ -740,33 +740,24 @@ gst_nv_decoder_copy_frame_to_cuda (GstNvDecoder * decoder,
{
CUDA_MEMCPY2D copy_params = { 0, };
GstMemory *mem;
GstCudaMemory *cuda_mem = NULL;
gint i;
gboolean ret = FALSE;
GstVideoFrame video_frame;
mem = gst_buffer_peek_memory (buffer, 0);
if (!gst_is_cuda_memory (mem)) {
GST_WARNING_OBJECT (decoder, "Not a CUDA memory");
return FALSE;
} else {
GstCudaMemory *cmem = GST_CUDA_MEMORY_CAST (mem);
if (cmem->context == decoder->context ||
gst_cuda_context_get_handle (cmem->context) ==
gst_cuda_context_get_handle (decoder->context) ||
(gst_cuda_context_can_access_peer (cmem->context, decoder->context) &&
gst_cuda_context_can_access_peer (decoder->context,
cmem->context))) {
cuda_mem = cmem;
}
}
if (!cuda_mem) {
GST_WARNING_OBJECT (decoder, "Access to CUDA memory is not allowed");
if (!gst_video_frame_map (&video_frame,
&decoder->info, buffer, GST_MAP_WRITE | GST_MAP_CUDA)) {
GST_ERROR_OBJECT (decoder, "frame map failure");
return FALSE;
}
if (!gst_cuda_context_push (decoder->context)) {
gst_video_frame_unmap (&video_frame);
GST_ERROR_OBJECT (decoder, "Failed to push CUDA context");
return FALSE;
}
@ -778,8 +769,9 @@ gst_nv_decoder_copy_frame_to_cuda (GstNvDecoder * decoder,
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (&decoder->info); i++) {
copy_params.srcDevice = frame->devptr +
(i * frame->pitch * GST_VIDEO_INFO_HEIGHT (&decoder->info));
copy_params.dstDevice = cuda_mem->data + cuda_mem->offset[i];
copy_params.dstPitch = cuda_mem->stride;
copy_params.dstDevice =
(CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (&video_frame, i);
copy_params.dstPitch = GST_VIDEO_FRAME_PLANE_STRIDE (&video_frame, i);
copy_params.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (&decoder->info, 0)
* GST_VIDEO_INFO_COMP_PSTRIDE (&decoder->info, 0);
copy_params.Height = GST_VIDEO_INFO_COMP_HEIGHT (&decoder->info, i);
@ -795,6 +787,7 @@ gst_nv_decoder_copy_frame_to_cuda (GstNvDecoder * decoder,
ret = TRUE;
done:
gst_video_frame_unmap (&video_frame);
gst_cuda_context_pop (NULL);
GST_LOG_OBJECT (decoder, "Copy frame to CUDA ret %d", ret);
@ -1567,6 +1560,12 @@ gst_nv_decoder_ensure_cuda_pool (GstNvDecoder * decoder, GstQuery * query)
gst_buffer_pool_config_set_params (config, outcaps, size, min, max);
gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META);
gst_buffer_pool_set_config (pool, config);
/* Get updated size by cuda buffer pool */
config = gst_buffer_pool_get_config (pool);
gst_buffer_pool_config_get_params (config, NULL, &size, NULL, NULL);
gst_structure_free (config);
if (n > 0)
gst_query_set_nth_allocation_pool (query, 0, pool, size, min, max);
else

View file

@ -39,6 +39,7 @@
#include "gstcudadownload.h"
#include "gstcudaupload.h"
#include "gstcudafilter.h"
#include "gstcudamemory.h"
GST_DEBUG_CATEGORY (gst_nvcodec_debug);
GST_DEBUG_CATEGORY (gst_nvdec_debug);
@ -238,6 +239,7 @@ plugin_init (GstPlugin * plugin)
GST_TYPE_CUDA_UPLOAD);
gst_cuda_filter_plugin_init (plugin);
gst_cuda_memory_init_once ();
return TRUE;
}