nvcodec: Refactor GstCudaMemory abstraction

* Hide GstCudaMemory member variables
* Make GstCudaAllocator object GstCudaContext independent
* Set offset/stride of memory correctly via video meta
* Drop GST_BUFFER_POOL_OPTION_VIDEO_ALIGNMENT support.
  This implementation actually does not support custom alignment
  because we allocate device memory via cuMemAllocPitch
  of which alignment is almost uncontrollable

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/1834>
This commit is contained in:
Seungha Yang 2022-03-03 03:25:47 +09:00
parent ad0e7fca14
commit 111b2c3f53
12 changed files with 580 additions and 759 deletions

View file

@ -229,7 +229,7 @@ static const gchar templ_YUV_TO_YUV[] =
GST_CUDA_KERNEL_FUNC GST_CUDA_KERNEL_FUNC
"(cudaTextureObject_t tex0, cudaTextureObject_t tex1, cudaTextureObject_t tex2,\n" "(cudaTextureObject_t tex0, cudaTextureObject_t tex1, cudaTextureObject_t tex2,\n"
" unsigned char *dst0, unsigned char *dst1, unsigned char *dst2,\n" " unsigned char *dst0, unsigned char *dst1, unsigned char *dst2,\n"
" int stride)\n" " int stride, int uv_stride)\n"
"{\n" "{\n"
" int x_pos = blockIdx.x * blockDim.x + threadIdx.x;\n" " int x_pos = blockIdx.x * blockDim.x + threadIdx.x;\n"
" int y_pos = blockIdx.y * blockDim.y + threadIdx.y;\n" " int y_pos = blockIdx.y * blockDim.y + threadIdx.y;\n"
@ -265,7 +265,7 @@ GST_CUDA_KERNEL_FUNC
" v = tmp;\n" " v = tmp;\n"
" }\n" " }\n"
" write_chroma (dst1,\n" " write_chroma (dst1,\n"
" dst2, u, v, x_pos, y_pos, CHROMA_PSTRIDE, stride, MASK);\n" " dst2, u, v, x_pos, y_pos, CHROMA_PSTRIDE, uv_stride, MASK);\n"
" }\n" " }\n"
"}\n" "}\n"
"\n" "\n"
@ -589,7 +589,7 @@ GST_CUDA_KERNEL_FUNC_TO_Y444
GST_CUDA_KERNEL_FUNC_Y444_TO_YUV GST_CUDA_KERNEL_FUNC_Y444_TO_YUV
"(cudaTextureObject_t tex0, cudaTextureObject_t tex1, cudaTextureObject_t tex2,\n" "(cudaTextureObject_t tex0, cudaTextureObject_t tex1, cudaTextureObject_t tex2,\n"
" unsigned char *dst0, unsigned char *dst1, unsigned char *dst2,\n" " unsigned char *dst0, unsigned char *dst1, unsigned char *dst2,\n"
" int stride)\n" " int stride, int uv_stride)\n"
"{\n" "{\n"
" int x_pos = blockIdx.x * blockDim.x + threadIdx.x;\n" " int x_pos = blockIdx.x * blockDim.x + threadIdx.x;\n"
" int y_pos = blockIdx.y * blockDim.y + threadIdx.y;\n" " int y_pos = blockIdx.y * blockDim.y + threadIdx.y;\n"
@ -626,7 +626,7 @@ GST_CUDA_KERNEL_FUNC_Y444_TO_YUV
" v = tmp;\n" " v = tmp;\n"
" }\n" " }\n"
" write_chroma (dst1,\n" " write_chroma (dst1,\n"
" dst2, u, v, x_pos, y_pos, CHROMA_PSTRIDE, stride, MASK);\n" " dst2, u, v, x_pos, y_pos, CHROMA_PSTRIDE, uv_stride, MASK);\n"
" }\n" " }\n"
"}\n" "}\n"
"\n" "\n"
@ -745,9 +745,10 @@ struct _GstCudaConverter
gchar *ptx; gchar *ptx;
GstCudaStageBuffer fallback_buffer[GST_VIDEO_MAX_PLANES]; GstCudaStageBuffer fallback_buffer[GST_VIDEO_MAX_PLANES];
gboolean (*convert) (GstCudaConverter * convert, const GstCudaMemory * src, /* *INDENT-OFF* */
GstVideoInfo * in_info, GstCudaMemory * dst, GstVideoInfo * out_info, gboolean (*convert) (GstCudaConverter * convert, GstVideoFrame * src_frame,
CUstream cuda_stream); GstVideoFrame * dst_frame, CUstream cuda_stream);
/* *INDENT-ON* */
const CUdeviceptr src; const CUdeviceptr src;
GstVideoInfo *cur_in_info; GstVideoInfo *cur_in_info;
@ -893,67 +894,25 @@ gst_cuda_converter_free (GstCudaConverter * convert)
g_free (convert); g_free (convert);
} }
/**
* gst_cuda_converter_frame:
* @convert: a #GstCudaConverter
* @src: a #GstCudaMemory
* @in_info: a #GstVideoInfo representing @src
* @dst: a #GstCudaMemory
* @out_info: a #GstVideoInfo representing @dst
* @cuda_stream: a #CUstream
*
* Convert the pixels of @src into @dest using @convert.
* Called without gst_cuda_context_push() and gst_cuda_context_pop() by caller
*/
gboolean gboolean
gst_cuda_converter_frame (GstCudaConverter * convert, const GstCudaMemory * src, gst_cuda_converter_convert_frame (GstCudaConverter * convert,
GstVideoInfo * in_info, GstCudaMemory * dst, GstVideoInfo * out_info, GstVideoFrame * src_frame, GstVideoFrame * dst_frame, CUstream cuda_stream)
CUstream cuda_stream)
{ {
gboolean ret; gboolean ret;
g_return_val_if_fail (convert, FALSE); g_return_val_if_fail (convert, FALSE);
g_return_val_if_fail (src, FALSE); g_return_val_if_fail (src_frame, FALSE);
g_return_val_if_fail (in_info, FALSE); g_return_val_if_fail (dst_frame, FALSE);
g_return_val_if_fail (dst, FALSE);
g_return_val_if_fail (out_info, FALSE);
gst_cuda_context_push (convert->cuda_ctx); gst_cuda_context_push (convert->cuda_ctx);
ret = gst_cuda_converter_frame_unlocked (convert, ret = convert->convert (convert, src_frame, dst_frame, cuda_stream);
src, in_info, dst, out_info, cuda_stream);
gst_cuda_context_pop (NULL); gst_cuda_context_pop (NULL);
return ret; return ret;
} }
/**
* gst_cuda_converter_frame_unlocked:
* @convert: a #GstCudaConverter
* @src: a #GstCudaMemory
* @in_info: a #GstVideoInfo representing @src
* @dst: a #GstCudaMemory
* @out_info: a #GstVideoInfo representing @dest
* @cuda_stream: a #CUstream
*
* Convert the pixels of @src into @dest using @convert.
* Caller should call this method after gst_cuda_context_push()
*/
gboolean
gst_cuda_converter_frame_unlocked (GstCudaConverter * convert,
const GstCudaMemory * src, GstVideoInfo * in_info, GstCudaMemory * dst,
GstVideoInfo * out_info, CUstream cuda_stream)
{
g_return_val_if_fail (convert, FALSE);
g_return_val_if_fail (src, FALSE);
g_return_val_if_fail (in_info, FALSE);
g_return_val_if_fail (dst, FALSE);
g_return_val_if_fail (out_info, FALSE);
return convert->convert (convert, src, in_info, dst, out_info, cuda_stream);
}
/* allocate fallback memory for texture alignment requirement */ /* allocate fallback memory for texture alignment requirement */
static gboolean static gboolean
convert_ensure_fallback_memory (GstCudaConverter * convert, convert_ensure_fallback_memory (GstCudaConverter * convert,
@ -1020,8 +979,8 @@ convert_create_texture_unchecked (const CUdeviceptr src, gint width,
} }
static CUtexObject static CUtexObject
convert_create_texture (GstCudaConverter * convert, const GstCudaMemory * src, convert_create_texture (GstCudaConverter * convert, GstVideoFrame * src_frame,
GstVideoInfo * info, guint plane, CUstream cuda_stream) guint plane, CUstream cuda_stream)
{ {
CUarray_format format = CU_AD_FORMAT_UNSIGNED_INT8; CUarray_format format = CU_AD_FORMAT_UNSIGNED_INT8;
guint channels = 1; guint channels = 1;
@ -1030,22 +989,23 @@ convert_create_texture (GstCudaConverter * convert, const GstCudaMemory * src,
CUresult cuda_ret; CUresult cuda_ret;
CUfilter_mode mode; CUfilter_mode mode;
if (GST_VIDEO_INFO_COMP_DEPTH (info, plane) > 8) if (GST_VIDEO_FRAME_COMP_DEPTH (src_frame, plane) > 8)
format = CU_AD_FORMAT_UNSIGNED_INT16; format = CU_AD_FORMAT_UNSIGNED_INT16;
/* FIXME: more graceful method ? */ /* FIXME: more graceful method ? */
if (plane != 0 && if (plane != 0 &&
GST_VIDEO_INFO_N_PLANES (info) != GST_VIDEO_INFO_N_COMPONENTS (info)) { GST_VIDEO_FRAME_N_PLANES (src_frame) !=
GST_VIDEO_FRAME_N_COMPONENTS (src_frame)) {
channels = 2; channels = 2;
} }
src_ptr = src->data + src->offset[plane]; src_ptr = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (src_frame, plane);
stride = src->stride; stride = GST_VIDEO_FRAME_PLANE_STRIDE (src_frame, plane);
if (convert->texture_alignment && (src_ptr % convert->texture_alignment)) { if (convert->texture_alignment && (src_ptr % convert->texture_alignment)) {
CUDA_MEMCPY2D copy_params = { 0, }; CUDA_MEMCPY2D copy_params = { 0, };
if (!convert_ensure_fallback_memory (convert, info, plane)) if (!convert_ensure_fallback_memory (convert, &src_frame->info, plane))
return 0; return 0;
GST_LOG ("device memory was not aligned, copy to fallback memory"); GST_LOG ("device memory was not aligned, copy to fallback memory");
@ -1057,9 +1017,9 @@ convert_create_texture (GstCudaConverter * convert, const GstCudaMemory * src,
copy_params.dstMemoryType = CU_MEMORYTYPE_DEVICE; copy_params.dstMemoryType = CU_MEMORYTYPE_DEVICE;
copy_params.dstPitch = convert->fallback_buffer[plane].cuda_stride; copy_params.dstPitch = convert->fallback_buffer[plane].cuda_stride;
copy_params.dstDevice = convert->fallback_buffer[plane].device_ptr; copy_params.dstDevice = convert->fallback_buffer[plane].device_ptr;
copy_params.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (info, plane) copy_params.WidthInBytes = GST_VIDEO_FRAME_COMP_WIDTH (src_frame, plane)
* GST_VIDEO_INFO_COMP_PSTRIDE (info, plane); * GST_VIDEO_FRAME_COMP_PSTRIDE (src_frame, plane);
copy_params.Height = GST_VIDEO_INFO_COMP_HEIGHT (info, plane); copy_params.Height = GST_VIDEO_FRAME_COMP_HEIGHT (src_frame, plane);
cuda_ret = CuMemcpy2DAsync (&copy_params, cuda_stream); cuda_ret = CuMemcpy2DAsync (&copy_params, cuda_stream);
if (!gst_cuda_result (cuda_ret)) { if (!gst_cuda_result (cuda_ret)) {
@ -1079,27 +1039,26 @@ convert_create_texture (GstCudaConverter * convert, const GstCudaMemory * src,
mode = CU_TR_FILTER_MODE_LINEAR; mode = CU_TR_FILTER_MODE_LINEAR;
return convert_create_texture_unchecked (src_ptr, return convert_create_texture_unchecked (src_ptr,
GST_VIDEO_INFO_COMP_WIDTH (info, plane), GST_VIDEO_FRAME_COMP_WIDTH (src_frame, plane),
GST_VIDEO_INFO_COMP_HEIGHT (info, plane), channels, stride, format, mode, GST_VIDEO_FRAME_COMP_HEIGHT (src_frame, plane), channels, stride, format,
cuda_stream); mode, cuda_stream);
} }
/* main conversion function for YUV to YUV conversion */ /* main conversion function for YUV to YUV conversion */
static gboolean static gboolean
convert_YUV_TO_YUV (GstCudaConverter * convert, convert_YUV_TO_YUV (GstCudaConverter * convert, GstVideoFrame * src_frame,
const GstCudaMemory * src, GstVideoInfo * in_info, GstCudaMemory * dst, GstVideoFrame * dst_frame, CUstream cuda_stream)
GstVideoInfo * out_info, CUstream cuda_stream)
{ {
CUtexObject texture[GST_VIDEO_MAX_PLANES] = { 0, }; CUtexObject texture[GST_VIDEO_MAX_PLANES] = { 0, };
CUresult cuda_ret; CUresult cuda_ret;
gboolean ret = FALSE; gboolean ret = FALSE;
CUdeviceptr dst_ptr[GST_VIDEO_MAX_PLANES] = { 0, }; CUdeviceptr dst_ptr[GST_VIDEO_MAX_PLANES] = { 0, };
gint dst_stride; gint dst_stride, dst_uv_stride;
gint width, height; gint width, height;
gint i; gint i;
gpointer kernel_args[] = { &texture[0], &texture[1], &texture[2], gpointer kernel_args[] = { &texture[0], &texture[1], &texture[2],
&dst_ptr[0], &dst_ptr[1], &dst_ptr[2], &dst_stride &dst_ptr[0], &dst_ptr[1], &dst_ptr[2], &dst_stride, &dst_uv_stride
}; };
/* conversion step /* conversion step
@ -1110,21 +1069,23 @@ convert_YUV_TO_YUV (GstCudaConverter * convert,
*/ */
/* map CUDA device memory to CUDA texture object */ /* map CUDA device memory to CUDA texture object */
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (in_info); i++) { for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (src_frame); i++) {
texture[i] = convert_create_texture (convert, src, in_info, i, cuda_stream); texture[i] = convert_create_texture (convert, src_frame, i, cuda_stream);
if (!texture[i]) { if (!texture[i]) {
GST_ERROR ("couldn't create texture for %d th plane", i); GST_ERROR ("couldn't create texture for %d th plane", i);
goto done; goto done;
} }
} }
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (out_info); i++) for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (dst_frame); i++) {
dst_ptr[i] = dst->data + dst->offset[i]; dst_ptr[i] = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (dst_frame, i);
}
dst_stride = dst->stride; dst_stride = GST_VIDEO_FRAME_PLANE_STRIDE (dst_frame, 0);
dst_uv_stride = GST_VIDEO_FRAME_PLANE_STRIDE (dst_frame, 1);
width = GST_VIDEO_INFO_WIDTH (out_info); width = GST_VIDEO_FRAME_WIDTH (dst_frame);
height = GST_VIDEO_INFO_HEIGHT (out_info); height = GST_VIDEO_FRAME_HEIGHT (dst_frame);
cuda_ret = cuda_ret =
CuLaunchKernel (convert->kernel_func[0], DIV_UP (width, CUDA_BLOCK_X), CuLaunchKernel (convert->kernel_func[0], DIV_UP (width, CUDA_BLOCK_X),
@ -1140,7 +1101,7 @@ convert_YUV_TO_YUV (GstCudaConverter * convert,
gst_cuda_result (CuStreamSynchronize (cuda_stream)); gst_cuda_result (CuStreamSynchronize (cuda_stream));
done: done:
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (in_info); i++) { for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (src_frame); i++) {
if (texture[i]) if (texture[i])
gst_cuda_result (CuTexObjectDestroy (texture[i])); gst_cuda_result (CuTexObjectDestroy (texture[i]));
} }
@ -1150,9 +1111,8 @@ done:
/* main conversion function for YUV to RGB conversion */ /* main conversion function for YUV to RGB conversion */
static gboolean static gboolean
convert_YUV_TO_RGB (GstCudaConverter * convert, convert_YUV_TO_RGB (GstCudaConverter * convert, GstVideoFrame * src_frame,
const GstCudaMemory * src, GstVideoInfo * in_info, GstCudaMemory * dst, GstVideoFrame * dst_frame, CUstream cuda_stream)
GstVideoInfo * out_info, CUstream cuda_stream)
{ {
CUtexObject texture[GST_VIDEO_MAX_PLANES] = { 0, }; CUtexObject texture[GST_VIDEO_MAX_PLANES] = { 0, };
CUresult cuda_ret; CUresult cuda_ret;
@ -1174,19 +1134,19 @@ convert_YUV_TO_RGB (GstCudaConverter * convert,
*/ */
/* map CUDA device memory to CUDA texture object */ /* map CUDA device memory to CUDA texture object */
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (in_info); i++) { for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (src_frame); i++) {
texture[i] = convert_create_texture (convert, src, in_info, i, cuda_stream); texture[i] = convert_create_texture (convert, src_frame, i, cuda_stream);
if (!texture[i]) { if (!texture[i]) {
GST_ERROR ("couldn't create texture for %d th plane", i); GST_ERROR ("couldn't create texture for %d th plane", i);
goto done; goto done;
} }
} }
dstRGB = dst->data; dstRGB = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (dst_frame, 0);
dst_stride = dst->stride; dst_stride = GST_VIDEO_FRAME_PLANE_STRIDE (dst_frame, 0);
width = GST_VIDEO_INFO_WIDTH (out_info); width = GST_VIDEO_FRAME_WIDTH (dst_frame);
height = GST_VIDEO_INFO_HEIGHT (out_info); height = GST_VIDEO_FRAME_HEIGHT (dst_frame);
cuda_ret = cuda_ret =
CuLaunchKernel (convert->kernel_func[0], DIV_UP (width, CUDA_BLOCK_X), CuLaunchKernel (convert->kernel_func[0], DIV_UP (width, CUDA_BLOCK_X),
@ -1202,7 +1162,7 @@ convert_YUV_TO_RGB (GstCudaConverter * convert,
gst_cuda_result (CuStreamSynchronize (cuda_stream)); gst_cuda_result (CuStreamSynchronize (cuda_stream));
done: done:
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (in_info); i++) { for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (src_frame); i++) {
if (texture[i]) if (texture[i])
gst_cuda_result (CuTexObjectDestroy (texture[i])); gst_cuda_result (CuTexObjectDestroy (texture[i]));
} }
@ -1212,7 +1172,7 @@ done:
static gboolean static gboolean
convert_UNPACK_RGB (GstCudaConverter * convert, CUfunction kernel_func, convert_UNPACK_RGB (GstCudaConverter * convert, CUfunction kernel_func,
CUstream cuda_stream, const GstCudaMemory * src, GstVideoInfo * in_info, CUstream cuda_stream, GstVideoFrame * src_frame,
CUdeviceptr dst, gint dst_stride, GstCudaRGBOrder * rgb_order) CUdeviceptr dst, gint dst_stride, GstCudaRGBOrder * rgb_order)
{ {
CUdeviceptr srcRGB = 0; CUdeviceptr srcRGB = 0;
@ -1227,12 +1187,12 @@ convert_UNPACK_RGB (GstCudaConverter * convert, CUfunction kernel_func,
&convert->in_rgb_order.B, &convert->in_rgb_order.A, &convert->in_rgb_order.B, &convert->in_rgb_order.A,
}; };
srcRGB = src->data; srcRGB = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (src_frame, 0);
src_stride = src->stride; src_stride = GST_VIDEO_FRAME_PLANE_STRIDE (src_frame, 0);
width = GST_VIDEO_INFO_WIDTH (in_info); width = GST_VIDEO_FRAME_WIDTH (src_frame);
height = GST_VIDEO_INFO_HEIGHT (in_info); height = GST_VIDEO_FRAME_HEIGHT (src_frame);
src_pstride = GST_VIDEO_INFO_COMP_PSTRIDE (in_info, 0); src_pstride = GST_VIDEO_FRAME_COMP_PSTRIDE (src_frame, 0);
cuda_ret = cuda_ret =
CuLaunchKernel (kernel_func, DIV_UP (width, CUDA_BLOCK_X), CuLaunchKernel (kernel_func, DIV_UP (width, CUDA_BLOCK_X),
@ -1274,9 +1234,8 @@ convert_TO_Y444 (GstCudaConverter * convert, CUfunction kernel_func,
/* main conversion function for RGB to YUV conversion */ /* main conversion function for RGB to YUV conversion */
static gboolean static gboolean
convert_RGB_TO_YUV (GstCudaConverter * convert, convert_RGB_TO_YUV (GstCudaConverter * convert, GstVideoFrame * src_frame,
const GstCudaMemory * src, GstVideoInfo * in_info, GstCudaMemory * dst, GstVideoFrame * dst_frame, CUstream cuda_stream)
GstVideoInfo * out_info, CUstream cuda_stream)
{ {
CUtexObject texture = 0; CUtexObject texture = 0;
CUtexObject yuv_texture[3] = { 0, }; CUtexObject yuv_texture[3] = { 0, };
@ -1285,7 +1244,7 @@ convert_RGB_TO_YUV (GstCudaConverter * convert,
gboolean ret = FALSE; gboolean ret = FALSE;
gint in_width, in_height; gint in_width, in_height;
gint out_width, out_height; gint out_width, out_height;
gint dst_stride; gint dst_stride, dst_uv_stride;
CUarray_format format = CU_AD_FORMAT_UNSIGNED_INT8; CUarray_format format = CU_AD_FORMAT_UNSIGNED_INT8;
CUfilter_mode mode = CU_TR_FILTER_MODE_POINT; CUfilter_mode mode = CU_TR_FILTER_MODE_POINT;
gint pstride = 1; gint pstride = 1;
@ -1293,7 +1252,7 @@ convert_RGB_TO_YUV (GstCudaConverter * convert,
gint i; gint i;
gpointer kernel_args[] = { &yuv_texture[0], &yuv_texture[1], &yuv_texture[2], gpointer kernel_args[] = { &yuv_texture[0], &yuv_texture[1], &yuv_texture[2],
&dst_ptr[0], &dst_ptr[1], &dst_ptr[2], &dst_stride &dst_ptr[0], &dst_ptr[1], &dst_ptr[2], &dst_stride, &dst_uv_stride
}; };
/* conversion step /* conversion step
@ -1304,21 +1263,22 @@ convert_RGB_TO_YUV (GstCudaConverter * convert,
* the CUDA kernel function * the CUDA kernel function
*/ */
if (!convert_UNPACK_RGB (convert, convert->kernel_func[0], cuda_stream, if (!convert_UNPACK_RGB (convert, convert->kernel_func[0], cuda_stream,
src, in_info, convert->unpack_surface.device_ptr, src_frame, convert->unpack_surface.device_ptr,
convert->unpack_surface.cuda_stride, &convert->in_rgb_order)) { convert->unpack_surface.cuda_stride, &convert->in_rgb_order)) {
GST_ERROR ("could not unpack input rgb"); GST_ERROR ("could not unpack input rgb");
goto done; goto done;
} }
in_width = GST_VIDEO_INFO_WIDTH (in_info); in_width = GST_VIDEO_FRAME_WIDTH (src_frame);
in_height = GST_VIDEO_INFO_HEIGHT (in_info); in_height = GST_VIDEO_FRAME_HEIGHT (src_frame);
out_width = GST_VIDEO_INFO_WIDTH (out_info); out_width = GST_VIDEO_FRAME_WIDTH (dst_frame);
out_height = GST_VIDEO_INFO_HEIGHT (out_info); out_height = GST_VIDEO_FRAME_HEIGHT (dst_frame);
dst_stride = dst->stride; dst_stride = GST_VIDEO_FRAME_PLANE_STRIDE (dst_frame, 0);
dst_uv_stride = GST_VIDEO_FRAME_PLANE_STRIDE (dst_frame, 1);
if (GST_VIDEO_INFO_COMP_DEPTH (in_info, 0) > 8) { if (GST_VIDEO_FRAME_COMP_DEPTH (src_frame, 0) > 8) {
pstride = 2; pstride = 2;
bitdepth = 16; bitdepth = 16;
format = CU_AD_FORMAT_UNSIGNED_INT16; format = CU_AD_FORMAT_UNSIGNED_INT16;
@ -1365,8 +1325,8 @@ convert_RGB_TO_YUV (GstCudaConverter * convert,
} }
} }
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (out_info); i++) for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (dst_frame); i++)
dst_ptr[i] = dst->data + dst->offset[i]; dst_ptr[i] = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (dst_frame, i);
cuda_ret = cuda_ret =
CuLaunchKernel (convert->kernel_func[2], DIV_UP (out_width, CUDA_BLOCK_X), CuLaunchKernel (convert->kernel_func[2], DIV_UP (out_width, CUDA_BLOCK_X),
@ -1394,9 +1354,8 @@ done:
/* main conversion function for RGB to RGB conversion */ /* main conversion function for RGB to RGB conversion */
static gboolean static gboolean
convert_RGB_TO_RGB (GstCudaConverter * convert, convert_RGB_TO_RGB (GstCudaConverter * convert, GstVideoFrame * src_frame,
const GstCudaMemory * src, GstVideoInfo * in_info, GstCudaMemory * dst, GstVideoFrame * dst_frame, CUstream cuda_stream)
GstVideoInfo * out_info, CUstream cuda_stream)
{ {
CUtexObject texture = 0; CUtexObject texture = 0;
CUresult cuda_ret; CUresult cuda_ret;
@ -1418,23 +1377,23 @@ convert_RGB_TO_RGB (GstCudaConverter * convert,
*/ */
if (!convert_UNPACK_RGB (convert, convert->kernel_func[0], cuda_stream, if (!convert_UNPACK_RGB (convert, convert->kernel_func[0], cuda_stream,
src, in_info, convert->unpack_surface.device_ptr, src_frame, convert->unpack_surface.device_ptr,
convert->unpack_surface.cuda_stride, &convert->in_rgb_order)) { convert->unpack_surface.cuda_stride, &convert->in_rgb_order)) {
GST_ERROR ("could not unpack input rgb"); GST_ERROR ("could not unpack input rgb");
goto done; goto done;
} }
in_width = GST_VIDEO_INFO_WIDTH (in_info); in_width = GST_VIDEO_FRAME_WIDTH (src_frame);
in_height = GST_VIDEO_INFO_HEIGHT (in_info); in_height = GST_VIDEO_FRAME_HEIGHT (src_frame);
out_width = GST_VIDEO_INFO_WIDTH (out_info); out_width = GST_VIDEO_FRAME_WIDTH (dst_frame);
out_height = GST_VIDEO_INFO_HEIGHT (out_info); out_height = GST_VIDEO_FRAME_HEIGHT (dst_frame);
dstRGB = dst->data; dstRGB = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (dst_frame, 0);
dst_stride = dst->stride; dst_stride = GST_VIDEO_FRAME_PLANE_STRIDE (dst_frame, 0);
if (GST_VIDEO_INFO_COMP_DEPTH (in_info, 0) > 8) if (GST_VIDEO_FRAME_COMP_DEPTH (src_frame, 0) > 8)
format = CU_AD_FORMAT_UNSIGNED_INT16; format = CU_AD_FORMAT_UNSIGNED_INT16;
/* Use h/w linear interpolation only when resize is required. /* Use h/w linear interpolation only when resize is required.

View file

@ -34,21 +34,11 @@ GstCudaConverter * gst_cuda_converter_new (GstVideoInfo * in_info,
void gst_cuda_converter_free (GstCudaConverter * convert); void gst_cuda_converter_free (GstCudaConverter * convert);
gboolean gst_cuda_converter_frame (GstCudaConverter * convert, gboolean gst_cuda_converter_convert_frame (GstCudaConverter * convert,
const GstCudaMemory * src, GstVideoFrame * src_frame,
GstVideoInfo * in_info, GstVideoFrame * dst_frame,
GstCudaMemory * dst,
GstVideoInfo * out_info,
CUstream cuda_stream); CUstream cuda_stream);
gboolean gst_cuda_converter_frame_unlocked (GstCudaConverter * convert,
const GstCudaMemory * src,
GstVideoInfo * in_info,
GstCudaMemory * dst,
GstVideoInfo * out_info,
CUstream cuda_stream);
G_END_DECLS G_END_DECLS
#endif /* __GST_CUDA_CONVERTER_H__ */ #endif /* __GST_CUDA_CONVERTER_H__ */

View file

@ -168,47 +168,30 @@ gst_cuda_base_filter_propose_allocation (GstBaseTransform * trans,
if (gst_query_get_n_allocation_pools (query) == 0) { if (gst_query_get_n_allocation_pools (query) == 0) {
GstStructure *config; GstStructure *config;
GstVideoAlignment align;
GstAllocationParams params = { 0, 31, 0, 0, };
GstAllocator *allocator = NULL;
gint i;
pool = gst_cuda_buffer_pool_new (ctrans->context); pool = gst_cuda_buffer_pool_new (ctrans->context);
config = gst_buffer_pool_get_config (pool); config = gst_buffer_pool_get_config (pool);
gst_video_alignment_reset (&align);
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (&info); i++) {
align.stride_align[i] = 31;
}
gst_video_info_align (&info, &align);
gst_buffer_pool_config_add_option (config, gst_buffer_pool_config_add_option (config,
GST_BUFFER_POOL_OPTION_VIDEO_META); GST_BUFFER_POOL_OPTION_VIDEO_META);
gst_buffer_pool_config_add_option (config,
GST_BUFFER_POOL_OPTION_VIDEO_ALIGNMENT);
gst_buffer_pool_config_set_video_alignment (config, &align);
size = GST_VIDEO_INFO_SIZE (&info); size = GST_VIDEO_INFO_SIZE (&info);
gst_buffer_pool_config_set_params (config, caps, size, 0, 0); gst_buffer_pool_config_set_params (config, caps, size, 0, 0);
gst_query_add_allocation_meta (query, GST_VIDEO_META_API_TYPE, NULL);
gst_query_add_allocation_pool (query, pool, size, 0, 0);
if (gst_buffer_pool_config_get_allocator (config, &allocator, &params)) {
if (params.align < 31)
params.align = 31;
gst_query_add_allocation_param (query, allocator, &params);
gst_buffer_pool_config_set_allocator (config, allocator, &params);
}
if (!gst_buffer_pool_set_config (pool, config)) { if (!gst_buffer_pool_set_config (pool, config)) {
GST_ERROR_OBJECT (ctrans, "failed to set config"); GST_ERROR_OBJECT (ctrans, "failed to set config");
gst_object_unref (pool); gst_object_unref (pool);
return FALSE; return FALSE;
} }
/* Get updated size by cuda buffer pool */
config = gst_buffer_pool_get_config (pool);
gst_buffer_pool_config_get_params (config, NULL, &size, NULL, NULL);
gst_structure_free (config);
gst_query_add_allocation_pool (query, pool, size, 0, 0);
gst_object_unref (pool); gst_object_unref (pool);
} }
@ -265,6 +248,12 @@ gst_cuda_base_filter_decide_allocation (GstBaseTransform * trans,
gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META); gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META);
gst_buffer_pool_config_set_params (config, outcaps, size, min, max); gst_buffer_pool_config_set_params (config, outcaps, size, min, max);
gst_buffer_pool_set_config (pool, config); gst_buffer_pool_set_config (pool, config);
/* Get updated size by cuda buffer pool */
config = gst_buffer_pool_get_config (pool);
gst_buffer_pool_config_get_params (config, NULL, &size, NULL, NULL);
gst_structure_free (config);
if (update_pool) if (update_pool)
gst_query_set_nth_allocation_pool (query, 0, pool, size, min, max); gst_query_set_nth_allocation_pool (query, 0, pool, size, min, max);
else else
@ -285,8 +274,6 @@ gst_cuda_base_filter_transform (GstBaseTransform * trans,
GstVideoFrame in_frame, out_frame; GstVideoFrame in_frame, out_frame;
GstFlowReturn ret = GST_FLOW_OK; GstFlowReturn ret = GST_FLOW_OK;
GstMemory *mem; GstMemory *mem;
GstCudaMemory *in_cuda_mem = NULL;
GstCudaMemory *out_cuda_mem = NULL;
if (gst_buffer_n_memory (inbuf) != 1) { if (gst_buffer_n_memory (inbuf) != 1) {
GST_ERROR_OBJECT (self, "Invalid input buffer"); GST_ERROR_OBJECT (self, "Invalid input buffer");
@ -299,8 +286,6 @@ gst_cuda_base_filter_transform (GstBaseTransform * trans,
return GST_FLOW_ERROR; return GST_FLOW_ERROR;
} }
in_cuda_mem = GST_CUDA_MEMORY_CAST (mem);
if (gst_buffer_n_memory (outbuf) != 1) { if (gst_buffer_n_memory (outbuf) != 1) {
GST_ERROR_OBJECT (self, "Invalid output buffer"); GST_ERROR_OBJECT (self, "Invalid output buffer");
return GST_FLOW_ERROR; return GST_FLOW_ERROR;
@ -312,8 +297,6 @@ gst_cuda_base_filter_transform (GstBaseTransform * trans,
return GST_FLOW_ERROR; return GST_FLOW_ERROR;
} }
out_cuda_mem = GST_CUDA_MEMORY_CAST (mem);
if (!gst_video_frame_map (&in_frame, &ctrans->in_info, inbuf, if (!gst_video_frame_map (&in_frame, &ctrans->in_info, inbuf,
GST_MAP_READ | GST_MAP_CUDA)) { GST_MAP_READ | GST_MAP_CUDA)) {
GST_ERROR_OBJECT (self, "Failed to map input buffer"); GST_ERROR_OBJECT (self, "Failed to map input buffer");
@ -327,9 +310,8 @@ gst_cuda_base_filter_transform (GstBaseTransform * trans,
return GST_FLOW_ERROR; return GST_FLOW_ERROR;
} }
if (!gst_cuda_converter_frame (self->converter, if (!gst_cuda_converter_convert_frame (self->converter, &in_frame, &out_frame,
in_cuda_mem, &ctrans->in_info, ctrans->cuda_stream)) {
out_cuda_mem, &ctrans->out_info, ctrans->cuda_stream)) {
GST_ERROR_OBJECT (self, "Failed to convert frame"); GST_ERROR_OBJECT (self, "Failed to convert frame");
ret = GST_FLOW_ERROR; ret = GST_FLOW_ERROR;
} }

View file

@ -30,11 +30,8 @@ GST_DEBUG_CATEGORY_STATIC (gst_cuda_buffer_pool_debug);
struct _GstCudaBufferPoolPrivate struct _GstCudaBufferPoolPrivate
{ {
GstAllocator *allocator; GstCudaAllocator *allocator;
GstVideoInfo info; GstVideoInfo info;
gboolean add_videometa;
gboolean need_alignment;
GstCudaAllocationParams params;
}; };
#define gst_cuda_buffer_pool_parent_class parent_class #define gst_cuda_buffer_pool_parent_class parent_class
@ -44,8 +41,7 @@ G_DEFINE_TYPE_WITH_PRIVATE (GstCudaBufferPool, gst_cuda_buffer_pool,
static const gchar ** static const gchar **
gst_cuda_buffer_pool_get_options (GstBufferPool * pool) gst_cuda_buffer_pool_get_options (GstBufferPool * pool)
{ {
static const gchar *options[] = { GST_BUFFER_POOL_OPTION_VIDEO_META, static const gchar *options[] = { GST_BUFFER_POOL_OPTION_VIDEO_META, NULL
GST_BUFFER_POOL_OPTION_VIDEO_ALIGNMENT, NULL
}; };
return options; return options;
@ -54,153 +50,86 @@ gst_cuda_buffer_pool_get_options (GstBufferPool * pool)
static gboolean static gboolean
gst_cuda_buffer_pool_set_config (GstBufferPool * pool, GstStructure * config) gst_cuda_buffer_pool_set_config (GstBufferPool * pool, GstStructure * config)
{ {
GstCudaBufferPool *cuda_pool = GST_CUDA_BUFFER_POOL_CAST (pool); GstCudaBufferPool *self = GST_CUDA_BUFFER_POOL (pool);
GstCudaBufferPoolPrivate *priv = cuda_pool->priv; GstCudaBufferPoolPrivate *priv = self->priv;
GstCaps *caps = NULL; GstCaps *caps = NULL;
guint size, min_buffers, max_buffers; guint size, min_buffers, max_buffers;
guint max_align, n; GstVideoInfo info;
GstAllocator *allocator = NULL; GstMemory *mem;
GstAllocationParams *params = (GstAllocationParams *) & priv->params; GstCudaMemory *cmem;
GstVideoInfo *info = &priv->params.info;
if (!gst_buffer_pool_config_get_params (config, &caps, &size, &min_buffers, if (!gst_buffer_pool_config_get_params (config, &caps, &size, &min_buffers,
&max_buffers)) &max_buffers)) {
goto wrong_config; GST_WARNING_OBJECT (self, "invalid config");
if (caps == NULL)
goto no_caps;
if (!gst_buffer_pool_config_get_allocator (config, &allocator, params))
goto wrong_config;
/* now parse the caps from the config */
if (!gst_video_info_from_caps (info, caps))
goto wrong_caps;
GST_LOG_OBJECT (pool, "%dx%d, caps %" GST_PTR_FORMAT,
GST_VIDEO_INFO_WIDTH (info), GST_VIDEO_INFO_HEIGHT (info), caps);
gst_clear_object (&priv->allocator);
if (allocator) {
if (!GST_IS_CUDA_ALLOCATOR (allocator)) {
goto wrong_allocator;
} else {
priv->allocator = gst_object_ref (allocator);
}
} else {
allocator = priv->allocator = gst_cuda_allocator_new (cuda_pool->context);
if (G_UNLIKELY (priv->allocator == NULL))
goto no_allocator;
}
priv->add_videometa = gst_buffer_pool_config_has_option (config,
GST_BUFFER_POOL_OPTION_VIDEO_META);
priv->need_alignment = gst_buffer_pool_config_has_option (config,
GST_BUFFER_POOL_OPTION_VIDEO_ALIGNMENT);
max_align = params->align;
/* do memory align */
if (priv->need_alignment && priv->add_videometa) {
GstVideoAlignment valign;
gst_buffer_pool_config_get_video_alignment (config, &valign);
for (n = 0; n < GST_VIDEO_MAX_PLANES; ++n)
max_align |= valign.stride_align[n];
for (n = 0; n < GST_VIDEO_MAX_PLANES; ++n)
valign.stride_align[n] = max_align;
if (!gst_video_info_align (info, &valign))
goto failed_to_align;
gst_buffer_pool_config_set_video_alignment (config, &valign);
}
if (params->align < max_align) {
GST_WARNING_OBJECT (pool, "allocation params alignment %u is smaller "
"than the max specified video stride alignment %u, fixing",
(guint) params->align, max_align);
params->align = max_align;
gst_buffer_pool_config_set_allocator (config, allocator, params);
}
gst_buffer_pool_config_set_params (config, caps, GST_VIDEO_INFO_SIZE (info),
min_buffers, max_buffers);
return GST_BUFFER_POOL_CLASS (parent_class)->set_config (pool, config);
/* ERRORS */
wrong_config:
{
GST_WARNING_OBJECT (pool, "invalid config");
return FALSE; return FALSE;
} }
no_caps:
{ if (!caps) {
GST_WARNING_OBJECT (pool, "no caps in config"); GST_WARNING_OBJECT (pool, "no caps in config");
return FALSE; return FALSE;
} }
wrong_caps:
{ if (!gst_video_info_from_caps (&info, caps)) {
GST_WARNING_OBJECT (pool, GST_WARNING_OBJECT (self, "Failed to convert caps to video-info");
"failed getting geometry from caps %" GST_PTR_FORMAT, caps);
return FALSE; return FALSE;
} }
no_allocator:
{ gst_clear_object (&priv->allocator);
GST_WARNING_OBJECT (pool, "Could not create new CUDA allocator"); priv->allocator = (GstCudaAllocator *)
gst_allocator_find (GST_CUDA_MEMORY_TYPE_NAME);
if (!priv->allocator) {
GST_WARNING_OBJECT (self, "CudaAllocator is unavailable");
return FALSE; return FALSE;
} }
wrong_allocator:
{ mem = gst_cuda_allocator_alloc (priv->allocator, self->context, &info);
GST_WARNING_OBJECT (pool, "Incorrect allocator type for this pool"); if (!mem) {
return FALSE; GST_WARNING_OBJECT (self, "Failed to allocate memory");
}
failed_to_align:
{
GST_WARNING_OBJECT (pool, "Failed to align");
return FALSE; return FALSE;
} }
cmem = GST_CUDA_MEMORY_CAST (mem);
gst_buffer_pool_config_set_params (config, caps,
GST_VIDEO_INFO_SIZE (&cmem->info), min_buffers, max_buffers);
priv->info = info;
gst_memory_unref (mem);
return GST_BUFFER_POOL_CLASS (parent_class)->set_config (pool, config);
} }
static GstFlowReturn static GstFlowReturn
gst_cuda_buffer_pool_alloc (GstBufferPool * pool, GstBuffer ** buffer, gst_cuda_buffer_pool_alloc (GstBufferPool * pool, GstBuffer ** buffer,
GstBufferPoolAcquireParams * params) GstBufferPoolAcquireParams * params)
{ {
GstCudaBufferPool *cuda_pool = GST_CUDA_BUFFER_POOL_CAST (pool); GstCudaBufferPool *self = GST_CUDA_BUFFER_POOL_CAST (pool);
GstCudaBufferPoolPrivate *priv = cuda_pool->priv; GstCudaBufferPoolPrivate *priv = self->priv;
GstVideoInfo *info; GstVideoInfo *info = &priv->info;
GstBuffer *cuda; GstBuffer *buf;
GstMemory *mem; GstMemory *mem;
GstCudaMemory *cmem;
info = &priv->params.info; mem = gst_cuda_allocator_alloc (priv->allocator, self->context, &priv->info);
if (!mem) {
cuda = gst_buffer_new ();
mem = gst_cuda_allocator_alloc (GST_ALLOCATOR_CAST (priv->allocator),
GST_VIDEO_INFO_SIZE (info), &priv->params);
if (mem == NULL) {
gst_buffer_unref (cuda);
GST_WARNING_OBJECT (pool, "Cannot create CUDA memory"); GST_WARNING_OBJECT (pool, "Cannot create CUDA memory");
return GST_FLOW_ERROR; return GST_FLOW_ERROR;
} }
gst_buffer_append_memory (cuda, mem);
if (priv->add_videometa) { cmem = GST_CUDA_MEMORY_CAST (mem);
GST_DEBUG_OBJECT (pool, "adding GstVideoMeta");
gst_buffer_add_video_meta_full (cuda, GST_VIDEO_FRAME_FLAG_NONE,
GST_VIDEO_INFO_FORMAT (info), GST_VIDEO_INFO_WIDTH (info),
GST_VIDEO_INFO_HEIGHT (info), GST_VIDEO_INFO_N_PLANES (info),
info->offset, info->stride);
}
*buffer = cuda; buf = gst_buffer_new ();
gst_buffer_append_memory (buf, mem);
GST_DEBUG_OBJECT (pool, "adding GstVideoMeta");
gst_buffer_add_video_meta_full (buf, GST_VIDEO_FRAME_FLAG_NONE,
GST_VIDEO_INFO_FORMAT (info), GST_VIDEO_INFO_WIDTH (info),
GST_VIDEO_INFO_HEIGHT (info), GST_VIDEO_INFO_N_PLANES (info),
cmem->info.offset, cmem->info.stride);
*buffer = buf;
return GST_FLOW_OK; return GST_FLOW_OK;
} }
@ -208,44 +137,41 @@ gst_cuda_buffer_pool_alloc (GstBufferPool * pool, GstBuffer ** buffer,
GstBufferPool * GstBufferPool *
gst_cuda_buffer_pool_new (GstCudaContext * context) gst_cuda_buffer_pool_new (GstCudaContext * context)
{ {
GstCudaBufferPool *pool; GstCudaBufferPool *self;
pool = g_object_new (GST_TYPE_CUDA_BUFFER_POOL, NULL); g_return_val_if_fail (GST_IS_CUDA_CONTEXT (context), NULL);
gst_object_ref_sink (pool);
pool->context = gst_object_ref (context); self = g_object_new (GST_TYPE_CUDA_BUFFER_POOL, NULL);
gst_object_ref_sink (self);
GST_LOG_OBJECT (pool, "new CUDA buffer pool %p", pool); self->context = gst_object_ref (context);
return GST_BUFFER_POOL_CAST (pool); return GST_BUFFER_POOL_CAST (self);
} }
static void static void
gst_cuda_buffer_pool_dispose (GObject * object) gst_cuda_buffer_pool_dispose (GObject * object)
{ {
GstCudaBufferPool *pool = GST_CUDA_BUFFER_POOL_CAST (object); GstCudaBufferPool *self = GST_CUDA_BUFFER_POOL_CAST (object);
GstCudaBufferPoolPrivate *priv = pool->priv; GstCudaBufferPoolPrivate *priv = self->priv;
GST_LOG_OBJECT (pool, "finalize CUDA buffer pool %p", pool);
gst_clear_object (&priv->allocator); gst_clear_object (&priv->allocator);
gst_clear_object (&pool->context); gst_clear_object (&self->context);
G_OBJECT_CLASS (parent_class)->dispose (object); G_OBJECT_CLASS (parent_class)->dispose (object);
} }
static void static void
gst_cuda_buffer_pool_class_init (GstCudaBufferPoolClass * klass) gst_cuda_buffer_pool_class_init (GstCudaBufferPoolClass * klass)
{ {
GObjectClass *gobject_class = (GObjectClass *) klass; GObjectClass *gobject_class = (GObjectClass *) klass;
GstBufferPoolClass *gstbufferpool_class = (GstBufferPoolClass *) klass; GstBufferPoolClass *bufferpool_class = (GstBufferPoolClass *) klass;
gobject_class->dispose = gst_cuda_buffer_pool_dispose; gobject_class->dispose = gst_cuda_buffer_pool_dispose;
gstbufferpool_class->get_options = gst_cuda_buffer_pool_get_options; bufferpool_class->get_options = gst_cuda_buffer_pool_get_options;
gstbufferpool_class->set_config = gst_cuda_buffer_pool_set_config; bufferpool_class->set_config = gst_cuda_buffer_pool_set_config;
gstbufferpool_class->alloc_buffer = gst_cuda_buffer_pool_alloc; bufferpool_class->alloc_buffer = gst_cuda_buffer_pool_alloc;
GST_DEBUG_CATEGORY_INIT (gst_cuda_buffer_pool_debug, "cudabufferpool", 0, GST_DEBUG_CATEGORY_INIT (gst_cuda_buffer_pool_debug, "cudabufferpool", 0,
"CUDA Buffer Pool"); "CUDA Buffer Pool");

View file

@ -180,10 +180,6 @@ gst_cuda_download_propose_allocation (GstBaseTransform * trans,
if (gst_query_get_n_allocation_pools (query) == 0) { if (gst_query_get_n_allocation_pools (query) == 0) {
GstCapsFeatures *features; GstCapsFeatures *features;
GstStructure *config; GstStructure *config;
GstVideoAlignment align;
GstAllocationParams params = { 0, 31, 0, 0, };
GstAllocator *allocator = NULL;
gint i;
features = gst_caps_get_features (caps, 0); features = gst_caps_get_features (caps, 0);
@ -197,38 +193,25 @@ gst_cuda_download_propose_allocation (GstBaseTransform * trans,
config = gst_buffer_pool_get_config (pool); config = gst_buffer_pool_get_config (pool);
gst_video_alignment_reset (&align);
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (&info); i++) {
align.stride_align[i] = 31;
}
gst_video_info_align (&info, &align);
gst_buffer_pool_config_add_option (config, gst_buffer_pool_config_add_option (config,
GST_BUFFER_POOL_OPTION_VIDEO_META); GST_BUFFER_POOL_OPTION_VIDEO_META);
gst_buffer_pool_config_add_option (config,
GST_BUFFER_POOL_OPTION_VIDEO_ALIGNMENT);
gst_buffer_pool_config_set_video_alignment (config, &align);
size = GST_VIDEO_INFO_SIZE (&info); size = GST_VIDEO_INFO_SIZE (&info);
gst_buffer_pool_config_set_params (config, caps, size, 0, 0); gst_buffer_pool_config_set_params (config, caps, size, 0, 0);
gst_query_add_allocation_meta (query, GST_VIDEO_META_API_TYPE, NULL);
gst_query_add_allocation_pool (query, pool, size, 0, 0);
if (gst_buffer_pool_config_get_allocator (config, &allocator, &params)) {
if (params.align < 31)
params.align = 31;
gst_query_add_allocation_param (query, allocator, &params);
gst_buffer_pool_config_set_allocator (config, allocator, &params);
}
if (!gst_buffer_pool_set_config (pool, config)) { if (!gst_buffer_pool_set_config (pool, config)) {
GST_ERROR_OBJECT (ctrans, "failed to set config"); GST_ERROR_OBJECT (ctrans, "failed to set config");
gst_object_unref (pool); gst_object_unref (pool);
return FALSE; return FALSE;
} }
/* Get updated size by cuda buffer pool */
config = gst_buffer_pool_get_config (pool);
gst_buffer_pool_config_get_params (config, NULL, &size, NULL, NULL);
gst_structure_free (config);
gst_query_add_allocation_pool (query, pool, size, 0, 0);
gst_object_unref (pool); gst_object_unref (pool);
} }
@ -295,6 +278,12 @@ gst_cuda_download_decide_allocation (GstBaseTransform * trans, GstQuery * query)
gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META); gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META);
gst_buffer_pool_config_set_params (config, outcaps, size, min, max); gst_buffer_pool_config_set_params (config, outcaps, size, min, max);
gst_buffer_pool_set_config (pool, config); gst_buffer_pool_set_config (pool, config);
/* Get updated size by cuda buffer pool */
config = gst_buffer_pool_get_config (pool);
gst_buffer_pool_config_get_params (config, NULL, &size, NULL, NULL);
gst_structure_free (config);
if (update_pool) if (update_pool)
gst_query_set_nth_allocation_pool (query, 0, pool, size, min, max); gst_query_set_nth_allocation_pool (query, 0, pool, size, min, max);
else else

View file

@ -26,20 +26,33 @@
#include <string.h> #include <string.h>
GST_DEBUG_CATEGORY_STATIC (cudaallocator_debug); GST_DEBUG_CATEGORY_STATIC (cuda_allocator_debug);
#define GST_CAT_DEFAULT cudaallocator_debug #define GST_CAT_DEFAULT cuda_allocator_debug
GST_DEBUG_CATEGORY_STATIC (GST_CAT_MEMORY);
static GstAllocator *_gst_cuda_allocator = NULL;
struct _GstCudaMemoryPrivate
{
CUdeviceptr data;
void *staging;
/* params used for cuMemAllocPitch */
gsize pitch;
guint width_in_bytes;
guint height;
GMutex lock;
};
#define gst_cuda_allocator_parent_class parent_class #define gst_cuda_allocator_parent_class parent_class
G_DEFINE_TYPE (GstCudaAllocator, gst_cuda_allocator, GST_TYPE_ALLOCATOR); G_DEFINE_TYPE (GstCudaAllocator, gst_cuda_allocator, GST_TYPE_ALLOCATOR);
static void gst_cuda_allocator_dispose (GObject * object);
static void gst_cuda_allocator_free (GstAllocator * allocator, static void gst_cuda_allocator_free (GstAllocator * allocator,
GstMemory * memory); GstMemory * memory);
static gpointer cuda_mem_map (GstCudaMemory * mem, gsize maxsize, static gpointer cuda_mem_map (GstMemory * mem, gsize maxsize,
GstMapFlags flags); GstMapFlags flags);
static void cuda_mem_unmap_full (GstCudaMemory * mem, GstMapInfo * info); static void cuda_mem_unmap_full (GstMemory * mem, GstMapInfo * info);
static GstMemory *cuda_mem_copy (GstMemory * mem, gssize offset, gssize size); static GstMemory *cuda_mem_copy (GstMemory * mem, gssize offset, gssize size);
static GstMemory * static GstMemory *
@ -52,17 +65,13 @@ gst_cuda_allocator_dummy_alloc (GstAllocator * allocator, gsize size,
static void static void
gst_cuda_allocator_class_init (GstCudaAllocatorClass * klass) gst_cuda_allocator_class_init (GstCudaAllocatorClass * klass)
{ {
GObjectClass *gobject_class = G_OBJECT_CLASS (klass);
GstAllocatorClass *allocator_class = GST_ALLOCATOR_CLASS (klass); GstAllocatorClass *allocator_class = GST_ALLOCATOR_CLASS (klass);
gobject_class->dispose = gst_cuda_allocator_dispose;
allocator_class->alloc = GST_DEBUG_FUNCPTR (gst_cuda_allocator_dummy_alloc); allocator_class->alloc = GST_DEBUG_FUNCPTR (gst_cuda_allocator_dummy_alloc);
allocator_class->free = GST_DEBUG_FUNCPTR (gst_cuda_allocator_free); allocator_class->free = GST_DEBUG_FUNCPTR (gst_cuda_allocator_free);
GST_DEBUG_CATEGORY_INIT (cudaallocator_debug, "cudaallocator", 0, GST_DEBUG_CATEGORY_INIT (cuda_allocator_debug, "cudaallocator", 0,
"CUDA Allocator"); "CUDA Allocator");
GST_DEBUG_CATEGORY_GET (GST_CAT_MEMORY, "GST_MEMORY");
} }
static void static void
@ -74,337 +83,273 @@ gst_cuda_allocator_init (GstCudaAllocator * allocator)
alloc->mem_type = GST_CUDA_MEMORY_TYPE_NAME; alloc->mem_type = GST_CUDA_MEMORY_TYPE_NAME;
alloc->mem_map = (GstMemoryMapFunction) cuda_mem_map; alloc->mem_map = cuda_mem_map;
alloc->mem_unmap_full = (GstMemoryUnmapFullFunction) cuda_mem_unmap_full; alloc->mem_unmap_full = cuda_mem_unmap_full;
alloc->mem_copy = (GstMemoryCopyFunction) cuda_mem_copy; alloc->mem_copy = cuda_mem_copy;
GST_OBJECT_FLAG_SET (allocator, GST_ALLOCATOR_FLAG_CUSTOM_ALLOC); GST_OBJECT_FLAG_SET (allocator, GST_ALLOCATOR_FLAG_CUSTOM_ALLOC);
} }
static void static GstMemory *
gst_cuda_allocator_dispose (GObject * object) gst_cuda_allocator_alloc_internal (GstCudaAllocator * self,
GstCudaContext * context, const GstVideoInfo * info,
guint width_in_bytes, guint alloc_height)
{ {
GstCudaAllocator *self = GST_CUDA_ALLOCATOR_CAST (object); GstCudaMemoryPrivate *priv;
GstCudaMemory *mem;
GST_DEBUG_OBJECT (self, "dispose");
gst_clear_object (&self->context);
G_OBJECT_CLASS (parent_class)->dispose (object);
}
GstMemory *
gst_cuda_allocator_alloc (GstAllocator * allocator, gsize size,
GstCudaAllocationParams * params)
{
GstCudaAllocator *self = GST_CUDA_ALLOCATOR_CAST (allocator);
gsize maxsize = size + params->parent.prefix + params->parent.padding;
gsize align = params->parent.align;
gsize offset = params->parent.prefix;
GstMemoryFlags flags = params->parent.flags;
CUdeviceptr data; CUdeviceptr data;
gboolean ret = FALSE; gboolean ret = FALSE;
GstCudaMemory *mem; gsize pitch;
GstVideoInfo *info = &params->info; guint height = GST_VIDEO_INFO_HEIGHT (info);
gint i; GstVideoInfo *alloc_info;
guint width, height;
gsize stride, plane_offset;
if (!gst_cuda_context_push (self->context)) if (!gst_cuda_context_push (context))
return NULL; return NULL;
/* ensure configured alignment */ ret = gst_cuda_result (CuMemAllocPitch (&data, &pitch, width_in_bytes,
align |= gst_memory_alignment; alloc_height, 16));
/* allocate more to compensate for alignment */
maxsize += align;
GST_CAT_DEBUG_OBJECT (GST_CAT_MEMORY, self, "allocate new cuda memory");
width = GST_VIDEO_INFO_COMP_WIDTH (info, 0) *
GST_VIDEO_INFO_COMP_PSTRIDE (info, 0);
height = 0;
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++)
height += GST_VIDEO_INFO_COMP_HEIGHT (info, i);
ret = gst_cuda_result (CuMemAllocPitch (&data, &stride, width, height, 16));
gst_cuda_context_pop (NULL); gst_cuda_context_pop (NULL);
if (G_UNLIKELY (!ret)) { if (!ret) {
GST_CAT_ERROR_OBJECT (GST_CAT_MEMORY, self, "CUDA allocation failure"); GST_ERROR_OBJECT (self, "Failed to allocate CUDA memory");
return NULL; return NULL;
} }
mem = g_new0 (GstCudaMemory, 1); mem = g_new0 (GstCudaMemory, 1);
g_mutex_init (&mem->lock); mem->priv = priv = g_new0 (GstCudaMemoryPrivate, 1);
mem->data = data;
mem->alloc_params = *params;
mem->stride = stride;
plane_offset = 0; priv->data = data;
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++) { priv->pitch = pitch;
mem->offset[i] = plane_offset; priv->width_in_bytes = width_in_bytes;
plane_offset += stride * GST_VIDEO_INFO_COMP_HEIGHT (info, i); priv->height = alloc_height;
g_mutex_init (&priv->lock);
mem->context = gst_object_ref (context);
mem->info = *info;
mem->info.size = pitch * alloc_height;
alloc_info = &mem->info;
gst_memory_init (GST_MEMORY_CAST (mem), 0, GST_ALLOCATOR_CAST (self),
NULL, alloc_info->size, 0, 0, alloc_info->size);
switch (GST_VIDEO_INFO_FORMAT (info)) {
case GST_VIDEO_FORMAT_I420:
case GST_VIDEO_FORMAT_YV12:
case GST_VIDEO_FORMAT_I420_10LE:
/* we are wasting space yes, but required so that this memory
* can be used in kernel function */
alloc_info->stride[0] = pitch;
alloc_info->stride[1] = pitch;
alloc_info->stride[2] = pitch;
alloc_info->offset[0] = 0;
alloc_info->offset[1] = alloc_info->stride[0] * height;
alloc_info->offset[2] = alloc_info->offset[1] +
alloc_info->stride[1] * height / 2;
break;
case GST_VIDEO_FORMAT_NV12:
case GST_VIDEO_FORMAT_NV21:
case GST_VIDEO_FORMAT_P010_10LE:
case GST_VIDEO_FORMAT_P016_LE:
alloc_info->stride[0] = pitch;
alloc_info->stride[1] = pitch;
alloc_info->offset[0] = 0;
alloc_info->offset[1] = alloc_info->stride[0] * height;
break;
case GST_VIDEO_FORMAT_Y444:
case GST_VIDEO_FORMAT_Y444_16LE:
alloc_info->stride[0] = pitch;
alloc_info->stride[1] = pitch;
alloc_info->stride[2] = pitch;
alloc_info->offset[0] = 0;
alloc_info->offset[1] = alloc_info->stride[0] * height;
alloc_info->offset[2] = alloc_info->offset[1] * 2;
break;
case GST_VIDEO_FORMAT_BGRA:
case GST_VIDEO_FORMAT_RGBA:
case GST_VIDEO_FORMAT_RGBx:
case GST_VIDEO_FORMAT_BGRx:
case GST_VIDEO_FORMAT_ARGB:
case GST_VIDEO_FORMAT_ABGR:
case GST_VIDEO_FORMAT_RGB:
case GST_VIDEO_FORMAT_BGR:
case GST_VIDEO_FORMAT_BGR10A2_LE:
case GST_VIDEO_FORMAT_RGB10A2_LE:
alloc_info->stride[0] = pitch;
alloc_info->offset[0] = 0;
break;
default:
GST_ERROR_OBJECT (self, "Unexpected format %s",
gst_video_format_to_string (GST_VIDEO_INFO_FORMAT (info)));
g_assert_not_reached ();
gst_memory_unref (GST_MEMORY_CAST (mem));
return NULL;
} }
mem->context = gst_object_ref (self->context);
gst_memory_init (GST_MEMORY_CAST (mem),
flags, GST_ALLOCATOR_CAST (self), NULL, maxsize, align, offset, size);
return GST_MEMORY_CAST (mem); return GST_MEMORY_CAST (mem);
} }
static void static void
gst_cuda_allocator_free (GstAllocator * allocator, GstMemory * memory) gst_cuda_allocator_free (GstAllocator * allocator, GstMemory * memory)
{ {
GstCudaAllocator *self = GST_CUDA_ALLOCATOR_CAST (allocator);
GstCudaMemory *mem = GST_CUDA_MEMORY_CAST (memory); GstCudaMemory *mem = GST_CUDA_MEMORY_CAST (memory);
GstCudaMemoryPrivate *priv = mem->priv;
GST_CAT_DEBUG_OBJECT (GST_CAT_MEMORY, allocator, "free cuda memory"); gst_cuda_context_push (mem->context);
if (priv->data)
g_mutex_clear (&mem->lock); gst_cuda_result (CuMemFree (priv->data));
gst_cuda_context_push (self->context);
if (mem->data)
gst_cuda_result (CuMemFree (mem->data));
if (mem->map_alloc_data)
gst_cuda_result (CuMemFreeHost (mem->map_alloc_data));
if (priv->staging)
gst_cuda_result (CuMemFreeHost (priv->staging));
gst_cuda_context_pop (NULL); gst_cuda_context_pop (NULL);
gst_object_unref (mem->context); gst_object_unref (mem->context);
g_mutex_clear (&priv->lock);
g_free (mem->priv);
g_free (mem); g_free (mem);
} }
/* called with lock */
static gboolean static gboolean
gst_cuda_memory_upload_transfer (GstCudaMemory * mem) gst_cuda_memory_upload (GstCudaAllocator * self, GstCudaMemory * mem)
{ {
gint i; GstCudaMemoryPrivate *priv = mem->priv;
GstVideoInfo *info = &mem->alloc_params.info;
gboolean ret = TRUE; gboolean ret = TRUE;
CUDA_MEMCPY2D param = { 0, };
if (!mem->map_data) { if (!priv->staging ||
GST_CAT_ERROR (GST_CAT_MEMORY, "no staging memory to upload"); !GST_MEMORY_FLAG_IS_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD)) {
return TRUE;
}
if (!gst_cuda_context_push (mem->context)) {
GST_ERROR_OBJECT (self, "Failed to push cuda context");
return FALSE; return FALSE;
} }
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++) { param.srcMemoryType = CU_MEMORYTYPE_HOST;
CUDA_MEMCPY2D param = { 0, }; param.srcHost = priv->staging;
param.srcPitch = priv->pitch;
param.srcMemoryType = CU_MEMORYTYPE_HOST; param.dstMemoryType = CU_MEMORYTYPE_DEVICE;
param.srcHost = param.dstDevice = (CUdeviceptr) priv->data;
(guint8 *) mem->map_data + GST_VIDEO_INFO_PLANE_OFFSET (info, i); param.dstPitch = priv->pitch;
param.srcPitch = GST_VIDEO_INFO_PLANE_STRIDE (info, i); param.WidthInBytes = priv->width_in_bytes;
param.Height = priv->height;
param.dstMemoryType = CU_MEMORYTYPE_DEVICE; ret = gst_cuda_result (CuMemcpy2D (&param));
param.dstDevice = mem->data + mem->offset[i]; gst_cuda_context_pop (NULL);
param.dstPitch = mem->stride;
param.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (info, i) *
GST_VIDEO_INFO_COMP_PSTRIDE (info, i);
param.Height = GST_VIDEO_INFO_COMP_HEIGHT (info, i);
if (!gst_cuda_result (CuMemcpy2DAsync (&param, NULL))) { if (!ret)
GST_CAT_ERROR (GST_CAT_MEMORY, "Failed to copy %dth plane", i); GST_ERROR_OBJECT (self, "Failed to upload memory");
ret = FALSE;
break;
}
}
gst_cuda_result (CuStreamSynchronize (NULL));
return ret; return ret;
} }
/* called with lock */
static gboolean static gboolean
gst_cuda_memory_download_transfer (GstCudaMemory * mem) gst_cuda_memory_download (GstCudaAllocator * self, GstCudaMemory * mem)
{ {
gint i; GstCudaMemoryPrivate *priv = mem->priv;
GstVideoInfo *info = &mem->alloc_params.info; gboolean ret = TRUE;
CUDA_MEMCPY2D param = { 0, };
if (!mem->map_data) { if (!GST_MEMORY_FLAG_IS_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD))
GST_CAT_ERROR (GST_CAT_MEMORY, "no staging memory to upload"); return TRUE;
if (!gst_cuda_context_push (mem->context)) {
GST_ERROR_OBJECT (self, "Failed to push cuda context");
return FALSE; return FALSE;
} }
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++) { if (!priv->staging) {
CUDA_MEMCPY2D param = { 0, }; ret = gst_cuda_result (CuMemAllocHost (&priv->staging,
GST_MEMORY_CAST (mem)->size));
param.srcMemoryType = CU_MEMORYTYPE_DEVICE; if (!ret) {
param.srcDevice = mem->data + mem->offset[i]; GST_ERROR_OBJECT (self, "Failed to allocate staging memory");
param.srcPitch = mem->stride;
param.dstMemoryType = CU_MEMORYTYPE_HOST;
param.dstHost =
(guint8 *) mem->map_data + GST_VIDEO_INFO_PLANE_OFFSET (info, i);
param.dstPitch = GST_VIDEO_INFO_PLANE_STRIDE (info, i);
param.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (info, i) *
GST_VIDEO_INFO_COMP_PSTRIDE (info, i);
param.Height = GST_VIDEO_INFO_COMP_HEIGHT (info, i);
if (!gst_cuda_result (CuMemcpy2DAsync (&param, NULL))) {
GST_CAT_ERROR (GST_CAT_MEMORY, "Failed to copy %dth plane", i);
CuMemFreeHost (mem->map_alloc_data);
mem->map_alloc_data = mem->map_data = mem->align_data = NULL;
break;
}
}
gst_cuda_result (CuStreamSynchronize (NULL));
if (!mem->map_data)
return FALSE;
return TRUE;
}
static gpointer
gst_cuda_memory_device_memory_map (GstCudaMemory * mem)
{
GstMemory *memory = GST_MEMORY_CAST (mem);
gpointer data;
gsize aoffset;
gsize align = memory->align;
if (mem->map_data) {
return mem->map_data;
}
GST_CAT_DEBUG (GST_CAT_MEMORY, "alloc host memory for map");
if (!mem->map_alloc_data) {
gsize maxsize;
guint8 *align_data;
maxsize = memory->maxsize + align;
if (!gst_cuda_context_push (mem->context)) {
GST_CAT_ERROR (GST_CAT_MEMORY, "cannot push cuda context");
return NULL;
}
if (!gst_cuda_result (CuMemAllocHost (&data, maxsize))) {
GST_CAT_ERROR (GST_CAT_MEMORY, "cannot alloc host memory");
gst_cuda_context_pop (NULL); gst_cuda_context_pop (NULL);
return FALSE;
return NULL;
}
if (!gst_cuda_context_pop (NULL)) {
GST_CAT_WARNING (GST_CAT_MEMORY, "cannot pop cuda context");
}
mem->map_alloc_data = data;
align_data = data;
/* do align */
if ((aoffset = ((guintptr) align_data & align))) {
aoffset = (align + 1) - aoffset;
align_data += aoffset;
}
mem->align_data = align_data;
/* first memory, always need download to staging */
GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD);
}
mem->map_data = mem->align_data;
if (GST_MEMORY_FLAG_IS_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD)) {
if (!gst_cuda_context_push (mem->context)) {
GST_CAT_ERROR (GST_CAT_MEMORY, "cannot push cuda context");
return NULL;
}
gst_cuda_memory_download_transfer (mem);
if (!gst_cuda_context_pop (NULL)) {
GST_CAT_WARNING (GST_CAT_MEMORY, "cannot pop cuda context");
} }
} }
return mem->map_data; param.srcMemoryType = CU_MEMORYTYPE_DEVICE;
param.srcDevice = (CUdeviceptr) priv->data;
param.srcPitch = priv->pitch;
param.dstMemoryType = CU_MEMORYTYPE_HOST;
param.dstHost = priv->staging;
param.dstPitch = priv->pitch;
param.WidthInBytes = priv->width_in_bytes;
param.Height = priv->height;
ret = gst_cuda_result (CuMemcpy2D (&param));
gst_cuda_context_pop (NULL);
if (!ret)
GST_ERROR_OBJECT (self, "Failed to upload memory");
return ret;
} }
static gpointer static gpointer
cuda_mem_map (GstCudaMemory * mem, gsize maxsize, GstMapFlags flags) cuda_mem_map (GstMemory * mem, gsize maxsize, GstMapFlags flags)
{ {
GstCudaAllocator *self = GST_CUDA_ALLOCATOR (mem->allocator);
GstCudaMemory *cmem = GST_CUDA_MEMORY_CAST (mem);
GstCudaMemoryPrivate *priv = cmem->priv;
gpointer ret = NULL; gpointer ret = NULL;
g_mutex_lock (&mem->lock); g_mutex_lock (&priv->lock);
mem->map_count++;
if ((flags & GST_MAP_CUDA) == GST_MAP_CUDA) { if ((flags & GST_MAP_CUDA) == GST_MAP_CUDA) {
/* upload from staging to device memory if necessary */ if (!gst_cuda_memory_upload (self, cmem))
if (GST_MEMORY_FLAG_IS_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD)) { goto out;
if (!gst_cuda_context_push (mem->context)) {
GST_CAT_ERROR (GST_CAT_MEMORY, "cannot push cuda context");
g_mutex_unlock (&mem->lock);
return NULL;
}
if (!gst_cuda_memory_upload_transfer (mem)) {
g_mutex_unlock (&mem->lock);
return NULL;
}
gst_cuda_context_pop (NULL);
}
GST_MEMORY_FLAG_UNSET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD); GST_MEMORY_FLAG_UNSET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD);
if ((flags & GST_MAP_WRITE) == GST_MAP_WRITE) if ((flags & GST_MAP_WRITE) == GST_MAP_WRITE)
GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD); GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD);
g_mutex_unlock (&mem->lock); ret = (gpointer) priv->data;
return (gpointer) mem->data; goto out;
} }
ret = gst_cuda_memory_device_memory_map (mem); /* First CPU access, must be downloaded */
if (ret == NULL) { if (!priv->staging)
mem->map_count--; GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD);
g_mutex_unlock (&mem->lock);
return NULL; if (!gst_cuda_memory_download (self, cmem))
} goto out;
ret = priv->staging;
if ((flags & GST_MAP_WRITE) == GST_MAP_WRITE) if ((flags & GST_MAP_WRITE) == GST_MAP_WRITE)
GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD); GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD);
GST_MEMORY_FLAG_UNSET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD); GST_MEMORY_FLAG_UNSET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD);
g_mutex_unlock (&mem->lock); out:
g_mutex_unlock (&priv->lock);
return ret; return ret;
} }
static void static void
cuda_mem_unmap_full (GstCudaMemory * mem, GstMapInfo * info) cuda_mem_unmap_full (GstMemory * mem, GstMapInfo * info)
{ {
g_mutex_lock (&mem->lock); GstCudaMemory *cmem = GST_CUDA_MEMORY_CAST (mem);
mem->map_count--; GstCudaMemoryPrivate *priv = cmem->priv;
GST_CAT_TRACE (GST_CAT_MEMORY,
"unmap CUDA memory %p, map count %d, have map_data %s",
mem, mem->map_count, mem->map_data ? "true" : "false");
g_mutex_lock (&priv->lock);
if ((info->flags & GST_MAP_CUDA) == GST_MAP_CUDA) { if ((info->flags & GST_MAP_CUDA) == GST_MAP_CUDA) {
if ((info->flags & GST_MAP_WRITE) == GST_MAP_WRITE) if ((info->flags & GST_MAP_WRITE) == GST_MAP_WRITE)
GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD); GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD);
g_mutex_unlock (&mem->lock); goto out;
return;
} }
if ((info->flags & GST_MAP_WRITE)) if ((info->flags & GST_MAP_WRITE) == GST_MAP_WRITE)
GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD); GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD);
if (mem->map_count > 0 || !mem->map_data) { out:
g_mutex_unlock (&mem->lock); g_mutex_unlock (&priv->lock);
return;
}
mem->map_data = NULL;
g_mutex_unlock (&mem->lock);
return; return;
} }
@ -412,72 +357,82 @@ cuda_mem_unmap_full (GstCudaMemory * mem, GstMapInfo * info)
static GstMemory * static GstMemory *
cuda_mem_copy (GstMemory * mem, gssize offset, gssize size) cuda_mem_copy (GstMemory * mem, gssize offset, gssize size)
{ {
GstMemory *copy; GstCudaAllocator *self = GST_CUDA_ALLOCATOR (mem->allocator);
GstCudaMemory *src_mem = GST_CUDA_MEMORY_CAST (mem); GstCudaMemory *src_mem = GST_CUDA_MEMORY_CAST (mem);
GstCudaMemory *dst_mem; GstCudaContext *context = src_mem->context;
GstCudaContext *ctx = GST_CUDA_ALLOCATOR_CAST (mem->allocator)->context; GstMapInfo src_info, dst_info;
gint i; CUDA_MEMCPY2D param = { 0, };
GstVideoInfo *info; GstMemory *copy;
gboolean ret;
/* offset and size are ignored */ /* offset and size are ignored */
copy = gst_cuda_allocator_alloc (mem->allocator, mem->size, copy = gst_cuda_allocator_alloc_internal (self, context,
&src_mem->alloc_params); &src_mem->info, src_mem->priv->width_in_bytes, src_mem->priv->height);
dst_mem = GST_CUDA_MEMORY_CAST (copy); if (!copy) {
GST_ERROR_OBJECT (self, "Failed to allocate memory for copying");
return NULL;
}
info = &src_mem->alloc_params.info; if (!gst_memory_map (mem, &src_info, GST_MAP_READ | GST_MAP_CUDA)) {
GST_ERROR_OBJECT (self, "Failed to map src memory");
gst_memory_unref (copy);
return NULL;
}
if (!gst_cuda_context_push (ctx)) { if (!gst_memory_map (copy, &dst_info, GST_MAP_WRITE | GST_MAP_CUDA)) {
GST_CAT_ERROR (GST_CAT_MEMORY, "cannot push cuda context"); GST_ERROR_OBJECT (self, "Failed to map dst memory");
gst_cuda_allocator_free (mem->allocator, copy); gst_memory_unmap (mem, &src_info);
gst_memory_unref (copy);
return NULL;
}
if (!gst_cuda_context_push (context)) {
GST_ERROR_OBJECT (self, "Failed to push cuda context");
gst_memory_unmap (mem, &src_info);
gst_memory_unmap (copy, &dst_info);
return NULL; return NULL;
} }
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++) { param.srcMemoryType = CU_MEMORYTYPE_DEVICE;
CUDA_MEMCPY2D param = { 0, }; param.srcDevice = (CUdeviceptr) src_info.data;
param.srcPitch = src_mem->priv->pitch;
param.srcMemoryType = CU_MEMORYTYPE_DEVICE; param.dstMemoryType = CU_MEMORYTYPE_DEVICE;
param.srcDevice = src_mem->data + src_mem->offset[i]; param.dstDevice = (CUdeviceptr) dst_info.data;
param.srcPitch = src_mem->stride; param.dstPitch = src_mem->priv->pitch;
param.WidthInBytes = src_mem->priv->width_in_bytes;
param.Height = src_mem->priv->height;
param.dstMemoryType = CU_MEMORYTYPE_DEVICE; ret = gst_cuda_result (CuMemcpy2D (&param));
param.dstDevice = dst_mem->data + dst_mem->offset[i]; gst_cuda_context_pop (NULL);
param.dstPitch = dst_mem->stride;
param.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (info, i) *
GST_VIDEO_INFO_COMP_PSTRIDE (info, i);
param.Height = GST_VIDEO_INFO_COMP_HEIGHT (info, i);
if (!gst_cuda_result (CuMemcpy2DAsync (&param, NULL))) { gst_memory_unmap (mem, &src_info);
GST_CAT_ERROR_OBJECT (GST_CAT_MEMORY, gst_memory_unmap (copy, &dst_info);
mem->allocator, "Failed to copy %dth plane", i);
gst_cuda_context_pop (NULL);
gst_cuda_allocator_free (mem->allocator, copy);
return NULL; if (!ret) {
} GST_ERROR_OBJECT (self, "Failed to copy memory");
} gst_memory_unref (copy);
return NULL;
gst_cuda_result (CuStreamSynchronize (NULL));
if (!gst_cuda_context_pop (NULL)) {
GST_CAT_WARNING (GST_CAT_MEMORY, "cannot pop cuda context");
} }
return copy; return copy;
} }
GstAllocator * void
gst_cuda_allocator_new (GstCudaContext * context) gst_cuda_memory_init_once (void)
{ {
GstCudaAllocator *allocator; static gsize _init = 0;
g_return_val_if_fail (GST_IS_CUDA_CONTEXT (context), NULL); if (g_once_init_enter (&_init)) {
_gst_cuda_allocator =
(GstAllocator *) g_object_new (GST_TYPE_CUDA_ALLOCATOR, NULL);
gst_object_ref_sink (_gst_cuda_allocator);
allocator = g_object_new (GST_TYPE_CUDA_ALLOCATOR, NULL); gst_allocator_register (GST_CUDA_MEMORY_TYPE_NAME, _gst_cuda_allocator);
allocator->context = gst_object_ref (context); g_once_init_leave (&_init, 1);
}
return GST_ALLOCATOR_CAST (allocator);
} }
gboolean gboolean
@ -486,3 +441,51 @@ gst_is_cuda_memory (GstMemory * mem)
return mem != NULL && mem->allocator != NULL && return mem != NULL && mem->allocator != NULL &&
GST_IS_CUDA_ALLOCATOR (mem->allocator); GST_IS_CUDA_ALLOCATOR (mem->allocator);
} }
GstMemory *
gst_cuda_allocator_alloc (GstCudaAllocator * allocator,
GstCudaContext * context, const GstVideoInfo * info)
{
guint alloc_height;
g_return_val_if_fail (GST_IS_CUDA_ALLOCATOR (allocator), NULL);
g_return_val_if_fail (GST_IS_CUDA_CONTEXT (context), NULL);
g_return_val_if_fail (info != NULL, NULL);
alloc_height = GST_VIDEO_INFO_HEIGHT (info);
/* make sure valid height for subsampled formats */
switch (GST_VIDEO_INFO_FORMAT (info)) {
case GST_VIDEO_FORMAT_I420:
case GST_VIDEO_FORMAT_YV12:
case GST_VIDEO_FORMAT_NV12:
case GST_VIDEO_FORMAT_P010_10LE:
case GST_VIDEO_FORMAT_P016_LE:
case GST_VIDEO_FORMAT_I420_10LE:
alloc_height = GST_ROUND_UP_2 (alloc_height);
break;
default:
break;
}
switch (GST_VIDEO_INFO_FORMAT (info)) {
case GST_VIDEO_FORMAT_I420:
case GST_VIDEO_FORMAT_YV12:
case GST_VIDEO_FORMAT_I420_10LE:
case GST_VIDEO_FORMAT_NV12:
case GST_VIDEO_FORMAT_NV21:
case GST_VIDEO_FORMAT_P010_10LE:
case GST_VIDEO_FORMAT_P016_LE:
alloc_height *= 2;
break;
case GST_VIDEO_FORMAT_Y444:
case GST_VIDEO_FORMAT_Y444_16LE:
alloc_height *= 3;
break;
default:
break;
}
return gst_cuda_allocator_alloc_internal (allocator, context,
info, info->stride[0], alloc_height);
}

View file

@ -37,10 +37,11 @@ G_BEGIN_DECLS
#define GST_CUDA_ALLOCATOR_CAST(obj) ((GstCudaAllocator *)(obj)) #define GST_CUDA_ALLOCATOR_CAST(obj) ((GstCudaAllocator *)(obj))
#define GST_CUDA_MEMORY_CAST(mem) ((GstCudaMemory *) (mem)) #define GST_CUDA_MEMORY_CAST(mem) ((GstCudaMemory *) (mem))
typedef struct _GstCudaAllocationParams GstCudaAllocationParams;
typedef struct _GstCudaAllocator GstCudaAllocator; typedef struct _GstCudaAllocator GstCudaAllocator;
typedef struct _GstCudaAllocatorClass GstCudaAllocatorClass; typedef struct _GstCudaAllocatorClass GstCudaAllocatorClass;
typedef struct _GstCudaMemory GstCudaMemory; typedef struct _GstCudaMemory GstCudaMemory;
typedef struct _GstCudaMemoryPrivate GstCudaMemoryPrivate;
/** /**
* GST_MAP_CUDA: * GST_MAP_CUDA:
@ -65,32 +66,6 @@ typedef struct _GstCudaMemory GstCudaMemory;
*/ */
#define GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY "memory:CUDAMemory" #define GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY "memory:CUDAMemory"
struct _GstCudaAllocationParams
{
GstAllocationParams parent;
GstVideoInfo info;
};
struct _GstCudaAllocator
{
GstAllocator parent;
GstCudaContext *context;
};
struct _GstCudaAllocatorClass
{
GstAllocatorClass parent_class;
};
GType gst_cuda_allocator_get_type (void);
GstAllocator * gst_cuda_allocator_new (GstCudaContext * context);
GstMemory * gst_cuda_allocator_alloc (GstAllocator * allocator,
gsize size,
GstCudaAllocationParams * params);
/** /**
* GstCudaMemoryTransfer: * GstCudaMemoryTransfer:
* @GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD: the device memory needs downloading * @GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD: the device memory needs downloading
@ -106,32 +81,36 @@ typedef enum
struct _GstCudaMemory struct _GstCudaMemory
{ {
GstMemory mem; GstMemory mem;
/*< public >*/
GstCudaContext *context; GstCudaContext *context;
CUdeviceptr data; GstVideoInfo info;
GstCudaAllocationParams alloc_params; /*< private >*/
GstCudaMemoryPrivate *priv;
/* offset and stride of CUDA device memory */ gpointer _gst_reserved[GST_PADDING];
gsize offset[GST_VIDEO_MAX_PLANES];
gint stride;
/* allocated CUDA Host memory */
gpointer map_alloc_data;
/* aligned CUDA Host memory */
guint8 *align_data;
/* pointing align_data if the memory is mapped */
gpointer map_data;
gint map_count;
GMutex lock;
}; };
gboolean gst_is_cuda_memory (GstMemory * mem); struct _GstCudaAllocator
{
GstAllocator parent;
};
struct _GstCudaAllocatorClass
{
GstAllocatorClass parent_class;
};
void gst_cuda_memory_init_once (void);
gboolean gst_is_cuda_memory (GstMemory * mem);
GType gst_cuda_allocator_get_type (void);
GstMemory * gst_cuda_allocator_alloc (GstCudaAllocator * allocator,
GstCudaContext * context,
const GstVideoInfo * info);
G_END_DECLS G_END_DECLS

View file

@ -174,10 +174,6 @@ gst_cuda_upload_propose_allocation (GstBaseTransform * trans,
if (gst_query_get_n_allocation_pools (query) == 0) { if (gst_query_get_n_allocation_pools (query) == 0) {
GstCapsFeatures *features; GstCapsFeatures *features;
GstStructure *config; GstStructure *config;
GstVideoAlignment align;
GstAllocationParams params = { 0, 31, 0, 0, };
GstAllocator *allocator = NULL;
gint i;
features = gst_caps_get_features (caps, 0); features = gst_caps_get_features (caps, 0);
@ -191,38 +187,25 @@ gst_cuda_upload_propose_allocation (GstBaseTransform * trans,
config = gst_buffer_pool_get_config (pool); config = gst_buffer_pool_get_config (pool);
gst_video_alignment_reset (&align);
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (&info); i++) {
align.stride_align[i] = 31;
}
gst_video_info_align (&info, &align);
gst_buffer_pool_config_add_option (config, gst_buffer_pool_config_add_option (config,
GST_BUFFER_POOL_OPTION_VIDEO_META); GST_BUFFER_POOL_OPTION_VIDEO_META);
gst_buffer_pool_config_add_option (config,
GST_BUFFER_POOL_OPTION_VIDEO_ALIGNMENT);
gst_buffer_pool_config_set_video_alignment (config, &align);
size = GST_VIDEO_INFO_SIZE (&info); size = GST_VIDEO_INFO_SIZE (&info);
gst_buffer_pool_config_set_params (config, caps, size, 0, 0); gst_buffer_pool_config_set_params (config, caps, size, 0, 0);
gst_query_add_allocation_meta (query, GST_VIDEO_META_API_TYPE, NULL);
gst_query_add_allocation_pool (query, pool, size, 0, 0);
if (gst_buffer_pool_config_get_allocator (config, &allocator, &params)) {
if (params.align < 31)
params.align = 31;
gst_query_add_allocation_param (query, allocator, &params);
gst_buffer_pool_config_set_allocator (config, allocator, &params);
}
if (!gst_buffer_pool_set_config (pool, config)) { if (!gst_buffer_pool_set_config (pool, config)) {
GST_ERROR_OBJECT (ctrans, "failed to set config"); GST_ERROR_OBJECT (ctrans, "failed to set config");
gst_object_unref (pool); gst_object_unref (pool);
return FALSE; return FALSE;
} }
/* Get updated size by cuda buffer pool */
config = gst_buffer_pool_get_config (pool);
gst_buffer_pool_config_get_params (config, NULL, &size, NULL, NULL);
gst_structure_free (config);
gst_query_add_allocation_pool (query, pool, size, 0, 0);
gst_object_unref (pool); gst_object_unref (pool);
} }
@ -289,6 +272,12 @@ gst_cuda_upload_decide_allocation (GstBaseTransform * trans, GstQuery * query)
gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META); gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META);
gst_buffer_pool_config_set_params (config, outcaps, size, min, max); gst_buffer_pool_config_set_params (config, outcaps, size, min, max);
gst_buffer_pool_set_config (pool, config); gst_buffer_pool_set_config (pool, config);
/* Get updated size by cuda buffer pool */
config = gst_buffer_pool_get_config (pool);
gst_buffer_pool_config_get_params (config, NULL, &size, NULL, NULL);
gst_structure_free (config);
if (update_pool) if (update_pool)
gst_query_set_nth_allocation_pool (query, 0, pool, size, min, max); gst_query_set_nth_allocation_pool (query, 0, pool, size, min, max);
else else

View file

@ -625,6 +625,7 @@ gst_nv_base_enc_propose_allocation (GstVideoEncoder * enc, GstQuery * query)
GstBufferPool *pool; GstBufferPool *pool;
GstStructure *config; GstStructure *config;
GstCapsFeatures *features; GstCapsFeatures *features;
guint size;
GST_DEBUG_OBJECT (nvenc, "propose allocation"); GST_DEBUG_OBJECT (nvenc, "propose allocation");
@ -665,18 +666,25 @@ gst_nv_base_enc_propose_allocation (GstVideoEncoder * enc, GstQuery * query)
goto done; goto done;
} }
config = gst_buffer_pool_get_config (pool); size = GST_VIDEO_INFO_SIZE (&info);
gst_buffer_pool_config_set_params (config, caps, GST_VIDEO_INFO_SIZE (&info),
nvenc->items->len, nvenc->items->len);
gst_query_add_allocation_pool (query, pool, GST_VIDEO_INFO_SIZE (&info), config = gst_buffer_pool_get_config (pool);
gst_buffer_pool_config_set_params (config, caps, size,
nvenc->items->len, nvenc->items->len); nvenc->items->len, nvenc->items->len);
gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META); gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META);
gst_query_add_allocation_meta (query, GST_VIDEO_META_API_TYPE, NULL);
if (!gst_buffer_pool_set_config (pool, config)) if (!gst_buffer_pool_set_config (pool, config))
goto error_pool_config; goto error_pool_config;
/* Get updated size by cuda buffer pool */
config = gst_buffer_pool_get_config (pool);
gst_buffer_pool_config_get_params (config, NULL, &size, NULL, NULL);
gst_structure_free (config);
gst_query_add_allocation_pool (query, pool, size,
nvenc->items->len, nvenc->items->len);
gst_query_add_allocation_meta (query, GST_VIDEO_META_API_TYPE, NULL);
gst_object_unref (pool); gst_object_unref (pool);
done: done:
@ -2231,17 +2239,12 @@ gst_nv_base_enc_upload_frame (GstNvBaseEnc * nvenc, GstVideoFrame * frame,
CUdeviceptr dst = resource->cuda_pointer; CUdeviceptr dst = resource->cuda_pointer;
GstVideoInfo *info = &frame->info; GstVideoInfo *info = &frame->info;
CUresult cuda_ret; CUresult cuda_ret;
GstCudaMemory *cuda_mem = NULL;
if (!gst_cuda_context_push (nvenc->cuda_ctx)) { if (!gst_cuda_context_push (nvenc->cuda_ctx)) {
GST_ERROR_OBJECT (nvenc, "cannot push context"); GST_ERROR_OBJECT (nvenc, "cannot push context");
return FALSE; return FALSE;
} }
if (use_device_memory) {
cuda_mem = (GstCudaMemory *) gst_buffer_peek_memory (frame->buffer, 0);
}
for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (frame); i++) { for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (frame); i++) {
CUDA_MEMCPY2D param = { 0, }; CUDA_MEMCPY2D param = { 0, };
guint dest_stride = _get_cuda_device_stride (&nvenc->input_info, i, guint dest_stride = _get_cuda_device_stride (&nvenc->input_info, i,
@ -2249,13 +2252,12 @@ gst_nv_base_enc_upload_frame (GstNvBaseEnc * nvenc, GstVideoFrame * frame,
if (use_device_memory) { if (use_device_memory) {
param.srcMemoryType = CU_MEMORYTYPE_DEVICE; param.srcMemoryType = CU_MEMORYTYPE_DEVICE;
param.srcDevice = cuda_mem->data + cuda_mem->offset[i]; param.srcDevice = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (frame, i);
param.srcPitch = cuda_mem->stride;
} else { } else {
param.srcMemoryType = CU_MEMORYTYPE_HOST; param.srcMemoryType = CU_MEMORYTYPE_HOST;
param.srcHost = GST_VIDEO_FRAME_PLANE_DATA (frame, i); param.srcHost = GST_VIDEO_FRAME_PLANE_DATA (frame, i);
param.srcPitch = GST_VIDEO_FRAME_PLANE_STRIDE (frame, i);
} }
param.srcPitch = GST_VIDEO_FRAME_PLANE_STRIDE (frame, i);
param.dstMemoryType = CU_MEMORYTYPE_DEVICE; param.dstMemoryType = CU_MEMORYTYPE_DEVICE;
param.dstDevice = dst; param.dstDevice = dst;

View file

@ -1241,34 +1241,25 @@ gst_nvdec_copy_device_to_memory (GstNvDec * nvdec,
GstVideoInfo *info = &nvdec->output_state->info; GstVideoInfo *info = &nvdec->output_state->info;
gint i; gint i;
GstMemory *mem; GstMemory *mem;
GstCudaMemory *cuda_mem = NULL; gboolean use_device_copy = FALSE;
GstMapFlags map_flags = GST_MAP_WRITE;
if (!gst_cuda_context_push (nvdec->cuda_ctx)) {
GST_WARNING_OBJECT (nvdec, "failed to lock CUDA context");
return FALSE;
}
if (nvdec->mem_type == GST_NVDEC_MEM_TYPE_CUDA && if (nvdec->mem_type == GST_NVDEC_MEM_TYPE_CUDA &&
(mem = gst_buffer_peek_memory (output_buffer, 0)) && (mem = gst_buffer_peek_memory (output_buffer, 0)) &&
gst_is_cuda_memory (mem)) { gst_is_cuda_memory (mem)) {
GstCudaMemory *cmem = GST_CUDA_MEMORY_CAST (mem); map_flags |= GST_MAP_CUDA;
use_device_copy = TRUE;
if (cmem->context == nvdec->cuda_ctx ||
gst_cuda_context_get_handle (cmem->context) ==
gst_cuda_context_get_handle (nvdec->cuda_ctx) ||
(gst_cuda_context_can_access_peer (cmem->context, nvdec->cuda_ctx) &&
gst_cuda_context_can_access_peer (nvdec->cuda_ctx,
cmem->context))) {
cuda_mem = cmem;
}
} }
if (!cuda_mem) { if (!gst_video_frame_map (&video_frame, info, output_buffer, map_flags)) {
if (!gst_video_frame_map (&video_frame, info, output_buffer, GST_MAP_WRITE)) { GST_ERROR_OBJECT (nvdec, "frame map failure");
GST_ERROR_OBJECT (nvdec, "frame map failure"); return FALSE;
gst_cuda_context_pop (NULL); }
return FALSE;
} if (!gst_cuda_context_push (nvdec->cuda_ctx)) {
gst_video_frame_unmap (&video_frame);
GST_WARNING_OBJECT (nvdec, "failed to lock CUDA context");
return FALSE;
} }
params.progressive_frame = dispinfo->progressive_frame; params.progressive_frame = dispinfo->progressive_frame;
@ -1286,17 +1277,17 @@ gst_nvdec_copy_device_to_memory (GstNvDec * nvdec,
copy_params.srcMemoryType = CU_MEMORYTYPE_DEVICE; copy_params.srcMemoryType = CU_MEMORYTYPE_DEVICE;
copy_params.srcPitch = pitch; copy_params.srcPitch = pitch;
copy_params.dstMemoryType = copy_params.dstMemoryType =
cuda_mem ? CU_MEMORYTYPE_DEVICE : CU_MEMORYTYPE_HOST; use_device_copy ? CU_MEMORYTYPE_DEVICE : CU_MEMORYTYPE_HOST;
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++) { for (i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++) {
copy_params.srcDevice = dptr + (i * pitch * GST_VIDEO_INFO_HEIGHT (info)); copy_params.srcDevice = dptr + (i * pitch * GST_VIDEO_INFO_HEIGHT (info));
if (cuda_mem) { if (use_device_copy) {
copy_params.dstDevice = cuda_mem->data + cuda_mem->offset[i]; copy_params.dstDevice =
copy_params.dstPitch = cuda_mem->stride; (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (&video_frame, i);
} else { } else {
copy_params.dstHost = GST_VIDEO_FRAME_PLANE_DATA (&video_frame, i); copy_params.dstHost = GST_VIDEO_FRAME_PLANE_DATA (&video_frame, i);
copy_params.dstPitch = GST_VIDEO_FRAME_PLANE_STRIDE (&video_frame, i);
} }
copy_params.dstPitch = GST_VIDEO_FRAME_PLANE_STRIDE (&video_frame, i);
copy_params.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (info, i) copy_params.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (info, i)
* GST_VIDEO_INFO_COMP_PSTRIDE (info, i); * GST_VIDEO_INFO_COMP_PSTRIDE (info, i);
copy_params.Height = GST_VIDEO_INFO_COMP_HEIGHT (info, i); copy_params.Height = GST_VIDEO_INFO_COMP_HEIGHT (info, i);
@ -1304,8 +1295,7 @@ gst_nvdec_copy_device_to_memory (GstNvDec * nvdec,
if (!gst_cuda_result (CuMemcpy2DAsync (&copy_params, nvdec->cuda_stream))) { if (!gst_cuda_result (CuMemcpy2DAsync (&copy_params, nvdec->cuda_stream))) {
GST_ERROR_OBJECT (nvdec, "failed to copy %dth plane", i); GST_ERROR_OBJECT (nvdec, "failed to copy %dth plane", i);
CuvidUnmapVideoFrame (nvdec->decoder, dptr); CuvidUnmapVideoFrame (nvdec->decoder, dptr);
if (!cuda_mem) gst_video_frame_unmap (&video_frame);
gst_video_frame_unmap (&video_frame);
gst_cuda_context_pop (NULL); gst_cuda_context_pop (NULL);
return FALSE; return FALSE;
} }
@ -1313,8 +1303,7 @@ gst_nvdec_copy_device_to_memory (GstNvDec * nvdec,
gst_cuda_result (CuStreamSynchronize (nvdec->cuda_stream)); gst_cuda_result (CuStreamSynchronize (nvdec->cuda_stream));
if (!cuda_mem) gst_video_frame_unmap (&video_frame);
gst_video_frame_unmap (&video_frame);
if (!gst_cuda_result (CuvidUnmapVideoFrame (nvdec->decoder, dptr))) if (!gst_cuda_result (CuvidUnmapVideoFrame (nvdec->decoder, dptr)))
GST_WARNING_OBJECT (nvdec, "failed to unmap video frame"); GST_WARNING_OBJECT (nvdec, "failed to unmap video frame");
@ -1558,9 +1547,15 @@ gst_nvdec_ensure_cuda_pool (GstNvDec * nvdec, GstQuery * query)
n = gst_query_get_n_allocation_pools (query); n = gst_query_get_n_allocation_pools (query);
if (n > 0) { if (n > 0) {
gst_query_parse_nth_allocation_pool (query, 0, &pool, &size, &min, &max); gst_query_parse_nth_allocation_pool (query, 0, &pool, &size, &min, &max);
if (pool && !GST_IS_CUDA_BUFFER_POOL (pool)) { if (pool) {
gst_object_unref (pool); if (!GST_IS_CUDA_BUFFER_POOL (pool)) {
pool = NULL; gst_clear_object (&pool);
} else {
GstCudaBufferPool *cpool = GST_CUDA_BUFFER_POOL (pool);
if (cpool->context != nvdec->cuda_ctx)
gst_clear_object (&pool);
}
} }
} }
@ -1578,6 +1573,12 @@ gst_nvdec_ensure_cuda_pool (GstNvDec * nvdec, GstQuery * query)
gst_buffer_pool_config_set_params (config, outcaps, size, min, max); gst_buffer_pool_config_set_params (config, outcaps, size, min, max);
gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META); gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META);
gst_buffer_pool_set_config (pool, config); gst_buffer_pool_set_config (pool, config);
/* Get updated size by cuda buffer pool */
config = gst_buffer_pool_get_config (pool);
gst_buffer_pool_config_get_params (config, NULL, &size, NULL, NULL);
gst_structure_free (config);
if (n > 0) if (n > 0)
gst_query_set_nth_allocation_pool (query, 0, pool, size, min, max); gst_query_set_nth_allocation_pool (query, 0, pool, size, min, max);
else else

View file

@ -740,33 +740,24 @@ gst_nv_decoder_copy_frame_to_cuda (GstNvDecoder * decoder,
{ {
CUDA_MEMCPY2D copy_params = { 0, }; CUDA_MEMCPY2D copy_params = { 0, };
GstMemory *mem; GstMemory *mem;
GstCudaMemory *cuda_mem = NULL;
gint i; gint i;
gboolean ret = FALSE; gboolean ret = FALSE;
GstVideoFrame video_frame;
mem = gst_buffer_peek_memory (buffer, 0); mem = gst_buffer_peek_memory (buffer, 0);
if (!gst_is_cuda_memory (mem)) { if (!gst_is_cuda_memory (mem)) {
GST_WARNING_OBJECT (decoder, "Not a CUDA memory"); GST_WARNING_OBJECT (decoder, "Not a CUDA memory");
return FALSE; return FALSE;
} else {
GstCudaMemory *cmem = GST_CUDA_MEMORY_CAST (mem);
if (cmem->context == decoder->context ||
gst_cuda_context_get_handle (cmem->context) ==
gst_cuda_context_get_handle (decoder->context) ||
(gst_cuda_context_can_access_peer (cmem->context, decoder->context) &&
gst_cuda_context_can_access_peer (decoder->context,
cmem->context))) {
cuda_mem = cmem;
}
} }
if (!cuda_mem) { if (!gst_video_frame_map (&video_frame,
GST_WARNING_OBJECT (decoder, "Access to CUDA memory is not allowed"); &decoder->info, buffer, GST_MAP_WRITE | GST_MAP_CUDA)) {
GST_ERROR_OBJECT (decoder, "frame map failure");
return FALSE; return FALSE;
} }
if (!gst_cuda_context_push (decoder->context)) { if (!gst_cuda_context_push (decoder->context)) {
gst_video_frame_unmap (&video_frame);
GST_ERROR_OBJECT (decoder, "Failed to push CUDA context"); GST_ERROR_OBJECT (decoder, "Failed to push CUDA context");
return FALSE; return FALSE;
} }
@ -778,8 +769,9 @@ gst_nv_decoder_copy_frame_to_cuda (GstNvDecoder * decoder,
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (&decoder->info); i++) { for (i = 0; i < GST_VIDEO_INFO_N_PLANES (&decoder->info); i++) {
copy_params.srcDevice = frame->devptr + copy_params.srcDevice = frame->devptr +
(i * frame->pitch * GST_VIDEO_INFO_HEIGHT (&decoder->info)); (i * frame->pitch * GST_VIDEO_INFO_HEIGHT (&decoder->info));
copy_params.dstDevice = cuda_mem->data + cuda_mem->offset[i]; copy_params.dstDevice =
copy_params.dstPitch = cuda_mem->stride; (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (&video_frame, i);
copy_params.dstPitch = GST_VIDEO_FRAME_PLANE_STRIDE (&video_frame, i);
copy_params.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (&decoder->info, 0) copy_params.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (&decoder->info, 0)
* GST_VIDEO_INFO_COMP_PSTRIDE (&decoder->info, 0); * GST_VIDEO_INFO_COMP_PSTRIDE (&decoder->info, 0);
copy_params.Height = GST_VIDEO_INFO_COMP_HEIGHT (&decoder->info, i); copy_params.Height = GST_VIDEO_INFO_COMP_HEIGHT (&decoder->info, i);
@ -795,6 +787,7 @@ gst_nv_decoder_copy_frame_to_cuda (GstNvDecoder * decoder,
ret = TRUE; ret = TRUE;
done: done:
gst_video_frame_unmap (&video_frame);
gst_cuda_context_pop (NULL); gst_cuda_context_pop (NULL);
GST_LOG_OBJECT (decoder, "Copy frame to CUDA ret %d", ret); GST_LOG_OBJECT (decoder, "Copy frame to CUDA ret %d", ret);
@ -1567,6 +1560,12 @@ gst_nv_decoder_ensure_cuda_pool (GstNvDecoder * decoder, GstQuery * query)
gst_buffer_pool_config_set_params (config, outcaps, size, min, max); gst_buffer_pool_config_set_params (config, outcaps, size, min, max);
gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META); gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META);
gst_buffer_pool_set_config (pool, config); gst_buffer_pool_set_config (pool, config);
/* Get updated size by cuda buffer pool */
config = gst_buffer_pool_get_config (pool);
gst_buffer_pool_config_get_params (config, NULL, &size, NULL, NULL);
gst_structure_free (config);
if (n > 0) if (n > 0)
gst_query_set_nth_allocation_pool (query, 0, pool, size, min, max); gst_query_set_nth_allocation_pool (query, 0, pool, size, min, max);
else else

View file

@ -39,6 +39,7 @@
#include "gstcudadownload.h" #include "gstcudadownload.h"
#include "gstcudaupload.h" #include "gstcudaupload.h"
#include "gstcudafilter.h" #include "gstcudafilter.h"
#include "gstcudamemory.h"
GST_DEBUG_CATEGORY (gst_nvcodec_debug); GST_DEBUG_CATEGORY (gst_nvcodec_debug);
GST_DEBUG_CATEGORY (gst_nvdec_debug); GST_DEBUG_CATEGORY (gst_nvdec_debug);
@ -238,6 +239,7 @@ plugin_init (GstPlugin * plugin)
GST_TYPE_CUDA_UPLOAD); GST_TYPE_CUDA_UPLOAD);
gst_cuda_filter_plugin_init (plugin); gst_cuda_filter_plugin_init (plugin);
gst_cuda_memory_init_once ();
return TRUE; return TRUE;
} }