mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2024-11-26 19:51:11 +00:00
nvcodec: Refactor GstCudaMemory abstraction
* Hide GstCudaMemory member variables * Make GstCudaAllocator object GstCudaContext independent * Set offset/stride of memory correctly via video meta * Drop GST_BUFFER_POOL_OPTION_VIDEO_ALIGNMENT support. This implementation actually does not support custom alignment because we allocate device memory via cuMemAllocPitch of which alignment is almost uncontrollable Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/1834>
This commit is contained in:
parent
ad0e7fca14
commit
111b2c3f53
12 changed files with 580 additions and 759 deletions
|
@ -229,7 +229,7 @@ static const gchar templ_YUV_TO_YUV[] =
|
|||
GST_CUDA_KERNEL_FUNC
|
||||
"(cudaTextureObject_t tex0, cudaTextureObject_t tex1, cudaTextureObject_t tex2,\n"
|
||||
" unsigned char *dst0, unsigned char *dst1, unsigned char *dst2,\n"
|
||||
" int stride)\n"
|
||||
" int stride, int uv_stride)\n"
|
||||
"{\n"
|
||||
" int x_pos = blockIdx.x * blockDim.x + threadIdx.x;\n"
|
||||
" int y_pos = blockIdx.y * blockDim.y + threadIdx.y;\n"
|
||||
|
@ -265,7 +265,7 @@ GST_CUDA_KERNEL_FUNC
|
|||
" v = tmp;\n"
|
||||
" }\n"
|
||||
" write_chroma (dst1,\n"
|
||||
" dst2, u, v, x_pos, y_pos, CHROMA_PSTRIDE, stride, MASK);\n"
|
||||
" dst2, u, v, x_pos, y_pos, CHROMA_PSTRIDE, uv_stride, MASK);\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
|
@ -589,7 +589,7 @@ GST_CUDA_KERNEL_FUNC_TO_Y444
|
|||
GST_CUDA_KERNEL_FUNC_Y444_TO_YUV
|
||||
"(cudaTextureObject_t tex0, cudaTextureObject_t tex1, cudaTextureObject_t tex2,\n"
|
||||
" unsigned char *dst0, unsigned char *dst1, unsigned char *dst2,\n"
|
||||
" int stride)\n"
|
||||
" int stride, int uv_stride)\n"
|
||||
"{\n"
|
||||
" int x_pos = blockIdx.x * blockDim.x + threadIdx.x;\n"
|
||||
" int y_pos = blockIdx.y * blockDim.y + threadIdx.y;\n"
|
||||
|
@ -626,7 +626,7 @@ GST_CUDA_KERNEL_FUNC_Y444_TO_YUV
|
|||
" v = tmp;\n"
|
||||
" }\n"
|
||||
" write_chroma (dst1,\n"
|
||||
" dst2, u, v, x_pos, y_pos, CHROMA_PSTRIDE, stride, MASK);\n"
|
||||
" dst2, u, v, x_pos, y_pos, CHROMA_PSTRIDE, uv_stride, MASK);\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
|
@ -745,9 +745,10 @@ struct _GstCudaConverter
|
|||
gchar *ptx;
|
||||
GstCudaStageBuffer fallback_buffer[GST_VIDEO_MAX_PLANES];
|
||||
|
||||
gboolean (*convert) (GstCudaConverter * convert, const GstCudaMemory * src,
|
||||
GstVideoInfo * in_info, GstCudaMemory * dst, GstVideoInfo * out_info,
|
||||
CUstream cuda_stream);
|
||||
/* *INDENT-OFF* */
|
||||
gboolean (*convert) (GstCudaConverter * convert, GstVideoFrame * src_frame,
|
||||
GstVideoFrame * dst_frame, CUstream cuda_stream);
|
||||
/* *INDENT-ON* */
|
||||
|
||||
const CUdeviceptr src;
|
||||
GstVideoInfo *cur_in_info;
|
||||
|
@ -893,67 +894,25 @@ gst_cuda_converter_free (GstCudaConverter * convert)
|
|||
g_free (convert);
|
||||
}
|
||||
|
||||
/**
|
||||
* gst_cuda_converter_frame:
|
||||
* @convert: a #GstCudaConverter
|
||||
* @src: a #GstCudaMemory
|
||||
* @in_info: a #GstVideoInfo representing @src
|
||||
* @dst: a #GstCudaMemory
|
||||
* @out_info: a #GstVideoInfo representing @dst
|
||||
* @cuda_stream: a #CUstream
|
||||
*
|
||||
* Convert the pixels of @src into @dest using @convert.
|
||||
* Called without gst_cuda_context_push() and gst_cuda_context_pop() by caller
|
||||
*/
|
||||
gboolean
|
||||
gst_cuda_converter_frame (GstCudaConverter * convert, const GstCudaMemory * src,
|
||||
GstVideoInfo * in_info, GstCudaMemory * dst, GstVideoInfo * out_info,
|
||||
CUstream cuda_stream)
|
||||
gst_cuda_converter_convert_frame (GstCudaConverter * convert,
|
||||
GstVideoFrame * src_frame, GstVideoFrame * dst_frame, CUstream cuda_stream)
|
||||
{
|
||||
gboolean ret;
|
||||
|
||||
g_return_val_if_fail (convert, FALSE);
|
||||
g_return_val_if_fail (src, FALSE);
|
||||
g_return_val_if_fail (in_info, FALSE);
|
||||
g_return_val_if_fail (dst, FALSE);
|
||||
g_return_val_if_fail (out_info, FALSE);
|
||||
g_return_val_if_fail (src_frame, FALSE);
|
||||
g_return_val_if_fail (dst_frame, FALSE);
|
||||
|
||||
gst_cuda_context_push (convert->cuda_ctx);
|
||||
|
||||
ret = gst_cuda_converter_frame_unlocked (convert,
|
||||
src, in_info, dst, out_info, cuda_stream);
|
||||
ret = convert->convert (convert, src_frame, dst_frame, cuda_stream);
|
||||
|
||||
gst_cuda_context_pop (NULL);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* gst_cuda_converter_frame_unlocked:
|
||||
* @convert: a #GstCudaConverter
|
||||
* @src: a #GstCudaMemory
|
||||
* @in_info: a #GstVideoInfo representing @src
|
||||
* @dst: a #GstCudaMemory
|
||||
* @out_info: a #GstVideoInfo representing @dest
|
||||
* @cuda_stream: a #CUstream
|
||||
*
|
||||
* Convert the pixels of @src into @dest using @convert.
|
||||
* Caller should call this method after gst_cuda_context_push()
|
||||
*/
|
||||
gboolean
|
||||
gst_cuda_converter_frame_unlocked (GstCudaConverter * convert,
|
||||
const GstCudaMemory * src, GstVideoInfo * in_info, GstCudaMemory * dst,
|
||||
GstVideoInfo * out_info, CUstream cuda_stream)
|
||||
{
|
||||
g_return_val_if_fail (convert, FALSE);
|
||||
g_return_val_if_fail (src, FALSE);
|
||||
g_return_val_if_fail (in_info, FALSE);
|
||||
g_return_val_if_fail (dst, FALSE);
|
||||
g_return_val_if_fail (out_info, FALSE);
|
||||
|
||||
return convert->convert (convert, src, in_info, dst, out_info, cuda_stream);
|
||||
}
|
||||
|
||||
/* allocate fallback memory for texture alignment requirement */
|
||||
static gboolean
|
||||
convert_ensure_fallback_memory (GstCudaConverter * convert,
|
||||
|
@ -1020,8 +979,8 @@ convert_create_texture_unchecked (const CUdeviceptr src, gint width,
|
|||
}
|
||||
|
||||
static CUtexObject
|
||||
convert_create_texture (GstCudaConverter * convert, const GstCudaMemory * src,
|
||||
GstVideoInfo * info, guint plane, CUstream cuda_stream)
|
||||
convert_create_texture (GstCudaConverter * convert, GstVideoFrame * src_frame,
|
||||
guint plane, CUstream cuda_stream)
|
||||
{
|
||||
CUarray_format format = CU_AD_FORMAT_UNSIGNED_INT8;
|
||||
guint channels = 1;
|
||||
|
@ -1030,22 +989,23 @@ convert_create_texture (GstCudaConverter * convert, const GstCudaMemory * src,
|
|||
CUresult cuda_ret;
|
||||
CUfilter_mode mode;
|
||||
|
||||
if (GST_VIDEO_INFO_COMP_DEPTH (info, plane) > 8)
|
||||
if (GST_VIDEO_FRAME_COMP_DEPTH (src_frame, plane) > 8)
|
||||
format = CU_AD_FORMAT_UNSIGNED_INT16;
|
||||
|
||||
/* FIXME: more graceful method ? */
|
||||
if (plane != 0 &&
|
||||
GST_VIDEO_INFO_N_PLANES (info) != GST_VIDEO_INFO_N_COMPONENTS (info)) {
|
||||
GST_VIDEO_FRAME_N_PLANES (src_frame) !=
|
||||
GST_VIDEO_FRAME_N_COMPONENTS (src_frame)) {
|
||||
channels = 2;
|
||||
}
|
||||
|
||||
src_ptr = src->data + src->offset[plane];
|
||||
stride = src->stride;
|
||||
src_ptr = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (src_frame, plane);
|
||||
stride = GST_VIDEO_FRAME_PLANE_STRIDE (src_frame, plane);
|
||||
|
||||
if (convert->texture_alignment && (src_ptr % convert->texture_alignment)) {
|
||||
CUDA_MEMCPY2D copy_params = { 0, };
|
||||
|
||||
if (!convert_ensure_fallback_memory (convert, info, plane))
|
||||
if (!convert_ensure_fallback_memory (convert, &src_frame->info, plane))
|
||||
return 0;
|
||||
|
||||
GST_LOG ("device memory was not aligned, copy to fallback memory");
|
||||
|
@ -1057,9 +1017,9 @@ convert_create_texture (GstCudaConverter * convert, const GstCudaMemory * src,
|
|||
copy_params.dstMemoryType = CU_MEMORYTYPE_DEVICE;
|
||||
copy_params.dstPitch = convert->fallback_buffer[plane].cuda_stride;
|
||||
copy_params.dstDevice = convert->fallback_buffer[plane].device_ptr;
|
||||
copy_params.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (info, plane)
|
||||
* GST_VIDEO_INFO_COMP_PSTRIDE (info, plane);
|
||||
copy_params.Height = GST_VIDEO_INFO_COMP_HEIGHT (info, plane);
|
||||
copy_params.WidthInBytes = GST_VIDEO_FRAME_COMP_WIDTH (src_frame, plane)
|
||||
* GST_VIDEO_FRAME_COMP_PSTRIDE (src_frame, plane);
|
||||
copy_params.Height = GST_VIDEO_FRAME_COMP_HEIGHT (src_frame, plane);
|
||||
|
||||
cuda_ret = CuMemcpy2DAsync (©_params, cuda_stream);
|
||||
if (!gst_cuda_result (cuda_ret)) {
|
||||
|
@ -1079,27 +1039,26 @@ convert_create_texture (GstCudaConverter * convert, const GstCudaMemory * src,
|
|||
mode = CU_TR_FILTER_MODE_LINEAR;
|
||||
|
||||
return convert_create_texture_unchecked (src_ptr,
|
||||
GST_VIDEO_INFO_COMP_WIDTH (info, plane),
|
||||
GST_VIDEO_INFO_COMP_HEIGHT (info, plane), channels, stride, format, mode,
|
||||
cuda_stream);
|
||||
GST_VIDEO_FRAME_COMP_WIDTH (src_frame, plane),
|
||||
GST_VIDEO_FRAME_COMP_HEIGHT (src_frame, plane), channels, stride, format,
|
||||
mode, cuda_stream);
|
||||
}
|
||||
|
||||
/* main conversion function for YUV to YUV conversion */
|
||||
static gboolean
|
||||
convert_YUV_TO_YUV (GstCudaConverter * convert,
|
||||
const GstCudaMemory * src, GstVideoInfo * in_info, GstCudaMemory * dst,
|
||||
GstVideoInfo * out_info, CUstream cuda_stream)
|
||||
convert_YUV_TO_YUV (GstCudaConverter * convert, GstVideoFrame * src_frame,
|
||||
GstVideoFrame * dst_frame, CUstream cuda_stream)
|
||||
{
|
||||
CUtexObject texture[GST_VIDEO_MAX_PLANES] = { 0, };
|
||||
CUresult cuda_ret;
|
||||
gboolean ret = FALSE;
|
||||
CUdeviceptr dst_ptr[GST_VIDEO_MAX_PLANES] = { 0, };
|
||||
gint dst_stride;
|
||||
gint dst_stride, dst_uv_stride;
|
||||
gint width, height;
|
||||
gint i;
|
||||
|
||||
gpointer kernel_args[] = { &texture[0], &texture[1], &texture[2],
|
||||
&dst_ptr[0], &dst_ptr[1], &dst_ptr[2], &dst_stride
|
||||
&dst_ptr[0], &dst_ptr[1], &dst_ptr[2], &dst_stride, &dst_uv_stride
|
||||
};
|
||||
|
||||
/* conversion step
|
||||
|
@ -1110,21 +1069,23 @@ convert_YUV_TO_YUV (GstCudaConverter * convert,
|
|||
*/
|
||||
|
||||
/* map CUDA device memory to CUDA texture object */
|
||||
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (in_info); i++) {
|
||||
texture[i] = convert_create_texture (convert, src, in_info, i, cuda_stream);
|
||||
for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (src_frame); i++) {
|
||||
texture[i] = convert_create_texture (convert, src_frame, i, cuda_stream);
|
||||
if (!texture[i]) {
|
||||
GST_ERROR ("couldn't create texture for %d th plane", i);
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (out_info); i++)
|
||||
dst_ptr[i] = dst->data + dst->offset[i];
|
||||
for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (dst_frame); i++) {
|
||||
dst_ptr[i] = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (dst_frame, i);
|
||||
}
|
||||
|
||||
dst_stride = dst->stride;
|
||||
dst_stride = GST_VIDEO_FRAME_PLANE_STRIDE (dst_frame, 0);
|
||||
dst_uv_stride = GST_VIDEO_FRAME_PLANE_STRIDE (dst_frame, 1);
|
||||
|
||||
width = GST_VIDEO_INFO_WIDTH (out_info);
|
||||
height = GST_VIDEO_INFO_HEIGHT (out_info);
|
||||
width = GST_VIDEO_FRAME_WIDTH (dst_frame);
|
||||
height = GST_VIDEO_FRAME_HEIGHT (dst_frame);
|
||||
|
||||
cuda_ret =
|
||||
CuLaunchKernel (convert->kernel_func[0], DIV_UP (width, CUDA_BLOCK_X),
|
||||
|
@ -1140,7 +1101,7 @@ convert_YUV_TO_YUV (GstCudaConverter * convert,
|
|||
gst_cuda_result (CuStreamSynchronize (cuda_stream));
|
||||
|
||||
done:
|
||||
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (in_info); i++) {
|
||||
for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (src_frame); i++) {
|
||||
if (texture[i])
|
||||
gst_cuda_result (CuTexObjectDestroy (texture[i]));
|
||||
}
|
||||
|
@ -1150,9 +1111,8 @@ done:
|
|||
|
||||
/* main conversion function for YUV to RGB conversion */
|
||||
static gboolean
|
||||
convert_YUV_TO_RGB (GstCudaConverter * convert,
|
||||
const GstCudaMemory * src, GstVideoInfo * in_info, GstCudaMemory * dst,
|
||||
GstVideoInfo * out_info, CUstream cuda_stream)
|
||||
convert_YUV_TO_RGB (GstCudaConverter * convert, GstVideoFrame * src_frame,
|
||||
GstVideoFrame * dst_frame, CUstream cuda_stream)
|
||||
{
|
||||
CUtexObject texture[GST_VIDEO_MAX_PLANES] = { 0, };
|
||||
CUresult cuda_ret;
|
||||
|
@ -1174,19 +1134,19 @@ convert_YUV_TO_RGB (GstCudaConverter * convert,
|
|||
*/
|
||||
|
||||
/* map CUDA device memory to CUDA texture object */
|
||||
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (in_info); i++) {
|
||||
texture[i] = convert_create_texture (convert, src, in_info, i, cuda_stream);
|
||||
for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (src_frame); i++) {
|
||||
texture[i] = convert_create_texture (convert, src_frame, i, cuda_stream);
|
||||
if (!texture[i]) {
|
||||
GST_ERROR ("couldn't create texture for %d th plane", i);
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
|
||||
dstRGB = dst->data;
|
||||
dst_stride = dst->stride;
|
||||
dstRGB = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (dst_frame, 0);
|
||||
dst_stride = GST_VIDEO_FRAME_PLANE_STRIDE (dst_frame, 0);
|
||||
|
||||
width = GST_VIDEO_INFO_WIDTH (out_info);
|
||||
height = GST_VIDEO_INFO_HEIGHT (out_info);
|
||||
width = GST_VIDEO_FRAME_WIDTH (dst_frame);
|
||||
height = GST_VIDEO_FRAME_HEIGHT (dst_frame);
|
||||
|
||||
cuda_ret =
|
||||
CuLaunchKernel (convert->kernel_func[0], DIV_UP (width, CUDA_BLOCK_X),
|
||||
|
@ -1202,7 +1162,7 @@ convert_YUV_TO_RGB (GstCudaConverter * convert,
|
|||
gst_cuda_result (CuStreamSynchronize (cuda_stream));
|
||||
|
||||
done:
|
||||
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (in_info); i++) {
|
||||
for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (src_frame); i++) {
|
||||
if (texture[i])
|
||||
gst_cuda_result (CuTexObjectDestroy (texture[i]));
|
||||
}
|
||||
|
@ -1212,7 +1172,7 @@ done:
|
|||
|
||||
static gboolean
|
||||
convert_UNPACK_RGB (GstCudaConverter * convert, CUfunction kernel_func,
|
||||
CUstream cuda_stream, const GstCudaMemory * src, GstVideoInfo * in_info,
|
||||
CUstream cuda_stream, GstVideoFrame * src_frame,
|
||||
CUdeviceptr dst, gint dst_stride, GstCudaRGBOrder * rgb_order)
|
||||
{
|
||||
CUdeviceptr srcRGB = 0;
|
||||
|
@ -1227,12 +1187,12 @@ convert_UNPACK_RGB (GstCudaConverter * convert, CUfunction kernel_func,
|
|||
&convert->in_rgb_order.B, &convert->in_rgb_order.A,
|
||||
};
|
||||
|
||||
srcRGB = src->data;
|
||||
src_stride = src->stride;
|
||||
srcRGB = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (src_frame, 0);
|
||||
src_stride = GST_VIDEO_FRAME_PLANE_STRIDE (src_frame, 0);
|
||||
|
||||
width = GST_VIDEO_INFO_WIDTH (in_info);
|
||||
height = GST_VIDEO_INFO_HEIGHT (in_info);
|
||||
src_pstride = GST_VIDEO_INFO_COMP_PSTRIDE (in_info, 0);
|
||||
width = GST_VIDEO_FRAME_WIDTH (src_frame);
|
||||
height = GST_VIDEO_FRAME_HEIGHT (src_frame);
|
||||
src_pstride = GST_VIDEO_FRAME_COMP_PSTRIDE (src_frame, 0);
|
||||
|
||||
cuda_ret =
|
||||
CuLaunchKernel (kernel_func, DIV_UP (width, CUDA_BLOCK_X),
|
||||
|
@ -1274,9 +1234,8 @@ convert_TO_Y444 (GstCudaConverter * convert, CUfunction kernel_func,
|
|||
|
||||
/* main conversion function for RGB to YUV conversion */
|
||||
static gboolean
|
||||
convert_RGB_TO_YUV (GstCudaConverter * convert,
|
||||
const GstCudaMemory * src, GstVideoInfo * in_info, GstCudaMemory * dst,
|
||||
GstVideoInfo * out_info, CUstream cuda_stream)
|
||||
convert_RGB_TO_YUV (GstCudaConverter * convert, GstVideoFrame * src_frame,
|
||||
GstVideoFrame * dst_frame, CUstream cuda_stream)
|
||||
{
|
||||
CUtexObject texture = 0;
|
||||
CUtexObject yuv_texture[3] = { 0, };
|
||||
|
@ -1285,7 +1244,7 @@ convert_RGB_TO_YUV (GstCudaConverter * convert,
|
|||
gboolean ret = FALSE;
|
||||
gint in_width, in_height;
|
||||
gint out_width, out_height;
|
||||
gint dst_stride;
|
||||
gint dst_stride, dst_uv_stride;
|
||||
CUarray_format format = CU_AD_FORMAT_UNSIGNED_INT8;
|
||||
CUfilter_mode mode = CU_TR_FILTER_MODE_POINT;
|
||||
gint pstride = 1;
|
||||
|
@ -1293,7 +1252,7 @@ convert_RGB_TO_YUV (GstCudaConverter * convert,
|
|||
gint i;
|
||||
|
||||
gpointer kernel_args[] = { &yuv_texture[0], &yuv_texture[1], &yuv_texture[2],
|
||||
&dst_ptr[0], &dst_ptr[1], &dst_ptr[2], &dst_stride
|
||||
&dst_ptr[0], &dst_ptr[1], &dst_ptr[2], &dst_stride, &dst_uv_stride
|
||||
};
|
||||
|
||||
/* conversion step
|
||||
|
@ -1304,21 +1263,22 @@ convert_RGB_TO_YUV (GstCudaConverter * convert,
|
|||
* the CUDA kernel function
|
||||
*/
|
||||
if (!convert_UNPACK_RGB (convert, convert->kernel_func[0], cuda_stream,
|
||||
src, in_info, convert->unpack_surface.device_ptr,
|
||||
src_frame, convert->unpack_surface.device_ptr,
|
||||
convert->unpack_surface.cuda_stride, &convert->in_rgb_order)) {
|
||||
GST_ERROR ("could not unpack input rgb");
|
||||
|
||||
goto done;
|
||||
}
|
||||
|
||||
in_width = GST_VIDEO_INFO_WIDTH (in_info);
|
||||
in_height = GST_VIDEO_INFO_HEIGHT (in_info);
|
||||
in_width = GST_VIDEO_FRAME_WIDTH (src_frame);
|
||||
in_height = GST_VIDEO_FRAME_HEIGHT (src_frame);
|
||||
|
||||
out_width = GST_VIDEO_INFO_WIDTH (out_info);
|
||||
out_height = GST_VIDEO_INFO_HEIGHT (out_info);
|
||||
dst_stride = dst->stride;
|
||||
out_width = GST_VIDEO_FRAME_WIDTH (dst_frame);
|
||||
out_height = GST_VIDEO_FRAME_HEIGHT (dst_frame);
|
||||
dst_stride = GST_VIDEO_FRAME_PLANE_STRIDE (dst_frame, 0);
|
||||
dst_uv_stride = GST_VIDEO_FRAME_PLANE_STRIDE (dst_frame, 1);
|
||||
|
||||
if (GST_VIDEO_INFO_COMP_DEPTH (in_info, 0) > 8) {
|
||||
if (GST_VIDEO_FRAME_COMP_DEPTH (src_frame, 0) > 8) {
|
||||
pstride = 2;
|
||||
bitdepth = 16;
|
||||
format = CU_AD_FORMAT_UNSIGNED_INT16;
|
||||
|
@ -1365,8 +1325,8 @@ convert_RGB_TO_YUV (GstCudaConverter * convert,
|
|||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (out_info); i++)
|
||||
dst_ptr[i] = dst->data + dst->offset[i];
|
||||
for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (dst_frame); i++)
|
||||
dst_ptr[i] = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (dst_frame, i);
|
||||
|
||||
cuda_ret =
|
||||
CuLaunchKernel (convert->kernel_func[2], DIV_UP (out_width, CUDA_BLOCK_X),
|
||||
|
@ -1394,9 +1354,8 @@ done:
|
|||
|
||||
/* main conversion function for RGB to RGB conversion */
|
||||
static gboolean
|
||||
convert_RGB_TO_RGB (GstCudaConverter * convert,
|
||||
const GstCudaMemory * src, GstVideoInfo * in_info, GstCudaMemory * dst,
|
||||
GstVideoInfo * out_info, CUstream cuda_stream)
|
||||
convert_RGB_TO_RGB (GstCudaConverter * convert, GstVideoFrame * src_frame,
|
||||
GstVideoFrame * dst_frame, CUstream cuda_stream)
|
||||
{
|
||||
CUtexObject texture = 0;
|
||||
CUresult cuda_ret;
|
||||
|
@ -1418,23 +1377,23 @@ convert_RGB_TO_RGB (GstCudaConverter * convert,
|
|||
*/
|
||||
|
||||
if (!convert_UNPACK_RGB (convert, convert->kernel_func[0], cuda_stream,
|
||||
src, in_info, convert->unpack_surface.device_ptr,
|
||||
src_frame, convert->unpack_surface.device_ptr,
|
||||
convert->unpack_surface.cuda_stride, &convert->in_rgb_order)) {
|
||||
GST_ERROR ("could not unpack input rgb");
|
||||
|
||||
goto done;
|
||||
}
|
||||
|
||||
in_width = GST_VIDEO_INFO_WIDTH (in_info);
|
||||
in_height = GST_VIDEO_INFO_HEIGHT (in_info);
|
||||
in_width = GST_VIDEO_FRAME_WIDTH (src_frame);
|
||||
in_height = GST_VIDEO_FRAME_HEIGHT (src_frame);
|
||||
|
||||
out_width = GST_VIDEO_INFO_WIDTH (out_info);
|
||||
out_height = GST_VIDEO_INFO_HEIGHT (out_info);
|
||||
out_width = GST_VIDEO_FRAME_WIDTH (dst_frame);
|
||||
out_height = GST_VIDEO_FRAME_HEIGHT (dst_frame);
|
||||
|
||||
dstRGB = dst->data;
|
||||
dst_stride = dst->stride;
|
||||
dstRGB = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (dst_frame, 0);
|
||||
dst_stride = GST_VIDEO_FRAME_PLANE_STRIDE (dst_frame, 0);
|
||||
|
||||
if (GST_VIDEO_INFO_COMP_DEPTH (in_info, 0) > 8)
|
||||
if (GST_VIDEO_FRAME_COMP_DEPTH (src_frame, 0) > 8)
|
||||
format = CU_AD_FORMAT_UNSIGNED_INT16;
|
||||
|
||||
/* Use h/w linear interpolation only when resize is required.
|
||||
|
|
|
@ -34,21 +34,11 @@ GstCudaConverter * gst_cuda_converter_new (GstVideoInfo * in_info,
|
|||
|
||||
void gst_cuda_converter_free (GstCudaConverter * convert);
|
||||
|
||||
gboolean gst_cuda_converter_frame (GstCudaConverter * convert,
|
||||
const GstCudaMemory * src,
|
||||
GstVideoInfo * in_info,
|
||||
GstCudaMemory * dst,
|
||||
GstVideoInfo * out_info,
|
||||
gboolean gst_cuda_converter_convert_frame (GstCudaConverter * convert,
|
||||
GstVideoFrame * src_frame,
|
||||
GstVideoFrame * dst_frame,
|
||||
CUstream cuda_stream);
|
||||
|
||||
gboolean gst_cuda_converter_frame_unlocked (GstCudaConverter * convert,
|
||||
const GstCudaMemory * src,
|
||||
GstVideoInfo * in_info,
|
||||
GstCudaMemory * dst,
|
||||
GstVideoInfo * out_info,
|
||||
CUstream cuda_stream);
|
||||
|
||||
|
||||
G_END_DECLS
|
||||
|
||||
#endif /* __GST_CUDA_CONVERTER_H__ */
|
||||
|
|
|
@ -168,47 +168,30 @@ gst_cuda_base_filter_propose_allocation (GstBaseTransform * trans,
|
|||
|
||||
if (gst_query_get_n_allocation_pools (query) == 0) {
|
||||
GstStructure *config;
|
||||
GstVideoAlignment align;
|
||||
GstAllocationParams params = { 0, 31, 0, 0, };
|
||||
GstAllocator *allocator = NULL;
|
||||
gint i;
|
||||
|
||||
pool = gst_cuda_buffer_pool_new (ctrans->context);
|
||||
|
||||
config = gst_buffer_pool_get_config (pool);
|
||||
|
||||
gst_video_alignment_reset (&align);
|
||||
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (&info); i++) {
|
||||
align.stride_align[i] = 31;
|
||||
}
|
||||
gst_video_info_align (&info, &align);
|
||||
|
||||
gst_buffer_pool_config_add_option (config,
|
||||
GST_BUFFER_POOL_OPTION_VIDEO_META);
|
||||
gst_buffer_pool_config_add_option (config,
|
||||
GST_BUFFER_POOL_OPTION_VIDEO_ALIGNMENT);
|
||||
|
||||
gst_buffer_pool_config_set_video_alignment (config, &align);
|
||||
size = GST_VIDEO_INFO_SIZE (&info);
|
||||
gst_buffer_pool_config_set_params (config, caps, size, 0, 0);
|
||||
|
||||
gst_query_add_allocation_meta (query, GST_VIDEO_META_API_TYPE, NULL);
|
||||
gst_query_add_allocation_pool (query, pool, size, 0, 0);
|
||||
|
||||
if (gst_buffer_pool_config_get_allocator (config, &allocator, ¶ms)) {
|
||||
if (params.align < 31)
|
||||
params.align = 31;
|
||||
|
||||
gst_query_add_allocation_param (query, allocator, ¶ms);
|
||||
gst_buffer_pool_config_set_allocator (config, allocator, ¶ms);
|
||||
}
|
||||
|
||||
if (!gst_buffer_pool_set_config (pool, config)) {
|
||||
GST_ERROR_OBJECT (ctrans, "failed to set config");
|
||||
gst_object_unref (pool);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* Get updated size by cuda buffer pool */
|
||||
config = gst_buffer_pool_get_config (pool);
|
||||
gst_buffer_pool_config_get_params (config, NULL, &size, NULL, NULL);
|
||||
gst_structure_free (config);
|
||||
|
||||
gst_query_add_allocation_pool (query, pool, size, 0, 0);
|
||||
|
||||
gst_object_unref (pool);
|
||||
}
|
||||
|
||||
|
@ -265,6 +248,12 @@ gst_cuda_base_filter_decide_allocation (GstBaseTransform * trans,
|
|||
gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META);
|
||||
gst_buffer_pool_config_set_params (config, outcaps, size, min, max);
|
||||
gst_buffer_pool_set_config (pool, config);
|
||||
|
||||
/* Get updated size by cuda buffer pool */
|
||||
config = gst_buffer_pool_get_config (pool);
|
||||
gst_buffer_pool_config_get_params (config, NULL, &size, NULL, NULL);
|
||||
gst_structure_free (config);
|
||||
|
||||
if (update_pool)
|
||||
gst_query_set_nth_allocation_pool (query, 0, pool, size, min, max);
|
||||
else
|
||||
|
@ -285,8 +274,6 @@ gst_cuda_base_filter_transform (GstBaseTransform * trans,
|
|||
GstVideoFrame in_frame, out_frame;
|
||||
GstFlowReturn ret = GST_FLOW_OK;
|
||||
GstMemory *mem;
|
||||
GstCudaMemory *in_cuda_mem = NULL;
|
||||
GstCudaMemory *out_cuda_mem = NULL;
|
||||
|
||||
if (gst_buffer_n_memory (inbuf) != 1) {
|
||||
GST_ERROR_OBJECT (self, "Invalid input buffer");
|
||||
|
@ -299,8 +286,6 @@ gst_cuda_base_filter_transform (GstBaseTransform * trans,
|
|||
return GST_FLOW_ERROR;
|
||||
}
|
||||
|
||||
in_cuda_mem = GST_CUDA_MEMORY_CAST (mem);
|
||||
|
||||
if (gst_buffer_n_memory (outbuf) != 1) {
|
||||
GST_ERROR_OBJECT (self, "Invalid output buffer");
|
||||
return GST_FLOW_ERROR;
|
||||
|
@ -312,8 +297,6 @@ gst_cuda_base_filter_transform (GstBaseTransform * trans,
|
|||
return GST_FLOW_ERROR;
|
||||
}
|
||||
|
||||
out_cuda_mem = GST_CUDA_MEMORY_CAST (mem);
|
||||
|
||||
if (!gst_video_frame_map (&in_frame, &ctrans->in_info, inbuf,
|
||||
GST_MAP_READ | GST_MAP_CUDA)) {
|
||||
GST_ERROR_OBJECT (self, "Failed to map input buffer");
|
||||
|
@ -327,9 +310,8 @@ gst_cuda_base_filter_transform (GstBaseTransform * trans,
|
|||
return GST_FLOW_ERROR;
|
||||
}
|
||||
|
||||
if (!gst_cuda_converter_frame (self->converter,
|
||||
in_cuda_mem, &ctrans->in_info,
|
||||
out_cuda_mem, &ctrans->out_info, ctrans->cuda_stream)) {
|
||||
if (!gst_cuda_converter_convert_frame (self->converter, &in_frame, &out_frame,
|
||||
ctrans->cuda_stream)) {
|
||||
GST_ERROR_OBJECT (self, "Failed to convert frame");
|
||||
ret = GST_FLOW_ERROR;
|
||||
}
|
||||
|
|
|
@ -30,11 +30,8 @@ GST_DEBUG_CATEGORY_STATIC (gst_cuda_buffer_pool_debug);
|
|||
|
||||
struct _GstCudaBufferPoolPrivate
|
||||
{
|
||||
GstAllocator *allocator;
|
||||
GstCudaAllocator *allocator;
|
||||
GstVideoInfo info;
|
||||
gboolean add_videometa;
|
||||
gboolean need_alignment;
|
||||
GstCudaAllocationParams params;
|
||||
};
|
||||
|
||||
#define gst_cuda_buffer_pool_parent_class parent_class
|
||||
|
@ -44,8 +41,7 @@ G_DEFINE_TYPE_WITH_PRIVATE (GstCudaBufferPool, gst_cuda_buffer_pool,
|
|||
static const gchar **
|
||||
gst_cuda_buffer_pool_get_options (GstBufferPool * pool)
|
||||
{
|
||||
static const gchar *options[] = { GST_BUFFER_POOL_OPTION_VIDEO_META,
|
||||
GST_BUFFER_POOL_OPTION_VIDEO_ALIGNMENT, NULL
|
||||
static const gchar *options[] = { GST_BUFFER_POOL_OPTION_VIDEO_META, NULL
|
||||
};
|
||||
|
||||
return options;
|
||||
|
@ -54,153 +50,86 @@ gst_cuda_buffer_pool_get_options (GstBufferPool * pool)
|
|||
static gboolean
|
||||
gst_cuda_buffer_pool_set_config (GstBufferPool * pool, GstStructure * config)
|
||||
{
|
||||
GstCudaBufferPool *cuda_pool = GST_CUDA_BUFFER_POOL_CAST (pool);
|
||||
GstCudaBufferPoolPrivate *priv = cuda_pool->priv;
|
||||
GstCudaBufferPool *self = GST_CUDA_BUFFER_POOL (pool);
|
||||
GstCudaBufferPoolPrivate *priv = self->priv;
|
||||
GstCaps *caps = NULL;
|
||||
guint size, min_buffers, max_buffers;
|
||||
guint max_align, n;
|
||||
GstAllocator *allocator = NULL;
|
||||
GstAllocationParams *params = (GstAllocationParams *) & priv->params;
|
||||
GstVideoInfo *info = &priv->params.info;
|
||||
GstVideoInfo info;
|
||||
GstMemory *mem;
|
||||
GstCudaMemory *cmem;
|
||||
|
||||
if (!gst_buffer_pool_config_get_params (config, &caps, &size, &min_buffers,
|
||||
&max_buffers))
|
||||
goto wrong_config;
|
||||
|
||||
if (caps == NULL)
|
||||
goto no_caps;
|
||||
|
||||
if (!gst_buffer_pool_config_get_allocator (config, &allocator, params))
|
||||
goto wrong_config;
|
||||
|
||||
/* now parse the caps from the config */
|
||||
if (!gst_video_info_from_caps (info, caps))
|
||||
goto wrong_caps;
|
||||
|
||||
GST_LOG_OBJECT (pool, "%dx%d, caps %" GST_PTR_FORMAT,
|
||||
GST_VIDEO_INFO_WIDTH (info), GST_VIDEO_INFO_HEIGHT (info), caps);
|
||||
|
||||
gst_clear_object (&priv->allocator);
|
||||
|
||||
if (allocator) {
|
||||
if (!GST_IS_CUDA_ALLOCATOR (allocator)) {
|
||||
goto wrong_allocator;
|
||||
} else {
|
||||
priv->allocator = gst_object_ref (allocator);
|
||||
}
|
||||
} else {
|
||||
allocator = priv->allocator = gst_cuda_allocator_new (cuda_pool->context);
|
||||
if (G_UNLIKELY (priv->allocator == NULL))
|
||||
goto no_allocator;
|
||||
}
|
||||
|
||||
priv->add_videometa = gst_buffer_pool_config_has_option (config,
|
||||
GST_BUFFER_POOL_OPTION_VIDEO_META);
|
||||
|
||||
priv->need_alignment = gst_buffer_pool_config_has_option (config,
|
||||
GST_BUFFER_POOL_OPTION_VIDEO_ALIGNMENT);
|
||||
|
||||
max_align = params->align;
|
||||
|
||||
/* do memory align */
|
||||
if (priv->need_alignment && priv->add_videometa) {
|
||||
GstVideoAlignment valign;
|
||||
|
||||
gst_buffer_pool_config_get_video_alignment (config, &valign);
|
||||
|
||||
for (n = 0; n < GST_VIDEO_MAX_PLANES; ++n)
|
||||
max_align |= valign.stride_align[n];
|
||||
|
||||
for (n = 0; n < GST_VIDEO_MAX_PLANES; ++n)
|
||||
valign.stride_align[n] = max_align;
|
||||
|
||||
if (!gst_video_info_align (info, &valign))
|
||||
goto failed_to_align;
|
||||
|
||||
gst_buffer_pool_config_set_video_alignment (config, &valign);
|
||||
}
|
||||
|
||||
if (params->align < max_align) {
|
||||
GST_WARNING_OBJECT (pool, "allocation params alignment %u is smaller "
|
||||
"than the max specified video stride alignment %u, fixing",
|
||||
(guint) params->align, max_align);
|
||||
|
||||
params->align = max_align;
|
||||
gst_buffer_pool_config_set_allocator (config, allocator, params);
|
||||
}
|
||||
|
||||
gst_buffer_pool_config_set_params (config, caps, GST_VIDEO_INFO_SIZE (info),
|
||||
min_buffers, max_buffers);
|
||||
|
||||
return GST_BUFFER_POOL_CLASS (parent_class)->set_config (pool, config);
|
||||
|
||||
/* ERRORS */
|
||||
wrong_config:
|
||||
{
|
||||
GST_WARNING_OBJECT (pool, "invalid config");
|
||||
&max_buffers)) {
|
||||
GST_WARNING_OBJECT (self, "invalid config");
|
||||
return FALSE;
|
||||
}
|
||||
no_caps:
|
||||
{
|
||||
|
||||
if (!caps) {
|
||||
GST_WARNING_OBJECT (pool, "no caps in config");
|
||||
return FALSE;
|
||||
}
|
||||
wrong_caps:
|
||||
{
|
||||
GST_WARNING_OBJECT (pool,
|
||||
"failed getting geometry from caps %" GST_PTR_FORMAT, caps);
|
||||
|
||||
if (!gst_video_info_from_caps (&info, caps)) {
|
||||
GST_WARNING_OBJECT (self, "Failed to convert caps to video-info");
|
||||
return FALSE;
|
||||
}
|
||||
no_allocator:
|
||||
{
|
||||
GST_WARNING_OBJECT (pool, "Could not create new CUDA allocator");
|
||||
|
||||
gst_clear_object (&priv->allocator);
|
||||
priv->allocator = (GstCudaAllocator *)
|
||||
gst_allocator_find (GST_CUDA_MEMORY_TYPE_NAME);
|
||||
if (!priv->allocator) {
|
||||
GST_WARNING_OBJECT (self, "CudaAllocator is unavailable");
|
||||
return FALSE;
|
||||
}
|
||||
wrong_allocator:
|
||||
{
|
||||
GST_WARNING_OBJECT (pool, "Incorrect allocator type for this pool");
|
||||
return FALSE;
|
||||
}
|
||||
failed_to_align:
|
||||
{
|
||||
GST_WARNING_OBJECT (pool, "Failed to align");
|
||||
|
||||
mem = gst_cuda_allocator_alloc (priv->allocator, self->context, &info);
|
||||
if (!mem) {
|
||||
GST_WARNING_OBJECT (self, "Failed to allocate memory");
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
cmem = GST_CUDA_MEMORY_CAST (mem);
|
||||
|
||||
gst_buffer_pool_config_set_params (config, caps,
|
||||
GST_VIDEO_INFO_SIZE (&cmem->info), min_buffers, max_buffers);
|
||||
|
||||
priv->info = info;
|
||||
|
||||
gst_memory_unref (mem);
|
||||
|
||||
return GST_BUFFER_POOL_CLASS (parent_class)->set_config (pool, config);
|
||||
}
|
||||
|
||||
static GstFlowReturn
|
||||
gst_cuda_buffer_pool_alloc (GstBufferPool * pool, GstBuffer ** buffer,
|
||||
GstBufferPoolAcquireParams * params)
|
||||
{
|
||||
GstCudaBufferPool *cuda_pool = GST_CUDA_BUFFER_POOL_CAST (pool);
|
||||
GstCudaBufferPoolPrivate *priv = cuda_pool->priv;
|
||||
GstVideoInfo *info;
|
||||
GstBuffer *cuda;
|
||||
GstCudaBufferPool *self = GST_CUDA_BUFFER_POOL_CAST (pool);
|
||||
GstCudaBufferPoolPrivate *priv = self->priv;
|
||||
GstVideoInfo *info = &priv->info;
|
||||
GstBuffer *buf;
|
||||
GstMemory *mem;
|
||||
GstCudaMemory *cmem;
|
||||
|
||||
info = &priv->params.info;
|
||||
|
||||
cuda = gst_buffer_new ();
|
||||
|
||||
mem = gst_cuda_allocator_alloc (GST_ALLOCATOR_CAST (priv->allocator),
|
||||
GST_VIDEO_INFO_SIZE (info), &priv->params);
|
||||
|
||||
if (mem == NULL) {
|
||||
gst_buffer_unref (cuda);
|
||||
mem = gst_cuda_allocator_alloc (priv->allocator, self->context, &priv->info);
|
||||
if (!mem) {
|
||||
GST_WARNING_OBJECT (pool, "Cannot create CUDA memory");
|
||||
return GST_FLOW_ERROR;
|
||||
}
|
||||
gst_buffer_append_memory (cuda, mem);
|
||||
|
||||
if (priv->add_videometa) {
|
||||
GST_DEBUG_OBJECT (pool, "adding GstVideoMeta");
|
||||
gst_buffer_add_video_meta_full (cuda, GST_VIDEO_FRAME_FLAG_NONE,
|
||||
GST_VIDEO_INFO_FORMAT (info), GST_VIDEO_INFO_WIDTH (info),
|
||||
GST_VIDEO_INFO_HEIGHT (info), GST_VIDEO_INFO_N_PLANES (info),
|
||||
info->offset, info->stride);
|
||||
}
|
||||
cmem = GST_CUDA_MEMORY_CAST (mem);
|
||||
|
||||
*buffer = cuda;
|
||||
buf = gst_buffer_new ();
|
||||
|
||||
gst_buffer_append_memory (buf, mem);
|
||||
|
||||
GST_DEBUG_OBJECT (pool, "adding GstVideoMeta");
|
||||
gst_buffer_add_video_meta_full (buf, GST_VIDEO_FRAME_FLAG_NONE,
|
||||
GST_VIDEO_INFO_FORMAT (info), GST_VIDEO_INFO_WIDTH (info),
|
||||
GST_VIDEO_INFO_HEIGHT (info), GST_VIDEO_INFO_N_PLANES (info),
|
||||
cmem->info.offset, cmem->info.stride);
|
||||
|
||||
*buffer = buf;
|
||||
|
||||
return GST_FLOW_OK;
|
||||
}
|
||||
|
@ -208,44 +137,41 @@ gst_cuda_buffer_pool_alloc (GstBufferPool * pool, GstBuffer ** buffer,
|
|||
GstBufferPool *
|
||||
gst_cuda_buffer_pool_new (GstCudaContext * context)
|
||||
{
|
||||
GstCudaBufferPool *pool;
|
||||
GstCudaBufferPool *self;
|
||||
|
||||
pool = g_object_new (GST_TYPE_CUDA_BUFFER_POOL, NULL);
|
||||
gst_object_ref_sink (pool);
|
||||
g_return_val_if_fail (GST_IS_CUDA_CONTEXT (context), NULL);
|
||||
|
||||
pool->context = gst_object_ref (context);
|
||||
self = g_object_new (GST_TYPE_CUDA_BUFFER_POOL, NULL);
|
||||
gst_object_ref_sink (self);
|
||||
|
||||
GST_LOG_OBJECT (pool, "new CUDA buffer pool %p", pool);
|
||||
self->context = gst_object_ref (context);
|
||||
|
||||
return GST_BUFFER_POOL_CAST (pool);
|
||||
return GST_BUFFER_POOL_CAST (self);
|
||||
}
|
||||
|
||||
static void
|
||||
gst_cuda_buffer_pool_dispose (GObject * object)
|
||||
{
|
||||
GstCudaBufferPool *pool = GST_CUDA_BUFFER_POOL_CAST (object);
|
||||
GstCudaBufferPoolPrivate *priv = pool->priv;
|
||||
|
||||
GST_LOG_OBJECT (pool, "finalize CUDA buffer pool %p", pool);
|
||||
GstCudaBufferPool *self = GST_CUDA_BUFFER_POOL_CAST (object);
|
||||
GstCudaBufferPoolPrivate *priv = self->priv;
|
||||
|
||||
gst_clear_object (&priv->allocator);
|
||||
gst_clear_object (&pool->context);
|
||||
gst_clear_object (&self->context);
|
||||
|
||||
G_OBJECT_CLASS (parent_class)->dispose (object);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
gst_cuda_buffer_pool_class_init (GstCudaBufferPoolClass * klass)
|
||||
{
|
||||
GObjectClass *gobject_class = (GObjectClass *) klass;
|
||||
GstBufferPoolClass *gstbufferpool_class = (GstBufferPoolClass *) klass;
|
||||
GstBufferPoolClass *bufferpool_class = (GstBufferPoolClass *) klass;
|
||||
|
||||
gobject_class->dispose = gst_cuda_buffer_pool_dispose;
|
||||
|
||||
gstbufferpool_class->get_options = gst_cuda_buffer_pool_get_options;
|
||||
gstbufferpool_class->set_config = gst_cuda_buffer_pool_set_config;
|
||||
gstbufferpool_class->alloc_buffer = gst_cuda_buffer_pool_alloc;
|
||||
bufferpool_class->get_options = gst_cuda_buffer_pool_get_options;
|
||||
bufferpool_class->set_config = gst_cuda_buffer_pool_set_config;
|
||||
bufferpool_class->alloc_buffer = gst_cuda_buffer_pool_alloc;
|
||||
|
||||
GST_DEBUG_CATEGORY_INIT (gst_cuda_buffer_pool_debug, "cudabufferpool", 0,
|
||||
"CUDA Buffer Pool");
|
||||
|
|
|
@ -180,10 +180,6 @@ gst_cuda_download_propose_allocation (GstBaseTransform * trans,
|
|||
if (gst_query_get_n_allocation_pools (query) == 0) {
|
||||
GstCapsFeatures *features;
|
||||
GstStructure *config;
|
||||
GstVideoAlignment align;
|
||||
GstAllocationParams params = { 0, 31, 0, 0, };
|
||||
GstAllocator *allocator = NULL;
|
||||
gint i;
|
||||
|
||||
features = gst_caps_get_features (caps, 0);
|
||||
|
||||
|
@ -197,38 +193,25 @@ gst_cuda_download_propose_allocation (GstBaseTransform * trans,
|
|||
|
||||
config = gst_buffer_pool_get_config (pool);
|
||||
|
||||
gst_video_alignment_reset (&align);
|
||||
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (&info); i++) {
|
||||
align.stride_align[i] = 31;
|
||||
}
|
||||
gst_video_info_align (&info, &align);
|
||||
|
||||
gst_buffer_pool_config_add_option (config,
|
||||
GST_BUFFER_POOL_OPTION_VIDEO_META);
|
||||
gst_buffer_pool_config_add_option (config,
|
||||
GST_BUFFER_POOL_OPTION_VIDEO_ALIGNMENT);
|
||||
|
||||
gst_buffer_pool_config_set_video_alignment (config, &align);
|
||||
size = GST_VIDEO_INFO_SIZE (&info);
|
||||
gst_buffer_pool_config_set_params (config, caps, size, 0, 0);
|
||||
|
||||
gst_query_add_allocation_meta (query, GST_VIDEO_META_API_TYPE, NULL);
|
||||
gst_query_add_allocation_pool (query, pool, size, 0, 0);
|
||||
|
||||
if (gst_buffer_pool_config_get_allocator (config, &allocator, ¶ms)) {
|
||||
if (params.align < 31)
|
||||
params.align = 31;
|
||||
|
||||
gst_query_add_allocation_param (query, allocator, ¶ms);
|
||||
gst_buffer_pool_config_set_allocator (config, allocator, ¶ms);
|
||||
}
|
||||
|
||||
if (!gst_buffer_pool_set_config (pool, config)) {
|
||||
GST_ERROR_OBJECT (ctrans, "failed to set config");
|
||||
gst_object_unref (pool);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* Get updated size by cuda buffer pool */
|
||||
config = gst_buffer_pool_get_config (pool);
|
||||
gst_buffer_pool_config_get_params (config, NULL, &size, NULL, NULL);
|
||||
gst_structure_free (config);
|
||||
|
||||
gst_query_add_allocation_pool (query, pool, size, 0, 0);
|
||||
|
||||
gst_object_unref (pool);
|
||||
}
|
||||
|
||||
|
@ -295,6 +278,12 @@ gst_cuda_download_decide_allocation (GstBaseTransform * trans, GstQuery * query)
|
|||
gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META);
|
||||
gst_buffer_pool_config_set_params (config, outcaps, size, min, max);
|
||||
gst_buffer_pool_set_config (pool, config);
|
||||
|
||||
/* Get updated size by cuda buffer pool */
|
||||
config = gst_buffer_pool_get_config (pool);
|
||||
gst_buffer_pool_config_get_params (config, NULL, &size, NULL, NULL);
|
||||
gst_structure_free (config);
|
||||
|
||||
if (update_pool)
|
||||
gst_query_set_nth_allocation_pool (query, 0, pool, size, min, max);
|
||||
else
|
||||
|
|
|
@ -26,20 +26,33 @@
|
|||
|
||||
#include <string.h>
|
||||
|
||||
GST_DEBUG_CATEGORY_STATIC (cudaallocator_debug);
|
||||
#define GST_CAT_DEFAULT cudaallocator_debug
|
||||
GST_DEBUG_CATEGORY_STATIC (GST_CAT_MEMORY);
|
||||
GST_DEBUG_CATEGORY_STATIC (cuda_allocator_debug);
|
||||
#define GST_CAT_DEFAULT cuda_allocator_debug
|
||||
|
||||
static GstAllocator *_gst_cuda_allocator = NULL;
|
||||
|
||||
struct _GstCudaMemoryPrivate
|
||||
{
|
||||
CUdeviceptr data;
|
||||
void *staging;
|
||||
|
||||
/* params used for cuMemAllocPitch */
|
||||
gsize pitch;
|
||||
guint width_in_bytes;
|
||||
guint height;
|
||||
|
||||
GMutex lock;
|
||||
};
|
||||
|
||||
#define gst_cuda_allocator_parent_class parent_class
|
||||
G_DEFINE_TYPE (GstCudaAllocator, gst_cuda_allocator, GST_TYPE_ALLOCATOR);
|
||||
|
||||
static void gst_cuda_allocator_dispose (GObject * object);
|
||||
static void gst_cuda_allocator_free (GstAllocator * allocator,
|
||||
GstMemory * memory);
|
||||
|
||||
static gpointer cuda_mem_map (GstCudaMemory * mem, gsize maxsize,
|
||||
static gpointer cuda_mem_map (GstMemory * mem, gsize maxsize,
|
||||
GstMapFlags flags);
|
||||
static void cuda_mem_unmap_full (GstCudaMemory * mem, GstMapInfo * info);
|
||||
static void cuda_mem_unmap_full (GstMemory * mem, GstMapInfo * info);
|
||||
static GstMemory *cuda_mem_copy (GstMemory * mem, gssize offset, gssize size);
|
||||
|
||||
static GstMemory *
|
||||
|
@ -52,17 +65,13 @@ gst_cuda_allocator_dummy_alloc (GstAllocator * allocator, gsize size,
|
|||
static void
|
||||
gst_cuda_allocator_class_init (GstCudaAllocatorClass * klass)
|
||||
{
|
||||
GObjectClass *gobject_class = G_OBJECT_CLASS (klass);
|
||||
GstAllocatorClass *allocator_class = GST_ALLOCATOR_CLASS (klass);
|
||||
|
||||
gobject_class->dispose = gst_cuda_allocator_dispose;
|
||||
|
||||
allocator_class->alloc = GST_DEBUG_FUNCPTR (gst_cuda_allocator_dummy_alloc);
|
||||
allocator_class->free = GST_DEBUG_FUNCPTR (gst_cuda_allocator_free);
|
||||
|
||||
GST_DEBUG_CATEGORY_INIT (cudaallocator_debug, "cudaallocator", 0,
|
||||
GST_DEBUG_CATEGORY_INIT (cuda_allocator_debug, "cudaallocator", 0,
|
||||
"CUDA Allocator");
|
||||
GST_DEBUG_CATEGORY_GET (GST_CAT_MEMORY, "GST_MEMORY");
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -74,337 +83,273 @@ gst_cuda_allocator_init (GstCudaAllocator * allocator)
|
|||
|
||||
alloc->mem_type = GST_CUDA_MEMORY_TYPE_NAME;
|
||||
|
||||
alloc->mem_map = (GstMemoryMapFunction) cuda_mem_map;
|
||||
alloc->mem_unmap_full = (GstMemoryUnmapFullFunction) cuda_mem_unmap_full;
|
||||
alloc->mem_copy = (GstMemoryCopyFunction) cuda_mem_copy;
|
||||
alloc->mem_map = cuda_mem_map;
|
||||
alloc->mem_unmap_full = cuda_mem_unmap_full;
|
||||
alloc->mem_copy = cuda_mem_copy;
|
||||
|
||||
GST_OBJECT_FLAG_SET (allocator, GST_ALLOCATOR_FLAG_CUSTOM_ALLOC);
|
||||
}
|
||||
|
||||
static void
|
||||
gst_cuda_allocator_dispose (GObject * object)
|
||||
static GstMemory *
|
||||
gst_cuda_allocator_alloc_internal (GstCudaAllocator * self,
|
||||
GstCudaContext * context, const GstVideoInfo * info,
|
||||
guint width_in_bytes, guint alloc_height)
|
||||
{
|
||||
GstCudaAllocator *self = GST_CUDA_ALLOCATOR_CAST (object);
|
||||
|
||||
GST_DEBUG_OBJECT (self, "dispose");
|
||||
|
||||
gst_clear_object (&self->context);
|
||||
G_OBJECT_CLASS (parent_class)->dispose (object);
|
||||
}
|
||||
|
||||
GstMemory *
|
||||
gst_cuda_allocator_alloc (GstAllocator * allocator, gsize size,
|
||||
GstCudaAllocationParams * params)
|
||||
{
|
||||
GstCudaAllocator *self = GST_CUDA_ALLOCATOR_CAST (allocator);
|
||||
gsize maxsize = size + params->parent.prefix + params->parent.padding;
|
||||
gsize align = params->parent.align;
|
||||
gsize offset = params->parent.prefix;
|
||||
GstMemoryFlags flags = params->parent.flags;
|
||||
GstCudaMemoryPrivate *priv;
|
||||
GstCudaMemory *mem;
|
||||
CUdeviceptr data;
|
||||
gboolean ret = FALSE;
|
||||
GstCudaMemory *mem;
|
||||
GstVideoInfo *info = ¶ms->info;
|
||||
gint i;
|
||||
guint width, height;
|
||||
gsize stride, plane_offset;
|
||||
gsize pitch;
|
||||
guint height = GST_VIDEO_INFO_HEIGHT (info);
|
||||
GstVideoInfo *alloc_info;
|
||||
|
||||
if (!gst_cuda_context_push (self->context))
|
||||
if (!gst_cuda_context_push (context))
|
||||
return NULL;
|
||||
|
||||
/* ensure configured alignment */
|
||||
align |= gst_memory_alignment;
|
||||
/* allocate more to compensate for alignment */
|
||||
maxsize += align;
|
||||
|
||||
GST_CAT_DEBUG_OBJECT (GST_CAT_MEMORY, self, "allocate new cuda memory");
|
||||
|
||||
width = GST_VIDEO_INFO_COMP_WIDTH (info, 0) *
|
||||
GST_VIDEO_INFO_COMP_PSTRIDE (info, 0);
|
||||
height = 0;
|
||||
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++)
|
||||
height += GST_VIDEO_INFO_COMP_HEIGHT (info, i);
|
||||
|
||||
ret = gst_cuda_result (CuMemAllocPitch (&data, &stride, width, height, 16));
|
||||
ret = gst_cuda_result (CuMemAllocPitch (&data, &pitch, width_in_bytes,
|
||||
alloc_height, 16));
|
||||
gst_cuda_context_pop (NULL);
|
||||
|
||||
if (G_UNLIKELY (!ret)) {
|
||||
GST_CAT_ERROR_OBJECT (GST_CAT_MEMORY, self, "CUDA allocation failure");
|
||||
if (!ret) {
|
||||
GST_ERROR_OBJECT (self, "Failed to allocate CUDA memory");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
mem = g_new0 (GstCudaMemory, 1);
|
||||
g_mutex_init (&mem->lock);
|
||||
mem->data = data;
|
||||
mem->alloc_params = *params;
|
||||
mem->stride = stride;
|
||||
mem->priv = priv = g_new0 (GstCudaMemoryPrivate, 1);
|
||||
|
||||
plane_offset = 0;
|
||||
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++) {
|
||||
mem->offset[i] = plane_offset;
|
||||
plane_offset += stride * GST_VIDEO_INFO_COMP_HEIGHT (info, i);
|
||||
priv->data = data;
|
||||
priv->pitch = pitch;
|
||||
priv->width_in_bytes = width_in_bytes;
|
||||
priv->height = alloc_height;
|
||||
g_mutex_init (&priv->lock);
|
||||
|
||||
mem->context = gst_object_ref (context);
|
||||
mem->info = *info;
|
||||
mem->info.size = pitch * alloc_height;
|
||||
|
||||
alloc_info = &mem->info;
|
||||
gst_memory_init (GST_MEMORY_CAST (mem), 0, GST_ALLOCATOR_CAST (self),
|
||||
NULL, alloc_info->size, 0, 0, alloc_info->size);
|
||||
|
||||
switch (GST_VIDEO_INFO_FORMAT (info)) {
|
||||
case GST_VIDEO_FORMAT_I420:
|
||||
case GST_VIDEO_FORMAT_YV12:
|
||||
case GST_VIDEO_FORMAT_I420_10LE:
|
||||
/* we are wasting space yes, but required so that this memory
|
||||
* can be used in kernel function */
|
||||
alloc_info->stride[0] = pitch;
|
||||
alloc_info->stride[1] = pitch;
|
||||
alloc_info->stride[2] = pitch;
|
||||
alloc_info->offset[0] = 0;
|
||||
alloc_info->offset[1] = alloc_info->stride[0] * height;
|
||||
alloc_info->offset[2] = alloc_info->offset[1] +
|
||||
alloc_info->stride[1] * height / 2;
|
||||
break;
|
||||
case GST_VIDEO_FORMAT_NV12:
|
||||
case GST_VIDEO_FORMAT_NV21:
|
||||
case GST_VIDEO_FORMAT_P010_10LE:
|
||||
case GST_VIDEO_FORMAT_P016_LE:
|
||||
alloc_info->stride[0] = pitch;
|
||||
alloc_info->stride[1] = pitch;
|
||||
alloc_info->offset[0] = 0;
|
||||
alloc_info->offset[1] = alloc_info->stride[0] * height;
|
||||
break;
|
||||
case GST_VIDEO_FORMAT_Y444:
|
||||
case GST_VIDEO_FORMAT_Y444_16LE:
|
||||
alloc_info->stride[0] = pitch;
|
||||
alloc_info->stride[1] = pitch;
|
||||
alloc_info->stride[2] = pitch;
|
||||
alloc_info->offset[0] = 0;
|
||||
alloc_info->offset[1] = alloc_info->stride[0] * height;
|
||||
alloc_info->offset[2] = alloc_info->offset[1] * 2;
|
||||
break;
|
||||
case GST_VIDEO_FORMAT_BGRA:
|
||||
case GST_VIDEO_FORMAT_RGBA:
|
||||
case GST_VIDEO_FORMAT_RGBx:
|
||||
case GST_VIDEO_FORMAT_BGRx:
|
||||
case GST_VIDEO_FORMAT_ARGB:
|
||||
case GST_VIDEO_FORMAT_ABGR:
|
||||
case GST_VIDEO_FORMAT_RGB:
|
||||
case GST_VIDEO_FORMAT_BGR:
|
||||
case GST_VIDEO_FORMAT_BGR10A2_LE:
|
||||
case GST_VIDEO_FORMAT_RGB10A2_LE:
|
||||
alloc_info->stride[0] = pitch;
|
||||
alloc_info->offset[0] = 0;
|
||||
break;
|
||||
default:
|
||||
GST_ERROR_OBJECT (self, "Unexpected format %s",
|
||||
gst_video_format_to_string (GST_VIDEO_INFO_FORMAT (info)));
|
||||
g_assert_not_reached ();
|
||||
gst_memory_unref (GST_MEMORY_CAST (mem));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
mem->context = gst_object_ref (self->context);
|
||||
|
||||
gst_memory_init (GST_MEMORY_CAST (mem),
|
||||
flags, GST_ALLOCATOR_CAST (self), NULL, maxsize, align, offset, size);
|
||||
|
||||
return GST_MEMORY_CAST (mem);
|
||||
}
|
||||
|
||||
static void
|
||||
gst_cuda_allocator_free (GstAllocator * allocator, GstMemory * memory)
|
||||
{
|
||||
GstCudaAllocator *self = GST_CUDA_ALLOCATOR_CAST (allocator);
|
||||
GstCudaMemory *mem = GST_CUDA_MEMORY_CAST (memory);
|
||||
GstCudaMemoryPrivate *priv = mem->priv;
|
||||
|
||||
GST_CAT_DEBUG_OBJECT (GST_CAT_MEMORY, allocator, "free cuda memory");
|
||||
|
||||
g_mutex_clear (&mem->lock);
|
||||
|
||||
gst_cuda_context_push (self->context);
|
||||
if (mem->data)
|
||||
gst_cuda_result (CuMemFree (mem->data));
|
||||
|
||||
if (mem->map_alloc_data)
|
||||
gst_cuda_result (CuMemFreeHost (mem->map_alloc_data));
|
||||
gst_cuda_context_push (mem->context);
|
||||
if (priv->data)
|
||||
gst_cuda_result (CuMemFree (priv->data));
|
||||
|
||||
if (priv->staging)
|
||||
gst_cuda_result (CuMemFreeHost (priv->staging));
|
||||
gst_cuda_context_pop (NULL);
|
||||
|
||||
gst_object_unref (mem->context);
|
||||
|
||||
g_mutex_clear (&priv->lock);
|
||||
g_free (mem->priv);
|
||||
g_free (mem);
|
||||
}
|
||||
|
||||
/* called with lock */
|
||||
static gboolean
|
||||
gst_cuda_memory_upload_transfer (GstCudaMemory * mem)
|
||||
gst_cuda_memory_upload (GstCudaAllocator * self, GstCudaMemory * mem)
|
||||
{
|
||||
gint i;
|
||||
GstVideoInfo *info = &mem->alloc_params.info;
|
||||
GstCudaMemoryPrivate *priv = mem->priv;
|
||||
gboolean ret = TRUE;
|
||||
CUDA_MEMCPY2D param = { 0, };
|
||||
|
||||
if (!mem->map_data) {
|
||||
GST_CAT_ERROR (GST_CAT_MEMORY, "no staging memory to upload");
|
||||
if (!priv->staging ||
|
||||
!GST_MEMORY_FLAG_IS_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD)) {
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
if (!gst_cuda_context_push (mem->context)) {
|
||||
GST_ERROR_OBJECT (self, "Failed to push cuda context");
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++) {
|
||||
CUDA_MEMCPY2D param = { 0, };
|
||||
param.srcMemoryType = CU_MEMORYTYPE_HOST;
|
||||
param.srcHost = priv->staging;
|
||||
param.srcPitch = priv->pitch;
|
||||
|
||||
param.srcMemoryType = CU_MEMORYTYPE_HOST;
|
||||
param.srcHost =
|
||||
(guint8 *) mem->map_data + GST_VIDEO_INFO_PLANE_OFFSET (info, i);
|
||||
param.srcPitch = GST_VIDEO_INFO_PLANE_STRIDE (info, i);
|
||||
param.dstMemoryType = CU_MEMORYTYPE_DEVICE;
|
||||
param.dstDevice = (CUdeviceptr) priv->data;
|
||||
param.dstPitch = priv->pitch;
|
||||
param.WidthInBytes = priv->width_in_bytes;
|
||||
param.Height = priv->height;
|
||||
|
||||
param.dstMemoryType = CU_MEMORYTYPE_DEVICE;
|
||||
param.dstDevice = mem->data + mem->offset[i];
|
||||
param.dstPitch = mem->stride;
|
||||
param.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (info, i) *
|
||||
GST_VIDEO_INFO_COMP_PSTRIDE (info, i);
|
||||
param.Height = GST_VIDEO_INFO_COMP_HEIGHT (info, i);
|
||||
ret = gst_cuda_result (CuMemcpy2D (¶m));
|
||||
gst_cuda_context_pop (NULL);
|
||||
|
||||
if (!gst_cuda_result (CuMemcpy2DAsync (¶m, NULL))) {
|
||||
GST_CAT_ERROR (GST_CAT_MEMORY, "Failed to copy %dth plane", i);
|
||||
ret = FALSE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
gst_cuda_result (CuStreamSynchronize (NULL));
|
||||
if (!ret)
|
||||
GST_ERROR_OBJECT (self, "Failed to upload memory");
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* called with lock */
|
||||
static gboolean
|
||||
gst_cuda_memory_download_transfer (GstCudaMemory * mem)
|
||||
gst_cuda_memory_download (GstCudaAllocator * self, GstCudaMemory * mem)
|
||||
{
|
||||
gint i;
|
||||
GstVideoInfo *info = &mem->alloc_params.info;
|
||||
GstCudaMemoryPrivate *priv = mem->priv;
|
||||
gboolean ret = TRUE;
|
||||
CUDA_MEMCPY2D param = { 0, };
|
||||
|
||||
if (!mem->map_data) {
|
||||
GST_CAT_ERROR (GST_CAT_MEMORY, "no staging memory to upload");
|
||||
if (!GST_MEMORY_FLAG_IS_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD))
|
||||
return TRUE;
|
||||
|
||||
if (!gst_cuda_context_push (mem->context)) {
|
||||
GST_ERROR_OBJECT (self, "Failed to push cuda context");
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++) {
|
||||
CUDA_MEMCPY2D param = { 0, };
|
||||
|
||||
param.srcMemoryType = CU_MEMORYTYPE_DEVICE;
|
||||
param.srcDevice = mem->data + mem->offset[i];
|
||||
param.srcPitch = mem->stride;
|
||||
|
||||
param.dstMemoryType = CU_MEMORYTYPE_HOST;
|
||||
param.dstHost =
|
||||
(guint8 *) mem->map_data + GST_VIDEO_INFO_PLANE_OFFSET (info, i);
|
||||
param.dstPitch = GST_VIDEO_INFO_PLANE_STRIDE (info, i);
|
||||
param.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (info, i) *
|
||||
GST_VIDEO_INFO_COMP_PSTRIDE (info, i);
|
||||
param.Height = GST_VIDEO_INFO_COMP_HEIGHT (info, i);
|
||||
|
||||
if (!gst_cuda_result (CuMemcpy2DAsync (¶m, NULL))) {
|
||||
GST_CAT_ERROR (GST_CAT_MEMORY, "Failed to copy %dth plane", i);
|
||||
CuMemFreeHost (mem->map_alloc_data);
|
||||
mem->map_alloc_data = mem->map_data = mem->align_data = NULL;
|
||||
break;
|
||||
}
|
||||
}
|
||||
gst_cuda_result (CuStreamSynchronize (NULL));
|
||||
|
||||
if (!mem->map_data)
|
||||
return FALSE;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static gpointer
|
||||
gst_cuda_memory_device_memory_map (GstCudaMemory * mem)
|
||||
{
|
||||
GstMemory *memory = GST_MEMORY_CAST (mem);
|
||||
gpointer data;
|
||||
gsize aoffset;
|
||||
gsize align = memory->align;
|
||||
|
||||
if (mem->map_data) {
|
||||
return mem->map_data;
|
||||
}
|
||||
|
||||
GST_CAT_DEBUG (GST_CAT_MEMORY, "alloc host memory for map");
|
||||
|
||||
if (!mem->map_alloc_data) {
|
||||
gsize maxsize;
|
||||
guint8 *align_data;
|
||||
|
||||
maxsize = memory->maxsize + align;
|
||||
if (!gst_cuda_context_push (mem->context)) {
|
||||
GST_CAT_ERROR (GST_CAT_MEMORY, "cannot push cuda context");
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (!gst_cuda_result (CuMemAllocHost (&data, maxsize))) {
|
||||
GST_CAT_ERROR (GST_CAT_MEMORY, "cannot alloc host memory");
|
||||
if (!priv->staging) {
|
||||
ret = gst_cuda_result (CuMemAllocHost (&priv->staging,
|
||||
GST_MEMORY_CAST (mem)->size));
|
||||
if (!ret) {
|
||||
GST_ERROR_OBJECT (self, "Failed to allocate staging memory");
|
||||
gst_cuda_context_pop (NULL);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (!gst_cuda_context_pop (NULL)) {
|
||||
GST_CAT_WARNING (GST_CAT_MEMORY, "cannot pop cuda context");
|
||||
}
|
||||
|
||||
mem->map_alloc_data = data;
|
||||
align_data = data;
|
||||
|
||||
/* do align */
|
||||
if ((aoffset = ((guintptr) align_data & align))) {
|
||||
aoffset = (align + 1) - aoffset;
|
||||
align_data += aoffset;
|
||||
}
|
||||
mem->align_data = align_data;
|
||||
|
||||
/* first memory, always need download to staging */
|
||||
GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD);
|
||||
}
|
||||
|
||||
mem->map_data = mem->align_data;
|
||||
|
||||
if (GST_MEMORY_FLAG_IS_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD)) {
|
||||
if (!gst_cuda_context_push (mem->context)) {
|
||||
GST_CAT_ERROR (GST_CAT_MEMORY, "cannot push cuda context");
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
gst_cuda_memory_download_transfer (mem);
|
||||
|
||||
if (!gst_cuda_context_pop (NULL)) {
|
||||
GST_CAT_WARNING (GST_CAT_MEMORY, "cannot pop cuda context");
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
return mem->map_data;
|
||||
param.srcMemoryType = CU_MEMORYTYPE_DEVICE;
|
||||
param.srcDevice = (CUdeviceptr) priv->data;
|
||||
param.srcPitch = priv->pitch;
|
||||
|
||||
param.dstMemoryType = CU_MEMORYTYPE_HOST;
|
||||
param.dstHost = priv->staging;
|
||||
param.dstPitch = priv->pitch;
|
||||
param.WidthInBytes = priv->width_in_bytes;
|
||||
param.Height = priv->height;
|
||||
|
||||
ret = gst_cuda_result (CuMemcpy2D (¶m));
|
||||
gst_cuda_context_pop (NULL);
|
||||
|
||||
if (!ret)
|
||||
GST_ERROR_OBJECT (self, "Failed to upload memory");
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static gpointer
|
||||
cuda_mem_map (GstCudaMemory * mem, gsize maxsize, GstMapFlags flags)
|
||||
cuda_mem_map (GstMemory * mem, gsize maxsize, GstMapFlags flags)
|
||||
{
|
||||
GstCudaAllocator *self = GST_CUDA_ALLOCATOR (mem->allocator);
|
||||
GstCudaMemory *cmem = GST_CUDA_MEMORY_CAST (mem);
|
||||
GstCudaMemoryPrivate *priv = cmem->priv;
|
||||
gpointer ret = NULL;
|
||||
|
||||
g_mutex_lock (&mem->lock);
|
||||
mem->map_count++;
|
||||
|
||||
g_mutex_lock (&priv->lock);
|
||||
if ((flags & GST_MAP_CUDA) == GST_MAP_CUDA) {
|
||||
/* upload from staging to device memory if necessary */
|
||||
if (GST_MEMORY_FLAG_IS_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD)) {
|
||||
if (!gst_cuda_context_push (mem->context)) {
|
||||
GST_CAT_ERROR (GST_CAT_MEMORY, "cannot push cuda context");
|
||||
g_mutex_unlock (&mem->lock);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (!gst_cuda_memory_upload_transfer (mem)) {
|
||||
g_mutex_unlock (&mem->lock);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
gst_cuda_context_pop (NULL);
|
||||
}
|
||||
if (!gst_cuda_memory_upload (self, cmem))
|
||||
goto out;
|
||||
|
||||
GST_MEMORY_FLAG_UNSET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD);
|
||||
|
||||
if ((flags & GST_MAP_WRITE) == GST_MAP_WRITE)
|
||||
GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD);
|
||||
|
||||
g_mutex_unlock (&mem->lock);
|
||||
return (gpointer) mem->data;
|
||||
ret = (gpointer) priv->data;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = gst_cuda_memory_device_memory_map (mem);
|
||||
if (ret == NULL) {
|
||||
mem->map_count--;
|
||||
g_mutex_unlock (&mem->lock);
|
||||
return NULL;
|
||||
}
|
||||
/* First CPU access, must be downloaded */
|
||||
if (!priv->staging)
|
||||
GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD);
|
||||
|
||||
if (!gst_cuda_memory_download (self, cmem))
|
||||
goto out;
|
||||
|
||||
ret = priv->staging;
|
||||
|
||||
if ((flags & GST_MAP_WRITE) == GST_MAP_WRITE)
|
||||
GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD);
|
||||
|
||||
GST_MEMORY_FLAG_UNSET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD);
|
||||
|
||||
g_mutex_unlock (&mem->lock);
|
||||
out:
|
||||
g_mutex_unlock (&priv->lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void
|
||||
cuda_mem_unmap_full (GstCudaMemory * mem, GstMapInfo * info)
|
||||
cuda_mem_unmap_full (GstMemory * mem, GstMapInfo * info)
|
||||
{
|
||||
g_mutex_lock (&mem->lock);
|
||||
mem->map_count--;
|
||||
GST_CAT_TRACE (GST_CAT_MEMORY,
|
||||
"unmap CUDA memory %p, map count %d, have map_data %s",
|
||||
mem, mem->map_count, mem->map_data ? "true" : "false");
|
||||
GstCudaMemory *cmem = GST_CUDA_MEMORY_CAST (mem);
|
||||
GstCudaMemoryPrivate *priv = cmem->priv;
|
||||
|
||||
g_mutex_lock (&priv->lock);
|
||||
if ((info->flags & GST_MAP_CUDA) == GST_MAP_CUDA) {
|
||||
if ((info->flags & GST_MAP_WRITE) == GST_MAP_WRITE)
|
||||
GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD);
|
||||
|
||||
g_mutex_unlock (&mem->lock);
|
||||
return;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if ((info->flags & GST_MAP_WRITE))
|
||||
if ((info->flags & GST_MAP_WRITE) == GST_MAP_WRITE)
|
||||
GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD);
|
||||
|
||||
if (mem->map_count > 0 || !mem->map_data) {
|
||||
g_mutex_unlock (&mem->lock);
|
||||
return;
|
||||
}
|
||||
|
||||
mem->map_data = NULL;
|
||||
g_mutex_unlock (&mem->lock);
|
||||
out:
|
||||
g_mutex_unlock (&priv->lock);
|
||||
|
||||
return;
|
||||
}
|
||||
|
@ -412,72 +357,82 @@ cuda_mem_unmap_full (GstCudaMemory * mem, GstMapInfo * info)
|
|||
static GstMemory *
|
||||
cuda_mem_copy (GstMemory * mem, gssize offset, gssize size)
|
||||
{
|
||||
GstMemory *copy;
|
||||
GstCudaAllocator *self = GST_CUDA_ALLOCATOR (mem->allocator);
|
||||
GstCudaMemory *src_mem = GST_CUDA_MEMORY_CAST (mem);
|
||||
GstCudaMemory *dst_mem;
|
||||
GstCudaContext *ctx = GST_CUDA_ALLOCATOR_CAST (mem->allocator)->context;
|
||||
gint i;
|
||||
GstVideoInfo *info;
|
||||
GstCudaContext *context = src_mem->context;
|
||||
GstMapInfo src_info, dst_info;
|
||||
CUDA_MEMCPY2D param = { 0, };
|
||||
GstMemory *copy;
|
||||
gboolean ret;
|
||||
|
||||
/* offset and size are ignored */
|
||||
copy = gst_cuda_allocator_alloc (mem->allocator, mem->size,
|
||||
&src_mem->alloc_params);
|
||||
copy = gst_cuda_allocator_alloc_internal (self, context,
|
||||
&src_mem->info, src_mem->priv->width_in_bytes, src_mem->priv->height);
|
||||
|
||||
dst_mem = GST_CUDA_MEMORY_CAST (copy);
|
||||
if (!copy) {
|
||||
GST_ERROR_OBJECT (self, "Failed to allocate memory for copying");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
info = &src_mem->alloc_params.info;
|
||||
if (!gst_memory_map (mem, &src_info, GST_MAP_READ | GST_MAP_CUDA)) {
|
||||
GST_ERROR_OBJECT (self, "Failed to map src memory");
|
||||
gst_memory_unref (copy);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (!gst_cuda_context_push (ctx)) {
|
||||
GST_CAT_ERROR (GST_CAT_MEMORY, "cannot push cuda context");
|
||||
gst_cuda_allocator_free (mem->allocator, copy);
|
||||
if (!gst_memory_map (copy, &dst_info, GST_MAP_WRITE | GST_MAP_CUDA)) {
|
||||
GST_ERROR_OBJECT (self, "Failed to map dst memory");
|
||||
gst_memory_unmap (mem, &src_info);
|
||||
gst_memory_unref (copy);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (!gst_cuda_context_push (context)) {
|
||||
GST_ERROR_OBJECT (self, "Failed to push cuda context");
|
||||
gst_memory_unmap (mem, &src_info);
|
||||
gst_memory_unmap (copy, &dst_info);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++) {
|
||||
CUDA_MEMCPY2D param = { 0, };
|
||||
param.srcMemoryType = CU_MEMORYTYPE_DEVICE;
|
||||
param.srcDevice = (CUdeviceptr) src_info.data;
|
||||
param.srcPitch = src_mem->priv->pitch;
|
||||
|
||||
param.srcMemoryType = CU_MEMORYTYPE_DEVICE;
|
||||
param.srcDevice = src_mem->data + src_mem->offset[i];
|
||||
param.srcPitch = src_mem->stride;
|
||||
param.dstMemoryType = CU_MEMORYTYPE_DEVICE;
|
||||
param.dstDevice = (CUdeviceptr) dst_info.data;
|
||||
param.dstPitch = src_mem->priv->pitch;
|
||||
param.WidthInBytes = src_mem->priv->width_in_bytes;
|
||||
param.Height = src_mem->priv->height;
|
||||
|
||||
param.dstMemoryType = CU_MEMORYTYPE_DEVICE;
|
||||
param.dstDevice = dst_mem->data + dst_mem->offset[i];
|
||||
param.dstPitch = dst_mem->stride;
|
||||
param.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (info, i) *
|
||||
GST_VIDEO_INFO_COMP_PSTRIDE (info, i);
|
||||
param.Height = GST_VIDEO_INFO_COMP_HEIGHT (info, i);
|
||||
ret = gst_cuda_result (CuMemcpy2D (¶m));
|
||||
gst_cuda_context_pop (NULL);
|
||||
|
||||
if (!gst_cuda_result (CuMemcpy2DAsync (¶m, NULL))) {
|
||||
GST_CAT_ERROR_OBJECT (GST_CAT_MEMORY,
|
||||
mem->allocator, "Failed to copy %dth plane", i);
|
||||
gst_cuda_context_pop (NULL);
|
||||
gst_cuda_allocator_free (mem->allocator, copy);
|
||||
gst_memory_unmap (mem, &src_info);
|
||||
gst_memory_unmap (copy, &dst_info);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
gst_cuda_result (CuStreamSynchronize (NULL));
|
||||
|
||||
if (!gst_cuda_context_pop (NULL)) {
|
||||
GST_CAT_WARNING (GST_CAT_MEMORY, "cannot pop cuda context");
|
||||
if (!ret) {
|
||||
GST_ERROR_OBJECT (self, "Failed to copy memory");
|
||||
gst_memory_unref (copy);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return copy;
|
||||
}
|
||||
|
||||
GstAllocator *
|
||||
gst_cuda_allocator_new (GstCudaContext * context)
|
||||
void
|
||||
gst_cuda_memory_init_once (void)
|
||||
{
|
||||
GstCudaAllocator *allocator;
|
||||
static gsize _init = 0;
|
||||
|
||||
g_return_val_if_fail (GST_IS_CUDA_CONTEXT (context), NULL);
|
||||
if (g_once_init_enter (&_init)) {
|
||||
_gst_cuda_allocator =
|
||||
(GstAllocator *) g_object_new (GST_TYPE_CUDA_ALLOCATOR, NULL);
|
||||
gst_object_ref_sink (_gst_cuda_allocator);
|
||||
|
||||
allocator = g_object_new (GST_TYPE_CUDA_ALLOCATOR, NULL);
|
||||
allocator->context = gst_object_ref (context);
|
||||
|
||||
return GST_ALLOCATOR_CAST (allocator);
|
||||
gst_allocator_register (GST_CUDA_MEMORY_TYPE_NAME, _gst_cuda_allocator);
|
||||
g_once_init_leave (&_init, 1);
|
||||
}
|
||||
}
|
||||
|
||||
gboolean
|
||||
|
@ -486,3 +441,51 @@ gst_is_cuda_memory (GstMemory * mem)
|
|||
return mem != NULL && mem->allocator != NULL &&
|
||||
GST_IS_CUDA_ALLOCATOR (mem->allocator);
|
||||
}
|
||||
|
||||
GstMemory *
|
||||
gst_cuda_allocator_alloc (GstCudaAllocator * allocator,
|
||||
GstCudaContext * context, const GstVideoInfo * info)
|
||||
{
|
||||
guint alloc_height;
|
||||
|
||||
g_return_val_if_fail (GST_IS_CUDA_ALLOCATOR (allocator), NULL);
|
||||
g_return_val_if_fail (GST_IS_CUDA_CONTEXT (context), NULL);
|
||||
g_return_val_if_fail (info != NULL, NULL);
|
||||
|
||||
alloc_height = GST_VIDEO_INFO_HEIGHT (info);
|
||||
|
||||
/* make sure valid height for subsampled formats */
|
||||
switch (GST_VIDEO_INFO_FORMAT (info)) {
|
||||
case GST_VIDEO_FORMAT_I420:
|
||||
case GST_VIDEO_FORMAT_YV12:
|
||||
case GST_VIDEO_FORMAT_NV12:
|
||||
case GST_VIDEO_FORMAT_P010_10LE:
|
||||
case GST_VIDEO_FORMAT_P016_LE:
|
||||
case GST_VIDEO_FORMAT_I420_10LE:
|
||||
alloc_height = GST_ROUND_UP_2 (alloc_height);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
switch (GST_VIDEO_INFO_FORMAT (info)) {
|
||||
case GST_VIDEO_FORMAT_I420:
|
||||
case GST_VIDEO_FORMAT_YV12:
|
||||
case GST_VIDEO_FORMAT_I420_10LE:
|
||||
case GST_VIDEO_FORMAT_NV12:
|
||||
case GST_VIDEO_FORMAT_NV21:
|
||||
case GST_VIDEO_FORMAT_P010_10LE:
|
||||
case GST_VIDEO_FORMAT_P016_LE:
|
||||
alloc_height *= 2;
|
||||
break;
|
||||
case GST_VIDEO_FORMAT_Y444:
|
||||
case GST_VIDEO_FORMAT_Y444_16LE:
|
||||
alloc_height *= 3;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return gst_cuda_allocator_alloc_internal (allocator, context,
|
||||
info, info->stride[0], alloc_height);
|
||||
}
|
||||
|
|
|
@ -37,10 +37,11 @@ G_BEGIN_DECLS
|
|||
#define GST_CUDA_ALLOCATOR_CAST(obj) ((GstCudaAllocator *)(obj))
|
||||
#define GST_CUDA_MEMORY_CAST(mem) ((GstCudaMemory *) (mem))
|
||||
|
||||
typedef struct _GstCudaAllocationParams GstCudaAllocationParams;
|
||||
typedef struct _GstCudaAllocator GstCudaAllocator;
|
||||
typedef struct _GstCudaAllocatorClass GstCudaAllocatorClass;
|
||||
|
||||
typedef struct _GstCudaMemory GstCudaMemory;
|
||||
typedef struct _GstCudaMemoryPrivate GstCudaMemoryPrivate;
|
||||
|
||||
/**
|
||||
* GST_MAP_CUDA:
|
||||
|
@ -65,32 +66,6 @@ typedef struct _GstCudaMemory GstCudaMemory;
|
|||
*/
|
||||
#define GST_CAPS_FEATURE_MEMORY_CUDA_MEMORY "memory:CUDAMemory"
|
||||
|
||||
struct _GstCudaAllocationParams
|
||||
{
|
||||
GstAllocationParams parent;
|
||||
|
||||
GstVideoInfo info;
|
||||
};
|
||||
|
||||
struct _GstCudaAllocator
|
||||
{
|
||||
GstAllocator parent;
|
||||
GstCudaContext *context;
|
||||
};
|
||||
|
||||
struct _GstCudaAllocatorClass
|
||||
{
|
||||
GstAllocatorClass parent_class;
|
||||
};
|
||||
|
||||
GType gst_cuda_allocator_get_type (void);
|
||||
|
||||
GstAllocator * gst_cuda_allocator_new (GstCudaContext * context);
|
||||
|
||||
GstMemory * gst_cuda_allocator_alloc (GstAllocator * allocator,
|
||||
gsize size,
|
||||
GstCudaAllocationParams * params);
|
||||
|
||||
/**
|
||||
* GstCudaMemoryTransfer:
|
||||
* @GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD: the device memory needs downloading
|
||||
|
@ -106,32 +81,36 @@ typedef enum
|
|||
|
||||
struct _GstCudaMemory
|
||||
{
|
||||
GstMemory mem;
|
||||
GstMemory mem;
|
||||
|
||||
/*< public >*/
|
||||
GstCudaContext *context;
|
||||
CUdeviceptr data;
|
||||
GstVideoInfo info;
|
||||
|
||||
GstCudaAllocationParams alloc_params;
|
||||
|
||||
/* offset and stride of CUDA device memory */
|
||||
gsize offset[GST_VIDEO_MAX_PLANES];
|
||||
gint stride;
|
||||
|
||||
/* allocated CUDA Host memory */
|
||||
gpointer map_alloc_data;
|
||||
|
||||
/* aligned CUDA Host memory */
|
||||
guint8 *align_data;
|
||||
|
||||
/* pointing align_data if the memory is mapped */
|
||||
gpointer map_data;
|
||||
|
||||
gint map_count;
|
||||
|
||||
GMutex lock;
|
||||
/*< private >*/
|
||||
GstCudaMemoryPrivate *priv;
|
||||
gpointer _gst_reserved[GST_PADDING];
|
||||
};
|
||||
|
||||
gboolean gst_is_cuda_memory (GstMemory * mem);
|
||||
struct _GstCudaAllocator
|
||||
{
|
||||
GstAllocator parent;
|
||||
};
|
||||
|
||||
struct _GstCudaAllocatorClass
|
||||
{
|
||||
GstAllocatorClass parent_class;
|
||||
};
|
||||
|
||||
void gst_cuda_memory_init_once (void);
|
||||
|
||||
gboolean gst_is_cuda_memory (GstMemory * mem);
|
||||
|
||||
GType gst_cuda_allocator_get_type (void);
|
||||
|
||||
GstMemory * gst_cuda_allocator_alloc (GstCudaAllocator * allocator,
|
||||
GstCudaContext * context,
|
||||
const GstVideoInfo * info);
|
||||
|
||||
G_END_DECLS
|
||||
|
||||
|
|
|
@ -174,10 +174,6 @@ gst_cuda_upload_propose_allocation (GstBaseTransform * trans,
|
|||
if (gst_query_get_n_allocation_pools (query) == 0) {
|
||||
GstCapsFeatures *features;
|
||||
GstStructure *config;
|
||||
GstVideoAlignment align;
|
||||
GstAllocationParams params = { 0, 31, 0, 0, };
|
||||
GstAllocator *allocator = NULL;
|
||||
gint i;
|
||||
|
||||
features = gst_caps_get_features (caps, 0);
|
||||
|
||||
|
@ -191,38 +187,25 @@ gst_cuda_upload_propose_allocation (GstBaseTransform * trans,
|
|||
|
||||
config = gst_buffer_pool_get_config (pool);
|
||||
|
||||
gst_video_alignment_reset (&align);
|
||||
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (&info); i++) {
|
||||
align.stride_align[i] = 31;
|
||||
}
|
||||
gst_video_info_align (&info, &align);
|
||||
|
||||
gst_buffer_pool_config_add_option (config,
|
||||
GST_BUFFER_POOL_OPTION_VIDEO_META);
|
||||
gst_buffer_pool_config_add_option (config,
|
||||
GST_BUFFER_POOL_OPTION_VIDEO_ALIGNMENT);
|
||||
|
||||
gst_buffer_pool_config_set_video_alignment (config, &align);
|
||||
size = GST_VIDEO_INFO_SIZE (&info);
|
||||
gst_buffer_pool_config_set_params (config, caps, size, 0, 0);
|
||||
|
||||
gst_query_add_allocation_meta (query, GST_VIDEO_META_API_TYPE, NULL);
|
||||
gst_query_add_allocation_pool (query, pool, size, 0, 0);
|
||||
|
||||
if (gst_buffer_pool_config_get_allocator (config, &allocator, ¶ms)) {
|
||||
if (params.align < 31)
|
||||
params.align = 31;
|
||||
|
||||
gst_query_add_allocation_param (query, allocator, ¶ms);
|
||||
gst_buffer_pool_config_set_allocator (config, allocator, ¶ms);
|
||||
}
|
||||
|
||||
if (!gst_buffer_pool_set_config (pool, config)) {
|
||||
GST_ERROR_OBJECT (ctrans, "failed to set config");
|
||||
gst_object_unref (pool);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* Get updated size by cuda buffer pool */
|
||||
config = gst_buffer_pool_get_config (pool);
|
||||
gst_buffer_pool_config_get_params (config, NULL, &size, NULL, NULL);
|
||||
gst_structure_free (config);
|
||||
|
||||
gst_query_add_allocation_pool (query, pool, size, 0, 0);
|
||||
|
||||
gst_object_unref (pool);
|
||||
}
|
||||
|
||||
|
@ -289,6 +272,12 @@ gst_cuda_upload_decide_allocation (GstBaseTransform * trans, GstQuery * query)
|
|||
gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META);
|
||||
gst_buffer_pool_config_set_params (config, outcaps, size, min, max);
|
||||
gst_buffer_pool_set_config (pool, config);
|
||||
|
||||
/* Get updated size by cuda buffer pool */
|
||||
config = gst_buffer_pool_get_config (pool);
|
||||
gst_buffer_pool_config_get_params (config, NULL, &size, NULL, NULL);
|
||||
gst_structure_free (config);
|
||||
|
||||
if (update_pool)
|
||||
gst_query_set_nth_allocation_pool (query, 0, pool, size, min, max);
|
||||
else
|
||||
|
|
|
@ -625,6 +625,7 @@ gst_nv_base_enc_propose_allocation (GstVideoEncoder * enc, GstQuery * query)
|
|||
GstBufferPool *pool;
|
||||
GstStructure *config;
|
||||
GstCapsFeatures *features;
|
||||
guint size;
|
||||
|
||||
GST_DEBUG_OBJECT (nvenc, "propose allocation");
|
||||
|
||||
|
@ -665,18 +666,25 @@ gst_nv_base_enc_propose_allocation (GstVideoEncoder * enc, GstQuery * query)
|
|||
goto done;
|
||||
}
|
||||
|
||||
config = gst_buffer_pool_get_config (pool);
|
||||
gst_buffer_pool_config_set_params (config, caps, GST_VIDEO_INFO_SIZE (&info),
|
||||
nvenc->items->len, nvenc->items->len);
|
||||
size = GST_VIDEO_INFO_SIZE (&info);
|
||||
|
||||
gst_query_add_allocation_pool (query, pool, GST_VIDEO_INFO_SIZE (&info),
|
||||
config = gst_buffer_pool_get_config (pool);
|
||||
gst_buffer_pool_config_set_params (config, caps, size,
|
||||
nvenc->items->len, nvenc->items->len);
|
||||
gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META);
|
||||
gst_query_add_allocation_meta (query, GST_VIDEO_META_API_TYPE, NULL);
|
||||
|
||||
if (!gst_buffer_pool_set_config (pool, config))
|
||||
goto error_pool_config;
|
||||
|
||||
/* Get updated size by cuda buffer pool */
|
||||
config = gst_buffer_pool_get_config (pool);
|
||||
gst_buffer_pool_config_get_params (config, NULL, &size, NULL, NULL);
|
||||
gst_structure_free (config);
|
||||
|
||||
gst_query_add_allocation_pool (query, pool, size,
|
||||
nvenc->items->len, nvenc->items->len);
|
||||
gst_query_add_allocation_meta (query, GST_VIDEO_META_API_TYPE, NULL);
|
||||
|
||||
gst_object_unref (pool);
|
||||
|
||||
done:
|
||||
|
@ -2231,17 +2239,12 @@ gst_nv_base_enc_upload_frame (GstNvBaseEnc * nvenc, GstVideoFrame * frame,
|
|||
CUdeviceptr dst = resource->cuda_pointer;
|
||||
GstVideoInfo *info = &frame->info;
|
||||
CUresult cuda_ret;
|
||||
GstCudaMemory *cuda_mem = NULL;
|
||||
|
||||
if (!gst_cuda_context_push (nvenc->cuda_ctx)) {
|
||||
GST_ERROR_OBJECT (nvenc, "cannot push context");
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
if (use_device_memory) {
|
||||
cuda_mem = (GstCudaMemory *) gst_buffer_peek_memory (frame->buffer, 0);
|
||||
}
|
||||
|
||||
for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (frame); i++) {
|
||||
CUDA_MEMCPY2D param = { 0, };
|
||||
guint dest_stride = _get_cuda_device_stride (&nvenc->input_info, i,
|
||||
|
@ -2249,13 +2252,12 @@ gst_nv_base_enc_upload_frame (GstNvBaseEnc * nvenc, GstVideoFrame * frame,
|
|||
|
||||
if (use_device_memory) {
|
||||
param.srcMemoryType = CU_MEMORYTYPE_DEVICE;
|
||||
param.srcDevice = cuda_mem->data + cuda_mem->offset[i];
|
||||
param.srcPitch = cuda_mem->stride;
|
||||
param.srcDevice = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (frame, i);
|
||||
} else {
|
||||
param.srcMemoryType = CU_MEMORYTYPE_HOST;
|
||||
param.srcHost = GST_VIDEO_FRAME_PLANE_DATA (frame, i);
|
||||
param.srcPitch = GST_VIDEO_FRAME_PLANE_STRIDE (frame, i);
|
||||
}
|
||||
param.srcPitch = GST_VIDEO_FRAME_PLANE_STRIDE (frame, i);
|
||||
|
||||
param.dstMemoryType = CU_MEMORYTYPE_DEVICE;
|
||||
param.dstDevice = dst;
|
||||
|
|
|
@ -1241,34 +1241,25 @@ gst_nvdec_copy_device_to_memory (GstNvDec * nvdec,
|
|||
GstVideoInfo *info = &nvdec->output_state->info;
|
||||
gint i;
|
||||
GstMemory *mem;
|
||||
GstCudaMemory *cuda_mem = NULL;
|
||||
|
||||
if (!gst_cuda_context_push (nvdec->cuda_ctx)) {
|
||||
GST_WARNING_OBJECT (nvdec, "failed to lock CUDA context");
|
||||
return FALSE;
|
||||
}
|
||||
gboolean use_device_copy = FALSE;
|
||||
GstMapFlags map_flags = GST_MAP_WRITE;
|
||||
|
||||
if (nvdec->mem_type == GST_NVDEC_MEM_TYPE_CUDA &&
|
||||
(mem = gst_buffer_peek_memory (output_buffer, 0)) &&
|
||||
gst_is_cuda_memory (mem)) {
|
||||
GstCudaMemory *cmem = GST_CUDA_MEMORY_CAST (mem);
|
||||
|
||||
if (cmem->context == nvdec->cuda_ctx ||
|
||||
gst_cuda_context_get_handle (cmem->context) ==
|
||||
gst_cuda_context_get_handle (nvdec->cuda_ctx) ||
|
||||
(gst_cuda_context_can_access_peer (cmem->context, nvdec->cuda_ctx) &&
|
||||
gst_cuda_context_can_access_peer (nvdec->cuda_ctx,
|
||||
cmem->context))) {
|
||||
cuda_mem = cmem;
|
||||
}
|
||||
map_flags |= GST_MAP_CUDA;
|
||||
use_device_copy = TRUE;
|
||||
}
|
||||
|
||||
if (!cuda_mem) {
|
||||
if (!gst_video_frame_map (&video_frame, info, output_buffer, GST_MAP_WRITE)) {
|
||||
GST_ERROR_OBJECT (nvdec, "frame map failure");
|
||||
gst_cuda_context_pop (NULL);
|
||||
return FALSE;
|
||||
}
|
||||
if (!gst_video_frame_map (&video_frame, info, output_buffer, map_flags)) {
|
||||
GST_ERROR_OBJECT (nvdec, "frame map failure");
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
if (!gst_cuda_context_push (nvdec->cuda_ctx)) {
|
||||
gst_video_frame_unmap (&video_frame);
|
||||
GST_WARNING_OBJECT (nvdec, "failed to lock CUDA context");
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
params.progressive_frame = dispinfo->progressive_frame;
|
||||
|
@ -1286,17 +1277,17 @@ gst_nvdec_copy_device_to_memory (GstNvDec * nvdec,
|
|||
copy_params.srcMemoryType = CU_MEMORYTYPE_DEVICE;
|
||||
copy_params.srcPitch = pitch;
|
||||
copy_params.dstMemoryType =
|
||||
cuda_mem ? CU_MEMORYTYPE_DEVICE : CU_MEMORYTYPE_HOST;
|
||||
use_device_copy ? CU_MEMORYTYPE_DEVICE : CU_MEMORYTYPE_HOST;
|
||||
|
||||
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++) {
|
||||
copy_params.srcDevice = dptr + (i * pitch * GST_VIDEO_INFO_HEIGHT (info));
|
||||
if (cuda_mem) {
|
||||
copy_params.dstDevice = cuda_mem->data + cuda_mem->offset[i];
|
||||
copy_params.dstPitch = cuda_mem->stride;
|
||||
if (use_device_copy) {
|
||||
copy_params.dstDevice =
|
||||
(CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (&video_frame, i);
|
||||
} else {
|
||||
copy_params.dstHost = GST_VIDEO_FRAME_PLANE_DATA (&video_frame, i);
|
||||
copy_params.dstPitch = GST_VIDEO_FRAME_PLANE_STRIDE (&video_frame, i);
|
||||
}
|
||||
copy_params.dstPitch = GST_VIDEO_FRAME_PLANE_STRIDE (&video_frame, i);
|
||||
copy_params.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (info, i)
|
||||
* GST_VIDEO_INFO_COMP_PSTRIDE (info, i);
|
||||
copy_params.Height = GST_VIDEO_INFO_COMP_HEIGHT (info, i);
|
||||
|
@ -1304,8 +1295,7 @@ gst_nvdec_copy_device_to_memory (GstNvDec * nvdec,
|
|||
if (!gst_cuda_result (CuMemcpy2DAsync (©_params, nvdec->cuda_stream))) {
|
||||
GST_ERROR_OBJECT (nvdec, "failed to copy %dth plane", i);
|
||||
CuvidUnmapVideoFrame (nvdec->decoder, dptr);
|
||||
if (!cuda_mem)
|
||||
gst_video_frame_unmap (&video_frame);
|
||||
gst_video_frame_unmap (&video_frame);
|
||||
gst_cuda_context_pop (NULL);
|
||||
return FALSE;
|
||||
}
|
||||
|
@ -1313,8 +1303,7 @@ gst_nvdec_copy_device_to_memory (GstNvDec * nvdec,
|
|||
|
||||
gst_cuda_result (CuStreamSynchronize (nvdec->cuda_stream));
|
||||
|
||||
if (!cuda_mem)
|
||||
gst_video_frame_unmap (&video_frame);
|
||||
gst_video_frame_unmap (&video_frame);
|
||||
|
||||
if (!gst_cuda_result (CuvidUnmapVideoFrame (nvdec->decoder, dptr)))
|
||||
GST_WARNING_OBJECT (nvdec, "failed to unmap video frame");
|
||||
|
@ -1558,9 +1547,15 @@ gst_nvdec_ensure_cuda_pool (GstNvDec * nvdec, GstQuery * query)
|
|||
n = gst_query_get_n_allocation_pools (query);
|
||||
if (n > 0) {
|
||||
gst_query_parse_nth_allocation_pool (query, 0, &pool, &size, &min, &max);
|
||||
if (pool && !GST_IS_CUDA_BUFFER_POOL (pool)) {
|
||||
gst_object_unref (pool);
|
||||
pool = NULL;
|
||||
if (pool) {
|
||||
if (!GST_IS_CUDA_BUFFER_POOL (pool)) {
|
||||
gst_clear_object (&pool);
|
||||
} else {
|
||||
GstCudaBufferPool *cpool = GST_CUDA_BUFFER_POOL (pool);
|
||||
|
||||
if (cpool->context != nvdec->cuda_ctx)
|
||||
gst_clear_object (&pool);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1578,6 +1573,12 @@ gst_nvdec_ensure_cuda_pool (GstNvDec * nvdec, GstQuery * query)
|
|||
gst_buffer_pool_config_set_params (config, outcaps, size, min, max);
|
||||
gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META);
|
||||
gst_buffer_pool_set_config (pool, config);
|
||||
|
||||
/* Get updated size by cuda buffer pool */
|
||||
config = gst_buffer_pool_get_config (pool);
|
||||
gst_buffer_pool_config_get_params (config, NULL, &size, NULL, NULL);
|
||||
gst_structure_free (config);
|
||||
|
||||
if (n > 0)
|
||||
gst_query_set_nth_allocation_pool (query, 0, pool, size, min, max);
|
||||
else
|
||||
|
|
|
@ -740,33 +740,24 @@ gst_nv_decoder_copy_frame_to_cuda (GstNvDecoder * decoder,
|
|||
{
|
||||
CUDA_MEMCPY2D copy_params = { 0, };
|
||||
GstMemory *mem;
|
||||
GstCudaMemory *cuda_mem = NULL;
|
||||
gint i;
|
||||
gboolean ret = FALSE;
|
||||
GstVideoFrame video_frame;
|
||||
|
||||
mem = gst_buffer_peek_memory (buffer, 0);
|
||||
if (!gst_is_cuda_memory (mem)) {
|
||||
GST_WARNING_OBJECT (decoder, "Not a CUDA memory");
|
||||
return FALSE;
|
||||
} else {
|
||||
GstCudaMemory *cmem = GST_CUDA_MEMORY_CAST (mem);
|
||||
|
||||
if (cmem->context == decoder->context ||
|
||||
gst_cuda_context_get_handle (cmem->context) ==
|
||||
gst_cuda_context_get_handle (decoder->context) ||
|
||||
(gst_cuda_context_can_access_peer (cmem->context, decoder->context) &&
|
||||
gst_cuda_context_can_access_peer (decoder->context,
|
||||
cmem->context))) {
|
||||
cuda_mem = cmem;
|
||||
}
|
||||
}
|
||||
|
||||
if (!cuda_mem) {
|
||||
GST_WARNING_OBJECT (decoder, "Access to CUDA memory is not allowed");
|
||||
if (!gst_video_frame_map (&video_frame,
|
||||
&decoder->info, buffer, GST_MAP_WRITE | GST_MAP_CUDA)) {
|
||||
GST_ERROR_OBJECT (decoder, "frame map failure");
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
if (!gst_cuda_context_push (decoder->context)) {
|
||||
gst_video_frame_unmap (&video_frame);
|
||||
GST_ERROR_OBJECT (decoder, "Failed to push CUDA context");
|
||||
return FALSE;
|
||||
}
|
||||
|
@ -778,8 +769,9 @@ gst_nv_decoder_copy_frame_to_cuda (GstNvDecoder * decoder,
|
|||
for (i = 0; i < GST_VIDEO_INFO_N_PLANES (&decoder->info); i++) {
|
||||
copy_params.srcDevice = frame->devptr +
|
||||
(i * frame->pitch * GST_VIDEO_INFO_HEIGHT (&decoder->info));
|
||||
copy_params.dstDevice = cuda_mem->data + cuda_mem->offset[i];
|
||||
copy_params.dstPitch = cuda_mem->stride;
|
||||
copy_params.dstDevice =
|
||||
(CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (&video_frame, i);
|
||||
copy_params.dstPitch = GST_VIDEO_FRAME_PLANE_STRIDE (&video_frame, i);
|
||||
copy_params.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (&decoder->info, 0)
|
||||
* GST_VIDEO_INFO_COMP_PSTRIDE (&decoder->info, 0);
|
||||
copy_params.Height = GST_VIDEO_INFO_COMP_HEIGHT (&decoder->info, i);
|
||||
|
@ -795,6 +787,7 @@ gst_nv_decoder_copy_frame_to_cuda (GstNvDecoder * decoder,
|
|||
ret = TRUE;
|
||||
|
||||
done:
|
||||
gst_video_frame_unmap (&video_frame);
|
||||
gst_cuda_context_pop (NULL);
|
||||
|
||||
GST_LOG_OBJECT (decoder, "Copy frame to CUDA ret %d", ret);
|
||||
|
@ -1567,6 +1560,12 @@ gst_nv_decoder_ensure_cuda_pool (GstNvDecoder * decoder, GstQuery * query)
|
|||
gst_buffer_pool_config_set_params (config, outcaps, size, min, max);
|
||||
gst_buffer_pool_config_add_option (config, GST_BUFFER_POOL_OPTION_VIDEO_META);
|
||||
gst_buffer_pool_set_config (pool, config);
|
||||
|
||||
/* Get updated size by cuda buffer pool */
|
||||
config = gst_buffer_pool_get_config (pool);
|
||||
gst_buffer_pool_config_get_params (config, NULL, &size, NULL, NULL);
|
||||
gst_structure_free (config);
|
||||
|
||||
if (n > 0)
|
||||
gst_query_set_nth_allocation_pool (query, 0, pool, size, min, max);
|
||||
else
|
||||
|
|
|
@ -39,6 +39,7 @@
|
|||
#include "gstcudadownload.h"
|
||||
#include "gstcudaupload.h"
|
||||
#include "gstcudafilter.h"
|
||||
#include "gstcudamemory.h"
|
||||
|
||||
GST_DEBUG_CATEGORY (gst_nvcodec_debug);
|
||||
GST_DEBUG_CATEGORY (gst_nvdec_debug);
|
||||
|
@ -238,6 +239,7 @@ plugin_init (GstPlugin * plugin)
|
|||
GST_TYPE_CUDA_UPLOAD);
|
||||
|
||||
gst_cuda_filter_plugin_init (plugin);
|
||||
gst_cuda_memory_init_once ();
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue