mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2024-12-23 00:36:51 +00:00
cudaconverter: Use cached texture
... instead of per-conversion texture alloc/free
Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/3884>
This commit is contained in:
parent
f44cac1c9f
commit
fa2bb42fda
1 changed file with 97 additions and 65 deletions
|
@ -1373,6 +1373,7 @@ typedef struct _TextureBuffer
|
||||||
{
|
{
|
||||||
CUdeviceptr ptr;
|
CUdeviceptr ptr;
|
||||||
gsize stride;
|
gsize stride;
|
||||||
|
CUtexObject texture;
|
||||||
} TextureBuffer;
|
} TextureBuffer;
|
||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
|
@ -1447,12 +1448,22 @@ gst_cuda_converter_dispose (GObject * object)
|
||||||
|
|
||||||
for (i = 0; i < G_N_ELEMENTS (priv->fallback_buffer); i++) {
|
for (i = 0; i < G_N_ELEMENTS (priv->fallback_buffer); i++) {
|
||||||
if (priv->fallback_buffer[i].ptr) {
|
if (priv->fallback_buffer[i].ptr) {
|
||||||
|
if (priv->fallback_buffer[i].texture) {
|
||||||
|
CuTexObjectDestroy (priv->fallback_buffer[i].texture);
|
||||||
|
priv->fallback_buffer[i].texture = 0;
|
||||||
|
}
|
||||||
|
|
||||||
CuMemFree (priv->fallback_buffer[i].ptr);
|
CuMemFree (priv->fallback_buffer[i].ptr);
|
||||||
priv->fallback_buffer[i].ptr = 0;
|
priv->fallback_buffer[i].ptr = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (priv->unpack_buffer.ptr) {
|
if (priv->unpack_buffer.ptr) {
|
||||||
|
if (priv->unpack_buffer.texture) {
|
||||||
|
CuTexObjectDestroy (priv->unpack_buffer.texture);
|
||||||
|
priv->unpack_buffer.texture = 0;
|
||||||
|
}
|
||||||
|
|
||||||
CuMemFree (priv->unpack_buffer.ptr);
|
CuMemFree (priv->unpack_buffer.ptr);
|
||||||
priv->unpack_buffer.ptr = 0;
|
priv->unpack_buffer.ptr = 0;
|
||||||
}
|
}
|
||||||
|
@ -1867,6 +1878,18 @@ gst_cuda_converter_setup (GstCudaConverter * self)
|
||||||
return FALSE;
|
return FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (priv->dest_rect.x != 0 || priv->dest_rect.y != 0 ||
|
||||||
|
priv->dest_rect.width != out_info->width ||
|
||||||
|
priv->dest_rect.height != out_info->height ||
|
||||||
|
in_info->width != out_info->width
|
||||||
|
|| in_info->height != out_info->height) {
|
||||||
|
for (i = 0; i < G_N_ELEMENTS (priv->filter_mode); i++)
|
||||||
|
priv->filter_mode[i] = CU_TR_FILTER_MODE_LINEAR;
|
||||||
|
} else {
|
||||||
|
for (i = 0; i < G_N_ELEMENTS (priv->filter_mode); i++)
|
||||||
|
priv->filter_mode[i] = CU_TR_FILTER_MODE_POINT;
|
||||||
|
}
|
||||||
|
|
||||||
if (!gst_cuda_context_push (self->context)) {
|
if (!gst_cuda_context_push (self->context)) {
|
||||||
GST_ERROR_OBJECT (self, "Couldn't push context");
|
GST_ERROR_OBJECT (self, "Couldn't push context");
|
||||||
return FALSE;
|
return FALSE;
|
||||||
|
@ -1874,6 +1897,13 @@ gst_cuda_converter_setup (GstCudaConverter * self)
|
||||||
|
|
||||||
/* Allocates intermediate memory for texture */
|
/* Allocates intermediate memory for texture */
|
||||||
if (unpack_function) {
|
if (unpack_function) {
|
||||||
|
CUDA_TEXTURE_DESC texture_desc;
|
||||||
|
CUDA_RESOURCE_DESC resource_desc;
|
||||||
|
CUtexObject texture = 0;
|
||||||
|
|
||||||
|
memset (&texture_desc, 0, sizeof (CUDA_TEXTURE_DESC));
|
||||||
|
memset (&resource_desc, 0, sizeof (CUDA_RESOURCE_DESC));
|
||||||
|
|
||||||
ret = CuMemAllocPitch (&priv->unpack_buffer.ptr,
|
ret = CuMemAllocPitch (&priv->unpack_buffer.ptr,
|
||||||
&priv->unpack_buffer.stride,
|
&priv->unpack_buffer.stride,
|
||||||
GST_VIDEO_INFO_COMP_WIDTH (texture_info, 0) *
|
GST_VIDEO_INFO_COMP_WIDTH (texture_info, 0) *
|
||||||
|
@ -1883,6 +1913,28 @@ gst_cuda_converter_setup (GstCudaConverter * self)
|
||||||
GST_ERROR_OBJECT (self, "Couldn't allocate unpack buffer");
|
GST_ERROR_OBJECT (self, "Couldn't allocate unpack buffer");
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
resource_desc.resType = CU_RESOURCE_TYPE_PITCH2D;
|
||||||
|
resource_desc.res.pitch2D.format = priv->texture_fmt->array_format[0];
|
||||||
|
resource_desc.res.pitch2D.numChannels = 4;
|
||||||
|
resource_desc.res.pitch2D.width = in_info->width;
|
||||||
|
resource_desc.res.pitch2D.height = in_info->height;
|
||||||
|
resource_desc.res.pitch2D.pitchInBytes = priv->unpack_buffer.stride;
|
||||||
|
resource_desc.res.pitch2D.devPtr = priv->unpack_buffer.ptr;
|
||||||
|
|
||||||
|
texture_desc.filterMode = priv->filter_mode[0];
|
||||||
|
texture_desc.flags = 0x2;
|
||||||
|
texture_desc.addressMode[0] = 1;
|
||||||
|
texture_desc.addressMode[1] = 1;
|
||||||
|
texture_desc.addressMode[2] = 1;
|
||||||
|
|
||||||
|
ret = CuTexObjectCreate (&texture, &resource_desc, &texture_desc, NULL);
|
||||||
|
if (!gst_cuda_result (ret)) {
|
||||||
|
GST_ERROR_OBJECT (self, "Couldn't create unpack texture");
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
|
||||||
|
priv->unpack_buffer.texture = texture;
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = CuModuleLoadData (&priv->module, ptx);
|
ret = CuModuleLoadData (&priv->module, ptx);
|
||||||
|
@ -1911,18 +1963,6 @@ gst_cuda_converter_setup (GstCudaConverter * self)
|
||||||
|
|
||||||
gst_cuda_context_pop (NULL);
|
gst_cuda_context_pop (NULL);
|
||||||
|
|
||||||
if (priv->dest_rect.x != 0 || priv->dest_rect.y != 0 ||
|
|
||||||
priv->dest_rect.width != out_info->width ||
|
|
||||||
priv->dest_rect.height != out_info->height ||
|
|
||||||
in_info->width != out_info->width
|
|
||||||
|| in_info->height != out_info->height) {
|
|
||||||
for (i = 0; i < G_N_ELEMENTS (priv->filter_mode); i++)
|
|
||||||
priv->filter_mode[i] = CU_TR_FILTER_MODE_LINEAR;
|
|
||||||
} else {
|
|
||||||
for (i = 0; i < G_N_ELEMENTS (priv->filter_mode); i++)
|
|
||||||
priv->filter_mode[i] = CU_TR_FILTER_MODE_POINT;
|
|
||||||
}
|
|
||||||
|
|
||||||
return TRUE;
|
return TRUE;
|
||||||
|
|
||||||
error:
|
error:
|
||||||
|
@ -2069,50 +2109,43 @@ ensure_fallback_buffer (GstCudaConverter * self, gint width_in_bytes,
|
||||||
static CUtexObject
|
static CUtexObject
|
||||||
gst_cuda_converter_create_texture (GstCudaConverter * self,
|
gst_cuda_converter_create_texture (GstCudaConverter * self,
|
||||||
CUdeviceptr src, gint width, gint height, gint stride, CUfilter_mode mode,
|
CUdeviceptr src, gint width, gint height, gint stride, CUfilter_mode mode,
|
||||||
CUarray_format format, guint channles, gint plane, CUstream stream,
|
CUarray_format format, guint channles, gint plane, CUstream stream)
|
||||||
gboolean * need_sync)
|
|
||||||
{
|
{
|
||||||
GstCudaConverterPrivate *priv = self->priv;
|
GstCudaConverterPrivate *priv = self->priv;
|
||||||
CUresult ret;
|
CUresult ret;
|
||||||
CUdeviceptr src_ptr;
|
CUdeviceptr src_ptr;
|
||||||
|
CUDA_MEMCPY2D params = { 0, };
|
||||||
|
|
||||||
*need_sync = FALSE;
|
if (!ensure_fallback_buffer (self, stride, height, plane))
|
||||||
|
return 0;
|
||||||
|
|
||||||
src_ptr = src;
|
params.srcMemoryType = CU_MEMORYTYPE_DEVICE;
|
||||||
|
params.srcPitch = stride;
|
||||||
|
params.srcDevice = (CUdeviceptr) src;
|
||||||
|
|
||||||
if (priv->texture_align > 0 && (src_ptr % priv->texture_align) != 0) {
|
params.dstMemoryType = CU_MEMORYTYPE_DEVICE;
|
||||||
CUDA_MEMCPY2D params = { 0, };
|
params.dstPitch = priv->fallback_buffer[plane].stride;
|
||||||
|
params.dstDevice = priv->fallback_buffer[plane].ptr;
|
||||||
|
params.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (&priv->in_info, plane)
|
||||||
|
* GST_VIDEO_INFO_COMP_PSTRIDE (&priv->in_info, plane),
|
||||||
|
params.Height = GST_VIDEO_INFO_COMP_HEIGHT (&priv->in_info, plane);
|
||||||
|
|
||||||
GST_DEBUG_OBJECT (self, "Plane %d is not aligned, copying", plane);
|
ret = CuMemcpy2DAsync (&params, stream);
|
||||||
|
if (!gst_cuda_result (ret)) {
|
||||||
if (!ensure_fallback_buffer (self, stride, height, plane))
|
GST_ERROR_OBJECT (self, "Couldn't copy to fallback buffer");
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
params.srcMemoryType = CU_MEMORYTYPE_DEVICE;
|
|
||||||
params.srcPitch = stride;
|
|
||||||
params.srcDevice = (CUdeviceptr) src_ptr;
|
|
||||||
|
|
||||||
params.dstMemoryType = CU_MEMORYTYPE_DEVICE;
|
|
||||||
params.dstPitch = priv->fallback_buffer[plane].stride;
|
|
||||||
params.dstDevice = priv->fallback_buffer[plane].ptr;
|
|
||||||
params.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (&priv->in_info, plane)
|
|
||||||
* GST_VIDEO_INFO_COMP_PSTRIDE (&priv->in_info, plane),
|
|
||||||
params.Height = GST_VIDEO_INFO_COMP_HEIGHT (&priv->in_info, plane);
|
|
||||||
|
|
||||||
ret = CuMemcpy2DAsync (&params, stream);
|
|
||||||
if (!gst_cuda_result (ret)) {
|
|
||||||
GST_ERROR_OBJECT (self, "Couldn't copy to fallback buffer");
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
*need_sync = TRUE;
|
|
||||||
|
|
||||||
src_ptr = priv->fallback_buffer[plane].ptr;
|
|
||||||
stride = priv->fallback_buffer[plane].stride;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return gst_cuda_converter_create_texture_unchecked (self,
|
if (!priv->fallback_buffer[plane].texture) {
|
||||||
src_ptr, width, height, format, channles, stride, plane, mode);
|
src_ptr = priv->fallback_buffer[plane].ptr;
|
||||||
|
stride = priv->fallback_buffer[plane].stride;
|
||||||
|
|
||||||
|
priv->fallback_buffer[plane].texture =
|
||||||
|
gst_cuda_converter_create_texture_unchecked (self, src_ptr, width,
|
||||||
|
height, format, channles, stride, plane, mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
return priv->fallback_buffer[plane].texture;
|
||||||
}
|
}
|
||||||
|
|
||||||
static gboolean
|
static gboolean
|
||||||
|
@ -2166,6 +2199,7 @@ gst_cuda_converter_convert_frame (GstCudaConverter * converter,
|
||||||
&dst[0], &dst[1], &dst[2], &dst[3], &stride[0], &stride[1]
|
&dst[0], &dst[1], &dst[2], &dst[3], &stride[0], &stride[1]
|
||||||
};
|
};
|
||||||
gboolean need_sync = FALSE;
|
gboolean need_sync = FALSE;
|
||||||
|
GstCudaMemory *cmem;
|
||||||
|
|
||||||
g_return_val_if_fail (GST_IS_CUDA_CONVERTER (converter), FALSE);
|
g_return_val_if_fail (GST_IS_CUDA_CONVERTER (converter), FALSE);
|
||||||
g_return_val_if_fail (src_frame != NULL, FALSE);
|
g_return_val_if_fail (src_frame != NULL, FALSE);
|
||||||
|
@ -2176,6 +2210,9 @@ gst_cuda_converter_convert_frame (GstCudaConverter * converter,
|
||||||
|
|
||||||
g_assert (format);
|
g_assert (format);
|
||||||
|
|
||||||
|
cmem = (GstCudaMemory *) gst_buffer_peek_memory (src_frame->buffer, 0);
|
||||||
|
g_return_val_if_fail (gst_is_cuda_memory (GST_MEMORY_CAST (cmem)), FALSE);
|
||||||
|
|
||||||
if (!gst_cuda_context_push (converter->context)) {
|
if (!gst_cuda_context_push (converter->context)) {
|
||||||
GST_ERROR_OBJECT (converter, "Couldn't push context");
|
GST_ERROR_OBJECT (converter, "Couldn't push context");
|
||||||
return FALSE;
|
return FALSE;
|
||||||
|
@ -2185,25 +2222,26 @@ gst_cuda_converter_convert_frame (GstCudaConverter * converter,
|
||||||
if (!gst_cuda_converter_unpack_rgb (converter, src_frame, stream))
|
if (!gst_cuda_converter_unpack_rgb (converter, src_frame, stream))
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
texture[0] = gst_cuda_converter_create_texture_unchecked (converter,
|
texture[0] = priv->unpack_buffer.texture;
|
||||||
priv->unpack_buffer.ptr, priv->in_info.width, priv->in_info.height,
|
|
||||||
format->array_format[0], 4, priv->unpack_buffer.stride, 0,
|
|
||||||
priv->filter_mode[0]);
|
|
||||||
if (!texture[0]) {
|
if (!texture[0]) {
|
||||||
GST_ERROR_OBJECT (converter, "Couldn't create texture");
|
GST_ERROR_OBJECT (converter, "Unpack texture is unavailable");
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (src_frame); i++) {
|
for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (src_frame); i++) {
|
||||||
CUdeviceptr src;
|
if (!gst_cuda_memory_get_texture (cmem,
|
||||||
|
i, priv->filter_mode[i], &texture[i])) {
|
||||||
|
CUdeviceptr src;
|
||||||
|
src = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (src_frame, i);
|
||||||
|
texture[i] = gst_cuda_converter_create_texture (converter,
|
||||||
|
src, GST_VIDEO_FRAME_COMP_WIDTH (src_frame, i),
|
||||||
|
GST_VIDEO_FRAME_COMP_HEIGHT (src_frame, i),
|
||||||
|
GST_VIDEO_FRAME_PLANE_STRIDE (src_frame, i),
|
||||||
|
priv->filter_mode[i], format->array_format[i], format->channels[i],
|
||||||
|
i, stream);
|
||||||
|
need_sync = TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
src = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (src_frame, i);
|
|
||||||
texture[i] = gst_cuda_converter_create_texture (converter,
|
|
||||||
src, GST_VIDEO_FRAME_COMP_WIDTH (src_frame, i),
|
|
||||||
GST_VIDEO_FRAME_COMP_HEIGHT (src_frame, i),
|
|
||||||
GST_VIDEO_FRAME_PLANE_STRIDE (src_frame, i),
|
|
||||||
priv->filter_mode[i], format->array_format[i], format->channels[i], i,
|
|
||||||
stream, &need_sync);
|
|
||||||
if (!texture[i]) {
|
if (!texture[i]) {
|
||||||
GST_ERROR_OBJECT (converter, "Couldn't create texture %d", i);
|
GST_ERROR_OBJECT (converter, "Couldn't create texture %d", i);
|
||||||
goto out;
|
goto out;
|
||||||
|
@ -2239,12 +2277,6 @@ gst_cuda_converter_convert_frame (GstCudaConverter * converter,
|
||||||
ret = TRUE;
|
ret = TRUE;
|
||||||
|
|
||||||
out:
|
out:
|
||||||
for (i = 0; i < G_N_ELEMENTS (texture); i++) {
|
|
||||||
if (texture[i])
|
|
||||||
CuTexObjectDestroy (texture[i]);
|
|
||||||
else
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
gst_cuda_context_pop (NULL);
|
gst_cuda_context_pop (NULL);
|
||||||
return ret;
|
return ret;
|
||||||
|
|
Loading…
Reference in a new issue