cuda: Add support for P012_LE and Y444/GBR high bitdepth formats

Adding P012, Y444_10, Y444_12, GBR_10, GBR_12 and GBR_16 formats support

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/5375>
This commit is contained in:
Seungha Yang 2023-09-21 19:21:18 +09:00 committed by GStreamer Marge Bot
parent d731a7c2fc
commit a80f542f66
4 changed files with 133 additions and 15 deletions

View file

@ -217,6 +217,7 @@ gst_cuda_allocator_update_info (const GstVideoInfo * reference,
case GST_VIDEO_FORMAT_NV12:
case GST_VIDEO_FORMAT_NV21:
case GST_VIDEO_FORMAT_P010_10LE:
case GST_VIDEO_FORMAT_P012_LE:
case GST_VIDEO_FORMAT_P016_LE:
ret.stride[0] = pitch;
ret.stride[1] = pitch;
@ -224,10 +225,15 @@ gst_cuda_allocator_update_info (const GstVideoInfo * reference,
ret.offset[1] = ret.stride[0] * height;
break;
case GST_VIDEO_FORMAT_Y444:
case GST_VIDEO_FORMAT_Y444_10LE:
case GST_VIDEO_FORMAT_Y444_12LE:
case GST_VIDEO_FORMAT_Y444_16LE:
case GST_VIDEO_FORMAT_RGBP:
case GST_VIDEO_FORMAT_BGRP:
case GST_VIDEO_FORMAT_GBR:
case GST_VIDEO_FORMAT_GBR_10LE:
case GST_VIDEO_FORMAT_GBR_12LE:
case GST_VIDEO_FORMAT_GBR_16LE:
ret.stride[0] = pitch;
ret.stride[1] = pitch;
ret.stride[2] = pitch;
@ -729,9 +735,12 @@ static const TextureFormat format_map[] = {
MAKE_FORMAT_YUV_SEMI_PLANAR (NV12, UNSIGNED_INT8),
MAKE_FORMAT_YUV_SEMI_PLANAR (NV21, UNSIGNED_INT8),
MAKE_FORMAT_YUV_SEMI_PLANAR (P010_10LE, UNSIGNED_INT16),
MAKE_FORMAT_YUV_SEMI_PLANAR (P012_LE, UNSIGNED_INT16),
MAKE_FORMAT_YUV_SEMI_PLANAR (P016_LE, UNSIGNED_INT16),
MAKE_FORMAT_YUV_PLANAR (I420_10LE, UNSIGNED_INT16),
MAKE_FORMAT_YUV_PLANAR (Y444, UNSIGNED_INT8),
MAKE_FORMAT_YUV_PLANAR (Y444_10LE, UNSIGNED_INT16),
MAKE_FORMAT_YUV_PLANAR (Y444_12LE, UNSIGNED_INT16),
MAKE_FORMAT_YUV_PLANAR (Y444_16LE, UNSIGNED_INT16),
MAKE_FORMAT_RGB (RGBA, UNSIGNED_INT8),
MAKE_FORMAT_RGB (BGRA, UNSIGNED_INT8),
@ -746,6 +755,9 @@ static const TextureFormat format_map[] = {
MAKE_FORMAT_RGBP (RGBP, UNSIGNED_INT8),
MAKE_FORMAT_RGBP (BGRP, UNSIGNED_INT8),
MAKE_FORMAT_RGBP (GBR, UNSIGNED_INT8),
MAKE_FORMAT_RGBP (GBR_10LE, UNSIGNED_INT16),
MAKE_FORMAT_RGBP (GBR_12LE, UNSIGNED_INT16),
MAKE_FORMAT_RGBP (GBR_16LE, UNSIGNED_INT16),
MAKE_FORMAT_RGBAP (GBRA, UNSIGNED_INT8),
};
@ -1013,6 +1025,7 @@ gst_cuda_allocator_calculate_alloc_height (const GstVideoInfo * info)
case GST_VIDEO_FORMAT_YV12:
case GST_VIDEO_FORMAT_NV12:
case GST_VIDEO_FORMAT_P010_10LE:
case GST_VIDEO_FORMAT_P012_LE:
case GST_VIDEO_FORMAT_P016_LE:
case GST_VIDEO_FORMAT_I420_10LE:
alloc_height = GST_ROUND_UP_2 (alloc_height);
@ -1030,6 +1043,7 @@ gst_cuda_allocator_calculate_alloc_height (const GstVideoInfo * info)
case GST_VIDEO_FORMAT_NV12:
case GST_VIDEO_FORMAT_NV21:
case GST_VIDEO_FORMAT_P010_10LE:
case GST_VIDEO_FORMAT_P012_LE:
case GST_VIDEO_FORMAT_P016_LE:
alloc_height += alloc_height / 2;
break;
@ -1037,10 +1051,15 @@ gst_cuda_allocator_calculate_alloc_height (const GstVideoInfo * info)
case GST_VIDEO_FORMAT_I422_10LE:
case GST_VIDEO_FORMAT_I422_12LE:
case GST_VIDEO_FORMAT_Y444:
case GST_VIDEO_FORMAT_Y444_10LE:
case GST_VIDEO_FORMAT_Y444_12LE:
case GST_VIDEO_FORMAT_Y444_16LE:
case GST_VIDEO_FORMAT_RGBP:
case GST_VIDEO_FORMAT_BGRP:
case GST_VIDEO_FORMAT_GBR:
case GST_VIDEO_FORMAT_GBR_10LE:
case GST_VIDEO_FORMAT_GBR_12LE:
case GST_VIDEO_FORMAT_GBR_16LE:
alloc_height *= 3;
break;
case GST_VIDEO_FORMAT_GBRA:

View file

@ -635,6 +635,8 @@ typedef struct
#define SAMPLE_RGBP "sample_rgbp"
#define SAMPLE_BGRP "sample_bgrp"
#define SAMPLE_GBR "sample_gbr"
#define SAMPLE_GBR_10 "sample_gbr_10"
#define SAMPLE_GBR_12 "sample_gbr_12"
#define SAMPLE_GBRA "sample_gbra"
#define WRITE_I420 "write_i420"
@ -642,10 +644,10 @@ typedef struct
#define WRITE_NV12 "write_nv12"
#define WRITE_NV21 "write_nv21"
#define WRITE_P010 "write_p010"
/* same as P010 */
#define WRITE_P016 "write_p010"
#define WRITE_I420_10 "write_i420_10"
#define WRITE_Y444 "write_y444"
#define WRITE_Y444_10 "write_y444_10"
#define WRITE_Y444_12 "write_y444_12"
#define WRITE_Y444_16 "write_y444_16"
#define WRITE_RGBA "write_rgba"
#define WRITE_RGBx "write_rgbx"
@ -663,6 +665,9 @@ typedef struct
#define WRITE_RGBP "write_rgbp"
#define WRITE_BGRP "write_bgrp"
#define WRITE_GBR "write_gbr"
#define WRITE_GBR_10 "write_gbr_10"
#define WRITE_GBR_12 "write_gbr_12"
#define WRITE_GBR_16 "write_gbr_16"
#define WRITE_GBRA "write_gbra"
#define ROTATE_IDENTITY "rotate_identity"
#define ROTATE_90R "rotate_90r"
@ -792,7 +797,7 @@ SAMPLE_YUV_PLANAR_12BIS "(cudaTextureObject_t tex0, cudaTextureObject_t tex1,\n"
" float luma = tex2D<float>(tex0, x, y);\n"
" float u = tex2D<float>(tex1, x, y);\n"
" float v = tex2D<float>(tex2, x, y);\n"
" /* (1 << 6) to scale [0, 1.0) range */\n"
" /* (1 << 4) to scale [0, 1.0) range */\n"
" return make_float4 (luma * 16, u * 16, v * 16, 1);\n"
"}\n"
"\n"
@ -892,6 +897,27 @@ SAMPLE_GBR "(cudaTextureObject_t tex0, cudaTextureObject_t tex1,\n"
" float r = tex2D<float>(tex2, x, y);\n"
" return make_float4 (r, g, b, 1);\n"
"}\n"
"__device__ inline float4\n"
SAMPLE_GBR_10 "(cudaTextureObject_t tex0, cudaTextureObject_t tex1,\n"
" cudaTextureObject_t tex2, cudaTextureObject_t tex3, float x, float y)\n"
"{\n"
" float g = tex2D<float>(tex0, x, y);\n"
" float b = tex2D<float>(tex1, x, y);\n"
" float r = tex2D<float>(tex2, x, y);\n"
" /* (1 << 6) to scale [0, 1.0) range */\n"
" return make_float4 (r * 64, g * 64, b * 64, 1);\n"
"}\n"
"\n"
"__device__ inline float4\n"
SAMPLE_GBR_12 "(cudaTextureObject_t tex0, cudaTextureObject_t tex1,\n"
" cudaTextureObject_t tex2, cudaTextureObject_t tex3, float x, float y)\n"
"{\n"
" float g = tex2D<float>(tex0, x, y);\n"
" float b = tex2D<float>(tex1, x, y);\n"
" float r = tex2D<float>(tex2, x, y);\n"
" /* (1 << 4) to scale [0, 1.0) range */\n"
" return make_float4 (r * 16, g * 16, b * 16, 1);\n"
"}\n"
"\n"
"__device__ inline float4\n"
SAMPLE_GBRA "(cudaTextureObject_t tex0, cudaTextureObject_t tex1,\n"
@ -987,6 +1013,26 @@ WRITE_Y444 "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n
"}\n"
"\n"
"__device__ inline void\n"
WRITE_Y444_10 "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
"{\n"
" int pos = x * 2 + y * stride0;\n"
" *(unsigned short *) &dst0[pos] = scale_to_10bits (sample.x);\n"
" *(unsigned short *) &dst1[pos] = scale_to_10bits (sample.y);\n"
" *(unsigned short *) &dst2[pos] = scale_to_10bits (sample.z);\n"
"}\n"
"\n"
"__device__ inline void\n"
WRITE_Y444_12 "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
"{\n"
" int pos = x * 2 + y * stride0;\n"
" *(unsigned short *) &dst0[pos] = scale_to_12bits (sample.x);\n"
" *(unsigned short *) &dst1[pos] = scale_to_12bits (sample.y);\n"
" *(unsigned short *) &dst2[pos] = scale_to_12bits (sample.z);\n"
"}\n"
"\n"
"__device__ inline void\n"
WRITE_Y444_16 "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
"{\n"
@ -1173,6 +1219,36 @@ WRITE_GBR "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
"}\n"
"\n"
"__device__ inline void\n"
WRITE_GBR_10 "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
"{\n"
" int pos = x * 2 + y * stride0;\n"
" *(unsigned short *) &dst0[pos] = scale_to_10bits (sample.y);\n"
" *(unsigned short *) &dst1[pos] = scale_to_10bits (sample.z);\n"
" *(unsigned short *) &dst2[pos] = scale_to_10bits (sample.x);\n"
"}\n"
"\n"
"__device__ inline void\n"
WRITE_GBR_12 "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
"{\n"
" int pos = x * 2 + y * stride0;\n"
" *(unsigned short *) &dst0[pos] = scale_to_12bits (sample.y);\n"
" *(unsigned short *) &dst1[pos] = scale_to_12bits (sample.z);\n"
" *(unsigned short *) &dst2[pos] = scale_to_12bits (sample.x);\n"
"}\n"
"\n"
"__device__ inline void\n"
WRITE_GBR_16 "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
"{\n"
" int pos = x * 2 + y * stride0;\n"
" *(unsigned short *) &dst0[pos] = scale_to_ushort (sample.y);\n"
" *(unsigned short *) &dst1[pos] = scale_to_ushort (sample.z);\n"
" *(unsigned short *) &dst2[pos] = scale_to_ushort (sample.x);\n"
"}\n"
"\n"
"__device__ inline void\n"
WRITE_GBRA "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
"{\n"
@ -1406,9 +1482,12 @@ static const TextureFormat format_map[] = {
MAKE_FORMAT_YUV_SEMI_PLANAR (NV12, UNSIGNED_INT8, SAMPLE_SEMI_PLANAR),
MAKE_FORMAT_YUV_SEMI_PLANAR (NV21, UNSIGNED_INT8, SAMPLE_SEMI_PLANAR_SWAP),
MAKE_FORMAT_YUV_SEMI_PLANAR (P010_10LE, UNSIGNED_INT16, SAMPLE_SEMI_PLANAR),
MAKE_FORMAT_YUV_SEMI_PLANAR (P012_LE, UNSIGNED_INT16, SAMPLE_SEMI_PLANAR),
MAKE_FORMAT_YUV_SEMI_PLANAR (P016_LE, UNSIGNED_INT16, SAMPLE_SEMI_PLANAR),
MAKE_FORMAT_YUV_PLANAR (I420_10LE, UNSIGNED_INT16, SAMPLE_YUV_PLANAR_10BIS),
MAKE_FORMAT_YUV_PLANAR (Y444, UNSIGNED_INT8, SAMPLE_YUV_PLANAR),
MAKE_FORMAT_YUV_PLANAR (Y444_10LE, UNSIGNED_INT16, SAMPLE_YUV_PLANAR_10BIS),
MAKE_FORMAT_YUV_PLANAR (Y444_12LE, UNSIGNED_INT16, SAMPLE_YUV_PLANAR_12BIS),
MAKE_FORMAT_YUV_PLANAR (Y444_16LE, UNSIGNED_INT16, SAMPLE_YUV_PLANAR),
MAKE_FORMAT_RGB (RGBA, UNSIGNED_INT8, SAMPLE_RGBA),
MAKE_FORMAT_RGB (BGRA, UNSIGNED_INT8, SAMPLE_BGRA),
@ -1423,6 +1502,9 @@ static const TextureFormat format_map[] = {
MAKE_FORMAT_RGBP (RGBP, UNSIGNED_INT8, SAMPLE_RGBP),
MAKE_FORMAT_RGBP (BGRP, UNSIGNED_INT8, SAMPLE_BGRP),
MAKE_FORMAT_RGBP (GBR, UNSIGNED_INT8, SAMPLE_GBR),
MAKE_FORMAT_RGBP (GBR_10LE, UNSIGNED_INT16, SAMPLE_GBR_10),
MAKE_FORMAT_RGBP (GBR_12LE, UNSIGNED_INT16, SAMPLE_GBR_12),
MAKE_FORMAT_RGBP (GBR_16LE, UNSIGNED_INT16, SAMPLE_GBR),
MAKE_FORMAT_RGBAP (GBRA, UNSIGNED_INT8, SAMPLE_GBRA),
};
@ -1641,10 +1723,9 @@ gst_cuda_converter_setup (GstCudaConverter * self)
write_func = WRITE_NV21;
break;
case GST_VIDEO_FORMAT_P010_10LE:
write_func = WRITE_P010;
break;
case GST_VIDEO_FORMAT_P012_LE:
case GST_VIDEO_FORMAT_P016_LE:
write_func = WRITE_P016;
write_func = WRITE_P010;
break;
case GST_VIDEO_FORMAT_I420_10LE:
write_func = WRITE_I420_10;
@ -1652,6 +1733,12 @@ gst_cuda_converter_setup (GstCudaConverter * self)
case GST_VIDEO_FORMAT_Y444:
write_func = WRITE_Y444;
break;
case GST_VIDEO_FORMAT_Y444_10LE:
write_func = WRITE_Y444_10;
break;
case GST_VIDEO_FORMAT_Y444_12LE:
write_func = WRITE_Y444_12;
break;
case GST_VIDEO_FORMAT_Y444_16LE:
write_func = WRITE_Y444_16;
break;
@ -1703,6 +1790,15 @@ gst_cuda_converter_setup (GstCudaConverter * self)
case GST_VIDEO_FORMAT_GBR:
write_func = WRITE_GBR;
break;
case GST_VIDEO_FORMAT_GBR_10LE:
write_func = WRITE_GBR_10;
break;
case GST_VIDEO_FORMAT_GBR_12LE:
write_func = WRITE_GBR_12;
break;
case GST_VIDEO_FORMAT_GBR_16LE:
write_func = WRITE_GBR_16;
break;
case GST_VIDEO_FORMAT_GBRA:
write_func = WRITE_GBRA;
break;

View file

@ -30,9 +30,10 @@ GST_DEBUG_CATEGORY_STATIC (gst_cuda_base_convert_debug);
#define GST_CAT_DEFAULT gst_cuda_base_convert_debug
#define GST_CUDA_CONVET_FORMATS \
"{ I420, YV12, NV12, NV21, P010_10LE, P016_LE, I420_10LE, Y444, Y444_16LE, " \
"BGRA, RGBA, RGBx, BGRx, ARGB, ABGR, RGB, BGR, BGR10A2_LE, RGB10A2_LE, " \
"Y42B, I422_10LE, I422_12LE, RGBP, BGRP, GBR, GBRA }"
"{ I420, YV12, NV12, NV21, P010_10LE, P012_LE, P016_LE, I420_10LE, Y444, " \
"Y444_10LE, Y444_12LE, Y444_16LE, BGRA, RGBA, RGBx, BGRx, ARGB, ABGR, RGB, " \
"BGR, BGR10A2_LE, RGB10A2_LE, Y42B, I422_10LE, I422_12LE, RGBP, BGRP, GBR, " \
"GBRA, GBR_10LE, GBR_12LE, GBR_16LE }"
static GstStaticPadTemplate sink_template = GST_STATIC_PAD_TEMPLATE ("sink",
GST_PAD_SINK,

View file

@ -24,18 +24,20 @@
G_BEGIN_DECLS
#define GST_CUDA_FORMATS \
"{ I420, YV12, NV12, NV21, P010_10LE, P016_LE, I420_10LE, Y444, Y444_16LE, " \
"BGRA, RGBA, RGBx, BGRx, ARGB, ABGR, RGB, BGR, BGR10A2_LE, RGB10A2_LE, " \
"Y42B, I422_10LE, I422_12LE, YUY2, UYVY, RGBP, BGRP, GBR, GBRA }"
"{ I420, YV12, NV12, NV21, P010_10LE, P012_LE, P016_LE, I420_10LE, Y444, " \
"Y444_10LE, Y444_12LE, Y444_16LE, BGRA, RGBA, RGBx, BGRx, ARGB, ABGR, RGB, " \
"BGR, BGR10A2_LE, RGB10A2_LE, Y42B, I422_10LE, I422_12LE, YUY2, UYVY, RGBP, " \
"BGRP, GBR, GBR_10LE, GBR_12LE, GBR_16LE, GBRA }"
#define GST_CUDA_GL_FORMATS \
"{ I420, YV12, NV12, NV21, P010_10LE, P016_LE, Y444, " \
"{ I420, YV12, NV12, NV21, P010_10LE, P012_LE, P016_LE, Y444, " \
"BGRA, RGBA, RGBx, BGRx, ARGB, ABGR, RGB, BGR, BGR10A2_LE, RGB10A2_LE, " \
"YUY2, UYVY, RGBP, BGRP, GBR, GBRA }"
#define GST_CUDA_D3D11_FORMATS \
"{ I420, YV12, I420_10LE, Y444, Y444_16LE, " \
"BGRA, RGBA, BGRx, RGBx, Y42B, I422_10LE, I422_12LE }"
"{ I420, YV12, I420_10LE, Y444, Y444_10LE, Y444_12LE, Y444_16LE, " \
"BGRA, RGBA, BGRx, RGBx, Y42B, I422_10LE, I422_12LE, GBR, GBR, GBR_10LE, " \
"GBR_12LE, GBR_16LE }"
#define GST_CUDA_NVMM_FORMATS \
"{ I420, YV12, NV12, NV21, P010_10LE, Y444, " \