mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2025-02-11 16:55:23 +00:00
3236 lines
108 KiB
C++
3236 lines
108 KiB
C++
/* GStreamer
|
|
* Copyright (C) 2022 Seungha Yang <seungha@centricular.com>
|
|
*
|
|
* This library is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Library General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2 of the License, or (at your option) any later version.
|
|
*
|
|
* This library is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Library General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Library General Public
|
|
* License along with this library; if not, write to the
|
|
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
|
|
* Boston, MA 02110-1301, USA.
|
|
*/
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
#include "config.h"
|
|
#endif
|
|
|
|
#include "gstcudaconverter.h"
|
|
#include <string.h>
|
|
#include <mutex>
|
|
|
|
GST_DEBUG_CATEGORY_STATIC (gst_cuda_converter_debug);
|
|
#define GST_CAT_DEFAULT gst_cuda_converter_debug
|
|
|
|
#define CUDA_BLOCK_X 16
|
|
#define CUDA_BLOCK_Y 16
|
|
#define DIV_UP(size,block) (((size) + ((block) - 1)) / (block))
|
|
|
|
/* from GstD3D11 */
|
|
struct GstCudaColorMatrix
|
|
{
|
|
gdouble matrix[3][3];
|
|
gdouble offset[3];
|
|
gdouble min[3];
|
|
gdouble max[3];
|
|
};
|
|
|
|
static gchar *
|
|
gst_cuda_dump_color_matrix (GstCudaColorMatrix * matrix)
|
|
{
|
|
/* *INDENT-OFF* */
|
|
static const gchar format[] =
|
|
"[MATRIX]\n"
|
|
"|% .6f, % .6f, % .6f|\n"
|
|
"|% .6f, % .6f, % .6f|\n"
|
|
"|% .6f, % .6f, % .6f|\n"
|
|
"[OFFSET]\n"
|
|
"|% .6f, % .6f, % .6f|\n"
|
|
"[MIN]\n"
|
|
"|% .6f, % .6f, % .6f|\n"
|
|
"[MAX]\n"
|
|
"|% .6f, % .6f, % .6f|";
|
|
/* *INDENT-ON* */
|
|
|
|
return g_strdup_printf (format,
|
|
matrix->matrix[0][0], matrix->matrix[0][1], matrix->matrix[0][2],
|
|
matrix->matrix[1][0], matrix->matrix[1][1], matrix->matrix[1][2],
|
|
matrix->matrix[2][0], matrix->matrix[2][1], matrix->matrix[2][2],
|
|
matrix->offset[0], matrix->offset[1], matrix->offset[2],
|
|
matrix->min[0], matrix->min[1], matrix->min[2],
|
|
matrix->max[0], matrix->max[1], matrix->max[2]);
|
|
}
|
|
|
|
static void
|
|
color_matrix_copy (GstCudaColorMatrix * dst, const GstCudaColorMatrix * src)
|
|
{
|
|
for (guint i = 0; i < 3; i++) {
|
|
for (guint j = 0; j < 3; j++) {
|
|
dst->matrix[i][j] = src->matrix[i][j];
|
|
}
|
|
}
|
|
}
|
|
|
|
static void
|
|
color_matrix_multiply (GstCudaColorMatrix * dst, GstCudaColorMatrix * a,
|
|
GstCudaColorMatrix * b)
|
|
{
|
|
GstCudaColorMatrix tmp;
|
|
|
|
for (guint i = 0; i < 3; i++) {
|
|
for (guint j = 0; j < 3; j++) {
|
|
gdouble val = 0;
|
|
for (guint k = 0; k < 3; k++) {
|
|
val += a->matrix[i][k] * b->matrix[k][j];
|
|
}
|
|
|
|
tmp.matrix[i][j] = val;
|
|
}
|
|
}
|
|
|
|
color_matrix_copy (dst, &tmp);
|
|
}
|
|
|
|
static void
|
|
color_matrix_identity (GstCudaColorMatrix * m)
|
|
{
|
|
for (guint i = 0; i < 3; i++) {
|
|
for (guint j = 0; j < 3; j++) {
|
|
if (i == j)
|
|
m->matrix[i][j] = 1.0;
|
|
else
|
|
m->matrix[i][j] = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* gst_cuda_color_range_adjust_matrix_unorm:
|
|
* @in_info: a #GstVideoInfo
|
|
* @out_info: a #GstVideoInfo
|
|
* @matrix: a #GstCudaColorMatrix
|
|
*
|
|
* Calculates matrix for color range adjustment. Both input and output
|
|
* signals are in normalized [0.0..1.0] space.
|
|
*
|
|
* Resulting values can be calculated by
|
|
* | Yout | | Yin | | matrix.offset[0] |
|
|
* | Uout | = clamp ( matrix.matrix * | Uin | + | matrix.offset[1] |, matrix.min, matrix.max )
|
|
* | Vout | | Vin | | matrix.offset[2] |
|
|
*
|
|
* Returns: %TRUE if successful
|
|
*/
|
|
static gboolean
|
|
gst_cuda_color_range_adjust_matrix_unorm (const GstVideoInfo * in_info,
|
|
const GstVideoInfo * out_info, GstCudaColorMatrix * matrix)
|
|
{
|
|
gboolean in_rgb, out_rgb;
|
|
gint in_offset[GST_VIDEO_MAX_COMPONENTS];
|
|
gint in_scale[GST_VIDEO_MAX_COMPONENTS];
|
|
gint out_offset[GST_VIDEO_MAX_COMPONENTS];
|
|
gint out_scale[GST_VIDEO_MAX_COMPONENTS];
|
|
GstVideoColorRange in_range;
|
|
GstVideoColorRange out_range;
|
|
gdouble src_fullscale, dst_fullscale;
|
|
|
|
memset (matrix, 0, sizeof (GstCudaColorMatrix));
|
|
for (guint i = 0; i < 3; i++) {
|
|
matrix->matrix[i][i] = 1.0;
|
|
matrix->matrix[i][i] = 1.0;
|
|
matrix->matrix[i][i] = 1.0;
|
|
matrix->max[i] = 1.0;
|
|
}
|
|
|
|
in_rgb = GST_VIDEO_INFO_IS_RGB (in_info);
|
|
out_rgb = GST_VIDEO_INFO_IS_RGB (out_info);
|
|
|
|
if (in_rgb != out_rgb) {
|
|
GST_WARNING ("Invalid format conversion");
|
|
return FALSE;
|
|
}
|
|
|
|
in_range = in_info->colorimetry.range;
|
|
out_range = out_info->colorimetry.range;
|
|
|
|
if (in_range == GST_VIDEO_COLOR_RANGE_UNKNOWN) {
|
|
GST_WARNING ("Unknown input color range");
|
|
if (in_rgb || GST_VIDEO_INFO_IS_GRAY (in_info))
|
|
in_range = GST_VIDEO_COLOR_RANGE_0_255;
|
|
else
|
|
in_range = GST_VIDEO_COLOR_RANGE_16_235;
|
|
}
|
|
|
|
if (out_range == GST_VIDEO_COLOR_RANGE_UNKNOWN) {
|
|
GST_WARNING ("Unknown output color range");
|
|
if (out_rgb || GST_VIDEO_INFO_IS_GRAY (out_info))
|
|
out_range = GST_VIDEO_COLOR_RANGE_0_255;
|
|
else
|
|
out_range = GST_VIDEO_COLOR_RANGE_16_235;
|
|
}
|
|
|
|
src_fullscale = (gdouble) ((1 << in_info->finfo->depth[0]) - 1);
|
|
dst_fullscale = (gdouble) ((1 << out_info->finfo->depth[0]) - 1);
|
|
|
|
gst_video_color_range_offsets (in_range, in_info->finfo, in_offset, in_scale);
|
|
gst_video_color_range_offsets (out_range,
|
|
out_info->finfo, out_offset, out_scale);
|
|
|
|
matrix->min[0] = matrix->min[1] = matrix->min[2] =
|
|
(gdouble) out_offset[0] / dst_fullscale;
|
|
|
|
matrix->max[0] = (out_scale[0] + out_offset[0]) / dst_fullscale;
|
|
matrix->max[1] = matrix->max[2] =
|
|
(out_scale[1] + out_offset[0]) / dst_fullscale;
|
|
|
|
if (in_info->colorimetry.range == out_info->colorimetry.range) {
|
|
GST_DEBUG ("Same color range");
|
|
return TRUE;
|
|
}
|
|
|
|
/* Formula
|
|
*
|
|
* 1) Scales and offset compensates input to [0..1] range
|
|
* SRC_NORM[i] = (src[i] * src_fullscale - in_offset[i]) / in_scale[i]
|
|
* = (src[i] * src_fullscale / in_scale[i]) - in_offset[i] / in_scale[i]
|
|
*
|
|
* 2) Reverse to output UNIT scale
|
|
* DST_UINT[i] = SRC_NORM[i] * out_scale[i] + out_offset[i]
|
|
* = src[i] * src_fullscale * out_scale[i] / in_scale[i]
|
|
* - in_offset[i] * out_scale[i] / in_scale[i]
|
|
* + out_offset[i]
|
|
*
|
|
* 3) Back to [0..1] scale
|
|
* dst[i] = DST_UINT[i] / dst_fullscale
|
|
* = COEFF[i] * src[i] + OFF[i]
|
|
* where
|
|
* src_fullscale * out_scale[i]
|
|
* COEFF[i] = ------------------------------
|
|
* dst_fullscale * in_scale[i]
|
|
*
|
|
* out_offset[i] in_offset[i] * out_scale[i]
|
|
* OFF[i] = -------------- - ------------------------------
|
|
* dst_fullscale dst_fullscale * in_scale[i]
|
|
*/
|
|
for (guint i = 0; i < 3; i++) {
|
|
matrix->matrix[i][i] = (src_fullscale * out_scale[i]) /
|
|
(dst_fullscale * in_scale[i]);
|
|
matrix->offset[i] = (out_offset[i] / dst_fullscale) -
|
|
((gdouble) in_offset[i] * out_scale[i] / (dst_fullscale * in_scale[i]));
|
|
}
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
/**
|
|
* gst_cuda_yuv_to_rgb_matrix_unorm:
|
|
* @in_yuv_info: a #GstVideoInfo of input YUV signal
|
|
* @out_rgb_info: a #GstVideoInfo of output RGB signal
|
|
* @matrix: a #GstCudaColorMatrix
|
|
*
|
|
* Calculates transform matrix from YUV to RGB conversion. Both input and output
|
|
* signals are in normalized [0.0..1.0] space and additional gamma decoding
|
|
* or primary/transfer function transform is not performed by this matrix.
|
|
*
|
|
* Resulting non-linear RGB values can be calculated by
|
|
* | R' | | Y' | | matrix.offset[0] |
|
|
* | G' | = clamp ( matrix.matrix * | Cb | + | matrix.offset[1] | matrix.min, matrix.max )
|
|
* | B' | | Cr | | matrix.offset[2] |
|
|
*
|
|
* Returns: %TRUE if successful
|
|
*/
|
|
static gboolean
|
|
gst_cuda_yuv_to_rgb_matrix_unorm (const GstVideoInfo * in_yuv_info,
|
|
const GstVideoInfo * out_rgb_info, GstCudaColorMatrix * matrix)
|
|
{
|
|
gint offset[4], scale[4];
|
|
gdouble Kr, Kb, Kg;
|
|
|
|
/*
|
|
* <Formula>
|
|
*
|
|
* Input: Unsigned normalized Y'CbCr(unorm), [0.0..1.0] range
|
|
* Output: Unsigned normalized non-linear R'G'B'(unorm), [0.0..1.0] range
|
|
*
|
|
* 1) Y'CbCr(unorm) to scaled Y'CbCr
|
|
* | Y' | | Y'(unorm) |
|
|
* | Cb | = S | Cb(unorm) |
|
|
* | Cb | | Cr(unorm) |
|
|
* where S = (2 ^ bitdepth) - 1
|
|
*
|
|
* 2) Y'CbCr to YPbPr
|
|
* Y = (Y' - offsetY ) / scaleY
|
|
* Pb = [(Cb - offsetCbCr) / scaleCbCr]
|
|
* Pr = [(Cr - offsetCrCr) / scaleCrCr]
|
|
* =>
|
|
* Y = Y'(unorm) * Sy + Oy
|
|
* Pb = Cb(unorm) * Suv + Ouv
|
|
* Pb = Cr(unorm) * Suv + Ouv
|
|
* where
|
|
* Sy = S / scaleY
|
|
* Suv = S / scaleCbCr
|
|
* Oy = -(offsetY / scaleY)
|
|
* Ouv = -(offsetCbCr / scaleCbCr)
|
|
*
|
|
* 3) YPbPr to R'G'B'
|
|
* | R' | | Y |
|
|
* | G' | = M *| Pb |
|
|
* | B' | | Pr |
|
|
* where
|
|
* | vecR |
|
|
* M = | vecG |
|
|
* | vecB |
|
|
* vecR = | 1, 0 , 2(1 - Kr) |
|
|
* vecG = | 1, -(Kb/Kg) * 2(1 - Kb), -(Kr/Kg) * 2(1 - Kr) |
|
|
* vecB = | 1, 2(1 - Kb) , 0 |
|
|
* =>
|
|
* R' = dot(vecR, (Syuv * Y'CbCr(unorm))) + dot(vecR, Offset)
|
|
* G' = dot(vecG, (Svuy * Y'CbCr(unorm))) + dot(vecG, Offset)
|
|
* B' = dot(vecB, (Syuv * Y'CbCr(unorm)) + dot(vecB, Offset)
|
|
* where
|
|
* | Sy, 0, 0 |
|
|
* Syuv = | 0, Suv, 0 |
|
|
* | 0 0, Suv |
|
|
*
|
|
* | Oy |
|
|
* Offset = | Ouv |
|
|
* | Ouv |
|
|
*
|
|
* 4) YUV -> RGB matrix
|
|
* | R' | | Y'(unorm) | | offsetA |
|
|
* | G' | = Matrix * | Cb(unorm) | + | offsetB |
|
|
* | B' | | Cr(unorm) | | offsetC |
|
|
*
|
|
* where
|
|
* | vecR |
|
|
* Matrix = | vecG | * Syuv
|
|
* | vecB |
|
|
*
|
|
* offsetA = dot(vecR, Offset)
|
|
* offsetB = dot(vecG, Offset)
|
|
* offsetC = dot(vecB, Offset)
|
|
*
|
|
* 4) Consider 16-235 scale RGB
|
|
* RGBfull(0..255) -> RGBfull(16..235) matrix is represented by
|
|
* | Rs | | Rf | | Or |
|
|
* | Gs | = Ms | Gf | + | Og |
|
|
* | Bs | | Bf | | Ob |
|
|
*
|
|
* Combining all matrix into
|
|
* | Rs | | Y'(unorm) | | offsetA | | Or |
|
|
* | Gs | = Ms * ( Matrix * | Cb(unorm) | + | offsetB | ) + | Og |
|
|
* | Bs | | Cr(unorm) | | offsetC | | Ob |
|
|
*
|
|
* | Y'(unorm) | | offsetA | | Or |
|
|
* = Ms * Matrix * | Cb(unorm) | + Ms | offsetB | + | Og |
|
|
* | Cr(unorm) | | offsetC | | Ob |
|
|
*/
|
|
|
|
memset (matrix, 0, sizeof (GstCudaColorMatrix));
|
|
for (guint i = 0; i < 3; i++)
|
|
matrix->max[i] = 1.0;
|
|
|
|
gst_video_color_range_offsets (in_yuv_info->colorimetry.range,
|
|
in_yuv_info->finfo, offset, scale);
|
|
|
|
if (gst_video_color_matrix_get_Kr_Kb (in_yuv_info->colorimetry.matrix,
|
|
&Kr, &Kb)) {
|
|
guint S;
|
|
gdouble Sy, Suv;
|
|
gdouble Oy, Ouv;
|
|
gdouble vecR[3], vecG[3], vecB[3];
|
|
|
|
Kg = 1.0 - Kr - Kb;
|
|
|
|
vecR[0] = 1.0;
|
|
vecR[1] = 0;
|
|
vecR[2] = 2 * (1 - Kr);
|
|
|
|
vecG[0] = 1.0;
|
|
vecG[1] = -(Kb / Kg) * 2 * (1 - Kb);
|
|
vecG[2] = -(Kr / Kg) * 2 * (1 - Kr);
|
|
|
|
vecB[0] = 1.0;
|
|
vecB[1] = 2 * (1 - Kb);
|
|
vecB[2] = 0;
|
|
|
|
/* Assume all components has the same bitdepth */
|
|
S = (1 << in_yuv_info->finfo->depth[0]) - 1;
|
|
Sy = (gdouble) S / scale[0];
|
|
Suv = (gdouble) S / scale[1];
|
|
Oy = -((gdouble) offset[0] / scale[0]);
|
|
Ouv = -((gdouble) offset[1] / scale[1]);
|
|
|
|
matrix->matrix[0][0] = Sy * vecR[0];
|
|
matrix->matrix[1][0] = Sy * vecG[0];
|
|
matrix->matrix[2][0] = Sy * vecB[0];
|
|
|
|
matrix->matrix[0][1] = Suv * vecR[1];
|
|
matrix->matrix[1][1] = Suv * vecG[1];
|
|
matrix->matrix[2][1] = Suv * vecB[1];
|
|
|
|
matrix->matrix[0][2] = Suv * vecR[2];
|
|
matrix->matrix[1][2] = Suv * vecG[2];
|
|
matrix->matrix[2][2] = Suv * vecB[2];
|
|
|
|
matrix->offset[0] = vecR[0] * Oy + vecR[1] * Ouv + vecR[2] * Ouv;
|
|
matrix->offset[1] = vecG[0] * Oy + vecG[1] * Ouv + vecG[2] * Ouv;
|
|
matrix->offset[2] = vecB[0] * Oy + vecB[1] * Ouv + vecB[2] * Ouv;
|
|
|
|
/* Apply RGB range scale matrix */
|
|
if (out_rgb_info->colorimetry.range == GST_VIDEO_COLOR_RANGE_16_235) {
|
|
GstCudaColorMatrix scale_matrix, rst;
|
|
GstVideoInfo full_rgb = *out_rgb_info;
|
|
|
|
full_rgb.colorimetry.range = GST_VIDEO_COLOR_RANGE_0_255;
|
|
|
|
if (gst_cuda_color_range_adjust_matrix_unorm (&full_rgb,
|
|
out_rgb_info, &scale_matrix)) {
|
|
/* Ms * Matrix */
|
|
color_matrix_multiply (&rst, &scale_matrix, matrix);
|
|
|
|
/* Ms * transform offsets */
|
|
for (guint i = 0; i < 3; i++) {
|
|
gdouble val = 0;
|
|
for (guint j = 0; j < 3; j++) {
|
|
val += scale_matrix.matrix[i][j] * matrix->offset[j];
|
|
}
|
|
rst.offset[i] = val + scale_matrix.offset[i];
|
|
}
|
|
|
|
/* copy back to output matrix */
|
|
for (guint i = 0; i < 3; i++) {
|
|
for (guint j = 0; j < 3; j++) {
|
|
matrix->matrix[i][j] = rst.matrix[i][j];
|
|
}
|
|
matrix->offset[i] = rst.offset[i];
|
|
matrix->min[i] = scale_matrix.min[i];
|
|
matrix->max[i] = scale_matrix.max[i];
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
/* Unknown matrix */
|
|
matrix->matrix[0][0] = 1.0;
|
|
matrix->matrix[1][1] = 1.0;
|
|
matrix->matrix[2][2] = 1.0;
|
|
}
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
/**
|
|
* gst_cuda_rgb_to_yuv_matrix_unorm:
|
|
* @in_rgb_info: a #GstVideoInfo of input RGB signal
|
|
* @out_yuv_info: a #GstVideoInfo of output YUV signal
|
|
* @matrix: a #GstCudaColorMatrix
|
|
*
|
|
* Calculates transform matrix from RGB to YUV conversion. Both input and output
|
|
* signals are in normalized [0.0..1.0] space and additional gamma decoding
|
|
* or primary/transfer function transform is not performed by this matrix.
|
|
*
|
|
* Resulting RGB values can be calculated by
|
|
* | Y' | | R' | | matrix.offset[0] |
|
|
* | Cb | = clamp ( matrix.matrix * | G' | + | matrix.offset[1] |, matrix.min, matrix.max )
|
|
* | Cr | | B' | | matrix.offset[2] |
|
|
*
|
|
* Returns: %TRUE if successful
|
|
*/
|
|
static gboolean
|
|
gst_cuda_rgb_to_yuv_matrix_unorm (const GstVideoInfo * in_rgb_info,
|
|
const GstVideoInfo * out_yuv_info, GstCudaColorMatrix * matrix)
|
|
{
|
|
gint offset[4], scale[4];
|
|
gdouble Kr, Kb, Kg;
|
|
|
|
/*
|
|
* <Formula>
|
|
*
|
|
* Input: Unsigned normalized non-linear R'G'B'(unorm), [0.0..1.0] range
|
|
* Output: Unsigned normalized Y'CbCr(unorm), [0.0..1.0] range
|
|
*
|
|
* 1) R'G'B' to YPbPr
|
|
* | Y | | R' |
|
|
* | Pb | = M *| G' |
|
|
* | Pr | | B' |
|
|
* where
|
|
* | vecY |
|
|
* M = | vecU |
|
|
* | vecV |
|
|
* vecY = | Kr , Kg , Kb |
|
|
* vecU = | -0.5*Kr/(1-Kb), -0.5*Kg/(1-Kb), 0.5 |
|
|
* vecV = | 0.5 , -0.5*Kg/(1-Kr), -0.5*Kb(1-Kr) |
|
|
*
|
|
* 2) YPbPr to Y'CbCr(unorm)
|
|
* Y'(unorm) = (Y * scaleY + offsetY) / S
|
|
* Cb(unorm) = (Pb * scaleCbCr + offsetCbCr) / S
|
|
* Cr(unorm) = (Pr * scaleCbCr + offsetCbCr) / S
|
|
* =>
|
|
* Y'(unorm) = (Y * scaleY / S) + (offsetY / S)
|
|
* Cb(unorm) = (Pb * scaleCbCr / S) + (offsetCbCr / S)
|
|
* Cr(unorm) = (Pb * scaleCbCr / S) + (offsetCbCr / S)
|
|
* where S = (2 ^ bitdepth) - 1
|
|
*
|
|
* 3) RGB -> YUV matrix
|
|
* | Y'(unorm) | | R' | | offsetA |
|
|
* | Cb(unorm) | = Matrix * | G' | + | offsetB |
|
|
* | Cr(unorm) | | B' | | offsetC |
|
|
*
|
|
* where
|
|
* | (scaleY/S) * vecY |
|
|
* Matrix = | (scaleCbCr/S) * vecU |
|
|
* | (scaleCbCr/S) * vecV |
|
|
*
|
|
* offsetA = offsetY / S
|
|
* offsetB = offsetCbCr / S
|
|
* offsetC = offsetCbCr / S
|
|
*
|
|
* 4) Consider 16-235 scale RGB
|
|
* RGBstudio(16..235) -> RGBfull(0..255) matrix is represented by
|
|
* | Rf | | Rs | | Or |
|
|
* | Gf | = Ms | Gs | + | Og |
|
|
* | Bf | | Bs | | Ob |
|
|
*
|
|
* Combining all matrix into
|
|
* | Y'(unorm) | | Rs | | Or | | offsetA |
|
|
* | Cb(unorm) | = Matrix * ( Ms | Gs | + | Og | ) + | offsetB |
|
|
* | Cr(unorm) | | Bs | | Ob | | offsetC |
|
|
*
|
|
* | Rs | | Or | | offsetA |
|
|
* = Matrix * Ms | Gs | + Matrix | Og | + | offsetB |
|
|
* | Bs | | Ob | | offsetB |
|
|
*/
|
|
|
|
memset (matrix, 0, sizeof (GstCudaColorMatrix));
|
|
for (guint i = 0; i < 3; i++)
|
|
matrix->max[i] = 1.0;
|
|
|
|
gst_video_color_range_offsets (out_yuv_info->colorimetry.range,
|
|
out_yuv_info->finfo, offset, scale);
|
|
|
|
if (gst_video_color_matrix_get_Kr_Kb (out_yuv_info->colorimetry.matrix,
|
|
&Kr, &Kb)) {
|
|
guint S;
|
|
gdouble Sy, Suv;
|
|
gdouble Oy, Ouv;
|
|
gdouble vecY[3], vecU[3], vecV[3];
|
|
|
|
Kg = 1.0 - Kr - Kb;
|
|
|
|
vecY[0] = Kr;
|
|
vecY[1] = Kg;
|
|
vecY[2] = Kb;
|
|
|
|
vecU[0] = -0.5 * Kr / (1 - Kb);
|
|
vecU[1] = -0.5 * Kg / (1 - Kb);
|
|
vecU[2] = 0.5;
|
|
|
|
vecV[0] = 0.5;
|
|
vecV[1] = -0.5 * Kg / (1 - Kr);
|
|
vecV[2] = -0.5 * Kb / (1 - Kr);
|
|
|
|
/* Assume all components has the same bitdepth */
|
|
S = (1 << out_yuv_info->finfo->depth[0]) - 1;
|
|
Sy = (gdouble) scale[0] / S;
|
|
Suv = (gdouble) scale[1] / S;
|
|
Oy = (gdouble) offset[0] / S;
|
|
Ouv = (gdouble) offset[1] / S;
|
|
|
|
for (guint i = 0; i < 3; i++) {
|
|
matrix->matrix[0][i] = Sy * vecY[i];
|
|
matrix->matrix[1][i] = Suv * vecU[i];
|
|
matrix->matrix[2][i] = Suv * vecV[i];
|
|
}
|
|
|
|
matrix->offset[0] = Oy;
|
|
matrix->offset[1] = Ouv;
|
|
matrix->offset[2] = Ouv;
|
|
|
|
matrix->min[0] = Oy;
|
|
matrix->min[1] = Oy;
|
|
matrix->min[2] = Oy;
|
|
|
|
matrix->max[0] = ((gdouble) scale[0] + offset[0]) / S;
|
|
matrix->max[1] = ((gdouble) scale[1] + offset[0]) / S;
|
|
matrix->max[2] = ((gdouble) scale[1] + offset[0]) / S;
|
|
|
|
/* Apply RGB range scale matrix */
|
|
if (in_rgb_info->colorimetry.range == GST_VIDEO_COLOR_RANGE_16_235) {
|
|
GstCudaColorMatrix scale_matrix, rst;
|
|
GstVideoInfo full_rgb = *in_rgb_info;
|
|
|
|
full_rgb.colorimetry.range = GST_VIDEO_COLOR_RANGE_0_255;
|
|
|
|
if (gst_cuda_color_range_adjust_matrix_unorm (in_rgb_info,
|
|
&full_rgb, &scale_matrix)) {
|
|
/* Matrix * Ms */
|
|
color_matrix_multiply (&rst, matrix, &scale_matrix);
|
|
|
|
/* Matrix * scale offsets */
|
|
for (guint i = 0; i < 3; i++) {
|
|
gdouble val = 0;
|
|
for (guint j = 0; j < 3; j++) {
|
|
val += matrix->matrix[i][j] * scale_matrix.offset[j];
|
|
}
|
|
rst.offset[i] = val + matrix->offset[i];
|
|
}
|
|
|
|
/* copy back to output matrix */
|
|
for (guint i = 0; i < 3; i++) {
|
|
for (guint j = 0; j < 3; j++) {
|
|
matrix->matrix[i][j] = rst.matrix[i][j];
|
|
}
|
|
matrix->offset[i] = rst.offset[i];
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
/* Unknown matrix */
|
|
matrix->matrix[0][0] = 1.0;
|
|
matrix->matrix[1][1] = 1.0;
|
|
matrix->matrix[2][2] = 1.0;
|
|
}
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
struct ColorMatrix
|
|
{
|
|
float coeffX[3];
|
|
float coeffY[3];
|
|
float coeffZ[3];
|
|
float offset[3];
|
|
float min[3];
|
|
float max[3];
|
|
};
|
|
|
|
struct ConstBuffer
|
|
{
|
|
ColorMatrix toRGBCoeff;
|
|
ColorMatrix toYuvCoeff;
|
|
int width;
|
|
int height;
|
|
int left;
|
|
int top;
|
|
int right;
|
|
int bottom;
|
|
int view_width;
|
|
int view_height;
|
|
float border_x;
|
|
float border_y;
|
|
float border_z;
|
|
float border_w;
|
|
int fill_border;
|
|
int video_direction;
|
|
float alpha;
|
|
int do_blend;
|
|
};
|
|
|
|
#define COLOR_SPACE_IDENTITY "color_space_identity"
|
|
#define COLOR_SPACE_CONVERT "color_space_convert"
|
|
|
|
#define SAMPLE_YUV_PLANAR "sample_yuv_planar"
|
|
#define SAMPLE_YV12 "sample_yv12"
|
|
#define SAMPLE_YUV_PLANAR_10BIS "sample_yuv_planar_10bits"
|
|
#define SAMPLE_YUV_PLANAR_12BIS "sample_yuv_planar_12bits"
|
|
#define SAMPLE_SEMI_PLANAR "sample_semi_planar"
|
|
#define SAMPLE_SEMI_PLANAR_SWAP "sample_semi_planar_swap"
|
|
#define SAMPLE_RGBA "sample_rgba"
|
|
#define SAMPLE_BGRA "sample_bgra"
|
|
#define SAMPLE_RGBx "sample_rgbx"
|
|
#define SAMPLE_BGRx "sample_bgrx"
|
|
#define SAMPLE_ARGB "sample_argb"
|
|
/* same as ARGB */
|
|
#define SAMPLE_ARGB64 "sample_argb"
|
|
#define SAMPLE_AGBR "sample_abgr"
|
|
#define SAMPLE_RGBP "sample_rgbp"
|
|
#define SAMPLE_BGRP "sample_bgrp"
|
|
#define SAMPLE_GBR "sample_gbr"
|
|
#define SAMPLE_GBR_10 "sample_gbr_10"
|
|
#define SAMPLE_GBR_12 "sample_gbr_12"
|
|
#define SAMPLE_GBRA "sample_gbra"
|
|
#define SAMPLE_VUYA "sample_vuya"
|
|
|
|
#define WRITE_I420 "write_i420"
|
|
#define BLEND_I420 "blend_i420"
|
|
#define WRITE_YV12 "write_yv12"
|
|
#define BLEND_YV12 "blend_yv12"
|
|
#define WRITE_NV12 "write_nv12"
|
|
#define BLEND_NV12 "blend_nv12"
|
|
#define WRITE_NV21 "write_nv21"
|
|
#define BLEND_NV21 "blend_nv21"
|
|
#define WRITE_P010 "write_p010"
|
|
#define BLEND_P010 "blend_p010"
|
|
#define WRITE_I420_10 "write_i420_10"
|
|
#define BLEND_I420_10 "blend_i420_10"
|
|
#define WRITE_I420_12 "write_i420_12"
|
|
#define BLEND_I420_12 "blend_i420_12"
|
|
#define WRITE_Y444 "write_y444"
|
|
#define BLEND_Y444 "blend_y444"
|
|
#define WRITE_Y444_10 "write_y444_10"
|
|
#define BLEND_Y444_10 "blend_y444_10"
|
|
#define WRITE_Y444_12 "write_y444_12"
|
|
#define BLEND_Y444_12 "blend_y444_12"
|
|
#define WRITE_Y444_16 "write_y444_16"
|
|
#define BLEND_Y444_16 "blend_y444_16"
|
|
#define WRITE_RGBA "write_rgba"
|
|
#define BLEND_RGBA "blend_rgba"
|
|
#define WRITE_RGBx "write_rgbx"
|
|
#define BLEND_RGBx "blend_rgbx"
|
|
#define WRITE_BGRA "write_bgra"
|
|
#define BLEND_BGRA "blend_bgra"
|
|
#define WRITE_BGRx "write_bgrx"
|
|
#define BLEND_BGRx "blend_bgrx"
|
|
#define WRITE_ARGB "write_argb"
|
|
#define BLEND_ARGB "blend_argb"
|
|
#define WRITE_ABGR "write_abgr"
|
|
#define BLEND_ABGR "blend_abgr"
|
|
#define WRITE_RGB "write_rgb"
|
|
#define BLEND_RGB "blend_rgb"
|
|
#define WRITE_BGR "write_bgr"
|
|
#define BLEND_BGR "blend_bgr"
|
|
#define WRITE_RGB10A2 "write_rgb10a2"
|
|
#define BLEND_RGB10A2 "blend_rgb10a2"
|
|
#define WRITE_BGR10A2 "write_bgr10a2"
|
|
#define BLEND_BGR10A2 "blend_bgr10a2"
|
|
#define WRITE_Y42B "write_y42b"
|
|
#define BLEND_Y42B "blend_y42b"
|
|
#define WRITE_I422_10 "write_i422_10"
|
|
#define BLEND_I422_10 "blend_i422_10"
|
|
#define WRITE_I422_12 "write_i422_12"
|
|
#define BLEND_I422_12 "blend_i422_12"
|
|
#define WRITE_RGBP "write_rgbp"
|
|
#define BLEND_RGBP "blend_rgbp"
|
|
#define WRITE_BGRP "write_bgrp"
|
|
#define BLEND_BGRP "blend_bgrp"
|
|
#define WRITE_GBR "write_gbr"
|
|
#define BLEND_GBR "blend_gbr"
|
|
#define WRITE_GBR_10 "write_gbr_10"
|
|
#define BLEND_GBR_10 "blend_gbr_10"
|
|
#define WRITE_GBR_12 "write_gbr_12"
|
|
#define BLEND_GBR_12 "blend_gbr_12"
|
|
#define WRITE_GBR_16 "write_gbr_16"
|
|
#define BLEND_GBR_16 "blend_gbr_16"
|
|
#define WRITE_GBRA "write_gbra"
|
|
#define BLEND_GBRA "blend_gbra"
|
|
#define WRITE_VUYA "write_vuya"
|
|
#define BLEND_VUYA "blend_vuya"
|
|
|
|
/* *INDENT-OFF* */
|
|
const static gchar KERNEL_COMMON[] =
|
|
"struct ColorMatrix\n"
|
|
"{\n"
|
|
" float CoeffX[3];\n"
|
|
" float CoeffY[3];\n"
|
|
" float CoeffZ[3];\n"
|
|
" float Offset[3];\n"
|
|
" float Min[3];\n"
|
|
" float Max[3];\n"
|
|
"};\n"
|
|
"\n"
|
|
"__device__ inline float\n"
|
|
"dot (const float coeff[3], float3 val)\n"
|
|
"{\n"
|
|
" return coeff[0] * val.x + coeff[1] * val.y + coeff[2] * val.z;\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline float\n"
|
|
"clamp (float val, float min_val, float max_val)\n"
|
|
"{\n"
|
|
" return max (min_val, min (val, max_val));\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline float3\n"
|
|
"clamp3 (float3 val, const float min_val[3], const float max_val[3])\n"
|
|
"{\n"
|
|
" return make_float3 (clamp (val.x, min_val[0], max_val[0]),\n"
|
|
" clamp (val.y, min_val[1], max_val[2]),\n"
|
|
" clamp (val.z, min_val[1], max_val[2]));\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline unsigned char\n"
|
|
"scale_to_2bits (float val)\n"
|
|
"{\n"
|
|
" return (unsigned short) __float2int_rz (val * 3.0);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline unsigned char\n"
|
|
"scale_to_uchar (float val)\n"
|
|
"{\n"
|
|
" return (unsigned char) __float2int_rz (val * 255.0);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline unsigned short\n"
|
|
"scale_to_ushort (float val)\n"
|
|
"{\n"
|
|
" return (unsigned short) __float2int_rz (val * 65535.0);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline unsigned short\n"
|
|
"scale_to_10bits (float val)\n"
|
|
"{\n"
|
|
" return (unsigned short) __float2int_rz (val * 1023.0);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline unsigned short\n"
|
|
"scale_to_12bits (float val)\n"
|
|
"{\n"
|
|
" return (unsigned short) __float2int_rz (val * 4095.0);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline unsigned char\n"
|
|
"blend_uchar (unsigned char dst, float src, float src_alpha)\n"
|
|
"{\n"
|
|
" // DstColor' = SrcA * SrcColor + (1 - SrcA) DstColor\n"
|
|
" float src_val = src * src_alpha;\n"
|
|
" float dst_val = __int2float_rz (dst) / 255.0 * (1.0 - src_alpha);\n"
|
|
" return scale_to_uchar(clamp(src_val + dst_val, 0, 1.0));\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline unsigned short\n"
|
|
"blend_ushort (unsigned short dst, float src, float src_alpha)\n"
|
|
"{\n"
|
|
" // DstColor' = SrcA * SrcColor + (1 - SrcA) DstColor\n"
|
|
" float src_val = src * src_alpha;\n"
|
|
" float dst_val = __int2float_rz (dst) / 65535.0 * (1.0 - src_alpha);\n"
|
|
" return scale_to_ushort(clamp(src_val + dst_val, 0, 1.0));\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline unsigned short\n"
|
|
"blend_10bits (unsigned short dst, float src, float src_alpha)\n"
|
|
"{\n"
|
|
" // DstColor' = SrcA * SrcColor + (1 - SrcA) DstColor\n"
|
|
" float src_val = src * src_alpha;\n"
|
|
" float dst_val = __int2float_rz (dst) / 1023.0 * (1.0 - src_alpha);\n"
|
|
" return scale_to_10bits(clamp(src_val + dst_val, 0, 1.0));\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline unsigned short\n"
|
|
"blend_12bits (unsigned short dst, float src, float src_alpha)\n"
|
|
"{\n"
|
|
" // DstColor' = SrcA * SrcColor + (1 - SrcA) DstColor\n"
|
|
" float src_val = src * src_alpha;\n"
|
|
" float dst_val = __int2float_rz (dst) / 4095.0 * (1.0 - src_alpha);\n"
|
|
" return scale_to_12bits(clamp(src_val + dst_val, 0, 1.0));\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline float3\n"
|
|
COLOR_SPACE_IDENTITY "(float3 sample, const ColorMatrix * matrix)\n"
|
|
"{\n"
|
|
" return sample;\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline float3\n"
|
|
COLOR_SPACE_CONVERT "(float3 sample, const ColorMatrix * matrix)\n"
|
|
"{\n"
|
|
" float3 out;\n"
|
|
" out.x = dot (matrix->CoeffX, sample);\n"
|
|
" out.y = dot (matrix->CoeffY, sample);\n"
|
|
" out.z = dot (matrix->CoeffZ, sample);\n"
|
|
" out.x += matrix->Offset[0];\n"
|
|
" out.y += matrix->Offset[1];\n"
|
|
" out.z += matrix->Offset[2];\n"
|
|
" return clamp3 (out, matrix->Min, matrix->Max);\n"
|
|
"}\n"
|
|
"/* All 8bits yuv planar except for yv12 */\n"
|
|
"__device__ inline float4\n"
|
|
SAMPLE_YUV_PLANAR "(cudaTextureObject_t tex0, cudaTextureObject_t tex1,\n"
|
|
" cudaTextureObject_t tex2, cudaTextureObject_t tex3, float x, float y)\n"
|
|
"{\n"
|
|
" float luma = tex2D<float>(tex0, x, y);\n"
|
|
" float u = tex2D<float>(tex1, x, y);\n"
|
|
" float v = tex2D<float>(tex2, x, y);\n"
|
|
" return make_float4 (luma, u, v, 1);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline float4\n"
|
|
SAMPLE_YV12 "(cudaTextureObject_t tex0, cudaTextureObject_t tex1,\n"
|
|
" cudaTextureObject_t tex2, cudaTextureObject_t tex3, float x, float y)\n"
|
|
"{\n"
|
|
" float luma = tex2D<float>(tex0, x, y);\n"
|
|
" float u = tex2D<float>(tex2, x, y);\n"
|
|
" float v = tex2D<float>(tex1, x, y);\n"
|
|
" return make_float4 (luma, u, v, 1);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline float4\n"
|
|
SAMPLE_YUV_PLANAR_10BIS "(cudaTextureObject_t tex0, cudaTextureObject_t tex1,\n"
|
|
" cudaTextureObject_t tex2, cudaTextureObject_t tex3, float x, float y)\n"
|
|
"{\n"
|
|
" float luma = tex2D<float>(tex0, x, y);\n"
|
|
" float u = tex2D<float>(tex1, x, y);\n"
|
|
" float v = tex2D<float>(tex2, x, y);\n"
|
|
" /* (1 << 6) to scale [0, 1.0) range */\n"
|
|
" return make_float4 (luma * 64, u * 64, v * 64, 1);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline float4\n"
|
|
SAMPLE_YUV_PLANAR_12BIS "(cudaTextureObject_t tex0, cudaTextureObject_t tex1,\n"
|
|
" cudaTextureObject_t tex2, cudaTextureObject_t tex3, float x, float y)\n"
|
|
"{\n"
|
|
" float luma = tex2D<float>(tex0, x, y);\n"
|
|
" float u = tex2D<float>(tex1, x, y);\n"
|
|
" float v = tex2D<float>(tex2, x, y);\n"
|
|
" /* (1 << 4) to scale [0, 1.0) range */\n"
|
|
" return make_float4 (luma * 16, u * 16, v * 16, 1);\n"
|
|
"}\n"
|
|
"\n"
|
|
"/* NV12, P010, and P016 */\n"
|
|
"__device__ inline float4\n"
|
|
SAMPLE_SEMI_PLANAR "(cudaTextureObject_t tex0, cudaTextureObject_t tex1,\n"
|
|
" cudaTextureObject_t tex2, cudaTextureObject_t tex3, float x, float y)\n"
|
|
"{\n"
|
|
" float luma = tex2D<float>(tex0, x, y);\n"
|
|
" float2 uv = tex2D<float2>(tex1, x, y);\n"
|
|
" return make_float4 (luma, uv.x, uv.y, 1);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline float4\n"
|
|
SAMPLE_SEMI_PLANAR_SWAP "(cudaTextureObject_t tex0, cudaTextureObject_t tex1,\n"
|
|
" cudaTextureObject_t tex2, cudaTextureObject_t tex3, float x, float y)\n"
|
|
"{\n"
|
|
" float luma = tex2D<float>(tex0, x, y);\n"
|
|
" float2 vu = tex2D<float2>(tex1, x, y);\n"
|
|
" return make_float4 (luma, vu.y, vu.x, 1);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline float4\n"
|
|
SAMPLE_RGBA "(cudaTextureObject_t tex0, cudaTextureObject_t tex1,\n"
|
|
" cudaTextureObject_t tex2, cudaTextureObject_t tex3, float x, float y)\n"
|
|
"{\n"
|
|
" return tex2D<float4>(tex0, x, y);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline float4\n"
|
|
SAMPLE_BGRA "(cudaTextureObject_t tex0, cudaTextureObject_t tex1,\n"
|
|
" cudaTextureObject_t tex2, cudaTextureObject_t tex3, float x, float y)\n"
|
|
"{\n"
|
|
" float4 bgra = tex2D<float4>(tex0, x, y);\n"
|
|
" return make_float4 (bgra.z, bgra.y, bgra.x, bgra.w);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline float4\n"
|
|
SAMPLE_RGBx "(cudaTextureObject_t tex0, cudaTextureObject_t tex1,\n"
|
|
" cudaTextureObject_t tex2, cudaTextureObject_t tex3, float x, float y)\n"
|
|
"{\n"
|
|
" float4 rgbx = tex2D<float4>(tex0, x, y);\n"
|
|
" rgbx.w = 1;\n"
|
|
" return rgbx;\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline float4\n"
|
|
SAMPLE_BGRx "(cudaTextureObject_t tex0, cudaTextureObject_t tex1,\n"
|
|
" cudaTextureObject_t tex2, cudaTextureObject_t tex3, float x, float y)\n"
|
|
"{\n"
|
|
" float4 bgrx = tex2D<float4>(tex0, x, y);\n"
|
|
" return make_float4 (bgrx.z, bgrx.y, bgrx.x, 1);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline float4\n"
|
|
SAMPLE_ARGB "(cudaTextureObject_t tex0, cudaTextureObject_t tex1,\n"
|
|
" cudaTextureObject_t tex2, cudaTextureObject_t tex3, float x, float y)\n"
|
|
"{\n"
|
|
" float4 argb = tex2D<float4>(tex0, x, y);\n"
|
|
" return make_float4 (argb.y, argb.z, argb.w, argb.x);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline float4\n"
|
|
SAMPLE_AGBR "(cudaTextureObject_t tex0, cudaTextureObject_t tex1,\n"
|
|
" cudaTextureObject_t tex2, cudaTextureObject_t tex3, float x, float y)\n"
|
|
"{\n"
|
|
" float4 abgr = tex2D<float4>(tex0, x, y);\n"
|
|
" return make_float4 (abgr.w, abgr.z, abgr.y, abgr.x);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline float4\n"
|
|
SAMPLE_RGBP "(cudaTextureObject_t tex0, cudaTextureObject_t tex1,\n"
|
|
" cudaTextureObject_t tex2, cudaTextureObject_t tex3, float x, float y)\n"
|
|
"{\n"
|
|
" float r = tex2D<float>(tex0, x, y);\n"
|
|
" float g = tex2D<float>(tex1, x, y);\n"
|
|
" float b = tex2D<float>(tex2, x, y);\n"
|
|
" return make_float4 (r, g, b, 1);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline float4\n"
|
|
SAMPLE_BGRP "(cudaTextureObject_t tex0, cudaTextureObject_t tex1,\n"
|
|
" cudaTextureObject_t tex2, cudaTextureObject_t tex3, float x, float y)\n"
|
|
"{\n"
|
|
" float b = tex2D<float>(tex0, x, y);\n"
|
|
" float g = tex2D<float>(tex1, x, y);\n"
|
|
" float r = tex2D<float>(tex2, x, y);\n"
|
|
" return make_float4 (r, g, b, 1);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline float4\n"
|
|
SAMPLE_GBR "(cudaTextureObject_t tex0, cudaTextureObject_t tex1,\n"
|
|
" cudaTextureObject_t tex2, cudaTextureObject_t tex3, float x, float y)\n"
|
|
"{\n"
|
|
" float g = tex2D<float>(tex0, x, y);\n"
|
|
" float b = tex2D<float>(tex1, x, y);\n"
|
|
" float r = tex2D<float>(tex2, x, y);\n"
|
|
" return make_float4 (r, g, b, 1);\n"
|
|
"}\n"
|
|
"__device__ inline float4\n"
|
|
SAMPLE_GBR_10 "(cudaTextureObject_t tex0, cudaTextureObject_t tex1,\n"
|
|
" cudaTextureObject_t tex2, cudaTextureObject_t tex3, float x, float y)\n"
|
|
"{\n"
|
|
" float g = tex2D<float>(tex0, x, y);\n"
|
|
" float b = tex2D<float>(tex1, x, y);\n"
|
|
" float r = tex2D<float>(tex2, x, y);\n"
|
|
" /* (1 << 6) to scale [0, 1.0) range */\n"
|
|
" return make_float4 (r * 64, g * 64, b * 64, 1);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline float4\n"
|
|
SAMPLE_GBR_12 "(cudaTextureObject_t tex0, cudaTextureObject_t tex1,\n"
|
|
" cudaTextureObject_t tex2, cudaTextureObject_t tex3, float x, float y)\n"
|
|
"{\n"
|
|
" float g = tex2D<float>(tex0, x, y);\n"
|
|
" float b = tex2D<float>(tex1, x, y);\n"
|
|
" float r = tex2D<float>(tex2, x, y);\n"
|
|
" /* (1 << 4) to scale [0, 1.0) range */\n"
|
|
" return make_float4 (r * 16, g * 16, b * 16, 1);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline float4\n"
|
|
SAMPLE_GBRA "(cudaTextureObject_t tex0, cudaTextureObject_t tex1,\n"
|
|
" cudaTextureObject_t tex2, cudaTextureObject_t tex3, float x, float y)\n"
|
|
"{\n"
|
|
" float g = tex2D<float>(tex0, x, y);\n"
|
|
" float b = tex2D<float>(tex1, x, y);\n"
|
|
" float r = tex2D<float>(tex2, x, y);\n"
|
|
" float a = tex2D<float>(tex3, x, y);\n"
|
|
" return make_float4 (r, g, b, a);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline float4\n"
|
|
SAMPLE_VUYA "(cudaTextureObject_t tex0, cudaTextureObject_t tex1,\n"
|
|
" cudaTextureObject_t tex2, cudaTextureObject_t tex3, float x, float y)\n"
|
|
"{\n"
|
|
" float4 vuya = tex2D<float4>(tex0, x, y);\n"
|
|
" return make_float4 (vuya.z, vuya.y, vuya.x, vuya.w);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
WRITE_I420 "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" dst0[x + y * stride0] = scale_to_uchar (sample.x);\n"
|
|
" if (x % 2 == 0 && y % 2 == 0) {\n"
|
|
" unsigned int pos = x / 2 + (y / 2) * stride1;\n"
|
|
" dst1[pos] = scale_to_uchar (sample.y);\n"
|
|
" dst2[pos] = scale_to_uchar (sample.z);\n"
|
|
" }\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
BLEND_I420 "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" unsigned int pos = x + y * stride0;\n"
|
|
" dst0[pos] = blend_uchar (dst0[pos], sample.x, sample.w);\n"
|
|
" if (x % 2 == 0 && y % 2 == 0) {\n"
|
|
" pos = x / 2 + (y / 2) * stride1;\n"
|
|
" dst1[pos] = blend_uchar (dst1[pos], sample.y, sample.w);\n"
|
|
" dst2[pos] = blend_uchar (dst2[pos], sample.z, sample.w);\n"
|
|
" }\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
WRITE_YV12 "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" dst0[x + y * stride0] = scale_to_uchar (sample.x);\n"
|
|
" if (x % 2 == 0 && y % 2 == 0) {\n"
|
|
" unsigned int pos = x / 2 + (y / 2) * stride1;\n"
|
|
" dst1[pos] = scale_to_uchar (sample.z);\n"
|
|
" dst2[pos] = scale_to_uchar (sample.y);\n"
|
|
" }\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
BLEND_YV12 "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" unsigned int pos = x + y * stride0;\n"
|
|
" dst0[pos] = blend_uchar (dst0[pos], sample.x, sample.w);\n"
|
|
" if (x % 2 == 0 && y % 2 == 0) {\n"
|
|
" pos = x / 2 + (y / 2) * stride1;\n"
|
|
" dst1[pos] = blend_uchar (dst1[pos], sample.z, sample.w);\n"
|
|
" dst2[pos] = blend_uchar (dst2[pos], sample.y, sample.w);\n"
|
|
" }\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
WRITE_NV12 "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" dst0[x + y * stride0] = scale_to_uchar (sample.x);\n"
|
|
" if (x % 2 == 0 && y % 2 == 0) {\n"
|
|
" unsigned int pos = x + (y / 2) * stride1;\n"
|
|
" dst1[pos] = scale_to_uchar (sample.y);\n"
|
|
" dst1[pos + 1] = scale_to_uchar (sample.z);\n"
|
|
" }\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
BLEND_NV12 "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" unsigned int pos = x + y * stride0;\n"
|
|
" dst0[pos] = blend_uchar (dst0[pos], sample.x, sample.w);\n"
|
|
" if (x % 2 == 0 && y % 2 == 0) {\n"
|
|
" pos = x + (y / 2) * stride1;\n"
|
|
" dst1[pos] = blend_uchar (dst1[pos], sample.y, sample.w);\n"
|
|
" dst1[pos + 1] = blend_uchar (dst1[pos + 1], sample.z, sample.w);\n"
|
|
" }\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
WRITE_NV21 "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" dst0[x + y * stride0] = scale_to_uchar (sample.x);\n"
|
|
" if (x % 2 == 0 && y % 2 == 0) {\n"
|
|
" unsigned int pos = x + (y / 2) * stride1;\n"
|
|
" dst1[pos] = scale_to_uchar (sample.z);\n"
|
|
" dst1[pos + 1] = scale_to_uchar (sample.y);\n"
|
|
" }\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
BLEND_NV21 "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" unsigned int pos = x + y * stride0;\n"
|
|
" dst0[pos] = blend_uchar (dst0[pos], sample.x, sample.w);\n"
|
|
" if (x % 2 == 0 && y % 2 == 0) {\n"
|
|
" pos = x + (y / 2) * stride1;\n"
|
|
" dst1[pos] = blend_uchar (dst1[pos], sample.z, sample.w);\n"
|
|
" dst1[pos + 1] = blend_uchar (dst1[pos + 1], sample.y, sample.w);\n"
|
|
" }\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
WRITE_P010 "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" *(unsigned short *) &dst0[x * 2 + y * stride0] = scale_to_ushort (sample.x);\n"
|
|
" if (x % 2 == 0 && y % 2 == 0) {\n"
|
|
" unsigned int pos = x * 2 + (y / 2) * stride1;\n"
|
|
" *(unsigned short *) &dst1[pos] = scale_to_ushort (sample.y);\n"
|
|
" *(unsigned short *) &dst1[pos + 2] = scale_to_ushort (sample.z);\n"
|
|
" }\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
BLEND_P010 "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" unsigned int pos = x * 2 + y * stride0;\n"
|
|
" unsigned short * target = (unsigned short *) &dst0[pos];\n"
|
|
" *target = blend_ushort (*target, sample.x, sample.w);\n"
|
|
" if (x % 2 == 0 && y % 2 == 0) {\n"
|
|
" pos = x * 2 + (y / 2) * stride1;\n"
|
|
" target = (unsigned short *) &dst1[pos];\n"
|
|
" *target = blend_ushort (*target, sample.y, sample.w);\n"
|
|
" target = (unsigned short *) &dst1[pos + 2];\n"
|
|
" *target = blend_ushort (*target, sample.z, sample.w);\n"
|
|
" }\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
WRITE_I420_10 "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" *(unsigned short *) &dst0[x * 2 + y * stride0] = scale_to_10bits (sample.x);\n"
|
|
" if (x % 2 == 0 && y % 2 == 0) {\n"
|
|
" unsigned int pos = x + (y / 2) * stride1;\n"
|
|
" *(unsigned short *) &dst1[pos] = scale_to_10bits (sample.y);\n"
|
|
" *(unsigned short *) &dst2[pos] = scale_to_10bits (sample.z);\n"
|
|
" }\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
BLEND_I420_10 "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" unsigned int pos = x * 2 + y * stride0;\n"
|
|
" unsigned short * target = (unsigned short *) &dst0[pos];\n"
|
|
" *target = blend_10bits (*target, sample.x, sample.w);\n"
|
|
" if (x % 2 == 0 && y % 2 == 0) {\n"
|
|
" pos = x * 2 + (y / 2) * stride1;\n"
|
|
" target = (unsigned short *) &dst1[pos];\n"
|
|
" *target = blend_10bits (*target, sample.y, sample.w);\n"
|
|
" target = (unsigned short *) &dst2[pos];\n"
|
|
" *target = blend_10bits (*target, sample.z, sample.w);\n"
|
|
" }\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
WRITE_I420_12 "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" *(unsigned short *) &dst0[x * 2 + y * stride0] = scale_to_12bits (sample.x);\n"
|
|
" if (x % 2 == 0 && y % 2 == 0) {\n"
|
|
" unsigned int pos = x + (y / 2) * stride1;\n"
|
|
" *(unsigned short *) &dst1[pos] = scale_to_12bits (sample.y);\n"
|
|
" *(unsigned short *) &dst2[pos] = scale_to_12bits (sample.z);\n"
|
|
" }\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
BLEND_I420_12 "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" unsigned int pos = x * 2 + y * stride0;\n"
|
|
" unsigned short * target = (unsigned short *) &dst0[pos];\n"
|
|
" *target = blend_12bits (*target, sample.x, sample.w);\n"
|
|
" if (x % 2 == 0 && y % 2 == 0) {\n"
|
|
" pos = x * 2 + (y / 2) * stride1;\n"
|
|
" target = (unsigned short *) &dst1[pos];\n"
|
|
" *target = blend_12bits (*target, sample.y, sample.w);\n"
|
|
" target = (unsigned short *) &dst2[pos];\n"
|
|
" *target = blend_12bits (*target, sample.z, sample.w);\n"
|
|
" }\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
WRITE_Y444 "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" int pos = x + y * stride0;\n"
|
|
" dst0[pos] = scale_to_uchar (sample.x);\n"
|
|
" dst1[pos] = scale_to_uchar (sample.y);\n"
|
|
" dst2[pos] = scale_to_uchar (sample.z);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
BLEND_Y444 "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" int pos = x + y * stride0;\n"
|
|
" dst0[pos] = blend_uchar (dst0[pos], sample.x, sample.w);\n"
|
|
" dst1[pos] = blend_uchar (dst1[pos], sample.y, sample.w);\n"
|
|
" dst2[pos] = blend_uchar (dst2[pos], sample.z, sample.w);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
WRITE_Y444_10 "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" int pos = x * 2 + y * stride0;\n"
|
|
" *(unsigned short *) &dst0[pos] = scale_to_10bits (sample.x);\n"
|
|
" *(unsigned short *) &dst1[pos] = scale_to_10bits (sample.y);\n"
|
|
" *(unsigned short *) &dst2[pos] = scale_to_10bits (sample.z);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
BLEND_Y444_10 "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" int pos = x * 2 + y * stride0;\n"
|
|
" unsigned short * target = (unsigned short *) &dst0[pos];\n"
|
|
" *target = blend_10bits (*target, sample.x, sample.w);\n"
|
|
" target = (unsigned short *) &dst1[pos];\n"
|
|
" *target = blend_10bits (*target, sample.y, sample.w);\n"
|
|
" target = (unsigned short *) &dst2[pos];\n"
|
|
" *target = blend_10bits (*target, sample.z, sample.w);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
WRITE_Y444_12 "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" int pos = x * 2 + y * stride0;\n"
|
|
" *(unsigned short *) &dst0[pos] = scale_to_12bits (sample.x);\n"
|
|
" *(unsigned short *) &dst1[pos] = scale_to_12bits (sample.y);\n"
|
|
" *(unsigned short *) &dst2[pos] = scale_to_12bits (sample.z);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
BLEND_Y444_12 "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" int pos = x * 2 + y * stride0;\n"
|
|
" unsigned short * target = (unsigned short *) &dst0[pos];\n"
|
|
" *target = blend_12bits (*target, sample.x, sample.w);\n"
|
|
" target = (unsigned short *) &dst1[pos];\n"
|
|
" *target = blend_12bits (*target, sample.y, sample.w);\n"
|
|
" target = (unsigned short *) &dst2[pos];\n"
|
|
" *target = blend_12bits (*target, sample.z, sample.w);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
WRITE_Y444_16 "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" int pos = x * 2 + y * stride0;\n"
|
|
" *(unsigned short *) &dst0[pos] = scale_to_ushort (sample.x);\n"
|
|
" *(unsigned short *) &dst1[pos] = scale_to_ushort (sample.y);\n"
|
|
" *(unsigned short *) &dst2[pos] = scale_to_ushort (sample.z);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
BLEND_Y444_16 "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" int pos = x * 2 + y * stride0;\n"
|
|
" unsigned short * target = (unsigned short *) &dst0[pos];\n"
|
|
" *target = blend_ushort (*target, sample.x, sample.w);\n"
|
|
" target = (unsigned short *) &dst1[pos];\n"
|
|
" *target = blend_ushort (*target, sample.y, sample.w);\n"
|
|
" target = (unsigned short *) &dst2[pos];\n"
|
|
" *target = blend_ushort (*target, sample.z, sample.w);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
WRITE_RGBA "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" int pos = x * 4 + y * stride0;\n"
|
|
" dst0[pos] = scale_to_uchar (sample.x);\n"
|
|
" dst0[pos + 1] = scale_to_uchar (sample.y);\n"
|
|
" dst0[pos + 2] = scale_to_uchar (sample.z);\n"
|
|
" dst0[pos + 3] = scale_to_uchar (sample.w);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
BLEND_RGBA "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" int pos = x * 4 + y * stride0;\n"
|
|
" dst0[pos] = blend_uchar (dst0[pos], sample.x, sample.w);\n"
|
|
" dst0[pos + 1] = blend_uchar (dst0[pos + 1], sample.y, sample.w);\n"
|
|
" dst0[pos + 2] = blend_uchar (dst0[pos + 2], sample.z, sample.w);\n"
|
|
" dst0[pos + 3] = blend_uchar (dst0[pos + 3], 1.0, sample.w);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
WRITE_RGBx "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" int pos = x * 4 + y * stride0;\n"
|
|
" dst0[pos] = scale_to_uchar (sample.x);\n"
|
|
" dst0[pos + 1] = scale_to_uchar (sample.y);\n"
|
|
" dst0[pos + 2] = scale_to_uchar (sample.z);\n"
|
|
" dst0[pos + 3] = 255;\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
BLEND_RGBx "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" int pos = x * 4 + y * stride0;\n"
|
|
" dst0[pos] = blend_uchar (dst0[pos], sample.x, sample.w);\n"
|
|
" dst0[pos + 1] = blend_uchar (dst0[pos + 1], sample.y, sample.w);\n"
|
|
" dst0[pos + 2] = blend_uchar (dst0[pos + 2], sample.z, sample.w);\n"
|
|
" dst0[pos + 3] = 255;\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
WRITE_BGRA "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" int pos = x * 4 + y * stride0;\n"
|
|
" dst0[pos] = scale_to_uchar (sample.z);\n"
|
|
" dst0[pos + 1] = scale_to_uchar (sample.y);\n"
|
|
" dst0[pos + 2] = scale_to_uchar (sample.x);\n"
|
|
" dst0[pos + 3] = scale_to_uchar (sample.w);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
BLEND_BGRA "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" int pos = x * 4 + y * stride0;\n"
|
|
" dst0[pos] = blend_uchar (dst0[pos], sample.z, sample.w);\n"
|
|
" dst0[pos + 1] = blend_uchar (dst0[pos + 1], sample.y, sample.w);\n"
|
|
" dst0[pos + 2] = blend_uchar (dst0[pos + 2], sample.x, sample.w);\n"
|
|
" dst0[pos + 3] = blend_uchar (dst0[pos + 3], 1.0, sample.w);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
WRITE_BGRx "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" int pos = x * 4 + y * stride0;\n"
|
|
" dst0[pos] = scale_to_uchar (sample.z);\n"
|
|
" dst0[pos + 1] = scale_to_uchar (sample.y);\n"
|
|
" dst0[pos + 2] = scale_to_uchar (sample.x);\n"
|
|
" dst0[pos + 3] = 255;\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
BLEND_BGRx "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" int pos = x * 4 + y * stride0;\n"
|
|
" dst0[pos] = blend_uchar (dst0[pos], sample.z, sample.w);\n"
|
|
" dst0[pos + 1] = blend_uchar (dst0[pos + 1], sample.y, sample.w);\n"
|
|
" dst0[pos + 2] = blend_uchar (dst0[pos + 2], sample.x, sample.w);\n"
|
|
" dst0[pos + 3] = 255;\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
WRITE_ARGB "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" int pos = x * 4 + y * stride0;\n"
|
|
" dst0[pos] = scale_to_uchar (sample.w);\n"
|
|
" dst0[pos + 1] = scale_to_uchar (sample.x);\n"
|
|
" dst0[pos + 2] = scale_to_uchar (sample.y);\n"
|
|
" dst0[pos + 3] = scale_to_uchar (sample.z);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
BLEND_ARGB "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" int pos = x * 4 + y * stride0;\n"
|
|
" dst0[pos] = blend_uchar (dst0[pos], 1.0, sample.w);\n"
|
|
" dst0[pos + 1] = blend_uchar (dst0[pos + 1], sample.x, sample.w);\n"
|
|
" dst0[pos + 2] = blend_uchar (dst0[pos + 2], sample.y, sample.w);\n"
|
|
" dst0[pos + 3] = blend_uchar (dst0[pos + 3], sample.z, sample.w);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
WRITE_ABGR "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" int pos = x * 4 + y * stride0;\n"
|
|
" dst0[pos] = scale_to_uchar (sample.w);\n"
|
|
" dst0[pos + 1] = scale_to_uchar (sample.z);\n"
|
|
" dst0[pos + 2] = scale_to_uchar (sample.y);\n"
|
|
" dst0[pos + 3] = scale_to_uchar (sample.x);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
BLEND_ABGR "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" int pos = x * 4 + y * stride0;\n"
|
|
" dst0[pos] = blend_uchar (dst0[pos], 1.0, sample.w);\n"
|
|
" dst0[pos + 1] = blend_uchar (dst0[pos + 1], sample.z, sample.w);\n"
|
|
" dst0[pos + 2] = blend_uchar (dst0[pos + 2], sample.y, sample.w);\n"
|
|
" dst0[pos + 3] = blend_uchar (dst0[pos + 3], sample.x, sample.w);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
WRITE_RGB "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" int pos = x * 3 + y * stride0;\n"
|
|
" dst0[pos] = scale_to_uchar (sample.x);\n"
|
|
" dst0[pos + 1] = scale_to_uchar (sample.y);\n"
|
|
" dst0[pos + 2] = scale_to_uchar (sample.z);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
BLEND_RGB "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" int pos = x * 3 + y * stride0;\n"
|
|
" dst0[pos] = blend_uchar (dst0[pos], sample.x, sample.w);\n"
|
|
" dst0[pos + 1] = blend_uchar (dst0[pos + 1], sample.y, sample.w);\n"
|
|
" dst0[pos + 2] = blend_uchar (dst0[pos + 2], sample.z, sample.w);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
WRITE_BGR "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" int pos = x * 3 + y * stride0;\n"
|
|
" dst0[pos] = scale_to_uchar (sample.z);\n"
|
|
" dst0[pos + 1] = scale_to_uchar (sample.y);\n"
|
|
" dst0[pos + 2] = scale_to_uchar (sample.x);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
BLEND_BGR "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" int pos = x * 3 + y * stride0;\n"
|
|
" dst0[pos] = blend_uchar (dst0[pos], sample.z, sample.w);\n"
|
|
" dst0[pos + 1] = blend_uchar (dst0[pos + 1], sample.y, sample.w);\n"
|
|
" dst0[pos + 2] = blend_uchar (dst0[pos + 2], sample.x, sample.w);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
WRITE_RGB10A2 "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" unsigned int alpha = (unsigned int) scale_to_2bits (sample.w);\n"
|
|
" unsigned int packed_rgb = alpha << 30;\n"
|
|
" packed_rgb |= ((unsigned int) scale_to_10bits (sample.x));\n"
|
|
" packed_rgb |= ((unsigned int) scale_to_10bits (sample.y)) << 10;\n"
|
|
" packed_rgb |= ((unsigned int) scale_to_10bits (sample.z)) << 20;\n"
|
|
" *(unsigned int *) &dst0[x * 4 + y * stride0] = packed_rgb;\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline ushort3\n"
|
|
"unpack_rgb10a2 (unsigned int val)\n"
|
|
"{\n"
|
|
" unsigned short r, g, b;\n"
|
|
" r = (val & 0x3ff);\n"
|
|
" r = (r << 6) | (r >> 4);\n"
|
|
" g = ((val >> 10) & 0x3ff);\n"
|
|
" g = (g << 6) | (g >> 4);\n"
|
|
" b = ((val >> 20) & 0x3ff);\n"
|
|
" b = (b << 6) | (b >> 4);\n"
|
|
" return make_ushort3 (r, g, b);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
BLEND_RGB10A2 "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" unsigned int * target = (unsigned int *) &dst0[x * 4 + y * stride0];\n"
|
|
" ushort3 val = unpack_rgb10a2 (*target);\n"
|
|
" unsigned int alpha = (unsigned int) scale_to_2bits (sample.w);\n"
|
|
" unsigned int packed_rgb = alpha << 30;\n"
|
|
" packed_rgb |= ((unsigned int) blend_10bits (val.x, sample.x, sample.w));\n"
|
|
" packed_rgb |= ((unsigned int) blend_10bits (val.y, sample.y, sample.w)) << 10;\n"
|
|
" packed_rgb |= ((unsigned int) blend_10bits (val.z, sample.z, sample.w)) << 20;\n"
|
|
" *target = packed_rgb;\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
WRITE_BGR10A2 "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" unsigned int alpha = (unsigned int) scale_to_2bits (sample.x);\n"
|
|
" unsigned int packed_rgb = alpha << 30;\n"
|
|
" packed_rgb |= ((unsigned int) scale_to_10bits (sample.x)) << 20;\n"
|
|
" packed_rgb |= ((unsigned int) scale_to_10bits (sample.y)) << 10;\n"
|
|
" packed_rgb |= ((unsigned int) scale_to_10bits (sample.z));\n"
|
|
" *(unsigned int *) &dst0[x * 4 + y * stride0] = packed_rgb;\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline ushort3\n"
|
|
"unpack_bgr10a2 (unsigned int val)\n"
|
|
"{\n"
|
|
" unsigned short r, g, b;\n"
|
|
" b = (val & 0x3ff);\n"
|
|
" b = (b << 6) | (b >> 4);\n"
|
|
" g = ((val >> 10) & 0x3ff);\n"
|
|
" g = (g << 6) | (g >> 4);\n"
|
|
" r = ((val >> 20) & 0x3ff);\n"
|
|
" r = (r << 6) | (r >> 4);\n"
|
|
" return make_ushort3 (r, g, b);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
BLEND_BGR10A2 "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" unsigned int * target = (unsigned int *) &dst0[x * 4 + y * stride0];\n"
|
|
" ushort3 val = unpack_bgr10a2 (*target);\n"
|
|
" unsigned int alpha = (unsigned int) scale_to_2bits (sample.w);\n"
|
|
" unsigned int packed_rgb = alpha << 30;\n"
|
|
" packed_rgb |= ((unsigned int) blend_10bits (val.x, sample.x, sample.w)) << 20;\n"
|
|
" packed_rgb |= ((unsigned int) blend_10bits (val.y, sample.y, sample.w)) << 10;\n"
|
|
" packed_rgb |= ((unsigned int) blend_10bits (val.z, sample.z, sample.w));\n"
|
|
" *target = packed_rgb;\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
WRITE_Y42B "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" dst0[x + y * stride0] = scale_to_uchar (sample.x);\n"
|
|
" if (x % 2 == 0) {\n"
|
|
" unsigned int pos = x / 2 + y * stride1;\n"
|
|
" dst1[pos] = scale_to_uchar (sample.y);\n"
|
|
" dst2[pos] = scale_to_uchar (sample.z);\n"
|
|
" }\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
BLEND_Y42B "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" unsigned int pos = x + y * stride0;\n"
|
|
" dst0[pos] = blend_uchar (dst0[pos], sample.x, sample.w);\n"
|
|
" if (x % 2 == 0) {\n"
|
|
" pos = x / 2 + y * stride1;\n"
|
|
" dst1[pos] = blend_uchar (dst1[pos], sample.y, sample.w);\n"
|
|
" dst2[pos] = blend_uchar (dst2[pos], sample.z, sample.w);\n"
|
|
" }\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
WRITE_I422_10 "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" *(unsigned short *) &dst0[x * 2 + y * stride0] = scale_to_10bits (sample.x);\n"
|
|
" if (x % 2 == 0) {\n"
|
|
" unsigned int pos = x + y * stride1;\n"
|
|
" *(unsigned short *) &dst1[pos] = scale_to_10bits (sample.y);\n"
|
|
" *(unsigned short *) &dst2[pos] = scale_to_10bits (sample.z);\n"
|
|
" }\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
BLEND_I422_10 "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" unsigned int pos = x * 2 + y * stride0;\n"
|
|
" unsigned short * target = (unsigned short *) &dst0[pos];\n"
|
|
" *target = blend_10bits (*target, sample.x, sample.w);\n"
|
|
" if (x % 2 == 0) {\n"
|
|
" pos = x / 2 + y * stride1;\n"
|
|
" target = (unsigned short *) &dst1[pos];\n"
|
|
" *target = blend_10bits (*target, sample.y, sample.w);\n"
|
|
" target = (unsigned short *) &dst2[pos];\n"
|
|
" *target = blend_10bits (*target, sample.z, sample.w);\n"
|
|
" }\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
WRITE_I422_12 "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" *(unsigned short *) &dst0[x * 2 + y * stride0] = scale_to_12bits (sample.x);\n"
|
|
" if (x % 2 == 0) {\n"
|
|
" unsigned int pos = x + y * stride1;\n"
|
|
" *(unsigned short *) &dst1[pos] = scale_to_12bits (sample.y);\n"
|
|
" *(unsigned short *) &dst2[pos] = scale_to_12bits (sample.z);\n"
|
|
" }\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
BLEND_I422_12 "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" unsigned int pos = x * 2 + y * stride0;\n"
|
|
" unsigned short * target = (unsigned short *) &dst0[pos];\n"
|
|
" *target = blend_12bits (*target, sample.x, sample.w);\n"
|
|
" if (x % 2 == 0) {\n"
|
|
" pos = x / 2 + y * stride1;\n"
|
|
" target = (unsigned short *) &dst1[pos];\n"
|
|
" *target = blend_12bits (*target, sample.y, sample.w);\n"
|
|
" target = (unsigned short *) &dst2[pos];\n"
|
|
" *target = blend_12bits (*target, sample.z, sample.w);\n"
|
|
" }\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
WRITE_RGBP "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" int pos = x + y * stride0;\n"
|
|
" dst0[pos] = scale_to_uchar (sample.x);\n"
|
|
" dst1[pos] = scale_to_uchar (sample.y);\n"
|
|
" dst2[pos] = scale_to_uchar (sample.z);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
BLEND_RGBP "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" int pos = x + y * stride0;\n"
|
|
" dst0[pos] = blend_uchar (dst0[pos], sample.x, sample.w);\n"
|
|
" dst1[pos] = blend_uchar (dst1[pos], sample.y, sample.w);\n"
|
|
" dst2[pos] = blend_uchar (dst2[pos], sample.z, sample.w);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
WRITE_BGRP "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" int pos = x + y * stride0;\n"
|
|
" dst0[pos] = scale_to_uchar (sample.z);\n"
|
|
" dst1[pos] = scale_to_uchar (sample.y);\n"
|
|
" dst2[pos] = scale_to_uchar (sample.x);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
BLEND_BGRP "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" int pos = x + y * stride0;\n"
|
|
" dst0[pos] = blend_uchar (dst0[pos], sample.z, sample.w);\n"
|
|
" dst1[pos] = blend_uchar (dst1[pos], sample.y, sample.w);\n"
|
|
" dst2[pos] = blend_uchar (dst2[pos], sample.x, sample.w);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
WRITE_GBR "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" int pos = x + y * stride0;\n"
|
|
" dst0[pos] = scale_to_uchar (sample.y);\n"
|
|
" dst1[pos] = scale_to_uchar (sample.z);\n"
|
|
" dst2[pos] = scale_to_uchar (sample.x);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
BLEND_GBR "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" int pos = x + y * stride0;\n"
|
|
" dst0[pos] = blend_uchar (dst0[pos], sample.y, sample.w);\n"
|
|
" dst1[pos] = blend_uchar (dst1[pos], sample.z, sample.w);\n"
|
|
" dst2[pos] = blend_uchar (dst2[pos], sample.x, sample.w);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
WRITE_GBR_10 "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" int pos = x * 2 + y * stride0;\n"
|
|
" *(unsigned short *) &dst0[pos] = scale_to_10bits (sample.y);\n"
|
|
" *(unsigned short *) &dst1[pos] = scale_to_10bits (sample.z);\n"
|
|
" *(unsigned short *) &dst2[pos] = scale_to_10bits (sample.x);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
BLEND_GBR_10 "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" int pos = x * 2 + y * stride0;\n"
|
|
" unsigned short * target = (unsigned short *) &dst0[pos];\n"
|
|
" *target = blend_10bits (*target, sample.y, sample.w);\n"
|
|
" target = (unsigned short *) &dst1[pos];\n"
|
|
" *target = blend_10bits (*target, sample.z, sample.w);\n"
|
|
" target = (unsigned short *) &dst2[pos];\n"
|
|
" *target = blend_10bits (*target, sample.x, sample.w);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
WRITE_GBR_12 "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" int pos = x * 2 + y * stride0;\n"
|
|
" *(unsigned short *) &dst0[pos] = scale_to_12bits (sample.y);\n"
|
|
" *(unsigned short *) &dst1[pos] = scale_to_12bits (sample.z);\n"
|
|
" *(unsigned short *) &dst2[pos] = scale_to_12bits (sample.x);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
BLEND_GBR_12 "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" int pos = x * 2 + y * stride0;\n"
|
|
" unsigned short * target = (unsigned short *) &dst0[pos];\n"
|
|
" *target = blend_12bits (*target, sample.y, sample.w);\n"
|
|
" target = (unsigned short *) &dst1[pos];\n"
|
|
" *target = blend_12bits (*target, sample.z, sample.w);\n"
|
|
" target = (unsigned short *) &dst2[pos];\n"
|
|
" *target = blend_12bits (*target, sample.x, sample.w);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
WRITE_GBR_16 "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" int pos = x * 2 + y * stride0;\n"
|
|
" *(unsigned short *) &dst0[pos] = scale_to_ushort (sample.y);\n"
|
|
" *(unsigned short *) &dst1[pos] = scale_to_ushort (sample.z);\n"
|
|
" *(unsigned short *) &dst2[pos] = scale_to_ushort (sample.x);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
BLEND_GBR_16 "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" int pos = x * 2 + y * stride0;\n"
|
|
" unsigned short * target = (unsigned short *) &dst0[pos];\n"
|
|
" *target = blend_ushort (*target, sample.y, sample.w);\n"
|
|
" target = (unsigned short *) &dst1[pos];\n"
|
|
" *target = blend_ushort (*target, sample.z, sample.w);\n"
|
|
" target = (unsigned short *) &dst2[pos];\n"
|
|
" *target = blend_ushort (*target, sample.x, sample.w);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
WRITE_GBRA "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" int pos = x + y * stride0;\n"
|
|
" dst0[pos] = scale_to_uchar (sample.y);\n"
|
|
" dst1[pos] = scale_to_uchar (sample.z);\n"
|
|
" dst2[pos] = scale_to_uchar (sample.x);\n"
|
|
" dst3[pos] = scale_to_uchar (sample.w);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
BLEND_GBRA "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" int pos = x + y * stride0;\n"
|
|
" dst0[pos] = blend_uchar (dst0[pos], sample.y, sample.w);\n"
|
|
" dst1[pos] = blend_uchar (dst1[pos], sample.z, sample.w);\n"
|
|
" dst2[pos] = blend_uchar (dst2[pos], sample.x, sample.w);\n"
|
|
" dst3[pos] = blend_uchar (dst3[pos], 1.0, sample.w);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
WRITE_VUYA "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" int pos = x * 4 + y * stride0;\n"
|
|
" dst0[pos] = scale_to_uchar (sample.z);\n"
|
|
" dst0[pos + 1] = scale_to_uchar (sample.y);\n"
|
|
" dst0[pos + 2] = scale_to_uchar (sample.x);\n"
|
|
" dst0[pos + 3] = scale_to_uchar (sample.w);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline void\n"
|
|
BLEND_VUYA "(unsigned char * dst0, unsigned char * dst1, unsigned char * dst2,\n"
|
|
" unsigned char * dst3, float4 sample, int x, int y, int stride0, int stride1)\n"
|
|
"{\n"
|
|
" int pos = x * 4 + y * stride0;\n"
|
|
" dst0[pos] = blend_uchar (dst0[pos], sample.z, sample.w);\n"
|
|
" dst0[pos + 1] = blend_uchar (dst0[pos + 1], sample.y, sample.w);\n"
|
|
" dst0[pos + 2] = blend_uchar (dst0[pos + 2], sample.x, sample.w);\n"
|
|
" dst0[pos + 3] = blend_uchar (dst0[pos + 3], 1.0, sample.w);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline float2\n"
|
|
"rotate_identity (float x, float y)\n"
|
|
"{\n"
|
|
" return make_float2(x, y);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline float2\n"
|
|
"rotate_90r (float x, float y)\n"
|
|
"{\n"
|
|
" return make_float2(y, 1.0 - x);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline float2\n"
|
|
"rotate_180 (float x, float y)\n"
|
|
"{\n"
|
|
" return make_float2(1.0 - x, 1.0 - y);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline float2\n"
|
|
"rotate_90l (float x, float y)\n"
|
|
"{\n"
|
|
" return make_float2(1.0 - y, x);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline float2\n"
|
|
"rotate_horiz (float x, float y)\n"
|
|
"{\n"
|
|
" return make_float2(1.0 - x, y);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline float2\n"
|
|
"rotate_vert (float x, float y)\n"
|
|
"{\n"
|
|
" return make_float2(x, 1.0 - y);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline float2\n"
|
|
"rotate_ul_lr (float x, float y)\n"
|
|
"{\n"
|
|
" return make_float2(y, x);\n"
|
|
"}\n"
|
|
"\n"
|
|
"__device__ inline float2\n"
|
|
"rotate_ur_ll (float x, float y)\n"
|
|
"{\n"
|
|
" return make_float2(1.0 - y, 1.0 - x);\n"
|
|
"}\n"
|
|
"__device__ inline float2\n"
|
|
"do_rotate (float x, float y, int direction)"
|
|
"{\n"
|
|
" switch (direction) {\n"
|
|
" case 1:\n"
|
|
" return rotate_90r (x, y);\n"
|
|
" case 2:\n"
|
|
" return rotate_180 (x, y);\n"
|
|
" case 3:\n"
|
|
" return rotate_90l (x, y);\n"
|
|
" case 4:\n"
|
|
" return rotate_horiz (x, y);\n"
|
|
" case 5:\n"
|
|
" return rotate_vert (x, y);\n"
|
|
" case 6:\n"
|
|
" return rotate_ul_lr (x, y);\n"
|
|
" case 7:\n"
|
|
" return rotate_ur_ll (x, y);\n"
|
|
" default:\n"
|
|
" return rotate_identity (x, y);\n"
|
|
" }\n"
|
|
"}\n"
|
|
"\n";
|
|
|
|
#define GST_CUDA_KERNEL_UNPACK_FUNC "gst_cuda_kernel_unpack_func"
|
|
static const gchar RGB_TO_RGBx[] =
|
|
"extern \"C\" {\n"
|
|
"__global__ void\n"
|
|
GST_CUDA_KERNEL_UNPACK_FUNC
|
|
"(unsigned char *src, unsigned char *dst, int width, int height,\n"
|
|
" int src_stride, int dst_stride)\n"
|
|
"{\n"
|
|
" int x_pos = blockIdx.x * blockDim.x + threadIdx.x;\n"
|
|
" int y_pos = blockIdx.y * blockDim.y + threadIdx.y;\n"
|
|
" if (x_pos < width && y_pos < height) {\n"
|
|
" int dst_pos = x_pos * 4 + y_pos * dst_stride;\n"
|
|
" int src_pos = x_pos * 3 + y_pos * src_stride;\n"
|
|
" dst[dst_pos] = src[src_pos];\n"
|
|
" dst[dst_pos + 1] = src[src_pos + 1];\n"
|
|
" dst[dst_pos + 2] = src[src_pos + 2];\n"
|
|
" dst[dst_pos + 3] = 0xff;\n"
|
|
" }\n"
|
|
"}\n"
|
|
"}\n";
|
|
|
|
static const gchar RGB10A2_TO_ARGB64[] =
|
|
"extern \"C\" {\n"
|
|
"__global__ void\n"
|
|
GST_CUDA_KERNEL_UNPACK_FUNC
|
|
"(unsigned char *src, unsigned char *dst, int width, int height,\n"
|
|
" int src_stride, int dst_stride)\n"
|
|
"{\n"
|
|
" int x_pos = blockIdx.x * blockDim.x + threadIdx.x;\n"
|
|
" int y_pos = blockIdx.y * blockDim.y + threadIdx.y;\n"
|
|
" if (x_pos < width && y_pos < height) {\n"
|
|
" unsigned short a, r, g, b;\n"
|
|
" unsigned int val;\n"
|
|
" int dst_pos = x_pos * 8 + y_pos * dst_stride;\n"
|
|
" val = *(unsigned int *)&src[x_pos * 4 + y_pos * src_stride];\n"
|
|
" a = (val >> 30) & 0x03;\n"
|
|
" a = (a << 14) | (a << 12) | (a << 10) | (a << 8) | (a << 6) | (a << 4) | (a << 2) | (a << 0);\n"
|
|
" r = (val & 0x3ff);\n"
|
|
" r = (r << 6) | (r >> 4);\n"
|
|
" g = ((val >> 10) & 0x3ff);\n"
|
|
" g = (g << 6) | (g >> 4);\n"
|
|
" b = ((val >> 20) & 0x3ff);\n"
|
|
" b = (b << 6) | (b >> 4);\n"
|
|
" *(unsigned short *) &dst[dst_pos] = a;\n"
|
|
" *(unsigned short *) &dst[dst_pos + 2] = r;\n"
|
|
" *(unsigned short *) &dst[dst_pos + 4] = g;\n"
|
|
" *(unsigned short *) &dst[dst_pos + 6] = b;\n"
|
|
" }\n"
|
|
"}\n"
|
|
"}\n";
|
|
|
|
static const gchar BGR10A2_TO_ARGB64[] =
|
|
"extern \"C\" {\n"
|
|
"__global__ void\n"
|
|
GST_CUDA_KERNEL_UNPACK_FUNC
|
|
"(unsigned char *src, unsigned char *dst, int width, int height,\n"
|
|
" int src_stride, int dst_stride)\n"
|
|
"{\n"
|
|
" int x_pos = blockIdx.x * blockDim.x + threadIdx.x;\n"
|
|
" int y_pos = blockIdx.y * blockDim.y + threadIdx.y;\n"
|
|
" if (x_pos < width && y_pos < height) {\n"
|
|
" unsigned short a, r, g, b;\n"
|
|
" unsigned int val;\n"
|
|
" int dst_pos = x_pos * 8 + y_pos * dst_stride;\n"
|
|
" val = *(unsigned int *)&src[x_pos * 4 + y_pos * src_stride];\n"
|
|
" a = (val >> 30) & 0x03;\n"
|
|
" a = (a << 14) | (a << 12) | (a << 10) | (a << 8) | (a << 6) | (a << 4) | (a << 2) | (a << 0);\n"
|
|
" b = (val & 0x3ff);\n"
|
|
" b = (b << 6) | (b >> 4);\n"
|
|
" g = ((val >> 10) & 0x3ff);\n"
|
|
" g = (g << 6) | (g >> 4);\n"
|
|
" r = ((val >> 20) & 0x3ff);\n"
|
|
" r = (r << 6) | (r >> 4);\n"
|
|
" *(unsigned short *) &dst[dst_pos] = a;\n"
|
|
" *(unsigned short *) &dst[dst_pos + 2] = r;\n"
|
|
" *(unsigned short *) &dst[dst_pos + 4] = g;\n"
|
|
" *(unsigned short *) &dst[dst_pos + 6] = b;\n"
|
|
" }\n"
|
|
"}\n"
|
|
"}\n";
|
|
|
|
#define GST_CUDA_KERNEL_MAIN_FUNC "gst_cuda_converter_main"
|
|
|
|
static const gchar TEMPLATE_KERNEL[] =
|
|
/* KERNEL_COMMON */
|
|
"%s\n"
|
|
/* UNPACK FUNCTION */
|
|
"%s\n"
|
|
"struct ConstBuffer\n"
|
|
"{\n"
|
|
" ColorMatrix toRGBCoeff;\n"
|
|
" ColorMatrix toYuvCoeff;\n"
|
|
" int width;\n"
|
|
" int height;\n"
|
|
" int left;\n"
|
|
" int top;\n"
|
|
" int right;\n"
|
|
" int bottom;\n"
|
|
" int view_width;\n"
|
|
" int view_height;\n"
|
|
" float border_x;\n"
|
|
" float border_y;\n"
|
|
" float border_z;\n"
|
|
" float border_w;\n"
|
|
" int fill_border;\n"
|
|
" int video_direction;\n"
|
|
" float alpha;\n"
|
|
" int do_blend;\n"
|
|
"};\n"
|
|
"\n"
|
|
"extern \"C\" {\n"
|
|
"__global__ void\n"
|
|
GST_CUDA_KERNEL_MAIN_FUNC "(cudaTextureObject_t tex0, cudaTextureObject_t tex1,\n"
|
|
" cudaTextureObject_t tex2, cudaTextureObject_t tex3, unsigned char * dst0,\n"
|
|
" unsigned char * dst1, unsigned char * dst2, unsigned char * dst3,\n"
|
|
" int stride0, int stride1, ConstBuffer * const_buf, int off_x, int off_y)\n"
|
|
"{\n"
|
|
" int x_pos = blockIdx.x * blockDim.x + threadIdx.x + off_x;\n"
|
|
" int y_pos = blockIdx.y * blockDim.y + threadIdx.y + off_y;\n"
|
|
" float4 sample;\n"
|
|
" if (x_pos >= const_buf->width || y_pos >= const_buf->height ||\n"
|
|
" const_buf->view_width <= 0 || const_buf->view_height <= 0)\n"
|
|
" return;\n"
|
|
" if (x_pos < const_buf->left || x_pos >= const_buf->right ||\n"
|
|
" y_pos < const_buf->top || y_pos >= const_buf->bottom) {\n"
|
|
" if (!const_buf->fill_border)\n"
|
|
" return;\n"
|
|
" sample = make_float4 (const_buf->border_x, const_buf->border_y,\n"
|
|
" const_buf->border_z, const_buf->border_w);\n"
|
|
" } else {\n"
|
|
" float x = (__int2float_rz (x_pos - const_buf->left) + 0.5) / const_buf->view_width;\n"
|
|
" if (x < 0.0 || x > 1.0)\n"
|
|
" return;\n"
|
|
" float y = (__int2float_rz (y_pos - const_buf->top) + 0.5) / const_buf->view_height;\n"
|
|
" if (y < 0.0 || y > 1.0)\n"
|
|
" return;\n"
|
|
" float2 rotated = do_rotate (x, y, const_buf->video_direction);\n"
|
|
" float4 s = %s (tex0, tex1, tex2, tex3, rotated.x, rotated.y);\n"
|
|
" float3 xyz = make_float3 (s.x, s.y, s.z);\n"
|
|
" float3 rgb = %s (xyz, &const_buf->toRGBCoeff);\n"
|
|
" float3 yuv = %s (rgb, &const_buf->toYuvCoeff);\n"
|
|
" sample = make_float4 (yuv.x, yuv.y, yuv.z, s.w);\n"
|
|
" }\n"
|
|
" sample.w = sample.w * const_buf->alpha;\n"
|
|
" if (!const_buf->do_blend) {\n"
|
|
" %s (dst0, dst1, dst2, dst3, sample, x_pos, y_pos, stride0, stride1);\n"
|
|
" } else {\n"
|
|
" %s (dst0, dst1, dst2, dst3, sample, x_pos, y_pos, stride0, stride1);\n"
|
|
" }"
|
|
"}\n"
|
|
"}\n";
|
|
/* *INDENT-ON* */
|
|
|
|
typedef struct _TextureFormat
|
|
{
|
|
GstVideoFormat format;
|
|
CUarray_format array_format[GST_VIDEO_MAX_COMPONENTS];
|
|
guint channels[GST_VIDEO_MAX_COMPONENTS];
|
|
const gchar *sample_func;
|
|
} TextureFormat;
|
|
|
|
#define CU_AD_FORMAT_NONE ((CUarray_format)0)
|
|
#define MAKE_FORMAT_YUV_PLANAR(f,cf,sample_func) \
|
|
{ GST_VIDEO_FORMAT_ ##f, { CU_AD_FORMAT_ ##cf, CU_AD_FORMAT_ ##cf, \
|
|
CU_AD_FORMAT_ ##cf, CU_AD_FORMAT_NONE }, {1, 1, 1, 0}, sample_func }
|
|
#define MAKE_FORMAT_YUV_SEMI_PLANAR(f,cf,sample_func) \
|
|
{ GST_VIDEO_FORMAT_ ##f, { CU_AD_FORMAT_ ##cf, CU_AD_FORMAT_ ##cf, \
|
|
CU_AD_FORMAT_NONE, CU_AD_FORMAT_NONE }, {1, 2, 0, 0}, sample_func }
|
|
#define MAKE_FORMAT_RGB(f,cf,sample_func) \
|
|
{ GST_VIDEO_FORMAT_ ##f, { CU_AD_FORMAT_ ##cf, CU_AD_FORMAT_NONE, \
|
|
CU_AD_FORMAT_NONE, CU_AD_FORMAT_NONE }, {4, 0, 0, 0}, sample_func }
|
|
#define MAKE_FORMAT_RGBP(f,cf,sample_func) \
|
|
{ GST_VIDEO_FORMAT_ ##f, { CU_AD_FORMAT_ ##cf, CU_AD_FORMAT_ ##cf, \
|
|
CU_AD_FORMAT_ ##cf, CU_AD_FORMAT_NONE }, {1, 1, 1, 0}, sample_func }
|
|
#define MAKE_FORMAT_RGBAP(f,cf,sample_func) \
|
|
{ GST_VIDEO_FORMAT_ ##f, { CU_AD_FORMAT_ ##cf, CU_AD_FORMAT_ ##cf, \
|
|
CU_AD_FORMAT_ ##cf, CU_AD_FORMAT_ ##cf }, {1, 1, 1, 1}, sample_func }
|
|
|
|
static const TextureFormat format_map[] = {
|
|
MAKE_FORMAT_YUV_PLANAR (I420, UNSIGNED_INT8, SAMPLE_YUV_PLANAR),
|
|
MAKE_FORMAT_YUV_PLANAR (YV12, UNSIGNED_INT8, SAMPLE_YV12),
|
|
MAKE_FORMAT_YUV_SEMI_PLANAR (NV12, UNSIGNED_INT8, SAMPLE_SEMI_PLANAR),
|
|
MAKE_FORMAT_YUV_SEMI_PLANAR (NV21, UNSIGNED_INT8, SAMPLE_SEMI_PLANAR_SWAP),
|
|
MAKE_FORMAT_YUV_SEMI_PLANAR (P010_10LE, UNSIGNED_INT16, SAMPLE_SEMI_PLANAR),
|
|
MAKE_FORMAT_YUV_SEMI_PLANAR (P012_LE, UNSIGNED_INT16, SAMPLE_SEMI_PLANAR),
|
|
MAKE_FORMAT_YUV_SEMI_PLANAR (P016_LE, UNSIGNED_INT16, SAMPLE_SEMI_PLANAR),
|
|
MAKE_FORMAT_YUV_PLANAR (I420_10LE, UNSIGNED_INT16, SAMPLE_YUV_PLANAR_10BIS),
|
|
MAKE_FORMAT_YUV_PLANAR (I420_12LE, UNSIGNED_INT16, SAMPLE_YUV_PLANAR_12BIS),
|
|
MAKE_FORMAT_YUV_PLANAR (Y444, UNSIGNED_INT8, SAMPLE_YUV_PLANAR),
|
|
MAKE_FORMAT_YUV_PLANAR (Y444_10LE, UNSIGNED_INT16, SAMPLE_YUV_PLANAR_10BIS),
|
|
MAKE_FORMAT_YUV_PLANAR (Y444_12LE, UNSIGNED_INT16, SAMPLE_YUV_PLANAR_12BIS),
|
|
MAKE_FORMAT_YUV_PLANAR (Y444_16LE, UNSIGNED_INT16, SAMPLE_YUV_PLANAR),
|
|
MAKE_FORMAT_RGB (RGBA, UNSIGNED_INT8, SAMPLE_RGBA),
|
|
MAKE_FORMAT_RGB (BGRA, UNSIGNED_INT8, SAMPLE_BGRA),
|
|
MAKE_FORMAT_RGB (RGBx, UNSIGNED_INT8, SAMPLE_RGBx),
|
|
MAKE_FORMAT_RGB (BGRx, UNSIGNED_INT8, SAMPLE_BGRx),
|
|
MAKE_FORMAT_RGB (ARGB, UNSIGNED_INT8, SAMPLE_ARGB),
|
|
MAKE_FORMAT_RGB (ARGB64, UNSIGNED_INT16, SAMPLE_ARGB64),
|
|
MAKE_FORMAT_RGB (ABGR, UNSIGNED_INT8, SAMPLE_AGBR),
|
|
MAKE_FORMAT_YUV_PLANAR (Y42B, UNSIGNED_INT8, SAMPLE_YUV_PLANAR),
|
|
MAKE_FORMAT_YUV_PLANAR (I422_10LE, UNSIGNED_INT16, SAMPLE_YUV_PLANAR_10BIS),
|
|
MAKE_FORMAT_YUV_PLANAR (I422_12LE, UNSIGNED_INT16, SAMPLE_YUV_PLANAR_12BIS),
|
|
MAKE_FORMAT_RGBP (RGBP, UNSIGNED_INT8, SAMPLE_RGBP),
|
|
MAKE_FORMAT_RGBP (BGRP, UNSIGNED_INT8, SAMPLE_BGRP),
|
|
MAKE_FORMAT_RGBP (GBR, UNSIGNED_INT8, SAMPLE_GBR),
|
|
MAKE_FORMAT_RGBP (GBR_10LE, UNSIGNED_INT16, SAMPLE_GBR_10),
|
|
MAKE_FORMAT_RGBP (GBR_12LE, UNSIGNED_INT16, SAMPLE_GBR_12),
|
|
MAKE_FORMAT_RGBP (GBR_16LE, UNSIGNED_INT16, SAMPLE_GBR),
|
|
MAKE_FORMAT_RGBAP (GBRA, UNSIGNED_INT8, SAMPLE_GBRA),
|
|
MAKE_FORMAT_RGB (VUYA, UNSIGNED_INT8, SAMPLE_VUYA),
|
|
};
|
|
|
|
struct TextureBuffer
|
|
{
|
|
CUdeviceptr ptr = 0;
|
|
gsize stride = 0;
|
|
CUtexObject texture = 0;
|
|
};
|
|
|
|
enum
|
|
{
|
|
PROP_0,
|
|
PROP_DEST_X,
|
|
PROP_DEST_Y,
|
|
PROP_DEST_WIDTH,
|
|
PROP_DEST_HEIGHT,
|
|
PROP_FILL_BORDER,
|
|
PROP_VIDEO_DIRECTION,
|
|
PROP_ALPHA,
|
|
PROP_BLEND,
|
|
};
|
|
|
|
struct _GstCudaConverterPrivate
|
|
{
|
|
_GstCudaConverterPrivate ()
|
|
{
|
|
config = gst_structure_new_empty ("converter-config");
|
|
}
|
|
|
|
~_GstCudaConverterPrivate ()
|
|
{
|
|
if (config)
|
|
gst_structure_free (config);
|
|
}
|
|
|
|
std::mutex lock;
|
|
|
|
GstVideoInfo in_info;
|
|
GstVideoInfo out_info;
|
|
|
|
GstStructure *config = nullptr;
|
|
|
|
GstVideoInfo texture_info;
|
|
const TextureFormat *texture_fmt;
|
|
gint texture_align;
|
|
|
|
TextureBuffer fallback_buffer[GST_VIDEO_MAX_COMPONENTS];
|
|
TextureBuffer unpack_buffer;
|
|
ConstBuffer *const_buf_staging = nullptr;
|
|
CUdeviceptr const_buf = 0;
|
|
|
|
CUmodule module = nullptr;
|
|
CUfunction main_func = nullptr;
|
|
CUfunction unpack_func = nullptr;
|
|
|
|
gboolean update_const_buf = TRUE;
|
|
|
|
/* properties */
|
|
gint dest_x = 0;
|
|
gint dest_y = 0;
|
|
gint dest_width = 0;
|
|
gint dest_height = 0;
|
|
GstVideoOrientationMethod video_direction = GST_VIDEO_ORIENTATION_IDENTITY;
|
|
gboolean fill_border = FALSE;
|
|
CUfilter_mode filter_mode = CU_TR_FILTER_MODE_LINEAR;
|
|
gdouble alpha = 1.0;
|
|
gboolean blend = FALSE;
|
|
};
|
|
|
|
static void gst_cuda_converter_dispose (GObject * object);
|
|
static void gst_cuda_converter_finalize (GObject * object);
|
|
static void gst_cuda_converter_set_property (GObject * object, guint prop_id,
|
|
const GValue * value, GParamSpec * pspec);
|
|
static void gst_cuda_converter_get_property (GObject * object, guint prop_id,
|
|
GValue * value, GParamSpec * pspec);
|
|
|
|
#define gst_cuda_converter_parent_class parent_class
|
|
G_DEFINE_TYPE (GstCudaConverter, gst_cuda_converter, GST_TYPE_OBJECT);
|
|
|
|
static void
|
|
gst_cuda_converter_class_init (GstCudaConverterClass * klass)
|
|
{
|
|
auto object_class = G_OBJECT_CLASS (klass);
|
|
auto param_flags = (GParamFlags) (G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS);
|
|
|
|
object_class->dispose = gst_cuda_converter_dispose;
|
|
object_class->finalize = gst_cuda_converter_finalize;
|
|
object_class->set_property = gst_cuda_converter_set_property;
|
|
object_class->get_property = gst_cuda_converter_get_property;
|
|
|
|
g_object_class_install_property (object_class, PROP_DEST_X,
|
|
g_param_spec_int ("dest-x", "Dest-X",
|
|
"x poisition in the destination frame", G_MININT, G_MAXINT, 0,
|
|
param_flags));
|
|
g_object_class_install_property (object_class, PROP_DEST_Y,
|
|
g_param_spec_int ("dest-y", "Dest-Y",
|
|
"y poisition in the destination frame", G_MININT, G_MAXINT, 0,
|
|
param_flags));
|
|
g_object_class_install_property (object_class, PROP_DEST_WIDTH,
|
|
g_param_spec_int ("dest-width", "Dest-Width",
|
|
"Width in the destination frame", 0, G_MAXINT, 0, param_flags));
|
|
g_object_class_install_property (object_class, PROP_DEST_HEIGHT,
|
|
g_param_spec_int ("dest-height", "Dest-Height",
|
|
"Height in the destination frame", 0, G_MAXINT, 0, param_flags));
|
|
g_object_class_install_property (object_class, PROP_FILL_BORDER,
|
|
g_param_spec_boolean ("fill-border", "Fill border",
|
|
"Fill border", FALSE, param_flags));
|
|
g_object_class_install_property (object_class, PROP_VIDEO_DIRECTION,
|
|
g_param_spec_enum ("video-direction", "Video Direction",
|
|
"Video direction", GST_TYPE_VIDEO_ORIENTATION_METHOD,
|
|
GST_VIDEO_ORIENTATION_IDENTITY, param_flags));
|
|
g_object_class_install_property (object_class, PROP_ALPHA,
|
|
g_param_spec_double ("alpha", "Alpha",
|
|
"The alpha color value to use", 0, 1.0, 1.0, param_flags));
|
|
g_object_class_install_property (object_class, PROP_BLEND,
|
|
g_param_spec_boolean ("blend", "Blend",
|
|
"Enable alpha blending", FALSE, param_flags));
|
|
|
|
GST_DEBUG_CATEGORY_INIT (gst_cuda_converter_debug,
|
|
"cudaconverter", 0, "cudaconverter");
|
|
}
|
|
|
|
static void
|
|
gst_cuda_converter_init (GstCudaConverter * self)
|
|
{
|
|
self->priv = new GstCudaConverterPrivate ();
|
|
}
|
|
|
|
static void
|
|
gst_cuda_converter_dispose (GObject * object)
|
|
{
|
|
auto self = GST_CUDA_CONVERTER (object);
|
|
auto priv = self->priv;
|
|
|
|
if (self->context && gst_cuda_context_push (self->context)) {
|
|
if (priv->module) {
|
|
CuModuleUnload (priv->module);
|
|
priv->module = nullptr;
|
|
}
|
|
|
|
for (guint i = 0; i < G_N_ELEMENTS (priv->fallback_buffer); i++) {
|
|
if (priv->fallback_buffer[i].ptr) {
|
|
if (priv->fallback_buffer[i].texture) {
|
|
CuTexObjectDestroy (priv->fallback_buffer[i].texture);
|
|
priv->fallback_buffer[i].texture = 0;
|
|
}
|
|
|
|
CuMemFree (priv->fallback_buffer[i].ptr);
|
|
priv->fallback_buffer[i].ptr = 0;
|
|
}
|
|
}
|
|
|
|
if (priv->unpack_buffer.ptr) {
|
|
if (priv->unpack_buffer.texture) {
|
|
CuTexObjectDestroy (priv->unpack_buffer.texture);
|
|
priv->unpack_buffer.texture = 0;
|
|
}
|
|
|
|
CuMemFree (priv->unpack_buffer.ptr);
|
|
priv->unpack_buffer.ptr = 0;
|
|
}
|
|
|
|
if (priv->const_buf_staging) {
|
|
CuMemFreeHost (priv->const_buf_staging);
|
|
priv->const_buf_staging = nullptr;
|
|
}
|
|
|
|
if (priv->const_buf) {
|
|
CuMemFree (priv->const_buf);
|
|
priv->const_buf = 0;
|
|
}
|
|
|
|
gst_cuda_context_pop (nullptr);
|
|
}
|
|
|
|
gst_clear_object (&self->context);
|
|
|
|
G_OBJECT_CLASS (parent_class)->dispose (object);
|
|
}
|
|
|
|
static void
|
|
gst_cuda_converter_finalize (GObject * object)
|
|
{
|
|
auto self = GST_CUDA_CONVERTER (object);
|
|
|
|
delete self->priv;
|
|
|
|
G_OBJECT_CLASS (parent_class)->finalize (object);
|
|
}
|
|
|
|
static void
|
|
gst_cuda_converter_set_property (GObject * object, guint prop_id,
|
|
const GValue * value, GParamSpec * pspec)
|
|
{
|
|
auto self = GST_CUDA_CONVERTER (object);
|
|
auto priv = self->priv;
|
|
|
|
std::lock_guard < std::mutex > lk (priv->lock);
|
|
switch (prop_id) {
|
|
case PROP_DEST_X:
|
|
{
|
|
auto dest_x = g_value_get_int (value);
|
|
if (priv->dest_x != dest_x) {
|
|
priv->update_const_buf = TRUE;
|
|
priv->dest_x = dest_x;
|
|
priv->const_buf_staging->left = dest_x;
|
|
priv->const_buf_staging->right = priv->dest_x + priv->dest_width;
|
|
}
|
|
break;
|
|
}
|
|
case PROP_DEST_Y:
|
|
{
|
|
auto dest_y = g_value_get_int (value);
|
|
if (priv->dest_y != dest_y) {
|
|
priv->update_const_buf = TRUE;
|
|
priv->dest_y = dest_y;
|
|
priv->const_buf_staging->top = dest_y;
|
|
priv->const_buf_staging->bottom = priv->dest_y + priv->dest_height;
|
|
}
|
|
break;
|
|
}
|
|
case PROP_DEST_WIDTH:
|
|
{
|
|
auto dest_width = g_value_get_int (value);
|
|
if (priv->dest_width != dest_width) {
|
|
priv->update_const_buf = TRUE;
|
|
priv->dest_width = dest_width;
|
|
priv->const_buf_staging->right = priv->dest_x + dest_width;
|
|
priv->const_buf_staging->view_width = dest_width;
|
|
}
|
|
break;
|
|
}
|
|
case PROP_DEST_HEIGHT:
|
|
{
|
|
auto dest_height = g_value_get_int (value);
|
|
if (priv->dest_height != dest_height) {
|
|
priv->update_const_buf = TRUE;
|
|
priv->dest_height = dest_height;
|
|
priv->const_buf_staging->bottom = priv->dest_y + dest_height;
|
|
priv->const_buf_staging->view_height = dest_height;
|
|
}
|
|
break;
|
|
}
|
|
case PROP_FILL_BORDER:
|
|
{
|
|
auto fill_border = g_value_get_boolean (value);
|
|
if (priv->fill_border != fill_border) {
|
|
priv->update_const_buf = TRUE;
|
|
priv->fill_border = fill_border;
|
|
priv->const_buf_staging->fill_border = fill_border;
|
|
}
|
|
break;
|
|
}
|
|
case PROP_VIDEO_DIRECTION:
|
|
{
|
|
auto video_direction =
|
|
(GstVideoOrientationMethod) g_value_get_enum (value);
|
|
if (priv->video_direction != video_direction) {
|
|
priv->update_const_buf = TRUE;
|
|
priv->video_direction = video_direction;
|
|
priv->const_buf_staging->video_direction = video_direction;
|
|
}
|
|
break;
|
|
}
|
|
case PROP_ALPHA:
|
|
{
|
|
auto alpha = g_value_get_double (value);
|
|
if (priv->alpha != alpha) {
|
|
priv->update_const_buf = TRUE;
|
|
priv->const_buf_staging->alpha = (float) alpha;
|
|
}
|
|
break;
|
|
}
|
|
case PROP_BLEND:
|
|
{
|
|
auto blend = g_value_get_boolean (value);
|
|
if (priv->blend != blend) {
|
|
priv->update_const_buf = TRUE;
|
|
priv->const_buf_staging->do_blend = blend;
|
|
}
|
|
break;
|
|
}
|
|
default:
|
|
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
|
|
break;
|
|
}
|
|
}
|
|
|
|
static void
|
|
gst_cuda_converter_get_property (GObject * object, guint prop_id,
|
|
GValue * value, GParamSpec * pspec)
|
|
{
|
|
auto self = GST_CUDA_CONVERTER (object);
|
|
auto priv = self->priv;
|
|
|
|
std::lock_guard < std::mutex > lk (priv->lock);
|
|
switch (prop_id) {
|
|
case PROP_DEST_X:
|
|
g_value_set_int (value, priv->dest_x);
|
|
break;
|
|
case PROP_DEST_Y:
|
|
g_value_set_int (value, priv->dest_y);
|
|
break;
|
|
case PROP_DEST_WIDTH:
|
|
g_value_set_int (value, priv->dest_width);
|
|
break;
|
|
case PROP_DEST_HEIGHT:
|
|
g_value_set_int (value, priv->dest_height);
|
|
break;
|
|
case PROP_FILL_BORDER:
|
|
g_value_set_boolean (value, priv->fill_border);
|
|
break;
|
|
case PROP_VIDEO_DIRECTION:
|
|
g_value_set_enum (value, priv->video_direction);
|
|
break;
|
|
case PROP_ALPHA:
|
|
g_value_set_double (value, priv->alpha);
|
|
break;
|
|
case PROP_BLEND:
|
|
g_value_set_boolean (value, priv->blend);
|
|
break;
|
|
default:
|
|
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
|
|
break;
|
|
}
|
|
}
|
|
|
|
static const gchar *
|
|
get_color_range_name (GstVideoColorRange range)
|
|
{
|
|
switch (range) {
|
|
case GST_VIDEO_COLOR_RANGE_0_255:
|
|
return "FULL";
|
|
case GST_VIDEO_COLOR_RANGE_16_235:
|
|
return "STUDIO";
|
|
default:
|
|
break;
|
|
}
|
|
|
|
return "UNKNOWN";
|
|
}
|
|
|
|
static gboolean
|
|
gst_cuda_converter_setup (GstCudaConverter * self)
|
|
{
|
|
GstCudaConverterPrivate *priv = self->priv;
|
|
const GstVideoInfo *in_info;
|
|
const GstVideoInfo *out_info;
|
|
const GstVideoInfo *texture_info;
|
|
GstCudaColorMatrix to_rgb_matrix;
|
|
GstCudaColorMatrix to_yuv_matrix;
|
|
GstCudaColorMatrix border_color_matrix;
|
|
gdouble border_color[4];
|
|
guint i, j;
|
|
const gchar *unpack_function = nullptr;
|
|
const gchar *write_func = nullptr;
|
|
const gchar *blend_func = nullptr;
|
|
const gchar *to_rgb_func = COLOR_SPACE_IDENTITY;
|
|
const gchar *to_yuv_func = COLOR_SPACE_IDENTITY;
|
|
const GstVideoColorimetry *in_color;
|
|
const GstVideoColorimetry *out_color;
|
|
gchar *str;
|
|
gchar *program = nullptr;
|
|
CUresult ret;
|
|
|
|
in_info = &priv->in_info;
|
|
out_info = &priv->out_info;
|
|
texture_info = &priv->texture_info;
|
|
in_color = &in_info->colorimetry;
|
|
out_color = &out_info->colorimetry;
|
|
|
|
memset (&to_rgb_matrix, 0, sizeof (GstCudaColorMatrix));
|
|
color_matrix_identity (&to_rgb_matrix);
|
|
|
|
memset (&to_yuv_matrix, 0, sizeof (GstCudaColorMatrix));
|
|
color_matrix_identity (&to_yuv_matrix);
|
|
|
|
switch (GST_VIDEO_INFO_FORMAT (out_info)) {
|
|
case GST_VIDEO_FORMAT_I420:
|
|
write_func = WRITE_I420;
|
|
blend_func = BLEND_I420;
|
|
break;
|
|
case GST_VIDEO_FORMAT_YV12:
|
|
write_func = WRITE_YV12;
|
|
blend_func = BLEND_YV12;
|
|
break;
|
|
case GST_VIDEO_FORMAT_NV12:
|
|
write_func = WRITE_NV12;
|
|
blend_func = BLEND_NV12;
|
|
break;
|
|
case GST_VIDEO_FORMAT_NV21:
|
|
write_func = WRITE_NV21;
|
|
blend_func = BLEND_NV21;
|
|
break;
|
|
case GST_VIDEO_FORMAT_P010_10LE:
|
|
case GST_VIDEO_FORMAT_P012_LE:
|
|
case GST_VIDEO_FORMAT_P016_LE:
|
|
write_func = WRITE_P010;
|
|
blend_func = BLEND_P010;
|
|
break;
|
|
case GST_VIDEO_FORMAT_I420_10LE:
|
|
write_func = WRITE_I420_10;
|
|
blend_func = BLEND_I420_10;
|
|
break;
|
|
case GST_VIDEO_FORMAT_I420_12LE:
|
|
write_func = WRITE_I420_12;
|
|
blend_func = BLEND_I420_12;
|
|
break;
|
|
case GST_VIDEO_FORMAT_Y444:
|
|
write_func = WRITE_Y444;
|
|
blend_func = BLEND_Y444;
|
|
break;
|
|
case GST_VIDEO_FORMAT_Y444_10LE:
|
|
write_func = WRITE_Y444_10;
|
|
blend_func = BLEND_Y444_10;
|
|
break;
|
|
case GST_VIDEO_FORMAT_Y444_12LE:
|
|
write_func = WRITE_Y444_12;
|
|
blend_func = BLEND_Y444_12;
|
|
break;
|
|
case GST_VIDEO_FORMAT_Y444_16LE:
|
|
write_func = WRITE_Y444_16;
|
|
blend_func = BLEND_Y444_16;
|
|
break;
|
|
case GST_VIDEO_FORMAT_RGBA:
|
|
write_func = WRITE_RGBA;
|
|
blend_func = BLEND_RGBA;
|
|
break;
|
|
case GST_VIDEO_FORMAT_RGBx:
|
|
write_func = WRITE_RGBx;
|
|
blend_func = BLEND_RGBx;
|
|
break;
|
|
case GST_VIDEO_FORMAT_BGRA:
|
|
write_func = WRITE_BGRA;
|
|
blend_func = BLEND_BGRA;
|
|
break;
|
|
case GST_VIDEO_FORMAT_BGRx:
|
|
write_func = WRITE_BGRx;
|
|
blend_func = BLEND_BGRx;
|
|
break;
|
|
case GST_VIDEO_FORMAT_ARGB:
|
|
write_func = WRITE_ARGB;
|
|
blend_func = BLEND_ARGB;
|
|
break;
|
|
case GST_VIDEO_FORMAT_ABGR:
|
|
write_func = WRITE_ABGR;
|
|
blend_func = BLEND_ABGR;
|
|
break;
|
|
case GST_VIDEO_FORMAT_RGB:
|
|
write_func = WRITE_RGB;
|
|
blend_func = BLEND_RGB;
|
|
break;
|
|
case GST_VIDEO_FORMAT_BGR:
|
|
write_func = WRITE_BGR;
|
|
blend_func = BLEND_BGR;
|
|
break;
|
|
case GST_VIDEO_FORMAT_RGB10A2_LE:
|
|
write_func = WRITE_RGB10A2;
|
|
blend_func = BLEND_RGB10A2;
|
|
break;
|
|
case GST_VIDEO_FORMAT_BGR10A2_LE:
|
|
write_func = WRITE_BGR10A2;
|
|
blend_func = BLEND_BGR10A2;
|
|
break;
|
|
case GST_VIDEO_FORMAT_Y42B:
|
|
write_func = WRITE_Y42B;
|
|
blend_func = BLEND_Y42B;
|
|
break;
|
|
case GST_VIDEO_FORMAT_I422_10LE:
|
|
write_func = WRITE_I422_10;
|
|
blend_func = BLEND_I422_10;
|
|
break;
|
|
case GST_VIDEO_FORMAT_I422_12LE:
|
|
write_func = WRITE_I422_12;
|
|
blend_func = BLEND_I422_12;
|
|
break;
|
|
case GST_VIDEO_FORMAT_RGBP:
|
|
write_func = WRITE_RGBP;
|
|
blend_func = BLEND_RGBP;
|
|
break;
|
|
case GST_VIDEO_FORMAT_BGRP:
|
|
write_func = WRITE_BGRP;
|
|
blend_func = BLEND_BGRP;
|
|
break;
|
|
case GST_VIDEO_FORMAT_GBR:
|
|
write_func = WRITE_GBR;
|
|
blend_func = BLEND_GBR;
|
|
break;
|
|
case GST_VIDEO_FORMAT_GBR_10LE:
|
|
write_func = WRITE_GBR_10;
|
|
blend_func = BLEND_GBR_10;
|
|
break;
|
|
case GST_VIDEO_FORMAT_GBR_12LE:
|
|
write_func = WRITE_GBR_12;
|
|
blend_func = BLEND_GBR_12;
|
|
break;
|
|
case GST_VIDEO_FORMAT_GBR_16LE:
|
|
write_func = WRITE_GBR_16;
|
|
blend_func = BLEND_GBR_16;
|
|
break;
|
|
case GST_VIDEO_FORMAT_GBRA:
|
|
write_func = WRITE_GBRA;
|
|
blend_func = BLEND_GBRA;
|
|
break;
|
|
case GST_VIDEO_FORMAT_VUYA:
|
|
write_func = WRITE_VUYA;
|
|
blend_func = BLEND_VUYA;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
if (!write_func) {
|
|
GST_ERROR_OBJECT (self, "Unknown write function for format %s",
|
|
gst_video_format_to_string (GST_VIDEO_INFO_FORMAT (out_info)));
|
|
return FALSE;
|
|
}
|
|
|
|
/* Decide texture info to use, 3 channel RGB or 10bits packed RGB
|
|
* need be converted to other format */
|
|
priv->texture_info = priv->in_info;
|
|
switch (GST_VIDEO_INFO_FORMAT (in_info)) {
|
|
case GST_VIDEO_FORMAT_RGB:
|
|
gst_video_info_set_format (&priv->texture_info,
|
|
GST_VIDEO_FORMAT_RGBx, GST_VIDEO_INFO_WIDTH (in_info),
|
|
GST_VIDEO_INFO_HEIGHT (in_info));
|
|
unpack_function = RGB_TO_RGBx;
|
|
break;
|
|
case GST_VIDEO_FORMAT_BGR:
|
|
gst_video_info_set_format (&priv->texture_info,
|
|
GST_VIDEO_FORMAT_BGRx, GST_VIDEO_INFO_WIDTH (in_info),
|
|
GST_VIDEO_INFO_HEIGHT (in_info));
|
|
unpack_function = RGB_TO_RGBx;
|
|
break;
|
|
case GST_VIDEO_FORMAT_RGB10A2_LE:
|
|
gst_video_info_set_format (&priv->texture_info,
|
|
GST_VIDEO_FORMAT_ARGB64, GST_VIDEO_INFO_WIDTH (in_info),
|
|
GST_VIDEO_INFO_HEIGHT (in_info));
|
|
unpack_function = RGB10A2_TO_ARGB64;
|
|
break;
|
|
case GST_VIDEO_FORMAT_BGR10A2_LE:
|
|
gst_video_info_set_format (&priv->texture_info,
|
|
GST_VIDEO_FORMAT_ARGB64, GST_VIDEO_INFO_WIDTH (in_info),
|
|
GST_VIDEO_INFO_HEIGHT (in_info));
|
|
unpack_function = BGR10A2_TO_ARGB64;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
for (i = 0; i < G_N_ELEMENTS (format_map); i++) {
|
|
if (format_map[i].format == GST_VIDEO_INFO_FORMAT (texture_info)) {
|
|
priv->texture_fmt = &format_map[i];
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!priv->texture_fmt) {
|
|
GST_ERROR_OBJECT (self, "Couldn't find texture format for %s (%s)",
|
|
gst_video_format_to_string (GST_VIDEO_INFO_FORMAT (in_info)),
|
|
gst_video_format_to_string (GST_VIDEO_INFO_FORMAT (texture_info)));
|
|
return FALSE;
|
|
}
|
|
|
|
/* calculate black color
|
|
* TODO: add support border color */
|
|
if (GST_VIDEO_INFO_IS_RGB (out_info)) {
|
|
GstVideoInfo rgb_info = *out_info;
|
|
rgb_info.colorimetry.range = GST_VIDEO_COLOR_RANGE_0_255;
|
|
gst_cuda_color_range_adjust_matrix_unorm (&rgb_info, out_info,
|
|
&border_color_matrix);
|
|
} else {
|
|
GstVideoInfo rgb_info;
|
|
|
|
gst_video_info_set_format (&rgb_info, GST_VIDEO_FORMAT_RGBA64_LE,
|
|
out_info->width, out_info->height);
|
|
|
|
gst_cuda_rgb_to_yuv_matrix_unorm (&rgb_info,
|
|
out_info, &border_color_matrix);
|
|
}
|
|
|
|
for (i = 0; i < 3; i++) {
|
|
/* TODO: property */
|
|
gdouble border_rgba[4] = { 0, 0, 0 };
|
|
border_color[i] = 0;
|
|
for (j = 0; j < 3; j++)
|
|
border_color[i] += border_color_matrix.matrix[i][j] * border_rgba[i];
|
|
border_color[i] = border_color_matrix.offset[i];
|
|
border_color[i] = CLAMP (border_color[i],
|
|
border_color_matrix.min[i], border_color_matrix.max[i]);
|
|
}
|
|
|
|
/* FIXME: handle primaries and transfer functions */
|
|
if (GST_VIDEO_INFO_IS_RGB (texture_info)) {
|
|
if (GST_VIDEO_INFO_IS_RGB (out_info)) {
|
|
/* RGB -> RGB */
|
|
if (in_color->range == out_color->range) {
|
|
GST_DEBUG_OBJECT (self, "RGB -> RGB conversion without matrix");
|
|
} else {
|
|
if (!gst_cuda_color_range_adjust_matrix_unorm (in_info, out_info,
|
|
&to_rgb_matrix)) {
|
|
GST_ERROR_OBJECT (self, "Failed to get RGB range adjust matrix");
|
|
return FALSE;
|
|
}
|
|
|
|
str = gst_cuda_dump_color_matrix (&to_rgb_matrix);
|
|
GST_DEBUG_OBJECT (self, "RGB range adjust %s -> %s\n%s",
|
|
get_color_range_name (in_color->range),
|
|
get_color_range_name (out_color->range), str);
|
|
g_free (str);
|
|
|
|
to_rgb_func = COLOR_SPACE_CONVERT;
|
|
}
|
|
} else {
|
|
/* RGB -> YUV */
|
|
if (!gst_cuda_rgb_to_yuv_matrix_unorm (in_info, out_info, &to_yuv_matrix)) {
|
|
GST_ERROR_OBJECT (self, "Failed to get RGB -> YUV transform matrix");
|
|
return FALSE;
|
|
}
|
|
|
|
str = gst_cuda_dump_color_matrix (&to_yuv_matrix);
|
|
GST_DEBUG_OBJECT (self, "RGB -> YUV matrix:\n%s", str);
|
|
g_free (str);
|
|
|
|
to_yuv_func = COLOR_SPACE_CONVERT;
|
|
}
|
|
} else {
|
|
if (GST_VIDEO_INFO_IS_RGB (out_info)) {
|
|
/* YUV -> RGB */
|
|
if (!gst_cuda_yuv_to_rgb_matrix_unorm (in_info, out_info, &to_rgb_matrix)) {
|
|
GST_ERROR_OBJECT (self, "Failed to get YUV -> RGB transform matrix");
|
|
return FALSE;
|
|
}
|
|
|
|
str = gst_cuda_dump_color_matrix (&to_rgb_matrix);
|
|
GST_DEBUG_OBJECT (self, "YUV -> RGB matrix:\n%s", str);
|
|
g_free (str);
|
|
|
|
to_rgb_func = COLOR_SPACE_CONVERT;
|
|
} else {
|
|
/* YUV -> YUV */
|
|
if (in_color->range == out_color->range) {
|
|
GST_DEBUG_OBJECT (self, "YUV -> YU conversion without matrix");
|
|
} else {
|
|
if (!gst_cuda_color_range_adjust_matrix_unorm (in_info, out_info,
|
|
&to_yuv_matrix)) {
|
|
GST_ERROR_OBJECT (self, "Failed to get GRAY range adjust matrix");
|
|
return FALSE;
|
|
}
|
|
|
|
str = gst_cuda_dump_color_matrix (&to_yuv_matrix);
|
|
GST_DEBUG_OBJECT (self, "YUV range adjust matrix:\n%s", str);
|
|
g_free (str);
|
|
|
|
to_yuv_func = COLOR_SPACE_CONVERT;
|
|
}
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < 3; i++) {
|
|
priv->const_buf_staging->toRGBCoeff.coeffX[i] = to_rgb_matrix.matrix[0][i];
|
|
priv->const_buf_staging->toRGBCoeff.coeffY[i] = to_rgb_matrix.matrix[1][i];
|
|
priv->const_buf_staging->toRGBCoeff.coeffZ[i] = to_rgb_matrix.matrix[2][i];
|
|
priv->const_buf_staging->toRGBCoeff.offset[i] = to_rgb_matrix.offset[i];
|
|
priv->const_buf_staging->toRGBCoeff.min[i] = to_rgb_matrix.min[i];
|
|
priv->const_buf_staging->toRGBCoeff.max[i] = to_rgb_matrix.max[i];
|
|
|
|
priv->const_buf_staging->toYuvCoeff.coeffX[i] = to_yuv_matrix.matrix[0][i];
|
|
priv->const_buf_staging->toYuvCoeff.coeffY[i] = to_yuv_matrix.matrix[1][i];
|
|
priv->const_buf_staging->toYuvCoeff.coeffZ[i] = to_yuv_matrix.matrix[2][i];
|
|
priv->const_buf_staging->toYuvCoeff.offset[i] = to_yuv_matrix.offset[i];
|
|
priv->const_buf_staging->toYuvCoeff.min[i] = to_yuv_matrix.min[i];
|
|
priv->const_buf_staging->toYuvCoeff.max[i] = to_yuv_matrix.max[i];
|
|
}
|
|
|
|
priv->const_buf_staging->width = out_info->width;
|
|
priv->const_buf_staging->height = out_info->height;
|
|
priv->const_buf_staging->left = 0;
|
|
priv->const_buf_staging->top = 0;
|
|
priv->const_buf_staging->right = out_info->width;
|
|
priv->const_buf_staging->bottom = out_info->height;
|
|
priv->const_buf_staging->view_width = out_info->width;
|
|
priv->const_buf_staging->view_height = out_info->height;
|
|
priv->const_buf_staging->border_x = border_color[0];
|
|
priv->const_buf_staging->border_y = border_color[1];
|
|
priv->const_buf_staging->border_z = border_color[2];
|
|
priv->const_buf_staging->border_w = border_color[3];
|
|
priv->const_buf_staging->fill_border = 0;
|
|
priv->const_buf_staging->video_direction = 0;
|
|
priv->const_buf_staging->alpha = 1;
|
|
priv->const_buf_staging->do_blend = 0;
|
|
|
|
str = g_strdup_printf (TEMPLATE_KERNEL, KERNEL_COMMON,
|
|
unpack_function ? unpack_function : "",
|
|
/* sampler function name */
|
|
priv->texture_fmt->sample_func,
|
|
/* TO RGB conversion function name */
|
|
to_rgb_func,
|
|
/* TO YUV conversion function name */
|
|
to_yuv_func,
|
|
/* write function name */
|
|
write_func,
|
|
/* blend function name */
|
|
blend_func);
|
|
|
|
GST_LOG_OBJECT (self, "kernel code:\n%s\n", str);
|
|
gint cuda_device;
|
|
g_object_get (self->context, "cuda-device-id", &cuda_device, nullptr);
|
|
program = gst_cuda_nvrtc_compile_cubin (str, cuda_device);
|
|
if (!program) {
|
|
GST_WARNING_OBJECT (self, "Couldn't compile to cubin, trying ptx");
|
|
program = gst_cuda_nvrtc_compile (str);
|
|
}
|
|
g_free (str);
|
|
|
|
if (!program) {
|
|
GST_ERROR_OBJECT (self, "Could not compile code");
|
|
return FALSE;
|
|
}
|
|
|
|
if (!gst_cuda_context_push (self->context)) {
|
|
GST_ERROR_OBJECT (self, "Couldn't push context");
|
|
g_free (program);
|
|
return FALSE;
|
|
}
|
|
|
|
/* Allocates intermediate memory for texture */
|
|
if (unpack_function) {
|
|
CUDA_TEXTURE_DESC texture_desc;
|
|
CUDA_RESOURCE_DESC resource_desc;
|
|
CUtexObject texture = 0;
|
|
|
|
memset (&texture_desc, 0, sizeof (CUDA_TEXTURE_DESC));
|
|
memset (&resource_desc, 0, sizeof (CUDA_RESOURCE_DESC));
|
|
|
|
ret = CuMemAllocPitch (&priv->unpack_buffer.ptr,
|
|
&priv->unpack_buffer.stride,
|
|
GST_VIDEO_INFO_COMP_WIDTH (texture_info, 0) *
|
|
GST_VIDEO_INFO_COMP_PSTRIDE (texture_info, 0),
|
|
GST_VIDEO_INFO_HEIGHT (texture_info), 16);
|
|
if (!gst_cuda_result (ret)) {
|
|
GST_ERROR_OBJECT (self, "Couldn't allocate unpack buffer");
|
|
goto error;
|
|
}
|
|
|
|
resource_desc.resType = CU_RESOURCE_TYPE_PITCH2D;
|
|
resource_desc.res.pitch2D.format = priv->texture_fmt->array_format[0];
|
|
resource_desc.res.pitch2D.numChannels = 4;
|
|
resource_desc.res.pitch2D.width = in_info->width;
|
|
resource_desc.res.pitch2D.height = in_info->height;
|
|
resource_desc.res.pitch2D.pitchInBytes = priv->unpack_buffer.stride;
|
|
resource_desc.res.pitch2D.devPtr = priv->unpack_buffer.ptr;
|
|
|
|
texture_desc.filterMode = priv->filter_mode;
|
|
texture_desc.flags = 0x2;
|
|
texture_desc.addressMode[0] = (CUaddress_mode) 1;
|
|
texture_desc.addressMode[1] = (CUaddress_mode) 1;
|
|
texture_desc.addressMode[2] = (CUaddress_mode) 1;
|
|
|
|
ret = CuTexObjectCreate (&texture, &resource_desc, &texture_desc, nullptr);
|
|
if (!gst_cuda_result (ret)) {
|
|
GST_ERROR_OBJECT (self, "Couldn't create unpack texture");
|
|
goto error;
|
|
}
|
|
|
|
priv->unpack_buffer.texture = texture;
|
|
}
|
|
|
|
ret = CuModuleLoadData (&priv->module, program);
|
|
g_clear_pointer (&program, g_free);
|
|
if (!gst_cuda_result (ret)) {
|
|
GST_ERROR_OBJECT (self, "Could not load module");
|
|
priv->module = nullptr;
|
|
goto error;
|
|
}
|
|
|
|
ret = CuModuleGetFunction (&priv->main_func,
|
|
priv->module, GST_CUDA_KERNEL_MAIN_FUNC);
|
|
if (!gst_cuda_result (ret)) {
|
|
GST_ERROR_OBJECT (self, "Could not get main function");
|
|
goto error;
|
|
}
|
|
|
|
if (unpack_function) {
|
|
ret = CuModuleGetFunction (&priv->unpack_func,
|
|
priv->module, GST_CUDA_KERNEL_UNPACK_FUNC);
|
|
if (!gst_cuda_result (ret)) {
|
|
GST_ERROR_OBJECT (self, "Could not get unpack function");
|
|
goto error;
|
|
}
|
|
}
|
|
|
|
ret = CuMemcpyHtoD (priv->const_buf,
|
|
priv->const_buf_staging, sizeof (ConstBuffer));
|
|
if (!gst_cuda_result (ret)) {
|
|
GST_ERROR_OBJECT (self, "Could upload const buf");
|
|
goto error;
|
|
}
|
|
|
|
gst_cuda_context_pop (nullptr);
|
|
|
|
return TRUE;
|
|
|
|
error:
|
|
gst_cuda_context_pop (nullptr);
|
|
g_free (program);
|
|
|
|
return FALSE;
|
|
}
|
|
|
|
static gboolean
|
|
copy_config (const GstIdStr * fieldname, const GValue * value,
|
|
gpointer user_data)
|
|
{
|
|
GstCudaConverter *self = (GstCudaConverter *) user_data;
|
|
|
|
gst_structure_id_str_set_value (self->priv->config, fieldname, value);
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
static void
|
|
gst_cuda_converter_set_config (GstCudaConverter * self, GstStructure * config)
|
|
{
|
|
gst_structure_foreach_id_str (config, copy_config, self);
|
|
gst_structure_free (config);
|
|
}
|
|
|
|
GstCudaConverter *
|
|
gst_cuda_converter_new (const GstVideoInfo * in_info,
|
|
const GstVideoInfo * out_info, GstCudaContext * context,
|
|
GstStructure * config)
|
|
{
|
|
GstCudaConverter *self;
|
|
GstCudaConverterPrivate *priv;
|
|
CUresult cuda_ret;
|
|
|
|
g_return_val_if_fail (in_info != nullptr, nullptr);
|
|
g_return_val_if_fail (out_info != nullptr, nullptr);
|
|
g_return_val_if_fail (GST_IS_CUDA_CONTEXT (context), nullptr);
|
|
|
|
self = (GstCudaConverter *) g_object_new (GST_TYPE_CUDA_CONVERTER, nullptr);
|
|
|
|
if (!GST_IS_CUDA_CONTEXT (context)) {
|
|
GST_WARNING_OBJECT (self, "Not a valid cuda context object");
|
|
goto error;
|
|
}
|
|
|
|
self->context = (GstCudaContext *) gst_object_ref (context);
|
|
priv = self->priv;
|
|
priv->in_info = *in_info;
|
|
priv->out_info = *out_info;
|
|
priv->dest_width = out_info->width;
|
|
priv->dest_height = out_info->height;
|
|
|
|
if (config)
|
|
gst_cuda_converter_set_config (self, config);
|
|
|
|
if (!gst_cuda_context_push (context)) {
|
|
GST_ERROR_OBJECT (self, "Couldn't push context");
|
|
goto error;
|
|
}
|
|
|
|
cuda_ret = CuMemAllocHost ((void **) &priv->const_buf_staging,
|
|
sizeof (ConstBuffer));
|
|
if (!gst_cuda_result (cuda_ret)) {
|
|
GST_ERROR_OBJECT (self, "Couldn't allocate staging const buf");
|
|
gst_cuda_context_pop (nullptr);
|
|
goto error;
|
|
}
|
|
|
|
cuda_ret = CuMemAlloc (&priv->const_buf, sizeof (ConstBuffer));
|
|
gst_cuda_context_pop (nullptr);
|
|
if (!gst_cuda_result (cuda_ret)) {
|
|
GST_ERROR_OBJECT (self, "Couldn't allocate const buf");
|
|
goto error;
|
|
}
|
|
|
|
if (!gst_cuda_converter_setup (self))
|
|
goto error;
|
|
|
|
priv->texture_align = gst_cuda_context_get_texture_alignment (context);
|
|
|
|
gst_object_ref_sink (self);
|
|
return self;
|
|
|
|
error:
|
|
gst_object_unref (self);
|
|
return nullptr;
|
|
}
|
|
|
|
static CUtexObject
|
|
gst_cuda_converter_create_texture_unchecked (GstCudaConverter * self,
|
|
CUdeviceptr src, gint width, gint height, CUarray_format format,
|
|
guint channels, gint stride, gint plane, CUfilter_mode mode)
|
|
{
|
|
CUDA_TEXTURE_DESC texture_desc;
|
|
CUDA_RESOURCE_DESC resource_desc;
|
|
CUtexObject texture = 0;
|
|
CUresult cuda_ret;
|
|
|
|
memset (&texture_desc, 0, sizeof (CUDA_TEXTURE_DESC));
|
|
memset (&resource_desc, 0, sizeof (CUDA_RESOURCE_DESC));
|
|
|
|
resource_desc.resType = CU_RESOURCE_TYPE_PITCH2D;
|
|
resource_desc.res.pitch2D.format = format;
|
|
resource_desc.res.pitch2D.numChannels = channels;
|
|
resource_desc.res.pitch2D.width = width;
|
|
resource_desc.res.pitch2D.height = height;
|
|
resource_desc.res.pitch2D.pitchInBytes = stride;
|
|
resource_desc.res.pitch2D.devPtr = src;
|
|
|
|
texture_desc.filterMode = mode;
|
|
/* Will read texture value as a normalized [0, 1] float value
|
|
* with [0, 1) coordinates */
|
|
/* CU_TRSF_NORMALIZED_COORDINATES */
|
|
texture_desc.flags = 0x2;
|
|
/* CU_TR_ADDRESS_MODE_CLAMP */
|
|
texture_desc.addressMode[0] = (CUaddress_mode) 1;
|
|
texture_desc.addressMode[1] = (CUaddress_mode) 1;
|
|
texture_desc.addressMode[2] = (CUaddress_mode) 1;
|
|
|
|
cuda_ret =
|
|
CuTexObjectCreate (&texture, &resource_desc, &texture_desc, nullptr);
|
|
|
|
if (!gst_cuda_result (cuda_ret)) {
|
|
GST_ERROR_OBJECT (self, "Could not create texture");
|
|
return 0;
|
|
}
|
|
|
|
return texture;
|
|
}
|
|
|
|
static gboolean
|
|
ensure_fallback_buffer (GstCudaConverter * self, gint width_in_bytes,
|
|
gint height, guint plane)
|
|
{
|
|
GstCudaConverterPrivate *priv = self->priv;
|
|
CUresult ret;
|
|
|
|
if (priv->fallback_buffer[plane].ptr)
|
|
return TRUE;
|
|
|
|
ret = CuMemAllocPitch (&priv->fallback_buffer[plane].ptr,
|
|
&priv->fallback_buffer[plane].stride, width_in_bytes, height, 16);
|
|
|
|
if (!gst_cuda_result (ret)) {
|
|
GST_ERROR_OBJECT (self, "Couldn't allocate fallback buffer");
|
|
return FALSE;
|
|
}
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
static CUtexObject
|
|
gst_cuda_converter_create_texture (GstCudaConverter * self,
|
|
CUdeviceptr src, gint width, gint height, gint stride, CUfilter_mode mode,
|
|
CUarray_format format, guint channles, gint plane, CUstream stream)
|
|
{
|
|
GstCudaConverterPrivate *priv = self->priv;
|
|
CUresult ret;
|
|
CUdeviceptr src_ptr;
|
|
CUDA_MEMCPY2D params = { 0, };
|
|
|
|
if (!ensure_fallback_buffer (self, stride, height, plane))
|
|
return 0;
|
|
|
|
params.srcMemoryType = CU_MEMORYTYPE_DEVICE;
|
|
params.srcPitch = stride;
|
|
params.srcDevice = (CUdeviceptr) src;
|
|
|
|
params.dstMemoryType = CU_MEMORYTYPE_DEVICE;
|
|
params.dstPitch = priv->fallback_buffer[plane].stride;
|
|
params.dstDevice = priv->fallback_buffer[plane].ptr;
|
|
params.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (&priv->in_info, plane)
|
|
* GST_VIDEO_INFO_COMP_PSTRIDE (&priv->in_info, plane),
|
|
params.Height = GST_VIDEO_INFO_COMP_HEIGHT (&priv->in_info, plane);
|
|
|
|
ret = CuMemcpy2DAsync (¶ms, stream);
|
|
if (!gst_cuda_result (ret)) {
|
|
GST_ERROR_OBJECT (self, "Couldn't copy to fallback buffer");
|
|
return 0;
|
|
}
|
|
|
|
if (!priv->fallback_buffer[plane].texture) {
|
|
src_ptr = priv->fallback_buffer[plane].ptr;
|
|
stride = priv->fallback_buffer[plane].stride;
|
|
|
|
priv->fallback_buffer[plane].texture =
|
|
gst_cuda_converter_create_texture_unchecked (self, src_ptr, width,
|
|
height, format, channles, stride, plane, mode);
|
|
}
|
|
|
|
return priv->fallback_buffer[plane].texture;
|
|
}
|
|
|
|
static gboolean
|
|
gst_cuda_converter_unpack_rgb (GstCudaConverter * self,
|
|
GstVideoFrame * src_frame, CUstream stream)
|
|
{
|
|
GstCudaConverterPrivate *priv = self->priv;
|
|
CUdeviceptr src;
|
|
gint width, height, src_stride, dst_stride;
|
|
CUresult ret;
|
|
gpointer args[] = { &src, &priv->unpack_buffer.ptr,
|
|
&width, &height, &src_stride, &dst_stride
|
|
};
|
|
|
|
g_assert (priv->unpack_buffer.ptr);
|
|
g_assert (priv->unpack_buffer.stride > 0);
|
|
|
|
src = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (src_frame, 0);
|
|
width = GST_VIDEO_FRAME_WIDTH (src_frame);
|
|
height = GST_VIDEO_FRAME_HEIGHT (src_frame);
|
|
src_stride = GST_VIDEO_FRAME_PLANE_STRIDE (src_frame, 0);
|
|
dst_stride = (gint) priv->unpack_buffer.stride;
|
|
|
|
ret = CuLaunchKernel (priv->unpack_func, DIV_UP (width, CUDA_BLOCK_X),
|
|
DIV_UP (height, CUDA_BLOCK_Y), 1, CUDA_BLOCK_X, CUDA_BLOCK_Y, 1, 0,
|
|
stream, args, nullptr);
|
|
|
|
if (!gst_cuda_result (ret)) {
|
|
GST_ERROR_OBJECT (self, "Couldn't unpack source RGB");
|
|
return FALSE;
|
|
}
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
gboolean
|
|
gst_cuda_converter_convert_frame (GstCudaConverter * converter,
|
|
GstVideoFrame * src_frame, GstVideoFrame * dst_frame, CUstream stream,
|
|
gboolean * synchronized)
|
|
{
|
|
GstCudaConverterPrivate *priv;
|
|
const TextureFormat *format;
|
|
CUtexObject texture[GST_VIDEO_MAX_COMPONENTS] = { 0, };
|
|
guint8 *dst[GST_VIDEO_MAX_COMPONENTS] = { nullptr, };
|
|
gint stride[2] = { 0, };
|
|
guint i;
|
|
gboolean ret = FALSE;
|
|
CUresult cuda_ret;
|
|
gint width, height;
|
|
gboolean need_sync = FALSE;
|
|
GstCudaMemory *cmem;
|
|
gint off_x = 0;
|
|
gint off_y = 0;
|
|
|
|
g_return_val_if_fail (GST_IS_CUDA_CONVERTER (converter), FALSE);
|
|
g_return_val_if_fail (src_frame != nullptr, FALSE);
|
|
g_return_val_if_fail (dst_frame != nullptr, FALSE);
|
|
|
|
priv = converter->priv;
|
|
format = priv->texture_fmt;
|
|
|
|
g_assert (format);
|
|
|
|
std::lock_guard < std::mutex > lk (priv->lock);
|
|
if (!priv->fill_border && (priv->dest_width <= 0 || priv->dest_height <= 0))
|
|
return TRUE;
|
|
|
|
if (priv->update_const_buf) {
|
|
priv->update_const_buf = FALSE;
|
|
cuda_ret = CuMemcpyHtoDAsync (priv->const_buf, priv->const_buf_staging,
|
|
sizeof (ConstBuffer), stream);
|
|
|
|
if (!gst_cuda_result (cuda_ret)) {
|
|
GST_ERROR_OBJECT (converter, "Couldn't upload const buffer");
|
|
return FALSE;
|
|
}
|
|
}
|
|
|
|
gpointer args[] = { &texture[0], &texture[1], &texture[2], &texture[3],
|
|
&dst[0], &dst[1], &dst[2], &dst[3], &stride[0], &stride[1],
|
|
&priv->const_buf, &off_x, &off_y
|
|
};
|
|
|
|
cmem = (GstCudaMemory *) gst_buffer_peek_memory (src_frame->buffer, 0);
|
|
g_return_val_if_fail (gst_is_cuda_memory (GST_MEMORY_CAST (cmem)), FALSE);
|
|
|
|
if (!gst_cuda_context_push (converter->context)) {
|
|
GST_ERROR_OBJECT (converter, "Couldn't push context");
|
|
return FALSE;
|
|
}
|
|
|
|
if (priv->unpack_func) {
|
|
if (!gst_cuda_converter_unpack_rgb (converter, src_frame, stream))
|
|
goto out;
|
|
|
|
texture[0] = priv->unpack_buffer.texture;
|
|
if (!texture[0]) {
|
|
GST_ERROR_OBJECT (converter, "Unpack texture is unavailable");
|
|
goto out;
|
|
}
|
|
} else {
|
|
for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (src_frame); i++) {
|
|
if (!gst_cuda_memory_get_texture (cmem,
|
|
i, priv->filter_mode, &texture[i])) {
|
|
CUdeviceptr src;
|
|
src = (CUdeviceptr) GST_VIDEO_FRAME_PLANE_DATA (src_frame, i);
|
|
texture[i] = gst_cuda_converter_create_texture (converter,
|
|
src, GST_VIDEO_FRAME_COMP_WIDTH (src_frame, i),
|
|
GST_VIDEO_FRAME_COMP_HEIGHT (src_frame, i),
|
|
GST_VIDEO_FRAME_PLANE_STRIDE (src_frame, i),
|
|
priv->filter_mode, format->array_format[i], format->channels[i],
|
|
i, stream);
|
|
need_sync = TRUE;
|
|
}
|
|
|
|
if (!texture[i]) {
|
|
GST_ERROR_OBJECT (converter, "Couldn't create texture %d", i);
|
|
goto out;
|
|
}
|
|
}
|
|
}
|
|
|
|
width = GST_VIDEO_FRAME_WIDTH (dst_frame);
|
|
height = GST_VIDEO_FRAME_HEIGHT (dst_frame);
|
|
|
|
if (!priv->fill_border) {
|
|
if (priv->dest_width < width) {
|
|
off_x = priv->dest_x;
|
|
width = priv->dest_width;
|
|
}
|
|
|
|
if (priv->dest_height < height) {
|
|
off_y = priv->dest_y;
|
|
height = priv->dest_height;
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < GST_VIDEO_FRAME_N_PLANES (dst_frame); i++)
|
|
dst[i] = (guint8 *) GST_VIDEO_FRAME_PLANE_DATA (dst_frame, i);
|
|
|
|
stride[0] = stride[1] = GST_VIDEO_FRAME_PLANE_STRIDE (dst_frame, 0);
|
|
if (GST_VIDEO_FRAME_N_PLANES (dst_frame) > 1)
|
|
stride[1] = GST_VIDEO_FRAME_PLANE_STRIDE (dst_frame, 1);
|
|
|
|
cuda_ret = CuLaunchKernel (priv->main_func, DIV_UP (width, CUDA_BLOCK_X),
|
|
DIV_UP (height, CUDA_BLOCK_Y), 1, CUDA_BLOCK_X, CUDA_BLOCK_Y, 1, 0,
|
|
stream, args, nullptr);
|
|
|
|
if (!gst_cuda_result (cuda_ret)) {
|
|
GST_ERROR_OBJECT (converter, "Couldn't convert frame");
|
|
goto out;
|
|
}
|
|
|
|
if (need_sync)
|
|
CuStreamSynchronize (stream);
|
|
|
|
if (synchronized)
|
|
*synchronized = need_sync;
|
|
|
|
ret = TRUE;
|
|
|
|
out:
|
|
gst_cuda_context_pop (nullptr);
|
|
return ret;
|
|
}
|