compositor: Add support for all formats

For formats for which we don't have a fast-path implementation, the compositor
will convert frames to one of the common unpack formats (AYUV, ARGB, AYUV64
or ARGB64), blending will then happen using the intermediate format, and
finally the blended image will be converted back to the selected output
format if required.

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/1486>
This commit is contained in:
Seungha Yang 2021-11-24 20:21:52 +09:00 committed by GStreamer Marge Bot
parent 22a4543c5f
commit 56d47f641f
4 changed files with 873 additions and 29 deletions

View file

@ -696,6 +696,693 @@ PLANAR_YUV_HIGH_FILL_COLOR (y444_16le, LE, compositor_orc_memset_u16_2d);
PLANAR_YUV_HIGH_FILL_CHECKER (y444_16be, 16, BE, compositor_orc_memset_u16_2d);
PLANAR_YUV_HIGH_FILL_COLOR (y444_16be, BE, compositor_orc_memset_u16_2d);
/* TODO: port to ORC */
#if G_BYTE_ORDER == G_LITTLE_ENDIAN
static void
compositor_blend_argb64 (guint8 * ORC_RESTRICT d1, int d1_stride,
    const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
{
  gint x, y, c;

  /* Blends ARGB64/AYUV64 source pixels over an opaque destination
   * (little endian: alpha in the low 16 bits of each 64-bit pixel, the
   * three colour components at bit offsets 48, 32 and 16).  p1 is the pad
   * alpha in [0, G_MAXUINT16], n the width in pixels, m the row count.
   * The result is written back fully opaque. */
  for (y = 0; y < m; y++) {
    guint64 *dst_line = (guint64 *) (d1 + (d1_stride * y));
    guint64 *src_line = (guint64 *) (s1 + (s1_stride * y));

    for (x = 0; x < n; x++) {
      guint64 in = src_line[x];
      guint64 out = dst_line[x];
      guint64 alpha, alpha_inv, result;

      /* effective alpha = per-pixel alpha scaled by the pad alpha */
      alpha = ((in & G_MAXUINT16) * (guint64) p1) / G_MAXUINT16;
      alpha = CLAMP (alpha, 0, G_MAXUINT16);
      alpha_inv = G_MAXUINT16 - alpha;

      /* blended output is always fully opaque */
      result = G_MAXUINT16;
      for (c = 0; c < 3; c++) {
        guint shift = 48 - 16 * c;
        guint64 s_comp = (in >> shift) & G_MAXUINT16;
        guint64 d_comp = (out >> shift) & G_MAXUINT16;
        guint64 mixed;

        /* linear interpolation (s * a + d * (1 - a)) in 16-bit range */
        mixed = (s_comp * alpha + d_comp * alpha_inv) / G_MAXUINT16;
        mixed = CLAMP (mixed, 0, G_MAXUINT16);
        result |= mixed << shift;
      }

      dst_line[x] = result;
    }
  }
}
static void
compositor_source_argb64 (guint8 * ORC_RESTRICT d1, int d1_stride,
    const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
{
  gint x, y;

  /* SOURCE operator for ARGB64/AYUV64 (little endian): copies every pixel
   * unchanged except for its alpha (low 16 bits), which is scaled by the
   * pad alpha p1 in [0, G_MAXUINT16].  n is the width in pixels, m the
   * number of rows. */
  for (y = 0; y < m; y++) {
    guint64 *dst_line = (guint64 *) (d1 + (d1_stride * y));
    guint64 *src_line = (guint64 *) (s1 + (s1_stride * y));

    for (x = 0; x < n; x++) {
      guint64 pixel = src_line[x];
      guint64 alpha = pixel & G_MAXUINT16;

      alpha = (alpha * (guint64) p1) / G_MAXUINT16;
      alpha = CLAMP (alpha, 0, G_MAXUINT16);
      /* keep the colour components, replace the alpha bits */
      dst_line[x] = (pixel & ~(guint64) G_MAXUINT16) | alpha;
    }
  }
}
/* OVER-composites ARGB64/AYUV64 source pixels onto a destination whose
 * alpha channel is preserved and updated (little endian: alpha in the low
 * 16 bits of each 64-bit pixel, colour components at bit offsets 48, 32
 * and 16).  p1 is the pad alpha in [0, G_MAXUINT16], n the width in
 * pixels, m the number of rows.  Colour components are re-normalized by
 * the composited alpha so stacking overlays stays associative. */
static void
compositor_overlay_argb64 (guint8 * ORC_RESTRICT d1, int d1_stride,
const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
{
gint i, j, k;
/* masks for the three colour components and alpha in a LE 64-bit pixel */
const guint64 comp_mask_0 = 0xffff000000000000;
const guint64 comp_mask_1 = 0x0000ffff00000000;
const guint64 comp_mask_2 = 0x00000000ffff0000;
const guint64 comp_mask_alpha = 0x000000000000ffff;
for (j = 0; j < m; j++) {
guint64 *dst;
guint64 *src;
dst = (guint64 *) (d1 + (d1_stride * j));
src = (guint64 *) (s1 + (s1_stride * j));
for (i = 0; i < n; i++) {
guint64 src_val = src[i];
guint64 dst_val = dst[i];
guint64 src_comp[3];
guint64 dst_comp[3];
guint64 src_alpha;
guint64 src_alpha_inv;
guint64 dst_alpha;
src_comp[0] = (src_val & comp_mask_0) >> 48;
src_comp[1] = (src_val & comp_mask_1) >> 32;
src_comp[2] = (src_val & comp_mask_2) >> 16;
dst_comp[0] = (dst_val & comp_mask_0) >> 48;
dst_comp[1] = (dst_val & comp_mask_1) >> 32;
dst_comp[2] = (dst_val & comp_mask_2) >> 16;
/* calc source alpha as alpha_s = alpha_s * alpha / G_MAXUINT16 */
src_alpha = src_val & comp_mask_alpha;
src_alpha *= p1;
src_alpha /= G_MAXUINT16;
src_alpha = CLAMP (src_alpha, 0, G_MAXUINT16);
src_alpha_inv = G_MAXUINT16 - src_alpha;
for (k = 0; k < G_N_ELEMENTS (src_comp); k++)
src_comp[k] *= src_alpha;
/* calc destination alpha as alpha_d = (1.0 - alpha_s) * alpha_d / 1.0 */
dst_alpha = dst_val & comp_mask_alpha;
dst_alpha *= src_alpha_inv;
dst_alpha /= G_MAXUINT16;
for (k = 0; k < G_N_ELEMENTS (dst_comp); k++)
dst_comp[k] *= dst_alpha;
/* calc final pixel as pix_d = pix_s*alpha_s + pix_d*alpha_d*(G_MAXUINT16-alpha_s)/G_MAXUINT16 */
for (k = 0; k < G_N_ELEMENTS (dst_comp); k++)
dst_comp[k] += src_comp[k];
/* calc the final destination alpha_d = alpha_s + alpha_d * (G_MAXUINT16-alpha_s)/G_MAXUINT16 */
dst_alpha += src_alpha;
dst_alpha = CLAMP (dst_alpha, 0, G_MAXUINT16);
/* now normalize the pix_d by the final alpha to make it associative;
 * a fully transparent result (dst_alpha == 0) is left unnormalized to
 * avoid dividing by zero */
for (k = 0; k < G_N_ELEMENTS (dst_comp); k++) {
if (dst_alpha > 0)
dst_comp[k] /= dst_alpha;
dst_comp[k] = CLAMP (dst_comp[k], 0, G_MAXUINT16);
}
dst_val = (dst_comp[0] << 48) | (dst_comp[1] << 32) | (dst_comp[2] << 16)
| dst_alpha;
dst[i] = dst_val;
}
}
}
/* ADD-mode variant of compositor_overlay_argb64 (little endian): colour
 * components are composited exactly like OVER via a temporary alpha_factor,
 * but the final destination alpha is the clamped sum alpha_d + alpha_s,
 * so overlapping sources accumulate coverage. */
static void
compositor_overlay_argb64_addition (guint8 * ORC_RESTRICT d1, int d1_stride,
const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
{
gint i, j, k;
/* masks for the three colour components and alpha in a LE 64-bit pixel */
const guint64 comp_mask_0 = 0xffff000000000000;
const guint64 comp_mask_1 = 0x0000ffff00000000;
const guint64 comp_mask_2 = 0x00000000ffff0000;
const guint64 comp_mask_alpha = 0x000000000000ffff;
for (j = 0; j < m; j++) {
guint64 *dst;
guint64 *src;
dst = (guint64 *) (d1 + (d1_stride * j));
src = (guint64 *) (s1 + (s1_stride * j));
for (i = 0; i < n; i++) {
guint64 src_val = src[i];
guint64 dst_val = dst[i];
guint64 src_comp[3];
guint64 dst_comp[3];
guint64 src_alpha;
guint64 src_alpha_inv;
guint64 alpha_factor;
guint64 dst_alpha;
src_comp[0] = (src_val & comp_mask_0) >> 48;
src_comp[1] = (src_val & comp_mask_1) >> 32;
src_comp[2] = (src_val & comp_mask_2) >> 16;
dst_comp[0] = (dst_val & comp_mask_0) >> 48;
dst_comp[1] = (dst_val & comp_mask_1) >> 32;
dst_comp[2] = (dst_val & comp_mask_2) >> 16;
/* calc source alpha as alpha_s = alpha_s * alpha / G_MAXUINT16 */
src_alpha = src_val & comp_mask_alpha;
src_alpha *= p1;
src_alpha /= G_MAXUINT16;
src_alpha = CLAMP (src_alpha, 0, G_MAXUINT16);
src_alpha_inv = G_MAXUINT16 - src_alpha;
for (k = 0; k < G_N_ELEMENTS (src_comp); k++)
src_comp[k] *= src_alpha;
/* calc destination alpha as alpha_factor = (G_MAXUINT16-alpha_s) * alpha_factor / factor */
alpha_factor = dst_val & comp_mask_alpha;
alpha_factor *= src_alpha_inv;
alpha_factor /= G_MAXUINT16;
for (k = 0; k < G_N_ELEMENTS (dst_comp); k++)
dst_comp[k] *= alpha_factor;
/* calc final pixel as pix_d = pix_s*alpha_s + pix_d*alpha_factor*(G_MAXUINT16-alpha_s)/G_MAXUINT16 */
for (k = 0; k < G_N_ELEMENTS (dst_comp); k++)
dst_comp[k] += src_comp[k];
/* calc the alpha factor alpha_factor = alpha_s + alpha_factor * (G_MAXUINT16-alpha_s)/G_MAXUINT16 */
alpha_factor += src_alpha;
alpha_factor = CLAMP (alpha_factor, 0, G_MAXUINT16);
/* now normalize the pix_d by the final alpha to make it associative;
 * skip the division when the factor is zero */
for (k = 0; k < G_N_ELEMENTS (dst_comp); k++) {
if (alpha_factor > 0)
dst_comp[k] /= alpha_factor;
dst_comp[k] = CLAMP (dst_comp[k], 0, G_MAXUINT16);
}
/* calc the final global alpha_d = alpha_d + (alpha_s * (alpha / G_MAXUINT16)) */
dst_alpha = dst_val & comp_mask_alpha;
dst_alpha += src_alpha;
dst_alpha = CLAMP (dst_alpha, 0, G_MAXUINT16);
dst_val = (dst_comp[0] << 48) | (dst_comp[1] << 32) | (dst_comp[2] << 16)
| dst_alpha;
dst[i] = dst_val;
}
}
}
#else /* if G_BYTE_ORDER == G_LITTLE_ENDIAN */
static void
compositor_blend_bgra64 (guint8 * ORC_RESTRICT d1, int d1_stride,
    const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
{
  gint x, y, c;

  /* Big-endian counterpart of compositor_blend_argb64: alpha occupies the
   * top 16 bits of each 64-bit pixel and the three colour components sit
   * at bit offsets 0, 16 and 32.  p1 is the pad alpha in [0, G_MAXUINT16],
   * n the width in pixels, m the row count.  The result is written back
   * fully opaque. */
  for (y = 0; y < m; y++) {
    guint64 *dst_line = (guint64 *) (d1 + (d1_stride * y));
    guint64 *src_line = (guint64 *) (s1 + (s1_stride * y));

    for (x = 0; x < n; x++) {
      guint64 in = src_line[x];
      guint64 out = dst_line[x];
      guint64 alpha, alpha_inv, result;

      /* effective alpha = per-pixel alpha scaled by the pad alpha */
      alpha = (((in >> 48) & G_MAXUINT16) * (guint64) p1) / G_MAXUINT16;
      alpha = CLAMP (alpha, 0, G_MAXUINT16);
      alpha_inv = G_MAXUINT16 - alpha;

      /* blended output is always fully opaque */
      result = (guint64) G_MAXUINT16 << 48;
      for (c = 0; c < 3; c++) {
        guint shift = 16 * c;
        guint64 s_comp = (in >> shift) & G_MAXUINT16;
        guint64 d_comp = (out >> shift) & G_MAXUINT16;
        guint64 mixed;

        /* linear interpolation (s * a + d * (1 - a)) in 16-bit range */
        mixed = (s_comp * alpha + d_comp * alpha_inv) / G_MAXUINT16;
        mixed = CLAMP (mixed, 0, G_MAXUINT16);
        result |= mixed << shift;
      }

      dst_line[x] = result;
    }
  }
}
static void
compositor_source_bgra64 (guint8 * ORC_RESTRICT d1, int d1_stride,
    const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
{
  gint x, y;

  /* SOURCE operator, big-endian layout: copies every pixel unchanged
   * except for its alpha (top 16 bits), which is scaled by the pad alpha
   * p1 in [0, G_MAXUINT16].  n is the width in pixels, m the row count. */
  for (y = 0; y < m; y++) {
    guint64 *dst_line = (guint64 *) (d1 + (d1_stride * y));
    guint64 *src_line = (guint64 *) (s1 + (s1_stride * y));

    for (x = 0; x < n; x++) {
      guint64 pixel = src_line[x];
      guint64 alpha = (pixel >> 48) & G_MAXUINT16;

      alpha = (alpha * (guint64) p1) / G_MAXUINT16;
      alpha = CLAMP (alpha, 0, G_MAXUINT16);
      /* keep the colour components, replace the alpha bits */
      dst_line[x] = (pixel & 0x0000ffffffffffff) | (alpha << 48);
    }
  }
}
/* OVER-composites onto a destination whose alpha channel is preserved and
 * updated; big-endian layout with alpha in the top 16 bits of each 64-bit
 * pixel and colour components at bit offsets 0, 16 and 32.  p1 is the pad
 * alpha in [0, G_MAXUINT16], n the width in pixels, m the number of rows.
 * Colour components are re-normalized by the composited alpha so stacking
 * overlays stays associative. */
static void
compositor_overlay_bgra64 (guint8 * ORC_RESTRICT d1, int d1_stride,
const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
{
gint i, j, k;
/* masks for the three colour components and alpha in a BE 64-bit pixel */
const guint64 comp_mask_0 = 0x000000000000ffff;
const guint64 comp_mask_1 = 0x00000000ffff0000;
const guint64 comp_mask_2 = 0x0000ffff00000000;
const guint64 comp_mask_alpha = 0xffff000000000000;
for (j = 0; j < m; j++) {
guint64 *dst;
guint64 *src;
dst = (guint64 *) (d1 + (d1_stride * j));
src = (guint64 *) (s1 + (s1_stride * j));
for (i = 0; i < n; i++) {
guint64 src_val = src[i];
guint64 dst_val = dst[i];
guint64 src_comp[3];
guint64 dst_comp[3];
guint64 src_alpha;
guint64 src_alpha_inv;
guint64 dst_alpha;
src_comp[0] = src_val & comp_mask_0;
src_comp[1] = (src_val & comp_mask_1) >> 16;
src_comp[2] = (src_val & comp_mask_2) >> 32;
dst_comp[0] = dst_val & comp_mask_0;
dst_comp[1] = (dst_val & comp_mask_1) >> 16;
dst_comp[2] = (dst_val & comp_mask_2) >> 32;
/* calc source alpha as alpha_s = alpha_s * alpha / G_MAXUINT16 */
src_alpha = (src_val & comp_mask_alpha) >> 48;
src_alpha *= p1;
src_alpha /= G_MAXUINT16;
src_alpha = CLAMP (src_alpha, 0, G_MAXUINT16);
src_alpha_inv = G_MAXUINT16 - src_alpha;
for (k = 0; k < G_N_ELEMENTS (src_comp); k++)
src_comp[k] *= src_alpha;
/* calc destination alpha as alpha_d = (1.0 - alpha_s) * alpha_d / 1.0 */
dst_alpha = (dst_val & comp_mask_alpha) >> 48;
dst_alpha *= src_alpha_inv;
dst_alpha /= G_MAXUINT16;
for (k = 0; k < G_N_ELEMENTS (dst_comp); k++)
dst_comp[k] *= dst_alpha;
/* calc final pixel as pix_d = pix_s*alpha_s + pix_d*alpha_d*(G_MAXUINT16-alpha_s)/G_MAXUINT16 */
for (k = 0; k < G_N_ELEMENTS (dst_comp); k++)
dst_comp[k] += src_comp[k];
/* calc the final destination alpha_d = alpha_s + alpha_d * (G_MAXUINT16-alpha_s)/G_MAXUINT16 */
dst_alpha += src_alpha;
dst_alpha = CLAMP (dst_alpha, 0, G_MAXUINT16);
/* now normalize the pix_d by the final alpha to make it associative;
 * a fully transparent result (dst_alpha == 0) is left unnormalized to
 * avoid dividing by zero */
for (k = 0; k < G_N_ELEMENTS (dst_comp); k++) {
if (dst_alpha > 0)
dst_comp[k] /= dst_alpha;
dst_comp[k] = CLAMP (dst_comp[k], 0, G_MAXUINT16);
}
dst_val = (dst_comp[0]) | (dst_comp[1] << 16) | (dst_comp[2] << 32)
| (dst_alpha << 48);
dst[i] = dst_val;
}
}
}
/* ADD-mode variant of compositor_overlay_bgra64 (big endian): colour
 * components are composited exactly like OVER via a temporary alpha_factor,
 * but the final destination alpha is the clamped sum alpha_d + alpha_s,
 * so overlapping sources accumulate coverage. */
static void
compositor_overlay_bgra64_addition (guint8 * ORC_RESTRICT d1, int d1_stride,
const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
{
gint i, j, k;
/* masks for the three colour components and alpha in a BE 64-bit pixel */
const guint64 comp_mask_0 = 0x000000000000ffff;
const guint64 comp_mask_1 = 0x00000000ffff0000;
const guint64 comp_mask_2 = 0x0000ffff00000000;
const guint64 comp_mask_alpha = 0xffff000000000000;
for (j = 0; j < m; j++) {
guint64 *dst;
guint64 *src;
dst = (guint64 *) (d1 + (d1_stride * j));
src = (guint64 *) (s1 + (s1_stride * j));
for (i = 0; i < n; i++) {
guint64 src_val = src[i];
guint64 dst_val = dst[i];
guint64 src_comp[3];
guint64 dst_comp[3];
guint64 src_alpha;
guint64 src_alpha_inv;
guint64 alpha_factor;
guint64 dst_alpha;
src_comp[0] = src_val & comp_mask_0;
src_comp[1] = (src_val & comp_mask_1) >> 16;
src_comp[2] = (src_val & comp_mask_2) >> 32;
dst_comp[0] = dst_val & comp_mask_0;
dst_comp[1] = (dst_val & comp_mask_1) >> 16;
dst_comp[2] = (dst_val & comp_mask_2) >> 32;
/* calc source alpha as alpha_s = alpha_s * alpha / G_MAXUINT16 */
src_alpha = (src_val & comp_mask_alpha) >> 48;
src_alpha *= p1;
src_alpha /= G_MAXUINT16;
src_alpha = CLAMP (src_alpha, 0, G_MAXUINT16);
src_alpha_inv = G_MAXUINT16 - src_alpha;
for (k = 0; k < G_N_ELEMENTS (src_comp); k++)
src_comp[k] *= src_alpha;
/* calc destination alpha as alpha_factor = (G_MAXUINT16-alpha_s) * alpha_factor / factor */
alpha_factor = (dst_val & comp_mask_alpha) >> 48;
alpha_factor *= src_alpha_inv;
alpha_factor /= G_MAXUINT16;
for (k = 0; k < G_N_ELEMENTS (dst_comp); k++)
dst_comp[k] *= alpha_factor;
/* calc final pixel as pix_d = pix_s*alpha_s + pix_d*alpha_factor*(G_MAXUINT16-alpha_s)/G_MAXUINT16 */
for (k = 0; k < G_N_ELEMENTS (dst_comp); k++)
dst_comp[k] += src_comp[k];
/* calc the alpha factor alpha_factor = alpha_s + alpha_factor * (G_MAXUINT16-alpha_s)/G_MAXUINT16 */
alpha_factor += src_alpha;
alpha_factor = CLAMP (alpha_factor, 0, G_MAXUINT16);
/* now normalize the pix_d by the final alpha to make it associative;
 * skip the division when the factor is zero */
for (k = 0; k < G_N_ELEMENTS (dst_comp); k++) {
if (alpha_factor > 0)
dst_comp[k] /= alpha_factor;
dst_comp[k] = CLAMP (dst_comp[k], 0, G_MAXUINT16);
}
/* calc the final global alpha_d = alpha_d + (alpha_s * (alpha / G_MAXUINT16)) */
dst_alpha = (dst_val & comp_mask_alpha) >> 48;
dst_alpha += src_alpha;
dst_alpha = CLAMP (dst_alpha, 0, G_MAXUINT16);
dst_val = (dst_comp[0]) | (dst_comp[1] << 16) | (dst_comp[2] << 32)
| (dst_alpha << 48);
dst[i] = dst_val;
}
}
}
#endif /* if G_BYTE_ORDER == G_LITTLE_ENDIAN */
/* for AYUV64, ARGB64 */
/* Expands to <method>_<name> () (e.g. blend_argb64), the per-frame entry
 * point: it converts src_alpha from [0.0, 1.0] to [0, G_MAXUINT16],
 * returns immediately for a fully transparent source, clips the source
 * rectangle against the destination and the [dst_y_start, dst_y_end) band
 * (xpos/ypos may be negative, the source may extend past the destination),
 * and forwards the clipped region to LOOP.  The factor 8 is the
 * ARGB64/AYUV64 pixel size in bytes. */
#define BLEND_A64(name, method, LOOP) \
static void \
method##_ ##name (GstVideoFrame * srcframe, gint xpos, gint ypos, \
gdouble src_alpha, GstVideoFrame * destframe, gint dst_y_start, \
gint dst_y_end, GstCompositorBlendMode mode) \
{ \
guint s_alpha; \
gint src_stride, dest_stride; \
gint dest_width, dest_height; \
guint8 *src, *dest; \
gint src_width, src_height; \
\
src_width = GST_VIDEO_FRAME_WIDTH (srcframe); \
src_height = GST_VIDEO_FRAME_HEIGHT (srcframe); \
src = GST_VIDEO_FRAME_PLANE_DATA (srcframe, 0); \
src_stride = GST_VIDEO_FRAME_COMP_STRIDE (srcframe, 0); \
dest = GST_VIDEO_FRAME_PLANE_DATA (destframe, 0); \
dest_stride = GST_VIDEO_FRAME_COMP_STRIDE (destframe, 0); \
dest_width = GST_VIDEO_FRAME_COMP_WIDTH (destframe, 0); \
dest_height = GST_VIDEO_FRAME_COMP_HEIGHT (destframe, 0); \
\
s_alpha = CLAMP ((gint) (src_alpha * G_MAXUINT16), 0, G_MAXUINT16); \
\
/* If it's completely transparent... we just return */ \
if (G_UNLIKELY (s_alpha == 0)) \
return; \
\
if (dst_y_end > dest_height) { \
dst_y_end = dest_height; \
} \
/* adjust src pointers for negative sizes */ \
if (xpos < 0) { \
src += -xpos * 8; \
src_width -= -xpos; \
xpos = 0; \
} \
if (ypos < dst_y_start) { \
src += (dst_y_start - ypos) * src_stride; \
src_height -= dst_y_start - ypos; \
ypos = dst_y_start; \
} \
/* adjust width/height if the src is bigger than dest */ \
if (xpos + src_width > dest_width) { \
src_width = dest_width - xpos; \
} \
if (ypos + src_height > dst_y_end) { \
src_height = dst_y_end - ypos; \
} \
\
if (src_height > 0 && src_width > 0) { \
dest = dest + 8 * xpos + (ypos * dest_stride); \
\
LOOP (dest, src, src_height, src_width, src_stride, dest_stride, s_alpha, \
mode); \
} \
}
/* Mode dispatcher used when the destination alpha is meaningful: SOURCE
 * either memcpy()s whole rows (pad alpha fully opaque) or rewrites the
 * per-pixel alpha via compositor_source_<name>; OVER and ADD call the
 * matching compositor_overlay_<name> primitives. */
#define OVERLAY_A64_LOOP(name) \
static inline void \
_overlay_loop_##name (guint8 * dest, const guint8 * src, gint src_height, \
gint src_width, gint src_stride, gint dest_stride, guint s_alpha, \
GstCompositorBlendMode mode) \
{ \
s_alpha = MIN (G_MAXUINT16, s_alpha); \
switch (mode) { \
case COMPOSITOR_BLEND_MODE_SOURCE:\
if (s_alpha == G_MAXUINT16) { \
guint y; \
for (y = 0; y < src_height; y++) { \
memcpy (dest, src, 8 * src_width); \
dest += dest_stride; \
src += src_stride; \
} \
} else { \
compositor_source_##name (dest, dest_stride, src, src_stride, \
s_alpha, src_width, src_height); \
} \
break;\
case COMPOSITOR_BLEND_MODE_OVER:\
compositor_overlay_##name (dest, dest_stride, src, src_stride, \
s_alpha, src_width, src_height); \
break;\
case COMPOSITOR_BLEND_MODE_ADD:\
compositor_overlay_##name##_addition (dest, dest_stride, src, src_stride, \
s_alpha, src_width, src_height); \
break;\
}\
}
/* Mode dispatcher used for an opaque destination: OVER and ADD fall
 * through to the same compositor_blend_<name> primitive because both
 * modes are the same for an opaque background; SOURCE behaves as in
 * OVERLAY_A64_LOOP. */
#define BLEND_A64_LOOP(name) \
static inline void \
_blend_loop_##name (guint8 * dest, const guint8 * src, gint src_height, \
gint src_width, gint src_stride, gint dest_stride, guint s_alpha, \
GstCompositorBlendMode mode) \
{ \
s_alpha = MIN (G_MAXUINT16, s_alpha); \
switch (mode) { \
case COMPOSITOR_BLEND_MODE_SOURCE:\
if (s_alpha == G_MAXUINT16) { \
guint y; \
for (y = 0; y < src_height; y++) { \
memcpy (dest, src, 8 * src_width); \
dest += dest_stride; \
src += src_stride; \
} \
} else { \
compositor_source_##name (dest, dest_stride, src, src_stride, \
s_alpha, src_width, src_height); \
} \
break;\
case COMPOSITOR_BLEND_MODE_OVER:\
case COMPOSITOR_BLEND_MODE_ADD:\
/* both modes are the same for opaque background */ \
compositor_blend_##name (dest, dest_stride, src, src_stride, \
s_alpha, src_width, src_height); \
break;\
}\
}
#if G_BYTE_ORDER == G_LITTLE_ENDIAN
OVERLAY_A64_LOOP (argb64);
BLEND_A64_LOOP (argb64);
BLEND_A64 (argb64, blend, _blend_loop_argb64);
BLEND_A64 (argb64, overlay, _overlay_loop_argb64);
#else
OVERLAY_A64_LOOP (bgra64);
BLEND_A64_LOOP (bgra64);
BLEND_A64 (argb64, blend, _blend_loop_bgra64);
BLEND_A64 (argb64, overlay, _overlay_loop_bgra64);
#endif
/* Expands to fill_checker_<name>_c (): draws an 8x8-cell checkerboard into
 * rows [y_start, y_end) of a packed 4 x 16-bit frame.  A/C1/C2/C3 give the
 * component order within a pixel.  For YUV (RGB == FALSE) only C1 (luma)
 * alternates between the two tab[] levels while C2/C3 are held at the
 * chroma midpoint (1 << 15); for RGB all three colour components
 * alternate.  Alpha is always fully opaque. */
#define A64_CHECKER_C(name, RGB, A, C1, C2, C3) \
static void \
fill_checker_##name##_c (GstVideoFrame * frame, guint y_start, guint y_end) \
{ \
gint i, j; \
gint val; \
static const gint tab[] = { 20480, 40960, 20480, 40960 }; \
static const gint uv = 1 << 15; \
gint width, stride; \
guint8 *dest; \
\
dest = GST_VIDEO_FRAME_PLANE_DATA (frame, 0); \
width = GST_VIDEO_FRAME_COMP_WIDTH (frame, 0); \
stride = GST_VIDEO_FRAME_COMP_STRIDE (frame, 0); \
\
if (!RGB) { \
for (i = y_start; i < y_end; i++) { \
guint16 *data = (guint16 *) (dest + i * stride); \
for (j = 0; j < width; j++) { \
data[A] = 0xffff; \
data[C1] = tab[((i & 0x8) >> 3) + ((j & 0x8) >> 3)]; \
data[C2] = uv; \
data[C3] = uv; \
data += 4; \
} \
} \
} else { \
for (i = y_start; i < y_end; i++) { \
guint16 *data = (guint16 *) (dest + i * stride); \
for (j = 0; j < width; j++) { \
val = tab[((i & 0x8) >> 3) + ((j & 0x8) >> 3)]; \
data[A] = 0xffff; \
data[C1] = val; \
data[C2] = val; \
data[C3] = val; \
data += 4; \
} \
} \
} \
}
A64_CHECKER_C (argb64, TRUE, 0, 1, 2, 3);
A64_CHECKER_C (ayuv64, FALSE, 0, 1, 2, 3);
/* Expands to fill_color_<name> (): fills rows [y_start, y_end) with the
 * solid colour (c1, c2, c3) and full alpha in a packed 4 x 16-bit frame;
 * A/C1/C2/C3 give the component order within a pixel.
 * NOTE(review): width comes from GST_VIDEO_FRAME_WIDTH here, whereas the
 * checker variant uses GST_VIDEO_FRAME_COMP_WIDTH — equivalent for these
 * packed single-plane formats, but worth confirming they stay in sync. */
#define A64_COLOR(name, A, C1, C2, C3) \
static void \
fill_color_##name (GstVideoFrame * frame, guint y_start, guint y_end, gint c1, gint c2, gint c3) \
{ \
gint i, j; \
gint stride; \
guint8 *dest; \
guint width; \
guint height; \
\
height = y_end - y_start; \
if (height <= 0) \
return; \
\
dest = GST_VIDEO_FRAME_PLANE_DATA (frame, 0); \
stride = GST_VIDEO_FRAME_COMP_STRIDE (frame, 0); \
width = GST_VIDEO_FRAME_WIDTH (frame); \
\
for (i = y_start; i < y_end; i++) { \
guint16 *data = (guint16 *) (dest + i * stride); \
for (j = 0; j < width; j++) { \
data[A] = 0xffff; \
data[C1] = c1; \
data[C2] = c2; \
data[C3] = c3; \
data += 4; \
} \
} \
}
A64_COLOR (argb64, 0, 1, 2, 3);
/* NV12, NV21 */
#define NV_YUV_BLEND(format_name,MEMCPY,BLENDLOOP) \
inline static void \
@ -1270,6 +1957,9 @@ BlendFunction gst_compositor_blend_y444_12le;
BlendFunction gst_compositor_blend_y444_12be;
BlendFunction gst_compositor_blend_y444_16le;
BlendFunction gst_compositor_blend_y444_16be;
BlendFunction gst_compositor_blend_argb64;
BlendFunction gst_compositor_overlay_argb64;
/* AYUV64 is equal to ARGB64 */
FillCheckerFunction gst_compositor_fill_checker_argb;
FillCheckerFunction gst_compositor_fill_checker_bgra;
@ -1297,6 +1987,8 @@ FillCheckerFunction gst_compositor_fill_checker_i420_12le;
FillCheckerFunction gst_compositor_fill_checker_i420_12be;
FillCheckerFunction gst_compositor_fill_checker_y444_16le;
FillCheckerFunction gst_compositor_fill_checker_y444_16be;
FillCheckerFunction gst_compositor_fill_checker_argb64;
FillCheckerFunction gst_compositor_fill_checker_ayuv64;
FillColorFunction gst_compositor_fill_color_argb;
FillColorFunction gst_compositor_fill_color_bgra;
@ -1326,6 +2018,7 @@ FillColorFunction gst_compositor_fill_color_i420_12le;
FillColorFunction gst_compositor_fill_color_i420_12be;
FillColorFunction gst_compositor_fill_color_y444_16le;
FillColorFunction gst_compositor_fill_color_y444_16be;
FillColorFunction gst_compositor_fill_color_argb64;
void
gst_compositor_init_blend (void)
@ -1360,6 +2053,8 @@ gst_compositor_init_blend (void)
gst_compositor_blend_y444_12be = GST_DEBUG_FUNCPTR (blend_y444_12be);
gst_compositor_blend_y444_16le = GST_DEBUG_FUNCPTR (blend_y444_16le);
gst_compositor_blend_y444_16be = GST_DEBUG_FUNCPTR (blend_y444_16be);
gst_compositor_blend_argb64 = GST_DEBUG_FUNCPTR (blend_argb64);
gst_compositor_overlay_argb64 = GST_DEBUG_FUNCPTR (overlay_argb64);
gst_compositor_fill_checker_argb = GST_DEBUG_FUNCPTR (fill_checker_argb_c);
gst_compositor_fill_checker_bgra = GST_DEBUG_FUNCPTR (fill_checker_bgra_c);
@ -1388,6 +2083,10 @@ gst_compositor_init_blend (void)
GST_DEBUG_FUNCPTR (fill_checker_y444_16le);
gst_compositor_fill_checker_y444_16be =
GST_DEBUG_FUNCPTR (fill_checker_y444_16be);
gst_compositor_fill_checker_argb64 =
GST_DEBUG_FUNCPTR (fill_checker_argb64_c);
gst_compositor_fill_checker_ayuv64 =
GST_DEBUG_FUNCPTR (fill_checker_ayuv64_c);
gst_compositor_fill_color_argb = GST_DEBUG_FUNCPTR (fill_color_argb);
gst_compositor_fill_color_bgra = GST_DEBUG_FUNCPTR (fill_color_bgra);
@ -1422,4 +2121,5 @@ gst_compositor_init_blend (void)
GST_DEBUG_FUNCPTR (fill_color_y444_16le);
gst_compositor_fill_color_y444_16be =
GST_DEBUG_FUNCPTR (fill_color_y444_16be);
gst_compositor_fill_color_argb64 = GST_DEBUG_FUNCPTR (fill_color_argb64);
}

View file

@ -65,6 +65,9 @@ extern BlendFunction gst_compositor_overlay_bgra;
#define gst_compositor_overlay_vuya gst_compositor_overlay_bgra
#define gst_compositor_overlay_abgr gst_compositor_overlay_argb
#define gst_compositor_overlay_rgba gst_compositor_overlay_bgra
extern BlendFunction gst_compositor_overlay_argb64;
#define gst_compositor_overlay_ayuv64 gst_compositor_overlay_argb64;
extern BlendFunction gst_compositor_blend_i420;
#define gst_compositor_blend_yv12 gst_compositor_blend_i420
extern BlendFunction gst_compositor_blend_nv12;
@ -95,6 +98,9 @@ extern BlendFunction gst_compositor_blend_y444_12le;
extern BlendFunction gst_compositor_blend_y444_12be;
extern BlendFunction gst_compositor_blend_y444_16le;
extern BlendFunction gst_compositor_blend_y444_16be;
extern BlendFunction gst_compositor_blend_argb64;
#define gst_compositor_blend_ayuv64 gst_compositor_blend_argb64;
extern FillCheckerFunction gst_compositor_fill_checker_argb;
#define gst_compositor_fill_checker_abgr gst_compositor_fill_checker_argb
@ -132,6 +138,8 @@ extern FillCheckerFunction gst_compositor_fill_checker_i420_12be;
#define gst_compositor_fill_checker_y444_12be gst_compositor_fill_checker_i420_12be
extern FillCheckerFunction gst_compositor_fill_checker_y444_16le;
extern FillCheckerFunction gst_compositor_fill_checker_y444_16be;
extern FillCheckerFunction gst_compositor_fill_checker_argb64;
extern FillCheckerFunction gst_compositor_fill_checker_ayuv64;
extern FillColorFunction gst_compositor_fill_color_argb;
extern FillColorFunction gst_compositor_fill_color_abgr;
@ -169,6 +177,8 @@ extern FillColorFunction gst_compositor_fill_color_i420_12be;
#define gst_compositor_fill_color_y444_12be gst_compositor_fill_color_i420_12be
extern FillColorFunction gst_compositor_fill_color_y444_16le;
extern FillColorFunction gst_compositor_fill_color_y444_16be;
extern FillColorFunction gst_compositor_fill_color_argb64;
#define gst_compositor_fill_color_ayuv64 gst_compositor_fill_color_argb64;
void gst_compositor_init_blend (void);

View file

@ -103,28 +103,10 @@
GST_DEBUG_CATEGORY_STATIC (gst_compositor_debug);
#define GST_CAT_DEFAULT gst_compositor_debug
#if G_BYTE_ORDER == G_LITTLE_ENDIAN
#define FORMATS " { AYUV, VUYA, BGRA, ARGB, RGBA, ABGR, " \
" Y444_16LE, Y444_16BE, Y444_12LE, Y444_12BE, Y444_10LE, Y444_10BE, " \
" Y444, Y42B, YUY2, UYVY, YVYU, "\
" I422_12LE, I422_12BE, I422_10LE, I422_10BE, "\
" I420_12LE, I420_12BE, I420_10LE, I420_10BE, " \
" I420, YV12, NV12, NV21, Y41B, RGB, BGR, xRGB, xBGR, "\
" RGBx, BGRx } "
#else
#define FORMATS " { AYUV, VUYA, BGRA, ARGB, RGBA, ABGR, "\
" Y444_16BE, Y444_16LE, Y444_12BE, Y444_12LE, Y444_10BE, Y444_10LE, " \
" Y444, Y42B, YUY2, UYVY, YVYU, "\
" I422_12BE, I422_12LE, I422_10BE, I422_10LE, "\
" I420_12BE, I420_12LE, I420_10BE, I420_10LE, "\
" I420, YV12, NV12, NV21, Y41B, RGB, BGR, xRGB, xBGR, "\
" RGBx, BGRx } "
#endif
static GstStaticPadTemplate src_factory = GST_STATIC_PAD_TEMPLATE ("src",
GST_PAD_SRC,
GST_PAD_ALWAYS,
GST_STATIC_CAPS (GST_VIDEO_CAPS_MAKE (FORMATS))
GST_STATIC_CAPS (GST_VIDEO_CAPS_MAKE (GST_VIDEO_FORMATS_ALL))
);
static GstStaticPadTemplate sink_factory = GST_STATIC_PAD_TEMPLATE ("sink_%u",
@ -622,6 +604,7 @@ static void
gst_compositor_pad_create_conversion_info (GstVideoAggregatorConvertPad * pad,
GstVideoAggregator * vagg, GstVideoInfo * conversion_info)
{
GstCompositor *self = GST_COMPOSITOR (vagg);
GstCompositorPad *cpad = GST_COMPOSITOR_PAD (pad);
gint width, height;
gint x_offset, y_offset;
@ -632,7 +615,24 @@ gst_compositor_pad_create_conversion_info (GstVideoAggregatorConvertPad * pad,
if (!conversion_info->finfo)
return;
_mixer_pad_get_output_size (GST_COMPOSITOR (vagg), cpad,
/* Need intermediate conversion? */
if (self->intermediate_frame) {
GstVideoInfo intermediate_info;
gst_video_info_set_interlaced_format (&intermediate_info,
GST_VIDEO_INFO_FORMAT (&self->intermediate_info),
conversion_info->interlace_mode,
GST_VIDEO_INFO_WIDTH (conversion_info),
GST_VIDEO_INFO_HEIGHT (conversion_info));
intermediate_info.colorimetry = conversion_info->colorimetry;
intermediate_info.par_n = conversion_info->par_n;
intermediate_info.par_d = conversion_info->par_d;
intermediate_info.fps_n = conversion_info->fps_n;
intermediate_info.fps_d = conversion_info->fps_d;
intermediate_info.flags = conversion_info->flags;
*conversion_info = intermediate_info;
}
_mixer_pad_get_output_size (self, cpad,
GST_VIDEO_INFO_PAR_N (&vagg->info), GST_VIDEO_INFO_PAR_D (&vagg->info),
&width, &height, &x_offset, &y_offset);
@ -647,8 +647,9 @@ gst_compositor_pad_create_conversion_info (GstVideoAggregatorConvertPad * pad,
* colorimetry, and chroma-site and our current pixel-aspect-ratio
* and other relevant fields.
*/
gst_video_info_set_format (&tmp_info,
GST_VIDEO_INFO_FORMAT (conversion_info), width, height);
gst_video_info_set_interlaced_format (&tmp_info,
GST_VIDEO_INFO_FORMAT (conversion_info),
conversion_info->interlace_mode, width, height);
tmp_info.chroma_site = conversion_info->chroma_site;
tmp_info.colorimetry = conversion_info->colorimetry;
tmp_info.par_n = conversion_info->par_n;
@ -656,7 +657,6 @@ gst_compositor_pad_create_conversion_info (GstVideoAggregatorConvertPad * pad,
tmp_info.fps_n = conversion_info->fps_n;
tmp_info.fps_d = conversion_info->fps_d;
tmp_info.flags = conversion_info->flags;
tmp_info.interlace_mode = conversion_info->interlace_mode;
*conversion_info = tmp_info;
}
@ -818,11 +818,16 @@ set_functions (GstCompositor * self, const GstVideoInfo * info)
gint scale[GST_VIDEO_MAX_COMPONENTS] = { 0, };
gint i;
gst_clear_buffer (&self->intermediate_frame);
g_clear_pointer (&self->intermediate_convert, gst_video_converter_free);
self->blend = NULL;
self->overlay = NULL;
self->fill_checker = NULL;
self->fill_color = NULL;
self->intermediate_info = *info;
switch (GST_VIDEO_INFO_FORMAT (info)) {
case GST_VIDEO_FORMAT_AYUV:
self->blend = gst_compositor_blend_ayuv;
@ -1040,15 +1045,93 @@ set_functions (GstCompositor * self, const GstVideoInfo * info)
self->fill_checker = gst_compositor_fill_checker_bgrx;
self->fill_color = gst_compositor_fill_color_bgrx;
break;
case GST_VIDEO_FORMAT_ARGB64:
self->blend = gst_compositor_blend_argb64;
self->overlay = gst_compositor_overlay_argb64;
self->fill_checker = gst_compositor_fill_checker_argb64;
self->fill_color = gst_compositor_fill_color_argb64;
break;
case GST_VIDEO_FORMAT_AYUV64:
self->blend = gst_compositor_blend_ayuv64;
self->overlay = gst_compositor_overlay_ayuv64;
self->fill_checker = gst_compositor_fill_checker_ayuv64;
self->fill_color = gst_compositor_fill_color_ayuv64;
break;
default:
GST_ERROR_OBJECT (self, "Unhandled format %s",
{
GstVideoFormat format = GST_VIDEO_FORMAT_UNKNOWN;
GstVideoInfo *intermediate_info = &self->intermediate_info;
if (GST_VIDEO_INFO_IS_YUV (info)) {
if (GST_VIDEO_INFO_COMP_DEPTH (info, 0) == 8)
format = GST_VIDEO_FORMAT_AYUV;
else
format = GST_VIDEO_FORMAT_AYUV64;
} else {
if (GST_VIDEO_INFO_COMP_DEPTH (info, 0) == 8)
format = GST_VIDEO_FORMAT_ARGB;
else
format = GST_VIDEO_FORMAT_ARGB64;
}
switch (format) {
case GST_VIDEO_FORMAT_AYUV:
self->blend = gst_compositor_blend_ayuv;
self->overlay = gst_compositor_overlay_ayuv;
self->fill_checker = gst_compositor_fill_checker_ayuv;
self->fill_color = gst_compositor_fill_color_ayuv;
break;
case GST_VIDEO_FORMAT_AYUV64:
self->blend = gst_compositor_blend_ayuv64;
self->overlay = gst_compositor_overlay_ayuv64;
self->fill_checker = gst_compositor_fill_checker_ayuv64;
self->fill_color = gst_compositor_fill_color_ayuv64;
break;
case GST_VIDEO_FORMAT_ARGB:
self->blend = gst_compositor_blend_argb;
self->overlay = gst_compositor_overlay_argb;
self->fill_checker = gst_compositor_fill_checker_argb;
self->fill_color = gst_compositor_fill_color_argb;
break;
case GST_VIDEO_FORMAT_ARGB64:
self->blend = gst_compositor_blend_argb64;
self->overlay = gst_compositor_overlay_argb64;
self->fill_checker = gst_compositor_fill_checker_argb64;
self->fill_color = gst_compositor_fill_color_argb64;
break;
default:
GST_ERROR_OBJECT (self, "Unhandled format %s -> %s",
gst_video_format_to_string (GST_VIDEO_INFO_FORMAT (info)),
gst_video_format_to_string (format));
return FALSE;
}
GST_DEBUG_OBJECT (self,
"Configured intermediate format %s for output format %s",
gst_video_format_to_string (format),
gst_video_format_to_string (GST_VIDEO_INFO_FORMAT (info)));
return FALSE;
/* needs intermediate conversion */
gst_video_info_set_interlaced_format (intermediate_info,
format, info->interlace_mode, info->width, info->height);
intermediate_info->par_n = info->par_n;
intermediate_info->par_d = info->par_d;
intermediate_info->fps_n = info->fps_n;
intermediate_info->fps_d = info->fps_d;
intermediate_info->flags = info->flags;
/* preserve colorimetry if required */
if (!GST_VIDEO_INFO_IS_GRAY (info))
intermediate_info->colorimetry = info->colorimetry;
self->intermediate_frame =
gst_buffer_new_and_alloc (self->intermediate_info.size);
break;
}
}
/* calculate black and white colors */
gst_video_color_range_offsets (info->colorimetry.range, info->finfo,
offset, scale);
gst_video_color_range_offsets (self->intermediate_info.colorimetry.range,
self->intermediate_info.finfo, offset, scale);
if (GST_VIDEO_INFO_IS_YUV (info)) {
/* black color [0.0, 0.0, 0.0] */
self->black_color[0] = offset[0];
@ -1341,9 +1424,36 @@ _negotiated_caps (GstAggregator * agg, GstCaps * caps)
gst_clear_object (&pool);
}
if (compositor->intermediate_frame) {
GstStructure *config = NULL;
GstTaskPool *pool = gst_video_aggregator_get_execution_task_pool (vagg);
if (pool && n_threads > 1) {
config = gst_structure_new_empty ("GstVideoConverterConfig");
gst_structure_set (config, GST_VIDEO_CONVERTER_OPT_THREADS,
G_TYPE_UINT, n_threads, NULL);
}
compositor->intermediate_convert =
gst_video_converter_new_with_pool (&compositor->intermediate_info,
&v_info, config, pool);
gst_clear_object (&pool);
}
return GST_AGGREGATOR_CLASS (parent_class)->negotiated_src_caps (agg, caps);
}
/* GstAggregator::stop vfunc: releases the intermediate blending buffer and
 * the intermediate GstVideoConverter so a later restart renegotiates them,
 * then chains up to the parent's stop.
 * NOTE(review): the function name is misspelled ("composior" instead of
 * "compositor"); it is registered by name in class_init, so renaming it
 * requires updating both sites together. */
static gboolean
gst_composior_stop (GstAggregator * agg)
{
GstCompositor *self = GST_COMPOSITOR (agg);
gst_clear_buffer (&self->intermediate_frame);
g_clear_pointer (&self->intermediate_convert, gst_video_converter_free);
return GST_AGGREGATOR_CLASS (parent_class)->stop (agg);
}
static gboolean
_should_draw_background (GstVideoAggregator * vagg)
{
@ -1490,7 +1600,7 @@ gst_compositor_aggregate_frames (GstVideoAggregator * vagg, GstBuffer * outbuf)
{
GstCompositor *compositor = GST_COMPOSITOR (vagg);
GList *l;
GstVideoFrame out_frame, *outframe;
GstVideoFrame out_frame, intermediate_frame, *outframe;
gboolean draw_background;
guint drawn_a_pad = FALSE;
struct CompositePadInfo *pads_info;
@ -1503,6 +1613,18 @@ gst_compositor_aggregate_frames (GstVideoAggregator * vagg, GstBuffer * outbuf)
outframe = &out_frame;
if (compositor->intermediate_frame) {
if (!gst_video_frame_map (&intermediate_frame,
&compositor->intermediate_info, compositor->intermediate_frame,
GST_MAP_READWRITE)) {
GST_WARNING_OBJECT (vagg, "Could not map intermediate buffer");
gst_video_frame_unmap (&out_frame);
return GST_FLOW_ERROR;
}
outframe = &intermediate_frame;
}
/* If one of the frames to be composited completely obscures the background,
* don't bother drawing the background at all. We can also always use the
* 'blend' BlendFunction in that case because it only changes if we have to
@ -1603,7 +1725,14 @@ gst_compositor_aggregate_frames (GstVideoAggregator * vagg, GstBuffer * outbuf)
GST_OBJECT_UNLOCK (vagg);
gst_video_frame_unmap (outframe);
if (compositor->intermediate_frame) {
gst_video_converter_frame (compositor->intermediate_convert,
&intermediate_frame, &out_frame);
gst_video_frame_unmap (&intermediate_frame);
}
gst_video_frame_unmap (&out_frame);
return GST_FLOW_OK;
}
@ -1797,6 +1926,7 @@ gst_compositor_class_init (GstCompositorClass * klass)
agg_class->src_event = _src_event;
agg_class->fixate_src_caps = _fixate_caps;
agg_class->negotiated_src_caps = _negotiated_caps;
agg_class->stop = GST_DEBUG_FUNCPTR (gst_composior_stop);
videoaggregator_class->aggregate_frames = gst_compositor_aggregate_frames;
g_object_class_install_property (gobject_class, PROP_BACKGROUND,

View file

@ -149,6 +149,10 @@ struct _GstCompositor
gint white_color[GST_VIDEO_MAX_COMPONENTS];
gint black_color[GST_VIDEO_MAX_COMPONENTS];
GstBuffer *intermediate_frame;
GstVideoInfo intermediate_info;
GstVideoConverter *intermediate_convert;
GstParallelizedTaskRunner *blend_runner;
};