gstreamer/gst/videomixer/blend.c
Sebastian Dröge 6da14d0c41 videomixer: Optimize ARGB blending and implement BGRA blending with orc
This now means, that we have absolutely no handwritten assembly anymore
in videomixer and it's also faster now when using SSE.
2010-09-04 14:52:09 +02:00

771 lines
27 KiB
C

/*
* Copyright (C) 2004 Wim Taymans <wim@fluendo.com>
* Copyright (C) 2006 Mindfruit Bv.
* Author: Sjoerd Simons <sjoerd@luon.net>
* Author: Alex Ugarte <alexugarte@gmail.com>
* Copyright (C) 2009 Alex Ugarte <augarte@vicomtech.org>
* Copyright (C) 2009 Sebastian Dröge <sebastian.droege@collabora.co.uk>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "blend.h"
#include "blendorc.h"
#include <string.h>
#include <gst/video/video.h>
#ifdef HAVE_ORC
#include <orc/orc.h>
#endif
#define BLEND(D,S,alpha) (((D) * (256 - (alpha)) + (S) * (alpha)) >> 8)
GST_DEBUG_CATEGORY_STATIC (gst_videomixer_blend_debug);
#define GST_CAT_DEFAULT gst_videomixer_blend_debug
/* Below are the implementations of everything */
/* A32 is for AYUV, ARGB and BGRA */
#define BLEND_A32(name, LOOP) \
static void \
blend_##name (const guint8 * src, gint xpos, gint ypos, \
gint src_width, gint src_height, gdouble src_alpha, \
guint8 * dest, gint dest_width, gint dest_height) \
{ \
guint s_alpha; \
gint src_stride, dest_stride; \
\
src_stride = src_width * 4; \
dest_stride = dest_width * 4; \
\
s_alpha = CLAMP ((gint) (src_alpha * 256), 0, 256); \
\
/* If it's completely transparent... we just return */ \
if (G_UNLIKELY (s_alpha == 0)) \
return; \
\
/* adjust src pointers for negative sizes */ \
if (xpos < 0) { \
src += -xpos * 4; \
src_width -= -xpos; \
xpos = 0; \
} \
if (ypos < 0) { \
src += -ypos * src_stride; \
src_height -= -ypos; \
ypos = 0; \
} \
/* adjust width/height if the src is bigger than dest */ \
if (xpos + src_width > dest_width) { \
src_width = dest_width - xpos; \
} \
if (ypos + src_height > dest_height) { \
src_height = dest_height - ypos; \
} \
\
dest = dest + 4 * xpos + (ypos * dest_stride); \
\
LOOP (dest, src, src_height, src_width, src_stride, dest_stride, s_alpha); \
}
#define BLEND_A32_LOOP(name) \
static inline void \
_blend_loop_##name (guint8 * dest, const guint8 * src, gint src_height, \
gint src_width, gint src_stride, gint dest_stride, guint s_alpha) \
{ \
s_alpha = MIN (255, s_alpha); \
orc_blend_##name (dest, dest_stride, src, src_stride, \
s_alpha, src_width, src_height); \
}
BLEND_A32_LOOP (argb);
BLEND_A32_LOOP (bgra);
#if G_BYTE_ORDER == LITTLE_ENDIAN
BLEND_A32 (argb, _blend_loop_argb);
BLEND_A32 (bgra, _blend_loop_bgra);
#else
BLEND_A32 (argb, _blend_loop_bgra);
BLEND_A32 (bgra, _blend_loop_argb);
#endif
#define A32_CHECKER_C(name, RGB, A, C1, C2, C3) \
static void \
fill_checker_##name##_c (guint8 * dest, gint width, gint height) \
{ \
gint i, j; \
gint val; \
static const gint tab[] = { 80, 160, 80, 160 }; \
\
if (!RGB) { \
for (i = 0; i < height; i++) { \
for (j = 0; j < width; j++) { \
dest[A] = 0xff; \
dest[C1] = tab[((i & 0x8) >> 3) + ((j & 0x8) >> 3)]; \
dest[C2] = 128; \
dest[C3] = 128; \
dest += 4; \
} \
} \
} else { \
for (i = 0; i < height; i++) { \
for (j = 0; j < width; j++) { \
val = tab[((i & 0x8) >> 3) + ((j & 0x8) >> 3)]; \
dest[A] = 0xFF; \
dest[C1] = val; \
dest[C2] = val; \
dest[C3] = val; \
dest += 4; \
} \
} \
} \
}
A32_CHECKER_C (argb, TRUE, 0, 1, 2, 3);
A32_CHECKER_C (bgra, TRUE, 3, 2, 1, 0);
A32_CHECKER_C (ayuv, FALSE, 0, 1, 2, 3);
#define YUV_TO_R(Y,U,V) (CLAMP (1.164 * (Y - 16) + 1.596 * (V - 128), 0, 255))
#define YUV_TO_G(Y,U,V) (CLAMP (1.164 * (Y - 16) - 0.813 * (V - 128) - 0.391 * (U - 128), 0, 255))
#define YUV_TO_B(Y,U,V) (CLAMP (1.164 * (Y - 16) + 2.018 * (U - 128), 0, 255))
#define A32_COLOR(name, RGB, A, C1, C2, C3) \
static void \
fill_color_##name (guint8 * dest, gint width, gint height, gint Y, gint U, gint V) \
{ \
gint c1, c2, c3; \
guint32 val; \
\
if (RGB) { \
c1 = YUV_TO_R (Y, U, V); \
c2 = YUV_TO_G (Y, U, V); \
c3 = YUV_TO_B (Y, U, V); \
} else { \
c1 = Y; \
c2 = U; \
c3 = V; \
} \
val = GUINT32_FROM_BE ((0xff << A) | (c1 << C1) | (c2 << C2) | (c3 << C3)); \
\
orc_splat_u32 ((guint32 *) dest, val, height * width); \
}
A32_COLOR (argb, TRUE, 24, 16, 8, 0);
A32_COLOR (bgra, TRUE, 0, 8, 16, 24);
A32_COLOR (abgr, TRUE, 24, 0, 8, 16);
A32_COLOR (rgba, TRUE, 0, 24, 16, 8);
A32_COLOR (ayuv, FALSE, 24, 16, 8, 0);
/* Y444, Y42B, I420, YV12, Y41B */
#define PLANAR_YUV_BLEND(format_name,format_enum,x_round,y_round,MEMCPY,BLENDLOOP) \
inline static void \
_blend_##format_name (const guint8 * src, guint8 * dest, \
gint src_stride, gint dest_stride, gint src_width, gint src_height, \
gdouble src_alpha) \
{ \
gint i; \
gint b_alpha; \
\
/* If it's completely transparent... we just return */ \
if (G_UNLIKELY (src_alpha == 0.0)) { \
GST_INFO ("Fast copy (alpha == 0.0)"); \
return; \
} \
\
/* If it's completely opaque, we do a fast copy */ \
if (G_UNLIKELY (src_alpha == 1.0)) { \
GST_INFO ("Fast copy (alpha == 1.0)"); \
for (i = 0; i < src_height; i++) { \
MEMCPY (dest, src, src_width); \
src += src_stride; \
dest += dest_stride; \
} \
return; \
} \
\
b_alpha = CLAMP ((gint) (src_alpha * 256), 0, 256); \
\
BLENDLOOP(dest, dest_stride, src, src_stride, b_alpha, src_width, src_height); \
} \
\
static void \
blend_##format_name (const guint8 * src, gint xpos, gint ypos, \
gint src_width, gint src_height, gdouble src_alpha, \
guint8 * dest, gint dest_width, gint dest_height) \
{ \
const guint8 *b_src; \
guint8 *b_dest; \
gint b_src_width = src_width; \
gint b_src_height = src_height; \
gint xoffset = 0; \
gint yoffset = 0; \
gint src_comp_rowstride, dest_comp_rowstride; \
gint src_comp_height, dest_comp_height; \
gint src_comp_width, dest_comp_width; \
gint comp_ypos, comp_xpos; \
gint comp_yoffset, comp_xoffset; \
\
xpos = x_round (xpos); \
ypos = y_round (ypos); \
\
/* adjust src pointers for negative sizes */ \
if (xpos < 0) { \
xoffset = -xpos; \
b_src_width -= -xpos; \
xpos = 0; \
} \
if (ypos < 0) { \
yoffset += -ypos; \
b_src_height -= -ypos; \
ypos = 0; \
} \
/* If x or y offset are larger then the source it's outside of the picture */ \
if (xoffset > src_width || yoffset > src_width) { \
return; \
} \
\
/* adjust width/height if the src is bigger than dest */ \
if (xpos + src_width > dest_width) { \
b_src_width = dest_width - xpos; \
} \
if (ypos + src_height > dest_height) { \
b_src_height = dest_height - ypos; \
} \
if (b_src_width < 0 || b_src_height < 0) { \
return; \
} \
\
/* First mix Y, then U, then V */ \
b_src = src + gst_video_format_get_component_offset (format_enum, 0, src_width, src_height); \
b_dest = dest + gst_video_format_get_component_offset (format_enum, 0, dest_width, dest_height); \
src_comp_rowstride = gst_video_format_get_row_stride (format_enum, 0, src_width); \
dest_comp_rowstride = gst_video_format_get_row_stride (format_enum, 0, dest_width); \
src_comp_height = gst_video_format_get_component_height (format_enum, 0, b_src_height); \
dest_comp_height = gst_video_format_get_component_height (format_enum, 0, dest_height); \
src_comp_width = gst_video_format_get_component_width (format_enum, 0, b_src_width); \
dest_comp_width = gst_video_format_get_component_width (format_enum, 0, dest_width); \
comp_xpos = (xpos == 0) ? 0 : gst_video_format_get_component_width (format_enum, 0, xpos); \
comp_ypos = (ypos == 0) ? 0 : gst_video_format_get_component_height (format_enum, 0, ypos); \
comp_xoffset = (xoffset == 0) ? 0 : gst_video_format_get_component_width (format_enum, 0, xoffset); \
comp_yoffset = (yoffset == 0) ? 0 : gst_video_format_get_component_height (format_enum, 0, yoffset); \
_blend_##format_name (b_src + comp_xoffset + comp_yoffset * src_comp_rowstride, \
b_dest + comp_xpos + comp_ypos * dest_comp_rowstride, \
src_comp_rowstride, \
dest_comp_rowstride, src_comp_width, src_comp_height, \
src_alpha); \
\
b_src = src + gst_video_format_get_component_offset (format_enum, 1, src_width, src_height); \
b_dest = dest + gst_video_format_get_component_offset (format_enum, 1, dest_width, dest_height); \
src_comp_rowstride = gst_video_format_get_row_stride (format_enum, 1, src_width); \
dest_comp_rowstride = gst_video_format_get_row_stride (format_enum, 1, dest_width); \
src_comp_height = gst_video_format_get_component_height (format_enum, 1, b_src_height); \
dest_comp_height = gst_video_format_get_component_height (format_enum, 1, dest_height); \
src_comp_width = gst_video_format_get_component_width (format_enum, 1, b_src_width); \
dest_comp_width = gst_video_format_get_component_width (format_enum, 1, dest_width); \
comp_xpos = (xpos == 0) ? 0 : gst_video_format_get_component_width (format_enum, 1, xpos); \
comp_ypos = (ypos == 0) ? 0 : gst_video_format_get_component_height (format_enum, 1, ypos); \
comp_xoffset = (xoffset == 0) ? 0 : gst_video_format_get_component_width (format_enum, 1, xoffset); \
comp_yoffset = (yoffset == 0) ? 0 : gst_video_format_get_component_height (format_enum, 1, yoffset); \
_blend_##format_name (b_src + comp_xoffset + comp_yoffset * src_comp_rowstride, \
b_dest + comp_xpos + comp_ypos * dest_comp_rowstride, \
src_comp_rowstride, \
dest_comp_rowstride, src_comp_width, src_comp_height, \
src_alpha); \
\
b_src = src + gst_video_format_get_component_offset (format_enum, 2, src_width, src_height); \
b_dest = dest + gst_video_format_get_component_offset (format_enum, 2, dest_width, dest_height); \
src_comp_rowstride = gst_video_format_get_row_stride (format_enum, 2, src_width); \
dest_comp_rowstride = gst_video_format_get_row_stride (format_enum, 2, dest_width); \
src_comp_height = gst_video_format_get_component_height (format_enum, 2, b_src_height); \
dest_comp_height = gst_video_format_get_component_height (format_enum, 2, dest_height); \
src_comp_width = gst_video_format_get_component_width (format_enum, 2, b_src_width); \
dest_comp_width = gst_video_format_get_component_width (format_enum, 2, dest_width); \
comp_xpos = (xpos == 0) ? 0 : gst_video_format_get_component_width (format_enum, 2, xpos); \
comp_ypos = (ypos == 0) ? 0 : gst_video_format_get_component_height (format_enum, 2, ypos); \
comp_xoffset = (xoffset == 0) ? 0 : gst_video_format_get_component_width (format_enum, 2, xoffset); \
comp_yoffset = (yoffset == 0) ? 0 : gst_video_format_get_component_height (format_enum, 2, yoffset); \
_blend_##format_name (b_src + comp_xoffset + comp_yoffset * src_comp_rowstride, \
b_dest + comp_xpos + comp_ypos * dest_comp_rowstride, \
src_comp_rowstride, \
dest_comp_rowstride, src_comp_width, src_comp_height, \
src_alpha); \
}
#define PLANAR_YUV_FILL_CHECKER(format_name, format_enum, MEMSET) \
static void \
fill_checker_##format_name (guint8 * dest, gint width, gint height) \
{ \
gint i, j; \
static const int tab[] = { 80, 160, 80, 160 }; \
guint8 *p; \
gint comp_width, comp_height; \
gint rowstride; \
\
p = dest + gst_video_format_get_component_offset (format_enum, 0, width, height); \
comp_width = gst_video_format_get_component_width (format_enum, 0, width); \
comp_height = gst_video_format_get_component_height (format_enum, 0, height); \
rowstride = gst_video_format_get_row_stride (format_enum, 0, width); \
\
for (i = 0; i < comp_height; i++) { \
for (j = 0; j < comp_width; j++) { \
*p++ = tab[((i & 0x8) >> 3) + ((j & 0x8) >> 3)]; \
} \
p += rowstride - comp_width; \
} \
\
p = dest + gst_video_format_get_component_offset (format_enum, 1, width, height); \
comp_width = gst_video_format_get_component_width (format_enum, 1, width); \
comp_height = gst_video_format_get_component_height (format_enum, 1, height); \
rowstride = gst_video_format_get_row_stride (format_enum, 1, width); \
\
for (i = 0; i < comp_height; i++) { \
MEMSET (p, 0x80, comp_width); \
p += rowstride; \
} \
\
p = dest + gst_video_format_get_component_offset (format_enum, 2, width, height); \
comp_width = gst_video_format_get_component_width (format_enum, 2, width); \
comp_height = gst_video_format_get_component_height (format_enum, 2, height); \
rowstride = gst_video_format_get_row_stride (format_enum, 2, width); \
\
for (i = 0; i < comp_height; i++) { \
MEMSET (p, 0x80, comp_width); \
p += rowstride; \
} \
}
#define PLANAR_YUV_FILL_COLOR(format_name,format_enum,MEMSET) \
static void \
fill_color_##format_name (guint8 * dest, gint width, gint height, \
gint colY, gint colU, gint colV) \
{ \
guint8 *p; \
gint comp_width, comp_height; \
gint rowstride; \
gint i; \
\
p = dest + gst_video_format_get_component_offset (format_enum, 0, width, height); \
comp_width = gst_video_format_get_component_width (format_enum, 0, width); \
comp_height = gst_video_format_get_component_height (format_enum, 0, height); \
rowstride = gst_video_format_get_row_stride (format_enum, 0, width); \
\
for (i = 0; i < comp_height; i++) { \
MEMSET (p, colY, comp_width); \
p += rowstride; \
} \
\
p = dest + gst_video_format_get_component_offset (format_enum, 1, width, height); \
comp_width = gst_video_format_get_component_width (format_enum, 1, width); \
comp_height = gst_video_format_get_component_height (format_enum, 1, height); \
rowstride = gst_video_format_get_row_stride (format_enum, 1, width); \
\
for (i = 0; i < comp_height; i++) { \
MEMSET (p, colU, comp_width); \
p += rowstride; \
} \
\
p = dest + gst_video_format_get_component_offset (format_enum, 2, width, height); \
comp_width = gst_video_format_get_component_width (format_enum, 2, width); \
comp_height = gst_video_format_get_component_height (format_enum, 2, height); \
rowstride = gst_video_format_get_row_stride (format_enum, 2, width); \
\
for (i = 0; i < comp_height; i++) { \
MEMSET (p, colV, comp_width); \
p += rowstride; \
} \
}
#define GST_ROUND_UP_1(x) (x)
PLANAR_YUV_BLEND (i420, GST_VIDEO_FORMAT_I420, GST_ROUND_UP_2,
GST_ROUND_UP_2, memcpy, orc_blend_u8);
PLANAR_YUV_FILL_CHECKER (i420, GST_VIDEO_FORMAT_I420, memset);
PLANAR_YUV_FILL_COLOR (i420, GST_VIDEO_FORMAT_I420, memset);
PLANAR_YUV_FILL_COLOR (yv12, GST_VIDEO_FORMAT_YV12, memset);
PLANAR_YUV_BLEND (y444, GST_VIDEO_FORMAT_Y444, GST_ROUND_UP_1,
GST_ROUND_UP_1, memcpy, orc_blend_u8);
PLANAR_YUV_FILL_CHECKER (y444, GST_VIDEO_FORMAT_Y444, memset);
PLANAR_YUV_FILL_COLOR (y444, GST_VIDEO_FORMAT_Y444, memset);
PLANAR_YUV_BLEND (y42b, GST_VIDEO_FORMAT_Y42B, GST_ROUND_UP_2,
GST_ROUND_UP_1, memcpy, orc_blend_u8);
PLANAR_YUV_FILL_CHECKER (y42b, GST_VIDEO_FORMAT_Y42B, memset);
PLANAR_YUV_FILL_COLOR (y42b, GST_VIDEO_FORMAT_Y42B, memset);
PLANAR_YUV_BLEND (y41b, GST_VIDEO_FORMAT_Y41B, GST_ROUND_UP_4,
GST_ROUND_UP_1, memcpy, orc_blend_u8);
PLANAR_YUV_FILL_CHECKER (y41b, GST_VIDEO_FORMAT_Y41B, memset);
PLANAR_YUV_FILL_COLOR (y41b, GST_VIDEO_FORMAT_Y41B, memset);
/* RGB, BGR, xRGB, xBGR, RGBx, BGRx */
#define RGB_BLEND(name, bpp, MEMCPY, BLENDLOOP) \
static void \
blend_##name (const guint8 * src, gint xpos, gint ypos, \
gint src_width, gint src_height, gdouble src_alpha, \
guint8 * dest, gint dest_width, gint dest_height) \
{ \
gint b_alpha; \
gint i; \
gint src_stride, dest_stride; \
\
src_stride = GST_ROUND_UP_4 (src_width * bpp); \
dest_stride = GST_ROUND_UP_4 (dest_width * bpp); \
\
b_alpha = CLAMP ((gint) (src_alpha * 256), 0, 256); \
\
/* adjust src pointers for negative sizes */ \
if (xpos < 0) { \
src += -xpos * bpp; \
src_width -= -xpos; \
xpos = 0; \
} \
if (ypos < 0) { \
src += -ypos * src_stride; \
src_height -= -ypos; \
ypos = 0; \
} \
/* adjust width/height if the src is bigger than dest */ \
if (xpos + src_width > dest_width) { \
src_width = dest_width - xpos; \
} \
if (ypos + src_height > dest_height) { \
src_height = dest_height - ypos; \
} \
\
dest = dest + bpp * xpos + (ypos * dest_stride); \
/* If it's completely transparent... we just return */ \
if (G_UNLIKELY (src_alpha == 0.0)) { \
GST_INFO ("Fast copy (alpha == 0.0)"); \
return; \
} \
\
/* If it's completely opaque, we do a fast copy */ \
if (G_UNLIKELY (src_alpha == 1.0)) { \
GST_INFO ("Fast copy (alpha == 1.0)"); \
for (i = 0; i < src_height; i++) { \
MEMCPY (dest, src, bpp * src_width); \
src += src_stride; \
dest += dest_stride; \
} \
return; \
} \
\
BLENDLOOP(dest, dest_stride, src, src_stride, b_alpha, src_width * bpp, src_height); \
}
#define RGB_FILL_CHECKER_C(name, bpp, r, g, b) \
static void \
fill_checker_##name##_c (guint8 * dest, gint width, gint height) \
{ \
gint i, j; \
static const int tab[] = { 80, 160, 80, 160 }; \
gint dest_add = GST_ROUND_UP_4 (width * bpp) - width * bpp; \
\
for (i = 0; i < height; i++) { \
for (j = 0; j < width; j++) { \
dest[r] = tab[((i & 0x8) >> 3) + ((j & 0x8) >> 3)]; /* red */ \
dest[g] = tab[((i & 0x8) >> 3) + ((j & 0x8) >> 3)]; /* green */ \
dest[b] = tab[((i & 0x8) >> 3) + ((j & 0x8) >> 3)]; /* blue */ \
dest += bpp; \
} \
dest += dest_add; \
} \
}
#define RGB_FILL_COLOR(name, bpp, MEMSET_RGB) \
static void \
fill_color_##name (guint8 * dest, gint width, gint height, \
gint colY, gint colU, gint colV) \
{ \
gint red, green, blue; \
gint i; \
gint dest_stride = GST_ROUND_UP_4 (width * bpp); \
\
red = YUV_TO_R (colY, colU, colV); \
green = YUV_TO_G (colY, colU, colV); \
blue = YUV_TO_B (colY, colU, colV); \
\
for (i = 0; i < height; i++) { \
MEMSET_RGB (dest, red, green, blue, width); \
dest += dest_stride; \
} \
}
#define MEMSET_RGB_C(name, r, g, b) \
static inline void \
_memset_##name##_c (guint8* dest, gint red, gint green, gint blue, gint width) { \
gint j; \
\
for (j = 0; j < width; j++) { \
dest[r] = red; \
dest[g] = green; \
dest[b] = blue; \
dest += 3; \
} \
}
#define MEMSET_XRGB(name, r, g, b) \
static inline void \
_memset_##name (guint8* dest, gint red, gint green, gint blue, gint width) { \
guint32 val; \
\
val = GUINT32_FROM_BE ((red << r) | (green << g) | (blue << b)); \
orc_splat_u32 ((guint32 *) dest, val, width); \
}
#define _orc_memcpy_u32(dest,src,len) orc_memcpy_u32((guint32 *) dest, (const guint32 *) src, len/4)
RGB_BLEND (rgb, 3, memcpy, orc_blend_u8);
RGB_FILL_CHECKER_C (rgb, 3, 0, 1, 2);
MEMSET_RGB_C (rgb, 0, 1, 2);
RGB_FILL_COLOR (rgb_c, 3, _memset_rgb_c);
MEMSET_RGB_C (bgr, 2, 1, 0);
RGB_FILL_COLOR (bgr_c, 3, _memset_bgr_c);
RGB_BLEND (xrgb, 4, _orc_memcpy_u32, orc_blend_u8);
RGB_FILL_CHECKER_C (xrgb, 4, 1, 2, 3);
MEMSET_XRGB (xrgb, 24, 16, 0);
RGB_FILL_COLOR (xrgb, 4, _memset_xrgb);
MEMSET_XRGB (xbgr, 0, 16, 24);
RGB_FILL_COLOR (xbgr, 4, _memset_xbgr);
MEMSET_XRGB (rgbx, 24, 16, 8);
RGB_FILL_COLOR (rgbx, 4, _memset_rgbx);
MEMSET_XRGB (bgrx, 8, 16, 24);
RGB_FILL_COLOR (bgrx, 4, _memset_bgrx);
/* YUY2, YVYU, UYVY */
#define PACKED_422_BLEND(name, MEMCPY, BLENDLOOP) \
static void \
blend_##name (const guint8 * src, gint xpos, gint ypos, \
gint src_width, gint src_height, gdouble src_alpha, \
guint8 * dest, gint dest_width, gint dest_height) \
{ \
gint b_alpha; \
gint i; \
gint src_stride, dest_stride; \
\
src_stride = GST_ROUND_UP_4 (src_width * 2); \
dest_stride = GST_ROUND_UP_4 (dest_width * 2); \
\
b_alpha = CLAMP ((gint) (src_alpha * 256), 0, 256); \
\
xpos = GST_ROUND_UP_2 (xpos); \
\
/* adjust src pointers for negative sizes */ \
if (xpos < 0) { \
src += -xpos * 2; \
src_width -= -xpos; \
xpos = 0; \
} \
if (ypos < 0) { \
src += -ypos * src_stride; \
src_height -= -ypos; \
ypos = 0; \
} \
\
/* adjust width/height if the src is bigger than dest */ \
if (xpos + src_width > dest_width) { \
src_width = dest_width - xpos; \
} \
if (ypos + src_height > dest_height) { \
src_height = dest_height - ypos; \
} \
\
dest = dest + 2 * xpos + (ypos * dest_stride); \
/* If it's completely transparent... we just return */ \
if (G_UNLIKELY (src_alpha == 0.0)) { \
GST_INFO ("Fast copy (alpha == 0.0)"); \
return; \
} \
\
/* If it's completely opaque, we do a fast copy */ \
if (G_UNLIKELY (src_alpha == 1.0)) { \
GST_INFO ("Fast copy (alpha == 1.0)"); \
for (i = 0; i < src_height; i++) { \
MEMCPY (dest, src, 2 * src_width); \
src += src_stride; \
dest += dest_stride; \
} \
return; \
} \
\
BLENDLOOP(dest, dest_stride, src, src_stride, b_alpha, 2 * src_width, src_height); \
}
#define PACKED_422_FILL_CHECKER_C(name, Y1, U, Y2, V) \
static void \
fill_checker_##name##_c (guint8 * dest, gint width, gint height) \
{ \
gint i, j; \
static const int tab[] = { 80, 160, 80, 160 }; \
gint dest_add; \
\
width = GST_ROUND_UP_2 (width); \
dest_add = GST_ROUND_UP_4 (width * 2) - width * 2; \
width /= 2; \
\
for (i = 0; i < height; i++) { \
for (j = 0; j < width; j++) { \
dest[Y1] = tab[((i & 0x8) >> 3) + ((j & 0x8) >> 3)]; \
dest[Y2] = tab[((i & 0x8) >> 3) + ((j & 0x8) >> 3)]; \
dest[U] = 128; \
dest[V] = 128; \
dest += 4; \
} \
dest += dest_add; \
} \
}
#define PACKED_422_FILL_COLOR(name, Y1, U, Y2, V) \
static void \
fill_color_##name (guint8 * dest, gint width, gint height, \
gint colY, gint colU, gint colV) \
{ \
gint i; \
gint dest_stride; \
guint32 val; \
\
width = GST_ROUND_UP_2 (width); \
dest_stride = GST_ROUND_UP_4 (width * 2); \
width /= 2; \
\
val = GUINT32_FROM_BE ((colY << Y1) | (colY << Y2) | (colU << U) | (colV << V)); \
\
for (i = 0; i < height; i++) { \
orc_splat_u32 ((guint32 *) dest, val, width); \
dest += dest_stride; \
} \
}
PACKED_422_BLEND (yuy2, memcpy, orc_blend_u8);
PACKED_422_FILL_CHECKER_C (yuy2, 0, 1, 2, 3);
PACKED_422_FILL_CHECKER_C (uyvy, 1, 0, 3, 2);
PACKED_422_FILL_COLOR (yuy2, 24, 16, 8, 0);
PACKED_422_FILL_COLOR (yvyu, 24, 0, 8, 16);
PACKED_422_FILL_COLOR (uyvy, 16, 24, 0, 8);
/* Init function */
BlendFunction gst_video_mixer_blend_argb;
BlendFunction gst_video_mixer_blend_bgra;
/* AYUV/ABGR is equal to ARGB, RGBA is equal to BGRA */
BlendFunction gst_video_mixer_blend_y444;
BlendFunction gst_video_mixer_blend_y42b;
BlendFunction gst_video_mixer_blend_i420;
/* I420 is equal to YV12 */
BlendFunction gst_video_mixer_blend_y41b;
BlendFunction gst_video_mixer_blend_rgb;
/* BGR is equal to RGB */
BlendFunction gst_video_mixer_blend_rgbx;
/* BGRx, xRGB, xBGR are equal to RGBx */
BlendFunction gst_video_mixer_blend_yuy2;
/* YVYU and UYVY are equal to YUY2 */
FillCheckerFunction gst_video_mixer_fill_checker_argb;
FillCheckerFunction gst_video_mixer_fill_checker_bgra;
/* ABGR is equal to ARGB, RGBA is equal to BGRA */
FillCheckerFunction gst_video_mixer_fill_checker_ayuv;
FillCheckerFunction gst_video_mixer_fill_checker_y444;
FillCheckerFunction gst_video_mixer_fill_checker_y42b;
FillCheckerFunction gst_video_mixer_fill_checker_i420;
/* I420 is equal to YV12 */
FillCheckerFunction gst_video_mixer_fill_checker_y41b;
FillCheckerFunction gst_video_mixer_fill_checker_rgb;
/* BGR is equal to RGB */
FillCheckerFunction gst_video_mixer_fill_checker_xrgb;
/* BGRx, xRGB, xBGR are equal to RGBx */
FillCheckerFunction gst_video_mixer_fill_checker_yuy2;
/* YVYU is equal to YUY2 */
FillCheckerFunction gst_video_mixer_fill_checker_uyvy;
FillColorFunction gst_video_mixer_fill_color_argb;
FillColorFunction gst_video_mixer_fill_color_bgra;
FillColorFunction gst_video_mixer_fill_color_abgr;
FillColorFunction gst_video_mixer_fill_color_rgba;
FillColorFunction gst_video_mixer_fill_color_ayuv;
FillColorFunction gst_video_mixer_fill_color_y444;
FillColorFunction gst_video_mixer_fill_color_y42b;
FillColorFunction gst_video_mixer_fill_color_i420;
FillColorFunction gst_video_mixer_fill_color_yv12;
FillColorFunction gst_video_mixer_fill_color_y41b;
FillColorFunction gst_video_mixer_fill_color_rgb;
FillColorFunction gst_video_mixer_fill_color_bgr;
FillColorFunction gst_video_mixer_fill_color_xrgb;
FillColorFunction gst_video_mixer_fill_color_xbgr;
FillColorFunction gst_video_mixer_fill_color_rgbx;
FillColorFunction gst_video_mixer_fill_color_bgrx;
FillColorFunction gst_video_mixer_fill_color_yuy2;
FillColorFunction gst_video_mixer_fill_color_yvyu;
FillColorFunction gst_video_mixer_fill_color_uyvy;
void
gst_video_mixer_init_blend (void)
{
orc_init ();
GST_DEBUG_CATEGORY_INIT (gst_videomixer_blend_debug, "videomixer_blend", 0,
"video mixer blending functions");
gst_video_mixer_blend_argb = blend_argb;
gst_video_mixer_blend_bgra = blend_bgra;
gst_video_mixer_blend_i420 = blend_i420;
gst_video_mixer_blend_y444 = blend_y444;
gst_video_mixer_blend_y42b = blend_y42b;
gst_video_mixer_blend_y41b = blend_y41b;
gst_video_mixer_blend_rgb = blend_rgb;
gst_video_mixer_blend_xrgb = blend_xrgb;
gst_video_mixer_blend_yuy2 = blend_yuy2;
gst_video_mixer_fill_checker_argb = fill_checker_argb_c;
gst_video_mixer_fill_checker_bgra = fill_checker_bgra_c;
gst_video_mixer_fill_checker_ayuv = fill_checker_ayuv_c;
gst_video_mixer_fill_checker_i420 = fill_checker_i420;
gst_video_mixer_fill_checker_y444 = fill_checker_y444;
gst_video_mixer_fill_checker_y42b = fill_checker_y42b;
gst_video_mixer_fill_checker_y41b = fill_checker_y41b;
gst_video_mixer_fill_checker_rgb = fill_checker_rgb_c;
gst_video_mixer_fill_checker_xrgb = fill_checker_xrgb_c;
gst_video_mixer_fill_checker_yuy2 = fill_checker_yuy2_c;
gst_video_mixer_fill_checker_uyvy = fill_checker_uyvy_c;
gst_video_mixer_fill_color_argb = fill_color_argb;
gst_video_mixer_fill_color_bgra = fill_color_bgra;
gst_video_mixer_fill_color_abgr = fill_color_abgr;
gst_video_mixer_fill_color_rgba = fill_color_rgba;
gst_video_mixer_fill_color_ayuv = fill_color_ayuv;
gst_video_mixer_fill_color_i420 = fill_color_i420;
gst_video_mixer_fill_color_yv12 = fill_color_yv12;
gst_video_mixer_fill_color_y444 = fill_color_y444;
gst_video_mixer_fill_color_y42b = fill_color_y42b;
gst_video_mixer_fill_color_y41b = fill_color_y41b;
gst_video_mixer_fill_color_rgb = fill_color_rgb_c;
gst_video_mixer_fill_color_bgr = fill_color_bgr_c;
gst_video_mixer_fill_color_xrgb = fill_color_xrgb;
gst_video_mixer_fill_color_xbgr = fill_color_xbgr;
gst_video_mixer_fill_color_rgbx = fill_color_rgbx;
gst_video_mixer_fill_color_bgrx = fill_color_bgrx;
gst_video_mixer_fill_color_yuy2 = fill_color_yuy2;
gst_video_mixer_fill_color_yvyu = fill_color_yvyu;
gst_video_mixer_fill_color_uyvy = fill_color_uyvy;
}