compositor: blend with multiple threads

Increases the throughput of compositing by spending more CPU cycles across
multiple threads.  Simple cases (each output pixel comes from at most one
input) can see up to a 70% increase in throughput.  Less simple cases are
limited by the region of the output with the largest number of composite
operations.

Part-of: <https://gitlab.freedesktop.org/gstreamer/gst-plugins-base/-/merge_requests/755>
This commit is contained in:
Matthew Waters 2020-07-15 15:46:56 +10:00
parent d0f36c7e13
commit d7abf832af
4 changed files with 499 additions and 136 deletions

View file

@ -42,7 +42,8 @@ GST_DEBUG_CATEGORY_STATIC (gst_compositor_blend_debug);
#define BLEND_A32(name, method, LOOP) \
static void \
method##_ ##name (GstVideoFrame * srcframe, gint xpos, gint ypos, \
gdouble src_alpha, GstVideoFrame * destframe, GstCompositorBlendMode mode) \
gdouble src_alpha, GstVideoFrame * destframe, gint dst_y_start, \
gint dst_y_end, GstCompositorBlendMode mode) \
{ \
guint s_alpha; \
gint src_stride, dest_stride; \
@ -65,23 +66,26 @@ method##_ ##name (GstVideoFrame * srcframe, gint xpos, gint ypos, \
if (G_UNLIKELY (s_alpha == 0)) \
return; \
\
if (dst_y_end > dest_height) { \
dst_y_end = dest_height; \
} \
/* adjust src pointers for negative sizes */ \
if (xpos < 0) { \
src += -xpos * 4; \
src_width -= -xpos; \
xpos = 0; \
} \
if (ypos < 0) { \
src += -ypos * src_stride; \
src_height -= -ypos; \
ypos = 0; \
if (ypos < dst_y_start) { \
src += (dst_y_start - ypos) * src_stride; \
src_height -= dst_y_start - ypos; \
ypos = dst_y_start; \
} \
/* adjust width/height if the src is bigger than dest */ \
if (xpos + src_width > dest_width) { \
src_width = dest_width - xpos; \
} \
if (ypos + src_height > dest_height) { \
src_height = dest_height - ypos; \
if (ypos + src_height > dst_y_end) { \
src_height = dst_y_end - ypos; \
} \
\
if (src_height > 0 && src_width > 0) { \
@ -173,20 +177,21 @@ BLEND_A32 (bgra, overlay, _overlay_loop_argb);
#define A32_CHECKER_C(name, RGB, A, C1, C2, C3) \
static void \
fill_checker_##name##_c (GstVideoFrame * frame) \
fill_checker_##name##_c (GstVideoFrame * frame, guint y_start, guint y_end) \
{ \
gint i, j; \
gint val; \
static const gint tab[] = { 80, 160, 80, 160 }; \
gint width, height; \
gint width, stride; \
guint8 *dest; \
\
dest = GST_VIDEO_FRAME_PLANE_DATA (frame, 0); \
width = GST_VIDEO_FRAME_COMP_WIDTH (frame, 0); \
height = GST_VIDEO_FRAME_COMP_HEIGHT (frame, 0); \
stride = GST_VIDEO_FRAME_COMP_STRIDE (frame, 0); \
\
dest += y_start * stride; \
if (!RGB) { \
for (i = 0; i < height; i++) { \
for (i = y_start; i < y_end; i++) { \
for (j = 0; j < width; j++) { \
dest[A] = 0xff; \
dest[C1] = tab[((i & 0x8) >> 3) + ((j & 0x8) >> 3)]; \
@ -196,7 +201,7 @@ fill_checker_##name##_c (GstVideoFrame * frame) \
} \
} \
} else { \
for (i = 0; i < height; i++) { \
for (i = y_start; i < y_end; i++) { \
for (j = 0; j < width; j++) { \
val = tab[((i & 0x8) >> 3) + ((j & 0x8) >> 3)]; \
dest[A] = 0xFF; \
@ -220,17 +225,17 @@ A32_CHECKER_C (vuya, FALSE, 3, 2, 1, 0);
#define A32_COLOR(name, RGB, A, C1, C2, C3) \
static void \
fill_color_##name (GstVideoFrame * frame, gint Y, gint U, gint V) \
fill_color_##name (GstVideoFrame * frame, guint y_start, guint y_end, gint Y, gint U, gint V) \
{ \
gint c1, c2, c3; \
guint32 val; \
gint width, height; \
gint stride; \
guint8 *dest; \
\
dest = GST_VIDEO_FRAME_PLANE_DATA (frame, 0); \
width = GST_VIDEO_FRAME_COMP_WIDTH (frame, 0); \
height = GST_VIDEO_FRAME_COMP_HEIGHT (frame, 0); \
stride = GST_VIDEO_FRAME_COMP_STRIDE (frame, 0); \
\
dest += y_start * stride; \
if (RGB) { \
c1 = YUV_TO_R (Y, U, V); \
c2 = YUV_TO_G (Y, U, V); \
@ -242,7 +247,7 @@ fill_color_##name (GstVideoFrame * frame, gint Y, gint U, gint V) \
} \
val = GUINT32_FROM_BE ((0xff << A) | (c1 << C1) | (c2 << C2) | (c3 << C3)); \
\
compositor_orc_splat_u32 ((guint32 *) dest, val, height * width); \
compositor_orc_splat_u32 ((guint32 *) dest, val, (y_end - y_start) * (stride / 4)); \
}
A32_COLOR (argb, TRUE, 24, 16, 8, 0);
@ -291,7 +296,8 @@ _blend_##format_name (const guint8 * src, guint8 * dest, \
\
static void \
blend_##format_name (GstVideoFrame * srcframe, gint xpos, gint ypos, \
gdouble src_alpha, GstVideoFrame * destframe, GstCompositorBlendMode mode) \
gdouble src_alpha, GstVideoFrame * destframe, gint dst_y_start, \
gint dst_y_end, GstCompositorBlendMode mode) \
{ \
const guint8 *b_src; \
guint8 *b_dest; \
@ -315,6 +321,9 @@ blend_##format_name (GstVideoFrame * srcframe, gint xpos, gint ypos, \
dest_width = GST_VIDEO_FRAME_WIDTH (destframe); \
dest_height = GST_VIDEO_FRAME_HEIGHT (destframe); \
\
if (dst_y_end > dest_height) { \
dst_y_end = dest_height; \
} \
xpos = x_round (xpos); \
ypos = y_round (ypos); \
\
@ -327,10 +336,10 @@ blend_##format_name (GstVideoFrame * srcframe, gint xpos, gint ypos, \
b_src_width -= -xpos; \
xpos = 0; \
} \
if (ypos < 0) { \
yoffset = -ypos; \
b_src_height -= -ypos; \
ypos = 0; \
if (ypos < dst_y_start) { \
yoffset = dst_y_start - ypos; \
b_src_height -= dst_y_start - ypos; \
ypos = dst_y_start; \
} \
/* If x or y offset are larger then the source it's outside of the picture */ \
if (xoffset >= src_width || yoffset >= src_height) { \
@ -341,8 +350,8 @@ blend_##format_name (GstVideoFrame * srcframe, gint xpos, gint ypos, \
if (xpos + b_src_width > dest_width) { \
b_src_width = dest_width - xpos; \
} \
if (ypos + b_src_height > dest_height) { \
b_src_height = dest_height - ypos; \
if (ypos + b_src_height > dst_y_end) { \
b_src_height = dst_y_end - ypos; \
} \
if (b_src_width <= 0 || b_src_height <= 0) { \
return; \
@ -400,18 +409,22 @@ blend_##format_name (GstVideoFrame * srcframe, gint xpos, gint ypos, \
#define PLANAR_YUV_FILL_CHECKER(format_name, format_enum, MEMSET) \
static void \
fill_checker_##format_name (GstVideoFrame * frame) \
fill_checker_##format_name (GstVideoFrame * frame, guint y_start, guint y_end) \
{ \
gint i, j; \
static const int tab[] = { 80, 160, 80, 160 }; \
guint8 *p; \
gint comp_width, comp_height; \
gint rowstride; \
gint rowstride, comp_yoffset; \
const GstVideoFormatInfo *info; \
\
info = frame->info.finfo; \
p = GST_VIDEO_FRAME_COMP_DATA (frame, 0); \
comp_width = GST_VIDEO_FRAME_COMP_WIDTH (frame, 0); \
comp_height = GST_VIDEO_FRAME_COMP_HEIGHT (frame, 0); \
comp_height = GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT(info, 0, y_end - y_start); \
rowstride = GST_VIDEO_FRAME_COMP_STRIDE (frame, 0); \
comp_yoffset = (y_start == 0) ? 0 : GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT (info, 0, y_start); \
p += comp_yoffset * rowstride; \
\
for (i = 0; i < comp_height; i++) { \
for (j = 0; j < comp_width; j++) { \
@ -422,8 +435,10 @@ fill_checker_##format_name (GstVideoFrame * frame) \
\
p = GST_VIDEO_FRAME_COMP_DATA (frame, 1); \
comp_width = GST_VIDEO_FRAME_COMP_WIDTH (frame, 1); \
comp_height = GST_VIDEO_FRAME_COMP_HEIGHT (frame, 1); \
comp_height = GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT(info, 1, y_end - y_start); \
rowstride = GST_VIDEO_FRAME_COMP_STRIDE (frame, 1); \
comp_yoffset = (y_start == 0) ? 0 : GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT (info, 1, y_start); \
p += comp_yoffset * rowstride; \
\
for (i = 0; i < comp_height; i++) { \
MEMSET (p, 0x80, comp_width); \
@ -432,8 +447,10 @@ fill_checker_##format_name (GstVideoFrame * frame) \
\
p = GST_VIDEO_FRAME_COMP_DATA (frame, 2); \
comp_width = GST_VIDEO_FRAME_COMP_WIDTH (frame, 2); \
comp_height = GST_VIDEO_FRAME_COMP_HEIGHT (frame, 2); \
comp_height = GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT(info, 2, y_end - y_start); \
rowstride = GST_VIDEO_FRAME_COMP_STRIDE (frame, 2); \
comp_yoffset = (y_start == 0) ? 0 : GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT (info, 2, y_start); \
p += comp_yoffset * rowstride; \
\
for (i = 0; i < comp_height; i++) { \
MEMSET (p, 0x80, comp_width); \
@ -444,17 +461,21 @@ fill_checker_##format_name (GstVideoFrame * frame) \
#define PLANAR_YUV_FILL_COLOR(format_name,format_enum,MEMSET) \
static void \
fill_color_##format_name (GstVideoFrame * frame, \
gint colY, gint colU, gint colV) \
guint y_start, guint y_end, gint colY, gint colU, gint colV) \
{ \
guint8 *p; \
gint comp_width, comp_height; \
gint rowstride; \
gint rowstride, comp_yoffset; \
gint i; \
const GstVideoFormatInfo *info; \
\
info = frame->info.finfo; \
p = GST_VIDEO_FRAME_COMP_DATA (frame, 0); \
comp_width = GST_VIDEO_FRAME_COMP_WIDTH (frame, 0); \
comp_height = GST_VIDEO_FRAME_COMP_HEIGHT (frame, 0); \
comp_height = GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT(info, 0, y_end - y_start); \
rowstride = GST_VIDEO_FRAME_COMP_STRIDE (frame, 0); \
comp_yoffset = (y_start == 0) ? 0 : GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT (info, 0, y_start); \
p += comp_yoffset * rowstride; \
\
for (i = 0; i < comp_height; i++) { \
MEMSET (p, colY, comp_width); \
@ -463,8 +484,10 @@ fill_color_##format_name (GstVideoFrame * frame, \
\
p = GST_VIDEO_FRAME_COMP_DATA (frame, 1); \
comp_width = GST_VIDEO_FRAME_COMP_WIDTH (frame, 1); \
comp_height = GST_VIDEO_FRAME_COMP_HEIGHT (frame, 1); \
comp_height = GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT(info, 1, y_end - y_start); \
rowstride = GST_VIDEO_FRAME_COMP_STRIDE (frame, 1); \
comp_yoffset = (y_start == 0) ? 0 : GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT (info, 1, y_start); \
p += comp_yoffset * rowstride; \
\
for (i = 0; i < comp_height; i++) { \
MEMSET (p, colU, comp_width); \
@ -473,8 +496,10 @@ fill_color_##format_name (GstVideoFrame * frame, \
\
p = GST_VIDEO_FRAME_COMP_DATA (frame, 2); \
comp_width = GST_VIDEO_FRAME_COMP_WIDTH (frame, 2); \
comp_height = GST_VIDEO_FRAME_COMP_HEIGHT (frame, 2); \
comp_height = GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT(info, 2, y_end - y_start); \
rowstride = GST_VIDEO_FRAME_COMP_STRIDE (frame, 2); \
comp_yoffset = (y_start == 0) ? 0 : GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT (info, 2, y_start); \
p += comp_yoffset * rowstride; \
\
for (i = 0; i < comp_height; i++) { \
MEMSET (p, colV, comp_width); \
@ -541,7 +566,8 @@ _blend_##format_name (const guint8 * src, guint8 * dest, \
\
static void \
blend_##format_name (GstVideoFrame * srcframe, gint xpos, gint ypos, \
gdouble src_alpha, GstVideoFrame * destframe, GstCompositorBlendMode mode) \
gdouble src_alpha, GstVideoFrame * destframe, gint dst_y_start, \
gint dst_y_end, GstCompositorBlendMode mode) \
{ \
const guint8 *b_src; \
guint8 *b_dest; \
@ -565,6 +591,9 @@ blend_##format_name (GstVideoFrame * srcframe, gint xpos, gint ypos, \
dest_width = GST_VIDEO_FRAME_WIDTH (destframe); \
dest_height = GST_VIDEO_FRAME_HEIGHT (destframe); \
\
if (dst_y_end > dest_height) { \
dst_y_end = dest_height; \
} \
xpos = GST_ROUND_UP_2 (xpos); \
ypos = GST_ROUND_UP_2 (ypos); \
\
@ -577,10 +606,10 @@ blend_##format_name (GstVideoFrame * srcframe, gint xpos, gint ypos, \
b_src_width -= -xpos; \
xpos = 0; \
} \
if (ypos < 0) { \
yoffset += -ypos; \
b_src_height -= -ypos; \
ypos = 0; \
if (ypos < dst_y_start) { \
yoffset += dst_y_start - ypos; \
b_src_height -= dst_y_start - ypos; \
ypos = dst_y_start; \
} \
/* If x or y offset are larger then the source it's outside of the picture */ \
if (xoffset > src_width || yoffset > src_height) { \
@ -591,8 +620,8 @@ blend_##format_name (GstVideoFrame * srcframe, gint xpos, gint ypos, \
if (xpos + src_width > dest_width) { \
b_src_width = dest_width - xpos; \
} \
if (ypos + src_height > dest_height) { \
b_src_height = dest_height - ypos; \
if (ypos + src_height > dst_y_end) { \
b_src_height = dst_y_end - ypos; \
} \
if (b_src_width < 0 || b_src_height < 0) { \
return; \
@ -634,18 +663,22 @@ blend_##format_name (GstVideoFrame * srcframe, gint xpos, gint ypos, \
#define NV_YUV_FILL_CHECKER(format_name, MEMSET) \
static void \
fill_checker_##format_name (GstVideoFrame * frame) \
fill_checker_##format_name (GstVideoFrame * frame, guint y_start, guint y_end) \
{ \
gint i, j; \
static const int tab[] = { 80, 160, 80, 160 }; \
guint8 *p; \
gint comp_width, comp_height; \
gint rowstride; \
gint rowstride, comp_yoffset; \
const GstVideoFormatInfo *info; \
\
info = frame->info.finfo; \
p = GST_VIDEO_FRAME_COMP_DATA (frame, 0); \
comp_width = GST_VIDEO_FRAME_COMP_WIDTH (frame, 0); \
comp_height = GST_VIDEO_FRAME_COMP_HEIGHT (frame, 0); \
comp_height = GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT(info, 0, y_end - y_start); \
rowstride = GST_VIDEO_FRAME_COMP_STRIDE (frame, 0); \
comp_yoffset = (y_start == 0) ? 0 : GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT (info, 0, y_start); \
p += comp_yoffset * rowstride; \
\
for (i = 0; i < comp_height; i++) { \
for (j = 0; j < comp_width; j++) { \
@ -656,8 +689,10 @@ fill_checker_##format_name (GstVideoFrame * frame) \
\
p = GST_VIDEO_FRAME_PLANE_DATA (frame, 1); \
comp_width = GST_VIDEO_FRAME_COMP_WIDTH (frame, 1); \
comp_height = GST_VIDEO_FRAME_COMP_HEIGHT (frame, 1); \
comp_height = GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT(info, 1, y_end - y_start); \
rowstride = GST_VIDEO_FRAME_COMP_STRIDE (frame, 1); \
comp_yoffset = (y_start == 0) ? 0 : GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT (info, 1, y_start); \
p += comp_yoffset * rowstride; \
\
for (i = 0; i < comp_height; i++) { \
MEMSET (p, 0x80, comp_width * 2); \
@ -668,18 +703,22 @@ fill_checker_##format_name (GstVideoFrame * frame) \
#define NV_YUV_FILL_COLOR(format_name,MEMSET) \
static void \
fill_color_##format_name (GstVideoFrame * frame, \
gint colY, gint colU, gint colV) \
guint y_start, guint y_end, gint colY, gint colU, gint colV) \
{ \
guint8 *y, *u, *v; \
gint comp_width, comp_height; \
gint rowstride; \
gint rowstride, comp_yoffset; \
gint i, j; \
const GstVideoFormatInfo *info; \
\
info = frame->info.finfo; \
y = GST_VIDEO_FRAME_COMP_DATA (frame, 0); \
comp_width = GST_VIDEO_FRAME_COMP_WIDTH (frame, 0); \
comp_height = GST_VIDEO_FRAME_COMP_HEIGHT (frame, 0); \
comp_height = GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT(info, 0, y_end - y_start); \
rowstride = GST_VIDEO_FRAME_COMP_STRIDE (frame, 0); \
comp_yoffset = (y_start == 0) ? 0 : GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT (info, 0, y_start); \
\
y += comp_yoffset * rowstride; \
for (i = 0; i < comp_height; i++) { \
MEMSET (y, colY, comp_width); \
y += rowstride; \
@ -688,9 +727,12 @@ fill_color_##format_name (GstVideoFrame * frame, \
u = GST_VIDEO_FRAME_COMP_DATA (frame, 1); \
v = GST_VIDEO_FRAME_COMP_DATA (frame, 2); \
comp_width = GST_VIDEO_FRAME_COMP_WIDTH (frame, 1); \
comp_height = GST_VIDEO_FRAME_COMP_HEIGHT (frame, 1); \
comp_height = GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT(info, 1, y_end - y_start); \
rowstride = GST_VIDEO_FRAME_COMP_STRIDE (frame, 1); \
comp_yoffset = (y_start == 0) ? 0 : GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT (info, 1, y_start); \
\
u += comp_yoffset * rowstride; \
v += comp_yoffset * rowstride; \
for (i = 0; i < comp_height; i++) { \
for (j = 0; j < comp_width; j++) { \
u[j*2] = colU; \
@ -712,7 +754,8 @@ NV_YUV_FILL_CHECKER (nv21, memset);
#define RGB_BLEND(name, bpp, MEMCPY, BLENDLOOP) \
static void \
blend_##name (GstVideoFrame * srcframe, gint xpos, gint ypos, \
gdouble src_alpha, GstVideoFrame * destframe, GstCompositorBlendMode mode) \
gdouble src_alpha, GstVideoFrame * destframe, gint dst_y_start, \
gint dst_y_end, GstCompositorBlendMode mode) \
{ \
gint b_alpha; \
gint i; \
@ -735,23 +778,26 @@ blend_##name (GstVideoFrame * srcframe, gint xpos, gint ypos, \
\
b_alpha = CLAMP ((gint) (src_alpha * 255), 0, 255); \
\
if (dst_y_end > dest_height) { \
dst_y_end = dest_height; \
} \
/* adjust src pointers for negative sizes */ \
if (xpos < 0) { \
src += -xpos * bpp; \
src_width -= -xpos; \
xpos = 0; \
} \
if (ypos < 0) { \
src += -ypos * src_stride; \
src_height -= -ypos; \
ypos = 0; \
if (ypos < dst_y_start) { \
src += (dst_y_start - ypos) * src_stride; \
src_height -= dst_y_start - ypos; \
ypos = dst_y_start; \
} \
/* adjust width/height if the src is bigger than dest */ \
if (xpos + src_width > dest_width) { \
src_width = dest_width - xpos; \
} \
if (ypos + src_height > dest_height) { \
src_height = dest_height - ypos; \
if (ypos + src_height > dst_y_end) { \
src_height = dst_y_end - ypos; \
} \
\
dest = dest + bpp * xpos + (ypos * dest_stride); \
@ -783,7 +829,7 @@ blend_##name (GstVideoFrame * srcframe, gint xpos, gint ypos, \
#define RGB_FILL_CHECKER_C(name, bpp, r, g, b) \
static void \
fill_checker_##name##_c (GstVideoFrame * frame) \
fill_checker_##name##_c (GstVideoFrame * frame, guint y_start, guint y_end) \
{ \
gint i, j; \
static const int tab[] = { 80, 160, 80, 160 }; \
@ -791,11 +837,12 @@ fill_checker_##name##_c (GstVideoFrame * frame) \
guint8 *dest; \
\
width = GST_VIDEO_FRAME_WIDTH (frame); \
height = GST_VIDEO_FRAME_HEIGHT (frame); \
height = y_end - y_start; \
dest = GST_VIDEO_FRAME_PLANE_DATA (frame, 0); \
stride = GST_VIDEO_FRAME_COMP_STRIDE (frame, 0); \
dest_add = stride - width * bpp; \
\
dest += y_start * stride; \
for (i = 0; i < height; i++) { \
for (j = 0; j < width; j++) { \
dest[r] = tab[((i & 0x8) >> 3) + ((j & 0x8) >> 3)]; /* red */ \
@ -810,7 +857,7 @@ fill_checker_##name##_c (GstVideoFrame * frame) \
#define RGB_FILL_COLOR(name, bpp, MEMSET_RGB) \
static void \
fill_color_##name (GstVideoFrame * frame, \
gint colY, gint colU, gint colV) \
guint y_start, guint y_end, gint colY, gint colU, gint colV) \
{ \
gint red, green, blue; \
gint i; \
@ -819,7 +866,7 @@ fill_color_##name (GstVideoFrame * frame, \
guint8 *dest; \
\
width = GST_VIDEO_FRAME_WIDTH (frame); \
height = GST_VIDEO_FRAME_HEIGHT (frame); \
height = y_end - y_start; \
dest = GST_VIDEO_FRAME_PLANE_DATA (frame, 0); \
dest_stride = GST_VIDEO_FRAME_COMP_STRIDE (frame, 0); \
\
@ -827,6 +874,7 @@ fill_color_##name (GstVideoFrame * frame, \
green = YUV_TO_G (colY, colU, colV); \
blue = YUV_TO_B (colY, colU, colV); \
\
dest += y_start * dest_stride; \
for (i = 0; i < height; i++) { \
MEMSET_RGB (dest, red, green, blue, width); \
dest += dest_stride; \
@ -885,7 +933,8 @@ RGB_FILL_COLOR (bgrx, 4, _memset_bgrx);
#define PACKED_422_BLEND(name, MEMCPY, BLENDLOOP) \
static void \
blend_##name (GstVideoFrame * srcframe, gint xpos, gint ypos, \
gdouble src_alpha, GstVideoFrame * destframe, GstCompositorBlendMode mode) \
gdouble src_alpha, GstVideoFrame * destframe, gint dst_y_start, \
gint dst_y_end, GstCompositorBlendMode mode) \
{ \
gint b_alpha; \
gint i; \
@ -910,24 +959,27 @@ blend_##name (GstVideoFrame * srcframe, gint xpos, gint ypos, \
\
xpos = GST_ROUND_UP_2 (xpos); \
\
if (dst_y_end > dest_height) { \
dst_y_end = dest_height; \
} \
/* adjust src pointers for negative sizes */ \
if (xpos < 0) { \
src += -xpos * 2; \
src_width -= -xpos; \
xpos = 0; \
} \
if (ypos < 0) { \
if (ypos < dst_y_start) { \
src += -ypos * src_stride; \
src_height -= -ypos; \
ypos = 0; \
ypos = dst_y_start; \
} \
\
/* adjust width/height if the src is bigger than dest */ \
if (xpos + src_width > dest_width) { \
src_width = dest_width - xpos; \
} \
if (ypos + src_height > dest_height) { \
src_height = dest_height - ypos; \
if (ypos + src_height > dst_y_end) { \
src_height = dst_y_end - ypos; \
} \
\
dest = dest + 2 * xpos + (ypos * dest_stride); \
@ -959,7 +1011,7 @@ blend_##name (GstVideoFrame * srcframe, gint xpos, gint ypos, \
#define PACKED_422_FILL_CHECKER_C(name, Y1, U, Y2, V) \
static void \
fill_checker_##name##_c (GstVideoFrame * frame) \
fill_checker_##name##_c (GstVideoFrame * frame, guint y_start, guint y_end) \
{ \
gint i, j; \
static const int tab[] = { 80, 160, 80, 160 }; \
@ -969,11 +1021,12 @@ fill_checker_##name##_c (GstVideoFrame * frame) \
\
width = GST_VIDEO_FRAME_WIDTH (frame); \
width = GST_ROUND_UP_2 (width); \
height = GST_VIDEO_FRAME_HEIGHT (frame); \
height = y_end - y_start; \
dest = GST_VIDEO_FRAME_PLANE_DATA (frame, 0); \
dest_add = GST_VIDEO_FRAME_COMP_STRIDE (frame, 0) - width * 2; \
width /= 2; \
\
dest += dest_add * y_start; \
for (i = 0; i < height; i++) { \
for (j = 0; j < width; j++) { \
dest[Y1] = tab[((i & 0x8) >> 3) + (((2 * j + 0) & 0x8) >> 3)]; \
@ -989,7 +1042,7 @@ fill_checker_##name##_c (GstVideoFrame * frame) \
#define PACKED_422_FILL_COLOR(name, Y1, U, Y2, V) \
static void \
fill_color_##name (GstVideoFrame * frame, \
gint colY, gint colU, gint colV) \
guint y_start, guint y_end, gint colY, gint colU, gint colV) \
{ \
gint i; \
gint dest_stride; \
@ -999,13 +1052,14 @@ fill_color_##name (GstVideoFrame * frame, \
\
width = GST_VIDEO_FRAME_WIDTH (frame); \
width = GST_ROUND_UP_2 (width); \
height = GST_VIDEO_FRAME_HEIGHT (frame); \
height = y_end - y_start; \
dest = GST_VIDEO_FRAME_PLANE_DATA (frame, 0); \
dest_stride = GST_VIDEO_FRAME_COMP_STRIDE (frame, 0); \
width /= 2; \
\
val = GUINT32_FROM_BE ((colY << Y1) | (colY << Y2) | (colU << U) | (colV << V)); \
\
dest += dest_stride * y_start; \
for (i = 0; i < height; i++) { \
compositor_orc_splat_u32 ((guint32 *) dest, val, width); \
dest += dest_stride; \

View file

@ -38,10 +38,19 @@ typedef enum
COMPOSITOR_BLEND_MODE_ADD,
} GstCompositorBlendMode;
/*
* @srcframe: source #GstVideoFrame
* @xpos: horizontal start position of @srcframe, leftmost pixel column.
* @ypos: vertical start position of @srcframe, topmost pixel line.
* @src_alpha: alpha factor applied to @srcframe
* @destframe: destination #GstVideoFrame
* @dst_y_start: first line of @destframe that may be written. Used for splitting work across multiple threads.
* @dst_y_end: end line (exclusive) of the region of @destframe that may be written. Used for splitting work across multiple threads.
* @mode: the #GstCompositorBlendMode to blend with
*/
typedef void (*BlendFunction) (GstVideoFrame *srcframe, gint xpos, gint ypos, gdouble src_alpha, GstVideoFrame * destframe,
GstCompositorBlendMode mode);
typedef void (*FillCheckerFunction) (GstVideoFrame * frame);
typedef void (*FillColorFunction) (GstVideoFrame * frame, gint c1, gint c2, gint c3);
gint dst_y_start, gint dst_y_end, GstCompositorBlendMode mode);
typedef void (*FillCheckerFunction) (GstVideoFrame * frame, guint y_start, guint y_end);
typedef void (*FillColorFunction) (GstVideoFrame * frame, guint y_start, guint y_end, gint c1, gint c2, gint c3);
extern BlendFunction gst_compositor_blend_argb;
extern BlendFunction gst_compositor_blend_bgra;

View file

@ -827,21 +827,189 @@ _fixate_caps (GstAggregator * agg, GstCaps * caps)
return ret;
}
/* Entry point of a worker thread.
 *
 * @data is the worker's GstParallelizedTaskThread entry.  The worker first
 * reports itself as started through n_done/cond_done, then loops: it sleeps
 * on cond_todo until a task index is available in n_todo (or quit is set),
 * claims that index under the lock, runs the runner's func on the matching
 * task_data entry with the lock released, and reports completion through
 * n_done/cond_done.  Always returns NULL. */
static gpointer
gst_parallelized_task_thread_func (gpointer data)
{
  GstParallelizedTaskThread *self = data;
  GstParallelizedTaskRunner *runner = self->runner;

  g_mutex_lock (&runner->lock);

  /* Startup handshake: the creator waits until all workers got here. */
  runner->n_done++;
  if (runner->n_done == runner->n_threads - 1)
    g_cond_signal (&runner->cond_done);

  for (;;) {
    gint idx;

    /* n_todo == -1 means no unclaimed task is left */
    while (runner->n_todo == -1 && !runner->quit)
      g_cond_wait (&runner->cond_todo, &runner->lock);

    if (runner->quit)
      break;

    /* Claim the highest unclaimed task index */
    idx = runner->n_todo;
    runner->n_todo--;
    g_assert (runner->n_todo >= -1);
    g_mutex_unlock (&runner->lock);

    /* Run the task without holding the lock */
    g_assert (runner->func != NULL);
    runner->func (runner->task_data[idx]);

    g_mutex_lock (&runner->lock);
    runner->n_done++;
    if (runner->n_done == runner->n_threads - 1)
      g_cond_signal (&runner->cond_done);
  }

  g_mutex_unlock (&runner->lock);

  return NULL;
}
/* Shuts down and releases a task runner.
 *
 * Sets the quit flag and wakes every worker waiting on cond_todo, joins all
 * worker threads that were actually created (entry 0 never has a thread of
 * its own), then clears the synchronisation primitives and frees the thread
 * array and the runner itself. */
static void
gst_parallelized_task_runner_free (GstParallelizedTaskRunner * self)
{
  guint n;

  g_mutex_lock (&self->lock);
  self->quit = TRUE;
  g_cond_broadcast (&self->cond_todo);
  g_mutex_unlock (&self->lock);

  for (n = 1; n < self->n_threads; n++) {
    GThread *worker = self->threads[n].thread;

    /* Entries may be empty if thread creation failed part-way through */
    if (worker)
      g_thread_join (worker);
  }

  g_mutex_clear (&self->lock);
  g_cond_clear (&self->cond_todo);
  g_cond_clear (&self->cond_done);
  g_free (self->threads);
  g_free (self);
}
/* Creates a task runner made of @n_threads execution slots.
 *
 * A value of 0 for @n_threads selects g_get_num_processors().  Slot 0 stands
 * for the thread that later calls gst_parallelized_task_runner_run(), so only
 * n_threads - 1 worker threads are spawned.  The function returns once every
 * spawned worker has reported that it started.
 *
 * Returns: the new runner, or NULL if a worker thread could not be started. */
static GstParallelizedTaskRunner *
gst_parallelized_task_runner_new (guint n_threads)
{
  GstParallelizedTaskRunner *self;
  GError *err = NULL;
  guint i;

  if (n_threads == 0)
    n_threads = g_get_num_processors ();

  self = g_new0 (GstParallelizedTaskRunner, 1);
  self->n_threads = n_threads;
  self->threads = g_new0 (GstParallelizedTaskThread, n_threads);

  self->quit = FALSE;
  self->n_todo = -1;
  self->n_done = 0;
  g_mutex_init (&self->lock);
  g_cond_init (&self->cond_todo);
  g_cond_init (&self->cond_done);

  /* Set when scheduling a job */
  self->func = NULL;
  self->task_data = NULL;

  for (i = 0; i < n_threads; i++) {
    GstParallelizedTaskThread *slot = &self->threads[i];

    slot->runner = self;
    slot->idx = i;

    /* First thread is the one calling run() */
    if (i == 0)
      continue;

    slot->thread = g_thread_try_new ("compositor-blend",
        gst_parallelized_task_thread_func, slot, &err);
    if (!slot->thread) {
      GST_ERROR ("Failed to start thread %u: %s", i, err->message);
      g_clear_error (&err);
      /* Stops and joins the workers that did start */
      gst_parallelized_task_runner_free (self);
      return NULL;
    }
  }

  /* Wait until every worker has announced itself */
  g_mutex_lock (&self->lock);
  while (self->n_done < self->n_threads - 1)
    g_cond_wait (&self->cond_done, &self->lock);
  self->n_done = 0;
  g_mutex_unlock (&self->lock);

  return self;
}
/* Runs @func once per execution slot and blocks until all calls returned.
 *
 * @task_data must hold one entry per slot (n_threads entries).  The calling
 * thread handles the last entry itself; the remaining entries are claimed by
 * the worker threads.  func and task_data are reset to NULL before
 * returning. */
static void
gst_parallelized_task_runner_run (GstParallelizedTaskRunner * self,
    GstParallelizedTaskFunc func, gpointer * task_data)
{
  const gboolean have_workers = self->n_threads > 1;

  self->func = func;
  self->task_data = task_data;

  if (have_workers) {
    /* Publish task indices n_threads - 2 .. 0 and wake the workers */
    g_mutex_lock (&self->lock);
    self->n_todo = self->n_threads - 2;
    self->n_done = 0;
    g_cond_broadcast (&self->cond_todo);
    g_mutex_unlock (&self->lock);
  }

  /* The caller's own share of the work */
  self->func (self->task_data[self->n_threads - 1]);

  if (have_workers) {
    /* Block until every worker-side task has been reported as done */
    g_mutex_lock (&self->lock);
    while (self->n_done < self->n_threads - 1)
      g_cond_wait (&self->cond_done, &self->lock);
    self->n_done = 0;
    g_mutex_unlock (&self->lock);
  }

  self->func = NULL;
  self->task_data = NULL;
}
/* Handles newly negotiated source caps.
 *
 * Parses @caps, selects the blend/fill functions for the negotiated format
 * and (re)creates the blend task runner with a thread count suited to the
 * output height, then chains up to the parent class.
 *
 * Returns: FALSE if @caps cannot be parsed, if no functions exist for the
 * format or if the blend task runner cannot be created; otherwise the result
 * of the parent class' negotiated_src_caps. */
static gboolean
_negotiated_caps (GstAggregator * agg, GstCaps * caps)
{
  /* Smallest number of output lines worth handing to a single thread */
  const guint min_lines_per_thread = 200;
  GstCompositor *compositor = GST_COMPOSITOR (agg);
  GstVideoInfo v_info;
  guint n_threads, height;

  GST_DEBUG_OBJECT (agg, "Negotiated caps %" GST_PTR_FORMAT, caps);

  if (!gst_video_info_from_caps (&v_info, caps))
    return FALSE;

  if (!set_functions (compositor, &v_info)) {
    GST_ERROR_OBJECT (agg, "Failed to setup vfuncs");
    return FALSE;
  }

  height = GST_VIDEO_INFO_HEIGHT (&v_info);
  n_threads = g_get_num_processors ();
  /* Use fewer threads when each one would get less than the minimum slice */
  if (height / n_threads < min_lines_per_thread)
    n_threads = (height + min_lines_per_thread - 1) / min_lines_per_thread;
  if (n_threads < 1)
    n_threads = 1;

  /* XXX: implement better thread count change */
  if (compositor->blend_runner
      && compositor->blend_runner->n_threads != n_threads) {
    gst_parallelized_task_runner_free (compositor->blend_runner);
    compositor->blend_runner = NULL;
  }

  if (!compositor->blend_runner) {
    compositor->blend_runner = gst_parallelized_task_runner_new (n_threads);
    /* Creation fails when a worker thread cannot be started; the aggregate
     * path dereferences blend_runner unconditionally, so fail here. */
    if (!compositor->blend_runner) {
      GST_ERROR_OBJECT (agg, "Failed to create blend task runner");
      return FALSE;
    }
  }

  return GST_AGGREGATOR_CLASS (parent_class)->negotiated_src_caps (agg, caps);
}
@ -869,58 +1037,6 @@ _should_draw_background (GstVideoAggregator * vagg)
return draw;
}
/* Whole-frame background fill.
 *
 * Stores the blend function to use for the pads in @composite (comp->blend,
 * or comp->overlay for a transparent background) and paints the configured
 * background over the complete @outframe.
 *
 * Returns: FALSE if _should_draw_background() reported that no background
 * is needed (nothing is drawn), TRUE if the background was drawn. */
static gboolean
_draw_background (GstVideoAggregator * vagg, GstVideoFrame * outframe,
BlendFunction * composite)
{
GstCompositor *comp = GST_COMPOSITOR (vagg);
*composite = comp->blend;
/* If one of the frames to be composited completely obscures the background,
 * don't bother drawing the background at all. We can also always use the
 * 'blend' BlendFunction in that case because it only changes if we have to
 * overlay on top of a transparent background. */
if (!_should_draw_background (vagg))
return FALSE;
switch (comp->background) {
case COMPOSITOR_BACKGROUND_CHECKER:
comp->fill_checker (outframe);
break;
case COMPOSITOR_BACKGROUND_BLACK:
/* arguments after the frame are passed as Y, U, V to fill_color */
comp->fill_color (outframe, 16, 128, 128);
break;
case COMPOSITOR_BACKGROUND_WHITE:
comp->fill_color (outframe, 240, 128, 128);
break;
case COMPOSITOR_BACKGROUND_TRANSPARENT:
{
guint i, plane, num_planes, height;
num_planes = GST_VIDEO_FRAME_N_PLANES (outframe);
/* zero every row of every plane, touching only the visible bytes of
 * each row (component width * pixel stride), not the stride padding */
for (plane = 0; plane < num_planes; ++plane) {
guint8 *pdata;
gsize rowsize, plane_stride;
pdata = GST_VIDEO_FRAME_PLANE_DATA (outframe, plane);
plane_stride = GST_VIDEO_FRAME_PLANE_STRIDE (outframe, plane);
rowsize = GST_VIDEO_FRAME_COMP_WIDTH (outframe, plane)
* GST_VIDEO_FRAME_COMP_PSTRIDE (outframe, plane);
height = GST_VIDEO_FRAME_COMP_HEIGHT (outframe, plane);
for (i = 0; i < height; ++i) {
memset (pdata, 0, rowsize);
pdata += plane_stride;
}
}
/* use overlay to keep background transparent */
*composite = comp->overlay;
break;
}
}
return TRUE;
}
static gboolean
frames_can_copy (const GstVideoFrame * frame1, const GstVideoFrame * frame2)
{
@ -933,14 +1049,101 @@ frames_can_copy (const GstVideoFrame * frame1, const GstVideoFrame * frame2)
return TRUE;
}
/* Description of one input that has to be blended into the output frame. */
struct CompositePadInfo
{
/* frame to blend; passed as the source frame to the blend function */
GstVideoFrame *prepared_frame;
/* pad the frame belongs to; its xpos, ypos and alpha are read when blending */
GstCompositorPad *pad;
/* blend mode handed to the blend function for this input */
GstCompositorBlendMode blend_mode;
};
/* Work item for blend_pads(): everything needed to composite one range of
 * output lines. */
struct CompositeTask
{
/* compositor providing the blend/overlay/fill functions and background */
GstCompositor *compositor;
/* destination frame written by the task */
GstVideoFrame *out_frame;
/* first output line of the range handled by this task */
guint dst_line_start;
/* end of the range handled by this task (passed on as dst_y_end / y_end) */
guint dst_line_end;
/* whether the background is drawn for this range before blending */
gboolean draw_background;
/* number of entries in pads_info */
guint n_pads;
/* inputs to blend, in blending order */
struct CompositePadInfo *pads_info;
};
/* Paints the configured background into lines [@y_start, @y_end) of
 * @outframe and selects the blend function to use for the pads.
 *
 * @composite is set to comp->blend, or to comp->overlay when the background
 * is transparent, so that the background stays transparent.
 *
 * @y_start and @y_end are expressed in output frame lines.  For the
 * transparent background both bounds are scaled to each plane's own
 * resolution before they are used as row indices, so that vertically
 * subsampled planes are cleared at the right offset and adjacent line ranges
 * never overlap within a plane. */
static void
_draw_background (GstCompositor * comp, GstVideoFrame * outframe,
    guint y_start, guint y_end, BlendFunction * composite)
{
  *composite = comp->blend;

  switch (comp->background) {
    case COMPOSITOR_BACKGROUND_CHECKER:
      comp->fill_checker (outframe, y_start, y_end);
      break;
    case COMPOSITOR_BACKGROUND_BLACK:
      comp->fill_color (outframe, y_start, y_end, 16, 128, 128);
      break;
    case COMPOSITOR_BACKGROUND_WHITE:
      comp->fill_color (outframe, y_start, y_end, 240, 128, 128);
      break;
    case COMPOSITOR_BACKGROUND_TRANSPARENT:
    {
      const GstVideoFormatInfo *info = outframe->info.finfo;
      guint plane, num_planes;

      num_planes = GST_VIDEO_FRAME_N_PLANES (outframe);
      for (plane = 0; plane < num_planes; ++plane) {
        guint8 *pdata;
        gsize rowsize, plane_stride;
        guint row, first_row, end_row;

        plane_stride = GST_VIDEO_FRAME_PLANE_STRIDE (outframe, plane);
        rowsize = GST_VIDEO_FRAME_COMP_WIDTH (outframe, plane)
            * GST_VIDEO_FRAME_COMP_PSTRIDE (outframe, plane);

        /* Convert both bounds from frame lines to rows of this plane.
         * Scaling the bounds (rather than the line count) keeps the row
         * ranges of neighbouring slices contiguous and disjoint. */
        first_row = GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT (info, plane, y_start);
        end_row = GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT (info, plane, y_end);

        pdata = GST_VIDEO_FRAME_PLANE_DATA (outframe, plane);
        pdata += first_row * plane_stride;
        for (row = first_row; row < end_row; ++row) {
          memset (pdata, 0, rowsize);
          pdata += plane_stride;
        }
      }
      /* use overlay to keep background transparent */
      *composite = comp->overlay;
      break;
    }
  }
}
static void
blend_pads (struct CompositeTask *comp)
{
BlendFunction composite;
guint i;
composite = comp->compositor->blend;
if (comp->draw_background) {
_draw_background (comp->compositor, comp->out_frame, comp->dst_line_start,
comp->dst_line_end, &composite);
}
for (i = 0; i < comp->n_pads; i++) {
composite (comp->pads_info[i].prepared_frame,
comp->pads_info[i].pad->xpos, comp->pads_info[i].pad->ypos,
comp->pads_info[i].pad->alpha, comp->out_frame, comp->dst_line_start,
comp->dst_line_end, comp->pads_info[i].blend_mode);
}
}
static GstFlowReturn
gst_compositor_aggregate_frames (GstVideoAggregator * vagg, GstBuffer * outbuf)
{
GstCompositor *compositor = GST_COMPOSITOR (vagg);
GList *l;
BlendFunction composite;
GstVideoFrame out_frame, *outframe;
gboolean drew_background;
guint drawn_pads = 0;
gboolean draw_background;
guint drawn_a_pad = FALSE;
struct CompositePadInfo *pads_info;
guint i, n_pads = 0;
if (!gst_video_frame_map (&out_frame, &vagg->info, outbuf, GST_MAP_WRITE)) {
GST_WARNING_OBJECT (vagg, "Could not map output buffer");
@ -948,9 +1151,26 @@ gst_compositor_aggregate_frames (GstVideoAggregator * vagg, GstBuffer * outbuf)
}
outframe = &out_frame;
drew_background = _draw_background (vagg, outframe, &composite);
/* If one of the frames to be composited completely obscures the background,
* don't bother drawing the background at all. We can also always use the
* 'blend' BlendFunction in that case because it only changes if we have to
* overlay on top of a transparent background. */
draw_background = _should_draw_background (vagg);
GST_OBJECT_LOCK (vagg);
for (l = GST_ELEMENT (vagg)->sinkpads; l; l = l->next) {
GstVideoAggregatorPad *pad = l->data;
GstVideoFrame *prepared_frame =
gst_video_aggregator_pad_get_prepared_frame (pad);
if (prepared_frame)
n_pads++;
}
pads_info = g_newa (struct CompositePadInfo, n_pads);
n_pads = 0;
for (l = GST_ELEMENT (vagg)->sinkpads; l; l = l->next) {
GstVideoAggregatorPad *pad = l->data;
GstCompositorPad *compo_pad = GST_COMPOSITOR_PAD (pad);
@ -978,16 +1198,53 @@ gst_compositor_aggregate_frames (GstVideoAggregator * vagg, GstBuffer * outbuf)
* background, and @prepared_frame has the same format, height, and width
* as @outframe, then we can just copy it as-is. Subsequent pads (if any)
* will be composited on top of it. */
if (drawn_pads == 0 && !drew_background &&
frames_can_copy (prepared_frame, outframe))
if (!drawn_a_pad && !draw_background &&
frames_can_copy (prepared_frame, outframe)) {
gst_video_frame_copy (outframe, prepared_frame);
else
composite (prepared_frame,
compo_pad->xpos,
compo_pad->ypos, compo_pad->alpha, outframe, blend_mode);
drawn_pads++;
} else {
pads_info[n_pads].pad = compo_pad;
pads_info[n_pads].prepared_frame = prepared_frame;
pads_info[n_pads].blend_mode = blend_mode;
n_pads++;
}
drawn_a_pad = TRUE;
}
}
{
guint n_threads, lines_per_thread;
guint out_height;
struct CompositeTask *tasks;
struct CompositeTask **tasks_p;
n_threads = compositor->blend_runner->n_threads;
tasks = g_newa (struct CompositeTask, n_threads);
tasks_p = g_newa (struct CompositeTask *, n_threads);
out_height = GST_VIDEO_FRAME_HEIGHT (outframe);
lines_per_thread = (out_height + n_threads - 1) / n_threads;
for (i = 0; i < n_threads; i++) {
tasks[i].compositor = compositor;
tasks[i].n_pads = n_pads;
tasks[i].pads_info = pads_info;
tasks[i].out_frame = outframe;
tasks[i].draw_background = draw_background;
/* This is a dumb split of the work by number of output lines.
* If there is a section of the output that reads from a lot of source
* pads, then that thread will consume more time. Maybe tracking and
* splitting on the source fill rate would produce better results. */
tasks[i].dst_line_start = i * lines_per_thread;
tasks[i].dst_line_end = MIN ((i + 1) * lines_per_thread, out_height);
tasks_p[i] = &tasks[i];
}
gst_parallelized_task_runner_run (compositor->blend_runner,
(GstParallelizedTaskFunc) blend_pads, (gpointer *) tasks_p);
}
GST_OBJECT_UNLOCK (vagg);
gst_video_frame_unmap (outframe);
@ -1077,6 +1334,18 @@ _sink_query (GstAggregator * agg, GstAggregatorPad * bpad, GstQuery * query)
}
}
/* GObject finalize: releases the blend task runner, if one was created,
 * and chains up to the parent class. */
static void
gst_compositor_finalize (GObject * object)
{
  GstCompositor *compositor = GST_COMPOSITOR (object);
  GstParallelizedTaskRunner *runner = compositor->blend_runner;

  if (runner != NULL)
    gst_parallelized_task_runner_free (runner);
  compositor->blend_runner = NULL;

  G_OBJECT_CLASS (parent_class)->finalize (object);
}
/* GObject boilerplate */
static void
gst_compositor_class_init (GstCompositorClass * klass)
@ -1089,6 +1358,7 @@ gst_compositor_class_init (GstCompositorClass * klass)
gobject_class->get_property = gst_compositor_get_property;
gobject_class->set_property = gst_compositor_set_property;
gobject_class->finalize = gst_compositor_finalize;
gstelement_class->request_new_pad =
GST_DEBUG_FUNCPTR (gst_compositor_request_new_pad);

View file

@ -76,6 +76,34 @@ typedef enum
COMPOSITOR_OPERATOR_ADD,
} GstCompositorOperator;
/* copied from video-converter.c */
typedef void (*GstParallelizedTaskFunc) (gpointer user_data);
typedef struct _GstParallelizedTaskRunner GstParallelizedTaskRunner;
typedef struct _GstParallelizedTaskThread GstParallelizedTaskThread;
/* Per-slot bookkeeping entry of a GstParallelizedTaskRunner. */
struct _GstParallelizedTaskThread
{
/* runner this entry belongs to */
GstParallelizedTaskRunner *runner;
/* position of this entry in the runner's threads array */
guint idx;
/* worker thread handle; no thread is created for entry 0, which stands
 * for the thread calling run() */
GThread *thread;
};
/* Fixed-size pool that runs one function over an array of task data, one
 * entry per slot, with the calling thread acting as one of the slots. */
struct _GstParallelizedTaskRunner
{
/* number of slots, including the one served by the calling thread */
guint n_threads;
/* array of n_threads entries */
GstParallelizedTaskThread *threads;
/* function of the job currently being run; NULL while idle */
GstParallelizedTaskFunc func;
/* per-slot arguments of the job currently being run; NULL while idle */
gpointer *task_data;
/* lock is held by the workers and by run() when touching n_todo, n_done and quit */
GMutex lock;
/* cond_todo: signalled when work is published or quit is set;
 * cond_done: signalled by a worker when n_done reaches n_threads - 1 */
GCond cond_todo, cond_done;
/* n_todo: index of the next unclaimed task, -1 when none is left;
 * n_done: number of worker start-ups / finished tasks reported so far */
gint n_todo, n_done;
/* set to TRUE to make the workers leave their loop */
gboolean quit;
};
/**
* GstCompositor:
*
@ -92,6 +120,8 @@ struct _GstCompositor
BlendFunction blend, overlay;
FillCheckerFunction fill_checker;
FillColorFunction fill_color;
GstParallelizedTaskRunner *blend_runner;
};
/**