kate: support for rendering on several YUV formats

This speeds up rendering a fair bit by not requiring colorspace
conversion, whether there is anything to overlay or not.

The blending code was borrowed from textoverlay. It would probably be
worth moving it into a shared library such as libgstvideo at
some point.

https://bugzilla.gnome.org/show_bug.cgi?id=660528
This commit is contained in:
Vincent Penquerc'h 2011-09-29 22:43:30 +01:00 committed by Vincent Penquerc'h
parent 4735a7554b
commit 9eb79984a8
2 changed files with 483 additions and 7 deletions

View file

@ -3,6 +3,7 @@
* Copyright 2005 Thomas Vander Stichele <thomas@apestaart.org>
* Copyright 2005 Ronald S. Bultje <rbultje@ronald.bitfreak.net>
* Copyright 2008 Vincent Penquerc'h <ogg.k.ogg.k@googlemail.com>
* Copyright (C) <2009> Young-Ho Cha <ganadist@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@ -125,6 +126,57 @@ enum
ARG_SILENT
};
/* RGB -> YUV blitting routines taken from textoverlay,
   original code from Young-Ho Cha <ganadist@gmail.com> */

/* COMP_Y, COMP_U, COMP_V:
 * Convert one RGB triple into a single Y, U or V sample using integer
 * weights scaled by 65536 (hence the >> 16), then store the result,
 * clamped to [0, 255], into the lvalue `ret`.
 *
 * Every argument is parenthesised so that expressions (e.g. `r0 + r1`) can
 * be passed safely, and the body is wrapped in G_STMT_START/G_STMT_END so
 * the macros behave as a single statement (safe in an unbraced if/else).
 * The value is clamped in a temporary before being stored, so `ret` is
 * evaluated exactly once and never holds the unclamped value. */
#define COMP_Y(ret, r, g, b) G_STMT_START { \
  int _comp_y = ((19595 * (r)) >> 16) + ((38470 * (g)) >> 16) + \
      ((7471 * (b)) >> 16); \
  (ret) = CLAMP (_comp_y, 0, 255); \
} G_STMT_END
#define COMP_U(ret, r, g, b) G_STMT_START { \
  int _comp_u = -((11059 * (r)) >> 16) - ((21709 * (g)) >> 16) + \
      ((32768 * (b)) >> 16) + 128; \
  (ret) = CLAMP (_comp_u, 0, 255); \
} G_STMT_END
#define COMP_V(ret, r, g, b) G_STMT_START { \
  int _comp_v = ((32768 * (r)) >> 16) - ((27439 * (g)) >> 16) - \
      ((5329 * (b)) >> 16) + 128; \
  (ret) = CLAMP (_comp_v, 0, 255); \
} G_STMT_END
/* BLEND: linear interpolation between two samples.
 * Stores (v0 * alpha + v1 * (255 - alpha)) / 255 into the lvalue `ret`;
 * alpha == 255 yields v0, alpha == 0 yields v1.  `ret` is evaluated exactly
 * once, so it may carry a side effect such as `*p++`. */
#define BLEND(ret, alpha, v0, v1) G_STMT_START { \
  (ret) = ((v0) * (alpha) + (v1) * (255 - (alpha))) / 255; \
} G_STMT_END

/* OVER: "A over B" compositing of one colour component.
 * Ca/alphaA are the component and alpha of the top layer, Cb/alphaB those
 * of the bottom layer, and alphaNew is the divisor supplied by the caller
 * (it must be non-zero).  The result is clamped to [0, 255] and stored
 * into the lvalue `ret`. */
#define OVER(ret, alphaA, Ca, alphaB, Cb, alphaNew) G_STMT_START { \
  gint _tmp; \
  _tmp = ((Ca) * (alphaA) + (Cb) * (alphaB) * (255 - (alphaA)) / 255) / \
      (alphaNew); \
  (ret) = CLAMP (_tmp, 0, 255); \
} G_STMT_END
/* Byte index of each channel inside one 4-byte ARGB pixel of the rendered
 * overlay image.  The order depends on the host byte order: alpha is the
 * last byte on little-endian hosts and the first byte otherwise. */
#if G_BYTE_ORDER != G_LITTLE_ENDIAN
# define TIGER_ARGB_A 0
# define TIGER_ARGB_R 1
# define TIGER_ARGB_G 2
# define TIGER_ARGB_B 3
#else
# define TIGER_ARGB_B 0
# define TIGER_ARGB_G 1
# define TIGER_ARGB_R 2
# define TIGER_ARGB_A 3
#endif
/* TIGER_UNPREMULTIPLY: undo alpha premultiplication in place.
 * Each of r, g and b (lvalues) is replaced by round (c * 255 / a), capped
 * at 255; when a is 0 all three are set to 0.  `a` is only read.
 * Arguments are parenthesised so that an expression can be passed as `a`;
 * note that `a` is still evaluated several times, so it must be free of
 * side effects. */
#define TIGER_UNPREMULTIPLY(a,r,g,b) G_STMT_START { \
  (b) = ((a) > 0) ? MIN (((b) * 255 + (a) / 2) / (a), 255) : 0; \
  (g) = ((a) > 0) ? MIN (((g) * 255 + (a) / 2) / (a), 255) : 0; \
  (r) = ((a) > 0) ? MIN (((r) * 255 + (a) / 2) / (a), 255) : 0; \
} G_STMT_END
static GstStaticPadTemplate kate_sink_factory =
GST_STATIC_PAD_TEMPLATE ("subtitle_sink",
GST_PAD_SINK,
@ -134,12 +186,12 @@ static GstStaticPadTemplate kate_sink_factory =
#if G_BYTE_ORDER == G_LITTLE_ENDIAN
#define TIGER_VIDEO_CAPS \
GST_VIDEO_CAPS_xRGB ", endianness = (int)1234; " \
GST_VIDEO_CAPS_BGRx ", endianness = (int)4321"
GST_VIDEO_CAPS_xRGB ";" GST_VIDEO_CAPS_BGRx ";" \
GST_VIDEO_CAPS_YUV ("{AYUV, I420, YV12, UYVY, NV12, NV21}")
#else
#define TIGER_VIDEO_CAPS \
GST_VIDEO_CAPS_BGRx ", endianness = (int)4321; " \
GST_VIDEO_CAPS_xRGB ", endianness = (int)1234"
GST_VIDEO_CAPS_BGRx ";" GST_VIDEO_CAPS_xRGB ";" \
GST_VIDEO_CAPS_YUV ("{AYUV, I420, YV12, UYVY, NV12, NV21}")
#endif
static GstStaticPadTemplate video_sink_factory =
@ -379,6 +431,9 @@ gst_kate_tiger_dispose (GObject * object)
tiger->default_font_desc = NULL;
}
g_free (tiger->render_buffer);
tiger->render_buffer = NULL;
g_cond_free (tiger->cond);
tiger->cond = NULL;
@ -707,6 +762,7 @@ gst_kate_tiger_video_set_caps (GstPad * pad, GstCaps * caps)
tiger->swap_rgb = FALSE;
if (gst_video_format_parse_caps (caps, &format, &w, &h)) {
tiger->video_format = format;
tiger->video_width = w;
tiger->video_height = h;
}
@ -731,6 +787,405 @@ gst_kate_tiger_get_time (GstKateTiger * tiger)
return pos / (gdouble) GST_SECOND;
}
/* Blend the luma of an ARGB overlay image onto a plane holding one byte
 * per pixel.
 *
 * tiger:        element instance; only video_width/video_height are read,
 *               to clip the blitted area.
 * dest:         start of the destination plane.
 * xpos, ypos:   destination position of the image's top-left corner; a
 *               negative xpos is clamped to 0.
 * image:        source pixels, 4 bytes each, image_width pixels per row.
 * image_width,
 * image_height: size of the source image in pixels.
 * dest_stride:  bytes per destination row.
 *
 * Every source pixel is un-premultiplied, converted to Y and blended over
 * the existing destination byte using the pixel's alpha; pixels whose
 * alpha is 0 leave the destination untouched. */
static inline void
gst_kate_tiger_blit_1 (GstKateTiger * tiger, guchar * dest, gint xpos,
    gint ypos, const guint8 * image, gint image_width, gint image_height,
    guint dest_stride)
{
  gint row, col;
  gint cols = image_width;
  gint rows = image_height;

  /* Clip the blitted area against the video frame. */
  if (xpos < 0)
    xpos = 0;
  if (xpos + cols > tiger->video_width)
    cols = tiger->video_width - xpos;
  if (ypos + rows > tiger->video_height)
    rows = tiger->video_height - ypos;

  /* Move to the first destination row. */
  dest += ypos * dest_stride;

  for (row = 0; row < rows; row++) {
    const guint8 *src = image + 4 * (row * image_width);
    guchar *out = dest + row * dest_stride + xpos;

    for (col = 0; col < cols; col++, src += 4, out++) {
      guchar blue = src[TIGER_ARGB_B];
      guchar green = src[TIGER_ARGB_G];
      guchar red = src[TIGER_ARGB_R];
      guchar alpha = src[TIGER_ARGB_A];
      gint luma, under;

      TIGER_UNPREMULTIPLY (alpha, red, green, blue);

      /* Fully transparent: keep the video pixel as is. */
      if (alpha == 0)
        continue;

      COMP_Y (luma, red, green, blue);
      under = *out;
      BLEND (*out, alpha, luma, under);
    }
  }
}
/* Blend the chroma of an ARGB overlay image onto 2x2 subsampled Cb and Cr
 * destinations.
 *
 * tiger:          element instance; only video_width/video_height are
 *                 read, to clip the blitted area.
 * destcb, destcr: start of the Cb and Cr sample data.
 * xpos, ypos:     destination position of the image's top-left corner.
 * image:          source pixels, 4 bytes each, image_width pixels per row.
 * image_width,
 * image_height:   size of the source image in pixels; the last two columns
 *                 and rows are excluded from the blit, which keeps the 2x2
 *                 reads below inside the image.
 * destcb_stride,
 * destcr_stride:  bytes per row of the Cb and Cr destinations.
 * pix_stride:     bytes between two consecutive chroma samples in a row
 *                 (the callers in this file pass 1 for separate planes and
 *                 2 for an interleaved plane).
 *
 * Each 2x2 group of source pixels is un-premultiplied and averaged (with
 * rounding) into one colour and one alpha, converted to U and V, and
 * blended over the matching chroma samples.  Groups whose averaged alpha
 * is 0 leave the destination untouched. */
static inline void
gst_kate_tiger_blit_sub2x2cbcr (GstKateTiger * tiger,
    guchar * destcb, guchar * destcr, gint xpos, gint ypos,
    const guint8 * image, gint image_width, gint image_height,
    guint destcb_stride, guint destcr_stride, guint pix_stride)
{
  gint row, col;
  gint cols = image_width - 2;
  gint rows = image_height - 2;

  /* NOTE(review): xpos is scaled by pix_stride before it is used for
   * clipping against the video width - confirm this is intended for
   * non-zero positions. */
  xpos *= pix_stride;
  if (xpos < 0)
    xpos = 0;
  if (xpos + cols > tiger->video_width)
    cols = tiger->video_width - xpos;
  if (ypos + rows > tiger->video_height)
    rows = tiger->video_height - ypos;

  /* One chroma row covers two image rows. */
  destcb += (ypos / 2) * destcb_stride;
  destcr += (ypos / 2) * destcr_stride;

  for (row = 0; row < rows; row += 2) {
    const guint8 *top = image + 4 * (row * image_width);
    const guint8 *bottom = top + 4 * image_width;
    guchar *cb_out = destcb + (row / 2) * destcb_stride + xpos / 2;
    guchar *cr_out = destcr + (row / 2) * destcr_stride + xpos / 2;

    for (col = 0; col < cols; col += 2, top += 8, bottom += 8,
        cb_out += pix_stride, cr_out += pix_stride) {
      const guint8 *quad[4];
      gushort sum_r = 0, sum_g = 0, sum_b = 0, sum_a = 0;
      gint k, cb, cr, under;

      /* The four source pixels covered by this chroma sample. */
      quad[0] = top;
      quad[1] = top + 4;
      quad[2] = bottom;
      quad[3] = bottom + 4;

      for (k = 0; k < 4; k++) {
        gushort pb = quad[k][TIGER_ARGB_B];
        gushort pg = quad[k][TIGER_ARGB_G];
        gushort pr = quad[k][TIGER_ARGB_R];
        gushort pa = quad[k][TIGER_ARGB_A];

        TIGER_UNPREMULTIPLY (pa, pr, pg, pb);
        sum_b += pb;
        sum_g += pg;
        sum_r += pr;
        sum_a += pa;
      }

      /* Average of four samples, + 2 for rounding. */
      sum_b = (sum_b + 2) / 4;
      sum_g = (sum_g + 2) / 4;
      sum_r = (sum_r + 2) / 4;
      sum_a = (sum_a + 2) / 4;

      /* Fully transparent group: keep the video chroma as is. */
      if (sum_a == 0)
        continue;

      COMP_U (cb, sum_r, sum_g, sum_b);
      COMP_V (cr, sum_r, sum_g, sum_b);

      under = *cb_out;
      BLEND (*cb_out, sum_a, cb, under);
      under = *cr_out;
      BLEND (*cr_out, sum_a, cr, under);
    }
  }
}
/* FIXME:
* - use proper strides and offset for I420
*/
/* Blend an ARGB overlay image onto an NV12 or NV21 frame.
 *
 * Strides and component offsets are queried from libgstvideo for the
 * negotiated format and frame size.  Luma is blitted with
 * gst_kate_tiger_blit_1(); chroma is blitted with
 * gst_kate_tiger_blit_sub2x2cbcr() using a sample step of 2 bytes and the
 * same row stride for both chroma components.
 *
 * yuv_pixels:   start of the frame data.
 * xpos, ypos:   destination position of the image; rounded up to even.
 * image:        source pixels, 4 bytes each.
 * image_width,
 * image_height: size of the source image in pixels. */
static inline void
gst_kate_tiger_blit_NV12_NV21 (GstKateTiger * tiger,
    guint8 * yuv_pixels, gint xpos, gint ypos, const guint8 * image,
    gint image_width, gint image_height)
{
  int frame_w = tiger->video_width;
  int frame_h = tiger->video_height;
  int luma_stride =
      gst_video_format_get_row_stride (tiger->video_format, 0, frame_w);
  int chroma_stride =
      gst_video_format_get_row_stride (tiger->video_format, 1, frame_w);
  int cb_offset = gst_video_format_get_component_offset (tiger->video_format,
      1, frame_w, frame_h);
  int cr_offset = gst_video_format_get_component_offset (tiger->video_format,
      2, frame_w, frame_h);

  /* Chroma is subsampled 2x2, so the position has to land on a whole
   * chroma sample in both directions. */
  xpos = GST_ROUND_UP_2 (xpos);
  ypos = GST_ROUND_UP_2 (ypos);

  gst_kate_tiger_blit_1 (tiger, yuv_pixels, xpos, ypos, image, image_width,
      image_height, luma_stride);
  gst_kate_tiger_blit_sub2x2cbcr (tiger, yuv_pixels + cb_offset,
      yuv_pixels + cr_offset, xpos, ypos, image, image_width, image_height,
      chroma_stride, chroma_stride, 2);
}
/* Blend an ARGB overlay image onto an I420 or YV12 frame.
 *
 * Strides and component offsets are queried from libgstvideo for the
 * negotiated format and frame size.  Luma is blitted with
 * gst_kate_tiger_blit_1(); chroma is blitted with
 * gst_kate_tiger_blit_sub2x2cbcr() using a sample step of 1 byte and a
 * separate row stride for each chroma component.
 *
 * yuv_pixels:   start of the frame data.
 * xpos, ypos:   destination position of the image; rounded up to even.
 * image:        source pixels, 4 bytes each.
 * image_width,
 * image_height: size of the source image in pixels. */
static inline void
gst_kate_tiger_blit_I420_YV12 (GstKateTiger * tiger,
    guint8 * yuv_pixels, gint xpos, gint ypos, const guint8 * image,
    gint image_width, gint image_height)
{
  int frame_w = tiger->video_width;
  int frame_h = tiger->video_height;
  int luma_stride =
      gst_video_format_get_row_stride (tiger->video_format, 0, frame_w);
  int cb_stride =
      gst_video_format_get_row_stride (tiger->video_format, 1, frame_w);
  int cr_stride =
      gst_video_format_get_row_stride (tiger->video_format, 2, frame_w);
  int cb_offset = gst_video_format_get_component_offset (tiger->video_format,
      1, frame_w, frame_h);
  int cr_offset = gst_video_format_get_component_offset (tiger->video_format,
      2, frame_w, frame_h);

  /* Chroma is subsampled 2x2, so the position has to land on a whole
   * chroma sample in both directions. */
  xpos = GST_ROUND_UP_2 (xpos);
  ypos = GST_ROUND_UP_2 (ypos);

  gst_kate_tiger_blit_1 (tiger, yuv_pixels, xpos, ypos, image, image_width,
      image_height, luma_stride);
  gst_kate_tiger_blit_sub2x2cbcr (tiger, yuv_pixels + cb_offset,
      yuv_pixels + cr_offset, xpos, ypos, image, image_width, image_height,
      cb_stride, cr_stride, 1);
}
/* Blend an ARGB overlay image onto a packed UYVY frame.
 *
 * tiger:        element instance; only video_width/video_height are read.
 * yuv_pixels:   start of the frame data.
 * xpos, ypos:   destination position of the image; xpos is rounded up to
 *               even and a negative xpos is clamped to 0.
 * image:        source pixels, 4 bytes each, image_width pixels per row.
 * image_width,
 * image_height: size of the source image in pixels; the last two columns
 *               and rows are excluded from the blit.
 *
 * Source pixels are processed in horizontal pairs.  Each pair shares one
 * alpha (rounded mean of both alphas) and one U/V sample (computed from
 * the rounded mean colour), while each pixel keeps its own Y.  Pairs whose
 * shared alpha is 0 leave the destination untouched.
 *
 * The two-sample averages round with + 1.  They previously added 2, which
 * made the shared alpha at least 1 (so fully transparent pairs still
 * altered the video) and at most 256 (a negative background weight in
 * BLEND). */
static inline void
gst_kate_tiger_blit_UYVY (GstKateTiger * tiger,
    guint8 * yuv_pixels, gint xpos, gint ypos, const guint8 * image,
    gint image_width, gint image_height)
{
  int a0, r0, g0, b0;
  int a1, r1, g1, b1;
  int y0, y1, u, v;
  int i, j;
  int h, w;
  const guint8 *pimage;
  guchar *dest;

  /* because U/V is 2x horizontally subsampled, we need to round to a
   * boundary of integer number of U/V pixels in x dimension:
   */
  xpos = GST_ROUND_UP_2 (xpos);

  w = image_width - 2;
  h = image_height - 2;

  /* Clip the blitted area against the video frame. */
  if (xpos < 0) {
    xpos = 0;
  }
  if (xpos + w > tiger->video_width) {
    w = tiger->video_width - xpos;
  }
  if (ypos + h > tiger->video_height) {
    h = tiger->video_height - ypos;
  }

  for (i = 0; i < h; i++) {
    pimage = image + i * image_width * 4;
    /* NOTE(review): assumes every frame row is exactly video_width * 2
     * bytes, i.e. no row padding - confirm against the negotiated stride. */
    dest = yuv_pixels + (i + ypos) * tiger->video_width * 2 + xpos * 2;

    for (j = 0; j < w; j += 2) {
      b0 = pimage[TIGER_ARGB_B];
      g0 = pimage[TIGER_ARGB_G];
      r0 = pimage[TIGER_ARGB_R];
      a0 = pimage[TIGER_ARGB_A];
      TIGER_UNPREMULTIPLY (a0, r0, g0, b0);
      pimage += 4;

      b1 = pimage[TIGER_ARGB_B];
      g1 = pimage[TIGER_ARGB_G];
      r1 = pimage[TIGER_ARGB_R];
      a1 = pimage[TIGER_ARGB_A];
      TIGER_UNPREMULTIPLY (a1, r1, g1, b1);
      pimage += 4;

      /* Shared alpha for the pair: mean of both alphas, + 1 for rounding. */
      a0 = (a0 + a1 + 1) / 2;
      if (a0 == 0) {
        /* Both pixels are fully transparent: skip the U Y V Y group. */
        dest += 4;
        continue;
      }

      /* Luma is per pixel, so compute it before the colours are averaged. */
      COMP_Y (y0, r0, g0, b0);
      COMP_Y (y1, r1, g1, b1);

      /* Shared colour for the chroma sample, + 1 for rounding. */
      b0 = (b0 + b1 + 1) / 2;
      g0 = (g0 + g1 + 1) / 2;
      r0 = (r0 + r1 + 1) / 2;

      COMP_U (u, r0, g0, b0);
      COMP_V (v, r0, g0, b0);

      /* Destination byte order within a pair is U, Y0, V, Y1. */
      BLEND (*dest, a0, u, *dest);
      dest++;
      BLEND (*dest, a0, y0, *dest);
      dest++;
      BLEND (*dest, a0, v, *dest);
      dest++;
      BLEND (*dest, a0, y1, *dest);
      dest++;
    }
  }
}
/* Composite an ARGB overlay image onto a packed AYUV frame.
 *
 * tiger:        element instance; only video_width/video_height are read.
 * rgb_pixels:   start of the frame data (4 bytes per pixel, rows of
 *               video_width pixels; despite the name the destination
 *               bytes are treated as A, Y, U, V).
 * xpos, ypos:   destination position of the image; a negative xpos is
 *               clamped to 0.
 * image:        source pixels, 4 bytes each, image_width pixels per row.
 * image_width,
 * image_height: size of the source image in pixels.
 *
 * Each source pixel is un-premultiplied, converted to Y, U and V, and
 * composited "over" the destination pixel; the destination alpha is
 * updated as well. */
static inline void
gst_kate_tiger_blit_AYUV (GstKateTiger * tiger,
    guint8 * rgb_pixels, gint xpos, gint ypos, const guint8 * image,
    gint image_width, gint image_height)
{
  int row, col;
  int cols = image_width;
  int rows = image_height;

  /* Clip the blitted area against the video frame. */
  if (xpos < 0)
    xpos = 0;
  if (xpos + cols > tiger->video_width)
    cols = tiger->video_width - xpos;
  if (ypos + rows > tiger->video_height)
    rows = tiger->video_height - ypos;

  for (row = 0; row < rows; row++) {
    const guint8 *src_row = image + row * image_width * 4;
    guchar *dst_row =
        rgb_pixels + (row + ypos) * 4 * tiger->video_width + xpos * 4;

    for (col = 0; col < cols; col++) {
      const guint8 *src = src_row + 4 * col;
      guchar *px = dst_row + 4 * col;
      int alpha = src[TIGER_ARGB_A];
      int blue = src[TIGER_ARGB_B];
      int green = src[TIGER_ARGB_G];
      int red = src[TIGER_ARGB_R];
      int luma, cb, cr, out_alpha;

      TIGER_UNPREMULTIPLY (alpha, red, green, blue);

      /* Convert the overlay pixel to YUV. */
      COMP_Y (luma, red, green, blue);
      COMP_U (cb, red, green, blue);
      COMP_V (cr, red, green, blue);

      /* Perform overlay "OVER" frame alpha compositing.  The resulting
       * alpha is biased by 1 so that it can be used as a divisor even
       * when both layers are fully transparent. */
      out_alpha = alpha + (px[0] * (255 - alpha)) / 255 + 1;
      OVER (px[1], alpha, luma, px[0], px[1], out_alpha);
      OVER (px[2], alpha, cb, px[0], px[2], out_alpha);
      OVER (px[3], alpha, cr, px[0], px[3], out_alpha);

      /* Store the new alpha without the temporary bias. */
      px[0] = out_alpha - 1;
    }
  }
}
/* Blend a rendered ARGB overlay onto a YUV video frame.
 *
 * Dispatches on tiger->video_format to the blitter for that format.  The
 * overlay is always placed at position (0, 0) of the frame.
 *
 * tiger:        element instance; video_format selects the blitter.
 * video_frame:  buffer whose data is modified in place.
 * image:        overlay pixels, 4 bytes each.
 * image_width,
 * image_height: size of the overlay in pixels.
 *
 * Formats other than I420, YV12, NV12, NV21, UYVY and AYUV are a
 * programming error and trigger g_assert_not_reached(). */
static void
gst_kate_tiger_blend_yuv (GstKateTiger * tiger, GstBuffer * video_frame,
    const guint8 * image, gint image_width, gint image_height)
{
  gint xpos = 0, ypos = 0;
  guint8 *frame_data = GST_BUFFER_DATA (video_frame);

  switch (tiger->video_format) {
    case GST_VIDEO_FORMAT_I420:
    case GST_VIDEO_FORMAT_YV12:
      gst_kate_tiger_blit_I420_YV12 (tiger, frame_data, xpos, ypos, image,
          image_width, image_height);
      break;
    case GST_VIDEO_FORMAT_NV12:
    case GST_VIDEO_FORMAT_NV21:
      gst_kate_tiger_blit_NV12_NV21 (tiger, frame_data, xpos, ypos, image,
          image_width, image_height);
      break;
    case GST_VIDEO_FORMAT_UYVY:
      gst_kate_tiger_blit_UYVY (tiger, frame_data, xpos, ypos, image,
          image_width, image_height);
      break;
    case GST_VIDEO_FORMAT_AYUV:
      gst_kate_tiger_blit_AYUV (tiger, frame_data, xpos, ypos, image,
          image_width, image_height);
      break;
    default:
      g_assert_not_reached ();
  }
}
static GstFlowReturn
gst_kate_tiger_video_chain (GstPad * pad, GstBuffer * buf)
{
@ -757,8 +1212,9 @@ gst_kate_tiger_video_chain (GstPad * pad, GstBuffer * buf)
g_cond_broadcast (tiger->cond);
}
/* Update first with a dummy buffer pointer we cannot write to. If there is nothing
to draw, we will not have to make it writeable */
/* Update first with a dummy buffer pointer we cannot write to, but with the
right dimensions. If there is nothing to draw, we will not have to make
it writeable. */
ptr = GST_BUFFER_DATA (buf);
ret =
tiger_renderer_set_buffer (tiger->tr, ptr, tiger->video_width,
@ -791,7 +1247,19 @@ gst_kate_tiger_video_chain (GstPad * pad, GstBuffer * buf)
}
/* and setup that buffer before rendering */
ptr = GST_BUFFER_DATA (buf);
if (gst_video_format_is_yuv (tiger->video_format)) {
guint8 *tmp = g_realloc (tiger->render_buffer,
tiger->video_width * tiger->video_height * 4);
if (!tmp) {
GST_WARNING_OBJECT (tiger, "Failed to allocate render buffer");
goto pass;
}
tiger->render_buffer = tmp;
ptr = tiger->render_buffer;
tiger_renderer_set_surface_clear_color (tiger->tr, 1, 0.0, 0.0, 0.0, 0.0);
} else {
ptr = GST_BUFFER_DATA (buf);
}
ret =
tiger_renderer_set_buffer (tiger->tr, ptr, tiger->video_width,
tiger->video_height, tiger->video_width * 4, tiger->swap_rgb);
@ -808,6 +1276,11 @@ gst_kate_tiger_video_chain (GstPad * pad, GstBuffer * buf)
GST_LOG_OBJECT (tiger, "Tiger renderer rendered on video frame at %f", t);
}
if (gst_video_format_is_yuv (tiger->video_format)) {
gst_kate_tiger_blend_yuv (tiger, buf, tiger->render_buffer,
tiger->video_width, tiger->video_height);
}
pass:
GST_KATE_TIGER_MUTEX_UNLOCK (tiger);

View file

@ -49,6 +49,7 @@
#include <kate/kate.h>
#include <tiger/tiger.h>
#include <gst/gst.h>
#include <gst/video/video.h>
#include "gstkateutil.h"
G_BEGIN_DECLS
@ -90,9 +91,11 @@ struct _GstKateTiger
guchar default_background_a;
gboolean silent;
GstVideoFormat video_format;
gint video_width;
gint video_height;
gboolean swap_rgb;
guint8 *render_buffer;
GMutex *mutex;
GCond *cond;