videomixer: Refactor processing functions

This makes it easier to plug in optimized processing functions in the future, e.g. SSE or AltiVec implementations.
2025-04-14 03:54:11 +00:00 · 2010-01-04 10:24:45 +01:00 · 2010-01-04 10:24:45 +01:00 · 2950262186
commit 2950262186
parent 5975b01b01
10 changed files with 897 additions and 1239 deletions
--- a/gst/videomixer/Makefile.am
+++ b/gst/videomixer/Makefile.am
@ -1,10 +1,10 @@
 plugin_LTLIBRARIES = libgstvideomixer.la

-libgstvideomixer_la_SOURCES = videomixer.c blend_ayuv.c blend_bgra.c blend_i420.c blend_rgb.c
+libgstvideomixer_la_SOURCES = videomixer.c blend.c
 libgstvideomixer_la_CFLAGS = $(GST_CFLAGS) $(GST_BASE_CFLAGS) $(GST_CONTROLLER_CFLAGS) $(GST_PLUGINS_BASE_CFLAGS) $(LIBOIL_CFLAGS)
 libgstvideomixer_la_LIBADD = $(GST_LIBS) $(GST_BASE_LIBS) $(GST_CONTROLLER_LIBS) $(GST_PLUGINS_BASE_LIBS) -lgstvideo-@GST_MAJORMINOR@ $(LIBOIL_LIBS)
 libgstvideomixer_la_LDFLAGS = $(GST_PLUGIN_LDFLAGS)
 libgstvideomixer_la_LIBTOOLFLAGS = --tag=disable-static

 # headers we need but don't want installed
-noinst_HEADERS = videomixer.h videomixerpad.h
+noinst_HEADERS = videomixer.h videomixerpad.h blend.h blend_mmx.h
--- a/gst/videomixer/blend.c
+++ b/gst/videomixer/blend.c
@ -0,0 +1,621 @@
+/* 
+ * Copyright (C) 2004 Wim Taymans <wim@fluendo.com>
+ * Copyright (C) 2006 Mindfruit Bv.
+ *   Author: Sjoerd Simons <sjoerd@luon.net>
+ *   Author: Alex Ugarte <alexugarte@gmail.com>
+ * Copyright (C) 2009 Alex Ugarte <augarte@vicomtech.org>
+ * Copyright (C) 2009 Sebastian Dröge <sebastian.droege@collabora.co.uk>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "blend.h"
+
+#include <liboil/liboil.h>
+#include <liboil/liboilcpu.h>
+#include <liboil/liboilfunction.h>
+
+#include <string.h>
+
+#ifdef HAVE_GCC_ASM
+#if defined(HAVE_CPU_I386) || defined(HAVE_CPU_X86_64)
+#define BUILD_X86_ASM
+
+#define GENERIC
+#include "blend_mmx.h"
+#undef GENERIC
+#endif
+#endif
+
+/* Below are the implementations of everything */
+
+/* Blends one 8 bit component: weighted sum of D (weight 255 - alpha) and
+ * S (weight alpha), scaled back down by a right shift of 8 bits. */
+#define BLEND(D,S,alpha) (((D) * (255 - (alpha)) + (S) * (alpha)) >> 8)
+
+/* Blends a rectangle of src_height rows of src_width bytes from src onto
+ * dest, using the constant weight b_alpha for every byte.
+ *
+ * src_stride and dest_stride are the distances in bytes between the starts
+ * of two consecutive rows in src and dest.  dest_width is not needed for the
+ * computation; it stays in the argument list because the callers in this
+ * file pass it.
+ */
+inline static void
+_blend_u8_c (guint8 * dest, const guint8 * src,
+    gint src_stride, gint dest_stride, gint src_width, gint src_height,
+    gint dest_width, gint b_alpha)
+{
+  gint i, j;
+  gint src_add = src_stride - src_width;
+  /* The inner loop advances dest by src_width bytes, so that is the amount
+   * that has to be taken off the stride to reach the next row.  Using
+   * dest_width here would skew every row after the first one whenever the
+   * blended area is narrower than the destination. */
+  gint dest_add = dest_stride - src_width;
+
+  for (i = 0; i < src_height; i++) {
+    for (j = 0; j < src_width; j++) {
+      *dest = BLEND (*dest, *src, b_alpha);
+      dest++;
+      src++;
+    }
+    src += src_add;
+    dest += dest_add;
+  }
+}
+
+/* A32 is for AYUV, ARGB and BGRA
+ *
+ * BLEND_A32 generates blend_<name>(), which blends a packed 4 bytes per
+ * pixel source frame of src_width x src_height pixels onto dest at position
+ * (xpos, ypos).  src_alpha is a global opacity; it is scaled by 256 and
+ * clamped to 0..256 before being handed to LOOP together with the clipped
+ * source rectangle.  Both frames are assumed to be tightly packed
+ * (stride == width * 4).
+ */
+#define BLEND_A32(name, LOOP) \
+static void \
+blend_##name (const guint8 * src, gint xpos, gint ypos, \
+    gint src_width, gint src_height, gdouble src_alpha, \
+    guint8 * dest, gint dest_width, gint dest_height) \
+{ \
+  guint s_alpha; \
+  gint src_stride, dest_stride; \
+  \
+  src_stride = src_width * 4; \
+  dest_stride = dest_width * 4; \
+  \
+  s_alpha = CLAMP ((gint) (src_alpha * 256), 0, 256); \
+  \
+  /* If it's completely transparent... we just return */ \
+  if (G_UNLIKELY (s_alpha == 0)) \
+    return; \
+  \
+  /* adjust src pointers for negative sizes */ \
+  if (xpos < 0) { \
+    src += -xpos * 4; \
+    src_width -= -xpos; \
+    xpos = 0; \
+  } \
+  if (ypos < 0) { \
+    src += -ypos * src_stride; \
+    src_height -= -ypos; \
+    ypos = 0; \
+  } \
+  /* adjust width/height if the src is bigger than dest */ \
+  if (xpos + src_width > dest_width) { \
+    src_width = dest_width - xpos; \
+  } \
+  if (ypos + src_height > dest_height) { \
+    src_height = dest_height - ypos; \
+  } \
+  \
+  /* Nothing is left to blend if the source lies completely outside of \
+   * dest; bail out before computing a pointer outside of the frame and \
+   * before handing negative sizes to the loop implementation */ \
+  if (src_width <= 0 || src_height <= 0) \
+    return; \
+  \
+  dest = dest + 4 * xpos + (ypos * dest_stride); \
+  \
+  LOOP (dest, src, src_height, src_width, src_stride, dest_stride, s_alpha); \
+}
+
+/* Generates _blend_loop_<name>_c(), the plain C blend loop for packed
+ * 4 bytes per pixel formats.  A is the byte index of the alpha component
+ * inside a pixel, C1..C3 are the byte indices of the other components.
+ * The per-pixel source alpha is scaled by s_alpha and the destination
+ * alpha is always set to fully opaque.
+ */
+#define BLEND_A32_LOOP_C(name, A, C1, C2, C3) \
+static inline void \
+_blend_loop_##name##_c (guint8 *dest, const guint8 *src, gint src_height, gint src_width, gint src_stride, gint dest_stride, guint s_alpha) { \
+  gint row, col; \
+  \
+  for (row = 0; row < src_height; row++) { \
+    const guint8 *s = src; \
+    guint8 *d = dest; \
+    \
+    for (col = 0; col < src_width; col++) { \
+      /* effective weight: pixel alpha scaled by the global alpha */ \
+      const gint alpha = (s[A] * s_alpha) >> 8; \
+      \
+      d[A] = 0xff; \
+      d[C1] = BLEND(d[C1], s[C1], alpha); \
+      d[C2] = BLEND(d[C2], s[C2], alpha); \
+      d[C3] = BLEND(d[C3], s[C3], alpha); \
+      \
+      s += 4; \
+      d += 4; \
+    } \
+    /* step both frames to the start of their next row */ \
+    src += src_stride; \
+    dest += dest_stride; \
+  } \
+}
+
+/* C loops and the public-facing blend functions built on top of them */
+BLEND_A32_LOOP_C (argb, 0, 1, 2, 3);
+BLEND_A32_LOOP_C (bgra, 3, 2, 1, 0);
+BLEND_A32 (argb_c, _blend_loop_argb_c);
+BLEND_A32 (bgra_c, _blend_loop_bgra_c);
+
+/* Generates fill_checker_<name>_c(), which fills a tightly packed 4 bytes
+ * per pixel frame with a checkerboard of 8x8 pixel squares.
+ *
+ * A is the byte index of the alpha component, C1..C3 the byte indices of
+ * the other components.  If RGB is TRUE all three components get the
+ * checker value (grey); otherwise only C1 gets it and C2/C3 are set to the
+ * neutral value 128.  Alpha is always fully opaque.
+ */
+#define A32_CHECKER_C(name, RGB, A, C1, C2, C3) \
+static void \
+fill_checker_##name##_c (guint8 * dest, gint width, gint height) \
+{ \
+  gint i, j; \
+  gint val; \
+  static const gint tab[] = { 80, 160, 80, 160 }; \
+  \
+  for (i = 0; i < height; i++) { \
+    for (j = 0; j < width; j++) { \
+      val = tab[((i & 0x8) >> 3) + ((j & 0x8) >> 3)]; \
+      dest[A] = 0xff; \
+      dest[C1] = val; \
+      dest[C2] = (RGB) ? val : 128; \
+      dest[C3] = (RGB) ? val : 128; \
+      /* advance to the next pixel in both the RGB and the YUV case; \
+       * without this only the very first pixel would be written */ \
+      dest += 4; \
+    } \
+  } \
+}
+
+A32_CHECKER_C (argb, TRUE, 0, 1, 2, 3);
+A32_CHECKER_C (bgra, TRUE, 3, 2, 1, 0);
+A32_CHECKER_C (ayuv, FALSE, 0, 1, 2, 3);
+
+/* YUV -> RGB conversion of a single pixel, clamped to 0..255.  The
+ * arguments are parenthesized so that arbitrary expressions can be passed
+ * safely. */
+#define YUV_TO_R(Y,U,V) (CLAMP (1.164 * ((Y) - 16) + 1.596 * ((V) - 128), 0, 255))
+#define YUV_TO_G(Y,U,V) (CLAMP (1.164 * ((Y) - 16) - 0.813 * ((V) - 128) - 0.391 * ((U) - 128), 0, 255))
+#define YUV_TO_B(Y,U,V) (CLAMP (1.164 * ((Y) - 16) + 2.018 * ((U) - 128), 0, 255))
+
+/* Generates fill_color_<name>(), which fills a 4 bytes per pixel frame with
+ * a single colour given as Y, U, V.  If RGB is TRUE the colour is converted
+ * to R, G, B first, otherwise Y, U, V are used as they are.  The actual
+ * fill is delegated to LOOP (dest, height, width, c1, c2, c3).
+ */
+#define A32_COLOR(name, RGB, LOOP) \
+static void \
+fill_color_##name (guint8 * dest, gint width, gint height, gint Y, gint U, gint V) \
+{ \
+  gint c1, c2, c3; \
+  \
+  if (RGB) { \
+    c1 = YUV_TO_R (Y, U, V); \
+    c2 = YUV_TO_G (Y, U, V); \
+    c3 = YUV_TO_B (Y, U, V); \
+  } else { \
+    c1 = Y; \
+    c2 = U; \
+    c3 = V; \
+  } \
+  LOOP (dest, height, width, c1, c2, c3); \
+}
+
+/* Generates _fill_color_loop_<name>_c(), the plain C loop that fills a
+ * tightly packed 4 bytes per pixel frame of width x height pixels with one
+ * colour.  A is the byte index of the alpha component (always set to fully
+ * opaque), C1..C3 are the byte indices that receive c1..c3.
+ */
+#define A32_COLOR_LOOP_C(name, A, C1, C2, C3) \
+static inline void \
+_fill_color_loop_##name##_c (guint8 *dest, gint height, gint width, gint c1, gint c2, gint c3) { \
+  gint i, j; \
+  \
+  for (i = 0; i < height; i++) { \
+    for (j = 0; j < width; j++) { \
+      dest[A] = 0xff; \
+      dest[C1] = c1; \
+      dest[C2] = c2; \
+      dest[C3] = c3; \
+      /* move on to the next pixel; without this only the first pixel \
+       * of the frame would ever be written */ \
+      dest += 4; \
+    } \
+  } \
+}
+
+A32_COLOR_LOOP_C (ac1c2c3, 0, 1, 2, 3);
+A32_COLOR_LOOP_C (c3c2c1a, 3, 2, 1, 0);
+A32_COLOR (argb_c, TRUE, _fill_color_loop_ac1c2c3_c);
+A32_COLOR (bgra_c, TRUE, _fill_color_loop_c3c2c1a_c);
+A32_COLOR (ayuv_c, FALSE, _fill_color_loop_ac1c2c3_c);
+
+/* I420 */
+/* Row strides, in bytes, of the Y, U and V planes for a frame that is
+ * `width` pixels wide. */
+#define I420_Y_ROWSTRIDE(width) (GST_ROUND_UP_4(width))
+#define I420_U_ROWSTRIDE(width) (GST_ROUND_UP_8(width)/2)
+#define I420_V_ROWSTRIDE(width) ((GST_ROUND_UP_8(I420_Y_ROWSTRIDE(width)))/2)
+
+/* Byte offsets of the Y, U and V planes from the start of the frame.  The
+ * Y plane is accounted for with GST_ROUND_UP_2 (h) rows, the U plane with
+ * GST_ROUND_UP_2 (h) / 2 rows. */
+#define I420_Y_OFFSET(w,h) (0)
+#define I420_U_OFFSET(w,h) (I420_Y_OFFSET(w,h)+(I420_Y_ROWSTRIDE(w)*GST_ROUND_UP_2(h)))
+#define I420_V_OFFSET(w,h) (I420_U_OFFSET(w,h)+(I420_U_ROWSTRIDE(w)*GST_ROUND_UP_2(h)/2))
+
+/* Generates two functions for planar I420 frames:
+ *
+ * _blend_i420_<name>() blends a single plane: src_height rows of src_width
+ * bytes from src onto dest.  A src_alpha of exactly 0.0 leaves dest
+ * untouched, exactly 1.0 copies the rows with MEMCPY, anything else is
+ * scaled by 256, clamped to 0..256 and handed to BLENDLOOP.
+ *
+ * blend_i420_<name>() blends a whole src frame onto dest at (xpos, ypos).
+ * The position is rounded up to even values, the source rectangle is
+ * clipped against the destination and then the Y, U and V planes are
+ * blended one after the other, the chroma planes at half resolution.
+ */
+#define I420_BLEND(name,MEMCPY,BLENDLOOP) \
+inline static void \
+_blend_i420_##name (const guint8 * src, guint8 * dest, \
+    gint src_stride, gint dest_stride, gint src_width, gint src_height, \
+    gint dest_width, gdouble src_alpha) \
+{ \
+  gint i; \
+  gint b_alpha; \
+  \
+  /* If it's completely transparent... we just return */ \
+  if (G_UNLIKELY (src_alpha == 0.0)) { \
+    GST_INFO ("Fast copy (alpha == 0.0)"); \
+    return; \
+  } \
+  \
+  /* If it's completely opaque, we do a fast copy */ \
+  if (G_UNLIKELY (src_alpha == 1.0)) { \
+    GST_INFO ("Fast copy (alpha == 1.0)"); \
+    for (i = 0; i < src_height; i++) { \
+      MEMCPY (dest, src, src_width); \
+      src += src_stride; \
+      dest += dest_stride; \
+    } \
+    return; \
+  } \
+  \
+  b_alpha = CLAMP ((gint) (src_alpha * 256), 0, 256); \
+  \
+  BLENDLOOP(dest, src, src_stride, dest_stride, src_width, src_height, dest_width, b_alpha); \
+} \
+\
+static void \
+blend_i420_##name (const guint8 * src, gint xpos, gint ypos, \
+    gint src_width, gint src_height, gdouble src_alpha, \
+    guint8 * dest, gint dest_width, gint dest_height) \
+{ \
+  const guint8 *b_src; \
+  guint8 *b_dest; \
+  gint b_src_width = src_width; \
+  gint b_src_height = src_height; \
+  gint xoffset = 0; \
+  gint yoffset = 0; \
+  \
+  xpos = GST_ROUND_UP_2 (xpos); \
+  ypos = GST_ROUND_UP_2 (ypos); \
+  \
+  /* adjust src pointers for negative sizes */ \
+  if (xpos < 0) { \
+    xoffset = -xpos; \
+    b_src_width -= -xpos; \
+    xpos = 0; \
+  } \
+  if (ypos < 0) { \
+    yoffset += -ypos; \
+    b_src_height -= -ypos; \
+    ypos = 0; \
+  } \
+  /* If x or y offset are larger then the source it's outside of the picture. \
+   * The vertical offset has to be compared against the source height, \
+   * not its width. */ \
+  if (xoffset > src_width || yoffset > src_height) { \
+    return; \
+  } \
+  \
+  /* adjust width/height if the src is bigger than dest */ \
+  if (xpos + src_width > dest_width) { \
+    b_src_width = dest_width - xpos; \
+  } \
+  if (ypos + src_height > dest_height) { \
+    b_src_height = dest_height - ypos; \
+  } \
+  if (b_src_width < 0 || b_src_height < 0) { \
+    return; \
+  } \
+  \
+  /* First mix Y, then U, then V */ \
+  b_src = src + I420_Y_OFFSET (src_width, src_height); \
+  b_dest = dest + I420_Y_OFFSET (dest_width, dest_height); \
+  _blend_i420_##name (b_src + xoffset + yoffset * I420_Y_ROWSTRIDE (src_width), \
+      b_dest + xpos + ypos * I420_Y_ROWSTRIDE (dest_width), \
+      I420_Y_ROWSTRIDE (src_width), \
+      I420_Y_ROWSTRIDE (dest_width), b_src_width, b_src_height, \
+      dest_width, src_alpha); \
+  \
+  b_src = src + I420_U_OFFSET (src_width, src_height); \
+  b_dest = dest + I420_U_OFFSET (dest_width, dest_height); \
+  \
+  _blend_i420_##name (b_src + xoffset / 2 + \
+      yoffset / 2 * I420_U_ROWSTRIDE (src_width), \
+      b_dest + xpos / 2 + ypos / 2 * I420_U_ROWSTRIDE (dest_width), \
+      I420_U_ROWSTRIDE (src_width), I420_U_ROWSTRIDE (dest_width), \
+      b_src_width / 2, GST_ROUND_UP_2 (b_src_height) / 2, dest_width / 2, \
+      src_alpha); \
+  \
+  b_src = src + I420_V_OFFSET (src_width, src_height); \
+  b_dest = dest + I420_V_OFFSET (dest_width, dest_height); \
+  \
+  _blend_i420_##name (b_src + xoffset / 2 + \
+      yoffset / 2 * I420_V_ROWSTRIDE (src_width), \
+      b_dest + xpos / 2 + ypos / 2 * I420_V_ROWSTRIDE (dest_width), \
+      I420_V_ROWSTRIDE (src_width), I420_V_ROWSTRIDE (dest_width), \
+      b_src_width / 2, GST_ROUND_UP_2 (b_src_height) / 2, dest_width / 2, \
+      src_alpha); \
+}
+
+/* Generates fill_checker_i420_<name>(), which fills an I420 frame with a
+ * checkerboard of 8x8 pixel squares: the pattern is written to the Y plane
+ * row by row and both chroma planes are set to the neutral value 0x80 with
+ * MEMSET.
+ */
+#define I420_FILL_CHECKER(name, MEMSET) \
+static void \
+fill_checker_i420_##name (guint8 * dest, gint width, gint height) \
+{ \
+  gint size; \
+  gint i, j; \
+  static const int tab[] = { 80, 160, 80, 160 }; \
+  guint8 *p = dest; \
+  \
+  for (i = 0; i < height; i++) { \
+    for (j = 0; j < width; j++) { \
+      *p++ = tab[((i & 0x8) >> 3) + ((j & 0x8) >> 3)]; \
+    } \
+    /* skip the padding at the end of the row */ \
+    p += I420_Y_ROWSTRIDE (width) - width; \
+  } \
+  \
+  /* The chroma planes have GST_ROUND_UP_2 (height) / 2 rows, matching the \
+   * plane offsets; (stride * height) / 2 would leave part of the last \
+   * chroma row unwritten for odd heights */ \
+  size = I420_U_ROWSTRIDE (width) * (GST_ROUND_UP_2 (height) / 2); \
+  MEMSET (dest + I420_U_OFFSET (width, height), 0x80, size); \
+  \
+  size = I420_V_ROWSTRIDE (width) * (GST_ROUND_UP_2 (height) / 2); \
+  MEMSET (dest + I420_V_OFFSET (width, height), 0x80, size); \
+}
+
+/* Generates fill_color_i420_<name>(), which fills an I420 frame with one
+ * colour by setting the Y, U and V planes to colY, colU and colV using
+ * MEMSET.
+ */
+#define I420_FILL_COLOR(name,MEMSET) \
+static void \
+fill_color_i420_##name (guint8 * dest, gint width, gint height, \
+    gint colY, gint colU, gint colV) \
+{ \
+  gint size; \
+  \
+  size = I420_Y_ROWSTRIDE (width) * height; \
+  MEMSET (dest, colY, size); \
+  \
+  /* The chroma planes have GST_ROUND_UP_2 (height) / 2 rows, matching the \
+   * plane offsets; (stride * height) / 2 would leave part of the last \
+   * chroma row unwritten for odd heights */ \
+  size = I420_U_ROWSTRIDE (width) * (GST_ROUND_UP_2 (height) / 2); \
+  MEMSET (dest + I420_U_OFFSET (width, height), colU, size); \
+  \
+  size = I420_V_ROWSTRIDE (width) * (GST_ROUND_UP_2 (height) / 2); \
+  MEMSET (dest + I420_V_OFFSET (width, height), colV, size); \
+}
+
+/* Plain C implementations of the I420 functions */
+I420_BLEND (c, memcpy, _blend_u8_c);
+I420_FILL_CHECKER (c, memset);
+I420_FILL_COLOR (c, memset);
+
+/* RGB, BGR, xRGB, xBGR, RGBx, BGRx */
+
+/* Generates blend_<name>(), which blends a packed RGB source frame with bpp
+ * bytes per pixel onto dest at position (xpos, ypos).  Rows of both frames
+ * are padded to a multiple of 4 bytes.
+ *
+ * A src_alpha of exactly 0.0 leaves dest untouched, exactly 1.0 copies the
+ * clipped rows with MEMCPY, anything else is scaled by 256, clamped to
+ * 0..256 and handed to BLENDLOOP, which works on bytes rather than pixels.
+ */
+#define RGB_BLEND(name, bpp, MEMCPY, BLENDLOOP) \
+static void \
+blend_##name (const guint8 * src, gint xpos, gint ypos, \
+    gint src_width, gint src_height, gdouble src_alpha, \
+    guint8 * dest, gint dest_width, gint dest_height) \
+{ \
+  gint b_alpha; \
+  gint i; \
+  gint src_stride, dest_stride; \
+  \
+  /* If it's completely transparent... we just return */ \
+  if (G_UNLIKELY (src_alpha == 0.0)) { \
+    GST_INFO ("Fast copy (alpha == 0.0)"); \
+    return; \
+  } \
+  \
+  src_stride = GST_ROUND_UP_4 (src_width * bpp); \
+  dest_stride = GST_ROUND_UP_4 (dest_width * bpp); \
+  \
+  b_alpha = CLAMP ((gint) (src_alpha * 256), 0, 256); \
+  \
+  /* adjust src pointers for negative sizes */ \
+  if (xpos < 0) { \
+    src += -xpos * bpp; \
+    src_width -= -xpos; \
+    xpos = 0; \
+  } \
+  if (ypos < 0) { \
+    src += -ypos * src_stride; \
+    src_height -= -ypos; \
+    ypos = 0; \
+  } \
+  /* adjust width/height if the src is bigger than dest */ \
+  if (xpos + src_width > dest_width) { \
+    src_width = dest_width - xpos; \
+  } \
+  if (ypos + src_height > dest_height) { \
+    src_height = dest_height - ypos; \
+  } \
+  \
+  /* Nothing is left to blend if the source lies completely outside of \
+   * dest.  A negative width must never reach MEMCPY, where it would be \
+   * converted to a huge unsigned size. */ \
+  if (src_width <= 0 || src_height <= 0) \
+    return; \
+  \
+  dest = dest + bpp * xpos + (ypos * dest_stride); \
+  \
+  /* If it's completely opaque, we do a fast copy */ \
+  if (G_UNLIKELY (src_alpha == 1.0)) { \
+    GST_INFO ("Fast copy (alpha == 1.0)"); \
+    for (i = 0; i < src_height; i++) { \
+      MEMCPY (dest, src, bpp * src_width); \
+      src += src_stride; \
+      dest += dest_stride; \
+    } \
+    return; \
+  } \
+  \
+  BLENDLOOP(dest, src, src_stride, dest_stride, bpp * src_width, src_height, bpp * dest_width, b_alpha); \
+}
+
+/* Generates fill_checker_<name>_c(), which fills a packed RGB frame with
+ * bpp bytes per pixel with a grey checkerboard of 8x8 pixel squares.  Rows
+ * are padded to a multiple of 4 bytes.
+ *
+ * All components of a pixel get the same value, so every byte of the pixel
+ * is written instead of only the bytes at r, g and b.  This matters for the
+ * 4 bytes per pixel case: blend.h maps the xRGB, xBGR, RGBx and BGRx
+ * checker functions to one single implementation, and writing only three
+ * fixed byte positions would leave a colour component unset for the layouts
+ * whose padding byte sits elsewhere.  r, g and b are kept in the macro
+ * signature for compatibility with the existing instantiations.
+ */
+#define RGB_FILL_CHECKER_C(name, bpp, r, g, b) \
+static void \
+fill_checker_##name##_c (guint8 * dest, gint width, gint height) \
+{ \
+  gint i, j; \
+  static const int tab[] = { 80, 160, 80, 160 }; \
+  gint dest_add = GST_ROUND_UP_4 (width * bpp) - width * bpp; \
+  \
+  for (i = 0; i < height; i++) { \
+    for (j = 0; j < width; j++) { \
+      /* grey value of the square this pixel belongs to */ \
+      memset (dest, tab[((i & 0x8) >> 3) + ((j & 0x8) >> 3)], bpp); \
+      dest += bpp; \
+    } \
+    /* skip the padding at the end of the row */ \
+    dest += dest_add; \
+  } \
+}
+
+/* Generates fill_color_<name>(), which fills a packed RGB frame with bpp
+ * bytes per pixel with one colour.  The colour is given as Y, U, V and is
+ * converted to R, G, B once; every row is then written with
+ * MEMSET_RGB (row, red, green, blue, width).  Rows are padded to a multiple
+ * of 4 bytes.
+ */
+#define RGB_FILL_COLOR(name, bpp, MEMSET_RGB) \
+static void \
+fill_color_##name (guint8 * dest, gint width, gint height, \
+    gint colY, gint colU, gint colV) \
+{ \
+  const gint row_stride = GST_ROUND_UP_4 (width * bpp); \
+  const gint r_val = YUV_TO_R (colY, colU, colV); \
+  const gint g_val = YUV_TO_G (colY, colU, colV); \
+  const gint b_val = YUV_TO_B (colY, colU, colV); \
+  gint row; \
+  \
+  for (row = 0; row < height; row++, dest += row_stride) \
+    MEMSET_RGB (dest, r_val, g_val, b_val, width); \
+}
+
+/* Generates _memset_<name>_c(), which writes `width` pixels of one colour
+ * starting at dest.  Pixels are bpp bytes apart; r, g and b are the byte
+ * indices of the red, green and blue component inside a pixel.
+ */
+#define MEMSET_RGB_C(name, bpp, r, g, b) \
+static inline void \
+_memset_##name##_c (guint8* dest, gint red, gint green, gint blue, gint width) { \
+  guint8 *pixel = dest; \
+  \
+  while (width-- > 0) { \
+    pixel[r] = red; \
+    pixel[g] = green; \
+    pixel[b] = blue; \
+    pixel += bpp; \
+  } \
+}
+
+/* Plain C implementations for the packed RGB layouts.  Blend and checker
+ * functions are only generated for rgb (3 bytes per pixel) and xrgb
+ * (4 bytes per pixel); fill_color functions are generated for every
+ * layout, with the byte positions of red, green and blue given as the last
+ * three arguments of MEMSET_RGB_C. */
+RGB_BLEND (rgb_c, 3, memcpy, _blend_u8_c);
+RGB_FILL_CHECKER_C (rgb, 3, 0, 1, 2);
+MEMSET_RGB_C (rgb, 3, 0, 1, 2);
+RGB_FILL_COLOR (rgb_c, 3, _memset_rgb_c);
+
+MEMSET_RGB_C (bgr, 3, 2, 1, 0);
+RGB_FILL_COLOR (bgr_c, 3, _memset_bgr_c);
+
+RGB_BLEND (xrgb_c, 4, memcpy, _blend_u8_c);
+RGB_FILL_CHECKER_C (xrgb, 4, 1, 2, 3);
+MEMSET_RGB_C (xrgb, 4, 1, 2, 3);
+RGB_FILL_COLOR (xrgb_c, 4, _memset_xrgb_c);
+
+MEMSET_RGB_C (xbgr, 4, 3, 2, 1);
+RGB_FILL_COLOR (xbgr_c, 4, _memset_xbgr_c);
+
+MEMSET_RGB_C (rgbx, 4, 0, 1, 2);
+RGB_FILL_COLOR (rgbx_c, 4, _memset_rgbx_c);
+
+MEMSET_RGB_C (bgrx, 4, 2, 1, 0);
+RGB_FILL_COLOR (bgrx_c, 4, _memset_bgrx_c);
+
+/* MMX Implementations */
+/* blend_mmx.h is included once per pixel layout, with the names of the
+ * functions to generate and the component offsets supplied through macros.
+ * NOTE(review): blend_mmx.h is not visible here; the *_OFF values look like
+ * bit offsets of the components inside a 32 bit pixel - confirm against the
+ * header. */
+#ifdef BUILD_X86_ASM
+#define A32
+/* ARGB layout (also used for AYUV below) */
+#define NAME_BLEND _blend_loop_argb_mmx
+#define NAME_FILL_COLOR _fill_color_loop_argb_mmx
+#define A_OFF 0
+#define C1_OFF 8
+#define C2_OFF 16
+#define C3_OFF 24
+#include "blend_mmx.h"
+#undef NAME_BLEND
+#undef NAME_FILL_COLOR
+#undef A_OFF
+#undef C1_OFF
+#undef C2_OFF
+#undef C3_OFF
+
+/* BGRA layout: same template, component offsets reversed */
+#define NAME_BLEND _blend_loop_bgra_mmx
+#define NAME_FILL_COLOR _fill_color_loop_bgra_mmx
+#define A_OFF 24
+#define C1_OFF 16
+#define C2_OFF 8
+#define C3_OFF 0
+#include "blend_mmx.h"
+#undef NAME_BLEND
+#undef NAME_FILL_COLOR
+#undef A_OFF
+#undef C1_OFF
+#undef C2_OFF
+#undef C3_OFF
+#undef A32
+
+/* Wrap the MMX loops with the same clipping and colour conversion code
+ * that is used for the C loops */
+BLEND_A32 (argb_mmx, _blend_loop_argb_mmx);
+BLEND_A32 (bgra_mmx, _blend_loop_bgra_mmx);
+
+A32_COLOR (argb_mmx, TRUE, _fill_color_loop_argb_mmx);
+A32_COLOR (bgra_mmx, TRUE, _fill_color_loop_bgra_mmx);
+A32_COLOR (ayuv_mmx, FALSE, _fill_color_loop_argb_mmx);
+#endif
+
+/* Init function */
+/* Definitions of the function pointers declared in blend.h.  They are set
+ * by gst_video_mixer_init_blend().  Formats that share an implementation
+ * with another format have no pointer of their own; blend.h maps their
+ * names to the pointers defined here. */
+BlendFunction gst_video_mixer_blend_argb;
+BlendFunction gst_video_mixer_blend_bgra;
+/* AYUV is equal to ARGB */
+BlendFunction gst_video_mixer_blend_i420;
+BlendFunction gst_video_mixer_blend_rgb;
+/* BGR is equal to RGB */
+BlendFunction gst_video_mixer_blend_rgbx;
+/* BGRx, xRGB, xBGR are equal to RGBx */
+
+FillCheckerFunction gst_video_mixer_fill_checker_argb;
+FillCheckerFunction gst_video_mixer_fill_checker_bgra;
+FillCheckerFunction gst_video_mixer_fill_checker_ayuv;
+FillCheckerFunction gst_video_mixer_fill_checker_i420;
+FillCheckerFunction gst_video_mixer_fill_checker_rgb;
+/* BGR is equal to RGB */
+/* Defined under the name that blend.h declares, instead of relying on the
+ * xrgb -> rgbx alias macro to rename the definition */
+FillCheckerFunction gst_video_mixer_fill_checker_rgbx;
+/* BGRx, xRGB, xBGR are equal to RGBx */
+
+FillColorFunction gst_video_mixer_fill_color_argb;
+FillColorFunction gst_video_mixer_fill_color_bgra;
+FillColorFunction gst_video_mixer_fill_color_ayuv;
+FillColorFunction gst_video_mixer_fill_color_i420;
+FillColorFunction gst_video_mixer_fill_color_rgb;
+FillColorFunction gst_video_mixer_fill_color_bgr;
+FillColorFunction gst_video_mixer_fill_color_xrgb;
+FillColorFunction gst_video_mixer_fill_color_xbgr;
+FillColorFunction gst_video_mixer_fill_color_rgbx;
+FillColorFunction gst_video_mixer_fill_color_bgrx;
+
+/* Initializes liboil and points all gst_video_mixer_* function pointers at
+ * an implementation: first the plain C versions, then, on x86 builds with
+ * assembly support, the MMX versions where the CPU reports MMX.
+ */
+void
+gst_video_mixer_init_blend (void)
+{
+#ifdef BUILD_X86_ASM
+  /* only needed to pick the MMX implementations below */
+  guint cpu_flags;
+#endif
+
+  oil_init ();
+
+  gst_video_mixer_blend_argb = blend_argb_c;
+  gst_video_mixer_blend_bgra = blend_bgra_c;
+  gst_video_mixer_blend_i420 = blend_i420_c;
+  gst_video_mixer_blend_rgb = blend_rgb_c;
+  /* one implementation for all 4 bytes per pixel RGB layouts */
+  gst_video_mixer_blend_rgbx = blend_xrgb_c;
+
+  gst_video_mixer_fill_checker_argb = fill_checker_argb_c;
+  gst_video_mixer_fill_checker_bgra = fill_checker_bgra_c;
+  gst_video_mixer_fill_checker_ayuv = fill_checker_ayuv_c;
+  gst_video_mixer_fill_checker_i420 = fill_checker_i420_c;
+  gst_video_mixer_fill_checker_rgb = fill_checker_rgb_c;
+  /* one implementation for all 4 bytes per pixel RGB layouts */
+  gst_video_mixer_fill_checker_rgbx = fill_checker_xrgb_c;
+
+  gst_video_mixer_fill_color_argb = fill_color_argb_c;
+  gst_video_mixer_fill_color_bgra = fill_color_bgra_c;
+  gst_video_mixer_fill_color_ayuv = fill_color_ayuv_c;
+  gst_video_mixer_fill_color_i420 = fill_color_i420_c;
+  gst_video_mixer_fill_color_rgb = fill_color_rgb_c;
+  gst_video_mixer_fill_color_bgr = fill_color_bgr_c;
+  gst_video_mixer_fill_color_xrgb = fill_color_xrgb_c;
+  gst_video_mixer_fill_color_xbgr = fill_color_xbgr_c;
+  gst_video_mixer_fill_color_rgbx = fill_color_rgbx_c;
+  gst_video_mixer_fill_color_bgrx = fill_color_bgrx_c;
+
+#ifdef BUILD_X86_ASM
+  cpu_flags = oil_cpu_get_flags ();
+  if (cpu_flags & OIL_IMPL_FLAG_MMX) {
+    gst_video_mixer_blend_argb = blend_argb_mmx;
+    gst_video_mixer_blend_bgra = blend_bgra_mmx;
+
+    gst_video_mixer_fill_color_argb = fill_color_argb_mmx;
+    gst_video_mixer_fill_color_bgra = fill_color_bgra_mmx;
+    gst_video_mixer_fill_color_ayuv = fill_color_ayuv_mmx;
+  }
+#endif
+}
--- a/gst/videomixer/blend.h
+++ b/gst/videomixer/blend.h
@ -0,0 +1,64 @@
+/* 
+ * Copyright (C) 2009 Sebastian Dröge <sebastian.droege@collabora.co.uk>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __BLEND_H__
+#define __BLEND_H__
+
+#include <gst/gst.h>
+
+/* Blends the src frame of src_width x src_height pixels onto the dest frame
+ * of dest_width x dest_height pixels at position (xpos, ypos), using
+ * src_alpha as global opacity (0.0 leaves dest untouched). */
+typedef void (*BlendFunction) (const guint8 * src, gint xpos, gint ypos, gint src_width, gint src_height, gdouble src_alpha, guint8 * dest, gint dest_width, gint dest_height);
+/* Fills the dest frame of width x height pixels with a checkerboard. */
+typedef void (*FillCheckerFunction) (guint8 * dest, gint width, gint height);
+/* Fills the dest frame of width x height pixels with one colour, which is
+ * passed as Y, U, V; the RGB implementations convert it themselves. */
+typedef void (*FillColorFunction) (guint8 * dest, gint width, gint height, gint c1, gint c2, gint c3);
+
+/* Function pointers, one per pixel format.  Formats that share an
+ * implementation are mapped to the same pointer with a #define.  All
+ * pointers are set by gst_video_mixer_init_blend(). */
+extern BlendFunction gst_video_mixer_blend_argb;
+extern BlendFunction gst_video_mixer_blend_bgra;
+#define gst_video_mixer_blend_ayuv gst_video_mixer_blend_argb
+extern BlendFunction gst_video_mixer_blend_i420;
+extern BlendFunction gst_video_mixer_blend_rgb;
+#define gst_video_mixer_blend_bgr gst_video_mixer_blend_rgb
+extern BlendFunction gst_video_mixer_blend_rgbx;
+#define gst_video_mixer_blend_bgrx gst_video_mixer_blend_rgbx
+#define gst_video_mixer_blend_xrgb gst_video_mixer_blend_rgbx
+#define gst_video_mixer_blend_xbgr gst_video_mixer_blend_rgbx
+
+extern FillCheckerFunction gst_video_mixer_fill_checker_argb;
+extern FillCheckerFunction gst_video_mixer_fill_checker_bgra;
+extern FillCheckerFunction gst_video_mixer_fill_checker_ayuv;
+extern FillCheckerFunction gst_video_mixer_fill_checker_i420;
+extern FillCheckerFunction gst_video_mixer_fill_checker_rgb;
+#define gst_video_mixer_fill_checker_bgr gst_video_mixer_fill_checker_rgb
+extern FillCheckerFunction gst_video_mixer_fill_checker_rgbx;
+#define gst_video_mixer_fill_checker_bgrx gst_video_mixer_fill_checker_rgbx
+#define gst_video_mixer_fill_checker_xrgb gst_video_mixer_fill_checker_rgbx
+#define gst_video_mixer_fill_checker_xbgr gst_video_mixer_fill_checker_rgbx
+
+extern FillColorFunction gst_video_mixer_fill_color_argb;
+extern FillColorFunction gst_video_mixer_fill_color_bgra;
+extern FillColorFunction gst_video_mixer_fill_color_ayuv;
+extern FillColorFunction gst_video_mixer_fill_color_i420;
+extern FillColorFunction gst_video_mixer_fill_color_rgb;
+extern FillColorFunction gst_video_mixer_fill_color_bgr;
+extern FillColorFunction gst_video_mixer_fill_color_xrgb;
+extern FillColorFunction gst_video_mixer_fill_color_xbgr;
+extern FillColorFunction gst_video_mixer_fill_color_rgbx;
+extern FillColorFunction gst_video_mixer_fill_color_bgrx;
+
+/* Has to be called once before any of the function pointers above is used. */
+void gst_video_mixer_init_blend (void);
+
+#endif /* __BLEND_H__ */
--- a/gst/videomixer/blend_ayuv.c
+++ b/gst/videomixer/blend_ayuv.c
@ -1,477 +0,0 @@
-/* 
- * Copyright (C) 2004 Wim Taymans <wim@fluendo.com>
- * Copyright (C) 2009 Sebastian Dröge <sebastian.droege@collabora.co.uk>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public
- * License along with this library; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 02111-1307, USA.
- */
-
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
-
-#include <gst/gst.h>
-
-#ifdef HAVE_GCC_ASM /* BUILD_X86_ASM gates the *_mmx functions in this file */
-#if defined(HAVE_CPU_I386) || defined(HAVE_CPU_X86_64)
-#define BUILD_X86_ASM /* defined only when both the inline-asm and the x86 CPU checks pass */
-#endif
-#endif
-
-#define BLEND_NORMAL(Y1,U1,V1,Y2,U2,V2,alpha,Y,U,V)     /* weighted mix of triple 1 and triple 2 into Y,U,V; expands to three bare statements with unparenthesized arguments */ \
-        Y = ((Y1*(255-alpha))+(Y2*alpha))>>8;           /* weights add up to 255 but >>8 divides by 256, so alpha == 0 yields (Y1*255)>>8 rather than Y1 */ \
-        U = ((U1*(255-alpha))+(U2*alpha))>>8;           \
-        V = ((V1*(255-alpha))+(V2*alpha))>>8;
-
-#define BLEND_ADD(Y1,U1,V1,Y2,U2,V2,alpha,Y,U,V)                /* additive blend: adds the alpha-scaled triple 2 to triple 1 */ \
-        Y = Y1+((Y2*alpha)>>8);                                 /* luma: Y1 plus Y2 scaled by alpha/256 */ \
-        U = U1+(((127*(255-alpha)+(U2*alpha)))>>8)-127;         /* chroma: U2 is mixed with 127 by alpha, then its offset from 127 is added to U1 */ \
-        V = V1+(((127*(255-alpha)+(V2*alpha)))>>8)-127;         \
-        if (Y>255) {                                            /* luma overflowed */ \
-          gint mult = MAX (0, 288-Y);                           /* 32 at Y == 256, falling to 0 for Y >= 288 */ \
-          U = ((U*mult) + (127*(32-mult)))>>5;                  /* fade chroma toward 127 as the overshoot grows */ \
-          V = ((V*mult) + (127*(32-mult)))>>5;                  \
-          Y = 255;                                              \
-        }                                                       \
-        U = MIN (U,255);                                        /* upper bound only; no lower clamp is applied to U or V */ \
-        V = MIN (V,255);
-
-#define BLEND_SUBTRACT(Y1,U1,V1,Y2,U2,V2,alpha,Y,U,V)           /* subtractive blend: removes the alpha-scaled Y2 from Y1; chroma is handled as in BLEND_ADD */ \
-        Y = Y1-((Y2*alpha)>>8);                                 \
-        U = U1+(((127*(255-alpha)+(U2*alpha)))>>8)-127;         \
-        V = V1+(((127*(255-alpha)+(V2*alpha)))>>8)-127;         \
-        if (Y<0) {                                              /* luma underflowed */ \
-          gint mult = MIN (32, -Y);                             /* 1 at Y == -1, rising to 32 for Y <= -32 */ \
-          U = ((U*(32-mult)) + (127*mult))>>5;                  /* fade chroma toward 127 as the undershoot grows */ \
-          V = ((V*(32-mult)) + (127*mult))>>5;                  \
-          Y = 0;                                                \
-        }                                                       /* U and V are not clamped by this macro */
-
-#define BLEND_DARKEN(Y1,U1,V1,Y2,U2,V2,alpha,Y,U,V)     /* keeps triple 1 unchanged when Y1 < Y2, otherwise mixes exactly like BLEND_NORMAL */ \
-        if (Y1 < Y2) {                                  \
-          Y = Y1; U = U1; V = V1;                       \
-        }                                               \
-        else {                                          \
-          Y = ((Y1*(255-alpha))+(Y2*alpha))>>8;         \
-          U = ((U1*(255-alpha))+(U2*alpha))>>8;         \
-          V = ((V1*(255-alpha))+(V2*alpha))>>8;         \
-        }
-
-#define BLEND_LIGHTEN(Y1,U1,V1,Y2,U2,V2,alpha,Y,U,V)    /* keeps triple 1 unchanged when Y1 > Y2, otherwise mixes exactly like BLEND_NORMAL */ \
-        if (Y1 > Y2) {                                  \
-          Y = Y1; U = U1; V = V1;                       \
-        }                                               \
-        else {                                          \
-          Y = ((Y1*(255-alpha))+(Y2*alpha))>>8;         \
-          U = ((U1*(255-alpha))+(U2*alpha))>>8;         \
-          V = ((V1*(255-alpha))+(V2*alpha))>>8;         \
-        }
-
-#define BLEND_MULTIPLY(Y1,U1,V1,Y2,U2,V2,alpha,Y,U,V)                   /* scales triple 1 by Y2, weighted by alpha; U2 and V2 are accepted but never used */ \
-        Y = (Y1*(256*(255-alpha) +(Y2*alpha)))>>16;                     \
-        U = ((U1*(255-alpha)*256)+(alpha*(U1*Y2+128*(256-Y2))))>>16;    /* the alpha-weighted part mixes U1 with 128 according to Y2 */ \
-        V = ((V1*(255-alpha)*256)+(alpha*(V1*Y2+128*(256-Y2))))>>16;
-
-#define BLEND_DIFFERENCE(Y1,U1,V1,Y2,U2,V2,alpha,Y,U,V)         /* per-channel absolute difference, then an alpha mix with triple 1, then range handling */ \
-        Y = ABS((gint)Y1-(gint)Y2)+127;                         /* every channel is offset by 127 after taking the difference */ \
-        U = ABS((gint)U1-(gint)U2)+127;                         \
-        V = ABS((gint)V1-(gint)V2)+127;                         \
-        Y = ((Y*alpha)+(Y1*(255-alpha)))>>8;                    /* mix the raw result with triple 1 by alpha */ \
-        U = ((U*alpha)+(U1*(255-alpha)))>>8;                    \
-        V = ((V*alpha)+(V1*(255-alpha)))>>8;                    \
-        if (Y>255) {                                            /* luma overflow: fade chroma toward 127, clamp Y */ \
-          gint mult = MAX (0, 288-Y);                           \
-          U = ((U*mult) + (127*(32-mult)))>>5;                  \
-          V = ((V*mult) + (127*(32-mult)))>>5;                  \
-          Y = 255;                                              \
-        } else if (Y<0) {                                       /* luma underflow: fade chroma toward 127, clamp Y */ \
-          gint mult = MIN (32, -Y);                             \
-          U = ((U*(32-mult)) + (127*mult))>>5;                  \
-          V = ((V*(32-mult)) + (127*mult))>>5;                  \
-          Y = 0;                                                \
-        }                                                       \
-        U = CLAMP(U, 0, 255);                                   \
-        V = CLAMP(V, 0, 255);
-
-#define BLEND_EXCLUSION(Y1,U1,V1,Y2,U2,V2,alpha,Y,U,V)          /* cross-multiplies each channel with the bitwise inverse of the other operand, then mixes with triple 1 by alpha */ \
-        Y = ((gint)(Y1^0xff)*Y2+(gint)(Y2^0xff)*Y1)>>8;         /* x^0xff equals 255-x for values in 0..255 */ \
-        U = ((gint)(U1^0xff)*Y2+(gint)(Y2^0xff)*U1)>>8;         /* NOTE(review): uses Y2, not U2; U2 and V2 are never read by this macro - confirm this is intended */ \
-        V = ((gint)(V1^0xff)*Y2+(gint)(Y2^0xff)*V1)>>8;         \
-        Y = ((Y*alpha)+(Y1*(255-alpha)))>>8;                    \
-        U = ((U*alpha)+(U1*(255-alpha)))>>8;                    \
-        V = ((V*alpha)+(V1*(255-alpha)))>>8;                    \
-        if (Y>255) {                                            /* luma overflow: fade chroma toward 127, clamp Y */ \
-          gint mult = MAX (0, 288-Y);                           \
-          U = ((U*mult) + (127*(32-mult)))>>5;                  \
-          V = ((V*mult) + (127*(32-mult)))>>5;                  \
-          Y = 255;                                              \
-        } else if (Y<0) {                                       /* luma underflow: fade chroma toward 127, clamp Y */ \
-          gint mult = MIN (32, -Y);                             \
-          U = ((U*(32-mult)) + (127*mult))>>5;                  \
-          V = ((V*(32-mult)) + (127*mult))>>5;                  \
-          Y = 0;                                                \
-        }                                                       \
-        U = CLAMP(U, 0, 255);                                   \
-        V = CLAMP(V, 0, 255);
-
-#define BLEND_SOFTLIGHT(Y1,U1,V1,Y2,U2,V2,alpha,Y,U,V)          /* adds the two triples, removes a 127 offset per channel, then mixes with triple 1 by alpha */ \
-        Y = (gint)Y1+(gint)Y2 - 127;                            \
-        U = (gint)U1+(gint)U2 - 127;                            \
-        V = (gint)V1+(gint)V2 - 127;                            \
-        Y = ((Y*alpha)+(Y1*(255-alpha)))>>8;                    \
-        U = ((U*alpha)+(U1*(255-alpha)))>>8;                    \
-        V = ((V*alpha)+(V1*(255-alpha)))>>8;                    \
-        if (Y>255) {                                            /* luma overflow: fade chroma toward 127, clamp Y */ \
-          gint mult = MAX (0, 288-Y);                           \
-          U = ((U*mult) + (127*(32-mult)))>>5;                  \
-          V = ((V*mult) + (127*(32-mult)))>>5;                  \
-          Y = 255;                                              \
-        } else if (Y<0) {                                       /* luma underflow: fade chroma toward 127, clamp Y */ \
-          gint mult = MIN (32, -Y);                             \
-          U = ((U*(32-mult)) + (127*mult))>>5;                  \
-          V = ((V*(32-mult)) + (127*mult))>>5;                  \
-          Y = 0;                                                \
-        }                                                       /* U and V are not clamped here; the continuation backslash that follows extends the macro over the next (empty) line */ \
-
-#define BLEND_HARDLIGHT(Y1,U1,V1,Y2,U2,V2,alpha,Y,U,V)          /* like BLEND_SOFTLIGHT except that luma adds Y2 twice and removes 255 */ \
-        Y = (gint)Y1+(gint)Y2*2 - 255;                          \
-        U = (gint)U1+(gint)U2 - 127;                            \
-        V = (gint)V1+(gint)V2 - 127;                            \
-        Y = ((Y*alpha)+(Y1*(255-alpha)))>>8;                    \
-        U = ((U*alpha)+(U1*(255-alpha)))>>8;                    \
-        V = ((V*alpha)+(V1*(255-alpha)))>>8;                    \
-        if (Y>255) {                                            /* luma overflow: fade chroma toward 127, clamp Y */ \
-          gint mult = MAX (0, 288-Y);                           \
-          U = ((U*mult) + (127*(32-mult)))>>5;                  \
-          V = ((V*mult) + (127*(32-mult)))>>5;                  \
-          Y = 255;                                              \
-        } else if (Y<0) {                                       /* luma underflow: fade chroma toward 127, clamp Y */ \
-          gint mult = MIN (32, -Y);                             \
-          U = ((U*(32-mult)) + (127*mult))>>5;                  \
-          V = ((V*(32-mult)) + (127*mult))>>5;                  \
-          Y = 0;                                                \
-        }                                                       /* U and V are not clamped here; the continuation backslash that follows extends the macro over the next (empty) line */ \
-
-#define BLEND_MODE BLEND_NORMAL /* the formula that gst_videomixer_blend_ayuv_ayuv expands per pixel */
-#if 0 /* never compiled: lists the macros that can be substituted for BLEND_MODE above */
-#define BLEND_MODE BLEND_NORMAL
-#define BLEND_MODE BLEND_ADD
-#define BLEND_MODE BLEND_SUBTRACT
-#define BLEND_MODE BLEND_LIGHTEN
-#define BLEND_MODE BLEND_DARKEN
-#define BLEND_MODE BLEND_MULTIPLY
-#define BLEND_MODE BLEND_DIFFERENCE
-#define BLEND_MODE BLEND_EXCLUSION
-#define BLEND_MODE BLEND_SOFTLIGHT
-#define BLEND_MODE BLEND_HARDLIGHT
-#endif
-
-/* note that this function does packing conversion and blending at the
- * same time
- *
- * Blends the 4-bytes-per-pixel frame src onto dest with its top-left corner
- * at (xpos, ypos), clipped to the dest frame. Byte 0 of each src pixel is its
- * alpha; src_alpha is a frame-wide factor (clamped to 0.0..1.0) that is
- * multiplied into it. Both frames are taken to be tightly packed
- * (stride = width * 4). Every dest pixel that is touched gets byte 0 set
- * to 0xff. */
-void
-gst_videomixer_blend_ayuv_ayuv (guint8 * src, gint xpos, gint ypos,
-    gint src_width, gint src_height, gdouble src_alpha,
-    guint8 * dest, gint dest_width, gint dest_height)
-{
-  guint s_alpha, alpha; /* s_alpha: frame-wide opacity in 0..256; alpha: per-pixel opacity after scaling */
-  gint i, j;
-  gint src_stride, dest_stride;
-  gint src_add, dest_add;
-
-  src_stride = src_width * 4; /* taken before clipping, so it is the full row length in bytes */
-  dest_stride = dest_width * 4;
-
-  s_alpha = CLAMP ((gint) (src_alpha * 256), 0, 256);
-  //g_print ("%f %d\n", src_alpha, s_alpha);
-
-  /* adjust src pointers for negative positions: skip the part of src that
-   * lies left of / above the dest origin */
-  if (xpos < 0) {
-    src += -xpos * 4;
-    src_width -= -xpos;
-    xpos = 0;
-  }
-  if (ypos < 0) {
-    src += -ypos * src_stride;
-    src_height -= -ypos;
-    ypos = 0;
-  }
-  /* adjust width/height if the src is bigger than dest; when src lies wholly
-   * outside dest the clipped size ends up <= 0 and no pixel is written */
-  if (xpos + src_width > dest_width) {
-    src_width = dest_width - xpos;
-  }
-  if (ypos + src_height > dest_height) {
-    src_height = dest_height - ypos;
-  }
-
-  src_add = src_stride - (4 * src_width); /* bytes to skip after the clipped span to reach the next src row */
-  dest_add = dest_stride - (4 * src_width); /* same for dest */
-
-  dest = dest + 4 * xpos + (ypos * dest_stride); /* first dest pixel that will be blended */
-
-  for (i = 0; i < src_height; i++) {
-    for (j = 0; j < src_width; j++) {
-#if 0 /* never compiled: hard-coded form of the normal blend */
-      gint Y, U, V;
-
-      alpha = (src[0] * s_alpha) >> 8;
-      Y = dest[1];
-      U = dest[2];
-      V = dest[3];
-      dest[0] = 0xff;
-      dest[1] = (((src[1] - Y) * alpha) >> 8) + Y;
-      dest[2] = (((src[2] - U) * alpha) >> 8) + U;
-      dest[3] = (((src[3] - V) * alpha) >> 8) + V;
-#else
-      gint Y, U, V;
-
-      alpha = (src[0] * s_alpha) >> 8; /* 0..255: pixel alpha scaled by the frame-wide opacity */
-      BLEND_MODE (dest[1], dest[2], dest[3], src[1], src[2], src[3],
-          alpha, Y, U, V); /* triple 1 is the dest pixel, triple 2 the src pixel */
-      dest[0] = 0xff;
-      dest[1] = Y;
-      dest[2] = U;
-      dest[3] = V;
-#endif
-
-      src += 4;
-      dest += 4;
-    }
-    src += src_add;
-    dest += dest_add;
-  }
-}
-
-#undef BLEND_MODE
-
-#ifdef BUILD_X86_ASM /* MMX variant of gst_videomixer_blend_ayuv_ayuv: same parameters and clipping, each row blended by inline asm */
-void
-gst_videomixer_blend_ayuv_ayuv_mmx (guint8 * src, gint xpos, gint ypos,
-    gint src_width, gint src_height, gdouble src_alpha,
-    guint8 * dest, gint dest_width, gint dest_height)
-{
-  gint s_alpha;
-  gint i;
-  gint src_stride, dest_stride;
-  gint src_add, dest_add;
-
-  src_stride = src_width * 4;
-  dest_stride = dest_width * 4;
-
-  s_alpha = CLAMP ((gint) (src_alpha * 256), 0, 256);
-
-  /* adjust src pointers for negative positions */
-  if (xpos < 0) {
-    src += -xpos * 4;
-    src_width -= -xpos;
-    xpos = 0;
-  }
-  if (ypos < 0) {
-    src += -ypos * src_stride;
-    src_height -= -ypos;
-    ypos = 0;
-  }
-  /* adjust width/height if the src is bigger than dest */
-  if (xpos + src_width > dest_width) {
-    src_width = dest_width - xpos;
-  }
-  if (ypos + src_height > dest_height) {
-    src_height = dest_height - ypos;
-  }
-
-  src_add = src_stride - (4 * src_width);
-  dest_add = dest_stride - (4 * src_width);
-
-  dest = dest + 4 * xpos + (ypos * dest_stride);
-
-  for (i = 0; i < src_height; i++) {
-    gulong old_ebx; /* holds the incoming ebx while the asm uses ebx as its pixel counter */
-
-    /*      (P1 * (256 - A) + (P2 * A)) / 256
-     * =>   (P1 * 256 - P1 * A + P2 * A) / 256
-     * =>   (P1 * 256 + A * (P2 - P1)) / 256
-     * =>   P1 + (A * (P2 - P1)) / 256
-     */
-    /* *INDENT-OFF* */
-    __asm__ __volatile__ (
-        " movl       %%ebx ,      %6   \n\t"   /* save ebx; it is restored by hand at label 3 instead of being listed as a clobber */
-
-        " pcmpeqd    %%mm5 ,   %%mm5   \n\t"   /* mm5 = 0xffff... */
-        " psrld        $24 ,   %%mm5   \n\t"   /* mm5 = 00 00 00 ff 00 00 00 ff, selector for alpha */
-        " mov           %4 ,   %%eax   \n\t"   /* eax = s_alpha */
-        " movd       %%eax ,   %%mm6   \n\t"   /* mm6 = s_alpha */
-        " punpckldq  %%mm6 ,   %%mm6   \n\t"   /* mm6 = 00 00 00 aa 00 00 00 aa, alpha scale factor */
-
-        " movl          %5 ,   %%ebx   \n\t"   /* ebx = src_width */
-        " test          $1 ,   %%ebx   \n\t"   /* check odd pixel */
-        " je                      1f   \n\t"
-
-        /* do odd pixel */
-        " movd        (%2) ,   %%mm2   \n\t"   /* mm2 = src,  00 00 00 00 sv su sy sa */
-        " movd        (%3) ,   %%mm1   \n\t"   /* mm1 = dest, 00 00 00 00 dv du dy da */
-        " movq       %%mm2 ,   %%mm0   \n\t"
-        " punpcklbw  %%mm7 ,   %%mm2   \n\t"   /* mm2 = 00 sv 00 su 00 sy 00 sa; NOTE(review): relies on mm7 == 0, but within this asm mm7 is only cleared inside the 2-pixel loop below */
-        " pand       %%mm5 ,   %%mm0   \n\t"   /* mm0 = 00 00 00 00 00 00 00 sa, get alpha component  */
-        " punpcklbw  %%mm7 ,   %%mm1   \n\t"   /* mm1 = 00 dv 00 du 00 dy 00 da */
-        " pmullw     %%mm6 ,   %%mm0   \n\t"   /* mult with scale */
-        " psubw      %%mm1 ,   %%mm2   \n\t"   /* mm2 = mm2 - mm1 */
-        " punpcklwd  %%mm0 ,   %%mm0   \n\t"
-        " punpckldq  %%mm0 ,   %%mm0   \n\t"   /* mm0 == 00 aa 00 aa 00 aa 00 aa */
-        " psrlw         $8 ,   %%mm0   \n\t"
-        " pmullw     %%mm0 ,   %%mm2   \n\t"   /* mm2 == a * mm2 */
-        " psllw         $8 ,   %%mm1   \n\t"   /* scale up */
-        " paddw      %%mm1 ,   %%mm2   \n\t"   /* mm2 == mm2 + mm1 */
-        " psrlw         $8 ,   %%mm2   \n\t"   /* scale down */
-        " por        %%mm5 ,   %%mm2   \n\t"   /* set alpha to ff; NOTE(review): mm2 still holds 16-bit lanes here, so the 0x000000ff dword mask also lands on the U lane - the 2-pixel path packs first; confirm */
-        " packuswb   %%mm2 ,   %%mm2   \n\t"   /* narrow the 16-bit lanes back to bytes */
-        " movd       %%mm2 ,    (%3)   \n\t"   /* dest = mm2 */
-        " add           $4 ,     %1    \n\t"   /* dest += one pixel */
-        " add           $4 ,     %0    \n\t"   /* src += one pixel */
-
-        "1:                            \n\t"
-        " sar           $1 ,   %%ebx   \n\t"   /* prepare for 2 pixel per loop */
-        " cmp           $0 ,   %%ebx   \n\t"
-        " je                      3f   \n\t"
-        "2:                            \n\t"
-
-        /* do even pixels */
-        " movq        (%2) ,   %%mm2   \n\t"   /* mm2 = src,  sv1 su1 sy1 sa1  sv0 su0 sy0 sa0 */
-        " movq        (%3) ,   %%mm1   \n\t"   /* mm1 = dest, dv1 du1 dy1 da1  dv0 du0 dy0 da0 */
-        " movq       %%mm2 ,   %%mm4   \n\t"
-        " movq       %%mm1 ,   %%mm3   \n\t"
-        " movq       %%mm2 ,   %%mm0   \n\t"   /* copy for doing the alpha */
-
-        " pxor       %%mm7 ,   %%mm7   \n\t"   /* mm7 = 0, needed because mm7 is reused for alpha further down */
-        " punpcklbw  %%mm7 ,   %%mm2   \n\t"   /* mm2 = 00 sv0  00 su0  00 sy0  00 sa0 */
-        " punpckhbw  %%mm7 ,   %%mm4   \n\t"   /* mm4 = 00 sv1  00 su1  00 sy1  00 sa1 */
-        " punpcklbw  %%mm7 ,   %%mm1   \n\t"   /* mm1 = 00 dv0  00 du0  00 dy0  00 da0 */
-        " punpckhbw  %%mm7 ,   %%mm3   \n\t"   /* mm3 = 00 dv1  00 du1  00 dy1  00 da1 */
-
-        " pand       %%mm5 ,   %%mm0   \n\t"   /* mm0 = 00 00 00 sa1  00 00 00 sa0 */
-        " psubw      %%mm1 ,   %%mm2   \n\t"   /* mm2 = mm2 - mm1 */
-        " pmullw     %%mm6 ,   %%mm0   \n\t"   /* mult with scale */
-        " psubw      %%mm3 ,   %%mm4   \n\t"   /* mm4 = mm4 - mm3 */
-        " psrlw         $8 ,   %%mm0   \n\t"   /* scale back */
-        " movq       %%mm0 ,   %%mm7   \n\t"   /* save copy */
-        " punpcklwd  %%mm0 ,   %%mm0   \n\t"   /* mm0 = 00 00   00 00   00 aa0  00 aa0 */
-        " punpckhwd  %%mm7 ,   %%mm7   \n\t"   /* mm7 = 00 00   00 00   00 aa1  00 aa1 */
-        " punpckldq  %%mm0 ,   %%mm0   \n\t"   /* mm0 = 00 aa0  00 aa0  00 aa0  00 aa0 */
-        " punpckldq  %%mm7 ,   %%mm7   \n\t"   /* mm7 = 00 aa1  00 aa1  00 aa1  00 aa1 */
-
-        " pmullw     %%mm0 ,   %%mm2   \n\t"   /* mm2 == aa0 * mm2 */
-        " pmullw     %%mm7 ,   %%mm4   \n\t"   /* mm4 == aa1 * mm4 */
-        " psllw         $8 ,   %%mm1   \n\t"
-        " psllw         $8 ,   %%mm3   \n\t"
-        " paddw      %%mm1 ,   %%mm2   \n\t"   /* mm2 == mm2 + mm1 */
-        " paddw      %%mm3 ,   %%mm4   \n\t"   /* mm4 == mm4 + mm3 */
-
-        " psrlw         $8 ,   %%mm2   \n\t"
-        " psrlw         $8 ,   %%mm4   \n\t"
-        " packuswb   %%mm4 ,   %%mm2   \n\t"
-        " por        %%mm5 ,   %%mm2   \n\t"   /* set alpha to ff */
-        " movq       %%mm2 ,    (%3)   \n\t"
-
-        " add           $8 ,     %1    \n\t"   /* dest += two pixels */
-        " add           $8 ,     %0    \n\t"   /* src += two pixels */
-        " dec           %%ebx          \n\t"
-        " jne                     2b   \n\t"
-
-        "3:                            \n\t"
-        " movl          %6 ,   %%ebx   \n\t"   /* restore ebx */
-        :"=r" (src), "=r" (dest)
-        :"0" (src), "1" (dest), "m" (s_alpha), "m" (src_width), "m" (old_ebx)
-        :"%eax", "memory"
-#ifdef __MMX__
-        , "mm0", "mm1", "mm2", "mm5", "mm6", "mm7" /* NOTE(review): mm3 and mm4 are written above as well but are not listed */
-#endif
-    );
-      /* *INDENT-ON* */
-    src += src_add;
-    dest += dest_add;
-  }
-  __asm__ __volatile__ ("emms"); /* leave MMX state once all rows are done */
-}
-#endif
-
-/* fill a packed 4-bytes-per-pixel buffer of width x height pixels with a
- * checkerboard of 8x8-pixel squares; rows are written back to back */
-void
-gst_videomixer_fill_ayuv_checker (guint8 * dest, gint width, gint height)
-{
-  gint i, j;
-  static const int tab[] = { 80, 160, 80, 160 }; /* the two luma levels; the index below only reaches 0..2 */
-
-  for (i = 0; i < height; i++) {
-    for (j = 0; j < width; j++) {
-      *dest++ = 0xff; /* alpha */
-      *dest++ = tab[((i & 0x8) >> 3) + ((j & 0x8) >> 3)]; /* bit 3 of row and column: the level flips every 8 pixels in each direction */
-      *dest++ = 128; /* both remaining bytes are the constant 128 */
-      *dest++ = 128;
-    }
-  }
-}
-
-void /* fills width x height packed pixels at dest with one colour; rows are written back to back */
-gst_videomixer_fill_ayuv_color (guint8 * dest, gint width, gint height,
-    gint colY, gint colU, gint colV)
-{
-  gint i, j;
-
-  for (i = 0; i < height; i++) {
-    for (j = 0; j < width; j++) {
-      *dest++ = 0xff; /* byte 0 is always 0xff */
-      *dest++ = colY; /* the gint arguments are narrowed to one byte on store */
-      *dest++ = colU;
-      *dest++ = colV;
-    }
-  }
-}
-
-#ifdef BUILD_X86_ASM /* MMX variant of gst_videomixer_fill_ayuv_color: same parameters, two pixels per 64-bit store */
-void
-gst_videomixer_fill_ayuv_color_mmx (guint8 * dest, gint width, gint height,
-    gint colY, gint colU, gint colV)
-{
-  guint64 val;
-  guint nvals = width * height; /* number of pixels still to write */
-
-  val = (((guint64) 0xff)) | (((guint64) colY) << 8) |
-      (((guint64) colU) << 16) | (((guint64) colV) << 24); /* one pixel: 0xff in the lowest byte, then colY, colU, colV */
-  val = (val << 32) | val; /* duplicate it so that one 64-bit store covers two pixels */
-
-  /* *INDENT-OFF* */
-  __asm__ __volatile__ (
-    "cmp      $2 ,    %2  \n\t" /* %2 is the input alias of nvals, i.e. the same register as %0 */
-    "jb       2f          \n\t" /* fewer than two pixels: skip the loop entirely */
-    "movq     %4 , %%mm0  \n\t" /* mm0 = val */
-    "1:                   \n\t"
-    "movq  %%mm0 ,  (%1)  \n\t" /* store two pixels at dest */
-    "sub      $2 ,    %0  \n\t" /* nvals -= 2 */
-    "add      $8 ,    %1  \n\t" /* dest += 8 bytes */
-    "cmp      $2 ,    %2  \n\t"
-    "jae      1b          \n\t" /* repeat while at least two pixels remain */
-    "emms                 \n\t"
-    "2:                   \n\t"
-    : "=r" (nvals), "=r" (dest)
-    : "0" (nvals), "1" (dest), "m" (val)
-    : "memory"
-#ifdef __MMX__
-      , "mm0"
-#endif
-  );
-
-  /* *INDENT-ON* */
-  if (nvals) /* one pixel left over (odd pixel count) */
-    GST_WRITE_UINT32_LE (&dest[-4], (guint32) (val & 0xffffffff)); /* NOTE(review): dest has already been advanced past the last stored pair, so the leftover pixel sits at dest[0]; dest[-4] is the previous pixel, or lies before the buffer when the loop never ran - confirm */
-}
-#endif
--- a/gst/videomixer/blend_bgra.c
+++ b/gst/videomixer/blend_bgra.c
@ -1,133 +0,0 @@
-/* 
- * Copyright (C) 2009 Alex Ugarte <augarte@vicomtech.org>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public
- * License along with this library; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 02111-1307, USA.
- */
-
-#include <gst/gst.h>
-
-#define BLEND_NORMAL(B1,G1,R1,B2,G2,R2,B,G,R,alpha)     /* weighted mix of triple 1 and triple 2 into B,G,R; note that alpha is the last parameter here */ \
-        B = ((B1*(255-alpha))+(B2*alpha))>>8;           /* weights add up to 255 but >>8 divides by 256, so alpha == 0 yields (B1*255)>>8 rather than B1 */ \
-        G = ((G1*(255-alpha))+(G2*alpha))>>8;           \
-        R = ((R1*(255-alpha))+(R2*alpha))>>8;
-
-#define BLEND_MODE BLEND_NORMAL /* the formula expanded per pixel by CREATE_FUNCTIONS below */
-
-#define CREATE_FUNCTIONS(name, a, r, g, b) /* emits the blend, checker-fill and colour-fill functions for one 4-bytes-per-pixel layout; a, r, g, b are the byte offsets of each channel inside a pixel */ \
-void \
-gst_videomixer_blend_##name##_##name (guint8 * src, gint xpos, gint ypos, \
-    gint src_width, gint src_height, gdouble src_alpha, \
-    guint8 * dest, gint dest_width, gint dest_height) \
-{ \
-  gint alpha, s_alpha; \
-  gint i, j; \
-  gint src_stride, dest_stride; \
-  gint src_add, dest_add; \
-  gint B, G, R; \
-  \
-  src_stride = src_width * 4; /* taken before clipping: full row length in bytes */ \
-  dest_stride = dest_width * 4; \
-  \
-  s_alpha = CLAMP ((gint) (src_alpha * 256), 0, 256); /* frame-wide opacity in 0..256 */ \
-  \
-  /* adjust src pointers for negative positions */ \
-  if (xpos < 0) { \
-    src += -xpos * 4; \
-    src_width -= -xpos; \
-    xpos = 0; \
-  } \
-  if (ypos < 0) { \
-    src += -ypos * src_stride; \
-    src_height -= -ypos; \
-    ypos = 0; \
-  } \
-  /* adjust width/height if the src is bigger than dest */ \
-  if (xpos + src_width > dest_width) { \
-    src_width = dest_width - xpos; \
-  } \
-  if (ypos + src_height > dest_height) { \
-    src_height = dest_height - ypos; \
-  } \
-  \
-  src_add = src_stride - (4 * src_width); /* bytes to skip after the clipped span to reach the next row */ \
-  dest_add = dest_stride - (4 * src_width); \
-  \
-  dest = dest + 4 * xpos + (ypos * dest_stride); /* first dest pixel that will be blended */ \
-  \
-  for (i = 0; i < src_height; i++) { \
-    for (j = 0; j < src_width; j++) { \
-      alpha = (src[a] * s_alpha) >> 8; /* 0..255: pixel alpha scaled by the frame-wide opacity */ \
-      BLEND_MODE (dest[b], dest[g], dest[r], src[b], src[g], src[r], \
-          B, G, R, alpha); \
-      dest[b] = B; \
-      dest[g] = G; \
-      dest[r] = R; \
-      dest[a] = 0xff; /* every touched dest pixel gets alpha 0xff */ \
-      \
-      src += 4; \
-      dest += 4; \
-    } \
-    src += src_add; \
-    dest += dest_add; \
-  } \
-} \
-\
-/* fill a buffer with a checkerboard pattern of 8x8-pixel squares */ \
-void \
-gst_videomixer_fill_##name##_checker (guint8 * dest, gint width, gint height) \
-{ \
-  gint i, j; \
-  static const int tab[] = { 80, 160, 80, 160 }; /* the two grey levels; the index below only reaches 0..2 */ \
-  \
-  for (i = 0; i < height; i++) { \
-    for (j = 0; j < width; j++) { \
-      dest[b] = tab[((i & 0x8) >> 3) + ((j & 0x8) >> 3)];       /* blue */ \
-      dest[g] = tab[((i & 0x8) >> 3) + ((j & 0x8) >> 3)];       /* green */ \
-      dest[r] = tab[((i & 0x8) >> 3) + ((j & 0x8) >> 3)];       /* red */ \
-      dest[a] = 0xFF;           /* alpha */ \
-      dest += 4; \
-    } \
-  } \
-} \
-\
-void /* fills the buffer with one colour that is passed in as Y, U, V and converted to R, G, B once up front */ \
-gst_videomixer_fill_##name##_color (guint8 * dest, gint width, gint height, \
-    gint colY, gint colU, gint colV) \
-{ \
-  gint red, green, blue; \
-  gint i, j; \
-  \
-  red = CLAMP (1.164 * (colY - 16) + 1.596 * (colV - 128), 0, 255); /* computed in floating point, clamped, then truncated by the assignment to gint */ \
-  green = \
-      CLAMP (1.164 * (colY - 16) - 0.813 * (colV - 128) - 0.391 * (colU - 128), \
-      0, 255); \
-  blue = CLAMP (1.164 * (colY - 16) + 2.018 * (colU - 128), 0, 255); \
-  \
-  for (i = 0; i < height; i++) { \
-    for (j = 0; j < width; j++) { \
-      dest[b] = blue; \
-      dest[g] = green; \
-      dest[r] = red; \
-      dest[a] = 0xff; \
-      dest += 4; \
-    } \
-  } \
-}
-
-CREATE_FUNCTIONS (argb, 0, 1, 2, 3); /* alpha, red, green, blue at byte offsets 0, 1, 2, 3 */
-CREATE_FUNCTIONS (bgra, 3, 2, 1, 0); /* alpha, red, green, blue at byte offsets 3, 2, 1, 0 */
-
-#undef BLEND_MODE
--- a/gst/videomixer/blend_i420.c
+++ b/gst/videomixer/blend_i420.c
@ -1,339 +0,0 @@
-/* 
- * Copyright (C) 2006 Mindfruit Bv.
- *   Author: Sjoerd Simons <sjoerd@luon.net>
- *   Author: Alex Ugarte <alexugarte@gmail.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public
- * License along with this library; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 02111-1307, USA.
- */
-
-
-#include <gst/gst.h>
-#include <string.h>
-#include "videomixer.h"
-/* Per-pixel blend formulas for YUV data.  Each macro takes the destination
- * pixel (Y1,U1,V1), the source pixel (Y2,U2,V2) and an alpha that is used as
- * a 0..255 weight, and assigns the blended result to the lvalues Y, U and V.
- *
- * Every macro expands to several bare statements and uses its arguments
- * without parentheses, so it is only safe inside a braced block and with
- * simple variable or array-element arguments.
- * NOTE(review): BLEND_SOFTLIGHT and BLEND_HARDLIGHT end with a trailing line
- * continuation, so the blank line following each of them is part of the
- * macro. */
-#define BLEND_NORMAL(Y1,U1,V1,Y2,U2,V2,alpha,Y,U,V)     \
-        Y = ((Y1*(255-alpha))+(Y2*alpha))>>8;           \
-        U = ((U1*(255-alpha))+(U2*alpha))>>8;           \
-        V = ((V1*(255-alpha))+(V2*alpha))>>8;
-
-#define BLEND_ADD(Y1,U1,V1,Y2,U2,V2,alpha,Y,U,V)                \
-        Y = Y1+((Y2*alpha)>>8);                                 \
-        U = U1+(((127*(255-alpha)+(U2*alpha)))>>8)-127;         \
-        V = V1+(((127*(255-alpha)+(V2*alpha)))>>8)-127;         \
-        if (Y>255) {                                            \
-          gint mult = MAX (0, 288-Y);                           \
-          U = ((U*mult) + (127*(32-mult)))>>5;                  \
-          V = ((V*mult) + (127*(32-mult)))>>5;                  \
-          Y = 255;                                              \
-        }                                                       \
-        U = MIN (U,255);                                        \
-        V = MIN (V,255);
-
-#define BLEND_SUBTRACT(Y1,U1,V1,Y2,U2,V2,alpha,Y,U,V)           \
-        Y = Y1-((Y2*alpha)>>8);                                 \
-        U = U1+(((127*(255-alpha)+(U2*alpha)))>>8)-127;         \
-        V = V1+(((127*(255-alpha)+(V2*alpha)))>>8)-127;         \
-        if (Y<0) {                                              \
-          gint mult = MIN (32, -Y);                             \
-          U = ((U*(32-mult)) + (127*mult))>>5;                  \
-          V = ((V*(32-mult)) + (127*mult))>>5;                  \
-          Y = 0;                                                \
-        }
-
-#define BLEND_DARKEN(Y1,U1,V1,Y2,U2,V2,alpha,Y,U,V)     \
-        if (Y1 < Y2) {                                  \
-          Y = Y1; U = U1; V = V1;                       \
-        }                                               \
-        else {                                          \
-          Y = ((Y1*(255-alpha))+(Y2*alpha))>>8;         \
-          U = ((U1*(255-alpha))+(U2*alpha))>>8;         \
-          V = ((V1*(255-alpha))+(V2*alpha))>>8;         \
-        }
-
-#define BLEND_LIGHTEN(Y1,U1,V1,Y2,U2,V2,alpha,Y,U,V)    \
-        if (Y1 > Y2) {                                  \
-          Y = Y1; U = U1; V = V1;                       \
-        }                                               \
-        else {                                          \
-          Y = ((Y1*(255-alpha))+(Y2*alpha))>>8;         \
-          U = ((U1*(255-alpha))+(U2*alpha))>>8;         \
-          V = ((V1*(255-alpha))+(V2*alpha))>>8;         \
-        }
-
-#define BLEND_MULTIPLY(Y1,U1,V1,Y2,U2,V2,alpha,Y,U,V)                   \
-        Y = (Y1*(256*(255-alpha) +(Y2*alpha)))>>16;                     \
-        U = ((U1*(255-alpha)*256)+(alpha*(U1*Y2+128*(256-Y2))))>>16;    \
-        V = ((V1*(255-alpha)*256)+(alpha*(V1*Y2+128*(256-Y2))))>>16;
-
-#define BLEND_DIFFERENCE(Y1,U1,V1,Y2,U2,V2,alpha,Y,U,V)         \
-        Y = ABS((gint)Y1-(gint)Y2)+127;                         \
-        U = ABS((gint)U1-(gint)U2)+127;                         \
-        V = ABS((gint)V1-(gint)V2)+127;                         \
-        Y = ((Y*alpha)+(Y1*(255-alpha)))>>8;                    \
-        U = ((U*alpha)+(U1*(255-alpha)))>>8;                    \
-        V = ((V*alpha)+(V1*(255-alpha)))>>8;                    \
-        if (Y>255) {                                            \
-          gint mult = MAX (0, 288-Y);                           \
-          U = ((U*mult) + (127*(32-mult)))>>5;                  \
-          V = ((V*mult) + (127*(32-mult)))>>5;                  \
-          Y = 255;                                              \
-        } else if (Y<0) {                                       \
-          gint mult = MIN (32, -Y);                             \
-          U = ((U*(32-mult)) + (127*mult))>>5;                  \
-          V = ((V*(32-mult)) + (127*mult))>>5;                  \
-          Y = 0;                                                \
-        }                                                       \
-        U = CLAMP(U, 0, 255);                                   \
-        V = CLAMP(V, 0, 255);
-
-#define BLEND_EXCLUSION(Y1,U1,V1,Y2,U2,V2,alpha,Y,U,V)          \
-        Y = ((gint)(Y1^0xff)*Y2+(gint)(Y2^0xff)*Y1)>>8;         \
-        U = ((gint)(U1^0xff)*Y2+(gint)(Y2^0xff)*U1)>>8;         \
-        V = ((gint)(V1^0xff)*Y2+(gint)(Y2^0xff)*V1)>>8;         \
-        Y = ((Y*alpha)+(Y1*(255-alpha)))>>8;                    \
-        U = ((U*alpha)+(U1*(255-alpha)))>>8;                    \
-        V = ((V*alpha)+(V1*(255-alpha)))>>8;                    \
-        if (Y>255) {                                            \
-          gint mult = MAX (0, 288-Y);                           \
-          U = ((U*mult) + (127*(32-mult)))>>5;                  \
-          V = ((V*mult) + (127*(32-mult)))>>5;                  \
-          Y = 255;                                              \
-        } else if (Y<0) {                                       \
-          gint mult = MIN (32, -Y);                             \
-          U = ((U*(32-mult)) + (127*mult))>>5;                  \
-          V = ((V*(32-mult)) + (127*mult))>>5;                  \
-          Y = 0;                                                \
-        }                                                       \
-        U = CLAMP(U, 0, 255);                                   \
-        V = CLAMP(V, 0, 255);
-
-#define BLEND_SOFTLIGHT(Y1,U1,V1,Y2,U2,V2,alpha,Y,U,V)          \
-        Y = (gint)Y1+(gint)Y2 - 127;                            \
-        U = (gint)U1+(gint)U2 - 127;                            \
-        V = (gint)V1+(gint)V2 - 127;                            \
-        Y = ((Y*alpha)+(Y1*(255-alpha)))>>8;                    \
-        U = ((U*alpha)+(U1*(255-alpha)))>>8;                    \
-        V = ((V*alpha)+(V1*(255-alpha)))>>8;                    \
-        if (Y>255) {                                            \
-          gint mult = MAX (0, 288-Y);                           \
-          U = ((U*mult) + (127*(32-mult)))>>5;                  \
-          V = ((V*mult) + (127*(32-mult)))>>5;                  \
-          Y = 255;                                              \
-        } else if (Y<0) {                                       \
-          gint mult = MIN (32, -Y);                             \
-          U = ((U*(32-mult)) + (127*mult))>>5;                  \
-          V = ((V*(32-mult)) + (127*mult))>>5;                  \
-          Y = 0;                                                \
-        }                                                       \
-
-#define BLEND_HARDLIGHT(Y1,U1,V1,Y2,U2,V2,alpha,Y,U,V)          \
-        Y = (gint)Y1+(gint)Y2*2 - 255;                          \
-        U = (gint)U1+(gint)U2 - 127;                            \
-        V = (gint)V1+(gint)V2 - 127;                            \
-        Y = ((Y*alpha)+(Y1*(255-alpha)))>>8;                    \
-        U = ((U*alpha)+(U1*(255-alpha)))>>8;                    \
-        V = ((V*alpha)+(V1*(255-alpha)))>>8;                    \
-        if (Y>255) {                                            \
-          gint mult = MAX (0, 288-Y);                           \
-          U = ((U*mult) + (127*(32-mult)))>>5;                  \
-          V = ((V*mult) + (127*(32-mult)))>>5;                  \
-          Y = 255;                                              \
-        } else if (Y<0) {                                       \
-          gint mult = MIN (32, -Y);                             \
-          U = ((U*(32-mult)) + (127*mult))>>5;                  \
-          V = ((V*(32-mult)) + (127*mult))>>5;                  \
-          Y = 0;                                                \
-        }                                                       \
-
-#define BLEND_MODE BLEND_NORMAL /* the formula selected for this file */
-#if 0 /* disabled: the other values BLEND_MODE could be set to */
-#define BLEND_MODE BLEND_NORMAL
-#define BLEND_MODE BLEND_ADD
-#define BLEND_MODE BLEND_SUBTRACT
-#define BLEND_MODE BLEND_LIGHTEN
-#define BLEND_MODE BLEND_DARKEN
-#define BLEND_MODE BLEND_MULTIPLY
-#define BLEND_MODE BLEND_DIFFERENCE
-#define BLEND_MODE BLEND_EXCLUSION
-#define BLEND_MODE BLEND_SOFTLIGHT
-#define BLEND_MODE BLEND_HARDLIGHT
-#endif
-
-/* I420 frame layout helpers.  All results are in bytes; the arguments are
- * the frame width and height. */
-/* Copied from jpegenc.  Row strides: Y is the width rounded up to a multiple
- * of 4; U and V are the width rounded up to a multiple of 8, then halved. */
-#define VIDEO_Y_ROWSTRIDE(width) (GST_ROUND_UP_4(width))
-#define VIDEO_U_ROWSTRIDE(width) (GST_ROUND_UP_8(width)/2)
-#define VIDEO_V_ROWSTRIDE(width) ((GST_ROUND_UP_8(VIDEO_Y_ROWSTRIDE(width)))/2)
-/* Start of each plane relative to the start of the frame.  The height is
- * rounded up to an even number of rows before it is multiplied in. */
-#define VIDEO_Y_OFFSET(w,h) (0)
-#define VIDEO_U_OFFSET(w,h) (VIDEO_Y_OFFSET(w,h)+(VIDEO_Y_ROWSTRIDE(w)*GST_ROUND_UP_2(h)))
-#define VIDEO_V_OFFSET(w,h) (VIDEO_U_OFFSET(w,h)+(VIDEO_U_ROWSTRIDE(w)*GST_ROUND_UP_2(h)/2))
-/* Total size of one frame: the end of the V plane. */
-#define VIDEO_SIZE(w,h)     (VIDEO_V_OFFSET(w,h)+(VIDEO_V_ROWSTRIDE(w)*GST_ROUND_UP_2(h)/2))
-
-/* Blend a rectangle of one 8-bit plane from src onto dest.
- *
- * src, dest:    first sample of the rectangle in the source / destination
- * src_stride,
- * dest_stride:  distance in bytes between the starts of successive rows
- * src_width,
- * src_height:   size of the rectangle in samples
- * dest_width:   not needed for the computation; kept so that the existing
- *               callers do not have to change
- * src_alpha:    source opacity; 0.0 leaves dest untouched, 1.0 copies src
- */
-inline static void
-gst_i420_do_blend (guint8 * src, guint8 * dest,
-    gint src_stride, gint dest_stride, gint src_width, gint src_height,
-    gint dest_width, gdouble src_alpha)
-{
-  int i, j;
-  gint b_alpha;
-
-  (void) dest_width;
-
-  /* If it's completely transparent... we just return */
-  if (G_UNLIKELY (src_alpha == 0.0)) {
-    GST_INFO ("Fast copy (alpha == 0.0)");
-    return;
-  }
-
-  /* If it's completely opaque, we do a fast copy */
-  if (G_UNLIKELY (src_alpha == 1.0)) {
-    GST_INFO ("Fast copy (alpha == 1.0)");
-    for (i = 0; i < src_height; i++) {
-      memcpy (dest, src, src_width);
-      src += src_stride;
-      dest += dest_stride;
-    }
-    return;
-  }
-
-  b_alpha = CLAMP ((gint) (src_alpha * 255), 0, 255);
-
-  for (i = 0; i < src_height; i++) {
-    for (j = 0; j < src_width; j++) {
-      *dest = (b_alpha * (*src) + (255 - b_alpha) * (*dest)) >> 8;
-      dest++;
-      src++;
-    }
-    /* Both pointers moved forward by src_width samples in the inner loop, so
-     * that is what has to be subtracted from each stride to reach the start
-     * of the next row.  Subtracting dest_width here made the destination
-     * rows drift whenever the rectangle was narrower than the destination. */
-    src += src_stride - src_width;
-    dest += dest_stride - src_width;
-  }
-}
-
-/* Blend an I420 source frame onto an I420 destination frame.
- *
- * src, dest:               whole frames, laid out as described by the
- *                          VIDEO_*_OFFSET and VIDEO_*_ROWSTRIDE macros
- * xpos, ypos:              position of the source inside the destination;
- *                          rounded up to even values, may be negative
- * src_width, src_height:   size of the source frame
- * src_alpha:               opacity of the source
- * dest_width, dest_height: size of the destination frame
- *
- * The source is clipped against the destination, then the Y, U and V planes
- * are blended one after another. */
-void
-gst_videomixer_blend_i420_i420 (guint8 * src, gint xpos, gint ypos,
-    gint src_width, gint src_height, gdouble src_alpha,
-    guint8 * dest, gint dest_width, gint dest_height)
-{
-  guint8 *b_src;
-  guint8 *b_dest;
-  gint b_src_width = src_width;
-  gint b_src_height = src_height;
-  gint xoffset = 0;
-  gint yoffset = 0;
-
-  xpos = GST_ROUND_UP_2 (xpos);
-  ypos = GST_ROUND_UP_2 (ypos);
-
-  /* a negative position means the left/top part of the source is cut off:
-   * skip it in the source and shrink the region accordingly */
-  if (xpos < 0) {
-    xoffset = -xpos;
-    b_src_width -= -xpos;
-    xpos = 0;
-  }
-  if (ypos < 0) {
-    yoffset += -ypos;
-    b_src_height -= -ypos;
-    ypos = 0;
-  }
-  /* If the x or y offset is larger than the source, the source lies
-   * completely outside of the picture.  The y offset has to be compared
-   * with the height, not the width. */
-  if (xoffset > src_width || yoffset > src_height) {
-    return;
-  }
-
-  /* clip what is left of the source against the right/bottom edge of dest.
-   * This has to start from the already clipped size: using the full source
-   * size here could grow the region again after the left/top clipping. */
-  if (xpos + b_src_width > dest_width) {
-    b_src_width = dest_width - xpos;
-  }
-  if (ypos + b_src_height > dest_height) {
-    b_src_height = dest_height - ypos;
-  }
-  if (b_src_width < 0 || b_src_height < 0) {
-    return;
-  }
-
-  /* First mix Y, then U, then V */
-  b_src = src + VIDEO_Y_OFFSET (src_width, src_height);
-  b_dest = dest + VIDEO_Y_OFFSET (dest_width, dest_height);
-  gst_i420_do_blend (b_src + xoffset + yoffset * VIDEO_Y_ROWSTRIDE (src_width),
-      b_dest + xpos + ypos * VIDEO_Y_ROWSTRIDE (dest_width),
-      VIDEO_Y_ROWSTRIDE (src_width),
-      VIDEO_Y_ROWSTRIDE (dest_width), b_src_width, b_src_height,
-      dest_width, src_alpha);
-
-  /* the chroma planes use half the offsets and half the region size */
-  b_src = src + VIDEO_U_OFFSET (src_width, src_height);
-  b_dest = dest + VIDEO_U_OFFSET (dest_width, dest_height);
-
-  gst_i420_do_blend (b_src + xoffset / 2 +
-      yoffset / 2 * VIDEO_U_ROWSTRIDE (src_width),
-      b_dest + xpos / 2 + ypos / 2 * VIDEO_U_ROWSTRIDE (dest_width),
-      VIDEO_U_ROWSTRIDE (src_width), VIDEO_U_ROWSTRIDE (dest_width),
-      b_src_width / 2, GST_ROUND_UP_2 (b_src_height) / 2, dest_width / 2,
-      src_alpha);
-
-  b_src = src + VIDEO_V_OFFSET (src_width, src_height);
-  b_dest = dest + VIDEO_V_OFFSET (dest_width, dest_height);
-
-  gst_i420_do_blend (b_src + xoffset / 2 +
-      yoffset / 2 * VIDEO_V_ROWSTRIDE (src_width),
-      b_dest + xpos / 2 + ypos / 2 * VIDEO_V_ROWSTRIDE (dest_width),
-      VIDEO_V_ROWSTRIDE (src_width), VIDEO_V_ROWSTRIDE (dest_width),
-      b_src_width / 2, GST_ROUND_UP_2 (b_src_height) / 2, dest_width / 2,
-      src_alpha);
-}
-
-#undef BLEND_MODE
-
-/* Fill an I420 frame of width x height with a grey checkerboard pattern.
- *
- * dest is the start of the frame, laid out as described by the
- * VIDEO_*_OFFSET and VIDEO_*_ROWSTRIDE macros. */
-void
-gst_videomixer_fill_i420_checker (guint8 * dest, gint width, gint height)
-{
-  int size;
-  gint i, j;
-  static const int tab[] = { 80, 160, 80, 160 };
-  guint8 *p = dest;
-
-  /* luma: bit 3 of the row and of the column select the entry, which gives
-   * 8x8 squares alternating between 80 and 160 */
-  for (i = 0; i < height; i++) {
-    for (j = 0; j < width; j++) {
-      *p++ = tab[((i & 0x8) >> 3) + ((j & 0x8) >> 3)];
-    }
-    p += VIDEO_Y_ROWSTRIDE (width) - width;
-  }
-
-  /* chroma: constant 0x80.  The plane offsets are computed from the height
-   * rounded up to an even value, so the same rounding is needed here or an
-   * odd height leaves part of the last chroma row unwritten. */
-  size = VIDEO_U_ROWSTRIDE (width) * GST_ROUND_UP_2 (height) / 2;
-  memset (dest + VIDEO_U_OFFSET (width, height), 0x80, size);
-
-  size = VIDEO_V_ROWSTRIDE (width) * GST_ROUND_UP_2 (height) / 2;
-  memset (dest + VIDEO_V_OFFSET (width, height), 0x80, size);
-}
-
-/* Fill an I420 frame of width x height with a single colour.
- *
- * dest is the start of the frame, laid out as described by the
- * VIDEO_*_OFFSET and VIDEO_*_ROWSTRIDE macros; colY, colU and colV are the
- * byte values written to the Y, U and V plane. */
-void
-gst_videomixer_fill_i420_color (guint8 * dest, gint width, gint height,
-    gint colY, gint colU, gint colV)
-{
-  int size;
-
-  size = VIDEO_Y_ROWSTRIDE (width) * height;
-  memset (dest, colY, size);
-
-  /* The plane offsets are computed from the height rounded up to an even
-   * value, so the same rounding is needed here or an odd height leaves part
-   * of the last chroma row unwritten. */
-  size = VIDEO_U_ROWSTRIDE (width) * GST_ROUND_UP_2 (height) / 2;
-  memset (dest + VIDEO_U_OFFSET (width, height), colU, size);
-
-  size = VIDEO_V_ROWSTRIDE (width) * GST_ROUND_UP_2 (height) / 2;
-  memset (dest + VIDEO_V_OFFSET (width, height), colV, size);
-}
--- a/gst/videomixer/blend_mmx.h
+++ b/gst/videomixer/blend_mmx.h
@ -0,0 +1,168 @@
+#ifdef A32
+/* Blend a rectangle of 32-bit pixels (one alpha byte and three colour bytes)
+ * from src onto dest using MMX.
+ *
+ * dest, src:     first pixel of the rectangle in each buffer
+ * src_height,
+ * src_width:     size of the rectangle in pixels
+ * src_stride,
+ * dest_stride:   distance in bytes between the starts of successive rows
+ * s_alpha:       global scale; each source pixel's alpha is multiplied by it
+ *                and shifted right by 8 to give the blend factor
+ *
+ * The alpha byte of every written destination pixel is set to 0xff.
+ * NAME_BLEND and A_OFF have to be defined by the including file; A_OFF == 0
+ * selects the lowest byte of each pixel as alpha, any other value the
+ * highest byte.
+ *
+ * NOTE(review): old_ebx is written by the asm but passed as an input operand;
+ * only the "memory" clobber tells the compiler that it changes. */
+static inline void
+NAME_BLEND (guint8 * dest, const guint8 * src, gint src_height, gint src_width,
+    gint src_stride, gint dest_stride, guint s_alpha)
+{
+  gint i;
+  gint src_add = src_stride - (4 * src_width);
+  gint dest_add = dest_stride - (4 * src_width);
+
+  for (i = 0; i < src_height; i++) {
+    gulong old_ebx;
+
+    /*      (P1 * (256 - A) + (P2 * A)) / 256
+     * =>   (P1 * 256 - P1 * A + P2 * A) / 256
+     * =>   (P1 * 256 + A * (P2 - P1)) / 256
+     * =>   P1 + (A * (P2 - P1)) / 256
+     */
+    /* *INDENT-OFF* */
+    __asm__ __volatile__ (
+        " movl       %%ebx ,      %6   \n\t"   /* save ebx, it is used as pixel counter */
+
+        " pcmpeqd    %%mm5 ,   %%mm5   \n\t"   /* mm5 = 0xffff... */
+#if A_OFF == 0
+        " psrld        $24 ,   %%mm5   \n\t"   /* mm5 = 00 00 00 ff 00 00 00 ff, selector for alpha */
+#else
+        " pslld        $24 ,   %%mm5   \n\t"   /* mm5 = ff 00 00 00 ff 00 00 00, selector for alpha */
+#endif
+        " mov           %4 ,   %%eax   \n\t"   /* eax = s_alpha */
+        " movd       %%eax ,   %%mm6   \n\t"   /* mm6 = s_alpha */
+        " punpckldq  %%mm6 ,   %%mm6   \n\t"   /* mm6 = 00 00 00 aa 00 00 00 aa, alpha scale factor */
+        " pxor       %%mm7 ,   %%mm7   \n\t"   /* mm7 = 0, the odd pixel code unpacks against it */
+
+        " movl          %5 ,   %%ebx   \n\t"   /* ebx = src_width */
+        " test          $1 ,   %%ebx   \n\t"   /* check odd pixel */
+        " je                      1f   \n\t"
+
+        /* do odd pixel */
+        " movd        (%2) ,   %%mm2   \n\t"   /* mm2 = src,  00 00 00 00 sv su sy sa */
+        " movd        (%3) ,   %%mm1   \n\t"   /* mm1 = dest, 00 00 00 00 dv du dy da */
+        " movq       %%mm2 ,   %%mm0   \n\t"
+        " punpcklbw  %%mm7 ,   %%mm2   \n\t"   /* mm2 = 00 sv 00 su 00 sy 00 sa */
+        " pand       %%mm5 ,   %%mm0   \n\t"   /* mm0 = 00 00 00 00 00 00 00 sa, get alpha component  */
+#if A_OFF != 0
+        " psrld        $24 ,   %%mm0   \n\t"
+#endif
+        " punpcklbw  %%mm7 ,   %%mm1   \n\t"   /* mm1 = 00 dv 00 du 00 dy 00 da */
+        " pmullw     %%mm6 ,   %%mm0   \n\t"   /* mult with scale */
+        " psubw      %%mm1 ,   %%mm2   \n\t"   /* mm2 = mm2 - mm1 */
+        " punpcklwd  %%mm0 ,   %%mm0   \n\t"
+        " punpckldq  %%mm0 ,   %%mm0   \n\t"   /* mm0 == 00 aa 00 aa 00 aa 00 aa */
+        " psrlw         $8 ,   %%mm0   \n\t"
+        " pmullw     %%mm0 ,   %%mm2   \n\t"   /* mm2 == a * mm2 */
+        " psllw         $8 ,   %%mm1   \n\t"   /* scale up */
+        " paddw      %%mm1 ,   %%mm2   \n\t"   /* mm2 == mm2 + mm1 */
+        " psrlw         $8 ,   %%mm2   \n\t"   /* scale down */
+        " packuswb   %%mm2 ,   %%mm2   \n\t"   /* back to one byte per component */
+        " por        %%mm5 ,   %%mm2   \n\t"   /* set alpha to ff; mm5 is a byte mask, so only after packing */
+        " movd       %%mm2 ,    (%3)   \n\t"   /* dest = mm2 */
+        " add           $4 ,     %1    \n\t"
+        " add           $4 ,     %0    \n\t"
+
+        "1:                            \n\t"
+        " sar           $1 ,   %%ebx   \n\t"   /* prepare for 2 pixel per loop */
+        " cmp           $0 ,   %%ebx   \n\t"
+        " je                      3f   \n\t"
+        "2:                            \n\t"
+
+        /* do even pixels */
+        " movq        (%2) ,   %%mm2   \n\t"   /* mm2 = src,  sv1 su1 sy1 sa1  sv0 su0 sy0 sa0 */
+        " movq        (%3) ,   %%mm1   \n\t"   /* mm1 = dest, dv1 du1 dy1 da1  dv0 du0 dy0 da0 */
+        " movq       %%mm2 ,   %%mm4   \n\t"
+        " movq       %%mm1 ,   %%mm3   \n\t"
+        " movq       %%mm2 ,   %%mm0   \n\t"   /* copy for doing the alpha */
+
+        " pxor       %%mm7 ,   %%mm7   \n\t"   /* mm7 is reused for alpha below, clear it again */
+        " punpcklbw  %%mm7 ,   %%mm2   \n\t"   /* mm2 = 00 sv0  00 su0  00 sy0  00 sa0 */
+        " punpckhbw  %%mm7 ,   %%mm4   \n\t"   /* mm4 = 00 sv1  00 su1  00 sy1  00 sa1 */
+        " punpcklbw  %%mm7 ,   %%mm1   \n\t"   /* mm1 = 00 dv0  00 du0  00 dy0  00 da0 */
+        " punpckhbw  %%mm7 ,   %%mm3   \n\t"   /* mm3 = 00 dv1  00 du1  00 dy1  00 da1 */
+
+        " pand       %%mm5 ,   %%mm0   \n\t"   /* mm0 = 00 00 00 sa1  00 00 00 sa0 */
+#if A_OFF != 0
+        " psrld        $24 ,   %%mm0   \n\t"
+#endif
+        " psubw      %%mm1 ,   %%mm2   \n\t"   /* mm2 = mm2 - mm1 */
+        " pmullw     %%mm6 ,   %%mm0   \n\t"   /* mult with scale */
+        " psubw      %%mm3 ,   %%mm4   \n\t"   /* mm4 = mm4 - mm3 */
+        " psrlw         $8 ,   %%mm0   \n\t"   /* scale back */
+        " movq       %%mm0 ,   %%mm7   \n\t"   /* save copy */
+        " punpcklwd  %%mm0 ,   %%mm0   \n\t"   /* mm0 = 00 00   00 00   00 aa0  00 aa0 */
+        " punpckhwd  %%mm7 ,   %%mm7   \n\t"   /* mm7 = 00 00   00 00   00 aa1  00 aa1 */
+        " punpckldq  %%mm0 ,   %%mm0   \n\t"   /* mm0 = 00 aa0  00 aa0  00 aa0  00 aa0 */
+        " punpckldq  %%mm7 ,   %%mm7   \n\t"   /* mm7 = 00 aa1  00 aa1  00 aa1  00 aa1 */
+
+        " pmullw     %%mm0 ,   %%mm2   \n\t"   /* mm2 == aa0 * mm2 */
+        " pmullw     %%mm7 ,   %%mm4   \n\t"   /* mm4 == aa1 * mm4 */
+        " psllw         $8 ,   %%mm1   \n\t"
+        " psllw         $8 ,   %%mm3   \n\t"
+        " paddw      %%mm1 ,   %%mm2   \n\t"   /* mm2 == mm2 + mm1 */
+        " paddw      %%mm3 ,   %%mm4   \n\t"   /* mm4 == mm4 + mm3 */
+
+        " psrlw         $8 ,   %%mm2   \n\t"
+        " psrlw         $8 ,   %%mm4   \n\t"
+        " packuswb   %%mm4 ,   %%mm2   \n\t"
+        " por        %%mm5 ,   %%mm2   \n\t"   /* set alpha to ff */
+        " movq       %%mm2 ,    (%3)   \n\t"
+
+        " add           $8 ,     %1    \n\t"
+        " add           $8 ,     %0    \n\t"
+        " dec           %%ebx          \n\t"
+        " jne                     2b   \n\t"
+
+        "3:                            \n\t"
+        " movl          %6 ,   %%ebx   \n\t"   /* restore ebx */
+        :"=r" (src), "=r" (dest)
+        :"0" (src), "1" (dest), "m" (s_alpha), "m" (src_width), "m" (old_ebx)
+        :"%eax", "memory"
+#ifdef __MMX__
+        , "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"
+#endif
+    );
+    /* *INDENT-ON* */
+    src += src_add;
+    dest += dest_add;
+  }
+  __asm__ __volatile__ ("emms");
+}
+/* Fill width * height consecutive 32-bit pixels at dest with one colour,
+ * using MMX stores.
+ *
+ * The pixel is built from an alpha of 0xff and the components c1, c2 and c3,
+ * each shifted to the bit position given by A_OFF, C1_OFF, C2_OFF and C3_OFF.
+ * Those macros and NAME_FILL_COLOR itself are not defined here and have to
+ * come from the including file.
+ * The buffer is written as one linear run of pixels, so rows are assumed to
+ * have no padding -- TODO confirm against the callers.
+ * Note that height comes before width in the parameter list. */
+static inline void
+NAME_FILL_COLOR (guint8 * dest, gint height, gint width, gint c1, gint c2,
+    gint c3)
+{
+  guint64 val;
+  guint nvals = width * height; /* number of pixels still to write */
+  /* one pixel in the low 32 bits */
+  val = (((guint64) 0xff << A_OFF)) | (((guint64) c1) << C1_OFF) |
+      (((guint64) c2) << C2_OFF) | (((guint64) c3) << C3_OFF);
+  val = (val << 32) | val; /* same pixel twice, so one movq stores two */
+  /* A single pixel is stored first when the count is odd; the loop then
+   * stores two pixels per iteration. */
+  /* *INDENT-OFF* */
+  __asm__ __volatile__ (
+    "movq     %4 , %%mm0  \n\t"   /* mm0 = val */
+    "test     $1 ,    %0  \n\t"   /* odd number of pixels? */
+    "je       1f          \n\t"
+    "movd  %%mm0 ,  (%1)  \n\t"   /* store one pixel */
+    "add      $4 ,    %1  \n\t"
+    "dec      %0          \n\t"
+    "1:                   \n\t"
+    "sar      $1 ,    %0  \n\t"   /* count pairs of pixels from here on */
+    "cmp      $0 ,    %0  \n\t"
+    "je       3f          \n\t"   /* nothing left */
+    "2:                   \n\t"
+    "movq  %%mm0 ,  (%1)  \n\t"   /* store two pixels */
+    "add      $8 ,    %1  \n\t"
+    "dec      %0          \n\t"
+    "jne      2b          \n\t"
+    "3:                   \n\t"
+    "emms                 \n\t"   /* done with MMX, clear its state */
+    : "=r" (nvals), "=r" (dest)
+    : "0" (nvals), "1" (dest), "m" (val)
+    : "memory"
+#ifdef __MMX__
+      , "mm0"
+#endif
+  );
+  /* *INDENT-ON* */
+}
+#endif
+
--- a/gst/videomixer/blend_rgb.c
+++ b/gst/videomixer/blend_rgb.c
@ -1,155 +0,0 @@
-/* 
- * Copyright (C) 2009 Alex Ugarte <augarte@vicomtech.org>
- * Copyright (C) 2009 Sebastian Dröge <sebastian.droege@collabora.co.uk>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public
- * License along with this library; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 02111-1307, USA.
- */
-
-
-#include <gst/gst.h>
-#include <string.h>
-#include "videomixer.h"
-/* Linear blend of the destination pixel (R1,G1,B1) with the source pixel
- * (R2,G2,B2); the result is assigned to the lvalues R, G and B.  The two
- * weights, 255-alpha and alpha, add up to 255 while the sum is shifted right
- * by 8.  Expands to three bare statements with unparenthesized arguments. */
-#define BLEND_NORMAL(R1,G1,B1,R2,G2,B2,alpha,R,G,B)     \
-        R = ((R1*(255-alpha))+(R2*alpha))>>8;           \
-        G = ((G1*(255-alpha))+(G2*alpha))>>8;           \
-        B = ((B1*(255-alpha))+(B2*alpha))>>8;
-/* Blend formula used inside the functions that CREATE_FUNCTIONS generates. */
-#define BLEND_MODE BLEND_NORMAL
-
-/* Generate the blend and fill functions for one packed RGB layout.
- *
- * name:    suffix used in the names of the generated functions
- * bpp:     bytes per pixel
- * r, g, b: byte offset of each colour component inside a pixel
- *
- * Rows are padded to a multiple of 4 bytes in all generated functions.
- *
- * gst_videomixer_blend_<name>_<name>: blends src onto dest at (xpos, ypos)
- *   with src_alpha as opacity, after clipping src against dest.
- * gst_videomixer_fill_<name>_checker: fills dest with a grey checkerboard.
- * gst_videomixer_fill_<name>_color: fills dest with the RGB equivalent of
- *   the given Y, U, V colour.
- */
-#define CREATE_FUNCTIONS(name, bpp, r, g, b) \
-void \
-gst_videomixer_blend_##name##_##name (guint8 * src, gint xpos, gint ypos, \
-    gint src_width, gint src_height, gdouble src_alpha, \
-    guint8 * dest, gint dest_width, gint dest_height) \
-{ \
-  gint b_alpha; \
-  gint i, j; \
-  gint src_stride, dest_stride; \
-  gint src_add, dest_add; \
-  gint R, G, B; \
-  \
-  src_stride = GST_ROUND_UP_4 (src_width * bpp); \
-  dest_stride = GST_ROUND_UP_4 (dest_width * bpp); \
-  \
-  b_alpha = CLAMP ((gint) (src_alpha * 255), 0, 255); \
-  \
-  /* adjust src pointers for negative positions */ \
-  if (xpos < 0) { \
-    src += -xpos * bpp; \
-    src_width -= -xpos; \
-    xpos = 0; \
-  } \
-  if (ypos < 0) { \
-    src += -ypos * src_stride; \
-    src_height -= -ypos; \
-    ypos = 0; \
-  } \
-  /* adjust width/height if the src is bigger than dest */ \
-  if (xpos + src_width > dest_width) { \
-    src_width = dest_width - xpos; \
-  } \
-  if (ypos + src_height > dest_height) { \
-    src_height = dest_height - ypos; \
-  } \
-  /* Nothing of the source is left inside dest.  Without this check a */ \
-  /* negative width would reach memcpy() below as a huge size. */ \
-  if (src_width <= 0 || src_height <= 0) { \
-    return; \
-  } \
-  \
-  src_add = src_stride - (bpp * src_width); \
-  dest_add = dest_stride - (bpp * src_width); \
-  \
-  dest = dest + bpp * xpos + (ypos * dest_stride); \
-  /* If it's completely transparent... we just return */ \
-  if (G_UNLIKELY (src_alpha == 0.0)) { \
-    GST_INFO ("Fast copy (alpha == 0.0)"); \
-    return; \
-  } \
-  \
-  /* If it's completely opaque, we do a fast copy */ \
-  if (G_UNLIKELY (src_alpha == 1.0)) { \
-    GST_INFO ("Fast copy (alpha == 1.0)"); \
-    for (i = 0; i < src_height; i++) { \
-      memcpy (dest, src, bpp * src_width); \
-      src += src_stride; \
-      dest += dest_stride; \
-    } \
-    return; \
-  } \
-  \
-  for (i = 0; i < src_height; i++) { \
-    for (j = 0; j < src_width; j++) { \
-      BLEND_MODE (dest[r], dest[g], dest[b], src[r], src[g], src[b], \
-          b_alpha, R, G, B); \
-      dest[r] = R; \
-      dest[g] = G; \
-      dest[b] = B; \
-      \
-      src += bpp; \
-      dest += bpp; \
-    } \
-    /* skip the part of each row that lies outside the blended region */ \
-    src += src_add; \
-    dest += dest_add; \
-  } \
-} \
-\
-/* fill a buffer with a checkerboard pattern */ \
-void \
-gst_videomixer_fill_##name##_checker (guint8 * dest, gint width, gint height) \
-{ \
-  gint i, j; \
-  static const int tab[] = { 80, 160, 80, 160 }; \
-  gint dest_add = GST_ROUND_UP_4 (width * bpp) - width * bpp; \
-  \
-  for (i = 0; i < height; i++) { \
-    for (j = 0; j < width; j++) { \
-      dest[r] = tab[((i & 0x8) >> 3) + ((j & 0x8) >> 3)];       /* red */ \
-      dest[g] = tab[((i & 0x8) >> 3) + ((j & 0x8) >> 3)];       /* green */ \
-      dest[b] = tab[((i & 0x8) >> 3) + ((j & 0x8) >> 3)];       /* blue */ \
-      dest += bpp; \
-    } \
-    dest += dest_add; \
-  } \
-} \
-\
-void \
-gst_videomixer_fill_##name##_color (guint8 * dest, gint width, gint height, \
-    gint colY, gint colU, gint colV) \
-{ \
-  gint red, green, blue; \
-  gint i, j; \
-  gint dest_add = GST_ROUND_UP_4 (width * bpp) - width * bpp; \
-  \
-  /* convert the YUV colour to RGB once, outside of the loops */ \
-  red = CLAMP (1.164 * (colY - 16) + 1.596 * (colV - 128), 0, 255); \
-  green = \
-      CLAMP (1.164 * (colY - 16) - 0.813 * (colV - 128) - 0.391 * (colU - 128), \
-      0, 255); \
-  blue = CLAMP (1.164 * (colY - 16) + 2.018 * (colU - 128), 0, 255); \
-  \
-  for (i = 0; i < height; i++) { \
-    for (j = 0; j < width; j++) { \
-      dest[r] = red; \
-      dest[g] = green; \
-      dest[b] = blue; \
-      dest += bpp; \
-    } \
-    dest += dest_add; \
-  } \
-}
-/* Instantiate the blend and fill functions for each packed RGB layout:
- * (name, bytes per pixel, byte offset of red, of green, of blue). */
-CREATE_FUNCTIONS (rgb, 3, 0, 1, 2);
-CREATE_FUNCTIONS (bgr, 3, 2, 1, 0);
-CREATE_FUNCTIONS (xrgb, 4, 1, 2, 3);
-CREATE_FUNCTIONS (xbgr, 4, 3, 2, 1);
-CREATE_FUNCTIONS (rgbx, 4, 0, 1, 2);
-CREATE_FUNCTIONS (bgrx, 4, 2, 1, 0);
--- a/gst/videomixer/videomixer.c
+++ b/gst/videomixer/videomixer.c
@ -68,10 +68,6 @@
 #include <gst/controller/gstcontroller.h>
 #include <gst/video/video.h>

-#include <liboil/liboil.h>
-#include <liboil/liboilcpu.h>
-#include <liboil/liboilfunction.h>
-
 #ifdef HAVE_STDLIB_H
 #include <stdlib.h>
 #endif
@ -105,79 +101,6 @@ static gboolean gst_videomixer_sink_event (GstPad * pad, GstEvent * event);

 static void gst_videomixer_sort_pads (GstVideoMixer * mix);

-/*AYUV function definitions see file: blend_ayuv*/
-void gst_videomixer_blend_ayuv_ayuv (guint8 * src, gint xpos, gint ypos,
-    gint src_width, gint src_height, gdouble src_alpha,
-    guint8 * dest, gint dest_width, gint dest_height);
-void gst_videomixer_fill_ayuv_checker (guint8 * dest, gint width, gint height);
-void gst_videomixer_fill_ayuv_color (guint8 * dest, gint width, gint height,
-    gint colY, gint colU, gint colV);
-/*BGRA/ARGB function definitions see file: blend_bgra*/
-void gst_videomixer_blend_bgra_bgra (guint8 * src, gint xpos, gint ypos,
-    gint src_width, gint src_height, gdouble src_alpha,
-    guint8 * dest, gint dest_width, gint dest_height);
-void gst_videomixer_fill_bgra_checker (guint8 * dest, gint width, gint height);
-void gst_videomixer_fill_bgra_color (guint8 * dest, gint width, gint height,
-    gint colY, gint colU, gint colV);
-void gst_videomixer_blend_argb_argb (guint8 * src, gint xpos, gint ypos,
-    gint src_width, gint src_height, gdouble src_alpha,
-    guint8 * dest, gint dest_width, gint dest_height);
-void gst_videomixer_fill_argb_checker (guint8 * dest, gint width, gint height);
-void gst_videomixer_fill_argb_color (guint8 * dest, gint width, gint height,
-    gint colY, gint colU, gint colV);
-/* RGB function definitions see file: blend_rgb.c */
-void gst_videomixer_blend_rgb_rgb (guint8 * src, gint xpos, gint ypos,
-    gint src_width, gint src_height, gdouble src_alpha,
-    guint8 * dest, gint dest_width, gint dest_height);
-void gst_videomixer_fill_rgb_checker (guint8 * dest, gint width, gint height);
-void gst_videomixer_fill_rgb_color (guint8 * dest, gint width, gint height,
-    gint colY, gint colU, gint colV);
-void gst_videomixer_blend_bgr_bgr (guint8 * src, gint xpos, gint ypos,
-    gint src_width, gint src_height, gdouble src_alpha,
-    guint8 * dest, gint dest_width, gint dest_height);
-void gst_videomixer_fill_bgr_checker (guint8 * dest, gint width, gint height);
-void gst_videomixer_fill_bgr_color (guint8 * dest, gint width, gint height,
-    gint colY, gint colU, gint colV);
-void gst_videomixer_blend_xrgb_xrgb (guint8 * src, gint xpos, gint ypos,
-    gint src_width, gint src_height, gdouble src_alpha,
-    guint8 * dest, gint dest_width, gint dest_height);
-void gst_videomixer_fill_xrgb_checker (guint8 * dest, gint width, gint height);
-void gst_videomixer_fill_xrgb_color (guint8 * dest, gint width, gint height,
-    gint colY, gint colU, gint colV);
-void gst_videomixer_blend_xbgr_xbgr (guint8 * src, gint xpos, gint ypos,
-    gint src_width, gint src_height, gdouble src_alpha,
-    guint8 * dest, gint dest_width, gint dest_height);
-void gst_videomixer_fill_xbgr_checker (guint8 * dest, gint width, gint height);
-void gst_videomixer_fill_xbgr_color (guint8 * dest, gint width, gint height,
-    gint colY, gint colU, gint colV);
-void gst_videomixer_blend_rgbx_rgbx (guint8 * src, gint xpos, gint ypos,
-    gint src_width, gint src_height, gdouble src_alpha,
-    guint8 * dest, gint dest_width, gint dest_height);
-void gst_videomixer_fill_rgbx_checker (guint8 * dest, gint width, gint height);
-void gst_videomixer_fill_rgbx_color (guint8 * dest, gint width, gint height,
-    gint colY, gint colU, gint colV);
-void gst_videomixer_blend_bgrx_bgrx (guint8 * src, gint xpos, gint ypos,
-    gint src_width, gint src_height, gdouble src_alpha,
-    guint8 * dest, gint dest_width, gint dest_height);
-void gst_videomixer_fill_bgrx_checker (guint8 * dest, gint width, gint height);
-void gst_videomixer_fill_bgrx_color (guint8 * dest, gint width, gint height,
-    gint colY, gint colU, gint colV);
-/*I420 function definitions see file: blend_i420.c*/
-void gst_videomixer_blend_i420_i420 (guint8 * src, gint xpos, gint ypos,
-    gint src_width, gint src_height, gdouble src_alpha,
-    guint8 * dest, gint dest_width, gint dest_heighty);
-void gst_videomixer_fill_i420_checker (guint8 * dest, gint width, gint height);
-void gst_videomixer_fill_i420_color (guint8 * dest, gint width, gint height,
-    gint colY, gint colU, gint colV);
-
-#ifdef BUILD_X86_ASM
-void gst_videomixer_blend_ayuv_ayuv_mmx (guint8 * src, gint xpos, gint ypos,
-    gint src_width, gint src_height, gdouble src_alpha,
-    guint8 * dest, gint dest_width, gint dest_height);
-void gst_videomixer_fill_ayuv_color_mmx (guint8 * dest, gint width, gint height,
-    gint colY, gint colU, gint colV);
-#endif
-
 #define DEFAULT_PAD_ZORDER 0
 #define DEFAULT_PAD_XPOS   0
 #define DEFAULT_PAD_YPOS   0
@ -938,77 +861,64 @@ gst_videomixer_setcaps (GstPad * pad, GstCaps * caps)
    goto done;

  switch (mixer->fmt) {
-    case GST_VIDEO_FORMAT_AYUV:{
-#ifdef BUILD_X86_ASM
-      guint cpu_flags = oil_cpu_get_flags ();
-
-      mixer->blend =
-          (cpu_flags & OIL_IMPL_FLAG_MMX) ? gst_videomixer_blend_ayuv_ayuv_mmx :
-          gst_videomixer_blend_ayuv_ayuv;
-      mixer->fill_checker = gst_videomixer_fill_ayuv_checker;
-      mixer->fill_color =
-          (cpu_flags & OIL_IMPL_FLAG_MMX) ? gst_videomixer_fill_ayuv_color_mmx :
-          gst_videomixer_fill_ayuv_color;
-#else
-      mixer->blend = gst_videomixer_blend_ayuv_ayuv;
-      mixer->fill_checker = gst_videomixer_fill_ayuv_checker;
-      mixer->fill_color = gst_videomixer_fill_ayuv_color;
-#endif
-      ret = TRUE;
-      break;
-    }
-    case GST_VIDEO_FORMAT_I420:
-      mixer->blend = gst_videomixer_blend_i420_i420;
-      mixer->fill_checker = gst_videomixer_fill_i420_checker;
-      mixer->fill_color = gst_videomixer_fill_i420_color;
-      ret = TRUE;
-      break;
-    case GST_VIDEO_FORMAT_BGRA:
-      mixer->blend = gst_videomixer_blend_bgra_bgra;
-      mixer->fill_checker = gst_videomixer_fill_bgra_checker;
-      mixer->fill_color = gst_videomixer_fill_bgra_color;
+    case GST_VIDEO_FORMAT_AYUV:
+      mixer->blend = gst_video_mixer_blend_ayuv;
+      mixer->fill_checker = gst_video_mixer_fill_checker_ayuv;
+      mixer->fill_color = gst_video_mixer_fill_color_ayuv;
      ret = TRUE;
      break;
    case GST_VIDEO_FORMAT_ARGB:
-      mixer->blend = gst_videomixer_blend_argb_argb;
-      mixer->fill_checker = gst_videomixer_fill_argb_checker;
-      mixer->fill_color = gst_videomixer_fill_argb_color;
+      mixer->blend = gst_video_mixer_blend_argb;
+      mixer->fill_checker = gst_video_mixer_fill_checker_argb;
+      mixer->fill_color = gst_video_mixer_fill_color_argb;
+      ret = TRUE;
+      break;
+    case GST_VIDEO_FORMAT_BGRA:
+      mixer->blend = gst_video_mixer_blend_bgra;
+      mixer->fill_checker = gst_video_mixer_fill_checker_bgra;
+      mixer->fill_color = gst_video_mixer_fill_color_bgra;
+      ret = TRUE;
+      break;
+    case GST_VIDEO_FORMAT_I420:
+      mixer->blend = gst_video_mixer_blend_i420;
+      mixer->fill_checker = gst_video_mixer_fill_checker_i420;
+      mixer->fill_color = gst_video_mixer_fill_color_i420;
      ret = TRUE;
      break;
    case GST_VIDEO_FORMAT_RGB:
-      mixer->blend = gst_videomixer_blend_rgb_rgb;
-      mixer->fill_checker = gst_videomixer_fill_rgb_checker;
-      mixer->fill_color = gst_videomixer_fill_rgb_color;
+      mixer->blend = gst_video_mixer_blend_rgb;
+      mixer->fill_checker = gst_video_mixer_fill_checker_rgb;
+      mixer->fill_color = gst_video_mixer_fill_color_rgb;
      ret = TRUE;
      break;
    case GST_VIDEO_FORMAT_BGR:
-      mixer->blend = gst_videomixer_blend_bgr_bgr;
-      mixer->fill_checker = gst_videomixer_fill_bgr_checker;
-      mixer->fill_color = gst_videomixer_fill_bgr_color;
+      mixer->blend = gst_video_mixer_blend_bgr;
+      mixer->fill_checker = gst_video_mixer_fill_checker_bgr;
+      mixer->fill_color = gst_video_mixer_fill_color_bgr;
      ret = TRUE;
      break;
    case GST_VIDEO_FORMAT_xRGB:
-      mixer->blend = gst_videomixer_blend_xrgb_xrgb;
-      mixer->fill_checker = gst_videomixer_fill_xrgb_checker;
-      mixer->fill_color = gst_videomixer_fill_xrgb_color;
+      mixer->blend = gst_video_mixer_blend_xrgb;
+      mixer->fill_checker = gst_video_mixer_fill_checker_xrgb;
+      mixer->fill_color = gst_video_mixer_fill_color_xrgb;
      ret = TRUE;
      break;
    case GST_VIDEO_FORMAT_xBGR:
-      mixer->blend = gst_videomixer_blend_xbgr_xbgr;
-      mixer->fill_checker = gst_videomixer_fill_xbgr_checker;
-      mixer->fill_color = gst_videomixer_fill_xbgr_color;
+      mixer->blend = gst_video_mixer_blend_xbgr;
+      mixer->fill_checker = gst_video_mixer_fill_checker_xbgr;
+      mixer->fill_color = gst_video_mixer_fill_color_xbgr;
      ret = TRUE;
      break;
    case GST_VIDEO_FORMAT_RGBx:
-      mixer->blend = gst_videomixer_blend_rgbx_rgbx;
-      mixer->fill_checker = gst_videomixer_fill_rgbx_checker;
-      mixer->fill_color = gst_videomixer_fill_rgbx_color;
+      mixer->blend = gst_video_mixer_blend_rgbx;
+      mixer->fill_checker = gst_video_mixer_fill_checker_rgbx;
+      mixer->fill_color = gst_video_mixer_fill_color_rgbx;
      ret = TRUE;
      break;
    case GST_VIDEO_FORMAT_BGRx:
-      mixer->blend = gst_videomixer_blend_bgrx_bgrx;
-      mixer->fill_checker = gst_videomixer_fill_bgrx_checker;
-      mixer->fill_color = gst_videomixer_fill_bgrx_color;
+      mixer->blend = gst_video_mixer_blend_bgrx;
+      mixer->fill_checker = gst_video_mixer_fill_checker_bgrx;
+      mixer->fill_color = gst_video_mixer_fill_color_bgrx;
      ret = TRUE;
      break;
    default:
@ -1649,7 +1559,7 @@ plugin_init (GstPlugin * plugin)
  GST_DEBUG_CATEGORY_INIT (gst_videomixer_debug, "videomixer", 0,
      "video mixer");

-  oil_init ();
+  gst_video_mixer_init_blend ();

  return gst_element_register (plugin, "videomixer", GST_RANK_PRIMARY,
      GST_TYPE_VIDEO_MIXER);
--- a/gst/videomixer/videomixer.h
+++ b/gst/videomixer/videomixer.h
@ -23,6 +23,7 @@
 #include <gst/gst.h>
 #include <gst/video/video.h>
 #include "videomixerpad.h"
+#include "blend.h"

 GST_DEBUG_CATEGORY_EXTERN (gst_videomixer_debug);
 #define GST_CAT_DEFAULT gst_videomixer_debug
@ -105,11 +106,9 @@ struct _GstVideoMixer
  guint64	segment_position;
  gdouble	segment_rate;

-  void (*blend) (guint8 * src, gint xpos, gint ypos, gint src_width, gint src_height, gdouble src_alpha,
-                 guint8 * dest, gint dest_width, gint dest_height);
-  void (*fill_checker) (guint8 * dest, gint width, gint height);
-
-  void (*fill_color) (guint8 * dest, gint width, gint height, gint colY, gint colU, gint colV);
+  BlendFunction blend;
+  FillCheckerFunction fill_checker;
+  FillColorFunction fill_color;
 };

 struct _GstVideoMixerClass