/* GStreamer
 * Copyright (C) 1999 Erik Walthinsen <omega@cse.ogi.edu>
 * Copyright (C) 2006 Tim-Philipp Müller <tim centricular net>
 * Copyright (C) 2010 Sebastian Dröge <sebastian.droege@collabora.co.uk>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
/**
 * SECTION:element-videobox
 * @see_also: #GstVideoCrop
 *
 * This plugin crops or enlarges the image. It takes 4 values as input, a
 * top, bottom, left and right offset. Positive values will crop that many
 * pixels from the respective border of the image, negative values will add
 * that many pixels. When pixels are added, you can specify their color.
 * Some predefined colors are usable with an enum property.
 * 
 * The plugin is alpha channel aware and will try to negotiate a format
 * that supports alpha channels first. When an alpha channel is active, two
 * other properties, alpha and border-alpha, can be used to set the alpha
 * values of the inner picture and the border respectively. An alpha value of
 * 0.0 means total transparency, 1.0 is fully opaque.
 * 
 * The videobox plugin has many uses, such as doing a mosaic of pictures,
 * letterboxing video, cutting out pieces of video, picture in picture, etc.
 *
 * Setting autocrop to true changes the behavior of the plugin so that
 * caps determine crop properties rather than the other way around: given
 * input and output dimensions, the crop values are selected so that the
 * smaller frame is effectively centered in the larger frame.  This
 * involves either cropping or padding.
 * 
 * If you use autocrop there is little point in setting the other
 * properties manually because they will be overridden if the caps change,
 * but nothing stops you from doing so.
 * 
 * Sample pipeline:
 * |[
 * gst-launch-1.0 videotestsrc ! videobox autocrop=true ! \
 *   "video/x-raw, width=600, height=400" ! videoconvert ! ximagesink
 * ]|
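 *
 * Another example, this time adding a 20 pixel border around the video
 * instead of cropping (negative offsets add pixels; the property and
 * fill-color names used here are the ones the element is assumed to expose):
 * |[
 * gst-launch-1.0 videotestsrc ! videobox top=-20 bottom=-20 left=-20 \
 *   right=-20 fill=green ! videoconvert ! ximagesink
 * ]|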
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "gstvideobox.h"
#include "gstvideoboxorc.h"

#include <math.h>
#include <string.h>

GST_DEBUG_CATEGORY_STATIC (videobox_debug);
#define GST_CAT_DEFAULT videobox_debug

/* From videotestsrc.c */
static const guint8 yuv_sdtv_colors_Y[VIDEO_BOX_FILL_LAST] =
    { 16, 145, 41, 81, 210, 235 };
static const guint8 yuv_sdtv_colors_U[VIDEO_BOX_FILL_LAST] =
    { 128, 54, 240, 90, 16, 128 };
static const guint8 yuv_sdtv_colors_V[VIDEO_BOX_FILL_LAST] =
    { 128, 34, 110, 240, 146, 128 };

static const guint8 yuv_hdtv_colors_Y[VIDEO_BOX_FILL_LAST] =
    { 16, 173, 32, 63, 219, 235 };
static const guint8 yuv_hdtv_colors_U[VIDEO_BOX_FILL_LAST] =
    { 128, 42, 240, 102, 16, 128 };
static const guint8 yuv_hdtv_colors_V[VIDEO_BOX_FILL_LAST] =
    { 128, 26, 118, 240, 138, 128 };

static const guint8 rgb_colors_R[VIDEO_BOX_FILL_LAST] =
    { 0, 0, 0, 255, 255, 255 };
static const guint8 rgb_colors_G[VIDEO_BOX_FILL_LAST] =
    { 0, 255, 0, 0, 255, 255 };
static const guint8 rgb_colors_B[VIDEO_BOX_FILL_LAST] =
    { 0, 0, 255, 0, 0, 255 };

/* Generated by -bad/ext/cog/generate_tables */
static const int cog_ycbcr_to_rgb_matrix_8bit_hdtv[] = {
  298, 0, 459, -63514,
  298, -55, -136, 19681,
  298, 541, 0, -73988,
};

static const int cog_ycbcr_to_rgb_matrix_8bit_sdtv[] = {
  298, 0, 409, -57068,
  298, -100, -208, 34707,
  298, 516, 0, -70870,
};

static const gint cog_rgb_to_ycbcr_matrix_8bit_hdtv[] = {
  47, 157, 16, 4096,
  -26, -87, 112, 32768,
  112, -102, -10, 32768,
};

static const gint cog_rgb_to_ycbcr_matrix_8bit_sdtv[] = {
  66, 129, 25, 4096,
  -38, -74, 112, 32768,
  112, -94, -18, 32768,
};

static const gint cog_ycbcr_sdtv_to_ycbcr_hdtv_matrix_8bit[] = {
  256, -30, -53, 10600,
  0, 261, 29, -4367,
  0, 19, 262, -3289,
};

static const gint cog_ycbcr_hdtv_to_ycbcr_sdtv_matrix_8bit[] = {
  256, 25, 49, -9536,
  0, 253, -28, 3958,
  0, -19, 252, 2918,
};

static const gint cog_identity_matrix_8bit[] = {
  256, 0, 0, 0,
  0, 256, 0, 0,
  0, 0, 256, 0,
};

#define APPLY_MATRIX(m,o,v1,v2,v3) ((m[o*4] * v1 + m[o*4+1] * v2 + m[o*4+2] * v3 + m[o*4+3]) >> 8)
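
/* The tables above are 3x4 affine matrices in Q8 fixed point: each row holds
 * the coefficients for v1, v2 and v3 (scaled by 256) plus a constant offset.
 * APPLY_MATRIX evaluates one row and shifts the result back down by 8 bits.
 * As a rough worked example, row 0 of cog_ycbcr_to_rgb_matrix_8bit_hdtv
 * computes
 *   (298 * Y + 459 * Cr - 63514) >> 8
 * which is approximately 1.164 * (Y - 16) + 1.793 * (Cr - 128), i.e. the
 * BT.709 Y'CbCr to R' conversion with the 16/128 biases folded into the
 * offset. */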

static void
fill_ayuv (GstVideoBoxFill fill_type, guint b_alpha,
    GstVideoFrame * frame, gboolean sdtv)
{
  guint32 empty_pixel;
  guint8 *dest;
  gint width, height;
  gint stride;

  width = GST_VIDEO_FRAME_WIDTH (frame);
  height = GST_VIDEO_FRAME_HEIGHT (frame);

  b_alpha = CLAMP (b_alpha, 0, 255);

  if (sdtv)
    empty_pixel = GUINT32_FROM_BE ((b_alpha << 24) |
        (yuv_sdtv_colors_Y[fill_type] << 16) |
        (yuv_sdtv_colors_U[fill_type] << 8) | yuv_sdtv_colors_V[fill_type]);
  else
    empty_pixel = GUINT32_FROM_BE ((b_alpha << 24) |
        (yuv_hdtv_colors_Y[fill_type] << 16) |
        (yuv_hdtv_colors_U[fill_type] << 8) | yuv_hdtv_colors_V[fill_type]);
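  /* The GUINT32_FROM_BE above makes the packed value land in memory as the
   * bytes A, Y, U, V on both little- and big-endian hosts. */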

  dest = GST_VIDEO_FRAME_PLANE_DATA (frame, 0);
  stride = GST_VIDEO_FRAME_PLANE_STRIDE (frame, 0);

  if (G_LIKELY (stride == 4 * width))
    video_box_orc_splat_u32 ((guint32 *) dest, empty_pixel, width * height);
  else if (height) {
    for (; height; --height) {
      video_box_orc_splat_u32 ((guint32 *) dest, empty_pixel, width);
      dest += stride;
    }
  }
}

static void
copy_ayuv_ayuv (guint i_alpha, GstVideoFrame * dest_frame,
    gboolean dest_sdtv, gint dest_x, gint dest_y, GstVideoFrame * src_frame,
    gboolean src_sdtv, gint src_x, gint src_y, gint w, gint h)
{
  gint i, j;
  gint src_stride;
  gint dest_stride;
  guint8 *dest, *src;

  src_stride = GST_VIDEO_FRAME_PLANE_STRIDE (src_frame, 0);
  dest_stride = GST_VIDEO_FRAME_PLANE_STRIDE (dest_frame, 0);

  src = GST_VIDEO_FRAME_PLANE_DATA (src_frame, 0);
  dest = GST_VIDEO_FRAME_PLANE_DATA (dest_frame, 0);

  dest = dest + dest_y * dest_stride + dest_x * 4;
  src = src + src_y * src_stride + src_x * 4;

  w *= 4;
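  /* Each AYUV pixel is 4 bytes (A, Y, U, V), so from here on w counts bytes
   * per row rather than pixels.  The alpha byte is scaled by i_alpha and the
   * Y/U/V bytes are converted (or copied) below. */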

  if (dest_sdtv != src_sdtv) {
    gint matrix[12];
    gint y, u, v;

    memcpy (matrix,
        dest_sdtv ? cog_ycbcr_hdtv_to_ycbcr_sdtv_matrix_8bit :
        cog_ycbcr_sdtv_to_ycbcr_hdtv_matrix_8bit, 12 * sizeof (gint));

    for (i = 0; i < h; i++) {
      for (j = 0; j < w; j += 4) {
        /* ORC FIXME */
        dest[j] = (src[j] * i_alpha) >> 8;
        y = src[j + 1];
        u = src[j + 2];
        v = src[j + 3];
        dest[j + 1] = APPLY_MATRIX (matrix, 0, y, u, v);
        dest[j + 2] = APPLY_MATRIX (matrix, 1, y, u, v);
        dest[j + 3] = APPLY_MATRIX (matrix, 2, y, u, v);
      }
      dest += dest_stride;
      src += src_stride;
    }
  } else {
    for (i = 0; i < h; i++) {
      for (j = 0; j < w; j += 4) {
        /* ORC FIXME */
        dest[j] = (src[j] * i_alpha) >> 8;
        dest[j + 1] = src[j + 1];
        dest[j + 2] = src[j + 2];
        dest[j + 3] = src[j + 3];
      }
      dest += dest_stride;
      src += src_stride;
    }
  }
}

static void
copy_ayuv_i420 (guint i_alpha, GstVideoFrame * dest_frame,
    gboolean dest_sdtv, gint dest_x, gint dest_y, GstVideoFrame * src_frame,
    gboolean src_sdtv, gint src_x, gint src_y, gint w, gint h)
{
  gint i, j;
  guint8 *destY, *destY2, *destU, *destV;
  gint dest_strideY, dest_strideU, dest_strideV;
  const guint8 *src2;
  gint src_stride;
  gint y_idx, uv_idx;
  gint y1, y2, y3, y4;
  gint u1, u2, u3, u4;
  gint v1, v2, v3, v4;
  gint matrix[12];
  guint8 *src;
  gint dest_height, src_height, dest_width;

  dest_height = GST_VIDEO_FRAME_HEIGHT (dest_frame);
  dest_width = GST_VIDEO_FRAME_WIDTH (dest_frame);
  src_height = GST_VIDEO_FRAME_HEIGHT (src_frame);

  dest_strideY = GST_VIDEO_FRAME_COMP_STRIDE (dest_frame, 0);
  dest_strideU = GST_VIDEO_FRAME_COMP_STRIDE (dest_frame, 1);
  dest_strideV = GST_VIDEO_FRAME_COMP_STRIDE (dest_frame, 2);

  src_stride = GST_VIDEO_FRAME_PLANE_STRIDE (src_frame, 0);

  destY = GST_VIDEO_FRAME_COMP_DATA (dest_frame, 0);
  destU = GST_VIDEO_FRAME_COMP_DATA (dest_frame, 1);
  destV = GST_VIDEO_FRAME_COMP_DATA (dest_frame, 2);

  destY = destY + dest_y * dest_strideY + dest_x;
  destY2 = (dest_y < dest_height) ? destY + dest_strideY : destY;
  destU = destU + (dest_y / 2) * dest_strideU + dest_x / 2;
  destV = destV + (dest_y / 2) * dest_strideV + dest_x / 2;

  src = GST_VIDEO_FRAME_PLANE_DATA (src_frame, 0);
  src = src + src_y * src_stride + src_x * 4;
  src2 = (src_y < src_height) ? src + src_stride : src;

  h = dest_y + h;
  w = dest_x + w;

  if (src_sdtv != dest_sdtv)
    memcpy (matrix,
        dest_sdtv ? cog_ycbcr_hdtv_to_ycbcr_sdtv_matrix_8bit :
        cog_ycbcr_sdtv_to_ycbcr_hdtv_matrix_8bit, 12 * sizeof (gint));
  else
    memcpy (matrix, cog_identity_matrix_8bit, 12 * sizeof (gint));
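
  /* I420 subsamples chroma 2x2: every 2x2 block of luma samples (a "macro
   * pixel") shares a single U and a single V sample.  When the copied region
   * does not start or end on such a boundary, the new chroma is blended with
   * what is already in the destination instead of overwriting it; that is
   * what the three phases below implement. */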

  /* 1. Handle the first destination scanline specially if it
   *    doesn't start at the macro pixel boundary, i.e. blend
   *    with the background! */
  if (dest_y % 2 == 1) {
    /* 1.1. Handle the first destination pixel if it doesn't
     *      start at the macro pixel boundary, i.e. blend with
     *      the background! */
    if (dest_x % 2 == 1) {
      y1 = src[4 * 0 + 1];
      u1 = src[4 * 0 + 2];
      v1 = src[4 * 0 + 3];

      destY[0] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destU[0] =
          CLAMP ((3 * destU[0] + APPLY_MATRIX (matrix, 1, y1, u1, v1)) / 4, 0,
          255);
      destV[0] =
          CLAMP ((3 * destV[0] + APPLY_MATRIX (matrix, 2, y1, u1, v1)) / 4, 0,
          255);

      j = dest_x + 1;
      y_idx = uv_idx = 1;
    } else {
      j = dest_x;
      y_idx = uv_idx = 0;
    }

    /* 1.2. Copy all macro pixels from the source to the destination
     *      but blend with the background because we're only filling
     *      the lower part of the macro pixels. */
    for (; j < w - 1; j += 2) {
      y1 = src[4 * y_idx + 1];
      y2 = src[4 * y_idx + 4 + 1];

      u1 = src[4 * y_idx + 2];
      u2 = src[4 * y_idx + 4 + 2];

      v1 = src[4 * y_idx + 3];
      v2 = src[4 * y_idx + 4 + 3];

      destY[y_idx] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destY[y_idx + 1] = CLAMP (APPLY_MATRIX (matrix, 0, y2, u2, v2), 0, 255);
      destU[uv_idx] = CLAMP (
          (2 * destU[uv_idx] + APPLY_MATRIX (matrix, 1, y1, u1,
                  v1) + APPLY_MATRIX (matrix, 1, y2, u2, v2)) / 4, 0, 255);
      destV[uv_idx] = CLAMP (
          (2 * destV[uv_idx] + APPLY_MATRIX (matrix, 2, y1, u1,
                  v1) + APPLY_MATRIX (matrix, 2, y2, u2, v2)) / 4, 0, 255);

      y_idx += 2;
      uv_idx++;
    }

    /* 1.3. Now copy the last pixel if one exists and blend it
     *      with the background because we only fill part of
     *      the macro pixel. In case this is the last pixel of
     *      the destination we will fill a larger part. */
    if (j == w - 1 && j == dest_width - 1) {
      y1 = src[4 * y_idx + 1];
      u1 = src[4 * y_idx + 2];
      v1 = src[4 * y_idx + 3];

      destY[y_idx] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destU[uv_idx] = CLAMP (
          (destU[uv_idx] + APPLY_MATRIX (matrix, 1, y1, u1, v1)) / 2, 0, 255);
      destV[uv_idx] = CLAMP (
          (destV[uv_idx] + APPLY_MATRIX (matrix, 2, y1, u1, v1)) / 2, 0, 255);
    } else if (j == w - 1) {
      y1 = src[4 * y_idx + 1];
      u1 = src[4 * y_idx + 2];
      v1 = src[4 * y_idx + 3];

      destY[y_idx] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destU[uv_idx] = CLAMP (
          (3 * destU[uv_idx] + APPLY_MATRIX (matrix, 1, y1, u1, v1)) / 4, 0,
          255);
      destV[uv_idx] =
          CLAMP ((3 * destV[uv_idx] + APPLY_MATRIX (matrix, 2, y1, u1, v1)) / 4,
          0, 255);
    }

    destY += dest_strideY;
    destY2 += dest_strideY;
    destU += dest_strideU;
    destV += dest_strideV;
    src += src_stride;
    src2 += src_stride;
    i = dest_y + 1;
  } else {
    i = dest_y;
  }

  /* 2. Copy all macro pixel scanlines, the destination scanline
   *    now starts at macro pixel boundary. */
  for (; i < h - 1; i += 2) {
    /* 2.1. Handle the first destination pixel if it doesn't
     *      start at the macro pixel boundary, i.e. blend with
     *      the background! */
    if (dest_x % 2 == 1) {
      y1 = src[4 * 0 + 1];
      y2 = src2[4 * 0 + 1];
      u1 = src[4 * 0 + 2];
      u2 = src2[4 * 0 + 2];
      v1 = src[4 * 0 + 3];
      v2 = src2[4 * 0 + 3];

      destY[0] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destY2[0] = CLAMP (APPLY_MATRIX (matrix, 0, y2, u2, v2), 0, 255);
      destU[0] = CLAMP (
          (2 * destU[0] + APPLY_MATRIX (matrix, 1, y1, u1,
                  v1) + APPLY_MATRIX (matrix, 1, y2, u2, v2)) / 4, 0, 255);
      destV[0] = CLAMP (
          (2 * destV[0] + APPLY_MATRIX (matrix, 2, y1, u1,
                  v1) + APPLY_MATRIX (matrix, 2, y2, u2, v2)) / 4, 0, 255);
      j = dest_x + 1;
      y_idx = uv_idx = 1;
    } else {
      j = dest_x;
      y_idx = uv_idx = 0;
    }

    /* 2.2. Copy all macro pixels from the source to the destination.
     *      All pixels now start at macro pixel boundary, i.e. no
     *      blending with the background is necessary. */
    for (; j < w - 1; j += 2) {
      y1 = src[4 * y_idx + 1];
      y2 = src[4 * y_idx + 4 + 1];
      y3 = src2[4 * y_idx + 1];
      y4 = src2[4 * y_idx + 4 + 1];

      u1 = src[4 * y_idx + 2];
      u2 = src[4 * y_idx + 4 + 2];
      u3 = src2[4 * y_idx + 2];
      u4 = src2[4 * y_idx + 4 + 2];

      v1 = src[4 * y_idx + 3];
      v2 = src[4 * y_idx + 4 + 3];
      v3 = src2[4 * y_idx + 3];
      v4 = src2[4 * y_idx + 4 + 3];

      destY[y_idx] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destY[y_idx + 1] = CLAMP (APPLY_MATRIX (matrix, 0, y2, u2, v2), 0, 255);
      destY2[y_idx] = CLAMP (APPLY_MATRIX (matrix, 0, y3, u3, v3), 0, 255);
      destY2[y_idx + 1] = CLAMP (APPLY_MATRIX (matrix, 0, y4, u4, v4), 0, 255);

      destU[uv_idx] = CLAMP (
          (APPLY_MATRIX (matrix, 1, y1, u1, v1) + APPLY_MATRIX (matrix, 1, y2,
                  u2, v2) + APPLY_MATRIX (matrix, 1, y3, u3,
                  v3) + APPLY_MATRIX (matrix, 1, y4, u4, v4)) / 4, 0, 255);
      destV[uv_idx] = CLAMP (
          (APPLY_MATRIX (matrix, 2, y1, u1, v1) + APPLY_MATRIX (matrix, 2, y2,
                  u2, v2) + APPLY_MATRIX (matrix, 2, y3, u3,
                  v3) + APPLY_MATRIX (matrix, 2, y4, u4, v4)) / 4, 0, 255);

      y_idx += 2;
      uv_idx++;
    }

    /* 2.3. Now copy the last pixel if one exists and blend it
     *      with the background because we only fill part of
     *      the macro pixel. In case this is the last pixel of
     *      the destination we will fill a larger part. */
    if (j == w - 1 && j == dest_width - 1) {
      y1 = src[4 * y_idx + 1];
      y2 = src2[4 * y_idx + 1];

      u1 = src[4 * y_idx + 2];
      u2 = src2[4 * y_idx + 2];

      v1 = src[4 * y_idx + 3];
      v2 = src2[4 * y_idx + 3];

      destY[y_idx] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destY2[y_idx] = CLAMP (APPLY_MATRIX (matrix, 0, y2, u2, v2), 0, 255);
      destU[uv_idx] = CLAMP (
          (APPLY_MATRIX (matrix, 1, y1, u1, v1) + APPLY_MATRIX (matrix, 1, y2,
                  u2, v2)) / 2, 0, 255);
      destV[uv_idx] = CLAMP (
          (APPLY_MATRIX (matrix, 2, y1, u1, v1) + APPLY_MATRIX (matrix, 2, y2,
                  u2, v2)) / 2, 0, 255);
    } else if (j == w - 1) {
      y1 = src[4 * y_idx + 1];
      y2 = src2[4 * y_idx + 1];

      u1 = src[4 * y_idx + 2];
      u2 = src2[4 * y_idx + 2];

      v1 = src[4 * y_idx + 3];
      v2 = src2[4 * y_idx + 3];

      destY[y_idx] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destY2[y_idx] = CLAMP (APPLY_MATRIX (matrix, 0, y2, u2, v2), 0, 255);
      destU[uv_idx] = CLAMP (
          (2 * destU[uv_idx] + APPLY_MATRIX (matrix, 1, y1, u1,
                  v1) + APPLY_MATRIX (matrix, 1, y2, u2, v2)) / 4, 0, 255);
      destV[uv_idx] = CLAMP (
          (2 * destV[uv_idx] + APPLY_MATRIX (matrix, 2, y1, u1,
                  v1) + APPLY_MATRIX (matrix, 2, y2, u2, v2)) / 4, 0, 255);
    }

    destY += 2 * dest_strideY;
    destY2 += 2 * dest_strideY;
    destU += dest_strideU;
    destV += dest_strideV;
    src += 2 * src_stride;
    src2 += 2 * src_stride;
  }

  /* 3. Handle the last scanline if one exists. This again
   *    doesn't start at macro pixel boundary but should
   *    only fill the upper part of the macro pixels. */
  if (i == h - 1 && i == dest_height - 1) {
    /* 3.1. Handle the first destination pixel if it doesn't
     *      start at the macro pixel boundary, i.e. blend with
     *      the background! */
    if (dest_x % 2 == 1) {
      y1 = src[4 * 0 + 1];
      u1 = src[4 * 0 + 2];
      v1 = src[4 * 0 + 3];

      destY[0] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destU[0] =
          CLAMP ((destU[0] + APPLY_MATRIX (matrix, 1, y1, u1, v1)) / 2, 0, 255);
      destV[0] =
          CLAMP ((destV[0] + APPLY_MATRIX (matrix, 2, y1, u1, v1)) / 2, 0, 255);

      j = dest_x + 1;
      y_idx = uv_idx = 1;
    } else {
      j = dest_x;
      y_idx = uv_idx = 0;
    }

    /* 3.2. Copy all macro pixels from the source to the destination
     *      but blend with the background because we're only filling
     *      the upper part of the macro pixels. */
    for (; j < w - 1; j += 2) {
      y1 = src[4 * y_idx + 1];
      y2 = src[4 * y_idx + 4 + 1];

      u1 = src[4 * y_idx + 2];
      u2 = src[4 * y_idx + 4 + 2];

      v1 = src[4 * y_idx + 3];
      v2 = src[4 * y_idx + 4 + 3];

      destY[y_idx] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destY[y_idx + 1] = CLAMP (APPLY_MATRIX (matrix, 0, y2, u2, v2), 0, 255);

      destU[uv_idx] = CLAMP (
          (2 * destU[uv_idx] + APPLY_MATRIX (matrix, 1, y1, u1,
                  v1) + APPLY_MATRIX (matrix, 1, y2, u2, v2)) / 4, 0, 255);
      destV[uv_idx] = CLAMP (
          (2 * destV[uv_idx] + APPLY_MATRIX (matrix, 2, y1, u1,
                  v1) + APPLY_MATRIX (matrix, 2, y2, u2, v2)) / 4, 0, 255);

      y_idx += 2;
      uv_idx++;
    }

    /* 3.3. Now copy the last pixel if one exists and blend it
     *      with the background because we only fill part of
     *      the macro pixel. In case this is the last pixel of
     *      the destination we will fill a larger part. */
    if (j == w - 1 && j == dest_width - 1) {
      y1 = src[4 * y_idx + 1];
      u1 = src[4 * y_idx + 2];
      v1 = src[4 * y_idx + 3];

      destY[y_idx] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destU[uv_idx] = CLAMP (
          (destU[uv_idx] + APPLY_MATRIX (matrix, 1, y1, u1, v1)) / 2, 0, 255);
      destV[uv_idx] = CLAMP (
          (destV[uv_idx] + APPLY_MATRIX (matrix, 2, y1, u1, v1)) / 2, 0, 255);
    } else if (j == w - 1) {
      y1 = src[4 * y_idx + 1];
      u1 = src[4 * y_idx + 2];
      v1 = src[4 * y_idx + 3];

      destY[y_idx] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destU[uv_idx] = CLAMP (
          (3 * destU[uv_idx] + APPLY_MATRIX (matrix, 1, y1, u1, v1)) / 4, 0,
          255);
      destV[uv_idx] =
          CLAMP ((3 * destV[uv_idx] + APPLY_MATRIX (matrix, 2, y1, u1, v1)) / 4,
          0, 255);
    }
  } else if (i == h - 1) {
    /* 3.1. Handle the first destination pixel if it doesn't
     *      start at the macro pixel boundary, i.e. blend with
     *      the background! */
    if (dest_x % 2 == 1) {
      y1 = src[4 * 0 + 1];
      u1 = src[4 * 0 + 2];
      v1 = src[4 * 0 + 3];

      destY[0] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destU[0] =
          CLAMP ((3 * destU[0] + APPLY_MATRIX (matrix, 1, y1, u1, v1)) / 4, 0,
          255);
      destV[0] =
          CLAMP ((3 * destV[0] + APPLY_MATRIX (matrix, 2, y1, u1, v1)) / 4, 0,
          255);

      j = dest_x + 1;
      y_idx = uv_idx = 1;
    } else {
      j = dest_x;
      y_idx = uv_idx = 0;
    }

    /* 3.2. Copy all macro pixels from the source to the destination
     *      but blend with the background because we're only filling
     *      the upper part of the macro pixels. */
    for (; j < w - 1; j += 2) {
      y1 = src[4 * y_idx + 1];
      y2 = src[4 * y_idx + 4 + 1];

      u1 = src[4 * y_idx + 2];
      u2 = src[4 * y_idx + 4 + 2];

      v1 = src[4 * y_idx + 3];
      v2 = src[4 * y_idx + 4 + 3];

      destY[y_idx] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destY[y_idx + 1] = CLAMP (APPLY_MATRIX (matrix, 0, y2, u2, v2), 0, 255);

      destU[uv_idx] = CLAMP (
          (2 * destU[uv_idx] + APPLY_MATRIX (matrix, 1, y1, u1,
                  v1) + APPLY_MATRIX (matrix, 1, y2, u2, v2)) / 4, 0, 255);
      destV[uv_idx] = CLAMP (
          (2 * destV[uv_idx] + APPLY_MATRIX (matrix, 2, y1, u1,
                  v1) + APPLY_MATRIX (matrix, 2, y2, u2, v2)) / 4, 0, 255);

      y_idx += 2;
      uv_idx++;
    }

    /* 3.3. Now copy the last pixel if one exists and blend it
     *      with the background because we only fill part of
     *      the macro pixel. In case this is the last pixel of
     *      the destination we will fill a larger part. */
    if (j == w - 1 && j == dest_width - 1) {
      y1 = src[4 * y_idx + 1];
      u1 = src[4 * y_idx + 2];
      v1 = src[4 * y_idx + 3];

      destY[y_idx] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destU[uv_idx] = CLAMP (
          (destU[uv_idx] + APPLY_MATRIX (matrix, 1, y1, u1, v1)) / 2, 0, 255);
      destV[uv_idx] = CLAMP (
          (destV[uv_idx] + APPLY_MATRIX (matrix, 2, y1, u1, v1)) / 2, 0, 255);
    } else if (j == w - 1) {
      y1 = src[4 * y_idx + 1];
      u1 = src[4 * y_idx + 2];
      v1 = src[4 * y_idx + 3];

      destY[y_idx] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destU[uv_idx] = CLAMP (
          (3 * destU[uv_idx] + APPLY_MATRIX (matrix, 1, y1, u1, v1)) / 4, 0,
          255);
      destV[uv_idx] =
          CLAMP ((3 * destV[uv_idx] + APPLY_MATRIX (matrix, 2, y1, u1, v1)) / 4,
          0, 255);
    }
  }
}

static void
fill_planar_yuv (GstVideoBoxFill fill_type, guint b_alpha,
    GstVideoFrame * frame, gboolean sdtv)
{
  guint8 empty_pixel[3];
  guint8 *destY, *destU, *destV;
  gint strideY, strideU, strideV;
  gint heightY, heightU, heightV;
  gint widthY, widthU, widthV;

  if (sdtv) {
    empty_pixel[0] = yuv_sdtv_colors_Y[fill_type];
    empty_pixel[1] = yuv_sdtv_colors_U[fill_type];
    empty_pixel[2] = yuv_sdtv_colors_V[fill_type];
  } else {
    empty_pixel[0] = yuv_hdtv_colors_Y[fill_type];
    empty_pixel[1] = yuv_hdtv_colors_U[fill_type];
    empty_pixel[2] = yuv_hdtv_colors_V[fill_type];
  }

  strideY = GST_VIDEO_FRAME_COMP_STRIDE (frame, 0);
  strideU = GST_VIDEO_FRAME_COMP_STRIDE (frame, 1);
  strideV = GST_VIDEO_FRAME_COMP_STRIDE (frame, 2);

  destY = GST_VIDEO_FRAME_COMP_DATA (frame, 0);
  destU = GST_VIDEO_FRAME_COMP_DATA (frame, 1);
  destV = GST_VIDEO_FRAME_COMP_DATA (frame, 2);

  widthY = GST_VIDEO_FRAME_COMP_WIDTH (frame, 0);
  widthU = GST_VIDEO_FRAME_COMP_WIDTH (frame, 1);
  widthV = GST_VIDEO_FRAME_COMP_WIDTH (frame, 2);

  heightY = GST_VIDEO_FRAME_COMP_HEIGHT (frame, 0);
  heightU = GST_VIDEO_FRAME_COMP_HEIGHT (frame, 1);
  heightV = GST_VIDEO_FRAME_COMP_HEIGHT (frame, 2);
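
  /* Planar YUV keeps each component in its own plane, so every plane can
   * simply be memset with the corresponding fill value; rows are filled one
   * at a time when the stride is larger than the visible width. */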

  if (strideY == widthY) {
    memset (destY, empty_pixel[0], strideY * heightY);
  } else if (heightY) {
    for (; heightY; --heightY) {
      memset (destY, empty_pixel[0], widthY);
      destY += strideY;
    }
  }
  if (strideU == widthU) {
    memset (destU, empty_pixel[1], strideU * heightU);
  } else if (heightU) {
    for (; heightU; --heightU) {
      memset (destU, empty_pixel[1], widthU);
      destU += strideU;
    }
  }
  if (strideV == widthV) {
    memset (destV, empty_pixel[2], strideV * heightV);
  } else if (heightV) {
    for (; heightV; --heightV) {
      memset (destV, empty_pixel[2], widthV);
      destV += strideV;
    }
  }
}

static void
copy_y444_y444 (guint i_alpha, GstVideoFrame * dest,
    gboolean dest_sdtv, gint dest_x, gint dest_y, GstVideoFrame * src,
    gboolean src_sdtv, gint src_x, gint src_y, gint w, gint h)
{
  gint i, j;
  guint8 *destY, *destU, *destV;
  const guint8 *srcY, *srcU, *srcV;
  gint dest_strideY, dest_strideU, dest_strideV;
  gint src_strideY, src_strideU, src_strideV;

  dest_strideY = GST_VIDEO_FRAME_COMP_STRIDE (dest, 0);
  dest_strideU = GST_VIDEO_FRAME_COMP_STRIDE (dest, 1);
  dest_strideV = GST_VIDEO_FRAME_COMP_STRIDE (dest, 2);

  src_strideY = GST_VIDEO_FRAME_COMP_STRIDE (src, 0);
  src_strideU = GST_VIDEO_FRAME_COMP_STRIDE (src, 1);
  src_strideV = GST_VIDEO_FRAME_COMP_STRIDE (src, 2);

  destY = GST_VIDEO_FRAME_COMP_DATA (dest, 0);
  destU = GST_VIDEO_FRAME_COMP_DATA (dest, 1);
  destV = GST_VIDEO_FRAME_COMP_DATA (dest, 2);

  srcY = GST_VIDEO_FRAME_COMP_DATA (src, 0);
  srcU = GST_VIDEO_FRAME_COMP_DATA (src, 1);
  srcV = GST_VIDEO_FRAME_COMP_DATA (src, 2);

  destY = destY + dest_y * dest_strideY + dest_x;
  destU = destU + dest_y * dest_strideU + dest_x;
  destV = destV + dest_y * dest_strideV + dest_x;

  srcY = srcY + src_y * src_strideY + src_x;
  srcU = srcU + src_y * src_strideU + src_x;
  srcV = srcV + src_y * src_strideV + src_x;
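
  /* Y444 has no chroma subsampling, so every pixel carries its own U and V
   * sample: the copy is either a per-pixel colorimetry conversion or a plain
   * per-plane memcpy. */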

  if (src_sdtv != dest_sdtv) {
    gint matrix[12];
    gint y, u, v;

    memcpy (matrix,
        dest_sdtv ? cog_ycbcr_hdtv_to_ycbcr_sdtv_matrix_8bit :
        cog_ycbcr_sdtv_to_ycbcr_hdtv_matrix_8bit, 12 * sizeof (gint));

    for (i = 0; i < h; i++) {
      for (j = 0; j < w; j++) {
        y = APPLY_MATRIX (matrix, 0, srcY[j], srcU[j], srcV[j]);
        u = APPLY_MATRIX (matrix, 1, srcY[j], srcU[j], srcV[j]);
        v = APPLY_MATRIX (matrix, 2, srcY[j], srcU[j], srcV[j]);

        destY[j] = y;
        destU[j] = u;
        destV[j] = v;
      }
      destY += dest_strideY;
      destU += dest_strideU;
      destV += dest_strideV;

      srcY += src_strideY;
      srcU += src_strideU;
      srcV += src_strideV;
    }
  } else {
    for (i = 0; i < h; i++) {
      memcpy (destY, srcY, w);
      memcpy (destU, srcU, w);
      memcpy (destV, srcV, w);

      destY += dest_strideY;
      destU += dest_strideU;
      destV += dest_strideV;

      srcY += src_strideY;
      srcU += src_strideU;
      srcV += src_strideV;
    }
  }
}

static void
copy_y42b_y42b (guint i_alpha, GstVideoFrame * dest,
    gboolean dest_sdtv, gint dest_x, gint dest_y, GstVideoFrame * src,
    gboolean src_sdtv, gint src_x, gint src_y, gint w, gint h)
{
  gint i, j;
  guint8 *destY, *destU, *destV;
  const guint8 *srcY, *srcU, *srcV;
  gint dest_strideY, dest_strideU, dest_strideV;
  gint src_strideY, src_strideU, src_strideV;
  gint src_y_idx, src_uv_idx;
  gint dest_y_idx, dest_uv_idx;
  gint matrix[12];
  gint y1, y2;
  gint u1, u2;
  gint v1, v2;
  gint dest_width;

  dest_width = GST_VIDEO_FRAME_WIDTH (dest);

  dest_strideY = GST_VIDEO_FRAME_COMP_STRIDE (dest, 0);
  dest_strideU = GST_VIDEO_FRAME_COMP_STRIDE (dest, 1);
  dest_strideV = GST_VIDEO_FRAME_COMP_STRIDE (dest, 2);

  src_strideY = GST_VIDEO_FRAME_COMP_STRIDE (src, 0);
  src_strideU = GST_VIDEO_FRAME_COMP_STRIDE (src, 1);
  src_strideV = GST_VIDEO_FRAME_COMP_STRIDE (src, 2);

  destY = GST_VIDEO_FRAME_COMP_DATA (dest, 0);
  destU = GST_VIDEO_FRAME_COMP_DATA (dest, 1);
  destV = GST_VIDEO_FRAME_COMP_DATA (dest, 2);

  srcY = GST_VIDEO_FRAME_COMP_DATA (src, 0);
  srcU = GST_VIDEO_FRAME_COMP_DATA (src, 1);
  srcV = GST_VIDEO_FRAME_COMP_DATA (src, 2);

  destY = destY + dest_y * dest_strideY + dest_x;
  destU = destU + dest_y * dest_strideU + dest_x / 2;
  destV = destV + dest_y * dest_strideV + dest_x / 2;

  srcY = srcY + src_y * src_strideY + src_x;
  srcU = srcU + src_y * src_strideU + src_x / 2;
  srcV = srcV + src_y * src_strideV + src_x / 2;

  h = dest_y + h;
  w = dest_x + w;

  if (src_sdtv != dest_sdtv)
    memcpy (matrix,
        dest_sdtv ? cog_ycbcr_hdtv_to_ycbcr_sdtv_matrix_8bit :
        cog_ycbcr_sdtv_to_ycbcr_hdtv_matrix_8bit, 12 * sizeof (gint));
  else
    memcpy (matrix, cog_identity_matrix_8bit, 12 * sizeof (gint));
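
  /* Y42B is 4:2:2: two horizontally adjacent luma samples share one U and
   * one V sample.  Regions that start or end in the middle of such a 2x1
   * macro pixel blend their chroma with the existing destination values
   * instead of overwriting them. */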

  /* 1. Copy all macro pixel scanlines, the destination scanline
   *    now starts at macro pixel boundary. */
  for (i = dest_y; i < h; i++) {
    /* 1.1. Handle the first destination pixel if it doesn't
     *      start at the macro pixel boundary, i.e. blend with
     *      the background! */
    if (dest_x % 2 == 1) {
      y1 = srcY[0];
      u1 = srcU[0];
      v1 = srcV[0];

      destY[0] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destU[0] = CLAMP (
          (destU[0] + APPLY_MATRIX (matrix, 1, y1, u1, v1)) / 2, 0, 255);
      destV[0] = CLAMP (
          (destV[0] + APPLY_MATRIX (matrix, 2, y1, u1, v1)) / 2, 0, 255);
      j = dest_x + 1;
      src_y_idx = dest_y_idx = dest_uv_idx = 1;
      src_uv_idx = (src_x % 2) + 1;
    } else {
      j = dest_x;
      src_y_idx = dest_y_idx = dest_uv_idx = 0;
      src_uv_idx = (src_x % 2);
    }

    /* 1.2. Copy all macro pixels from the source to the destination.
     *      All pixels now start at macro pixel boundary, i.e. no
     *      blending with the background is necessary. */
    for (; j < w - 1; j += 2) {
      y1 = srcY[src_y_idx];
      y2 = srcY[src_y_idx + 1];

      u1 = srcU[src_uv_idx / 2];
      v1 = srcV[src_uv_idx / 2];
      src_uv_idx++;
      u2 = srcU[src_uv_idx / 2];
      v2 = srcV[src_uv_idx / 2];
      src_uv_idx++;

      destY[dest_y_idx] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destY[dest_y_idx + 1] =
          CLAMP (APPLY_MATRIX (matrix, 0, y2, u2, v2), 0, 255);

      destU[dest_uv_idx] = CLAMP (
          (APPLY_MATRIX (matrix, 1, y1, u1, v1) + APPLY_MATRIX (matrix, 1, y2,
                  u2, v2)) / 2, 0, 255);
      destV[dest_uv_idx] = CLAMP (
          (APPLY_MATRIX (matrix, 2, y1, u1, v1) + APPLY_MATRIX (matrix, 2, y2,
                  u2, v2)) / 2, 0, 255);

      dest_y_idx += 2;
      src_y_idx += 2;
      dest_uv_idx++;
    }

    /* 1.3. Now copy the last pixel if one exists and blend it
     *      with the background because we only fill part of
     *      the macro pixel. In case this is the last pixel of
     *      the destination we will fill a larger part. */
    if (j == w - 1 && j == dest_width - 1) {
      y1 = srcY[src_y_idx];
      u1 = srcU[src_uv_idx / 2];
      v1 = srcV[src_uv_idx / 2];

      destY[dest_y_idx] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destU[dest_uv_idx] = CLAMP (APPLY_MATRIX (matrix, 1, y1, u1, v1), 0, 255);
      destV[dest_uv_idx] = CLAMP (APPLY_MATRIX (matrix, 2, y1, u1, v1), 0, 255);
    } else if (j == w - 1) {
      y1 = srcY[src_y_idx];
      u1 = srcU[src_uv_idx / 2];
      v1 = srcV[src_uv_idx / 2];

      destY[dest_y_idx] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destU[dest_uv_idx] = CLAMP (
          (destU[dest_uv_idx] + APPLY_MATRIX (matrix, 1, y1, u1,
                  v1)) / 2, 0, 255);
      destV[dest_uv_idx] = CLAMP (
          (destV[dest_uv_idx] + APPLY_MATRIX (matrix, 2, y1, u1,
                  v1)) / 2, 0, 255);
    }

    destY += dest_strideY;
    destU += dest_strideU;
    destV += dest_strideV;
    srcY += src_strideY;

    srcU += src_strideU;
    srcV += src_strideV;
  }
}

static void
copy_y41b_y41b (guint i_alpha, GstVideoFrame * dest,
    gboolean dest_sdtv, gint dest_x, gint dest_y, GstVideoFrame * src,
    gboolean src_sdtv, gint src_x, gint src_y, gint w, gint h)
{
  gint i, j;
  guint8 *destY, *destU, *destV;
  const guint8 *srcY, *srcU, *srcV;
  gint dest_strideY, dest_strideU, dest_strideV;
  gint src_strideY, src_strideU, src_strideV;
  gint src_y_idx, src_uv_idx;
  gint dest_y_idx, dest_uv_idx;
  gint matrix[12];
  gint y1, y2, y3, y4;
  gint u1, u2, u3, u4;
  gint v1, v2, v3, v4;
  gint dest_width;

  dest_width = GST_VIDEO_FRAME_WIDTH (dest);

  dest_strideY = GST_VIDEO_FRAME_COMP_STRIDE (dest, 0);
  dest_strideU = GST_VIDEO_FRAME_COMP_STRIDE (dest, 1);
  dest_strideV = GST_VIDEO_FRAME_COMP_STRIDE (dest, 2);

  src_strideY = GST_VIDEO_FRAME_COMP_STRIDE (src, 0);
  src_strideU = GST_VIDEO_FRAME_COMP_STRIDE (src, 1);
  src_strideV = GST_VIDEO_FRAME_COMP_STRIDE (src, 2);

  destY = GST_VIDEO_FRAME_COMP_DATA (dest, 0);
  destU = GST_VIDEO_FRAME_COMP_DATA (dest, 1);
  destV = GST_VIDEO_FRAME_COMP_DATA (dest, 2);

  srcY = GST_VIDEO_FRAME_COMP_DATA (src, 0);
  srcU = GST_VIDEO_FRAME_COMP_DATA (src, 1);
  srcV = GST_VIDEO_FRAME_COMP_DATA (src, 2);

  destY = destY + dest_y * dest_strideY + dest_x;
  destU = destU + dest_y * dest_strideU + dest_x / 4;
  destV = destV + dest_y * dest_strideV + dest_x / 4;

  srcY = srcY + src_y * src_strideY + src_x;
  srcU = srcU + src_y * src_strideU + src_x / 4;
  srcV = srcV + src_y * src_strideV + src_x / 4;

  h = dest_y + h;
  w = dest_x + w;

  if (src_sdtv != dest_sdtv)
    memcpy (matrix,
        dest_sdtv ? cog_ycbcr_hdtv_to_ycbcr_sdtv_matrix_8bit :
        cog_ycbcr_sdtv_to_ycbcr_hdtv_matrix_8bit, 12 * sizeof (gint));
  else
    memcpy (matrix, cog_identity_matrix_8bit, 12 * sizeof (gint));
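
  /* Y41B is 4:1:1: four horizontally adjacent luma samples share one U and
   * one V sample, so partial 4x1 macro pixels at the left and right edges of
   * the region blend their chroma with the destination. */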

  /* 1. Copy all macro pixel scanlines, the destination scanline
   *    now starts at macro pixel boundary. */
  for (i = dest_y; i < h; i++) {
    /* 1.1. Handle the first destination pixel if it doesn't
     *      start at the macro pixel boundary, i.e. blend with
     *      the background! */
    if (dest_x % 4 == 1) {
      y1 = srcY[0];
      y2 = srcY[1];
      y3 = srcY[2];
      u1 = srcU[0];
      v1 = srcV[0];

      destY[0] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destY[1] = CLAMP (APPLY_MATRIX (matrix, 0, y2, u1, v1), 0, 255);
      destY[2] = CLAMP (APPLY_MATRIX (matrix, 0, y3, u1, v1), 0, 255);

      destU[0] = CLAMP (
          (destU[0] + APPLY_MATRIX (matrix, 1, y1, u1,
                  v1) + APPLY_MATRIX (matrix, 1, y2, u1,
                  v1) + APPLY_MATRIX (matrix, 1, y3, u1, v1)) / 4, 0, 255);
      destV[0] =
          CLAMP ((destV[0] + APPLY_MATRIX (matrix, 2, y1, u1,
                  v1) + APPLY_MATRIX (matrix, 2, y2, u1,
                  v1) + APPLY_MATRIX (matrix, 2, y3, u1, v1)) / 4, 0, 255);

      j = dest_x + 3;
      src_y_idx = dest_y_idx = 3;
      dest_uv_idx = 1;
      src_uv_idx = (src_x % 4) + 3;
    } else if (dest_x % 4 == 2) {
      y1 = srcY[0];
      y2 = srcY[1];
      u1 = srcU[0];
      v1 = srcV[0];

      destY[0] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destY[1] = CLAMP (APPLY_MATRIX (matrix, 0, y2, u1, v1), 0, 255);

      destU[0] = CLAMP (
          (2 * destU[0] + APPLY_MATRIX (matrix, 1, y1, u1,
                  v1) + APPLY_MATRIX (matrix, 1, y2, u1, v1)) / 4, 0, 255);
      destV[0] =
          CLAMP ((2 * destV[0] + APPLY_MATRIX (matrix, 2, y1, u1,
                  v1) + APPLY_MATRIX (matrix, 2, y2, u1, v1)) / 4, 0, 255);

      j = dest_x + 2;
      src_y_idx = dest_y_idx = 2;
      dest_uv_idx = 1;
      src_uv_idx = (src_x % 4) + 2;
    } else if (dest_x % 4 == 3) {
      y1 = srcY[0];
      u1 = srcU[0];
      v1 = srcV[0];

      destY[0] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);

      destU[0] = CLAMP (
          (3 * destU[0] + APPLY_MATRIX (matrix, 1, y1, u1, v1)) / 4, 0, 255);
      destV[0] = CLAMP (
          (3 * destV[0] + APPLY_MATRIX (matrix, 2, y1, u1, v1)) / 4, 0, 255);

      j = dest_x + 1;
      src_y_idx = dest_y_idx = 1;
      dest_uv_idx = 1;
      src_uv_idx = (src_x % 4) + 1;
    } else {
      j = dest_x;
      src_y_idx = dest_y_idx = dest_uv_idx = 0;
      src_uv_idx = (src_x % 4);
    }

    /* 1.2. Copy all macro pixels from the source to the destination.
     *      All pixels now start at macro pixel boundary, i.e. no
     *      blending with the background is necessary. */
    for (; j < w - 3; j += 4) {
      y1 = srcY[src_y_idx];
      y2 = srcY[src_y_idx + 1];
      y3 = srcY[src_y_idx + 2];
      y4 = srcY[src_y_idx + 3];

      u1 = srcU[src_uv_idx / 4];
      v1 = srcV[src_uv_idx / 4];
      src_uv_idx++;
      u2 = srcU[src_uv_idx / 4];
      v2 = srcV[src_uv_idx / 4];
      src_uv_idx++;
      u3 = srcU[src_uv_idx / 4];
      v3 = srcV[src_uv_idx / 4];
      src_uv_idx++;
      u4 = srcU[src_uv_idx / 4];
      v4 = srcV[src_uv_idx / 4];
      src_uv_idx++;

      destY[dest_y_idx] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destY[dest_y_idx + 1] =
          CLAMP (APPLY_MATRIX (matrix, 0, y2, u2, v2), 0, 255);
      destY[dest_y_idx + 2] =
          CLAMP (APPLY_MATRIX (matrix, 0, y3, u3, v3), 0, 255);
      destY[dest_y_idx + 3] =
          CLAMP (APPLY_MATRIX (matrix, 0, y4, u4, v4), 0, 255);

      destU[dest_uv_idx] = CLAMP (
          (APPLY_MATRIX (matrix, 1, y1, u1, v1) + APPLY_MATRIX (matrix, 1, y2,
                  u2, v2) + APPLY_MATRIX (matrix, 1, y3, u3,
                  v3) + APPLY_MATRIX (matrix, 1, y4, u4, v4)) / 4, 0, 255);
      destV[dest_uv_idx] =
          CLAMP ((APPLY_MATRIX (matrix, 2, y1, u1, v1) + APPLY_MATRIX (matrix,
                  2, y2, u2, v2) + APPLY_MATRIX (matrix, 2, y3, u3,
                  v3) + APPLY_MATRIX (matrix, 2, y4, u4, v4)) / 4, 0, 255);

      dest_y_idx += 4;
      src_y_idx += 4;
      dest_uv_idx++;
    }

    /* 1.3. Now copy the last pixel if one exists and blend it
     *      with the background because we only fill part of
     *      the macro pixel. In case this is the last pixel of
     *      the destination we will fill a larger part. */
    if (j == w - 1 && j == dest_width - 1) {
      y1 = srcY[src_y_idx];
      u1 = srcU[src_uv_idx / 4];
      v1 = srcV[src_uv_idx / 4];

      destY[dest_y_idx] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destU[dest_uv_idx] = CLAMP (APPLY_MATRIX (matrix, 1, y1, u1, v1), 0, 255);
      destV[dest_uv_idx] = CLAMP (APPLY_MATRIX (matrix, 2, y1, u1, v1), 0, 255);
    } else if (j == w - 1) {
      y1 = srcY[src_y_idx];
      u1 = srcU[src_uv_idx / 4];
      v1 = srcV[src_uv_idx / 4];

      destY[dest_y_idx] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destU[dest_uv_idx] = CLAMP (
          (3 * destU[dest_uv_idx] + APPLY_MATRIX (matrix, 1, y1, u1,
                  v1)) / 4, 0, 255);
      destV[dest_uv_idx] = CLAMP (
          (3 * destV[dest_uv_idx] + APPLY_MATRIX (matrix, 2, y1, u1,
                  v1)) / 4, 0, 255);
    } else if (j == w - 2 && j == dest_width - 2) {
      y1 = srcY[src_y_idx];
      y2 = srcY[src_y_idx + 1];
      u1 = srcU[src_uv_idx / 4];
      v1 = srcV[src_uv_idx / 4];

      destY[dest_y_idx] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destY[dest_y_idx + 1] =
          CLAMP (APPLY_MATRIX (matrix, 0, y2, u1, v1), 0, 255);
      destU[dest_uv_idx] = CLAMP (APPLY_MATRIX (matrix, 1, y1, u1, v1), 0, 255);
      destV[dest_uv_idx] = CLAMP (APPLY_MATRIX (matrix, 2, y1, u1, v1), 0, 255);
    } else if (j == w - 2) {
      y1 = srcY[src_y_idx];
      y2 = srcY[src_y_idx + 1];
      u1 = srcU[src_uv_idx / 4];
      v1 = srcV[src_uv_idx / 4];

      destY[dest_y_idx] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destY[dest_y_idx + 1] =
          CLAMP (APPLY_MATRIX (matrix, 0, y2, u1, v1), 0, 255);
      destU[dest_uv_idx] =
          CLAMP ((destU[dest_uv_idx] + APPLY_MATRIX (matrix, 1, y1, u1,
                  v1)) / 2, 0, 255);
      destV[dest_uv_idx] =
          CLAMP ((destV[dest_uv_idx] + APPLY_MATRIX (matrix, 2, y1, u1,
                  v1)) / 2, 0, 255);
    } else if (j == w - 3 && j == dest_width - 3) {
      y1 = srcY[src_y_idx];
      y2 = srcY[src_y_idx + 1];
      y3 = srcY[src_y_idx + 2];
      u1 = srcU[src_uv_idx / 4];
      v1 = srcV[src_uv_idx / 4];

      destY[dest_y_idx] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destY[dest_y_idx + 1] =
          CLAMP (APPLY_MATRIX (matrix, 0, y2, u1, v1), 0, 255);
      destY[dest_y_idx + 2] =
          CLAMP (APPLY_MATRIX (matrix, 0, y3, u1, v1), 0, 255);
      destU[dest_uv_idx] = CLAMP (APPLY_MATRIX (matrix, 1, y1, u1, v1), 0, 255);
      destV[dest_uv_idx] = CLAMP (APPLY_MATRIX (matrix, 2, y1, u1, v1), 0, 255);
    } else if (j == w - 3) {
      y1 = srcY[src_y_idx];
      y2 = srcY[src_y_idx + 1];
      y3 = srcY[src_y_idx + 2];
      u1 = srcU[src_uv_idx / 4];
      v1 = srcV[src_uv_idx / 4];

      destY[dest_y_idx] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destY[dest_y_idx + 1] =
          CLAMP (APPLY_MATRIX (matrix, 0, y2, u1, v1), 0, 255);
      destY[dest_y_idx + 2] =
          CLAMP (APPLY_MATRIX (matrix, 0, y3, u1, v1), 0, 255);
      destU[dest_uv_idx] =
          CLAMP ((destU[dest_uv_idx] + 3 * APPLY_MATRIX (matrix, 1, y1, u1,
                  v1)) / 4, 0, 255);
      destV[dest_uv_idx] =
          CLAMP ((destV[dest_uv_idx] + 3 * APPLY_MATRIX (matrix, 2, y1, u1,
                  v1)) / 4, 0, 255);
    }

    destY += dest_strideY;
    destU += dest_strideU;
    destV += dest_strideV;
    srcY += src_strideY;
    srcU += src_strideU;
    srcV += src_strideV;
  }
}

static void
copy_i420_i420 (guint i_alpha, GstVideoFrame * dest,
    gboolean dest_sdtv, gint dest_x, gint dest_y, GstVideoFrame * src,
    gboolean src_sdtv, gint src_x, gint src_y, gint w, gint h)
{
  gint i, j;
  guint8 *destY, *destU, *destV;
  const guint8 *srcY, *srcU, *srcV;
  guint8 *destY2;
  const guint8 *srcY2, *srcU2, *srcV2;
  gint dest_strideY, dest_strideU, dest_strideV;
  gint src_strideY, src_strideU, src_strideV;
  gint src_y_idx, src_uv_idx;
  gint dest_y_idx, dest_uv_idx;
  gint matrix[12];
  gint y1, y2, y3, y4;
  gint u1, u2, u3, u4;
  gint v1, v2, v3, v4;
  gint dest_width, dest_height;

  dest_width = GST_VIDEO_FRAME_WIDTH (dest);
  dest_height = GST_VIDEO_FRAME_HEIGHT (dest);

  dest_strideY = GST_VIDEO_FRAME_COMP_STRIDE (dest, 0);
  dest_strideU = GST_VIDEO_FRAME_COMP_STRIDE (dest, 1);
  dest_strideV = GST_VIDEO_FRAME_COMP_STRIDE (dest, 2);

  src_strideY = GST_VIDEO_FRAME_COMP_STRIDE (src, 0);
  src_strideU = GST_VIDEO_FRAME_COMP_STRIDE (src, 1);
  src_strideV = GST_VIDEO_FRAME_COMP_STRIDE (src, 2);

  destY = GST_VIDEO_FRAME_COMP_DATA (dest, 0);
  destU = GST_VIDEO_FRAME_COMP_DATA (dest, 1);
  destV = GST_VIDEO_FRAME_COMP_DATA (dest, 2);

  srcY = GST_VIDEO_FRAME_COMP_DATA (src, 0);
  srcU = GST_VIDEO_FRAME_COMP_DATA (src, 1);
  srcV = GST_VIDEO_FRAME_COMP_DATA (src, 2);

  destY = destY + dest_y * dest_strideY + dest_x;
  destU = destU + (dest_y / 2) * dest_strideU + dest_x / 2;
  destV = destV + (dest_y / 2) * dest_strideV + dest_x / 2;

  srcY = srcY + src_y * src_strideY + src_x;
  srcU = srcU + (src_y / 2) * src_strideU + src_x / 2;
  srcV = srcV + (src_y / 2) * src_strideV + src_x / 2;

  destY2 = destY + dest_strideY;
  srcY2 = srcY + src_strideY;

  h = dest_y + h;
  w = dest_x + w;

  if (src_sdtv != dest_sdtv)
    memcpy (matrix,
        dest_sdtv ? cog_ycbcr_hdtv_to_ycbcr_sdtv_matrix_8bit :
        cog_ycbcr_sdtv_to_ycbcr_hdtv_matrix_8bit, 12 * sizeof (gint));
  else
    memcpy (matrix, cog_identity_matrix_8bit, 12 * sizeof (gint));
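
  /* Both source and destination are 4:2:0 here, so macro pixel boundaries
   * matter in both directions: partially covered 2x2 blocks blend chroma
   * with the destination, and the source chroma pointers only advance on
   * every second source line. */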

  /* 1. Handle the first destination scanline specially if it
   *    doesn't start at the macro pixel boundary, i.e. blend
   *    with the background! */
  if (dest_y % 2 == 1) {
    /* 1.1. Handle the first destination pixel if it doesn't
     *      start at the macro pixel boundary, i.e. blend with
     *      the background! */
    if (dest_x % 2 == 1) {
      y1 = srcY[0];
      u1 = srcU[0];
      v1 = srcV[0];

      destY[0] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destU[0] =
          CLAMP ((3 * destU[0] + APPLY_MATRIX (matrix, 1, y1, u1, v1)) / 4, 0,
          255);
      destV[0] =
          CLAMP ((3 * destV[0] + APPLY_MATRIX (matrix, 2, y1, u1, v1)) / 4, 0,
          255);

      j = dest_x + 1;
      src_y_idx = dest_y_idx = dest_uv_idx = 1;
      src_uv_idx = (src_x % 2) + 1;
    } else {
      j = dest_x;
      src_y_idx = dest_y_idx = dest_uv_idx = 0;
      src_uv_idx = (src_x % 2);
    }

    /* 1.2. Copy all macro pixels from the source to the destination
     *      but blend with the background because we're only filling
     *      the lower part of the macro pixels. */
    for (; j < w - 1; j += 2) {
      y1 = srcY[src_y_idx];
      y2 = srcY[src_y_idx + 1];

      u1 = srcU[src_uv_idx / 2];
      v1 = srcV[src_uv_idx / 2];
      src_uv_idx++;
      u2 = srcU[src_uv_idx / 2];
      v2 = srcV[src_uv_idx / 2];
      src_uv_idx++;

      destY[dest_y_idx] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destY[dest_y_idx + 1] =
          CLAMP (APPLY_MATRIX (matrix, 0, y2, u2, v2), 0, 255);
      destU[dest_uv_idx] =
          CLAMP ((2 * destU[dest_uv_idx] + APPLY_MATRIX (matrix, 1, y1, u1,
                  v1) + APPLY_MATRIX (matrix, 1, y2, u2, v2)) / 4, 0, 255);
      destV[dest_uv_idx] =
          CLAMP ((2 * destV[dest_uv_idx] + APPLY_MATRIX (matrix, 2, y1, u1,
                  v1) + APPLY_MATRIX (matrix, 2, y2, u2, v2)) / 4, 0, 255);

      dest_y_idx += 2;
      src_y_idx += 2;
      dest_uv_idx++;
    }

    /* 1.3. Now copy the last pixel if one exists and blend it
     *      with the background because we only fill part of
     *      the macro pixel. In case this is the last pixel of
     *      the destination we will fill a larger part. */
    if (j == w - 1 && j == dest_width - 1) {
      y1 = srcY[src_y_idx];
      u1 = srcU[src_uv_idx / 2];
      v1 = srcV[src_uv_idx / 2];

      destY[dest_y_idx] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destU[dest_uv_idx] = CLAMP (
          (destU[dest_uv_idx] + APPLY_MATRIX (matrix, 1, y1, u1, v1)) / 2, 0,
          255);
      destV[dest_uv_idx] =
          CLAMP ((destV[dest_uv_idx] + APPLY_MATRIX (matrix, 2, y1, u1,
                  v1)) / 2, 0, 255);
    } else if (j == w - 1) {
      y1 = srcY[src_y_idx];
      u1 = srcU[src_uv_idx / 2];
      v1 = srcV[src_uv_idx / 2];

      destY[dest_y_idx] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destU[dest_uv_idx] = CLAMP (
          (3 * destU[dest_uv_idx] + APPLY_MATRIX (matrix, 1, y1, u1, v1)) / 4,
          0, 255);
      destV[dest_uv_idx] =
          CLAMP ((3 * destV[dest_uv_idx] + APPLY_MATRIX (matrix, 2, y1, u1,
                  v1)) / 4, 0, 255);
    }

    destY += dest_strideY;
    destY2 += dest_strideY;
    destU += dest_strideU;
    destV += dest_strideV;
    srcY += src_strideY;
    srcY2 += src_strideY;
    src_y++;
    if (src_y % 2 == 0) {
      srcU += src_strideU;
      srcV += src_strideV;
    }
    i = dest_y + 1;
  } else {
    i = dest_y;
  }

  /* 2. Copy all macro pixel scanlines, the destination scanline
   *    now starts at macro pixel boundary. */
  for (; i < h - 1; i += 2) {
    /* 2.1. Handle the first destination pixel if it doesn't
     *      start at the macro pixel boundary, i.e. blend with
     *      the background! */

    srcU2 = srcU;
    srcV2 = srcV;
    if (src_y % 2 == 1) {
      srcU2 += src_strideU;
      srcV2 += src_strideV;
    }

    if (dest_x % 2 == 1) {
      y1 = srcY[0];
      y2 = srcY2[0];
      u1 = srcU[0];
      v1 = srcV[0];
      u2 = srcU2[0];
      v2 = srcV2[0];

      destY[0] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destY2[0] = CLAMP (APPLY_MATRIX (matrix, 0, y2, u2, v2), 0, 255);
      destU[0] = CLAMP (
          (2 * destU[0] + APPLY_MATRIX (matrix, 1, y1, u1,
                  v1) + APPLY_MATRIX (matrix, 1, y2, u2, v2)) / 4, 0, 255);
      destV[0] = CLAMP (
          (2 * destV[0] + APPLY_MATRIX (matrix, 2, y1, u1,
                  v1) + APPLY_MATRIX (matrix, 2, y2, u2, v2)) / 4, 0, 255);
      j = dest_x + 1;
      src_y_idx = dest_y_idx = dest_uv_idx = 1;
      src_uv_idx = (src_x % 2) + 1;
    } else {
      j = dest_x;
      src_y_idx = dest_y_idx = dest_uv_idx = 0;
      src_uv_idx = (src_x % 2);
    }

    /* 2.2. Copy all macro pixels from the source to the destination.
     *      All pixels now start at macro pixel boundary, i.e. no
     *      blending with the background is necessary. */
    for (; j < w - 1; j += 2) {
      y1 = srcY[src_y_idx];
      y2 = srcY[src_y_idx + 1];
      y3 = srcY2[src_y_idx];
      y4 = srcY2[src_y_idx + 1];

      u1 = srcU[src_uv_idx / 2];
      u3 = srcU2[src_uv_idx / 2];
      v1 = srcV[src_uv_idx / 2];
      v3 = srcV2[src_uv_idx / 2];
      src_uv_idx++;
      u2 = srcU[src_uv_idx / 2];
      u4 = srcU2[src_uv_idx / 2];
      v2 = srcV[src_uv_idx / 2];
      v4 = srcV2[src_uv_idx / 2];
      src_uv_idx++;

      destY[dest_y_idx] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destY[dest_y_idx + 1] =
          CLAMP (APPLY_MATRIX (matrix, 0, y2, u2, v2), 0, 255);
      destY2[dest_y_idx] = CLAMP (APPLY_MATRIX (matrix, 0, y3, u3, v3), 0, 255);
      destY2[dest_y_idx + 1] =
          CLAMP (APPLY_MATRIX (matrix, 0, y4, u4, v4), 0, 255);

      destU[dest_uv_idx] = CLAMP (
          (APPLY_MATRIX (matrix, 1, y1, u1, v1) + APPLY_MATRIX (matrix, 1, y2,
                  u2, v2) + APPLY_MATRIX (matrix, 1, y3, u3,
                  v3) + APPLY_MATRIX (matrix, 1, y4, u4, v4)) / 4, 0, 255);
      destV[dest_uv_idx] = CLAMP (
          (APPLY_MATRIX (matrix, 2, y1, u1, v1) + APPLY_MATRIX (matrix, 2, y2,
                  u2, v2) + APPLY_MATRIX (matrix, 2, y3, u3,
                  v3) + APPLY_MATRIX (matrix, 2, y4, u4, v4)) / 4, 0, 255);

      dest_y_idx += 2;
      src_y_idx += 2;
      dest_uv_idx++;
    }

    /* 2.3. Now copy the last pixel if one exists and blend it
     *      with the background because we only fill part of
     *      the macro pixel. In case this is the last pixel of
     *      the destination we will fill a larger part. */
    if (j == w - 1 && j == dest_width - 1) {
      y1 = srcY[src_y_idx];
      y2 = srcY2[src_y_idx];

      u1 = srcU[src_uv_idx / 2];
      u2 = srcU2[src_uv_idx / 2];

      v1 = srcV[src_uv_idx / 2];
      v2 = srcV2[src_uv_idx / 2];

      destY[dest_y_idx] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destY2[dest_y_idx] = CLAMP (APPLY_MATRIX (matrix, 0, y2, u2, v2), 0, 255);
      destU[dest_uv_idx] = CLAMP (
          (APPLY_MATRIX (matrix, 1, y1, u1, v1) + APPLY_MATRIX (matrix, 1, y2,
                  u2, v2)) / 2, 0, 255);
      destV[dest_uv_idx] = CLAMP (
          (APPLY_MATRIX (matrix, 2, y1, u1, v1) + APPLY_MATRIX (matrix, 2, y2,
                  u2, v2)) / 2, 0, 255);
    } else if (j == w - 1) {
      y1 = srcY[src_y_idx];
      y2 = srcY2[src_y_idx];

      u1 = srcU[src_uv_idx / 2];
      u2 = srcU2[src_uv_idx / 2];

      v1 = srcV[src_uv_idx / 2];
      v2 = srcV2[src_uv_idx / 2];

      destY[dest_y_idx] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destY2[dest_y_idx] = CLAMP (APPLY_MATRIX (matrix, 0, y2, u2, v2), 0, 255);
      destU[dest_uv_idx] = CLAMP (
          (2 * destU[dest_uv_idx] + APPLY_MATRIX (matrix, 1, y1, u1,
                  v1) + APPLY_MATRIX (matrix, 1, y2, u2, v2)) / 4, 0, 255);
      destV[dest_uv_idx] = CLAMP (
          (2 * destV[dest_uv_idx] + APPLY_MATRIX (matrix, 2, y1, u1,
                  v1) + APPLY_MATRIX (matrix, 2, y2, u2, v2)) / 4, 0, 255);
    }

    destY += 2 * dest_strideY;
    destY2 += 2 * dest_strideY;
    destU += dest_strideU;
    destV += dest_strideV;
    srcY += 2 * src_strideY;
    srcY2 += 2 * src_strideY;

    src_y += 2;
    srcU += src_strideU;
    srcV += src_strideV;
  }

  /* 3. Handle the last scanline if one exists. This again
   *    doesn't start at macro pixel boundary but should
   *    only fill the upper part of the macro pixels. */
  if (i == h - 1 && i == dest_height - 1) {
    /* 3.1. Handle the first destination pixel if it doesn't
     *      start at the macro pixel boundary, i.e. blend with
     *      the background! */
    if (dest_x % 2 == 1) {
      y1 = srcY[0];
      u1 = srcU[0];
      v1 = srcV[0];

      destY[0] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destU[0] =
          CLAMP ((destU[0] + APPLY_MATRIX (matrix, 1, y1, u1, v1)) / 2, 0, 255);
      destV[0] =
          CLAMP ((destV[0] + APPLY_MATRIX (matrix, 2, y1, u1, v1)) / 2, 0, 255);

      j = dest_x + 1;
      src_y_idx = dest_y_idx = dest_uv_idx = 1;
      src_uv_idx = (src_x % 2) + 1;
    } else {
      j = dest_x;
      src_y_idx = dest_y_idx = dest_uv_idx = 0;
      src_uv_idx = (src_x % 2);
    }

    /* 3.2. Copy all macro pixels from the source to the destination
     *      but blend with the background because we're only filling
     *      the upper part of the macro pixels. */
    for (; j < w - 1; j += 2) {
      y1 = srcY[src_y_idx];
      y2 = srcY[src_y_idx + 1];

      u1 = srcU[src_uv_idx / 2];
      v1 = srcV[src_uv_idx / 2];
      src_uv_idx++;
      u2 = srcU[src_uv_idx / 2];
      v2 = srcV[src_uv_idx / 2];
      src_uv_idx++;

      destY[dest_y_idx] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destY[dest_y_idx + 1] =
          CLAMP (APPLY_MATRIX (matrix, 0, y2, u2, v2), 0, 255);

      destU[dest_uv_idx] = CLAMP (
          (2 * destU[dest_uv_idx] + APPLY_MATRIX (matrix, 1, y1, u1,
                  v1) + APPLY_MATRIX (matrix, 1, y2, u2, v2)) / 4, 0, 255);
      destV[dest_uv_idx] = CLAMP (
          (2 * destV[dest_uv_idx] + APPLY_MATRIX (matrix, 2, y1, u1,
                  v1) + APPLY_MATRIX (matrix, 2, y2, u2, v2)) / 4, 0, 255);

      dest_y_idx += 2;
      src_y_idx += 2;
      dest_uv_idx++;
    }

    /* 3.3. Now copy the last pixel if one exists and blend it
     *      with the background because we only fill part of
     *      the macro pixel. In case this is the last pixel of
     *      the destination we will fill a larger part. */
    if (j == w - 1 && j == dest_width - 1) {
      y1 = srcY[src_y_idx];
      u1 = srcU[src_uv_idx / 2];
      v1 = srcV[src_uv_idx / 2];

      destY[dest_y_idx] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destU[dest_uv_idx] = CLAMP (
          (destU[dest_uv_idx] + APPLY_MATRIX (matrix, 1, y1, u1, v1)) / 2, 0,
          255);
      destV[dest_uv_idx] =
          CLAMP ((destV[dest_uv_idx] + APPLY_MATRIX (matrix, 2, y1, u1,
                  v1)) / 2, 0, 255);
    } else if (j == w - 1) {
      y1 = srcY[src_y_idx];
      u1 = srcU[src_uv_idx / 2];
      v1 = srcV[src_uv_idx / 2];

      destY[dest_y_idx] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destU[dest_uv_idx] = CLAMP (
          (3 * destU[dest_uv_idx] + APPLY_MATRIX (matrix, 1, y1, u1, v1)) / 4,
          0, 255);
      destV[dest_uv_idx] =
          CLAMP ((3 * destV[dest_uv_idx] + APPLY_MATRIX (matrix, 2, y1, u1,
                  v1)) / 4, 0, 255);
    }
  } else if (i == h - 1) {
    /* 3.1. Handle the first destination pixel if it doesn't
     *      start at the macro pixel boundary, i.e. blend with
     *      the background! */
    if (dest_x % 2 == 1) {
      y1 = srcY[0];
      u1 = srcU[0];
      v1 = srcV[0];

      destY[0] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destU[0] =
          CLAMP ((3 * destU[0] + APPLY_MATRIX (matrix, 1, y1, u1, v1)) / 4, 0,
          255);
      destV[0] =
          CLAMP ((3 * destV[0] + APPLY_MATRIX (matrix, 2, y1, u1, v1)) / 4, 0,
          255);

      j = dest_x + 1;
      src_y_idx = dest_y_idx = dest_uv_idx = 1;
      src_uv_idx = (src_x % 2) + 1;
    } else {
      j = dest_x;
      src_y_idx = dest_y_idx = dest_uv_idx = 0;
      src_uv_idx = (src_x % 2);
    }

    /* 3.2. Copy all macro pixels from the source to the destination
     *      but blend with the background because we're only filling
     *      the upper part of the macro pixels. */
    for (; j < w - 1; j += 2) {
      y1 = srcY[src_y_idx];
      y2 = srcY[src_y_idx + 1];

      u1 = srcU[src_uv_idx / 2];
      v1 = srcV[src_uv_idx / 2];
      src_uv_idx++;
      u2 = srcU[src_uv_idx / 2];
      v2 = srcV[src_uv_idx / 2];
      src_uv_idx++;

      destY[dest_y_idx] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destY[dest_y_idx + 1] =
          CLAMP (APPLY_MATRIX (matrix, 0, y2, u2, v2), 0, 255);

      destU[dest_uv_idx] = CLAMP (
          (2 * destU[dest_uv_idx] + APPLY_MATRIX (matrix, 1, y1, u1,
                  v1) + APPLY_MATRIX (matrix, 1, y2, u2, v2)) / 4, 0, 255);
      destV[dest_uv_idx] = CLAMP (
          (2 * destV[dest_uv_idx] + APPLY_MATRIX (matrix, 2, y1, u1,
                  v1) + APPLY_MATRIX (matrix, 2, y2, u2, v2)) / 4, 0, 255);

      dest_y_idx += 2;
      src_y_idx += 2;
      dest_uv_idx++;
    }

    /* 3.3. Now copy the last pixel if one exists and blend it
     *      with the background because we only fill part of
     *      the macro pixel. In case this is the last pixel of
     *      the destination we will fill a larger part. */
    if (j == w - 1 && j == dest_width - 1) {
      y1 = srcY[src_y_idx];
      u1 = srcU[src_uv_idx / 2];
      v1 = srcV[src_uv_idx / 2];

      destY[dest_y_idx] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destU[dest_uv_idx] = CLAMP (
          (destU[dest_uv_idx] + APPLY_MATRIX (matrix, 1, y1, u1, v1)) / 2, 0,
          255);
      destV[dest_uv_idx] =
          CLAMP ((destV[dest_uv_idx] + APPLY_MATRIX (matrix, 2, y1, u1,
                  v1)) / 2, 0, 255);
    } else if (j == w - 1) {
      y1 = srcY[src_y_idx];
      u1 = srcU[src_uv_idx / 2];
      v1 = srcV[src_uv_idx / 2];

      destY[dest_y_idx] = CLAMP (APPLY_MATRIX (matrix, 0, y1, u1, v1), 0, 255);
      destU[dest_uv_idx] = CLAMP (
          (3 * destU[dest_uv_idx] + APPLY_MATRIX (matrix, 1, y1, u1, v1)) / 4,
          0, 255);
      destV[dest_uv_idx] =
          CLAMP ((3 * destV[dest_uv_idx] + APPLY_MATRIX (matrix, 2, y1, u1,
                  v1)) / 4, 0, 255);
    }
  }
}

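/* Copy a w x h region from a planar I420/YV12 source into a packed AYUV
 * destination, writing i_alpha as the alpha byte of every pixel. Chroma is
 * upsampled by simple repetition; if source and destination colorimetry
 * (SDTV/HDTV) differ, every pixel is converted with the 8-bit matrix. */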
static void
copy_i420_ayuv (guint i_alpha, GstVideoFrame * dest_frame,
    gboolean dest_sdtv, gint dest_x, gint dest_y, GstVideoFrame * src_frame,
    gboolean src_sdtv, gint src_x, gint src_y, gint w, gint h)
{
  const guint8 *srcY, *srcU, *srcV;
  gint src_strideY, src_strideU, src_strideV;
  gint dest_stride;
  guint8 *dest;

  src_strideY = GST_VIDEO_FRAME_COMP_STRIDE (src_frame, 0);
  src_strideU = GST_VIDEO_FRAME_COMP_STRIDE (src_frame, 1);
  src_strideV = GST_VIDEO_FRAME_COMP_STRIDE (src_frame, 2);

  srcY = GST_VIDEO_FRAME_COMP_DATA (src_frame, 0);
  srcU = GST_VIDEO_FRAME_COMP_DATA (src_frame, 1);
  srcV = GST_VIDEO_FRAME_COMP_DATA (src_frame, 2);

  dest_stride = GST_VIDEO_FRAME_PLANE_STRIDE (dest_frame, 0);

  dest = GST_VIDEO_FRAME_PLANE_DATA (dest_frame, 0);
  dest = dest + dest_y * dest_stride + dest_x * 4;

  srcY = srcY + src_y * src_strideY + src_x;
  srcU = srcU + (src_y / 2) * src_strideU + src_x / 2;
  srcV = srcV + (src_y / 2) * src_strideV + src_x / 2;

  i_alpha = CLAMP (i_alpha, 0, 255);

  if (src_sdtv != dest_sdtv) {
    gint i, j, uv_idx;
    gint y, u, v;
    gint y1, u1, v1;
    gint matrix[12];

    memcpy (matrix,
        dest_sdtv ? cog_ycbcr_hdtv_to_ycbcr_sdtv_matrix_8bit :
        cog_ycbcr_sdtv_to_ycbcr_hdtv_matrix_8bit, 12 * sizeof (gint));

    for (i = 0; i < h; i++) {
      for (j = 0, uv_idx = src_x % 2; j < w; j++, uv_idx++) {
        y = srcY[j];
        u = srcU[uv_idx / 2];
        v = srcV[uv_idx / 2];

        y1 = APPLY_MATRIX (matrix, 0, y, u, v);
        u1 = APPLY_MATRIX (matrix, 1, y, u, v);
        v1 = APPLY_MATRIX (matrix, 2, y, u, v);

        dest[4 * j + 0] = i_alpha;
        dest[4 * j + 1] = y1;
        dest[4 * j + 2] = u1;
        dest[4 * j + 3] = v1;
      }
      dest += dest_stride;

      src_y++;
      srcY += src_strideY;
      if (src_y % 2 == 0) {
        srcU += src_strideU;
        srcV += src_strideV;
      }
    }
  } else {
    gint i, j, uv_idx;
    gint y, u, v;

    for (i = 0; i < h; i++) {
      for (j = 0, uv_idx = src_x % 2; j < w; j++, uv_idx++) {
        y = srcY[j];
        u = srcU[uv_idx / 2];
        v = srcV[uv_idx / 2];

        dest[4 * j + 0] = i_alpha;
        dest[4 * j + 1] = y;
        dest[4 * j + 2] = u;
        dest[4 * j + 3] = v;
      }
      dest += dest_stride;

      src_y++;
      srcY += src_strideY;
      if (src_y % 2 == 0) {
        srcU += src_strideU;
        srcV += src_strideV;
      }
    }
  }
}

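/* Fill a packed 32-bit RGB(A) frame with the border color and alpha. A
 * single pixel value is assembled from the frame's component offsets and
 * splatted with the Orc helper; if the stride has no padding the whole
 * plane is filled in one call, otherwise row by row. */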
static void
fill_rgb32 (GstVideoBoxFill fill_type, guint b_alpha,
    GstVideoFrame * frame, gboolean sdtv)
{
  guint32 empty_pixel;
  gint p[4];
  guint8 *dest;
  guint stride;
  gint width, height;

  width = GST_VIDEO_FRAME_WIDTH (frame);
  height = GST_VIDEO_FRAME_HEIGHT (frame);

  dest = GST_VIDEO_FRAME_PLANE_DATA (frame, 0);
  stride = GST_VIDEO_FRAME_PLANE_STRIDE (frame, 0);

  p[0] = GST_VIDEO_FRAME_COMP_OFFSET (frame, 3);
  p[1] = GST_VIDEO_FRAME_COMP_OFFSET (frame, 0);
  p[2] = GST_VIDEO_FRAME_COMP_OFFSET (frame, 1);
  p[3] = GST_VIDEO_FRAME_COMP_OFFSET (frame, 2);

  b_alpha = CLAMP (b_alpha, 0, 255);

  empty_pixel = GUINT32_FROM_LE ((b_alpha << (p[0] * 8)) |
      (rgb_colors_R[fill_type] << (p[1] * 8)) |
      (rgb_colors_G[fill_type] << (p[2] * 8)) |
      (rgb_colors_B[fill_type] << (p[3] * 8)));

  if (stride == width * 4) {
    video_box_orc_splat_u32 ((guint32 *) dest, empty_pixel, width * height);
  } else if (height) {
    for (; height; --height) {
      video_box_orc_splat_u32 ((guint32 *) dest, empty_pixel, width);
      dest += stride;
    }
  }
}

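/* Fill a packed 24-bit RGB/BGR frame with the border color, writing the
 * three components per pixel at the frame's component offsets. The alpha
 * value is ignored as these formats have no alpha channel. */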
static void
fill_rgb24 (GstVideoBoxFill fill_type, guint b_alpha,
    GstVideoFrame * frame, gboolean sdtv)
{
  gint dest_stride;
  gint p[4];
  gint i, j;
  guint8 *dest;
  gint width, height;

  width = GST_VIDEO_FRAME_WIDTH (frame);
  height = GST_VIDEO_FRAME_HEIGHT (frame);

  dest = GST_VIDEO_FRAME_PLANE_DATA (frame, 0);
  dest_stride = GST_VIDEO_FRAME_PLANE_STRIDE (frame, 0);

  p[1] = GST_VIDEO_FRAME_COMP_OFFSET (frame, 0);
  p[2] = GST_VIDEO_FRAME_COMP_OFFSET (frame, 1);
  p[3] = GST_VIDEO_FRAME_COMP_OFFSET (frame, 2);

  for (i = 0; i < height; i++) {
    for (j = 0; j < width; j++) {
      dest[3 * j + p[1]] = rgb_colors_R[fill_type];
      dest[3 * j + p[2]] = rgb_colors_G[fill_type];
      dest[3 * j + p[3]] = rgb_colors_B[fill_type];
    }
    dest += dest_stride;
  }
}

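/* Copy a w x h region between packed 24/32-bit RGB variants, remapping the
 * component order via the frames' component offsets. If both frames carry
 * an alpha channel the source alpha is scaled by i_alpha; if only the
 * destination has alpha, i_alpha is written as-is. */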
static void
copy_rgb32 (guint i_alpha, GstVideoFrame * dest_frame,
    gboolean dest_sdtv, gint dest_x, gint dest_y, GstVideoFrame * src_frame,
    gboolean src_sdtv, gint src_x, gint src_y, gint w, gint h)
{
  gint i, j;
  gint src_stride, dest_stride;
  gboolean in_alpha, out_alpha;
  gint in_bpp, out_bpp;
  gint p_out[4];
  gint p_in[4];
  gboolean packed_out;
  gboolean packed_in;
  guint8 *src, *dest;

  src_stride = GST_VIDEO_FRAME_PLANE_STRIDE (src_frame, 0);
  dest_stride = GST_VIDEO_FRAME_PLANE_STRIDE (dest_frame, 0);
  in_bpp = GST_VIDEO_FRAME_COMP_PSTRIDE (src_frame, 0);
  out_bpp = GST_VIDEO_FRAME_COMP_PSTRIDE (dest_frame, 0);
  packed_in = (in_bpp < 4);
  packed_out = (out_bpp < 4);

  out_alpha = GST_VIDEO_INFO_HAS_ALPHA (&dest_frame->info);
  p_out[0] = GST_VIDEO_FRAME_COMP_OFFSET (dest_frame, 3);
  p_out[1] = GST_VIDEO_FRAME_COMP_OFFSET (dest_frame, 0);
  p_out[2] = GST_VIDEO_FRAME_COMP_OFFSET (dest_frame, 1);
  p_out[3] = GST_VIDEO_FRAME_COMP_OFFSET (dest_frame, 2);

  in_alpha = GST_VIDEO_INFO_HAS_ALPHA (&src_frame->info);
  p_in[0] = GST_VIDEO_FRAME_COMP_OFFSET (src_frame, 3);
  p_in[1] = GST_VIDEO_FRAME_COMP_OFFSET (src_frame, 0);
  p_in[2] = GST_VIDEO_FRAME_COMP_OFFSET (src_frame, 1);
  p_in[3] = GST_VIDEO_FRAME_COMP_OFFSET (src_frame, 2);

  dest = GST_VIDEO_FRAME_PLANE_DATA (dest_frame, 0);
  dest = dest + dest_y * dest_stride + dest_x * out_bpp;
  src = GST_VIDEO_FRAME_PLANE_DATA (src_frame, 0);
  src = src + src_y * src_stride + src_x * in_bpp;

  if (in_alpha && out_alpha) {
    w *= 4;
    for (i = 0; i < h; i++) {
      for (j = 0; j < w; j += 4) {
        dest[j + p_out[0]] = (src[j + p_in[0]] * i_alpha) >> 8;
        dest[j + p_out[1]] = src[j + p_in[1]];
        dest[j + p_out[2]] = src[j + p_in[2]];
        dest[j + p_out[3]] = src[j + p_in[3]];
      }
      dest += dest_stride;
      src += src_stride;
    }
  } else if (out_alpha && !packed_in) {
    w *= 4;
    i_alpha = CLAMP (i_alpha, 0, 255);

    for (i = 0; i < h; i++) {
      for (j = 0; j < w; j += 4) {
        dest[j + p_out[0]] = i_alpha;
        dest[j + p_out[1]] = src[j + p_in[1]];
        dest[j + p_out[2]] = src[j + p_in[2]];
        dest[j + p_out[3]] = src[j + p_in[3]];
      }
      dest += dest_stride;
      src += src_stride;
    }
  } else if (out_alpha && packed_in) {
    i_alpha = CLAMP (i_alpha, 0, 255);

    for (i = 0; i < h; i++) {
      for (j = 0; j < w; j++) {
        dest[4 * j + p_out[0]] = i_alpha;
        dest[4 * j + p_out[1]] = src[in_bpp * j + p_in[1]];
        dest[4 * j + p_out[2]] = src[in_bpp * j + p_in[2]];
        dest[4 * j + p_out[3]] = src[in_bpp * j + p_in[3]];
      }
      dest += dest_stride;
      src += src_stride;
    }
  } else if (!packed_out && !packed_in) {
    w *= 4;
    for (i = 0; i < h; i++) {
      for (j = 0; j < w; j += 4) {
        dest[j + p_out[1]] = src[j + p_in[1]];
        dest[j + p_out[2]] = src[j + p_in[2]];
        dest[j + p_out[3]] = src[j + p_in[3]];
      }
      dest += dest_stride;
      src += src_stride;
    }
  } else {
    for (i = 0; i < h; i++) {
      for (j = 0; j < w; j++) {
        dest[out_bpp * j + p_out[1]] = src[in_bpp * j + p_in[1]];
        dest[out_bpp * j + p_out[2]] = src[in_bpp * j + p_in[2]];
        dest[out_bpp * j + p_out[3]] = src[in_bpp * j + p_in[3]];
      }
      dest += dest_stride;
      src += src_stride;
    }
  }
}

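/* Copy a w x h region from a packed 24/32-bit RGB source into a packed
 * AYUV destination, converting every pixel with the 8-bit RGB->YCbCr
 * matrix that matches the destination colorimetry. Source alpha, if
 * present, is scaled by i_alpha, otherwise i_alpha is written directly. */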
static void
copy_rgb32_ayuv (guint i_alpha, GstVideoFrame * dest_frame,
    gboolean dest_sdtv, gint dest_x, gint dest_y, GstVideoFrame * src_frame,
    gboolean src_sdtv, gint src_x, gint src_y, gint w, gint h)
{
  gint i, j;
  gint src_stride, dest_stride;
  gboolean in_alpha;
  gint in_bpp;
  gint p_in[4];
  gboolean packed_in;
  gint matrix[12];
  gint a;
  gint y, u, v;
  gint r, g, b;
  guint8 *dest, *src;

  dest = GST_VIDEO_FRAME_PLANE_DATA (dest_frame, 0);
  dest_stride = GST_VIDEO_FRAME_PLANE_STRIDE (dest_frame, 0);

  src_stride = GST_VIDEO_FRAME_PLANE_STRIDE (src_frame, 0);
  in_bpp = GST_VIDEO_FRAME_COMP_PSTRIDE (src_frame, 0);
  packed_in = (in_bpp < 4);

  in_alpha = GST_VIDEO_INFO_HAS_ALPHA (&src_frame->info);
  p_in[0] = GST_VIDEO_FRAME_COMP_OFFSET (src_frame, 3);
  p_in[1] = GST_VIDEO_FRAME_COMP_OFFSET (src_frame, 0);
  p_in[2] = GST_VIDEO_FRAME_COMP_OFFSET (src_frame, 1);
  p_in[3] = GST_VIDEO_FRAME_COMP_OFFSET (src_frame, 2);

  memcpy (matrix,
      (dest_sdtv) ? cog_rgb_to_ycbcr_matrix_8bit_sdtv :
      cog_rgb_to_ycbcr_matrix_8bit_hdtv, 12 * sizeof (gint));

  dest = GST_VIDEO_FRAME_PLANE_DATA (dest_frame, 0);
  dest = dest + dest_y * dest_stride + dest_x * 4;
  src = GST_VIDEO_FRAME_PLANE_DATA (src_frame, 0);
  src = src + src_y * src_stride + src_x * in_bpp;

  if (in_alpha) {
    w *= 4;
    for (i = 0; i < h; i++) {
      for (j = 0; j < w; j += 4) {
        a = (src[j + p_in[0]] * i_alpha) >> 8;
        r = src[j + p_in[1]];
        g = src[j + p_in[2]];
        b = src[j + p_in[3]];

        y = APPLY_MATRIX (matrix, 0, r, g, b);
        u = APPLY_MATRIX (matrix, 1, r, g, b);
        v = APPLY_MATRIX (matrix, 2, r, g, b);

        dest[j + 0] = a;
        dest[j + 1] = CLAMP (y, 0, 255);
        dest[j + 2] = CLAMP (u, 0, 255);
        dest[j + 3] = CLAMP (v, 0, 255);
      }
      dest += dest_stride;
      src += src_stride;
    }
  } else if (!packed_in) {
    w *= 4;
    i_alpha = CLAMP (i_alpha, 0, 255);

    for (i = 0; i < h; i++) {
      for (j = 0; j < w; j += 4) {
        a = i_alpha;
        r = src[j + p_in[1]];
        g = src[j + p_in[2]];
        b = src[j + p_in[3]];

        y = APPLY_MATRIX (matrix, 0, r, g, b);
        u = APPLY_MATRIX (matrix, 1, r, g, b);
        v = APPLY_MATRIX (matrix, 2, r, g, b);

        dest[j + 0] = a;
        dest[j + 1] = CLAMP (y, 0, 255);
        dest[j + 2] = CLAMP (u, 0, 255);
        dest[j + 3] = CLAMP (v, 0, 255);
      }
      dest += dest_stride;
      src += src_stride;
    }
  } else {
    i_alpha = CLAMP (i_alpha, 0, 255);

    for (i = 0; i < h; i++) {
      for (j = 0; j < w; j++) {
        a = i_alpha;
        r = src[in_bpp * j + p_in[1]];
        g = src[in_bpp * j + p_in[2]];
        b = src[in_bpp * j + p_in[3]];

        y = APPLY_MATRIX (matrix, 0, r, g, b);
        u = APPLY_MATRIX (matrix, 1, r, g, b);
        v = APPLY_MATRIX (matrix, 2, r, g, b);

        dest[4 * j + 0] = a;
        dest[4 * j + 1] = CLAMP (y, 0, 255);
        dest[4 * j + 2] = CLAMP (u, 0, 255);
        dest[4 * j + 3] = CLAMP (v, 0, 255);
      }
      dest += dest_stride;
      src += src_stride;
    }
  }
}

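/* Copy a w x h region from a packed AYUV source into a packed 24/32-bit
 * RGB destination, converting every pixel with the 8-bit YCbCr->RGB matrix
 * that matches the source colorimetry. The source alpha is scaled by
 * i_alpha and only written if the destination has an alpha channel. */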
static void
copy_ayuv_rgb32 (guint i_alpha, GstVideoFrame * dest_frame,
    gboolean dest_sdtv, gint dest_x, gint dest_y, GstVideoFrame * src_frame,
    gboolean src_sdtv, gint src_x, gint src_y, gint w, gint h)
{
  gint i, j;
  gint src_stride, dest_stride;
  gboolean out_alpha;
  gint out_bpp;
  gint p_out[4];
  gboolean packed_out;
  gint matrix[12];
  gint a;
  gint y, u, v;
  gint r, g, b;
  guint8 *src, *dest;

  src = GST_VIDEO_FRAME_PLANE_DATA (src_frame, 0);
  src_stride = GST_VIDEO_FRAME_PLANE_STRIDE (src_frame, 0);

  dest_stride = GST_VIDEO_FRAME_PLANE_STRIDE (dest_frame, 0);
  out_bpp = GST_VIDEO_FRAME_COMP_PSTRIDE (dest_frame, 0);
  packed_out = (out_bpp < 4);

  out_alpha = GST_VIDEO_INFO_HAS_ALPHA (&dest_frame->info);
  p_out[0] = GST_VIDEO_FRAME_COMP_OFFSET (dest_frame, 3);
  p_out[1] = GST_VIDEO_FRAME_COMP_OFFSET (dest_frame, 0);
  p_out[2] = GST_VIDEO_FRAME_COMP_OFFSET (dest_frame, 1);
  p_out[3] = GST_VIDEO_FRAME_COMP_OFFSET (dest_frame, 2);

  memcpy (matrix,
      (src_sdtv) ? cog_ycbcr_to_rgb_matrix_8bit_sdtv :
      cog_ycbcr_to_rgb_matrix_8bit_hdtv, 12 * sizeof (gint));

  dest = GST_VIDEO_FRAME_PLANE_DATA (dest_frame, 0);
  dest = dest + dest_y * dest_stride + dest_x * out_bpp;
  src = GST_VIDEO_FRAME_PLANE_DATA (src_frame, 0);
  src = src + src_y * src_stride + src_x * 4;

  if (out_alpha) {
    w *= 4;
    for (i = 0; i < h; i++) {
      for (j = 0; j < w; j += 4) {
        a = (src[j + 0] * i_alpha) >> 8;
        y = src[j + 1];
        u = src[j + 2];
        v = src[j + 3];

        r = APPLY_MATRIX (matrix, 0, y, u, v);
        g = APPLY_MATRIX (matrix, 1, y, u, v);
        b = APPLY_MATRIX (matrix, 2, y, u, v);

        dest[j + p_out[0]] = a;
        dest[j + p_out[1]] = CLAMP (r, 0, 255);
        dest[j + p_out[2]] = CLAMP (g, 0, 255);
        dest[j + p_out[3]] = CLAMP (b, 0, 255);
      }
      dest += dest_stride;
      src += src_stride;
    }
  } else if (!packed_out) {
    w *= 4;
    for (i = 0; i < h; i++) {
      for (j = 0; j < w; j += 4) {
        y = src[j + 1];
        u = src[j + 2];
        v = src[j + 3];

        r = APPLY_MATRIX (matrix, 0, y, u, v);
        g = APPLY_MATRIX (matrix, 1, y, u, v);
        b = APPLY_MATRIX (matrix, 2, y, u, v);

        dest[j + p_out[1]] = CLAMP (r, 0, 255);
        dest[j + p_out[2]] = CLAMP (g, 0, 255);
        dest[j + p_out[3]] = CLAMP (b, 0, 255);
      }
      dest += dest_stride;
      src += src_stride;
    }
  } else {
    for (i = 0; i < h; i++) {
      for (j = 0; j < w; j++) {
        y = src[4 * j + 1];
        u = src[4 * j + 2];
        v = src[4 * j + 3];

        r = APPLY_MATRIX (matrix, 0, y, u, v);
        g = APPLY_MATRIX (matrix, 1, y, u, v);
        b = APPLY_MATRIX (matrix, 2, y, u, v);

        dest[out_bpp * j + p_out[1]] = CLAMP (r, 0, 255);
        dest[out_bpp * j + p_out[2]] = CLAMP (g, 0, 255);
        dest[out_bpp * j + p_out[3]] = CLAMP (b, 0, 255);
      }
      dest += dest_stride;
      src += src_stride;
    }
  }
}

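/* Fill a GRAY8 or GRAY16 frame with the luma value of the border color.
 * The 16-bit variants are written with the matching byte order. */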
static void
fill_gray (GstVideoBoxFill fill_type, guint b_alpha,
    GstVideoFrame * frame, gboolean sdtv)
{
  gint i, j;
  gint dest_stride;
  guint8 *dest;
  gint width, height;
  GstVideoFormat format;

  format = GST_VIDEO_FRAME_FORMAT (frame);

  width = GST_VIDEO_FRAME_WIDTH (frame);
  height = GST_VIDEO_FRAME_HEIGHT (frame);

  dest = GST_VIDEO_FRAME_PLANE_DATA (frame, 0);
  dest_stride = GST_VIDEO_FRAME_PLANE_STRIDE (frame, 0);

  if (format == GST_VIDEO_FORMAT_GRAY8) {
    guint8 val = yuv_sdtv_colors_Y[fill_type];

    for (i = 0; i < height; i++) {
      memset (dest, val, width);
      dest += dest_stride;
    }
  } else {
    guint16 val = yuv_sdtv_colors_Y[fill_type] << 8;

    if (format == GST_VIDEO_FORMAT_GRAY16_BE) {
      for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++) {
          GST_WRITE_UINT16_BE (dest + 2 * j, val);
        }
        dest += dest_stride;
      }
    } else {
      for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++) {
          GST_WRITE_UINT16_LE (dest + 2 * j, val);
        }
        dest += dest_stride;
      }
    }
  }
}

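/* Copy a w x h region between single-plane packed frames of the same
 * format with a plain per-row memcpy; no alpha or colorimetry handling. */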
static void
copy_packed_simple (guint i_alpha, GstVideoFrame * dest_frame,
    gboolean dest_sdtv, gint dest_x, gint dest_y, GstVideoFrame * src_frame,
    gboolean src_sdtv, gint src_x, gint src_y, gint w, gint h)
{
  gint i;
  gint src_stride, dest_stride;
  gint pixel_stride, row_size;
  guint8 *src, *dest;

  src_stride = GST_VIDEO_FRAME_PLANE_STRIDE (src_frame, 0);
  dest_stride = GST_VIDEO_FRAME_PLANE_STRIDE (dest_frame, 0);
  pixel_stride = GST_VIDEO_FRAME_COMP_PSTRIDE (dest_frame, 0);
  row_size = w * pixel_stride;

  dest = GST_VIDEO_FRAME_PLANE_DATA (dest_frame, 0);
  dest = dest + dest_y * dest_stride + dest_x * pixel_stride;
  src = GST_VIDEO_FRAME_PLANE_DATA (src_frame, 0);
  src = src + src_y * src_stride + src_x * pixel_stride;

  for (i = 0; i < h; i++) {
    memcpy (dest, src, row_size);
    dest += dest_stride;
    src += src_stride;
  }
}

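/* Fill a YUY2/YVYU/UYVY frame with the border color, writing one macro
 * pixel (two luma samples sharing one U and one V sample) at a time in the
 * byte order of the respective format. */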
static void
fill_yuy2 (GstVideoBoxFill fill_type, guint b_alpha,
    GstVideoFrame * frame, gboolean sdtv)
{
  guint8 y, u, v;
  gint i, j;
  gint stride;
  gint width, height;
  guint8 *dest;
  GstVideoFormat format;

  format = GST_VIDEO_FRAME_FORMAT (frame);

  width = GST_VIDEO_FRAME_WIDTH (frame);
  height = GST_VIDEO_FRAME_HEIGHT (frame);

  dest = GST_VIDEO_FRAME_PLANE_DATA (frame, 0);
  stride = GST_VIDEO_FRAME_PLANE_STRIDE (frame, 0);

  y = (sdtv) ? yuv_sdtv_colors_Y[fill_type] : yuv_hdtv_colors_Y[fill_type];
  u = (sdtv) ? yuv_sdtv_colors_U[fill_type] : yuv_hdtv_colors_U[fill_type];
  v = (sdtv) ? yuv_sdtv_colors_V[fill_type] : yuv_hdtv_colors_V[fill_type];

  width = width + (width % 2);

  if (format == GST_VIDEO_FORMAT_YUY2) {
    for (i = 0; i < height; i++) {
      for (j = 0; j < width; j += 2) {
        dest[j * 2 + 0] = y;
        dest[j * 2 + 1] = u;
        dest[j * 2 + 2] = y;
        dest[j * 2 + 3] = v;
      }

      dest += stride;
    }
  } else if (format == GST_VIDEO_FORMAT_YVYU) {
    for (i = 0; i < height; i++) {
      for (j = 0; j < width; j += 2) {
        dest[j * 2 + 0] = y;
        dest[j * 2 + 1] = v;
        dest[j * 2 + 2] = y;
        dest[j * 2 + 3] = u;
      }

      dest += stride;
    }
  } else {
    for (i = 0; i < height; i++) {
      for (j = 0; j < width; j += 2) {
        dest[j * 2 + 0] = u;
        dest[j * 2 + 1] = y;
        dest[j * 2 + 2] = v;
        dest[j * 2 + 3] = y;
      }

      dest += stride;
    }
  }
}

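/* Copy a w x h region between packed 4:2:2 YUV frames of the same layout.
 * Offsets and width are rounded to macro pixel (2 pixel) boundaries. If
 * source and destination colorimetry differ every macro pixel is converted
 * with the 8-bit SDTV<->HDTV matrix, otherwise rows are simply memcpy'd. */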
static void
copy_yuy2_yuy2 (guint i_alpha, GstVideoFrame * dest_frame,
    gboolean dest_sdtv, gint dest_x, gint dest_y, GstVideoFrame * src_frame,
    gboolean src_sdtv, gint src_x, gint src_y, gint w, gint h)
{
  gint i, j;
  gint src_stride, dest_stride;
  guint8 *src, *dest;
  GstVideoFormat src_format;

  src_format = GST_VIDEO_FRAME_FORMAT (src_frame);

  src_stride = GST_VIDEO_FRAME_PLANE_STRIDE (src_frame, 0);
  dest_stride = GST_VIDEO_FRAME_PLANE_STRIDE (dest_frame, 0);

  dest_x = (dest_x & ~1);
  src_x = (src_x & ~1);

  w = w + (w % 2);

  dest = GST_VIDEO_FRAME_PLANE_DATA (dest_frame, 0);
  dest = dest + dest_y * dest_stride + dest_x * 2;
  src = GST_VIDEO_FRAME_PLANE_DATA (src_frame, 0);
  src = src + src_y * src_stride + src_x * 2;

  if (src_sdtv != dest_sdtv) {
    gint y1, u1, v1;
    gint y2, u2, v2;
    gint matrix[12];

    memcpy (matrix,
        dest_sdtv ? cog_ycbcr_hdtv_to_ycbcr_sdtv_matrix_8bit :
        cog_ycbcr_sdtv_to_ycbcr_hdtv_matrix_8bit, 12 * sizeof (gint));

    if (src_format == GST_VIDEO_FORMAT_YUY2) {
      for (i = 0; i < h; i++) {
        for (j = 0; j < w; j += 2) {
          y1 = src[j * 2 + 0];
          y2 = src[j * 2 + 2];
          u1 = u2 = src[j * 2 + 1];
          v1 = v2 = src[j * 2 + 3];

          dest[j * 2 + 0] = APPLY_MATRIX (matrix, 0, y1, u1, v1);
          dest[j * 2 + 1] = APPLY_MATRIX (matrix, 1, y1, u1, v1);
          dest[j * 2 + 2] = APPLY_MATRIX (matrix, 0, y2, u2, v2);
          dest[j * 2 + 3] = APPLY_MATRIX (matrix, 2, y2, u2, v2);
        }
        dest += dest_stride;
        src += src_stride;
      }
    } else if (src_format == GST_VIDEO_FORMAT_YVYU) {
      for (i = 0; i < h; i++) {
        for (j = 0; j < w; j += 2) {
          y1 = src[j * 2 + 0];
          y2 = src[j * 2 + 2];
          v1 = v2 = src[j * 2 + 1];
          u1 = u2 = src[j * 2 + 3];

          dest[j * 2 + 0] = APPLY_MATRIX (matrix, 0, y1, u1, v1);
          dest[j * 2 + 1] = APPLY_MATRIX (matrix, 2, y1, u1, v1);
          dest[j * 2 + 2] = APPLY_MATRIX (matrix, 0, y2, u2, v2);
          dest[j * 2 + 3] = APPLY_MATRIX (matrix, 1, y2, u2, v2);
        }
        dest += dest_stride;
        src += src_stride;
      }
    } else {
      for (i = 0; i < h; i++) {
        for (j = 0; j < w; j += 2) {
          u1 = u2 = src[j * 2 + 0];
          v1 = v2 = src[j * 2 + 2];
          y1 = src[j * 2 + 1];
          y2 = src[j * 2 + 3];

          dest[j * 2 + 1] = APPLY_MATRIX (matrix, 0, y1, u1, v1);
          dest[j * 2 + 0] = APPLY_MATRIX (matrix, 1, y1, u1, v1);
          dest[j * 2 + 3] = APPLY_MATRIX (matrix, 0, y2, u2, v2);
          dest[j * 2 + 2] = APPLY_MATRIX (matrix, 2, y2, u2, v2);
        }
        dest += dest_stride;
        src += src_stride;
      }
    }
  } else {
    for (i = 0; i < h; i++) {
      memcpy (dest, src, w * 2);
      dest += dest_stride;
      src += src_stride;
    }
  }
}

#define DEFAULT_LEFT      0
#define DEFAULT_RIGHT     0
#define DEFAULT_TOP       0
#define DEFAULT_BOTTOM    0
#define DEFAULT_FILL_TYPE VIDEO_BOX_FILL_BLACK
#define DEFAULT_ALPHA     1.0
#define DEFAULT_BORDER_ALPHA 1.0

enum
{
  PROP_0,
  PROP_LEFT,
  PROP_RIGHT,
  PROP_TOP,
  PROP_BOTTOM,
  PROP_FILL_TYPE,
  PROP_ALPHA,
  PROP_BORDER_ALPHA,
  PROP_AUTOCROP
      /* FILL ME */
};

static GstStaticPadTemplate gst_video_box_src_template =
GST_STATIC_PAD_TEMPLATE ("src",
    GST_PAD_SRC,
    GST_PAD_ALWAYS,
    GST_STATIC_CAPS (GST_VIDEO_CAPS_MAKE ("{ AYUV, "
            "ARGB, BGRA, ABGR, RGBA, xRGB, BGRx, xBGR, RGBx, RGB, BGR, "
            "Y444, Y42B, YUY2, YVYU, UYVY, I420, YV12, Y41B, "
            "GRAY8, GRAY16_BE, GRAY16_LE } "))
    );

static GstStaticPadTemplate gst_video_box_sink_template =
GST_STATIC_PAD_TEMPLATE ("sink",
    GST_PAD_SINK,
    GST_PAD_ALWAYS,
    GST_STATIC_CAPS (GST_VIDEO_CAPS_MAKE ("{ AYUV, "
            "ARGB, BGRA, ABGR, RGBA, xRGB, BGRx, xBGR, RGBx, RGB, BGR, "
            "Y444, Y42B, YUY2, YVYU, UYVY, I420, YV12, Y41B, "
            "GRAY8, GRAY16_BE, GRAY16_LE } "))
    );

#define gst_video_box_parent_class parent_class
G_DEFINE_TYPE (GstVideoBox, gst_video_box, GST_TYPE_VIDEO_FILTER);

static void gst_video_box_set_property (GObject * object, guint prop_id,
    const GValue * value, GParamSpec * pspec);
static void gst_video_box_get_property (GObject * object, guint prop_id,
    GValue * value, GParamSpec * pspec);

static gboolean gst_video_box_recalc_transform (GstVideoBox * video_box);
static GstCaps *gst_video_box_transform_caps (GstBaseTransform * trans,
    GstPadDirection direction, GstCaps * from, GstCaps * filter);
static void gst_video_box_before_transform (GstBaseTransform * trans,
    GstBuffer * in);
static gboolean gst_video_box_src_event (GstBaseTransform * trans,
    GstEvent * event);

static gboolean gst_video_box_set_info (GstVideoFilter * vfilter, GstCaps * in,
    GstVideoInfo * in_info, GstCaps * out, GstVideoInfo * out_info);
static GstFlowReturn gst_video_box_transform_frame (GstVideoFilter * vfilter,
    GstVideoFrame * in_frame, GstVideoFrame * out_frame);

#define GST_TYPE_VIDEO_BOX_FILL (gst_video_box_fill_get_type())
static GType
gst_video_box_fill_get_type (void)
{
  static GType video_box_fill_type = 0;
  static const GEnumValue video_box_fill[] = {
    {VIDEO_BOX_FILL_BLACK, "Black", "black"},
    {VIDEO_BOX_FILL_GREEN, "Green", "green"},
    {VIDEO_BOX_FILL_BLUE, "Blue", "blue"},
    {VIDEO_BOX_FILL_RED, "Red", "red"},
    {VIDEO_BOX_FILL_YELLOW, "Yellow", "yellow"},
    {VIDEO_BOX_FILL_WHITE, "White", "white"},
    {0, NULL, NULL},
  };

  if (!video_box_fill_type) {
    video_box_fill_type =
        g_enum_register_static ("GstVideoBoxFill", video_box_fill);
  }
  return video_box_fill_type;
}

static void
gst_video_box_finalize (GObject * object)
{
  GstVideoBox *video_box = GST_VIDEO_BOX (object);

  g_mutex_clear (&video_box->mutex);

  G_OBJECT_CLASS (parent_class)->finalize (object);
}

static void
gst_video_box_class_init (GstVideoBoxClass * klass)
{
  GObjectClass *gobject_class = (GObjectClass *) klass;
  GstElementClass *element_class = (GstElementClass *) (klass);
  GstBaseTransformClass *trans_class = (GstBaseTransformClass *) klass;
  GstVideoFilterClass *vfilter_class = (GstVideoFilterClass *) klass;

  gobject_class->set_property = gst_video_box_set_property;
  gobject_class->get_property = gst_video_box_get_property;
  gobject_class->finalize = gst_video_box_finalize;

  g_object_class_install_property (G_OBJECT_CLASS (klass), PROP_FILL_TYPE,
      g_param_spec_enum ("fill", "Fill", "How to fill the borders",
          GST_TYPE_VIDEO_BOX_FILL, DEFAULT_FILL_TYPE,
          G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS | GST_PARAM_CONTROLLABLE));
  g_object_class_install_property (G_OBJECT_CLASS (klass), PROP_LEFT,
      g_param_spec_int ("left", "Left",
          "Pixels to box at left (<0  = add a border)", G_MININT, G_MAXINT,
          DEFAULT_LEFT,
          G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS | GST_PARAM_CONTROLLABLE));
  g_object_class_install_property (G_OBJECT_CLASS (klass), PROP_RIGHT,
      g_param_spec_int ("right", "Right",
          "Pixels to box at right (<0 = add a border)", G_MININT, G_MAXINT,
          DEFAULT_RIGHT,
          G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS | GST_PARAM_CONTROLLABLE));
  g_object_class_install_property (G_OBJECT_CLASS (klass), PROP_TOP,
      g_param_spec_int ("top", "Top",
          "Pixels to box at top (<0 = add a border)", G_MININT, G_MAXINT,
          DEFAULT_TOP,
          G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS | GST_PARAM_CONTROLLABLE));
  g_object_class_install_property (G_OBJECT_CLASS (klass), PROP_BOTTOM,
      g_param_spec_int ("bottom", "Bottom",
          "Pixels to box at bottom (<0 = add a border)", G_MININT, G_MAXINT,
          DEFAULT_BOTTOM,
          G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS | GST_PARAM_CONTROLLABLE));
  g_object_class_install_property (G_OBJECT_CLASS (klass), PROP_ALPHA,
      g_param_spec_double ("alpha", "Alpha", "Alpha value picture", 0.0, 1.0,
          DEFAULT_ALPHA,
          G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS | GST_PARAM_CONTROLLABLE));
  g_object_class_install_property (G_OBJECT_CLASS (klass), PROP_BORDER_ALPHA,
      g_param_spec_double ("border-alpha", "Border Alpha",
          "Alpha value of the border", 0.0, 1.0, DEFAULT_BORDER_ALPHA,
          G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS | GST_PARAM_CONTROLLABLE));
  /**
   * GstVideoBox:autocrop:
   *
   * If set to %TRUE videobox will automatically crop/pad the input
   * video to be centered in the output.
   *
   * Since: 0.10.16
   **/
  g_object_class_install_property (G_OBJECT_CLASS (klass), PROP_AUTOCROP,
      g_param_spec_boolean ("autocrop", "Auto crop",
          "Auto crop", FALSE, G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));

  trans_class->before_transform =
      GST_DEBUG_FUNCPTR (gst_video_box_before_transform);
  trans_class->transform_caps =
      GST_DEBUG_FUNCPTR (gst_video_box_transform_caps);
  trans_class->src_event = GST_DEBUG_FUNCPTR (gst_video_box_src_event);

  vfilter_class->set_info = GST_DEBUG_FUNCPTR (gst_video_box_set_info);
  vfilter_class->transform_frame =
      GST_DEBUG_FUNCPTR (gst_video_box_transform_frame);

  gst_element_class_set_static_metadata (element_class, "Video box filter",
      "Filter/Effect/Video",
      "Resizes a video by adding borders or cropping",
      "Wim Taymans <wim@fluendo.com>");

  gst_element_class_add_pad_template (element_class,
      gst_static_pad_template_get (&gst_video_box_sink_template));
  gst_element_class_add_pad_template (element_class,
      gst_static_pad_template_get (&gst_video_box_src_template));
}

static void
gst_video_box_init (GstVideoBox * video_box)
{
  video_box->box_right = DEFAULT_RIGHT;
  video_box->box_left = DEFAULT_LEFT;
  video_box->box_top = DEFAULT_TOP;
  video_box->box_bottom = DEFAULT_BOTTOM;
  video_box->crop_right = 0;
  video_box->crop_left = 0;
  video_box->crop_top = 0;
  video_box->crop_bottom = 0;
  video_box->fill_type = DEFAULT_FILL_TYPE;
  video_box->alpha = DEFAULT_ALPHA;
  video_box->border_alpha = DEFAULT_BORDER_ALPHA;
  video_box->autocrop = FALSE;

  g_mutex_init (&video_box->mutex);
}

static void
gst_video_box_set_property (GObject * object, guint prop_id,
    const GValue * value, GParamSpec * pspec)
{
  GstVideoBox *video_box = GST_VIDEO_BOX (object);

  g_mutex_lock (&video_box->mutex);
  switch (prop_id) {
    case PROP_LEFT:
      video_box->box_left = g_value_get_int (value);
      if (video_box->box_left < 0) {
        video_box->border_left = -video_box->box_left;
        video_box->crop_left = 0;
      } else {
        video_box->border_left = 0;
        video_box->crop_left = video_box->box_left;
      }
      break;
    case PROP_RIGHT:
      video_box->box_right = g_value_get_int (value);
      if (video_box->box_right < 0) {
        video_box->border_right = -video_box->box_right;
        video_box->crop_right = 0;
      } else {
        video_box->border_right = 0;
        video_box->crop_right = video_box->box_right;
      }
      break;
    case PROP_TOP:
      video_box->box_top = g_value_get_int (value);
      if (video_box->box_top < 0) {
        video_box->border_top = -video_box->box_top;
        video_box->crop_top = 0;
      } else {
        video_box->border_top = 0;
        video_box->crop_top = video_box->box_top;
      }
      break;
    case PROP_BOTTOM:
      video_box->box_bottom = g_value_get_int (value);
      if (video_box->box_bottom < 0) {
        video_box->border_bottom = -video_box->box_bottom;
        video_box->crop_bottom = 0;
      } else {
        video_box->border_bottom = 0;
        video_box->crop_bottom = video_box->box_bottom;
      }
      break;
    case PROP_FILL_TYPE:
      video_box->fill_type = g_value_get_enum (value);
      break;
    case PROP_ALPHA:
      video_box->alpha = g_value_get_double (value);
      break;
    case PROP_BORDER_ALPHA:
      video_box->border_alpha = g_value_get_double (value);
      break;
    case PROP_AUTOCROP:
      video_box->autocrop = g_value_get_boolean (value);
      break;
    default:
      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
      break;
  }
  gst_video_box_recalc_transform (video_box);

  GST_DEBUG_OBJECT (video_box, "Calling reconfigure");
  gst_base_transform_reconfigure_src (GST_BASE_TRANSFORM_CAST (video_box));

  g_mutex_unlock (&video_box->mutex);
}

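/* Derive box values from the negotiated input and output sizes: the width
 * and height differences are split between left/right and top/bottom, with
 * the odd pixel (if any) going to the right/bottom side. */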
static void
gst_video_box_autocrop (GstVideoBox * video_box)
{
  gint crop_w = video_box->in_width - video_box->out_width;
  gint crop_h = video_box->in_height - video_box->out_height;

  video_box->box_left = crop_w / 2;
  if (video_box->box_left < 0) {
    video_box->border_left = -video_box->box_left;
    video_box->crop_left = 0;
  } else {
    video_box->border_left = 0;
    video_box->crop_left = video_box->box_left;
  }

  /* Round down/up for odd width differences */
  if (crop_w < 0)
    crop_w -= 1;
  else
    crop_w += 1;

  video_box->box_right = crop_w / 2;
  if (video_box->box_right < 0) {
    video_box->border_right = -video_box->box_right;
    video_box->crop_right = 0;
  } else {
    video_box->border_right = 0;
    video_box->crop_right = video_box->box_right;
  }

  video_box->box_top = crop_h / 2;
  if (video_box->box_top < 0) {
    video_box->border_top = -video_box->box_top;
    video_box->crop_top = 0;
  } else {
    video_box->border_top = 0;
    video_box->crop_top = video_box->box_top;
  }

  /* Round down/up for odd height differences */
  if (crop_h < 0)
    crop_h -= 1;
  else
    crop_h += 1;
  video_box->box_bottom = crop_h / 2;

  if (video_box->box_bottom < 0) {
    video_box->border_bottom = -video_box->box_bottom;
    video_box->crop_bottom = 0;
  } else {
    video_box->border_bottom = 0;
    video_box->crop_bottom = video_box->box_bottom;
  }
}

static void
gst_video_box_get_property (GObject * object, guint prop_id, GValue * value,
    GParamSpec * pspec)
{
  GstVideoBox *video_box = GST_VIDEO_BOX (object);

  switch (prop_id) {
    case PROP_LEFT:
      g_value_set_int (value, video_box->box_left);
      break;
    case PROP_RIGHT:
      g_value_set_int (value, video_box->box_right);
      break;
    case PROP_TOP:
      g_value_set_int (value, video_box->box_top);
      break;
    case PROP_BOTTOM:
      g_value_set_int (value, video_box->box_bottom);
      break;
    case PROP_FILL_TYPE:
      g_value_set_enum (value, video_box->fill_type);
      break;
    case PROP_ALPHA:
      g_value_set_double (value, video_box->alpha);
      break;
    case PROP_BORDER_ALPHA:
      g_value_set_double (value, video_box->border_alpha);
      break;
    case PROP_AUTOCROP:
      g_value_set_boolean (value, video_box->autocrop);
      break;
    default:
      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
      break;
  }
}

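/* Apply a crop/border delta to a caps dimension, clamping the result to
 * the valid range [1, G_MAXINT]. */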
static inline gint
gst_video_box_transform_dimension (gint val, gint delta)
{
  gint64 new_val = (gint64) val + (gint64) delta;

  new_val = CLAMP (new_val, 1, G_MAXINT);

  return (gint) new_val;
}

static gboolean
gst_video_box_transform_dimension_value (const GValue * src_val,
    gint delta, GValue * dest_val)
{
  gboolean ret = TRUE;

  g_value_init (dest_val, G_VALUE_TYPE (src_val));

  if (G_VALUE_HOLDS_INT (src_val)) {
    gint ival = g_value_get_int (src_val);

    ival = gst_video_box_transform_dimension (ival, delta);
    g_value_set_int (dest_val, ival);
  } else if (GST_VALUE_HOLDS_INT_RANGE (src_val)) {
    gint min = gst_value_get_int_range_min (src_val);
    gint max = gst_value_get_int_range_max (src_val);

    min = gst_video_box_transform_dimension (min, delta);
    max = gst_video_box_transform_dimension (max, delta);
    if (min > max) {
      ret = FALSE;
      g_value_unset (dest_val);
    } else {
      gst_value_set_int_range (dest_val, min, max);
    }
  } else if (GST_VALUE_HOLDS_LIST (src_val)) {
    gint i;

    for (i = 0; i < gst_value_list_get_size (src_val); ++i) {
      const GValue *list_val;
      GValue newval = { 0, };

      list_val = gst_value_list_get_value (src_val, i);
      if (gst_video_box_transform_dimension_value (list_val, delta, &newval))
        gst_value_list_append_value (dest_val, &newval);
      g_value_unset (&newval);
    }

    if (gst_value_list_get_size (dest_val) == 0) {
      g_value_unset (dest_val);
      ret = FALSE;
    }
  } else {
    g_value_unset (dest_val);
    ret = FALSE;
  }

  return ret;
}

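/* Transform caps between the pads: in autocrop mode width and height are
 * removed so that any size can be negotiated, otherwise they are shifted
 * by the configured box values. The format field is widened with the
 * formats we can convert between (see the list of supported conversions
 * below), colorimetry and chroma-site are dropped, and the result is
 * intersected with the other pad's template caps and the filter caps. */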
static GstCaps *
gst_video_box_transform_caps (GstBaseTransform * trans,
    GstPadDirection direction, GstCaps * from, GstCaps * filter)
{
  GstVideoBox *video_box = GST_VIDEO_BOX (trans);
  GstCaps *to, *ret;
  GstCaps *templ;
  GstStructure *structure;
  GstPad *other;
  gint i, j;

  to = gst_caps_new_empty ();
  for (i = 0; i < gst_caps_get_size (from); i++) {
    const GValue *fval, *lval;
    GValue list = { 0, };
    GValue val = { 0, };
    gboolean seen_yuv = FALSE, seen_rgb = FALSE;
    const gchar *str;

    structure = gst_structure_copy (gst_caps_get_structure (from, i));

    /* Transform width/height */
    if (video_box->autocrop) {
      gst_structure_remove_field (structure, "width");
      gst_structure_remove_field (structure, "height");
    } else {
      gint dw = 0, dh = 0;
      const GValue *v;
      GValue w_val = { 0, };
      GValue h_val = { 0, };

      /* calculate width and height */
      if (direction == GST_PAD_SINK) {
        dw -= video_box->box_left;
        dw -= video_box->box_right;
      } else {
        dw += video_box->box_left;
        dw += video_box->box_right;
      }

      if (direction == GST_PAD_SINK) {
        dh -= video_box->box_top;
        dh -= video_box->box_bottom;
      } else {
        dh += video_box->box_top;
        dh += video_box->box_bottom;
      }

      v = gst_structure_get_value (structure, "width");
      if (!gst_video_box_transform_dimension_value (v, dw, &w_val)) {
        GST_WARNING_OBJECT (video_box,
            "could not tranform width value with dw=%d" ", caps structure=%"
            GST_PTR_FORMAT, dw, structure);
        goto bail;
      }
      gst_structure_set_value (structure, "width", &w_val);

      v = gst_structure_get_value (structure, "height");
      if (!gst_video_box_transform_dimension_value (v, dh, &h_val)) {
        g_value_unset (&w_val);
        GST_WARNING_OBJECT (video_box,
            "could not tranform height value with dh=%d" ", caps structure=%"
            GST_PTR_FORMAT, dh, structure);
        goto bail;
      }
      gst_structure_set_value (structure, "height", &h_val);
      g_value_unset (&w_val);
      g_value_unset (&h_val);
    }

    /* Supported conversions:
     * I420->AYUV
     * I420->YV12
     * YV12->AYUV
     * YV12->I420
     * AYUV->I420
     * AYUV->YV12
     * AYUV->xRGB (24bpp, 32bpp, incl. alpha)
     * xRGB->xRGB (24bpp, 32bpp, from/to all variants, incl. alpha)
     * xRGB->AYUV (24bpp, 32bpp, incl. alpha)
     *
     * Passthrough only for everything else.
     */
    fval = gst_structure_get_value (structure, "format");
    if (fval && GST_VALUE_HOLDS_LIST (fval)) {
      for (j = 0; j < gst_value_list_get_size (fval); j++) {
        lval = gst_value_list_get_value (fval, j);
        if ((str = g_value_get_string (lval))) {
          if (strstr (str, "RGB") || strstr (str, "BGR") ||
              strcmp (str, "AYUV") == 0)
            seen_rgb = TRUE;
          else if (strcmp (str, "I420") == 0 || strcmp (str, "YV12") == 0 ||
              strcmp (str, "AYUV") == 0)
            seen_yuv = TRUE;
        }
      }
    } else if (fval && G_VALUE_HOLDS_STRING (fval)) {
      if ((str = g_value_get_string (fval))) {
        if (strstr (str, "RGB") || strstr (str, "BGR") ||
            strcmp (str, "AYUV") == 0)
          seen_rgb = TRUE;
        else if (strcmp (str, "I420") == 0 || strcmp (str, "YV12") == 0 ||
            strcmp (str, "AYUV") == 0)
          seen_yuv = TRUE;
      }
    }

    if (seen_yuv || seen_rgb) {
      g_value_init (&list, GST_TYPE_LIST);

      g_value_init (&val, G_TYPE_STRING);
      g_value_set_string (&val, "AYUV");
      gst_value_list_append_value (&list, &val);
      g_value_unset (&val);

      if (seen_yuv) {
        g_value_init (&val, G_TYPE_STRING);
        g_value_set_string (&val, "I420");
        gst_value_list_append_value (&list, &val);
        g_value_reset (&val);
        g_value_set_string (&val, "YV12");
        gst_value_list_append_value (&list, &val);
        g_value_unset (&val);
      }
      if (seen_rgb) {
        g_value_init (&val, G_TYPE_STRING);
        g_value_set_string (&val, "RGBx");
        gst_value_list_append_value (&list, &val);
        g_value_reset (&val);
        g_value_set_string (&val, "BGRx");
        gst_value_list_append_value (&list, &val);
        g_value_reset (&val);
        g_value_set_string (&val, "xRGB");
        gst_value_list_append_value (&list, &val);
        g_value_reset (&val);
        g_value_set_string (&val, "xBGR");
        gst_value_list_append_value (&list, &val);
        g_value_reset (&val);
        g_value_set_string (&val, "RGBA");
        gst_value_list_append_value (&list, &val);
        g_value_reset (&val);
        g_value_set_string (&val, "BGRA");
        gst_value_list_append_value (&list, &val);
        g_value_reset (&val);
        g_value_set_string (&val, "ARGB");
        gst_value_list_append_value (&list, &val);
        g_value_reset (&val);
        g_value_set_string (&val, "ABGR");
        gst_value_list_append_value (&list, &val);
        g_value_reset (&val);
        g_value_set_string (&val, "RGB");
        gst_value_list_append_value (&list, &val);
        g_value_reset (&val);
        g_value_set_string (&val, "BGR");
        gst_value_list_append_value (&list, &val);
        g_value_unset (&val);
      }
      gst_value_list_merge (&val, fval, &list);
      gst_structure_set_value (structure, "format", &val);
      g_value_unset (&val);
      g_value_unset (&list);
    }

    gst_structure_remove_field (structure, "colorimetry");
    gst_structure_remove_field (structure, "chroma-site");

    gst_caps_append_structure (to, structure);
  }

  /* filter against set allowed caps on the pad */
  other = (direction == GST_PAD_SINK) ? trans->srcpad : trans->sinkpad;
  templ = gst_pad_get_pad_template_caps (other);
  ret = gst_caps_intersect (to, templ);
  gst_caps_unref (to);
  gst_caps_unref (templ);

  GST_DEBUG_OBJECT (video_box, "direction %d, transformed %" GST_PTR_FORMAT
      " to %" GST_PTR_FORMAT, direction, from, ret);

  if (ret && filter) {
    GstCaps *intersection;

    GST_DEBUG_OBJECT (video_box, "Using filter caps %" GST_PTR_FORMAT, filter);
    intersection =
        gst_caps_intersect_full (filter, ret, GST_CAPS_INTERSECT_FIRST);
    gst_caps_unref (ret);
    ret = intersection;
    GST_DEBUG_OBJECT (video_box, "Intersection %" GST_PTR_FORMAT, ret);
  }

  return ret;

  /* ERRORS */
bail:
  {
    gst_structure_free (structure);
    gst_caps_unref (to);
    to = gst_caps_new_empty ();
    return to;
  }
}

static gboolean
gst_video_box_recalc_transform (GstVideoBox * video_box)
{
  gboolean res = TRUE;

  /* if we have the same format and colorimetry in and out and we don't need
   * to crop or add borders at all, we can just operate in passthrough mode */
  if (video_box->in_format == video_box->out_format &&
      video_box->box_left == 0 && video_box->box_right == 0 &&
      video_box->box_top == 0 && video_box->box_bottom == 0 &&
      video_box->in_sdtv == video_box->out_sdtv) {

    GST_LOG_OBJECT (video_box, "we are using passthrough");
    gst_base_transform_set_passthrough (GST_BASE_TRANSFORM_CAST (video_box),
        TRUE);
  } else {
    GST_LOG_OBJECT (video_box, "we are not using passthrough");
    gst_base_transform_set_passthrough (GST_BASE_TRANSFORM_CAST (video_box),
        FALSE);
  }
  return res;
}

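/* Select the fill and copy implementations for the negotiated input/output
 * format pair. Returns TRUE only if both a fill and a copy function are
 * set. */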
static gboolean
gst_video_box_select_processing_functions (GstVideoBox * video_box)
{
  switch (video_box->out_format) {
    case GST_VIDEO_FORMAT_AYUV:
      video_box->fill = fill_ayuv;
      switch (video_box->in_format) {
        case GST_VIDEO_FORMAT_AYUV:
          video_box->copy = copy_ayuv_ayuv;
          break;
        case GST_VIDEO_FORMAT_I420:
        case GST_VIDEO_FORMAT_YV12:
          video_box->copy = copy_i420_ayuv;
          break;
        case GST_VIDEO_FORMAT_ARGB:
        case GST_VIDEO_FORMAT_ABGR:
        case GST_VIDEO_FORMAT_RGBA:
        case GST_VIDEO_FORMAT_BGRA:
        case GST_VIDEO_FORMAT_xRGB:
        case GST_VIDEO_FORMAT_xBGR:
        case GST_VIDEO_FORMAT_RGBx:
        case GST_VIDEO_FORMAT_BGRx:
        case GST_VIDEO_FORMAT_RGB:
        case GST_VIDEO_FORMAT_BGR:
          video_box->copy = copy_rgb32_ayuv;
          break;
        default:
          break;
      }
      break;
    case GST_VIDEO_FORMAT_I420:
    case GST_VIDEO_FORMAT_YV12:
      video_box->fill = fill_planar_yuv;
      switch (video_box->in_format) {
        case GST_VIDEO_FORMAT_AYUV:
          video_box->copy = copy_ayuv_i420;
          break;
        case GST_VIDEO_FORMAT_I420:
        case GST_VIDEO_FORMAT_YV12:
          video_box->copy = copy_i420_i420;
          break;
        default:
          break;
      }
      break;
    case GST_VIDEO_FORMAT_ARGB:
    case GST_VIDEO_FORMAT_ABGR:
    case GST_VIDEO_FORMAT_RGBA:
    case GST_VIDEO_FORMAT_BGRA:
    case GST_VIDEO_FORMAT_xRGB:
    case GST_VIDEO_FORMAT_xBGR:
    case GST_VIDEO_FORMAT_RGBx:
    case GST_VIDEO_FORMAT_BGRx:
    case GST_VIDEO_FORMAT_RGB:
    case GST_VIDEO_FORMAT_BGR:
      video_box->fill = (video_box->out_format == GST_VIDEO_FORMAT_BGR
          || video_box->out_format ==
          GST_VIDEO_FORMAT_RGB) ? fill_rgb24 : fill_rgb32;
      switch (video_box->in_format) {
        case GST_VIDEO_FORMAT_ARGB:
        case GST_VIDEO_FORMAT_ABGR:
        case GST_VIDEO_FORMAT_RGBA:
        case GST_VIDEO_FORMAT_BGRA:
        case GST_VIDEO_FORMAT_xRGB:
        case GST_VIDEO_FORMAT_xBGR:
        case GST_VIDEO_FORMAT_RGBx:
        case GST_VIDEO_FORMAT_BGRx:
        case GST_VIDEO_FORMAT_RGB:
        case GST_VIDEO_FORMAT_BGR:
          video_box->copy = copy_rgb32;
          break;
        case GST_VIDEO_FORMAT_AYUV:
          video_box->copy = copy_ayuv_rgb32;
          break;
        default:
          break;
      }
      break;
    case GST_VIDEO_FORMAT_GRAY8:
    case GST_VIDEO_FORMAT_GRAY16_BE:
    case GST_VIDEO_FORMAT_GRAY16_LE:
      video_box->fill = fill_gray;
      switch (video_box->in_format) {
        case GST_VIDEO_FORMAT_GRAY8:
        case GST_VIDEO_FORMAT_GRAY16_BE:
        case GST_VIDEO_FORMAT_GRAY16_LE:
          video_box->copy = copy_packed_simple;
          break;
        default:
          break;
      }
      break;
    case GST_VIDEO_FORMAT_YUY2:
    case GST_VIDEO_FORMAT_YVYU:
    case GST_VIDEO_FORMAT_UYVY:
      video_box->fill = fill_yuy2;
      switch (video_box->in_format) {
        case GST_VIDEO_FORMAT_YUY2:
        case GST_VIDEO_FORMAT_YVYU:
        case GST_VIDEO_FORMAT_UYVY:
          video_box->copy = copy_yuy2_yuy2;
          break;
        default:
          break;
      }
      break;
    case GST_VIDEO_FORMAT_Y444:
    case GST_VIDEO_FORMAT_Y42B:
    case GST_VIDEO_FORMAT_Y41B:
      video_box->fill = fill_planar_yuv;
      switch (video_box->in_format) {
        case GST_VIDEO_FORMAT_Y444:
          video_box->copy = copy_y444_y444;
          break;
        case GST_VIDEO_FORMAT_Y42B:
          video_box->copy = copy_y42b_y42b;
          break;
        case GST_VIDEO_FORMAT_Y41B:
          video_box->copy = copy_y41b_y41b;
          break;
        default:
          break;
      }
      break;
    default:
      break;
  }

  return video_box->fill != NULL && video_box->copy != NULL;
}

static gboolean
gst_video_box_set_info (GstVideoFilter * vfilter, GstCaps * in,
    GstVideoInfo * in_info, GstCaps * out, GstVideoInfo * out_info)
{
  GstVideoBox *video_box = GST_VIDEO_BOX (vfilter);
  gboolean ret;

  g_mutex_lock (&video_box->mutex);

  video_box->in_format = GST_VIDEO_INFO_FORMAT (in_info);
  video_box->in_width = GST_VIDEO_INFO_WIDTH (in_info);
  video_box->in_height = GST_VIDEO_INFO_HEIGHT (in_info);

  video_box->out_format = GST_VIDEO_INFO_FORMAT (out_info);
  video_box->out_width = GST_VIDEO_INFO_WIDTH (out_info);
  video_box->out_height = GST_VIDEO_INFO_HEIGHT (out_info);

  video_box->in_sdtv =
      in_info->colorimetry.matrix == GST_VIDEO_COLOR_MATRIX_BT601;
  video_box->out_sdtv =
      out_info->colorimetry.matrix == GST_VIDEO_COLOR_MATRIX_BT601;

  GST_DEBUG_OBJECT (video_box, "Input w: %d h: %d", video_box->in_width,
      video_box->in_height);
  GST_DEBUG_OBJECT (video_box, "Output w: %d h: %d", video_box->out_width,
      video_box->out_height);

  if (video_box->autocrop)
    gst_video_box_autocrop (video_box);

  /* recalc the transformation strategy */
  ret = gst_video_box_recalc_transform (video_box);

  if (ret)
    ret = gst_video_box_select_processing_functions (video_box);
  g_mutex_unlock (&video_box->mutex);

  return ret;
}

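/* Translate pointer coordinates in navigation events by the left/top box
 * values so that they refer to the unboxed input video. */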
static gboolean
gst_video_box_src_event (GstBaseTransform * trans, GstEvent * event)
{
  GstVideoBox *video_box = GST_VIDEO_BOX (trans);
  GstStructure *new_structure;
  const GstStructure *structure;
  const gchar *event_name;
  gdouble pointer_x;
  gdouble pointer_y;

  GST_OBJECT_LOCK (video_box);
  if (GST_EVENT_TYPE (event) == GST_EVENT_NAVIGATION &&
      (video_box->box_left != 0 || video_box->box_top != 0)) {
    structure = gst_event_get_structure (event);
    event_name = gst_structure_get_string (structure, "event");

    if (event_name &&
        (strcmp (event_name, "mouse-move") == 0 ||
            strcmp (event_name, "mouse-button-press") == 0 ||
            strcmp (event_name, "mouse-button-release") == 0)) {
      if (gst_structure_get_double (structure, "pointer_x", &pointer_x) &&
          gst_structure_get_double (structure, "pointer_y", &pointer_y)) {
        gdouble new_pointer_x, new_pointer_y;
        GstEvent *new_event;

        new_pointer_x = pointer_x + video_box->box_left;
        new_pointer_y = pointer_y + video_box->box_top;

        new_structure = gst_structure_copy (structure);
        gst_structure_set (new_structure,
            "pointer_x", G_TYPE_DOUBLE, (gdouble) (new_pointer_x),
            "pointer_y", G_TYPE_DOUBLE, (gdouble) (new_pointer_y), NULL);

        new_event = gst_event_new_navigation (new_structure);
        gst_event_unref (event);
        event = new_event;
      } else {
        GST_WARNING_OBJECT (video_box, "Failed to read navigation event");
      }
    }
  }
  GST_OBJECT_UNLOCK (video_box);

  return GST_BASE_TRANSFORM_CLASS (parent_class)->src_event (trans, event);
}

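/* Render one output frame: compute the visible crop rectangle from the box
 * values, then either fill the whole output with the border color (nothing
 * of the input is visible), copy the full input frame (no boxing at all),
 * or fill the border and copy the cropped input at the proper offset. */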
static void
gst_video_box_process (GstVideoBox * video_box, GstVideoFrame * in,
    GstVideoFrame * out)
{
  guint b_alpha = CLAMP (video_box->border_alpha * 256, 0, 255);
  guint i_alpha = CLAMP (video_box->alpha * 256, 0, 255);
  GstVideoBoxFill fill_type = video_box->fill_type;
  gint br, bl, bt, bb, crop_w, crop_h;

  crop_h = 0;
  crop_w = 0;

  br = video_box->box_right;
  bl = video_box->box_left;
  bt = video_box->box_top;
  bb = video_box->box_bottom;

  if (br >= 0 && bl >= 0) {
    crop_w = video_box->in_width - (br + bl);
  } else if (br >= 0 && bl < 0) {
    crop_w = video_box->in_width - (br);
  } else if (br < 0 && bl >= 0) {
    crop_w = video_box->in_width - (bl);
  } else if (br < 0 && bl < 0) {
    crop_w = video_box->in_width;
  }

  if (bb >= 0 && bt >= 0) {
    crop_h = video_box->in_height - (bb + bt);
  } else if (bb >= 0 && bt < 0) {
    crop_h = video_box->in_height - (bb);
  } else if (bb < 0 && bt >= 0) {
    crop_h = video_box->in_height - (bt);
  } else if (bb < 0 && bt < 0) {
    crop_h = video_box->in_height;
  }

  GST_DEBUG_OBJECT (video_box, "Borders are: L:%d, R:%d, T:%d, B:%d", bl, br,
      bt, bb);
  GST_DEBUG_OBJECT (video_box, "Alpha value is: %u (frame) %u (border)",
      i_alpha, b_alpha);

  if (crop_h < 0 || crop_w < 0) {
    video_box->fill (fill_type, b_alpha, out, video_box->out_sdtv);
  } else if (bb == 0 && bt == 0 && br == 0 && bl == 0) {
    video_box->copy (i_alpha, out, video_box->out_sdtv, 0, 0, in,
        video_box->in_sdtv, 0, 0, crop_w, crop_h);
  } else {
    gint src_x = 0, src_y = 0;
    gint dest_x = 0, dest_y = 0;

    /* Fill everything if a border should be added somewhere */
    if (bt < 0 || bb < 0 || br < 0 || bl < 0)
      video_box->fill (fill_type, b_alpha, out, video_box->out_sdtv);

    /* Top border */
    if (bt < 0) {
      dest_y += -bt;
    } else {
      src_y += bt;
    }

    /* Left border */
    if (bl < 0) {
      dest_x += -bl;
    } else {
      src_x += bl;
    }

    /* Frame */
    video_box->copy (i_alpha, out, video_box->out_sdtv, dest_x, dest_y,
        in, video_box->in_sdtv, src_x, src_y, crop_w, crop_h);
  }

  GST_LOG_OBJECT (video_box, "image created");
}

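/* Sync controllable properties to the stream time of the buffer that is
 * about to be processed. */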
static void
gst_video_box_before_transform (GstBaseTransform * trans, GstBuffer * in)
{
  GstVideoBox *video_box = GST_VIDEO_BOX (trans);
  GstClockTime timestamp, stream_time;

  timestamp = GST_BUFFER_TIMESTAMP (in);
  stream_time =
      gst_segment_to_stream_time (&trans->segment, GST_FORMAT_TIME, timestamp);

  GST_DEBUG_OBJECT (video_box, "sync to %" GST_TIME_FORMAT,
      GST_TIME_ARGS (timestamp));

  if (GST_CLOCK_TIME_IS_VALID (stream_time))
    gst_object_sync_values (GST_OBJECT (video_box), stream_time);
}

static GstFlowReturn
gst_video_box_transform_frame (GstVideoFilter * vfilter,
    GstVideoFrame * in_frame, GstVideoFrame * out_frame)
{
  GstVideoBox *video_box = GST_VIDEO_BOX (vfilter);

  g_mutex_lock (&video_box->mutex);
  gst_video_box_process (video_box, in_frame, out_frame);
  g_mutex_unlock (&video_box->mutex);
  return GST_FLOW_OK;
}

/* FIXME: 0.11 merge with videocrop plugin */
static gboolean
plugin_init (GstPlugin * plugin)
{
  GST_DEBUG_CATEGORY_INIT (videobox_debug, "videobox", 0,
      "Resizes a video by adding borders or cropping");

  return gst_element_register (plugin, "videobox", GST_RANK_NONE,
      GST_TYPE_VIDEO_BOX);
}

GST_PLUGIN_DEFINE (GST_VERSION_MAJOR,
    GST_VERSION_MINOR,
    videobox,
    "resizes a video by adding borders or cropping",
    plugin_init, VERSION, GST_LICENSE, GST_PACKAGE_NAME, GST_PACKAGE_ORIGIN)